# R code to analyze the Toluca company data
# using simple linear regression.
#
# Objects created by this script:
#   toluca.data - data frame with the columns lotsize and workhrs
#   toluca.reg  - the fitted simple linear regression (workhrs on lotsize)
#   xh.values   - the X values at which mean responses are estimated

# Where the data live: on the web, and (optionally) in a local copy.
# Edit local.path to point at wherever you saved the data file.
data.url <- "http://www.stat.sc.edu/~hitchcock/stat701/tolucadata.txt"
local.path <- "y:/My Documents/teaching/stat_701/tolucadata.txt"

# reading the file: use the local copy when it exists, otherwise read it
# off the web (this may not work in some implementations of R).
# The file has no header row, so the column names are supplied here.
data.source <- if (file.exists(local.path)) local.path else url(data.url)
toluca.data <- read.table(
  file = data.source,
  header = FALSE,
  col.names = c("lotsize", "workhrs")
)

# fitting the regression model
# (the columns are looked up through `data =` rather than attach(), so
# nothing is added to the search path):
toluca.reg <- lm(workhrs ~ lotsize, data = toluca.data)

# getting the summary regression output:
summary(toluca.reg)

# getting the ANOVA table:
anova(toluca.reg)

# getting the fitted values:
fitted(toluca.reg)

# getting the residuals:
resid(toluca.reg)

# getting the 95% confidence intervals for the mean at X=65 and X=100
# (the column name in xh.values must match the predictor name, lotsize):
xh.values <- data.frame(lotsize = c(65, 100))
predict(toluca.reg, xh.values, interval = "confidence", level = 0.95)

# finding the correlation coefficient between workhrs and lotsize:
with(toluca.data, cor(workhrs, lotsize))

# a scatter plot (note in R, the X variable is listed first):
with(toluca.data, plot(lotsize, workhrs))

# overlaying the regression line on this scatter plot:
abline(toluca.reg)