# R code to analyze the Toluca company data
# using simple linear regression

# Save the data file into a directory and use the full path name.
# The file is read without a header row, so column names are supplied here.
toluca.data <- read.table(
  file = "z:/My Documents/teaching/stat_704/tolucadata.txt",
  header = FALSE,
  col.names = c("lotsize", "workhrs")
)

# Attaching the data frame.
# NOTE(review): nothing below relies on the attached copy any more (the model
# and plot pass `data = toluca.data` explicitly). The call is kept only so that
# any other code referring to bare `lotsize` / `workhrs` keeps working;
# consider removing it once that is confirmed.
attach(toluca.data)

# Fitting the regression model (response workhrs, predictor lotsize):
toluca.reg <- lm(workhrs ~ lotsize, data = toluca.data)

# Getting the summary regression output:
summary(toluca.reg)

# Getting the ANOVA table:
anova(toluca.reg)

# Getting the fitted values:
fitted(toluca.reg)

# Getting the residuals:
resid(toluca.reg)

# Getting a 95% confidence interval for the true slope beta_1
# (an indirect way, built by hand as estimate -/+ t quantile * std. error;
# confint(toluca.reg, "lotsize", level = 1 - alpha) is the direct way):
alpha <- 0.05
# Coefficient table from the summary, indexed by name rather than position
# so the slope row and the estimate / standard-error columns are explicit.
coef.table <- coef(summary(toluca.reg))
b1 <- coef.table["lotsize", "Estimate"]
s.b1 <- coef.table["lotsize", "Std. Error"]
error.df <- df.residual(toluca.reg)
t.crit <- qt(1 - alpha / 2, df = error.df)
lower <- b1 - t.crit * s.b1
upper <- b1 + t.crit * s.b1
print(paste(100 * (1 - alpha), "percent CI for slope:", lower, upper))

# Lot sizes at which the intervals below are evaluated. The column name must
# match the predictor name used in the model formula.
xh.values <- data.frame(lotsize = c(65, 100))

# Getting the 90% confidence intervals for the mean at X=65 and X=100:
predict(toluca.reg, xh.values, interval = "confidence", level = 0.90)

# Getting the 90% prediction intervals for a new observation
# with X=65 and X=100:
predict(toluca.reg, xh.values, interval = "prediction", level = 0.90)

# A scatter plot of work hours against lot size (in the formula interface the
# response goes on the left of `~` and the X variable on the right):
plot(workhrs ~ lotsize, data = toluca.data)

# Overlaying the fitted regression line on this scatter plot:
abline(toluca.reg)