# R code to analyze the Toluca company data
# using simple linear regression



# Save the data file into a directory and point `toluca.file` at its
# full path name. The file is read without a header row, and its two
# columns are named lotsize and workhrs (in that order).

toluca.file <- "z:/My Documents/teaching/stat_704/tolucadata.txt"

toluca.data <- read.table(
  file = toluca.file,
  header = FALSE,
  col.names = c("lotsize", "workhrs")
)

# fitting the regression model (response: workhrs, predictor: lotsize):
#
# The data frame is handed to lm() through `data =` rather than being
# attach()ed. The variables in the formula are then looked up inside
# toluca.data itself, so nothing is added to the search path that could
# mask (or be masked by) other objects with the same names.

toluca.reg <- lm(workhrs ~ lotsize, data = toluca.data)

# Standard output for the fitted model, one piece at a time.
# Each call below is a bare top-level expression, so its value is
# auto-printed when the script is run line by line.

# summary regression output:

summary(toluca.reg)

# ANOVA table:

anova(toluca.reg)

# fitted values (one per observation):

fitted.values(toluca.reg)

# residuals (one per observation):

residuals(toluca.reg)

# getting a 95% confidence interval for the true slope beta_1 (an indirect way):
#
#   b1 -/+ t(1 - alpha/2; error df) * s{b1}
#
# alpha is the significance level, so the interval has level 100*(1 - alpha)%.

alpha <- 0.05

# The coefficient table is extracted once and reused. The full component name
# `coefficients` is spelled out (no reliance on partial matching of `$coef`),
# and the slope row / columns are picked by name rather than by position.
toluca.coef <- summary(toluca.reg)$coefficients
b1 <- toluca.coef["lotsize", "Estimate"]
s.b1 <- toluca.coef["lotsize", "Std. Error"]

# residual (error) degrees of freedom of the fit
error.df <- df.residual(toluca.reg)

# the t quantile is the same for both limits, so it is computed once
t.crit <- qt(1 - alpha / 2, df = error.df)
lower <- b1 - t.crit * s.b1
upper <- b1 + t.crit * s.b1
print(paste(100 * (1 - alpha), "percent CI for slope:", lower, upper))

# Interval estimates at X=65 and X=100.
# The new predictor values must sit in a data frame whose column name
# matches the predictor used in the model formula (lotsize); the same
# data frame serves both predict() calls.

xh.values <- data.frame(lotsize = c(65, 100))

# 90% confidence intervals for the mean response at X=65 and X=100:

predict(toluca.reg, newdata = xh.values, interval = "confidence", level = 0.90)

# 90% prediction intervals for a new observation with X=65 and X=100:

predict(toluca.reg, newdata = xh.values, interval = "prediction", level = 0.90)

# a scatter plot (note in R, the X variable listed first):
#
# The response column is named workhrs (see col.names in read.table), not
# workhours. with() evaluates the plot call inside toluca.data, so the
# columns are found whether or not the data frame has been attached, and
# the axis labels are still the plain column names.

with(toluca.data, plot(lotsize, workhrs))

# overlaying the regression line on this scatter plot:
# (abline() takes the intercept and slope from the fitted model object)

abline(toluca.reg)