# R code to analyze the Toluca company data
# using simple linear regression



# Reading the data ----
#
# The data file has no header row and two columns, which are named
# lotsize and workhrs here.  Two locations are known for it: a local copy
# and a copy on the web.  The local copy is used when it exists; otherwise
# the file is read from the web (this needs an internet connection and may
# not work in some implementations of R).
#
# Edit toluca.path to point at your own saved copy of the file.

toluca.url <- "http://www.stat.sc.edu/~hitchcock/stat701/tolucadata.txt"
toluca.path <- "y:/My Documents/teaching/stat_701/tolucadata.txt"

# read.table() accepts either a file path or a complete URL as `file`.
toluca.source <- if (file.exists(toluca.path)) toluca.path else toluca.url

toluca.data <- read.table(
  file = toluca.source,
  header = FALSE,
  col.names = c("lotsize", "workhrs")
)

# Fitting the regression model ----
#
# The data frame is passed through `data =` rather than attach()ed, so the
# column names cannot mask (or be masked by) other objects on the search
# path.

toluca.reg <- lm(workhrs ~ lotsize, data = toluca.data)

# getting the summary regression output:

summary(toluca.reg)

# getting the ANOVA table:

anova(toluca.reg)

# getting the fitted values:

fitted(toluca.reg)

# getting the residuals:

resid(toluca.reg)

# getting the 95% confidence intervals for the mean at X=65 and X=100:
# (the column name in the new data frame must match the predictor name
# used in the model formula)

xh.values <- data.frame(lotsize = c(65, 100))

predict(toluca.reg, newdata = xh.values, interval = "confidence", level = 0.95)

# finding the correlation coefficient between workhrs and lotsize:

with(toluca.data, cor(workhrs, lotsize))

# a scatter plot (note in R, the X variable is listed first):

with(toluca.data, plot(lotsize, workhrs))

# overlaying the regression line on this scatter plot:

abline(toluca.reg)