# R example for ANCOVA  
# We use the Cracker Data from class and Chapter 22  

# Save the data file into a directory and 
# use the full path name:

cracker.data <- read.table(file = "z:/My Documents/teaching/stat_705/crackerdata.txt", 
header=FALSE, col.names = c('sales', 'x', 'treat', 'store'))

# attaching the data frame:

attach(cracker.data)

# Making "treat" a factor:

treat <- factor(treat)


# ########################################################################**  

# Symbolic scatter plot:  

# Setting up the axes and labels:

plot(c(15,35), c(10,50), type='n', ylab='Sales in Promotion Period', xlab='Sales in Preceding Period')

# Making the plots for each treatment:

lines(x[treat==1], sales[treat==1], type='p', col='blue', pch = 1, cex=1.2)
lines(x[treat==2], sales[treat==2], type='p', col='red', pch = 20, cex=1.2)
lines(x[treat==3], sales[treat==3], type='p', col='green', pch = 8, cex=1.2)

legend('topleft', c('Treatment 1','Treatment 2','Treatment 3'), pch=c(1,20,8), col=c('blue','red','green'), cex=1.5)

# This should look like Figure 22.5, p. 927.  
# We don't see evidence of the treatments affecting the covariate at all.  

# ########################################################################**  

# We use the lm() function to do the ANCOVA analysis.  The response is sales and the factor is  
# treat. The covariate here is x.          

# The summary() function gives us least squares estimates of   
# mu_dot, tau_1, tau_2, tau_3, and (most importantly in this case) gamma. 

cracker.fit <- lm(sales ~ treat + x)
summary(cracker.fit)

# Note that R sets the FIRST tau-hat equal to zero whereas SAS sets the LAST tau-hat equal to zero.
# Either way is fine since the least-squares estimates are not unique.

# The test for treatment effects can be done with a reduced vs. full approach:

cracker.fit.reduced <- lm(sales ~ x)
anova(cracker.fit.reduced, cracker.fit)

# What does the test for the effect of "treat" (F = 59.48, P-value < .0001) tell you?                                
     
############################################################################

# Getting Bonferroni CIs:

error.df <- anova(cracker.fit)["Residuals", "Df"]

Xmat <- model.matrix(cracker.fit)
vcov <- anova(cracker.fit)["Residuals", "Mean Sq"]*solve(t(Xmat) %*% Xmat)

# Since R sets tau1_hat = 0:
var.tau1.minus.tau2 <- vcov["treat2","treat2"]
var.tau1.minus.tau3 <- vcov["treat3","treat3"]
var.tau2.minus.tau3 <- vcov["treat2","treat2"] + vcov["treat3","treat3"] - 2*vcov["treat2","treat3"]
  
# Of particular interest are the estimated values of: tau_1 - tau_2, tau_1 - tau_3,  
# and tau_2 - tau_3.                                                                 

# We get 3 Bonferroni simultaneous CIs (here 95%) with the CL and ADJUST = BON options.  

alpha <- 0.05
g <- 3

tau.hats <- c(0,coef(cracker.fit)[2:3])

LCL.12 <- (tau.hats[1]-tau.hats[2]) - qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau1.minus.tau2)
UCL.12 <- (tau.hats[1]-tau.hats[2]) + qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau1.minus.tau2)
LCL.13 <- (tau.hats[1]-tau.hats[3]) - qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau1.minus.tau3)
UCL.13 <- (tau.hats[1]-tau.hats[3]) + qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau1.minus.tau3)
LCL.23 <- (tau.hats[2]-tau.hats[3]) - qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau2.minus.tau3)
UCL.23 <- (tau.hats[2]-tau.hats[3]) + qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau2.minus.tau3)

print(paste("CI for tau1 - tau2:", round(LCL.12,4), round(UCL.12,4)))
print(paste("CI for tau1 - tau3:", round(LCL.13,4), round(UCL.13,4)))
print(paste("CI for tau2 - tau3:", round(LCL.23,4), round(UCL.23,4)))

# The test for the significance of the covariate X can be done with a t-test (t*=8.76)  
# (find this in the summary() output)
# or equivalently, an F-test (F* = 76.72).  
# F* = 76.72 can be found by: anova(cracker.fit)                                        


# ########################################################################**  

# Some Residual Plots to Check the Standard Model Assumptions:  

# Residual Plots and Q-Q plots:  

# Residuals Plotted vs. Fitted Values

plot(fitted.values(cracker.fit), residuals(cracker.fit), xlab="Fitted Values", ylab="Residuals"); abline(h=0)
  
# Residuals Plotted for each Treatment Level  

plot(as.numeric(treat), residuals(cracker.fit), xlab="Treatments", ylab="Residuals"); abline(h=0)

# Normal Q-Q Plot of Residuals  

qqnorm(residuals(cracker.fit))

# ########################################################################**  

# Testing the Equal-Slopes Assumption  

# To test whether the regression lines at each level of the factor are truly     
# parallel (have equal slopes), we simply include the TREAT*X interaction term:  

cracker.fit.unequal <- lm(sales ~ treat + x + treat:x)
summary(cracker.fit.unequal)

# Then do the "full vs. reduced" F-test:

anova(cracker.fit, cracker.fit.unequal)

# The interaction term is NOT significant here (F* = 1.01, p-value = 0.4032),  
# so we fail to reject H_0. We conclude the equal-slopes model is reasonable.  
# There is NOT evidence that the slopes are unequal.