# R example for ANCOVA # We use the Cracker Data from class and Chapter 22 # Save the data file into a directory and # use the full path name: cracker.data <- read.table(file = "z:/My Documents/teaching/stat_705/crackerdata.txt", header=FALSE, col.names = c('sales', 'x', 'treat', 'store')) # attaching the data frame: attach(cracker.data) # Making "treat" a factor: treat <- factor(treat) # ########################################################################** # Symbolic scatter plot: # Setting up the axes and labels: plot(c(15,35), c(10,50), type='n', ylab='Sales in Promotion Period', xlab='Sales in Preceding Period') # Making the plots for each treatment: lines(x[treat==1], sales[treat==1], type='p', col='blue', pch = 1, cex=1.2) lines(x[treat==2], sales[treat==2], type='p', col='red', pch = 20, cex=1.2) lines(x[treat==3], sales[treat==3], type='p', col='green', pch = 8, cex=1.2) legend('topleft', c('Treatment 1','Treatment 2','Treatment 3'), pch=c(1,20,8), col=c('blue','red','green'), cex=1.5) # This should look like Figure 22.5, p. 927. # We don't see evidence of the treatments affecting the covariate at all. # ########################################################################** # We use the lm() function to do the ANCOVA analysis. The response is sales and the factor is # treat. The covariate here is x. # The summary() function gives us least squares estimates of # mu_dot, tau_1, tau_2, tau_3, and (most importantly in this case) gamma. cracker.fit <- lm(sales ~ treat + x) summary(cracker.fit) # Note that R sets the FIRST tau-hat equal to zero whereas SAS sets the LAST tau-hat equal to zero. # Either way is fine since the least-squares estimates are not unique. # The test for treatment effects can be done with a reduced vs. full approach: cracker.fit.reduced <- lm(sales ~ x) anova(cracker.fit.reduced, cracker.fit) # What does the test for the effect of "treat" (F = 59.48, P-value < .0001) tell you? ############################################################################ # Getting Bonferroni CIs: error.df <- anova(cracker.fit)["Residuals", "Df"] Xmat <- model.matrix(cracker.fit) vcov <- anova(cracker.fit)["Residuals", "Mean Sq"]*solve(t(Xmat) %*% Xmat) # Since R sets tau1_hat = 0: var.tau1.minus.tau2 <- vcov["treat2","treat2"] var.tau1.minus.tau3 <- vcov["treat3","treat3"] var.tau2.minus.tau3 <- vcov["treat2","treat2"] + vcov["treat3","treat3"] - 2*vcov["treat2","treat3"] # Of particular interest are the estimated values of: tau_1 - tau_2, tau_1 - tau_3, # and tau_2 - tau_3. # We get 3 Bonferroni simultaneous CIs (here 95%) with the CL and ADJUST = BON options. alpha <- 0.05 g <- 3 tau.hats <- c(0,coef(cracker.fit)[2:3]) LCL.12 <- (tau.hats[1]-tau.hats[2]) - qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau1.minus.tau2) UCL.12 <- (tau.hats[1]-tau.hats[2]) + qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau1.minus.tau2) LCL.13 <- (tau.hats[1]-tau.hats[3]) - qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau1.minus.tau3) UCL.13 <- (tau.hats[1]-tau.hats[3]) + qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau1.minus.tau3) LCL.23 <- (tau.hats[2]-tau.hats[3]) - qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau2.minus.tau3) UCL.23 <- (tau.hats[2]-tau.hats[3]) + qt(1-alpha/(2*g), df=error.df)*sqrt(var.tau2.minus.tau3) print(paste("CI for tau1 - tau2:", round(LCL.12,4), round(UCL.12,4))) print(paste("CI for tau1 - tau3:", round(LCL.13,4), round(UCL.13,4))) print(paste("CI for tau2 - tau3:", round(LCL.23,4), round(UCL.23,4))) # The test for the significance of the covariate X can be done with a t-test (t*=8.76) # (find this in the summary() output) # or equivalently, an F-test (F* = 76.72). # F* = 76.72 can be found by: anova(cracker.fit) # ########################################################################** # Some Residual Plots to Check the Standard Model Assumptions: # Residual Plots and Q-Q plots: # Residuals Plotted vs. Fitted Values plot(fitted.values(cracker.fit), residuals(cracker.fit), xlab="Fitted Values", ylab="Residuals"); abline(h=0) # Residuals Plotted for each Treatment Level plot(as.numeric(treat), residuals(cracker.fit), xlab="Treatments", ylab="Residuals"); abline(h=0) # Normal Q-Q Plot of Residuals qqnorm(residuals(cracker.fit)) # ########################################################################** # Testing the Equal-Slopes Assumption # To test whether the regression lines at each level of the factor are truly # parallel (have equal slopes), we simply include the TREAT*X interaction term: cracker.fit.unequal <- lm(sales ~ treat + x + treat:x) summary(cracker.fit.unequal) # Then do the "full vs. reduced" F-test: anova(cracker.fit, cracker.fit.unequal) # The interaction term is NOT significant here (F* = 1.01, p-value = 0.4032), # so we fail to reject H_0. We conclude the equal-slopes model is reasonable. # There is NOT evidence that the slopes are unequal.