# Data for the ANCOVA example (the Trigonometry scores) # that we studied in class # Entering the data and defining the variables: ########## ## # Reading the data into R: my.datafile <- tempfile() cat(file=my.datafile, " 1 1 3 10 122 2 2 24 34 129 3 3 10 21 114 4 1 5 10 121 5 2 18 27 114 6 3 3 18 114 7 1 6 14 101 8 2 11 20 116 9 3 10 20 110 10 1 11 29 131 11 2 10 13 126 12 3 3 9 94 13 1 11 17 129 14 2 11 19 110 15 3 6 13 102 16 1 13 21 115 17 2 2 28 138 18 3 9 24 128 19 1 7 5 122 20 2 10 13 119 21 3 13 19 111 22 1 12 17 112 23 2 14 21 123 24 3 7 25 119 25 1 13 17 123 26 2 11 14 115 27 3 10 24 120 28 1 8 22 119 29 2 12 17 116 30 3 9 21 112 31 1 9 22 122 32 2 14 16 125 33 3 7 21 105 34 1 10 18 111 35 2 7 10 122 36 3 4 17 120 37 1 6 11 117 38 2 8 18 120 39 3 7 24 120 40 1 13 20 112 41 2 10 13 111 42 3 12 25 118 43 1 7 8 122 44 2 11 17 127 45 3 6 23 110 46 1 11 20 124 47 2 12 13 122 48 3 7 22 127 49 1 5 15 118 50 2 6 13 127 51 1 9 25 113 52 2 3 13 115 53 1 8 25 126 54 2 4 13 112 55 1 2 14 132 56 1 11 17 93 ", sep=" ") options(scipen=999) # suppressing scientific notation trig <- read.table(my.datafile, header=FALSE, col.names=c("OBS", "CLASSTYPE", "PRE", "POST", "IQ")) # Note we could also save the data columns into a file and use a command such as: # trig <- read.table(file = "z:/stat_516/filename.txt", header=FALSE, col.names = c("OBS", "CLASSTYPE", "PRE", "POST", "IQ")) attach(trig) # The data frame called trig is now created, # with five variables, "OBS", "CLASSTYPE", "PRE", "POST", "IQ". ## ######### #################################################################### # A scatter plot of the data, plotted separately by class type: # Setting up the axes and labels: # (For these data, the X-values are all within 0 and 25, and the Y-values are all within 0 and 35.) plot(c(0,25), c(0,35), type='n', ylab='Post-class Score', xlab='Pre-class score') # Making the plots for each treatment: lines(PRE[CLASSTYPE==1], POST[CLASSTYPE==1], type='p', col='blue', pch = 1, cex=1.2) lines(PRE[CLASSTYPE==2], POST[CLASSTYPE==2], type='p', col='red', pch = 20, cex=1.2) lines(PRE[CLASSTYPE==3], POST[CLASSTYPE==3], type='p', col='green', pch = 8, cex=1.2) legend('topleft', c('CLASSTYPE 1','CLASSTYPE 2','CLASSTYPE 3'), pch=c(1,20,8), col=c('blue','red','green'), cex=1) ############################################################################## ############################################################################## # We use the lm() function to do the ANCOVA analysis. The response is POST and the factor is # CLASSTYPE. The covariate here is PRE. # Making "CLASSTYPE" a factor: CLASSTYPE <- factor(CLASSTYPE) # The summary() function gives us least squares estimates of # mu_dot, tau_1, tau_2, tau_3, and (most importantly in this case) gamma. trig.fit <- lm(POST ~ CLASSTYPE + PRE) summary(trig.fit) # Note that R sets the FIRST tau-hat equal to zero whereas SAS sets the LAST tau-hat equal to zero. # Either way is fine since the least-squares estimates are not unique. anova(trig.fit) # The test for treatment effects can be done with a reduced vs. full approach: trig.fit.reduced <- lm(POST ~ PRE) anova(trig.fit.reduced, trig.fit) # The Overall F-test can also be done with a reduced vs. full approach: trig.fit.really.reduced <- lm(POST ~ 1) anova(trig.fit.really.reduced, trig.fit) # Results: What does the Overall F-test (F=8.46) tell you? # What do the (equivalent) tests for the effect of # pre-class score (F=20.57 or t=4.54) tell you? # What does the test for the effect of type of class (F=4.77) tell you? # How do we interpret the estimate (0.773) of beta_1 for our model? #####################################################################################* # We can include multiple covariates by simply adding covariate terms # into the lm() statement. Here IQ is another covariate: trig.fit2 <- lm(POST ~ CLASSTYPE + PRE + IQ) summary(trig.fit2) anova(trig.fit2) #####################################################################################* # We can test for unequal slopes by including an interaction term in the lm() statement. trig.fit3 <- lm(POST ~ CLASSTYPE + PRE + CLASSTYPE:PRE) summary(trig.fit3) anova(trig.fit3) # The interaction term is NOT significant here (F=0.33), so we fail to reject H_0. # We conclude the equal-slopes model is reasonable. There is NOT evidence that # the slopes are unequal.