# Data for the ANCOVA example (the Trigonometry scores) 
# that we studied in class                             

# Entering the data and defining the variables:  
  

##########
##
# Reading the data into R:

my.datafile <- tempfile()
cat(file=my.datafile, "    
 1 1 3 10 122 
 2 2 24 34 129 
 3 3 10 21 114 
 4 1 5 10 121 
 5 2 18 27 114 
 6 3 3 18 114 
 7 1 6 14 101 
 8 2 11 20 116 
 9 3 10 20 110 
 10 1 11 29 131 
 11 2 10 13 126 
 12 3 3 9 94 
 13 1 11 17 129 
 14 2 11 19 110 
 15 3 6 13 102 
 16 1 13 21 115 
 17 2 2 28 138 
 18 3 9 24 128 
 19 1 7 5 122 
 20 2 10 13 119 
 21 3 13 19 111 
 22 1 12 17 112 
 23 2 14 21 123 
 24 3 7 25 119 
 25 1 13 17 123 
 26 2 11 14 115 
 27 3 10 24 120 
 28 1 8 22 119 
 29 2 12 17 116 
 30 3 9 21 112 
 31 1 9 22 122 
 32 2 14 16 125 
 33 3 7 21 105 
 34 1 10 18 111 
 35 2 7 10 122 
 36 3 4 17 120 
 37 1 6 11 117 
 38 2 8 18 120 
 39 3 7 24 120 
 40 1 13 20 112 
 41 2 10 13 111 
 42 3 12 25 118 
 43 1 7 8 122 
 44 2 11 17 127 
 45 3 6 23 110 
 46 1 11 20 124 
 47 2 12 13 122 
 48 3 7 22 127 
 49 1 5 15 118 
 50 2 6 13 127 
 51 1 9 25 113 
 52 2 3 13 115 
 53 1 8 25 126 
 54 2 4 13 112 
 55 1 2 14 132 
 56 1 11 17 93
", sep=" ")

options(scipen=999) # suppressing scientific notation

trig <- read.table(my.datafile, header=FALSE, col.names=c("OBS", "CLASSTYPE", "PRE", "POST", "IQ")) 
  
# Note we could also save the data columns into a file and use a command such as:
# trig <- read.table(file = "z:/stat_516/filename.txt", header=FALSE, col.names = c("OBS", "CLASSTYPE", "PRE", "POST", "IQ"))

attach(trig)

# The data frame called trig is now created, 
# with five variables, "OBS", "CLASSTYPE", "PRE", "POST", "IQ".
##
#########
####################################################################


# A scatter plot of the data, plotted separately by class type:

# Setting up the axes and labels:
# (For these data, the X-values are all within 0 and 25, and the Y-values are all within 0 and 35.)

plot(c(0,25), c(0,35), type='n', ylab='Post-class Score', xlab='Pre-class score')

# Making the plots for each treatment:

lines(PRE[CLASSTYPE==1], POST[CLASSTYPE==1], type='p', col='blue', pch = 1, cex=1.2)
lines(PRE[CLASSTYPE==2], POST[CLASSTYPE==2], type='p', col='red', pch = 20, cex=1.2)
lines(PRE[CLASSTYPE==3], POST[CLASSTYPE==3], type='p', col='green', pch = 8, cex=1.2)

legend('topleft', c('CLASSTYPE 1','CLASSTYPE 2','CLASSTYPE 3'), pch=c(1,20,8), col=c('blue','red','green'), cex=1)

##############################################################################
##############################################################################

# We use the lm() function to do the ANCOVA analysis.  The response is POST and the factor is  
# CLASSTYPE.  The covariate here is PRE. 

# Making "CLASSTYPE" a factor:

CLASSTYPE <- factor(CLASSTYPE)  

# The summary() function gives us least squares estimates of   
# mu_dot, tau_1, tau_2, tau_3, and (most importantly in this case) gamma.                                                                                   

trig.fit <- lm(POST ~ CLASSTYPE + PRE)
summary(trig.fit)

# Note that R sets the FIRST tau-hat equal to zero whereas SAS sets the LAST tau-hat equal to zero.
# Either way is fine since the least-squares estimates are not unique.

anova(trig.fit)

# The test for treatment effects can be done with a reduced vs. full approach:

trig.fit.reduced <- lm(POST ~ PRE)
anova(trig.fit.reduced, trig.fit)

# The Overall F-test can also be done with a reduced vs. full approach:

trig.fit.really.reduced <- lm(POST ~ 1)
anova(trig.fit.really.reduced, trig.fit)

# Results:  What does the Overall F-test (F=8.46) tell you?              
# What do the (equivalent) tests for the effect of                       
# pre-class score (F=20.57 or t=4.54) tell you?                          
# What does the test for the effect of type of class (F=4.77) tell you?  
# How do we interpret the estimate (0.773) of beta_1 for our model?      

#####################################################################################* 

# We can include multiple covariates by simply adding covariate terms  
# into the lm() statement.  Here IQ is another covariate:             

trig.fit2 <- lm(POST ~ CLASSTYPE + PRE + IQ)
summary(trig.fit2)

anova(trig.fit2)

#####################################################################################* 

# We can test for unequal slopes by including an interaction term in the lm() statement. 


trig.fit3 <- lm(POST ~ CLASSTYPE + PRE + CLASSTYPE:PRE)
summary(trig.fit3)

anova(trig.fit3)

# The interaction term is NOT significant here (F=0.33), so we fail to reject H_0.  
# We conclude the equal-slopes model is reasonable.  There is NOT evidence that     
# the slopes are unequal.