/* Data for the ANCOVA example (the Trigonometry scores) */
/* that we studied in class                              */

/* Entering the data and naming the variables:                                   */
/*   OBS       = observation (student) number                                    */
/*   CLASSTYPE = type of class (the factor; levels 1, 2, 3)                      */
/*   PRE       = pre-class trig score (covariate)                                */
/*   POST      = post-class trig score (response)                                */
/*   IQ        = IQ score (second covariate, used in a later model)              */
/* The INPUT statement uses simple list input, so each observation must sit on   */
/* its own data line, and the data must begin on the line after CARDS;           */

DATA trigclass;
    INPUT OBS CLASSTYPE PRE POST IQ;
    CARDS;
1 1 3 10 122
2 2 24 34 129
3 3 10 21 114
4 1 5 10 121
5 2 18 27 114
6 3 3 18 114
7 1 6 14 101
8 2 11 20 116
9 3 10 20 110
10 1 11 29 131
11 2 10 13 126
12 3 3 9 94
13 1 11 17 129
14 2 11 19 110
15 3 6 13 102
16 1 13 21 115
17 2 2 28 138
18 3 9 24 128
19 1 7 5 122
20 2 10 13 119
21 3 13 19 111
22 1 12 17 112
23 2 14 21 123
24 3 7 25 119
25 1 13 17 123
26 2 11 14 115
27 3 10 24 120
28 1 8 22 119
29 2 12 17 116
30 3 9 21 112
31 1 9 22 122
32 2 14 16 125
33 3 7 21 105
34 1 10 18 111
35 2 7 10 122
36 3 4 17 120
37 1 6 11 117
38 2 8 18 120
39 3 7 24 120
40 1 13 20 112
41 2 10 13 111
42 3 12 25 118
43 1 7 8 122
44 2 11 17 127
45 3 6 23 110
46 1 11 20 124
47 2 12 13 122
48 3 7 22 127
49 1 5 15 118
50 2 6 13 127
51 1 9 25 113
52 2 3 13 115
53 1 8 25 126
54 2 4 13 112
55 1 2 14 132
56 1 11 17 93
;
run;

/* Symbolic scatter plot: */

/* Split POST into one variable per class type so that PROC GPLOT can overlay    */
/* three series, each drawn with its own SYMBOL definition.                      */
data cplot;
    set trigclass;
    if classtype=1 then type1 = post;
    else if classtype=2 then type2 = post;
    /* Test for level 3 explicitly so that a missing or unexpected CLASSTYPE     */
    /* value is left out of the plot instead of being drawn as Treatment 3.      */
    else if classtype=3 then type3 = post;
run;

goptions reset=all;

symbol1 c=blue  v=circle h=.8;
symbol2 c=red   v=dot    h=.8;
symbol3 c=green v=star   h=.8;

/* Vertical axis: POST ranges from 5 to 34 in these data, so use 0 to 35 with    */
/* major ticks every 5 units (a step of 35 would leave ticks only at 0 and 35).  */
axis1 order=(0 to 35 by 5) label=(a=90 'Postclass Trig Score');

/* Horizontal axis: PRE ranges from 2 to 24 in these data. */
axis2 order=(0 to 25 by 5) label=('Preclass Trig Score');

legend1 label=none
        value=(height=1 font=swiss 'Treatment 1' 'Treatment 2' 'Treatment 3')
        position=(bottom right inside)
        mode=share
        cborder=black;

proc gplot data=cplot;
    plot (type1 type2 type3)*pre / overlay legend=legend1 vaxis=axis1 haxis=axis2;
run;
quit;   /* GPLOT supports run-group processing; QUIT ends the procedure. */

/* We use PROC GLM to do the ANCOVA analysis.  The response is POST and the factor is    */
/* CLASSTYPE (we tell SAS this using a CLASS statement).  The covariate here is PRE.     */
/* The SOLUTION option to the MODEL statement gives us least squares estimates of        */
/* beta_0, tau_1, tau_2, tau_3, and (most importantly in this case) beta_1.              */

/* We use an LSMEANS statement here since the data are unbalanced (different number of   */
/* students in each CLASSTYPE: 21, 19 and 16 for types 1, 2 and 3).                      */
/* The STDERR option gives standard errors of the estimates.                             */
/* The PDIFF option gives P-values for LSD-type comparisons between pairs of levels of   */
/* CLASSTYPE.                                                                            */

PROC GLM data=trigclass;
    CLASS CLASSTYPE;
    MODEL POST = CLASSTYPE PRE / SOLUTION;
    LSMEANS CLASSTYPE / STDERR PDIFF;
RUN;
QUIT;   /* GLM is an interactive procedure; QUIT terminates it explicitly. */

/* Results:  What does the Overall F-test (F=8.46) tell you?                    */
/*           What do the (equivalent) tests for the effect of                   */
/*           pre-class score (F=20.57 or t=4.54) tell you?                      */
/*           What does the test for the effect of type of class (F=4.77) tell you? */
/*           How do we interpret the estimate (0.773) of beta_1 for our model?  */

/***************************************************************************************/

/* We can include multiple covariates by simply adding covariate terms */
/* into the MODEL statement.  Here IQ is another covariate:            */

PROC GLM data=trigclass;
    CLASS CLASSTYPE;
    MODEL POST = CLASSTYPE PRE IQ / SOLUTION;
    LSMEANS CLASSTYPE / STDERR PDIFF;
RUN;
QUIT;

/***************************************************************************************/

/************************************************************************************/

/* A look at the data with least-squares lines by treatment group using PROC SGPLOT */

proc sgplot data=trigclass;
    reg y=post x=pre / group=classtype;
run;

/* Is there enough evidence to reject the equal-slopes assumption? */

/************************************************************************************/

/* We can test for unequal slopes by including an interaction term in the MODEL statement. */

PROC GLM data=trigclass;
    CLASS CLASSTYPE;
    MODEL POST = CLASSTYPE PRE CLASSTYPE*PRE / SOLUTION;
    LSMEANS CLASSTYPE / STDERR PDIFF;
RUN;
QUIT;

/* The interaction term is NOT significant here (F=0.33), so we fail to reject H_0. */
/* We conclude the equal-slopes model is reasonable.  There is NOT evidence that    */
/* the slopes are unequal.                                                          */