/* Data for the ANCOVA example (the Trigonometry scores) */
/* that we studied in class                              */

/* Entering the data and naming the variables:           */

/* Build the TRIGCLASS data set from the in-stream records below.        */
/* Each record is read with list input (blank-delimited) in the order:   */
/*   OBS  CLASSTYPE  PRE  POST  IQ                                       */
/* All five variables are numeric.                                       */
data trigclass;
  input OBS
        CLASSTYPE
        PRE
        POST
        IQ;
  datalines;
 1 1  3 10 122
 2 2 24 34 129
 3 3 10 21 114
 4 1  5 10 121
 5 2 18 27 114
 6 3  3 18 114
 7 1  6 14 101
 8 2 11 20 116
 9 3 10 20 110
10 1 11 29 131
11 2 10 13 126
12 3  3  9  94
13 1 11 17 129
14 2 11 19 110
15 3  6 13 102
16 1 13 21 115
17 2  2 28 138
18 3  9 24 128
19 1  7  5 122
20 2 10 13 119
21 3 13 19 111
22 1 12 17 112
23 2 14 21 123
24 3  7 25 119
25 1 13 17 123
26 2 11 14 115
27 3 10 24 120
28 1  8 22 119
29 2 12 17 116
30 3  9 21 112
31 1  9 22 122
32 2 14 16 125
33 3  7 21 105
34 1 10 18 111
35 2  7 10 122
36 3  4 17 120
37 1  6 11 117
38 2  8 18 120
39 3  7 24 120
40 1 13 20 112
41 2 10 13 111
42 3 12 25 118
43 1  7  8 122
44 2 11 17 127
45 3  6 23 110
46 1 11 20 124
47 2 12 13 122
48 3  7 22 127
49 1  5 15 118
50 2  6 13 127
51 1  9 25 113
52 2  3 13 115
53 1  8 25 126
54 2  4 13 112
55 1  2 14 132
56 1 11 17  93
;
run;

/* Symbolic scatter plot: */

/* Copy TRIGCLASS and spread POST across three columns, one per class   */
/* type, so that each class can be drawn with its own plotting symbol.  */
/* For any given row only one of type1/type2/type3 is filled in; the    */
/* other two stay missing.                                              */
data cplot;
  set trigclass;
  select (classtype);
    when (1)  type1 = post;
    when (2)  type2 = post;
    otherwise type3 = post;   /* every remaining value is treated as class 3 */
  end;
run;
/* Start from a clean slate: reset graphics options and cancel any      */
/* SYMBOL, AXIS and LEGEND definitions left over from earlier plots.    */
goptions reset=all;

/* One plotting symbol per class type; SYMBOLn pairs with the n-th      */
/* y-variable (type1, type2, type3) in the PLOT request below.          */
symbol1 c=blue v=circle h=.8;
symbol2 c=red v=dot h=.8;
symbol3 c=green v=star h=.8;

/* Vertical axis (post-class score): major ticks every 5 points from 0  */
/* to 35.  A step of 35 would draw ticks only at the two endpoints.     */
axis1 order=(0 to 35 by 5) label=(a=90 'Postclass Trig Score');

/* Horizontal axis (pre-class score): major ticks every 5 points.       */
axis2 order=(0 to 25 by 5) label=('Preclass Trig Score');

/* Legend placed inside the plot area at the bottom right, with the     */
/* three entries relabelled as treatments.                              */
legend1 label=none value=(height=1 font=swiss 'Treatment 1' 'Treatment 2' 'Treatment 3' ) 
        position=(bottom right inside) mode=share cborder=black;

/* Overlay the three POST-by-PRE scatters on a single set of axes.      */
proc gplot data=cplot;
  plot (type1 type2 type3)*pre/overlay legend=legend1 vaxis=axis1 haxis=axis2;
run;
quit;   /* GPLOT supports RUN-group processing; QUIT ends the procedure */


/* We use PROC GLM to do the ANCOVA analysis.  The response is POST and the factor is */
/* CLASSTYPE (we tell SAS this using a CLASS statement).  The covariate here is PRE.  */

/* The SOLUTION option to the MODEL statement gives us least squares estimates of     */
/* beta_0, tau_1, tau_2, tau_3, and (most importantly in this case) beta_1.           */

/* We use an LSMEANS statement here since the data are unbalanced (different number of     */
/* students in each CLASSTYPE).  The STDERR option gives standard errors of the estimates. */
/* The PDIFF option gives P-values for LSD-type comparisons between pairs of levels of     */
/* CLASSTYPE.                                                                              */

/* ANCOVA fit: POST modelled on the factor CLASSTYPE plus the covariate PRE. */
proc glm data=trigclass;
  class CLASSTYPE;                          /* treat CLASSTYPE as categorical     */
  model POST = CLASSTYPE PRE / solution;    /* SOLUTION prints parameter estimates */
  lsmeans CLASSTYPE / stderr pdiff;         /* adjusted means, SEs, pairwise p-values */
run;

/* Results:  What does the Overall F-test (F=8.46) tell you?             */
/* What do the (equivalent) tests for the effect of                      */
/* pre-class score (F=20.57 or t=4.54) tell you?                         */
/* What does the test for the effect of type of class (F=4.77) tell you? */
/* How do we interpret the estimate (0.773) of beta_1 for our model?     */


/***************************************************************************************/

/* We can include multiple covariates by simply adding covariate terms */
/* into the MODEL statement.  Here IQ is another covariate:            */

/* ANCOVA fit with two covariates: POST on CLASSTYPE, adjusting for PRE and IQ. */
proc glm data=trigclass;
  class CLASSTYPE;                            /* treat CLASSTYPE as categorical     */
  model POST = CLASSTYPE PRE IQ / solution;   /* SOLUTION prints parameter estimates */
  lsmeans CLASSTYPE / stderr pdiff;           /* adjusted means, SEs, pairwise p-values */
run;

/***************************************************************************************/


/************************************************************************************/
/* A look at the data with least-squares lines by treatment group using PROC SGPLOT */

/* Scatter of POST against PRE with a separate fitted regression line */
/* for each level of CLASSTYPE.                                       */
proc sgplot data=trigclass;
  reg x=pre y=post / group=classtype;
run;

/* Is there enough evidence to reject the equal-slopes assumption?                  */
/************************************************************************************/


/* We can test for unequal slopes by including an interaction term in the MODEL statement.*/


/* Unequal-slopes model: adding CLASSTYPE*PRE lets each class type have */
/* its own slope on PRE.  The F-test for the CLASSTYPE*PRE term is the  */
/* test of the equal-slopes assumption.                                 */
PROC GLM data=trigclass;
  CLASS CLASSTYPE;                                        /* categorical factor */
  MODEL POST = CLASSTYPE PRE CLASSTYPE*PRE / SOLUTION;    /* print parameter estimates */
  LSMEANS CLASSTYPE / STDERR PDIFF;                       /* adjusted means, SEs, pairwise p-values */
RUN;
QUIT;   /* PROC GLM is interactive: RUN alone leaves it active, QUIT ends it */

/* The interaction term is NOT significant here (F=0.33), so we fail to reject H_0  */
/* (that the slopes are equal across class types).  There is no evidence that the   */
/* slopes are unequal, so we conclude that the equal-slopes model is reasonable.    */