/* SAS example for ANCOVA */
/* We use the Cracker Data from class and Chapter 22 */

data cracker;
   input sales x treat store;
cards;
  38  21  1  1
  39  26  1  2
  36  22  1  3
  45  28  1  4
  33  19  1  5
  43  34  2  1
  38  26  2  2
  38  29  2  3
  27  18  2  4
  34  25  2  5
  24  23  3  1
  32  29  3  2
  31  30  3  3
  21  16  3  4
  28  29  3  5
;
run;

/* ************************************************************************** */

/* Symbolic scatter plot: */

data cplot;
  set cracker;
  if treat=1 then treat1 = sales;
  else if treat=2 then treat2=sales;
  else treat3=sales;
run;
goptions reset=all;
symbol1 c=blue v=circle h=.8;
symbol2 c=red v=dot h=.8;
symbol3 c=green v=star h=.8;
axis1 order=(10 to 50 by 10) label=(a=90 'Sales in Promotion Period');
axis2 order=(15 to 35 by 5) label=('Sales in Preceding Period');
legend1 label=none value=(height=1 font=swiss 'Treatment 1' 'Treatment 2' 'Treatment 3' ) 
        position=(bottom right inside) mode=share cborder=black;
proc gplot data=cplot;
  plot (treat1 treat2 treat3)*x/overlay legend=legend1 vaxis=axis1 haxis=axis2;
run;

/* This should look like Figure 22.5, p. 927. */
/* We don't see evidence of the treatments affecting the covariate at all. */

/* ************************************************************************** */

/* We use PROC GLM to do the ANCOVA analysis.  The response is SALES and the factor is */
/* TREAT (we tell SAS this using a CLASS statement).  The covariate here is X.         */

/* The SOLUTION option to the MODEL statement gives us least squares estimates of  */
/* mu_dot, tau_1, tau_2, tau_3, and (most importantly in this case) gamma.         */

/* We use an LSMEANS statement here.  The STDERR option gives standard errors     */
/* of the estimates.  The PDIFF option here gives P-values and CIs for Bonferroni */
/* comparisons between pairs of levels of TREAT (since we included ADJUST = BON). */

PROC GLM data=cracker;
CLASS TREAT;
MODEL SALES = TREAT X / SOLUTION;
LSMEANS TREAT / STDERR PDIFF CL ADJUST = BON;
OUTPUT OUT = pred p=ybar r=resid;
RUN;

/* What does the test for the effect of TREAT (F = 59.48, P-value < .0001) tell you? */
/* Note that the SAS Type III SS does the proper "reduced vs. full" test here.       */

/* Of particular interest are the estimated values of: tau_1 - tau_2, tau_1 - tau_3, */
/* and tau_2 - tau_3.                                                                */

/* We get Bonferroni simultaneous CIs (here 95%) with the CL and ADJUST = BON options. */

/* The test for the significance of the covariate X can be done with a t-test (t*=8.76) */
/* or equivalently, an F-test (F* = 76.72).                                             */

/* Note the "least squares means" here are the estimated mean sales for each treatment  */
/* WHEN THE COVARIATE IS AT ITS SAMPLE MEAN VALUE!                                      */


/* ************************************************************************** */

/* Some Residual Plots to Check the Standard Model Assumptions: */

/* Residual Plots and Q-Q plots: */

goptions reset=all;

symbol1 v=circle l=32  c = black;
PROC GPLOT data=pred;
 PLOT resid*ybar/vref=0;    /* Residuals Plotted vs. Fitted Values */
 PLOT resid*TREAT/vref=0;   /* Residuals Plotted for each Treatment Level */
run;
PROC UNIVARIATE noprint data=pred;
  QQPLOT resid / normal;    /* Normal Q-Q Plot of Residuals */
run;

/* ************************************************************************** */

/* Testing the Equal-Slopes Assumption */

/* To test whether the regression lines at each level of the factor are truly    */
/* parallel (have equal slopes), we simply include the TREAT*X interaction term: */

PROC GLM data=cracker;
CLASS TREAT;
MODEL SALES = TREAT X TREAT*X/ SOLUTION;
LSMEANS TREAT / STDERR PDIFF CL ADJUST = BON;
RUN;

/* The interaction term is NOT significant here (F* = 1.01, p-value = 0.4032), */
/* so we fail to reject H_0. We conclude the equal-slopes model is reasonable. */
/* There is NOT evidence that the slopes are unequal.                          */