/* SAS Analysis of a Two-Factor Study                */

/* We use the Castle Bakery data found in Chapter 19 */
/* and used for the example from class.              */

/* Build the BAKERY data set from in-stream records.  Each record lists,  */
/* in order: sales, height level, width level, and store number.          */
DATA bakery;
  INPUT sales height width store;
  DATALINES;
47 1 1 1
43 1 1 2
46 1 2 1
40 1 2 2
62 2 1 1
68 2 1 2
67 2 2 1
71 2 2 2
41 3 1 1
39 3 1 2
42 3 2 1
46 3 2 2
;
RUN;


/* Fit the two-factor model with interaction to the bakery data, print the */
/* least-squares means for each effect, and write the fitted values (ybar) */
/* and residuals (resid) to the data set PRED.                             */
PROC GLM data=bakery;
  CLASS height width;
  MODEL sales = height width height*width;
  LSMEANS height width height*width;
  OUTPUT out=pred p=ybar r=resid;
run;
quit;  /* PROC GLM is interactive: RUN leaves it active, QUIT ends the step */

/* The CLASS statement tells SAS that height and width are both factors.          */
/* The LSMEANS statement here produces the least-squares mean sales for each      */
/* level of height, each level of width, and each height-width combination.       */
/* Because these data are balanced, these equal the ordinary sample means.        */

/* The ANOVA table is printed out in the PROC GLM output.  The first thing we do */
/* is to test for significant interaction.  The SAS output shows the P-value for */
/* this test is 0.3747, so at a 0.05 significance level, we have no significant  */
/* interaction between height and width.                                         */

/* Since there are no significant interaction effects, we may test for the        */
/* effects of the height factor and of the width factor directly.  At alpha=.05,  */
/* there is a significant effect on sales due to display height (P-value < .0001) */
/* implying that the mean sales are significantly different at the various levels */
/* of height.  However, there is no significant effect on sales due to width      */
/* (P-value = 0.3226).                                                            */

/* Note that SAS gives both Type I SS and Type III SS.  Since these data are     */
/* balanced (same number of observations in each "cell"), these outputs are the  */
/* same.  We will later look at an example of unbalanced data.                   */

/***********************************************************************************/

/* Interaction plots for the Castle Bakery data:                                   */
/* These use the "pred" data set created in the OUTPUT statement of PROC GLM above */ 

/* We can plot the fitted mean sales (ybar) against height for each value of width: */

/* Interaction plot: fitted values (ybar) against height, with one joined */
/* line per value of width (circle markers for the first, stars for the   */
/* second).                                                               */
symbol1 interpol=join value=circle line=32 color=black;
symbol2 interpol=join value=star   line=32 color=black;
PROC GPLOT data=pred;
  PLOT ybar*height = width;
run;
quit;  /* GPLOT supports RUN groups and stays active after RUN; QUIT ends it */

/* or we can plot the fitted mean sales (ybar) against width for each value of height: */

/* Interaction plot: fitted values (ybar) against width, with one joined  */
/* line per value of height.  A third SYMBOL definition is needed because */
/* height has three levels in these data.                                 */
symbol1 interpol=join value=circle line=32 color=black;
symbol2 interpol=join value=star   line=32 color=black;
symbol3 interpol=join value=plus   line=32 color=black;
PROC GPLOT data=pred;
  PLOT ybar*width = height;
run;
quit;  /* GPLOT supports RUN groups and stays active after RUN; QUIT ends it */


/* These plots show graphically (not formally) that there is some interaction       */
/* between height and width, but it is very mild.  In fact, the formal              */
/* hypothesis test for interaction reveals that the interaction is not significant. */

/***********************************************************************************/

/* Plots to Check Model Assumptions:  */

goptions reset=all;

/* The above line resets the graphical plotting options. */

/* Residuals against fitted values, with a horizontal reference line at 0. */
symbol1 value=circle line=32 color=black;
PROC GPLOT data=pred;
  PLOT resid*ybar / vref=0;
run;
quit;  /* GPLOT supports RUN groups and stays active after RUN; QUIT ends it */

/* Normal Q-Q plot of the residuals.  DATA=pred is named explicitly so the */
/* step does not depend on PRED being the most recently created data set.  */
PROC UNIVARIATE data=pred noprint;
  QQPLOT resid / normal;
run;

/* This produces a residual plot (against fitted values) and a normal Q-Q plot */
/* of the residuals.  We see no evidence of nonconstant error variance in the  */
/* residual plot, but there may be some non-normality of errors, based on the  */
/* Q-Q plot.                                                                   */

/* These figures may be compared to those in Figure 19.10 (pg. 843) of the book. */


/***********************************************************************************/

/* Further Investigation of Significant Factor Effects:  */

/* *********************************************************************************** */

/* Investigating particular differences among factor level means */


/* The CL option to the LSMEANS statement produces (here, 95%) confidence */
/* intervals for each population factor level mean.                       */

/* Refit the two-factor model and request confidence limits (CL) for the */
/* height and width least-squares means; ALPHA=0.05 gives 95% limits.    */
PROC GLM DATA = bakery;
  CLASS height width;
  MODEL sales = height width height*width;
  LSMEANS height width / CL ALPHA = 0.05;
run;
quit;  /* PROC GLM is interactive: RUN leaves it active, QUIT ends the step */

/* ***************************************************************************** */

/* Contrasts:  CIs and Hypothesis Tests */

/* Example:  We want a 95% CI for the difference between the mean sales at  */
/* the middle height and the average of the mean sales at the other two     */
/* heights.                                                                 */

/* The relevant contrast here is:  -(1/2)mu_1-dot + mu_2-dot - (1/2)mu_3-dot     */

/* The ESTIMATE statement defines the coefficients of the contrast (these must   */
/* be in the proper order!) and gives the test statistic and P-value of the test */
/* for whether the contrast equals zero.                                         */
/* The CLPARM option to the MODEL statement tells SAS to give a CI (by default,  */
/* a 95% CI) for the contrast.                                                   */

/* Estimate the contrast -(1/2)*height1 + height2 - (1/2)*height3.  The    */
/* coefficients follow the order of the height levels.  CLPARM on the      */
/* MODEL statement requests confidence limits for the estimate.            */
PROC GLM DATA = bakery;
  CLASS height width;
  MODEL sales = height width height*width / CLPARM;
  LSMEANS height width;
  ESTIMATE 'MiddleVsOthers' height -0.5 1 -0.5;
RUN;
QUIT;  /* PROC GLM is interactive: RUN leaves it active, QUIT ends the step */

/* ***************************************************************************** */

/* Multiple Comparison Procedures */

/* In the MEANS statement, the CLDIFF option gives CIs for all pairwise height level */
/* mean differences, based on the Tukey procedure.  The ALPHA=0.05 ensures that      */
/* the family confidence level is 95%.  SAS also provides an indication of which     */
/* pairs of treatment means are judged to be significantly different, at the         */
/* 0.05 family significance level, by the Tukey procedure.                           */

/* Tukey multiple comparisons of the height level means at family level    */
/* ALPHA=0.05, requested twice: once in the default layout and once with   */
/* CLDIFF to show confidence intervals for each pairwise difference.       */
PROC GLM DATA = bakery;
  CLASS height width;
  MODEL sales = height width height*width;
  LSMEANS height width height*width;
  MEANS height / TUKEY ALPHA=0.05;        /* Produces simpler output for Tukey test */
  MEANS height / TUKEY ALPHA=0.05 CLDIFF; /* Produces Tukey CIs and testing results */
run;
quit;  /* Last step in the file: without QUIT, PROC GLM would remain active */

/* We could change TUKEY to SCHEFFE or BON to get the Scheffe or Bonferroni results, */
/* but if we're interested in all pairwise comparisons, these will not be as         */
/* efficient as the Tukey procedure.                                                 */

/* ***************************************************************************** */