/* SAS code to analyze data from Latin Square Design */

/* We use the bank teller productivity data from class and chapter 28 */

/* Build the BANK data set: one observation per line, read with list     */
/* input in the order  productivity  week  day  music.                   */
/* The 25 records cover every week (1-5) by day (1-5) combination once,  */
/* with the music type (1-5) assigned to each combination.               */
DATA bank;
   INPUT productivity week day music;
   DATALINES;
18 1 1 4
13 2 1 3
7 3 1 1
17 4 1 5
21 5 1 2
17 1 2 3
34 2 2 2
29 3 2 4
13 4 2 1
26 5 2 5
14 1 3 1
21 2 3 5
32 3 3 2
24 4 3 3
26 5 3 4
21 1 4 2
16 2 4 1
27 3 4 5
31 4 4 4
31 5 4 3
17 1 5 5
15 2 5 4
13 3 5 3
25 4 5 2
7 5 5 1
;
RUN;

/* Latin square ANOVA for the BANK data: productivity is modeled with    */
/* additive main effects for the two blocking factors (week, day) and    */
/* the treatment factor (music).                                         */
/*   MEANS  - Tukey pairwise comparisons of the music means, reported as */
/*            confidence intervals for differences (CLDIFF) at a 0.10    */
/*            family significance level.                                 */
/*   OUTPUT - writes data set PRED holding the fitted values (YBAR) and  */
/*            residuals (resid) used later for the diagnostic plots.     */
PROC GLM DATA = bank;
   CLASS week day music;
   MODEL productivity = week day music;
   MEANS music / TUKEY CLDIFF ALPHA=0.10;
   OUTPUT OUT=pred P=YBAR R=resid;
RUN;
/* PROC GLM is interactive: RUN executes the statements above but leaves */
/* the procedure active.  QUIT ends it explicitly so the step does not   */
/* depend on whatever step happens to follow.                            */
QUIT;

/* Main effects plot to visually examine differences */
/* in mean response across treatments:               */
/* Sort by treatment so that PROC MEANS can use BY-group processing.     */
/* Note: this sorts BANK in place.                                       */
PROC SORT DATA = bank;
   BY music;
RUN;

/* SMPMEANS gets one row per music level; TrtMeans is the mean           */
/* productivity at that level.                                           */
PROC MEANS DATA = bank;
   BY music;
   VAR productivity;
   OUTPUT OUT = smpmeans MEAN(productivity) = TrtMeans;
RUN;

/* Compute the OVERALL sample mean productivity from the data and store  */
/* it in the macro variable GRANDMEAN, so the reference line on the plot */
/* never has to be copied by hand from the PROC GLM output.              */
PROC SQL NOPRINT;
   SELECT MEAN(productivity) INTO :grandmean FROM bank;
QUIT;
%LET grandmean = &grandmean;   /* re-assignment strips leading/trailing blanks */

GOPTIONS RESET=ALL;
SYMBOL1 I=JOIN V=CIRCLE L=32 C=BLACK;
PROC GPLOT DATA=smpmeans;
   PLOT TrtMeans*music / VREF=&grandmean;
RUN;
QUIT;

/* Note we are calculating and plotting the mean productivity at each   */
/* level of "music" here.  The horizontal reference line ("vref=") is   */
/* drawn at the OVERALL sample mean productivity, which is computed     */
/* above from the data.  For these data it equals 20.6, the same value  */
/* reported on the main PROC GLM output with the ANOVA table.           */

/* ************************************************************** */

/* We see from the PROC GLM output that there are significant       */
/* differences among the music types in terms of mean productivity. */
/* (F* = 10.58, P-value = 0.0007)  The Tukey output shows which     */
/* pairs of music types are significantly different from each other */
/* in terms of mean productivity.                                   */



/* **************** Checking Model Assumptions: ***************** */

/* Residual Plots and Q-Q plots: */

/* Reset graphics options, then plot residuals against fitted values     */
/* (points only) with a horizontal reference line at zero.               */
GOPTIONS RESET=ALL;
SYMBOL1 C=BLACK V=CIRCLE L=32;
PROC GPLOT DATA=pred;
   PLOT resid*ybar / VREF=0;
RUN;

/* Normal Q-Q plot of the residuals; NOPRINT suppresses the usual        */
/* PROC UNIVARIATE summary tables so only the plot is produced.          */
PROC UNIVARIATE DATA=pred NOPRINT;
   QQPLOT resid / NORMAL;
RUN;


/*** Example on Table 28.8 data:  Latin Square with Replication ***/

/* Build the RETRAIN data set: one observation per line, read with list  */
/* input in the order  score  IQ  age  method.                           */
/* Each of the nine IQ-by-age combinations appears twice (two            */
/* replicates), both times with the same method.                         */
DATA retrain;
   INPUT score IQ age method;
   DATALINES;
19 1 1 2
16 1 1 2
24 2 1 3
22 2 1 3
10 3 1 1
14 3 1 1
20 1 2 1
24 1 2 1
14 2 2 2
15 2 2 2
12 3 2 3
13 3 2 3
25 1 3 3
21 1 3 3
14 2 3 1
14 2 3 1
7 3 3 2
4 3 3 2
;
RUN;

/* Testing lack-of-fit of the additive model: */

/* The MSE of this model will be the Mean Square for Pure Error (MSPE): */

/* Full model with all main effects and interactions (IQ|age|method).    */
/* It fits a separate mean to every observed cell, so its error sum of   */
/* squares is the variation between the replicates within each cell.     */
/* DATA= is given explicitly rather than relying on the most recently    */
/* created data set, so the step still analyzes RETRAIN if other         */
/* data-creating steps are later inserted above it.                      */
PROC GLM DATA = retrain;
   CLASS IQ age method;
   MODEL score = IQ|age|method;
RUN;
QUIT;   /* PROC GLM is interactive; QUIT ends it explicitly */

/* MSPE = 36.0 / 9 = 4.0 here. */

/* Using the additive model: */

/* Additive (main-effects-only) Latin square model for the RETRAIN data. */
/* DATA= is given explicitly rather than relying on the most recently    */
/* created data set, so the step still analyzes RETRAIN if other         */
/* data-creating steps are later inserted above it.                      */
PROC GLM DATA = retrain;
   CLASS IQ age method;
   MODEL score = IQ age method;
RUN;
QUIT;   /* PROC GLM is interactive; QUIT ends it explicitly */

/* The SSE from the additive model is SSLF + SSPE */

/* So SSLF = 52.33 - 36 = 16.33 */
/* df(LF) = 11 - 9 = 2 */
/* So MSLF = 16.33/2 = 8.17 */


/* So the "lack-of-fit" F-statistic is F* = 8.17/4.0 = 2.04,                      */
/* which is not significant at the 0.05 level since 2.04 < F(0.95, 2, 9) = 4.26.  */
/* The additive model is reasonable to use.                                       */