/* SAS example to analyze Nested Design */

/* these are the training-school data from class and Chapter 26 */

data trschool;
  /* Each observation is a score followed by the school it came from, the  */
  /* instructor within that school, and the replicate number.  The double  */
  /* trailing @@ holds the input line, so the two replicates recorded for  */
  /* one instructor can sit side by side on a single data line.            */
  input SCORE SCHOOL INSTRUCTOR OBS @@;
  datalines;
25 1 1 1   29 1 1 2
14 1 2 1   11 1 2 2
11 2 1 1    6 2 1 2
22 2 2 1   18 2 2 2
17 3 1 1   20 3 1 2
 5 3 2 1    2 3 2 2
;
run;

/* We run PROC GLM with SCHOOL and INSTRUCTOR as the two factors listed */
/* in the CLASS statement. In the MODEL statement, we indicate that the */
/* levels of INSTRUCTOR are nested within the levels of SCHOOL with the */
/* INSTRUCTOR(SCHOOL) syntax for the second factor.                     */

proc glm data=trschool;
  class SCHOOL INSTRUCTOR;

  /* INSTRUCTOR(SCHOOL): instructor levels are nested within school levels. */
  model SCORE = SCHOOL INSTRUCTOR(SCHOOL);

  /* Save the fitted values (ybar) and residuals (resid) in data set PRED,  */
  /* which the residual plots later in this file read.                      */
  output out=pred
         predicted=ybar
         residual=resid;
run;

/* We see that the 3 schools differ in mean score (F* = 11.18, P-value = .0095) */
/* and that instructors within at least one school have different mean scores   */
/* (F* = 27.02, P-value = .0007).                                               */

/* ************************************************************************* */

/* To determine in WHICH schools the instructor effects are significant, we    */
/* further decompose the SSB(A) into SSB(A_1), SSB(A_2), and SSB(A_3).         */

proc glm data=trschool;
  /* Fit a separate one-way ANOVA on INSTRUCTOR for every school.  BY-group */
  /* processing needs the rows in SCHOOL order, which the in-stream data    */
  /* already are.                                                           */
  by SCHOOL;
  class INSTRUCTOR;
  model SCORE = INSTRUCTOR;
run;

/* Note SAS gives (over 3 pages) the following results:                              */
/* Atlanta school: MSB(A_1)=210.25 => F* = 210.25/7 = 30.0  (we divide this by hand) */
/* Chicago school: MSB(A_2)=132.25 => F* = 132.25/7 = 18.9  (we divide this by hand) */
/* San Fran school: MSB(A_3)=225.0 => F* = 225/7 = 32.14    (we divide this by hand) */
/* Note that each of these is divided by the original MSE of 7.                      */

/* Since F(0.95; 1, 6) = 5.99 and each of the three F* values above exceeds it,  */
/* we conclude at the 0.05 level that instructors have different mean scores     */
/* within the Atlanta school, within the Chicago school, and within the          */
/* San Francisco school.  By the Bonferroni inequality, the FAMILY significance  */
/* level for this SET of three tests is at most 3(0.05) = 0.15.                  */

/* ************************************************************************** */

/* Some Residual Plots to Check the Standard Model Assumptions: */

/* Residual Plots and Q-Q plots: */

/* Return all graphics options and global statements to their defaults. */
goptions reset=all;

/* Plotting symbol used by both residual plots: black open circles. */
symbol1 value=circle
        line=32
        color=black;

proc gplot data=pred;
  /* Residuals against fitted values, with a reference line at zero. */
  plot resid*ybar / vref=0;

  /* Residuals for each SCHOOL level, with a reference line at zero. */
  plot resid*SCHOOL / vref=0;
run;

proc univariate data=pred noprint;
  /* Normal Q-Q plot of the residuals. */
  qqplot resid / normal;
run;

/* ****************************************************** */

/* Further Investigation of Treatment Means: */


/* Tukey Simultaneous 90% CIs for differences in pairs of school mean scores can be found by: */

proc glm data=trschool;
  class SCHOOL INSTRUCTOR;
  model SCORE = SCHOOL INSTRUCTOR(SCHOOL);

  /* ALPHA=0.10 yields 90% simultaneous intervals; CLDIFF reports them as */
  /* confidence limits on each pairwise difference of school means.       */
  means SCHOOL / tukey
                 cldiff
                 alpha=0.10;
run;

/* Getting the Bonferroni CIs at the bottom of p. 1102 is best done by hand, since the relevant */
/* mean differences are only a few of the pairwise comparisons among all (3)(2) = 6 treatments. */


/**************************************************************/

/* Example of Partially Nested Design  from Sec. 26.9 */

data decision;
  /* Each observation is a score followed by the levels of factors a, b    */
  /* and c and the replicate number.  The double trailing @@ holds the     */
  /* input line, so the two replicates of one a-b-c cell share a line.     */
  input score a b c rep @@;
  label a = 'Nationality'
        b = 'Size'
        c = 'Observer';
  datalines;
16 1 1 1 1   20 1 1 1 2
14 1 1 2 1   19 1 1 2 2
 7 2 1 1 1    5 2 1 1 2
 4 2 1 2 1    9 2 1 2 2
21 1 2 1 1   25 1 2 1 2
28 1 2 2 1   19 1 2 2 2
11 2 2 1 1   17 2 2 1 2
12 2 2 2 1   15 2 2 2 2
;
run;

/* Copy SCORE into one column per (a, c) combination so that every        */
/* nationality-by-observer group can be drawn with its own symbol.        */
data plot;
  set decision;
  if a=1 then do;
    if c=1 then a1c1=score;
    else if c=2 then a1c2=score;
  end;
  else if a=2 then do;
    if c=1 then a2c1=score;
    else if c=2 then a2c2=score;
  end;
run;

/* Return all graphics options and global statements to their defaults. */
goptions reset=all;

/* One symbol per overlaid column, in the order a1c1, a1c2, a2c1, a2c2. */
symbol1 value=square color=red;
symbol2 value=circle color=green;
symbol3 value=:      color=blue;
symbol4 value=dot    color=cyan;

/* Vertical axis: factor b, with its levels 1 and 2 displayed as 4 and 8. */
axis1 order=(1 2)
      value=('4' '8')
      offset=(10,5)
      label=(angle=90 'Size of Team');

/* Horizontal axis: the score columns. */
axis2 label=('Number of Group Interactions');

legend1 label=none
        value=(height=.8 font=swiss
               'US, observer 1' 'US, observer 2'
               'Foreign, observer 1' 'Foreign, observer 2')
        across=2
        position=(bottom right inside)
        mode=share
        cborder=black;

proc gplot data=plot;
  plot b*a1c1 b*a1c2 b*a2c1 b*a2c2
       / overlay
         vaxis=axis1
         haxis=axis2
         legend=legend1;
run;
quit;

/* From Table on Page 1114, we see:   */
/* Factor C is nested within factor A */
/* Factors A and B are crossed.       */
/* Factors B and C are crossed.       */

/* This implies:  The A*B interaction can be included in the model. */
/* The B*C interaction is included and is nested within factor A.   */
/* The A*C and A*B*C interactions are NOT in the model since factor C is nested within factor A. */

proc glm data=decision;
  class A B C;

  /* In this case, since some interactions are not allowed, this MODEL    */
  /* statement is equivalent to:                                          */
  /*     MODEL score = A C(A) B A*B B*C(A);                               */
  model score = A | C(A) | B;

  /* Effects involving factor C are declared random. */
  random C(A) B*C(A);

  /* For testing about factor A effects, the appropriate denominator MS   */
  /* is MSC(A) -- see Table 26.11, p. 1116 for details.                   */
  test h=A   e=C(A);

  /* B and A*B are both tested against the B*C(A) mean square. */
  test h=B   e=B*C(A);
  test h=A*B e=B*C(A);

  /* Least-squares means for A, with confidence limits and pairwise       */
  /* differences, using C(A) as the error term.                           */
  lsmeans A / e=C(A) cl pdiff;
run;
quit;