/* SAS example to analyze Nested Design */
/* These are the training-school data from class and Chapter 26. */
/* Variables: SCORE (response), SCHOOL (factor A), INSTRUCTOR (factor B, */
/* nested within SCHOOL), OBS (replicate index within instructor).       */

/* NOTE: in-stream data must begin on the line AFTER the CARDS statement, */
/* one record per line, and end with a line containing only a semicolon.  */
DATA trschool;
    INPUT SCORE SCHOOL INSTRUCTOR OBS;
    cards;
25 1 1 1
29 1 1 2
14 1 2 1
11 1 2 2
11 2 1 1
6 2 1 2
22 2 2 1
18 2 2 2
17 3 1 1
20 3 1 2
5 3 2 1
2 3 2 2
;
run;

/* We run PROC GLM with SCHOOL and INSTRUCTOR as the two factors listed */
/* in the CLASS statement.  In the MODEL statement, we indicate that the */
/* levels of INSTRUCTOR are nested within the levels of SCHOOL with the */
/* INSTRUCTOR(SCHOOL) syntax for the second factor. */
/* The OUTPUT statement saves fitted values (ybar) and residuals (resid) */
/* to the data set PRED for the diagnostic plots further below. */

PROC GLM data = trschool;
    CLASS SCHOOL INSTRUCTOR;
    MODEL SCORE = SCHOOL INSTRUCTOR(SCHOOL);
    OUTPUT OUT = pred p=ybar r=resid;
run;
quit;

/* We see that the 3 schools differ in mean score (F* = 11.18, P-value = .0095) */
/* and that instructors within at least one school have different mean scores */
/* (F* = 27.02, P-value = .0007). */

/* ************************************************************************* */

/* To determine in WHICH schools the instructor effects are significant, we */
/* further decompose the SSB(A) into SSB(A_1), SSB(A_2), and SSB(A_3). */

/* BY-group processing requires the data to be sorted by the BY variable. */
/* The data above are already entered in SCHOOL order; the sort simply    */
/* keeps this step working if the data lines are ever edited or reordered. */
PROC SORT data = trschool;
    BY SCHOOL;
run;

PROC GLM data = trschool;
    BY SCHOOL;
    CLASS INSTRUCTOR;
    MODEL SCORE = INSTRUCTOR;
run;
quit;

/* Note SAS gives (over 3 pages) the following results: */
/* Atlanta school:  MSB(A_1)=210.25 => F* = 210.25/7 = 30.0  (we divide this by hand) */
/* Chicago school:  MSB(A_2)=132.25 => F* = 132.25/7 = 18.9  (we divide this by hand) */
/* San Fran school: MSB(A_3)=225.0  => F* = 225/7    = 32.14 (we divide this by hand) */

/* Note that each of these is divided by the original MSE of 7 (6 error df from */
/* the full nested model), NOT by the within-school MSE printed by each BY group. */

/* Since F(0.95,1,6) = 5.99, then:  At the 0.05 level, instructors within the */
/* Atlanta school have different mean scores; at the 0.05 level, instructors */
/* within the Chicago school have different mean scores; and at the 0.05 */
/* level, instructors within the San Francisco school have different mean */
/* scores.  The FAMILY significance level for this SET of tests is at most 0.15. */

/* ************************************************************************** */

/* Some Residual Plots to Check the Standard Model Assumptions: */

/* Residual Plots and Q-Q plots: */

goptions reset=all;
symbol1 v=circle l=32 c = black;

PROC GPLOT data=pred;
    PLOT resid*ybar/vref=0;     /* Residuals Plotted vs. Fitted Values */
    PLOT resid*SCHOOL/vref=0;   /* Residuals Plotted for each SCHOOL Level */
run;
quit;

/* MU=EST and SIGMA=EST add a reference line based on the sample mean and */
/* standard deviation, so departures from normality can be judged visually. */
PROC UNIVARIATE noprint data=pred;
    QQPLOT resid / normal(mu=est sigma=est);   /* Normal Q-Q Plot of Residuals */
run;

/* ****************************************************** */

/* Further Investigation of Treatment Means: */

/* Tukey Simultaneous 90% CIs for differences in pairs of school mean scores can be found by: */

PROC GLM data = trschool;
    CLASS SCHOOL INSTRUCTOR;
    MODEL SCORE = SCHOOL INSTRUCTOR(SCHOOL);
    MEANS SCHOOL / TUKEY CLDIFF ALPHA=0.10;
run;
quit;

/* Getting the Bonferroni CIs at the bottom of p. 1102 is best done by hand, since the relevant */
/* mean differences are only a few of the pairwise comparisons among all (3)(2) = 6 treatments. */

/**************************************************************/

/* Example of Partially Nested Design from Sec. 26.9 */

data decision;
    input score a b c rep;
    label a ='Nationality'
          b ='Size'
          c ='Observer';
    cards;
16 1 1 1 1
20 1 1 1 2
14 1 1 2 1
19 1 1 2 2
7 2 1 1 1
5 2 1 1 2
4 2 1 2 1
9 2 1 2 2
21 1 2 1 1
25 1 2 1 2
28 1 2 2 1
19 1 2 2 2
11 2 2 1 1
17 2 2 1 2
12 2 2 2 1
15 2 2 2 2
;
run;

/* Split SCORE into one column per (nationality, observer) combination so */
/* that each combination is drawn with its own symbol in the overlay plot. */
data plot;
    set decision;
    if a=1 and c=1 then a1c1=score;
    if a=1 and c=2 then a1c2=score;
    if a=2 and c=1 then a2c1=score;
    if a=2 and c=2 then a2c2=score;
run;

goptions reset=all;
symbol1 c=red   v=square;
symbol2 c=green v=circle;
symbol3 c=blue  v=:;
symbol4 c=cyan  v=dot;
axis1 label=(a=90 'Size of Team') value=('4' '8') offset=(10,5) order=(1 2);
axis2 label=('Number of Group Interactions');
legend1 label=none
        value=(height=.8 font=swiss
               'US, observer 1' 'US, observer 2'
               'Foreign, observer 1' 'Foreign, observer 2')
        across=2
        position=(bottom right inside)
        mode=share
        cborder=black;

proc gplot data=plot;
    plot b*(a1c1 a1c2 a2c1 a2c2) / overlay vaxis=axis1 haxis=axis2 legend=legend1;
run;
quit;

/* From Table on Page 1114, we see: */
/* Factor C is nested within factor A */
/* Factors A and B are crossed. */
/* Factors B and C are crossed. */

/* This implies:  The A*B interaction can be included in the model. */
/* The B*C interaction is included and is nested within factor A. */
/* The A*C and A*B*C interactions are NOT in the model since factor C is nested within factor A. */

proc glm data=decision;
    class A B C;
    MODEL score = A | C(A) | B;
    /* In this case, since some interactions are not allowed, this MODEL statement is equivalent to: */
    /*     MODEL score = A C(A) B A*B B*C(A);                                                        */
    RANDOM C(A) B*C(A);
    /* Explicit error terms for the F tests of the fixed effects: */
    TEST h=A   e=C(A);
    TEST h=B   e=B*C(A);
    TEST h=A*B e=B*C(A);
    /* For testing about factor A effects, the appropriate denominator MS is MSC(A) */
    /* -- see Table 26.11, p. 1116 for details.                                     */
    LSMEANS A / e=C(A) cl pdiff;
run;
quit;