/* This example shows the analyses for the one-way ANOVA  */
/* using the rice data example we looked at in class      */

/* Entering the data and defining the variables:          */
/* VARIETY = rice variety code (1-4), YIELD = response.   */
/* Each data line below holds one observation.            */

DATA rice;
    INPUT VARIETY YIELD;
    CARDS;
1 934
1 1041
1 1028
1 935
2 880
2 963
2 924
2 946
3 987
3 951
3 976
3 840
4 992
4 1143
4 1140
4 1191
;
run;

/******************************************************************************/

/* PROC GLM will do a standard analysis of variance                          */
/* We specify that VARIETY is a (qualitative) factor with a CLASS statement  */
/* The model statement specifies that YIELD is the response                  */
/* and VARIETY is the factor                                                 */
/* PROC GLM is interactive, so QUIT is used to end each GLM step explicitly. */

PROC GLM data=rice;
    CLASS VARIETY;
    MODEL YIELD = VARIETY;
run;
quit;

/******************************************************************************/

/* The following code produces some residual plots                            */
/* The MEANS statement produces the output                                    */
/* for Levene's test for unequal variances                                    */
/* The OUTPUT statement saves the predicted values (ybar) and the residuals   */
/* (resid) in the data set diagnost, which the plotting steps below read.     */

PROC GLM data=rice;
    CLASS VARIETY;
    MODEL YIELD = VARIETY;
    MEANS VARIETY / HOVTEST=LEVENE (TYPE=ABS);
    OUTPUT OUT=diagnost p=ybar r=resid;
run;
quit;

/* Residuals versus predicted values, with a reference line at zero */
PROC SGPLOT data=diagnost;
    SCATTER y=resid x=ybar;
    REFLINE 0;
run;

/* Normal Q-Q plot of the residuals.  The input data set is named explicitly */
/* so this step does not depend on which data set was created most recently. */
PROC UNIVARIATE data=diagnost noprint;
    QQPLOT resid / normal;
run;

/* Note that according to Levene's test (P-value = 0.4654) we would   */
/* FAIL TO REJECT the null hypothesis that all variances are equal.   */
/* So the equal-variance assumption seems reasonable for these data.  */

/* Notice there is some evidence of nonnormality based on the Q-Q plot */

/******************************************************************************/

/* Estimating and testing contrasts */

/* The following code estimates the contrasts in the example from class            */
/* The ESTIMATE statement defines the contrast.  Note we need the divisor=3 option */
/* (it divides every listed coefficient by 3, so the first contrast compares the   */
/* average of varieties 1-3 with variety 4).                                       */
/* The ESTIMATE statement also uses a t-test to test whether a contrast is zero    */

PROC GLM DATA=rice;
    CLASS VARIETY;
    MODEL YIELD = VARIETY;
    MEANS VARIETY;
    ESTIMATE 'Var4vsOthers' VARIETY 1 1 1 -3 / divisor=3;
    ESTIMATE '1vs2' VARIETY 1 -1 0 0;
RUN;
QUIT;

/******************************************************************************/

/* Post Hoc Multiple Comparisons in SAS */

/* Adding options to a MEANS statement is the easiest way to implement a */
/* multiple comparisons procedure in SAS.                                */

PROC GLM DATA=rice;
    CLASS VARIETY;
    MODEL YIELD = VARIETY;
    MEANS VARIETY / ALPHA=0.05 LSD TUKEY;
    MEANS VARIETY / ALPHA=0.05 LSD TUKEY CLDIFF; /* same as above but output given in different style */
run;
quit;

/* ALPHA=0.05 is actually the default significance level.  We could choose */
/* another significance level if desired.                                  */

/* Notice the results for the Fisher LSD and Tukey procedures.  According to   */
/* Fisher, the mean for variety 4 is significantly different from the means of */
/* each other variety.  Tukey gives similar results, but Tukey's method does   */
/* NOT find a significant difference between varieties 1 and 4.                */

/* Recall:  Tukey is more conservative (less likely to reject H_0).  Tukey */
/* offers more protection against Type I errors, but less power.           */

/* In place of LSD or TUKEY (or in addition to those), we could request */
/* DUNCAN, SNK, or SCHEFFE, among others.                               */

/******************************************************************************/

/* Simultaneous 95% Confidence Intervals for All Pairwise Differences */
/* Uses the Tukey method.                                             */

PROC GLM DATA=rice;
    CLASS VARIETY;
    MODEL YIELD = VARIETY;
    MEANS VARIETY / ALPHA=0.05 TUKEY CLDIFF;
run;
quit;

/* Simultaneous 95% Tukey Confidence Intervals for Specified Contrasts */

PROC GLM DATA=rice;
    CLASS VARIETY;
    MODEL YIELD = VARIETY;
    MEANS VARIETY;
    ESTIMATE 'Var4 vs Others' VARIETY 1 1 1 -3 / divisor=3;
    ESTIMATE '1 vs 2' VARIETY 1 -1 0 0;
RUN;
QUIT;

/* Page 741 says for alpha =0.05, t=4 and error df=12, the Tukey q value is 4.20. */
/* So the two CIs for these contrasts are:                                        */
/* -166.08 +/- (4.20)(37.22)  and  56.25 +/- (4.20)(45.59).                       */

/* NOTE(review): the Tukey margin for a pairwise difference is usually written    */
/* as q*sqrt(MSE/n), which equals (q/sqrt(2))*SE(difference).  Confirm against    */
/* the text whether the standard errors above should be rescaled before being     */
/* multiplied by q.                                                               */

/******************************************************************************/