/* SAS Example of Residual Analysis to                              */
/* check assumptions in the ANOVA model                             */

/* We will analyze the Kenton Foods data from the example in class. */
/* The response variable is sales and the factor is package design. */
/* The store label is also given in the data set.                   */

/* One observation per data line: SALES DESIGN STORE.               */
/* In-stream data must start on the line after DATALINES; and list  */
/* input (without @@) reads exactly one observation per line.       */
DATA kenton;
    INPUT SALES DESIGN STORE;
    DATALINES;
11 1 1
17 1 2
16 1 3
14 1 4
15 1 5
12 2 1
10 2 2
15 2 3
19 2 4
11 2 5
23 3 1
20 3 2
18 3 3
17 3 4
27 4 1
33 4 2
22 4 3
26 4 4
28 4 5
;
run;

/* ***************************************************************************** */

/* Residual Plots to Check ANOVA model assumptions */

/* The following code produces some residual plots.          */
/* The residuals are plotted against the fitted values       */
/* and the normal Q-Q plot of the residuals is produced.     */

/* The MEANS statement with the option HOVTEST=BF produces   */
/* the output for Brown-Forsythe test for unequal variances. */

/* The OUTPUT statement saves the fitted values (ybar) and the */
/* residuals (resid) in the data set diagnost.                 */
PROC GLM data=kenton;
    CLASS DESIGN;
    MODEL SALES = DESIGN;
    LSMEANS DESIGN;
    MEANS DESIGN / HOVTEST=BF;
    OUTPUT OUT=diagnost p=ybar r=resid;
run;
quit;  /* GLM is interactive: QUIT ends the procedure explicitly */

/* Residuals versus fitted values, with a reference line at zero. */
PROC GPLOT data=diagnost;
    PLOT resid*ybar / vref=0;
run;
quit;  /* GPLOT also supports RUN groups, so end it explicitly */

/* More modern-looking plot: */
PROC SGPLOT data=diagnost;
    SCATTER y=resid x=ybar;
    refline 0;
run;

/* Normal Q-Q plot of the residuals.  DATA= is given explicitly so the */
/* step does not depend on whichever data set was created last.        */
PROC UNIVARIATE data=diagnost noprint;
    QQPLOT resid / normal;
run;

/* Note that according to the Brown-Forsythe test (P-value = 0.8659) we would */
/* FAIL TO REJECT the null hypothesis that all variances are equal.           */
/* So the equal-variance assumption seems reasonable for these data.          */

/*** And the Shapiro-Wilk test for normality is produced by: ****/
PROC UNIVARIATE data=diagnost normal;
    VAR resid;
run;

/* Look in the output under "Tests of Normality".              */
/* We see a test statistic of 0.972 and a P-value of 0.8216.   */
/* So the hypothesis of normal errors is reasonable.           */

/******* Outlier Detection ***************/

/* This prints the internally studentized residuals (called ISR here) */
/* along with the fitted values and regular residuals.                */
PROC GLM data=kenton;
    CLASS DESIGN;
    MODEL SALES = DESIGN;
    OUTPUT OUT=new p=pred r=resid student=ISR;
run;
quit;

PROC PRINT data=new;
    VAR pred resid ISR;
run;

/* No observations seem to be outliers. */