/* This example shows the analyses for the two-factor factorial experiment */
/* using the gas mileage data example we looked at in class.               */

/* Entering the data and defining the variables:                           */
/*   OBS = observation number                                              */
/*   CYL = engine type (number of cylinders: 4 or 6)                       */
/*   OIL = oil type (character: STANDARD, MULTI, GASMISER)                 */
/*   rep = replicate number within each CYL-by-OIL cell (1-5)              */
/*   MPG = gas mileage (the response)                                      */
/* NOTE: the in-stream records must start on the line AFTER the CARDS      */
/* statement, one observation per line, because the INPUT statement has    */
/* no trailing @@.                                                         */

data mileage;
    input OBS CYL OIL $ rep MPG;
    cards;
1 6 STANDARD 1 23.6
2 6 STANDARD 2 21.7
3 6 STANDARD 3 20.3
4 6 STANDARD 4 21.0
5 6 STANDARD 5 22.0
6 6 MULTI 1 23.5
7 6 MULTI 2 22.8
8 6 MULTI 3 24.6
9 6 MULTI 4 24.6
10 6 MULTI 5 22.5
11 6 GASMISER 1 21.4
12 6 GASMISER 2 20.7
13 6 GASMISER 3 20.5
14 6 GASMISER 4 23.2
15 6 GASMISER 5 21.3
16 4 STANDARD 1 22.6
17 4 STANDARD 2 24.5
18 4 STANDARD 3 23.1
19 4 STANDARD 4 25.3
20 4 STANDARD 5 22.1
21 4 MULTI 1 23.7
22 4 MULTI 2 24.6
23 4 MULTI 3 25.0
24 4 MULTI 4 24.0
25 4 MULTI 5 23.1
26 4 GASMISER 1 26.0
27 4 GASMISER 2 25.0
28 4 GASMISER 3 26.9
29 4 GASMISER 4 26.0
30 4 GASMISER 5 25.4
;
run;

/********************************************************************************/

/* PROC GLM will do a standard analysis of variance                  */
/* We specify that CYL and OIL are factors with a CLASS statement    */
/* The model statement specifies that MPG is the response            */
/* and CYL (engine type) and OIL are the two factors                 */
/* We also include the interaction term CYL*OIL                      */
/* The OUTPUT statement saves the predicted values (the cell means)  */
/* as the variable YBARS in the data set "pred".                     */

PROC GLM data=mileage;
    CLASS CYL OIL;
    MODEL MPG = CYL OIL CYL*OIL;
    OUTPUT out=pred p=YBARS;
run;

/***********************************************************************************/

/* Interaction plots: */
/* These use the "pred" data set created in the OUTPUT statement of PROC GLM above */

/* We can plot mileage against oil type for each value of CYL: */

PROC SGPLOT data=pred;
    SERIES X=OIL Y=YBARS / GROUP=CYL;
RUN;

/* or we can plot mileage against engine type for each value of OIL: */

PROC SGPLOT data=pred;
    SERIES X=CYL Y=YBARS / GROUP=OIL;
RUN;

/* We can see the exact values being plotted with MEANS statement for the interaction: */

PROC GLM data=mileage;
    CLASS CYL OIL;
    MODEL MPG = CYL OIL CYL*OIL;
    MEANS CYL*OIL;
run;

/* The values in the column labeled "Mean" are the values plotted in the interaction plot */

/**************************** Contrasts in Two-Factor Experiments ************************/

/* Investigating contrasts is relatively simple when there is no significant interaction */
/* The syntax is similar to the one-way analysis */

PROC GLM data=mileage;
    CLASS CYL OIL;
    MODEL MPG = CYL OIL CYL*OIL;

    /* Comparing 4-cylinder to 6-cylinder engines: */
    /* The CLASS output (first page of PROC GLM output) tells us how SAS orders the levels */
    ESTIMATE '4cyl Vs 6cyl' CYL 1 -1;

    /* Comparing cheap oil (standard) to the average of the more expensive types: */
    /* The coefficients follow the OIL level order GASMISER, MULTI, STANDARD;      */
    /* divisor=2 turns (-1 -1 2) into (-1/2 -1/2 1).                               */
    ESTIMATE 'Cheap Vs Expensive' OIL -1 -1 2 / divisor=2;
RUN;

/* These simple comparisons are not really valid when there is significant interaction. */
/* We must compare levels of one factor at each level of the other factor. */

PROC GLM data=mileage;
    CLASS CYL OIL;
    MODEL MPG = CYL OIL CYL*OIL;

    /* The different combinations are 4G, 4M, 4S, 6G, 6M, 6S */
    /* This can be seen by looking at the "class level information" */
    /* This will affect how we specify the interaction effects of interest */

    /* Comparing 4-cylinder to 6-cylinder engines for each oil type: */
    ESTIMATE '4cyl Vs 6cyl, Gasmiser' CYL 1 -1 OIL 0 0 0 CYL*OIL 1 0 0 -1 0 0;
    ESTIMATE '4cyl Vs 6cyl, Multi ' CYL 1 -1 OIL 0 0 0 CYL*OIL 0 1 0 0 -1 0;
    ESTIMATE '4cyl Vs 6cyl, Standard' CYL 1 -1 OIL 0 0 0 CYL*OIL 0 0 1 0 0 -1;

    /* Comparing cheap oil (standard) to expensive, for each engine type: */
    ESTIMATE 'Cheap Vs Expensive, 4-cyl' CYL 0 0 OIL -1 -1 2 CYL*OIL -1 -1 2 0 0 0 / divisor=2;
    ESTIMATE 'Cheap Vs Expensive, 6-cyl' CYL 0 0 OIL -1 -1 2 CYL*OIL 0 0 0 -1 -1 2 / divisor=2;
RUN;

/****************** Multiple Comparisons in Two-Factor Experiments *********************/

/* Without interaction, multiple comparisons are done for each factor in the same way */
/* as in Chapter 6. */

/* Assuming NO significant interaction between CYL and OIL: */

PROC GLM data=mileage;
    CLASS CYL OIL;
    MODEL MPG = CYL OIL CYL*OIL;
    MEANS CYL / TUKEY;
    MEANS OIL / TUKEY;

    /* Or use, to get the output in a different style: */
    /* (as written, both styles are requested and both are printed) */
    MEANS CYL / TUKEY CLDIFF;
    MEANS OIL / TUKEY CLDIFF;
RUN;

/* WITH interaction, the pairwise differences in mean response are calculated for all */
/* pairs of factor level combinations: */

PROC GLM data=mileage;
    CLASS CYL OIL;
    MODEL MPG = CYL OIL CYL*OIL;
    LSMEANS CYL*OIL / PDIFF ADJUST=TUKEY;
RUN;

/* This allows us to find pairwise differences in mean response for all factor level combinations. */
/* Verify that the difference in mean mileage between 4-cylinder multi and */
/* 6-cylinder standard is 24.08 - 21.72 = 2.36. */

/* The table shows the P-values for comparison among all factor level pairs. */
/* These P-values below each t statistic are adjusted for the Tukey procedure. */
/* So, for example, the difference in mean mileage between 4-cylinder multi and */
/* 6-cylinder standard would be judged significant by Tukey's procedure since */
/* the P-value for "Combination 2 vs. Combination 6" is 0.0166. */

/* Tukey's procedure the long way: From Table A.7, q_.05(t=6,df=24) is 4.37. */
/* And MSW = 1.084 and n = 5 replicates. */
/* So we compare each absolute pairwise difference to */
/* 4.37*sqrt(1.084/5) = 2.035. */
/* So, for example, the difference in mean mileage between 4-cylinder multi and */
/* 6-cylinder standard would be judged significant by Tukey's procedure since */
/* the absolute value of 2.36 is greater than 2.035. */
/* We could make similar comparisons for all other pairs of factor level combinations. */

/***** Example of a Three-Factor Factorial Experiment with ONE observation per cell *****/

/* LOC is the name of the city, L is a single-character code for the city. */
/* VARIETY and NIT are the other two factors; YIELD is the response.       */
/* As above, each observation sits on its own line after the CARDS statement. */

data threefactor;
    input LOC $ L $ VARIETY $ NIT YIELD;
    cards;
BAY_CITY B B 60 3910
BAY_CITY B B 90 4015
BAY_CITY B B 120 3894
BAY_CITY B B 150 4870
BAY_CITY B L 60 2481
BAY_CITY B L 90 3514
BAY_CITY B L 120 3726
BAY_CITY B L 150 4071
BAY_CITY B N 60 3146
BAY_CITY B N 90 2806
BAY_CITY B N 120 3739
BAY_CITY B N 150 4681
EAGLE_LK E B 60 1561
EAGLE_LK E B 90 3088
EAGLE_LK E B 120 2869
EAGLE_LK E B 150 3957
EAGLE_LK E L 60 4917
EAGLE_LK E L 90 5466
EAGLE_LK E L 120 4672
EAGLE_LK E L 150 5680
EAGLE_LK E N 60 1330
EAGLE_LK E N 90 2642
EAGLE_LK E N 120 2252
EAGLE_LK E N 150 1715
EL_CAMPO C B 60 4340
EL_CAMPO C B 90 4024
EL_CAMPO C B 120 4306
EL_CAMPO C B 150 4479
EL_CAMPO C L 60 4804
EL_CAMPO C L 90 4480
EL_CAMPO C L 120 4619
EL_CAMPO C L 150 4048
EL_CAMPO C N 60 3768
EL_CAMPO C N 90 4167
EL_CAMPO C N 120 4212
EL_CAMPO C N 150 4293
KATY K B 60 6129
KATY K B 90 5697
KATY K B 120 6853
KATY K B 150 6457
KATY K L 60 5641
KATY K L 90 5544
KATY K L 120 6318
KATY K L 150 6297
KATY K N 60 4193
KATY K N 90 4681
KATY K N 120 4758
KATY K N 150 4463
;
run;

/* Full three-factor model with all two-way and the three-way interaction. */
/* NOTE: this OUTPUT statement replaces the "pred" data set created by the */
/* two-factor analysis above.                                              */

PROC GLM DATA=threefactor;
    CLASS L VARIETY NIT;
    MODEL YIELD = L VARIETY NIT L*VARIETY L*NIT VARIETY*NIT L*VARIETY*NIT;
    OUTPUT out=pred p=YBARS;
run;

/* Notice there is no estimate for sigma^2 (no MSW) */

/* What if we leave out the three-way interaction term? */

PROC GLM DATA=threefactor;
    CLASS L VARIETY NIT;
    MODEL YIELD = L VARIETY NIT L*VARIETY L*NIT VARIETY*NIT;
run;

/* Now we do have an estimate of sigma^2. */
/* But we must hope our assumption of no significant three-way interaction is correct. */

/* A graphical check of whether there is 3-factor interaction */
/* Do several interaction plots, separate for the levels of the third factor: */
/* (the data set must be sorted by L before it can be used in a BY statement) */

PROC SORT data=pred;
    BY L;
run;

PROC SGPLOT data=pred;
    SERIES X=NIT Y=YBARS / GROUP=VARIETY;
    BY L;
RUN;

/***************************************************************************************/