# This example shows the analyses for the two-factor factorial experiment # using the gas mileage data example we looked at in class # Entering the data and defining the variables: ########## ## # Reading the data into R: my.datafile <- tempfile() cat(file=my.datafile, " 1 6 STANDARD 1 23.6 2 6 STANDARD 2 21.7 3 6 STANDARD 3 20.3 4 6 STANDARD 4 21.0 5 6 STANDARD 5 22.0 6 6 MULTI 1 23.5 7 6 MULTI 2 22.8 8 6 MULTI 3 24.6 9 6 MULTI 4 24.6 10 6 MULTI 5 22.5 11 6 GASMISER 1 21.4 12 6 GASMISER 2 20.7 13 6 GASMISER 3 20.5 14 6 GASMISER 4 23.2 15 6 GASMISER 5 21.3 16 4 STANDARD 1 22.6 17 4 STANDARD 2 24.5 18 4 STANDARD 3 23.1 19 4 STANDARD 4 25.3 20 4 STANDARD 5 22.1 21 4 MULTI 1 23.7 22 4 MULTI 2 24.6 23 4 MULTI 3 25.0 24 4 MULTI 4 24.0 25 4 MULTI 5 23.1 26 4 GASMISER 1 26.0 27 4 GASMISER 2 25.0 28 4 GASMISER 3 26.9 29 4 GASMISER 4 26.0 30 4 GASMISER 5 25.4 ", sep=" ") options(scipen=999) # suppressing scientific notation mileage <- read.table(my.datafile, header=FALSE, col.names=c("OBS", "cyl", "oil", "rep", "mpg")) # Note we could also save the data columns into a file and use a command such as: # mileage <- read.table(file = "z:/stat_516/filename.txt", header=FALSE, col.names = c("OBS", "cyl", "oil", "rep", "mpg")) attach(mileage) # The data frame called mileage is now created, # with five variables, OBS, cyl, oil, rep, and mpg. ## ######### ################ ## # # We specify that cyl (engine type) and oil are (qualitative) factors # with the factor() function: # Making "cyl" and "oil" factors: cyl <- factor(cyl) oil <- factor(oil) # We can get SS(Cells), SSW, MS(Cells), and MSW # for the OVERALL F-test by: anova(lm(mpg ~ cyl:oil)) # Note that TSS = SS(Cells) + SSW: TSS <- anova(lm(mpg ~ cyl:oil))["cyl:oil","Sum Sq"] + anova(lm(mpg ~ cyl:oil))["Residuals","Sum Sq"] print(TSS) # ## ################ ############################################################################* # lm() and anova() will do a standard analysis of variance # We also include the interaction term cyl:oil # The lm statement specifies that mpg is the response # and cyl and oil are the factors: # The ANOVA table for the two-factor model including interaction # is produced by the anova() function: mpg.fit <- lm(mpg ~ cyl + oil + cyl:oil); anova(mpg.fit) ################################################################################## # Producing the "Interaction Plots": # An easy way to plot mpg against cyl at each level of oil: interaction.plot(cyl, oil, mpg) # An easy way to plot mpg against oil at each level of cyl: interaction.plot(oil, cyl, mpg) # We can see the exact values being plotted with mean for the interaction: # Sample mean mpg for each cyl-oil combination: tapply(mpg, cyl:oil, mean) # The values given are the values plotted in the interaction plot. ### Also possibly of interest: # Sample mean mpg for each level of cyl: tapply(mpg, cyl, mean) # Sample mean mpg for each level of oil: tapply(mpg, oil, mean) ############################ Contrasts in Two-Factor Experiments #####################** ###### CASE OF NO INTERACTION: ### # Investigating contrasts is relatively simple when there is no significant interaction # The syntax is similar to the one-way analysis # Estimating and testing about contrasts: # Comparing 4-cylinder to 6-cylinder engines: # contrasts(cyl) <- matrix(c(1, -1), nrow=2, ncol=1) summary(lm(mpg ~ cyl + oil + cyl:oil))$coef["cyl1",] # Look on line 'cyl1' line (selected with the above code) # for P-value of t-test about this contrast about cylinder types. # Comparing cheap oil (standard) to the average of the more expensive types: contrasts(oil) <- matrix(c(-1/2, -1/2, 1), nrow=3, ncol=1); summary(lm(mpg ~ cyl + oil + cyl:oil))$coef["oil1",] # Look on line 'oil1' line (selected with the above code) # for P-value of t-test about this contrast about oil types. ###### CASE OF INTERACTION: ### # These simple comparisons above are not really valid when there is significant interaction. # We must compare levels of one factor at each level of the other factor. my.interaction <- cyl:oil # The different combinations are 4G, 4M, 4S, 6G, 6M, 6S # This can be seen by looking at: levels(my.interaction) ## Example 1 contrast (from class): # Comparing 4-cylinder to 6-cylinder engines when oil type is "Gasmiser": contrasts(my.interaction) <- matrix(c(1, 0, 0, -1, 0, 0), nrow=6, ncol=1) summary(lm(mpg ~ my.interaction))$coef["my.interaction1",] # Look on line 'my.interaction1' line (selected with code above) # for P-value of t-test about this contrast. # Other contrasts that might be of interest: # Comparing 4-cylinder to 6-cylinder engines when oil type is "Multi": contrasts(my.interaction) <- matrix(c(0, 1, 0, 0, -1, 0), nrow=6, ncol=1) summary(lm(mpg ~ my.interaction))$coef["my.interaction1",] # Look on line 'my.interaction1' line (selected with code above) # for P-value of t-test about this contrast. # Comparing 4-cylinder to 6-cylinder engines when oil type is "Standard": contrasts(my.interaction) <- matrix(c(0, 0, 1, 0, 0, -1), nrow=6, ncol=1) summary(lm(mpg ~ my.interaction))$coef["my.interaction1",] # Look on line 'my.interaction1' line (selected with code above) # for P-value of t-test about this contrast. ## Example 2 contrast (from class): # Comparing cheap oil (standard) to expensive, when engine type is 4-cylinder: contrasts(my.interaction) <- matrix(c(-1/2, -1/2, 1, 0, 0, 0), nrow=6, ncol=1) summary(lm(mpg ~ my.interaction))$coef["my.interaction1",] # Look on line 'my.interaction1' line (selected with code above) # for P-value of t-test about this contrast. # Other contrast that might be of interest: # Comparing cheap oil (standard) to expensive, when engine type is 6-cylinder: contrasts(my.interaction) <- matrix(c(0, 0, 0, -1/2, -1/2, 1), nrow=6, ncol=1) summary(lm(mpg ~ my.interaction))$coef["my.interaction1",] # Look on line 'my.interaction1' line (selected with code above) # for P-value of t-test about this contrast. ################** Multiple Comparisons in Two-Factor Experiments ##################** # Without interaction, multiple comparisons are done for each factor in the same way # as in Chapter 6. # Tukey procedure: # The TukeyHSD function does the Tukey multiple comparison procedure in R. # The last column gives the ADJUSTED P-values. Using alpha=0.05 for the # experimentwise error rate, any pair of means with P-value LESS THAN 0.05 # in that column are judged to be significantly different by Tukey. TukeyHSD(aov(mpg.fit),conf.level=0.95) # conf.level=0.95 is actually the default level. We could choose # another level if desired. # Assuming NO significant interaction between cyl and oil, we may look at the individual # outputs marked $cyl and $oil for comparing the factor level means using Tukey: # WITH interaction, the pairwise differences in mean response are calculated for all # pairs of factor level combinations: tapply(mpg, cyl:oil, mean) # This produces pairwise differences in mean response for all factor level combinations. # Verify that the difference in mean mileage between 4-cylinder multi and # 6-cylinder standard is 24.08 - 21.72 = 2.36. # Assuming significant interaction between cyl and oil, we can use the same code but # we may look at the output marked $cyl:oil for comparing ALL factor level combinations using Tukey: TukeyHSD(aov(mpg.fit),conf.level=0.95) # The table shows the P-values for comparison among all factor level pairs. # These P-values below each t statistic are adjusted for the Tukey procedure. # So, for example, the difference in mean mileage between 4-cylinder multi and # 6-cylinder standard would be judged significant by Tukey's procedure since # the P-value for "6:STANDARD-4:MULTI" is 0.0166. # Tukey's procedure the long way: From Table A.7, q_.05(t=6,df=24) is 4.37. # And MSW = 1.084 and n = 5 replicates. # So we compare each absolute pairwise difference to # 4.37*sqrt(1.084/5) = 2.035. # So, for example, the difference in mean mileage between 4-cylinder multi and # 6-cylinder standard would be judged significant by Tukey's procedure since # the absolute value of 2.36 is greater than 2.035. # We could make similar comparisons for all other pairs of factor level combinations. ####* Example of a Three-Factor Factorial Experiment with ONE Observation per cell ###* ###COMMENT: LOCATION IS NAME OF CITY, L IS SINGLE CHARACTER CODE FOR CITY### # Entering the data and defining the variables: ########## ## # Reading the data into R: my.datafile <- tempfile() cat(file=my.datafile, " BAY_CITY B B 60 3910 BAY_CITY B B 90 4015 BAY_CITY B B 120 3894 BAY_CITY B B 150 4870 BAY_CITY B L 60 2481 BAY_CITY B L 90 3514 BAY_CITY B L 120 3726 BAY_CITY B L 150 4071 BAY_CITY B N 60 3146 BAY_CITY B N 90 2806 BAY_CITY B N 120 3739 BAY_CITY B N 150 4681 EAGLE_LK E B 60 1561 EAGLE_LK E B 90 3088 EAGLE_LK E B 120 2869 EAGLE_LK E B 150 3957 EAGLE_LK E L 60 4917 EAGLE_LK E L 90 5466 EAGLE_LK E L 120 4672 EAGLE_LK E L 150 5680 EAGLE_LK E N 60 1330 EAGLE_LK E N 90 2642 EAGLE_LK E N 120 2252 EAGLE_LK E N 150 1715 EL_CAMPO C B 60 4340 EL_CAMPO C B 90 4024 EL_CAMPO C B 120 4306 EL_CAMPO C B 150 4479 EL_CAMPO C L 60 4804 EL_CAMPO C L 90 4480 EL_CAMPO C L 120 4619 EL_CAMPO C L 150 4048 EL_CAMPO C N 60 3768 EL_CAMPO C N 90 4167 EL_CAMPO C N 120 4212 EL_CAMPO C N 150 4293 KATY K B 60 6129 KATY K B 90 5697 KATY K B 120 6853 KATY K B 150 6457 KATY K L 60 5641 KATY K L 90 5544 KATY K L 120 6318 KATY K L 150 6297 KATY K N 60 4193 KATY K N 90 4681 KATY K N 120 4758 KATY K N 150 4463 ", sep=" ") threefactor <- read.table(my.datafile, header=FALSE, col.names=c("LOC", "L", "VARIETY", "NIT", "YIELD")) # Note we could also save the data columns into a file and use a command such as: # threefactor <- read.table(file = "z:/stat_516/filename.txt", header=FALSE, col.names = c("LOC", "L", "VARIETY", "NIT", "YIELD")) attach(threefactor) # The data frame called threefactor is now created. # ## ######### # Making the factors: L <- factor(L) VARIETY <- factor(VARIETY) NIT <- factor(NIT) # Fit with all interactions: fit1 <- lm(YIELD ~ L + VARIETY + NIT + L:VARIETY + L:NIT + VARIETY:NIT + L:VARIETY:NIT) anova(fit1) # Notice there is no estimate for sigma^2 (no MSW) # What if we leave out the three-way interaction term? fit2 <- lm(YIELD ~ L + VARIETY + NIT + L:VARIETY + L:NIT + VARIETY:NIT) anova(fit2) # Now we do have an estimate of sigma^2. # But we must hope our assumption of no significant three-way interaction is correct. #####################################################################################