# This example shows the analysis for a factorial experiment with unbalanced
# data, using the Table 11.2 (p. 514) data example we looked at in class.

# Entering the data and defining the variables:

##########
##
# Reading the data into R.
# One observation per line; the columns are: level of A, level of C, response Y.

my.datafile <- tempfile()
cat(file = my.datafile, "
1 1 4
1 1 5
1 1 6
1 2 8
2 1 5
2 2 7
2 2 9
", sep = " ")

options(scipen = 999) # suppressing scientific notation

table112 <- read.table(my.datafile, header = FALSE, col.names = c("A", "C", "Y"))

# Note we could also save the data columns into a file and use a command such as:
# table112 <- read.table(file = "z:/stat_516/filename.txt", header=FALSE, col.names = c("A", "C", "Y"))

# attach() puts the columns of table112 on the search path, so that Y can be
# referred to by its bare name in the model-fitting code further down.
attach(table112)

# The data frame called table112 is now created,
# with 3 variables, A, C, and Y.
##
#########

# Defining A and C as factors.
# The columns are read from table112 explicitly: a bare `A` or `C` would
# resolve to any object of that name already sitting in the workspace (workspace
# objects mask attached columns), e.g. when this script is re-run in the same
# R session.

A <- factor(table112$A)
C <- factor(table112$C)

# With UNBALANCED data, the least squares means approach does it correctly.
# Using the least squares means, we can see that the proper estimate
# of (alpha_1 - alpha_2) is (6.5 - 6.5) = 0.

# Getting the cell sample means
# (one entry per A:C combination, ordered 1:1, 1:2, 2:1, 2:2):

cell.means <- tapply(table112$Y, A:C, mean)
cell.means

# Arranging the cell means in a table with one row per level of A and
# one column per level of C. Filling by row matches the A:C ordering above.

cell.means.matrix <- matrix(cell.means, nrow = nlevels(A), ncol = nlevels(C),
                            byrow = TRUE)

# Getting the least squares row means (unweighted averages of the cell means):
# In this example, these are for the levels of "A":

rowMeans(cell.means.matrix)

# Getting the least squares column means:
# In this example, these are for the levels of "C":

colMeans(cell.means.matrix)

# Note also that for unbalanced data in the two-way ANOVA, the Type I SS and Type III SS
# are NOT the same.  In this case, we should look at the Type III SS.  This correctly
# gives a SSA of zero for this example, since we have seen in class that the proper
# conclusion is that there is zero sample variation between the means of the levels
# of factor A.

##########################################################################

# We will use the lm() function to get the standard ANOVA table:

# To match the book's output on page 517, we include the interaction A*C.
# As it turns out, the interaction is not significant here and can be ignored.

# Here is a way to get the Type III SS in this example:
# (This is the long way)

# Effect coding for each two-level factor:
# +1 for level 1 and -1 for level 2 (the LAST level).

dummy1.A <- as.numeric(A == 1) - as.numeric(A == 2)
dummy1.C <- as.numeric(C == 1) - as.numeric(C == 2)

# Full model: both main effects plus the interaction.
full.model <- lm(Y ~ dummy1.A + dummy1.C + dummy1.A:dummy1.C)

# Reduced models: each one leaves out exactly one effect from the full model.
reduced.model.AC <- lm(Y ~ dummy1.A + dummy1.C)                  # no interaction
reduced.model.A  <- lm(Y ~ dummy1.C + dummy1.A:dummy1.C)         # no factor A
reduced.model.C  <- lm(Y ~ dummy1.A + dummy1.A:dummy1.C)         # no factor C

# Gives Type III SS list for A, C, and A*C.
# Each reduced model is compared against the full model; row 2 of the
# comparison holds the extra sum of squares and its F test, and the first
# two columns (residual df and RSS) are dropped.

reduced.fits <- list(reduced.model.A, reduced.model.C, reduced.model.AC)
extra.ss.rows <- lapply(
  reduced.fits,
  function(reduced.fit) anova(reduced.fit, full.model)[2, -(1:2)]
)

Type.III.SS.table <- do.call(rbind, extra.ss.rows)
row.names(Type.III.SS.table) <- c("A", "C", "A*C")
Type.III.SS.table <- round(Type.III.SS.table, 3) # rounding off...

print(Type.III.SS.table)

###############################################################################
###############################################################################
###############################################################################

## IMPORTANT:  What if one (or more) of the factors had more than 2 levels?

# We need (t-1) dummy variables to represent the t categories!
# When setting up the dummy variables, we use -1 for the LAST category.
# This implies the restriction that the tau_i values sum to zero.

# Example 1:  Suppose factor A had three possible levels (1, 2, 3) instead of two.
#             Suppose factor C still had two levels (1, 2).
##########
##
# Adjusted data table (now factor A has three separate levels).
# One observation per line; the columns are: level of A, level of C, response Y.

my.datafile <- tempfile()
cat(file = my.datafile, "
1 1 4
1 1 5
1 1 6
1 2 8
2 1 5
2 2 7
2 2 9
3 1 11
3 2 12
", sep = " ")

options(scipen = 999) # suppressing scientific notation

table.adjust <- read.table(my.datafile, header = FALSE, col.names = c("A", "C", "Y"))

# Note we could also save the data columns into a file and use a command such as:
# table.adjust <- read.table(file = "z:/stat_516/filename.txt", header=FALSE, col.names = c("A", "C", "Y"))

# NOTE: attach() is kept only so that code relying on the attached columns
# keeps working. Nothing below depends on it, because attached columns are
# masked by any workspace object with the same name.
attach(table.adjust)

# The data frame called table.adjust is now created,
# with 3 variables, A, C, and Y.
##
#########

# Defining A and C as factors.
# IMPORTANT: the columns must be taken from table.adjust explicitly.
# Writing factor(A) here would pick up an object called A left in the
# workspace by earlier code (e.g. the two-level, 7-observation factor from the
# first example) instead of the new 9-observation column, and the dummy
# variables below would then be silently wrong.

A <- factor(table.adjust$A)
C <- factor(table.adjust$C)

# Sanity check: the dummy coding below assumes exactly these layouts.
stopifnot(
  length(A) == nrow(table.adjust),
  length(C) == nrow(table.adjust),
  nlevels(A) == 3,
  nlevels(C) == 2
)

# Need TWO dummy variables associated with factor A!
# Level 3 (the LAST level) is coded -1 in both of them.

dummy1.A <- rep(0, times = nrow(table.adjust))
dummy1.A[A == 1] <- 1
dummy1.A[A == 3] <- -1

dummy2.A <- rep(0, times = nrow(table.adjust))
dummy2.A[A == 2] <- 1
dummy2.A[A == 3] <- -1

# Need only ONE dummy variable associated with factor C:

dummy1.C <- rep(0, times = nrow(table.adjust))
dummy1.C[C == 1] <- 1
dummy1.C[C == 2] <- -1

# data = table.adjust makes sure the response Y is the one from this data set;
# the dummy variables are not columns of table.adjust, so they are taken from
# the workspace.

full.model <- lm(Y ~ dummy1.A + dummy2.A + dummy1.C + dummy1.A:dummy1.C + dummy2.A:dummy1.C,
                 data = table.adjust)

# remove interaction terms
reduced.model.AC <- lm(Y ~ dummy1.A + dummy2.A + dummy1.C,
                       data = table.adjust)

# remove factor A's dummy variables
reduced.model.A <- lm(Y ~ dummy1.C + dummy1.A:dummy1.C + dummy2.A:dummy1.C,
                      data = table.adjust)

# remove factor C's dummy variable
reduced.model.C <- lm(Y ~ dummy1.A + dummy2.A + dummy1.A:dummy1.C + dummy2.A:dummy1.C,
                      data = table.adjust)

# Gives Type III SS list for A, C, and A*C.
# Row 2 of each reduced-vs-full comparison holds the extra sum of squares and
# its F test; the first two columns (residual df and RSS) are dropped.

Type.III.SS.table <- rbind(
  anova(reduced.model.A, full.model)[2, -(1:2)],
  anova(reduced.model.C, full.model)[2, -(1:2)],
  anova(reduced.model.AC, full.model)[2, -(1:2)]
)
row.names(Type.III.SS.table) <- c("A", "C", "A*C")
Type.III.SS.table <- round(Type.III.SS.table, 3) # rounding off...

print(Type.III.SS.table)