# R example to analyze Nested Design # these are the training-school data from class and Chapter 26 # Save the data file into a directory and # use the full path name: trainingschool.data <- read.table(file = "z:/My Documents/teaching/stat_705/trainingschooldata.txt", header=FALSE, col.names = c('score', 'school', 'instructor', 'obs')) # attaching the data frame: attach(trainingschool.data) # Making "school" and "instructor" factors: school <- factor(school) instructor <- factor(instructor) # We run lm() and anova() with "school" and "instructor" as the two factors. # We indicate that the levels of "instructor" are nested within the levels # of "school" with the "school/instructor" syntax for the second factor. trainingschool.fit <- lm(score ~ school + school/instructor) anova(trainingschool.fit) # We see that the 3 schools differ in mean score (F* = 11.18, P-value = .0095) # and that instructors within at least one school have different mean scores # (F* = 27.02, P-value = .0007). # ########################################################################* # To determine in WHICH schools the instructor effects are significant, we # further decompose the SSB(A) into SSB(A_1), SSB(A_2), and SSB(A_3). anova.table <- function(inputmatrix){ column.1 <- inputmatrix[,1] column.2 <- inputmatrix[,2] my.lm <- lm(column.1 ~ factor(column.2)) my.anova.table <- anova(my.lm) return(my.anova.table) } by(cbind(score, instructor), INDICES = school, anova.table) # Note R gives (over 3 pieces) the following results: # Atlanta school: MSB(A_1)=210.25 => F* = 210.25/7 = 30.0 (we divide this by hand) # Chicago school: MSB(A_2)=132.25 => F* = 132.25/7 = 18.9 (we divide this by hand) # San Fran school: MSB(A_3)=225.0 => F* = 225/7 = 32.14 (we divide this by hand) # Note that each of these is divided by the original MSE of 7. # Since F(0.95,1,6) = 5.99, then: At the 0.05 level, instructors within the # Atlanta school have different mean scores; at the 0.05 level, instructors # within the Chicago school have different mean scores; and at the 0.05 # level, instructors within the San Francisco school have different mean # scores. The FAMILY significance level for this SET of tests is at most 0.15. # ########################################################################** # Some Residual Plots to Check the Standard Model Assumptions: # Residual Plots and Q-Q plots: # Residuals Plotted vs. Fitted Values: plot(fitted(trainingschool.fit), residuals(trainingschool.fit), xlab="Fitted Values", ylab="Residuals"); abline(h=0) # Residuals Plotted for each SCHOOL Level plot(as.numeric(school), residuals(trainingschool.fit), xlab="School", ylab="Residuals"); abline(h=0) # Normal Q-Q Plot of Residuals: qqnorm(residuals(trainingschool.fit)) # ######################################################################## # Further Investigation of Treatment Means: # Tukey Simultaneous 90% CIs for differences in pairs of school mean scores can by found by: TukeyHSD(aov(trainingschool.fit),conf.level=0.90)$school # Getting the Bonferroni CIs at the bottom of p. 1102 is best done by hand, since the relevant # mean differences are only a few of the pairwise comparisons among all (3)(2) = 6 treatments.