# Example of Randomized Complete Block Design in R # We use the Executive Data from the example in class # and Table 21.1 of the book # Save the data file into a directory and # use the full path name: executive.data <- read.table(file = "z:/My Documents/teaching/stat_705/executivedata.txt", header=FALSE, col.names = c('confid', 'agegroup', 'method')) # attaching the data frame: attach(executive.data) # Making "agegroup" and "method" factors: agegroup <- factor(agegroup) method <- factor(method) # We may analyze these data with the lm() and anova() functions. # The two factors are agegroup and method. # The blocking factor is agegroup and the treatment factor is method. # We assume agegroup and method are both factors with fixed levels. # Fitting the ANOVA model and producing the ANOVA table: exec.fit <- lm(confid ~ agegroup + method); anova(exec.fit) # We see there is a significant effect due to method (F*=33.99, Pvalue=0.0001). # If we were interested in block effects, they are also significant #(F*=14.3, P-value=0.001). # ###############* Diagnostics: ############### # Residual Plots and Q-Q plots: # Residual plots for checking ANOVA model assumptions: # plotting residuals versus fitted values: plot(fitted.values(exec.fit), residuals(exec.fit), xlab="Fitted Values", ylab="Residuals"); abline(h=0) # A normal Q-Q plot of the residuals: qqnorm(residuals(exec.fit)) # Tukey's Test for Additivity: # The following is an R function to perform Tukey's test of additivity. # This function was originally written by Jim Robison-Cox. # Just copy it into R and run the function with the name of # your response and your factors A and B, as shown in the # example below. tukeys.add.test <- function(y,A,B){ ## Y is the response vector ## A and B are factors used to predict the mean of y ## Note the ORDER of arguments: Y first, then A and B dname <- paste(deparse(substitute(A)), "and", deparse(substitute(B)), "on",deparse(substitute(y)) ) A <- factor(A); B <- factor(B) ybar.. <- mean(y) ybari. <- tapply(y,A,mean) ybar.j <- tapply(y,B,mean) len.means <- c(length(levels(A)), length(levels(B))) SSAB <- sum( rep(ybari. - ybar.., len.means[2]) * rep(ybar.j - ybar.., rep(len.means[1], len.means[2])) * tapply(y, interaction(A,B), mean))^2 / ( sum((ybari. - ybar..)^2) * sum((ybar.j - ybar..)^2)) aovm <- anova(lm(y ~ A+B)) SSrem <- aovm[3,2] - SSAB dfdenom <- aovm[3,1] - 1 STATISTIC <- SSAB/SSrem*dfdenom names(STATISTIC) <- "F" PARAMETER <- c(1, dfdenom) names(PARAMETER) <- c("num df", "denom df") D <- sqrt(SSAB/ ( sum((ybari. - ybar..)^2) * sum((ybar.j - ybar..)^2))) names(D) <- "D estimate" RVAL <- list(statistic = STATISTIC, parameter = PARAMETER, p.value = 1 - pf(STATISTIC, 1,dfdenom), estimate = D, method = "Tukey's one df F test for Additivity", data.name = dname) attr(RVAL, "class") <- "htest" return(RVAL) } ## Example of running the function: tukeys.add.test(y = confid, A = agegroup, B = method) # We see that the P-value of this test is 0.788. There is no evidence of # interaction between blocks and treatments, so this model assumption is fine. #############* Further investigation of Treatment Effects ############** # We may investigate which particular treatment means are significantly # different using Tukey's method: TukeyHSD(aov(exec.fit),conf.level=0.95)$method # At family significance level 0.05, each pair of treatment means is # significantly different. # ###############* Random Blocks in a RCBD ######################## # What if our blocks (in this case, age groups) are assumed to be # a random sample from some population? # The analysis is similar; the lm() function has no equivalent of the RANDOM statement. # We must be aware that are conclusions about the block effects are stated differently. exec.fit <- lm(confid ~ agegroup + method); anova(exec.fit) # We see the test statistics are the same, but the hypotheses # are stated differently for the test about random effects.