# Example of Randomized Complete Block Design in R 

# We use the Executive Data from the example in class  
# and Table 21.1 of the book  

# Save the data file into a directory and 
# use the full path name:

executive.data <- read.table(file = "z:/My Documents/teaching/stat_705/executivedata.txt", 
header=FALSE, col.names = c('confid', 'agegroup', 'method'))

# attaching the data frame:

attach(executive.data)

# Making "agegroup" and "method" factors:

agegroup <- factor(agegroup)
method <- factor(method)

# We may analyze these data with the lm() and anova() functions.
#  The two factors are agegroup and method.  
# The blocking factor is agegroup and the treatment factor is method.                             
# We assume agegroup and method are both factors with fixed levels.                                    


# Fitting the ANOVA model and producing the ANOVA table:

exec.fit <- lm(confid ~ agegroup + method);
anova(exec.fit)

# We see there is a significant effect due to method (F*=33.99, Pvalue=0.0001).  
# If we were interested in block effects, they are also significant              
#(F*=14.3, P-value=0.001).                                                       

# ###############* Diagnostics: ###############

# Residual Plots and Q-Q plots:  

# Residual plots for checking ANOVA model assumptions:

# plotting residuals versus fitted values:

plot(fitted.values(exec.fit), residuals(exec.fit), xlab="Fitted Values", ylab="Residuals"); abline(h=0)

# A normal Q-Q plot of the residuals:

qqnorm(residuals(exec.fit))

# Tukey's Test for Additivity:  

# The following is an R function to perform Tukey's test of additivity.   
# This function was originally written by Jim Robison-Cox.
# Just copy it into R and run the function with the name of   
# your response and your factors A and B, as shown in the  
# example below.                                                    


tukeys.add.test <- function(y,A,B){
## Y is the response vector
## A and B are factors used to predict the mean of y
## Note the ORDER of arguments: Y first, then A and B
   dname <- paste(deparse(substitute(A)), "and", deparse(substitute(B)),
                  "on",deparse(substitute(y)) )
   A <- factor(A); B <- factor(B)
   ybar.. <- mean(y)
   ybari. <- tapply(y,A,mean)
   ybar.j <- tapply(y,B,mean)
   len.means <- c(length(levels(A)), length(levels(B)))
   SSAB <- sum( rep(ybari. - ybar.., len.means[2]) * 
                rep(ybar.j - ybar.., rep(len.means[1], len.means[2])) *
                tapply(y, interaction(A,B), mean))^2 / 
                  ( sum((ybari. - ybar..)^2) * sum((ybar.j - ybar..)^2))
   aovm <- anova(lm(y ~ A+B))
   SSrem <- aovm[3,2] - SSAB
   dfdenom <- aovm[3,1] - 1
    STATISTIC <- SSAB/SSrem*dfdenom
    names(STATISTIC) <- "F"
    PARAMETER <- c(1, dfdenom)
    names(PARAMETER) <- c("num df", "denom df")
    D <- sqrt(SSAB/  ( sum((ybari. - ybar..)^2) * sum((ybar.j - ybar..)^2)))
    names(D) <- "D estimate"
    RVAL <- list(statistic = STATISTIC, parameter = PARAMETER, 
        p.value = 1 - pf(STATISTIC, 1,dfdenom), estimate = D,
        method = "Tukey's one df F test for Additivity", 
        data.name = dname)
    attr(RVAL, "class") <- "htest"
    return(RVAL)
   }

## Example of running the function:

tukeys.add.test(y = confid, A = agegroup, B = method)

# We see that the P-value of this test is 0.788.  There is no evidence of       
# interaction between blocks and treatments, so this model assumption is fine.  

#############* Further investigation of Treatment Effects ############** 

# We may investigate which particular treatment means are significantly  
# different using Tukey's method:                                         

TukeyHSD(aov(exec.fit),conf.level=0.95)$method    

# At family significance level 0.05, each pair of treatment means is  
# significantly different.                                            

# ###############* Random Blocks in a RCBD ########################  

# What if our blocks (in this case, age groups) are assumed to be  
# a random sample from some population?                            

# The analysis is similar; the lm() function has no equivalent of the RANDOM statement.
# We must be aware that are conclusions about the block effects are stated differently.                       

exec.fit <- lm(confid ~ agegroup + method);
anova(exec.fit)

# We see the test statistics are the same, but the hypotheses  
# are stated differently for the test about random effects.