#  R Example of the Two-Factor ANOVA model

# We will analyze the Castle Bakery data from the example in class 

# Save the data file into a directory and 
# use the full path name:

bakery.data <- read.table(file = "z:/My Documents/teaching/stat_705/bakerydata.txt", 
header=FALSE, col.names = c('sales', 'height', 'width', 'store'))

# attaching the data frame:

attach(bakery.data)

# Making "height" and "width" factors:

height <- factor(height)
width <- factor(width)

# Fitting the ANOVA model and producing the ANOVA table:

bakery.fit <- lm(sales ~ height + width + height:width);
anova(bakery.fit)

# Note the F* for the test for interaction is 1.16 and the P-value is 0.3747.
# Note the F* for the test for an effect due to height is 74.7 and the P-value is near 0.
# Note the F* for the test for an effect due to width is 1.16 and the P-value is 0.3226.

# The following mimics SAS's LSMEANS output:

# Sample mean sales for each level of height:
tapply(sales, height, mean)

# Sample mean sales for each level of width:
tapply(sales, width, mean)

# Sample mean sales for each height-width combination:
tapply(sales, height:width, mean)

#####################################################################################

# Producing the "Interaction Plots":

# An easy way to plot sales against height at each level of width:

interaction.plot(height, width, sales)

# An easy way to plot sales against width at each level of height:

interaction.plot(width, height, sales)

# A longer way is given below --- some of this may be instructive:
######
###
samp.means <- tapply(sales, list(height, width), mean)

# Plotting sales against height at each level of width:

#first line draws blank plot:
plot(c(0.5,3.5), range(sales), xlab='Height', ylab='Sales', type='n', axes=F) 
axis(1, at=1:3, labels=c('bottom','middle','top')); axis(2); box()
lines(1:3, samp.means[,1], lty=1, type='b', pch=19)
lines(1:3, samp.means[,2], lty=3, type='b', pch=1)

legend(x=1, y=70, c('regular','wide'), lty=c(1,3), pch=c(19,1))

# Plotting sales against width at each level of height:

#first line draws blank plot:
plot(c(0.5,2.5), range(sales), xlab='Width', ylab='Sales', type='n', axes=F) 
axis(1, at=1:2, labels=c('regular','wide')); axis(2); box()
lines(1:2, samp.means[1,], lty=1, type='b', pch=19)
lines(1:2, samp.means[2,], lty=2, type='b', pch=3)
lines(1:2, samp.means[3,], lty=3, type='b', pch=1)

legend(x=1.5, y=60, c('bottom','middle','top'), lty=c(1,2,3), pch=c(19,3,1))
###
######

#####################################################################################

# Residual plots for checking ANOVA model assumptions:

# plotting residuals versus fitted values:

plot(fitted.values(bakery.fit), residuals(bakery.fit), xlab="Fitted Values", ylab="Residuals"); abline(h=0)

# A normal Q-Q plot of the residuals:

qqnorm(residuals(bakery.fit))


#####################################################################################


# Formal Tests of Model Assumptions

# For the Brown-Forsythe test when there are multiple factors, we use the B-F function
# where the "group" variable contains all the treatments:

## A short function to perform the Brown-Forsythe test 
## for equal variances:

BF.test <- function(y, group)
{
group <- as.factor(group) # precautionary
meds <- tapply(y, group, median)
resp <- abs(y - meds[group])
anova(lm(resp ~ group))[1, 4:5]
}

# Implementing the function for our data:

BF.test(sales, height:width)

# Note that this isn't useful when there are so few observations 
# per treatment.  The denominator of the F-statistic of the B-F test
# is essentially 0.  The code should work when there are more 
# observations per cell, though.

# The Shapiro-Wilk test for normality is produced by:

shapiro.test(resid(bakery.fit))

# The Shapiro-Wilk P-value is 0.0433.  We can formally reject the normality           
# assumption.  Possibly a transformation of the response variable is appropriate here.