# R Example of the Two-Factor ANOVA model # We will analyze the Castle Bakery data from the example in class # Save the data file into a directory and # use the full path name: bakery.data <- read.table(file = "z:/My Documents/teaching/stat_705/bakerydata.txt", header=FALSE, col.names = c('sales', 'height', 'width', 'store')) # attaching the data frame: attach(bakery.data) # Making "height" and "width" factors: height <- factor(height) width <- factor(width) # Fitting the ANOVA model and producing the ANOVA table: bakery.fit <- lm(sales ~ height + width + height:width); anova(bakery.fit) # Note the F* for the test for interaction is 1.16 and the P-value is 0.3747. # Note the F* for the test for an effect due to height is 74.7 and the P-value is near 0. # Note the F* for the test for an effect due to width is 1.16 and the P-value is 0.3226. # The following mimics SAS's LSMEANS output: # Sample mean sales for each level of height: tapply(sales, height, mean) # Sample mean sales for each level of width: tapply(sales, width, mean) # Sample mean sales for each height-width combination: tapply(sales, height:width, mean) ##################################################################################### # Producing the "Interaction Plots": # An easy way to plot sales against height at each level of width: interaction.plot(height, width, sales) # An easy way to plot sales against width at each level of height: interaction.plot(width, height, sales) # A longer way is given below --- some of this may be instructive: ###### ### samp.means <- tapply(sales, list(height, width), mean) # Plotting sales against height at each level of width: #first line draws blank plot: plot(c(0.5,3.5), range(sales), xlab='Height', ylab='Sales', type='n', axes=F) axis(1, at=1:3, labels=c('bottom','middle','top')); axis(2); box() lines(1:3, samp.means[,1], lty=1, type='b', pch=19) lines(1:3, samp.means[,2], lty=3, type='b', pch=1) legend(x=1, y=70, c('regular','wide'), lty=c(1,3), pch=c(19,1)) # Plotting sales against width at each level of height: #first line draws blank plot: plot(c(0.5,2.5), range(sales), xlab='Width', ylab='Sales', type='n', axes=F) axis(1, at=1:2, labels=c('regular','wide')); axis(2); box() lines(1:2, samp.means[1,], lty=1, type='b', pch=19) lines(1:2, samp.means[2,], lty=2, type='b', pch=3) lines(1:2, samp.means[3,], lty=3, type='b', pch=1) legend(x=1.5, y=60, c('bottom','middle','top'), lty=c(1,2,3), pch=c(19,3,1)) ### ###### ##################################################################################### # Residual plots for checking ANOVA model assumptions: # plotting residuals versus fitted values: plot(fitted.values(bakery.fit), residuals(bakery.fit), xlab="Fitted Values", ylab="Residuals"); abline(h=0) # A normal Q-Q plot of the residuals: qqnorm(residuals(bakery.fit)) ##################################################################################### # Formal Tests of Model Assumptions # For the Brown-Forsythe test when there are multiple factors, we use the B-F function # where the "group" variable contains all the treatments: ## A short function to perform the Brown-Forsythe test ## for equal variances: BF.test <- function(y, group) { group <- as.factor(group) # precautionary meds <- tapply(y, group, median) resp <- abs(y - meds[group]) anova(lm(resp ~ group))[1, 4:5] } # Implementing the function for our data: BF.test(sales, height:width) # Note that this isn't useful when there are so few observations # per treatment. The denominator of the F-statistic of the B-F test # is essentially 0. The code should work when there are more # observations per cell, though. # The Shapiro-Wilk test for normality is produced by: shapiro.test(resid(bakery.fit)) # The Shapiro-Wilk P-value is 0.0433. We can formally reject the normality # assumption. Possibly a transformation of the response variable is appropriate here.