# R Example of the Single-Factor ANOVA model # We will analyze the Kenton Foods data from the example in class # reading the file off the web: # (This may not work in some implementations of R) kenton.data <- read.table(file = url("http:/www.stat.sc.edu/~hitchcock/stat701/kentondata.txt"), header=FALSE, col.names = c('sales', 'design', 'store')) # Or, save the data file into a directory and # use the full path name: kenton.data <- read.table(file = "y:/My Documents/teaching/stat_701/kentondata.txt", header=FALSE, col.names = c('sales', 'design', 'store')) # attaching the data frame: attach(kenton.data) # Making "design" a factor: design <- factor(design) # Fitting the ANOVA model and producing the ANOVA table: kenton.fit <- lm(sales ~ design); anova(kenton.fit) # Note the F* = MSTR/MSE = 18.59 here. ##################################################################################### # Producing the "Main Effects Plot": plot(as.numeric(design), fitted.values(kenton.fit),type='p', main="Main Effects Plot, Kenton Data Set") lines(as.numeric(design), fitted.values(kenton.fit),lty=2) abline(h=mean(sales),lty=3) # 95% CIs for each individual population treatment mean: # (The output is repeated for each observation in the level) predict(kenton.fit, interval='confidence', level=0.95) ####################################################################################### # 95% CI for mu_3 - mu_4: lower.limit <- (mean(sales[design==3]) - mean(sales[design==4])) - qt(1 - .05/2, df = length(sales) - length(levels(design))) * sqrt(summary(kenton.fit)$sigma^2*(1/length(sales[design==3])+1/length(sales[design==4]))) upper.limit <- (mean(sales[design==3]) - mean(sales[design==4])) + qt(1 - .05/2, df = length(sales) - length(levels(design))) * sqrt(summary(kenton.fit)$sigma^2*(1/length(sales[design==3])+1/length(sales[design==4]))) print(c(lower.limit, upper.limit)) # P-value for two-sided test of H_0: mu_3 = mu_4: test.stat <- (mean(sales[design==3]) - mean(sales[design==4])) / sqrt(summary(kenton.fit)$sigma^2*(1/length(sales[design==3])+1/length(sales[design==4]))) my.p.value <- 2*(min(pt(test.stat,df = length(sales) - length(levels(design)), lower.tail=F),pt(test.stat,df = length(sales) - length(levels(design)), lower.tail=T))) print(my.p.value) ####################################################################################### # Estimating and testing about contrasts: # Cartoon design versus non-cartoon design: contrasts(design) <- matrix(c(1/2, -1/2, 1/2, -1/2), nrow=4, ncol=1) summary(lm(sales ~ design)) # Look on line labeled 'design1' for P-value of t-test about contrast. # 95% CI for contrast: ll.contrast <- coef(summary(lm(sales ~ design)))[2,1]-qt(0.975,df=anova(kenton.fit)[2,1])*coef(summary(lm(sales ~ design)))[2,2] ul.contrast <- coef(summary(lm(sales ~ design)))[2,1]+qt(0.975,df=anova(kenton.fit)[2,1])*coef(summary(lm(sales ~ design)))[2,2] print(c(ll.contrast, ul.contrast)) ######################################################################################### # Tukey CIs for pairwise treatment mean differences: TukeyHSD(aov(kenton.fit),conf.level=0.90) # NOTE: The CIs which do NOT contain zero indicate the treatment means # that are significantly different at (here) the 0.10 family significance level. #################################################################################### # Residual plots for checking ANOVA model assumptions: # plotting residuals versus fitted values: plot(fitted.values(kenton.fit), residuals(kenton.fit), xlab="Fitted Values", ylab="Residuals"); abline(h=0) # A normal Q-Q plot of the residuals: qqnorm(residuals(kenton.fit)) ## A short function to perform the Brown-Forsythe test ## for equal variances across the populations: BF.test <- function(y, group) { group <- as.factor(group) # precautionary meds <- tapply(y, group, median) resp <- abs(y - meds[group]) anova(lm(resp ~ group))[1, 4:5] } # Implementing the function for our data: BF.test(sales, design)