# R Example of investigating treatment differences
# in the Single-Factor ANOVA model.
# We again analyze the Kenton Foods data from the example in class.

# Option 1: save the data file into a directory and read it with the full
# path name (edit the path for your machine before uncommenting):
# kenton.data <- read.table(file = "y:/My Documents/teaching/stat_705/kentondata.txt",
#                           header = FALSE,
#                           col.names = c("sales", "design", "store"))

# Option 2: read it directly from the webpage:
kenton.data <- read.table(
  file = "https://people.stat.sc.edu/hitchcock/kentondata.txt",
  header = FALSE,
  col.names = c("sales", "design", "store")
)

# Pull the columns into the workspace explicitly (safer than attach(),
# which can silently mask other objects), making "design" a factor:
sales <- kenton.data$sales
design <- factor(kenton.data$design)

# Fitting the ANOVA model and producing the ANOVA table:
kenton.fit <- lm(sales ~ design)
anova(kenton.fit)
# Note the F* = MSTR/MSE = 18.59 here.
# The F* statistic in the ANOVA table is significant, so we conclude the
# population means are significantly different.

#####################################################################################

# Producing the "Bar Graph" of sample treatment means:
barplot(height = tapply(sales, design, mean),
        names.arg = levels(design),
        xlab = "Package Design", ylab = "Mean Sales")

# Producing the "Main Effects Plot":
plot(as.numeric(design), fitted.values(kenton.fit), type = "p",
     main = "Main Effects Plot, Kenton Data Set")
lines(as.numeric(design), fitted.values(kenton.fit), lty = 2)
abline(h = mean(sales), lty = 3)  # dotted line at the overall mean of sales

# 95% CIs for each individual population treatment mean:
# (The output is repeated for each observation in the level)
predict(kenton.fit, interval = "confidence", level = 0.95)

#######################################################################################

# 95% CI for mu_3 - mu_4:
# Common pieces, computed once: error df = n - r, MSE from the fitted model,
# and the estimated standard error of the difference in sample means.
error.df <- length(sales) - length(levels(design))
mse <- summary(kenton.fit)$sigma^2
se.diff <- sqrt(mse * (1 / length(sales[design == 3]) +
                       1 / length(sales[design == 4])))
mean.diff <- mean(sales[design == 3]) - mean(sales[design == 4])

lower.limit <- mean.diff - qt(1 - .05 / 2, df = error.df) * se.diff
upper.limit <- mean.diff + qt(1 - .05 / 2, df = error.df) * se.diff
print(c(lower.limit, upper.limit))

# P-value for two-sided test of H_0: mu_3 = mu_4:
test.stat <- mean.diff / se.diff
my.p.value <- 2 * min(pt(test.stat, df = error.df, lower.tail = FALSE),
                      pt(test.stat, df = error.df, lower.tail = TRUE))
print(my.p.value)

#######################################################################################

# Estimating and testing about contrasts:
# Cartoon design versus non-cartoon design:
contrasts(design) <- matrix(c(1/2, -1/2, 1/2, -1/2), nrow = 4, ncol = 1)
summary(lm(sales ~ design))
# Look on line labeled 'design1' for P-value of t-test about contrast.

# 95% CI for contrast, using the estimate and standard error from the
# coefficient table and the residual (error) df from the ANOVA table:
contrast.coefs <- coef(summary(lm(sales ~ design)))
resid.df <- anova(kenton.fit)[2, 1]
ll.contrast <- contrast.coefs[2, 1] - qt(0.975, df = resid.df) * contrast.coefs[2, 2]
ul.contrast <- contrast.coefs[2, 1] + qt(0.975, df = resid.df) * contrast.coefs[2, 2]
print(c(ll.contrast, ul.contrast))

#########################################################################################

# Tukey CIs for pairwise treatment mean differences:
TukeyHSD(aov(kenton.fit), conf.level = 0.90)
# NOTE: The CIs which do NOT contain zero indicate the treatment means
# that are significantly different at (here) the 0.10 family significance level.
####################################################################################

# Testing Multiple Contrasts Simultaneously

# Contrast 1: cartoon designs versus non-cartoon designs:
contrasts(design) <- matrix(c(1/2, -1/2, 1/2, -1/2), nrow = 4, ncol = 1)
summary(lm(sales ~ design))
# Look on line labeled 'design1' for P-value of t-test about contrast.

# Contrast 2: which is the better cartoon design?
contrasts(design) <- matrix(c(1, 0, -1, 0), nrow = 4, ncol = 1)
summary(lm(sales ~ design))
# Look on line labeled 'design1' for P-value of t-test about contrast.

# The P-values from the INDIVIDUAL t-tests about these contrasts are
# 0.0464 and 0.0399, respectively.
# Adjusting these for the multiple tests (method names spelled out in full
# rather than relying on partial matching):
p.adjust(p = c(.0464, .0399), method = "bonferroni")
p.adjust(p = c(.0464, .0399), method = "fdr")
p.adjust(p = c(.0464, .0399), method = "holm")

# The 'bonferroni' option performs the Bonferroni adjustment to the P-values.
# This controls the familywise (experimentwise) error rate (FWER), which is the
# probability of having at least one H0 rejected if all the H0's are true.

# The 'fdr' option performs the Benjamini-Hochberg adjustment to the P-values.
# This controls the false discovery rate (FDR), which is the expected proportion
# of incorrectly rejected hypotheses among all rejected hypotheses.

# The Bonferroni method is more conservative and will not detect significant
# differences as often, ESPECIALLY when the number of simultaneous tests is
# fairly large.  For this reason, it has become popular to choose to control
# the FDR instead.

# The 'holm' option is an adjustment to the Bonferroni method that still
# controls the familywise error rate, but is slightly less conservative than
# the Bonferroni.