# STAT_516_Lec_05.R link <- url("https://people.stat.sc.edu/gregorkb/data/KNNLrust.txt") rust <- read.csv(link,col.names=c("score","brand","rep"),sep = "", header = FALSE) head(rust) boxplot(score ~ brand, data = rust) aggregate(score ~ brand, mean,data = rust) aggregate(score ~ brand, sd,data = rust) aggregate(score ~ brand, length ,data = rust) lm_out <- lm(score ~ as.factor(brand), data = rust) summary(lm_out) Y <- rust$score Yhat <- lm_out$fitted.values ehat <- Y - Yhat N <- length(Y) a <- 4 n <- 10 sigma_hat <- sqrt(sum(ehat**2)/(N-a)) sigma_hat # build ANOVA table SStot <- sum((Y - mean(Y))**2) SSerror <- sum( ehat**2) SStrt <- SStot - SSerror MStrt <- SStrt / (a - 1) MSerror <- SSerror / (N - a) Ftest <- MStrt / MSerror alpha <- 0.05 Fcrit <- qf(1 - alpha, a-1,N-a) Fcrit pval <- 1 - pf(Ftest,a-1, N-a) # also get the table from anova(lm_out) # compare all pairs of means (naively -- without # adjusting for multiple comparisons) # for illustration just compare groups 1 and 2 Y1. <- mean(rust$score[rust$brand==1]) Y2. <- mean(rust$score[rust$brand==2]) Y3. <- mean(rust$score[rust$brand==3]) Y4. <- mean(rust$score[rust$brand==4]) sigma_hat <- sqrt(MSerror) tval <- qt(1-alpha/2,a*(n-1)) lo <- Y2. - Y1. - tval*sigma_hat *sqrt(2/n) up <- Y2. - Y1. + tval*sigma_hat *sqrt(2/n) c(lo,up) # compare group 4 to group 1 naively... lo <- Y4. - Y1. - tval*sigma_hat *sqrt(2/n) up <- Y4. - Y1. + tval*sigma_hat *sqrt(2/n) c(lo,up) tval*sqrt(2) # Tukey's version (get a wider interval): lo <- Y2. - Y1. - 3.85*sigma_hat *sqrt(1/n) up <- Y2. - Y1. + 3.85*sigma_hat *sqrt(1/n) c(lo,up) # get Tukey intervals automatically: aov_out <- aov(lm_out) TukeyHSD(aov_out) # HSD: honest significant difference plot(TukeyHSD(aov_out)) # Suppose we are using Dunnett's to compare all brands with # brand 1 as the baseline: lo <- Y2. - Y1. - 2.47*sigma_hat *sqrt(2/n) up <- Y2. - Y1. + 2.47*sigma_hat *sqrt(2/n) c(lo,up) library(DescTools) Dunnett_out <- DunnettTest(score ~ as.factor(brand), data = rust, control="1") plot(Dunnett_out) # Bonferroni (always valid) for all pairs B <- 6 # total number pairs tvalB <- qt(1-(alpha/B)/2,a*(n-1)) lo <- Y2. - Y1. - tvalB*sigma_hat *sqrt(2/n) up <- Y2. - Y1. + tvalB*sigma_hat *sqrt(2/n) c(lo,up)