################################################################################## 

# Further Investigation of Significant Factor Effects:   

# We use the Castle Bakery data found in Chapter 19  
# and used for the example from class.               

# Save the data file into a directory and 
# use the full path name:

bakery.data <- read.table(file = "z:/My Documents/teaching/stat_705/bakerydata.txt", 
header=FALSE, col.names = c('sales', 'height', 'width', 'store'))

# attaching the data frame:

attach(bakery.data)

# Making "height" and "width" factors:

height <- factor(height)
width <- factor(width)

# Fitting the ANOVA model and producing the ANOVA table:

bakery.fit <- lm(sales ~ height + width + height:width);
anova(bakery.fit)

# We saw earlier there was NO significant interaction between height and width: 

# Investigating particular differences among factor level means  

# Here, we get 95% confidence  
# intervals for each population factor level mean. 

# Sample mean sales for each level of height:
height.means <- tapply(sales, height, mean)

# Sample mean sales for each level of width:
width.means <- tapply(sales, width, mean)

alpha <- 0.05

Error.df <- anova(bakery.fit)["Residuals","Df"]
my.MSE <- anova(bakery.fit)["Residuals","Mean Sq"]
my.n <- 1 + Error.df/(length(height.means)*length(width.means))

lower.limit.height <- (height.means - qt(1 - alpha/2, df = Error.df) * sqrt(my.MSE/(length(height.means)*my.n)))

upper.limit.height <- (height.means + qt(1 - alpha/2, df = Error.df) * sqrt(my.MSE/(length(height.means)*my.n)))

print("Individual CIs for the population mean for each level of height:")
print(cbind(levels(height), height.means, lower.limit.height, upper.limit.height))    

lower.limit.width <- (width.means - qt(1 - alpha/2, df = Error.df) * sqrt(my.MSE/(length(width.means)*my.n)))

upper.limit.width <- (width.means + qt(1 - alpha/2, df = Error.df) * sqrt(my.MSE/(length(width.means)*my.n)))

print("Individual CIs for the population mean for each level of width:")
print(cbind(levels(width), width.means, lower.limit.width, upper.limit.width))    
            

# ###########################################################################**  

# Contrasts:  CIs and Hypothesis Tests  

# Example:  We want a 95% CI for the difference in the mean sales of the  
# middle height and the mean sales of the other heights.		   

# The relevant contrast here is:  -(1/2)mu_1-dot + mu_2-dot - (1/2)mu_3-dot           

contrasts(height) <- matrix(c(-1/2, 1, -1/2), nrow=3, ncol=1)

bakery.fit.summ <- summary(lm(sales ~ height + width + height:width)); bakery.fit.summ
# Look on line labeled 'height1' for P-value of t-test about contrast.

# 95% CI for contrast:

ll.contrast <- coef(bakery.fit.summ)[2,1]-qt(0.975,df=anova(bakery.fit)["Residuals","Df"])*coef(bakery.fit.summ)[2,2]

ul.contrast <- coef(bakery.fit.summ)[2,1]+qt(0.975,df=anova(bakery.fit)["Residuals","Df"])*coef(bakery.fit.summ)[2,2]

print(c(ll.contrast, ul.contrast))

# For some reason this seems to give the wrong P-value and CI.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# Here we try it the long way:

my.b <- length(width.means)

L.hat <- as.numeric(c(-1/2, 1, -1/2) %*% height.means)

se.L.hat <- sqrt( (my.MSE/(my.b*my.n))*sum((c(-1/2, 1, -1/2))^2) )

t.star <- L.hat/se.L.hat

my.P.value <- 2*pt(t.star, df=anova(bakery.fit)["Residuals","Df"], lower.tail=FALSE)
print(paste("P-value is ", round(my.P.value,4)))

ll.contrast.long <- L.hat-qt(0.975,df=anova(bakery.fit)["Residuals","Df"])*se.L.hat

ul.contrast.long <- L.hat+qt(0.975,df=anova(bakery.fit)["Residuals","Df"])*se.L.hat

print(c(ll.contrast.long, ul.contrast.long))

# That matches what SAS gives.

# ###########################################################################**  

# Multiple Comparison Procedures  

#########################################################################################

# Tukey CIs for pairwise treatment mean differences:

TukeyHSD(aov(bakery.fit),conf.level=0.95)

# NOTE:  The CIs which do NOT contain zero indicate the treatment means 
# that are significantly different at (here) the 0.05 family significance level.

####################################################################################