# This example shows the analysis for the Latin Square experiment
# using the productivity data example we looked at in class

# Entering the data and defining the variables: 

##########
##
# Reading the data into R:

# The 25 observations are written to a temporary file, one record per line:
# observation number, music type, day, time slot, productivity.
my.datafile <- tempfile()
cat("
 1 A 1 1 6.3 
 2 B 1 2 9.8 
 3 C 1 3 14.3 
 4 D 1 4 12.3 
 5 E 1 5 9.1 
 6 B 2 1 7.7 
 7 C 2 2 13.5 
 8 D 2 3 13.4 
 9 E 2 4 12.6 
 10 A 2 5 9.9 
 11 C 3 1 11.7 
 12 D 3 2 10.7 
 13 E 3 3 13.8 
 14 A 3 4 9.0 
 15 B 3 5 10.3 
 16 D 4 1 9.0 
 17 E 4 2 10.5 
 18 A 4 3 9.3 
 19 B 4 4 9.8 
 20 C 4 5 12.0 
 21 E 5 1 4.5 
 22 A 5 2 5.3 
 23 B 5 3 8.4 
 24 C 5 4 9.6 
 25 D 5 5 11.0
", file = my.datafile, sep = " ")

# Prefer fixed notation over scientific notation in printed output
options(scipen = 999)

# The records carry no header line, so the column names are supplied here.
musicprod <- read.table(
  file = my.datafile,
  header = FALSE,
  col.names = c("OBS", "MUSIC", "DAY", "TIME", "PRODUCT")
)
  
# Note we could also save the data columns into a file and use a command such as:
# musicprod <- read.table(file = "z:/stat_516/filename.txt", header=FALSE, col.names = c("OBS", "MUSIC", "DAY", "TIME", "PRODUCT"))

# Copy the five columns into ordinary workspace variables so that the
# analysis below can refer to them by bare name.  attach() is deliberately
# not used: it places a second copy of the data on the search path, which is
# silently masked by any same-named workspace variable and which accumulates
# every time the script is re-run.
OBS <- musicprod$OBS
MUSIC <- musicprod$MUSIC
DAY <- musicprod$DAY
TIME <- musicprod$TIME
PRODUCT <- musicprod$PRODUCT

# The data frame called musicprod is now created,
# with five variables, OBS, MUSIC, DAY, TIME, and PRODUCT.
##
#########

############################################################################

# lm() and anova() will do a standard analysis of variance
# We specify our (qualitative) factors with the factor() function:

# Making MUSIC, DAY, TIME factors:
# (DAY and TIME are coded 1-5 in the data; without factor() they would be
# treated as numeric predictors rather than as classification factors.)

MUSIC <- factor(MUSIC)
DAY <- factor(DAY)
TIME <- factor(TIME)

# Store the factor versions back in the data frame as well, so that
# musicprod and the workspace variables always agree (e.g. if a model is
# later fitted with data = musicprod).
musicprod$MUSIC <- MUSIC
musicprod$DAY <- DAY
musicprod$TIME <- TIME

# Fit the Latin square model with lm():
#   response         : PRODUCT
#   treatment factor : MUSIC
#   row/column factors (blocking) : TIME and DAY
# anova() then prints the ANOVA table for the fitted model.

musicprod.fit <- lm(formula = PRODUCT ~ MUSIC + DAY + TIME)
anova(musicprod.fit)

# From the F-tests and their P-values, there is a significant effect of music type    
# on mean productivity.  We also see a significant row (TIME) effect and column (DAY) 
# effect. 

############################################################################ 

# Sample mean productivity for each music type, in ascending order:

sort(tapply(X = PRODUCT, INDEX = MUSIC, FUN = mean))

# Now, which of these means are significantly different?

# Tukey's procedure identifies which pairs of music types differ
# significantly.

# Tukey confidence intervals for the pairwise differences between
# treatment (MUSIC) means; only the MUSIC component of the result is shown:

TukeyHSD(aov(musicprod.fit), conf.level = 0.95)[["MUSIC"]]

# NOTE:  The CIs that do NOT contain zero identify the pairs of treatment means
# that are significantly different at the 0.05 experimentwise significance level
# (corresponding to conf.level=0.95 above).