# R Example of the Regression Approach to the ANOVA model
#
# We will analyze the Kenton Foods data from the example in class.
# The response variable is sales and the factor is package design.
# The store label is also given in the data set.

#### With the Factor-Effects Model ####

# Save the data file into a directory and use the full path name.
# NOTE(review): the cell-means section of this file reads the same data from
# ".../stat_701/kentondata.txt"; confirm which directory is the correct one.
kenton.data <- read.table(
  file = "z:/My Documents/teaching/stat_705/kentondata.txt",
  header = FALSE,
  col.names = c("sales", "design", "store")
)

# The data frame is deliberately NOT attach()ed: attach() places a copy of the
# columns on the search path, and workspace objects with the same name (such as
# the factor version of "design" below) then silently shadow that copy.
# Columns are instead referenced through kenton.data explicitly.

# Making "design" a factor:
design <- factor(kenton.data$design)

# Defining r - 1 = 3 indicator variables (r = 4 here).
# x_i is 1 for design i, -1 for design 4 (the last level), and 0 otherwise:
x1 <- ifelse(design == 1, 1, 0) - ifelse(design == 4, 1, 0)
x2 <- ifelse(design == 2, 1, 0) - ifelse(design == 4, 1, 0)
x3 <- ifelse(design == 3, 1, 0) - ifelse(design == 4, 1, 0)

# Fitting this ANOVA model via a regression approach with lm().
# "sales" is taken from kenton.data; x1, x2 and x3 are found in the workspace.
kenton.reg.fe <- lm(sales ~ x1 + x2 + x3, data = kenton.data)
summary(kenton.reg.fe)

# Note the estimates for the regression parameters are:
#   18.675 (estimate of mu_dot), -4.075 (estimate of tau_1),
#   -5.275 (estimate of tau_2),   0.825 (estimate of tau_3).
# The estimate of tau_4 is thus -(-4.075) - (-5.275) - 0.825 = 8.525,
# because the tau_i are constrained to sum to zero.

# Estimates of the Factor Level Means:
#   For Design 1: 18.675 - 4.075 = 14.6.
#   For Design 2: 18.675 - 5.275 = 13.4.
#   For Design 3: 18.675 + 0.825 = 19.5.
#   For Design 4: 18.675 - (-4.075) - (-5.275) - 0.825 = 27.2.

# Verify that these are the same estimates we obtained
# with the other approach.
#### With the Cell-Means Model ####

# Save the data file into a directory and use the full path name.
# NOTE(review): the factor-effects section of this file reads the same data
# from ".../stat_705/kentondata.txt"; confirm which directory is the correct
# one.
kenton.data <- read.table(
  file = "z:/My Documents/teaching/stat_701/kentondata.txt",
  header = FALSE,
  col.names = c("sales", "design", "store")
)

# The data frame is deliberately NOT attach()ed: attaching a re-read copy
# stacks a second set of columns on the search path and produces masking
# messages, and workspace objects with the same name silently shadow it.
# Columns are instead referenced through kenton.data explicitly.

# Making "design" a factor:
design <- factor(kenton.data$design)

# Defining r = 4 indicator variables, one 0/1 dummy per design level:
x1 <- ifelse(design == 1, 1, 0)
x2 <- ifelse(design == 2, 1, 0)
x3 <- ifelse(design == 3, 1, 0)
x4 <- ifelse(design == 4, 1, 0)

# Fitting this ANOVA model via a regression approach with lm().
# We must specify a no-intercept fit with the inclusion of "0" after the ~;
# with an intercept, the four indicators would be linearly dependent on it.
# "sales" is taken from kenton.data; x1, ..., x4 are found in the workspace.
kenton.reg.cm <- lm(sales ~ 0 + x1 + x2 + x3 + x4, data = kenton.data)
summary(kenton.reg.cm)

# Note the estimates for the regression parameters are:
#   14.6 (estimate of mu_1), 13.4 (estimate of mu_2),
#   19.5 (estimate of mu_3), 27.2 (estimate of mu_4).
# These are exactly the estimates of the factor level means.