# R code to analyze the portrait studio data # using multiple linear regression # Save the data file into a directory and # use the full path name: studio.data <- read.table(file = "z:/My Documents/teaching/stat_704/studiodata.txt", header=FALSE, col.names = c('people16', 'income', 'sales')) # attaching the data frame: attach(studio.data) # fitting the regression model: studio.reg <- lm(sales ~ people16 + income) # getting the summary regression output: # (This includes the overall F* = 99.1) summary(studio.reg) # getting the ANOVA table: anova(studio.reg) # Note that to get the "regression df" and SSR to reflect the SAS # ANOVA table, we must add the first two lines together. # getting the fitted values: fitted(studio.reg) # getting the residuals: resid(studio.reg) # Bonferroni and Holm adjustments to the t-test P-values if doing simultaneous tests: raw.ps <- c(.0001, .0333) p.adjust(raw.ps, method="bonferroni") p.adjust(raw.ps, method="holm") # getting the 95% confidence interval for the mean at X1=65.4 and X2=17.6: xh.values <- data.frame(cbind(people16 = 65.4, income = 17.6)) predict(studio.reg, xh.values, interval="confidence", level=0.95) # getting the 95% prediction interval for a new response at X1=65.4 and X2=17.6: predict(studio.reg, xh.values, interval="prediction", level=0.95) #### Residual plots and other plots #### # producing the scatter plot matrix pairs(studio.data) # residual plot (against fitted values) plot(fitted(studio.reg), resid(studio.reg), ylab="Residuals", xlab="Fitted Values"); abline(h=0) # Q-Q plot of residuals qqnorm(resid(studio.reg)) ## Formal tests for the error assumptions: # Breusch-Pagan test for nonconstant error variance # You MUST (Under Packages -> "Install Packages from CRAN") install the # lmtest package first library(lmtest); bptest(sales ~ people16 + income) # The Shapiro-Wilk test for normality is produced by: shapiro.test(resid(studio.reg)) # We see a test statistic of 0.954 and a P-value of 0.4056. # So the hypothesis of normal errors is reasonable.