# R code to analyze the portrait studio data
# using multiple linear regression


# Save the data file into a directory and 
# use the full path name:

studio.data <- read.table(file = "z:/My Documents/teaching/stat_704/studiodata.txt", 
header=FALSE, col.names = c('people16', 'income', 'sales'))

# attaching the data frame:

attach(studio.data)

# fitting the regression model:

studio.reg <- lm(sales ~ people16 + income)

# getting the summary regression output:
# (This includes the overall F* = 99.1)

summary(studio.reg)

# getting the ANOVA table:

anova(studio.reg)

# Note that to get the "regression df" and SSR to reflect the SAS 
# ANOVA table, we must add the first two lines together.

# getting the fitted values:

fitted(studio.reg)

# getting the residuals:

resid(studio.reg)

# Bonferroni and Holm adjustments to the t-test P-values if doing simultaneous tests:

raw.ps <- c(.0001, .0333)
p.adjust(raw.ps, method="bonferroni")
p.adjust(raw.ps, method="holm")

# getting the 95% confidence interval for the mean at X1=65.4 and X2=17.6:

xh.values <- data.frame(cbind(people16 = 65.4, income = 17.6))

predict(studio.reg, xh.values, interval="confidence", level=0.95)

# getting the 95% prediction interval for a new response at X1=65.4 and X2=17.6:

predict(studio.reg, xh.values, interval="prediction", level=0.95)

#### Residual plots and other plots ####

# producing the scatter plot matrix

pairs(studio.data)

# residual plot (against fitted values)

plot(fitted(studio.reg), resid(studio.reg), ylab="Residuals", xlab="Fitted Values"); abline(h=0)

# Q-Q plot of residuals

qqnorm(resid(studio.reg))

## Formal tests for the error assumptions:

# Breusch-Pagan test for nonconstant error variance

# You MUST (Under Packages -> "Install Packages from CRAN") install the 
# lmtest package first

library(lmtest); bptest(sales ~ people16 + income)

# The Shapiro-Wilk test for normality is produced by:

shapiro.test(resid(studio.reg))

# We see a test statistic of 0.954 and a P-value of 0.4056.
# So the hypothesis of normal errors is reasonable.