###############################################
## Author: Joshua M. Tebbs
## Date: 14 October 2021
## Update: 22 November 2021
## STAT 513 course notes: R Code
## Chapter 12
###############################################

# Figure 12.1
# Page 102
plot(time,max.O2,xlab="Time (in seconds)",ylab="Maximum oxygen uptake (mL/kg/min)",
	xlim=c(600,1110),ylim=c(30,70),pch=16)

# Exercise data
time = c(918,805,891,962,968,907,770,743,1045,810,927,813,858,860,760,747,
	743,803,683,844,755,700,748,775)
max.O2 = c(42.33,53.10,42.08,50.06,42.45,42.46,47.82,49.92,36.23,49.66,41.49,
	46.17,46.18,43.21,51.81,53.28,53.29,47.18,56.91,47.80,48.65,53.67,60.62,56.76)


# Figure 12.2
# Page 115
fit = lm(max.O2 ~ time)
plot(time,max.O2,xlab="Time (in seconds)",ylab="Maximum oxygen uptake (mL/kg/min)",
	xlim=c(600,1110),ylim=c(30,70),pch=16)
abline(fit,col="red")


# Figure 12.3
# Page 117
x = seq(-8,8,0.001)
pdf = dt(x,22)
plot(x,pdf,type="l",lty=1,xlab="",ylab="",xaxp=c(-8,8,10),ylim=c(0,0.4))
abline(h=0)
points(x=-6.525,y=0,pch=19,cex=1.5)


# Figure 12.4
# Page 118
fit = lm(max.O2~time)
x.star = seq(600,1100,1)
conf = predict(fit,data.frame(time=x.star),level=0.95,interval="confidence")
pred = predict(fit,data.frame(time=x.star),level=0.95,interval="prediction")
plot(time,max.O2,xlab="Time (in seconds)",ylab="Maximum oxygen uptake (mL/kg/min)",
	xlim=c(600,1100),ylim=c(30,70),pch=16)
abline(fit,lwd=2,col="red")
lines(x.star,conf[,2],type="l",lty=2)
lines(x.star,conf[,3],type="l",lty=2)
lines(x.star,pred[,2],type="l",lty=4)
lines(x.star,pred[,3],type="l",lty=4)
# Add legend
legend(910,67.5,lty = c(2,4), 
	c(expression(paste("95% confidence")),expression(paste("95% prediction"))))


# Example 12.2
# Page 127
# Waste data
energy = c(947,1407,1452,1553,989,1162,1466,1656,1254,1336,1097,1266,1401,1223,1216,1334,1155,1453,1278, 
	1153,1225,1237,1327,1229,1205,1221,1138,1295,1391,1372)
intercept = rep(1,30)
plastic = c(18.69,19.43,19.24,22.64,16.54,21.44,19.53,23.97,21.45,20.34,17.03,21.03,20.49,20.45,18.81,
	18.28,21.41,25.11,21.04,17.99,18.73,18.49,22.08,14.28,17.74,20.54,18.25,19.09,21.25,21.62)
paper = c(15.65,23.51,24.23,22.20,23.56,23.65,24.45,19.39,23.84,26.50,23.46,26.99,19.87,23.03,22.62,21.87,
	20.47,22.59,26.27,28.22,29.39,26.58,24.88,26.27,23.61,26.58,13.77,25.62,20.63,22.71)
garbage = c(45.01,39.69,43.16,35.76,41.20,35.56,40.18,44.11,35.41,34.21,32.45,38.19,41.35,43.59,42.20,41.50,
	41.20,37.02,38.66,44.18,34.77,37.55,37.07,35.80,37.36,35.40,51.32,39.54,40.72,36.22)
moisture = c(58.21,46.31,46.63,45.85,55.14,54.24,47.20,43.82,51.01,49.06,53.23,51.78,46.69,53.57,52.98,47.44,
	54.68,48.74,53.22,53.37,51.06,50.66,50.72,48.24,49.92,53.58,51.38,50.13,48.67,48.19)

# Page 128
X = cbind(intercept,plastic,paper,garbage,moisture) # creates design matrix

# Page 130-131
X = cbind(intercept,plastic,paper,garbage,moisture) # creates design matrix
Y = energy # response
t(X)%*%X # X'X matrix
solve(t(X)%*%X) # X'X inverse
t(X)%*%Y # X'Y
solve(t(X)%*%X)%*%t(X)%*%Y # least squares estimate
fit = lm(energy~plastic+paper+garbage+moisture) # R code to calculate least squares estimate (using lm function)

# Page 143-146
fit = lm(energy~plastic+paper+garbage+moisture)
summary(fit)
X = cbind(intercept,plastic,paper,garbage,moisture) # design matrix
Y = energy # response
I = diag(30) # 30 by 30 identity matrix
H = X%*%solve(t(X)%*%X)%*%t(X) # hat matrix
SSE = t(Y)%*%(I-H)%*%Y # residual sum of squares
Y.hat = H%*%Y # vector of fitted values
e = (I-H)%*%Y # vector of residuals
# Confidence interval
predict(fit,data.frame
    (plastic=mean(plastic),paper=mean(paper),garbage=mean(garbage),moisture=mean(moisture)),
	level=0.95,interval="confidence")
# Prediction interval
predict(fit,data.frame
    (plastic=mean(plastic),paper=mean(paper),garbage=mean(garbage),moisture=mean(moisture)),
	level=0.95,interval="prediction")

# Page 153-157
# Sequential SS
fit = lm(energy~plastic+paper+garbage+moisture)
anova(fit)
fit = lm(energy~garbage+moisture+paper+plastic)
anova(fit)


# Figure 12.5
# Page 154
x = seq(0,6,0.001)
pdf = df(x,4,25)
plot(x,pdf,type="l",lty=1,xlab="F",ylab="Sampling distribution")
abline(h=0)
x = seq(qf(0.95,4,25),6,0.001)
y = df(x,4,25)
polygon(c(qf(0.95,4,25),x,6),c(0,y,0),col="lightblue")
points(x=qf(0.95,4,25),y=0,pch=19,cex=1.5)