############################################### ## Author: Joshua M. Tebbs ## Date: 14 October 2021 ## Update: 22 November 2021 ## STAT 513 course notes: R Code ## Chapter 12 ############################################### # Figure 12.1 # Page 102 plot(time,max.O2,xlab="Time (in seconds)",ylab="Maximum oxygen uptake (mL/kg/min)", xlim=c(600,1110),ylim=c(30,70),pch=16) # Exercise data time = c(918,805,891,962,968,907,770,743,1045,810,927,813,858,860,760,747, 743,803,683,844,755,700,748,775) max.O2 = c(42.33,53.10,42.08,50.06,42.45,42.46,47.82,49.92,36.23,49.66,41.49, 46.17,46.18,43.21,51.81,53.28,53.29,47.18,56.91,47.80,48.65,53.67,60.62,56.76) # Figure 12.2 # Page 115 fit = lm(max.O2 ~ time) plot(time,max.O2,xlab="Time (in seconds)",ylab="Maximum oxygen uptake (mL/kg/min)", xlim=c(600,1110),ylim=c(30,70),pch=16) abline(fit,col="red") # Figure 12.3 # Page 117 x = seq(-8,8,0.001) pdf = dt(x,22) plot(x,pdf,type="l",lty=1,xlab="",ylab="",xaxp=c(-8,8,10),ylim=c(0,0.4)) abline(h=0) points(x=-6.525,y=0,pch=19,cex=1.5) # Figure 12.4 # Page 118 fit = lm(max.O2~time) x.star = seq(600,1100,1) conf = predict(fit,data.frame(time=x.star),level=0.95,interval="confidence") pred = predict(fit,data.frame(time=x.star),level=0.95,interval="prediction") plot(time,max.O2,xlab="Time (in seconds)",ylab="Maximum oxygen uptake (mL/kg/min)", xlim=c(600,1100),ylim=c(30,70),pch=16) abline(fit,lwd=2,col="red") lines(x.star,conf[,2],type="l",lty=2) lines(x.star,conf[,3],type="l",lty=2) lines(x.star,pred[,2],type="l",lty=4) lines(x.star,pred[,3],type="l",lty=4) # Add legend legend(910,67.5,lty = c(2,4), c(expression(paste("95% confidence")),expression(paste("95% prediction")))) # Example 12.2 # Page 127 # Waste data energy = c(947,1407,1452,1553,989,1162,1466,1656,1254,1336,1097,1266,1401,1223,1216,1334,1155,1453,1278, 1153,1225,1237,1327,1229,1205,1221,1138,1295,1391,1372) intercept = rep(1,30) plastic = c(18.69,19.43,19.24,22.64,16.54,21.44,19.53,23.97,21.45,20.34,17.03,21.03,20.49,20.45,18.81, 18.28,21.41,25.11,21.04,17.99,18.73,18.49,22.08,14.28,17.74,20.54,18.25,19.09,21.25,21.62) paper = c(15.65,23.51,24.23,22.20,23.56,23.65,24.45,19.39,23.84,26.50,23.46,26.99,19.87,23.03,22.62,21.87, 20.47,22.59,26.27,28.22,29.39,26.58,24.88,26.27,23.61,26.58,13.77,25.62,20.63,22.71) garbage = c(45.01,39.69,43.16,35.76,41.20,35.56,40.18,44.11,35.41,34.21,32.45,38.19,41.35,43.59,42.20,41.50, 41.20,37.02,38.66,44.18,34.77,37.55,37.07,35.80,37.36,35.40,51.32,39.54,40.72,36.22) moisture = c(58.21,46.31,46.63,45.85,55.14,54.24,47.20,43.82,51.01,49.06,53.23,51.78,46.69,53.57,52.98,47.44, 54.68,48.74,53.22,53.37,51.06,50.66,50.72,48.24,49.92,53.58,51.38,50.13,48.67,48.19) # Page 128 X = cbind(intercept,plastic,paper,garbage,moisture) # creates design matrix # Page 130-131 X = cbind(intercept,plastic,paper,garbage,moisture) # creates design matrix Y = energy # response t(X)%*%X # X'X matrix solve(t(X)%*%X) # X'X inverse t(X)%*%Y # X'Y solve(t(X)%*%X)%*%t(X)%*%Y # least squares estimate fit = lm(energy~plastic+paper+garbage+moisture) # R code to calculate least squares estimate (using lm function) # Page 143-146 fit = lm(energy~plastic+paper+garbage+moisture) summary(fit) X = cbind(intercept,plastic,paper,garbage,moisture) # design matrix Y = energy # response I = diag(30) # 30 by 30 identity matrix H = X%*%solve(t(X)%*%X)%*%t(X) # hat matrix SSE = t(Y)%*%(I-H)%*%Y # residual sum of squares Y.hat = H%*%Y # vector of fitted values e = (I-H)%*%Y # vector of residuals # Confidence interval predict(fit,data.frame (plastic=mean(plastic),paper=mean(paper),garbage=mean(garbage),moisture=mean(moisture)), level=0.95,interval="confidence") # Prediction interval predict(fit,data.frame (plastic=mean(plastic),paper=mean(paper),garbage=mean(garbage),moisture=mean(moisture)), level=0.95,interval="prediction") # Page 153-157 # Sequential SS fit = lm(energy~plastic+paper+garbage+moisture) anova(fit) fit = lm(energy~garbage+moisture+paper+plastic) anova(fit) # Figure 12.5 # Page 154 x = seq(0,6,0.001) pdf = df(x,4,25) plot(x,pdf,type="l",lty=1,xlab="F",ylab="Sampling distribution") abline(h=0) x = seq(qf(0.95,4,25),6,0.001) y = df(x,4,25) polygon(c(qf(0.95,4,25),x,6),c(0,y,0),col="lightblue") points(x=qf(0.95,4,25),y=0,pch=19,cex=1.5)