############################################### ## Author: Joshua M. Tebbs ## Date: 24 October 2025 ## Update: 2 November 2025 ## STAT 509 course notes: R Code Chapter 11 ############################################### # Use the following command as necessary par(mar=c(4,4.5,4,3.5)) # adjust plotting region to avoid cutting off vertical axis label # Install and load car R package # See https://cran.r-project.org/web/packages/car/car.pdf for documentation # Example 11.1 # Page 192-212 # Enter data energy = c(947,1407,1452,1553,989,1162,1466,1656,1254,1336,1097,1266,1401,1223,1216,1334,1155,1453,1278, 1153,1225,1237,1327,1229,1205,1221,1138,1295,1391,1372) plastic = c(18.69,19.43,19.24,22.64,16.54,21.44,19.53,23.97,21.45,20.34,17.03,21.03,20.49,20.45,18.81, 18.28,21.41,25.11,21.04,17.99,18.73,18.49,22.08,14.28,17.74,20.54,18.25,19.09,21.25,21.62) paper = c(15.65,23.51,24.23,22.20,23.56,23.65,24.45,19.39,23.84,26.50,23.46,26.99,19.87,23.03,22.62,21.87, 20.47,22.59,26.27,28.22,29.39,26.58,24.88,26.27,23.61,26.58,13.77,25.62,20.63,22.71) garbage = c(45.01,39.69,43.16,35.76,41.20,35.56,40.18,44.11,35.41,34.21,32.45,38.19,41.35,43.59,42.20,41.50, 41.20,37.02,38.66,44.18,34.77,37.55,37.07,35.80,37.36,35.40,51.32,39.54,40.72,36.22) moisture = c(58.21,46.31,46.63,45.85,55.14,54.24,47.20,43.82,51.01,49.06,53.23,51.78,46.69,53.57,52.98,47.44, 54.68,48.74,53.22,53.37,51.06,50.66,50.72,48.24,49.92,53.58,51.38,50.13,48.67,48.19) # Calculate least squares estimate # Page 196 fit = lm(energy ~ plastic + paper + garbage + moisture) # ANOVA table # Page 199 Model = cbind(plastic,paper,garbage,moisture) fit = lm(energy ~ Model) anova(fit) # Figure 11.1 # Page 200 f = seq(0,8,0.01) plot(f,df(f,4,25),type="l",lty=1,xlab="F",ylab="F(4,25) pdf",cex.lab=1.25) abline(h=0) # Sequential sums of squares # Page 202 fit = lm(energy ~ plastic + paper + garbage + moisture) anova(fit) # Sequential sums of squares (different ordering) # Page 204 fit = lm(energy ~ garbage + moisture + paper + plastic) anova(fit) # Confidence intervals for individual regression parameters # Page 206 fit = lm(energy ~ plastic + paper + garbage + moisture) confint(fit,level=0.95) # Confidence interval for E(Y) and prediction interval for Y^* # Page 209-210 fit = lm(energy ~ plastic + paper + garbage + moisture) x.0 = data.frame(plastic=20,paper=25,garbage=40,moisture=50) predict(fit,x.0,conf.level=0.95,interval="confidence") predict(fit,x.0,conf.level=0.95,interval="prediction") # Figure 11.2 # Page 211 fit = lm(energy ~ plastic + paper + garbage + moisture) resid = resid(fit) library(car) qqPlot(resid,distribution="norm",mean=mean(resid),sd=sd(resid), xlab="Normal quantiles",ylab="Residuals",pch=16, envelope=list(border=TRUE,style="lines"),id=FALSE) # Figure 11.3 # Page 212 fit = lm(energy ~ plastic + paper + garbage + moisture) plot(fitted(fit),resid(fit),pch=16,xlab="Fitted values",ylab="Residuals") abline(h=0,lty=2) #################################################################################################### #################################################################################################### # Example 11.2 # Page 213-216 # Import data into R electricity = read.table( "C:\\Users\\Tebbs\\Documents\\texfiles\\Classes\\USC\\stat509\\f25\\data\\electricity.txt",header=TRUE) # Define variables monthly.usage = electricity[,1] peak.demand = electricity[,2] # Calculate least squares estimate # Page 213 fit = lm(peak.demand ~ monthly.usage) fit # Figure 11.4 # Page 214 # Scatterplot with least-squares line superimposed plot(monthly.usage,peak.demand,xlab="Monthly usage (in kWh)",ylab ="Peak demand (in kWh)", xlim=c(0,max(monthly.usage)),pch=16) abline(fit,lwd=2,col="red") # Residual plot plot(fitted(fit),resid(fit),pch=16,xlab="Fitted values",ylab="Residuals",ylim=c(-4,4)) abline(h=0,lty=2) # Fit the transformed model # Page 214 sqrt.peak.demand = sqrt(peak.demand) fit.2 = lm(sqrt.peak.demand ~ monthly.usage) fit.2 # Figure 11.5 # Page 215 # Scatterplot with least-squares line superimposed (transformed model) plot(monthly.usage,sqrt.peak.demand,xlab="Monthly usage (in kWh)",ylab=expression("Peak demand (in"~sqrt(kWh)~")"), xlim=c(0,max(monthly.usage)),pch=16) abline(fit.2,lwd=2,col="red") # Residual plot plot(fitted(fit.2),resid(fit.2),pch=16,xlab="Fitted values",ylab="Residuals",ylim=c(-1.5,1)) abline(h=0,lty=2) # Figure 11.6 # Page 216 fit.2 = lm(sqrt.peak.demand ~ monthly.usage) resid = resid(fit.2) library(car) qqPlot(resid,distribution="norm",mean=mean(resid),sd=sd(resid), xlab="Normal quantiles",ylab="Residuals",pch=16, envelope=list(border=TRUE,style="lines"),id=FALSE) # Confidence intervals for individual regression parameters # Page 216 confint(fit.2,conf.level=0.95) #################################################################################################### #################################################################################################### # Example 11.3 # Page 217-219 # Enter data velocity = c(5,6,3.4,2.7,10,9.7,9.55,3.05,8.15,6.2,2.9,6.35,4.6,5.8,7.4,3.6,7.85,8.8,7,5.45,9.1,10.2,4.1,3.95,2.45) DC.output = c(1.582,1.822,1.057,0.5,2.236,2.386,2.294,0.558,2.166,1.866,0.653,1.93,1.562,1.737,2.088,1.137,2.179, 2.112,1.8,1.501,2.303,2.31,1.194,1.144,0.123) # ANOVA table # Page 217 fit = lm(DC.output ~ velocity) anova(fit) # Figure 11.7 # Page 217 # Scatterplot with least-squares line superimposed plot(velocity,DC.output,xlab="Wind velocity (in mph)",ylab="DC output (in kwh)",pch=16) abline(fit,lwd=2,col="red") # Residual plot plot(fitted(fit),residuals(fit),pch=16,xlab="Fitted values",ylab="Residuals") abline(h=0,lty=2) # Calculate least squares estimate for quadratic model # Page 218 velocity.sq = velocity^2 fit.2 = lm(DC.output ~ velocity + velocity.sq) fit.2 # Figure 11.8 # Page 219 # Scatterplot with least-squares quadratic fit superimposed plot(velocity,DC.output,xlab = "Wind velocity (in mph)",ylab = "DC output (in kwh)",pch=16) curve(expr = fit.2$coefficients[1] + fit.2$coefficients[2]*x + fit.2$coefficients[3]*x^2, col="red",lwd=2,add=TRUE) # Residual plot plot(fitted(fit.2),residuals(fit.2),pch=16,xlab="Fitted values",ylab="Residuals") abline(h=0,lty=2) # Confidence intervals for individual regression parameters # Page 219 confint(fit.2,conf.level=0.95)