############################################### ## Author: Joshua M. Tebbs ## Date: 11 December 2011 ## Update: 31 December 2017 ## STAT 509 course notes: R Code ## Chapter 11 ############################################### ############### # Example 11.1 ############### # Cheese data # Page 170-171 ## Find data web site ## Cut and paste the data set into a Notepad file ## Save this file (e.g., cheese.txt) to a directory that you know ## Use R to import the data from a directory on your computer cheese = read.table( "C:\\Users\\Tebbs\\Documents\\texfiles\\Classes\\USC\\stat509\\s18\\data\\cheese.txt",header=TRUE) # Define variables taste <- cheese[,1] acetic <- cheese[,2] h2s <- cheese[,3] lactic <- cheese[,4] # Page 174 # Fit the model fit = lm(taste~acetic+h2s+lactic) fit # Page 175 # Calculate fitted values and residuals fit = lm(taste~acetic+h2s+lactic) fitted.values = predict(fit) residuals = taste-fitted.values # Figure 11.1 # Page 179 # F(3,26) pdf with F statistic f = seq(0,20,0.01) pdf = df(f,3,26) plot(f,pdf,type="l",lty=1,xlab="F",ylab="PDF") abline(h=0) # Add points points(x=16.22,y=0,pch=4,cex=1.5) # Page 181 # R's ANOVA with sequential SS fit = lm(taste~acetic+h2s+lactic) anova(fit) # Page 182 # Different ordering fit.2 = lm(taste~h2s+lactic+acetic) anova(fit.2) # Page 184 # Confidence intervals for regression parameters fit = lm(taste~acetic+h2s+lactic) confint(fit,level=0.95) # Page 185 # Confidence/Prediction intervals fit = lm(taste~acetic+h2s+lactic) predict(fit,data.frame(acetic=5.5,h2s=6.0,lactic=1.4),level=0.95,interval="confidence") predict(fit,data.frame(acetic=5.5,h2s=6.0,lactic=1.4),level=0.95,interval="prediction") # Page 187 # Create normal qqplots for residuals # Need to install (then load) car package in R fit = lm(taste~acetic+h2s+lactic) qqPlot(residuals(fit),distribution="norm",xlab="N(0,1) quantiles",ylab="Residuals",pch=16) # Page 189 # Create residual plot for cheese data fit plot(fitted(fit),residuals(fit),pch=16,xlab="Fitted values",ylab="Residuals") abline(h=0,lty=2) ############### # Example 11.2 ############### # Electricity data # Page 189 electricity = read.table( "C:\\Users\\Tebbs\\Documents\\texfiles\\Classes\\USC\\stat509\\s18\\data\\electricity.txt",header=TRUE) # Define variables monthly.usage = electricity[,1] peak.demand = electricity[,2] # Fit the model fit = lm(peak.demand~monthly.usage) fit # Page 190 # Plots were constructed separately # Scatterplot plot(monthly.usage,peak.demand,xlab="Monthly Usage (kWh)",ylab ="Peak Demand (kWh)",pch=16) abline(fit,lwd=2,col="red") # Residual plot plot(fitted(fit),residuals(fit),pch=16,xlab="Fitted values",ylab="Residuals") abline(h=0,lty=2) # Page 191 # Fit the transformed model # Square-root transformation fit.2 = lm(sqrt(peak.demand)~monthly.usage) fit.2 # Confidence intervals for regression parameters confint(fit.2,level=0.95) # Page 192 # Transformed model (square-root transformation) # Plots were constructed separately # Scatterplot plot(monthly.usage,sqrt(peak.demand),xlab="Monthly Usage (kWh)",ylab="Peak Demand (kWh): Square root scale",pch=16) abline(fit.2,lwd=2,col="red") # Residual plot plot(fitted(fit.2),residuals(fit.2),pch=16,xlab="Fitted values",ylab="Residuals") abline(h=0,lty=2) # Create normal qqplots for residuals (not shown in notes) # Need to install (then load) car package in R fit.2 = lm(sqrt(peak.demand)~monthly.usage) qqPlot(residuals(fit.2),distribution="norm",xlab="N(0,1) quantiles",ylab="Residuals",pch=16) ############### # Example 11.3 ############### # Windmill data # Page 193 wind.velocity = c(5,6,3.4,2.7,10,9.7,9.55,3.05,8.15,6.2,2.9,6.35,4.6,5.8,7.4,3.6,7.85,8.8,7,5.45,9.1,10.2,4.1,3.95,2.45) DC.output = c(1.582,1.822,1.057,0.5,2.236,2.386,2.294,0.558,2.166,1.866,0.653,1.93,1.562,1.737,2.088,1.137,2.179,2.112,1.8,1.501,2.303,2.31,1.194,1.144,0.123) # Fit the model fit = lm(DC.output~wind.velocity) fit anova(fit) # Page 193 # Plots were constructed separately # Scatterplot with simple linear regression model fit plot(wind.velocity,DC.output,xlab="Wind Velocity (mph)",ylab="DC Output",pch=16) abline(fit,lwd=2,col="red") # Residual plot plot(fitted(fit),residuals(fit),pch=16,xlab="Fitted values",ylab="Residuals") abline(h=0,lty=2) # Page 194 # Fit quadratic model wind.velocity.sq = wind.velocity^2 fit.2 = lm(DC.output~wind.velocity+wind.velocity.sq) fit.2 # Page 195 # Plots were constructed separately # Scatterplot with quadratic regression model fit plot(wind.velocity,DC.output,xlab = "Wind Velocity (mph)",ylab = "DC Output",pch=16) curve(expr = fit.2$coefficients[1] + fit.2$coefficients[2]*x + fit.2$coefficients[3]*x^2, col = "red", lwd=2,add=TRUE) # Residual plot plot(fitted(fit.2),residuals(fit.2),pch=16,xlab="Fitted values",ylab="Residuals") abline(h=0,lty=2) # Page 195 # Confidence intervals for regression parameters fit.2 = lm(DC.output~wind.velocity+wind.velocity.sq) confint(fit.2,level=0.95) # Page 196 # Create normal qqplots for residuals # Need to install (then load) car package in R # Simple linear regression fit qqPlot(residuals(fit),distribution="norm",xlab="N(0,1) quantiles",ylab="Residuals",pch=16) # Quadratic regression fit qqPlot(residuals(fit.2),distribution="norm",xlab="N(0,1) quantiles",ylab="Residuals",pch=16)