############################################### ## Author: Joshua M. Tebbs ## Date: 27 July 2016 ## Update: 25 March 2024 ## STAT 110 course notes: R Code Chapter 15 ############################################### # Figure 15.1 # Page 154 # Hypertension data # Enter the data age = c(39,47,45,47,65,46,67,42,67,56,64,56,59,34,42,48,45,17,20,19,36,50,39,21,44,53,63,29,25,69) SBP = c(144,220,138,145,162,142,170,124,158,154,162,150,140,110,128,130,135,114,116,124,136,142,120,120,160,158,144,130,125,175) plot(age,SBP,xlab="Age (in years)",ylab="Systolic blood pressure (mm Hg)",pch=16) # Figure 15.2 # Page 155 # Calculate intercept (a) and slope (b) of least-squares regression line fit = lm(SBP~age) fit plot(age,SBP,xlab="Age (in years)",ylab="Systolic blood pressure (mm Hg)",pch=16) abline(fit,col="red",lwd=2) # Figure 15.3 # Page 157 # Make scatterplot with vertical distances plot(age,SBP,xlab="Age (in years)",ylab="Systolic blood pressure (mm Hg)",pch=16,xlim=c(20,35),ylim=c(110,135)) abline(fit,col="red",lwd=2) lines(c(20,20),c(116,fit$coefficients[1] + fit$coefficients[2]*20),col="blue",lwd=2) lines(c(21,21),c(120,fit$coefficients[1] + fit$coefficients[2]*21),col="blue",lwd=2) lines(c(25,25),c(125,fit$coefficients[1] + fit$coefficients[2]*25),col="blue",lwd=2) lines(c(29,29),c(130,fit$coefficients[1] + fit$coefficients[2]*29),col="blue",lwd=2) lines(c(34,34),c(110,fit$coefficients[1] + fit$coefficients[2]*34),col="blue",lwd=2) # Figure 15.4 # Page 158 # Make scatterplot which includes age = 0 plot(age,SBP,xlab="Age (in years)",ylab="Systolic blood pressure (mm Hg)",pch=16,xlim=c(0,max(age)),ylim=c(95,max(SBP))) abline(fit,col="red",lwd=2) abline(v=0,lty=4) # Figure 15.5 # Page 159 # Make scatterplot with least squares line superimposed # Prediction highlighted plot(age,SBP,xlab="Age (in years)",ylab="Systolic blood pressure (mm Hg)",pch=16) abline(fit,col="red",lwd=2) lines(c(50,50),c(95,fit$coefficients[1] + fit$coefficients[2]*50),col="blue",lwd=2,lty=2) lines(c(50,15),c(fit$coefficients[1] + fit$coefficients[2]*50,fit$coefficients[1] + fit$coefficients[2]*50),col="blue",lwd=2,lty=2) # Figure 15.6 # Page 161 # Hypertension data without outlier age.no.outlier = c(39,45,47,65,46,67,42,67,56,64,56,59,34,42,48,45,17,20,19,36,50,39,21,44,53,63,29,25,69) SBP.no.outlier = c(144,138,145,162,142,170,124,158,154,162,150,140,110,128,130,135,114,116,124,136,142,120,120,160,158,144,130,125,175) fit.no.outlier = lm(SBP.no.outlier~age.no.outlier) fit.no.outlier plot(age,SBP,xlab="Age (in years)",ylab="Systolic blood pressure (mm Hg)",pch=16) abline(fit,lwd=2,col="red") abline(fit.no.outlier,lty=2,lwd=2,col="blue") # Figure 15.7 # Page 162 # Hypertension data with outlier changed age.changed.outlier = c(39,17,45,47,65,46,67,42,67,56,64,56,59,34,42,48,45,17,20,19,36,50,39,21,44,53,63,29,25,69) SBP.changed.outlier = c(144,220,138,145,162,142,170,124,158,154,162,150,140,110,128,130,135,114,116,124,136,142,120,120,160,158,144,130,125,175) fit.changed.outlier = lm(SBP.changed.outlier~age.changed.outlier) fit.changed.outlier plot(age.changed.outlier,SBP.changed.outlier,xlab="Age (in years)",ylab="Systolic blood pressure (mm Hg)",pch=16) abline(fit,lwd=2,col="red") abline(fit.changed.outlier,lty=2,lwd=2,col="blue") # Figure 15.8 # Page 163 # This is the same as Figure 15.2. # Figure 15.9 # Page 164 # FCAT data # Enter the data FCAT.reading = c(165,157,164,162,162,164,162,165,163,161,169,172,172,174,174,170,181,180,178,175,181,183) poverty = c(91.7,90.2,86.0,83.9,80.4,76.5,66.0,65.8,75.6,75.0,74.7,63.2,52.9,48.5,39.1,38.4,34.3,30.3,30.3,29.6,26.5,13.8) # Calculate the regression values a (intercept) and b (slope) fit = lm(FCAT.reading~poverty) fit plot(poverty,FCAT.reading,xlab="Students below poverty level (%)",ylab="Average FCAT reading score",pch=16) abline(fit,lwd=2,col="red") # Figure 15.10 # Page 151 # Satisfaction and happiness data hours = c(6,9,12,14,20,30,35,40,47,51,55,60,65) satisfaction = c(14,28,50,70,80,89,94,90,75,59,44,27,18) # Straight-line regression fit = lm(satisfaction~hours) # Quadratic regression hours.sq = hours^2 fit.2 = lm(satisfaction~hours+hours.sq) # Scatterplot plot(hours,satisfaction,xlab="Number of hours worked per week",ylab="Satisfaction score",pch=16) # Add straight line curve(expr = fit$coefficients[1] + fit$coefficients[2]*x,col="blue",lty=2,lwd=2,add=TRUE) # Add quadratic curve curve(expr = fit.2$coefficients[1] + fit.2$coefficients[2]*x + fit.2$coefficients[3]*x^2,col="red",lty="solid",lwd=2,add=TRUE) cor(hours,satisfaction)