###############################################
## Author: Joshua M. Tebbs
## Date: 27 July 2016
## Update: 21 March 2024
## STAT 110 course notes: R Code Chapter 14
###############################################

# Directory that holds the course data files. Edit this one line to run the
# script on another machine; file.path() joins with "/", which R accepts on
# Windows as well.
data.dir = "C:\\Users\\tebbs\\OneDrive - University of South Carolina\\Documents\\texfiles\\Classes\\USC\\stat110\\s24\\data"

# Example 14.1
# Page 138
# Manatee data
manatee = read.table(file.path(data.dir, "manatees.txt"), header = TRUE)

# Create variables (selected by column position: 2 = boats, 3 = manatees)
boats = manatee[, 2]
manatees = manatee[, 3]

# Histograms (Figure 14.1)
hist(boats,
     xlab = "Number of boat registrations (in thousands)",
     ylab = "Count", main = "", col = "lightblue")
hist(manatees,
     xlab = "Number of manatees killed",
     ylab = "Count", main = "", col = "lightblue")

# Line graphs (Figure 14.1)
# Both variables are treated as annual series (frequency = 1) for 1977-2018.
boats.ts = ts(data = boats, start = 1977, end = 2018, frequency = 1)
plot(boats.ts,
     ylab = "Number of boat registrations (in thousands)",
     xlab = "Year", type = "o", pch = 19, cex = 0.75)
manatees.ts = ts(data = manatees, start = 1977, end = 2018, frequency = 1)
plot(manatees.ts,
     ylab = "Number of manatees killed",
     xlab = "Year", type = "o", pch = 19, cex = 0.75)

# Scatterplot (Figure 14.2)
plot(boats, manatees,
     xlab = "Number of boat registrations (in thousands)",
     ylab = "Number of manatees killed", pch = 16)

# Example 14.2
# Page 141
# Florida FCAT data
FCAT.reading = c(165, 157, 164, 162, 162, 164, 162, 165, 163, 161, 169,
                 172, 172, 174, 174, 170, 181, 180, 178, 175, 181, 183)
poverty = c(91.7, 90.2, 86.0, 83.9, 80.4, 76.5, 66.0, 65.8, 75.6, 75.0, 74.7,
            63.2, 52.9, 48.5, 39.1, 38.4, 34.3, 30.3, 30.3, 29.6, 26.5, 13.8)

# Scatterplot (Figure 14.3)
plot(poverty, FCAT.reading,
     xlab = "Students below poverty level (%)",
     ylab = "Average FCAT reading score", pch = 16)

# Figure 14.4
# Page 144
# Each subfigure is created separately.
# The data are simulated and no seed is set here, so the plots differ from
# run to run. Each y is x plus normal noise; a larger noise SD gives a weaker
# linear relationship.

# Upper left
x = rnorm(100, 50, 10)
y.strong = x + rnorm(100, 0, 1)
plot(x, y.strong, xlab = "x", ylab = "y", pch = 16, cex.lab = 1.5)

# Upper right
y.moderate = x + rnorm(100, 0, 10)
plot(x, y.moderate, xlab = "x", ylab = "y", pch = 16, cex.lab = 1.5)

# Lower left
y.weak = x + rnorm(100, 0, 25)
plot(x, y.weak, xlab = "x", ylab = "y", pch = 16, cex.lab = 1.5)

# Lower right (y is generated independently of x)
y.none = rnorm(100, 0, 1)
plot(x, y.none, xlab = "x", ylab = "y", pch = 16, cex.lab = 1.5)

# Figure 14.5
# Page 145
# STAT 110 exam data
scores = read.table(file.path(data.dir, "examscores.txt"), header = TRUE)

# Create variables (selected by column position)
exam.1 = scores[, 1]
exam.2 = scores[, 2]
exam.3 = scores[, 3]
final = scores[, 4]

# Create average midterm score
midterm = (exam.1 + exam.2 + exam.3) / 3

# Scatterplot
plot(midterm, final,
     xlab = "Average of three midterm scores",
     ylab = "Final exam score", pch = 16)

# Scatterplots with different correlations
# Page 147
# Each subfigure is created separately using the code below;
# change rho and rerun to get a different panel.
library(MASS) # must be loaded first; provides mvrnorm()
rho = -0.5 # correlation
# 2 x 2 covariance matrix with unit variances, so the covariance equals rho
Sigma = matrix(c(1, rho, rho, 1), 2, 2)
Sigma
# 200 draws from a bivariate normal with mean (0, 0) and covariance Sigma
data = mvrnorm(n = 200, rep(0, 2), Sigma)
cor(data)
# The title is passed through main/cex.main and is built from rho so that it
# always matches the correlation used for the simulation.
plot(data[, 1], data[, 2],
     xlab = "x", ylab = "y", pch = 16, cex.lab = 1.5,
     main = paste("Correlation =", rho), cex.main = 1.5)

# Figure 14.6
# Page 149
# These are the same scatterplots in Figures 14.2 and 14.3.
# Figure 14.7
# Page 150
# Manatee data in Example 14.1
# (uses the vectors boats and manatees created in Example 14.1)
plot(boats, manatees,
     xlab = "Number of boat registrations (in thousands)",
     ylab = "Number of manatees killed", pch = 16)

# Switch the roles of x and y
plot(manatees, boats,
     ylab = "Number of boat registrations (in thousands)",
     xlab = "Number of manatees killed", pch = 16)

# Calculate correlations (same value in either argument order)
cor(boats, manatees)
cor(manatees, boats)

# Figure 14.8
# Page 151
# Satisfaction and happiness data
hours = c(6, 9, 12, 14, 20, 30, 35, 40, 47, 51, 55, 60, 65)
satisfaction = c(14, 28, 50, 70, 80, 89, 94, 90, 75, 59, 44, 27, 18)
plot(hours, satisfaction,
     xlab = "Number of hours worked per week",
     ylab = "Satisfaction score", pch = 16)

# Superimpose quadratic regression
hours.sq = hours^2
fit = lm(satisfaction ~ hours + hours.sq)
# Redraw the scatterplot so the fitted curve is added to a fresh figure
plot(hours, satisfaction,
     xlab = "Number of hours worked per week",
     ylab = "Satisfaction score", pch = 16)

# Add red curve
# Coefficients are in the order: intercept, hours, hours.sq
fit.coef = coef(fit)
curve(expr = fit.coef[1] + fit.coef[2] * x + fit.coef[3] * x^2,
      col = "red", lty = "solid", lwd = 1.5, add = TRUE)

# Calculate correlation
cor(hours, satisfaction)

# Figure 14.9
# Page 152
# Effect of outliers
FCAT.reading = c(165, 157, 164, 162, 162, 164, 162, 165, 163, 161, 169,
                 172, 172, 174, 174, 170, 181, 180, 178, 175, 181, 183)

# The "9.2" (second observation) was changed from "90.2" in the original
# data set (poverty). This assignment deliberately overwrites any earlier
# poverty vector with the version that contains the outlier.
poverty = c(91.7, 9.2, 86.0, 83.9, 80.4, 76.5, 66.0, 65.8, 75.6, 75.0, 74.7,
            63.2, 52.9, 48.5, 39.1, 38.4, 34.3, 30.3, 30.3, 29.6, 26.5, 13.8)

# Construct scatterplot (with and without outlier)
# Left; same as Figure 14.3
# Right
plot(poverty, FCAT.reading,
     xlab = "Students below poverty level (%)",
     ylab = "Average FCAT reading score", pch = 16)