############################################### ## Author: Joshua M. Tebbs ## Date: 11 December 2011 ## Update: 31 December 2017 ## STAT 509 course notes: R Code ## Chapter 8 ############################################### # Figure 8.1 # Page 111 # Two normal distributions y = seq(35,75,0.01) plot(y,dnorm(y,50,3),type="l",lty=1,xlab="y",ylab="PDF",ylim=c(0,0.15)) lines(y,dnorm(y,60,3),lty=4) abline(h=0) # Add legend legend(63,0.15,lty = c(1,4), c( expression(paste("Population 1")), expression(paste("Population 2")) )) # Figure 8.2 # Page 112 # t pdf with quantiles and shading x = seq(-4,4,0.001) pdf = dt(x,10) plot(x,pdf,type="l",lty=1,xlab="t",ylab="PDF",ylim=c(0,0.4)) abline(h=0) x = seq(-4,qt(0.025,10),0.001) y = dt(x,10) polygon(c(-4,x,qt(0.025,10)),c(0,y,0),col="grey") # Add points points(x=qt(0.025,10),y=0,pch=19,cex=1.5) x = seq(qt(0.975,10),4,0.001) y = dt(x,10) polygon(c(qt(0.975,10),x,4),c(0,y,0),col="grey") # Add points points(x=qt(0.975,10),y=0,pch=19,cex=1.5) # Add text text(-0.025,0.1,expression(1-alpha)) text(-3.5,0.05,expression(alpha/2)) text(3.5,0.05,expression(alpha/2)) # Example 8.1 # Fish weight data # Page 113-116 loc.1 = c(21.9,18.5,12.3,16.7,21.0,15.1,18.2,23.0,36.8,26.6) loc.2 = c(21.0,19.6,14.4,16.9,23.4,14.6,10.4,16.5) # Create side by side boxplots # Use range=1.5 to identify the outlier boxplot(loc.1,loc.2,range=1.5,xlab="",names=c("Location 1","Location 2"),ylab="Weight (in ounces)",ylim=c(0,40),col="lightblue") # Calculate confidence interval t.test(loc.1,loc.2,conf.level=0.90,var.equal=TRUE)$conf.int # Page 116 # Create normal qqplots for each sample # Need to install (then load) car package in R qqPlot(loc.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Location 1",pch=16) qqPlot(loc.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Location 2",pch=16) # Example 8.2 # White paper data # Page 117-119 plant.1 = c(3.01,2.58,3.04,1.75,2.87,2.57,2.51,2.93,2.85,3.09, 1.43,3.36,3.18,2.74,2.25,1.95,3.68,2.29,1.86,2.63, 2.83,2.04,2.23,1.92,3.02) plant.2 = c(3.99,2.08,3.66,1.53,4.27,4.31,2.62,4.52,3.80,5.30, 3.41,0.82,3.03,1.95,6.45,1.86,1.87,3.98,2.74,4.81) # Create side by side boxplots boxplot(plant.1,plant.2,range=1.5,xlab="",names=c("Plant 1","Plant 2"),ylab="Weight (in 100s lb)",ylim=c(0,8),col="lightblue") # Calculate confidence interval t.test(plant.1,plant.2,conf.level=0.95,var.equal=FALSE)$conf.int # Page 119 # Create normal qqplots for each sample # Need to install (then load) car package in R qqPlot(plant.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Plant 1",pch=16) qqPlot(plant.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Plant 2",pch=16) # Example 8.3 # Ergonomics data: Matched pairs analysis # Page 119-124 before = c(81.3,87.2,86.1,82.2,90.8,86.9,96.5,73.0,84.2,74.5,72.0,73.8,74.2, 74.9,75.8,72.6,80.8,66.5,72.2,56.5,82.4,88.8,80.0,91.1,97.5,70.0) after = c(78.9,91.4,78.3,78.3,84.4,67.4,92.8,69.9,63.8,69.7,68.4,71.8,58.3, 58.3,62.5,70.2,58.7,66.6,60.7,65.0,73.7,80.4,78.8,81.8,91.6,74.2) # Create data differences diff = before-after # Calculate confidence interval t.test(diff,conf.level=0.95)$conf.int # Page 124 # Create normal qqplots for each sample # Need to install (then load) car package in R qqPlot(diff,distribution="norm",xlab="N(0,1) quantiles",ylab="Data differences",pch=16) # Figure 8.8 # Page 125 # F PDFs y = seq(0,8,0.01) plot(y,df(y,5,10),type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.85)) # Add other F pdfs lines(y,df(y,5,20),lty=4) lines(y,df(y,10,20),lty=8) abline(h=0) # Add legend legend(4,0.5,lty = c(1,4,8), c( expression(paste(nu[1], "=5, ", nu[2], "=10")), expression(paste(nu[1], "=5, ", nu[2], "=20")), expression(paste(nu[1], "=10, ", nu[2], "=20")) )) # Figure 8.9 # Page 127 # F pdf with quantiles and shading x = seq(0,6,0.001) pdf = df(x,10,10) plot(x,pdf,type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.8)) abline(h=0) x = seq(0,qf(0.025,10,10),0.001) y = df(x,10,10) polygon(c(0,x,qf(0.025,10,10)),c(0,y,0),col="grey") # Add points points(x=qf(0.025,10,10),y=0,pch=19,cex=1.5) x = seq(qf(0.975,10,10),6,0.001) y = df(x,10,10) polygon(c(qf(0.975,10,10),x,6),c(0,y,0),col="grey") # Add points points(x=qf(0.975,10,10),y=0,pch=19,cex=1.5) # Add text text(1.2,0.15,expression(1-alpha)) text(0.0,0.2,expression(alpha/2)) text(4.5,0.05,expression(alpha/2)) # Example 8.4 # Paint fill volume data # Page 128-130 process.1 = c(127.75,127.87,127.86,127.92,128.03,127.94,127.91,128.10,128.01,128.11,127.79,127.93, 127.89,127.96,127.80,127.94,128.02,127.82,128.11,127.92,127.74,127.78,127.85,127.96) process.2 = c(127.90,127.90,127.74,127.93,127.62,127.76,127.63,127.93,127.86,127.73,127.82,127.84, 128.06,127.88,127.85,127.60,128.02,128.05,127.95,127.89,127.82,127.92,127.71,127.78) # Create side by side boxplots boxplot(process.1,process.2,xlab="",names=c("Process 1","Process 2"),ylab="Fluid ounces",ylim=c(127.5,128.25),col="lightblue") # R does not have an internal function to calculate CI for the ratio of population variances (not that I could find quickly) # I wrote this function to do it ratio.var.interval = function(data.1,data.2,conf.level=0.95){ df.1 = length(data.1)-1 df.2 = length(data.2)-1 F.lower = qf((1-conf.level)/2,df.1,df.2) F.upper = qf((1+conf.level)/2,df.1,df.2) s2.1 = var(data.1) s2.2 = var(data.2) c((s2.2/s2.1)*F.lower,(s2.2/s2.1)*F.upper) } # CI for ratio of population variances ratio.var.interval(process.1,process.2) # Page 130 # Create normal qqplots for each sample # Need to install (then load) car package in R qqPlot(process.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Process 1",pch=16) qqPlot(process.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Process 2",pch=16) # Example 8.5 # Irish HBV data # Page 133 # R does not have an internal function to calculate CI for the difference of two population proportions (not that I could find quickly) # I wrote this function to do it proportion.diff.interval = function(y.1,n.1,y.2,n.2,conf.level=0.95){ z.upper = qnorm((1+conf.level)/2) var.1 = (y.1/n.1)*(1-y.1/n.1)/n.1 var.2 = (y.2/n.2)*(1-y.2/n.2)/n.2 se = sqrt(var.1+var.2) moe = z.upper*se c((y.1/n.1-y.2/n.2)-moe,(y.1/n.1-y.2/n.2)+moe) } # CI for difference of two population proportions proportion.diff.interval(18,82,28,555)