###############################################
## Author: Joshua M. Tebbs
## Date: 11 December 2011
## Update: 31 December 2017
## STAT 509 course notes: R Code
## Chapter 8
###############################################

# Figure 8.1
# Page 111
# Two normal distributions
y = seq(35,75,0.01)
plot(y,dnorm(y,50,3),type="l",lty=1,xlab="y",ylab="PDF",ylim=c(0,0.15))
lines(y,dnorm(y,60,3),lty=4)
abline(h=0)
# Add legend
legend(63,0.15,lty = c(1,4), 
	c(
	expression(paste("Population 1")),
	expression(paste("Population 2"))
	))


# Figure 8.2
# Page 112
# t pdf with quantiles and shading
x = seq(-4,4,0.001)
pdf = dt(x,10)
plot(x,pdf,type="l",lty=1,xlab="t",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
x = seq(-4,qt(0.025,10),0.001)
y = dt(x,10)
polygon(c(-4,x,qt(0.025,10)),c(0,y,0),col="grey")
# Add points
points(x=qt(0.025,10),y=0,pch=19,cex=1.5)
x = seq(qt(0.975,10),4,0.001)
y = dt(x,10)
polygon(c(qt(0.975,10),x,4),c(0,y,0),col="grey")
# Add points
points(x=qt(0.975,10),y=0,pch=19,cex=1.5)
# Add text
text(-0.025,0.1,expression(1-alpha))
text(-3.5,0.05,expression(alpha/2))
text(3.5,0.05,expression(alpha/2))


# Example 8.1
# Fish weight data
# Page 113-116
loc.1 = c(21.9,18.5,12.3,16.7,21.0,15.1,18.2,23.0,36.8,26.6)
loc.2 = c(21.0,19.6,14.4,16.9,23.4,14.6,10.4,16.5)
# Create side by side boxplots
# Use range=1.5 to identify the outlier
boxplot(loc.1,loc.2,range=1.5,xlab="",names=c("Location 1","Location 2"),ylab="Weight (in ounces)",ylim=c(0,40),col="lightblue")
# Calculate confidence interval
t.test(loc.1,loc.2,conf.level=0.90,var.equal=TRUE)$conf.int
# Page 116
# Create normal qqplots for each sample
# Need to install (then load) car package in R 
qqPlot(loc.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Location 1",pch=16)
qqPlot(loc.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Location 2",pch=16)


# Example 8.2
# White paper data
# Page 117-119
plant.1 = c(3.01,2.58,3.04,1.75,2.87,2.57,2.51,2.93,2.85,3.09, 
         1.43,3.36,3.18,2.74,2.25,1.95,3.68,2.29,1.86,2.63, 
         2.83,2.04,2.23,1.92,3.02)
plant.2 = c(3.99,2.08,3.66,1.53,4.27,4.31,2.62,4.52,3.80,5.30,
         3.41,0.82,3.03,1.95,6.45,1.86,1.87,3.98,2.74,4.81)
# Create side by side boxplots
boxplot(plant.1,plant.2,range=1.5,xlab="",names=c("Plant 1","Plant 2"),ylab="Weight (in 100s lb)",ylim=c(0,8),col="lightblue")
# Calculate confidence interval
t.test(plant.1,plant.2,conf.level=0.95,var.equal=FALSE)$conf.int
# Page 119
# Create normal qqplots for each sample
# Need to install (then load) car package in R 
qqPlot(plant.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Plant 1",pch=16)
qqPlot(plant.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Plant 2",pch=16)


# Example 8.3
# Ergonomics data: Matched pairs analysis
# Page 119-124
before = c(81.3,87.2,86.1,82.2,90.8,86.9,96.5,73.0,84.2,74.5,72.0,73.8,74.2,
	74.9,75.8,72.6,80.8,66.5,72.2,56.5,82.4,88.8,80.0,91.1,97.5,70.0)
after = c(78.9,91.4,78.3,78.3,84.4,67.4,92.8,69.9,63.8,69.7,68.4,71.8,58.3,
	58.3,62.5,70.2,58.7,66.6,60.7,65.0,73.7,80.4,78.8,81.8,91.6,74.2)
# Create data differences
diff = before-after
# Calculate confidence interval
t.test(diff,conf.level=0.95)$conf.int
# Page 124
# Create normal qqplots for each sample
# Need to install (then load) car package in R 
qqPlot(diff,distribution="norm",xlab="N(0,1) quantiles",ylab="Data differences",pch=16)


# Figure 8.8
# Page 125
# F PDFs
y = seq(0,8,0.01)
plot(y,df(y,5,10),type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.85))
# Add other F pdfs
lines(y,df(y,5,20),lty=4)
lines(y,df(y,10,20),lty=8)
abline(h=0)
# Add legend
legend(4,0.5,lty = c(1,4,8), 
	c(
	expression(paste(nu[1], "=5, ", nu[2], "=10")),
	expression(paste(nu[1], "=5, ", nu[2], "=20")),
	expression(paste(nu[1], "=10, ", nu[2], "=20"))
	))


# Figure 8.9
# Page 127
# F pdf with quantiles and shading
x = seq(0,6,0.001)
pdf = df(x,10,10)
plot(x,pdf,type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.8))
abline(h=0)
x = seq(0,qf(0.025,10,10),0.001)
y = df(x,10,10)
polygon(c(0,x,qf(0.025,10,10)),c(0,y,0),col="grey")
# Add points
points(x=qf(0.025,10,10),y=0,pch=19,cex=1.5)
x = seq(qf(0.975,10,10),6,0.001)
y = df(x,10,10)
polygon(c(qf(0.975,10,10),x,6),c(0,y,0),col="grey")
# Add points
points(x=qf(0.975,10,10),y=0,pch=19,cex=1.5)
# Add text
text(1.2,0.15,expression(1-alpha))
text(0.0,0.2,expression(alpha/2))
text(4.5,0.05,expression(alpha/2))


# Example 8.4
# Paint fill volume data
# Page 128-130
process.1 = c(127.75,127.87,127.86,127.92,128.03,127.94,127.91,128.10,128.01,128.11,127.79,127.93,
           127.89,127.96,127.80,127.94,128.02,127.82,128.11,127.92,127.74,127.78,127.85,127.96)
process.2 = c(127.90,127.90,127.74,127.93,127.62,127.76,127.63,127.93,127.86,127.73,127.82,127.84,
           128.06,127.88,127.85,127.60,128.02,128.05,127.95,127.89,127.82,127.92,127.71,127.78)
# Create side by side boxplots
boxplot(process.1,process.2,xlab="",names=c("Process 1","Process 2"),ylab="Fluid ounces",ylim=c(127.5,128.25),col="lightblue")

# R does not have an internal function to calculate CI for the ratio of population variances (not that I could find quickly)
# I wrote this function to do it
ratio.var.interval = function(data.1,data.2,conf.level=0.95){
	df.1 = length(data.1)-1
	df.2 = length(data.2)-1
	F.lower = qf((1-conf.level)/2,df.1,df.2)
	F.upper = qf((1+conf.level)/2,df.1,df.2)
	s2.1 = var(data.1)
	s2.2 = var(data.2)
	c((s2.2/s2.1)*F.lower,(s2.2/s2.1)*F.upper)
	}

# CI for ratio of population variances
ratio.var.interval(process.1,process.2)
# Page 130
# Create normal qqplots for each sample
# Need to install (then load) car package in R 
qqPlot(process.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Process 1",pch=16)
qqPlot(process.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Process 2",pch=16)


# Example 8.5
# Irish HBV data
# Page 133
# R does not have an internal function to calculate CI for the difference of two population proportions (not that I could find quickly)
# I wrote this function to do it
proportion.diff.interval = function(y.1,n.1,y.2,n.2,conf.level=0.95){
	z.upper = qnorm((1+conf.level)/2)
	var.1 = (y.1/n.1)*(1-y.1/n.1)/n.1
	var.2 = (y.2/n.2)*(1-y.2/n.2)/n.2
	se = sqrt(var.1+var.2)
	moe = z.upper*se
	c((y.1/n.1-y.2/n.2)-moe,(y.1/n.1-y.2/n.2)+moe)
	}
# CI for difference of two population proportions
proportion.diff.interval(18,82,28,555)