R is one of the most popular scripting langurages in both academic and industry, which is widely employed in ITechology, engineering, Biomedical, econometrics and finance etc. The Comprehensive R Archieve Network(CRAN) has a considerable and consistenly increasing number of packages that contain lots of high-quality functions that make R more popular in many disciplines.
This notebook includes the R code that produces related outputs in the lecture notes. You can copy-and-paste the code to your RStudio console for realizing the results and plots. In order to use some certain packages, you need first install them from the CRAN by using install.packages(“package_name”). Then, you can source the according package by doing library(packagename) in order to use the functions inside it.
x1=rnorm(30,630,50) ##SAT MATH scores
x2=rnorm(30,3,.25) ##the MATH 141 grades
y=0.5*x1+2*x2+rnorm(30) ##linear relationship with random noise
y=(max(y)-y)/(max(y)-min(y))*100 ##scaled to range from 0 to 100
library(scatterplot3d)
with(mtcars, {
s3d<-scatterplot3d(x1, x2, y, #x y and z axis
color="blue",pch=19, #filled blue circles
type="h", #vertical lines to the x-y plane
main=" ",
xlab="SAT MATH",
ylab="MATH 141",
zlab="STAT 509")
})
n=480
xx <- vector("numeric",n)
ww <- rnorm(n) # innovations (process errors)
xx[1:2] <- c(0,1) # set first 2 times to innovations
# simulate AR(2)
for(t in 3:n) {
xx[t] <- 0.571+0.7*xx[t-1] + 0.25*xx[t-2] + ww[t]
}
x = c(0,1,2,3,4,5,6)
prob = c(0.10,0.15,0.20,0.25,0.20,0.06,0.04)
cdf = c(0,cumsum(prob)); cdf.plot = stepfun(x,cdf,f=0)
op<-par(mfrow=c(1,2))
# Plot PMF
plot(x,prob,type="h",xlab="x",ylab="PMF",ylim=c(0,0.26))
abline(h=0)
# Plot CDF
plot.stepfun(cdf.plot,xlab="x",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
y = 0:10; prob = dbinom(y,10,0.4)
cdf = c(0,cumsum(prob)); cdf.plot = stepfun(y,cdf,f=0)
op<-par(mfrow=c(1,2))
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.275))
abline(h=0)
# Plot CDF
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
y = 1:30; prob = dgeom(y-1,0.25)
cdf = c(0,cumsum(prob)); cdf.plot = stepfun(y,cdf,f=0)
op<-par(mfrow=c(1,2))
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.25))
abline(h=0)
# Plot CDF
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
y = 3:70; prob = dnbinom(y-3,3,0.15)
cdf = c(0,cumsum(prob)); cdf.plot = stepfun(y,cdf,f=0)
op<-par(mfrow=c(1,2))
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.05))
abline(h=0)
# Plot CDF
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
# Hypergeometric PMF and CDF
y = 0:5; prob = dhyper(y,10,100-10,5)
cdf = c(0,cumsum(prob)); cdf.plot = stepfun(y,cdf,f=0)
op<-par(mfrow=c(1,2))
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.6))
abline(h=0)
# Plot CDF
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
y = 0:10; prob = dpois(y,2.5)
cdf = c(0,cumsum(prob)); cdf.plot = stepfun(y,cdf,f=0)
op<-par(mfrow=c(1,2))
# Plot PMF
plot(y,prob,type="h",xlab="y",ylab="PMF",ylim=c(0,0.275))
abline(h=0)
# Plot CDF
plot.stepfun(cdf.plot,xlab="y",ylab="CDF",verticals=FALSE,do.points=TRUE,main="",pch=16)
mypdf<- function(y) ifelse(y>12.5,20*exp(-20*(y-12.5)),0)
mycdf<- function(y) ifelse(y>12.5,1-exp(-20*(y-12.5)),0)
par(mfrow=c(1,2))
curve(mypdf,12.4,13,xlab="y",ylab = "pdf")
curve(mycdf,12.4,13,xlab="y",ylab = "cdf")
##calculate E(Y)
integrand1<-function(y) y*20*exp(-20*(y-12.5))
integrate(integrand1,lower=12.5,upper = Inf)
## 12.55 with absolute error < 1.3e-07
##calculate Var(Y)
integrand2<-function(y) (y-12.55)^2*20*exp(-20*(y-12.5))
integrate(integrand2,lower=12.5,upper = Inf)
## 0.0025 with absolute error < 4.1e-08
##calculate the median
integrand3<-function(y) 1-exp(-20*(y-12.5))-0.5
uniroot(integrand3,interval = c(12.5,13))$root
## [1] 12.53463
par(mfrow=c(1,2))
curve(dexp(x,0.5),0,8,lty=1,col=1,ylab="pdf")
curve(dexp(x,1),0,8,lty=2,col=2,add=TRUE)
curve(dexp(x,2),0,8,lty=3,col=3,add=TRUE)
legend("topright",legend=c(expression(lambda==1/2),
expression(lambda==1),
expression(lambda==2)),
col=1:3,lty=1:3,seg.len=1,bty="n")
curve(pexp(x,0.5),0,8,lty=1,col=1,ylab="cdf")
curve(pexp(x,1),0,8,lty=2,col=2,add=TRUE)
curve(pexp(x,2),0,8,lty=3,col=3,add=TRUE)
par(mfrow=c(1,2))
curve(dgamma(x,1,.5),0,15,ylim=c(0,0.5),col=1,ylab="pdf")
curve(dgamma(x,2,1),0,15,col=2,add=TRUE)
curve(dgamma(x,5,2),0,15,col=3,add=TRUE)
curve(dgamma(x,9,2),0,15,col=4,add=TRUE)
legend("topright",
legend=c(expression(paste(alpha==1,", ",lambda==0.5)),
expression(paste(alpha==2,", ",lambda==1)),
expression(paste(alpha==5,", ",lambda==2)),
expression(paste(alpha==9,", ",lambda==2))),
col=1:4,lty=rep(1,4),seg.len=1,bty="n")
curve(pgamma(x,1,.5),0,15,col=1,ylab="cdf")
curve(pgamma(x,2,1),0,15,col=2,add=TRUE)
curve(pgamma(x,5,2),0,15,col=3,add=TRUE)
curve(pgamma(x,9,2),0,15,col=4,add=TRUE)
par(mfrow=c(1,2))
curve(dnorm(x,-2,2),-10,10,ylim=c(0,0.5),col=1,ylab="pdf")
curve(dnorm(x,0,1),-10,10,col=2,add=TRUE)
curve(dnorm(x,1,3),-10,10,col=3,add=TRUE)
legend("topright",
legend=c(expression(paste(mu==-2,", ",sigma==2)),
expression(paste(mu==0,", ",sigma==1)),
expression(paste(mu==1,", ",sigma==3))),
col=1:3,lty=rep(1,3),seg.len=1,bty="n")
curve(pnorm(x,-2,2),-10,10,col=1,ylab="cdf")
curve(pnorm(x,0,1),-10,10,col=2,add=TRUE)
curve(pnorm(x,1,3),-10,10,col=3,add=TRUE)
par(mfrow=c(1,2))
curve(dweibull(x,2,5),0,25,col=1,lty=1,xlab="t",ylab="pdf")
curve(dweibull(x,2,10),0,25,col=2,lty=2,add=T)
curve(dweibull(x,3,10),0,25,col=3,lty=3,add=T)
legend("topright",
legend=c(expression(paste(beta==2,", ",eta==5)),
expression(paste(beta==2,", ",eta==10)),
expression(paste(beta==3,", ",eta==10))),
col=1:3,lty=1:3,seg.len=1,bty="n")
curve(pweibull(x,2,5),0,25,col=1,lty=1,xlab="t",ylab="cdf")
curve(pweibull(x,2,10),0,25,col=2,lty=2,add=T)
curve(pweibull(x,3,10),0,25,col=3,lty=3,add=T)
Given \(T \sim Weibull(\beta,\eta)\), the hazard function of \(T\) is \[h_T(t)=\frac{f_T(t)}{S_T(t)}=\frac{\frac{\beta}{\eta}(\frac{t}{\eta})^{\beta-1} e^{-(t/\eta)^{\beta}}}{e^{-(t/\eta)^{\beta}}}=\frac{\beta}{\eta}(\frac{t}{\eta})^{\beta-1} \]
par(mfrow=c(2,2))
curve(dweibull(x,3)/pweibull(x,3,lower.tail=F),0,4,col=1,
lty=1,xlab="t",ylab="hazard")
legend("bottomright",legend=expression(paste(beta==3,", ",eta==1)),
col=1,lty=1,seg.len=1,bty="n")
curve(dweibull(x,1.5)/pweibull(x,1.5,lower.tail = F),0,4,
col=2,lty=2,xlab="t",ylab="")
legend("bottomright",legend=expression(paste(beta==1.5,", ",eta==1)),
col=2,lty=2,seg.len=1,bty="n")
curve(dweibull(x,1)/pweibull(x,1,lower.tail = F),0,4,
col=3,lty=3,xlab="t",ylab="hazard")
legend("topright",legend=expression(paste(beta==1,", ",eta==1)),
col=3,lty=3,seg.len=1,bty="n")
curve(dweibull(x,0.5)/pweibull(x,0.5,lower.tail = F),0,4,
col=4,lty=4,xlab = "t",ylab = " ")
legend("topright",legend=expression(paste(beta==0.5,", ",eta==1)),
col=4,lty=4,seg.len=1,bty="n")
library(MASS)
##input the cart data
cart.data = c(3.9,4.2,5.4,6.5,7.0,8.8,9.2,11.4,14.3,15.1,
15.3,15.5,17.9,18.0,19.0,19.0,23.9,24.8,26.0,34.2)
# Fit Weibull model
fitdistr(cart.data,densfun="weibull")
## shape scale
## 1.9889187 16.9359807
## ( 0.3504024) ( 2.0081518)
beta.hat=1.99; eta.hat=16.94
##draw the QQ plot
library(car)
par(mfrow=c(1,1))
qqPlot(cart.data,distribution="weibull",shape=beta.hat,scale=eta.hat,
xlab="Weibull quantiles",ylab="Cart data",pch=16)
battery = c(4285,2066,2584,1009,318,1429,981,1402,1137,414,
564,604,14,4152,737,852,1560,1786,520,396,
1278,209,349,478,3032,1461,701,1406,261,83,
205,602,3770,726,3894,2662,497,35,2778,1379,
3920,1379,99,510,582,308,3367,99,373,454)
# Create histogram and boxplot
op<-par(mfrow=c(1,2))
hist(battery,xlab="Lifetime (in hours)",ylab="Count",main="")
boxplot(battery,xlab="",ylab="Lifetime (in hours)",col="grey")
par(op)
# Calculate summary statistics
mean(battery) ## sample mean
## [1] 1274.14
var(battery) ## sample variance
## [1] 1505156
sd(battery) ## sample standard deviation
## [1] 1226.848
y = seq(0,3,0.01)
# Plot population distribution
curve(dnorm(x,1.5,sqrt(0.16)),0,3,type="l",lty=1,col=1,lwd=2,
xlab="Brake times (sec)",ylab="PDF",ylim=c(0,5))
# Add sampling distribution when n=5
curve(dnorm(x,1.5,sqrt(0.16/5)),0,3,lty=4,col=2,lwd=2,add=TRUE)
# Add sampling distribution when n=25
curve(dnorm(x,1.5,sqrt(0.16/25)),0,3,lty=8,col=3,lwd=2,add=TRUE)
abline(h=0)
# Add legend
legend("topleft",lty = c(1,4,8),bty = "n",cex = 0.8,col=1:3,lwd=rep(2,3),
c(
expression(paste("Population distribution")),
expression(paste("Sample mean, n=5")),
expression(paste("Sample mean, n=25"))
))
y = seq(0,20,0.01)
# Plot population distribution
curve(dexp(x,1/5),0,20,type="l",lty=1,col=1,lwd=2,
xlab="Time to death (in days)",ylab="PDF",ylim=c(0,0.4))
# Add the exact distribution for sample mean when n=5
curve(dgamma(x,5,1),0,20,col=2,lwd=2,lty=4,add=TRUE)
# Add the exact distribution for sample mean when n=25
curve(dgamma(x,25,5),0,20,col=3,lwd=2,lty=8,add=TRUE)
abline(h=0)
abline(v=0,lty=2)
# Add legend
legend("topright",lty = c(1,4,8),bty="n",cex=0.8,col=1:3,lwd=rep(2,3),
c(
expression(paste("Population distribution")),
expression(paste("Sample mean, n=5")),
expression(paste("Sample mean, n=25"))
))
# Plot N(0,1) pdf
curve(dnorm(x,0,1),-5,5,type="l",lty=1,col=1,lwd=2,
xlab="",ylab="PDF",ylim=c(0,0.4))
# Add other pdfs
curve(dt(x,2),-5,5,col=2,lwd=2,lty=4,add=TRUE)
curve(dt(x,10),-5,5,col=3,lwd=2,lty=8,add=TRUE)
abline(h=0)
# Add legend
legend("topleft",lty = c(1,4,8),col=1:3,lwd=rep(2,3),cex=.8,
c(
expression(paste("N(0,1)")),
expression(paste("t(2)")),
expression(paste("t(10)"))
))
pipes = c(1.296,1.320,1.311,1.298,1.315,1.305,1.278,1.294,1.311,1.290,1.284,1.287,1.289,1.292,1.301,
1.298,1.287,1.302,1.304,1.301,1.313,1.315,1.306,1.289,1.291)
# Calculate summary statistics
mean(pipes) ## sample mean
## [1] 1.29908
sd(pipes) ## sample standard deviation
## [1] 0.01108272
# Plot t(24) pdf
curve(dt(x,24),-5,5,type="l",lty=1,lwd=2,
xlab="t",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
# Add points
points(x=4.096,y=0,pch=4,cex=1.5)
# Create normal qqplot
# Need to install (then load) car package in R
qqPlot(pipes,distribution="norm",xlab="N(0,1) quantiles",ylab="Pipe data",pch=16)
x = seq(-4,4,0.001)
pdf = dt(x,10)
plot(x,pdf,type="l",lty=1,xlab="t",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
x = seq(-4,qt(0.025,10),0.001)
y = dt(x,10)
polygon(c(-4,x,qt(0.025,10)),c(0,y,0),col="grey")
# Add points
points(x=qt(0.025,10),y=0,pch=19,cex=1.5)
x = seq(qt(0.975,10),4,0.001)
y = dt(x,10)
polygon(c(qt(0.975,10),x,4),c(0,y,0),col="grey")
# Add points
points(x=qt(0.975,10),y=0,pch=19,cex=1.5)
# Add text
text(-0.025,0.1,expression(1-alpha))
text(-3.5,0.05,expression(alpha/2))
text(3.5,0.05,expression(alpha/2))
cadmium = c(0.044,0.030,0.052,0.044,0.046,0.020,0.066,
0.052,0.049,0.030,0.040,0.045,0.039,0.039,
0.039,0.057,0.050,0.056,0.061,0.042,0.055,
0.037,0.062,0.062,0.070,0.061,0.061,0.058,
0.053,0.060,0.047,0.051,0.054,0.042,0.051)
# Calculate summary statistics
mean(cadmium) ## sample mean
## [1] 0.04928571
sd(cadmium) ## sample standard deviation
## [1] 0.0110894
# Calculate confidence interval directly
t.test(cadmium,conf.level=0.99)$conf.int
## [1] 0.04417147 0.05439996
## attr(,"conf.level")
## [1] 0.99
# Create normal qqplot
# Need to install (then load) car package in R
qqPlot(cadmium,distribution="norm",xlab="N(0,1) quantiles",ylab="Cadmium data",pch=16)
y = seq(0,45,0.01)
# Plot chi-square pdf with 5 df
plot(y,dchisq(y,5),type="l",lty=1,xlab=expression(chi^2),ylab="PDF",ylim=c(0,0.16),col=1,lwd=2)
# Add other pdfs
lines(y,dchisq(y,10),lty=4,col=2,lwd=2)
lines(y,dchisq(y,20),lty=8,col=3,lwd=2)
abline(h=0)
# Add legend
legend(25,0.125,lty = c(1,4,8),col=1:3,lwd=rep(2,3),
c(
expression(paste(nu, "=5")),
expression(paste(nu, "=10")),
expression(paste(nu, "=20"))
))
x = seq(0,30,0.001)
pdf = dchisq(x,10)
plot(x,pdf,type="l",lty=1,xlab=expression(chi^2),ylab="PDF",ylim=c(0,0.1))
abline(h=0)
x = seq(0,qchisq(0.025,10),0.001)
y = dchisq(x,10)
polygon(c(0,x,qchisq(0.025,10)),c(0,y,0),col="grey")
# Add points
points(x=qchisq(0.025,10),y=0,pch=19,cex=1.5)
x = seq(qchisq(0.975,10),30,0.001)
y = dchisq(x,10)
polygon(c(qchisq(0.975,10),x,30),c(0,y,0),col="grey")
# Add points
points(x=qchisq(0.975,10),y=0,pch=19,cex=1.5)
# Add text
text(9.8,0.02,expression(1-alpha))
text(0.5,0.01,expression(alpha/2))
text(25,0.0075,expression(alpha/2))
diameters = c(1.206,1.190,1.200,1.195,1.201,1.200,1.198,1.196,1.195,
1.202,1.203,1.210,1.206,1.193,1.207,1.201,1.199,1.200,1.199,
1.204,1.194,1.203,1.194,1.199,1.203,1.200,1.197,1.208,1.199,
1.205,1.199,1.204,1.202,1.196,1.211,1.204)
# R does not have an internal function to calculate CI for population variance
# I wrote this function to do it
var.interval = function(data,conf.level=0.95){
df = length(data)-1
chi.lower = qchisq((1-conf.level)/2,df)
chi.upper = qchisq((1+conf.level)/2,df)
s2 = var(data)
c(df*s2/chi.upper,df*s2/chi.lower)
}
# CI for population variance
var.interval(diameters)
## [1] 1.545590e-05 3.997717e-05
# CI for population standard deviation
sd.interval = sqrt(var.interval(diameters))
sd.interval
## [1] 0.003931399 0.006322751
# Create normal qqplot
# Need to install (then load) car package in R
qqPlot(diameters,distribution="norm",xlab="N(0,1) quantiles",ylab="Diameter data",pch=16)
x = seq(-4,4,0.001)
pdf = dnorm(x,0,1)
plot(x,pdf,type="l",lty=1,xlab="z",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
x = seq(-4,qnorm(0.025,0,1),0.001)
y = dnorm(x,0,1)
polygon(c(-4,x,qnorm(0.025,0,1)),c(0,y,0),col="grey")
# Add points
points(x=qnorm(0.025,0,1),y=0,pch=19,cex=1.5)
x = seq(qnorm(0.975,0,1),4,0.001)
y = dnorm(x,0,1)
polygon(c(qnorm(0.975,0,1),x,4),c(0,y,0),col="grey")
# Add points
points(x=qnorm(0.975,0,1),y=0,pch=19,cex=1.5)
# Add text
text(-0.025,0.1,expression(1-alpha))
text(-2.8,0.04,expression(alpha/2))
text(2.8,0.04,expression(alpha/2))
y = seq(35,75,0.01)
curve(dnorm(x,50,3),35,75,type="l",lty=1,xlab="y",ylab="PDF",
ylim=c(0,0.15),col="red",lwd=2)
curve(dnorm(x,60,3),35,75,lty=4,col="blue",lwd=2,add=TRUE)
abline(h=0)
# Add legend
legend("topright",lty = c(1,4),col=c("red","blue"),lwd=rep(2,2),
c(
expression(paste("Population 1")),
expression(paste("Population 2"))
))
x = seq(-4,4,0.001)
pdf = dt(x,10)
plot(x,pdf,type="l",lty=1,xlab="t",ylab="PDF",ylim=c(0,0.4))
abline(h=0)
x = seq(-4,qt(0.025,10),0.001)
y = dt(x,10)
polygon(c(-4,x,qt(0.025,10)),c(0,y,0),col="grey")
# Add points
points(x=qt(0.025,10),y=0,pch=19,cex=1.5)
x = seq(qt(0.975,10),4,0.001)
y = dt(x,10)
polygon(c(qt(0.975,10),x,4),c(0,y,0),col="grey")
# Add points
points(x=qt(0.975,10),y=0,pch=19,cex=1.5)
# Add text
text(-0.025,0.1,expression(1-alpha))
text(-3.5,0.05,expression(alpha/2))
text(3.5,0.05,expression(alpha/2))
loc.1 = c(21.9,18.5,12.3,16.7,21.0,15.1,18.2,23.0,36.8,26.6)
loc.2 = c(21.0,19.6,14.4,16.9,23.4,14.6,10.4,16.5)
# Create side by side boxplots
boxplot(loc.1,loc.2,xlab="",names=c("Location 1","Location 2"),ylab="Weight (in ounces)",ylim=c(0,40),col="grey")
# Calculate confidence interval
t.test(loc.1,loc.2,conf.level=0.90,var.equal=TRUE)$conf.int
## [1] -0.9404376 8.7604376
## attr(,"conf.level")
## [1] 0.9
# Page 116
# Create normal qqplots for each sample
# Need to install (then load) car package in R
op<-par(mfrow=c(1,2))
qqPlot(loc.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Location 1",pch=16)
qqPlot(loc.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Location 2",pch=16)
par(op)
plant.1 = c(3.01,2.58,3.04,1.75,2.87,2.57,2.51,2.93,2.85,3.09,
1.43,3.36,3.18,2.74,2.25,1.95,3.68,2.29,1.86,2.63,
2.83,2.04,2.23,1.92,3.02)
plant.2 = c(3.99,2.08,3.66,1.53,4.27,4.31,2.62,4.52,3.80,5.30,
3.41,0.82,3.03,1.95,6.45,1.86,1.87,3.98,2.74,4.81)
# Create side by side boxplots
par(mfrow=c(1,1 ))
boxplot(plant.1,plant.2,xlab="",names=c("Plant 1","Plant 2"),ylab="Weight (in 100s lb)",ylim=c(0,8),col="grey")
# Calculate confidence interval
t.test(plant.1,plant.2,conf.level=0.95,var.equal=FALSE)$conf.int
## [1] -1.46179176 -0.06940824
## attr(,"conf.level")
## [1] 0.95
# Create normal qqplots for each sample
# Need to install (then load) car package in R
op<-par(mfrow=c(1,2))
qqPlot(plant.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Plant 1",pch=16)
qqPlot(plant.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Plant 2",pch=16)
par(op)
before = c(81.3,87.2,86.1,82.2,90.8,86.9,96.5,73.0,84.2,74.5,72.0,73.8,74.2,
74.9,75.8,72.6,80.8,66.5,72.2,56.5,82.4,88.8,80.0,91.1,97.5,70.0)
after = c(78.9,91.4,78.3,78.3,84.4,67.4,92.8,69.9,63.8,69.7,68.4,71.8,58.3,
58.3,62.5,70.2,58.7,66.6,60.7,65.0,73.7,80.4,78.8,81.8,91.6,74.2)
# Create data differences
diff = before-after
# Calculate confidence interval
t.test(diff,conf.level=0.95)$conf.int
## [1] 3.636034 9.894735
## attr(,"conf.level")
## [1] 0.95
# Create normal qqplots for each sample
# Need to install (then load) car package in R
par(mfrow=c(1,1))
qqPlot(diff,distribution="norm",xlab="N(0,1) quantiles",ylab="Data differences",pch=16)
curve(df(x,5,10),0,8,type="l",lty=1,xlab="F",ylab="PDF",
ylim=c(0,0.85),col=1,lwd=2)
# Add other F pdfs
curve(df(x,5,20),0,8,lty=4,col=2,lwd=2,add=TRUE)
curve(df(x,10,20),0,8,lty=8,col=3,lwd=3,add=TRUE)
abline(h=0)
# Add legend
legend(4,0.5,lty = c(1,4,8),col=1:3,lwd=rep(2,3),
c(
expression(paste(nu[1], "=5, ", nu[2], "=10")),
expression(paste(nu[1], "=5, ", nu[2], "=20")),
expression(paste(nu[1], "=10, ", nu[2], "=20"))
))
x = seq(0,6,0.001)
pdf = df(x,10,10)
plot(x,pdf,type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.8))
abline(h=0)
x = seq(0,qf(0.025,10,10),0.001)
y = df(x,10,10)
polygon(c(0,x,qf(0.025,10,10)),c(0,y,0),col="grey")
# Add points
points(x=qf(0.025,10,10),y=0,pch=19,cex=1.5)
x = seq(qf(0.975,10,10),6,0.001)
y = df(x,10,10)
polygon(c(qf(0.975,10,10),x,6),c(0,y,0),col="grey")
# Add points
points(x=qf(0.975,10,10),y=0,pch=19,cex=1.5)
# Add text
text(1.2,0.15,expression(1-alpha))
text(0.0,0.2,expression(alpha/2))
text(4.5,0.05,expression(alpha/2))
process.1 = c(127.75,127.87,127.86,127.92,128.03,127.94,127.91,128.10,128.01,128.11,127.79,127.93,
127.89,127.96,127.80,127.94,128.02,127.82,128.11,127.92,127.74,127.78,127.85,127.96)
process.2 = c(127.90,127.90,127.74,127.93,127.62,127.76,127.63,127.93,127.86,127.73,127.82,127.84,
128.06,127.88,127.85,127.60,128.02,128.05,127.95,127.89,127.82,127.92,127.71,127.78)
# Create side by side boxplots
boxplot(process.1,process.2,xlab="",names=c("Process 1","Process 2"),
ylab="Fluid ounces",ylim=c(127.5,128.25),col="grey")
# R does not have an internal function to calculate CI for the ratio of population variances
# I wrote this function to do it
ratio.var.interval = function(data.1,data.2,conf.level=0.95){
df.1 = length(data.1)-1
df.2 = length(data.2)-1
F.lower = qf((1-conf.level)/2,df.1,df.2)
F.upper = qf((1+conf.level)/2,df.1,df.2)
s2.1 = var(data.1)
s2.2 = var(data.2)
c((s2.2/s2.1)*F.lower,(s2.2/s2.1)*F.upper)
}
# CI for ratio of population variances
ratio.var.interval(process.1,process.2)
## [1] 0.5885236 3.1448830
# Page 130
# Create normal qqplots for each sample
# Need to install (then load) car package in R
op<-par(mfrow=c(1,2))
qqPlot(process.1,distribution="norm",xlab="N(0,1) quantiles",ylab="Process 1",pch=16)
qqPlot(process.2,distribution="norm",xlab="N(0,1) quantiles",ylab="Process 2",pch=16)
par(op)
# R does not have an internal function to calculate CI for the difference of two population proportions
# I wrote this function to do it
proportion.diff.interval = function(y.1,n.1,y.2,n.2,conf.level=0.95){
z.upper = qnorm((1+conf.level)/2)
var.1 = (y.1/n.1)*(1-y.1/n.1)/n.1
var.2 = (y.2/n.2)*(1-y.2/n.2)/n.2
se = sqrt(var.1+var.2)
moe = z.upper*se
c((y.1/n.1-y.2/n.2)-moe,(y.1/n.1-y.2/n.2)+moe)
}
# CI for difference of two population proportions
proportion.diff.interval(18,82,28,555)
## [1] 0.07764115 0.26048234
OCM = c(51.45,42.96,41.11,48.06,38.27,38.88,42.74,49.62)
PIM = c(64.97,64.21,57.39,52.79,64.87,53.27,51.24,55.87,61.76,67.15)
RM = c(48.95,62.41,52.11,60.45,58.07,52.16,61.71,61.06,57.63,56.80)
PCM = c(35.28,38.59,48.64,50.99,51.52,52.85,46.75,48.31)
# Create side-by-side boxplots
boxplot(OCM,PIM,RM,PCM,
xlab="",names=c("OCM","PIM","RM","PCM"),ylab="Strength (MPa)",col="grey")
# Create response with all data
strength = c(OCM,PIM,RM,PCM)
# Create a treatment indicator variable
mortar.type = c(
rep("OCM",length(OCM)),
rep("PIM",length(PIM)),
rep("RM",length(RM)),
rep("PCM",length(PCM))
)
# mortar.type = factor(mortar.type)
mortar=data.frame(type=mortar.type,strength=strength)
# Analysis of variance
pander(anova(lm(strength ~ mortar.type)))
Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
---|---|---|---|---|---|
mortar.type | 3 | 1521 | 507 | 16.85 | 9.576e-07 |
Residuals | 32 | 962.9 | 30.09 | NA | NA |
f = seq(0,18,0.01)
plot(f,df(f,3,32),type="l",lty=1,xlab="F",ylab="PDF",ylim=c(0,0.75))
abline(h=0)
# Add points
points(x=16.848,y=0,pch=4,cex=1.5)
# Tukey confidence intervals
TukeyHSD(aov(lm(strength ~ mortar.type)),conf.level=0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = lm(strength ~ mortar.type))
##
## $mortar.type
## diff lwr upr p adj
## PCM-OCM 2.48000 -4.950955 9.910955 0.8026758
## PIM-OCM 15.21575 8.166127 22.265373 0.0000097
## RM-OCM 12.99875 5.949127 20.048373 0.0001138
## PIM-PCM 12.73575 5.686127 19.785373 0.0001522
## RM-PCM 10.51875 3.469127 17.568373 0.0016850
## RM-PIM -2.21700 -8.863448 4.429448 0.8029266
filtration.rate=c(125.3,98.2,201.4,147.3,145.9,124.7,112.2,120.2,161.2,178.9,
159.5,145.8,75.1,151.4,144.2,125.0,198.8,132.5,159.6,110.7)
moisture=c(77.9,76.8,81.5,79.8,78.2,78.3,77.5,77.0,80.1,80.2,
79.9,79.0,76.7,78.2,79.5,78.1,81.5,77.0,79.0,78.6)
# Fit the model
fit = lm(moisture~filtration.rate)
# Construct scatterplot (with least squares line superimposed)
plot(filtration.rate,moisture,xlab="Filtration rate (kg-DS/m/hr)",
ylab="Moisture (Percentage)",pch=16)
abline(fit)
# Confidence interval for beta0 and beta1
confint(fit,level=0.95)
## 2.5 % 97.5 %
## (Intercept) 71.49309400 74.42399995
## filtration.rate 0.03087207 0.05119547
# Regression output for inference
summary(fit)
##
## Call:
## lm(formula = moisture ~ filtration.rate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39552 -0.27694 0.03548 0.42913 1.09901
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.958547 0.697528 104.596 < 2e-16 ***
## filtration.rate 0.041034 0.004837 8.484 1.05e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6653 on 18 degrees of freedom
## Multiple R-squared: 0.7999, Adjusted R-squared: 0.7888
## F-statistic: 71.97 on 1 and 18 DF, p-value: 1.052e-07
# Calculate confidence/prediction intervals
predict(fit,data.frame(filtration.rate=150),level=0.95,interval="confidence")
## fit lwr upr
## 1 79.11361 78.78765 79.43958
predict(fit,data.frame(filtration.rate=150),level=0.95,interval="prediction")
## fit lwr upr
## 1 79.11361 77.6783 80.54893
curve(dt(x,18),-9,9,xlab="t",ylab="PDF",ylim=c(0,0.4),lwd=1.5)
abline(h=0)
points(x=8.484,y=0,pch=4,cex=1.5)
fit = lm(moisture~filtration.rate)
x.not = seq(80,200,1)
conf = predict(fit,data.frame(filtration.rate=x.not),level=0.95,interval="confidence")
pred = predict(fit,data.frame(filtration.rate=x.not),level=0.95,interval="prediction")
plot(filtration.rate,moisture,xlab = "Filtration rate (kg-DS/m/hr)",
ylab = "Moisture (Percentage)",pch=16,
xlim=c(min(filtration.rate),max(filtration.rate)),
ylim=c(min(moisture),max(moisture)))
abline(fit)
par(new=T)
plot(x.not,conf[,2],type="l",lty=2,xlab="",ylab="",col="red",
xlim=c(min(filtration.rate),max(filtration.rate)),
ylim=c(min(moisture),max(moisture)))
par(new=T)
plot(x.not,conf[,3],type="l",lty=2,xlab="",ylab="",col="red",
xlim=c(min(filtration.rate),max(filtration.rate)),
ylim=c(min(moisture),max(moisture)))
par(new=T)
plot(x.not,pred[,2],type="l",lty=4,xlab="",ylab="",col="blue",
xlim=c(min(filtration.rate),max(filtration.rate)),
ylim=c(min(moisture),max(moisture)))
par(new=T)
plot(x.not,pred[,3],type="l",lty=4,xlab="",ylab="",col="blue",
xlim=c(min(filtration.rate),max(filtration.rate)),
ylim=c(min(moisture),max(moisture)))
# Create legend
# You MUST "click" where you want it to go (on figure)
names = c("95% confidence","95% prediction")
legend("topleft",legend=names,lty=c(2,4),bty="n",col=c("red","blue"))