############################################### ## Author: Joshua M. Tebbs ## Date: 22 October 2014 ## Update: 23 January 2018 ## STAT 713 course notes: R Code ## Chapters 6-10 (CB) ############################################### ############## ## CHAPTER 6 # ############## # Example 6.11 # Pages 14-15 # Figure 6.1 # Sufficient/Ancillary example n = 10 B = 1000 data = matrix(rnorm(n*B,0,10),nrow=B,ncol=n) t = data[,1]*0 s = data[,1]*0 for (i in 1:length(t)){ t[i] = sum((data[i,])^2) } for (j in 1:length(s)){ s[j] = sqrt(n)*mean(data[j,])/sd(data[j,]) } plot(t,s,xlab="t",ylab="s",pch=16,cex.lab=1.25) # Calculate correlation cor(t,s) ############## ## CHAPTER 7 # ############## # Example 7.5 # Pages 31-32 # Figure 7.1 # N(theta,1) MLE n = 5 theta.1 = seq(0,5,0.001) x = rnorm(n,1.5,1) # > round(x,3) # [1] 2.437 0.993 1.123 1.900 3.794 norm.like = function(x,theta){ (1/sqrt(2*pi))^n*exp(-(1/2)*sum((x-theta)^2)) } like = theta.1*0 for (i in 1:length(like)){ like[i] = norm.like(x,theta.1[i]) } plot(theta.1,like,type="l",lty=1,xlab=expression(theta),ylab="Likelihood function") abline(h=0) ############## ## CHAPTER 8 # ############## # Example 8.9 # English Premier League data # Pages 77-78 goals = c(rep(0,27),rep(1,73),rep(2,80),rep(3,72),rep(4,65), rep(5,39),rep(6,17),rep(7,4),rep(8,1),rep(9,2)) sum(goals) # Figure 8.1 # Gamma prior and posterior # Plot Prior PDF y = seq(0,15,0.01) pdf = dgamma(y,1.5,1/2) plot(y,pdf,type="l",xlab=expression(theta),ylab="Prior distribution",cex.lab=1.25) abline(h=0) # Plot Posterior PDF y = seq(2,3.5,0.01) pdf = dgamma(y,1061.5,1/0.002628) plot(y,pdf,type="l",xlab=expression(theta),ylab="Posterior distribution",cex.lab=1.25) abline(h=0) # Example 8.10 # Pages 80-81 # Figure 8.2 # Power function mu=seq(0,4,0.01) mu.0=1.5 n=5 alpha=0.10 sigma.0=1 power <- function(mu){ 1-pnorm((mu.0+qnorm(1-alpha)*(sigma.0/sqrt(n))-mu)/(sigma.0/sqrt(n))) } plot(mu,power(mu),type="l",xlab=expression(mu),ylab="Power function",lty=1) abline(h=0) abline(h=0.10,lty=2) abline(h=0.80,lty=2) # Example 8.11 # Pages 82-83 # Figure 8.3 # Power functions theta = seq(0,5,0.01) plot(theta,exp(-theta),type="l",lty=1,xlab=expression(theta),ylab="Power function") lines(theta,exp(-2*theta)+2*theta*exp(-2*theta),lty=4) abline(h=0) abline(v=0,lty=2) # Add legend legend(2,0.6,lty = c(1,4), c( expression(paste(beta[1](theta))), expression(paste(beta[2](theta))) )) # Example 8.16 # Pages 91-92 # Figure 8.4 # Power function of UMP level 0.0611 test theta = seq(0,1,0.001) plot(theta,1-pbinom(9,30,theta),type="l",lty=1,xlab=expression(theta),ylab="Power function") abline(h=0) abline(h=0.0611,lty=2) # Example 8.17 # Pages 92-94 # Figure 8.5 # UMP power function n=10 alpha=0.10 theta.0=4 theta = seq(0,8,0.01) plot(theta,1-pchisq(theta*qchisq(1-alpha,2*n)/theta.0,2*n),type="l",lty=1,xlab=expression(theta),ylab="Power function") abline(h=0) abline(h=0.10,lty=2) # Example 8.18 # Pages 97-101 # Figure 8.6 # N(0,1) pdf with quantiles and shading x = seq(-4,4,0.001) pdf = dnorm(x,0,1) plot(x,pdf,type="l",lty=1,xlab="z",ylab="PDF",ylim=c(0,0.4)) abline(h=0) x = seq(-4,qnorm(0.025,0,1),0.001) y = dnorm(x,0,1) polygon(c(-4,x,qnorm(0.025,0,1)),c(0,y,0),col="grey") # Add points points(x=qnorm(0.025,0,1),y=0,pch=19,cex=1.5) x = seq(qnorm(0.975,0,1),4,0.001) y = dnorm(x,0,1) polygon(c(qnorm(0.975,0,1),x,4),c(0,y,0),col="grey") # Add points points(x=qnorm(0.975,0,1),y=0,pch=19,cex=1.5) # Add text text(-0.025,0.1,expression(1-alpha)) text(-2.8,0.04,expression(alpha/2)) text(2.8,0.04,expression(alpha/2)) # Figure 8.7 # Plot UMPU power function and two UMP power functions mu = seq(2,10,0.001) mu.0 = 6 n = 10 alpha = 0.05 sigma.0 = 2 # UMP power function for H_1: mu > mu_0 (one sided, greater than) plot(mu,1-pnorm((mu.0+qnorm(1-alpha/2)*(sigma.0/sqrt(n))-mu)/(sigma.0/sqrt(n))) + pnorm((mu.0-qnorm(1-alpha/2)*(sigma.0/sqrt(n))-mu)/(sigma.0/sqrt(n))), type="l",lty=1,xlab=expression(mu),ylab="Power function",xlim=c(2,10),ylim=c(0,1)) # Add UMP power function for H_1: mu > mu_0 (one sided, greater than) lines(mu,1-pnorm((mu.0+qnorm(1-alpha)*(sigma.0/sqrt(n))-mu)/(sigma.0/sqrt(n))),lty=8) # Add UMP power function for H_1: mu < mu_0 (one sided, less than) lines(mu,pnorm((mu.0-qnorm(1-alpha)*(sigma.0/sqrt(n))-mu)/(sigma.0/sqrt(n))),lty=4) abline(h=0.05) # Add legend legend(4.85,1,lty = c(1,8,4), c( expression(paste("UMPU")), expression(paste("UMP ", mu, " > ", mu[0])), expression(paste("UMP ", mu, " < ", mu[0])) )) # Example 8.20 # Page 103 # Figure 8.8 # Uniform qq plot of p-values n = 30 # sample size B = 200 # number of Monte Carlo samples data = matrix(rnorm(n*B,0,1),nrow = B,ncol = n) # Calculate sample mean for each row (sample) sample.mean = apply(data, 1, mean) # Calculate sample sd for each row (sample) sample.sd = apply(data, 1, sd) t = sample.mean*0 for (i in 1:B){ t[i] = sample.mean[i]/(sample.sd[i]/sqrt(n)) } p.values = 1-pt(t,n-1) # Need to install (then load) car package in R qqPlot(p.values,distribution="unif",xlab="U(0,1) quantiles", ylab="p-values",pch=16) ############## ## CHAPTER 9 # ############## # Example 9.2 # Pages 106-107 # Figure 9.1 # Binomial CI coverage plot # Stolen from Chris Bilder! Note: Iowa > Nebraska # Initial settings and calculations alpha<-0.05 n<-40 w<-0:n pi.hat<-w/n pi.seq<-seq(from = 0.0001, to = 0.9999, by = 0.0005) # Wald var.wald<-pi.hat*(1-pi.hat)/n lower.wald<-pi.hat - qnorm(p = 1-alpha/2) * sqrt(var.wald) upper.wald<-pi.hat + qnorm(p = 1-alpha/2) * sqrt(var.wald) # Save true confidence levels in a matrix save.true.conf<-matrix(data = NA, nrow = length(pi.seq), ncol = 2) # Create counter for the loop counter<-1 # Loop over each pi that the true confidence level is calculated on for(pi in pi.seq) { pmf<-dbinom(x = w, size = n, prob = pi) save.wald<-ifelse(test = pi>lower.wald, yes = ifelse(test = pi data # [1] 10.03078 10.09153 10.20267 10.07584 10.18109 # > t # [1] 10.03078 theta = seq(t-1.25,t,0.001) # Make the plot plot(theta,1-exp(-n*(t-theta)),type="l",lty=1,xlab=expression(theta),ylab="CDF",ylim=c(0,1)) abline(h=0.025,lty=2) abline(h=0.975,lty=2) abline(h=1) abline(h=0) # Example 9.9 # Pages 116-118 # Figure 9.3 # Poisson CI coverage plot alpha=0.10 n=10 t = seq(0,100,1) theta.seq = seq(0.001,3,0.001) # Interval lower = qchisq(alpha/2,2*t)/(2*n) upper = qchisq(1-alpha/2,2*(t+1))/(2*n) # Save true confidence levels in a matrix save.true.conf<-matrix(data = NA, nrow = length(theta.seq), ncol = 2) # Create counter for the loop counter<-1 # Loop over each theta that the true confidence level is calculated on for(theta in theta.seq) { pmf<-dpois(t,n*theta) save.ci<-ifelse(test = theta>lower, yes = ifelse(test = thetaqchisq(0.95,1)])/B ## approximate size ## note that I am using the square of the Wald statistic ## in the limit, compare to chi-square(1) # Score T.S<-y*0 for (i in 1:B){ T.S[i]<-n*((y[i]/n-p.0)^2)/(p.0*(1-p.0)) } alpha.S = length(T.S[T.S>qchisq(0.95,1)])/B ## approximate size ## note that I am using the square of the score statistic ## in the limit, compare to chi-square(1) # LR T.LR<-y*0 for (i in 1:B){ T.LR[i]<-(-2)*(y[i]*log(n*p.0/y[i])+(n-y[i])*log((1-p.0)/(1-y[i]/n))) } alpha.LR = length(T.LR[T.LR>qchisq(0.95,1)])/B ## approximate size # Calculate estimated size for each test size = data.frame(alpha.W,alpha.S,alpha.LR) size