# STAT 720 sp 2019 Lec 11 supplement: bootstrapping ############################################################### ############################################################### # IID bootstrap: Consider the pivot # T_n = \sqrt{n}(\bar X_n - \mu)/S_n # use IID bootstrap to construc a CI for \mu. ############################################################### ############################################################### # generate a random sample from a non-Normal distribution: n <- 15 nu <- 2 x <- rchisq(n,nu) # has expected value equal to nu x.bar <- mean(x) sigma.hat <- sd(x) # run Monte-Carlo simulation to get approximate quantiles of # distribution of T_n^* B <- 1000 T.star <- numeric() for( b in 1:B) { x.star <- sample(x,n,replace=TRUE) sigma.hat.star <- sd(x.star) x.bar.star <- mean(x.star) T.star[b] <- sqrt(n)*(x.bar.star - x.bar)/sigma.hat.star } # get Monte-Carlo-approximated alpha/2 and 1-alpha/2 quantiles alpha <- 0.05 G.alphaby2 <- sort(T.star)[floor(B*(1-alpha/2))] G.1minusalphaby2 <- sort(T.star)[floor(B*(alpha/2))] # construct (1-alpha)*100% CI for the mean using the bootstrap # estimates of the quantiles; record whether the interval # contains the true mean. lo.ci <- x.bar - G.alphaby2 * sigma.hat / sqrt(n) up.ci <- x.bar - G.1minusalphaby2 * sigma.hat / sqrt(n) covered <- (lo.ci < nu) & (up.ci > nu) lo.ci up.ci covered ############################################################### # run simulation to get coverage probability: ############################################################### # specify parameters n <- 15 nu <- 2 alpha <- 0.05 S <- 100 B <- 1000 # use the bootstrap to build a (1-alpha)*100% CI for the mean # on S randomly generated datasets lo.ci <- up.ci <- lo.ci.t <- up.ci.t <- numeric() boot.covered <- t.covered <- logical(S) for( s in 1:S ) { # generate data x <- rchisq(n,nu) x.bar <- mean(x) sigma.hat <- sd(x) # run Monte-Carlo simulation to get bootstrap distribution of # the pivot quantity T.star <- numeric(B) for( b in 1:B ) { x.star <- sample(x,n,replace=TRUE) sigma.hat.star <- sd(x.star) x.bar.star <- mean(x.star) T.star[b] <- sqrt(n)*(x.bar.star - x.bar)/sigma.hat.star } # extract desired quantiles and construct (1-alpha)*100% CI for the mean G.alphaby2 <- sort(T.star)[floor(B*(1-alpha/2))] G.1minusalphaby2 <- sort(T.star)[floor(B*(alpha/2))] lo.ci[s] <- x.bar - G.alphaby2 * sigma.hat / sqrt(n) up.ci[s] <- x.bar - G.1minusalphaby2 * sigma.hat / sqrt(n) # for comparison construct t-based (1-alpha)*100% CI for the mean lo.ci.t[s] <- x.bar - qt(1-alpha/2,n-1) * sigma.hat / sqrt(n) up.ci.t[s] <- x.bar + qt(1-alpha/2,n-1) * sigma.hat / sqrt(n) # record whether the intervals covered the true mean boot.covered[s] <- (lo.ci[s] < nu) & (up.ci[s] > nu) t.covered[s] <- (lo.ci.t[s] < nu) & (up.ci.t[s] > nu) } # plot the confidence intervals and display the coverage proportion over the # simulated data sets par(mfrow=c(1,2)) # the bootstrap intervals: plot(NA,main = paste("IID bootstrap CI coverage (",mean(boot.covered),")",sep=""), ylim=c(0,1), xlim = range(lo.ci, up.ci), xlab = "Confidence intervals", yaxt = "n", ylab= "") abline(v = nu,lty=3) for(s in 1:S) { lines(y = c(s/S,s/S),x = c(lo.ci[s], up.ci[s]),lty = 2 - boot.covered[s]) } # the t-based intervals: plot(NA,main = paste("t-based CI coverage (",mean(t.covered),")",sep=""), ylim=c(0,1), xlim = range(lo.ci.t, up.ci.t), xlab = "Confidence intervals", yaxt = "n", ylab= "") abline(v = nu,lty=3) for(s in 1:S) { lines(y = c(s/S,s/S), x = c(lo.ci.t[s], up.ci.t[s]), lty = 2 - t.covered[s]) } ############################################################### ############################################################### # Moving-block bootstrap for time series: Consider the pivot # T_n = sqrt{n}(\bar X_n - \mu) and use MBB to build confidence # intervals for \mu. In addition, estimate Var(T_n) with MBB. ############################################################### ############################################################### library(tscourse) # generate some data phi <- c(.3,.2) theta <- c(.4) n <- 300 sigma <- 2 X <- get.ARMA.data(phi,theta,sigma,n) # true mean is equal to zero. X.bar <- mean(X) # choose block size l and build matrix with n- l + 1 rows # such that each row is a block; we will resample the rows # of this matrix l <- floor(sqrt(n)) X.blocks <- matrix(NA,n-l+1,l) for( j in 1:(n-l+1)) { X.blocks[j,] <- X[j:(j+l-1)] } # compute the mean of the MBB-induced distribution: X.bar.MBB <- mean(X.blocks) # compute MBB bootstrap estimator of var.T analytically accum <- 0 for( j in 1:(n-l+1) ) for(k in 1:l) for(m in 1:l) { accum <- accum + (X.blocks[j,k] - X.bar.MBB)*(X.blocks[j,m] - X.bar.MBB) } var.T.hat.analytic <- accum / (l*(n-l+1)) # run Monte-Carlo simulation to get approximate quantiles of # distribution of T_n^* B <- 500 T.star <- numeric(B) for( b in 1:B) { n.blocks <- floor(n/l) + 1 # choose an extra block if n/l is non-integer which.blocks <- sample(1:(n-l+1),n.blocks,replace=TRUE) X.star.mat <- X.blocks[which.blocks,] X.star <- as.vector(t(X.star.mat))[1:n] # truncate to length n X.bar.star <- mean(X.star) T.star[b] <- sqrt(n)*( X.bar.star - X.bar.MBB ) } # compute MC version of Var(T_n) in order to compare it # with the analytical one var.T.hat.MC <- var(T.star) var.T.hat.MC var.T.hat.analytic # get Monte-Carlo-approximated alpha/2 and 1-alpha/2 quantiles alpha <- 0.05 G.alphaby2 <- sort(T.star)[floor(B*(1-alpha/2))] G.1minusalphaby2 <- sort(T.star)[floor(B*(alpha/2))] # construct (1-alpha)*100% CI for the mean using the MBB # estimates of the quantiles; record whether the interval # contains the true mean. lo.ci <- X.bar - G.alphaby2 / sqrt(n) up.ci <- X.bar - G.1minusalphaby2 / sqrt(n) covered <- (lo.ci < 0) & (up.ci > 0) lo.ci up.ci covered ############################################################### # run simulation to get coverage probability: ############################################################### rm(list=ls()) # specify parameters: phi <- c(.3,.2) theta <- c(.4) n <- 300 sigma <- 2 alpha <- 0.05 S <- 100 B <- 1000 l <- floor(sqrt(n)) # use the bootstrap to build a (1-alpha)*100% CI for the mean # and to estimate Var(T_n) on S randomly generated datasets lo.ci <- up.ci <- var.T.hat.analytic <- var.T.hat.MC <- numeric() covered <- logical() for(s in 1:S) { # generate data; compute sample mean. X <- get.ARMA.data(phi,theta,sigma,n) X.bar <- mean(X) # build matrix with n- l + 1 rows such that each row is a block X.blocks <- matrix(NA,n-l+1,l) for( j in 1:(n-l+1)) { X.blocks[j,] <- X[j:(j+l-1)] } # compute the mean of the MBB-induced distribution X.bar.MBB <- mean(X.blocks) # compute MBB bootstrap estimator of var.T analytically accum <- 0 for( j in 1:(n-l+1) ) for(k in 1:l) for(m in 1:l) { accum <- accum + (X.blocks[j,k] - X.bar.MBB)*(X.blocks[j,m] - X.bar.MBB) } var.T.hat.analytic[s] <- accum / (l*(n-l+1)) # run Monte-Carlo simulation to get approximate quantiles of # distribution of T_n^* T.star <- numeric(B) for( b in 1:B) { n.blocks <- floor(n/l) + 1 # choose an extra block if n/l is non-integer which.blocks <- sample(1:(n-l+1),n.blocks,replace=TRUE) X.star.mat <- X.blocks[which.blocks,] X.star <- as.vector(t(X.star.mat))[1:n] # truncate to length n X.bar.star <- mean(X.star) T.star[b] <- sqrt(n)*( X.bar.star - X.bar.MBB ) } # compute MC version of Var(T_n) in order to compare it # with the analytical one var.T.hat.MC[s] <- var(T.star) # get Monte-Carlo-approximated alpha/2 and 1-alpha/2 quantiles G.alphaby2 <- sort(T.star)[floor(B*(1-alpha/2))] G.1minusalphaby2 <- sort(T.star)[floor(B*(alpha/2))] # construct (1-alpha)*100% CI for the mean using the MBB # estimates of the quantiles; record whether the interval # contains the true mean. lo.ci[s] <- X.bar - G.alphaby2 / sqrt(n) up.ci[s] <- X.bar - G.1minusalphaby2 / sqrt(n) covered[s] <- (lo.ci[s] < 0) & (up.ci[s] > 0) } # plot the confidence intervals and display the coverage proportion over the # simulated data sets par(mfrow=c(1,1)) plot(NA,main = paste("Moving-blocks bootstrap CI coverage (",mean(covered),")",sep=""), ylim=c(0,1), xlim = range(lo.ci, up.ci), xlab = "Confidence intervals", yaxt = "n", ylab= "") abline(v = 0,lty=3) for(s in 1:S) { lines(y = c(s/S,s/S),x = c(lo.ci[s], up.ci[s]),lty = 2 - covered[s]) } # compute the true value of Var(T_n) and check performance of # the MBB estimator gamma <- ARMAacvf(phi,theta,sigma,max.lag=l) var.T <- sum(gamma,gamma[-1]) var.T E.var.T.hat.analytic <- mean(var.T.hat.analytic) # plot histogram of the MBB estimates of Var(T_n) with vertical lines # at the true value and the mean of the estimates. This gives us a sense of # the bias. hist(var.T.hat.analytic) abline(v=var.T,lty=3) abline(v=E.var.T.hat.analytic,lty=2)