# STAT 824 sp 2023 Lec 11 in class

### Residual bootstrap for multiple linear regression

n <- 20
p <- 10
Sigma <- (1/2)^abs(outer(1:p, 1:p, "-"))
chol_Sigma <- chol(Sigma)
beta <- c(1, 2, -1/2, 0, 1, rep(0, p - 5))
sigma <- 2
cc <- c(1, rep(0, p - 1))          # linear combination of interest: the first coefficient
ccb_true <- t(cc) %*% beta
a <- 2; b0 <- 3                    # gamma shape and scale for the error distribution (kept distinct from the bootstrap index b)
B <- 500                           # bootstrap replications
S <- 100                           # simulation replications
cov_t <- cov_TnS <- logical(S)

for(s in 1:S){

  # generate data
  X <- matrix(rnorm(n*p), n, p) %*% chol_Sigma
  # error <- rnorm(n, 0, sigma)
  # try drawing error terms from a centered gamma distribution:
  error <- (rgamma(n, shape = a, scale = b0) - a*b0) / sqrt(a*b0^2) * sigma
  Y <- X %*% beta + error

  # compute beta_hat and sigma_hat
  XtX_inv_Xt <- solve(t(X) %*% X) %*% t(X)
  beta_hat <- XtX_inv_Xt %*% Y
  Y_hat <- as.numeric(X %*% beta_hat)
  e_hat <- Y - Y_hat
  sigma_hat <- sqrt(sum(e_hat^2)/(n - p))
  ccb_hat <- t(cc) %*% beta_hat
  Omega_hat <- solve(1/n * t(X) %*% X)
  cc_Omega_hat_cc <- t(cc) %*% Omega_hat %*% cc
  se_hat <- sigma_hat / sqrt(n) * sqrt(cc_Omega_hat_cc)

  # build the t-based confidence interval
  lo_t <- ccb_hat - qt(.975, n - p) * se_hat
  up_t <- ccb_hat + qt(.975, n - p) * se_hat
  cov_t[s] <- (lo_t < ccb_true) & (up_t > ccb_true)

  # residual bootstrap of the studentized statistic
  TnS_boot <- numeric(B)
  for(b in 1:B){
    e_boot <- e_hat[sample(1:n, n, replace = TRUE)]
    Y_boot <- X %*% beta_hat + e_boot
    beta_hat_boot <- XtX_inv_Xt %*% Y_boot
    Y_hat_boot <- X %*% beta_hat_boot
    e_hat_boot <- Y_boot - Y_hat_boot
    sigma_hat_boot <- sqrt(sum(e_hat_boot^2)/(n - p))
    ccb_hat_boot <- t(cc) %*% beta_hat_boot
    TnS_boot[b] <- sqrt(n)*(ccb_hat_boot - ccb_hat) / (sigma_hat_boot*sqrt(cc_Omega_hat_cc))
  }
  TnS_boot <- sort(TnS_boot)

  # studentized (percentile-t) bootstrap confidence interval
  lo_TnS <- ccb_hat - TnS_boot[ceiling(0.975*B)] * se_hat
  up_TnS <- ccb_hat - TnS_boot[ceiling(0.025*B)] * se_hat
  cov_TnS[s] <- (lo_TnS < ccb_true) & (up_TnS > ccb_true)
}

# empirical coverage of the two intervals
mean(cov_TnS)
mean(cov_t)
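# Optional rough check (uses objects from the last simulated data set, not part of the
# coverage comparison): compare the bootstrap distribution of the studentized statistic
# with the t_{n-p} reference used by the classical interval; with skewed gamma errors
# the two need not agree closely.
hist(TnS_boot, freq = FALSE, breaks = 30, main = "", xlab = "bootstrap studentized statistic")
curve(dt(x, df = n - p), add = TRUE, col = "red")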
### Wild bootstrap for multiple linear regression with non-constant variance

rm(list = ls())

n <- 30
r <- .7
R <- r^abs(outer(1:8, 1:8, "-"))
P <- 2*sin(R * pi / 6)
X <- cbind(1, (pnorm(matrix(rnorm(n*8), ncol = 8) %*% chol(P)) - .5) * 5)
beta <- c(-1, c(4:1)*(-1)^(4:1), 0, 0, 0, 0)
sigma <- sqrt(1/4 + 2*abs(X[,2] + 2.5)^2)   # non-constant error standard deviation
error <- rnorm(n, 0, sigma)
Y <- as.numeric(X %*% beta) + error

plot(Y ~ X[,4])

# partial plots of the response against each covariate
par(mfrow = c(2,4), mar = c(0,0,0,0))
for(j in 2:9){
  plot(Y - X[,-j] %*% beta[-j] ~ X[,j], xaxt = "n", yaxt = "n")
  abline(a = 0, b = beta[j], col = "red")
}

# do the wild bootstrap
cc <- c(0, 1, 0, 0, 0, 0, 0, 0, 0)
ccb_true <- as.numeric(t(cc) %*% beta)
B <- 500
S <- 500
cov_asymp <- logical(S)
cov_wild <- logical(S)

for(s in 1:S){

  # generate data
  X <- cbind(1, (pnorm(matrix(rnorm(n*8), ncol = 8) %*% chol(P)) - .5) * 5)
  beta <- c(-1, c(4:1)*(-1)^(4:1), 0, 0, 0, 0)
  sigma <- sqrt(1/4 + 2*abs(X[,2] + 2.5)^2)
  error <- rnorm(n, 0, sigma)
  Y <- as.numeric(X %*% beta) + error

  # compute beta_hat and the sandwich variance estimate for t(cc) %*% beta_hat
  XtX_inv_Xt <- solve(t(X) %*% X) %*% t(X)
  beta_hat <- XtX_inv_Xt %*% Y
  Y_hat <- as.numeric(X %*% beta_hat)
  e_hat <- Y - Y_hat
  ccb_hat <- t(cc) %*% beta_hat
  v.c <- as.numeric(t(cc) %*% XtX_inv_Xt %*% diag(e_hat^2) %*% t(XtX_inv_Xt) %*% cc)

  # build the N(0,1) confidence interval
  lo_asymp <- ccb_hat - 1.96 * sqrt(v.c)
  up_asymp <- ccb_hat + 1.96 * sqrt(v.c)
  cov_asymp[s] <- (lo_asymp < ccb_true) & (up_asymp > ccb_true)

  # wild bootstrap of the studentized statistic
  Tn_boot <- numeric(B)
  for(b in 1:B){
    U <- rbeta(n, shape1 = 1/2, shape2 = 3/2)
    e_boot <- e_hat * 4 * (U - 1/4)          # multipliers have mean 0 and variance 1
    Y_boot <- X %*% beta_hat + e_boot
    beta_hat_boot <- XtX_inv_Xt %*% Y_boot
    Y_hat_boot <- X %*% beta_hat_boot
    e_hat_boot <- as.numeric(Y_boot - Y_hat_boot)
    v.c_boot <- as.numeric(t(cc) %*% XtX_inv_Xt %*% diag(e_hat_boot^2) %*% t(XtX_inv_Xt) %*% cc)
    ccb_hat_boot <- t(cc) %*% beta_hat_boot
    Tn_boot[b] <- (ccb_hat_boot - ccb_hat) / sqrt(v.c_boot)
  }
  Tn_boot <- sort(Tn_boot)

  # wild-bootstrap confidence interval
  lo_wild <- ccb_hat - Tn_boot[ceiling(0.975*B)] * sqrt(v.c)
  up_wild <- ccb_hat - Tn_boot[ceiling(0.025*B)] * sqrt(v.c)
  cov_wild[s] <- (lo_wild < ccb_true) & (up_wild > ccb_true)
}

# empirical coverage of the two intervals
mean(cov_wild)
mean(cov_asymp)


### Confidence intervals / bands in nonparametric regression

rm(list = ls())

m <- function(x){ sin(3*pi*x/2) / (1 + 18*x^2*(sign(x) + 1)) }
n <- 200
X <- runif(n, -1, 1)
sigma <- (1.5 - X)^2/4                     # non-constant error standard deviation
error <- rnorm(n, 0, sigma)
Y <- m(X) + error
plot(Y ~ X, col = "gray")
N <- 401
x <- seq(-1, 1, length = N)
lines(m(x) ~ x, lty = 2)

# Nadaraya-Watson estimator
h <- .1
S.x <- dnorm(outer(x, X, FUN = "-"), 0, h)
S.x <- S.x / apply(S.x, 1, sum)            # smoother weights at the grid points x
m.hat.x <- S.x %*% Y
lines(m.hat.x ~ x)

# assume constant variance:
sigma.hat <- sqrt(mean(diff(Y[order(X)])^2)/2)   # difference-based variance estimator
s.x <- sqrt(apply(S.x^2, 1, sum))
lo.asymp.ptwise <- m.hat.x - 1.96 * sigma.hat*s.x
up.asymp.ptwise <- m.hat.x + 1.96 * sigma.hat*s.x
lines(lo.asymp.ptwise ~ x, col = "red")
lines(up.asymp.ptwise ~ x, col = "red")

# allow variance to be non-constant:
S <- dnorm(outer(X, X, FUN = "-"), 0, h)
S <- S / apply(S, 1, sum)                  # smoother weights at the design points X
m.hat.X <- S %*% Y
e.hat <- Y - m.hat.X
s.het.x <- sqrt(apply(S.x^2 %*% e.hat^2, 1, sum))
lo.asymp.het.ptwise <- m.hat.x - 1.96 * s.het.x
up.asymp.het.ptwise <- m.hat.x + 1.96 * s.het.x
lines(lo.asymp.het.ptwise ~ x, col = "blue")
lines(up.asymp.het.ptwise ~ x, col = "blue")

# asymptotic confidence bands:
plot(Y ~ X, col = "gray")
lines(m(x) ~ x, lty = 2)

# assuming constant variance
M.x <- S.x / s.x                           # rows have unit Euclidean norm
D <- cbind(diag(N-1), 0) - cbind(0, diag(N-1))
dM.x <- D %*% M.x
kappa0 <- sum(sqrt(apply(dM.x^2, 1, sum)))
kappa0
find_c0 <- function(x, kappa0, alpha){
  # volume-of-tube equation for the band's critical value
  val <- 2*(1 - pnorm(x)) + kappa0/pi * exp(-x^2/2) - alpha
  return(val)
}
c0 <- uniroot(find_c0, kappa0 = kappa0, alpha = 0.05, lower = 1, upper = 5)$root
c0
lo.asymp.band <- m.hat.x - c0 * sigma.hat * s.x
up.asymp.band <- m.hat.x + c0 * sigma.hat * s.x
lines(lo.asymp.band ~ x)
lines(up.asymp.band ~ x)

# allow non-constant variance:
# scale column j of S.x by |e.hat_j| so that each row of M.het.x has unit norm
M.het.x <- sweep(S.x, 2, abs(as.numeric(e.hat)), "*") / s.het.x
D <- cbind(diag(N-1), 0) - cbind(0, diag(N-1))
dM.het.x <- D %*% M.het.x
kappa0 <- sum(sqrt(apply(dM.het.x^2, 1, sum)))
kappa0
c0.het <- uniroot(find_c0, kappa0 = kappa0, alpha = 0.05, lower = 1, upper = 5)$root
c0.het
lo.asymp.het.band <- m.hat.x - c0.het * s.het.x
up.asymp.het.band <- m.hat.x + c0.het * s.het.x
lines(lo.asymp.het.band ~ x, col = "blue")
lines(up.asymp.het.band ~ x, col = "blue")

# bootstrap confidence bands
B <- 1000
Tn <- numeric(B)
Tn.het <- numeric(B)
for(b in 1:B){

  # constant variance case (residual bootstrap):
  e.star <- e.hat[sample(1:n, n, replace = TRUE)]
  Y.star <- m.hat.X + e.star
  sigma.hat.star <- sqrt(mean(diff(Y.star[order(X)])^2)/2)
  Tn[b] <- max(abs(S.x %*% e.star / (sigma.hat.star * s.x)))

  # allowing non-constant variance (wild bootstrap):
  e.star <- e.hat * 4 * (rbeta(n, shape1 = 1/2, shape2 = 3/2) - 1/4)
  Y.star <- m.hat.X + e.star
  m.hat.X.star <- S %*% Y.star
  e.hat.star <- Y.star - m.hat.X.star
  Tn.het[b] <- max(abs(S.x %*% e.star / sqrt(apply(S.x^2 %*% e.hat.star^2, 1, sum))))
}
Tn <- sort(Tn)
Tn.het <- sort(Tn.het)

plot(Y ~ X, col = "gray")
lines(m(x) ~ x, lty = 2)

# assuming constant variance
lo.boot.band <- m.hat.x - Tn[ceiling(0.95*B)] * sigma.hat * s.x
up.boot.band <- m.hat.x + Tn[ceiling(0.95*B)] * sigma.hat * s.x
lines(lo.boot.band ~ x)
lines(up.boot.band ~ x)

# allowing non-constant variance
lo.boot.band <- m.hat.x - Tn.het[ceiling(0.95*B)] * s.het.x
up.boot.band <- m.hat.x + Tn.het[ceiling(0.95*B)] * s.het.x
lines(lo.boot.band ~ x, col = "blue")
lines(up.boot.band ~ x, col = "blue")
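# Optional rough comparison of the critical values behind the bands on this one data set:
# the tube-formula constants c0 and c0.het against the corresponding bootstrap 95%
# quantiles (the realized values change with the simulated sample).
c(tube = c0, boot = Tn[ceiling(0.95*B)])            # constant-variance bands
c(tube = c0.het, boot = Tn.het[ceiling(0.95*B)])    # heteroscedastic bands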