############################################
# STAT 740 R examples
# Tim Hanson
# U. South Carolina
# November 15, 2017
############################################

#######################################
# bootstrapped quantile CI
#######################################

library(boot)

np=function(x,indices){
 f=quantile(x[indices],0.9)
 names(f)=NULL
 return(f)
}

# simulate exp(1) data...
qexp(0.9) # truth
n=100; x=rexp(n)
q=quantile(x,0.9)
q # estimate

f=density(x,bw=bw.bcv(x),n=1,from=q,to=q)$y
se=sqrt(0.9*0.1/(n*f^2))
cat("large-sample 95% CI = (",q-1.96*se,",",q+1.96*se,") \n")

b=boot(x,np,2000)
print(b)
plot(b) # notice distribution is discrete!  normality OK?
boot.ci(b,type="perc")

#######################################
# Challenger data
# bootstrapped SEs and CIs
#######################################

library(boot)
library(logistf)

y=c(1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0)
t=c(53,57,58,63,66,67,67,67,68,69,70,70,70,70,72,73,75,75,76,76,78,79,81)
xy=cbind(t,y) # predictors & data

lr=function(xy,indices){
 p=length(xy[1,])
 f=logistf(xy[indices,p]~xy[indices,1:(p-1)],family="binomial")$coef
 names(f)=NULL
 return(f)
}

summary(logistf(y~t,family="binomial"))

b=boot(xy,lr,2000)
print(b)
plot(b,index=1)
plot(b,index=2)
boot.ci(b,index=1,type="perc")
boot.ci(b,index=2,type="perc")

#######################################
# testing independence in r x c table
#######################################

pt=function(n){
 mu=rowSums(n)%*%t(colSums(n))/sum(n) 
 sum((n-mu)^2/mu) # does not check one of mu is zero
}

pt.boot=function(n,M){
 pi=as.vector(rowSums(n)%*%t(colSums(n))/sum(n)^2)
 w=pt(n)
 s=0; m=0
 repeat{
  m=m+1
  s=s+ifelse(pt(matrix(rmultinom(1,sum(n),pi),dim(n)[1],dim(n)[2]))>=w,1,0)
 if(m==M){break}
 }
cat("Parametric bootstrapped p-value = ",s/M,"\n")
}

# STAT 205 example from Lecture 18
surgery=matrix(c(41,8,15,11),nrow=2)
colnames(surgery)=c("Real","Sham")
rownames(surgery)=c("Success","No success")
surgery
chisq.test(surgery) # w/ Yate's continuity correction
chisq.test(surgery,correct=F) # no correction

pt.boot(surgery,10000)

# STAT 205 example from Lecture 18
plover=matrix(c(21,17,5,19,38,6,26,12,9),nrow=3)
chisq.test(plover,correct=F)

pt.boot(plover,10000)

#######################################
# testing independence in normal data;
# parametric bootstrap sensitive to
# normality assumption
#######################################

it=function(x){ # x is data frame (rows are x_i)
 S=cov(x); n=dim(x)[1]; p=dim(x)[2]
 n*p*(log(sum(diag(S))/p)-log(det(S))/p)
}

pt.boot=function(x,M){
 mu=colMeans(x); n=dim(x)[1]; p=dim(x)[2]
 covsq=diag(p)*sqrt(sum(diag(cov(x)))/p)
 s=0; m=0; w=it(x)
 repeat{
  m=m+1
	xs=t(covsq%*%matrix(rnorm(n*p),nrow=p)+mu)
  s=s+ifelse(it(xs)>=w,1,0)
 if(m==M){break}
 }
cat("Parametric bootstrapped p-value = ",s/M,"\n")
}

# example with independence 
n=100; p=3
mu=c(1,2,3) # mean
m=diag(3) # covariance is identity
x=t(m%*%matrix(rnorm(n*p),nrow=p)+mu)
pairs(x)

pt.boot(x,2000)
cat("Asymptotic p-value = ",1-pchisq(it(x),0.5*(p-1)*(p+2)),"\n")

# example with dependence
n=100; p=3
mu=c(1,2,3) # mean, below is square root of matrix
m=t(chol(matrix(c(2,1,1,1,2,1,1,1,2),nrow=3)))
x=t(m%*%matrix(rnorm(n*p),nrow=p)+mu)
pairs(x)

pt.boot(x,2000)
cat("Asymptotic p-value = ",1-pchisq(it(x),0.5*(p-1)*(p+2)),"\n")