############################################ # STAT 740 R examples # Tim Hanson # U. South Carolina # November 15, 2017 ############################################ ####################################### # bootstrapped quantile CI ####################################### library(boot) np=function(x,indices){ f=quantile(x[indices],0.9) names(f)=NULL return(f) } # simulate exp(1) data... qexp(0.9) # truth n=100; x=rexp(n) q=quantile(x,0.9) q # estimate f=density(x,bw=bw.bcv(x),n=1,from=q,to=q)$y se=sqrt(0.9*0.1/(n*f^2)) cat("large-sample 95% CI = (",q-1.96*se,",",q+1.96*se,") \n") b=boot(x,np,2000) print(b) plot(b) # notice distribution is discrete! normality OK? boot.ci(b,type="perc") ####################################### # Challenger data # bootstrapped SEs and CIs ####################################### library(boot) library(logistf) y=c(1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0) t=c(53,57,58,63,66,67,67,67,68,69,70,70,70,70,72,73,75,75,76,76,78,79,81) xy=cbind(t,y) # predictors & data lr=function(xy,indices){ p=length(xy[1,]) f=logistf(xy[indices,p]~xy[indices,1:(p-1)],family="binomial")$coef names(f)=NULL return(f) } summary(logistf(y~t,family="binomial")) b=boot(xy,lr,2000) print(b) plot(b,index=1) plot(b,index=2) boot.ci(b,index=1,type="perc") boot.ci(b,index=2,type="perc") ####################################### # testing independence in r x c table ####################################### pt=function(n){ mu=rowSums(n)%*%t(colSums(n))/sum(n) sum((n-mu)^2/mu) # does not check one of mu is zero } pt.boot=function(n,M){ pi=as.vector(rowSums(n)%*%t(colSums(n))/sum(n)^2) w=pt(n) s=0; m=0 repeat{ m=m+1 s=s+ifelse(pt(matrix(rmultinom(1,sum(n),pi),dim(n)[1],dim(n)[2]))>=w,1,0) if(m==M){break} } cat("Parametric bootstrapped p-value = ",s/M,"\n") } # STAT 205 example from Lecture 18 surgery=matrix(c(41,8,15,11),nrow=2) colnames(surgery)=c("Real","Sham") rownames(surgery)=c("Success","No success") surgery chisq.test(surgery) # w/ Yate's continuity correction chisq.test(surgery,correct=F) # no correction pt.boot(surgery,10000) # STAT 205 example from Lecture 18 plover=matrix(c(21,17,5,19,38,6,26,12,9),nrow=3) chisq.test(plover,correct=F) pt.boot(plover,10000) ####################################### # testing independence in normal data; # parametric bootstrap sensitive to # normality assumption ####################################### it=function(x){ # x is data frame (rows are x_i) S=cov(x); n=dim(x)[1]; p=dim(x)[2] n*p*(log(sum(diag(S))/p)-log(det(S))/p) } pt.boot=function(x,M){ mu=colMeans(x); n=dim(x)[1]; p=dim(x)[2] covsq=diag(p)*sqrt(sum(diag(cov(x)))/p) s=0; m=0; w=it(x) repeat{ m=m+1 xs=t(covsq%*%matrix(rnorm(n*p),nrow=p)+mu) s=s+ifelse(it(xs)>=w,1,0) if(m==M){break} } cat("Parametric bootstrapped p-value = ",s/M,"\n") } # example with independence n=100; p=3 mu=c(1,2,3) # mean m=diag(3) # covariance is identity x=t(m%*%matrix(rnorm(n*p),nrow=p)+mu) pairs(x) pt.boot(x,2000) cat("Asymptotic p-value = ",1-pchisq(it(x),0.5*(p-1)*(p+2)),"\n") # example with dependence n=100; p=3 mu=c(1,2,3) # mean, below is square root of matrix m=t(chol(matrix(c(2,1,1,1,2,1,1,1,2),nrow=3))) x=t(m%*%matrix(rnorm(n*p),nrow=p)+mu) pairs(x) pt.boot(x,2000) cat("Asymptotic p-value = ",1-pchisq(it(x),0.5*(p-1)*(p+2)),"\n")