############################################################################ # COMPUTATIONAL STATISTICS # by Geof H. Givens and J. A. Hoeting # CHAPTER 7 EXAMPLES (Last update: 1/9/2013) ############################################################################ ### EXAMPLE 7.2 ESTIMATING A MIXTURE PARAMETER ############################################################################ ############################################################################ # y = observed data # n = number of iterations # x.val = contains mixture parameter values # f = posterior density # R = computes Metropolis-Hastings ratio # g = proposal density ############################################################################ # NOTES # In the following, the density of the prior is uniform over the # support for the proposal and hence not included in the computation of the # Metropolis-Hastings ratio. ############################################################################ ## Mixture data mixture.dat=read.table("http://people.stat.sc.edu/hansont/stat740/mixture.txt",header=T) y = mixture.dat$y ## HISTOGRAM OF DATA AND PLOT OF MIXTURE DISTRIBUTION (See figure 7.1) par(mfrow=c(1,1)) x=seq(5,14,by=.01) d=.7*dnorm(x,7,.5) + .3*dnorm(x,10,.5) hist(y,breaks=20,freq=FALSE,main="Histogram of mixture data \n See Fig 7.1 in Givens and Hoeting",ylab="Density") points(x,d,type="l") ## INITIAL VALUES n = 10000 x.val1 = NULL x.val2=NULL set.seed(0) ## FUNCTIONS f = function(x){prod(x*dnorm(y,7,0.5) + (1-x)*dnorm(y,10,0.5))} R = function(xt,x){f(x)*g(xt)/(f(xt)*g(x))} ## MAIN # BETA(1,1) PROPOSAL DENSITY g = function(x){dbeta(x,1,1)} x.val1[1] = rbeta(1,1,1) for(i in 1:n){ xt = x.val1[i] x = rbeta(1,1,1) p = min(R(xt,x),1) d = rbinom(1,1,p) x.val1[i+1] = x*d + xt*(1-d) } mean(x.val1[201:(n+1)]) par(mfrow=c(2,2)) plot(x.val1[201:(n+1)],ylim=c(0,1),type="l",ylab="delta",xlab="t") title("Sample path for Beta(1,1) Proposal Dist.") hist(x.val1[201:(n+1)],breaks=20,xlab="delta", main="Hist. for Beta(1,1) Proposal Dist.") # BETA(2,10) PROPOSAL DENSITY g = function(x){dbeta(x,2,10)} x.val2[1] = rbeta(1,2,10) for(i in 1:n){ xt = x.val2[i] x = rbeta(1,2,10) p = min(R(xt,x),1) d = rbinom(1,1,p) x.val2[i+1] = x*d + xt*(1-d) } mean(x.val2[201:(n+1)]) plot(x.val2[201:(n+1)],ylim=c(0,1),type="l",ylab="delta",xlab="t") title("Sample path for Beta(2,10) Proposal Dist.") hist(x.val2[201:(n+1)],breaks=20,xlab="delta", main="Hist. for Beta(2,10) Proposal Dist.") ############################################################################ ### EXAMPLE 7.3 ESTIMATING A MIXTURE PARAMETER (RANDOM WALK) ############################################################################ # # x = observed data # log.like = function to compute the log likelihood # p = mixture parameter values # u = logit of the mixture parameter values # R = Metropolis-Hastings ratio # num.its = number of iterations for the MCMC chain ############################################################################ ## NOTES # In the following, the density of the prior distribution is uniform over the # support for the proposal and hence not included in the computation of # the Metropolis-Hastings ratio. Also, since the proposal density is # symmetric about zero, the Metropolis-Hastings ratio can be # reduced to just the posterior densities and Jacobian. In # this example we leave the unreduced version of the ratio to show # implementation, but in subsequent implementations it may be reduced when # possible. ############################################################################ # UNIFORM(-1,1) WALK ## INITIAL VALUES #READ IN THE DATA mixture.dat = read.table(file.choose(),header=TRUE) x = mixture.dat$y #ESTABLISH INITIAL VALUES u=rep(0,num.its) u[1]= runif(1,-1,1) p=rep(0,num.its) p[1]=exp(u[1])/(1+exp(u[1])) set.seed(1) num.its = 10000 ## FUNCTIONS log.like<-function(p,x) { sum(log(p*dnorm(x,7,.5)+(1-p)*dnorm(x,10,.5))) } ## MAIN for (i in 1:(num.its-1)) { u[i+1]=u[i]+runif(1,-1,1) p[i+1]=exp(u[i+1])/(1+exp(u[i+1])) R=exp(log.like(p[i+1],x)-log.like(p[i],x))*exp(u[i])/exp(u[i+1]) if (R<1) if(rbinom(1,1,R)==0) {p[i+1]=p[i]; u[i+1]=u[i]} } mean(p[-burn.in]) par(mfrow=c(2,2)) plot(p,ylim=c(0,1),type="l",ylab="delta",xlab="t") hist(p,breaks=20,xlab="delta", main="Hist. for Unif(-1,1) Walk") # UNIFORM(-0.01,0.01) WALK ##INITIAL VALUES p2=rep(0,num.its) p2[1]=exp(u[1])/(1+exp(u[1])) ## MAIN for (i in 1:(num.its-1)) { u[i+1]=u[i]+runif(1,-.01,.01) p2[i+1]=exp(u[i+1])/(1+exp(u[i+1])) R= exp(log.like(p2[i+1],x)-log.like(p2[i],x))*exp(u[i])/exp(u[i+1]) if (R<1) if(rbinom(1,1,R)==0) {p2[i+1]=p2[i]; u[i+1]=u[i]} } mean(p2[-burn.in]) plot(p2,ylim=c(0,1),type="l",ylab="delta",xlab="t") hist(p2,breaks=20,xlab="delta", main="Hist. for Unif(-0.01,0.01) Walk") #Exercises: # 1. Recode example 7.3 using the likelihood instead of the log-likelihood. What changes in the code? # 2. Re-run the Uniform(-0.01,0.01) code using more iterations. What happens? # 3. Is either case sensitive to starting value? Why or why not? ############################################################################ ### EXAMPLE 7.6 FUR SEAL PUP CAPTURE-RECAPTURE STUDY (GIBBS SAMPLING) ############################################################################ # ci = number captured during each census # mi = number newly caught during each census # r = number of unique fur seal pups observed # I = number of census attempts # N = estimated population size # alpha = computes estimated capture probabilities for each census # num.its = number of iterations # burn.in = iterations to discard for burn-in ############################################################################ ## INITIAL VALUES ci = c(30,22,29,26,31,32,35) mi = c(30,8,17,7,9,8,5) r = sum(mi) I=7 alpha= matrix(0,num.its,I) N=rep(0,num.its) N[1]=sample(84:500,1) set.seed(4) num.its=100000 burn.in = 1:1000 #MAIN for (i in 2:num.its) { alpha[i,]=rbeta(I,ci+.5,N[i-1]-ci+.5) N[i]=rnbinom(1,r+1,1-prod(1-alpha[i,]))+r } # DISCARDING BURN-IN alpha.out = alpha[-burn.in,] N.out = N[-burn.in] ## OUTPUT mean(N.out) # POSTERIOR MEAN OF N #MANUAL COMPUTATION OF AN HPD INTERVAL #NOTE: this is not a sophisticated method to compute an HPD interval, but #it is adequate for a one-time computation of an HPD interval and a good learning tool. table(N.out) sum(N.out>=85 & N.out<=95)/length(N.out) sum(N.out>=85 & N.out<=94)/length(N.out) sum(N.out>=84 & N.out<=93)/length(N.out) sum(N.out>=84 & N.out<=94)/length(N.out) ## OUTPUT PLOTS plot((max(burn.in)+1):num.its,N.out,type="l", xlab="t",ylab="N",main="Sample path for N") boxplot(split(rowMeans(alpha.out),N.out),ylab="Mean Capture Probability", xlab="N",main="Split boxplot for seal pup example") hist(N.out,freq=FALSE,xlab="N", main="Estimated marginal posterior probabilities for N") #Exercise: #1. Write a function to compute the HPD interval for discrete data like the N in the seal example. ############################################################################ ### EXAMPLE 7.8 MIXTURE DISTRIBUTION, CONTINUED ############################################################################ #See EXAMPLE 7.2 to calculate these inputs: #x.val1 = delta values from independence chain with Beta(1,1) proposal density #x.val2 = delta values from independence chain with Beta(2,10) proposal density acf(x.val1) acf(x.val2) ############################################################################ ### EXAMPLE 7.10 FUR SEAL PUP CAPTURE-RECAPTURE STUDY, CONTINUED ############################################################################ # ci = number captured during each census # mi = number newly caught during each census # r = number of unique fur seal pups observed # I = number of census attempts # N.val = estimated population size # alpha.val = computes estimated capture probabilities for each census # u.val = contains log values of the parameters for estimating the capture probs # num.its = number of iterations # burn.in = iterations to discard for burn-in # f = computes posterior density, p(theta|alpha) # J = computes Jacobian # R = computes Metropolis-Hastings ratio ############################################################################ ############################################################################ ## NOTES # Since the proposal density is symmetric about zero, the # Metropolis-Hastings ratio is reduced to just the posterior # densities and Jacobian. ############################################################################ ## INITIAL VALUES ci = c(30,22,29,26,31,32,35) mi = c(30,8,17,7,9,8,5) r = sum(mi) I=7 set.seed(4) num.its=100000 burn.in = 1:1000 alpha.val= matrix(0,num.its,I) u.val = matrix(0,num.its,2) N.val=rep(0,num.its) N.val[1]=sample(84:500,1) ## FUNCTIONS new.log.target=function(exp.uv,p) { I*(lgamma(sum(exp.uv))-lgamma(exp.uv[1])-lgamma(exp.uv[2])) + exp.uv[1]*sum(log(p))+exp.uv[2]*sum(log(1-p))-sum(exp.uv)/1000+ log(exp.uv[1])+log(exp.uv[2]) } R = function(new,old,alphas) { exp(new.log.target(exp(new),alphas)-new.log.target(exp(old),alphas)) } ## MAIN for (i in 2:num.its) { alpha.val[i,]=rbeta(I,ci+exp(u.val[i-1,1]),N.val[i-1]-ci+exp(u.val[i-1,2])) N.val[i]=rnbinom(1,r,1-prod(1-alpha.val[i,])) + r u.star=u.val[i-1,]+rnorm(2,0,.085) p=min(R(u.star,u.val[i-1,],alpha.val[i,]),1) d = rbinom(1,1,p) u.val[i,] = u.star*d + u.val[i-1,]*(1-d) } # DISCARDING BURN-IN N.out = N.val[-burn.in] u.out = u.val[-burn.in,] alpha.out = alpha.val[-burn.in,] ## OUTPUT mean(N.out) # POSTERIOR MEAN OF N #MANUAL COMPUTATION OF AN HPD INTERVAL #NOTE: this is not a sophisticated method to compute an HPD interval, but #it is adequate for a one-time computation of an HPD interval and a good learning tool. table(N.out) sum(N.out>=85 & N.out<=95)/length(N.out) sum(N.out>=85 & N.out<=94)/length(N.out) sum(N.out>=84 & N.out<=93)/length(N.out) sum(N.out>=84 & N.out<=94)/length(N.out) ## OUTPUT PLOTS #Plots like Figures 7.10 acf(alpha.out[,1]) a=(dim(u.out)[1]-999):dim(u.out)[1] plot(u.out[a,2],u.out[a,1],type="l") #These plots aren't in the book, but are similar to the plots in Example 7.6 plot((max(burn.in)+1):num.its,N.out,type="l", xlab="t",ylab="N",main="Sample path for N") boxplot(split(rowMeans(alpha.out),N.out),ylab="Mean Capture Probability", xlab="N",main="Split boxplot for seal pup example") hist(N.out,freq=FALSE,xlab="N", main="Estimated marginal posterior probabilities for N") ############################################################################ ### END OF FILE