################################
# Monte Carlo and MCMC Methods #
################################

####################################################
##### Example 1, Chapter 6a notes (Birth rate data)
####################################################

# Prior parameters:

alpha.pri <- 2; beta.pri <- 1

# Summary statistics from data:

sum.x1 <- 217; n1 <- 111
sum.x2 <- 66;  n2 <- 44

# Sampling from the posterior distribution:

theta1.post.draws <- rgamma(10000, shape = alpha.pri + sum.x1, rate = beta.pri + n1)
theta2.post.draws <- rgamma(10000, shape = alpha.pri + sum.x2, rate = beta.pri + n2)

# Approximating the posterior probability that theta1 > theta2:

mean(theta1.post.draws > theta2.post.draws)

# Examining the posterior distribution of the ratio theta1 / theta2:

my.ratio <- theta1.post.draws / theta2.post.draws

plot(density(my.ratio))  # plot of estimated posterior for theta1/theta2

quantile(my.ratio, probs=0.5)  # approximate posterior median

quantile(my.ratio, probs=c(0.025,0.975) )  # approximate 95% quantile-based interval for theta1/theta2

library(TeachingDemos)

emp.hpd(my.ratio, conf=0.95)  # approximate 95% HPD interval for theta1/theta2

####################################################
##### Example 2, Chapter 6a notes (Flu shot data)
####################################################

flu.data <- c(1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,0,0,1)
# data for the observed 19 individuals

my.y <- sum(flu.data)  # count of all "successes"

tot.draws <- 10000 # number of sampled values from each full conditional

# Initial values for quantities of interest:
X.20.init <- 1
theta.init <- 0.5

# Dummy Vectors that will hold values for samples of interest:
X.20.vec <- c(X.20.init, rep(NULL, times = tot.draws) )
theta.vec <- c(theta.init, rep(NULL, times = tot.draws) )

for (j in 2:(tot.draws+1) ) {
 theta.vec[j] <- rbeta(n=1, my.y + X.20.vec[j-1] + 1, 20 - my.y - X.20.vec[j-1] + 1)
 X.20.vec[j] <- rbinom(n=1, size=1, prob=theta.vec[j])
}

# Remove initial values:
theta.vec <- theta.vec[-1]
X.20.vec <- X.20.vec[-1]

# remove first 2000 sampled values as "burn-in":

theta.post <- theta.vec[-(1:2000)]
X.20.post <- X.20.vec[-(1:2000)]

## Posterior summary for theta:

plot(density(theta.post))  # plot of estimated posterior for theta

mean(theta.post)  # Posterior mean of the other 8000 sampled theta values

median(theta.post)  # Posterior median of the other 8000 sampled theta values

quantile(theta.post, probs=c(0.025,0.975) )  # approximate 95% quantile-based interval for theta

emp.hpd(theta.post, conf=0.95)  # approximate 95% HPD interval for theta

## NOTE:  The observed-data MLE for theta here is:
mean(flu.data)
# which is biased in this case because it lacks the missing value.

## Posterior summary for the missing data value X.20:

mean(X.20.post)  # Posterior mean for X.20

var(X.20.post) # Posterior variance for X.20


####################################################
##### Example 3, Chapter 6a notes (Coal mining data)
####################################################

# The data:
disasters <- c(4,5,4,1,0,4,3,4,0,6,3,3,4,0,2,6,3,3,5,
               4,5,3,1,4,4,1,5,5,3,4,2,5,2,2,3,4,2,1,
               3,2,2,1,1,1,1,3,0,0,1,0,1,1,0,0,3,1,0,
               3,2,2,0,1,1,1,0,1,0,1,0,0,0,2,1,0,0,0,
               1,1,0,2,3,3,1,1,2,1,1,1,1,2,4,2,0,0,0,
               1,4,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1)

my.alpha <- 4; my.beta <- 1  # hyperparameters for prior on lambda
my.gamma <- 1; my.delta <- 2 # hyperparameters for prior on phi


### R function to repeatedly sample values of theta = (lambda,phi,k) using Gibbs sampler:

bcp <- function(theta.matrix,y,a,b,g,d) {
 n <- length(y)
 k.prob <- rep(0,times=n)
 for (i in 2:nrow(theta.matrix)) {
  # Sampling values of lambda from the appropriate gamma distribution:
  lambda <- rgamma(1,a+sum(y[1:theta.matrix[(i-1),3]]),b+theta.matrix[(i-1),3])

  # Sampling values of phi from the appropriate gamma distribution:
  phi <- rgamma(1,g+sum(y[theta.matrix[(i-1),3]:n]),d+n-theta.matrix[(i-1),3])

 for (j in 1:n) {
  k.prob[j] <- exp(j*(phi-lambda))*(lambda/phi)^sum(y[1:j])
  }
 k.prob <- k.prob/sum(k.prob)
 k <- sample(1:n,size=1,prob=k.prob)
 theta.matrix[i,] <- c(lambda,phi,k)  # storing the set of parameters for this Gibbs iteration
 }
return(theta.matrix)
}

#### End of bcp function ###

tot.draws <- 2000 # number of sampled values from each full conditional

init.param.values <- c(4, 0.5, 55) # initial values for (lambda, phi, k)
# just used prior means as initial values...

init.theta.matrix <- matrix(0, nrow=tot.draws, ncol=3)

init.theta.matrix <- rbind(init.param.values,init.theta.matrix) 

my.Gibbs.samples <- bcp(init.theta.matrix, y=disasters, a=my.alpha, b=my.beta, g=my.gamma, d=my.delta)

# remove first 1000 sampled values as "burn-in":

my.post <- my.Gibbs.samples[-(1:1000),]

# Posterior summaries for lambda, phi, and k:

apply(my.post,2,median)  # Posterior medians for lambda, phi, and k:

cbind(apply(my.post, 2, quantile, probs=0.025),   apply(my.post, 2, quantile, probs=0.975) )
# approximate 0.025 and 0.975 quantiles for lambda, phi, and k
# to get approximate 95% quantile-based intervals

rbind( emp.hpd(my.post[,1], conf=0.95), 
emp.hpd(my.post[,2], conf=0.95), 
emp.hpd(my.post[,3], conf=0.95) ) # approximate 95% HPD intervals



####################################################
##### Example 5, Chapter 6a notes (Sparrow data)
####################################################

library(mvtnorm)
# May need to install the mvtnorm package first?
# If so, type at the command line:  install.packages("mvtnorm", dependencies=T)
# while plugged in to the internet.

sparrow.data <- read.table("http://www.stat.sc.edu/~hitchcock/sparrowdata.txt", header=T)

y <- sparrow.data[,1]
x <- sparrow.data[,2]; xsq <- x^2

X <- cbind(rep(1,times=length(x)), x, xsq)

p <- dim(X)[2]  # number of columns of matrix X

beta.prior.mean <- rep(0, times=p)
beta.prior.sd <- rep(10,times=p)

proposal.cov.matrix <- var(log(y+1/2))*solve(t(X)%*%X)  
# should be reasonable in this case:   should be close to (sigma^2)*(X'X)^{-1}

S <- 10000
beta.current <- rep(0,times=p) # initial value for M-H algorithm
acs <- 0 # will be to track "acceptance rate"

beta.values <- matrix(0,nrow=S,ncol=p)  # will store sampled values of beta vector

for (s in 1:S) {
 beta.proposed <- t(rmvnorm(1, beta.current, proposal.cov.matrix))

 log.accept.ratio <- sum(dpois(y,exp(X %*% beta.proposed), log=T)) - sum(dpois(y,exp(X %*% beta.current), log=T)) +
        sum(dnorm(beta.proposed, beta.prior.mean, beta.prior.sd, log=T)) - 
        sum(dnorm(beta.current, beta.prior.mean, beta.prior.sd, log=T) )

  if (log.accept.ratio > log(runif(1)) ) {
    beta.current <- beta.proposed
    acs <- acs + 1
  }

beta.values[s,] <- beta.current
}

acs/S  # gives the acceptance rate

acf(beta.values[,1])  # plot autocorrelation values for beta_0
acf(beta.values[,2])  # plot autocorrelation values for beta_1
acf(beta.values[,3])  # plot autocorrelation values for beta_2

# Seems to be an issue with serial dependence.

# Thinning out the sampled values by taking every 10th row:

beta.values.thin <- beta.values[10*(1:(S/10) ),]

# Looks much better...

### Posterior summary:

apply(beta.values.thin,2,median)  # Posterior medians for each regression coefficent

cbind(apply(beta.values.thin, 2, quantile, probs=0.025),   apply(beta.values.thin, 2, quantile, probs=0.975) )
# approximate 0.025 and 0.975 quantiles for each regression coefficent
# to get approximate 95% quantile-based intervals

rbind( emp.hpd(beta.values.thin[,1], conf=0.95), 
emp.hpd(beta.values.thin[,2], conf=0.95), 
emp.hpd(beta.values.thin[,3], conf=0.95) ) # approximate 95% HPD intervals


# Plots of posterior median for expected offspring for ages 1,2,3,4,5,6:

my.X <- cbind( rep(1,times=6), (1:6), (1:6)^2 )

y.hats <- exp( my.X %*% as.matrix( apply(beta.values.thin,2,median), ncol=1 ) )

plot( (1:6), y.hats, type='b', xlab='age', ylab='expected offspring')

# Trace plots for the sampled beta_0 and beta_1 values:

plot(beta.values.thin[,1], type='l')

plot(beta.values.thin[,2], type='l')