################################
# Bayesian Hypothesis Testing  #
################################

coal.strikes.data <- c(9,8,13,23,15,23,13,6,13,15,10)
n <- length(coal.strikes.data)
sum.y <- sum(coal.strikes.data)

####################
# A one-sided test #
####################

# Posterior probability that theta is less than or equal to 0.05,
# computed exactly from the Beta(n, sum.y + 0.5) posterior:
pbeta(0.05, n, sum.y + 0.5)

# The same probability via Monte Carlo approximation:
my.rand.betas <- rbeta(50000, n, sum.y + 0.5)
mean(my.rand.betas <= 0.05)

####################
# A two-sided test #
####################

# Posterior probability that theta lies in the interval (0.097, 0.103):
pbeta(0.103, n, sum.y + 0.5) - pbeta(0.097, n, sum.y + 0.5)

###############################################
# Comparing Two Means: Bayes Factor Approach  #
###############################################

# Blood pressure reduction for 21 subjects
# (10 took calcium supplements, 11 took placebo):
calcium <- c(7,-4,18,17,-3,-5,1,10,11,-2)
placebo <- c(-1,12,-1,-3,3,-5,5,2,-11,-1,-3)

x1 <- calcium
x2 <- placebo

# Per-group sample summaries:
xbar.1 <- mean(x1)
xbar.2 <- mean(x2)
s2.1 <- var(x1)
s2.2 <- var(x2)
n1 <- length(x1)
n2 <- length(x2)

# Pooled variance and "effective" per-group sample size:
s2.p <- ((n1 - 1) * s2.1 + (n2 - 1) * s2.2) / (n1 + n2 - 2)
n.alt <- 1 / (1/n1 + 1/n2)

# Usual two-sample t-statistic:
t.star <- (xbar.1 - xbar.2) / sqrt(s2.p / n.alt)

# Prior mean and variance for Delta = |mu1 - mu2|/sigma:
mean.Delta <- 0
var.Delta <- 1/9   ## "objective" prior information
# mean.Delta <- 0.5
# var.Delta <- 1/9 ## stronger prior belief that the two population means differ

my.pv <- sqrt(1 + n.alt * var.Delta)        # scale parameter for noncentral-t
my.ncp <- mean.Delta * sqrt(n.alt) / my.pv  # noncentrality parameter for noncentral-t

# Bayes Factor: central-t density of t.star under H0 divided by the
# scaled noncentral-t density under the alternative:
B <- dt(t.star, df = n1 + n2 - 2, ncp = 0) /
  ( dt(t.star / my.pv, df = n1 + n2 - 2, ncp = my.ncp) / my.pv )
print(B)

# Probability that H_0 is true, given equal prior probabilities for H_0 and H_a:
1 / (1 + (1/B))

#################################################
# Comparing Two Means: Gibbs Sampling Approach  #
#################################################

# Data:
calcium <- c(7,-4,18,17,-3,-5,1,10,11,-2)
placebo <- c(-1,12,-1,-3,3,-5,5,2,-11,-1,-3)

x1 <- calcium
x2 <- placebo

xbar.1 <- mean(x1)
xbar.2 <- mean(x2)
s2.1 <- var(x1)
s2.2 <- var(x2)
n1 <- length(x1)
n2 <- length(x2)

### Sampler settings:
S <- 5000  # number of Gibbs iterations

# Prior parameters for mu (overall mean; group means are mu + tau and mu - tau):
mean.mu <- 2  # expect slight overall mean reduction?
var.mu <- 50  # chosen quite wide

# Prior parameters for tau (half the difference between the group means):
mean.tau <- 0  # not favoring either group a priori
var.tau <- 50  # chosen quite wide again

# Prior parameters for sigma^2:
nu1 <- 1
nu2 <- 20

## Vectors to hold the sampled values of mu, tau, and sigma^2:
mu.vec <- rep(0, times = S)
tau.vec <- rep(0, times = S)
sigma.sq.vec <- rep(0, times = S)

## Starting values for mu and tau:
mu.vec[1] <- 2
tau.vec[1] <- 0

## Gibbs algorithm: at each iteration draw sigma^2, then mu, then tau
## from their full conditional distributions.
for (s in 2:S) {

  # Update sigma^2 (inverse-gamma full conditional; residuals use the
  # group means mu + tau for group 1 and mu - tau for group 2):
  sigma.sq.vec[s] <- 1 / rgamma(1, (nu1 + n1 + n2) / 2,
                                (nu1 * nu2 +
                                   sum((x1 - mu.vec[s-1] - tau.vec[s-1])^2) +
                                   sum((x2 - mu.vec[s-1] + tau.vec[s-1])^2)) / 2)

  # Update mu (normal full conditional, precision-weighted combination
  # of the prior and the data with tau subtracted out):
  var.mu.cond <- 1 / (1/var.mu + (n1 + n2)/sigma.sq.vec[s])
  mean.mu.cond <- var.mu.cond * (mean.mu/var.mu +
                                   sum(x1 - tau.vec[s-1])/sigma.sq.vec[s] +
                                   sum(x2 + tau.vec[s-1])/sigma.sq.vec[s])
  mu.vec[s] <- rnorm(1, mean.mu.cond, sqrt(var.mu.cond))

  # Update tau (normal full conditional; group 2 enters with a minus sign
  # since its mean is mu - tau):
  var.tau.cond <- 1 / (1/var.tau + (n1 + n2)/sigma.sq.vec[s])
  mean.tau.cond <- var.tau.cond * (mean.tau/var.tau +
                                     sum(x1 - mu.vec[s])/sigma.sq.vec[s] -
                                     sum(x2 - mu.vec[s])/sigma.sq.vec[s])
  tau.vec[s] <- rnorm(1, mean.tau.cond, sqrt(var.tau.cond))

}  ## End Gibbs algorithm.
# MCMC diagnostics for mu, tau, sigma^2.
# NOTE(review): sigma.sq.vec[1] is never set by the Gibbs loop (it starts at
# s = 2), so the first traced value is the initial 0; the burn-in removal
# below discards it.
par(mfrow=c(3,1)) # set up 3-by-1 array of plots
plot(sigma.sq.vec, type='l')
plot(mu.vec, type='l')
plot(tau.vec, type='l')

# dev.new() # uncomment to open a new plotting window (cross-platform)
par(mfrow=c(3,1))
acf(sigma.sq.vec)
acf(mu.vec)
acf(tau.vec)
par(mfrow=c(1,1)) # reset window

# Number of initial iterations discarded as burn-in:
burn.in <- 100

# Plotting estimated posterior density for mu (burn-in deleted):
plot(density(mu.vec[-(1:burn.in)]))

# Open a new plotting window.  dev.new() replaces windows(), which exists
# only on Windows and errors on macOS/Linux:
dev.new()

# Plotting estimated posterior density for tau (burn-in deleted):
plot(density(tau.vec[-(1:burn.in)]))

# Posterior probability that mean for first group is higher than mean for second group:
mean( tau.vec[-(1:burn.in)] > 0)

# Sampling from the posterior predictive distribution.  The number of draws
# equals the number of retained MCMC iterations (previously hard-coded as
# 4900 = S - 100, which silently breaks if S or the burn-in changes):
mu.post <- mu.vec[-(1:burn.in)]
tau.post <- tau.vec[-(1:burn.in)]
sd.post <- sqrt(sigma.sq.vec[-(1:burn.in)])
n.post <- length(mu.post)

sample.1.pred <- rnorm(n.post, mean = mu.post + tau.post, sd = sd.post)
sample.2.pred <- rnorm(n.post, mean = mu.post - tau.post, sd = sd.post)

# Posterior predictive probability that a random subject from the first group has a greater
# blood pressure reduction than a random subject from the second group:
mean(sample.1.pred > sample.2.pred)

##################################
# BICs for the Calcium Example:  #
##################################

# Data:
calcium <- c(7,-4,18,17,-3,-5,1,10,11,-2)
placebo <- c(-1,12,-1,-3,3,-5,5,2,-11,-1,-3)

my.x <- c(calcium,placebo) # combining both samples into a single vector

# defining a grouping variable (1 = calcium, 2 = placebo):
my.group <- factor(c(rep(1,times=length(calcium)),
                     rep(2,times=length(placebo))) )

# Model 1: Assuming equal means:
M.1 <- lm(my.x ~ 1)

# Model 2: Assuming different means:
M.2 <- lm(my.x ~ my.group)

# BICs (uses AIC code with k = ln(n) ):
AIC(M.1, M.2, k= log(length(my.x)) )

# Model 1 is SLIGHTLY favored...
# The sample size is MUCH TOO SMALL here to view the difference in BICs
# as an approximation to a Bayes Factor.
###############################
# BICs in regression setting: #
###############################

### Example 1, Chapter 4 notes (Oxygen uptake data)

# y = change in maximal oxygen uptake (positive number = improvement):
y <- c(-0.87,-10.74,-3.27,-1.97,7.50,-7.25,17.05,4.96,10.40,11.05,0.26,2.51)

# 0 = running group, 1 = aerobics group:
x1 <- c(0,0,0,0,0,0,1,1,1,1,1,1)

# ages of subjects:
x2 <- c(23,22,22,25,27,20,31,23,27,28,22,24)

# cross-product term (interaction b/w group and age):
x3 <- x1 * x2

# Design matrix for the full model (intercept, group, age, interaction):
X <- as.matrix(cbind(rep(1, length(x1)), x1, x2, x3))

# Candidate models, from the null model up to the full interaction model:
M.1 <- lm(y ~ 1)             # no useful predictors
M.2 <- lm(y ~ x1)            # group only
M.3 <- lm(y ~ x2)            # age only
M.4 <- lm(y ~ x1 + x2)       # group and age
M.5 <- lm(y ~ x1 + x2 + x3)  # group, age, and group-by-age interaction

# BICs (uses AIC code with k = ln(n) ):
AIC(M.1, M.2, M.3, M.4, M.5, k = log(nrow(X)))

# Model 4 is favored, with Model 5 second-best.
#