############################################ # STAT 740 R examples # Tim Hanson # U. South Carolina # November 15, 2017 ############################################ ############################################ # Testing V.A. data ############################################ library(MASS) # has VA data library(survival) # has survfit & survreg # censored data histogram chist=function(x,d,J){ a=min(x-(max(x)-min(x))/10000); b=max(x); J=10 delta=(b-a)/J t=seq(a,b,length=(J+1)) f=summary(survfit(Surv(x,d)~1),times=t) y=c(0,rep(f$surv[1:J]-f$surv[2:(J+1)],1,each=2),0)/delta x=rep(t,1,each=2) plot(x,y,type="l",col="blue",xlab="censored data", ylab="density",ylim=c(0,max(y*1.1))) } VAs=subset(VA,prior==0) attach(VAs) f=survreg(Surv(log(stime),status)~1,dist="gaussian") # the easy way! chist(stime,status,10) # histogram x=seq(0,600,length=1000) lines(x,dlnorm(x,f$coef,f$scale),col="red") # plot of log-normal fit summary(f) # MLEs of mu and sigma; compare to values you get via E-M # mean of log-normal is exp{mu+exp(2*log.sigma)/2} # Let's test E(T)=6 months... g=function(coef){ mu=coef[1]; tau=coef[2] exp(mu+exp(2*tau)/2) } dg=function(coef){ mu=coef[1]; tau=coef[2] c(exp(mu+exp(2*tau)/2),exp(mu+exp(2*tau)/2+2*tau)) } est=g(f$icoef) se=(sqrt(dg(f$icoef)%*%vcov(f)%*%dg(f$icoef)))[1,1] cat("95% CI for mean survival=(",est-1.96*se,",",est+1.96*se,") \n") # test H0: mean = 6 months W=((est-365/2)/se)^2 pchisq(W,1,lower.tail=F) ############################################ # AIC V.A. data, scale=days not log-days ############################################ AIC(survreg(Surv(stime,status)~1,dist="weib")) AIC(survreg(Surv(stime,status)~1,dist="lognormal")) AIC(survreg(Surv(stime,status)~1,dist="loglogistic")) AIC(survreg(Surv(stime,status)~1,dist="exp")) ############################################ # Ache hunting via JAGS ############################################ library(R2jags) d=read.table("http://people.stat.sc.edu/hansont/stat740/ache.txt",header=T) attach(d) # makes 'age', 'days', and 'monkeys' available to R n=length(age) id=1:47 m=monkeys; a=age; t=days f=glmer(monkeys~age+(1|id),family="poisson",offset=log(days),nAGQ=100) # starting values for JAGS start=list(list("beta"=c(0,0),"tau"=1,"u"=rep(0,n))) start=list(list("beta"=f@beta,"tau"=1/f@theta^2,"u"=f@u)) model=function(){ # model for JAGS for(i in 1:n){ m[i]~dpois(mu[i]) mu[i]<-t[i]*exp(beta[1]+beta[2]*a[i]+u[i]) u[i]~dnorm(0,tau) } for(i in 1:2){ beta[i]~dnorm(0,0.0001) } tau~dgamma(0.0001,0.0001) sigma<-1/sqrt(tau) AE<-exp(beta[2]) # age effect on mean } kills=list("m","a","t","n") # data for JAGS param=c("beta","sigma","AE") f=jags(data=kills,inits=start,parameters.to.save=param, n.chains=1,n.iter=101000,n.burnin=1000,model.file=model,n.thin=5) print(f) traceplot(f) effectiveSize(f) f2=as.mcmc(f) plot(f2,ask=T) # approximately normal? # test slope=0 <=> AE=1 # What is 95% CI for slope? AE? dnorm(0,0.049,0.026)/dnorm(0,0,1/sqrt(0.0001)) dnorm(1,1.050,0.027)/dlnorm(1,0,1/sqrt(0.0001)) # What is happening? Hint: flat prior is highly informative! # BFs are sensitive to prior assumptions! g=seq(-0.1,0.1,length=1000) plot(g,dnorm(g,0.049,0.026),type="l") lines(g,dnorm(g,0.0,1/sqrt(0.0001)),lty=2) lines(g,dnorm(g,0.0,0.1),lty=2) # would give BF=0.5 # more work needs to be done on "default" priors ############################################ # Blood flow via JAGS: two means ############################################ library(R2jags) normoxia=c(3.45,3.09,3.09,2.65,2.49,2.33,2.28,2.24,2.17,1.34) hypoxia=c(6.37,5.69,5.58,5.27,5.11,4.88,4.68,3.50) mbf=c(normoxia,hypoxia) group=factor(c(rep("normoxia",length(normoxia)),rep("hypoxia",length(hypoxia)))) grp=factor(c(rep(1,length(normoxia)),rep(2,length(hypoxia)))) boxplot(mbf~group) t.test(mbf~group) library(R2jags) # use default starting values # ll[i] below is for loo and waic functions # can instead define ll as a function of parameters # which uses M x k for storage instead of M x n model=function(){ # model for JAGS for(i in 1:length(mbf)){ mbf[i]~dnorm(mu[grp[i]],tau[grp[i]]) ll[i]<-log(dnorm(mbf[i],mu[grp[i]],tau[grp[i]])) } mu[2]<-mu[1]+delta mu[1]~ dnorm(0,0.0001) delta~ dnorm(0,0.0001) tau[1]~ dgamma(0.0001,0.0001) tau[2]~ dgamma(0.0001,0.0001) } dat=list("mbf","grp") # data for JAGS par=c("mu1","mu2","delta","tau1","tau2","ll") # default thinning leaves 1000 iterates! f=jags(data=dat,parameters.to.save=par, # inits=start, n.chains=1,n.iter=101000,n.burnin=1000,model.file=model,n.thin=1) # compute LPML and WAIC pars=f$BUGSoutput$sims.list loo(pars$ll) # first row is LPML waic(pars$ll) # BF for H0: delta=0 vs. Ha: not H0 dnorm(0,2.622,0.418)/dnorm(0,0,1/(sqrt(0.0001))) # BF for Ha vs H0 dnorm(0,0,1/(sqrt(0.0001)))/dnorm(0,2.622,0.418) # lower bound on BF from p-value -1/(exp(1)*7.307e-06*log(7.307e-06)) # Again, prior needs to be thought about a bit more ############################################ # Blood flow via JAGS: one mean ############################################ model=function(){ # model for JAGS for(i in 1:length(mbf)){ mbf[i]~dnorm(mu,tau[grp[i]]) ll[i]<-log(dnorm(mbf[i],mu,tau[grp[i]])) } mu~ dnorm(0,0.0001) tau[1]~ dgamma(0.0001,0.0001) tau[2]~ dgamma(0.0001,0.0001) } dat=list("mbf","grp") # data for JAGS par=c("mu","tau1","tau2","ll") # default thinning leaves 1000 iterates! f=jags(data=dat,parameters.to.save=par, # inits=start, n.chains=1,n.iter=101000,n.burnin=1000,model.file=model,n.thin=1) # compute LPML and WAIC pars=f$BUGSoutput$sims.list loo(pars$ll) # first row is LPML waic(pars$ll) # pseudo Bayes factor comparing two means to one exp(-23.1-(-31.0))