############################################
# STAT 740 R examples
# Tim Hanson
# U. South Carolina
# August 23, 2017
############################################

############################################
# Simple Bernoulli (binomial) data
# Newton-Raphson in one dimension
############################################

y=100; n=200
g=seq(0,1,length=1000)
ll=y*log(g)+(n-y)*log(1-g)
ll2=y/g-(n-y)/(1-g)
plot(g,ll,type="l",main="log-likelihood")
plot(g,ll2,type="l",main="log-likelihood derivative")

pi.old=0.1
it=0
repeat{
 pi.new=pi.old-(y/pi.old-(n-y)/(1-pi.old))/(-y^2/pi.old^2-(n-y)/(1-pi.old)^2)
 it=it+1; cat("iteration=",it," pi",pi.new,"\n")
 if(abs(pi.new-pi.old)<0.000001){break}
 pi.old=pi.new
}

############################################
# Data of Problem 1.13
# number of days until rat was diagnosed
# with cancer after being exposed to carcinogen
#
# fit gamma density to right-censored data
# E(x)=a*b & var(x)=a*b^2
############################################

library(numDeriv)

x=c(143,164,188,188,190,192,206,209,213,216,220,227,230,234,246,265,304,216,244)
c=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0)

ll=function(theta){
 a=theta[1]; b=theta[2]
 sum(c*dgamma(x,shape=a,scale=b,log=T)+(1-c)*pgamma(x,shape=a,scale=b,log.p=T,lower.tail=F))
}

# method-of-moments starting values
m=mean(x)
v=var(x)
a=m^2/v; b=v/m

# crude starting values look okay
hist(x,freq=F,ylim=c(0,0.02))
g=seq(130,320,length=1000)
y=dgamma(g,shape=a,scale=b)
lines(g,y,lty=1)

# quasi-Newton via R built-in function
theta.old=c(a,b)
optim(theta.old,ll,control=list(fnscale=-1))

# Newton-Raphson
it=0
repeat{
 theta.new=theta.old-solve(hessian(ll,theta.old))%*%t(jacobian(ll,theta.old))
 it=it+1; cat("iteration=",it," theta=",theta.new,"\n")
 if(max(abs(theta.new-theta.old))<0.000001){break}
 theta.old=theta.new
}

# standard errors from observed Fisher information
sqrt(diag(solve(-hessian(ll,theta.new))))

# gamma mean is g(a,b)=a*b
# use mult. delta method to get SE
a=theta.new[1]; b=theta.new[2]
est=a*b
se=sqrt(c(b,a)%*%solve(-hessian(ll,theta.new))%*%c(b,a))
cat("est. mean=",est,"days and 95% CI=(",est-1.96*se,",",est+1.96*se,").","\n")

############################################
# Data of Example 1.13
# failure=0, success=1, vs. temperature
# in Fahrenheit.
# logistic-regression model
############################################

# Challenger data
y=c(1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0)
t=c(53,57,58,63,66,67,67,67,68,69,70,70,70,70,72,73,75,75,76,76,78,79,81)
X=cbind(rep(1,23),t) # design matrix

# log-likelihood function
ll=function(theta){
 sum(X%*%theta*y-log(1+exp(X%*%theta)))
}

# crude l.s. starting values
theta.old=4*solve(t(X)%*%X)%*%t(X)%*%y

# Newton-Raphson
it=0
repeat{
 theta.new=theta.old-solve(hessian(ll,theta.old))%*%t(jacobian(ll,theta.old))
 it=it+1; cat("iteration=",it," theta=",theta.new,"\n")
 if(max(abs(theta.new-theta.old))<0.0000001){break}
 theta.old=theta.new
}

# standard errors
sqrt(diag(solve(-hessian(ll,theta.new))))

# built-in R function for logistic regression
summary(glm(y~t,family="binomial"))

############################################
# Ache data
############################################

kills=c(2,3,0,0,3,27,0,7,0,0,1)
days=c(73,7,13,4,104,126,63,88,7,62,4)

# Garnett says prior for mean kills over 10 days, 
# 10*lambda, is one and 90% interval is 0.25 to 4
# prior mean is one: want Gamma(a,a) over 10 days
# want P(lambda<4)=0.95
pgamma(4,.44,.44) # trial and error
x=seq(0,0.15,length=1000)
# prior
plot(x,dgamma(x,shape=0.44,rate=0.44*10),type="l",
lty=2,ylim=c(0,37),xlab=expression(lambda),
ylab=expression(paste(pi,"(",lambda,"|",bold("x"),")")))
# exact
lines(x,dgamma(x,shape=0.44+sum(kills),0.44*10+sum(days)))
post.mode=(0.44+sum(kills)-1)/(0.44*10+sum(days))
approx.sd=sqrt(post.mode^2/(0.44+sum(kills)-1))
lines(x,dnorm(x,post.mode,approx.sd),lty=3)
legend(0.09,36,legend=c("exact","prior","approx"),lty=1:3)

############################################
# Challenger data Bayesian approximation
############################################

# Challenger data
y=c(1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0)
t=c(53,57,58,63,66,67,67,67,68,69,70,70,70,70,72,73,75,75,76,76,78,79,81)
X=cbind(rep(1,23),t) # design matrix

# log-posterior function
m=matrix(c(168.75,-2.402,-2.402,0.03453),2,2)
m.inv=solve(m)
lp=function(theta){
 sum(X%*%theta*y-log(1+exp(X%*%theta)))-0.5*t(theta)%*%m.inv%*%theta
}

# crude l.s. starting values
theta.old=4*solve(t(X)%*%X)%*%t(X)%*%y

# Newton-Raphson
it=0
repeat{
 theta.new=theta.old-solve(hessian(lp,theta.old))%*%t(jacobian(lp,theta.old))
 it=it+1; cat("iteration=",it," theta=",theta.new,"\n")
 if(max(abs(theta.new-theta.old))<0.0000001){break}
 theta.old=theta.new
}

# posterior standard deviations (not standard errors!)
sqrt(diag(solve(-hessian(ll,theta.new))))

# use of optim
theta.old=4*solve(t(X)%*%X)%*%t(X)%*%y
f=optim(theta.old,lp,control=list(fnscale=-1),hessian=T,method="BFGS")
cov=solve(-f$hessian)
est=f$par

x=seq(35,85,length=1000); X=cbind(rep(1,1000),x)
p.est=exp(X%*%est)/(1+exp(X%*%est))
plot(x,p.est,type="l",xlab="temp",ylab="prob. failure")
points(t,y)

u=1:1000; l=u
for(i in 1:1000){
 u[i]=c(1,x[i])%*%est+1.96*sqrt(t(c(1,x[i]))%*%cov%*%c(1,x[i]))
 l[i]=c(1,x[i])%*%est-1.96*sqrt(t(c(1,x[i]))%*%cov%*%c(1,x[i]))
}
lines(x,exp(u)/(1+exp(u)),lty=2)
lines(x,exp(l)/(1+exp(l)),lty=2)