############################################
# STAT 740 R examples
# Tim Hanson
# U. South Carolina
# September 3, 2017
############################################

############################################
# generate multivariate normals
############################################

rmvn=function(n,mu,sigma){
 p=length(mu)
 t(mu+t(matrix(rnorm(p*n),nrow=n,ncol=p)%*%chol(sigma)))
}

s=rmvn(10000,c(5,3),matrix(c(2,1,1,2),2,2))
plot(s)
colMeans(s)
cov(s)

############################################
# generate Dirichlet
############################################

library(MCMCpack)
s=rdirichlet(1000,c(5,4,1))
plot(s[,1:2])
colMeans(s)
cov(s)

############################################
# accept-reject for multimodal density p. 50
############################################

f=Vectorize(function(x){exp(-0.5*x^2)*(sin(6*x)^2+3*cos(x)^2*sin(4*x)^2+1)})
x=seq(-4,4,length=1000)
plot(x,f(x),type="l")
coi=integrate(f,-5,5)
coi

M=max(f(x)/dnorm(x,0,1))
 
sim=function(n){
 y=1:n
 for(i in 1:n){
  u=-1; x=0
  repeat{
   u=runif(1); x=rnorm(1)
   if(u<=f(x)/(M*dnorm(x,0,1))) break
  } # end repeat
 y[i]=x
 } # i=1,...,n
 y
}

hist(sim(100000),freq=F,breaks=100,main="simulated r.v.")
lines(x,f(x)/coi$value)

############################################
# ARS to sample N(0,1)
# note second derivative is -1<0 for all x
# so log density is concave
############################################

library(numDeriv)
library(ars)
f=function(x){-0.5*x^2}
fp=function(x){jacobian(f,x)} # in general
fp=function(x){-x}
hist(ars(1000,f,fp),freq=F)
lines(x,dnorm(x))

############################################
# MH for multimodal density p. 50
############################################

f=Vectorize(function(x){exp(-0.5*x^2)*(sin(6*x)^2+3*cos(x)^2*sin(4*x)^2+1)})
x=seq(-4,4,length=1000)
plot(x,f(x),type="l")
coi=integrate(f,-5,5)
coi

sim=function(n){
 y=rep(0,n)
 for(j in 2:n){
  u=runif(1); x=rnorm(1)
  if(u<=f(x)*dnorm(y[j-1])/(f(y[j-1])*dnorm(x))){y[j]=x} 
  else{y[j]=y[j-1]}
 } # i=1,...,n
 y
}

hist(sim(100000),freq=F,breaks=100,main="simulated r.v.")
lines(x,f(x)/coi$value)

############################################
# sample mixture of two normals
############################################

n=10000
y=rbinom(n,1,0.3) # Bernoulli
x=y*rnorm(n,-2,1)+(1-y)*rnorm(n,2,1)
hist(x,freq=F,ylim=c(0,0.3),breaks=20)
x=seq(-6,6,length=1000)
lines(x,0.3*dnorm(x,-2,1)+0.7*dnorm(x,2,1))

############################################
# sample t(3)
############################################

n=10000
y=rchisq(n,3)
x=rnorm(n,0,sqrt(3/y))
hist(x,freq=F,ylim=c(0,0.4),xlim=c(-6,6),breaks=100)
x=seq(-6,6,length=1000)
lines(x,dt(x,3))

############################################
# frequentist properties of estimators
############################################

library(quantreg) # median regression
library(L1pack) # Laplace distribution
library(xtable) # make latex table for paper

# Properties of median regression (LAD)
# & normal errors regression
# for data with Laplace errors
# LAD is MLE for Laplace errors

m=matrix(0,3,4) # holds the frequentist properties
colnames(m)=c("Bias","SD","SD-Est","95% cov.")
rownames(m)=c("theta_1=1","theta_2=1","theta_3=1")
n=500  # sample size of each replicated data set
t=1000 # number of replicated data sets
theta=c(1,1,1) # true value!
for(i in 1:t){
 X=matrix(1,n,3)       # intercept is '1'
 X[,2]=rbinom(n,1,0.5) # binary predictor
 X[,3]=rnorm(n)        # N(0,1) predictor
 y=X%*%theta+rlaplace(n,0,1) # linear model
 s=summary(rq(y~X-1),covariance=T)$coef # LAD
 # s=summary(lm(y~X-1))$coef            # LS
 m[,1]=m[,1]+s[,1]   # sums to make mean of estimates
 m[,2]=m[,2]+s[,2]   # sums to make mean of sd's
 m[,3]=m[,3]+s[,1]^2 # used in sd of estimates
 l=s[,1]-1.96*s[,2]; u=s[,1]+1.96*s[,2]
 m[,4]=m[,4]+ifelse((l<theta)&(theta<u),1,0)
} # t replicated data sets of size n
m=m/t # mean of t estimates
m[,3]=sqrt(m[,3]-m[,1]^2) # std of t est.
m[,1]=m[,1]-theta # bias

xtable(m) # gives latex to create table

# show last data set
plot(X[,3],y,pch=(X[,2]+1))