#########################
#                       #
# STAT 599 Spring 2013  #
#                       #
# Example R code        #
#                       #
# Chapter 5             #
#                       #
#########################

### Installing the add-on packages needed for this course:

# If you haven't already done so, 
# type at the command line (without the preceding # ):

# install.packages(c('faraway','mgcv','acepack','mda','brlr','sm','wavethresh','SuppDists','lme4'))

# and choose a U.S. mirror site when the window pops up.
# This will install the needed packages for you.

# load the 'faraway' package:

library(faraway)

# loading the 'nes96' dataset:

data(nes96)

# attaching the 'nes96' dataset:

attach(nes96)

# defining a 3-category party variable:

sPID <- nes96$PID
levels(sPID) <- c("Democrat","Democrat", "Independent", "Independent", "Independent", "Republican", "Republican")

# creating a numeric income variable (see p. 98 for details)

inca <- c(1.5,4,6,8,9.5,10.5,11.5,12.5,13.5,14.5,16,18.5,21,23.5,27.5,32.5,37.5,42.5,47.5,55,67.5,82.5,97.5,115)
nincome <- inca[unclass(nes96$income)]


######################################
##
#  Section 5.1 Multinomial Logit Model
##
######################################

# loading the 'nnet' package:

library(nnet)

# Fitting a multinomial logit model:

multin.nes96 <- multinom(sPID ~ age + educ + nincome, data=nes96)

# A stepwise procedure to help decide the set of predictors:

step(multin.nes96)

# Model with only age and nincome:

multin.nes96.2 <- multinom(sPID ~ age + nincome, data=nes96)


# Deviance test indicates we should drop 'educ' from the model:

pchisq(deviance(multin.nes96.2) - deviance(multin.nes96), df=multin.nes96$edf-multin.nes96.2$edf, lower=F)

# Model with only nincome:

multin.nes96.3 <- multinom(sPID ~ nincome, data=nes96)

# Deviance test indicates we should drop 'age' from the model:

pchisq(deviance(multin.nes96.3) - deviance(multin.nes96.2), df=multin.nes96.2$edf-multin.nes96.3$edf, lower=F)


## Predicting party affiliation corresponding to a set of income values:

inc.vals <- c(8,26,42,58,74,90,107)

predict(multin.nes96.3, data.frame(nincome=inc.vals), type='probs')

# Just giving the most probable category:

predict(multin.nes96.3, data.frame(nincome=inc.vals))

# Examining estimated regression coefficients:

summary(multin.nes96.3)

######

### Section 5.2: Hierarchical (Nested) Models:

# loading and attaching the 'cns' data set:

data(cns)
attach(cns)

# Combining the 'Type of CNS' categories into one overall CNS category:

CNS <- An + Sp + Other

# A plot of empirical logits against water hardness, separate by type of worker:

plot(log(CNS/NoCNS) ~ Water, data=cns, pch=as.character(Work))

# Fitting the initial binomial model:

# Choosing whether to include 'Water' and 'Work' as predictors (simpler model)
# or 'Area ' and 'Work' as predictors (more complicated model):

bin.mod.w <- glm(cbind(CNS, NoCNS) ~ Water + Work, data=cns, family="binomial")
bin.mod.a <- glm(cbind(CNS, NoCNS) ~ Area + Work, data=cns, family="binomial")

anova(bin.mod.w, bin.mod.a, test="Chi")

# Comparing deviances indicates the simpler model is reasonable.

# Checking residuals:

halfnorm(residuals(bin.mod.w))

# Getting the estimated coefficients:

summary(bin.mod.w)

#### Second-stage multinomial model for CNS type, conditional on the fact that CNS has occurred:

library(nnet)

cond.multin.model <- multinom(cbind(An, Sp, Other) ~ Water + Work, data=cns)
summary(cond.multin.model)

# A stepwise procedure for variable selection:
step(cond.multin.model)

# Or simply using AICs to compare models:

cond.multin.model.simp1 <- multinom(cbind(An, Sp, Other) ~ Water, data=cns)
cond.multin.model.simp2 <- multinom(cbind(An, Sp, Other) ~ Work, data=cns)
cond.multin.model.null <- multinom(cbind(An, Sp, Other) ~ 1, data=cns)

AIC(cond.multin.model, cond.multin.model.simp1, cond.multin.model.simp2, cond.multin.model.null)

# The AIC is best for the null model ... use your best judgment.

# Deviance test to compare the null model to the model with both predictors:

pchisq(deviance(cond.multin.model.null)-deviance(cond.multin.model), df=cond.multin.model$edf-cond.multin.model.null$edf, lower=F)

# The simpler (null) model seems fine here.

#######################################
#
#### Section 5.3: Models for Ordinal Multinomial Responses



# load the 'faraway' package:

library(faraway)

# loading the 'nes96' dataset:

data(nes96)

# attaching the 'nes96' dataset:

attach(nes96)

# defining a 3-category party variable:

sPID <- nes96$PID
levels(sPID) <- c("Democrat","Democrat", "Independent", "Independent", "Independent", "Republican", "Republican")

# creating a numeric income variable (see p. 98 for details)

inca <- c(1.5,4,6,8,9.5,10.5,11.5,12.5,13.5,14.5,16,18.5,21,23.5,27.5,32.5,37.5,42.5,47.5,55,67.5,82.5,97.5,115)
nincome <- inca[unclass(nes96$income)]

# loading the 'MASS' package:

library(MASS)

# Proportional Odds Model with all three predictors:

pomod <- polr(sPID ~ age + educ + nincome, data=nes96)

# Proportional Odds Model with only income as predictor:

pomod.i <- polr(sPID ~ nincome, data=nes96)

# Deviance test to compare these models:

pchisq(deviance(pomod.i) -deviance(pomod), df=pomod$edf-pomod.i$edf, lower=F)

# The AICs also support the simpler model:

AIC(pomod.i, pomod)

# Checking the proportional odds assumption:

pim <- prop.table(table(nincome,sPID),1)
print(pim)

# The log of the odds proportions are simply differences in logits (log-odds) for categories 1 and 2:

plot(logit(pim[,1]) - logit(pim[,1] + pim[,2]), type='b',ylab="logit differences", xlab="Income levels")

# This plot should be roughly constant (no trend).

# Looking at the estimated coefficients:

summary(pomod.i)

# Getting the estimates of the theta_j's:

ilogit(pomod.i[[2]][1])  # Gets estimate of theta_1 (Democrat intercept)

ilogit(pomod.i[[2]][2]) - ilogit(pomod.i[[2]][1])  # Gets estimate of theta_2 (Independent intercept)


## Predicting party affiliation corresponding to a set of income values:

inc.vals <- c(8,26,42,58,74,90,107)

predict(pomod.i, data.frame(nincome=inc.vals, row.names=inc.vals), type='probs')


### Ordered Probit Model with only income as predictor:

opmod.i <- polr(sPID ~ nincome, data=nes96, method="probit")
summary(opmod.i)

predict(opmod.i, data.frame(nincome=inc.vals, row.names=inc.vals), type='probs')

# A proportional hazards model would be fit using the 'cloglog' method:

phmod.i <- polr(sPID ~ nincome, data=nes96, method="cloglog")
summary(phmod.i)