########################
#                       #
# STAT 599 Spring 2013  #
#                       #
# Example R code        #
#                       #
# Chapter 7             #
#                       #
#########################

### Installing the add-on packages needed for this course:

# If you haven't already done so, 
# type at the command line (without the preceding # ):

# install.packages(c('faraway','mgcv','acepack','mda','brlr','sm','wavethresh','SuppDists','lme4'))

# and choose a U.S. mirror site when the window pops up.
# This will install the needed packages for you.

# load the 'faraway' package:

library(faraway)

####################
## 7.1 Gamma GLMs
####################

### Example 1:

# loading the 'wafer' dataset:

data(wafer)

# attaching the 'wafer' dataset:

attach(wafer)

# Fitting a normal linear model 
# with a log transformation of the response, 'resist':

lognorm.wafer <- lm (log(resist) ~ (x1+x2+x3+x4)^2, data=wafer)

# The "^2" tells R to consider all first-order terms, plus all two-way interactions.

# A stepwise approach to select predictor variables:

step.lognorm.wafer <- step(lognorm.wafer)

summary(step.lognorm.wafer)

# Fitting the same model but with a gamma GLM with a log link:

gamma.wafer <- glm(resist ~ (x1+x2+x3+x4)^2, family=Gamma(link=log), data=wafer)

# "link=log" must be specified, since the canonical link 
# (the inverse link here) is the default.

step.gamma.wafer <- step(gamma.wafer)

summary(step.gamma.wafer)

# The estimate of the dispersion given is based on the Pearson chi-square statistic:
sum(residuals(step.gamma.wafer, type='pearson')^2)/8


#### Example 2:


# loading the 'motorins' dataset:

data(motorins)

# attaching the 'motorins' dataset:

attach(motorins)

# Some initial subsetting:
# Choosing only the data in Zone 1 for the analysis:

motori <- motorins[motorins$Zone == 1,]

# Fitting the same model but with a gamma GLM with a log link:

gamma.motor <- glm(Payment ~ offset(log(Insured)) + as.numeric(Kilometres) + Make + Bonus, family = Gamma(link=log), motori)

# "link=log" must be specified, since the canonical link 
# (the inverse link here) is the default.

summary(gamma.motor)

# Fitting a normal linear model 
# with a log transformation of the response
# using the 'glm' function:

lognorm.motor <- glm(log(Payment) ~ offset(log(Insured)) + as.numeric(Kilometres) + Make + Bonus, family = gaussian, motori)

# 'family = gaussian' specifies a normal distribution for the response.
# The default link for this is the identity link.
# Note the log transformation has already been done.

summary(lognorm.motor)

# Some parameter estimates are different for the two models.  Interpretations?

#### Predictions:

# Predicting the payment for an observation with Make="1", Kilometres=1, Bonus=1, Insured=100 :

# Under the gamma model:

x0 <- data.frame(Make="1", Kilometres=1, Bonus=1, Insured=100)
predict(gamma.motor, new=x0, se=T, type="response")

# Under the log-normal model:

predict(lognorm.motor, new=x0, se=T, type="response")

# For the log-normal model, these are in logged units.  
# To convert back to original units, use:

exp(predict(lognorm.motor, new=x0, se=T, type="response")$fit)

# with standard error:

exp(predict(lognorm.motor, new=x0, se=T, type="response")$fit)*predict(lognorm.motor, new=x0, se=T, type="response")$se.fit

###############################
## 7.4 Quasi-Likelihood Methods
###############################


# load the 'faraway' package:

library(faraway)

# loading the 'mammalsleep' dataset:

data(mammalsleep)

# attaching the 'mammalsleep' dataset:

attach(mammalsleep)

# Creating the proportion variable:
mammalsleep$pdr <- with(mammalsleep, dream/sleep)

dream.quasi.full <- glm(pdr ~ log(body) + log(brain) + log(lifespan) + log(gestation) + predation + exposure + danger, family=quasibinomial, data=mammalsleep)

# Cannot use the step' function because there is no AIC, but could do
# backward elimination via a series of F-tests with the 'drop1' function:

drop1(dream.quasi.full, test='F')
drop1(glm(pdr ~ log(body) + log(brain) + log(lifespan) + log(gestation) +  exposure + danger, family=quasibinomial, data=mammalsleep), test='F')
drop1(glm(pdr ~ log(body) + log(brain) + log(lifespan) + log(gestation) + danger, family=quasibinomial, data=mammalsleep), test='F')
drop1(glm(pdr ~ log(body) + log(lifespan) + log(gestation) + danger, family=quasibinomial, data=mammalsleep), test='F')
drop1(glm(pdr ~ log(body) + log(lifespan) + danger, family=quasibinomial, data=mammalsleep), test='F')

## Final model:

dream.quasi.final <- glm(pdr ~ log(body) + log(lifespan) + danger, family=quasibinomial, data=mammalsleep)

summary(dream.quasi.final)

# How could we interpret the estimated coefficients?

### Some diagnostic plots:

mammal.names <- row.names(na.omit(mammalsleep[,c(1,6,10,11)])) 
# picking out only the columns for the variables in the final model...

# Plot of Cook's Distances:
halfnorm(cooks.distance(dream.quasi.final), labs=mammal.names)

# Plot of Pearson residuals against predicted values (on scale of linear predictor):
plot(predict(dream.quasi.final), residuals(dream.quasi.final, type='pearson'), xlab='Linear Predictor', ylab='Pearson Residuals')

##################################################
#
## Final model with 'Asian elephant' (case 5) deleted:

dream.quasi.final.sub <- glm(pdr ~ log(body) + log(lifespan) + danger, family=quasibinomial, data=mammalsleep, subset=-5)

summary(dream.quasi.final.sub)

# Plot of Cook's Distances:
halfnorm(cooks.distance(dream.quasi.final.sub), labs=mammal.names)

# Plot of Pearson residuals against predicted values (on scale of linear predictor):
plot(predict(dream.quasi.final.sub), residuals(dream.quasi.final.sub, type='pearson'), xlab='Linear Predictor', ylab='Pearson Residuals')

# The substantive conclusions do not change much, although the deviance is somewhat lower.