######################## # # # STAT 599 Spring 2013 # # # # Example R code # # # # Chapter 7 # # # ######################### ### Installing the add-on packages needed for this course: # If you haven't already done so, # type at the command line (without the preceding # ): # install.packages(c('faraway','mgcv','acepack','mda','brlr','sm','wavethresh','SuppDists','lme4')) # and choose a U.S. mirror site when the window pops up. # This will install the needed packages for you. # load the 'faraway' package: library(faraway) #################### ## 7.1 Gamma GLMs #################### ### Example 1: # loading the 'wafer' dataset: data(wafer) # attaching the 'wafer' dataset: attach(wafer) # Fitting a normal linear model # with a log transformation of the response, 'resist': lognorm.wafer <- lm (log(resist) ~ (x1+x2+x3+x4)^2, data=wafer) # The "^2" tells R to consider all first-order terms, plus all two-way interactions. # A stepwise approach to select predictor variables: step.lognorm.wafer <- step(lognorm.wafer) summary(step.lognorm.wafer) # Fitting the same model but with a gamma GLM with a log link: gamma.wafer <- glm(resist ~ (x1+x2+x3+x4)^2, family=Gamma(link=log), data=wafer) # "link=log" must be specified, since the canonical link # (the inverse link here) is the default. step.gamma.wafer <- step(gamma.wafer) summary(step.gamma.wafer) # The estimate of the dispersion given is based on the Pearson chi-square statistic: sum(residuals(step.gamma.wafer, type='pearson')^2)/8 #### Example 2: # loading the 'motorins' dataset: data(motorins) # attaching the 'motorins' dataset: attach(motorins) # Some initial subsetting: # Choosing only the data in Zone 1 for the analysis: motori <- motorins[motorins$Zone == 1,] # Fitting the same model but with a gamma GLM with a log link: gamma.motor <- glm(Payment ~ offset(log(Insured)) + as.numeric(Kilometres) + Make + Bonus, family = Gamma(link=log), motori) # "link=log" must be specified, since the canonical link # (the inverse link here) is the default. summary(gamma.motor) # Fitting a normal linear model # with a log transformation of the response # using the 'glm' function: lognorm.motor <- glm(log(Payment) ~ offset(log(Insured)) + as.numeric(Kilometres) + Make + Bonus, family = gaussian, motori) # 'family = gaussian' specifies a normal distribution for the response. # The default link for this is the identity link. # Note the log transformation has already been done. summary(lognorm.motor) # Some parameter estimates are different for the two models. Interpretations? #### Predictions: # Predicting the payment for an observation with Make="1", Kilometres=1, Bonus=1, Insured=100 : # Under the gamma model: x0 <- data.frame(Make="1", Kilometres=1, Bonus=1, Insured=100) predict(gamma.motor, new=x0, se=T, type="response") # Under the log-normal model: predict(lognorm.motor, new=x0, se=T, type="response") # For the log-normal model, these are in logged units. # To convert back to original units, use: exp(predict(lognorm.motor, new=x0, se=T, type="response")$fit) # with standard error: exp(predict(lognorm.motor, new=x0, se=T, type="response")$fit)*predict(lognorm.motor, new=x0, se=T, type="response")$se.fit ############################### ## 7.4 Quasi-Likelihood Methods ############################### # load the 'faraway' package: library(faraway) # loading the 'mammalsleep' dataset: data(mammalsleep) # attaching the 'mammalsleep' dataset: attach(mammalsleep) # Creating the proportion variable: mammalsleep$pdr <- with(mammalsleep, dream/sleep) dream.quasi.full <- glm(pdr ~ log(body) + log(brain) + log(lifespan) + log(gestation) + predation + exposure + danger, family=quasibinomial, data=mammalsleep) # Cannot use the step' function because there is no AIC, but could do # backward elimination via a series of F-tests with the 'drop1' function: drop1(dream.quasi.full, test='F') drop1(glm(pdr ~ log(body) + log(brain) + log(lifespan) + log(gestation) + exposure + danger, family=quasibinomial, data=mammalsleep), test='F') drop1(glm(pdr ~ log(body) + log(brain) + log(lifespan) + log(gestation) + danger, family=quasibinomial, data=mammalsleep), test='F') drop1(glm(pdr ~ log(body) + log(lifespan) + log(gestation) + danger, family=quasibinomial, data=mammalsleep), test='F') drop1(glm(pdr ~ log(body) + log(lifespan) + danger, family=quasibinomial, data=mammalsleep), test='F') ## Final model: dream.quasi.final <- glm(pdr ~ log(body) + log(lifespan) + danger, family=quasibinomial, data=mammalsleep) summary(dream.quasi.final) # How could we interpret the estimated coefficients? ### Some diagnostic plots: mammal.names <- row.names(na.omit(mammalsleep[,c(1,6,10,11)])) # picking out only the columns for the variables in the final model... # Plot of Cook's Distances: halfnorm(cooks.distance(dream.quasi.final), labs=mammal.names) # Plot of Pearson residuals against predicted values (on scale of linear predictor): plot(predict(dream.quasi.final), residuals(dream.quasi.final, type='pearson'), xlab='Linear Predictor', ylab='Pearson Residuals') ################################################## # ## Final model with 'Asian elephant' (case 5) deleted: dream.quasi.final.sub <- glm(pdr ~ log(body) + log(lifespan) + danger, family=quasibinomial, data=mammalsleep, subset=-5) summary(dream.quasi.final.sub) # Plot of Cook's Distances: halfnorm(cooks.distance(dream.quasi.final.sub), labs=mammal.names) # Plot of Pearson residuals against predicted values (on scale of linear predictor): plot(predict(dream.quasi.final.sub), residuals(dream.quasi.final.sub, type='pearson'), xlab='Linear Predictor', ylab='Pearson Residuals') # The substantive conclusions do not change much, although the deviance is somewhat lower.