######################### # # # STAT 599 Spring 2013 # # # # Example R code # # # # Chapter 3 # # # ######################### ### Installing the add-on packages needed for this course: # If you haven't already done so, # type at the command line (without the preceding # ): # install.packages(c('faraway','mgcv','acepack','mda','brlr','sm','wavethresh','SuppDists','lme4')) # and choose a U.S. mirror site when the window pops up. # This will install the needed packages for you. # load the 'faraway' package: library(faraway) # loading the 'gala' dataset: data(gala) # attaching the 'gala' dataset: attach(gala) # Fitting a normal linear regression model: model.1 <- lm(Species ~ Area + Elevation + Nearest + Scruz + Adjacent, data=gala) # Residuals plotted against fitted values: plot(predict(model.1), residuals(model.1), xlab='Fitted', ylab='Residuals') # Fitting a normal linear regression model with a square-root transformed response: model.2 <- lm(sqrt(Species) ~ Area + Elevation + Nearest + Scruz + Adjacent, data=gala) # Residuals plotted against fitted values: plot(predict(model.2), residuals(model.2), xlab='Fitted', ylab='Residuals') ###### # Fitting a Poisson GLM: model.p <- glm(Species ~ Area + Elevation + Nearest + Scruz + Adjacent, family=poisson, data=gala) summary(model.p) # Investigating the variance function: # Plotting (y - mu-hat)^2 against mu-hat (on the scale of the response): plot(fitted(model.p), (Species-fitted(model.p))^2, xlab=expression(hat(mu)), ylab=expression((y-hat(mu))^2)) abline(0,1) # Taking logs will plot the (y - mu-hat)^2 against mu-hat on the scale of the linear predictor: plot(log(fitted(model.p)), log((Species-fitted(model.p))^2), xlab=expression(hat(mu)), ylab=expression((y-hat(mu))^2)) abline(0,1) # Estimating a dispersion parameter: my.disp <- sum(residuals(model.p, type='pearson')^2)/model.p$df.res print(my.disp) # Fitting the model with the overdispersion accounted for will change the standard errors: summary(model.p, dispersion=my.disp) # With the overdispersion in the model, need to use # F-test to test significance of individual predictors: drop1(model.p, test="F") # A couple of the predictors are not significant. # Should we fit a simpler model without them? model.p.simp1 <- glm(Species ~ Area + Elevation + Adjacent, family=poisson, data=gala) model.p.simp2 <- glm(Species ~ Area + Elevation + Scruz + Adjacent, family=poisson, data=gala) AIC(model.p.simp1, model.p.simp2, model.p) # Use your best judgment... ##################### # # 3.2 Rate Models # ##################### # loading the 'faraway' package: library(faraway) # loading the 'dicentric' data set: data(dicentric) # attaching the 'dicentric' data frame: attach(dicentric) # An interaction plot, treating rate=ca/cells as the response: with(dicentric,interaction.plot(doserate, doseamt, ca/cells)) # There appears to be some interaction. # A normal linear model, with rate=ca/cells as the response: # Note the '*' between the predictors will tell R # to include the interaction(s): norm.lmod <- lm(ca/cells ~ doserate*factor(doseamt), data=dicentric) summary(norm.lmod) # Log-transforming the doserate predictor: norm.lmod.2 <- lm(ca/cells ~ log(doserate)*factor(doseamt), data=dicentric) summary(norm.lmod.2) # The second model has a higher adjusted R2... # Checking residual diagnostics: plot(residuals(norm.lmod.2) ~ fitted(norm.lmod.2), xlab='Fitted', ylab='Residuals') abline(h=0) # Some definite nonconstant error variance... ### A rate model: dosef <- factor(doseamt) # Note the default log link for the Poisson GLM: pois.ratemod <- glm(ca ~ log(cells) + log(doserate)*dosef, family=poisson, data=dicentric) summary(pois.ratemod) # The book suggests setting the coefficient of # log(cells) to 1 with the offset function. # This allows us to model the rate=ca/cells # with a Poisson GLM with log link. pois.ratemod2 <- glm(ca ~ offset(log(cells)) + log(doserate)*dosef, family=poisson, data=dicentric) summary(pois.ratemod2) # The deviance indicates a good fit. drop1(pois.ratemod2, test="Chi") # The interaction is highly significant, so we stick with the # interaction model. # The plot of page 62 (top) indicates the effect of doseamt # may be quadratic, but this is difficult to determine # unless we collect data at more dose amounts. # Plot of deviance residuals against linear predictor values: plot(residuals(pois.ratemod2) ~ predict(pois.ratemod2, type='link'), xlab=expression(hat(eta)), ylab='Deviance residuals') # No apparent problem. ################################## # # 3.3 Negative Binomial Models # ################################## # loading the 'faraway' package: library(faraway) # loading the 'solder' data set: data(solder) # attaching the 'solder' data frame: attach(solder) # Trying a Poisson model: mod.pois <- glm(skips ~ Opening + Solder + Mask + PadType + Panel, family=poisson, data=solder) summary(mod.pois) # Not a good fit based on the deviance. # Trying a Poisson model with all two-way interactions included: mod.pois2 <- glm(skips ~ (Opening + Solder + Mask + PadType + Panel)^2, family=poisson, data=solder) summary(mod.pois2) # Still not a great fit. # Plotting estimated variance versus the mean: plot(fitted(mod.pois2), (skips-fitted(mod.pois2))^2, xlab=expression(hat(mu)), ylab=expression((y-hat(mu))^2)) abline(0,1) # Maybe some overdispersion? #### A negative binomial model (with k=1) library(MASS) # loading the 'MASS' package mod.neg.bin1 <- glm(skips ~ Opening + Solder + Mask + PadType + Panel, family=negative.binomial(1), data=solder) summary(mod.neg.bin1) #### A negative binomial model (with k=3) mod.neg.bin3 <- glm(skips ~ Opening + Solder + Mask + PadType + Panel, family=negative.binomial(3), data=solder) summary(mod.neg.bin3) # AIC is a little better... #### A negative binomial model (with k estimated via ML) mod.neg.bin <- glm.nb(skips ~ Opening + Solder + Mask + PadType + Panel, data=solder) summary(mod.neg.bin) # The AIC is good: drop1(mod.neg.bin, test="Chi") # Looks like none of the predictors should be dropped.