# R example for Poisson regression # The data are from Table 14.14 of the book. # The number of customers were charted for 110 census tracts. # Some predictor variables were also measured for each tract. # Save the data file into a directory and # use the full path name: miller.data <- read.table(file = "z:/My Documents/teaching/stat_704/millerlumberdata.txt", header=FALSE, col.names = c('customers', 'housing', 'income', 'age', 'compet.dist', 'store.dist')) # attaching the data frame: attach(miller.data) income <- income/1000; # Now income is more conveniently measured in thousands of dollars, not dollars ############ FITTING A POISSON MODEL ############### # glm() is a general R function for fitting generalized linear models. # Here we will specify the Poisson distribution and the (natural) log link. # This is a simple Poisson regression using number of # housing units as the only predictor variable: miller.reg <- glm(customers ~ housing, family = poisson) # Note that the natural log link is the default for the Poisson family # getting the summary regression output: summary(miller.reg) pearsonX2 <- sum(resid(miller.reg, type="pearson")^2); pearsonX2 # Note, based on the Deviance and Pearson X^2 statistics, the fit of this # model is not so good. # the fitted values: fitted(miller.reg) ############ PLOTTING THE FITTED POISSON REGRESSION FUNCTION: ############ plot(housing, customers); lines(sort(housing), sort(fitted(miller.reg))) ############ Diagnostics: ############## # Getting the Pearson residuals: residuals(miller.reg,type="pearson") # Getting some influence measures: influence.measures(miller.reg) ##### EXAMPLE OF PREDICTION WITH THE POISSON REGRESSION MODEL ##### # Including an extra observation with "housing" = 600 and # a missing value for "customers" and the other variables xh.value <- data.frame(housing = 600) # Estimates for mu_i: mu.i.hat <- predict(miller.reg, xh.value, se.fit=TRUE,type="response") # This gives the point estimate for mu_i: mu.i.hat$fit # The 95% CI for mu_i is obtained by: alpha <- 0.05 lin.pred.hat <- predict(miller.reg, xh.value, se.fit=TRUE, type="link") lower95 <- exp(lin.pred.hat$fit - qnorm(1-alpha/2)*lin.pred.hat$se.fit) upper95 <- exp(lin.pred.hat$fit + qnorm(1-alpha/2)*lin.pred.hat$se.fit) print(paste(100*(1-alpha), "percent CI for mu_i:", lower95, upper95)) ##################################################################################### # This is a Poisson regression using five different predictor variables: miller.reg.mult <- glm(customers ~ housing + income + age + compet.dist + store.dist, family = poisson) # getting the summary regression output: summary(miller.reg.mult) pearsonX2 <- sum(resid(miller.reg.mult, type="pearson")^2); pearsonX2 # Note, based on the Deviance and Pearson X^2 statistics, the fit of this # model is MUCH better.