# R example for Poisson regression  

# The data are from Table 14.14 of the book.  
# The number of customers were charted for 110 census tracts.  
# Some predictor variables were also measured for each tract.  


# Save the data file into a directory and 
# use the full path name:

miller.data <- read.table(file = "z:/My Documents/teaching/stat_704/millerlumberdata.txt", 
header=FALSE, col.names = c('customers', 'housing', 'income', 'age', 'compet.dist', 'store.dist'))

# attaching the data frame:

attach(miller.data)

income <- income/1000;
# Now income is more conveniently measured in thousands of dollars, not dollars  


############ FITTING A POISSON MODEL ###############

# glm() is a general R function for fitting generalized linear models.  
# Here we will specify the Poisson distribution and the (natural) log link.      

# This is a simple Poisson regression using number of  
# housing units as the only predictor variable:        

miller.reg <- glm(customers ~ housing, family = poisson)

# Note that the natural log link is the default for the Poisson family

# getting the summary regression output:

summary(miller.reg)

pearsonX2 <- sum(resid(miller.reg, type="pearson")^2); pearsonX2

# Note, based on the Deviance and Pearson X^2 statistics, the fit of this 
# model is not so good.                                                   

# the fitted values:

fitted(miller.reg)

############ PLOTTING THE FITTED POISSON REGRESSION FUNCTION: ############

plot(housing, customers); lines(sort(housing), sort(fitted(miller.reg)))

############ Diagnostics: ##############

# Getting the Pearson residuals:

residuals(miller.reg,type="pearson")

# Getting some influence measures:

influence.measures(miller.reg)

##### EXAMPLE OF PREDICTION WITH THE POISSON REGRESSION MODEL #####

# Including an extra observation with "housing" = 600 and 
# a missing value for "customers" and the other variables 

xh.value <- data.frame(housing = 600)

# Estimates for mu_i:

mu.i.hat <- predict(miller.reg, xh.value, se.fit=TRUE,type="response")

# This gives the point estimate for mu_i:

mu.i.hat$fit

# The 95% CI for mu_i is obtained by:

alpha <- 0.05
lin.pred.hat <- predict(miller.reg, xh.value, se.fit=TRUE, type="link")
lower95 <- exp(lin.pred.hat$fit - qnorm(1-alpha/2)*lin.pred.hat$se.fit)
upper95 <- exp(lin.pred.hat$fit + qnorm(1-alpha/2)*lin.pred.hat$se.fit)
print(paste(100*(1-alpha), "percent CI for mu_i:", lower95, upper95))

#####################################################################################

# This is a Poisson regression using five different predictor variables:

miller.reg.mult <- glm(customers ~ housing + income + age + compet.dist + store.dist, family = poisson)


# getting the summary regression output:

summary(miller.reg.mult)

pearsonX2 <- sum(resid(miller.reg.mult, type="pearson")^2); pearsonX2

# Note, based on the Deviance and Pearson X^2 statistics, the fit of this 
# model is MUCH better.