/* SAS example for Poisson regression */
/* The data are from Table 14.14 of the book. */
/* The number of customers was charted for 110 census tracts. */
/* Some predictor variables were also measured for each tract. */

/* Read the 110 census-tract records into the data set MILLER.            */
/* Variables, in input order:                                             */
/*   customers housing income age compet_dist store_dist                  */
DATA miller;
  INPUT customers housing income age compet_dist store_dist;
  /* Now income is more conveniently measured in thousands of dollars, */
  /* not dollars */
  income = income/1000;
  /* In-stream data must begin on the line AFTER the CARDS statement and */
  /* end with a line holding only a semicolon; one record per line.      */
  cards;
9 606 41393 3 3.04 6.32
6 641 23635 18 1.95 8.89
28 505 55475 27 6.54 2.05
11 866 64646 31 1.67 5.81
4 599 31972 7 0.72 8.11
4 520 41755 23 2.24 6.81
0 354 46014 26 0.77 9.27
14 483 34626 1 3.51 7.92
16 1034 85207 13 4.23 4.40
13 456 33021 32 3.07 6.03
9 19 39198 22 2.96 6.09
14 530 38794 5 2.77 6.08
5 337 30855 1 1.33 9.86
9 586 28852 7 2.98 8.64
9 1113 120065 9 3.58 5.26
7 525 32229 3 1.27 7.56
4 377 36828 15 1.92 8.91
26 1127 90302 26 5.83 1.74
32 877 51707 27 5.19 3.66
26 1007 89860 55 5.03 2.03
11 657 60513 32 4.38 8.30
12 302 42191 54 3.41 5.21
3 603 28736 41 0.34 8.29
15 556 49129 33 4.78 3.89
12 635 29308 42 2.53 6.17
9 386 26734 14 4.99 9.70
14 1011 57862 54 4.60 3.94
10 925 70030 36 4.58 8.66
22 898 46027 44 3.03 5.60
8 731 32202 43 5.15 9.67
3 584 32871 13 1.47 8.02
11 439 29564 18 3.67 5.10
2 153 46806 21 0.84 9.18
6 1069 59805 22 2.50 9.43
11 443 42555 53 2.62 5.75
10 392 36998 7 1.03 7.74
0 828 85664 4 1.30 9.66
15 159 21238 4 2.98 8.66
9 830 47972 40 2.28 9.26
16 234 33246 26 3.95 4.61
29 1004 45927 24 4.90 2.69
6 643 58315 8 0.78 6.26
26 741 69177 9 6.61 0.87
13 306 40886 27 4.53 2.68
0 180 44588 14 0.88 9.38
8 644 47347 35 2.94 7.69
8 109 31791 9 4.37 9.31
21 809 42740 17 4.10 4.75
12 722 59175 35 2.38 5.09
26 1006 48862 48 5.04 2.21
3 786 54678 20 3.59 8.52
7 1041 59835 40 1.68 7.59
5 524 51756 39 0.57 9.10
9 725 34817 18 1.88 7.96
13 482 29942 14 3.17 6.91
28 666 68684 25 5.78 2.55
10 450 64790 3 4.35 6.03
12 667 58535 25 2.78 5.59
6 921 42919 13 2.48 7.69
11 412 40722 32 2.47 9.43
12 526 42120 30 4.29 6.15
11 523 28647 43 2.69 7.54
9 1066 61464 40 1.15 8.25
8 1001 70136 29 2.58 9.67
9 669 34595 38 4.06 8.78
8 582 30878 58 1.91 6.86
6 872 39366 52 0.73 8.67
6 758 61563 31 3.08 8.33
15 782 38412 26 2.72 6.71
15 551 41045 2 3.62 7.45
12 201 23864 43 4.80 8.74
10 730 38647 9 0.67 7.92
8 738 58387 13 2.01 6.60
3 469 37242 40 1.42 8.37
10 898 38337 32 2.63 9.56
10 780 68201 5 4.12 6.69
15 622 41066 46 4.48 4.10
6 391 40873 19 1.67 6.90
9 531 54655 40 2.32 5.69
21 566 49826 1 3.06 4.03
13 410 29013 50 2.68 7.58
8 719 78082 31 2.70 4.89
6 684 57506 51 2.13 8.31
8 865 47118 46 2.17 9.06
21 1031 72373 48 6.27 1.75
7 862 67787 1 2.10 8.63
19 758 40305 15 3.95 5.58
13 1141 50026 45 2.79 6.18
24 1289 98701 8 5.87 2.73
7 674 58195 54 4.30 6.40
3 683 47991 57 1.54 9.52
8 650 63123 15 3.17 9.46
9 406 39051 29 3.11 9.62
18 966 114633 38 6.33 2.22
12 1103 55773 44 4.58 8.68
8 312 43393 41 2.25 6.43
16 787 61765 53 5.39 3.37
5 416 33348 48 1.48 7.66
8 528 44541 31 4.91 9.67
11 919 40795 8 2.97 7.79
12 482 55972 9 2.91 5.85
14 781 33140 30 1.42 5.71
17 120 19673 21 2.65 6.25
17 693 36190 6 4.70 9.54
6 348 25768 42 1.43 7.11
15 780 53974 47 4.21 6.41
10 752 71814 1 3.13 5.47
6 817 54429 47 1.90 9.90
4 268 34022 54 1.20 9.51
6 519 52850 43 2.92 8.62
;
run;

/****** FITTING A POISSON MODEL **********/

/* PROC GENMOD is a general SAS procedure for fitting generalized linear models. */
/* Here we will specify the Poisson distribution and the (natural) log link. */

/* Fitting a Poisson regression model with a log (ln) link: */

/* This is a simple Poisson regression using number of */
/* housing units as the only predictor variable: */

/* The OBSTATS option to the MODEL statement produces various statistics */
/* for the individual observations: predicted response values, different */
/* types of residuals, and confidence limits for the predicted mean */
/* response values. */

/* The OUTPUT statement saves the predicted means (variable PRED) in the */
/* data set NEW, which is used below to plot the fitted curve. */
PROC GENMOD data = miller;
  MODEL customers = housing / dist = poisson link = log OBSTATS ALPHA=0.05;
  OUTPUT OUT=NEW p=PRED;
run;

/* Note, based on the Deviance and Pearson X^2 statistics, the fit of this */
/* model is not so good. */

/********* PLOTTING THE FITTED POISSON REGRESSION FUNCTION: *********/

/* Sort by the predictor so that joining the predicted values in */
/* data-set order traces the fitted curve from left to right. */
PROC SORT DATA=NEW;
  BY housing;
RUN;

/* symbol1 styles the first plot request (observed counts, as circles); */
/* symbol2 styles the second (predicted means, joined by a line). */
symbol1 v=circle l=32 c = black;
symbol2 i = join v=none l=32 c = black;

PROC GPLOT DATA=NEW;
  PLOT customers*housing PRED*housing / OVERLAY;
RUN;
QUIT; /* explicitly end the GPLOT step instead of relying on the next step */

/**** EXAMPLE OF PREDICTION WITH THE POISSON REGRESSION MODEL ****/

/* Including an extra observation with "housing" = 600 and */
/* a missing value for "customers" and the other variables */

DATA Xvalues;
  INPUT customers housing income age compet_dist store_dist;
  cards;
. 600 . . . .
;
RUN;

/* Append the scoring row to a COPY of the data (MILLER_PRED) rather than */
/* overwriting MILLER: MILLER keeps exactly the 110 observed tracts, and */
/* re-running this section cannot pile up duplicate scoring rows. */
DATA miller_pred;
  SET miller Xvalues;
run;

/* Running the Poisson regression again: */
/* The appended row has a missing response, so it does not contribute to */
/* the fit; it is the last row of the data set and hence of the OBSTATS */
/* table. */

PROC GENMOD data = miller_pred;
  MODEL customers = housing / dist = poisson link = log OBSTATS ALPHA=0.05;
run;

/* We see on the last line of the output, the predicted mean number of */
/* customers when housing = 600 is 10.64 customers. The 95% CI for this */
/* is (10.02, 11.29). */

/**************************************************************************/

/* This is a Poisson regression using five different predictor variables: */

proc genmod data = miller;
  model customers = housing income age compet_dist store_dist / dist = poisson link = log OBSTATS;
run;

/* Note, based on the Deviance and Pearson X^2 statistics, the fit of this */
/* model is MUCH better. */