/* SAS example for Nonlinear Regression */ /* We use the injured patients data in Table 13.1 of the book */ DATA injured; INPUT prognosis days; cards; 54 2 50 5 45 7 37 10 35 14 25 19 20 26 16 31 18 34 13 38 8 45 11 52 8 53 4 60 6 65 ; run; /* First, a simple scatter plot of the data to see the exponential trend: */ PROC SGPLOT DATA = injured; SCATTER y=prognosis x=days; run; /* We use Gauss-Newton method by specifying METHOD=GAUSS in PROC NLIN. */ /* Do NOT leave the METHOD option off, otherwise SAS will use a default */ /* method, which is not as good. */ /* The PARAMETERS statement defines which elements of the */ /* model statement are parameters and to be estimated. You */ /* can give them any valid SAS names (up to eight characters long, */ /* not starting with a number). */ /* We also specify some initial values for the estimates. */ PROC NLIN DATA = injured METHOD=GAUSS HOUGAARD; PARAMETERS gamma1=75 gamma2= -10; MODEL prognosis = gamma1*exp(gamma2*days); run; /* Seems to be a problem with convergence of estimates. */ /* Let's try setting a range of initial values: */ PROC NLIN DATA = injured METHOD=GAUSS HOUGAARD; PARAMETERS gamma1= 50 to 100 by 2 gamma2= -20 to 0 by 0.5; MODEL prognosis = gamma1*exp(gamma2*days); run; /* Now the output says "Convergence criterion met" with no warning message. */ /* We can probably trust these estimators. (In fact, they match those */ /* obtained in the book (equation 13.30, page 524). */ /* Estimate of gamma1 is 58.6065, estimate of gamma2 is -0.0396. */ /* Final minimized SSE is 49.4593. */ /*********************************************************/ /* Trying PROC NLIN with the MARQUARDT search method: */ PROC NLIN DATA = injured METHOD=MARQUARDT HOUGAARD; PARAMETERS gamma1= 50 to 100 by 2 gamma2= -20 to 0 by 0.5; MODEL prognosis = gamma1*exp(gamma2*days); run; /* This results in the same estimated parameter values, which is reassuring. */ /* The steepest-descent method is executed by specifying METHOD=GRADIENT */ /* but this does not appear to work well for this problem. */ /******* INFERENCE FOR PARAMETER ESTIMATES *********/ /* SAS gives 95% CIs for the various parameters in the model. */ /* Note that with 95% confidence, the true value of gamma1 is between */ /* 55.4 and 61.8. */ /* With 95% confidence, the true value of gamma2 is between */ /* -0.0433 and -0.0359. */ /* These intervals assume the normality of the parameter estimates. */ /* Hougaard's measure indicates whether this assumption is reasonable. */ /* The Hougaard measures for both estimates are near zero, so we are */ /* fairly safe. */ /**********************************************************************/ /* If we want to use the 3-parameter exponential model for these data, */ /* just add another parameter and change the model statement: */ PROC NLIN DATA = injured METHOD=MARQUARDT HOUGAARD; PARAMETERS gamma0= -10 to 20 by 2 gamma1= 50 to 100 by 2 gamma2= -20 to 0 by 0.5; MODEL prognosis = gamma0 + gamma1*exp(gamma2*days); run; /* The 95% CI for gamma0 includes 0, so we probably don't need to include */ /* this extra parameter. However, Hougaard's measure indicates that the */ /* CI for gamma0 may not be trustworthy */ /* (Hougaard's measure = -0.45 for gamma0). */ /**********************************************************/ /* Residual plots and plots of predicted values */ PROC NLIN DATA = injured METHOD=MARQUARDT HOUGAARD; PARAMETERS gamma1= 50 to 100 by 2 gamma2= -20 to 0 by 0.5; MODEL prognosis = gamma1*exp(gamma2*days); OUTPUT out=nlinout predicted=pred r=res; run; /* A residual plot vs. fitted values: */ symbol1 v=circle l=32 c = black; PROC SGPLOT DATA=nlinout; SCATTER y=res x=pred; REFLINE 0; run; PROC UNIVARIATE noprint ; QQPLOT RES / normal; RUN; /* Predicted values joined to give a picture of the fitted function: */ symbol1 v=circle l=32 c = black; symbol2 i = join v=star l=32 c = black; PROC GPLOT DATA=nlinout; PLOT prognosis*days pred*days/ OVERLAY; RUN; /* If you only have a few observations, this plot can look non-smooth. */ /* Here is a way to trick SAS into making a smooth-looking plot: */ data filler; do days = 0 to 70 by 0.5; predict=1; output; end; run; data tricky; set injured filler; run; PROC SORT data=tricky; BY days; run; PROC NLIN DATA = tricky METHOD=MARQUARDT HOUGAARD; PARAMETERS gamma1= 50 to 100 by 2 gamma2= -20 to 0 by 0.5; MODEL prognosis = gamma1*exp(gamma2*days); OUTPUT out=nlinout2 predicted=pred r=res; run; symbol1 v=circle l=32 c = black; symbol2 i = join l=32 c = black; PROC GPLOT DATA=nlinout2; PLOT prognosis*days pred*days/ OVERLAY; RUN;