* SAS example for Weighted Least Squares ; * Blood pressure data in Table 11.1 ; data bloodp; input age dbp @@; datalines; 27 73 21 66 22 63 24 75 25 71 23 70 20 65 20 70 29 79 24 72 25 68 28 67 26 79 38 91 32 76 33 69 31 66 34 73 37 78 38 87 33 76 35 79 30 73 31 80 37 68 39 75 46 89 49 101 40 70 42 72 43 80 46 83 43 75 44 71 46 80 47 96 45 92 49 80 48 70 40 90 42 85 55 76 54 71 57 99 52 86 53 79 56 92 52 85 50 71 59 90 50 91 52 100 58 80 57 109 ; run; * Regress the response, dbp, against the predictor, age ; * The plots show definite nonconstant error variance ; ods graphics on; proc reg data=bloodp; model dbp=age; output out=temp r=residual; *plot dbp*age r.*age; *I left this in since the use of r. is interesting; run; ods graphics; * Plot of absolute residuals against age shows that absolute residuals increase approximately linearly; data temp; set temp; absr = abs(residual); rsq=residual**2; run; proc sgplot data=temp; scatter x=age y=absr/markerattrs=(color=blue symbol=Diamond); loess x=age y=absr/nomarkers lineattrs=(color=blue); xaxis label="Age"; yaxis label="Absolute Residuals"; run; proc sgplot data=temp; scatter x=age y=rsq/markerattrs=(color=blue symbol=Diamond); loess x=age y=rsq/nomarkers lineattrs=(color=blue); xaxis label="Age"; yaxis label="Absolute Residuals"; run; * Regress absolute residuals against the age ; * This second regression is done on the data set temp ; proc reg data=temp; model absr=age; output out=temp1 p=s_hat ; run; * Define weights using the fitted values from this second regression ; data temp1; set temp1; w=1/(s_hat**2); run; * Using the WEIGHT option in PROC REG to get the WLS estimates ; * This last regression is done on the data set temp1 ; proc reg data=temp1; weight w; model dbp=age / clb; output out=temp2 r=residual; run; * Model fit directly using PROC NLMIXED ; * Starting values obtained from regressions 1 and 2 ; proc nlmixed data=bloodp; parms beta0=50 beta1=0.5 tau0=0 tau1=0.2; mu=beta0+beta1*age; sigma=exp(tau0+tau1*age); model dbp ~ normal(mu,sigma*sigma); run; ******************************* * Body fat data from Chapter 7 *******************************; data body; input triceps thigh midarm bodyfat @@; cards; 19.5 43.1 29.1 11.9 24.7 49.8 28.2 22.8 30.7 51.9 37.0 18.7 29.8 54.3 31.1 20.1 19.1 42.2 30.9 12.9 25.6 53.9 23.7 21.7 31.4 58.5 27.6 27.1 27.9 52.1 30.6 25.4 22.1 49.9 23.2 21.3 25.5 53.5 24.8 19.3 31.1 56.6 30.0 25.4 30.4 56.7 28.3 27.2 18.7 46.5 23.0 11.7 19.7 44.2 28.6 17.8 14.6 42.7 21.3 12.8 29.5 54.4 30.1 23.9 27.7 55.3 25.7 22.6 30.2 58.6 24.6 25.4 22.7 48.2 27.1 14.8 25.2 51.0 27.5 21.1 ; run; proc reg data=body outest=ridge outvif ridge=0.01 to 0.5 by .01; model bodyfat=triceps thigh midarm; run; * I would probably take c=0.1 or c=0.2 based on the plot; proc print data=ridge; run; proc reg data=body outest=ridge ridge=0.2 outseb; model bodyfat=triceps thigh midarm; run; proc print data=ridge; run; proc glmselect data=body plot=coefficients; * can also have class statement; * default for LASSO picks model w/ smallest BIC (i.e. SBC); * plot is each coefficient as c is increased; model bodyfat=triceps thigh midarm / selection=lasso details=all; run; *Validation data set may make a huge difference; *This data set is actually too small to use a validation data set--results are erratic; proc glmselect data=body plots=all; partition fraction(validate=0.5); *Add two way terms; model bodyfat=triceps thigh midarm @2/ selection=lasso(stop=none choose=validate); run;