proc import out=horseshoecrab datafile="/home/grego1/STAT 770/HorseshoeCrab.txt" replace; run; data crabs; set horseshoecrab; weight=weight/1000; width_sq=width*width; color=color-1; *Color starts at 2 otherwise; y=0; n=1; if satellite>0 then y=1; *Create binary variable rather than a count; id=_n_; w=22.75; if width>23.25 then w=23.75; if width>24.25 then w=24.75; if width>25.25 then w=25.75; if width>26.25 then w=26.75; if width>27.25 then w=27.75; if width>28.25 then w=28.75; if width>29.25 then w=29.75; run; *Save residuals and linear predictor; proc logistic data=crabs descedning plots=all; class color / param=ref; model y=color width; output out=diag1 stdreschi=r xbeta=eta p=p; run; proc sort data=diag1; by color width; run; *Residual plot; proc sgscatter data=diag1; title "Std. Pearson residual plots"; plot r*(width eta) r*color / loess; run; *Plot fits; proc sgplot data=diag1; title1 "Preidcted probabilities"; series x=width y=p/group=color; yaxis min=0 max=1; run; *Plot a variety of diagnostic plots; proc logistic data=crabs descending plots=all; class color / param=ref; model y=color width; output out=diag2 stdreschi=r c=c; run; proc sgscatter data=diag2; title "Cook's distance and Std. Pearson resids"; plot c*id r*id ; run; *Print unusual cases; proc print data=diag2(where=(c>0.2 or r>3 or r<-3)); var y width color c r; run; *ROC curve; proc logistic data=crabs descending plots; class color / param=ref; model y=color width/outroc=crabroc; run;