/* =====================================================================
   Transformation examples: (1) elec data, (2) water quality data.
   Comments use the block form throughout: statement-style comments
   (asterisk ... semicolon) cannot safely contain apostrophes or
   semicolons.
   ===================================================================== */

/* ---------------------------------------------------------------------
   Part 1: elec data -- response is time, predictor is ratio
   --------------------------------------------------------------------- */

/* Read the tab-delimited file into WORK.elec. Row 1 supplies the
   variable names and the data values begin on row 2. */
proc import out=WORK.elec
        datafile="W:\courses\stat704\elec.txt"
        dbms=tab replace;
    getnames=yes;
    datarow=2;
run;

/* Add candidate transformations to the data set (overwrites WORK.elec):
   natural log of the response, plus the reciprocal and exp(-x) of the
   predictor. */
data elec;
    set elec;
    log_time  = log(time);
    inv_ratio = 1/ratio;
    exp_ratio = exp(-ratio);
run;

/* Original scale: non-constant variance */
proc sgplot data=elec;
    scatter x=ratio y=time;
run;

/* Box-Cox analysis of the response.
     convenient   : obtain the simplest possible transformation
     pboxcoxtable : obtain the loglikelihood table */
proc transreg data=elec;
    model boxcox(time / convenient) = identity(ratio) / pboxcoxtable;
run;

/* Log response against the untransformed predictor */
proc sgplot data=elec;
    scatter x=ratio y=log_time;
run;

/* Log response against 1/ratio, with least-squares line and loess
   smooth overlaid (nomarker keeps the points from being drawn twice) */
proc sgplot data=elec;
    scatter x=inv_ratio y=log_time;
    reg     x=inv_ratio y=log_time / nomarker;
    loess   x=inv_ratio y=log_time / nomarker;
run;

/* Log response against exp(-ratio): overcompensates */
proc sgplot data=elec;
    scatter x=exp_ratio y=log_time;
    reg     x=exp_ratio y=log_time / nomarker;
    loess   x=exp_ratio y=log_time / nomarker;
run;

/* Fit log(time) on 1/ratio */
proc reg data=elec;
    model log_time=inv_ratio;
run;

/* ---------------------------------------------------------------------
   Part 2: Water quality data
   --------------------------------------------------------------------- */

/* Read worksheet EColi$ of the Excel workbook into WORK.WQ */
proc import out=WORK.WQ
        datafile="W:\courses\stat704\EColi.xls"
        dbms=excel replace;
    range="EColi$";
    getnames=yes;
    mixed=no;
    scantext=yes;
    usedate=yes;
    scantime=yes;
run;

/* Keep the five listed stations and add log and fourth-root versions of
   the three bacteria measurements.
   NOTE(review): log() returns missing for zero or negative input --
   confirm that the counts are strictly positive. */
data lcongwq;
    set wq;
    where station in ('C-001','C-021','C-068','C-076','C-077');
    lEcoli   = log(EColi);
    lFC      = log(FecalColi);
    Ecoli25  = Ecoli**(.25);
    FC25     = FecalColi**(.25);
    Entero25 = Enterococci**(.25);
    lentero  = log(Enterococci);
    label lFC='log(Fecal coliform)'
          lEntero='log(Enterococci)'
          lEcoli='log(Ecoli)';
run;

/* Box-Cox analysis of Ecoli given both predictors */
proc transreg data=lcongwq;
    model boxcox(Ecoli / convenient) = identity(FecalColi Enterococci) / pboxcoxtable;
run;

/* Suggest y^.25 transformation */
/* ...but plot is inscrutable on regular x scale */
proc sgplot data=lcongwq;
    scatter x=FecalColi y=EColi25;
run;

/* Fourth-root response against log(fecal coliform) */
proc sgplot data=lcongwq;
    scatter x=lFC y=EColi25;
    reg     x=lFC y=EColi25 / nomarker;
    loess   x=lFC y=EColi25 / nomarker;
run;

/* Log overcompensates--let's try x^.25 too */
proc sgplot data=lcongwq;
    scatter x=FC25 y=EColi25;
    reg     x=FC25 y=EColi25 / nomarker;
    loess   x=FC25 y=EColi25 / nomarker;
run;

/* x^.25 undercompensates--log may be better here */
proc sgplot data=lcongwq;
    scatter x=lEntero y=EColi25;
    reg     x=lEntero y=EColi25 / nomarker;
    loess   x=lEntero y=EColi25 / nomarker;
run;

/* Log looks good for enterococci */
proc reg data=lcongwq;
    model EColi25=lFC lEntero;
run;

/* Diagnostics looked bad--return to FC25 */
proc reg data=lcongwq;
    model EColi25=FC25 lEntero;
run;

/* The regression results are interesting--let's revisit our
   scatterplot matrix */
proc sgscatter data=lcongwq;
    matrix EColi25 FC25 lEntero;
run;