/* Data sets from the reference below with identical statistical properties */
/* but quite different scatterplots                                         */
/* Anscombe, Francis J. (1973) Graphs in statistical analysis.              */
/* American Statistician, 27, 17-21.                                        */

/* Read the four x/y pairs in wide form: one row holds x1-x4 then y1-y4.    */
/* In-stream data must start on the line after DATALINES, one record per    */
/* line, and end with a line containing only a semicolon.                   */
data anscombe;
  input x1-x4 y1-y4;
  datalines;
10 10 10 8 8.04 9.14 7.46 6.58
8 8 8 8 6.95 8.14 6.77 5.76
13 13 13 8 7.58 8.74 12.74 7.71
9 9 9 8 8.81 8.77 7.11 8.84
11 11 11 8 8.33 9.26 7.81 8.47
14 14 14 8 9.96 8.10 8.84 7.04
6 6 6 8 7.24 6.13 6.08 5.25
4 4 4 19 4.26 3.10 5.39 12.50
12 12 12 8 10.84 9.13 8.15 5.56
7 7 7 8 4.82 7.26 6.42 7.91
5 5 5 8 5.68 4.74 5.73 6.89
;
run;

/* Rearrange data for plotting: wide -> long.                               */
/* Each input row produces four output rows (xg, yg, grp), one per data set. */
data anscombeg;
  set anscombe;
  array x x1-x4;
  array y y1-y4;
  do i=1 to 4;
    xg=x(i);
    yg=y(i);
    grp=i;
    output;
  end;
  drop x1-x4 y1-y4 i;
  label xg="X" yg="Y" grp="Group";
run;

proc sort data=anscombeg;
  by grp;
run;

proc print data=anscombeg;
run;

/* I plot everything together here--using filled circles makes it a little easier */
/* to visualize the four patterns */
title 'Anscombe Data Sets';
proc sgplot data=anscombeg;
  scatter x=xg y=yg/group=grp markerattrs=(symbol=circlefilled);
run;
/* TITLE is a global statement and stays in effect until replaced or cleared. */
/* Clear it so the Broad River plot below does not inherit the Anscombe title. */
title;

/* Time series plot using Broad River data */
/* LIBNAME is a global statement; it takes effect immediately, no RUN needed. */
libname broad '/home/grego1/STAT 540/';

data Broadalstonrd;
  set broad.Broadalstonrd;
  /* NOTE(review): datetime7. assumes DATE is stored as a SAS datetime value. */
  /* If it is a plain SAS date, a date format (e.g. date7.) is needed instead */
  /* -- confirm against broad.Broadalstonrd.                                  */
  format date datetime7.;
run;

proc sgplot data=Broadalstonrd;
  series x=date y=flowmean;
  series x=date y=flowmax;
  series x=date y=flowmin;
  xaxis label="Date";
  yaxis label="Flow (cfs)";
run;

/* Fix legend, add title, and change lines */
proc sgplot data=Broadalstonrd;
  series x=date y=flowmean/legendlabel="Mean Daily Flow"
         lineattrs=(thickness=2 pattern=solid);
  series x=date y=flowmax/legendlabel="Maximum Daily Flow"
         lineattrs=(thickness=2 pattern=solid);
  series x=date y=flowmin/legendlabel="Minimum Daily Flow"
         lineattrs=(thickness=2 pattern=solid);
  xaxis label="Date";
  yaxis label="Flow (cfs)";
  title "USGS 02161000 Broad River at Alston, SC";
run;
title;

/* Back to Anscombe */
/* NOMARKERS avoids re-marking the plotted points */
/* Run both a regression line and a smoother */
proc sgplot data=anscombe;
  scatter x=x1 y=y1;
  reg x=x1 y=y1;
  loess x=x1 y=y1/nomarkers;
run;

/* Let's under-smooth */
proc sgplot data=anscombe;
  scatter x=x1 y=y1;
  reg x=x1 y=y1;
  /* Both smooth and degree are influential here.                          */
  /* The two fits share the same smoothing parameter and differ only in    */
  /* local polynomial degree (DEGREE= defaults to 1), so label them to     */
  /* keep the legend entries distinguishable.                              */
  loess x=x1 y=y1/nomarkers smooth=.7 degree=2
        legendlabel="Loess (smooth=0.7, degree=2)";
  loess x=x1 y=y1/nomarkers smooth=.7
        legendlabel="Loess (smooth=0.7, degree=1)";
run;

/* Read in Old Faithful data set and call it faithful */
proc import out=work.faithful
     datafile="/home/grego1/STAT 540/Faithful.xlsx"
     dbms=xlsx replace;
  sheet="Faithful";
run;

/* Scatterplot with both a loess smoother and a regression line overlaid */
proc sgplot data=faithful;
  scatter x=duration y=spacing;
  xaxis label='Duration (minutes)';
  yaxis label='Waiting time (minutes)';
  loess x=duration y=spacing/nomarkers;
  reg x=duration y=spacing/nomarkers;
run;

/* Add confidence band. Adjust color and transparency */
proc sgplot data=faithful;
  scatter x=duration y=spacing;
  xaxis label='Duration (minutes)';
  yaxis label='Waiting time (minutes)';
  loess x=duration y=spacing/nomarkers clm lineattrs=(color=blue)
        clmtransparency=0.5;
run;

/* PROC SGPANEL demo with Edisto River monthly flow data */
/* Re-issued so this section can be run on its own. */
libname broad '/home/grego1/STAT 540/';

data Edisto;
  set broad.mon;
run;

/* This doesn't look very nice */
/* (Block comments are used here because statement-style comments of the  */
/* form *text; cannot contain unmatched quotation marks; the apostrophe   */
/* would otherwise start a quoted string and swallow the code below.)     */
proc sgpanel data=Edisto;
  panelby month;
  series x=year y=flow;
  rowaxis label="Flow (cfs)";
run;

/* Let's add better labels, a reference line and a better layout */
proc format;
  value cmonth
    1="January"
    2="February"
    3="March"
    4="April"
    5="May"
    6="June"
    7="July"
    8="August"
    9="September"
    10="October"
    11="November"
    12="December";
run;

/* This is more in the spirit of a panel display: one cell per month in a */
/* 4-column by 3-row grid, month names from the cmonth. format, and a red */
/* reference line at flow = 2500 on the y axis.                           */
proc sgpanel data=Edisto;
  panelby month/novarname columns=4 rows=3;
  format month cmonth.;
  series x=year y=flow;
  refline 2500/axis=y lineattrs=(color=red);
  rowaxis label="Flow (cfs)";
run;