/*
 * Build a one-row-per-plot-per-year ANPP data set for the wet site.
 *
 * Input : anpp.beoverunder (long format: one row per plot/year/planttype,
 *         with the response in ANPP).
 * Output: work.combined and anpp.allanppwet (wide format: pineanpp, oakanpp,
 *         underanpp plus derived totals and square-root transforms).
 *
 * Two equivalent ways of splitting the long data set by plant type are shown
 * (subsetting IFs and multiple OUTPUT statements).  Both leave the response
 * already renamed in PINES, OAK and UNDERS, so the match merge needs no
 * RENAME= data set options.
 */
libname anpp "/home/grego1/STAT 540";

/* Clean up the data set to simplify the subsetting */
data beoverunder;
  set anpp.beoverunder;
  if site='Wet site' and Year>2003 and Year<2007;
run;

proc print data=beoverunder (obs=20);
run;

/* This is how I usually split out data sets--with subsetting IF's */
data pines;
  set beoverunder;
  if planttype='Pine';
  pineanpp=anpp;
  drop anpp;
run;

data oak;
  set beoverunder;
  if planttype='Nonpine';
  oakanpp=anpp;
  drop anpp;
run;

data unders;
  set beoverunder;
  if planttype='Understory';
  underanpp=anpp;
  drop anpp;
run;

/* Here is an alternate approach using multiple OUTPUT statements.
   It replaces the three data sets created above.  The understory test is
   explicit (rather than a catch-all ELSE) so that this approach selects
   exactly the same rows as the subsetting-IF approach. */
data pines oak unders;
  set beoverunder;
  if planttype='Pine' then output pines;
  else if planttype='Nonpine' then output oak;
  else if planttype='Understory' then output unders;
run;

/* Do some clean-up: give the response a plant-type-specific name */
data pines;
  set pines;
  pineanpp=anpp;
  drop anpp;
run;

data oak;
  set oak;
  oakanpp=anpp;
  drop anpp;
run;

data unders;
  set unders;
  underanpp=anpp;
  drop anpp;
run;

/* For either approach, the data can now be sorted to prepare for a match
   merge */
proc sort data=pines;
  by plot year;
run;

proc sort data=oak;
  by plot year;
run;

proc sort data=unders;
  by plot year;
run;

data combined;
  /* Merge the data.  ANPP was already renamed and dropped in each input, so
     no RENAME= option is used here; renaming a variable that is no longer on
     the input data set stops the step with an error under the default
     DKRICOND=ERROR setting.
     Note: planttype is still present in all three inputs, so after the merge
     it holds the value from the last data set contributing to the BY group
     and no longer describes the row. */
  merge pines oak unders;
  by plot year;

  /* Construct some possible response variables--some missing values will be
     generated, but SUM ignores missing arguments, which minimizes missing
     totals */
  totalanpp=sum(pineanpp, oakanpp, underanpp);
  overstoryanpp=sum(pineanpp, oakanpp);

  sqrtunderanpp=underanpp**.5;
  sqrtpineanpp=pineanpp**.5;
  sqrtoakanpp=oakanpp**.5;
  sqrttotalanpp=totalanpp**.5;
  sqrtoverstoryanpp=overstoryanpp**.5;
run;

/* Save the data set */
data anpp.allanppwet;
  set combined;
run;

/* Check the first few merged rows */
proc print data=combined (obs=20);
run;