*slide 5; *Lookup with IF-THEN/ELSE; data old; input id @@; datalines; 1 3 2 3 2 1 1 ; run; data new; set old; if id=1 then x=4; else if id=2 then x=5; else x=6; run; proc print data=new; run; *Slide 7; *Lookup with temporary array; data one; input name $4. +1 (r1-r3) ($1.); array answer {3} $1 _temporary_ ('B','D','C'); array response r1-r3; score=0; do _i_=1 to 3; if answer{_i_}=response{_i_} then score+1; end; datalines; A4T7 BDD J3G6 ADC U66J BCD C3M8 AAA L6Q4 BBB ; run; proc print data=one label noobs width=full; label name='Code' score='Score on Item 1-Item 3'; var name r1-r3 score; run; *Slide 9; *Match merge inner join; data a; input student $ quiz; datalines; Brad 7 Amy 9 Li 9 ; run; data b; input name $ test; datalines; Amy 87 Li 86 Sean 54 Sophie 92 ; run; proc sort data=a; by student; proc sort data=b; by name; data gradebook; merge a(in=in_a) b(in=in_b rename=(name=student)); by student; if in_a and in_b; proc print data=gradebook; run; *Slide 12 *Match merge outer join with one-to-many match; data members; input name $15. id locker; datalines; Cale Johnson 1245 23 Ron Cole 9877 54 Armand Cayne 4567 206 Sam Crump 787 202 Isadore Khuryan 4555 87 Chris Santino 1456 27 ; data attendance; input date mmddyy8. id; datalines; 10/27/11 1245 10/27/11 4555 10/27/11 1456 10/29/11 4567 10/29/11 1245 10/29/11 4555 10/29/11 5386 10/31/11 1245 10/31/11 787 10/31/11 5386 ; proc sort data=members; by id; proc sort data=attendance; by id; data member_activity; merge members attendance; by id; proc print; format date mmddyy8.; run; /* Match merge to create a sum of squares */ data a; input x id @@; datalines; 13 1 14 2 12 1 19 2 23 1 42 3 19 3 27 1 28 3 28 2 19 3 ; proc sort data=a; by id; proc means data=a; by id; output out=outa; run; proc sql; select * from outa; quit; /* The BY statement creates alot of junk that needs cleaned up */ data outb; set outa; if _stat_='MEAN'; drop _stat_ _type_ _freq_ x; xbar=x; run; /* One more sort just to be safe */ proc sort data=outb; by id; /* Accumulate the sum of squares to create a pooled SS and group SS's */ data c; merge a outb; by id; devsq=(x-xbar)*(x-xbar); ssqpool+devsq; if first.id then ssqgroup=devsq; else ssqgroup+devsq; run; proc print data=c; by id; run; *Let's send the output in which we're really interested to separate files; data c cpool; merge a outb end=last; by id; devsq=(x-xbar)*(x-xbar); ssqpool+devsq; if first.id then ssqgroup=devsq; else ssqgroup+devsq; if last.id then output c; if last.id and last then output cpool; run; proc print data=c(keep=ssqgroup id) noobs label; label ssqgroup="Group SS"; run; proc print data=cpool(keep=ssqpool) noobs label; label ssqpool="Pooled SS"; run; *Slide 16; *Load Fall08 and Fall10 and create Fall10ms; *libname stat541 'e:\stat 541'; run; *data fall08; set stat541.fall08; run; *data fall10; set stat541.fall10; run; /* In SAS Studio, upload data files into folder and then: */ *Import data sets--these files will have variables with different lengths and formats; proc import out=fall08 datafile="/home/davidhitchcock/sasuser.v94/stat541/Fall 2008.xls" dbms=xls replace; sheet="Sheet1"; run; proc import out=fall09 datafile="/home/davidhitchcock/sasuser.v94/stat541/Fall 2009.xls" dbms=xls replace; sheet="Sheet1"; run; proc import out=fall10 datafile="/home/davidhitchcock/sasuser.v94/stat541/Fall 2010.xls" dbms=xls replace; sheet="Sheet1"; run; proc sql; create index noobs on fall08(noobs); quit; proc sql; create table fall10ms as select noobs, major as major10 label="Major 2010", class as class10 label="Class 2010", cltotgpa as gpa10 label="GPA 2010", regstat as regstat10 label="Registration 2010", enroll as enroll10 label="Enrollment Status 2010" from fall10 where upcase(major) contains 'MARINE'; quit; data marine_retro; set fall10ms; set fall08 (keep=noobs major class cltotgpa regstat enroll) key=noobs; used=_iorc_; *If used=0, a match was found. Superfluous here since we know Fall10 has the same noobs values as Fall08; run; title "2008 Retrospective on 2010 Marine Science majors"; proc print data=marine_retro (rename=(major=major08 class=class08 cltotgpa=cltotgpa08 regstat=regstat08 enroll=enroll08)); run; *Update example; *Create a small data set of the retrospective cases with reasonable variables to update; data marine_prosp_08; set fall08 (keep=noobs major class regstat enroll); where upcase(major) contains 'MARINE'; term='fall08'; *I include this to track the original 2008 majors; run; title; proc sql; select * from marine_prosp_08; quit; *Modify fall10ms so that it has the same variable names as marine_retro_08; proc sql; create table fall10ms as select noobs, major, class, regstat, enroll from fall10; quit; proc sort data=marine_prosp_08; by noobs; proc sort data=fall10ms; by noobs; run; data retro_update; update marine_prosp_08 fall10ms; by noobs; run; *Some of these records may represent graduating students; title "2010 prospective on 2008 Marine Science undergraduates"; proc print data=retro_update; where term='fall08'; run;