/*---------------------------------------------------------------------------
  STAT 541 demonstration script.

  Sections:
    1. Where to place the LENGTH statement (character variables).
    2. Numeric truncation caused by short LENGTHs, inspected with PROC COMPARE.
    3. Data set compression with COMPRESS=YES versus COMPRESS=BINARY.

  Inputs: SASHELP.CARS, SASHELP.CITIDAY, SASHELP.BASEBALL, tab/comma-delimited
  text files uploaded to SAS Studio, and a text file fetched over HTTP.
  All data sets created here are written to WORK.
---------------------------------------------------------------------------*/

*libname stat541 'f:\stat 541';
*run;

/*===========================================================================
  1. Placing LENGTH in the right place
===========================================================================*/

/* LENGTH after SET: the length of MODEL has already been fixed by the input
   data set, so this LENGTH statement comes too late. */
data cars;
    set sashelp.cars;
    *This actually generates a warning;
    length model $19.;
run;

proc contents data=cars;
run;

/* LENGTH before SET: the new length is in effect when MODEL is read. */
data cars;
    length model $19.;
    set sashelp.cars;
run;

proc contents data=cars;
run;

/* Compare the shortened MODEL values with the originals. */
proc sql outobs=10;
    select model
        from cars;
quit;

proc sql outobs=10;
    select model
        from sashelp.cars;
quit;

/* No LENGTH for ORGTYPE: its length is taken from the first assignment
   ('foreign', 7 characters), so 'domestic' does not fit. */
data cars2;
    set sashelp.cars;
    if origin not in ('USA') then orgtype='foreign';
    else orgtype='domestic';
run;

proc sql outobs=100;
    select make, model, orgtype
        from cars2;
quit;

/* Declaring ORGTYPE with length 8 up front leaves room for both values. */
data cars3;
    length orgtype $ 8;
    set sashelp.cars;
    if origin not in ('USA') then orgtype='foreign';
    else orgtype='domestic';
run;

proc sql outobs=100;
    select make, model, orgtype
        from cars3;
quit;

/*===========================================================================
  2. Truncation and PROC COMPARE
===========================================================================*/

/* RATIO stored in a shortened numeric variable. */
data citidayrt;
    length ratio 4.; /* Try again with length 3 */
    set sashelp.citiday;
    ratio=(dcd1m-1)/dtbd3m;
run;

/* Same computation with the default numeric length, used as the baseline. */
data citidayr;
    set sashelp.citiday;
    ratio=(dcd1m-1)/dtbd3m;
run;

proc compare base=citidayr compare=citidayrt out=citidaycomp outdif maxprint=250;
run;

*Look at the output for record 147;
/* PROC SQL is terminated with QUIT; a RUN statement has no effect there. */
proc sql outobs=150;
    select dcd1m, dtbd3m, ratio
        from citidayrt;
quit;

proc sql outobs=150;
    select dcd1m, dtbd3m, ratio
        from citidayr;
quit;

*Truncation and PROC COMPARE;
* An example with an integer numeric variable;
/* CRABT is a copy of CRATBAT stored in 3 bytes. */
data basert;
    length crabt 3.;
    set sashelp.baseball;
    crabt=cratbat;
run;

/* Baseline copy with the default numeric length. */
data baser;
    set sashelp.baseball;
    crabt=cratbat;
run;

proc compare base=baser compare=basert out=basecomp outdif maxprint=250;
run;

proc sql;
    select name, crabt
        from baser (obs=71);
quit;

proc sql;
    select name, crabt
        from basert (obs=71);
quit;

/*===========================================================================
  3. Compress data sets with different options
===========================================================================*/

/* The original note described Fall2008 and Fall2009 as permanent SAS data
   sets. In this version the cohort data are rebuilt as WORK data sets
   (fall08, fall09, fall10) from the uploaded text files below. */

/* In SAS Studio, upload data files into folder and then: */
filename cohort08 ("/home/davidhitchcock/sasuser.v94/stat541/Fall8.txt"
                   "/home/davidhitchcock/sasuser.v94/stat541/Fall9.txt"
                   "/home/davidhitchcock/sasuser.v94/stat541/Fall10.txt");

/* Read the three concatenated tab-delimited files.
   MISSOVER and TRUNCOVER are alternatives; the original listed both. Only
   TRUNCOVER is kept, which reads the same values for the list input used
   here. */
data all;
    infile cohort08 dlm='09'X dsd truncover;
    input major :$38. degree :$16. class :$9. cltotgpa gender $ nobs;
run;

*Create separate data sets by year;
/* The split relies on hard-coded record positions (3364 and 6728).
   NOTE(review): this presumably assumes 3364 records in each of the first
   two files - confirm against the actual files. */
data fall08 fall09 fall10;
    set all;
    if _n_ le 3364 then output fall08;
    else if _n_ le 6728 then output fall09;
    else output fall10;
run;

data f08comp (compress=yes);
    set fall08;
run;

data f09comp (compress=binary);
    set fall09;
run;

/* ACT data */
/* In SAS Studio, upload data files into folder and then: */
filename actall ("/home/davidhitchcock/sasuser.v94/stat541/ACTfemale.txt"
                 "/home/davidhitchcock/sasuser.v94/stat541/ACTmale.txt");

data allact;
    infile actall dlm=',';
    input item1-item60;
run;

/* The same output name is written twice on purpose: each step reports its
   own compression result in the log. */
data allactcomp (compress=yes);
    set allact;
run;

data allactcomp (compress=binary);
    set allact;
run;

/* National Champion Data */
/* FILENAME is a global statement, so it is issued before the DATA step
   rather than inside it. */
FILENAME webpage URL 'http://people.stat.sc.edu/hitchcock/champ36pres.txt';

data nchamp;
    INFILE webpage firstobs=7 truncover;
    INPUT year 4-7 team $ 9-27 @30 wins 30-31 losses 33 ties 35 Coach $19.;
    /* Carry the largest year seen so far and use it to fill in records
       whose year field is blank. */
    retain maxyear;
    maxyear=max(maxyear, year);
    if year= . then year = maxyear;
    drop maxyear;
run;

proc print data=nchamp;
run;

data nchampcomp (compress=yes);
    set nchamp;
run;

data nchampcomp (compress=binary);
    set nchamp;
run;

/* Artificially create a big data set */
/* Stack NCHAMP on top of itself repeatedly. The original did this by naming
   nchamp in the SET statement a few hundred times; the loop below produces
   the same row order (a full pass through NCHAMP per repetition) with the
   replication factor visible in one place.
   NOTE(review): 250 approximates the length of the hand-typed list - adjust
   if an exact row count matters. */
data nchampbig;
    do _rep = 1 to 250;
        do _obs = 1 to _nobs;
            set nchamp point=_obs nobs=_nobs;
            output;
        end;
    end;
    stop;           /* required with POINT=, which never reaches end of file */
    drop _rep;      /* the POINT= and NOBS= variables are not written out */
run;

data nchampbigcomp (compress=yes);
    set nchampbig;
run;

data nchampbigcomp (compress=binary);
    set nchampbig;
run;

*Create a random sample with a lot of repeats;
proc format;
    value tffmt 1="T" 0="F";
run;

/* 100000 rows of 40 character answers, each "T" or "F" at random.
   ceil(2*ranuni(0)-1) is 0 or 1; the TFFMT format turns that into F or T.
   An explicit array with an indexed loop replaces the implicit-array
   DO OVER form. */
data tftest (drop=i q);
    array quiz{40} $ q1-q40;
    do i=1 to 100000;
        do q=1 to dim(quiz);
            quiz{q}=put(ceil(2*ranuni(0)-1),tffmt.);
        end;
        output;
    end;
run;

data tfcomp (compress=binary);
    set tftest;
run;