/* STAT_540_SAS_data_manipulate */ /* proc import for reading in excel spreadsheets */ proc import datafile = '/home/u64378492/STAT540_data/gaf_2007_daily_lbs.xls' dbms = xls out = gaf2007; run; proc import datafile = '/home/u64378492/STAT540_data/gaf_2008_daily_lbs.xlsx' dbms = xlsx out = gaf2008; run; proc print data = gaf2007; run; libname stat540 '/home/u64378492/STAT540_data'; run; data stat540.gaf2007; set gaf2007 (rename=("Total Daily Lbs"n=Lbs)); m = month(date); d = day(date); run; data stat540.gaf2008; set gaf2008 (rename=("Total Daily Lbs"n = Lbs)); m = month(date); d = day(date); run; proc print data = stat540.gaf2007; run; /* before merging we need to sort the two data sets on the "by" variables */ proc sort data = stat540.gaf2007; by m d; run; proc sort data = stat540.gaf2008; by m d; run; data stat540.gaf; merge stat540.gaf2007 (rename=(Lbs = Lbs2007)) stat540.gaf2008 (rename=(Lbs = Lbs2008)); by m d; day = cat(m,'/',d); drop date m d; run; proc print data = stat540.gaf; run; proc contents data = stat540.gaf2007; run; proc sql; CREATE TABLE stat540.gaf_sql AS SELECT g7.Lbs AS Lbs2007, g8.Lbs AS Lbs2008, CAT(COALESCE(g7.m,g8.m),'/',COALESCE(g7.d,g8.d)) AS day FROM stat540.gaf2007 AS g7 FULL OUTER JOIN stat540.gaf2008 AS g8 ON g7.m = g8.m AND g7.d = g8.d ; quit; /* What about stacking data sets? */ data gaf_stacked; set stat540.gaf2007 stat540.gaf2008; run; proc print data = gaf_stacked; run; /* Table rock data */ data tr24; infile '/home/u64378492/STAT540_data/tr50k_2024.txt' dsd dlm='09'x; length first $12 last $12 city $18; input null $ place first $ last $ city $ state $ age division $ dp time $ rank; drop null; run; proc print data = tr24; run; data stat540.tr24; set tr24; time = translate(time,'','`'); /* replace ` with nothing */ newtime = input(time,time8.); format newtime time8.; drop time; rename newtime = time; run; proc print data = stat540.tr24; run; data tr23; infile '/home/u64378492/STAT540_data/tr50k_2023.txt' dsd dlm='09'x; length first $12 last $12 city $18; input null $ place first $ last $ city $ state $ age division $ dp time $ rank; drop null; run; data stat540.tr23; set tr23; time = translate(time,'','`'); /* replace ` with nothing */ newtime = input(time,time8.); format newtime time8.; drop time; rename newtime = time; run; proc print data = stat540.tr23; run; /* stack the two tr data sets */ /* add a column telling me which year each record came from */ data tr_stacked; set stat540.tr23 (in = x) stat540.tr24 (in = y); if x then year = 2023; if y then year = 2024; run; proc print data = tr_stacked; run; /* reshaping data */ data stat540.adipoer; infile '/home/u64378492/STAT540_data/adipoer.dat' firstobs=2; input resp gene $ diet $; run; proc print data = stat540.adipoer; run; /* from long to wide format */ proc sort data = stat540.adipoer; by gene diet; run; proc transpose data = stat540.adipoer out = adipo_wide (drop = _NAME_) prefix = resp; by gene diet; run; proc print data = adipo_wide; run; /* aggregating data (summarizing) */ proc means data = stat540.adipoer; class gene; var resp; run; proc means data = stat540.adipoer; class diet; var resp; run; proc means data = stat540.adipoer maxdec=2 mean std; class gene diet; var resp; title 'Summary of the mouse experiment'; run; /* for getting counts across groups */ proc freq data = tr_stacked; tables year*state; run;