* Code to import data set that I have saved in a folder ; * I generated this using the Import Data menu choice; /* Generated Code (IMPORT) */ /* Source File: MedDB.csv */ /* Source Path: /home/davidhitchcock/sasuser.v94/stat541 */ /* Code generated on: Thursday, January 28, 2016 10:25:22 PM */ %web_drop_table(WORK.MEDDB); FILENAME REFFILE "/home/davidhitchcock/sasuser.v94/stat541/MedDB.csv" TERMSTR=CR; PROC IMPORT DATAFILE=REFFILE DBMS=CSV OUT=WORK.MEDDB; GETNAMES=YES; DATAROW=2; RUN; /* The above code works for those using SAS Studio, assuming you have saved the MedDB.csv file into an analogously named folder in SAS Studio, as shown in class. For those using regular SAS on your computer, just right-click on the MedDB.csv file on the course web page and save it in a directory on your computer. Then change the FILENAME statement to something like: FILENAME REFFILE "C:/Users/STUDIO/MedDB.csv" TERMSTR=CR; or whatever the full path name is for the folder where you saved the file. Then the PROC IMPORT code should work fine. */ /* Alternatively, since the data file is stored on a webpage, you could directly enter the URL of the data file like: FILENAME REFFILE URL "http://people.stat.sc.edu/hitchcock/MedDB.csv" TERMSTR=CR; and then the PROC IMPORT code should work. */ PROC CONTENTS DATA=WORK.MEDDB; RUN; %web_open_table(WORK.MEDDB); *Be sure to assign formats to meddb when you import it; *libname stat541 'e:\stat 541\'; proc sql; create table meddb as select ssn format=ssn11., btrim(tos) as tos label='Type of Service', dos format=mmddyy9. label='Date of Service', provider, claim format=dollar11.2 from meddb; quit; proc sql; describe table meddb; quit; *Slide 6; proc sql outobs=30; select ssn format=ssn11., tos label='Type of Service', dos format=mmddyy9. label='Date of Service', provider, claim format=dollar11.2 from Meddb; quit; *Back-up table with no indexes; proc sql; create table meddbX as select ssn format=ssn11., tos label='Type of Service', dos format=mmddyy9. label='Date of Service', provider, claim format=dollar11.2 from Meddb; quit; *Table sorted on provider; proc sql; create table meddbprov as select ssn format=ssn11., tos label='Type of Service', dos format=mmddyy9. label='Date of Service', provider as provsort, claim format=dollar11.2 from Meddb order by provider; quit; *EDA; proc sgplot data=meddb; histogram claim; run; proc sgplot data=meddb; vbox claim; xaxis label="Claims"; yaxis max=6000; run; proc sgplot data=meddb; vbox claim; yaxis label="Claims"; run; proc sgplot data=meddb; vbar tos; xaxis label="Type of Service"; run; proc freq data=meddb; table provider/noprint out=outtab; proc sgplot data=outtab; needle x=provider y=count; run; proc freq data=meddb; table dos/noprint out=outtab; proc sgplot data=outtab; needle x=dos y=count; xaxis label='Date of Service'; run; *indexes; proc sql; create index provider on meddb(provider); quit; proc sql; create index provsort on meddbprov(provsort); quit; proc sql; create index tos on meddb(tos); quit; proc sql; create index dos on meddb(dos); quit; proc sql; create index td on meddb(tos,dos); quit; *Slide 11; proc sql; describe table meddb; quit; *Slide 12; options msglevel=I; *tos pairs; proc sql; create table tosa as select * from meddb where tos contains 'A'; quit; proc sql; create table tosb as select * from meddbx where tos contains 'B'; quit; proc sql; drop table tosa, tosb; quit; *dos pairs; proc sql; create table dos1 as select * from meddbX where dos between '01Sep2000'd and '30Sep2000'd; quit; proc sql; create table dos2 as select * from meddb where dos between '16Apr2000'd and '15May2000'd; quit; proc sql; drop table dos1, dos2; quit; *provider triples; proc sql; create table prov1 as select * from meddbx where provider between 200 and 220; quit; proc sql; create table prov2 as select * from meddb where provider between 120 and 140; quit; proc sql; create table prov3 as select * from meddbprov where provsort between 120 and 140; quit; proc sql; drop table prov1, prov2, prov3; quit; *summary table pair; proc sql; title 'April 2000 claims'; select tos label='Type of Service', count(*) as count label='Number of claims', sum(claim) as total label='Total claims' format=dollar12.2 from meddb where month(dos)=4 group by tos; quit; proc sql; title 'April 2000 claims'; select tos label='Type of Service', count(*) as count label='Number of claims', sum(claim) as total label='Total claims' format=dollar12.2 from meddbx where month(dos)=4 group by tos; quit; *Slide 13; proc sql; create table tos13a as select * from meddb (idxwhere=yes) where tos contains 'A'; quit; proc sql; create table tos13b as select * from meddb (idxwhere=no) where tos contains 'A'; quit; proc sql; create table tos13c as select * from meddb (idxname=tos) where tos contains 'A'; quit; proc sql; drop table tos13a, tos13b, tos13c; quit; *Force index; proc sql; create table tos13d as select * from meddb (idxname=td) where tos contains 'A'; quit; *Should generate error; proc sql; create table tos13e as select * from meddb (idxname=dos) where tos contains 'A'; quit; proc sql; drop table tos13d, tos13e; quit; *Compound WHERE; proc sql; select * from meddb (idxname=td) where tos contains 'A' and month(dos)=9; quit; proc sql; select * from meddb (idxname=tos) where tos contains 'A' and month(dos)=9; quit; proc sql; select * from meddb (idxwhere=no) where tos contains 'A' and month(dos)=9; quit; proc sql; select * from meddb where tos contains 'A' and month(dos)=9; quit; *Another Compound WHERE; proc sql; select * from meddb (idxname=td) where tos contains 'A' and dos between '01Sep2000'd and '05Sep2000'd; quit; proc sql; select * from meddb (idxname=tos) where tos contains 'A' and dos between '01Sep2000'd and '05Sep2000'd; quit; proc sql; select * from meddb (idxwhere=no) where tos contains 'A' and dos between '01Sep2000'd and '05Sep2000'd; quit; proc sql; select * from meddb where tos contains 'A' and dos between '01Sep2000'd and '05Sep2000'd; quit; *Slide 14; proc sql; drop index tos from meddb; quit; proc sql; describe table meddb; quit;