/* Examples of finding summary statistics and graphs using SAS                     */
/* Each example names a data set and its variables, enters the data in-stream      */
/* (or reads it from a URL), and then summarizes and plots it.                     */
/* Please read the "Introduction to SAS" on the course web page first              */


/* AN EXAMPLE OF A DATA ANALYSIS IN SAS WITH A QUALITATIVE (CATEGORICAL) DATA SET  */
/* With qualitative data, each individual is classified into one of several        */
/* categories.                                                                     */

DATA aphasia;          /* names the SAS dataset */
    INPUT type :$10.;  /* names each variable */
    /* (the $ after the variable name indicates that this is a CHARACTER variable, NOT numerical) */
    /* The :$10. says that some categories have up to 10 letters. */
    /* There is no @@ on the INPUT statement, so each data line holds ONE observation. */
    /* LINES; tells SAS that data is about to be entered.                          */
    /* Could also use CARDS; or DATALINES; instead of LINES;                       */
    /* Keep it as the last thing on its line: the data start on the very next line. */
    LINES;
Brocas
Anomic
Anomic
Conduction
Brocas
Conduction
Conduction
Anomic
Conduction
Anomic
Conduction
Brocas
Anomic
Brocas
Anomic
Anomic
Anomic
Conduction
Brocas
Anomic
Conduction
Anomic
;
RUN;

/* Simple frequency counts: */
/* (with no TABLES statement, PROC FREQ tabulates every variable in the data set) */
PROC FREQ DATA=aphasia;
RUN;

/* Bar graphs */
PROC GCHART DATA=aphasia;
    VBAR type;
RUN;
QUIT;  /* GCHART is a RUN-group procedure; QUIT ends it explicitly */

/* Pie charts */
PROC GCHART DATA=aphasia;
    PIE type;
RUN;
QUIT;

/* or */
PROC GCHART DATA=aphasia;
    PIE type / slice=outside value=none percent=outside;
RUN;
QUIT;


/* AN EXAMPLE OF A DATA ANALYSIS IN SAS WITH A QUANTITATIVE (NUMERICAL) DATA SET   */
/* With quantitative data, one or more measurements on a numerical scale are       */
/* taken on each individual.                                                       */
/* The data are the gas mileage data from p. 43 in the textbook                    */

DATA gasdata;       /* names the SAS dataset */
    INPUT mileage;  /* names each variable */
    /* CARDS; tells SAS that data is about to be entered.                          */
    /* Could also use LINES; or DATALINES; instead of CARDS;                       */
    /* One observation per data line (no @@ on the INPUT statement).               */
    CARDS;
36.3
41
36.9
37.1
44.9
36.8
30
37.2
42.1
36.7
32.7
37.3
41.2
36.6
32.9
36.5
33.2
37.4
37.5
33.6
40.5
36.5
37.6
33.9
40.2
36.4
37.7
37.7
40
34.2
36.2
37.9
36
37.9
35.9
38.2
38.3
35.7
35.6
35.1
38.5
39
35.5
34.8
38.6
39.4
35.3
34.4
38.8
39.7
36.3
36.8
32.5
36.4
40.5
36.6
36.1
38.2
38.4
39.3
41
31.8
37.3
33.1
37
37.6
37
38.7
39
35.8
37
37.2
40.7
37.4
37.1
37.8
35.9
35.6
36.7
34.5
37.1
40.3
36.7
37
33.9
40.1
38
35.2
34.8
39.5
39.9
36.9
32.9
33.8
39.8
34
36.8
35
38.1
36.9
;
RUN;

/* Some old-fashioned plots, including a stem-and-leaf plot */
/* ODS GRAPHICS OFF makes the PLOT option draw text (line-printer) plots.          */
/* ODS SELECT applies only to the next procedure step.                             */
/* NOTE(review): ODS SELECT plots restricts the displayed output to the plots, so  */
/* the frequency table requested by FREQ is probably not shown here -- confirm,    */
/* and drop the ODS SELECT statement if that table is wanted.                      */
ODS graphics off;
ODS select plots;
PROC UNIVARIATE DATA=gasdata PLOT FREQ;
    VAR mileage;
RUN;

/* The following procedure produces several summary statistics and more            */
/* modern-looking plots                                                            */
/* DATA= tells SAS which data set to use                                           */
/* VAR tells SAS which variable to compute the statistics and graphs for           */
/* If there are several variables in the data set                                  */
/* you could specify more than one variable with VAR                               */
ODS graphics on;
PROC UNIVARIATE DATA=gasdata PLOT;
    VAR mileage;
    TITLE 'Summary of the Gas Mileages for the 100 Cars';  /* Adds a TITLE to the output */
RUN;
title;  /* Shuts off that title for future outputs */

/* PROC UNIVARIATE gives summary statistics about the variable "mileage"           */
/* Look for the mean, median, variance, standard deviation, IQR, 5-number summary, etc. */
/* The PLOT option produces a histogram (sideways), boxplot, and normal probability plot. */
/* Looks very symmetric!  Notice the sample mean and sample median are almost the same! */

/* For a vertically oriented histogram: */
PROC UNIVARIATE DATA=gasdata NOPRINT;  /* NOPRINT tells SAS to output ONLY the plots, not the summary statistics */
    VAR mileage;
    histogram;
RUN;


/* ANOTHER EXAMPLE OF A DATA ANALYSIS IN SAS WITH A QUANTITATIVE (NUMERICAL) DATA SET */
/* With these data, there is more than one value input on each data line:          */

DATA emissions;
    INPUT co2 @@;  /* The @@ tells SAS to read in more than one observation per line */
    lines;
3.3 4.2 5.6 5.6 5.7 5.7 6.2 6.3 7.0 7.6
8.0 8.1 8.3 8.6 8.7 9.4 9.7 9.9 10.3 10.3
10.4 11.3 12.7 13.1 24.5
;
RUN;

ODS graphics on;
PROC UNIVARIATE DATA=emissions PLOT;
    VAR co2;
    TITLE 'Summary of CO2 emissions for 25 European Countries';  /* Adds a TITLE to the output */
RUN;
title;  /* Shuts off that title for future outputs */

/* Note the boxplot:  Luxembourg with 24.5 metric tons is an outlier. */
/* Let's delete Luxembourg: */
/* (the observation is identified by its value, 24.5, since the data set has no country names) */
DATA emissnolux;
    SET emissions;
    IF co2=24.5 THEN DELETE;
RUN;

ODS graphics on;
PROC UNIVARIATE DATA=emissnolux PLOT;
    VAR co2;
    TITLE 'Summary of CO2 emissions for 24 European Countries (no Luxembourg)';  /* Adds a TITLE to the output */
RUN;
title;  /* Shuts off that title for future outputs */

/* Which measure of center (and which measure of spread?) was more affected by the outlier? */


/* ANOTHER EXAMPLE OF A DATA ANALYSIS IN SAS WITH A QUANTITATIVE (NUMERICAL) DATA SET */
/* With these data, I will read the data in from an external website:              */

/* FILENAME is a global statement, so it is placed before the DATA step that uses it. */
/* It gives the web address the nickname "webpage", which INFILE then refers to.   */
FILENAME webpage URL 'http://people.stat.sc.edu/hitchcock/rainfall.txt';

DATA rain;
    INFILE webpage;   /* read from the URL instead of from in-stream data lines */
    INPUT rainfall;   /* one observation per line of the remote file */
RUN;

ODS graphics on;
PROC UNIVARIATE DATA=rain PLOT;
    VAR rainfall;
RUN;