/* SAS codes in Chapter 4 */ ***4.2***; * Import CSV file of artists; LIBNAME art 'c:\MySASLib'; PROC IMPORT DATAFILE = 'c:\MyRawData\Artists.csv' OUT = art.style REPLACE; RUN; * Print list of Impressionist painters; PROC PRINT DATA = art.style; WHERE Genre = 'Impressionism'; TITLE 'Impressionist Painters'; FOOTNOTE 'F = France U = USA'; RUN; ***4.3***; DATA marine; INFILE 'c:\MyRawData\Lengths.dat'; INPUT Name $ Family $ Length @@; RUN; * Sort the data and remove duplicates; PROC SORT DATA = marine OUT = seasort NODUPKEY; BY Family Name; RUN; * Print the sorted data; PROC PRINT DATA = seasort; TITLE 'Whales and Sharks'; RUN; ***4.4***; PROC IMPORT DATAFILE = 'c:\MyRawData\Mail.txt' OUT = addresses REPLACE; RUN; * Sort addresses using linguistic sorting with numeric collation; PROC SORT DATA = addresses OUT = sortout SORTSEQ = LINGUISTIC (STRENGTH = PRIMARY NUMERIC_COLLATION = ON); BY State City Street; RUN; * Print the liguistically sorted data; PROC PRINT DATA = sortout; TITLE 'Addresses Sorted by State, City, and Street'; RUN; ***4.5***; LIBNAME class 'c:\MySASLib'; DATA class.sales; INFILE 'c:\MyRawData\CookieSales.dat'; INPUT Name $ Class DateReturned MMDDYY10. CookieType $ Quantity @@; Profit = Quantity * 2.5; RUN; PROC SORT DATA = class.sales OUT = salessort; BY Class; RUN; PROC PRINT DATA = salessort; BY Class; SUM Profit; VAR Name DateReturned CookieType Profit; TITLE 'Cookie Sales for Field Trip by Class'; RUN; ***4.6***; LIBNAME class 'c:\MySASLib'; * Print cookie sales data with formatted values; PROC PRINT DATA = class.sales; VAR Name DateReturned CookieType Profit; FORMAT DateReturned DATE9. Profit DOLLAR6.2; TITLE 'Cookie Sale Data Using Formats'; RUN; ***4.8***; PROC IMPORT DATAFILE = 'c:\MyRawData\Cars.csv' OUT = carsurvey REPLACE; RUN; PROC FORMAT; VALUE gender 1 = 'Male' 2 = 'Female'; VALUE agegroup 13 -< 20 = 'Teen' 20 -< 65 = 'Adult' 65 - HIGH = 'Senior'; VALUE $col 'W' = 'Moon White' 'B' = 'Sky Blue' 'Y' = 'Sunburst Yellow' 'G' = 'Rain Cloud Gray'; RUN; * Print data using user-defined and standard formats; PROC PRINT DATA = carsurvey; FORMAT Sex gender. Age agegroup. Color $col. Income DOLLAR8.; TITLE 'Survey Results Printed with User-Defined Formats'; RUN; ***4.9***; * Write a report with FILE and PUT statements; LIBNAME class 'c:\MySASLib'; DATA _NULL_; SET class.sales; FILE 'c:\MyTextFiles\Student.txt' PRINT; TITLE; PUT @5 'Cookie sales report for ' Name 'from classroom ' Class // @5 'Congratulations! You sold ' Quantity 'boxes of cookies' / @5 'and earned ' Profit DOLLAR6.2 ' for our field trip.'; PUT _PAGE_; RUN; ***4.10***; LIBNAME garden 'c:\MySASLib'; DATA garden.plants; INFILE 'c:\MyRawData\Flowers.dat'; INPUT CustID $ @9 SaleDate MMDDYY10. Petunia SnapDragon Marigold; Month = MONTH(SaleDate); FORMAT SaleDate MMDDYY10; RUN; * Calculate means by Month for flower sales; PROC MEANS DATA = garden.plants MAXDEC = 0; CLASS Month; VAR Petunia SnapDragon Marigold; TITLE 'Summary of Flower Sales by Month'; RUN; ***4.11***; LIBNAME garden 'c:\MySASLib'; * Calculate means by CustID, output sum and max to new data set; PROC MEANS NOPRINT DATA = garden.plants; CLASS CustID; VAR Petunia SnapDragon Marigold; OUTPUT OUT = totals MAX(Petunia SnapDragon Marigold) = MaxP MaxS MaxM SUM(Petunia SnapDragon Marigold) = Petunia SnapDragon Marigold; RUN; ***4.12***; LIBNAME drinks 'c:\MySASLib'; DATA drinks.orders; INFILE 'c:\MyRawData\Coffee.dat'; INPUT Coffee $ Window $ @@; RUN; * Print tables for Window and Coffee; PROC FREQ DATA = drinks.orders; TABLES Window Coffee; TITLE 'Coffee Sales by Window and by Type of Drink'; RUN; ***4.13***; LIBNAME drinks 'c:\MySASLib'; * Print two tables for Window by Coffee; PROC FREQ DATA = drinks.orders; TABLES Window * Coffee; TABLES Window * Coffee / MISSING NOPERCENT NOCOL NOROW; TITLE 'Coffee Sales by Window and Type of Drink'; RUN; ***4.14***; *Define formats to group the data; PROC FORMAT; VALUE $typ 'bio','non','ref' = 'Non-Fiction' 'fic','mys','sci' = 'Fiction'; VALUE agegpa 0-18 = '0 to 18' 19-25 = '19 to 25' 26-49 = '26 to 49' 50-HIGH = ' 50+ '; VALUE agegpb 0-25 = '0 to 25' 26-HIGH = ' 26+ '; RUN; DATA books; INFILE 'c:\MyRawData\LibraryBooks.dat'; INPUT Age Book $ @@; BookType = PUT(Book,$typ.); RUN; *Create two way table with Age grouped into four categories; PROC FREQ DATA = books; TITLE 'Patron Age by Book Type: Four Age Groups'; TABLES BookType * Age / NOPERCENT NOROW NOCOL; FORMAT Age agegpa.; RUN; *Create two way table with Age grouped into two categories; PROC FREQ DATA = books; TITLE 'Patron Age by Book Type: Two Age Groups'; TABLES BookType * Age / NOPERCENT NOROW NOCOL; FORMAT Age agegpb.; RUN; ***4.15***; LIBNAME trips 'c:\MySASLib'; PROC IMPORT DATAFILE = 'c:\MyRawData\Boats.txt' OUT = trips.boats REPLACE; RUN; * Tabulations with three dimensions; PROC TABULATE DATA = trips.boats; CLASS Port Type Vessel; TABLE Port, Type, Vessel; TITLE 'Number of Boats by Port, Type, and Vessel'; RUN; ***4.16***; LIBNAME trips 'c:\MySASLib'; * Tabulations with two dimensions and statistics; PROC TABULATE DATA = trips.boats; CLASS Type Vessel; VAR Price; TABLE Type ALL, MEAN*Price*(Vessel ALL); TITLE 'Mean Price by Type and Vessel'; RUN; ***4.17***; LIBNAME trips 'c:\MySASLib'; * PROC TABULATE report with options; PROC TABULATE DATA = trips.boats FORMAT = DOLLAR7.2; CLASS Type Vessel; VAR Price Length; TABLE Type ALL, MEAN * (Price Length*FORMAT=2.0) * (Vessel ALL) /BOX='Full Day Excursions' MISSTEXT='none'; TITLE; RUN; ***4.18***; LIBNAME trips 'c:\MySASLib'; * Changing headers; PROC FORMAT; VALUE $ves 'cat' = 'catamaran' 'sch' = 'schooner' 'yac' = 'yacht'; RUN; PROC TABULATE DATA = trips.boats FORMAT=DOLLAR7.2; CLASS Type Vessel; VAR Price; FORMAT Vessel $ves.; TABLE Type='' ALL, MEAN=''*Price='Mean Price by Kind of Vessel'*(Vessel='' ALL) /BOX='Full Day Excursions' MISSTEXT='none'; TITLE; RUN; ***4.19***; LIBNAME visit 'c:\MySASLib'; DATA visit.natparks; INFILE 'c:\MyRawData\Parks.dat'; INPUT Name $ 1-21 Type $ Region $ Museums Camping; RUN; PROC REPORT DATA = visit.natparks; TITLE 'Report with Character and Numeric Variables'; RUN; PROC REPORT DATA = visit.natparks; COLUMN Museums Camping; TITLE 'Report with Only Numeric Variables'; RUN; ***4.20***; LIBNAME visit 'c:\MySASLib'; * PROC REPORT with ORDER variable, MISSING option, and column header; PROC REPORT DATA = visit.natparks MISSING; COLUMN Region Name Museums Camping; DEFINE Region / ORDER; DEFINE Camping / ANALYSIS 'Campgrounds'; TITLE 'National Parks and Monuments Arranged by Region'; RUN; ***4.21***; LIBNAME visit 'c:\MySASLib'; * Region and Type as GROUP variables; PROC REPORT DATA = visit.natparks; COLUMN Region Type Museums Camping; DEFINE Region / GROUP; DEFINE Type / GROUP; TITLE 'Summary Report with Two Group Variables'; RUN; * Region as GROUP and Type as ACROSS with sums; PROC REPORT DATA = visit.natparks; COLUMN Region Type,(Museums Camping); DEFINE Region / GROUP; DEFINE Type / ACROSS; TITLE 'Summary Report with a Group and an Across Variable'; RUN; ***4.22***; LIBNAME visit 'c:\MySASLib'; * PROC REPORT with breaks; PROC REPORT DATA = visit.natparks; COLUMN Name Region Museums Camping; DEFINE Region / ORDER; BREAK AFTER Region / SUMMARIZE; RBREAK AFTER / SUMMARIZE; TITLE 'Detail Report with Summary Breaks'; RUN; ***4.23***; LIBNAME visit 'c:\MySASLib'; *Statistics in COLUMN statement with two group variables; PROC REPORT DATA = visit.natparks; COLUMN Region Type N (Museums Camping),MEAN; DEFINE Region / GROUP; DEFINE Type / GROUP; TITLE 'Statistics with Two Group Variables'; RUN; *Statistics in COLUMN statement with group and across variables; PROC REPORT DATA = visit.natparks; COLUMN Region N Type,(Museums Camping),MEAN; DEFINE Region / GROUP; DEFINE Type / ACROSS; TITLE 'Statistics with a Group and Across Variable'; RUN; ***4.24***; LIBNAME visit 'c:\MySASLib'; * COMPUTE new variables that are numeric and character; PROC REPORT DATA = visit.natparks; COLUMN Name Region Museums Camping Facilities Note; DEFINE Museums / ANALYSIS SUM NOPRINT; DEFINE Camping / ANALYSIS SUM NOPRINT; DEFINE Facilities / COMPUTED 'Campgrounds/and/Museums'; DEFINE Note / COMPUTED; COMPUTE Facilities; Facilities = Museums.SUM + Camping.SUM; ENDCOMP; COMPUTE Note / CHAR LENGTH = 10; IF Camping.SUM = 0 THEN Note = 'No Camping'; ENDCOMP; TITLE 'Report with Two Computed Variables'; RUN;