/* SAS codes in Chapter 5 */ ***5.2***; LIBNAME ocean 'c:\MySASLib'; DATA ocean.marine; INFILE 'c:\MyRawData\Lengths8.dat'; INPUT Name $ Family $ Length @@; RUN; * Create the HTML file and remove procedure name; ODS HTML PATH = 'c:\MyHTMLFiles' BODY = 'Marine.html' STYLE = BARRETTSBLUE; ODS NOPROCTITLE; PROC MEANS DATA = ocean.marine MEAN MIN MAX; CLASS Family; TITLE 'Whales and Sharks'; RUN; PROC PRINT DATA = ocean.marine; RUN; ODS HTML CLOSE; ***5.3***; * Create an RTF file; LIBNAME ocean 'c:\MySASLib'; ODS RTF PATH = 'c:\MyRTFFiles' FILE= 'Marine.rtf' BODYTITLE STARTPAGE = NO; ODS NOPROCTITLE; PROC MEANS DATA = ocean.marine MEAN MIN MAX; CLASS Family; TITLE 'Whales and Sharks'; RUN; PROC PRINT DATA = ocean.marine; RUN; ODS RTF CLOSE; ***5.4***; * Create the PDF file; LIBNAME ocean 'c:\MySASLib'; ODS PDF FILE = 'c:\MyPDFFiles\Marine.pdf' STARTPAGE = NO; ODS NOPROCTITLE; PROC MEANS DATA = ocean.marine MEAN MIN MAX; CLASS Family; TITLE 'Whales and Sharks'; RUN; PROC PRINT DATA = ocean.marine; RUN; ODS PDF CLOSE; ***5.5***; * Create the text output and remove procedure name; LIBNAME ocean 'c:\MySASLib'; ODS LISTING FILE = 'c:\MyTextFiles\Marine.lst'; ODS NOPROCTITLE; PROC MEANS DATA = ocean.marine MEAN MIN MAX; CLASS Family; TITLE 'Whales and Sharks'; RUN; PROC PRINT DATA = ocean.marine; RUN; ODS LISTING CLOSE; ***5.7***; LIBNAME skate 'c:\MySASLib'; PROC IMPORT DATAFILE = 'c:\MyRawData\Mens5000.csv' OUT = skate.results REPLACE; RUN; PROC PRINT DATA = skate.results; ID Place; VAR Name Country Time; TITLE "Men's 5000m Speed Skating"; RUN; * Use STYLE= option in PROC, ID, and VAR statements; PROC PRINT DATA = skate.results STYLE(DATA) = {BACKGROUNDCOLOR = GRAY COLOR = WHITE}; ID Place / STYLE(DATA) = {TEXTALIGN = CENTER}; VAR Name / STYLE(DATA) = {FONTSTYLE = ITALIC}; VAR Country Time; TITLE "Men's 5000m Speed Skating"; RUN; ***5.8***; LIBNAME skate 'c:\MySASLib'; PROC REPORT DATA = skate.results; COLUMN Country Name Time Place; DEFINE Country / ORDER; TITLE "Men's 5000m Speed Skating"; RUN; * Use STYLE= option in PROC and DEFINE statements; PROC REPORT DATA = skate.results SPANROWS STYLE(COLUMN) = {BACKGROUNDCOLOR = GRAY COLOR = WHITE}; COLUMN Country Name Time Place; DEFINE Country / ORDER; DEFINE Name / STYLE(COLUMN) = {FONTSTYLE = ITALIC}; DEFINE Place / STYLE(COLUMN) = {TEXTALIGN = CENTER}; TITLE "Men's 5000m Speed Skating"; RUN; ***5.9***; LIBNAME ocean 'c:\MySASLib'; PROC TABULATE DATA = ocean.marine; CLASS Family; VAR Length; TABLE Family, Length*(Min Max Mean); TITLE 'Whales and Sharks'; RUN; * Use STYLE= option in PROC, CLASS, and VAR statements; PROC TABULATE DATA = ocean.marine STYLE = {BACKGROUNDCOLOR = GRAY COLOR = WHITE}; CLASS Family / STYLE = {FONTSTYLE = ITALIC}; VAR Length / STYLE = {FONTSTYLE = ITALIC}; TABLE Family, Length*(Min Max Mean); TITLE 'Whales and Sharks'; RUN; ***5.10***; LIBNAME skate 'c:\MySASLib'; PROC PRINT DATA = skate.results; ID Place; TITLE "Men's 5000m Speed Skating"; RUN; * Create user-defined format for colors; PROC FORMAT; VALUE rec 0 -< 378.72 = 'LIGHT GRAY' 378.72 -< 382.20 = 'VERY LIGHT GRAY' 382.20 - HIGH = 'WHITE'; RUN; * Use STYLE= option to apply format in VAR statement; PROC PRINT DATA = skate.results; ID Place; VAR Name Country; VAR Time / STYLE = {BACKGROUNDCOLOR = rec.}; TITLE "Men's 5000m Speed Skating"; RUN; ***5.12***; LIBNAME tomatoes 'c:\MySASLib'; DATA tomatoes.giant; INFILE 'c:\MyRawData\GiantTom.dat' DSD; INPUT Name :$15. Color $ Days Weight @@; RUN; * Trace PROC MEANS; ODS TRACE ON; PROC MEANS DATA = tomatoes.giant; BY Color; RUN; ODS TRACE OFF; * Print only the first BY group; PROC MEANS DATA = tomatoes.giant; BY Color; ODS SELECT Means.ByGroup1.Summary; RUN; ***5.13***; LIBNAME tomatoes 'c:\MySASLib'; ODS TRACE ON; PROC TABULATE DATA = tomatoes.giant; CLASS Color; VAR Days Weight; TABLE Color ALL, (Days Weight) * MEAN; RUN; ODS TRACE OFF; PROC TABULATE DATA = tomatoes.giant; CLASS Color; VAR Days Weight; TABLE Color ALL, (Days Weight) * MEAN; TITLE 'Standard TABULATE Output'; ODS OUTPUT Table = tabout; RUN; ***6.1***; PROC IMPORT DATAFILE = 'c:\MyRawData\South.csv' OUT = southent REPLACE; PROC IMPORT DATAFILE = 'c:\MyRawData\North.csv' OUT = northent REPLACE; RUN; * Create a data set, both, combining northent and southent; * Create a variable, AmountPaid, based on value of variable Age; DATA both; SET southent northent; IF Age = . THEN AmountPaid = .; ELSE IF Age < 3 THEN AmountPaid = 0; ELSE IF Age < 65 THEN AmountPaid = 35; ELSE AmountPaid = 27; RUN; ***6.2***; PROC IMPORT DATAFILE = 'c:\MyRawData\South.csv' OUT = southent REPLACE; PROC IMPORT DATAFILE = 'c:\MyRawData\North.csv' OUT = northent REPLACE; RUN; PROC SORT DATA = northent; BY PassNumber; RUN; * Interleave observations by PassNumber; DATA interleave; SET southent northent; BY PassNumber; RUN; ***6.3***; PROC IMPORT DATAFILE = 'c:\MyRawData\Chocolate.txt' OUT = names REPLACE; PROC IMPORT DATAFILE = 'c:\MyRawData\Chocsales.txt' OUT = sales REPLACE; RUN; PROC SORT DATA = sales; BY Code; RUN; * Merge data sets by Code; DATA chocolates; MERGE sales names; BY Code; RUN; ***6.4***; LIBNAME athshoes 'c:\MySASLib'; DATA athshoes.shoedata; INFILE 'c:\MyRawData\Shoe.dat'; INPUT Style $ 1-15 ExerciseType $ RegularPrice; RUN; PROC SORT DATA = athshoes.shoedata OUT = regular; BY ExerciseType; RUN; DATA athshoes.discount; INFILE 'c:\MyRawData\Disc.dat'; INPUT ExerciseType $ Adjustment; RUN; * Perform many-to-one match merge; DATA prices; MERGE regular athshoes.discount; BY ExerciseType; NewPrice = ROUND(RegularPrice - (RegularPrice * Adjustment), .01); RUN; ***6.5***; LIBNAME athshoes 'c:\MySASLib'; * Perform an inner join using PROC SQL; PROC SQL; CREATE TABLE prices AS SELECT * FROM athshoes.shoedata, athshoes.discount WHERE shoedata.ExerciseType = discount.ExerciseType; QUIT; ***6.6***; LIBNAME athshoes 'c:\MySASLib'; PROC IMPORT DATAFILE = 'c:\MyRawData\Shoesales.txt' OUT = athshoes.shoesales REPLACE; RUN; PROC SORT DATA = athshoes.shoesales OUT = shoes; BY ExerciseType; RUN; * Summarize sales by ExerciseType; PROC MEANS NOPRINT DATA = shoes; VAR Sales; BY ExerciseType; OUTPUT OUT = summarydata SUM(Sales) = Total; RUN; * Merge totals with the original data set; DATA shoesummary; MERGE shoes summarydata; BY ExerciseType; Percent = Sales / Total * 100; RUN; PROC PRINT DATA = shoesummary LABEL; ID ExerciseType; VAR Style Sales Total Percent; LABEL Percent = 'Percent By Type'; TITLE 'Sales Share by Type of Exercise'; RUN; ***6.7***; LIBNAME athshoes 'c:\MySASLib'; * Output grand total of sales to a data set; PROC MEANS NOPRINT DATA = athshoes.shoesales; VAR Sales; OUTPUT OUT = summarydata SUM(Sales) = GrandTotal; RUN; * Combine the grand total with the original data; DATA shoesummary; IF _N_ = 1 THEN SET summarydata; SET athshoes.shoesales; Percent = Sales / GrandTotal * 100; RUN; PROC PRINT DATA = shoesummary; ID Style; VAR ExerciseType Sales GrandTotal Percent; TITLE 'Overall Sales Share'; RUN; ***6.8***; LIBNAME athshoes 'c:\MySASLib'; *Create summary variables by exercise type; PROC SQL; CREATE TABLE shoesums AS SELECT *, SUM(Sales) AS TotalByType, (Sales/SUM(Sales))*100 AS PercentByType FROM athshoes.shoesales GROUP BY ExerciseType; QUIT; *Create summary variables for whole data set; PROC SQL; CREATE TABLE shoetotal AS SELECT *, SUM(Sales) AS GrandTotal, (Sales/SUM(Sales))*100 AS Percent FROM athshoes.shoesales; QUIT; ***6.9***; LIBNAME records 'c:\MySASLib'; DATA records.patientmaster; INFILE 'c:\MyRawData\Admit.dat'; INPUT Account LastName $ 8-16 Address $ 17-34 BirthDate MMDDYY10. Sex $ InsCode $ 48-50 @52 LastUpdate MMDDYY10.; FORMAT BirthDate LastUpdate Date9.; RUN; LIBNAME records 'c:\MySASLib'; DATA transactions; INFILE 'c:\MyRawData\NewAdmit.dat'; INPUT Account LastName $ 8-16 Address $ 17-34 BirthDate MMDDYY10. Sex $ InsCode $ 48-50 @52 LastUpdate MMDDYY10.; RUN; PROC SORT DATA = transactions; BY Account; RUN; * Update patient data with transactions; DATA records.patientmaster; UPDATE records.patientmaster transactions; BY Account; RUN; ***6.11***; PROC IMPORT DATAFILE = 'c:\MyRawData\CustAdd.txt' OUT = customer REPLACE; PROC IMPORT DATAFILE = 'c:\MyRawData\OrdersQ3.txt' OUT = orders REPLACE; RUN; PROC SORT DATA = orders; BY CustNum; RUN; * Combine the data sets using the IN= option; DATA noorders; MERGE customer orders (IN = Recent); BY CustNum; IF Recent = 0; RUN; ***6.12***; *Input the data and create two subsets; DATA tallpeaks (WHERE = (Height > 6000)) american (WHERE = (Continent CONTAINS ('America'))); INFILE 'c:\MyRawData\Mountains.dat'; INPUT Name $1-14 Continent $15-28 Height; RUN; PROC PRINT DATA = tallpeaks; TITLE 'Members of the Seven Summits above 6,000 Meters'; RUN; PROC PRINT DATA = american; TITLE 'Members of the Seven Summits in the Americas'; RUN; ***6.13***; PROC IMPORT DATAFILE = 'c:\MyRawData\Transpos.csv' OUT = baseball REPLACE; PROC SORT DATA = baseball; BY Team Player; RUN; * Transpose data so salary and batavg are variables; PROC TRANSPOSE DATA = baseball OUT = flipped; BY Team Player; ID Type; VAR Entry; RUN; ***6.14***; DATA walkers; INFILE 'c:\MyRawData\Walk.dat'; INPUT Entry AgeGroup $ Time @@; RUN; PROC SORT DATA = walkers; BY Time; RUN; * Create a new variable, Place; DATA ordered; SET walkers; Place = _N_; RUN; PROC PRINT DATA = ordered; ID Place; TITLE 'Results of Walk'; RUN; PROC SORT DATA = ordered; BY AgeGroup Time; RUN; * Keep the first observation in each age group; DATA winners; SET ordered; BY AgeGroup; IF FIRST.AgeGroup = 1; PROC PRINT DATA = winners; ID Place; TITLE 'Winners in Each Age Group'; RUN;