/* SAS codes in Chapter 6 */ ***6.1***; PROC IMPORT DATAFILE = 'c:\MyRawData\South.csv' OUT = southent REPLACE; PROC IMPORT DATAFILE = 'c:\MyRawData\North.csv' OUT = northent REPLACE; RUN; * Create a data set, both, combining northent and southent; * Create a variable, AmountPaid, based on value of variable Age; DATA both; SET southent northent; IF Age = . THEN AmountPaid = .; ELSE IF Age < 3 THEN AmountPaid = 0; ELSE IF Age < 65 THEN AmountPaid = 35; ELSE AmountPaid = 27; RUN; ***6.2***; PROC IMPORT DATAFILE = 'c:\MyRawData\South.csv' OUT = southent REPLACE; PROC IMPORT DATAFILE = 'c:\MyRawData\North.csv' OUT = northent REPLACE; RUN; PROC SORT DATA = northent; BY PassNumber; RUN; * Interleave observations by PassNumber; DATA interleave; SET southent northent; BY PassNumber; RUN; ***6.3***; PROC IMPORT DATAFILE = 'c:\MyRawData\Chocolate.txt' OUT = names REPLACE; PROC IMPORT DATAFILE = 'c:\MyRawData\Chocsales.txt' OUT = sales REPLACE; RUN; PROC SORT DATA = sales; BY Code; RUN; * Merge data sets by Code; DATA chocolates; MERGE sales names; BY Code; RUN; ***6.4***; LIBNAME athshoes 'c:\MySASLib'; DATA athshoes.shoedata; INFILE 'c:\MyRawData\Shoe.dat'; INPUT Style $ 1-15 ExerciseType $ RegularPrice; RUN; PROC SORT DATA = athshoes.shoedata OUT = regular; BY ExerciseType; RUN; DATA athshoes.discount; INFILE 'c:\MyRawData\Disc.dat'; INPUT ExerciseType $ Adjustment; RUN; * Perform many-to-one match merge; DATA prices; MERGE regular athshoes.discount; BY ExerciseType; NewPrice = ROUND(RegularPrice - (RegularPrice * Adjustment), .01); RUN; ***6.5***; LIBNAME athshoes 'c:\MySASLib'; * Perform an inner join using PROC SQL; PROC SQL; CREATE TABLE prices AS SELECT * FROM athshoes.shoedata, athshoes.discount WHERE shoedata.ExerciseType = discount.ExerciseType; QUIT; ***6.6***; LIBNAME athshoes 'c:\MySASLib'; PROC IMPORT DATAFILE = 'c:\MyRawData\Shoesales.txt' OUT = athshoes.shoesales REPLACE; RUN; PROC SORT DATA = athshoes.shoesales OUT = shoes; BY ExerciseType; RUN; * Summarize sales by ExerciseType; PROC MEANS NOPRINT DATA = shoes; VAR Sales; BY ExerciseType; OUTPUT OUT = summarydata SUM(Sales) = Total; RUN; * Merge totals with the original data set; DATA shoesummary; MERGE shoes summarydata; BY ExerciseType; Percent = Sales / Total * 100; RUN; PROC PRINT DATA = shoesummary LABEL; ID ExerciseType; VAR Style Sales Total Percent; LABEL Percent = 'Percent By Type'; TITLE 'Sales Share by Type of Exercise'; RUN; ***6.7***; LIBNAME athshoes 'c:\MySASLib'; * Output grand total of sales to a data set; PROC MEANS NOPRINT DATA = athshoes.shoesales; VAR Sales; OUTPUT OUT = summarydata SUM(Sales) = GrandTotal; RUN; * Combine the grand total with the original data; DATA shoesummary; IF _N_ = 1 THEN SET summarydata; SET athshoes.shoesales; Percent = Sales / GrandTotal * 100; RUN; PROC PRINT DATA = shoesummary; ID Style; VAR ExerciseType Sales GrandTotal Percent; TITLE 'Overall Sales Share'; RUN; ***6.8***; LIBNAME athshoes 'c:\MySASLib'; *Create summary variables by exercise type; PROC SQL; CREATE TABLE shoesums AS SELECT *, SUM(Sales) AS TotalByType, (Sales/SUM(Sales))*100 AS PercentByType FROM athshoes.shoesales GROUP BY ExerciseType; QUIT; *Create summary variables for whole data set; PROC SQL; CREATE TABLE shoetotal AS SELECT *, SUM(Sales) AS GrandTotal, (Sales/SUM(Sales))*100 AS Percent FROM athshoes.shoesales; QUIT; ***6.9***; LIBNAME records 'c:\MySASLib'; DATA records.patientmaster; INFILE 'c:\MyRawData\Admit.dat'; INPUT Account LastName $ 8-16 Address $ 17-34 BirthDate MMDDYY10. Sex $ InsCode $ 48-50 @52 LastUpdate MMDDYY10.; FORMAT BirthDate LastUpdate Date9.; RUN; LIBNAME records 'c:\MySASLib'; DATA transactions; INFILE 'c:\MyRawData\NewAdmit.dat'; INPUT Account LastName $ 8-16 Address $ 17-34 BirthDate MMDDYY10. Sex $ InsCode $ 48-50 @52 LastUpdate MMDDYY10.; RUN; PROC SORT DATA = transactions; BY Account; RUN; * Update patient data with transactions; DATA records.patientmaster; UPDATE records.patientmaster transactions; BY Account; RUN; ***6.11***; PROC IMPORT DATAFILE = 'c:\MyRawData\CustAdd.txt' OUT = customer REPLACE; PROC IMPORT DATAFILE = 'c:\MyRawData\OrdersQ3.txt' OUT = orders REPLACE; RUN; PROC SORT DATA = orders; BY CustNum; RUN; * Combine the data sets using the IN= option; DATA noorders; MERGE customer orders (IN = Recent); BY CustNum; IF Recent = 0; RUN; ***6.12***; *Input the data and create two subsets; DATA tallpeaks (WHERE = (Height > 6000)) american (WHERE = (Continent CONTAINS ('America'))); INFILE 'c:\MyRawData\Mountains.dat'; INPUT Name $1-14 Continent $15-28 Height; RUN; PROC PRINT DATA = tallpeaks; TITLE 'Members of the Seven Summits above 6,000 Meters'; RUN; PROC PRINT DATA = american; TITLE 'Members of the Seven Summits in the Americas'; RUN; ***6.13***; PROC IMPORT DATAFILE = 'c:\MyRawData\Transpos.csv' OUT = baseball REPLACE; PROC SORT DATA = baseball; BY Team Player; RUN; * Transpose data so salary and batavg are variables; PROC TRANSPOSE DATA = baseball OUT = flipped; BY Team Player; ID Type; VAR Entry; RUN; ***6.14***; DATA walkers; INFILE 'c:\MyRawData\Walk.dat'; INPUT Entry AgeGroup $ Time @@; RUN; PROC SORT DATA = walkers; BY Time; RUN; * Create a new variable, Place; DATA ordered; SET walkers; Place = _N_; RUN; PROC PRINT DATA = ordered; ID Place; TITLE 'Results of Walk'; RUN; PROC SORT DATA = ordered; BY AgeGroup Time; RUN; * Keep the first observation in each age group; DATA winners; SET ordered; BY AgeGroup; IF FIRST.AgeGroup = 1; PROC PRINT DATA = winners; ID Place; TITLE 'Winners in Each Age Group'; RUN;