/* SAS codes in Chapter 3 */ ***3.1***; LIBNAME hotels 'c:\MySASLib'; DATA hotels.kyotohotels; INFILE 'c:\MyRawData\KyotoHotels.dat'; INPUT Hotel $ 1-25 Yen Kilometers; USD = Yen * 0.0089; RUN; LIBNAME hotels 'C:\MySASLib'; DATA hotels; SET hotels.kyotohotels; Miles = Kilometers * 0.62; RUN; ***3.2***; * Read csv file with PROC IMPORT; PROC IMPORT DATAFILE = 'c:\MyRawData\Garden.csv' OUT = homegarden REPLACE; RUN; * Modify homegarden data set with assignment statements; DATA homegarden; SET homegarden; Zone = 14; Type = 'home'; Zucchini = Zucchini * 10; Total = Tomato + Zucchini + Peas + Grapes; PerTom = (Tomato / Total) * 100; RUN; ***3.3***; LIBNAME pump 'c:\MySASLib'; *Use SAS functions to create and modify variables; DATA pumpkin; SET pump.contest; AvgScore = MEAN(Score1, Score2, Score3); DayEntered = DAY(Date); Type = UPCASE(Type); RUN; ***3.6***; PROC IMPORT DATAFILE = 'c:\MyRawData\Auction.txt' OUT = oldcars REPLACE; RUN; *Use IF-THEN statements to create and modify variables; DATA oldcars; SET oldcars; IF YearMade < 1890 THEN Veteran = 'Yes'; IF Model = 'F-88' THEN DO; Make = 'Oldsmobile'; Seats = 2; END; RUN; ***3.7***; PROC IMPORT DATAFILE = 'c:\MyRawData\Home.txt' OUT = homeimp REPLACE; RUN; DATA homeimprovements; SET homeimp; *Group observations by cost; LENGTH CostGroup $6; IF Cost = . THEN CostGroup = 'TBD'; ELSE IF Cost < 2000 THEN CostGroup = 'low'; ELSE IF Cost < 10000 THEN CostGroup = 'medium'; ELSE CostGroup = 'high'; RUN; ***3.8***; LIBNAME feed'c:\MySASLib'; PROC IMPORT DATAFILE = 'c:\MyRawData\Zoo.csv' OUT = feed.zoo REPLACE; RUN; *Choose only mammals; DATA mammals; SET feed.zoo; IF Class = 'Mammalia'; IF Enclosure =: 'E' THEN Area = 'East'; ELSE IF Enclosure =: 'W' THEN Area = 'West'; RUN; ***3.9***; LIBNAME feed'c:\MySASLib'; *Choose only mammals; PROC SQL; CREATE TABLE mammals AS SELECT * FROM feed.zoo WHERE Class = 'Mammalia'; QUIT; ***3.10***; LIBNAME feed'c:\MySASLib'; *Create data sets for morning and afternoon feedings; DATA morning afternoon; SET feed.zoo; IF FeedTime = 'am' THEN OUTPUT morning; ELSE IF FeedTime = 'pm' THEN OUTPUT afternoon; ELSE IF FeedTime = 'both' THEN OUTPUT; RUN; ***3.11***; PROC IMPORT DATAFILE = 'C:\MyRawData\Schedule.csv' OUT = GameDates REPLACE; RUN; DATA Games; SET GameDates; *If a double header, output twice; IF Type = 'D' THEN DO; OUTPUT; OUTPUT; END; *Else if not a double header output only once; ELSE OUTPUT; RUN; * Create three observations for each data line read * using three OUTPUT statements; DATA theaters; INFILE 'c:\MyRawData\Movies.dat'; INPUT Month $ Location $ Tickets @; OUTPUT; INPUT Location $ Tickets @; OUTPUT; INPUT Location $ Tickets; OUTPUT; RUN; ***3.12***; DATA numyears; DO InterestRate = 0.02 TO 0.06 BY 0.01; *Initialize Savings and Year for each interest rate; Savings = 100; Years = 0; *Find number of years until savings greater than $1000; DO UNTIL (Savings > 1000); Years = Years + 1; Savings = Savings + (InterestRate * Savings); END; *Write results to years data set; OUTPUT; END; RUN; ***3.13***; DATA librarycards; INFILE 'c:\MyRawData\Library.dat' TRUNCOVER; INPUT Name $11. + 1 BirthDate MMDDYY10. +1 IssueDate ANYDTDTE10. DueDate DATE11.; DaysOverDue = TODAY() - DueDate; CurrentAge = INT(YRDIF(BirthDate, TODAY(), 'AGE')); IF IssueDate > '01JAN2020'D THEN NewCard = 'yes'; RUN; PROC PRINT DATA = librarycards; FORMAT Issuedate MMDDYY8. DueDate WEEKDATE17.; TITLE 'SAS Dates without and with Formats'; RUN; ***3.15***; PROC IMPORT DATAFILE = 'c:\MyRawData\Games.txt' OUT = gamestats REPLACE; RUN; * Using RETAIN and sum statements to find most runs and total runs; DATA gamestats; SET gamestats; RETAIN MaxRuns; MaxRuns = MAX(MaxRuns, Runs); RunsToDate + Runs; RUN; ***3.16***; * Create a permanent SAS data set; LIBNAME radio 'c:\MySASLib'; PROC IMPORT DATAFILE = 'c:\MyRawData\KBRK.csv' OUT = radio.songs REPLACE; RUN; * Change all 9s to missing values; DATA fixsongs; SET radio.songs; ARRAY song (5) wj kt tr filp ttr; DO i = 1 TO 5; IF song(i) = 9 THEN song(i) = .; END; RUN; ***3.17***; LIBNAME radio 'c:\MySASLib'; DATA fixsongs; SET radio.songs; ARRAY new (5) Song1 - Song5; ARRAY old (5) wj -- ttr; DO i = 1 TO 5; IF old(i) = 9 THEN new(i) = .; ELSE new(i) = old(i); END; AvgScore = MEAN(OF Song1 - Song5); RUN; ***3.18***; *Read data using ANY rules for variable names; OPTIONS VALIDVARNAME = ANY; PROC IMPORT DATAFILE = 'C:\MyRawData\CampEquip.txt' OUT = campequipment_any REPLACE; RUN; DATA campequipment_any; SET campequipment_any; IF 'Country of Origin'N = 'USA'; PriceDiff = 'Store$'N - 'Online$'N; RUN; *Read data using V7 rules for variable names; OPTIONS VALIDVARNAME = V7; PROC IMPORT DATAFILE = 'C:\LSB6\Data\CampEquip.txt' OUT = CampEquipment_V7 REPLACE; RUN; DATA CampEquipment_V7; SET CampEquipment_V7; IF Country_of_Origin = 'USA'; PriceDiff = Store_ - Online_; RUN;