/* ------------------------------------------------------------------ */
/* multiple linear regression                                         */
/* ------------------------------------------------------------------ */

/* Read the CSV into WORK.CARS.
   GUESSINGROWS=MAX makes PROC IMPORT scan every row before choosing
   column types and lengths; with the default only the first rows are
   scanned, so longer character values appearing later in the file could
   be truncated and the string comparisons in the next DATA step would
   silently fail. */
proc import datafile = '/home/u64378492/STAT540_data/cardekho.csv'
    dbms = csv
    out = cars
    replace;
    guessingrows = max;
run;

/* Inspect the variable types/lengths and the levels of the categorical
   variables that are recoded below. */
proc contents data = cars;
run;

proc freq data = cars;
    tables seller_type fuel transmission owner;
run;

/* Build the analysis data set:
   - 0/1 indicator variables from the categorical columns
   - natural log of the selling price
   - keep only Diesel/Petrol cars sold by an Individual or a Dealer
   The original character columns are dropped once the indicators exist. */
data carsreg;
    set cars;

    automatic  = (transmission = 'Automatic');
    diesel     = (fuel = 'Diesel');
    firstowner = (owner = 'First Owner');
    individual = (seller_type = 'Individual');
    log_price  = log(selling_price);

    if fuel in ('Diesel','Petrol') & seller_type in ('Individual','Dealer') then output;

    drop transmission fuel owner seller_type;
run;

/* Model on the raw price scale. */
proc reg data = carsreg;
    model selling_price = year km_driven automatic diesel firstowner individual;
run;
quit; /* PROC REG is interactive: RUN alone does not end it */

/* Same predictors with log(price) as the response; save the leverage (lev)
   and Cook's distance (cooks) for every observation. */
proc reg data = carsreg;
    model log_price = year km_driven automatic diesel firstowner individual;
    output out = carsreg_diag h = lev cookd = cooks;
run;
quit;

/* Put the highest-leverage observations first. */
proc sort data = carsreg_diag;
    by descending lev;
run;

/* ------------------------------------------------------------------ */
/* two-way factorial experiment                                       */
/* ------------------------------------------------------------------ */

/* One observation per data line: response, gene, diet. */
data mouse;
    input resp gene $ diet $;
    datalines;
493 WT LFD
172 WT LFD
617 WT LFD
534 WT LFD
500.5 WT LFD
127 WT LFD
1224 WT LFD
143 WT LFD
852 WT LFD
134 WT LFD
547.5 WT LFD
555 ADIPO LFD
302 ADIPO LFD
434 ADIPO LFD
90 ADIPO LFD
72 ADIPO LFD
489 ADIPO LFD
453 ADIPO LFD
552.5 ADIPO LFD
635.5 ADIPO LFD
330.9 ADIPO LFD
169.5 ADIPO LFD
737.5 ADIPO LFD
595.5 WT HFD
418.5 WT HFD
642 WT HFD
743.5 WT HFD
1351 WT HFD
1180 WT HFD
938.5 WT HFD
670 WT HFD
1319 WT HFD
1007 WT HFD
589 WT HFD
481.5 WT HFD
785 WT HFD
1060 WT HFD
435 WT HFD
535.5 WT HFD
1153 ADIPO HFD
890.5 ADIPO HFD
539 ADIPO HFD
651.5 ADIPO HFD
585 ADIPO HFD
783.5 ADIPO HFD
533.5 ADIPO HFD
895 ADIPO HFD
579 ADIPO HFD
551 ADIPO HFD
800 ADIPO HFD
1026 ADIPO HFD
308 ADIPO HFD
1415 ADIPO HFD
342.5 ADIPO HFD
197.5 ADIPO HFD
;
run;

/* Two-way model with both main effects and the gene-by-diet interaction. */
proc glm data = mouse;
    class gene diet;
    model resp = gene diet gene*diet;
run;
quit; /* PROC GLM is interactive: RUN alone does not end it */

/* ------------------------------------------------------------------ */
/* ANCOVA                                                             */
/* ------------------------------------------------------------------ */

/* One observation per data line: group, change score, age. */
data exercise;
    input grp $ chg age;
    datalines;
aerobic 17.05 31
aerobic 4.96 23
aerobic 10.40 27
aerobic 11.05 28
aerobic 0.26 22
aerobic 2.51 24
running -0.87 23
running -10.74 22
running -3.27 22
running -1.97 25
running 7.50 27
running -7.25 20
;
run;

/* Common-slope model: group effect plus the age covariate. */
proc glm data = exercise;
    class grp;
    model chg = grp age;
run;
quit;

/* add interaction to see if we need different slopes in the two groups */
proc glm data = exercise;
    class grp;
    model chg = grp age grp*age;
    means grp;             /* compute unadjusted means for the two groups */
    lsmeans grp / cl diff; /* compute covariate-adjusted means */
run;
quit;