/* Some of these example codes are based on those in the excellent article */
/* "Handling Missing Data by Maximum Likelihood" by Paul D. Allison of      */
/* Statistical Horizons                                                     */

/**********************************************************************/
/* Part 1: "dental" data, complete version, in long form              */
/**********************************************************************/

/* Data step to read in the "dental" data in long form.               */
/* Record layout: a sex label ("girl"/"boy"), followed by one record   */
/* per subject: subj, then four (age, mm) pairs.  The age column is    */
/* coded 1-4 in the data.  The helper variable s (number of subjects   */
/* in the current sex group) stays in the output data set; only the    */
/* loop counters i and j are dropped.                                  */
DATA dntllong;
    INPUT sex $ @@;
    if sex='girl' then s=11;   /* subjects 1-11  */
    if sex='boy'  then s=16;   /* subjects 12-27 */
    do i = 1 to s;
        INPUT subj @@;
        do j = 1 to 4;
            INPUT age mm @@;
            output;            /* one output row per subject-by-occasion */
        end;
    end;
    drop i j;
    cards;
girl
1 1 21 2 20 3 21.5 4 23
2 1 21 2 21.5 3 24 4 25.5
3 1 20.5 2 24 3 24.5 4 26
4 1 23.5 2 24.5 3 25 4 26.5
5 1 21.5 2 23 3 22.5 4 23.5
6 1 20 2 21 3 21 4 22.5
7 1 21.5 2 22.5 3 23 4 25
8 1 23 2 23 3 23.5 4 24
9 1 20 2 21 3 22 4 21.5
10 1 16.5 2 19 3 19 4 19.5
11 1 24.5 2 25 3 28 4 28
boy
12 1 26 2 25 3 29 4 31
13 1 21.5 2 22.5 3 23 4 26.5
14 1 23 2 22.5 3 24 4 27.5
15 1 25.5 2 27.5 3 26.5 4 27
16 1 20 2 23.5 3 22.5 4 26
17 1 24.5 2 25.5 3 27 4 28.5
18 1 22 2 22 3 24.5 4 26.5
19 1 24 2 21.5 3 24.5 4 25.5
20 1 23 2 20.5 3 31 4 26
21 1 27.5 2 28 3 31 4 31.5
22 1 23 2 23 3 23.5 4 25
23 1 21.5 2 23.5 3 24 4 28
24 1 17 2 24.5 3 26 4 29.5
25 1 22.5 2 25.5 3 25.5 4 26
26 1 23 2 24.5 3 26 4 30
27 1 22 2 21.5 3 23.5 4 25
;
run;

/* Just for comparison purposes, here is a regression model with the   */
/* full data set.  PROC SURVEYREG is used because it will account for  */
/* the within-subj correlation (CLUSTER subj).                         */
/* The SOLUTION option is needed here: when a CLASS statement is       */
/* present, PROC SURVEYREG does not print the parameter estimates      */
/* unless SOLUTION is requested.                                       */
PROC SURVEYREG data=dntllong;
    CLASS sex;
    MODEL mm = sex age sex*age / solution;
    CLUSTER subj;
run;

/**********************************************************************/
/* Part 2: "dental" data with dropouts                                 */
/**********************************************************************/

/* Data step for reading in the "dental" data in long form.            */
/* These data include dropouts: a "." marks a missing mm measurement.  */
/* Same record layout and same reading logic as dntllong above.        */
DATA dntldrop;
    INPUT sex $ @@;
    if sex='girl' then s=11;   /* subjects 1-11  */
    if sex='boy'  then s=16;   /* subjects 12-27 */
    do i = 1 to s;
        INPUT subj @@;
        do j = 1 to 4;
            INPUT age mm @@;
            output;
        end;
    end;
    drop i j;
    cards;
girl
1 1 21 2 20 3 21.5 4 23
2 1 21 2 21.5 3 . 4 .
3 1 20.5 2 24 3 24.5 4 26
4 1 23.5 2 24.5 3 25 4 .
5 1 21.5 2 23 3 22.5 4 23.5
6 1 20 2 21 3 21 4 22.5
7 1 21.5 2 22.5 3 23 4 .
8 1 23 2 23 3 23.5 4 24
9 1 20 2 21 3 22 4 21.5
10 1 16.5 2 19 3 19 4 19.5
11 1 24.5 2 . 3 . 4 .
boy
12 1 26 2 25 3 29 4 31
13 1 21.5 2 22.5 3 23 4 26.5
14 1 23 2 22.5 3 24 4 .
15 1 25.5 2 27.5 3 26.5 4 .
16 1 20 2 23.5 3 22.5 4 26
17 1 24.5 2 25.5 3 27 4 28.5
18 1 22 2 22 3 . 4 .
19 1 24 2 21.5 3 24.5 4 25.5
20 1 23 2 20.5 3 31 4 26
21 1 27.5 2 . 3 . 4 .
22 1 23 2 23 3 23.5 4 25
23 1 21.5 2 23.5 3 24 4 28
24 1 17 2 24.5 3 26 4 .
25 1 22.5 2 25.5 3 25.5 4 26
26 1 23 2 24.5 3 . 4 .
27 1 22 2 21.5 3 23.5 4 25
;
run;

/* Using PROC MIXED on the data with missing values.                   */
/* This is the likelihood-based approach: all available observations   */
/* from every subject contribute to the fit.  Note that PROC MIXED     */
/* estimates by REML unless told otherwise; add METHOD=ML to the       */
/* PROC MIXED statement to obtain full maximum likelihood estimates.   */
PROC MIXED data=dntldrop;
    CLASS sex;
    MODEL mm = sex age sex*age / solution;
    RANDOM intercept / SUBJECT=subj;
run;

/* The above "random intercepts" model implies a */
/* compound symmetry covariance structure        */

/* Using PROC MIXED on the data with missing values, */
/* specifying an unstructured covariance structure.  */
/* age_cat is a copy of age that can be listed in CLASS and used in    */
/* REPEATED, while age itself stays a continuous regressor in MODEL.   */
DATA dntldrop;
    SET dntldrop;
    age_cat = age;
run;

PROC MIXED data=dntldrop;
    CLASS age_cat sex;
    MODEL mm = sex age sex*age / solution;
    REPEATED age_cat / SUBJECT=subj TYPE=un;
run;

/* Using PROC MIXED on the data with missing values,  */
/* specifying an autoregressive covariance structure. */
/* (age_cat was already added to dntldrop above, so the data step      */
/* does not need to be repeated.)                                      */
PROC MIXED data=dntldrop;
    CLASS age_cat sex;
    MODEL mm = sex age sex*age / solution;
    REPEATED age_cat / SUBJECT=subj TYPE=AR(1);
run;

/**********************************************************************/
/* Multiple Imputation in Regression                                   */
/**********************************************************************/

/* FILENAME is a global statement, so it is declared ahead of the      */
/* data step that uses it.                                             */
FILENAME webpage URL 'http://people.stat.sc.edu/hitchcock/nlsmissdata.txt';

data nlsmiss;
    INFILE webpage;
    input anti self pov black hispanic childage divorce gender momage momwork;
run;

/* Listwise deletion (for comparison) */
PROC REG data=nlsmiss;
    MODEL anti=self pov black hispanic childage divorce gender momage momwork;
run;

/* Multiple imputation by fully conditional specification (FCS), with  */
/* predictive mean matching (REGPMM) for the listed predictors.        */
/* SEED= fixes the random number stream so the ten imputed data sets   */
/* (and every downstream estimate) are reproducible from run to run.   */
/* Note: NIMPUTE=0 in the PROC MI statement skips the imputation step; */
/* combined with an EM statement it gives ML estimates instead.        */
PROC MI data=nlsmiss nimpute=10 seed=54321 out=mi_mvn;
    VAR anti self pov black hispanic childage divorce gender momage momwork;
    FCS regpmm(self pov black hispanic childage divorce gender momage momwork);
run;

/* Fit the regression separately within each imputed data set and      */
/* capture the parameter estimates for pooling.                        */
TITLE "MULTIPLE IMPUTATION REGRESSION - FCS (PMM)";
PROC GLM data=mi_mvn;
    MODEL anti = self pov black hispanic childage divorce gender momage momwork;
    BY _imputation_;
    ods output ParameterEstimates=a_mvn;
run;
quit;

/* Pool the per-imputation estimates into a single set of results. */
PROC MIANALYZE parms=a_mvn;
    MODELEFFECTS intercept self pov black hispanic childage divorce gender momage momwork;
run;