library(mice) library(VIM) library(lattice) library(ggplot2) nlsmiss <- read.table(file="http://people.stat.sc.edu/hitchcock/nlsmissdata.txt", header=F, na.strings=".") names(nlsmiss)=c('anti', 'self', 'pov', 'black', 'hispanic', 'childage', 'divorce', 'gender', 'momage', 'momwork') # diagnostic plot: pbox(nlsmiss) # Multiple Imputation by the Predictive Mean Matching (PMM) method imp1 <- mice(nlsmiss) # 5 imputed data sets are created by default; use "m=" argument to adjust this # Seeing which values were imputed: imp1$imp$self # More diagnostic plots to compare distributions of original values (blue) to distributions of imputed values (red) xyplot(imp1,anti~self+pov+black+hispanic+childage+divorce+gender+momage+momwork,pch=18,cex=1) densityplot(imp1) stripplot(imp1, pch = 20, cex = 1.2) # completed.Data1 <- complete(imp1, 1) # uses the 1st imputed data set lm.1 <- lm(anti~self+pov+black+hispanic+childage+divorce+gender+momage+momwork, data=completed.Data1) summary(lm.1) # To account for imputation variation, should do the analysis # with SEVERAL imputed data sets (5 are produced by default) # and see whether results change: completed.Data2 <- complete(imp1, 2) # uses the 2nd imputed data set lm.2 <- lm(anti~self+pov+black+hispanic+childage+divorce+gender+momage+momwork, data=completed.Data2) summary(lm.2) completed.Data3 <- complete(imp1, 3) # uses the 3rd imputed data set lm.3 <- lm(anti~self+pov+black+hispanic+childage+divorce+gender+momage+momwork, data=completed.Data3) summary(lm.3) ### Looking at all the imputed data sets at once: # Combining the imputed data with the original data: imp_tot2 <- complete(imp1, 'long', inc=TRUE) # Doing a regression, and pooling estimates and standard errors across all 5 imputed data sets: modelFit1 <- with(imp1,lm(anti~self+pov+black+hispanic+childage+divorce+gender+momage+momwork)) summary(pool(modelFit1)) # Compare to original linear regression with the complete cases only: lm.comp.cases <- lm(anti~self+pov+black+hispanic+childage+divorce+gender+momage+momwork, data=nlsmiss) summary(lm.comp.cases)