/* SAS example of factor analysis */

/* Creating a SAS data set for the data for the life expectancy example.  */
/* Country is read with column input from columns 1-21, so every data     */
/* line must start in column 1 and the numeric fields must begin after    */
/* column 21.  The eight numeric variables are then read with list input. */
/* NOTE(review): m0-m75 / w0-w75 are presumably life expectancies for men */
/* and women at ages 0, 25, 50 and 75 -- confirm against the data source. */
/* NOTE(review): Trinidad(62) has m25 = 63 while Trinidad (67) has 43;    */
/* the value is kept as given -- verify against the data source.          */
DATA life;
    INPUT Country $ 1-21 m0 m25 m50 m75 w0 w25 w50 w75;
    CARDS;
Algeria               63 51 30 13 67 54 34 15
Cameroon              34 29 13  5 38 32 17  6
Madagascar            38 30 17  7 38 34 20  7
Mauritius             59 42 20  6 64 46 25  8
Reunion               56 38 18  7 62 46 25 10
Seychelles            62 44 24  7 69 50 28 14
South Africa(C)       50 39 20  7 55 43 23  8
South Africa(W)       65 44 22  7 72 50 27  9
Tunisia               56 46 24 11 63 54 33 19
Canada                69 47 24  8 75 53 29 10
CostaRica             65 48 26  9 68 50 27 10
Dominican Rep         64 50 28 11 66 51 29 11
El Salvador           56 44 25 10 61 48 27 12
Greenland             60 44 22  6 65 45 25  9
Grenada               61 45 22  8 65 49 27 10
Guatemala             49 40 22  9 51 41 23  8
Honduras              59 42 22  6 61 43 22  7
Jamaica               63 44 23  8 67 48 26  9
Mexico                59 44 24  8 63 46 25  8
Nicaragua             65 48 28 14 68 51 29 13
Panama                65 48 26  9 67 49 27 10
Trinidad(62)          64 63 21  7 68 47 25  9
Trinidad (67)         64 43 21  6 68 47 24  8
United States (66)    67 45 23  8 74 51 28 10
United States (NW66)  61 40 21 10 67 46 25 11
United States (W66)   68 46 23  8 75 52 29 10
United States (67)    67 45 23  8 74 51 28 10
Argentina             65 46 24  9 71 51 28 10
Chile                 59 43 23 10 66 49 27 12
Columbia              58 44 24  9 62 47 25 10
Ecuador               57 46 28  9 60 49 28 11
;
RUN;

/* ML factor analysis with a varimax rotation */
PROC FACTOR DATA=life
    HEY               /* Abbreviation of HEYWOOD: communalities above 1 are set to 1 so iteration can continue */
    METHOD=ML         /* Change this to METHOD=PRIN for a principal factor solution */
    PRIORS=MAX        /* Prior communalities: largest absolute correlation of each variable with any other */
    ROTATE=VARIMAX    /* Change this to ROTATE=NONE for an unrotated solution */
    NFACT=3           /* Specifies the number of factors desired */
    RESIDUALS         /* Request the residual correlations */
    OUTSTAT=factout;  /* Statistics data set read by the diagnostic steps that follow */
    VAR m0 m25 m50 m75 w0 w25 w50 w75;
RUN;

/* ******* Some diagnostic plots ******** */
/* Just copy all this into SAS */

/* Original correlations: the _TYPE_="CORR" rows of the OUTSTAT data set. */
/* _NAME_ is copied to vtemp because PROC TRANSPOSE creates its own       */
/* _NAME_ variable in the output data set.                                */
DATA tempc;
    SET factout;
    WHERE _TYPE_="CORR";
    vtemp=_NAME_;
    KEEP vtemp _NUMERIC_;
RUN;

/* Residual correlations: the _TYPE_="RESIDUAL" rows of the OUTSTAT data set. */
DATA tempr;
    SET factout;
    WHERE _TYPE_="RESIDUAL";
    vtemp=_NAME_;
    KEEP vtemp _NUMERIC_;
RUN;

/* Reshape each matrix to long form: one observation per (vtemp, _NAME_)  */
/* pair with the matrix entry in COL1.  NOTSORTED lets the BY groups be   */
/* taken in the order the rows appear, so the step does not fail when the */
/* variable names are not in alphabetical order (e.g. X1, X2, ..., X10).  */
PROC TRANSPOSE DATA=tempc OUT=tempc2;
    VAR _NUMERIC_;
    BY vtemp NOTSORTED;
RUN;

PROC TRANSPOSE DATA=tempr OUT=tempr2;
    VAR _NUMERIC_;
    BY vtemp NOTSORTED;
RUN;
/* Keep one triangle of each long-form matrix (vtemp > _NAME_ drops the   */
/* diagonal and the duplicate symmetric entries) and build a key, pair,   */
/* from the two variable names so the two matrices can be matched up.     */
DATA tempc3;
    SET tempc2;
    WHERE vtemp>_NAME_;
    pair=trim(vtemp)||trim(_NAME_);
    original=COL1;
    KEEP pair original;
RUN;

DATA tempr3;
    SET tempr2;
    WHERE vtemp>_NAME_;
    pair=trim(vtemp)||trim(_NAME_);
    residual=COL1;
    KEEP pair residual;
RUN;

/* MERGE ... BY requires both inputs in ascending order of the BY         */
/* variable; sort explicitly instead of relying on the variable names     */
/* happening to produce keys that are already in order.                   */
PROC SORT DATA=tempc3;
    BY pair;
RUN;

PROC SORT DATA=tempr3;
    BY pair;
RUN;

/* One observation per variable pair: original, residual and predicted    */
/* (original minus residual) correlation.                                 */
DATA fitdata;
    MERGE tempc3 tempr3;
    BY pair;
    predicted=original-residual;
RUN;

/* Predicted correlations vs. original correlations */
PROC GPLOT DATA=fitdata;
    PLOT predicted*original;
RUN;

/* Residual correlations vs. Predicted correlations */
PROC GPLOT DATA=fitdata;
    PLOT residual*predicted / vref=0;
RUN;

/* Histogram, Stem-and-leaf plot, and summary statistics for the residuals */
PROC UNIVARIATE DATA=fitdata PLOT;
    VAR residual;
    HISTOGRAM residual;
RUN;

/* ************************************************************************* */
/* SAS factor analysis example on a correlation matrix                       */
/* rather than the raw data:                                                 */
/* The first data row (_TYPE_ = N) carries the sample size, 933; the         */
/* remaining rows (_TYPE_ = CORR) are the rows of the correlation matrix.    */
/* Each record must sit on its own line after the CARDS statement.           */
DATA WAIS(TYPE=CORR);
    INPUT _NAME_ $ _TYPE_ $ X1-X13;
    LABEL X1  = 'Information'
          X2  = 'Comprehension'
          X3  = 'Arithmetic'
          X4  = 'Similarities'
          X5  = 'Digit.span'
          X6  = 'Vocabulary'
          X7  = 'Digit.symbol'
          X8  = 'Picture.completion'
          X9  = 'Block.design'
          X10 = 'Picture.arrangement'
          X11 = 'Object.assembly'
          X12 = 'Age'
          X13 = 'Education'
          ;
    CARDS;
N   N    933   933   933   933   933   933   933   933   933   933   933   933   933
X1  CORR  1.00  0.67  0.62  0.66  0.47  0.81  0.47  0.60  0.49  0.51  0.41 -0.07  0.66
X2  CORR  0.67  1.00  0.54  0.60  0.39  0.72  0.40  0.54  0.45  0.49  0.38 -0.08  0.52
X3  CORR  0.62  0.54  1.00  0.51  0.51  0.58  0.41  0.46  0.48  0.43  0.37 -0.08  0.49
X4  CORR  0.66  0.60  0.51  1.00  0.41  0.68  0.49  0.56  0.50  0.50  0.41 -0.19  0.55
X5  CORR  0.47  0.39  0.51  0.41  1.00  0.45  0.45  0.42  0.39  0.42  0.31 -0.19  0.43
X6  CORR  0.81  0.72  0.58  0.68  0.45  1.00  0.49  0.57  0.46  0.52  0.40 -0.02  0.62
X7  CORR  0.47  0.40  0.41  0.49  0.45  0.49  1.00  0.50  0.50  0.52  0.46 -0.46  0.57
X8  CORR  0.60  0.54  0.46  0.56  0.42  0.57  0.50  1.00  0.61  0.59  0.51 -0.28  0.48
X9  CORR  0.49  0.45  0.48  0.50  0.39  0.46  0.50  0.61  1.00  0.54  0.59 -0.32  0.44
X10 CORR  0.51  0.49  0.43  0.50  0.42  0.52  0.52  0.59  0.54  1.00  0.46 -0.37  0.49
X11 CORR  0.41  0.38  0.37  0.41  0.31  0.40  0.46  0.51  0.59  0.46  1.00 -0.28  0.40
X12 CORR -0.07 -0.08 -0.08 -0.19 -0.19 -0.02 -0.46 -0.28 -0.32 -0.37 -0.28  1.00 -0.29
X13 CORR  0.66  0.52  0.49  0.55  0.43  0.62  0.57  0.48  0.44  0.49  0.40 -0.29  1.00
;
RUN;

/* ML factor analysis of the correlation matrix with a varimax rotation.  */
/* HEY is the abbreviation of HEYWOOD; NFACT=6 requests six factors.      */
/* NOTE: OUTSTAT=factout replaces any factout data set written earlier.   */
PROC FACTOR DATA=WAIS HEY METHOD=ML PRIORS=MAX ROTATE=VARIMAX NFACT=6
            RESIDUALS OUTSTAT=factout;
    VAR X1-X13;
RUN;

/* SAS automatically recognizes that the "data" form a correlation matrix, */
/* not actual raw data, because the data set was created with TYPE=CORR.   */