/* SAS example code for multidimensional scaling and correspondence analysis: */

/* ******************************************************************************************** */
/* University Crime example                                                                     */
/* ******************************************************************************************** */

/* These are the per capita data, already scaled by dividing through by the std. deviations.   */
/* The values are read with list input, so only the order of the fields on each line matters.  */

DATA unvcrstd;
    INPUT Number Violentcrime Burglary Larcenytheft Motorvehicletheft Arson;
    CARDS;
1 4.37156490 4.331006574 2.4563778 1.7088723 4.5454950
2 0.26261707 0.137170029 0.7378213 0.3849700 0.6826643
3 1.31996603 0.306419583 0.7031289 0.0000000 0.7624906
4 0.40810544 0.103142637 0.7047725 0.1994137 0.0000000
5 0.00000000 0.154591004 0.6306380 0.0000000 0.0000000
6 0.36745076 0.000000000 0.0000000 0.0000000 0.0000000
7 0.00000000 0.000000000 0.6672249 0.0000000 0.0000000
8 0.14939574 0.241648463 1.0935995 1.1679947 0.0000000
9 0.32294554 0.081619726 0.2746913 0.1578018 0.0000000
10 0.47945610 0.129253829 0.6426196 0.0000000 0.0000000
11 1.13769244 0.061340800 4.5745799 0.0000000 0.0000000
12 0.05391978 0.000000000 0.2334851 0.6323274 0.0000000
13 0.00000000 0.065941360 0.6556898 0.0000000 0.0000000
14 1.87233779 2.003237839 1.3633407 3.6595432 0.0000000
15 0.00000000 0.976296175 0.3733780 2.8313248 0.0000000
16 0.52478650 0.188632255 0.5590940 0.0000000 0.0000000
17 0.04780268 0.006443425 0.1577117 0.1868637 0.0000000
18 0.00000000 0.000000000 0.3461318 0.6907150 0.0000000
19 0.25400344 0.205426004 0.2902497 1.1584014 0.0000000
20 0.11742578 0.063312291 0.4963740 0.0000000 0.0000000
21 0.27135623 0.207267892 0.5408891 0.7071657 0.0000000
;
RUN;

/* Calculating Euclidean distances between these 21 scaled observations using PROC DISTANCE */

PROC DISTANCE DATA=unvcrstd OUT=univdist;
    VAR INTERVAL (Violentcrime Burglary Larcenytheft Motorvehicletheft Arson);
RUN;

/* Using PROC MDS to do the multidimensional scaling.                      */
/* DIM= is set to 5, the TOTAL number of variables used in the MDS.        */
/* OCONFIG asks for the configuration (coordinates) in the OUT= data set.  */

PROC MDS DATA=univdist LEVEL=ABSOLUTE FIT=D FORMULA=1 DIM=5 OUT=COORDS OCONFIG;
RUN;

/* The "Badness-of-Fit" criterion printed by SAS is   */
/* related to SStress, although not quite equivalent. */

/* Creating a data set that contains the coordinates  */
/* of the "best" configuration of points to           */
/* fit these distances.                               */
/* The MERGE has no BY statement, so rows are paired  */
/* by position: row i of unvcrstd with row i of       */
/* COORDS.                                            */
/* NOTE(review): this assumes COORDS holds exactly    */
/* one row per observation, in the input order --     */
/* confirm against the PROC PRINT output below.       */
/* Only Dim1-Dim5 are kept, because DIM=5 above       */
/* produces five coordinate variables.                */

DATA univcoor;
    MERGE unvcrstd COORDS;
    KEEP Number Dim1-Dim5;
RUN;

/* Printing out the coordinates:                                        */
/* Note that although we print the coordinates for all q=5 dimensions,  */
/* we will only use the first 2 or 3 dimensions, as in the R example.   */

PROC PRINT DATA=univcoor;
RUN;

/* Plotting the MDS solution for k = 2 dimensions:             */
/* first an unlabelled plot, then one with each point labelled */
/* by its observation number.                                  */

PROC PLOT DATA=univcoor;
    PLOT Dim2*Dim1;
    PLOT Dim2*Dim1='*' $ Number;
RUN;

/* ******************************************************************************************** */
/* Inputting the distances for a USA cities data set                                            */
/* ******************************************************************************************** */

/* The distance matrix is lower-triangular and read with FORMATTED input:  */
/* ten numeric fields of width 5 (columns 1-50), then the city name        */
/* starting in column 56.  The data lines are therefore column-sensitive   */
/* and must not be indented or re-spaced; blank fields are read as missing.*/

DATA city;
    INPUT (Atlanta Chicago Denver Houston LosAngeles Miami NewYork
           SanFrancisco Seattle WashingtonDC) (5.)
          @56 City $15.;
    DATALINES;
    0                                                  Atlanta
  587    0                                             Chicago
 1212  920    0                                        Denver
  701  940  879    0                                   Houston
 1936 1745  831 1374    0                              Los Angeles
  604 1188 1726  968 2339    0                         Miami
  748  713 1631 1420 2451 1092    0                    New York
 2139 1858  949 1645  347 2594 2571    0               San Francisco
 2182 1737 1021 1891  959 2734 2408  678    0          Seattle
  543  597 1494 1220 2300  923  205 2442 2329    0     Washington D.C.
;
RUN;

/* MDS on the city distances; the ID statement names the variable (City)  */
/* that identifies each object.  Note that OUT=coords replaces the COORDS */
/* data set created in the university example above.                      */

PROC MDS DATA=city OUT=coords LEVEL=ABSOLUTE;
    ID city;
RUN;

PROC PRINT DATA=coords;
RUN;

/* Plotting the city MDS solution, labelling each point with its city name: */

PROC PLOT DATA=coords;
    PLOT Dim2*Dim1='*' $ City;
RUN;

/* The "Badness-of-Fit" criterion printed by SAS is   */
/* related to SStress, although not quite equivalent. */

/* Plotting the MDS solution for k = 2 dimensions:                          */
/* NOTE(review): this replots the UNIVERSITY configuration (univcoor), not  */
/* the city one, and repeats the labelled plot produced earlier -- possibly */
/* a leftover; kept so the program's output is unchanged.                   */

PROC PLOT DATA=univcoor;
    PLOT Dim2*Dim1='*' $ Number;
RUN;