/* Two different clustering approaches generated two data sets, */
/* best3 and best3xy. Each holds one row per point_id with the   */
/* assigned cluster in a variable called clusname. To compare    */
/* the two solutions we merge them by point_id; because both use */
/* the same variable name for the cluster ID, one of them must   */
/* be renamed first or the merge would overwrite it.             */

/* Cluster assignments from the first approach.                  */
/* The trailing @@ holds the input line so that several          */
/* (point_id, clusname) pairs can be read from each data line.   */
data best3;
  input point_id clusname @@;
  datalines;
1 3 2 2 3 1 4 3 5 3 6 1 7 2 8 2 9 2 10 2
11 1 12 1 13 3 14 3 15 1 16 1 17 2 18 2 19 2 20 3
21 1 22 2 23 3 24 2 25 1 26 1 27 2 28 2 29 3 30 2
31 1 32 3 33 3 34 3 35 3 36 1 37 3 38 1 39 3 40 1
41 2 42 2 43 3 44 3 45 3 46 3 47 1 48 3 49 1 50 3
;
run;

/* Cluster assignments from the second approach. */
data best3xy;
  input point_id clusname @@;
  datalines;
1 2 2 3 3 2 4 2 5 3 6 3 7 1 8 1 9 3 10 2
11 3 12 3 13 1 14 2 15 3 16 1 17 1 18 1 19 1 20 2
21 3 22 1 23 2 24 1 25 3 26 2 27 1 28 1 29 2 30 2
31 1 32 2 33 2 34 1 35 2 36 3 37 2 38 3 39 3 40 3
41 1 42 2 43 1 44 2 45 3 46 2 47 3 48 1 49 3 50 2
;
run;

/* Copy best3 into best3n, renaming clusname to clusn so the two */
/* cluster IDs stay distinct after the merge. The data values    */
/* themselves are carried over unchanged.                        */
data best3n;
  set best3(rename=(clusname=clusn));
run;

/* Sort both inputs by point_id, as required for BY-merging. */
proc sort data=best3n;
  by point_id;
run;

proc sort data=best3xy;
  by point_id;
run;

/* One row per point_id carrying both cluster IDs:               */
/*   clusn    - cluster from best3   (Method 1)                  */
/*   clusname - cluster from best3xy (Method 2)                  */
/* The IN= flags are temporary and are not written to compa.     */
/* They are used only to report points that appear in just one   */
/* input, since PROC FREQ leaves missing values out of the table */
/* by default and such points would otherwise vanish silently.   */
data compa;
  merge best3n(in=in_method1) best3xy(in=in_method2);
  by point_id;
  if not (in_method1 and in_method2) then
    putlog "WARNING: point not present in both cluster data sets: " point_id=;
run;

/* A simple two-way table compares the cluster methods */
proc freq data=compa;
  label clusn='Method 1'
        clusname='Method 2';
  table clusn*clusname;
run;