/* ------------------------------------------------------------------ */
/* Plot 17 LiDAR heights: BY-group processing with FIRST. and LAST.   */
/*                                                                    */
/* Import the comma-delimited text file Plot17Lidar.txt from the      */
/* NODUPKEY example.  Plot17lidar is a subset of a comma-delimited    */
/* dataset that had approximately 600,000 height observations         */
/* recorded in a 4x4 meter grid; there are many instances of          */
/* duplicate heights at the same X,Y coordinates.                     */
/* ------------------------------------------------------------------ */
proc import
    datafile="/home/grego1/STAT 540/Plot17lidar.txt"
    out=plot17lidar
    dbms=csv
    replace;
run;

/* Copy X, Y, Z into descriptively named variables and discard the    */
/* original columns when the data set is written.                     */
data lidar17 (drop=x y z);
    set Plot17lidar;
    UTMEast  = X;
    UTMNorth = Y;
    Height   = Z;
run;

/* Sort by the group variable(s), then by the measurement.  Within    */
/* each coordinate pair the smallest Height is now the first record   */
/* and the largest is the last, so FIRST./LAST. identify the group    */
/* "winners".                                                         */
proc sort data=lidar17;
    by UTMEast UTMNorth Height;
run;

/* Store the automatic FIRST. and LAST. indicators as ordinary        */
/* variables so the ordering of the data set can be inspected.        */
data sort17;
    set lidar17;
    by UTMEast UTMNorth;
    EFirst = FIRST.UTMEast;
    ELast  = LAST.UTMEast;
    NFirst = FIRST.UTMNorth;
    NLast  = LAST.UTMNorth;
run;

/* Whenever ELast=1, NLast=1 too; whenever EFirst=1, NFirst=1.        */
/* This simplifies the logic in the later steps: only the UTMNorth    */
/* indicators need to be tested.                                      */
/* Notes from inspecting the tables:                                  */
/*   - there are 8792 unique sets of coordinates                      */
/*   - UTMEast has only 24 unique values                              */
/*   - 30938 of the records are duplicates                            */
proc freq data=sort17;
    tables ELast*NLast;
    tables EFirst*NFirst;
run;

/* Find the max and min elevation for each of the 8792 unique sets    */
/* of coordinates.                                                    */

/* Max17: keep only the last (highest) record of each coordinate pair */
data Max17 (drop=Height ELast NLast EFirst NFirst);
    set sort17;
    by UTMEast UTMNorth;
    if not LAST.UTMNorth then delete;
    Max = Height;
run;

/* Min17: keep only the first (lowest) record of each coordinate pair */
data Min17 (drop=Height ELast NLast EFirst NFirst);
    set sort17;
    by UTMEast UTMNorth;
    if FIRST.UTMNorth then do;
        Min = Height;
        output;
    end;
run;

/* Match-merge Max17 and Min17 to find the range (Delta) for each of  */
/* the unique coordinates.                                            */
data diff;
    merge Max17 Min17;
    by UTMEast UTMNorth;
    Delta = Max - Min;
run;

proc print data=diff (obs=20);
run;

/* Plots of max, min and range over the coordinate grid */
proc g3d data=diff;
    scatter UTMEast*UTMNorth=Max / noneedle shape="balloon" size=0.1;
run;

proc g3d data=diff;
    scatter UTMEast*UTMNorth=Min / noneedle shape="balloon" size=0.1;
run;

proc g3d data=diff;
    scatter UTMEast*UTMNorth=Delta / noneedle shape="balloon" size=0.1;
run;

/* Separate the data based on whether the coordinates appear only     */
/* once: a record that is both the first and the last of its          */
/* UTMEast/UTMNorth group has no duplicates.                          */
data unique duplicate;
    set sort17;
    by UTMEast UTMNorth;
    if FIRST.UTMNorth and LAST.UTMNorth then output unique;
    else output duplicate;
run;