# leisure time data collect in stat 730 november 2014 d=read.csv("c:/tim/stat730/leisure.csv",header=T,row.names=1) d=read.csv("http://www.stat.sc.edu/~hansont/stat730/leisure.csv",header=T,row.names=1) plot(d[,3:22]) cor(d[,3:22]) f=prcomp(d[,3:22]) summary(f) # ugh! people are highly variable across all activities...spherical plot(f,npcs=20,type="l") f$rotation[,1:2] # interpretation of PC1? PC2? ("zero out" small loadings) plot(f,type="l") biplot(f,scale=0) # activity maps D=dist(t(d[,3:22])) par(mfrow=c(1,2)) f=cmdscale(D,k=2) # not very good because 1st 2 PCAs don't explain much! par(mfrow=c(1,2)) plot(f,xlab="Coord. 1",ylab="Coord. 2",main="Classical MDS",type="n") text(f,labels=colnames(d[,3:22]),cex=.7) library(MASS) # final stress is 15.6, fair to poor f=isoMDS(D) # Kruskal's non-metric multidimensional scaling, pp. 413-415 x=f$points[,1]; y=f$points[,2] plot(x,y,xlab="Coord. 1", ylab="Coord. 2",main="Nonmetric MDS", type="n") text(x,y,labels=colnames(d[,3:22]),cex=.7) # person maps D=dist(d[,3:22]) par(mfrow=c(1,2)) f=cmdscale(D,k=2) par(mfrow=c(1,2)) plot(f,xlab="Coord. 1",ylab="Coord. 2",main="Classical MDS",type="n") text(f,labels=rownames(d[,3:22]),cex=.7) library(MASS) # final stress is 24...poor agreement f=isoMDS(D) # Kruskal's non-metric multidimensional scaling, pp. 413-415 x=f$points[,1]; y=f$points[,2] plot(x,y,xlab="Coord. 1", ylab="Coord. 2",main="Nonmetric MDS", type="n") text(x,y,labels=rownames(d[,3:22]),cex=.7) # both variables and people need to be "viewed" or "clustered" in # a space higher than the plane. we'll try clustering techniques next # eliminating 10 variables using PCA d2=d[,3:22] f=prcomp(d2) f # internet # 7 a=1:20; a=a[-7] f=prcomp(d2[a]) f # culture # 17 a=a[-17] f=prcomp(d2[a]) f # music # 13 a=a[-13] f=prcomp(d2[a]) f # motor # 9 a=a[-9] f=prcomp(d2[a]) f # nightlife # 14 a=a[-14] f=prcomp(d2[a]) f # martial arts # 4 a=a[-4] f=prcomp(d2[a]) f # hunting fishing # 12 a=a[-12] f=prcomp(d2[a]) f # garden crafts # 8 a=a[-8] f=prcomp(d2[a]) f # games # 9 a=a[-9] f=prcomp(d2[a]) f # cardio # 3 a=a[-3] f=prcomp(d2[a]) # left with the following 10 a=c(1,2,5,6,8,9,12,15,19,20) row.names(t(d2[,a])) f=prcomp(d2[,a]) summary(f) # ugh! people are highly variable across all remaining activities