# HW 3 solution R code. # This is merely the R code; # Actual HWs should also include clearly stated answers # and insightful comments! ################# # 1 my.S <- matrix(c(5,0,0,0,9,0,0,0,9), nrow=3, ncol=3, byrow=T) eigen(my.S) pc1 <- princomp(covmat=my.S) summary(pc1, loadings=T) ################## #2 my.S <- matrix(c(36,5,5,4), nrow=2, ncol=2, byrow=T) pc2a <- princomp(covmat=my.S) summary(pc2a,loadings=T) my.R <- cov2cor(my.S) # or D.minus.1.2 <- diag(c(1/5, 1/2)) my.R <- D.minus.1.2 %*% my.S %*% D.minus.1.2 pc2c <- princomp(covmat=my.R) summary(pc2c,loadings=T) ################### #3 my.cor.mat <- matrix(c(1,.402,.396,.301,.305,.339,.340, .402,1,.618,.150,.135,.206,.183, .396,.618,1,.321,.289,.363,.345, .301,.150,.321,1,.846,.759,.661, .305,.135,.289,.846,1,.797,.800, .339,.206,.363,.759,.797,1,.736, .340,.183,.345,.661,.800,.736,1), ncol=7, nrow=7, byrow=T) pc3 <- princomp(covmat=my.cor.mat) summary(pc3,loadings=T) ################## #4 # 3.6 food.full <- read.table("http://www.stat.sc.edu/~hitchcock/foodstuffs.txt", header=T) food.labels <- as.character(food.full[,1]) food.data <- food.full[,-1] attach(food.data) pairs(food.data) food.pc <- princomp(food.data, cor=T) summary(food.pc,loadings=T) # A scree plot: plot(1:(length(food.pc$sdev)), (food.pc$sdev)^2, type='b', main="Scree Plot", xlab="Number of Components", ylab="Eigenvalue Size") # Plotting the PC scores for the sample data in the space of the first two principal components: par(pty="s") plot(food.pc$scores[,1], food.pc$scores[,2], ylim=range(food.pc$scores[,1]), xlab="PC 1", ylab="PC 2", type ='n', lwd=2) text(food.pc$scores[,1], food.pc$scores[,2], labels=food.labels, cex=0.7, lwd=2) biplot(food.pc,xlabs=food.labels) ################## #4 # 3.7 USairpol.full <- read.table("http://www.stat.sc.edu/~hitchcock/usair.txt", header=T) city.names <- as.character(USairpol.full[,1]) USairpol.data <- USairpol.full[,-(1:2)] # to also remove the SO2 variable as the book does... attach(USairpol.data) # A PCA on the full data set: air.pc.full <- princomp(USairpol.data, cor=T) summary(air.pc.full,loadings=T) biplot(air.pc.full,xlabs=city.names) # To remove one or two outlying observations, try code such as: out.row1 <- 11 # Put the row number of the most severe outlier, Chicago, here USairpol.data.reduced <- USairpol.data[-out.row1,] out.row2 <- 1 # Put the row number of another outlier, Phoenix, here USairpol.data.reduced.more <- USairpol.data[-c(out.row1,out.row2),] # Deleting, say, Chicago: air.pc <- princomp(USairpol.data.reduced, cor=T) summary(air.pc,loadings=T) # A scree plot: plot(1:(length(air.pc$sdev)), (air.pc$sdev)^2, type='b', main="Scree Plot", xlab="Number of Components", ylab="Eigenvalue Size") # Plotting the PC scores for the sample data in the space of the first two principal components: par(pty="s") plot(air.pc$scores[,1], air.pc$scores[,2], ylim=range(air.pc$scores[,1]), xlab="PC 1", ylab="PC 2", type ='n', lwd=2) text(air.pc$scores[,1], air.pc$scores[,2], labels=city.names[-out.row1,], cex=0.7, lwd=2) biplot(air.pc,xlabs=city.names[-out.row1,]) # Deleting both Chicago and Phoenix: air.pc2 <- princomp(USairpol.data.reduced.more, cor=T) summary(air.pc2,loadings=T) # A scree plot: plot(1:(length(air.pc2$sdev)), (air.pc2$sdev)^2, type='b', main="Scree Plot", xlab="Number of Components", ylab="Eigenvalue Size") # Plotting the PC scores for the sample data in the space of the first two principal components: par(pty="s") plot(air.pc2$scores[,1], air.pc2$scores[,2], ylim=range(air.pc2$scores[,1]), xlab="PC 1", ylab="PC 2", type ='n', lwd=2) text(air.pc2$scores[,1], air.pc2$scores[,2], labels=city.names[-c(out.row1,out.row2),], cex=0.7, lwd=2) biplot(air.pc,xlabs=city.names[-c(out.row1,out.row2),])