# Some other ways of displaying several variables at a time: # A function to standardize a data vector: zstd <- function(vec){ out <- (vec-mean(vec))/sd(vec) return(out) } # Let's read in the husband-wife data set: my.datafile <- tempfile() cat(file=my.datafile, " 49 1809 43 1590 25 25 1841 28 1560 19 40 1659 30 1620 38 52 1779 57 1540 26 58 1616 52 1420 30 32 1695 27 1660 23 43 1730 52 1610 33 47 1740 43 1580 26 31 1685 23 1610 26 26 1735 25 1590 23 ", sep=" ") options(scipen=999) # suppressing scientific notation huswif <- read.table(my.datafile, header=FALSE, col.names=c("Hage", "Hheight", "Wage", "Wheight", "Hagefm")) attach(huswif) # attaching the data frame # The observations are 10 married couples # The first variable is the husband's age (in years). # The second variable is the husband's height (in mm). # The third variable is the wife's age (in years). # The fourth variable is the wife's's height (in mm). # The fifth variable is the husband's age (in years) at first marriage. ############################################################################### ## Profile curves of the 10 observations based on the 5 variables: # First order the variables from smallest (in mean) to largest: ord.vec <- order(apply(huswif,2,mean)) huswif.order <- huswif[, ord.vec] plot(ord.vec, seq(min(huswif.order),max(huswif.order),length=length(ord.vec)), type='n', xlab="Hagefm Wage Hage Wheight Hheight" , ylab="Value") for (ii in 1:nrow(huswif.order)){ lines((1:length(ord.vec)), huswif.order[ii,], col=(1:nrow(huswif.order))[ii], lty=(1:nrow(huswif.order))[ii] ) } # Better to put y-axis on a log scale here: plot(ord.vec, seq(min(huswif.order),max(huswif.order),length=length(ord.vec)), type='n', log="y", xlab="Hagefm Wage Hage Wheight Hheight" , ylab="Value") for (ii in 1:nrow(huswif.order)){ lines((1:length(ord.vec)), huswif.order[ii,], col=(1:nrow(huswif.order))[ii], lty=(1:nrow(huswif.order))[ii] , ylog=T) } legend("topleft", legend=1:nrow(huswif.order), fill=1:nrow(huswif.order), lty = (1:nrow(huswif.order)) ) ## Maybe better to convert heights to inches (from mm) here? ## ## huswif.order[,4:5] <- huswif.order[,4:5]/25.4 ## ############################################################################### ## Profile bars of the 10 observations based on the 5 variables: barplot(t(as.matrix(huswif.order)), beside=T, log="y", names.arg=1:nrow(huswif.order), col=1:5) legend("topleft", legend=names(huswif.order), fill=1:5 , cex=0.5) ############################################################################### # Andrews Plots # It is recommended to standardize the observations before doing Andrews Plots: huswif.std <- apply(huswif,2,zstd) # Using our zstd function from above my.grid.length<- 500 t.vals <- seq(-pi,pi,length=my.grid.length) basis.fcn.1 <- rep( (1/sqrt(2)), times = my.grid.length) basis.fcn.2 <- sin(t.vals) basis.fcn.3 <- cos(t.vals) basis.fcn.4 <- sin(2*t.vals) basis.fcn.5 <- cos(2*t.vals) # If we had more than 5 variables, we would continue with # basis.fcn.6 <- sin(3*t.vals) # basis.fcn.7 <- cos(3*t.vals) # and so on. fourier.matrix <- rbind(basis.fcn.1, basis.fcn.2, basis.fcn.3, basis.fcn.4, basis.fcn.5) my.Andrews.curves <- huswif.std %*% fourier.matrix plot(c(-pi,pi), c(min(my.Andrews.curves),max(my.Andrews.curves)) , type='n', xlab='t', ylab='Fourier series') for (jj in 1:nrow(huswif.std)){ lines(t.vals, my.Andrews.curves[jj,], col=(1:nrow(huswif.std))[jj], lty=(1:nrow(huswif.std))[jj] ) } legend("topleft", legend=1:nrow(huswif.std), fill=1:nrow(huswif.std), lty=(1:nrow(huswif.std)), cex=0.7) # This graphical presentation of the data values preserves the relative pairwise distances: dist(huswif.std)