# R example of Kruskal-Wallis test # (and multiple comparisons) # The data are the soil measurements we saw in class. # Entering the data: ########## ## # Reading the data into R: ff <- tempfile() cat(file=ff, " 1 26.5 1 15.0 1 18.2 1 19.5 1 23.1 1 17.3 2 16.5 2 15.8 2 14.1 2 30.2 2 25.1 2 17.4 3 19.2 3 21.4 3 26.0 3 21.6 3 35.0 3 28.9 4 26.7 4 37.3 4 28.0 4 30.1 4 33.5 4 26.3 ", sep=" ") soil <- read.table(ff, header=FALSE, col.names=c("location", "claypct")) # Note we could also save the data columns into a file and use a command such as: # soil <- read.table(file = "z:/stat_705/filename.txt", header=FALSE, col.names = c("location", "claypct")) attach(soil) # The data frame called soil is now created, # with two variables, "location", "claypct". ## ######### #################################################################### location <- factor(location) # boxplots for # each of the four groups: boxplot(claypct ~ location) # The Shapiro-Wilk test result # for testing normality for each of the four groups: # Note that with such small sample sizes we have little power # to detect non-normality. tapply(claypct, location, shapiro.test) # The kruskal.test function performs the Kruskal-Wallis test in R. # The response is listed first, and then the factor: kruskal.test(claypct, location) # Equivalently, we could do: kruskal.test(claypct ~ location) # Or if the data are listed in separate vectors: claypct1 <- claypct[location==1]; claypct2 <- claypct[location==2]; claypct3 <- claypct[location==3]; claypct4 <- claypct[location==4]; kruskal.test(list(claypct1, claypct2, claypct3, claypct4)) ######################################################### # Distribution-free Bonferroni Multiple Comparisons in R # Getting the ranks for the data: soilranks <- rank(claypct) # Printing the ranks for the data: cbind(location, claypct, soilranks) # Performing the Bonferroni Multiple Comparisons: group.mean.ranks <- tapply(soilranks,location,mean) # Getting the pairwise differences in sample mean ranks: outer(group.mean.ranks,group.mean.ranks,'-') # Determining which pairs of treatments are significantly different # (For THIS PROBLEM, 10.77 is the Bonferroni cutoff value for EACH comparison!) abs(outer(group.mean.ranks,group.mean.ranks,'-')) > 10.77