# Chi-squared test of independence on an r by c contingency table ----
#
# Data set: snoring / heart disease.
# Rows are heart-disease status, columns are snoring frequency.
# The observed counts are entered row by row as a matrix:
snore.heart.data <- matrix(
  c(24, 35, 51,
    1355, 603, 416),
  nrow = 2, ncol = 3, byrow = TRUE,
  dimnames = list(
    c("Yes", "No"),
    c("Never", "Occasionally", "~ Every Night")
  )
)

# Print the 2 by 3 table.
# print() is explicit so the output also appears when the file is source()d.
print(as.table(snore.heart.data))

# Test for independence of the two classifications with chisq.test().
# `correct` only affects 2 by 2 tables, so it is a no-op for this 2 by 3 table;
# it is kept to make the "no continuity correction" intent explicit.
print(chisq.test(snore.heart.data, correct = FALSE))
# The P-value is near zero.

# Expected cell counts, to verify that the large-sample assumption is met:
# (row total * column total) / grand total for every cell.
expected.counts <- outer(rowSums(snore.heart.data), colSums(snore.heart.data)) /
  sum(snore.heart.data)
print(expected.counts)

####################################################################

# Chi-squared test of homogeneity on an r by c contingency table ----
#
# Data set: voter survey.
# Rows are regions, columns are survey responses.
# The observed counts are entered row by row as a matrix:
voter.data <- matrix(
  c(61, 50, 39,
    56, 40, 54,
    47, 37, 66),
  nrow = 3, ncol = 3, byrow = TRUE,
  dimnames = list(
    c("Upstate", "Midlands", "Coastal"),
    c("Approve", "Neutral", "Disapprove")
  )
)

# Print the 3 by 3 table:
print(as.table(voter.data))

# Test for homogeneity with chisq.test()
# (`correct` has no effect on a 3 by 3 table; see the note in the first section):
print(chisq.test(voter.data, correct = FALSE))
# The P-value is 0.027.

# Expected cell counts, to verify that the large-sample assumption is met.
# NOTE: this overwrites the expected.counts computed for the snoring data.
expected.counts <- outer(rowSums(voter.data), colSums(voter.data)) /
  sum(voter.data)
print(expected.counts)

####################################################################

# Fisher's exact test ----
#
# Made-up small data set (3 by 3 table), with the same row and column
# labels as the voter survey but much smaller counts.
small.data <- matrix(
  c(6, 5, 3,
    5, 4, 5,
    4, 3, 6),
  nrow = 3, ncol = 3, byrow = TRUE,
  dimnames = list(
    c("Upstate", "Midlands", "Coastal"),
    c("Approve", "Neutral", "Disapprove")
  )
)

# Print the 3 by 3 table:
print(as.table(small.data))

print(fisher.test(small.data))