# R Examples for Chapter 14:  Inference for Categorical Data

## Example 1:
# The M&Ms data set:
# The observed counts are 61, 64, 54, 61, 96, 64.

# We test whether the corresponding cell probabilities are
# (0.13, 0.14, 0.13, 0.24, 0.20, 0.16) using  the chisq.test() function in R:

chisq.test(c(61, 64, 54, 61, 96, 64), p=c(0.13, 0.14, 0.13, 0.24, 0.20, 0.16), correct=FALSE)

## Example 2(a):

# Category counts are 229, 211, 93, 35, 8.

# Category probabilities based on a Poisson(1) distribution:
pois.probs <- c(dpois(0:3,lambda=1), ppois(3,lambda=1,lower=F))
sum(pois.probs)  # check that they sum to 1...

# Expected counts under H0:
pois.probs*576
# All are at least 5...

chisq.test(c(229, 211, 93, 35, 8), p=pois.probs, correct=FALSE)

## Example 2(b):

# Category counts are 229, 211, 93, 35, 8.

# Category probabilities based on a Poisson distribution (mean unspecified):

# First need to estimate lambda using MLE (see notes)

lambda.hat_ML <- 534/576

pois.probs.est <- c(dpois(0:3,lambda=lambda.hat_ML), ppois(3,lambda=lambda.hat_ML,lower=F))
sum(pois.probs.est)  # check that they sum to 1...

# Expected counts under H0:
pois.probs.est*576
# All are at least 5...

# Can't use 'chisq.test' directly (R doesn't know to take away 1 d.f. for the estimated parameter):

chisq.stat <- chisq.test(c(229, 211, 93, 35, 8), p=pois.probs.est, correct=FALSE)$statistic
print(paste("Chi-square test stat= ",round(chisq.stat,2),"P-value= ", round(1-pchisq(chisq.stat,df=3),3) ))

## Example 3: 
# R code to analyze an r by c contingency table
# and do chi-squared test of independence

# We look at the happiness / marital status data set:

# The observed counts may be input as a matrix as follows:

happy.data <- matrix(c(1320,467,603,93,119,127), nrow=2, ncol=3, byrow=TRUE, 
dimnames = list(c("Very/Pretty Happy", "Not too Happy"), c("Married", "Divorced/Separated", "Never Married")))

# Printing the 2 by 3 table:

as.table(happy.data)

# We test for independence of the two classifications
# using the chisq.test() function in R:

chisq.test(happy.data, correct=FALSE)

# The P-value is near zero.

# A quick way to get expected cell counts to verify large-sample assumption is met:
expected.counts <- (apply(happy.data,1,sum) %o% apply(happy.data,2,sum))/sum(happy.data)
print(expected.counts)


## Example 5:

# R code to analyze an r by c contingency table
# and do chi-squared test of homogeneity

# We look at the political ideology data set:

# The observed counts may be input as a matrix as follows:

polit.data <- matrix(c(13,21,16,17,13,12,20,16,22), nrow=3, ncol=3, byrow=TRUE, 
dimnames = list(c("Liberal", "Moderate", "Conservative"), c("Clemson", "UofSC", "CofC")))

# Printing the 3 by 3 table:

as.table(polit.data)

# We test for homogeneity
# using the chisq.test() function in R:

chisq.test(polit.data, correct=FALSE)

# The P-value is 0.416.

# A quick way to get expected cell counts to verify large-sample assumption is met:

expected.counts <- (apply(polit.data,1,sum) %o% apply(polit.data,2,sum))/sum(polit.data)
print(expected.counts)