## Loglinear model Examples:

## Example 1: 

vice.data <-
array(c(911,44,538,456,
        3,2,43,279),
      dim = c(2, 2, 2),
      dimnames = list(
          Cigarette = c("Yes", "No"),
          Marijuana = c("Yes", "No"),
          Alcohol = c("Yes", "No")))
vice.data <- as.table(vice.data)
print(vice.data)


library(MASS) # load the "MASS" package

# No-interaction model:

loglm(~ Cigarette + Marijuana + Alcohol, data=vice.data)  

# Model with only two-way interactions:

loglm(~ Cigarette + Marijuana + Alcohol + Cigarette:Marijuana + Cigarette:Alcohol + Marijuana:Alcohol, data=vice.data)


# The 'step' function picks the "best" model according to the Akaike Information Criterion (AIC):

# Start with the "saturated model" containing ALL possible interactions and work backward:

saturated.model <- loglm(~ Cigarette * Marijuana * Alcohol, data=vice.data)

step(saturated.model, direction="backward")

# The best model?

best.model <-loglm(~ Cigarette + Marijuana + Alcohol + Cigarette:Marijuana + Cigarette:Alcohol + Marijuana:Alcohol, data=vice.data)

best.model

print(fitted(best.model))

print(coef(best.model))

## Some odds ratios:

# Does the odds of a cigarette smoker using marijuana (relative to a non-cigarette smoker) depend on alcohol use?

# Odds Ratio[Cig. Smoker Odds / Cig. Non-Smoker Odds], for Alcohol=Yes:
 
(910.38159 / 538.6184) / (44.61841 / 455.3816)

# Odds Ratio[Cig. Smoker Odds / Cig. Non-Smoker Odds], for Alcohol=No:

(3.616797 / 42.38354) / (1.383203 / 279.61646)

####################################################

## Example 2:

cuttings.data <-
array(c(156,84,84,156,
        107,133,31,209),
      dim = c(2, 2, 2),
      dimnames = list(
          Survival = c("Alive", "Dead"),
          Planting.Time = c("Early", "Late"),
          Length = c("Long", "Short")))
cuttings.data <- as.table(cuttings.data)
print(cuttings.data)

library(MASS) # load the "MASS" package

# The 'step' function picks the "best" model according to the Akaike Information Criterion (AIC):

# Start with the "saturated model" containing ALL possible interactions and work backward:

saturated.model <- loglm(~ Survival * Planting.Time * Length, data=cuttings.data)

step(saturated.model, direction="backward")

# Slightly simpler model:

simp.model <- loglm(~ Survival + Planting.Time + Length + Survival:Planting.Time + Survival:Length + Planting.Time:Length, data=cuttings.data)

simp.model

# No-interaction model:

v.simp.model <- loglm(~ Survival + Planting.Time + Length, data=cuttings.data)

v.simp.model

print(fitted(simp.model))

print(coef(simp.model))

## Some odds ratios:

# Does the odds of a early plant surviving (relative to a late plant) depend on cutting length?

# Odds Ratio[Early Plant Odds / Late Plant Odds], for Length=Long:
 
(161.09 / 78.91) / (78.90 / 161.09)

# Odds Ratio[Early Plant Odds / Late Plant Odds], for Length=Short:

(101.90 / 138.10) / (36.10 / 203.90)


####################################################

## Example 3:

data(UCBAdmissions)

print(UCBAdmissions)  # print the 2-by-2-by-6 table:

library(MASS) # load the "MASS" package

# The 'step' function picks the "best" model according to the Akaike Information Criterion (AIC):

# Start with the "saturated model" containing ALL possible interactions and work backward:

saturated.model <- loglm(~ Dept * Gender * Admit, data=UCBAdmissions)

step(saturated.model, direction="backward")

# Is it possible to leave off the second-order interaction?

loglm(~ Dept + Gender + Admit + Dept:Gender + Dept:Admit + Gender:Admit, 
    data = UCBAdmissions)


best.model <-  loglm(~ Dept * Gender * Admit, data=UCBAdmissions)

best.model

print(fitted(best.model))

print(coef(best.model))

## Some odds ratios:

# Does the odds of a female being admitted (relative to a male) depend on Dept?

# Odds Ratio[Female Odds / Male Odds], Dept = A:

(89/19)/(512/313)

# Odds Ratio[Female Odds / Male Odds], Dept = B:

(17/8)/(353/207)

# Odds Ratio[Female Odds / Male Odds], Dept = C:

(202/391)/(120/205)

# Odds Ratio[Female Odds / Male Odds], Dept = D:

(131/244)/(138/279)

# Odds Ratio[Female Odds / Male Odds], Dept = E:

(94/299)/(53/138)

# Odds Ratio[Female Odds / Male Odds], Dept = F:

(24/317)/(22/351)


# Illustration of (something like) Simpson's Paradox:

# Are females less likely to be admitted than males?

# Overall:

admit.gender <- margin.table(UCBAdmissions, c(1,2))
admit.gender

round(prop.table(admit.gender, 2), 2)
summary(admit.gender)


# Separately by department:

ftable(UCBAdmissions, row.vars="Dept", col.vars = c("Gender", "Admit"))

# prop.table(UCBAdmissions, c(2,3))

ftable(round(prop.table(UCBAdmissions, c(2,3)), 2),
       row.vars="Dept", col.vars = c("Gender", "Admit"))


# An explanation: Did females and males tend to apply to the same departments?

gender.dept <- margin.table(UCBAdmissions, c(2,3))
gender.dept
round(prop.table(gender.dept, 1), 3)

# Which departments were more selective in their admission decisions?

admit.dept <- margin.table(UCBAdmissions, c(1,3))
admit.dept