## Loglinear model Examples: ## Example 1: vice.data <- array(c(911,44,538,456, 3,2,43,279), dim = c(2, 2, 2), dimnames = list( Cigarette = c("Yes", "No"), Marijuana = c("Yes", "No"), Alcohol = c("Yes", "No"))) vice.data <- as.table(vice.data) print(vice.data) library(MASS) # load the "MASS" package # No-interaction model: loglm(~ Cigarette + Marijuana + Alcohol, data=vice.data) # Model with only two-way interactions: loglm(~ Cigarette + Marijuana + Alcohol + Cigarette:Marijuana + Cigarette:Alcohol + Marijuana:Alcohol, data=vice.data) # The 'step' function picks the "best" model according to the Akaike Information Criterion (AIC): # Start with the "saturated model" containing ALL possible interactions and work backward: saturated.model <- loglm(~ Cigarette * Marijuana * Alcohol, data=vice.data) step(saturated.model, direction="backward") # The best model? best.model <-loglm(~ Cigarette + Marijuana + Alcohol + Cigarette:Marijuana + Cigarette:Alcohol + Marijuana:Alcohol, data=vice.data) best.model print(fitted(best.model)) print(coef(best.model)) ## Some odds ratios: # Does the odds of a cigarette smoker using marijuana (relative to a non-cigarette smoker) depend on alcohol use? # Odds Ratio[Cig. Smoker Odds / Cig. Non-Smoker Odds], for Alcohol=Yes: (910.38159 / 538.6184) / (44.61841 / 455.3816) # Odds Ratio[Cig. Smoker Odds / Cig. Non-Smoker Odds], for Alcohol=No: (3.616797 / 42.38354) / (1.383203 / 279.61646) #################################################### ## Example 2: cuttings.data <- array(c(156,84,84,156, 107,133,31,209), dim = c(2, 2, 2), dimnames = list( Survival = c("Alive", "Dead"), Planting.Time = c("Early", "Late"), Length = c("Long", "Short"))) cuttings.data <- as.table(cuttings.data) print(cuttings.data) library(MASS) # load the "MASS" package # The 'step' function picks the "best" model according to the Akaike Information Criterion (AIC): # Start with the "saturated model" containing ALL possible interactions and work backward: saturated.model <- loglm(~ Survival * Planting.Time * Length, data=cuttings.data) step(saturated.model, direction="backward") # Slightly simpler model: simp.model <- loglm(~ Survival + Planting.Time + Length + Survival:Planting.Time + Survival:Length + Planting.Time:Length, data=cuttings.data) simp.model # No-interaction model: v.simp.model <- loglm(~ Survival + Planting.Time + Length, data=cuttings.data) v.simp.model print(fitted(simp.model)) print(coef(simp.model)) ## Some odds ratios: # Does the odds of a early plant surviving (relative to a late plant) depend on cutting length? # Odds Ratio[Early Plant Odds / Late Plant Odds], for Length=Long: (161.09 / 78.91) / (78.90 / 161.09) # Odds Ratio[Early Plant Odds / Late Plant Odds], for Length=Short: (101.90 / 138.10) / (36.10 / 203.90) #################################################### ## Example 3: data(UCBAdmissions) print(UCBAdmissions) # print the 2-by-2-by-6 table: library(MASS) # load the "MASS" package # The 'step' function picks the "best" model according to the Akaike Information Criterion (AIC): # Start with the "saturated model" containing ALL possible interactions and work backward: saturated.model <- loglm(~ Dept * Gender * Admit, data=UCBAdmissions) step(saturated.model, direction="backward") # Is it possible to leave off the second-order interaction? loglm(~ Dept + Gender + Admit + Dept:Gender + Dept:Admit + Gender:Admit, data = UCBAdmissions) best.model <- loglm(~ Dept * Gender * Admit, data=UCBAdmissions) best.model print(fitted(best.model)) print(coef(best.model)) ## Some odds ratios: # Does the odds of a female being admitted (relative to a male) depend on Dept? # Odds Ratio[Female Odds / Male Odds], Dept = A: (89/19)/(512/313) # Odds Ratio[Female Odds / Male Odds], Dept = B: (17/8)/(353/207) # Odds Ratio[Female Odds / Male Odds], Dept = C: (202/391)/(120/205) # Odds Ratio[Female Odds / Male Odds], Dept = D: (131/244)/(138/279) # Odds Ratio[Female Odds / Male Odds], Dept = E: (94/299)/(53/138) # Odds Ratio[Female Odds / Male Odds], Dept = F: (24/317)/(22/351) # Illustration of (something like) Simpson's Paradox: # Are females less likely to be admitted than males? # Overall: admit.gender <- margin.table(UCBAdmissions, c(1,2)) admit.gender round(prop.table(admit.gender, 2), 2) summary(admit.gender) # Separately by department: ftable(UCBAdmissions, row.vars="Dept", col.vars = c("Gender", "Admit")) # prop.table(UCBAdmissions, c(2,3)) ftable(round(prop.table(UCBAdmissions, c(2,3)), 2), row.vars="Dept", col.vars = c("Gender", "Admit")) # An explanation: Did females and males tend to apply to the same departments? gender.dept <- margin.table(UCBAdmissions, c(2,3)) gender.dept round(prop.table(gender.dept, 1), 3) # Which departments were more selective in their admission decisions? admit.dept <- margin.table(UCBAdmissions, c(1,3)) admit.dept