# STAT_516_Lec_12_inclass experience <- c(14,29,6,25,18,4,18,12,22,6,30,11,30,5,20,13,9,32,24,13,19,4,28,22,8) success <- c(0,0,0,1,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1) plot(success ~ experience) glm_out <- glm(success ~ experience, family="binomial") summary(glm_out) x <- seq(min(experience),max(experience),length=200) b0hat <- -3.05970 b1hat <- 0.16149 pi_x <- exp(b0hat + b1hat*x) / (1 + exp(b0hat + b1hat*x)) lines(pi_x ~ x) pi_hat <- glm_out$fitted.values points(pi_hat ~ experience, pch = 19) # confidence interval for the coefficients confint.default(glm_out) # This is the estimated factor by which the odds of success # are multiplied due to a one-unit increase in x exp(b1hat) # The odds of success increase by a factor of 1.175 with each # additional month of experience. # here is a CI for the odds ratio associated with one # additional month of experience exp(confint.default(glm_out))[2,] plot(glm_out) install.packages("glmtoolbox") library(glmtoolbox) envelope(glm_out,type = "deviance") x4 <- experience^4 glm2_out <- glm(success ~ x4,family="binomial") envelope(glm2_out,type="deviance") # Germain credit score data library(foreign) # credit-g dataset from https://www.openml.org/ link <- url("https://people.stat.sc.edu/gregorkb/data/dataset_31_credit-g.arff") credg <- read.arff(link) colnames(credg) # glm_out <- glm(ifelse(class == "good",1,0) ~ ., family = "binomial", data = credg) glm_out <- glm(class ~ ., family = "binomial", data = credg) summary(glm_out) summary(credg$checking_status) # Test whether checking_status is an important predictor of credit rating # fit a full and a reduced model glm_full <- glm(class ~ ., family="binomial",data=credg) summary(glm_full) credg_nochecking <- credg[,-1] glm_red <- glm(class ~ ., family="binomial",data=credg_nochecking) s <- 3 pval <- 1 - pchisq(glm_red$deviance - glm_full$deviance,s) pval # these are my pi hat values, the estimated probabilities of "good" credit glm_full$fitted.values Yhat <- ifelse(glm_full$fitted.values >= 1/2,1,0) Y <- ifelse(credg$class=="good",1,0) TP <- sum(Yhat == 1 & Y == 1) / sum(Y==1) FP <- sum(Yhat == 1 & Y == 0) / sum(Y==0) FP