# STAT_516_Lec_12_inclass

experience <- c(14,29,6,25,18,4,18,12,22,6,30,11,30,5,20,13,9,32,24,13,19,4,28,22,8)
success <- c(0,0,0,1,1,0,0,0,1,0,1,0,1,0,1,0,0,1,0,1,0,0,1,1,1)

plot(success ~ experience)

glm_out <- glm(success ~ experience, family="binomial")
summary(glm_out)

x <- seq(min(experience),max(experience),length=200)
b0hat <- -3.05970
b1hat <- 0.16149
pi_x <- exp(b0hat + b1hat*x) / (1 + exp(b0hat + b1hat*x))
lines(pi_x ~ x)
pi_hat <- glm_out$fitted.values
points(pi_hat ~ experience, pch = 19)

# confidence interval for the coefficients

confint.default(glm_out)

# This is the estimated factor by which the odds of success
# are multiplied due to a one-unit increase in x
exp(b1hat)
# The odds of success increase by a factor of 1.175 with each
# additional month of experience.

# here is a CI for the odds ratio associated with one 
# additional month of experience
exp(confint.default(glm_out))[2,]

plot(glm_out)

install.packages("glmtoolbox")
library(glmtoolbox)
envelope(glm_out,type = "deviance")


x4 <- experience^4
glm2_out <- glm(success ~ x4,family="binomial")
envelope(glm2_out,type="deviance")

# Germain credit score data

library(foreign) # credit-g dataset from https://www.openml.org/
link <- url("https://people.stat.sc.edu/gregorkb/data/dataset_31_credit-g.arff")
credg <- read.arff(link)
colnames(credg)


# glm_out <- glm(ifelse(class == "good",1,0) ~ ., family = "binomial", data = credg)
glm_out <- glm(class ~ ., family = "binomial", data = credg)
summary(glm_out)

summary(credg$checking_status)

# Test whether checking_status is an important predictor of credit rating
# fit a full and a reduced model

glm_full <- glm(class ~ ., family="binomial",data=credg)
summary(glm_full)
credg_nochecking <- credg[,-1]
glm_red <- glm(class ~ ., family="binomial",data=credg_nochecking)

s <- 3
pval <- 1 - pchisq(glm_red$deviance - glm_full$deviance,s)
pval

# these are my pi hat values, the estimated probabilities of "good" credit
glm_full$fitted.values


Yhat <- ifelse(glm_full$fitted.values >= 1/2,1,0)

Y <- ifelse(credg$class=="good",1,0)
TP <- sum(Yhat == 1 & Y == 1) / sum(Y==1)
FP <- sum(Yhat == 1 & Y == 0) / sum(Y==0)
FP