# STAT_516_Lec_16

# Observations for the two samples (25 values each), five values per row.
site1 <- c(
  91.28, 92.83, 89.35, 91.90, 82.85,
  94.83, 89.93, 89.00, 84.62, 86.96,
  88.32, 91.17, 83.86, 89.74, 92.24,
  92.59, 84.21, 89.36, 90.96, 92.85,
  89.39, 89.82, 89.91, 92.16, 88.67
)

site2 <- c(
  89.35, 86.51, 89.04, 91.82, 93.02,
  88.32, 88.76, 89.26, 90.36, 87.16,
  91.74, 86.12, 92.10, 83.33, 87.61,
  88.20, 92.78, 86.35, 93.84, 91.20,
  93.44, 86.77, 83.77, 93.19, 81.79
)

# Sample sizes.
n1 <- length(site1)
n2 <- length(site2)

# Sample means.
xbar1 <- mean(site1)
xbar2 <- mean(site2)

# Sample standard deviations (sd() uses the n - 1 denominator).
s1 <- sd(site1)
s2 <- sd(site2)

# Side-by-side boxplots to compare the two samples visually.
boxplot(site1, site2)

# Confidence interval for the difference in means, assuming equal variances.
alpha <- 0.05

# Degrees of freedom for the pooled-variance t procedure.
df_pooled <- n1 + n2 - 2

# Pooled standard deviation: the two sample variances are averaged with
# weights n1 - 1 and n2 - 1, then the square root is taken.
sp <- sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / df_pooled)
s1
s2
sp

# Upper alpha/2 quantile of the t distribution with df_pooled degrees of freedom.
tval <- qt(1 - alpha / 2, df = df_pooled)
tval

# Margin of error, computed once and used for both endpoints.
margin_t <- tval * sp * sqrt(1 / n1 + 1 / n2)
lo <- xbar1 - xbar2 - margin_t
up <- xbar1 - xbar2 + margin_t
c(lo, up)


# The same comparison using the built-in t.test function.

# Welch two-sample t-test: variances are not assumed equal (the default).
t.test(x = site1, y = site2, var.equal = FALSE)

# Pooled two-sample t-test: var.equal = TRUE pools the variance estimates.
t.test(x = site1, y = site2, var.equal = TRUE)


# Large-sample confidence interval for a difference of two proportions.
# Note: n1 and n2 are reassigned here, replacing any earlier values.
n1 <- 319
n2 <- 627

# Sample proportions: 197 out of 319 and 151 out of 627.
p1hat <- 197 / 319
p2hat <- 151 / 627

# Estimated standard error of p1hat - p2hat (unpooled).
se_prop <- sqrt(p1hat * (1 - p1hat) / n1 + p2hat * (1 - p2hat) / n2)

# 1.96 is the approximate 0.975 quantile of the standard normal (95% level).
lo <- p1hat - p2hat - 1.96 * se_prop
up <- p1hat - p2hat + 1.96 * se_prop
c(lo, up)


## hypothesis test

# Z statistic for comparing two proportions. The inputs are named inside
# local() so that only Ztest is created in the workspace.
Ztest <- local({
  prop_a <- 0.06   # first sample proportion
  prop_b <- 0.03   # second sample proportion
  prop_0 <- 0.045  # equals the average of 0.06 and 0.03 (pooled value for equal n)
  n_each <- 1000   # sample size used for both groups

  (prop_a - prop_b) / sqrt(prop_0 * (1 - prop_0) * (1 / n_each + 1 / n_each))
})
Ztest