# Problem 4, Chapter 6
# Answers may vary.

# Problem 6, Chapter 6:
# NOTE(review): %>%, filter(), select(), pivot_wider(), pivot_longer(),
# mutate(), summarize() and ggplot() are used below, but no library() call that
# attaches them is visible here; presumably the tidyverse is already attached --
# confirm before running in a fresh session.
library(mosaicData)
head(HELPfull)

# (a) Rows for subjects with ID 1, 2 and 3, keeping only the ID, the time point
#     and the two risk scores.
HELPfullsmall <- HELPfull %>%
  select(ID, TIME, DRUGRISK, SEXRISK) %>%
  filter(ID %in% 1:3)

# (c) Spread each risk score into one column per TIME value, then keep only the
#     TIME = 0 and TIME = 6 columns.
HELPfullwide <- HELPfullsmall %>%
  pivot_wider(
    names_from = TIME,
    values_from = c(DRUGRISK, SEXRISK)
  ) %>%
  select(DRUGRISK_0, DRUGRISK_6, SEXRISK_0, SEXRISK_6)

# (d) The same reshaping for every subject, followed by the correlations
#     between the TIME = 0 and TIME = 6 scores.
#     (This step was labelled "(a)" a second time; presumably part (d) --
#     confirm against the exercise.)
HELPfullall <- HELPfull %>%
  select(ID, TIME, DRUGRISK, SEXRISK) %>%
  pivot_wider(
    names_from = TIME,
    values_from = c(DRUGRISK, SEXRISK)
  ) %>%
  select(DRUGRISK_0, DRUGRISK_6, SEXRISK_0, SEXRISK_6)

# use = "complete.obs" is passed to cor() for both correlations.
with(HELPfullall, cor(DRUGRISK_0, DRUGRISK_6, use = "complete.obs"))
# [1] 0.5991146
with(HELPfullall, cor(SEXRISK_0, SEXRISK_6, use = "complete.obs"))
# [1] 0.5076615

# Problem 7, Chapter 6:
library(readr)
ds1 <- read_csv(file = "https://people.stat.sc.edu/hitchcock/prob7chap6.csv")

# Verify this works:
# The subtraction pairs the rows of Treat and Control by position.
# NOTE(review): the result is stored under the name `all`, which is also the
# name of base::all(); the name is kept unchanged here.
Treat <- ds1 %>% filter(group == "T")
Control <- ds1 %>% filter(group == "C")
all <- Treat %>% mutate(diff = vals - Control$vals)
all

# Better approach:
# After pivot_wider() the group labels "T" and "C" become columns, so `T` and
# `C` inside mutate() refer to those columns.
ds2 <- ds1 %>%
  pivot_wider(names_from = group, values_from = vals) %>%
  mutate(diff = T - C)

# Problem 10, Chapter 6:
library(Lahman)

# Rows with teamID "CHN", restricted to the HR and HRA columns.
CubsTeams <- Teams %>%
  filter(teamID == "CHN") %>%
  select(teamID, yearID, HR, HRA)

# Stack HR and HRA into a single value column (HRs) labelled by `type`.
CubsTeamslong <- CubsTeams %>%
  pivot_longer(
    cols = -c(teamID, yearID),
    names_to = "type",
    values_to = "HRs"
  )

ggplot(
  data = CubsTeamslong,
  mapping = aes(x = yearID, y = HRs, color = type)
) +
  geom_line()

# Problem 1, Chapter 7:
library(mosaicData)
## avoids warning...
## (presumably refers to giving across() an explicit function that carries
## na.rm = TRUE -- confirm)
HELPrct %>%
  summarize(
    across(where(is.numeric), function(col) mean(col, na.rm = TRUE))
  )

# Problem 4, Chapter 7 Exercises
# The count_seasons function takes a teamID (INCLUDING quotation marks) as its input,
# and returns an integer number of seasons played for that team as its output.
# Teams comes from the Lahman package, so attach it before the function that
# reads it is used.
# NOTE(review): %>%, filter() and map_int() are used below without a visible
# library() call; presumably dplyr and purrr (e.g. via the tidyverse) are
# already attached -- confirm before running in a fresh session.
library(Lahman)

# count_seasons(x)
#   Count the rows of Teams whose teamID equals x; each such row is treated as
#   one season played by that team.
#
#   x        A single teamID given as a quoted string, e.g. "BRO".
#   returns  A length-one integer. A teamID with no rows in Teams gives 0L
#            rather than a zero-length vector, so the result can always be
#            collected with map_int().
count_seasons <- function(x) {
  # Keep only this team's rows first; nrow() is then the season count and is
  # 0L when nothing matches.
  Teams %>%
    filter(teamID == x) %>%
    nrow()
}

bk_teams <- c("BR1", "BR2", "BR3", "BR4", "BRO", "BRP", "BRF")

# Run the counts once and reuse them for both displays.
Brooklyn_count <- map_int(bk_teams, count_seasons)
Brooklyn_count

# Nicer display of results:
print(data.frame(bk_teams, Brooklyn_count))