######################################################################
# R commands STAT540 R Basic 1
# University of South Carolina
######################################################################
# This file contains the R commands for the lab.
#
# Lines beginning with the symbol '#' are comments in R. All other
# lines contain code.
#
# In R for Windows, you may wish to open this file from the menu bar
# (File:Display file); you can then copy commands into the command
# window. (Use the mouse to highlight one or more lines; then
# right-click and select "Paste to console".)
######################################################################

# Packages ----
library(tidyverse)
library(ggplot2)
library(ggflags) # from GitHub repo jimjam-slam/ggflags
library(countrycode)
library(dslabs)
library(ggthemes)

# Working directory ----
# The lab folder below is an absolute path on the author's machine. Switch
# to it only when it exists, so the script does not stop with
# "cannot change working directory" when run anywhere else.
lab_dir <- "/Users/hoyen/Desktop/STAT540-Fall2024New/Notes/Started/"
if (dir.exists(lab_dir)) {
  setwd(lab_dir)
}
rm(lab_dir)

# Motivating figure 1: homicides per 100,000 in G-8 countries ----
# One row per country: bar height (count), the text printed above the bar
# (label) and the two-letter code handed to geom_flag().
dat <- tibble(
  country = toupper(c("US", "Italy", "Canada", "UK",
                      "Japan", "Germany", "France", "Russia")),
  count = c(3.2, 0.71, 0.5, 0.1, 0, 0.2, 0.1, 0),
  label = c(as.character(c(3.2, 0.71, 0.5, 0.1, 0, 0.2, 0.1)), "No Data"),
  code = c("us", "it", "ca", "gb", "jp", "de", "fr", "ru")
)

dat |>
  # Order the bars from the largest count to the smallest.
  mutate(country = reorder(country, -count)) |>
  ggplot(aes(country, count, label = label)) +
  geom_bar(stat = "identity", fill = "darkred") +
  geom_text(nudge_y = 0.2, color = "darkred", size = 5) +
  geom_flag(y = -.5, aes(country = code), size = 12) +
  scale_y_continuous(breaks = c(0, 1, 2, 3, 4), limits = c(0, 4)) +
  # annotate() draws the source note exactly once. A geom_text() layer here
  # would inherit the 8-row data and overplot the same label 8 times.
  annotate("text", x = 6.25, y = 3.8,
           label = "Source UNODC Homicide Statistics") +
  ggtitle(toupper("Homicide Per 100,000 in G-8 Countries")) +
  xlab("") +
  ylab("# of gun-related homicides\nper 100,000 people") +
  ggthemes::theme_economist() +
  theme(axis.text.x = element_text(size = 8, vjust = -12),
        axis.ticks.x = element_blank(),
        axis.line.x = element_blank(),
        plot.margin = unit(c(1, 1, 1, 1), "cm"))

# Motivating figure 2 (chunk murder-rate-example-2) ----
# from https://everytownresearch.org/wp-content/uploads/2016/07/GunTrends_murders_per_1000.png
# knitr::include_graphics(file.path(img_path,"GunTrends_murders_per_1000.png"))
dat <- tibble(
  country = toupper(c("United States", "Canada", "Portugal", "Ireland",
                      "Italy", "Belgium", "Finland", "France",
                      "Netherlands", "Denmark", "Sweden", "Slovakia",
                      "Austria", "New Zealand", "Australia", "Spain",
                      "Czech Republic", "Hungary", "Germany",
                      "United Kingdom", "Norway", "Japan",
                      "Republic of Korea")),
  count = c(3.61, 0.5, 0.48, 0.35, 0.35, 0.33, 0.26, 0.20, 0.20, 0.20,
            0.19, 0.19, 0.18, 0.16, 0.16, 0.15, 0.12, 0.10, 0.06, 0.04,
            0.04, 0.01, 0.01)
)

dat |>
  mutate(country = reorder(country, count)) |>
  ggplot(aes(country, count, label = count)) +
  geom_bar(stat = "identity", fill = "darkred", width = 0.5) +
  geom_text(nudge_y = 0.2, size = 3) +
  xlab("") +
  ylab("") +
  ggtitle(toupper("Gun Homicides per 100,000 residents")) +
  theme_minimal() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.length = unit(-0.4, "cm")) +
  coord_flip() # flip so the bars run horizontally
rm(dat)

# Motivating figure 3 (chunk us-murders-by-state-map) ----
fifty_states <- map_data("state")
murders |>
  mutate(murder_rate = total / population * 10^5,
         state = tolower(state),
         # Cap the rate at 9, then round up to get integer colour bins.
         colors = factor(ceiling(pmin(murder_rate, 9)))) |>
  ggplot(aes(map_id = state)) +
  geom_map(aes(fill = colors), color = "black", map = fifty_states) +
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map() +
  scale_x_continuous(breaks = NULL) +
  scale_y_continuous(breaks = NULL) +
  labs(x = "", y = "") +
  scale_fill_brewer(guide = "none") +
  # theme_minimal() is a complete theme and replaces earlier theme()
  # settings, so it goes first and the tweak follows it.
  theme_minimal() +
  theme(panel.background = element_blank())
rm(fifty_states)

# Objects ----
# Coefficients used in the quadratic-formula examples below.
coef_a <- 1
coef_b <- 1
coef_c <- -1

# Typing a name at the console prints its value ...
coef_a

# ... and print() does so explicitly.
print(coef_a)

# List the objects in the workspace (not run):
## ls()

# The two solutions given by the quadratic formula for these coefficients.
(-coef_b + sqrt(coef_b^2 - 4 * coef_a * coef_c)) / (2 * coef_a)
(-coef_b - sqrt(coef_b^2 - 4 * coef_a * coef_c)) / (2 * coef_a)

# Functions ----
log(8)
log(coef_a)

# Help pages (not run):
## help("log")
## ?log

# Show the arguments log() accepts.
args(log)

# Four equivalent calls: arguments can be matched by position or by name.
log(8, base = 2)
log(x = 8, base = 2)
log(8, 2)
log(base = 2, x = 8)

# Arithmetic operators are functions too.
2^3

# Operators must be quoted when asking for help on them.
help("+")
?"+"
help(">")
?">"

# Prebuilt objects ----
# List the available data sets (not run):
## data()

# Print one of them (not run):
## co2

# Built-in mathematical quantities.
pi
Inf + 1

# Store the two roots instead of just printing them.
r_1 <- (-coef_b + sqrt(coef_b^2 - 4 * coef_a * coef_c)) / (2 * coef_a)
r_2 <- (-coef_b - sqrt(coef_b^2 - 4 * coef_a * coef_c)) / (2 * coef_a)
# Saving work in a script (examples, not run) ----
## coef_a <- 3
## coef_b <- 2
## coef_c <- -1
## (-coef_b + sqrt(coef_b^2 - 4*coef_a*coef_c))/(2*coef_a)
## (-coef_b - sqrt(coef_b^2 - 4*coef_a*coef_c))/(2*coef_a)

# The same script with explanatory comments (not run):
## ## Code to compute solution to quadratic equation
##
## ## Define the variables
## coef_a <- 3
## coef_b <- 2
## coef_c <- -1
##
## ## Now compute the solution
## (-coef_b + sqrt(coef_b^2 - 4*coef_a*coef_c))/(2*coef_a)
## (-coef_b - sqrt(coef_b^2 - 4*coef_a*coef_c))/(2*coef_a)

# Data types ----
# class() reports what kind of object a name refers to.
a <- 2
class(a)

# Data frames ----
library(dslabs)

class(murders)

# change str to make sure it stays within margins
str <- function(x) {
  utils::str(x, strict.width = "wrap")
}

str(murders)

# remove str we defined
rm(str)

# First rows of the table.
head(murders)

# `$` extracts one column.
murders$population

# Column names.
names(murders)

#### Vectors
pop <- murders$population
length(pop)

class(pop)

#### character vector
class(murders$state)

#### logical
z <- 3 == 2 # `==` tests whether 3 is equal to 2; it is different from `=`
z
class(z)

# Help page for the relational operators (not run):
## ?Comparison

# Factors ----
class(murders$region)

levels(murders$region)

# Reorder the region levels by the sum of `total` within each region.
region <- murders$region
value <- murders$total
region <- reorder(region, value, FUN = sum)
levels(region)

### Lists
record <- list(
  name = "John Doe",
  student_id = 1234,
  grades = c(95, 82, 91, 97, 93),
  final_grade = "A"
)

record
class(record)

# Two ways of pulling out one component by name.
record$student_id

record[["student_id"]]

# A list without names; components are then reached by position.
record2 <- list("John Doe", 1234)
record2

record2[[1]]

### Matrices
# 4 rows by 3 columns, filled from 1:12.
mat <- matrix(1:12, nrow = 4, ncol = 3)
mat

# Single entry: row 2, column 3.
mat[2, 3]

# Entire second row.
mat[2, ]

# Entire third column.
mat[, 3]

# Columns 2 and 3.
mat[, 2:3]

# Rows 1-2 of columns 2-3.
mat[1:2, 2:3]

# Convert the matrix to a data frame.
as.data.frame(mat)

# The same [row, column] indexing works on a data frame.
murders[25, 1]
murders[2:3, ]

#### Creating vectors
codes <- c(380, 124, 818)
codes

# Character vectors need quotes; double and single quotes both work.
country <- c("italy", "canada", "egypt")

country <- c('italy', 'canada', 'egypt')

# The unquoted form, kept as a not-run example:
## country <- c(italy, canada, egypt)

### Names
codes <- c(italy = 380, canada = 124, egypt = 818)
codes

class(codes)

names(codes)

# The names may also be written as quoted strings.
codes <- c("italy" = 380, "canada" = 124, "egypt" = 818)
codes

# Names can be attached after the vector has been created.
codes <- c(380, 124, 818)
country <- c("italy", "canada", "egypt")
names(codes) <- country
codes

### sequences
seq(1, 10)

# Third argument: the step between consecutive values.
seq(1, 10, 2)

# Shorthand for consecutive integers.
1:10

class(1:10)

class(seq(1, 10, 0.5))

# Subsetting ----
codes[2]

codes[c(1, 3)]

codes[1:2]

# Elements can also be selected by name.
codes["canada"]
codes[c("egypt", "italy")]

### Coercion
x <- c(1, "canada", 3)

x
class(x)

x <- 1:5
y <- as.character(x)
y

as.numeric(y)

# Converting a vector that contains "b" to numeric.
x <- c("1", "b", "3")
as.numeric(x)

#### Sorting
library(dslabs)
sort(murders$total)

x <- c(31, 4, 15, 92, 65)
sort(x)

# Indexing x by order(x) gives the same result as sort(x).
index <- order(x)
x[index]

x
order(x)

murders$state[1:6]
murders$abb[1:6]

###### Exercise
####### Order the state abbreviations by total gun murders #######

max(murders$total)

# which.max() gives the position of the largest value.
i_max <- which.max(murders$total)
murders$state[i_max]

x <- c(31, 4, 15, 92, 65)
rank(x)

# Side-by-side table of x, sort(x), order(x) and rank(x), rendered as HTML
# or LaTeX depending on what knitr::is_html_output() reports.
tmp <- data.frame(
  original = x,
  sort = sort(x),
  order = order(x),
  rank = rank(x)
)
if (knitr::is_html_output()) {
  knitr::kable(tmp, "html") |>
    kableExtra::kable_styling(bootstrap_options = "striped",
                              full_width = FALSE)
} else {
  knitr::kable(tmp, "latex", booktabs = TRUE) |>
    kableExtra::kable_styling(font_size = 8)
}

## Vector Arithmetics
library(dslabs)
# State with the largest population.
murders$state[which.max(murders$population)]

# Arithmetic on a vector is applied element by element.
inches <- c(69, 62, 66, 70, 70, 73, 67, 73, 67, 70)

# Multiply every entry by 2.54.
inches * 2.54

# Subtract 69 from every entry.
inches - 69

# Two vectors of the same length are combined entry by entry:
# murders per 100,000 people.
murder_rate <- murders$total / murders$population * 100000

# State abbreviations ordered by increasing murder rate.
murders$abb[order(murder_rate)]

#### Recycling
# x has 3 elements and y has 7.
x <- c(1, 2, 3)
y <- c(10, 20, 30, 40, 50, 60, 70)
x + y

### Indexing
library(dslabs)

murder_rate <- murders$total / murders$population * 100000

# Logical vector: TRUE where the rate is below 0.71 ...
ind <- murder_rate < 0.71

# ... or at most 0.71.
ind <- murder_rate <= 0.71

# A logical vector used as an index keeps the TRUE positions.
murders$state[ind]

# sum() of a logical vector counts the TRUE entries.
sum(ind)

# `&` is the element-wise logical AND.
TRUE & TRUE
TRUE & FALSE
FALSE & FALSE

# Two conditions ...
west <- murders$region == "West"
safe <- murder_rate <= 1

# ... combined with `&` to select states that satisfy both.
ind <- safe & west
murders$state[ind]

#### Which
# which() converts a logical vector into the positions that are TRUE.
ind <- which(murders$state == "California")
murder_rate[ind]

#### match
# match() returns the positions in the second vector at which the entries
# of the first vector are found.
ind <- match(c("New York", "Florida", "Texas"), murders$state)
ind

murder_rate[ind]

# %in% answers, entry by entry, whether a value appears in a vector.
c("Boston", "Dakota", "Washington") %in% murders$state

# Two ways of locating the same three states.
match(c("New York", "Florida", "Texas"), murders$state)
which(murders$state %in% c("New York", "Florida", "Texas"))

####### Basic Plot
# Scatterplot of total murders against population in millions.
# NOTE(review): the source document marks this chunk eval=FALSE, but unlike
# the other eval=FALSE chunks its code is not commented out, so this plot is
# drawn here and again in the next chunk -- confirm that this is intended.
x <- murders$population / 10^6
y <- murders$total
plot(x, y)

# Chunk first-plot: the same scatterplot after calling rafalib::mypar().
rafalib::mypar()
x <- murders$population / 10^6
y <- murders$total
plot(x, y)

# Equivalent call using with() (not run):
## with(murders, plot(population, total))

# Histogram of the murder rate (not run):
## x <- with(murders, total / population * 100000)
## hist(x)

# Chunk r-base-hist.
rafalib::mypar()
x <- with(murders, total / population * 100000)
hist(x)

# State with the highest rate.
murders$state[which.max(x)]

# Boxplots of the rate by region (not run):
## murders$rate <- with(murders, total / population * 100000)
## boxplot(rate~region, data = murders)

# Chunk r-base-boxplot: add the rate as a column, then plot it by region.
murders$rate <- with(murders, total / population * 100000)
boxplot(rate ~ region, data = murders)

# Image of a matrix (not run):
## x <- matrix(1:120, 12, 10)
## image(x)

# Chunk image-first-example: a 12 x 10 matrix holding 1:120.
x <- matrix(1:120, 12, 10)
image(x)