## line plots of popularity of the male names "John", "Paul", "George", "Ringo" library(tidyverse) library(mdsr) library(babynames) Beatles <- babynames %>% filter(name %in% c("John", "Paul", "George", "Ringo") & sex == "M") %>% mutate(name = factor(name, levels = c("John", "George", "Paul", "Ringo"))) beatles_plot <- ggplot(data = Beatles, aes(x = year, y = n)) + geom_line(aes(color = name), size = 2) beatles_plot ## using 'plotly' package and 'ggplotly' function to make the beatles_plot object interactive: # install.packages("plotly") library(plotly) ggplotly(beatles_plot) beatles_plot2 <- ggplot(data = Beatles, aes(x = year, y = n, color=name)) + geom_point() ggplotly(beatles_plot2) # can try brushing/selecting with this plot ... ## Similar interactive plot, but using 'knitr' package to save as .png file: if (knitr::is_latex_output()) { # mdsr::save_webshot("gfx/plotly-beatles.png") knitr::include_graphics("gfx/plotly-beatles.png") } else { beatles_plot %>% plotly::ggplotly() } ## Creating interactive, searchable data table with the 'DT' package and 'datatable' function: # install.packages("DT") library(DT) datatable(Beatles, options = list(pageLength = 10)) ## Same as above, but using 'knitr' package to save as .png file: if (knitr::is_latex_output()) { # Beatles %>% # datatable(options = list(pageLength = 10)) %>% # mdsr::save_webshot("gfx/beatles-dt.png", vheight = 500)) knitr::include_graphics("gfx/beatles-dt.png") } else { library(DT) Beatles %>% datatable(options = list(pageLength = 10)) } ## 'dygraph' function in the 'dygraphs' package gives an interactive time series plot easily allowing you to zoom in on particular time intervals: # install.packages("dygraphs") library(dygraphs) Beatles %>% filter(sex == "M") %>% select(year, name, prop) %>% pivot_wider(names_from = name, values_from = prop) %>% dygraph(main = "Popularity of Beatles names over time") %>% dyRangeSelector(dateWindow = c("1940", "1980")) ## Same, but using 'knitr': if (knitr::is_latex_output()) { # mdsr::save_webshot(beatles_dygraph, "gfx/beatles-dygraph.png", vheight = 400) knitr::include_graphics("gfx/beatles-dygraph.png") } else { library(dygraphs) Beatles %>% filter(sex == "M") %>% select(year, name, prop) %>% pivot_wider(names_from = name, values_from = prop) %>% dygraph(main = "Popularity of Beatles names over time") %>% dyRangeSelector(dateWindow = c("1940", "1980")) } ## the streamgraph uses area rather than magnitude to display values over time # remotes::install_github("hrbrmstr/streamgraph") library(streamgraph) Beatles %>% streamgraph(key = "name", value = "n", date = "year") %>% sg_fill_brewer("Accent") ## ----echo=FALSE, eval=FALSE, include=FALSE------------------------------------ ## library(streamgraph) ## beatles_stream <- Beatles %>% ## streamgraph(key = "name", value = "n", date = "year") %>% ## sg_fill_brewer("Accent") ## ## mdsr::save_webshot(beatles_stream, "gfx/beatles-stream.png", vheight = 500) ## Animation Plots: ## Before installing 'gganimate' initially, you may have to do: # install.packages("gifski") # install.packages("av") # and then restart the R session ... # install.packages('gganimate') library(gganimate) theme_set(theme_bw()) ## Using 'gganimate' to create animated time series plots library(gganimate) library(transformr) beatles_animation <- beatles_plot + transition_states( name, transition_length = 2, state_length = 1 ) + enter_grow() + exit_shrink() animate(beatles_animation, height = 400, width = 800) ## Maybe a better example of 'gganimate': # Start with a static plot (we've seen a basic bar plot kind of like this before): my_plot <- ggplot( data = Beatles, aes( x = name, y = prop ) ) + geom_col() + xlab("Name") + ylab("Proportion with Name") my_plot # This sums the proportions for each name over all the years in the data set (that's why the "proportions" are more than 1!) # The transition_time variable specifies which variable you want to dynamic plot to change with # (typically this would be a variable that measures time) # The 'labs' function with 'frame_time' allows the title to reflect # the changing values of the transition_time variable. my_plot + ylim(c(0,0.1)) + transition_time(year) + labs(title = "Year: {frame_time}") # The dynamic plot appears as a gif in a separate window. # If you want to slow down the rate at which the frames change, then decrease the "frames per second" (fps): a1 <- my_plot + ylim(c(0,0.1)) + transition_time(year) + labs(title = "Year: {frame_time}") animate(a1, nframes = 138, fps = 5) # a lower fps produces a slower animation GenNeutral <- babynames %>% filter(name %in% c("Riley", "Lauren", "Cameron", "Taylor")) %>% mutate(name = factor(name, levels = c("Riley", "Lauren", "Cameron", "Taylor"))) my_plot2 <- ggplot( data = GenNeutral, aes( x = name, y = prop ) ) + geom_col() + xlab("Name") + ylab("Proportion with Name") my_plot2 # This single plot is not really sensible, since again, it is summing annual proportions across many years. # Doing separate panels by sex with facet_wrap: a2 <- my_plot2 + ylim(c(0,0.02)) + facet_wrap(~sex) + transition_time(year) + labs(title = "Year: {frame_time}") animate(a2, nframes = 138, fps = 5) # If you want the plot to stop at the end rather than wrap back around to the beginning, use loop=FALSE: # animate(a2, nframes = 138, fps = 5, renderer = gifski_renderer(loop=FALSE)) ## ---- include=FALSE, eval=FALSE----------------------------------------------- ## anim_save(filename = here::here("gfx/beatles-gganimate.gif")) ## ## gif <- here::here("gfx/beatles-gganimate.gif") ## ## png <- gif %>% ## magick::image_read() %>% ## magick::image_write(path = here::here("gfx/beatles-gganimate.png"), format = "png") ## Nothing new, just using knitr... if (knitr::is_latex_output()) { knitr::include_graphics(here::here("gfx/beatles-gganimate.png")) } else { knitr::include_graphics(here::here("gfx/beatles-gganimate.gif")) } ## Flexdashboard example from book (best done with RStudio and rmarkdown): ## We will not do the Flexdashboard example on the classroom computer... cat(readChar("code/flexdash.Rmd", 1e5)) knitr::include_graphics("gfx/1x1.png") ## ----flex2, echo=FALSE, fig.cap="(ref:flexdash2-cap)"------------------------- knitr::include_graphics("gfx/flexdash.png") ## Running a simple Shiny app (from Section 14.4.1 in the book): # I had to do this first # update.packages("promises") # library(promises) # and then restart R ... # install.packages("shiny") library(shiny) # The 'source' command will tell R to accept input directly from the named file (like an alternative to copy-pasting from the file): source('Z:/Z from MATHSTAT/My Documents/teaching/stat_542/beatles_Shiny_app/ui.R') source('Z:/Z from MATHSTAT/My Documents/teaching/stat_542/beatles_Shiny_app/server.R') # Put the name of the directory that contains the files ui.R and server.R: runApp('Z:/Z from MATHSTAT/My Documents/teaching/stat_542/beatles_Shiny_app') # the default directory if you simply type: # runApp() # is the current working directory, which can be identified by typing: # getwd() # Can hit Escape or Ctrl-C to stop running the Shiny app... ### More complicated Shiny app: # install.packages("shinybusy") library(shinybusy) # The 'source' command will tell R to accept input directly from the named file (like an alternative to copy-pasting from the file): source('Z:/Z from MATHSTAT/My Documents/teaching/stat_542/nyc_rest_Shiny_app/ui.R') source('Z:/Z from MATHSTAT/My Documents/teaching/stat_542/nyc_rest_Shiny_app/server.R') # Put the name of the directory that contains the files ui.R and server.R: runApp('Z:/Z from MATHSTAT/My Documents/teaching/stat_542/nyc_rest_Shiny_app') #### Customization of ggplot 2 graphics: ## themes in ggplot2 have lots of attributes! length(theme_grey()) ## These two themes control the grey panel background and the white panel gridlines: theme_grey() %>% pluck("panel.background") theme_grey() %>% pluck("panel.grid") ## Recall the beatles_plot object: Beatles <- babynames %>% filter(name %in% c("John", "Paul", "George", "Ringo") & sex == "M") %>% mutate(name = factor(name, levels = c("John", "George", "Paul", "Ringo"))) beatles_plot <- ggplot(data = Beatles, aes(x = year, y = n)) + geom_line(aes(color = name), size = 2) ## Showing this plot with the default theme and a black and white theme: beatles_plot beatles_plot + theme_bw() ## Customizing through quick changes to the background color and the major grid lines color: beatles_plot + theme( panel.background = element_rect(fill = "cornsilk"), panel.grid.major = element_line(color = "dodgerblue") ) ## Just the first few color options in R head(colors()) # An R colors cheatsheet created by Dr. Yong Wei of Columbia University is here: # https://sites.stat.columbia.edu/tzheng/files/Rcolor.pdf # An even more extensive color cheat sheet is here: # https://www.nceas.ucsb.edu/sites/default/files/2020-04/colorPaletteCheatsheet.pdf ## You COULD write your own theme from scratch, specifying ALL the attributes, but it's much easier ## to modify an existing theme using the %+replace% operator: # This code creates a new theme called 'theme_mdsr' based on the existing 'theme_grey': theme_mdsr <- function(base_size = 12, base_family = "Helvetica") { theme_grey(base_size = base_size, base_family = base_family) %+replace% theme( axis.text = element_text(size = rel(0.8)), axis.ticks = element_line(color = "black"), legend.key = element_rect(color = "grey80"), panel.background = element_rect(fill = "whitesmoke", color = NA), panel.border = element_rect(fill = NA, color = "grey50"), panel.grid.major = element_line(color = "grey80", size = 0.2), panel.grid.minor = element_line(color = "grey92", size = 0.5), strip.background = element_rect(fill = "grey80", color = "grey50", size = 0.2) ) } ## The Beatles plot, displayed with theme_mdsr, separated by name using facet_wrap: beatles_plot + facet_wrap(~name) + theme_mdsr() ## A couple of other themes from the 'ggthemes' package: # install.packages("ggthemes") library(ggthemes) beatles_plot + theme_excel() + scale_color_excel() help(theme_excel) # funny help page for this one ... beatles_plot + theme_fivethirtyeight() beatles_plot + theme_wsj() + scale_color_wsj() ## The humorous 'xkcd' theme: # library(xkcd) ## You have to download this one specially: ## download.file( ## "http://simonsoftware.se/other/xkcd.ttf", ## # ~/Library/Fonts/ for Mac OS X ## dest = "~/.fonts/xkcd.ttf", mode = "wb" ## ) ## ----message=FALSE, warning=FALSE--------------------------------------------- # font_import(pattern = "[X/x]kcd", prompt = FALSE) # loadfonts() ## ----beatles-xkcd, fig.cap="(ref:beatles-xkcd)"------------------------------- # beatles_plot + theme_xkcd() ### Extended example on Hot Dog eating: Section 14.6 ## The entire goal of this exercise is to use R tools to create a ggplot2 graphic that looks like Figure 14.19 in the textbook! ## loading packages and reading in the basic hot dog data as a .csv file: library(tidyverse) library(mdsr) hd <- read_csv( "http://datasets.flowingdata.com/hot-dog-contest-winners.csv" ) %>% janitor::clean_names() glimpse(hd) ## Adding some data some earlier years of the hot dog eating contest: new_data <- tibble( year = c(1979, 1978, 1974, 1972, 1916), winner = c(NA, "Walter Paul", NA, NA, "James Mullen"), dogs_eaten = c(19.5, 17, 10, 14, 13), country = rep(NA, 5), new_record = c(1,1,0,0,0) ) hd <- hd %>% bind_rows(new_data) glimpse(hd) ## Setting up some vectors for the x-axis and y-axis values: xlabs <- c(1916, 1972, 1980, 1990, 2007) ylabs <- seq(from = 0, to = 70, by = 10) ## Only picking the years before 2008, to match the data displayed in the graphic on Figure 14.19 of the textbook: hd_plot <- hd %>% filter(year < 2008) ## Figure 14.20: A simple bar graph of hot dog eating p <- ggplot(data = hd_plot, aes(x = year, y = dogs_eaten)) + geom_col() p ## A tibble containing the earlier-created y-axis values ticks_y <- tibble(x = 1912, y = ylabs) ## Using the 'tribble' function to collect ALL of the many text annotations in Figure 14.19 into a single data frame: ## Note the \n character represents a new line of text: text <- tribble( ~x, ~y, ~label, ~adj, # Frank Dellarosa 1953, 37, paste( "Frank Dellarosa eats 21 and a half HDBs over 12", "\nminutes, breaking the previous record of 19 and a half."), 0, # Joey Chestnut 1985, 69, paste( "For the first time since 1999, an American", "\nreclaims the title when Joey Chestnut", "\nconsumes 66 HDBs, a new world record."), 0, # Kobayashi 1972, 55, paste( "Through 2001-2005, Takeru Kobayashi wins by no less", "\nthan 12 HDBs. In 2006, he only wins by 1.75. After win-", "\nning 6 years in a row and setting the world record 4 times,", "\nKobayashi places second in 2007."), 0, # Walter Paul 1942, 26, paste( "Walter Paul sets a new", "\nworld record with 17 HDBs."), 0, # James Mullen 1917, 10.5, paste( "James Mullen wins the inaugural", "\ncontest, scarfing 13 HDBs. Length", "\nof contest unavailable."), 0, 1935, 72, "NEW WORLD RECORD", 0, 1914, 72, "Hot dogs and buns (HDBs)", 0, 1940, 2, "*Data between 1916 and 1972 were unavailable", 0, 1922, 2, "Source: FlowingData", 0, ) ## Collecting into a single data frame ALL of the grey segments that connect the text in Figure 14.19 to the bars: ## These are x and y positions at the ends of each line segment, ## and they must be entering manually after examining Figure 14.19 to get the correct position locations: segments <- tribble( ~x, ~y, c(1978, 1991, 1991, NA), c(37, 37, 21, NA), c(2004, 2007, 2007, NA), c(69, 69, 66, NA), c(1998, 2006, 2006, NA), c(58, 58, 53.75, NA), c(2005, 2005, NA), c(58, 49, NA), c(2004, 2004, NA), c(58, 53.5, NA), c(2003, 2003, NA), c(58, 44.5, NA), c(2002, 2002, NA), c(58, 50.5, NA), c(2001, 2001, NA), c(58, 50, NA), c(1955, 1978, 1978), c(26, 26, 17) ) %>% unnest(cols = c(x, y)) ## Creating the plot by layering ALL of the different aspects onto a single plot object called 'p' ## This includes adding some more text as a title at the top of the graphic: p + geom_col(aes(fill = factor(new_record))) + geom_hline(yintercept = 0, color = "darkgray") + scale_fill_manual(name = NULL, values = c("0" = "#006f3c", "1" = "#81c450") ) + scale_x_continuous( name = NULL, breaks = xlabs, minor_breaks = NULL, limits = c(1912, 2008), expand = c(0, 1) ) + scale_y_continuous( name = NULL, breaks = ylabs, labels = NULL, minor_breaks = NULL, expand = c(0.01, 1) ) + geom_text( data = ticks_y, aes(x = x, y = y + 2, label = y), size = 3 ) + labs( title = "Winners from Nathan's Hot Dog Eating Contest", subtitle = paste( "Since 1916, the annual eating competition has grown substantially", "attracting competitors from around\nthe world.", "This year's competition will be televised on July 4, 2008", "at 12pm EDT live on ESPN.\n\n\n" ) ) + geom_text( data = text, aes(x = x, y = y, label = label), hjust = "left", size = 3 ) + geom_path( data = segments, aes(x = x, y = y), col = "darkgray" ) + # Key geom_rect( xmin = 1933, ymin = 70.75, xmax = 1934.3, ymax = 73.25, fill = "#81c450", color = "white" ) + guides(fill = FALSE) + theme( panel.background = element_rect(fill = "white"), panel.grid.major.y = element_line(color = "gray", linetype = "dotted"), plot.title = element_text(face = "bold", size = 16), plot.subtitle = element_text(size = 10), axis.ticks.length = unit(0, "cm") ) ## some code to save the graphic as a .png file # ggsave(filename = here::here("gfx/hot-dogs_R.png"), width = 12, height = 5) ## displaying the graphic as a .png file using 'knitr': # knitr::include_graphics(here::here("gfx/hot-dogs_R.png")) ### An example with "expression" to place mathematical notation and symbols as text in R plots # With a base R plot: curve(dnorm, from = -3, to = 3, n = 1000, main = expression(paste("Normal Probability Density Function for ", mu, " = 0, ", sigma, " = 1") ) ) text(-2, 0.3, expression(f(x) == paste(frac(1, sqrt(2 * pi * sigma^2)), " ", e^{ frac(-(x - mu)^2, 2 * sigma^2) })), cex = 1.2) # Note that within 'expression', two equals signs are rendered as a single equals sign # With a ggplot2 plot: myx <- seq(from=-3,to=3,by=.01) myy <- dnorm(myx) d <- data.frame(myx,myy) ggplot(d) + aes(x = myx, y=myy) + geom_line() + labs(title = expression(paste("Normal Probability Density Function for ", mu, " = 0, ", sigma, " = 1") )) + xlab("x") + ylab("dnorm(x)") + annotate( "text", label = expression(f(x) == paste(frac(1, sqrt(2 * pi * sigma^2)), " ", e^{ frac(-(x - mu)^2, 2 * sigma^2)})), x = -2, y = 0.3, size = 5, colour = "red")