############################################################# ###################### # # R example code to display / visualize multivariate data: # ###################### # ############################################################# # The built-in R data set called state contains a multivariate data matrix. # For information, type: help(state) # To see the data matrix (containing variables for the 50 states, as of 1977): state.x77 # Creating a data frame out of a matrix: state.x77.df <- data.frame(state.x77) names(state.x77.df) <- c("Popul", "Income", "Illit", "LifeExp", "Murder", "HSGrad", "Frost", "Area") attach(state.x77.df) ############################################ ############################################ # A simple scatterplot of Life Expectancy against Income: plot(Income, LifeExp) # A simple scatterplot of HS Graduation Rate against Income: plot(Income, HSGrad) # With regression line added: plot(Income, HSGrad) abline(lm(HSGrad~Income), lwd=2) # With lowess regression curve added: plot(Income, HSGrad) lines(lowess(Income,HSGrad), lwd=2) # With "rug plots" added to display both marginal distributions: plot(Income, HSGrad) rug(jitter(Income), side=1) rug(jitter(HSGrad), side=2) # With histograms added to display both marginal distributions: install.packages("psych") library(psych) scatterHist(x=Income,y=HSGrad) # Changing some default options, for a little cleaner look: scatterHist(x=Income,y=HSGrad,xlab="Income",ylab="High School Graduation Rate",ellipse=F,smooth=F,density=F) # Labeling individual observations (the states): st.name <- abbreviate(row.names(state.x77.df) ) plot(Income, HSGrad, type='n') text(Income, HSGrad, labels=st.name, lwd=2) # Even better here: R has a built-in state.abb vector, which we can use: plot(Income, HSGrad, type='n') text(Income, HSGrad, labels=state.abb, lwd=2) # Symbolic scatterplots, with separate symbols for each level of some grouping variable: # Creating a data frame that adds the 'state.region' variable to the others: more.state.data <- data.frame(state.x77.df,state.region) library(lattice) xyplot(HSGrad ~ Income, group=factor(state.region), data=more.state.data) # install.packages("ggplot2") # run this if 'ggplot2' package is not already installed library(ggplot2) qplot(Income, HSGrad, col=factor(state.region), shape=factor(state.region), data=more.state.data) # A scatterplot with multiple y axes: (functions courtesy of Horton and Kleinman (2015) book): # First function plots one y-variable against x, (by default) adds lowess curve through data, # then calls 'addsecondy' function: plottwoy <- function(x,y1,y2,xname="X",y1name="Y1",y2name="Y2",lowess.curve="yes"){ plot(x,y1,ylab=y1name,xlab=xname) if (lowess.curve=="yes") { lines(lowess(x,y1),lwd=3) addsecondy(x,y2,y1,yname=y2name,lowess.curve="yes") } else { addsecondy(x,y2,y1,yname=y2name,lowess.curve="no") } } # Second function rescales range of second y variable to that of the first, adds the right axis addsecondy <- function(x,y,origy,yname="Y2",lowess.curve="yes"){ prevlimits=range(origy) axislimits=range(y) axis(side=4,at=prevlimits[1]+diff(prevlimits)*c(0:5)/5, labels=round(axislimits[1]+diff(axislimits)*c(0:5)/5,1)) mtext(yname,side=4) newy=(y-axislimits[1])/(diff(axislimits)/diff(prevlimits))+prevlimits[1] points(x,newy,pch=2) if (lowess.curve=="yes") lines(lowess(x,newy),lty=2,lwd=3) } with(state.x77.df, plottwoy(x=HSGrad,y1=Illit,y2=Murder,xname="High School Graduation Rate",y1name="Illiteracy Rate",y2name="Murder Rate")) legend(x='topright',legend=c("Illiteracy","Murder"),pch=c(1,2),bty='n',lty=c(1,2)) # Without the lowess curves: with(state.x77.df, plottwoy(x=HSGrad,y1=Illit,y2=Murder,xname="High School Graduation Rate",y1name="Illiteracy Rate",y2name="Murder Rate",lowess.curve='no')) legend(x='topright',legend=c("Illiteracy","Murder"),pch=c(1,2),bty='n') # no 'lty' argument now ############################################ ############################################ # Finding and plotting the "convex hull" of the data: conv.hull <- chull(Income, HSGrad) plot(Income, HSGrad) polygon(Income[conv.hull], HSGrad[conv.hull], density=15, angle=30) # The ordinary sample correlation coefficient between HS Graduation Rate and Income: cor(Income, HSGrad) # A "robust" sample correlation coefficient between HS Graduation Rate and Income: cor(Income[-conv.hull], HSGrad[-conv.hull]) ## An example where removing the convex hull makes more of a difference: conv.hull <- chull(Area, Popul) plot(Area, Popul) polygon(Area[conv.hull], Popul[conv.hull], density=15, angle=30) # The ordinary sample correlation coefficient between Area and Population: cor(Area, Popul) # A "robust" sample correlation coefficient between Area and Population: cor(Area[-conv.hull], Popul[-conv.hull]) ############################################ ############################################ # Copy Everitt's chiplot function: ###################### ##### chiplot<-function(x,y,vlabs=c("X","Y"),matrix="NO") { n<-length(x); ind<-numeric(length=n) for(i in 1:n) { for(j in (1:n)[-i]) if(x[i]>x[j]&y[i]>y[j]) ind[i]<-ind[i]+1 } ind<-ind/(n-1); ind1<-numeric(length=n) for(i in 1:n) { for(j in (1:n)[-i]) if(x[i]>x[j]) ind1[i]<-ind1[i]+1 } ind1<-ind1/(n-1); ind2<-numeric(length=n) for(i in 1:n) { for(j in (1:n)[-i]) if(y[i]>y[j]) ind2[i]<-ind2[i]+1 } ind2<-ind2/(n-1) s<-sign((ind1-0.5)*(ind2-0.5)) chi<-(ind-ind1*ind2)/sqrt(ind1*(1-ind1)*ind2*(1-ind2)) lambda<-4*s*pmax((ind1-0.5)^2,(ind2-0.5)^2) thresh<-4*(1/(n-1)-0.5)^2 if(matrix=="NO") { par(mfrow=c(1,2)) plot(x,y,xlab=vlabs[1],ylab=vlabs[2]) plot(lambda[abs(lambda)=const2]<-0; l<-length(w[w==0]) if(l<0.5*n) break else const2<-2*const2 } tx<-sum(w*x)/sum(w); sx<-sqrt(sum(w*(x-tx)^2)/sum(w)); ty<-sum(w*y)/sum(w); sy<-sqrt(sum(w*(y-ty)^2)/sum(w)); r<-sum(w*(x-tx)*(y-ty))/(sx*sy*sum(w)); const2<-c2; wold<-w for(i in 1:100) { z1<-((y-ty)/sy+(x-tx)/sx)/sqrt(2*(1+r)); z2<-((y-ty)/sy-(x-tx)/sx)/sqrt(2*(1+r)); esq<-z1*z1+z2*z2 for(j in 1:10) { w[esq=const2]<-0; l<-length(w[w==0]) if(l<0.5*n) break else const2<-2*const2 } tx<-sum(w*x)/sum(w); sx<-sqrt(sum(w*(x-tx)^2)/sum(w)); ty<-sum(w*y)/sum(w); sy<-sqrt(sum(w*(y-ty)^2)/sum(w)) r<-sum(w*(x-tx)*(y-ty))/(sx*sy*sum(w)); term<-sum((w-wold)^2)/(sum(w)/n)^2 if(term<-err) break else {wold<-w const2<-c2 } } param<-c(tx,ty,sx,sy,r) param } ##### ###################### # Copy Everitt's bivbox function: ###################### ##### bivbox<-function(a, d = 7, mtitle = "Bivariate Boxplot", method = "robust",xlab="X",ylab="Y") { #a is data matrix #d is constant(usually 7) p <- length(a[1, ]) if(method == "robust") { param <- biweight(a[, 1:2]); m1 <- param[1]; m2 <- param[2] s1 <- param[3]; s2 <- param[4]; r <- param[5] } else { m1 <- mean(a[, 1]); m2 <- mean(a[, 2]); s1 <- sqrt(var(a[, 1])); s2 <- sqrt(var(a[, 2])); r <- cor(a[, 1:2])[1, 2] } x <- (a[, 1] - m1)/s1; y <- (a[, 2] - m2)/s2 e <- sqrt((x * x + y * y - 2 * r * x * y)/(1 - r * r)) e2 <- e * e; em <- median(e); emax <- max(e[e2 < d * em * em]) r1 <- em * sqrt((1 + r)/2); r2 <- em * sqrt((1 - r)/2); theta <- ((2 * pi)/360) * seq(0, 360, 3) xp <- m1 + (r1 * cos(theta) + r2 * sin(theta)) * s1; yp <- m2 + (r1 * cos(theta) - r2 * sin(theta)) * s2 r1 <- emax * sqrt((1 + r)/2); r2 <- emax * sqrt((1 - r)/2); theta <- ((2 * pi)/360) * seq(0, 360, 3) xpp <- m1 + (r1 * cos(theta) + r2 * sin(theta)) * s1; ypp <- m2 + (r1 * cos(theta) - r2 * sin(theta)) * s2 maxxl <- max(xpp); minxl <- min(xpp); maxyl <- max(ypp); minyl <- min(ypp) b1 <- (r * s2)/s1; a1 <- m2 - b1 * m1; y1 <- a1 + b1 * minxl; y2 <- a1 + b1 * maxxl b2 <- (r * s1)/s2; a2 <- m1 - b2 * m2; x1 <- a2 + b2 * minyl; x2 <- a2 + b2 * maxyl maxx <- max(c(a[, 1], xp, xpp, x1, x2)); minx <- min(c(a[, 1], xp, xpp, x1, x2)) maxy <- max(c(a[, 2], yp, ypp, y1, y2)); miny <- min(c(a[, 2], yp, ypp, y1, y2)) plot(a[, 1], a[, 2], xlim = c(minx, maxx), ylim = c(miny, maxy), xlab =xlab, ylab =ylab, lwd = 2, pch = 1) lines(xp, yp, lwd = 2); lines(xpp, ypp, lty = 2, lwd = 2) segments(minxl, y1, maxxl, y2, lty = 3, lwd = 2); segments(x1, minyl, x2, maxyl, lty = 4, lwd = 2) } ##### ###################### # Use it to create a bivariate boxplot of HS Graduation Rate and Income: # The regular method: bivbox(cbind(Income, HSGrad), xlab = "Income", ylab = "HSGrad", method ="O") # The robust method (which is the default): bivbox(cbind(Income, HSGrad), xlab = "Income", ylab = "HSGrad", method ="robust") ############################################ ############################################ # Copy Everitt's bivden function: ###################### ##### bivden<-function(x, y, ngridx = 30, ngridy = 30, constant.x = 1, constant.y = 1) { #x and y are vectors containing the bivariate data #ngridx and ngridy are the number of points in the grid mx <- mean(x); sdx <- sqrt(var(x)); my <- mean(y); sdy <- sqrt(var(y)) #scale x and y before estimation x <- scale(x); y <- scale(y); den <- matrix(0, ngridx, ngridy) #find possible value for bandwidth n <- length(x); hx <- constant.x * n^(-0.2); hy <- constant.y * n^(-0.2) h <- hx * hy; hsqrt <- sqrt(h) seqx <- seq(range(x)[1], range(x)[2], length = ngridx); seqy <- seq(range(y)[1], range(y)[2], length = ngridy) for(i in 1:n) { X <- x[i]; Y <- y[i]; xx <- (seqx - X)/hsqrt; yy <- (seqy - Y)/hsqrt den <- den + outer(xx, yy, function(x, y) exp(-0.5 * (x^2 + y^2))) } den <- den/(n * 2 * pi * h); seqx <- sdx * seqx + mx; seqy <- sdy * seqy + my result <- list(seqx = seqx, seqy = seqy, den = den) result } ##### ###################### # Use it to create a bivariate density of HS Graduation Rate and Income, # and then plot it with the persp function: den1 <- bivden(Income, HSGrad) persp(den1$seqx, den1$seqy, den1$den, xlab="Income", ylab="HSGrad", zlab="Density", lwd=2, ticktype="detailed", theta=35) # Try different values of theta (between 0 and 360) to get the best perspective. # A contour plot of the bivariate density plotted on top of the ordinary scatterplot: plot(Income, HSGrad) contour(den1$seqx, den1$seqy, den1$den, lwd=2, nlevels=15, add=T) # You can show more or fewer contours by varying the value of nlevels in this code. ############################################ ############################################ # Producing a bubble plot of Income, HS Graduation Rate, and Murder Rate # (Here Murder Rate is represented by the size of the bubbles) plot(Income, HSGrad, pch='.', lwd=2, ylim=c(35,70), xlim=c(3000,6400),cex=2) # The x limits and y limits should be adjusted so that all the bubbles fit within the plotting window symbols(Income, HSGrad, circles=Murder, inches=0.4, add=T, lwd=2) # labeling the state names: text(Income, HSGrad, labels=state.abb, lwd=2, cex=0.6) # Producing a bubble plot of Income, HS Graduation Rate, and Illiteracy Rate # (Here Illiteracy Rate is represented by the size of the bubbles) # type='n' will remove the dot in the middle of the bubbles plot(Income, HSGrad, type='n', lwd=2, ylim=c(35,70), xlim=c(3000,6400),cex=2) # The x limits and y limits should be adjusted so that all the bubbles fit within the plotting window symbols(Income, HSGrad, circles=Illit, inches=0.4, add=T, lwd=2) # labeling the state names: text(Income, HSGrad, labels=state.abb, lwd=2, cex=0.6) # What if we deleted the Income-HSGrad "outliers"? plot(Income[-conv.hull], HSGrad[-conv.hull], pch='.', lwd=2, ylim=c(35,70), xlim=c(3000,6400),cex=2) # The x limits and y limits should be adjusted so that all the bubbles fit within the plotting window symbols(Income[-conv.hull], HSGrad[-conv.hull], circles=Illit[-conv.hull], inches=0.4, add=T, lwd=2) ## By adding colors, you can include a 4th variable, which is categorical, to the bubble plot: library(ggplot2) library(carData) data(Salaries, package="carData") # plot salary against experience for a sample of faculty # (color represents rank and size represents service) ggplot(Salaries, aes(x = yrs.since.phd, y = salary, color = rank, size = yrs.service)) + geom_point(alpha = .6) + labs(title = "Academic salary by rank, years of service, and years since degree") # This example is from # https://rkabacoff.github.io/datavis/Multivariate.html # See lots of other cool graphs there... ## Making a ggplot "interactive" with the 'plotly' package: #install.packages("plotly") # Need to install the package first if you have not already done so... library(plotly) ## Use the usual code, but save the plot as an object in R: my_salary_plot <- ggplot(Salaries, aes(x = yrs.since.phd, y = salary, color = rank, size = yrs.service)) + geom_point(alpha = .6) + labs(title = "Academic salary by rank, years of service, and years since degree") ggplotly(my_salary_plot) # This will open up an html file with the plot on it. # Now use the mouse to hover over each point! ## Another example: This is the 'spotify' data set from the 'bayesrules' package: ## It contains data on 350 songs. It's a subset of a larger data set collected by Kaylin Pavlik. #install.packages("bayesrules") #library(bayesrules) #data(spotify) spotify <- read.csv(file="http://people.stat.sc.edu/hitchcock/spotify_bayesrules.csv",header=T) # If you want to see the whole data set: # print(as.data.frame(spotify)) # Creating a character variable that combines the artist and title: spotify$artist_title <- paste(spotify$artist,':',spotify$title) # A 4-variable bubble plot that also has a "labels" element containg the artist/title name: my_song_plot <- ggplot(spotify, aes(x = energy, y = danceability, color = genre, size = duration_ms, labels=artist_title)) + geom_point(alpha = .6) + labs(title = "Song data") # my_song_plot ggplotly(my_song_plot) ## If you only want the interactive tool to show one or two variable values, use the 'tooltip' argument: ggplotly(my_song_plot, tooltip=c("labels","size")) # ############################################ ############################################ # Producing a scatterplot matrix of all variables in this data set: pairs(state.x77.df) # If there are a lot of observations (not really here), can make the plotting characters smaller: pairs(state.x77.df, pch='.', cex=1.5) # Which pair of variables has the strongest positive association? # Which pair of variables has the strongest negative association? # Why are the scatterplots involving "Area" somewhat useless-looking here? # With regression lines overlain on each plot: pairs(state.x77.df, panel=function(x,y) {abline(lm(y~x),lwd=2) points(x,y)}) # Maximize the R graphics window to get a better presentation of these plots. # The 'ggpairs' function in the 'GGally' package works with continuous and categorical variables. # It also includes histograms for each continuous variable's marginal distribution, # bar plots for each categorical variable's marginal distribution, # and side-by-side box plots to show the association between continuous and categorical variables. install.packages("GGally") library(GGally) # Creating a data frame that adds the 'state.region' variable to the others: more.state.data <- data.frame(state.x77.df,state.region) # Making a ggpairs plot with just some of the columns in the data frame: ggpairs(more.state.data[,c(3,4,5,6,9)], axisLabels="show", diag=list(continuous="bar",discrete="bar"), upper=list(continuous="points",combo="box"), lower=list(continuous="cor",combo="facethist")) # You could also save this plot as an R object and use 'ggplotly' to make it interactive, # as in the previous example. ############################################ ############################################ # Producing a 3-D scatterplot of Income, HS Graduation Rate, and Murder Rate library(lattice) # loading the lattice package cloud(Murder ~ Income * HSGrad, xlim=range(Income), ylim=range(HSGrad), zlim=range(Murder), scales = list(distance = rep(1, 3), arrows = FALSE)) # 3-D scatterplot of Income, HS Graduation Rate, and Murder Rate with "drop lines": cloud(Murder ~ Income * HSGrad, xlim=range(Income), ylim=range(HSGrad), zlim=range(Murder), type="h", scales = list(distance = rep(1, 3), arrows = FALSE)) install.packages("scatterplot3d") library(scatterplot3d) # loading the scatterplot3d package with(more.state.data, scatterplot3d(Income,HSGrad,Murder, pch = (1:4)[state.region], type = "h", angle = 55)) # The 'rgl' package produces 3-D scatterplots that are rotatable with the mouse: install.packages('rgl') library(rgl) open3d() plot3d(Income,HSGrad,Murder, col = rainbow(length(Income))) # While the plot is open, you can interactively identify points: identify3d(Income,HSGrad,Murder) # Right-click on plot to show ID numbers # Doing it all at once with state abbreviations as identifiers: plot3d(Income,HSGrad,Murder,type="s",radius=0.025) text3d(Income,HSGrad,Murder,text=state.abb, col = rainbow(length(Income))) ############################################ ############################################ # A Conditioning Plot of HS Graduation Rate against Income, Separately for each Region: # Note that state.region is a categorical vector in this built-in R data set coplot(HSGrad ~ Income | state.region) # I like the xyplot function (in the lattice package) better than the coplot function: dev.new() # Opens another graphics window while still keeping the old graph up library(lattice) xyplot(HSGrad ~ Income | state.region) # You can also condition on a continuous variable with the 'cut' argument: xyplot(LifeExp ~ Murder| cut(Income, 2), data = state.x77.df, layout = c(2,1), xlab = "Murder Rate", ylab = "Life Expectancy") ## Here's an approach to improve the labeling of the above plot: IncCat <- cut(Income, 2) levels(IncCat) <- c("Low Income", "High Income") xyplot(LifeExp ~ Murder| IncCat, data = state.x77.df, layout = c(2,1), xlab = "Murder Rate", ylab = "Life Expectancy") ############################################ ############################################ # Creating a star plot to graphically present all variables of the state data set: stars(state.x77.df) # The first variable is denoted by the edge at the 3 o'clock (rightmost) position, # and the other variables go counterclockwise around the star. # Note how the states of Washington and Oregon resemble each other... # Kind of a cool plotting option in 'stars': stars(state.x77.df,locations=cbind(state.center$x,state.center$y)) # Plotting by location only makes sense for some data sets, not all... ############################################ ############################################ # Creating Chernoff Faces to graphically present all variables of the state data set: library(TeachingDemos) # May need to install the TeachingDemos package first? # If so, type at the command line: install.packages("TeachingDemos", dependencies=T) # while plugged in to the internet. faces( as.matrix(state.x77.df), fill=T) # For this function, the variables correspond to facial features in this order: # 1-height of face, 2-width of face, 3-shape of face, 4-height of mouth, 5-width of mouth, # 6-curve of smile, 7-height of eyes, 8-width of eyes, 9-height of hair, 10-width of hair, # 11-styling of hair, 12-height of nose, 13-width of nose, 14-width of ears, 15-height of ears. # Here's a way to generate a "key": faces.features <- c("Face height", "Face width", "Face shape", "Mouth height", "Mouth width", "Smile curve", "Eyes height", "Eyes width", "Hair height", "Hair width", "Hair styling", "Nose height", "Nose width", "Ears width", "Ears height") cbind( names(state.x77.df), faces.features[1:length( names(state.x77.df) )]) # To switch which columns correspond to which features, just reorder the columns in the data matrix: faces( as.matrix(state.x77.df[c(8,6,5,2,4,1,3,7)]), fill=T) cbind( names(state.x77.df)[c(8,6,5,2,4,1,3,7)], faces.features[1:length( names(state.x77.df) )]) # Now the 8th column (Area) corresponds to "Face height", etc. # Note Alaska has the tallest face and the "frowniest" smile here: Why? # Or a similar function: faces2(as.matrix(state.x77.df), scale="center") # For this function, the variables correspond to facial features in this order: # 1 Width of center 2 Top vs. Bottom width (height of split) 3 Height of Face 4 Width of top half of face # 5 Width of bottom half of face 6 Length of Nose 7 Height of Mouth 8 Curvature of Mouth (abs < 9) # 9 Width of Mouth 10 Height of Eyes 11 Distance between Eyes (.5-.9) 12 Angle of Eyes/Eyebrows # 13 Circle/Ellipse of Eyes 14 Size of Eyes 15 Position Left/Right of Eyeballs/Eyebrows # 16 Height of Eyebrows 17 Angle of Eyebrows 18 Width of Eyebrows # Radar Charts: # Important to scale the data first: scaled.state.x77.df <- scale(state.x77.df) selected.obs.indices <- c(11,38,40,44) some.states <- scaled.state.x77.df[selected.obs.indices, c("LifeExp","HSGrad","Popul","Illit","Murder","Frost","Income")] # Note I am arranging the variables so the "good" indicators will be on the top of the radar plot # and the "bad" indicators will be on the bottom of the radar plot. colnames(some.states) <- c("LifeExp","HSGrad","Popul","Illit","Murder","Frost","Income") rownames(some.states) <- rownames(state.x77.df)[selected.obs.indices] some.states <- as.data.frame(some.states) install.packages("fmsb") library(fmsb) # Put max and min values to be plotted for each variable. # 3 and -3 usually work well if the data are pre-standardized as they are here. #some.states <- as.data.frame(rbind(rep(3,ncol(some.states)) , rep(-3,ncol(some.states)) , some.states)) radarchart(some.states,maxmin=F,plwd=4,plty=1) # Add a legend legend(x=0.8, y=1.3, legend = rownames(state.x77.df)[selected.obs.indices], bty = "n", pch=20, col=1:length(rownames(state.x77.df)[selected.obs.indices]), cex=0.8, pt.cex=3) # With more than a couple observations, it may be better to show the radar plots separately: par(mfrow=c(2,2)) # set up a 2-by-2 array of plots... for (i in 1:length(rownames(state.x77.df)[selected.obs.indices])){ # Below, the 3 and -3 are the max and min values shown on each radar plot, # works well for pre-standardized data such as these. radarchart(as.data.frame(rbind(3,-3,some.states[i,])),plwd=4,plty=1) title(rownames(state.x77.df)[selected.obs.indices][i]) } par(mfrow=c(1,1)) # Pirate Plots: # A more advanced extention of side-by-side boxplots to see the distribution of a continuous variable at # levels of one or more categorical variables: # It shows the density estimate, mean, 95% interval estimate for the mean, and the raw data values all in one plot. install.packages("yarrr") library(yarrr) # Creating a data frame that adds the 'state.region' variable to the others: more.state.data <- data.frame(state.x77.df,state.region) # dv (continuous) is Income, iv (categorical) is state.region yarrr::pirateplot(formula = Income ~ state.region, data = more.state.data, main = "Pirateplot of Incomes", xlab = "state.region", ylab = "Income", theme=1) # Try changing theme to 2, 3, or 4 ... # Grid of Plots: # For base R plots: par(mfrow=c(3,2)) boxplot(Popul~state.region) boxplot(Area~state.region) boxplot(Illit~state.region) boxplot(LifeExp~state.region) boxplot(Murder~state.region) boxplot(Income~state.region) par(mfrow=c(1,1)) # resetting # The'patchwork' package does this incredibly well for 'ggplot2'-type plots # For examples, see: https://patchwork.data-imaginist.com/articles/guides/assembly.html ################################################# ## Animation Plots: install.packages('gganimate') library(gganimate) theme_set(theme_bw()) # These examples are from this website: # https://www.datanovia.com/en/blog/gganimate-how-to-create-plots-with-beautiful-animation-in-r/ # The 'gapminder' data set has some cool demographic data collected over time: install.packages('gapminder') library(gapminder) head(gapminder) # Note the variable names... # Start with a static plot (we've see a 4-variable bubble plot kind of like this before): my_plot <- ggplot( gapminder, aes(x = gdpPercap, y=lifeExp, size = pop, colour = country) ) + geom_point(show.legend = FALSE, alpha = 0.7) + scale_color_viridis_d() + scale_size(range = c(2, 12)) + scale_x_log10() + labs(x = "GDP per capita", y = "Life expectancy") my_plot # This plots each country-year combination as a separate observation (that's why there are a LOT of bubbles) # The transition_time variable specifies which variable you want to dynamic plot to change with # (typically this would be a variable that measures time) # The 'labs' function with 'frame_time' allows the title to reflect # the changing values of the transition_time variable. my_plot + transition_time(year) + labs(title = "Year: {frame_time}") # The dynamic plot appears as a gif in a separate window. # Doing separate panels by Continent with facet_wrap: my_plot + facet_wrap(~continent) + transition_time(year) + labs(title = "Year: {frame_time}") # Look at the bouncing ball in the Africa plot. What's going on there? # Can use 'ggplotly' to identify the country on the static plot: ggplotly(my_plot) # To see all the data: # print(gapminder,n=Inf) # To just see the data for one country: gapminder %>% filter(country == "Rwanda") # Also notice the country on the right side of the Asia plot. # You can let one or more of the axes change over time as well: my_plot + transition_time(year) + labs(title = "Year: {frame_time}") + view_follow(fixed_y = TRUE) # See many more cool options at: # https://www.datanovia.com/en/blog/gganimate-how-to-create-plots-with-beautiful-animation-in-r/