######################################################################
# R commands     Genomic Data Science
#     Lab 4      University of South Carolina
######################################################################
# This file contains the R commands for the lab.
#
# Lines beginning with the symbol '#' are comments in R.  All other
# lines contain code.
#
# In R for Windows, you may wish to open this file from the menu bar
# (File:Display file); you can then copy commands into the command
# window.  (Use the mouse to highlight one or more lines; then
# right-click and select "Paste to console".)
######################################################################

########################################
# Installing Bioconductor packages
########################################
# Install BiocManager (from CRAN) and LiquidAssociation (via BiocManager)
# only when they are not already available, so that re-running the lab
# does not reinstall them every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("LiquidAssociation", quietly = TRUE)) {
  BiocManager::install("LiquidAssociation")  ## multtest
}

##########################################################################
# Import data
##########################################################################
## Need to create a dataset with missing values in Excel and save it as
## test.csv (comma delimited) for this exercise.
## NOTE(review): the code below reads "bar_locations.csv", not test.csv --
## confirm which file this exercise is meant to use.
getwd()  ## relative file names below are resolved against this directory
data <- read.csv("bar_locations.csv", header = TRUE)
str(data)

## Tabulate the rows by the Borough column and save the bar chart as a
## JPEG file in the working directory.
incidence <- table(data$Borough)
jpeg("NewYorkNoise.jpeg")
barplot(incidence, ylab = "Number of Incidence")
dev.off()  ## close the JPEG device so the file is written out

############# FAMuss example
getwd()  ## change it using setwd()
fmsURL <- "http://people.stat.sc.edu/hoyen/STAT588/Data/FMS_data.txt"
fms <- read.delim(file = fmsURL, header = TRUE, sep = "\t")
colnames(fms)
dim(fms)  ## check the dimension of the data
str(fms[, 1:10])  ## check the structure of the data
fms$id[1:10]
fms[1, 1:10]
fms$pre.BMI
fms$actn3_rs540874

## Keep only the two columns of interest, restricted to the rows in which
## both of them are observed.
index <- match(c("actn3_rs540874", "pre.BMI"), colnames(fms))
dat <- fms[!is.na(fms$pre.BMI) & !is.na(fms$actn3_rs540874), index]  ## observations without NA

## Refer to the column through the data frame rather than attach()-ing it:
## attach() puts every column of fms on the search path, where the names
## can silently mask (or be masked by) other objects.
mean(fms$pre.BMI, na.rm = TRUE)

##############################
## Export data
##############################
## Write the NA-free subset created earlier in the lab (`dat`) as a
## tab-delimited text file without row names or quoting.
write.table(dat, file = "test2.txt", row.names = FALSE, quote = FALSE, sep = "\t")

###########################
## Read a column of digits from the NIST PiDigits file; the first 60
## lines of the file are skipped before the data are read.
dURL <- "http://itl.nist.gov/div898/strd/univ/data/PiDigits.dat"
pidigits <- read.table(file = dURL, skip = 60)
table(pidigits)
str(pidigits)
## prop.table() divides by the number of tabulated observations, so the
## sample size does not have to be hard-coded.
prop <- prop.table(table(pidigits))  ## proportions
prop
barplot(prop, xlab = "digit", ylab = "proportion")
abline(h = 0.1, lty = 2)  ## dashed reference line at a proportion of 0.1

## scan
## With no file argument scan() reads values typed at the console;
## finish the input with an empty line.
x <- scan()
str(x)

##############################
# Export data
##############################
newdata <- fms[, 1:3]
write.table(newdata, file = "newdata.txt", sep = "\t")
save(newdata, file = "newdata.RData")
## Typing a function name without parentheses prints its definition.
write.table
write.csv

##############################
# Graphics
##############################
library(MASS)
head(mammals)
str(mammals)

## Histogram of body weight in bins of width 100.  In MASS::mammals the
## body weight is recorded in kg and the brain weight in g.
br <- seq(0, max(mammals$body) + 100, by = 100)
hist(mammals$body, freq = TRUE, breaks = br, xlab = "Body Size (kg)",
     main = "Distribution body size of mammals")

## Label each animal as "large" or "small" relative to the median weight.
m <- median(mammals$body)
size <- ifelse(mammals$body > m, "large", "small")
mammals$size <- size  ### creating a new variable in a data frame
head(mammals)
plot(mammals$body, mammals$brain, pch = 16,
     xlab = "Body Size (kg)", ylab = "Brain Size (g)")

## Drop the animals heavier than 1000.  A logical index is used instead
## of mammals[-which(...), ] because a negative index built from an empty
## which() result would select zero rows rather than all of them.
mamm <- mammals[mammals$body <= 1000, ]
dim(mamm)

## Directory that receives the PDF; edit this path for your own machine.
## file.path() is used instead of setwd() so that the working directory
## of the R session is left untouched.
output_dir <- "/Users/hoyen/Desktop"
pdf(file.path(output_dir, "mammals.pdf"))
plot(mamm$body, mamm$brain, pch = 16,
     xlab = "Body Size (kg)", ylab = "Brain Size (g)")
dev.off()  ## close the PDF device so the file is written out

small <- mammals[mammals$size == "small", ]
plot(small$body, small$brain, pch = 16)

### Lottery Code
## Directory holding roster.csv; edit this path for your own machine.
course_dir <- "/Users/hoyen/Desktop/STAT588"
roster <- read.csv(file.path(course_dir, "roster.csv"), stringsAsFactors = FALSE)
roster2 <- roster[, 1:2]
## Paste the second and the first roster columns together, separated by a
## space, and draw one entry at random.
student <- paste(roster2[, 2], roster2[, 1], sep = " ")
sample(student, 1)

####
## Two equivalent ways of selecting the animals heavier than 2000:
## through an intermediate yes/no label, or with the condition directly.
top2 <- ifelse(mammals$body > 2000, "yes", "no")
mammals[top2 == "yes", ]
mammals[mammals$body > 2000, ]
SmallMammals <- mammals[mammals$body < m, ]