######################################################################
# R commands      				 Genomic Data Science
# Lab 4                               University of South Carolina
######################################################################
# This file contains the R commands for the lab.
#
# Lines beginning with the symbol '#' are comments in R.  All other
# lines contain code.
#
# In R for Windows, you may wish to open this file from the menu bar
# (File:Display file); you can then copy commands into the command
# window.  (Use the mouse to highlight one or more lines; then
# right-click and select "Paste to console".)
######################################################################

#######################################
# Installing Bioconductor packages
#  
########################################
# Install BiocManager (the Bioconductor installer) from CRAN if it is missing.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}

# Install LiquidAssociation through BiocManager only when it is not already
# available, so re-running this script does not re-install it every time.
if (!requireNamespace("LiquidAssociation", quietly = TRUE)) {
    BiocManager::install("LiquidAssociation")
}


##multtest 


##########################################################################
# Import data 
#############################################################################
## Need to create a dataset with missing values in Excel and save it as test.csv (comma delimited) for this exercise
# Read bar_locations.csv from the working directory, tabulate its rows by the
# Borough column, and save a bar plot of those counts as a JPEG.
getwd()  # show the working directory that the relative file names resolve against
data <- read.csv("bar_locations.csv", header = TRUE)
str(data)
incidence <- table(data$Borough)  # number of rows for each Borough value

# Open a JPEG device, draw the plot into it, then close the device so the
# file is flushed to disk.
jpeg("NewYorkNoise.jpeg")
barplot(incidence, ylab = "Number of Incidence")
dev.off()


############# FAMuss example 
# Download the FMS data (tab-delimited, with a header row) and inspect it.
getwd() ## change it using setwd()
fmsURL <- "http://people.stat.sc.edu/hoyen/STAT588/Data/FMS_data.txt"
fms <- read.delim(file = fmsURL, header = TRUE, sep = "\t")
colnames(fms)
dim(fms)  ## check the dimension of the data
str(fms[, 1:10]) ## check the structure of the data
fms$id[1:10]
fms[1, 1:10]
fms$pre.BMI
fms$actn3_rs540874

# Column positions of the two variables of interest, then the rows where
# neither of them is NA.
index <- match(c("actn3_rs540874", "pre.BMI"), colnames(fms))
dat <- fms[!is.na(fms$pre.BMI) & !is.na(fms$actn3_rs540874), index] ## observations without NA

# Refer to the column through the data frame instead of attach(fms): attach()
# puts every column on the search path, where it can mask or be masked by
# other objects of the same name.
mean(fms$pre.BMI, na.rm = TRUE)

##############################
## Export data 
##############################
# Write `dat` as tab-delimited text, without row names and without quoting.
write.table(dat, file = "test2.txt", row.names = FALSE, quote = FALSE, sep = "\t")


########################### 
# Read the PiDigits data file, skipping its first 60 lines, and look at how
# often each value occurs.
dURL <- "http://itl.nist.gov/div898/strd/univ/data/PiDigits.dat"
pidigits <- read.table(file = dURL, skip = 60)
table(pidigits)
str(pidigits)

# Proportions computed from the counts themselves rather than a hard-coded
# divisor, so they sum to 1 whatever number of values was read.
prop <- prop.table(table(pidigits)) ##proportions
prop
barplot(prop, xlab = "digit", ylab = "proportion")
abline(h = 0.1, lty = 2)  # dashed reference line at 0.1
##scan
# scan() with no arguments reads numbers typed at the console until a blank line.
x <- scan()
str(x)


##############################
# Export data 
##############################
# Take the first three columns of fms and store them on disk in two formats.
newdata <- fms[, seq_len(3)]
write.table(x = newdata, sep = "\t", file = "newdata.txt")  # tab-delimited text
save(list = "newdata", file = "newdata.RData")              # R's own save() format
# A function name evaluated without parentheses is the function object itself;
# at the console this displays its definition.
write.table
write.csv

##############################
# Graphics 
##############################
# Explore the `mammals` data frame from the MASS package.
# Per the MASS documentation, `body` is body weight in kg and `brain` is
# brain weight in g, so the axis labels below use those units.
library(MASS)
head(mammals)
str(mammals)

# Histogram of body weight with 100-unit-wide bins reaching past the maximum.
br <- seq(0, max(mammals$body) + 100, by = 100)
hist(mammals$body, freq = TRUE, breaks = br, xlab = "Body Size (kg)",
     main = "Distribution body size of mammals")

# Label each animal "large" or "small" relative to the median body weight.
m <- median(mammals$body)
size <- ifelse(mammals$body > m, "large", "small")
mammals$size <- size ### creating a new variable in a data frame
head(mammals)
plot(mammals$body, mammals$brain, pch = 16,
     xlab = "Body Size (kg)", ylab = "Brain Size (g)")

# Drop the animals with body > 1000 using a logical mask. The mask is safe
# when nothing exceeds the cut-off, whereas x[-which(cond), ] would then
# return zero rows.
mamm <- mammals[!(mammals$body > 1000), ]
dim(mamm)

# Write the plot to mammals.pdf in the current working directory (no
# machine-specific setwd(), so the script runs on any computer).
pdf("mammals.pdf")
plot(mamm$body, mamm$brain, pch = 16,
     xlab = "Body Size (kg)", ylab = "Brain Size (g)")
dev.off()

# Scatter plot restricted to the animals labelled "small" above.
small <- mammals[mammals$size == "small", ]
plot(small$body, small$brain, pch = 16)


### Lottery Code 
# Pick one student at random from the class roster.
# The file is addressed by its full path instead of calling setwd(), so the
# session's working directory is left untouched.
roster_dir <- "/Users/hoyen/Desktop/STAT588"
roster <- read.csv(file.path(roster_dir, "roster.csv"), stringsAsFactors = FALSE)
roster2 <- roster[, 1:2]
# Join the second and first columns with a space between them.
student <- paste(roster2[, 2], roster2[, 1], sep = " ")
sample(student, 1)

#### 
# Two equivalent ways to pull out the rows with body > 2000: through a
# "yes"/"no" flag vector, and directly with the logical condition.
top2 <- ifelse(mammals[["body"]] > 2000, "yes", "no")
mammals[top2 == "yes", , drop = FALSE]
mammals[mammals[["body"]] > 2000, , drop = FALSE]

# Rows whose body value is strictly below m.
SmallMammals <- mammals[mammals[["body"]] < m, , drop = FALSE]