######################################################################
# R commands     STAT588/BIOL588
# Lab 2     2023     University of South Carolina
######################################################################
# This file contains the R commands for the lab.
#
# Lines beginning with the symbol '#' are comments in R.  All other
# lines contain code.
#
# In R for Windows, you may wish to open this file from the menu bar
# (File:Display file); you can then copy commands into the command
# window.  (Use the mouse to highlight one or more lines; then
# right-click and select "Paste to console".)
######################################################################

################
### Data types
################
a <- 1
class(a)   # "numeric"
class(ls)  # "function": functions are ordinary objects with a class

##############################
# Creating simple vectors
##############################
x <- c(1, 3.5, -28.4, 10)                 # numeric vector
x
y <- c("cat", "dog", "mouse", "monkey")   # character vector
z <- c(TRUE, TRUE, TRUE, FALSE, FALSE)    # logical vector

x <- 1:10
seq(1, 10)
seq(3, 9, by = 3)
rep(2, 10)
rep(c(1, 2, 3), 5)
log(seq(1, 2, by = 0.1))   # functions are applied element by element

# Summaries with a missing value: NA propagates unless na.rm = TRUE.
# Always write TRUE/FALSE in full; T and F are ordinary variables that
# can be reassigned.
x <- c(1, 5, 10, NA, 15)
sum(x)                  # NA
sum(x, na.rm = TRUE)
prod(x, na.rm = TRUE)
mean(x, na.rm = TRUE)

x <- 1:10
cumsum(x)   ## running sum
cummax(x)   ## running maximum
cummin(x)   ## running minimum

###################################
# Accessing Elements in a Vector
###################################
y <- c(18, 32, 15, -7, 12, 19)
length(y)
y[3:5]           ## select by position: positive integers
y[-c(1, 5, 6)]   ## exclude by position: negative integers
y < 15           ## logical vector, one value per element
y[y < 15]        ## keep the elements where the condition is TRUE
which(y == 32)   ## positions where the condition is TRUE

y <- seq(1, 10, by = 2)
x <- seq(2, 15)
match(y, x)      ## position of each element of y in x (NA if absent)

colors <- c("red", "blue", "pink")
which(colors == "yellow")   ## integer(0): no element matches

x <- c(1, 5, 10, NA, 15)
which(is.na(x))
which(!is.na(x))

##############################
# Factors
# vector with categories
##############################
colors <- c(1, 1, 2, 3)
# 'labels' is spelled out in full rather than relying on partial
# argument matching ('label').
colors <- factor(colors, labels = c("red", "green", "blue"))
table(colors)

##############################
# Matrices
##############################
help(cbind)
y <- c(8, 32, 15, -7, 2, 19)
x <- 1:6
mat <- cbind(x, y)   ## bind the two vectors as columns: a 6 x 2 matrix
help(rbind)
# 'mat' is the 6 x 2 matrix built with cbind(x, y) in the Matrices
# section of this lab.
dim(mat)     ## check dimension
ncol(mat)    ## the number of columns of a matrix
nrow(mat)    ## the number of rows of a matrix
mat[2, 2]    ## the value in the 2nd row and the 2nd column
             ## (mat has only two columns, so mat[2, 3] is out of bounds)
mat[1:3, ]   ## the first three rows of mat
mat[, 2]     ## the 2nd column of mat
mat[-1, ]    ## exclude the first row

newmat <- matrix(1:9, nrow = 3)   ## create new matrix (filled by column)
newmat
rowMeans(newmat)
colMeans(newmat)
m <- matrix(1:9, nrow = 3, byrow = TRUE)   ## fill row first
colnames(m) <- c("a", "b", "c")
rownames(m) <- c("r1", "r2", "r3")
vect <- as.vector(newmat)   ## flatten the matrix back into a vector

##############################
# Matrix Multiplication
##############################
mat <- matrix(1:9, nrow = 3)
mat^2         ## element-wise square
mat %*% mat   ## matrix product

##############################
# Arrays
##############################
myarray <- array(1:64, dim = c(4, 4, 4))
myarray
myarray[1, 2, 3]

##############################
# Data Frames
##############################
# Simulated data: the values differ on every run because no seed is set.
muscle <- rnorm(n = 10, mean = 3, sd = 1)
sex <- factor(rep(c("M", "F"), times = c(6, 4)))
speed <- rep(0, 10)
speed[1:6] <- rnorm(6, mean = 30 - 2 * muscle[1:6], sd = 2)
speed[7:10] <- rnorm(4, mean = 40 - 2 * muscle[7:10], sd = 2)
mydata <- data.frame(y = speed, x1 = muscle, x2 = sex)
summary(mydata)
str(mydata)
temp <- lm(y ~ x1 + x2, data = mydata)   ## linear model of y on x1 and x2
summary(temp)

##############################
# Lists
##############################
x <- list(
  one = 18:36,
  two = c("AK", "AL", "AZ"),
  three = c(TRUE, TRUE, FALSE, TRUE),
  four = matrix(1:12, 3, 4)
)
x
x[[1]][3:6]   ## extract the first component, then index into it
x$one[3:6]    ## same component, accessed by name
y <- unlist(x)
str(y)

############################
# Operator   Description
############################
# >    greater than
# >=   greater than or equal to
# <    less than
# <=   less than or equal to
# ==   equal to
# !=   not equal to
# &    and
# |    or
############################

##############################
## &, |
##############################
x <- c(TRUE, TRUE, FALSE, FALSE)
y <- c(TRUE, FALSE, TRUE, FALSE)
mat <- cbind(x, y)
mat
and <- x & y   ## element-wise AND
or <- x | y    ## element-wise OR
and
or

###################
# order
###################
x <- c(1, 3, 6, 2, -1, -2)
o <- order(x, decreasing = TRUE)   ## positions that sort x, largest first
o
orderedx <- x[o]
orderedx

#############################
# Reading and writing data
#############################
getwd()
# The folder below exists only on the machine this lab was written on.
# Edit it to point at your own folder.  The guard keeps the script from
# stopping with an error when the folder is not there.
lab_dir <- "/Users/yen-yiho/Desktop/STAT588"
if (dir.exists(lab_dir)) {
  setwd(lab_dir)
}
url <- "https://people.stat.sc.edu/hoyen/STAT588/Data/ALLpheno.csv"
ALLpheno <- read.csv(file = url, header = TRUE)
save(ALLpheno, file = "mydata.RData")   ## binary R format
write.csv(ALLpheno, file = "ALLpheno.csv", row.names = FALSE)
rm(ALLpheno)
# Calling str(ALLpheno) here would stop with "object not found";
# exists() shows that the object is gone without raising an error.
exists("ALLpheno")     ## should be FALSE after rm()
load("mydata.RData")   ## restores ALLpheno from the saved file
str(ALLpheno)

##############
# Indexing
##############
# ALL dataset
# molecular abnormality
# The column name is written in full (mol.biol) everywhere: '$' would
# silently partial-match 'mol.bio', which is easy to get wrong.
table(ALLpheno$mol.biol)
index1 <- ALLpheno$mol.biol == "BCR/ABL"   ## logical, one value per row
str(index1)
sum(index1)                                ## number of TRUE values
mut1 <- ALLpheno[index1, ]
head(mut1)

#### BCR/ABL or ALL1/AF4
index2 <- ALLpheno$mol.biol == "BCR/ABL" | ALLpheno$mol.biol == "ALL1/AF4"
mut2 <- ALLpheno[index2, ]
str(mut2)

###### BCR/ABL and female

#### indexing functions
#### which, match, %in%

#### which
imut1 <- which(ALLpheno$mol.biol == "BCR/ABL")   ## row numbers, not logical
str(imut1)
mut1 <- ALLpheno[imut1, ]

#### match
id <- c("1005", "16002")
iid <- match(id, ALLpheno$cod)   ## row of each id in cod (NA if absent)
iid
ALLpheno[iid, ]

#### %in%
pattern <- c("BCR/ABL", "ALL1/AF4")
imut3 <- which(ALLpheno$mol.biol %in% pattern)
mut3 <- ALLpheno[imut3, ]
dim(mut3)
str(mut3)

##### BCR/ABL and B cell
table(ALLpheno$BT)
# grep() arguments are named in full; "^B" matches values starting with B.
iBcell <- grep(pattern = "^B", x = ALLpheno$BT)
iBmut1 <- intersect(iBcell, imut1)   ## rows present in both index sets
length(iBmut1)

############ What kind of mutations from T-cell leukemia
iTcell <- grep(pattern = "^T", x = ALLpheno$BT)
length(iTcell)
Tcell <- ALLpheno[iTcell, ]
table(Tcell$mol.biol)

################
# Indexing exercise
################
getwd()   ## change it using setwd()
fmsURL <- "http://people.stat.sc.edu/hoyen/STAT588/Data/FMS_data.txt"
fms <- read.delim(file = fmsURL, header = TRUE, sep = "\t")
colnames(fms)
dim(fms)            ## check the dimension of the data
str(fms[, 1:10])    ## check the structure of the data
fms$id[1:10]
fms[1, 1:10]
fms$pre.BMI
fms$actn3_rs540874

################
# Indexing exercise
################
#### Exercise 1: identify the gene "actn3_rs540874" and the pre.BMI
####             variable in the fms data
#### Exercise 2: create a smaller data set with only id, the
####             actn3_rs540874 gene and pre.BMI
#### Exercise 3: remove any NA values in these three columns
# Column positions of the three variables named in Exercise 2
# (assumes the identifier column is named exactly "id").
index <- match(c("id", "actn3_rs540874", "pre.BMI"), colnames(fms))
# Keep only the columns listed in 'index', and only the rows with no NA
# in any of those columns.  fms[index] stays a data frame whatever the
# length of 'index', so complete.cases() always checks row by row.
dat <- fms[complete.cases(fms[index]), index]   ## observations without NA

# NOTE(review): attach() puts the columns of fms on the search path so
# they can be used by bare name.  It is kept so that anything relying on
# bare column names keeps working, but it is easy to misuse: attached
# columns are copies and can be masked by workspace objects.
# mean(fms$pre.BMI, na.rm = TRUE) gives the same result without attach().
attach(fms)
mean(pre.BMI, na.rm = TRUE)