#############
# Section 5
#############

# Section 5.1

# Repeating values with rep(): whole-vector repeats, element-wise repeats
# (each =), and both combined.
rep(1, 10)
rep("Gamecocks Rule", 50)
rep(1:3, 10)
rep(1:3, each = 5)
rep(1:3, times = 5, each = 5)

# Looking up the probability distributions available in R
help.search("distribution")
?Uniform

# Random draws from the uniform and chi-square distributions
runif(3)
runif(3, 0, 10)
# NOTE(review): rchisq() has no default for df, so the next call stops with
# an error -- presumably an intentional demonstration; run it interactively.
rchisq(2)
rchisq(2, 5)

# Creating constant matrices and variables
zeromat <- matrix(0, 5, 3)
zeromat
zeromat <- matrix(0, 3, 5)
zeromat
zeromat <- matrix(0, ncol = 3, nrow = 5)
zeromat
onesvec.col <- matrix(1, 10, 1)

# Filling a matrix column by column (the default) versus row by row
bigvec <- 1:200
mat.bycols <- matrix(bigvec, 100, 2)
mat.bycols
mat.byrows <- matrix(bigvec, 100, 2, byrow = TRUE)
mat.byrows

# Matrices of random normal draws (argument names spelled out in full
# rather than relying on partial matching of nr / nc)
rnorm.mat <- matrix(rnorm(36), nrow = 9, ncol = 4)
rnorm.mat
# NOTE(review): 32 values do not fill a 9 x 4 (36-cell) matrix; R recycles
# the data and warns -- presumably an intentional demonstration.
matrix(rnorm(32), nrow = 9, ncol = 4)

# Section 5.2

# You may want to change the working directory to simplify the following
# commands
?read.table

# Reading tab-delimited files with and without headers
brainbod <- read.table("e://stat540files//brainbod.txt")
# need to navigate via File -> Change Dir... first
# On a Mac, the correct menus to go to in order to change directory are:
#   Misc --> Change Working Directory
# or in RStudio on a Mac:
#   Session --> Set Working Directory --> Choose Directory
brainbod <- read.table("brainbod.txt")

brainbod <- read.table("brainbod_rownames.txt")
# need to specify that first row is a header...
brainbod <- read.table("brainbod_rownames.txt", header = TRUE)
brainbod <- read.table("brainbod_nonames.txt")

# Assigning row and column names
brainbod <- read.table(
  "brainbod_nonames.txt",
  col.names = c("Species", "Body Weight", "Brain Weight")
)
# row.names = 1 uses the first column as row names instead of as a variable
brainbod <- read.table(
  "brainbod_nonames.txt",
  col.names = c("Species", "Body Weight", "Brain Weight"),
  row.names = 1
)

# Also: read.csv and read.delim
brainbod2 <- read.csv("brainbod.csv")
# note difference in defaults with respect to header
brainbod2 <- read.table("brainbod.csv", sep = ",")
brainbod2 <- read.table("brainbod.csv", sep = ",", header = TRUE)

## Note that you can read files from websites as well:
brainbod <- read.table("http://people.stat.sc.edu/hitchcock/brainbod.txt")

## Reading fixed width files:
my.file.df <- read.fwf(
  file = "http://people.stat.sc.edu/hitchcock/simplefixedwidth.txt",
  col.names = c("social", "birthdate", "Name"),
  colClasses = c("numeric", "character", "character"),
  strip.white = TRUE,
  as.is = TRUE,
  widths = c(9, 8, 19)
)
# birthdate was read as an 8-character string; parse it as month-day-year
my.file.df$birthdate <- as.Date(my.file.df$birthdate, "%m%d%Y")
my.file.df

?scan
# The `what` list is a template: each element's name becomes a column name
# and each element's type decides how that field is read.
my.dfr <- as.data.frame(scan(
  "garden.txt",
  what = list(name = "", tomato = 0, grapes = 0, type = ""),
  multi.line = TRUE
))
my.dfr

# Be a bit careful in how you read in numeric columns:
typeof(my.dfr$grapes)
# Here the template for grapes is the character string "numeric", so the
# column is read as character, not as numbers.
my.dfr <- as.data.frame(scan(
  "garden.txt",
  what = list(name = "", tomato = 0, grapes = "numeric", type = "character"),
  multi.line = TRUE
))
my.dfr
typeof(my.dfr$grapes)
typeof(my.dfr$tomato)

# To have R store it as "double", use the =0 argument value:
my.dfr <- as.data.frame(scan(
  "garden.txt",
  what = list(name = "", tomato = 0, grapes = 0, type = ""),
  multi.line = TRUE
))

# Before R 4.0.0, as.data.frame() converted character columns to factors by
# default; since R 4.0.0 (stringsAsFactors = FALSE) they stay character.
# These checks show which behaviour your R version has:
is.character(my.dfr$name)
is.factor(my.dfr$name)

# Can coerce it back to a character column (a no-op if it already is one):
my.dfr$name <- as.character(my.dfr$name)
is.character(my.dfr$name)

# Writing inline data to a temporary file and reading it back.
# One height/goals pair per line, so read.table() sees exactly two columns
# (blank first and last lines are skipped by read.table's defaults).
my.datafile <- tempfile()
cat(file = my.datafile, "
71 15
74 19
70 11
71 15
69 12
76 21
74 19
70 13
", sep = " ")
simpbasketball <- read.table(
  my.datafile,
  header = FALSE,
  col.names = c("height", "goals")
)
# NOTE(review): attach() puts the columns of simpbasketball on the search
# path so they can be used by bare name. It is kept because later code may
# rely on that; with(simpbasketball, ...) or simpbasketball$height is safer.
attach(simpbasketball)

# Reading Excel files into R
# easiest way: Save Excel worksheet as .csv file, then:
presdata <- read.csv("Presidents.csv", header = TRUE)
presdata[45, ]
is.na(presdata[45, 2])

# Treat empty fields as missing values (NA)
presdata <- read.csv("Presidents.csv", header = TRUE, na.strings = "")

# prevents conversion of character columns to factors:
presdata <- read.csv(
  "Presidents.csv",
  header = TRUE,
  na.strings = "",
  as.is = TRUE
)
presdata$Name
presdata$Term
presdata$Age
presdata$Party

# Maybe "party" should be a factor ...
presdata$Party <- as.factor(presdata$Party)
presdata$Party