#############
# Section 5
#############

#Section 5.1

# Repeating values with rep().
# A single number as the second argument is the number of repetitions.
rep(1, 10)

rep("Gamecocks Rule", 50)

# The whole vector 1:3 is repeated 10 times: 1 2 3 1 2 3 ...
rep(1:3, 10)

# `each` repeats every element in place: 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
rep(1:3, each = 5)

# With both arguments, `each` is applied first and the result is then
# repeated `times` times.
rep(1:3, times = 5, each = 5)

# Search the installed help pages for anything about distributions.
help.search("distribution")

?Uniform

# Three draws from the uniform distribution on the default interval (0, 1).
runif(3)

# Three draws from the uniform distribution on (0, 10).
runif(3, 0, 10)

# rchisq() has no default for its `df` argument, so this call signals an
# error. try() still reports the error message but lets the remainder of
# the script keep running when the file is source()d.
try(rchisq(2))

# Two chi-square draws with 5 degrees of freedom.
rchisq(2, 5)

#Creating constant matrices and variables

# Positional arguments to matrix() are data, nrow, ncol: a 5 x 3 matrix of 0s.
zeromat <- matrix(0, 5, 3)

zeromat

# Swapping the two numbers gives a 3 x 5 matrix instead.
zeromat <- matrix(0, 3, 5)

zeromat

# Named arguments may be given in any order; this is 5 x 3 again.
zeromat <- matrix(0, ncol = 3, nrow = 5)

zeromat

# A 10 x 1 column vector of 1s.
onesvec.col <- matrix(1, 10, 1)

bigvec <- 1:200

# By default the matrix is filled column by column.
mat.bycols <- matrix(bigvec, 100, 2)

mat.bycols

# byrow = TRUE fills row by row instead.
# (TRUE is spelled out because T is an ordinary variable that can be
# reassigned.)
mat.byrows <- matrix(bigvec, 100, 2, byrow = TRUE)

mat.byrows

# Argument names are written in full rather than relying on partial
# matching of `nr` / `nc`.
rnorm.mat <- matrix(rnorm(36), nrow = 9, ncol = 4)

rnorm.mat

# Only 32 values are supplied for a 9 x 4 (36-cell) matrix: R recycles the
# data to fill the remaining cells and issues a warning.
matrix(rnorm(32), nrow = 9, ncol = 4)

#Section 5.2
#You may want to change the working directory to simplify the following commands

?read.table

# Reading tab-delimited files with and without headers.
# The first call uses an absolute path; the second uses a path relative to
# the current working directory.
brainbod <- read.table("e://stat540files//brainbod.txt")
brainbod <- read.table("brainbod.txt") # need to navigate via File -> Change Dir... first

#On a Mac, the correct menus to go to in order to change directory are:
# Misc --> Change Working Directory 
# or in RStudio on a Mac:
# Session --> Set Working Directory --> Choose Directory

# read.table() defaults to header = FALSE, so the first call treats the
# header line as data; the second call reads it as column names.
brainbod <- read.table("brainbod_rownames.txt")  # need to specify that first row is a header...
brainbod <- read.table("brainbod_rownames.txt", header = TRUE)


brainbod <- read.table("brainbod_nonames.txt")

# Assigning row and column names.
# col.names supplies the column names; row.names = 1 uses the first column
# as the row names.
brainbod <- read.table(
  "brainbod_nonames.txt",
  col.names = c("Species", "Body Weight", "Brain Weight")
)
brainbod <- read.table(
  "brainbod_nonames.txt",
  col.names = c("Species", "Body Weight", "Brain Weight"),
  row.names = 1
)

# Also: read.csv and read.delim

brainbod2 <- read.csv("brainbod.csv")

brainbod2 <- read.table("brainbod.csv", sep = ",") # note difference in defaults with respect to header

brainbod2 <- read.table("brainbod.csv", sep = ",", header = TRUE)

## Note that you can read files from websites as well:

brainbod <- read.table("http://people.stat.sc.edu/hitchcock/brainbod.txt")

## Reading fixed width files:

# Read a fixed-width file from a URL. `widths` gives the number of
# characters in each of the three fields, and `col.names` / `colClasses`
# give the name and type of each resulting column.
my.file.df <- read.fwf(
  file = "http://people.stat.sc.edu/hitchcock/simplefixedwidth.txt",
  col.names = c("social", "birthdate", "Name"),
  colClasses = c("numeric", "character", "character"),
  strip.white = TRUE,
  as.is = TRUE,
  widths = c(9, 8, 19)
)

# birthdate was read as text; parse it as month-day-four-digit-year
# with no separators.
my.file.df$birthdate <- as.Date(my.file.df$birthdate, format = "%m%d%Y")

my.file.df


?scan

# Read garden.txt with scan(). The `what` list is a template: each element
# names a field, and the type of its value ("" = character, 0 = numeric)
# sets the type the field is read as.
my.dfr <- as.data.frame(
  scan(
    "garden.txt",
    what = list(name = "", tomato = 0, grapes = 0, type = ""),
    multi.line = TRUE
  )
)

my.dfr

# Be a bit careful in how you read in numeric columns:

typeof(my.dfr$grapes)

# Here the template value for grapes is the character string "numeric", so
# the field is read as character, not as a number.
my.dfr <- as.data.frame(
  scan(
    "garden.txt",
    what = list(name = "", tomato = 0, grapes = "numeric", type = 'character'),
    multi.line = TRUE
  )
)
my.dfr

typeof(my.dfr$grapes)
typeof(my.dfr$tomato)

# To have R store it as "double", use the =0 argument value:
my.dfr <- as.data.frame(
  scan(
    "garden.txt",
    what = list(name = "", tomato = 0, grapes = 0, type = ""),
    multi.line = TRUE
  )
)


# Before R 4.0.0, as.data.frame() converted character columns to factors by
# default. From R 4.0.0 on, stringsAsFactors defaults to FALSE and the
# column stays character, so which of the two checks below is TRUE depends
# on the R version:

is.character(my.dfr$name)

is.factor(my.dfr$name)

# Can coerce it back to a character column (a no-op if it already is one):

my.dfr$name <- as.character(my.dfr$name)
is.character(my.dfr$name)



# Write a small two-column data set (no header line) to a temporary file,
# then read it back into a data frame with named columns.
my.datafile <- tempfile()
cat("
 71 15 
 74 19 
 70 11 
 71 15 
 69 12 
 76 21 
 74 19 
 70 13 
", file = my.datafile, sep = " ")

simpbasketball <- read.table(
  file = my.datafile,
  col.names = c("height", "goals"),
  header = FALSE
)

# Put the data frame on the search path so its columns (height, goals) can
# be referred to by bare name.
attach(simpbasketball)


# Reading Excel files into R

# easiest way:  Save Excel worksheet as .csv file, then:

# Read the exported worksheet; the first line holds the column names.
presdata <- read.csv("Presidents.csv", header = TRUE)

# Inspect row 45 and check whether its second column was read as missing.
presdata[45, ]

is.na(presdata[45, 2])

# na.strings = "" makes empty fields be read as NA.
presdata <- read.csv("Presidents.csv", header = TRUE, na.strings = "")

# prevents conversion of character columns to factors:

presdata <- read.csv(
  "Presidents.csv",
  header = TRUE,
  na.strings = "",
  as.is = TRUE
)

presdata$Name
presdata$Term
presdata$Age
presdata$Party

# Maybe "party" should be a factor ...

presdata$Party <- as.factor(presdata$Party)

presdata$Party