# Chapter 9

# Extracting subsets of a column
# data(state) loads the built-in US state datasets (state.name, state.area, ...).
data(state)
# Load the saved workspace file and attach the data frame so that its columns
# can be referred to by bare name below.
# NOTE(review): presumably the .RData file defines July_15_COVID and State is
# one of its columns -- confirm against the data file.
load("July_15_COVID.RData")
attach(July_15_COVID)
# Elements 50 down to 46, i.e. five values taken in reverse order
tryit <- State[50:46]
tryit
# Position of each selected value within the full column
match(x = tryit, table = State)
# Is each selected value present in the full column?
tryit %in% State
# A negative index drops elements: everything except positions 30 through 40
State[-seq(30, 40)]

# Extracting subsets of an array

# Two ways of building the matrix: drop columns 1 and 12, or keep columns 2
# through 11. The second assignment overwrites the first.
# NOTE(review): these select the same columns only if the data frame has
# exactly 12 columns -- confirm.
July_15_stats <- as.matrix(July_15_COVID[, -c(1, 12)])
July_15_stats <- as.matrix(July_15_COVID[, 2:11])
# Label the rows with State so rows can be selected by name further down
rownames(July_15_stats) <- State
colnames(July_15_stats)
# All rows, every column except the first
July_15_stats[, -1]
# All rows, columns 2, 3 and 6
July_15_stats[, c(2, 3, 6)]
# Rows 6 down to 2 (reverse order), columns 2 and 4
July_15_stats[6:2, c(2, 4)]
# Rows picked by row name, returned in the order the names are listed
tristate.region <- July_15_stats[
  c("South Carolina", "Georgia", "North Carolina"),
]
tristate.region

# Logical extraction
# Keep the data-frame rows for which Region equals "Northeast"
northeast.region <- July_15_COVID[Region == "Northeast", ]
northeast.region
# The same logical index applied to the matrix. Region is not part of the
# matrix, so this is only valid when the matrix rows line up one-to-one, in the
# same order, with the rows Region came from -- be very careful about that.
northeast.region <- July_15_stats[Region == "Northeast", ]
northeast.region

# Self-referential logical extraction
# Draw ten standard-normal values, then pull out the positive ones.
x <- rnorm(n = 10)
# A condition on x used directly as the index into x
x[x > 0]
# The logical vector produced by the condition ...
x > 0
# ... and the positions at which it is TRUE
which(x > 0)
# Indexing by those positions instead of by the logical vector
x[which(x > 0)]
# Rows that are in the Northeast AND have Testing_Rate above 15000.
# First form: fetch the column from the data frame by name.
NEHighTest <- July_15_COVID[
  (Region == "Northeast") & (July_15_COVID[, "Testing_Rate"] > 15000),
]
# Second form: refer to the column by its bare name.
# NOTE(review): presumably resolved through the attached data frame -- confirm.
NEHighTest <- July_15_COVID[(Region == "Northeast") & (Testing_Rate > 15000), ]
NEHighTest

# Ordering a variable
# order() returns the permutation of indices that sorts its argument ascending
ord <- order(state.area)
ord
# Applying that permutation to a parallel vector lists the names by area
state.name[ord]
# Reversing the permutation gives descending order
ord <- rev(order(state.area))
state.name[ord]
# Ordering on the negated values is another way to get descending order
ord <- order(-state.area)

# Ordering an entire array by a single column
# Step by step: pull out the column, build the descending permutation, then
# use it as the row index.
Population <- state.x77[, "Population"]
ord <- rev(order(Population))
state.x77.popsorted <- state.x77[ord, ]
state.x77.popsorted

# Shortcut: the same three steps nested into a single expression
state.x77.popsorted <- state.x77[rev(order(state.x77[, "Population"])), ]
state.x77.popsorted

# Ordering by two variables
# Combine the state names, the region factor and the state.x77 columns into
# one data frame.
state.df <- data.frame(state.name, state.region, state.x77)
# Primary key: region name as text (alphabetical). Secondary key: Population,
# negated so that larger values come first within each region.
# The keys are taken from state.df itself, so the sort does not depend on a
# separate Population vector still being present in the workspace.
ord <- order(as.character(state.df$state.region), -state.df$Population)
state.df <- state.df[ord, ]
state.df

# Chapter 10

# 100 random samples of size 10
# 1000 standard-normal draws laid out in 10 columns, which gives 100 rows
mymat <- matrix(rnorm(n = 1000), ncol = 10)
dim(mymat)

# Outer product of two vectors
# %o% is the operator form of outer(): element [i, j] of the result is
# x[i] * y[j]. (The Kronecker product operator in R is %x%.)
# Each column is a copy of 5, 10, ..., 100
seq(from = 5, to = 100, by = 5) %o% rep(1, times = 5)
# Each row is a copy of 0.5, 1.0, ..., 2.5
rep(1, times = 10) %o% seq(from = 0.5, to = 2.5, by = 0.5)
# A 10 x 10 matrix of ones built as an outer product ...
rep(1, times = 10) %o% rep(1, times = 10)

# ... and the same matrix built directly
matrix(1, nrow = 10, ncol = 10)


# Largest value anywhere in the matrix
max(mymat)

# Row-wise and column-wise operations
# apply() with MARGIN = 1 calls the function once per row
rowmaxes <- apply(X = mymat, MARGIN = 1, FUN = max)
# Compare the first row with the first entry of the result
mymat[1, ]
rowmaxes[1]
# One maximum per row
length(rowmaxes)
hist(rowmaxes)
# MARGIN = 2 calls the function once per column
colmeans <- apply(X = mymat, MARGIN = 2, FUN = mean)

# Some row-wise and column-wise operations have built-in shortcut functions:

colSums(mymat)
rowMeans(mymat)

# Item by item comparison
# Show the first ten rows of columns 1 and 2 ...
mymat[1:10, 1:2]
# ... then take, row by row, the larger of the two entries
pmax(mymat[1:10, 1], mymat[1:10, 2])

# Row-wise operation with missing value
# Put a single missing value into row 23
mymat[23, 4] <- NA
# With the default na.rm = FALSE, max() of a row containing NA is NA
rowmaxes <- apply(mymat, 1, max)
rowmaxes
# Extra arguments to apply() are passed on to the function, so na.rm = TRUE
# reaches max() and the NA is dropped before the maximum is taken.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
rowmaxes <- apply(mymat, 1, max, na.rm = TRUE)
rowmaxes[23]

# Group-wise operation using a factor
# tapply() splits the first argument by the levels of the factor and applies
# the function to each group: here, the mean Murder value per region
tapply(X = state.x77[, "Murder"], INDEX = state.region, FUN = mean)
# by() performs the same grouped computation and prints one section per level
by(state.x77[, "Murder"], state.region, mean)
# Same grouping, different column
tapply(X = state.x77[, "Illiteracy"], INDEX = state.region, FUN = mean)
# The function may return a vector: cummax gives a running maximum per group
tapply(X = state.x77[, "Murder"], INDEX = state.region, FUN = cummax)


# Iteration

# For every row of mymat compute the maximum, the maximum absolute value, and
# the difference between the two, then plot the differences.
# The result vectors are allocated at full length up front rather than grown
# from NULL one element at a time, and seq_len() is used so that a matrix with
# zero rows gives zero iterations (1:0 would iterate over 1 and 0).
rowmaxes <- numeric(nrow(mymat))
rowabsmax <- numeric(nrow(mymat))
diffabsmax <- numeric(nrow(mymat))
for (j in seq_len(nrow(mymat))) {
  rowmaxes[j] <- max(mymat[j, ])
  rowabsmax[j] <- max(abs(mymat[j, ]))
  # Zero when the largest entry is also the largest in absolute value
  diffabsmax[j] <- rowabsmax[j] - rowmaxes[j]
}
hist(diffabsmax)

# Machine epsilon: the gap between 1 and the next larger double.
# Halve x until adding it to 1 no longer changes the result. y holds the last
# value that still made a difference, so with IEEE 754 doubles it ends up
# equal to .Machine$double.eps. niter tracks the exponent: x == 2^(-niter).
# (Looping while x != 0 instead keeps halving until underflow and finds the
# smallest positive double, a different and far smaller number.)
x <- 0.5
niter <- 1
# The exact floating-point comparison is intentional here
while (1 + x != 1) {
  y <- x
  x <- x / 2
  niter <- niter + 1
}
x
y
niter