# Chapter 9 ----
# Tutorial script: subsetting vectors, matrices and data frames, then
# ordering them. Bare expressions (e.g. `tryit`) are there to print results
# when the script is run interactively.

# Extracting subsets of a column ----
data(state)
load("July_15_COVID.RData")
# NOTE(review): attach() puts the data frame's columns on the search path;
# State, Region and Testing_Rate below are presumably columns of
# July_15_COVID resolved that way -- confirm against the .RData contents.
attach(July_15_COVID)
tryit <- State[50:46]
tryit
match(tryit, State)
tryit %in% State
State[-(30:40)]

# Extracting subsets of an array ----
# Two ways of selecting the same columns: drop columns 1 and 12, or keep 2:11
# (equivalent only if the data frame has exactly 12 columns -- TODO confirm).
July_15_stats <- as.matrix(July_15_COVID[, -(c(1, 12))])
July_15_stats <- as.matrix(July_15_COVID[, 2:11])
rownames(July_15_stats) <- State
colnames(July_15_stats)
July_15_stats[, -1]
July_15_stats[, c(2, 3, 6)]
July_15_stats[6:2, c(2, 4)]
# Rows can be selected by name once rownames are set.
tristate.region <- July_15_stats[
  c("South Carolina", "Georgia", "North Carolina"),
]
tristate.region

# Logical extraction ----
northeast.region <- July_15_COVID[Region == "Northeast", ]
northeast.region
# Borrowing Region from the data frame -- be very careful that the data frame
# and the matrix have the same row layout, otherwise the mask selects the
# wrong rows.
northeast.region <- July_15_stats[Region == "Northeast", ]
northeast.region

# Self-referential logical extraction ----
x <- rnorm(10)
x[x > 0]
x > 0
which(x > 0)
x[which(x > 0)]
# Same filter written twice: explicit column lookup, then the attached name.
NEHighTest <- July_15_COVID[
  (Region == "Northeast") & (July_15_COVID[, "Testing_Rate"] > 15000),
]
NEHighTest <- July_15_COVID[
  (Region == "Northeast") & (Testing_Rate > 15000),
]
NEHighTest

# Ordering a variable ----
ord <- order(state.area)
ord
state.name[ord]
# Two ways to get decreasing order; they can differ in how ties are arranged
# (rev() reverses tied elements, order(-x) keeps them in original order).
ord <- rev(order(state.area))
state.name[ord]
ord <- order(-state.area)

# Ordering an entire array by a single column ----
Population <- state.x77[, "Population"]
ord <- rev(order(Population))
state.x77.popsorted <- state.x77[ord, ]
state.x77.popsorted
# Shortcut: the same thing in one expression.
state.x77.popsorted <- state.x77[rev(order(state.x77[, "Population"])), ]
state.x77.popsorted

# Ordering by two variables ----
# Sort by region name (alphabetically), then by decreasing population within
# each region.
state.df <- data.frame(state.name, state.region, state.x77)
ord <- order(as.character(state.region), (-Population))
state.df <- state.df[ord, ]
state.df

# Chapter 10 ----
# 100 random samples of size 10 (one sample per row)
mymat <- matrix(rnorm(1000), ncol = 10)
dim(mymat)

# Outer product via %o% (the Kronecker product operator in R is %x%) ----
seq(5, 100, by = 5) %o% rep(1, 5)
rep(1, 10) %o% seq(.5, 2.5, by = .5)
rep(1, 10) %o% rep(1, 10)
matrix(1, 10, 10)
max(mymat)

# Row-wise and column-wise operations ----
rowmaxes <- apply(mymat, 1, max)
mymat[1, ]
rowmaxes[1]
length(rowmaxes)
# Chapter 10 (continued): row-wise / column-wise summaries, grouped
# summaries, and explicit iteration. Relies on `mymat` and `rowmaxes`
# created earlier in the script.
hist(rowmaxes)
colmeans <- apply(mymat, 2, mean)
# Some row-wise and column-wise operations have built-in shortcut functions:
colSums(mymat)
rowMeans(mymat)

# Item by item comparison ----
mymat[1:10, 1:2]
pmax(mymat[1:10, 1], mymat[1:10, 2])

# Row-wise operation with missing value ----
mymat[23, 4] <- NA
# Without na.rm the maximum of row 23 is NA ...
rowmaxes <- apply(mymat, 1, max)
rowmaxes
# ... extra arguments to apply() are passed on to max(), so na.rm = TRUE
# makes it skip the missing entry.
rowmaxes <- apply(mymat, 1, max, na.rm = TRUE)
rowmaxes[23]

# Row-wise operation using a factor ----
tapply(state.x77[, "Murder"], state.region, mean)
by(state.x77[, "Murder"], state.region, mean)
tapply(state.x77[, "Illiteracy"], state.region, mean)
tapply(state.x77[, "Murder"], state.region, cummax)

# Iteration ----
# Result vectors are preallocated; starting from NULL (or c()) and assigning
# to index j also works, but then R has to grow the vector on every pass.
n_rows <- nrow(mymat)
rowmaxes <- numeric(n_rows)
rowabsmax <- numeric(n_rows)
diffabsmax <- numeric(n_rows)
# seq_len() instead of 1:n so that a zero-row matrix gives zero iterations.
for (j in seq_len(n_rows)) {
  rowmaxes[j] <- max(mymat[j, ])
  rowabsmax[j] <- max(abs(mymat[j, ]))
  diffabsmax[j] <- rowabsmax[j] - rowmaxes[j]
}
# Row 23 still contains the NA set above, so its entries are NA here.
hist(diffabsmax)

# Halving until underflow ----
# Repeatedly halve x until it compares equal to 0. On exit x is 0, y holds
# the last nonzero value reached, and niter is one more than the number of
# halvings. Note this finds the smallest positive value a double can hold,
# not the machine epsilon (.Machine$double.eps), which would need the loop
# condition `1 + x != 1` instead. The exact comparison with 0 is intentional.
x <- .5
niter <- 1
while (x != 0) {
  y <- x
  x <- x / 2
  niter <- niter + 1
}
x
y
niter