# STAT_540_Lec_09_R
# Importing data into R, working with dates/times, and processing text.
#
# The script expects the safari_*.dat example files to be present in the
# working directory that is set below.

# Working directory ----

# get the working directory
getwd()

# set the working directory
# (on a Windows machine change "/" to "\\" in the path)
setwd("/Users/karlgregory/Desktop/stat540_data")

# sometimes useful to list all the files in the working directory:
files <- list.files(getwd())

# list all files with ".dat" in the name; fixed = TRUE makes the "." a
# literal dot rather than a regular-expression wildcard
dat <- grep(".dat", files, fixed = TRUE, value = TRUE)

# Reading delimited and fixed-width files ----

# comma-delimited file with a header row
safari <- read.table(file = "safari_comma.dat", sep = ",", header = TRUE)
is.data.frame(safari)

# read in a data set with missing values and lines at the beginning to skip
safari <- read.table(
  file = "safari_comma_missing.dat",
  sep = ",",
  header = TRUE,
  skip = 3,
  na.strings = c(".", "")
)
safari

# read in a data set with missing values and lines at the beginning to skip.
# Tab-delimited
safari <- read.table(
  file = "safari_tab_missing.dat",
  sep = "\t",
  header = TRUE,
  skip = 3,
  na.strings = c(".", "")
)
safari

# read in fixed-width data
# widths gives the number of characters in each column; with header = TRUE,
# read.fwf() expects the names in the header line to be delimited by sep
safari <- read.fwf(
  file = "safari_fwf_missing.dat",
  header = TRUE,
  skip = 3,
  na.strings = c(".", ""),
  sep = ",",
  widths = c(11, 3, 5, 3, 14, 9, 9, 4, 18),
  strip.white = TRUE
)

# Dates and times ----

safari$date

day <- as.Date("1/13/1999", format = "%m/%d/%Y")
as.Date("1999-01-13") # default format
day

# strptime() function is for not only dates but times
strptime("1/13/1999", format = "%m/%d/%Y")
strptime("1/13/1999 10:30", format = "%m/%d/%Y %H:%M")
strptime("1/13/1999 10:30 pm", format = "%m/%d/%Y %I:%M %p")
strptime("1/13/1999 22:30", format = "%m/%d/%Y %H:%M")
my_time <- strptime(
  "January 13, 1999 10:30 pm",
  format = "%B %d, %Y %I:%M %p"
)

# the strftime(), string format time, for putting a time into a string with
# a desired format
# make it like this: 1999.01.13 2230
strftime(my_time, format = "%Y.%m.%d %H%M")

# for the safari data: change the date format to yyyy-mm-dd
dates <- strptime(safari$date, format = "%m/%d/%Y")
safari$date <- strftime(dates, format = "%Y-%m-%d")
safari

# let's get the duration of each safari
# Only clock times are given, so strptime() fills in today's date; parsing in
# UTC keeps a local daylight-saving change from distorting the differences.
# NOTE(review): assumes each safari starts and ends on the same day --
# otherwise the duration comes out negative.
start <- strptime(safari$start, format = "%I:%M %p", tz = "UTC")
end <- strptime(safari$end, format = "%I:%M %p", tz = "UTC")
duration <- difftime(end, start, units = "mins") # duration in minutes

# would like to show the durations in the format hh:mm
# we have to do it "manually": work on a plain numeric minute count so the
# arithmetic and the formatting below do not depend on the difftime class
total_min <- as.numeric(duration, units = "mins")
hrs <- total_min %/% 60 # whole hours
min <- total_min %% 60 # leftover minutes

# sprintf() a function for printing numbers in desired formats
sprintf(50, fmt = "%.03f") # three digits after the decimal point
sprintf(50, fmt = "%03.f") # no decimals, zero-padded to width 3
sprintf(8, fmt = "%02.f") # no decimals, zero-padded to width 2

paste(hrs, min, sep = ":")
safari$duration <- paste(
  sprintf(hrs, fmt = "%02.f"),
  sprintf(min, fmt = "%02.f"),
  sep = ":"
)
safari

# Processing text ----

safari$weather

# grep() function. Search for a "pattern" within a string
sunny <- grep("sunny", safari$weather) # in which entries it found a match

# grepl() gives the same matches as a TRUE/FALSE vector (FALSE for NA)
safari$sunny <- grepl("sunny", safari$weather)

## How about changing a full name to a first initial and last name
# strsplit() split a string on a character
ch <- "Karl Bruce Gregory"

#' Abbreviate a full name to first initial and last name
#'
#' @param ch A single name, e.g. "Karl Bruce Gregory". Only the first element
#'   is used if a longer vector is supplied.
#' @return A character string: "K. Gregory" for a multi-word name, the name
#'   itself for a one-word name, "" for an empty or all-blank name, and
#'   NA for a missing name.
name_abb <- function(ch) {
  ch <- as.character(ch)[1]
  if (is.na(ch)) {
    return(NA_character_)
  }

  # trim and split on runs of whitespace so that leading, trailing or doubled
  # spaces do not produce empty name parts
  full <- strsplit(trimws(ch), "[[:space:]]+")[[1]]
  n <- length(full)

  if (n == 0) {
    return("")
  }
  if (n == 1) {
    return(full)
  }
  paste0(substr(full[1], 1, 1), ". ", full[n])
}

name_abb("Karl Bruce Gregory")
name_abb("Matthew Frederick Thaddeus Bailey")
name_abb("Bailey")

# apply the function to the column in the data set; vapply() guarantees that
# one character string comes back per guide
safari$name_abb <- vapply(
  as.character(safari$guide),
  name_abb,
  character(1),
  USE.NAMES = FALSE
)
safari

# sort_by for sorting data frames (base R >= 4.4.0); the yyyy-mm-dd strings
# in date sort chronologically
sort_by(safari, ~ date)