import pandas as pd

# windows: 'C:\\Users\\karlgregory\\Desktop\\stat540_data\\'

dr = '/Users/karlgregory/Desktop/stat540_data/'
filename = 'safari_comma_missing.dat'

# Read the whole file as text. The with-block closes the handle as soon as
# the read is done; the earlier bare `file.close` (no parentheses) only
# referenced the method and never actually closed the file.
with open(dr + filename) as file:
    contents = file.read()
print(contents)

Some make-believe safari data
Woohoo!!

date,wildebeest,laughing hyena,crocodile,weather,start,end,fun,guide
1/13/1999,12,none,2,sunny,7:21 am,4:14 pm,yes,Joshua Tebbs
4/28/2001,3,1,1,cloudy,6:25 am,12:33 pm,Y,Edsel Peña
10/15/2010,3,.,6,rainy,8:12 am,,no,Karl Bruce Gregory
3/02/2006,1,14,5,hot/sunny,7:15 am,3:12 pm,y,Lianming Wang
2/28/1988,2,6,3,partly cloudy,4:53 am,2:16 pm,Yes,Brian Habing
7/14/2015,3,12,0,cloudy,5:47 am,3:46 pm,No,Edwards

<function TextIOWrapper.close()>

# Comma-delimited file: skip the three lines that precede the header row and
# treat a lone '.' or a single blank as a missing value.
safari = pd.read_csv(
    dr + filename,
    skiprows=3,
    na_values=['.', ' '],
)
safari

type(safari)

pandas.core.frame.DataFrame

# A column can be pulled out by key or as an attribute; either way the
# result is a pandas Series.
by_key = safari['wildebeest']
by_attribute = safari.wildebeest
print(by_key)
print(by_attribute)

0    12
1     3
2     3
3     1
4     2
5     3
Name: wildebeest, dtype: int64
0    12
1     3
2     3
3     1
4     2
5     3
Name: wildebeest, dtype: int64

filename = 'safari_fwf_missing.dat'

# Fixed-width file: the first four lines are skipped, the nine column widths
# are given explicitly, and column names are supplied here instead of being
# read from the file (header=None).
pd.read_fwf(
    dr + filename,
    names=['Date', 'W', 'LH', 'Croc', 'Weather', 'start', 'end', 'fun', 'guide'],
    widths=[11, 3, 5, 3, 14, 9, 9, 4, 18],
    header=None,
    skiprows=4,
)

import datetime

# Current local date and time as a (timezone-naive) datetime object.
now = datetime.datetime.now()
print(now)

# NOTE(review): this value is not printed or stored, so it has no visible effect here.
type(now)
# Individual components are available as attributes.
for component in (now.year, now.month, now.microsecond):
    print(component)

2025-10-28 14:41:55.887099
2025
10
887099

# Parse a character string into a real datetime value.
viva = datetime.datetime.strptime(
    '11/22/2015 02:23',
    '%m/%d/%Y %H:%M',
)
print(viva)

# Render the same datetime back to text in a custom layout.
viva_reformat = viva.strftime('%B %d Anno Domini %Y, at %H:%M')
print(viva_reformat)

2015-11-22 02:23:00
November 22 Anno Domini 2015, at 02:23

# Convert the month/day/year strings in the date column to real timestamps,
# then keep the converted values as a new column next to the original text.
dates = pd.to_datetime(safari['date'], format='%m/%d/%Y')
print(dates)
safari['newdate'] = dates
safari

0   1999-01-13
1   2001-04-28
2   2010-10-15
3   2006-03-02
4   1988-02-28
5   2015-07-14
Name: date, dtype: datetime64[ns]

# Day of the week for each parsed date (pandas numbers Monday as 0).
weekday = safari['newdate'].dt.weekday
# Store it as a new column of the safari data set.
safari['weekday'] = weekday
safari

import re # re stands for regular expression

ch = 'Karl Bruce Gregory'

# re.sub: replace every match of a pattern with another substring
# (a literal 'e', then any lowercase vowel, then any lowercase letter).
for pattern, replacement in [('e', '*'), ('[aeiou]', '*'), ('[a-z]', '-')]:
    print(re.sub(pattern, replacement, ch))

# re.search: location of the first match only, reported as a Match object.
print(re.search('go', ch))
# re.findall: every match, returned as a list.
for pattern in ['e', '[A-Z]']:
    print(re.findall(pattern, ch))

# re.split: cut the string at each match of the pattern.
print(re.split(' ', ch))

Karl Bruc* Gr*gory
K*rl Br*c* Gr*g*ry
K--- B---- G------
<re.Match object; span=(14, 16), match='go'>
['e', 'e']
['K', 'B', 'G']
['Karl', 'Bruce', 'Gregory']

# Collect the capital letters, then glue them together with '.' between
# them and one more '.' at the end.
init = re.findall('[A-Z]', ch)
'{}.'.format('.'.join(init))

'K.B.G.'

# Sample full names and the abbreviated form wanted for each.
ch1 = 'Matthew Frederick Thaddeus Bailey' # goal: M. F. T. Bailey
ch2 = 'Karl Gregory' # goal: K. Gregory

def nameabb(ch):
    """Abbreviate a full name to initials followed by the last name.

    Every part of the name except the last is reduced to its first character
    plus a period, e.g. 'Matthew Frederick Thaddeus Bailey' becomes
    'M. F. T. Bailey'.  A name with a single part is returned without
    abbreviation (surrounding blanks removed).

    Parameters
    ----------
    ch : str
        Full name with its parts separated by whitespace.  Any non-string
        value (for example the NaN that marks a missing entry in a pandas
        column) is returned unchanged, so the function can be mapped over a
        column that has gaps.

    Returns
    -------
    str
        The abbreviated name, or `ch` itself when it is not a string.
    """
    # Missing entries are not strings; hand them back rather than failing.
    if not isinstance(ch, str):
        return ch

    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so no empty part can reach the [0] lookup below.
    parts = ch.split()

    if len(parts) <= 1:
        return ch.strip()

    *given, last = parts
    return ' '.join([name[0] + '.' for name in given] + [last])

# Try the abbreviation on a four-part, a two-part and a one-part name.
for example in ('Matthew Frederick Thaddeus Bailey', 'Karl Gregory', 'Karl'):
    print(nameabb(example))

M. F. T. Bailey
K. Gregory
Karl

# Apply nameabb to every guide name and keep the result as a new column.
abbreviated = safari.guide.map(nameabb)
safari['name_abb'] = abbreviated
safari

# Lookup table that turns the various spellings of yes/no into booleans.
fundict = dict.fromkeys(['Yes', 'yes', 'y', 'Y'], True)
fundict.update(dict.fromkeys(['no', 'No'], False))

# Translate the fun column through the table into a new column.
safari['funTF'] = safari['fun'].map(fundict)
safari

safari

# Boolean mask marking the rows whose hyena count was recorded as 'none'.
pos = safari['laughing hyena'].eq('none')
print(pos)

0     True
1    False
2    False
3    False
4    False
5    False
Name: laughing hyena, dtype: bool

# Overwrite the hyena count with 0 in exactly the rows flagged by pos.
# .loc takes the row selector first and the column label second.
# NOTE(review): the other entries in this column were presumably read as text
# because of the 'none' value - confirm whether a numeric conversion is wanted.
safari.loc[pos,'laughing hyena'] = 0 # instead of using square brackets
safari

# Without inplace, sort_values returns a sorted copy; safari itself keeps
# its current row order.
safari.sort_values(by = 'newdate')

# Sort on funTF first and on newdate within equal funTF values, this time
# modifying safari itself.
safari.sort_values(by = ['funTF','newdate'], inplace = True) # add inplace = True to overwrite the table
safari

Dates in Python

Working with text in Python

	date	wildebeest	laughing hyena	crocodile	weather	start	end	fun	guide
0	1/13/1999	12	none	2	sunny	7:21 am	4:14 pm	yes	Joshua Tebbs
1	4/28/2001	3	1	1	cloudy	6:25 am	12:33 pm	Y	Edsel Peña
2	10/15/2010	3	NaN	6	rainy	8:12 am	NaN	no	Karl Bruce Gregory
3	3/02/2006	1	14	5	hot/sunny	7:15 am	3:12 pm	y	Lianming Wang
4	2/28/1988	2	6	3	partly cloudy	4:53 am	2:16 pm	Yes	Brian Habing
5	7/14/2015	3	12	0	cloudy	5:47 am	3:46 pm	No	Edwards