Welcome to Python Programming¶

In [20]:
print("Hello Python!")
Hello Python!

Data Types in Python¶

String Variable¶

In [22]:
organism = "Escherichia coli"        # NCBI GenBank accession number U00096
# NOTE(review): the K-12 substrain is normally written MG1655, not MG1665 — confirm and correct the literal.
strain = 'str. K-12 substr. MG1665'

Character strings can easily be concatenated in Python with the + operator:¶

In [23]:
print("DEFINITION: " + organism + " " + strain)
DEFINITION: Escherichia coli str. K-12 substr. MG1665
In [52]:
# DNA fragment labelled as the chimp D-loop; used by the string-method examples that follow.
chimp = 'GTACCACCTAAGTACTGGCTCATTCATTACAACCGGTATGTACTTCGTACATTACTGCCAGTCACCATGA'
print('Chimp D-loop:', chimp)
Chimp D-loop: GTACCACCTAAGTACTGGCTCATTCATTACAACCGGTATGTACTTCGTACATTACTGCCAGTCACCATGA
In [71]:
len(chimp)
Out[71]:
70
In [53]:
codon = 'CAT'
In [56]:
# Check if substring is in string: the `in` operator evaluates to True or False
is_in = codon in chimp
print('Is codon', codon, 'in chimp:', is_in)
Is codon CAT in chimp: True
In [58]:
# .count() counts how many times sub appears in string (non-overlapping occurrences)
how_many = chimp.count(codon)
print('How many times', codon, 'appears in chimp:', how_many)
 
How many times CAT appears in chimp: 4
In [59]:
# .find() returns the lowest index at which the substring starts, or -1 if it is not found
first_index = chimp.find(codon)
print('First', codon, 'index: ', first_index)
First CAT index:  20
In [68]:
help(str.find)
Help on method_descriptor:

find(self, sub[, start[, end]], /) unbound builtins.str method
    Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].

    Optional arguments start and end are interpreted as in slice notation.
    Return -1 on failure.

In [69]:
first_index + len(codon)
Out[69]:
23
In [74]:
# Resume the search just past the first match; the start and end arguments
# of .find() are interpreted like slice bounds.
second_index = chimp.find(codon, first_index + len(codon), len(chimp))
print('Second',  codon, 'index: ', second_index)
Second CAT index:  24
In [73]:
# .rfind() returns the highest index at which the substring starts (-1 if not found)
last_index = chimp.rfind(codon)
print('Last', codon, 'index: ', last_index)
Last CAT index:  65

Integer Variable¶

In [24]:
number_of_bps = 4641652
print('Number of base pairs:', number_of_bps)
Number of base pairs: 4641652

Float Variable¶

In [25]:
percent_A = 24.7
percent_T = 23.6

List Variable¶

When given several arguments, the print function joins them with spaces, much like the paste function in R.

In [26]:
# Base percentages in the order A, G, C, T; the G and C values are entered as literals.
percents_AGCT = [percent_A, 26.0, 25.7, percent_T]
print("[A, G, C, T] =", percents_AGCT)
[A, G, C, T] = [24.7, 26.0, 25.7, 23.6]
In [1]:
x=[1,2,3,8]
type(x)
Out[1]:
list
In [10]:
y = ["a","b","z"]
type(y)
Out[10]:
list
In [ ]:
z = [True, False, False]
print(z)
In [2]:
# A single list may hold values of different types (here a str, an int and a bool).
w = ["Yen-Yi", 40, True] # write comments after # in Python
print(w)
['Yen-Yi', 40, True]
In [6]:
print(w[0]) # indexing begins at zero.
print(w[1]) # second value
Yen-Yi
40
In [7]:
# Even numbers starting at 0 and stopping before 12, unpacked into a list.
u = [*range(0, 12, 2)]
print(u)
[0, 2, 4, 6, 8, 10]
In [8]:
print("The first entry of w is ",w[0])
The first entry of w is  Yen-Yi
In [12]:
"The second entry of x is " + str(x[1]) + "."
Out[12]:
'The second entry of x is 2.'
In [13]:
print("The first entry of x is ",x[0])
The first entry of x is  1
In [30]:
# Computing ratios A/T and G/C
ratio_AT = percent_A / percent_T
# Positions 1 and 2 of percents_AGCT hold the G and C percentages.
ratio_GC = percents_AGCT[1] / percents_AGCT[2]
print("A/T ratio is ", ratio_AT)
A/T ratio is  1.0466101694915253

Creating several objects at once with tuple unpacking:

In [15]:
# One statement binds three names: a list of ints, a string and a list of booleans.
u, v, r = [50, 60], "Hello", [True, False, False]
# Show each object on its own line.
print(u, v, r, sep="\n")
[50, 60]
Hello
[True, False, False]
In [16]:
print(u*4)
[50, 60, 50, 60, 50, 60, 50, 60]
In [17]:
print(v*8)
HelloHelloHelloHelloHelloHelloHelloHello

We can edit the values in a list:

In [18]:
# Lists are mutable: item assignment replaces the element at that index in place.
u[0] = 100
print(u)
[100, 60]

Tuple Variable¶

In [36]:
E_Coli = (organism, ratio_AT, ratio_GC)
type(E_Coli)
Out[36]:
tuple
In [37]:
print(E_Coli)
('Escherichia coli', 1.0466101694915253, 1.0116731517509727)
In [38]:
# E_Coli[2] = 2 # this won't work because you cannot edit a tuple: tuples are immutable, so item assignment raises a TypeError.

The Python Dictionary¶

In [40]:
# Map each restriction enzyme name to its recognition site.
restriction_enzymes = {
    'EcoRI': 'GAATTC',
    'AluI': 'AGCT',
    'NotI': 'GCGGCCGC',
    'TaqI': 'TCGA',
}
 
In [42]:
print(restriction_enzymes)
{'EcoRI': 'GAATTC', 'AluI': 'AGCT', 'NotI': 'GCGGCCGC', 'TaqI': 'TCGA'}
In [44]:
# To get a list of keys from a dictionary view object
keys = list(restriction_enzymes.keys())
print('Keys as a list:', keys)
Keys as a list: ['EcoRI', 'AluI', 'NotI', 'TaqI']
In [46]:
values = list(restriction_enzymes.values())
print('Values as a list:', values)
Values as a list: ['GAATTC', 'AGCT', 'GCGGCCGC', 'TCGA']
In [47]:
# `in` applied to a dictionary tests membership among its keys (not its values).
mykey = 'crispr'
check = mykey in restriction_enzymes
print('Is', mykey, 'key in the dictionary?', check)
Is crispr key in the dictionary? False

To fetch a value from a dictionary with its key

In [48]:
# Two ways to look up the same key; the second assignment overwrites the first with the same value.
EcoRI_value = restriction_enzymes['EcoRI']  # raises a KeyError if key not found
EcoRI_value = restriction_enzymes.get('EcoRI') # does not raise a KeyError if key not found; returns None instead
print('The recognition site of EcoRI is', EcoRI_value)
The recognition site of EcoRI is GAATTC

To add to an existing dictionary

In [49]:
restriction_enzymes['EcoRV'] = 'GATATC'
print(restriction_enzymes)
{'EcoRI': 'GAATTC', 'AluI': 'AGCT', 'NotI': 'GCGGCCGC', 'TaqI': 'TCGA', 'EcoRV': 'GATATC'}

Equivalently, using the update method:

In [50]:
# The keyword form of update() only works when the key is a valid Python identifier.
# The key was already added by item assignment above, so this call just sets the same value again.
restriction_enzymes.update(EcoRV = 'GATATC')
print('With a new item:', restriction_enzymes)
With a new item: {'EcoRI': 'GAATTC', 'AluI': 'AGCT', 'NotI': 'GCGGCCGC', 'TaqI': 'TCGA', 'EcoRV': 'GATATC'}

To delete an item from a dictionary

In [51]:
# del removes the key/value pair in place; it raises a KeyError if the key is missing.
del restriction_enzymes['EcoRV']
print('Original dictionary:', restriction_enzymes)
Original dictionary: {'EcoRI': 'GAATTC', 'AluI': 'AGCT', 'NotI': 'GCGGCCGC', 'TaqI': 'TCGA'}

Slicing¶

In [85]:
human = 'TTCTTTCATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTTACCCATCAACAACCGCTATGTATT'
print('Human D-loop:', human)
Human D-loop: TTCTTTCATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTTACCCATCAACAACCGCTATGTATT
In [86]:
mycodon = 'CAT'
In [78]:
# Find the lowest index of mycodon in human
index_mycodon = human.find(mycodon)
print('First', mycodon, 'index:', index_mycodon)
First CAT index: 6
In [79]:
# Extract the first codon after mycodon.
# Offsets are multiples of len(mycodon) rather than hard-coded 3 and 6, so the
# slice starts exactly where the match ends.
# NOTE: assumes mycodon was found, i.e. index_mycodon is not -1.
first_codon = human[index_mycodon + len(mycodon): index_mycodon + 2 * len(mycodon)]
print('First codon after', mycodon, ':', first_codon)
First codon after CAT : GGG
In [81]:
human[13]
Out[81]:
'A'
In [82]:
# Extract the second codon after mycodon.
# Offsets are multiples of len(mycodon) rather than hard-coded 6 and 9: skip the
# match itself plus one codon, then take the next one.
# NOTE: assumes mycodon was found, i.e. index_mycodon is not -1.
second_codon = human[index_mycodon + 2 * len(mycodon): index_mycodon + 3 * len(mycodon)]
print('Second codon after', mycodon, ':', second_codon)
Second codon after CAT : GAA
In [83]:
# Negative indices count from the end of the string: -6 is the sixth character
# from the end, and the slice stops before the third character from the end.
next_to_last_codon = human[-6:-3]
print('Next to last codon:', next_to_last_codon)
Next to last codon: TGT
In [84]:
# Omitting the second entry of a slice means "up to the end of the string".
last_codon = human[-3:]
print('Last codon:', last_codon)
Last codon: ATT
In [ ]: