# First program: write a greeting to standard output.
print("Hello", "Python!")

Hello Python!

organism = "Escherichia coli"        # NCBI GenBank accession number U00096
strain = 'str. K-12 substr. MG1655'  # substrain designation of the U00096 record

# Strings are concatenated with +; the separating spaces must be added explicitly.
print("DEFINITION: " + organism + " " + strain)

DEFINITION: Escherichia coli str. K-12 substr. MG1655

# Chimp D-loop sequence; adjacent string literals are joined into one string.
chimp = (
    'GTACCACCTAAGTACTGGCTCATTCATTACAACCGG'
    'TATGTACTTCGTACATTACTGCCAGTCACCATGA'
)
print('Chimp D-loop:', chimp)

Chimp D-loop: GTACCACCTAAGTACTGGCTCATTCATTACAACCGGTATGTACTTCGTACATTACTGCCAGTCACCATGA

# Length of the chimp sequence; as a bare expression the value is only echoed by an interactive session.
len(chimp)

70

codon = 'CAT'

# Check if substring is in string: the `in` operator evaluates to True or False
is_in = codon in chimp
print('Is codon', codon, 'in chimp:', is_in)

Is codon CAT in chimp: True

# .count() returns how many non-overlapping times the substring appears in the string
how_many = chimp.count(codon)
print('How many times', codon, 'appears in chimp:', how_many)

How many times CAT appears in chimp: 4

# .find() returns the lowest index where the substring starts, or -1 if it is not found
first_index = chimp.find(codon)
print('First', codon, 'index: ', first_index)

First CAT index:  20

# Display the built-in documentation for str.find (signature and optional start/end arguments).
help(str.find)

Help on method_descriptor:

find(self, sub[, start[, end]], /) unbound builtins.str method
    Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].

    Optional arguments start and end are interpreted as in slice notation.
    Return -1 on failure.

# Index of the first character after the first match: the natural start for the next search.
first_index + len(codon)

23

# Search again, starting just past the first match. The end argument len(chimp)
# is spelled out for illustration; start/end follow slice notation, so it is
# equivalent to leaving the end out.
second_index = chimp.find(codon, first_index + len(codon), len(chimp))
print('Second',  codon, 'index: ', second_index)

Second CAT index:  24

# .rfind() returns the highest index where the substring starts, or -1 if it is not found
last_index = chimp.rfind(codon)
print('Last', codon, 'index: ', last_index)

Last CAT index:  65

# Integer variable; underscores in a numeric literal are digit separators only.
number_of_bps = 4_641_652
print('Number of base pairs:', number_of_bps)

Number of base pairs: 4641652

# Float variables: percentages for A and T are named; G and C appear inline below.
percent_A, percent_T = 24.7, 23.6

# List entries are ordered A, G, C, T.
percents_AGCT = [
    percent_A,
    26.0,
    25.7,
    percent_T,
]
print("[A, G, C, T] =", percents_AGCT)

[A, G, C, T] = [24.7, 26.0, 25.7, 23.6]

# A list of integers; type() reports the container type, not the element type.
x = [1, 2, 3, 8]
type(x)

list

# A list of strings is still of type list.
y = ["a", "b", "z"]
type(y)

list

# A list of booleans.
z = [True, False, False]
print(z)

# A single list may mix types: here a string, an int and a bool.
# (Everything after # on a line is a comment.)
w = ["Yen-Yi", 40, True]
print(w)

['Yen-Yi', 40, True]

# Indexing begins at zero: w[0] is the first value, w[1] the second.
print(w[0], w[1], sep="\n")

Yen-Yi
40

# range(start, stop, step): start 0 is included, stop 12 is excluded, step is 2.
u = list(range(0, 12, 2))
print(u)

[0, 2, 4, 6, 8, 10]

# print() separates its arguments with a space, so the trailing space in the
# literal yields two spaces before the value.
print("The first entry of w is ",w[0])

The first entry of w is  Yen-Yi

# str() converts the integer so it can be concatenated with +; a bare expression
# like this is echoed (with quotes) only in an interactive session.
"The second entry of x is " + str(x[1]) + "."

'The second entry of x is 2.'

# x[0] is passed as a separate argument, so print() converts it to text itself.
print("The first entry of x is ",x[0])

The first entry of x is  1

# Computing ratios A/T and G/C
ratio_AT = percent_A / percent_T
# percents_AGCT is ordered [A, G, C, T], so index 1 is G and index 2 is C
ratio_GC = percents_AGCT[1] / percents_AGCT[2]
print("A/T ratio is ", ratio_AT)

A/T ratio is  1.0466101694915253

# Bind three names (equivalent to one tuple-unpacking assignment);
# any earlier value of u is replaced.
u = [50, 60]
v = "Hello"
r = [True, False, False]
print(u, v, r, sep="\n")

[50, 60]
Hello
[True, False, False]

# list * int builds a new list with the elements repeated; u itself is not modified
print(u*4)

[50, 60, 50, 60, 50, 60, 50, 60]

# str * int repeats the string the given number of times
print(v*8)

HelloHelloHelloHelloHelloHelloHelloHello

# Lists are mutable: assigning to an index replaces that element in place
u[0] = 100
print(u)

[100, 60]

# Comma-separated values in parentheses build a tuple; items may have different types
E_Coli = (organism, ratio_AT, ratio_GC)
type(E_Coli)

tuple

# Show the tuple's contents: organism name, A/T ratio, G/C ratio
print(E_Coli)

('Escherichia coli', 1.0466101694915253, 1.0116731517509727)

# E_Coli[2] = 2  # would raise TypeError: tuples are immutable, so an item cannot be reassigned.

# Dictionary mapping each restriction enzyme name to its recognition site.
restriction_enzymes = {
    'EcoRI': 'GAATTC',
    'AluI': 'AGCT',
    'NotI': 'GCGGCCGC',
    'TaqI': 'TCGA',
}

print(restriction_enzymes)

{'EcoRI': 'GAATTC', 'AluI': 'AGCT', 'NotI': 'GCGGCCGC', 'TaqI': 'TCGA'}

# Iterating a dictionary yields its keys, so list() collects them in insertion order
keys = list(restriction_enzymes)
print('Keys as a list:', keys)

Keys as a list: ['EcoRI', 'AluI', 'NotI', 'TaqI']

# .values() returns a view object; list() copies its items into a list
values = list(restriction_enzymes.values())
print('Values as a list:', values)

Values as a list: ['GAATTC', 'AGCT', 'GCGGCCGC', 'TCGA']

# `in` applied to a dictionary tests its keys, not its values
mykey = 'crispr'
check = mykey in restriction_enzymes
print('Is', mykey, 'key in the dictionary?', check)

Is crispr key in the dictionary? False

# Two ways to look up a key; the second assignment overwrites the first with the same value.
EcoRI_value = restriction_enzymes['EcoRI']  # raises a KeyError if key not found
EcoRI_value = restriction_enzymes.get('EcoRI') # returns None instead of raising if key not found
print('The recognition site of EcoRI is', EcoRI_value)

The recognition site of EcoRI is GAATTC

# Assigning to a key that does not exist yet inserts a new key/value pair
restriction_enzymes['EcoRV'] = 'GATATC'
print(restriction_enzymes)

{'EcoRI': 'GAATTC', 'AluI': 'AGCT', 'NotI': 'GCGGCCGC', 'TaqI': 'TCGA', 'EcoRV': 'GATATC'}

# update() accepts keyword arguments as key/value pairs; a key that is already
# present simply has its value overwritten
restriction_enzymes.update(EcoRV = 'GATATC')
print('With a new item:', restriction_enzymes)

With a new item: {'EcoRI': 'GAATTC', 'AluI': 'AGCT', 'NotI': 'GCGGCCGC', 'TaqI': 'TCGA', 'EcoRV': 'GATATC'}

# del removes the key together with its value; it raises a KeyError if the key is missing
del restriction_enzymes['EcoRV']
print('Original dictionary:', restriction_enzymes)

Original dictionary: {'EcoRI': 'GAATTC', 'AluI': 'AGCT', 'NotI': 'GCGGCCGC', 'TaqI': 'TCGA'}

# Human D-loop sequence; adjacent string literals are joined into one string.
human = (
    'TTCTTTCATGGGGAAGCAGATTTGGGTACCACCCA'
    'AGTATTGACTTACCCATCAACAACCGCTATGTATT'
)
print('Human D-loop:', human)

Human D-loop: TTCTTTCATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTTACCCATCAACAACCGCTATGTATT

mycodon = 'CAT'

# Find the lowest index of mycodon in human (find() gives -1 when there is no match)
index_mycodon = human.find(mycodon)
print('First', mycodon, 'index:', index_mycodon)

First CAT index: 6

# Extract the first codon after mycodon: skip the 3 characters of mycodon itself,
# then take the next 3 (the slice end index is exclusive)
first_codon = human[index_mycodon + 3: index_mycodon + 6]
print('First codon after', mycodon, ':', first_codon)

First codon after CAT : GGG

# Single-character indexing (zero-based): the character at position 13
human[13]

'A'

# Extract the second codon after mycodon: the 3 characters at offsets 6 to 8
# from the start of mycodon (the slice end index 9 is exclusive)
second_codon = human[index_mycodon + 6: index_mycodon + 9]
print('Second codon after', mycodon, ':', second_codon)

Second codon after CAT : GAA

# Negative indices count from the end of the string:
# [-6:-3] selects the three characters just before the final three.
next_to_last_codon = human[-6:-3]
print('Next to last codon:', next_to_last_codon)

Next to last codon: TGT

# Omitting the second slice index means "through the end of the string"
last_codon = human[-3:]
print('Last codon:', last_codon)

Last codon: ATT

Welcome to Python Programming

Data Types in Python

String Variable

Character strings can easily be concatenated in Python:

Integer Variable

Float Variable

List Variable

Tuple Variable

The Python Dictionary

Slicing