Python Programming 4¶

Conditionals and Loops¶

If Else¶

In [2]:
DNA_segment = 'ATGACATGA'
codon1 = DNA_segment[0:3]
codon1
Out[2]:
'ATG'
In [3]:
# == operator tests the equality of two strings, resulting in True/False
if (codon1 == 'ATG'):
    print('Codon', codon1, 'is a start codon.')
else:
    print('Codon', codon1, 'is not a start codon.')
 
print('Done!')
Codon ATG is a start codon.
Done!

If Elif¶

In [4]:
DNA_segment = 'ATGACATGACCAATC'
codon1 = DNA_segment[-3:]
 
# == operator tests the equality of two strings, resulting in True/False
if (codon1 == 'ATG'):
    print('Codon', codon1, 'is a start codon.')
elif ((codon1 == 'TAA') or
     (codon1 == 'TAG') or
     (codon1 == 'TGA')):
    print('Codon', codon1, 'is a stop codon.')
else:
    print('Codon', codon1, 'is neither a start nor a stop codon.')
 
print('Done!')
 
Codon ATC is neither a start nor a stop codon.
Done!

While¶

In [5]:
DNA_seq = 'CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA'
print('DNA sequence:', DNA_seq)
 
bp = 'T'
print('Base pair:', bp)
 
print('str.count():', DNA_seq.count(bp))
 
count = 0
index = 0
 
while index < len(DNA_seq):
    if bp == DNA_seq[index]:
        count += 1
    index += 1
 
print('Our while count T:', count)
DNA sequence: CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA
Base pair: T
str.count(): 17
Our while count T: 17

Indentation matters!

In [7]:
DNA_seq = 'CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA'
print('DNA sequence:', DNA_seq)
 
bp = 'T'
print('Base pair:', bp)
 
print('str.count():', DNA_seq.count(bp))
 
count = 0
index = 0
 
while index < len(DNA_seq):
    if bp == DNA_seq[index]:
        count += 1
    index += 1
    print('Our while count T:', count)
DNA sequence: CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA
Base pair: T
str.count(): 17
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 2
Our while count T: 3
Our while count T: 4
Our while count T: 5
Our while count T: 5
Our while count T: 5
Our while count T: 5
Our while count T: 6
Our while count T: 6
Our while count T: 7
Our while count T: 8
Our while count T: 9
Our while count T: 9
Our while count T: 10
Our while count T: 11
Our while count T: 11
Our while count T: 12
Our while count T: 12
Our while count T: 13
Our while count T: 13
Our while count T: 13
Our while count T: 13
Our while count T: 13
Our while count T: 13
Our while count T: 14
Our while count T: 14
Our while count T: 14
Our while count T: 14
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 16
Our while count T: 17
Our while count T: 17
Our while count T: 17
Our while count T: 17
Our while count T: 17
Our while count T: 17

For Loops¶

In [8]:
# Save nucleotide bases in a list
bases = ['T', 'C', 'A', 'G']
 
# As a warmup, print the list of bases
for base in bases:
    print(base)
 
print('Codons starting with T:')
 
for second_base in bases:
    print('Codons starting with T'+second_base)
    for third_base in bases:
        print('T'+second_base+third_base)
T
C
A
G
Codons starting with T:
Codons starting with TT
TTT
TTC
TTA
TTG
Codons starting with TC
TCT
TCC
TCA
TCG
Codons starting with TA
TAT
TAC
TAA
TAG
Codons starting with TG
TGT
TGC
TGA
TGG
In [9]:
for i in range(0,8):
    print(i)
0
1
2
3
4
5
6
7
In [13]:
days = ['Day ' + str(i) for i in range(0,20)]
days
Out[13]:
['Day 0',
 'Day 1',
 'Day 2',
 'Day 3',
 'Day 4',
 'Day 5',
 'Day 6',
 'Day 7',
 'Day 8',
 'Day 9',
 'Day 10',
 'Day 11',
 'Day 12',
 'Day 13',
 'Day 14',
 'Day 15',
 'Day 16',
 'Day 17',
 'Day 18',
 'Day 19']
In [12]:
n_samples = 50
samples = [f"S{i+1:02d}" for i in range(n_samples)]
print(samples)
['S01', 'S02', 'S03', 'S04', 'S05', 'S06', 'S07', 'S08', 'S09', 'S10', 'S11', 'S12', 'S13', 'S14', 'S15', 'S16', 'S17', 'S18', 'S19', 'S20', 'S21', 'S22', 'S23', 'S24', 'S25', 'S26', 'S27', 'S28', 'S29', 'S30', 'S31', 'S32', 'S33', 'S34', 'S35', 'S36', 'S37', 'S38', 'S39', 'S40', 'S41', 'S42', 'S43', 'S44', 'S45', 'S46', 'S47', 'S48', 'S49', 'S50']
In [30]:
import numpy as np
x = np.array([1,4,7,2,3])

val = 0
for i in range(x.size):
    
    #val = val + x[i]
    val += x[i]

print(val)
17

How about calculating sample variance $$ S^2=\frac{1}{n-1}\sum_{i=1}^{n}(X_i - \overline{X_n})^2 $$

In [18]:
x = np.array([1,4,7,2,3])

def sampvar(x):

    n = x.size
    sumx = 0
    for i in range(n):

        sumx += x[i]

    xbar = sumx / n

    sum2 = 0
    for i in range(n):

        sum2 += (x[i] - xbar)**2

    s2 = sum2/(n-1)
    
    return s2

print(sampvar(x))
print(np.var(x))
5.300000000000001
4.24
In [19]:
%timeit sampvar(x)
1.57 μs ± 31.1 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
In [20]:
%timeit np.var(x)
7.13 μs ± 29.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)

For large arrays, NumPy becomes orders of magnitude faster

Make a plot with multiple functions on it, with a loop

In [21]:
x = np.random.rand(1_000_000)

%timeit sampvar(x)
%timeit np.var(x)
214 ms ± 1.18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.23 ms ± 93.3 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

Making multiple plots with a for loop

In [32]:
x = np.linspace(-6, 6, 1000)

# ---- Plot multiple functions on one axes ----
plt.figure(figsize=(10, 6))
for name, fn in activations:
    y = fn(x)
    plt.plot(x, y, label=name)   # default matplotlib colors are used

# aesthetics
plt.axhline(0, linewidth=0.6)
plt.axvline(0, linewidth=0.6)
plt.title("Comparison of Activation Functions")
plt.xlabel("Input (x)")
plt.ylabel("Output")
plt.legend(loc="upper left", ncol=2, fontsize="small", framealpha=0.9)
plt.grid(True, linestyle="--", alpha=0.25)

# set y-limit a bit larger so saturating functions are visible
plt.ylim(-2.0, 6.0)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [33]:
import numpy as np
import matplotlib.pyplot as plt
# ---- Activation functions ----
def linear(x):
    return x

def hard_step(x):
    # Heaviside / step at 0
    return np.where(x >= 0, 1.0, 0.0)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(0, x)

def leaky_relu(x, negative_slope=0.01):
    return np.where(x >= 0, x, negative_slope * x)

def elu(x, alpha=1.0):
    return np.where(x >= 0, x, alpha * (np.exp(x) - 1.0))

def swish(x, beta=1.0):
    return x * (1.0 / (1.0 + np.exp(-beta * x)))

def gelu_approx(x):
    # Approximate GELU (Hendrycks & Gimpel)
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x**3)))

# ---- Prepare data ----
x = np.linspace(-5.0, 5.0, 1000)

activations = [
    ("Linear", linear),
    ("Step (Heaviside)", hard_step),
    ("Sigmoid", sigmoid),
    ("Tanh", tanh),
    ("ReLU", relu),
    ("Leaky ReLU", lambda z: leaky_relu(z, negative_slope=0.1)),
    ("ELU (α=1.0)", elu),
    ("Swish (β=1.0)", swish),
    ("GELU (approx)", gelu_approx),
]

# ---- Plot grid ----
fig, axes = plt.subplots(3, 3, figsize=(12, 10))
axes = axes.flatten()

for ax, (name, fn) in zip(axes, activations):
    y = fn(x)
    ax.plot(x, y)                 # do not set explicit colors/styles
    ax.axhline(0, linewidth=0.6)  # horizontal zero line
    ax.axvline(0, linewidth=0.6)  # vertical zero line
    ax.set_title(name, fontsize=12)
    ax.set_xlim(x.min(), x.max())
    # set y-limits to give a reasonable view for each activation
    # for sigmoids/tanh keep tight bounds; for others allow the function to show
    if name in ("Sigmoid", "Step (Heaviside)"):
        ax.set_ylim(-0.1, 1.1)
    elif name in ("Tanh"):
        ax.set_ylim(-1.1, 1.1)
    else:
        ax.set_ylim(-2.5, 5.0)
    ax.grid(True, linestyle='--', alpha=0.25)

plt.tight_layout()
plt.suptitle("Popular Activation Functions", fontsize=16, y=1.02)
plt.subplots_adjust(top=0.92)  # make room for suptitle

# Save or show
plt.show()
# plt.savefig("activations_grid.png", dpi=150, bbox_inches="tight")
No description has been provided for this image

Combining function and loops

In [5]:
a=range(1, 8)
for i in a:
    print(i)
1
2
3
4
5
6
7
In [6]:
DNA_seq = 'gggtgcgacgattcattgttttcggacaagtggataggcaaccactaccggtggattgtc'
print('Sequence:', DNA_seq)
 
# Count the number of codons
DNA_length = len(DNA_seq)
number_of_codons = int(DNA_length/3)
print(number_of_codons)
Sequence: gggtgcgacgattcattgttttcggacaagtggataggcaaccactaccggtggattgtc
20
In [8]:
codon_list = []
for i in range(number_of_codons):
    codon_list.append(DNA_seq[i*3:i*3 + 3])
 
print('List of codons:', codon_list)
print()
List of codons: ['ggg', 'tgc', 'gac', 'gat', 'tca', 'ttg', 'ttt', 'tcg', 'gac', 'aag', 'tgg', 'ata', 'ggc', 'aac', 'cac', 'tac', 'cgg', 'tgg', 'att', 'gtc']

In [13]:
def createCodon(seq, k=3):
    number_of_codons = int(len(seq)/k)
    codon_list = []
    for i in range(number_of_codons):
        codon_list.append(DNA_seq[i*k:(i*k + k)])
    return(codon_list)

createCodon(DNA_seq, k=4)
Out[13]:
['gggt',
 'gcga',
 'cgat',
 'tcat',
 'tgtt',
 'ttcg',
 'gaca',
 'agtg',
 'gata',
 'ggca',
 'acca',
 'ctac',
 'cggt',
 'ggat',
 'tgtc']
In [16]:
def codon_counter(seq, k=3):
    codon_list=createCodon(seq, k)
    codon_counter={}
    for codon in codon_list:
        if codon not in codon_counter:
            codon_counter[codon]=1
        else:
             codon_counter[codon]+=1
    return(codon_counter)
codon_counter(DNA_seq, k=4)
Out[16]:
{'ggg': 1,
 'tgc': 1,
 'gac': 2,
 'gat': 1,
 'tca': 1,
 'ttg': 1,
 'ttt': 1,
 'tcg': 1,
 'aag': 1,
 'tgg': 2,
 'ata': 1,
 'ggc': 1,
 'aac': 1,
 'cac': 1,
 'tac': 1,
 'cgg': 1,
 'att': 1,
 'gtc': 1}

Minimizing a function with gradient descent¶

\begin{eqnarray*} f(x) = (x-3)^2 \\ \nabla f(x_{} = 2(x-3). \\ \end{eqnarray*} Make updates $$ x_{n+1} = x_{n} - \eta \nabla f(x_{n}), $$ where $\eta$ is a hyper-parameter called learning rate.

In [51]:
import numpy as np
import matplotlib.pyplot as plt

# objective: f(x) = (x - 3)^2
# gradient: f'(x) = 2*(x - 3)

def f(x):
    return (x - 3.0)**2

def grad_f(x):
    return 2.0 * (x - 3.0)

# hyperparams
lr = 0.1
n_steps = 80
x = 0.0  # initial guess

for step in range(1, n_steps + 1):
    g = grad_f(x)
    x = x - lr * g
    if step % 10 == 0 or step == 1:
        print(f"step {step:2d}: x = {x:.6f}, f(x) = {f(x):.6e}")

print("Converged approx to x =", x)
xmin=x
xmin
step  1: x = 0.600000, f(x) = 5.760000e+00
step 10: x = 2.677877, f(x) = 1.037629e-01
step 20: x = 2.965412, f(x) = 1.196305e-03
step 30: x = 2.996286, f(x) = 1.379246e-05
step 40: x = 2.999601, f(x) = 1.590162e-07
step 50: x = 2.999957, f(x) = 1.833332e-09
step 60: x = 2.999995, f(x) = 2.113688e-11
step 70: x = 3.000000, f(x) = 2.436917e-13
step 80: x = 3.000000, f(x) = 2.809574e-15
Converged approx to x = 2.999999946994588
Out[51]:
2.999999946994588
In [27]:
x=np.linspace(0,6, 200)
fx=(x - 3)**2
fig, ax= plt.subplots(figsize=(5,3))
ax.plot(x, fx)
ax.axvline(xmin)
plt.show()
No description has been provided for this image

Modules and Packages¶

A module is simply a Python file (.py) that contains code — variables, functions, classes — that can be imported and used in another Python file.¶

A file named math_utilis.py

In [ ]:
def add(a, b):
    return a + b

def sub(a, b):
    return a - b

In another file

In [35]:
import math_utils

print(math_utils.add(3, 5))  # Output: 8
8

A package is a folder that contains multiple modules. It must include a special file named __init__.py (even if it's empty).¶

Installing External Modules (Packages)

% pip

In [43]:
import requests,
res = requests.get("https://api.github.com")
print(res.status_code)
200
In [47]:
import requests
import pandas as pd
import io

url = "https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv"

# Step 1: Download the file
response = requests.get(url)
response.raise_for_status()   # will raise an error if the download failed

# Step 2: Load CSV content into pandas
csv_text = response.text
df = pd.read_csv(io.StringIO(csv_text))

# Step 3: Show the DataFrame
print(df)
   Month   "1958"   "1959"   "1960"
0    JAN      340      360      417
1    FEB      318      342      391
2    MAR      362      406      419
3    APR      348      396      461
4    MAY      363      420      472
5    JUN      435      472      535
6    JUL      491      548      622
7    AUG      505      559      606
8    SEP      404      463      508
9    OCT      359      407      461
10   NOV      310      362      390
11   DEC      337      405      432
In [50]:
url = "https://raw.githubusercontent.com/bioconnector/workshops/master/data/airway_scaledcounts.csv"
df2 = pd.read_csv(url)
df2.head()
Out[50]:
ensgene SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516 SRR1039517 SRR1039520 SRR1039521
0 ENSG00000000003 723.0 486.0 904.0 445.0 1170.0 1097.0 806.0 604.0
1 ENSG00000000005 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 ENSG00000000419 467.0 523.0 616.0 371.0 582.0 781.0 417.0 509.0
3 ENSG00000000457 347.0 258.0 364.0 237.0 318.0 447.0 330.0 324.0
4 ENSG00000000460 96.0 81.0 73.0 66.0 118.0 94.0 102.0 74.0
In [ ]: