DNA_segment = 'ATGACATGA'
codon1 = DNA_segment[0:3]
codon1

'ATG'

# == operator tests the equality of two strings, resulting in True/False
if (codon1 == 'ATG'):
    print('Codon', codon1, 'is a start codon.')
else:
    print('Codon', codon1, 'is not a start codon.')
 
print('Done!')

Codon ATG is a start codon.
Done!

DNA_segment = 'ATGACATGACCAATC'
codon1 = DNA_segment[-3:]
 
# == operator tests the equality of two strings, resulting in True/False
if (codon1 == 'ATG'):
    print('Codon', codon1, 'is a start codon.')
elif ((codon1 == 'TAA') or
     (codon1 == 'TAG') or
     (codon1 == 'TGA')):
    print('Codon', codon1, 'is a stop codon.')
else:
    print('Codon', codon1, 'is neither a start nor a stop codon.')
 
print('Done!')

Codon ATC is neither a start nor a stop codon.
Done!

DNA_seq = 'CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA'
print('DNA sequence:', DNA_seq)
 
bp = 'T'
print('Base pair:', bp)
 
print('str.count():', DNA_seq.count(bp))
 
count = 0
index = 0
 
while index < len(DNA_seq):
    if bp == DNA_seq[index]:
        count += 1
    index += 1
 
print('Our while count T:', count)

DNA sequence: CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA
Base pair: T
str.count(): 17
Our while count T: 17

DNA_seq = 'CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA'
print('DNA sequence:', DNA_seq)
 
bp = 'T'
print('Base pair:', bp)
 
print('str.count():', DNA_seq.count(bp))
 
count = 0
index = 0
 
while index < len(DNA_seq):
    if bp == DNA_seq[index]:
        count += 1
    index += 1
    print('Our while count T:', count)

DNA sequence: CGGACACACAAAAAGAATGAAGGATTTTGAATCTTTATTGTGTGCGAGTAACTACGAGGAAGATTAAAGA
Base pair: T
str.count(): 17
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 0
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 1
Our while count T: 2
Our while count T: 3
Our while count T: 4
Our while count T: 5
Our while count T: 5
Our while count T: 5
Our while count T: 5
Our while count T: 6
Our while count T: 6
Our while count T: 7
Our while count T: 8
Our while count T: 9
Our while count T: 9
Our while count T: 10
Our while count T: 11
Our while count T: 11
Our while count T: 12
Our while count T: 12
Our while count T: 13
Our while count T: 13
Our while count T: 13
Our while count T: 13
Our while count T: 13
Our while count T: 13
Our while count T: 14
Our while count T: 14
Our while count T: 14
Our while count T: 14
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 15
Our while count T: 16
Our while count T: 17
Our while count T: 17
Our while count T: 17
Our while count T: 17
Our while count T: 17
Our while count T: 17

# Save nucleotide bases in a list
bases = ['T', 'C', 'A', 'G']
 
# As a warmup, print the list of bases
for base in bases:
    print(base)
 
print('Codons starting with T:')
 
for second_base in bases:
    print('Codons starting with T'+second_base)
    for third_base in bases:
        print('T'+second_base+third_base)

T
C
A
G
Codons starting with T:
Codons starting with TT
TTT
TTC
TTA
TTG
Codons starting with TC
TCT
TCC
TCA
TCG
Codons starting with TA
TAT
TAC
TAA
TAG
Codons starting with TG
TGT
TGC
TGA
TGG

for i in range(0,8):
    print(i)

days = ['Day ' + str(i) for i in range(0,20)]
days

['Day 0',
 'Day 1',
 'Day 2',
 'Day 3',
 'Day 4',
 'Day 5',
 'Day 6',
 'Day 7',
 'Day 8',
 'Day 9',
 'Day 10',
 'Day 11',
 'Day 12',
 'Day 13',
 'Day 14',
 'Day 15',
 'Day 16',
 'Day 17',
 'Day 18',
 'Day 19']

n_samples = 50
samples = [f"S{i+1:02d}" for i in range(n_samples)]
print(samples)

['S01', 'S02', 'S03', 'S04', 'S05', 'S06', 'S07', 'S08', 'S09', 'S10', 'S11', 'S12', 'S13', 'S14', 'S15', 'S16', 'S17', 'S18', 'S19', 'S20', 'S21', 'S22', 'S23', 'S24', 'S25', 'S26', 'S27', 'S28', 'S29', 'S30', 'S31', 'S32', 'S33', 'S34', 'S35', 'S36', 'S37', 'S38', 'S39', 'S40', 'S41', 'S42', 'S43', 'S44', 'S45', 'S46', 'S47', 'S48', 'S49', 'S50']

import numpy as np
x = np.array([1,4,7,2,3])

val = 0
for i in range(x.size):
    
    #val = val + x[i]
    val += x[i]

print(val)

17

x = np.array([1,4,7,2,3])

def sampvar(x):

    n = x.size
    sumx = 0
    for i in range(n):

        sumx += x[i]

    xbar = sumx / n

    sum2 = 0
    for i in range(n):

        sum2 += (x[i] - xbar)**2

    s2 = sum2/(n-1)
    
    return s2

print(sampvar(x))
print(np.var(x))

5.300000000000001
4.24

%timeit sampvar(x)

1.57 μs ± 31.1 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)

%timeit np.var(x)

7.13 μs ± 29.2 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)

x = np.random.rand(1_000_000)

%timeit sampvar(x)
%timeit np.var(x)

214 ms ± 1.18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.23 ms ± 93.3 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

x = np.linspace(-6, 6, 1000)

# ---- Plot multiple functions on one axes ----
plt.figure(figsize=(10, 6))
for name, fn in activations:
    y = fn(x)
    plt.plot(x, y, label=name)   # default matplotlib colors are used

# aesthetics
plt.axhline(0, linewidth=0.6)
plt.axvline(0, linewidth=0.6)
plt.title("Comparison of Activation Functions")
plt.xlabel("Input (x)")
plt.ylabel("Output")
plt.legend(loc="upper left", ncol=2, fontsize="small", framealpha=0.9)
plt.grid(True, linestyle="--", alpha=0.25)

# set y-limit a bit larger so saturating functions are visible
plt.ylim(-2.0, 6.0)

plt.tight_layout()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
# ---- Activation functions ----
def linear(x):
    return x

def hard_step(x):
    # Heaviside / step at 0
    return np.where(x >= 0, 1.0, 0.0)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(0, x)

def leaky_relu(x, negative_slope=0.01):
    return np.where(x >= 0, x, negative_slope * x)

def elu(x, alpha=1.0):
    return np.where(x >= 0, x, alpha * (np.exp(x) - 1.0))

def swish(x, beta=1.0):
    return x * (1.0 / (1.0 + np.exp(-beta * x)))

def gelu_approx(x):
    # Approximate GELU (Hendrycks & Gimpel)
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x**3)))

# ---- Prepare data ----
x = np.linspace(-5.0, 5.0, 1000)

activations = [
    ("Linear", linear),
    ("Step (Heaviside)", hard_step),
    ("Sigmoid", sigmoid),
    ("Tanh", tanh),
    ("ReLU", relu),
    ("Leaky ReLU", lambda z: leaky_relu(z, negative_slope=0.1)),
    ("ELU (α=1.0)", elu),
    ("Swish (β=1.0)", swish),
    ("GELU (approx)", gelu_approx),
]

# ---- Plot grid ----
fig, axes = plt.subplots(3, 3, figsize=(12, 10))
axes = axes.flatten()

for ax, (name, fn) in zip(axes, activations):
    y = fn(x)
    ax.plot(x, y)                 # do not set explicit colors/styles
    ax.axhline(0, linewidth=0.6)  # horizontal zero line
    ax.axvline(0, linewidth=0.6)  # vertical zero line
    ax.set_title(name, fontsize=12)
    ax.set_xlim(x.min(), x.max())
    # set y-limits to give a reasonable view for each activation
    # for sigmoids/tanh keep tight bounds; for others allow the function to show
    if name in ("Sigmoid", "Step (Heaviside)"):
        ax.set_ylim(-0.1, 1.1)
    elif name in ("Tanh"):
        ax.set_ylim(-1.1, 1.1)
    else:
        ax.set_ylim(-2.5, 5.0)
    ax.grid(True, linestyle='--', alpha=0.25)

plt.tight_layout()
plt.suptitle("Popular Activation Functions", fontsize=16, y=1.02)
plt.subplots_adjust(top=0.92)  # make room for suptitle

# Save or show
plt.show()
# plt.savefig("activations_grid.png", dpi=150, bbox_inches="tight")

a=range(1, 8)
for i in a:
    print(i)

DNA_seq = 'gggtgcgacgattcattgttttcggacaagtggataggcaaccactaccggtggattgtc'
print('Sequence:', DNA_seq)
 
# Count the number of codons
DNA_length = len(DNA_seq)
number_of_codons = int(DNA_length/3)
print(number_of_codons)

Sequence: gggtgcgacgattcattgttttcggacaagtggataggcaaccactaccggtggattgtc
20

codon_list = []
for i in range(number_of_codons):
    codon_list.append(DNA_seq[i*3:i*3 + 3])
 
print('List of codons:', codon_list)
print()

List of codons: ['ggg', 'tgc', 'gac', 'gat', 'tca', 'ttg', 'ttt', 'tcg', 'gac', 'aag', 'tgg', 'ata', 'ggc', 'aac', 'cac', 'tac', 'cgg', 'tgg', 'att', 'gtc']

def createCodon(seq, k=3):
    number_of_codons = int(len(seq)/k)
    codon_list = []
    for i in range(number_of_codons):
        codon_list.append(DNA_seq[i*k:(i*k + k)])
    return(codon_list)

createCodon(DNA_seq, k=4)

['gggt',
 'gcga',
 'cgat',
 'tcat',
 'tgtt',
 'ttcg',
 'gaca',
 'agtg',
 'gata',
 'ggca',
 'acca',
 'ctac',
 'cggt',
 'ggat',
 'tgtc']

def codon_counter(seq, k=3):
    codon_list=createCodon(seq, k)
    codon_counter={}
    for codon in codon_list:
        if codon not in codon_counter:
            codon_counter[codon]=1
        else:
             codon_counter[codon]+=1
    return(codon_counter)
codon_counter(DNA_seq, k=4)

{'ggg': 1,
 'tgc': 1,
 'gac': 2,
 'gat': 1,
 'tca': 1,
 'ttg': 1,
 'ttt': 1,
 'tcg': 1,
 'aag': 1,
 'tgg': 2,
 'ata': 1,
 'ggc': 1,
 'aac': 1,
 'cac': 1,
 'tac': 1,
 'cgg': 1,
 'att': 1,
 'gtc': 1}

import numpy as np
import matplotlib.pyplot as plt

# objective: f(x) = (x - 3)^2
# gradient: f'(x) = 2*(x - 3)

def f(x):
    return (x - 3.0)**2

def grad_f(x):
    return 2.0 * (x - 3.0)

# hyperparams
lr = 0.1
n_steps = 80
x = 0.0  # initial guess

for step in range(1, n_steps + 1):
    g = grad_f(x)
    x = x - lr * g
    if step % 10 == 0 or step == 1:
        print(f"step {step:2d}: x = {x:.6f}, f(x) = {f(x):.6e}")

print("Converged approx to x =", x)
xmin=x
xmin

step  1: x = 0.600000, f(x) = 5.760000e+00
step 10: x = 2.677877, f(x) = 1.037629e-01
step 20: x = 2.965412, f(x) = 1.196305e-03
step 30: x = 2.996286, f(x) = 1.379246e-05
step 40: x = 2.999601, f(x) = 1.590162e-07
step 50: x = 2.999957, f(x) = 1.833332e-09
step 60: x = 2.999995, f(x) = 2.113688e-11
step 70: x = 3.000000, f(x) = 2.436917e-13
step 80: x = 3.000000, f(x) = 2.809574e-15
Converged approx to x = 2.999999946994588

2.999999946994588

x=np.linspace(0,6, 200)
fx=(x - 3)**2
fig, ax= plt.subplots(figsize=(5,3))
ax.plot(x, fx)
ax.axvline(xmin)
plt.show()

def add(a, b):
    return a + b

def sub(a, b):
    return a - b

import math_utils

print(math_utils.add(3, 5))  # Output: 8

8

import requests,
res = requests.get("https://api.github.com")
print(res.status_code)

200

import requests
import pandas as pd
import io

url = "https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv"

# Step 1: Download the file
response = requests.get(url)
response.raise_for_status()   # will raise an error if the download failed

# Step 2: Load CSV content into pandas
csv_text = response.text
df = pd.read_csv(io.StringIO(csv_text))

# Step 3: Show the DataFrame
print(df)

   Month   "1958"   "1959"   "1960"
0    JAN      340      360      417
1    FEB      318      342      391
2    MAR      362      406      419
3    APR      348      396      461
4    MAY      363      420      472
5    JUN      435      472      535
6    JUL      491      548      622
7    AUG      505      559      606
8    SEP      404      463      508
9    OCT      359      407      461
10   NOV      310      362      390
11   DEC      337      405      432

url = "https://raw.githubusercontent.com/bioconnector/workshops/master/data/airway_scaledcounts.csv"
df2 = pd.read_csv(url)
df2.head()

	ensgene	SRR1039508	SRR1039509	SRR1039512	SRR1039513	SRR1039516	SRR1039517	SRR1039520	SRR1039521
0	ENSG00000000003	723.0	486.0	904.0	445.0	1170.0	1097.0	806.0	604.0
1	ENSG00000000005	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	ENSG00000000419	467.0	523.0	616.0	371.0	582.0	781.0	417.0	509.0
3	ENSG00000000457	347.0	258.0	364.0	237.0	318.0	447.0	330.0	324.0
4	ENSG00000000460	96.0	81.0	73.0	66.0	118.0	94.0	102.0	74.0

Python Programming 4¶

Conditionals and Loops¶

If Else¶

If Elif¶

While¶

For Loops¶

Minimizing a function with gradient descent¶

Modules and Packages¶

A module is simply a Python file (.py) that contains code — variables, functions, classes — that can be imported and used in another Python file.¶

A package is a folder that contains multiple modules. It must include a special file named init.py (even if it's empty).¶

Python Programming 4¶

Conditionals and Loops¶

If Else¶

If Elif¶

While¶

For Loops¶

Minimizing a function with gradient descent¶

Modules and Packages¶

A module is simply a Python file (.py) that contains code — variables, functions, classes — that can be imported and used in another Python file.¶

A package is a folder that contains multiple modules. It must include a special file named __init__.py (even if it's empty).¶

A package is a folder that contains multiple modules. It must include a special file named init.py (even if it's empty).¶