In [3]:
import numpy as np
import matplotlib.pyplot as plt
# Column order used by every one-hot matrix in this notebook.
BASES = ["A", "C", "G", "T"]
# Maps each base letter to its column index in the one-hot matrix.
base_to_idx = dict(zip(BASES, range(len(BASES))))


def one_hot_encode(seq):
    """
    Turn a DNA string into a one-hot matrix.

    seq: sequence of base letters; each must be a key of base_to_idx,
         otherwise the lookup raises KeyError.
    Returns an integer array of shape (len(seq), 4) with a single 1 per row,
    placed in the column given by base_to_idx.
    """
    n_positions = len(seq)
    # Look up every column first; an unknown letter fails here with KeyError.
    columns = np.array([base_to_idx[base] for base in seq], dtype=int)
    encoded = np.zeros((n_positions, 4), dtype=int)
    # One fancy-indexing assignment sets the hot entry of every row.
    encoded[np.arange(n_positions), columns] = 1
    return encoded
def plot_matrix(mat, title="", xticklabels=None, yticklabels=None, cmap="viridis"):
    """
    Show a 2-D array as a heatmap with a colorbar.

    mat: array-like accepted by imshow
    title: text placed above the axes
    xticklabels / yticklabels: optional label sequences; when given, ticks
        are placed at 0..len(labels)-1 and labelled with them
    cmap: matplotlib colormap name
    """
    fig, ax = plt.subplots(figsize=(6, 2.5))
    image = ax.imshow(mat, aspect="auto", cmap=cmap)
    fig.colorbar(image, ax=ax)

    if xticklabels is not None:
        ax.set_xticks(range(len(xticklabels)))
        ax.set_xticklabels(xticklabels)
    if yticklabels is not None:
        ax.set_yticks(range(len(yticklabels)))
        ax.set_yticklabels(yticklabels)

    ax.set_title(title)
    fig.tight_layout()
    plt.show()
def visualize_conv_step_by_step(seq, filt, bias=0.0, stride=1):
    """
    Slide one 1-D convolution filter along a DNA sequence, printing each step.

    For every window the one-hot slice is multiplied elementwise with the
    filter, summed, and offset by ``bias``. A three-panel figure then shows
    the one-hot input, the filter weights, and the activation at each window
    start position.

    seq: DNA string, length L (encoded with one_hot_encode)
    filt: filter weights, array-like of shape (kernel_size, 4); columns
          follow the order of BASES
    bias: scalar added to every window score
    stride: step between consecutive window starts, must be >= 1

    Returns a 1-D numpy array with one activation per window. It is empty
    when the sequence is shorter than the kernel.

    Raises ValueError if ``filt`` is not 2-D with one column per base, or if
    ``stride`` is smaller than 1.
    """
    # Accept nested lists as well as arrays.
    filt = np.asarray(filt)
    # A filter with the wrong width would otherwise broadcast silently
    # against the one-hot window and yield meaningless scores.
    if filt.ndim != 2 or filt.shape[1] != len(BASES):
        raise ValueError(
            f"filt must have shape (kernel_size, {len(BASES)}), got {filt.shape}"
        )
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}")

    X = one_hot_encode(seq)
    k = filt.shape[0]
    # Start index of every window. The range is empty (never negative in
    # length) when the sequence is shorter than the kernel.
    positions = range(0, len(seq) - k + 1, stride)
    out_len = len(positions)
    activations = []

    print("Sequence:", seq)
    print("Input one-hot shape:", X.shape)
    print("Filter shape:", filt.shape)
    print("Output length:", out_len)
    print()

    for pos in positions:
        window_seq = seq[pos:pos+k]
        window_X = X[pos:pos+k, :]
        # Computed once; reused for the score and for the printed breakdown.
        product = window_X * filt
        score = np.sum(product) + bias
        activations.append(score)

        print(f"Position {pos} to {pos+k-1}: window = {window_seq}")
        print("One-hot window:")
        print(window_X)
        print("Elementwise product with filter:")
        print(np.round(product, 3))
        print(f"Dot product + bias = {score:.4f}")
        print("-" * 50)

    activations = np.array(activations)

    # Plot the input, filter, and output
    fig, axes = plt.subplots(3, 1, figsize=(10, 6), constrained_layout=True)

    axes[0].imshow(X.T, aspect="auto", cmap="Greys")
    axes[0].set_title("Input DNA (one-hot)")
    axes[0].set_yticks(range(4))
    axes[0].set_yticklabels(BASES)
    axes[0].set_xlabel("Position")

    axes[1].imshow(filt.T, aspect="auto", cmap="coolwarm")
    axes[1].set_title("Filter weights")
    axes[1].set_yticks(range(4))
    axes[1].set_yticklabels(BASES)
    axes[1].set_xlabel("Kernel position")

    # Plot against the real window start positions so the x-axis matches its
    # label when stride > 1 (plotting the bare array would use 0, 1, 2, ...).
    axes[2].plot(list(positions), activations, marker="o")
    axes[2].set_title("Convolution output")
    axes[2].set_xlabel("Sliding window start position")
    axes[2].set_ylabel("Activation")

    plt.show()
    return activations
In [4]:
# Toy sequence: the 5-mer ACGTG appears at positions 0 and 5.
seq = "ACGTGACGTG"

# Hand-built filter; rows are kernel positions, columns are A, C, G, T.
filt = np.array(
    [
        [1.0, -0.5, -0.2, -0.2],   # kernel row 0: highest weight on A
        [-0.2, 1.0, -0.5, -0.2],   # kernel row 1: highest weight on C
        [-0.2, -0.2, 1.0, -0.5],   # kernel row 2: highest weight on G
        [-0.2, -0.2, -0.5, 1.0],   # kernel row 3: highest weight on T
        [0.8, -0.2, 0.4, -0.4],    # kernel row 4: positive weight on A and G
    ]
)

# Walk the filter over the sequence and keep the per-window activations.
acts = visualize_conv_step_by_step(seq, filt, bias=0.0)
Sequence: ACGTGACGTG Input one-hot shape: (10, 4) Filter shape: (5, 4) Output length: 6 Position 0 to 4: window = ACGTG One-hot window: [[1 0 0 0] [0 1 0 0] [0 0 1 0] [0 0 0 1] [0 0 1 0]] Elementwise product with filter: [[ 1. -0. -0. -0. ] [-0. 1. -0. -0. ] [-0. -0. 1. -0. ] [-0. -0. -0. 1. ] [ 0. -0. 0.4 -0. ]] Dot product + bias = 4.4000 -------------------------------------------------- Position 1 to 5: window = CGTGA One-hot window: [[0 1 0 0] [0 0 1 0] [0 0 0 1] [0 0 1 0] [1 0 0 0]] Elementwise product with filter: [[ 0. -0.5 -0. -0. ] [-0. 0. -0.5 -0. ] [-0. -0. 0. -0.5] [-0. -0. -0.5 0. ] [ 0.8 -0. 0. -0. ]] Dot product + bias = -1.2000 -------------------------------------------------- Position 2 to 6: window = GTGAC One-hot window: [[0 0 1 0] [0 0 0 1] [0 0 1 0] [1 0 0 0] [0 1 0 0]] Elementwise product with filter: [[ 0. -0. -0.2 -0. ] [-0. 0. -0. -0.2] [-0. -0. 1. -0. ] [-0.2 -0. -0. 0. ] [ 0. -0.2 0. -0. ]] Dot product + bias = 0.2000 -------------------------------------------------- Position 3 to 7: window = TGACG One-hot window: [[0 0 0 1] [0 0 1 0] [1 0 0 0] [0 1 0 0] [0 0 1 0]] Elementwise product with filter: [[ 0. -0. -0. -0.2] [-0. 0. -0.5 -0. ] [-0.2 -0. 0. -0. ] [-0. -0.2 -0. 0. ] [ 0. -0. 0.4 -0. ]] Dot product + bias = -0.7000 -------------------------------------------------- Position 4 to 8: window = GACGT One-hot window: [[0 0 1 0] [1 0 0 0] [0 1 0 0] [0 0 1 0] [0 0 0 1]] Elementwise product with filter: [[ 0. -0. -0.2 -0. ] [-0.2 0. -0. -0. ] [-0. -0.2 0. -0. ] [-0. -0. -0.5 0. ] [ 0. -0. 0. -0.4]] Dot product + bias = -1.5000 -------------------------------------------------- Position 5 to 9: window = ACGTG One-hot window: [[1 0 0 0] [0 1 0 0] [0 0 1 0] [0 0 0 1] [0 0 1 0]] Elementwise product with filter: [[ 1. -0. -0. -0. ] [-0. 1. -0. -0. ] [-0. -0. 1. -0. ] [-0. -0. -0. 1. ] [ 0. -0. 0.4 -0. ]] Dot product + bias = 4.4000 --------------------------------------------------
In [ ]:
In [ ]: