3-Layer GCN on the Karate Club Graph (with Train/Test Split)¶
This notebook builds and trains a 3-layer Graph Convolutional Network (GCN) on Zachary's Karate Club graph using PyTorch and NetworkX.
# Install dependencies if needed
# !pip install networkx matplotlib scikit-learn torch
1. Imports and Reproducibility¶
import random
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
# Fix every RNG source up front so runs are reproducible.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
2. Load the Karate Club Graph¶
We use one-hot node features and club membership as labels.
# Load Zachary's Karate Club and derive one-hot features plus club labels.
G = nx.karate_club_graph()
num_nodes = G.number_of_nodes()
# One-hot feature per node, so the feature dimension equals the node count.
num_features = num_nodes

# Identity matrix as features: row i is node i's one-hot vector.
X = torch.eye(num_nodes, dtype=torch.float32)

# Ground truth comes from the "club" node attribute ('Mr. Hi' / 'Officer').
clubs = [G.nodes[n]["club"] for n in sorted(G.nodes())]
label_encoder = LabelEncoder()
encoded = label_encoder.fit_transform(clubs)
y = torch.tensor(encoded, dtype=torch.long)

print("Number of nodes:", num_nodes)
print("Classes:", list(label_encoder.classes_))
print("Feature matrix shape:", X.shape)
print("Label tensor shape:", y.shape)
Number of nodes: 34
Classes: [np.str_('Mr. Hi'), np.str_('Officer')]
Feature matrix shape: torch.Size([34, 34])
Label tensor shape: torch.Size([34])
X
tensor([[1., 0., 0., ..., 0., 0., 0.],
[0., 1., 0., ..., 0., 0., 0.],
[0., 0., 1., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 1., 0., 0.],
[0., 0., 0., ..., 0., 1., 0.],
[0., 0., 0., ..., 0., 0., 1.]])
# Visualize the karate-club graph, nodes colored by club membership.
# (Self-contained cell: re-imports and rebuilds the graph.)
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.patches import Patch, Circle

G = nx.karate_club_graph()

# Color nodes by ground-truth faction.
color_map = []
for node in G.nodes():
    if G.nodes[node]['club'] == 'Mr. Hi':
        color_map.append('#3B8BD4')  # blue: Mr. Hi's group
    else:
        color_map.append('#D85A30')  # orange: Officer's group

# Seeded layout so the figure is reproducible across runs.
pos = nx.spring_layout(G, seed=42)

fig, ax = plt.subplots(figsize=(10, 7))
nx.draw_networkx(
    G,
    pos=pos,
    node_color=color_map,
    with_labels=True,
    node_size=500,
    font_color='white',
    font_size=9,
    edge_color='gray',
    alpha=0.9,
    ax=ax
)

# Ring the two key nodes (0 and 33).
# BUGFIX: this loop previously unpacked `x, y = pos[node]`, which clobbered
# the notebook-level label tensor `y` created in the data-loading cell.
# Use local names that cannot shadow notebook-level variables.
for node in [0, 33]:
    cx, cy = pos[node]
    circle = Circle(
        (cx, cy),
        radius=0.07,
        color='gold',
        fill=False,
        linewidth=2.5,
        zorder=5
    )
    ax.add_patch(circle)

# Legend
legend_elements = [
    Patch(facecolor='#3B8BD4', label="Mr. Hi's group"),
    Patch(facecolor='#D85A30', label="Officer's group"),
    Patch(edgecolor='gold', facecolor='none', linewidth=2.5, label='Key nodes (0 & 33)'),
]
plt.legend(handles=legend_elements, loc='upper right')
plt.title("Zachary's Karate Club Graph")
plt.axis('off')
plt.tight_layout()
plt.savefig('karate_club.png', dpi=300, bbox_inches='tight')
plt.show()
3. Train / Test Split¶
Because GCNs are transductive (the full adjacency matrix is used at every step), we split node indices rather than creating separate graphs. All nodes contribute their neighbourhood information during message passing; only the labelled training nodes drive the loss, and the held-out test nodes are evaluated but never used to compute gradients.
We use a stratified 70 / 30 split to keep class proportions balanced.
# Transductive split: we partition node *indices*, not the graph itself.
node_indices = np.arange(num_nodes)

# Labels as a torch tensor of class ids.
# FIX: the original called fit_transform() a second time, needlessly
# re-fitting the encoder; transform() reuses the mapping fitted earlier.
labels = torch.tensor(label_encoder.transform(clubs), dtype=torch.long)

# Stratified 70/30 split keeps the two class proportions balanced.
train_idx, test_idx = train_test_split(
    node_indices,
    test_size=0.30,
    random_state=SEED,
    stratify=labels.numpy()
)
train_idx = torch.tensor(train_idx, dtype=torch.long)
test_idx = torch.tensor(test_idx, dtype=torch.long)

print(f"Train nodes: {len(train_idx)} | Test nodes: {len(test_idx)}")
print(f"Train class distribution: {dict(zip(*np.unique(labels[train_idx].numpy(), return_counts=True)))}")
print(f"Test class distribution: {dict(zip(*np.unique(labels[test_idx].numpy(), return_counts=True)))}")
Train nodes: 23 | Test nodes: 11
Train class distribution: {np.int64(0): np.int64(11), np.int64(1): np.int64(12)}
Test class distribution: {np.int64(0): np.int64(6), np.int64(1): np.int64(5)}
4. Build the Normalized Adjacency Matrix¶
We add self-loops and apply symmetric normalization:
$\hat{A} = D^{-1/2}(A + I)D^{-1/2}$
# Build the symmetrically normalized adjacency D^{-1/2} (A + I) D^{-1/2}.
A = nx.to_numpy_array(G, dtype=np.float32) + np.eye(num_nodes, dtype=np.float32)  # add self-loops
deg_inv_sqrt = np.power(A.sum(axis=1), -0.5)
deg_inv_sqrt[np.isinf(deg_inv_sqrt)] = 0.0  # guard against zero-degree nodes
D = np.diag(deg_inv_sqrt)
A_norm = torch.tensor(D @ A @ D, dtype=torch.float32)
print("Adjacency shape:", A_norm.shape)
5. Define the 3-Layer GCN¶
Each layer performs neighbourhood aggregation followed by a linear transformation. ReLU is applied after each of the first two layers, and dropout is applied before the final layer.
class GCNLayer(nn.Module):
    """One graph-convolution step: dropout -> aggregate -> linear map.

    Args:
        in_dim: input feature dimension.
        out_dim: output feature dimension.
        dropout: dropout probability applied to the input features
            (active only when the module is in training mode).
    """

    def __init__(self, in_dim, out_dim, dropout=0.0):
        super().__init__()
        self.linear = nn.Linear(in_dim, out_dim)
        self.dropout = dropout

    def forward(self, x, adj):
        # Regularize the incoming features.
        h = F.dropout(x, p=self.dropout, training=self.training)
        # Neighbourhood aggregation via the (normalized) adjacency matrix,
        # followed by a learned linear transformation.
        return self.linear(adj @ h)
class ThreeLayerGCN(nn.Module):
    """Stack of three GCN layers: in_dim -> hidden_dim -> hidden_dim -> out_dim.

    ReLU follows each of the first two layers; an additional dropout is
    applied just before the final, logit-producing layer.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout=0.5):
        super().__init__()
        self.gcn1 = GCNLayer(in_dim, hidden_dim, dropout)
        self.gcn2 = GCNLayer(hidden_dim, hidden_dim, dropout)
        self.gcn3 = GCNLayer(hidden_dim, out_dim, dropout)
        self.dropout = dropout

    def forward(self, x, adj):
        h = F.relu(self.gcn1(x, adj))
        h = F.relu(self.gcn2(h, adj))
        h = F.dropout(h, p=self.dropout, training=self.training)
        return self.gcn3(h, adj)
# Instantiate the model, its optimizer, and the classification loss.
model = ThreeLayerGCN(
    in_dim=num_features,
    hidden_dim=16,
    out_dim=len(label_encoder.classes_),  # 2 classes: Mr. Hi / Officer
    dropout=0.5
)
# weight_decay adds L2 regularization on top of Adam.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()
model
ThreeLayerGCN(
(gcn1): GCNLayer(
(linear): Linear(in_features=34, out_features=16, bias=True)
)
(gcn2): GCNLayer(
(linear): Linear(in_features=16, out_features=16, bias=True)
)
(gcn3): GCNLayer(
(linear): Linear(in_features=16, out_features=2, bias=True)
)
)
6. Train the Model¶
Transductive training protocol:
- Forward pass uses all nodes (so every node aggregates its full neighbourhood).
- The loss is computed only on `train_idx` — test labels never touch the gradient.
- Accuracy is reported separately for train and test nodes.
# Transductive training: every epoch does a forward pass over ALL nodes,
# but only the train_idx logits contribute to the loss/gradients.
epochs = 300
train_loss_history = []
train_acc_history = []
test_acc_history = []
for epoch in range(1, epochs + 1):
    # --- optimization step (train mode: dropout active) ---
    model.train()
    optimizer.zero_grad()
    logits = model(X, A_norm)
    # Loss restricted to training nodes; test labels never touch gradients.
    loss = criterion(logits[train_idx], labels[train_idx])
    loss.backward()
    optimizer.step()
    # --- evaluation (eval mode: dropout off; no gradient tracking) ---
    model.eval()
    with torch.no_grad():
        logits = model(X, A_norm)
        pred = logits.argmax(dim=1)
        train_acc = (pred[train_idx] == labels[train_idx]).float().mean().item()
        test_acc = (pred[test_idx] == labels[test_idx]).float().mean().item()
    # Record per-epoch metrics for the plotting cell below.
    train_loss_history.append(loss.item())
    train_acc_history.append(train_acc)
    test_acc_history.append(test_acc)
    # Log at epoch 1 and every 25 epochs thereafter.
    if epoch == 1 or epoch % 25 == 0:
        print(f"Epoch {epoch:03d} | Loss {loss.item():.4f} "
              f"| Train Acc {train_acc:.4f} | Test Acc {test_acc:.4f}")
print(f"\nFinal Train Accuracy: {train_acc_history[-1]:.4f}")
print(f"Final Test Accuracy: {test_acc_history[-1]:.4f}")
Epoch 001 | Loss 0.7195 | Train Acc 0.5217 | Test Acc 0.4545 Epoch 025 | Loss 0.5390 | Train Acc 0.9565 | Test Acc 1.0000 Epoch 050 | Loss 0.1710 | Train Acc 0.9565 | Test Acc 1.0000 Epoch 075 | Loss 0.0978 | Train Acc 0.9565 | Test Acc 1.0000 Epoch 100 | Loss 0.1041 | Train Acc 0.9565 | Test Acc 1.0000 Epoch 125 | Loss 0.1181 | Train Acc 0.9565 | Test Acc 1.0000 Epoch 150 | Loss 0.0390 | Train Acc 1.0000 | Test Acc 1.0000 Epoch 175 | Loss 0.1190 | Train Acc 1.0000 | Test Acc 1.0000 Epoch 200 | Loss 0.1921 | Train Acc 1.0000 | Test Acc 1.0000 Epoch 225 | Loss 0.0458 | Train Acc 1.0000 | Test Acc 1.0000 Epoch 250 | Loss 0.0472 | Train Acc 1.0000 | Test Acc 0.9091 Epoch 275 | Loss 0.0375 | Train Acc 0.9565 | Test Acc 1.0000 Epoch 300 | Loss 0.0598 | Train Acc 1.0000 | Test Acc 0.9091 Final Train Accuracy: 1.0000 Final Test Accuracy: 0.9091
7. Plot Training Curves¶
# Side-by-side training curves: loss on the left, accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
loss_ax, acc_ax = axes

loss_ax.plot(train_loss_history)
loss_ax.set_title("Training Loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")

acc_ax.plot(train_acc_history, label="Train Acc")
acc_ax.plot(test_acc_history, label="Test Acc", linestyle="--")
acc_ax.set_title("Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")
acc_ax.legend()

plt.tight_layout()
plt.show()
8. Visualize Learned Embeddings¶
We extract the hidden representation from the second GCN layer and project it to 2D with t-SNE. Train nodes are shown with filled markers; test nodes with hollow markers.
from matplotlib.lines import Line2D

# Extract the hidden representation after the second GCN layer (eval mode).
model.eval()
with torch.no_grad():
    h = F.relu(model.gcn1(X, A_norm))
    h = F.relu(model.gcn2(h, A_norm))
embeddings = h.cpu().numpy()

# Project the 16-d hidden vectors to 2-D for plotting.
embeddings_2d = TSNE(n_components=2, perplexity=10, random_state=SEED).fit_transform(embeddings)

labels_np = labels.cpu().numpy()
train_set = set(train_idx.cpu().numpy())
colors = ["#3B8BD4", "#D85A30"]

plt.figure(figsize=(9, 6))
for node in range(num_nodes):
    px, py = embeddings_2d[node, 0], embeddings_2d[node, 1]
    # Filled circles for train nodes, triangles for held-out test nodes.
    plt.scatter(
        px,
        py,
        color=colors[labels_np[node]],
        marker="o" if node in train_set else "^",
        s=90,
        edgecolors="k",
        linewidths=0.5,
    )
    # Annotate each point with its node id, slightly offset.
    plt.text(px + 0.2, py + 0.2, str(node), fontsize=8)

legend_elements = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[0], markersize=9, label="Mr. Hi (train)"),
    Line2D([0], [0], marker='^', color='w', markerfacecolor=colors[0], markersize=9, label="Mr. Hi (test)"),
    Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[1], markersize=9, label="Officer (train)"),
    Line2D([0], [0], marker='^', color='w', markerfacecolor=colors[1], markersize=9, label="Officer (test)"),
]
plt.legend(handles=legend_elements, loc="best")
plt.title("3-Layer GCN Embeddings (circles=train, triangles=test)")
plt.tight_layout()
plt.show()
# Find the Mr. Hi (blue, class 0) node whose 2-D embedding lies closest to
# the centre of the Officer (orange, class 1) cluster — a rough proxy for
# the decision boundary.
# FIX: index the numpy array with the numpy label array (labels_np) rather
# than relying on numpy implicitly converting the torch `labels` tensor.
officer_center = embeddings_2d[labels_np == 1].mean(axis=0)
hi_nodes = np.where(labels_np == 0)[0]
distances = np.linalg.norm(embeddings_2d[hi_nodes] - officer_center, axis=1)
boundary_node = hi_nodes[distances.argmin()]
print(f"Boundary node: {boundary_node}")
Boundary node: 8
# Inspect the hidden-embedding dimensionality: (num_nodes, hidden_dim) = (34, 16).
embeddings.shape
(34, 16)
9. Summary¶
This notebook:
- loads the Karate Club graph
- builds normalized graph adjacency
- trains a 3-layer GCN
- visualizes training and embeddings