In [69]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris # example, if you have continuous target
from matplotlib.colors import ListedColormap
# ---------- Replace with your data ----------
# Example: use iris but pretend y is continuous; replace with your real continuous y
X, y = load_iris(return_X_y=True) # if you have continuous target, use it
# ------------------------------------------------
print(y)
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
cmap = ListedColormap(['tab:blue', 'tab:orange', 'tab:green'])
fig, ax = plt.subplots(figsize=(7, 5))
sc = ax.scatter(X_pca[:, 0], X_pca[:, 1],
c=y, s=30, cmap=cmap, alpha=0.7)
ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}% var)")
ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}% var)")
ax.set_title("PCA (2 components) — continuous target")
ax.grid(alpha=0.25)
plt.tight_layout()
plt.savefig("IrisPCA.png", dpi=300, bbox_inches="tight")
plt.show()
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
In [79]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from matplotlib import cm
# Load iris data
iris = load_iris(as_frame=True)
df = iris.frame # includes features + target
df.head()
# Rename target to species name (optional but clearer)
df['species'] = df['target'].map(dict(enumerate(iris.target_names)))
cmap = cm.get_cmap('tab10')
colors = df['target'].map(lambda x: cmap(x))
# Scatter plot matrix
pd.plotting.scatter_matrix(
df[iris.feature_names],
figsize=(10, 10),
diagonal='hist',
color=colors,
alpha=0.8
)
plt.suptitle("Scatter Plot Matrix of Iris Features", y=1.02)
plt.show()
/var/folders/kp/4rq4gv392szf4h4dx523w7s4_rl2wj/T/ipykernel_31264/1641073998.py:14: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.
cmap = cm.get_cmap('tab10')
In [81]:
import seaborn as sns
g=sns.pairplot(
df,
vars=iris.feature_names,
hue='species',
diag_kind='hist',
palette='tab10',
plot_kws={'alpha': 0.7, 's': 40}
)
for ax in g.axes.flatten():
if ax is not None:
for spine in ax.spines.values():
spine.set_visible(True)
spine.set_linewidth(1)
plt.savefig("IrisScatterMatrix.png", dpi=300, bbox_inches="tight")
plt.show()
In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from matplotlib.colors import ListedColormap
# Load Iris data
iris = load_iris()
X, y = iris.data, iris.target
# Scale + PCA
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2, random_state=1)
X_pca = pca.fit_transform(X_scaled)
# Mesh grid
xx_min, xx_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
yy_min, yy_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(
np.linspace(xx_min, xx_max, 300),
np.linspace(yy_min, yy_max, 300)
)
grid = np.c_[xx.ravel(), yy.ravel()]
# tab10 colormap (discrete & consistent)
cmap = ListedColormap(plt.cm.tab10.colors[:3])
# K values
Ks = [1, 5, 10]
fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharex=True, sharey=True)
for ax, k in zip(axes, Ks):
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_pca, y)
Z = knn.predict(grid).reshape(xx.shape)
# Decision regions
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.25)
# Data points
ax.scatter(
X_pca[:, 0], X_pca[:, 1],
c=y,
cmap=cmap,
edgecolors='k',
s=40
)
ax.set_title(f"KNN decision regions (K={k})")
ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)")
ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)")
ax.grid(alpha=0.3)
# Legend (shared)
handles = [
plt.Line2D([], [], marker='o', linestyle='', color=cmap(i),
label=iris.target_names[i])
for i in range(3)
]
axes[2].legend(handles=handles, title="Species", loc='upper right')
plt.suptitle("Iris PCA (2D) — KNN decision regions", fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig("Iris_KNN.png", dpi=300, bbox_inches="tight")
plt.show()
In [7]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from matplotlib.colors import ListedColormap
# Load Iris data
iris = load_iris()
X, y = iris.data, iris.target
# Scale + PCA
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2, random_state=1)
X_pca = pca.fit_transform(X_scaled)
# Mesh grid
xx_min, xx_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
yy_min, yy_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(
np.linspace(xx_min, xx_max, 300),
np.linspace(yy_min, yy_max, 300)
)
grid = np.c_[xx.ravel(), yy.ravel()]
# tab10 colormap (discrete & consistent)
cmap = ListedColormap(plt.cm.tab10.colors[:3])
Ks = [1, 5, 10]
fig, ax = plt.subplots(figsize=(6, 5))
k = Ks[0] # take the first K (the "left" one)
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_pca, y)
Z = knn.predict(grid).reshape(xx.shape)
# Decision regions
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.25)
# Data points
ax.scatter(
X_pca[:, 0], X_pca[:, 1],
c=y,
cmap=cmap,
edgecolors='k',
s=40
)
ax.set_title(f"KNN decision regions (K={k})")
ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)")
ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)")
ax.grid(alpha=0.3)
# Legend
handles = [
plt.Line2D([], [], marker='o', linestyle='', color=cmap(i),
label=iris.target_names[i])
for i in range(3)
]
ax.legend(handles=handles, title="Species", loc="upper right")
plt.title("K-Nearest Neighbor decision regions (L2)")
plt.tight_layout()
plt.savefig("Iris_KNN_single_L2.png", dpi=300, bbox_inches="tight")
plt.show()
In [8]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from matplotlib.colors import ListedColormap
# Load Iris data
iris = load_iris()
X, y = iris.data, iris.target
# Scale + PCA
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2, random_state=1)
X_pca = pca.fit_transform(X_scaled)
# Mesh grid
xx_min, xx_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
yy_min, yy_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(
np.linspace(xx_min, xx_max, 300),
np.linspace(yy_min, yy_max, 300)
)
grid = np.c_[xx.ravel(), yy.ravel()]
# tab10 colormap (discrete & consistent)
cmap = ListedColormap(plt.cm.tab10.colors[:3])
Ks = [1, 5, 10]
fig, ax = plt.subplots(figsize=(6, 5))
k = Ks[0] # take the first K (the "left" one)
knn = KNeighborsClassifier(
n_neighbors=k,
metric="manhattan" # L1 distance
)
knn.fit(X_pca, y)
Z = knn.predict(grid).reshape(xx.shape)
# Decision regions
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.25)
# Data points
ax.scatter(
X_pca[:, 0], X_pca[:, 1],
c=y,
cmap=cmap,
edgecolors='k',
s=40
)
ax.set_title(f"KNN decision regions (K={k})")
ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)")
ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)")
ax.grid(alpha=0.3)
# Legend
handles = [
plt.Line2D([], [], marker='o', linestyle='', color=cmap(i),
label=iris.target_names[i])
for i in range(3)
]
ax.legend(handles=handles, title="Species", loc="upper right")
plt.title("K-Nearest Neighbor decision regions (L1) ")
plt.tight_layout()
plt.savefig("Iris_KNN_single_L1.png", dpi=300, bbox_inches="tight")
plt.show()
In [9]:
### L1 demo
import numpy as np
import matplotlib.pyplot as plt
dims = np.arange(1, 101)
# Sparse signal: one feature differs
l1_sparse = dims * 0 + 10 # |10| = 10
l2_sparse = dims * 0 + 10 # sqrt(10^2) = 10
# Distributed noise: same total "energy" spread
noise = 10 / np.sqrt(dims)
l1_dense = dims * noise # sum |noise|
l2_dense = np.sqrt(dims * noise**2) # sqrt(sum noise^2)
plt.figure(figsize=(7,5))
plt.plot(dims, l1_sparse, label="L1 (sparse signal)", linewidth=2)
plt.plot(dims, l1_dense, label="L1 (distributed noise)", linewidth=2)
plt.plot(dims, l2_sparse, "--", label="L2 (sparse signal)", linewidth=2)
plt.plot(dims, l2_dense, "--", label="L2 (distributed noise)", linewidth=2)
plt.xlabel("Dimension")
plt.ylabel("Distance")
plt.title("L1 vs L2: Sparse Signal vs Distributed Noise")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
In [10]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
# --- data & PCA to 2D ---
iris = load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# grid for decision surface
x_min, x_max = X_pca[:, 0].min() - 1.0, X_pca[:, 0].max() + 1.0
y_min, y_max = X_pca[:, 1].min() - 1.0, X_pca[:, 1].max() + 1.0
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 400),
np.linspace(y_min, y_max, 400))
grid = np.c_[xx.ravel(), yy.ravel()]
# common plotting params
cmap = ListedColormap(plt.get_cmap("tab10").colors[:3])
point_kwargs = dict(edgecolors='k', s=40)
def plot_knn(ax, metric_name, **knn_kwargs):
knn = KNeighborsClassifier(**knn_kwargs)
knn.fit(X_pca, y)
Z = knn.predict(grid).reshape(xx.shape)
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.25)
sc = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap=cmap, **point_kwargs)
ax.set_title(metric_name)
ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)")
ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)")
ax.grid(alpha=0.3)
# --- two-panel figure ---
fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
# L1 (Manhattan)
plot_knn(axes[0], "KNN (L1 / Manhattan)", n_neighbors=5, metric="manhattan")
# L2 (Euclidean)
plot_knn(axes[1], "KNN (L2 / Euclidean)", n_neighbors=5, metric="euclidean")
# legend
handles = [plt.Line2D([], [], marker='o', linestyle='', color=cmap(i), label=target_names[i])
for i in range(3)]
axes[1].legend(handles=handles, title="Species", loc='upper right')
plt.suptitle("Iris PCA (2D) — KNN decision regions: L1 vs L2", fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig("knn_l1_vs_l2.png", dpi=300, bbox_inches="tight")
plt.savefig("knn_l1.png", dpi=300, bbox_inches="tight") # optional single-file saves
plt.show()
In [ ]: