import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris  # example, if you have continuous target
from matplotlib.colors import ListedColormap

# ---------- Replace with your data ----------
# Example: use iris but pretend y is continuous; replace with your real continuous y
X, y = load_iris(return_X_y=True)  # if you have continuous target, use it
# ------------------------------------------------
print(y)

X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

cmap = ListedColormap(['tab:blue', 'tab:orange', 'tab:green'])


fig, ax = plt.subplots(figsize=(7, 5))
sc = ax.scatter(X_pca[:, 0], X_pca[:, 1],
                c=y, s=30, cmap=cmap, alpha=0.7)
ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}% var)")
ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}% var)")
ax.set_title("PCA (2 components) — continuous target")
ax.grid(alpha=0.25)
plt.tight_layout()
plt.savefig("IrisPCA.png", dpi=300, bbox_inches="tight")
plt.show()

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from matplotlib import cm


# Load iris data
iris = load_iris(as_frame=True)
df = iris.frame   # includes features + target
df.head()

# Rename target to species name (optional but clearer)
df['species'] = df['target'].map(dict(enumerate(iris.target_names)))
cmap = cm.get_cmap('tab10')
colors = df['target'].map(lambda x: cmap(x))

# Scatter plot matrix
pd.plotting.scatter_matrix(
    df[iris.feature_names],
    figsize=(10, 10),
    diagonal='hist',
    color=colors,
    alpha=0.8
)

plt.suptitle("Scatter Plot Matrix of Iris Features", y=1.02)
plt.show()

/var/folders/kp/4rq4gv392szf4h4dx523w7s4_rl2wj/T/ipykernel_31264/1641073998.py:14: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed in 3.11. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap()`` or ``pyplot.get_cmap()`` instead.
  cmap = cm.get_cmap('tab10')

import seaborn as sns

g=sns.pairplot(
    df,
    vars=iris.feature_names,
    hue='species',
    diag_kind='hist',
    palette='tab10',
    plot_kws={'alpha': 0.7, 's': 40}
)
for ax in g.axes.flatten():
    if ax is not None:
        for spine in ax.spines.values():
            spine.set_visible(True)
            spine.set_linewidth(1)

plt.savefig("IrisScatterMatrix.png", dpi=300, bbox_inches="tight")
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from matplotlib.colors import ListedColormap

# Load Iris data
iris = load_iris()
X, y = iris.data, iris.target

# Scale + PCA
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2, random_state=1)
X_pca = pca.fit_transform(X_scaled)

# Mesh grid
xx_min, xx_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
yy_min, yy_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.linspace(xx_min, xx_max, 300),
    np.linspace(yy_min, yy_max, 300)
)
grid = np.c_[xx.ravel(), yy.ravel()]

# tab10 colormap (discrete & consistent)
cmap = ListedColormap(plt.cm.tab10.colors[:3])

# K values
Ks = [1, 5, 10]

fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharex=True, sharey=True)

for ax, k in zip(axes, Ks):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_pca, y)

    Z = knn.predict(grid).reshape(xx.shape)

    # Decision regions
    ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.25)

    # Data points
    ax.scatter(
        X_pca[:, 0], X_pca[:, 1],
        c=y,
        cmap=cmap,
        edgecolors='k',
        s=40
    )

    ax.set_title(f"KNN decision regions (K={k})")
    ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)")
    ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)")
    ax.grid(alpha=0.3)

# Legend (shared)
handles = [
    plt.Line2D([], [], marker='o', linestyle='', color=cmap(i),
               label=iris.target_names[i])
    for i in range(3)
]
axes[2].legend(handles=handles, title="Species", loc='upper right')

plt.suptitle("Iris PCA (2D) — KNN decision regions", fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig("Iris_KNN.png", dpi=300, bbox_inches="tight")
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from matplotlib.colors import ListedColormap

# Load Iris data
iris = load_iris()
X, y = iris.data, iris.target

# Scale + PCA
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2, random_state=1)
X_pca = pca.fit_transform(X_scaled)

# Mesh grid
xx_min, xx_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
yy_min, yy_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.linspace(xx_min, xx_max, 300),
    np.linspace(yy_min, yy_max, 300)
)
grid = np.c_[xx.ravel(), yy.ravel()]

# tab10 colormap (discrete & consistent)
cmap = ListedColormap(plt.cm.tab10.colors[:3])



Ks = [1, 5, 10]
fig, ax = plt.subplots(figsize=(6, 5))

k = Ks[0]   # take the first K (the "left" one)

knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_pca, y)

Z = knn.predict(grid).reshape(xx.shape)

# Decision regions
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.25)

# Data points
ax.scatter(
    X_pca[:, 0], X_pca[:, 1],
    c=y,
    cmap=cmap,
    edgecolors='k',
    s=40
)

ax.set_title(f"KNN decision regions (K={k})")
ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)")
ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)")
ax.grid(alpha=0.3)

# Legend
handles = [
    plt.Line2D([], [], marker='o', linestyle='', color=cmap(i),
               label=iris.target_names[i])
    for i in range(3)
]
ax.legend(handles=handles, title="Species", loc="upper right")

plt.title("K-Nearest Neighbor decision regions (L2)")
plt.tight_layout()
plt.savefig("Iris_KNN_single_L2.png", dpi=300, bbox_inches="tight")
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from matplotlib.colors import ListedColormap

# Load Iris data
iris = load_iris()
X, y = iris.data, iris.target

# Scale + PCA
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2, random_state=1)
X_pca = pca.fit_transform(X_scaled)

# Mesh grid
xx_min, xx_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
yy_min, yy_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.linspace(xx_min, xx_max, 300),
    np.linspace(yy_min, yy_max, 300)
)
grid = np.c_[xx.ravel(), yy.ravel()]

# tab10 colormap (discrete & consistent)
cmap = ListedColormap(plt.cm.tab10.colors[:3])



Ks = [1, 5, 10]
fig, ax = plt.subplots(figsize=(6, 5))

k = Ks[0]   # take the first K (the "left" one)

knn = KNeighborsClassifier(
    n_neighbors=k,
    metric="manhattan"   # L1 distance
)

knn.fit(X_pca, y)

Z = knn.predict(grid).reshape(xx.shape)

# Decision regions
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.25)

# Data points
ax.scatter(
    X_pca[:, 0], X_pca[:, 1],
    c=y,
    cmap=cmap,
    edgecolors='k',
    s=40
)

ax.set_title(f"KNN decision regions (K={k})")
ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)")
ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)")
ax.grid(alpha=0.3)

# Legend
handles = [
    plt.Line2D([], [], marker='o', linestyle='', color=cmap(i),
               label=iris.target_names[i])
    for i in range(3)
]
ax.legend(handles=handles, title="Species", loc="upper right")

plt.title("K-Nearest Neighbor decision regions (L1) ")
plt.tight_layout()
plt.savefig("Iris_KNN_single_L1.png", dpi=300, bbox_inches="tight")
plt.show()

### L1 demo
import numpy as np
import matplotlib.pyplot as plt

dims = np.arange(1, 101)

# Sparse signal: one feature differs
l1_sparse = dims * 0 + 10          # |10| = 10
l2_sparse = dims * 0 + 10          # sqrt(10^2) = 10

# Distributed noise: same total "energy" spread
noise = 10 / np.sqrt(dims)
l1_dense = dims * noise            # sum |noise|
l2_dense = np.sqrt(dims * noise**2)  # sqrt(sum noise^2)

plt.figure(figsize=(7,5))
plt.plot(dims, l1_sparse, label="L1 (sparse signal)", linewidth=2)
plt.plot(dims, l1_dense, label="L1 (distributed noise)", linewidth=2)
plt.plot(dims, l2_sparse, "--", label="L2 (sparse signal)", linewidth=2)
plt.plot(dims, l2_dense, "--", label="L2 (distributed noise)", linewidth=2)

plt.xlabel("Dimension")
plt.ylabel("Distance")
plt.title("L1 vs L2: Sparse Signal vs Distributed Noise")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

# --- data & PCA to 2D ---
iris = load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# grid for decision surface
x_min, x_max = X_pca[:, 0].min() - 1.0, X_pca[:, 0].max() + 1.0
y_min, y_max = X_pca[:, 1].min() - 1.0, X_pca[:, 1].max() + 1.0
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 400),
                     np.linspace(y_min, y_max, 400))
grid = np.c_[xx.ravel(), yy.ravel()]

# common plotting params
cmap = ListedColormap(plt.get_cmap("tab10").colors[:3])
point_kwargs = dict(edgecolors='k', s=40)

def plot_knn(ax, metric_name, **knn_kwargs):
    knn = KNeighborsClassifier(**knn_kwargs)
    knn.fit(X_pca, y)
    Z = knn.predict(grid).reshape(xx.shape)

    ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.25)
    sc = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap=cmap, **point_kwargs)
    ax.set_title(metric_name)
    ax.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)")
    ax.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)")
    ax.grid(alpha=0.3)

# --- two-panel figure ---
fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)

# L1 (Manhattan)
plot_knn(axes[0], "KNN (L1 / Manhattan)", n_neighbors=5, metric="manhattan")

# L2 (Euclidean)
plot_knn(axes[1], "KNN (L2 / Euclidean)", n_neighbors=5, metric="euclidean")

# legend
handles = [plt.Line2D([], [], marker='o', linestyle='', color=cmap(i), label=target_names[i])
           for i in range(3)]
axes[1].legend(handles=handles, title="Species", loc='upper right')

plt.suptitle("Iris PCA (2D) — KNN decision regions: L1 vs L2", fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig("knn_l1_vs_l2.png", dpi=300, bbox_inches="tight")
plt.savefig("knn_l1.png", dpi=300, bbox_inches="tight")   # optional single-file saves
plt.show()