Segmentation of hyperspectral image with Riemannian geometry

This example compares clustering pipelines based on covariance matrices for hyperspectral image segmentation [1].

# Author: Ammar Mian

import matplotlib.pyplot as plt
import numpy as np
from sklearn.pipeline import Pipeline

from pyriemann.clustering import Kmeans
from pyriemann.estimation import Covariances
from helpers.datasets_helpers import download_salinas, read_salinas
from helpers.processing_helpers import (
    SlidingWindowVectorize,
    PCAImage,
)

Parameters

window_size = 5
data_path = "./data"
n_jobs = -1
max_iter = 100
small_dataset = True  # Whole image can take time
n_components = 5  # For PCA dimensionality reduction
estimator = "scm"  # Chose any estimator from "scm", "lwf", "oas", "mcd", "hub"

Load data

print("Loading Salinas data")
download_salinas(data_path)
data, labels, labels_names = read_salinas(data_path)
data_visualization = data.copy()  # To avoid aliasing when showing data
n_clusters = len(labels_names)

resolution = 3.7  # m, obtained from documentation of Salinas dataset

# For visualization of image
x_values = np.arange(0, data.shape[1]) * resolution
y_values = np.arange(0, data.shape[0]) * resolution
X_image, Y_image = np.meshgrid(x_values, y_values)

if small_dataset:
    reduce_factor = 5
    data = data[::reduce_factor, ::reduce_factor]
    max_iter = 5
    resolution = reduce_factor*resolution
height, width, n_features = data.shape

# For visualization of results
x_values = np.arange(window_size//2, width-window_size//2) * resolution
y_values = np.arange(window_size//2, height-window_size//2) * resolution
X_res, Y_res = np.meshgrid(x_values, y_values)

print("Reading done.")
Loading Salinas data
Downloading Salinas dataset...
Done.
Reading done.

Pipelines definition

pipeline_euclid = Pipeline([
    ("pca", PCAImage(n_components=n_components)),
    ("sliding_window", SlidingWindowVectorize(window_size=window_size)),
    ("covariances", Covariances(estimator=estimator)),
    ("kmeans", Kmeans(
        n_clusters=n_clusters,
        n_jobs=n_jobs,
        max_iter=max_iter,
        metric="euclid",
    ))
], verbose=True)

pipeline_riemann = Pipeline([
    ("pca", PCAImage(n_components=n_components)),
    ("sliding_window", SlidingWindowVectorize(window_size=window_size)),
    ("covariances", Covariances(estimator=estimator)),
    ("kmeans", Kmeans(
        n_clusters=n_clusters,
        n_jobs=n_jobs,
        max_iter=max_iter,
        metric="riemann",
    ))
], verbose=True)

pipelines = [pipeline_euclid, pipeline_riemann]
pipelines_names = [
    f"{estimator} Euclidean distance",
    f"{estimator} Riemannian distance",
]

Perform clustering

print(f"\nStarting clustering with pipelines: {pipelines_names}")
results = {}
for pipeline_name, pipeline in zip(pipelines_names, pipelines):
    print("-"*60)
    print(f"Pipeline: {pipeline_name}")
    pipeline.fit(data)
    preds = pipeline.named_steps["kmeans"].labels_
    results[pipeline_name] = \
        pipeline.named_steps["sliding_window"].inverse_predict(preds)
    print("-"*60)
print("Done")
Starting clustering with pipelines: ['scm Euclidean distance', 'scm Riemannian distance']
------------------------------------------------------------
Pipeline: scm Euclidean distance
[Pipeline] ............... (step 1 of 4) Processing pca, total=   0.0s
[Pipeline] .... (step 2 of 4) Processing sliding_window, total=   0.0s
[Pipeline] ....... (step 3 of 4) Processing covariances, total=   0.1s
[Pipeline] ............ (step 4 of 4) Processing kmeans, total=  10.4s
------------------------------------------------------------
------------------------------------------------------------
Pipeline: scm Riemannian distance
[Pipeline] ............... (step 1 of 4) Processing pca, total=   0.0s
[Pipeline] .... (step 2 of 4) Processing sliding_window, total=   0.0s
[Pipeline] ....... (step 3 of 4) Processing covariances, total=   0.1s
[Pipeline] ............ (step 4 of 4) Processing kmeans, total=  48.6s
------------------------------------------------------------
Done

Plot data

print("Plotting")
plot_value = np.mean(data_visualization, axis=2)
figure, ax = plt.subplots(figsize=(7, 5))
plt.pcolormesh(X_image, Y_image, plot_value, cmap="gray")
plt.colorbar()
ax.invert_yaxis()
plt.xlabel("x (m)")
plt.ylabel("y (m)")
plt.title("Salinas dataset (mean over bands)")
plt.tight_layout()
Salinas dataset (mean over bands)
Plotting

Plot results

for pipeline_name, labels_pred in results.items():
    figure, ax = plt.subplots(figsize=(7, 5))
    plt.pcolormesh(X_res, Y_res, labels_pred, cmap="tab20b")
    plt.xlim(X_image.min(), X_image.max())
    plt.ylim(Y_image.min(), Y_image.max())
    plt.title(f"Clustering with {pipeline_name}")
    plt.colorbar()
    ax.invert_yaxis()
    plt.xlabel("x (m)")
    plt.ylabel("y (m)")
    plt.tight_layout()
plt.show()
  • Clustering with scm Euclidean distance
  • Clustering with scm Riemannian distance

References

Total running time of the script: (1 minutes 5.932 seconds)

Gallery generated by Sphinx-Gallery