Source code for pyriemann.geometry.mean

"""Means of SPD/HPD matrices."""

import warnings

from array_api_compat import array_namespace as get_namespace, device as xpd
from array_api_extra import create_diagonal, nan_to_num
import numpy as np

from ._backend import diag_indices, nanmean, tril_indices
from ._check import check_function, check_init, check_weights
from ._docs import deprecated
from .ajd import ajd_pham
from .base import ctranspose, sqrtm, invsqrtm, logm, expm, powm, _vectorize_nd
from .distance import distance_riemann
from .geodesic import geodesic_riemann, geodesic_thompson
from .tangentspace import log_map_wasserstein, exp_map_wasserstein


[docs] @_vectorize_nd(n_axes=3) def mean_ale(X, *, tol=10e-7, maxiter=50, sample_weight=None, init=None): """AJD-based log-Euclidean (ALE) mean of SPD/HPD matrices. Approximate joint diagonalization (AJD) based log-Euclidean (ALE) mean of SPD/HPD matrices [1]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. tol : float, default=10e-7 Tolerance to stop the gradient descent. maxiter : int, default=50 Maximum number of iterations. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. init : None | ndarray, shape (n, n), default=None A SPD/HPD matrix used to initialize the gradient descent. If None, the joint diagonalizer of input matrices is used. Returns ------- M : ndarray, shape (..., n, n) ALE mean. Notes ----- .. versionadded:: 0.2.4 .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Approximate Joint Diagonalization and Geometric Mean of Symmetric Positive Definite Matrices <https://arxiv.org/abs/1505.07343>`_ M. Congedo, B. Afsari, A. Barachant, M. Moakher. PLOS ONE, 2015 """ xp = get_namespace(X) n_matrices, n, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) if init is None: B = ajd_pham(X)[0] else: B = check_init(init, n, like=X) eye_n = xp.eye(n, dtype=X.real.dtype, device=xpd(X)) for _ in range(maxiter): J = xp.tensordot( sample_weight, logm(B @ X @ ctranspose(B)), axes=([0], [0]), ) delta = xp.real(xp.linalg.diagonal(expm(J))) B = (xp.abs(delta) ** -.5)[:, None] * B crit = distance_riemann(eye_n, create_diagonal(delta)) if crit <= tol: break else: warnings.warn("Convergence not reached", stacklevel=2) J = xp.tensordot( sample_weight, logm(B @ X @ ctranspose(B)), axes=([0], [0]), ) A = xp.linalg.solve(B, eye_n) M = A @ expm(J) @ ctranspose(A) return M
[docs] @_vectorize_nd(n_axes=3) def mean_alm(X, *, tol=1e-14, maxiter=100, sample_weight=None, **kwargs): r"""Ando-Li-Mathias (ALM) mean of SPD/HPD matrices. Ando-Li-Mathias (ALM) mean is computed recursively, generalizing from [1]_: .. math:: \mathbf{M} = X_1^{\frac{1}{2}} (X_1^{-\frac{1}{2}}X_2^{\frac{1}{2}} X_1^{-\frac{1}{2}})^{\frac{1}{2}} X_1^{\frac{1}{2}} and requiring a high number of iterations. Extremely slow, due to the recursive formulation. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. tol : float, default=1e-14 Tolerance to stop the gradient descent. maxiter : int, default=100 Maximum number of iterations. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) ALM mean. Notes ----- .. versionadded:: 0.3 .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Geometric Means <https://www.sciencedirect.com/science/article/pii/S0024379503008693>`_ T. Ando, C.-K. Li, and R. Mathias. Linear Algebra and its Applications. Volume 385, July 2004, Pages 305-334. """ xp = get_namespace(X) n_matrices, _, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) if n_matrices == 1: return X[0] if n_matrices == 2: alpha = sample_weight[1] / sample_weight[0] / 2 M = geodesic_riemann(X[0], X[1], alpha=alpha) return M M = X M_iter = xp.zeros_like(M) for _ in range(maxiter): for h in range(n_matrices): s = np.mod(np.arange(h, h + n_matrices - 1) + 1, n_matrices) M_iter[h] = mean_alm(M[s], sample_weight=sample_weight[s]) norm_iter = xp.linalg.matrix_norm(M_iter[0] - M[0], ord=2) norm_c = xp.linalg.matrix_norm(M[0], ord=2) if norm_iter / norm_c < tol: break M = M_iter M_iter = xp.zeros_like(M) else: warnings.warn("Convergence not reached", stacklevel=2) return xp.mean(M_iter, axis=0)
[docs] @_vectorize_nd(n_axes=3) def mean_bmp(X, *, tol=1e-7, maxiter=50, sample_weight=None): """Bini-Meini-Poloni (BMP) mean of SPD/HPD matrices. Bini-Meini-Poloni (BMP) mean is computed recursively [1]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. tol : float, default=1e-7 Tolerance to stop the gradient descent. maxiter : int, default=50 Maximum number of iterations. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (n, n) BMP mean. Notes ----- .. versionadded:: 0.12 See Also -------- gmean References ---------- .. [1] `An effective matrix geometric mean satisfying the Ando–Li–Mathias properties <https://pages.di.unipi.it/fpoloni/publications/files2/BinMP10_means_final.pdf>`_ D. Bini, B. Meini and F. Poloni. Mathematics of Computation. 2010 """ xp = get_namespace(X) n_matrices, _, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) if n_matrices == 1: return X[0] if n_matrices == 2: alpha = sample_weight[1] / sample_weight[0] / 2 M = geodesic_riemann(X[0], X[1], alpha=alpha) return M M = X M_iter = xp.zeros_like(M) for _ in range(maxiter): for h in range(n_matrices): s = np.mod(np.arange(h, h + n_matrices - 1) + 1, n_matrices) M_iter[h] = geodesic_riemann( M[h], mean_bmp(M[s], sample_weight=sample_weight[s]), alpha=(n_matrices - 1) / n_matrices, ) norm_iter = xp.linalg.matrix_norm(M_iter[0] - M[0], ord=2) norm_c = xp.linalg.matrix_norm(M[0], ord=2) if norm_iter / norm_c < tol: break M = M_iter M_iter = xp.zeros_like(M) else: warnings.warn("Convergence not reached") return xp.mean(M_iter, axis=0)
[docs] @_vectorize_nd(n_axes=3) def mean_cheap(X, *, tol=1e-7, maxiter=50, sample_weight=None): """Cheap mean of SPD/HPD matrices. Cheap mean is computed as described in [1]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. tol : float, default=1e-7 Tolerance to stop the gradient descent. maxiter : int, default=50 Maximum number of iterations. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (n, n) Cheap mean. Notes ----- .. versionadded:: 0.12 See Also -------- gmean References ---------- .. [1] `A note on computing matrix geometric means <https://poisson.phc.dm.unipi.it/~maxreen/bruno/pdf/D.%20Bini%20and%20B.%20Iannazzo%20-%20A%20note%20on%20computing%20Matrix%20Geometric%20Means.pdf>`_ D. Bini and B. Iannazzo. Advances in Computational Mathematics. 2011 """ # noqa xp = get_namespace(X) n_matrices, _, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) if n_matrices == 1: return X[0] if n_matrices == 2: alpha = sample_weight[1] / sample_weight[0] / 2 M = geodesic_riemann(X[0], X[1], alpha=alpha) return M M = X M_iter = xp.zeros_like(M) for _ in range(maxiter): for h in range(n_matrices): M_ = xp.concatenate((M[:h], M[h+1:]), axis=0) M_iter[h] = M[h] @ expm( xp.sum(logm(xp.linalg.solve(M[h], M_)), axis=0) / n_matrices ) norm_iter = xp.linalg.matrix_norm(M_iter[0] - M[0], ord=2) norm_c = xp.linalg.matrix_norm(M[0], ord=2) if norm_iter / norm_c < tol: break M = M_iter M_iter = xp.zeros_like(M) else: warnings.warn("Convergence not reached") return xp.mean(M_iter, axis=0)
[docs] def mean_chol(X, sample_weight=None, **kwargs): r"""Mean of SPD/HPD matrices according to the Cholesky metric. Cholesky mean :math:`\mathbf{M}` is :math:`\mathbf{M} = \mathbf{L} \mathbf{L}^H`, where :math:`\mathbf{L}` is computed as [1]_: .. math:: \mathbf{L} = \sum_i w_i \text{chol}(\mathbf{X}_i) with :math:`w` being the weights which sum to 1. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Cholesky mean. Notes ----- .. versionadded:: 0.10 .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Non-Euclidean statistics for covariance matrices, with applications to diffusion tensor imaging <https://doi.org/10.1214/09-AOAS249>`_ I.L. Dryden, A. Koloydenko, D. Zhou. Ann Appl Stat, 2009, 3(3), pp. 1102-1123. """ xp = get_namespace(X) L = mean_euclid(xp.linalg.cholesky(X), sample_weight=sample_weight) return L @ ctranspose(L)
[docs] def mean_euclid(X, sample_weight=None, **kwargs): r"""Mean of matrices according to the Euclidean metric. Euclidean mean is also called arithmetic: .. math:: \mathbf{M} = \sum_i w_i \ \mathbf{X}_i with :math:`w` being the weights which sum to 1. Parameters ---------- X : ndarray, shape (..., n_matrices, n, m) Set of matrices. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, m) Euclidean mean. Notes ----- .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean """ xp = get_namespace(X) if sample_weight is None: return xp.mean(X, axis=-3) n_matrices = X.shape[-3] sample_weight = check_weights(sample_weight, n_matrices, like=X) return xp.tensordot(sample_weight, X, axes=([0], [-3]))
[docs] def mean_harmonic(X, sample_weight=None, **kwargs): r"""Harmonic mean of invertible matrices. .. math:: \mathbf{M} = \left( \sum_i w_i \ {\mathbf{X}_i}^{-1} \right)^{-1} with :math:`w` being the weights which sum to 1. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of invertible matrices. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Harmonic mean. Notes ----- .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean """ xp = get_namespace(X) eye_n = xp.eye(X.shape[-1], dtype=X.dtype, device=xpd(X)) X_inv = xp.linalg.solve(X, eye_n) M_inv = mean_euclid(X_inv, sample_weight=sample_weight) M = xp.linalg.solve(M_inv, eye_n) return M
[docs] def mean_kullback_sym(X, sample_weight=None, **kwargs): """Mean of SPD/HPD matrices according to Kullback-Leibler divergence. Symmetrized Kullback-Leibler mean is the geometric mean between the Euclidean and the harmonic means [1]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Symmetrized Kullback-Leibler mean. Notes ----- .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Symmetric positive-definite matrices: From geometry to applications and visualization <https://link.springer.com/chapter/10.1007/3-540-31272-2_17>`_ M. Moakher and P. Batchelor. Visualization and Processing of Tensor Fields, pp. 285-298, 2006 """ M_euclid = mean_euclid(X, sample_weight=sample_weight) M_harmonic = mean_harmonic(X, sample_weight=sample_weight) M = geodesic_riemann(M_euclid, M_harmonic, 0.5) return M
[docs] def mean_logchol(X, sample_weight=None, **kwargs): r"""Mean of SPD/HPD matrices according to the log-Cholesky metric. Log-Cholesky mean :math:`\mathbf{M}` is :math:`\mathbf{M} = \mathbf{L} \mathbf{L}^H`, where :math:`\mathbf{L}` is computed as Eq(4.3) in [1]_: .. math:: \mathbf{L} = \sum_i w_i \text{lower}(\text{chol}(\mathbf{X}_i)) + \exp \left( \sum_i w_i \log(\text{diag}(\text{chol}(\mathbf{X}_i))) \right) with :math:`w` being the weights which sum to 1. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Log-Cholesky mean. Notes ----- .. versionadded:: 0.7 .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Riemannian geometry of symmetric positive definite matrices via Cholesky decomposition <https://arxiv.org/pdf/1908.09326>`_ Z. Lin. SIAM J Matrix Anal Appl, 2019, 40(4), pp. 1353-1370. """ xp = get_namespace(X) n_matrices, n = X.shape[-3], X.shape[-1] sample_weight = check_weights(sample_weight, n_matrices, like=X) X_chol = xp.linalg.cholesky(X) L = xp.zeros(X.shape[:-3] + X.shape[-2:], dtype=X.dtype, device=xpd(X)) tri0, tri1 = tril_indices(n, -1, like=X) L[..., tri0, tri1] = xp.tensordot( sample_weight, X_chol[..., tri0, tri1], axes=([0], [-2]), ) diag0, diag1 = diag_indices(n, like=X) L[..., diag0, diag1] = xp.exp(xp.tensordot( sample_weight, xp.log(X_chol[..., diag0, diag1]), axes=([0], [-2]), )) return L @ ctranspose(L)
[docs] @_vectorize_nd(n_axes=3) def mean_logdet(X, *, tol=10e-5, maxiter=50, init=None, sample_weight=None): r"""Mean of SPD/HPD matrices according to the log-det metric. Log-det mean is obtained by an iterative procedure where the update is: .. math:: \mathbf{M} = \left( \sum_i w_i \ \left( 0.5 \mathbf{M} + 0.5 \mathbf{X}_i \right)^{-1} \right)^{-1} with :math:`w` being the weights which sum to 1. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. tol : float, default=10e-5 Tolerance to stop the gradient descent. maxiter : int, default=50 Maximum number of iterations. init : None | ndarray, shape (n, n), default=None A SPD/HPD matrix used to initialize the gradient descent. If None, the weighted Euclidean mean is used. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Log-det mean. Notes ----- .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `A new metric on the manifold of kernel matrices with application to matrix geometric means <https://proceedings.neurips.cc/paper/2012/file/98dce83da57b0395e163467c9dae521b-Paper.pdf>`_ S. Sra. NeurIPS, 2012 """ # noqa xp = get_namespace(X) n_matrices, n, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) if init is None: M = mean_euclid(X, sample_weight=sample_weight) else: M = check_init(init, n, like=X) eye_n = xp.eye(n, dtype=X.dtype, device=xpd(X)) for _ in range(maxiter): X_inv = xp.linalg.solve(0.5 * X + 0.5 * M, eye_n) J = xp.tensordot(sample_weight, X_inv, axes=([0], [0])) Mnew = xp.linalg.solve(J, eye_n) crit = xp.linalg.matrix_norm(Mnew - M, ord="fro") M = Mnew if crit <= tol: break else: warnings.warn("Convergence not reached", stacklevel=2) return M
[docs] def mean_logeuclid(X, sample_weight=None, **kwargs): r"""Mean of SPD/HPD matrices according to the log-Euclidean metric. Log-Euclidean mean is [1]_: .. math:: \mathbf{M} = \exp{ \left( \sum_i w_i \ \log{\mathbf{X}_i} \right) } with :math:`w` being the weights which sum to 1. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Log-Euclidean mean. Notes ----- .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Geometric means in a novel vector space structure on symmetric positive-definite matrices <https://epubs.siam.org/doi/abs/10.1137/050637996?journalCode=sjmael>`_ V. Arsigny, P. Fillard, X. Pennec, and N. Ayache. SIAM Journal on Matrix Analysis and Applications. Volume 29, Issue 1 (2007). """ M = expm(mean_euclid(logm(X), sample_weight=sample_weight)) return M
[docs] @_vectorize_nd(n_axes=3) def mean_power(X, p, *, sample_weight=None, zeta=10e-10, maxiter=100, init=None): r"""Power mean of SPD/HPD matrices. Power mean of order :math:`p` is the solution of [1]_ [2]_: .. math:: \mathbf{M} = \sum_i w_i \ \mathbf{M} \sharp_p \mathbf{X}_i with :math:`\mathbf{w}` being the weights which sum to 1, and :math:`\mathbf{A} \sharp_p \mathbf{B}` being the geodesic between matrices :math:`\mathbf{A}` and :math:`\mathbf{B}`. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. p : float Exponent, in [-1,+1]. For p=0, it returns :func:`pyriemann.geometry.mean.mean_riemann`. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. zeta : float, default=10e-10 Stopping criterion. maxiter : int, default=100 Maximum number of iterations. init : None | ndarray, shape (n, n), default=None A SPD/HPD matrix used to initialize the gradient descent. If None, the weighted power Euclidean mean is used. Returns ------- M : ndarray, shape (..., n, n) Power mean. Notes ----- .. versionadded:: 0.3 .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Matrix Power means and the Karcher mean <https://www.sciencedirect.com/science/article/pii/S0022123611004101>`_ Y. Lim and M. Palfia. Journal of Functional Analysis, Volume 262, Issue 4, 15 February 2012, Pages 1498-1514. .. [2] `Fixed Point Algorithms for Estimating Power Means of Positive Definite Matrices <https://hal.archives-ouvertes.fr/hal-01500514>`_ M. Congedo, A. Barachant, and R. Bhatia. IEEE Transactions on Signal Processing, Volume 65, Issue 9, pp.2211-2220, May 2017 """ xp = get_namespace(X) if not isinstance(p, (int, float)): raise ValueError(f"Exponent p must be a scalar (Got {type(p)})") if p < -1 or 1 < p: raise ValueError("Exponent p must be in [-1,+1]") if p == 1: return mean_euclid(X, sample_weight=sample_weight) if p == 0: return mean_riemann( X, sample_weight=sample_weight, init=init, tol=zeta, maxiter=maxiter, ) if p == -1: return mean_harmonic(X, sample_weight=sample_weight) n_matrices, n, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) phi = 0.375 / np.abs(p) if init is None: G = powm( xp.tensordot(sample_weight, powm(X, p), axes=([0], [0])), 1/p, ) else: G = check_init(init, n, like=X) if p > 0: K = invsqrtm(G) else: K = sqrtm(G) eye_n, sqrt_n = xp.eye(n, dtype=X.dtype, device=xpd(X)), np.sqrt(n) for _ in range(maxiter): H = xp.tensordot( sample_weight, powm(K @ powm(X, np.sign(p)) @ ctranspose(K), np.abs(p)), axes=([0], [0]), ) K = powm(H, -phi) @ K crit = xp.linalg.matrix_norm(H - eye_n) / sqrt_n if crit <= zeta: break else: warnings.warn("Convergence not reached", stacklevel=2) M = ctranspose(K) @ K if p > 0: M = xp.linalg.solve(M, eye_n) return M
[docs] def mean_poweuclid(X, p, *, sample_weight=None, **kwargs): r"""Mean of SPD/HPD matrices according to the power Euclidean metric. Power Euclidean mean of order :math:`p` is [1]_: .. math:: \mathbf{M} = \left( \sum_i w_i \ \mathbf{X}_i^p \right)^{1/p} with :math:`w` being the weights which sum to 1. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. p : float Exponent. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Power Euclidean mean. Notes ----- .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Power Euclidean metrics for covariance matrices with application to diffusion tensor imaging <https://arxiv.org/abs/1009.3045>`_ I.L. Dryden, X. Pennec, & J.M. Peyrat. arXiv, 2010. """ if not isinstance(p, (int, float)): raise ValueError(f"Exponent p must be a scalar (Got {type(p)})") if p == 1: return mean_euclid(X, sample_weight=sample_weight) if p == 0: return mean_logeuclid(X, sample_weight=sample_weight) if p == -1: return mean_harmonic(X, sample_weight=sample_weight) M = powm(mean_euclid(powm(X, p), sample_weight=sample_weight), 1/p) return M
[docs] @_vectorize_nd(n_axes=3) def mean_riemann(X, *, tol=10e-9, maxiter=50, init=None, sample_weight=None): r"""Mean of SPD/HPD matrices according to the Riemannian metric. The affine-invariant Riemannian mean minimizes the sum of squared affine-invariant Riemannian distances :math:`d_R` to all SPD/HPD matrices [1]_: .. math:: \arg \min_{\mathbf{M}} \sum_i w_i \ d_R (\mathbf{M}, \mathbf{X}_i)^2 with :math:`w` being the weights which sum to 1. For the convergence, the implemented stopping criterion comes from [2]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. tol : float, default=10e-9 Tolerance to stop the gradient descent. maxiter : int, default=50 Maximum number of iterations. init : None | ndarray, shape (n, n), default=None A SPD/HPD matrix used to initialize the gradient descent. If None, the weighted Euclidean mean is used. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Affine-invariant Riemannian mean. Notes ----- .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Principal geodesic analysis for the study of nonlinear statistics of shape <https://ieeexplore.ieee.org/document/1318725>`_ P.T. Fletcher, C. Lu, S. M. Pizer, S. Joshi. IEEE Trans Med Imaging, 2004, 23(8), pp. 995-1005 .. [2] `Approximate Joint Diagonalization and Geometric Mean of Symmetric Positive Definite Matrices <https://arxiv.org/abs/1505.07343>`_ M. Congedo, B. Afsari, A. Barachant, M. Moakher. PLOS ONE, 2015 """ xp = get_namespace(X) n_matrices, n, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) if init is None: M = mean_euclid(X, sample_weight=sample_weight) else: M = check_init(init, n, like=X) nu = 1.0 tau = np.finfo(np.float64).max for _ in range(maxiter): M12, Mm12 = sqrtm(M), invsqrtm(M) J = xp.tensordot( sample_weight, logm(Mm12 @ X @ Mm12), axes=([0], [0]), ) M = M12 @ expm(nu * J) @ M12 crit = xp.linalg.matrix_norm(J, ord="fro") h = nu * crit if h < tau: nu = 0.95 * nu tau = h else: nu = 0.5 * nu if crit <= tol or nu <= tol: break else: warnings.warn("Convergence not reached", stacklevel=2) return M
[docs] @_vectorize_nd(n_axes=3) def mean_thompson(X, *, tol=1e-6, maxiter=50, init=None, sample_weight=None): """Mean of SPD/HPD matrices according to the Thompson metric. The Thompson mean of SPD/HPD matrices is described in [1]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. tol : float, default=1e-6 Tolerance to stop the gradient descent. maxiter : int, default=50 Maximum number of iterations. init : None | ndarray, shape (n, n), default=None A SPD/HPD matrix used to initialize the gradient descent. If None, the weighted Euclidean mean is used. sample_weight : None Not used. Returns ------- M : ndarray, shape (..., n, n) Thompson mean. Notes ----- .. versionadded:: 0.10 .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Differential geometry with extreme eigenvalues in the positive semidefinite cone <https://arxiv.org/pdf/2304.07347>`_ C. Mostajeran, N. Da Costa, G. Van Goffrier and R. Sepulchre. SIAM Journal on Matrix Analysis and Applications, 2024 """ xp = get_namespace(X) n_matrices, n, _ = X.shape if init is None: M = mean_euclid(X) else: M = check_init(init, n, like=X) for i in range(maxiter): Mnew = geodesic_thompson(M, X[i % n_matrices], 1 / (i + 2)) crit = xp.linalg.matrix_norm(Mnew - M, ord="fro") M = Mnew if crit <= tol: break else: warnings.warn("Convergence not reached", stacklevel=2) return M
[docs] @_vectorize_nd(n_axes=3) def mean_wasserstein(X, tol=10e-9, maxiter=50, init=None, sample_weight=None): r"""Mean of SPD/HPD matrices according to the Wasserstein metric. Wasserstein mean [1]_ is implemented as the inductive mean [2]_, adapted to the same convergence criterion as the Riemannian mean. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. tol : float, default=10e-9 Tolerance to stop the gradient descent. maxiter : int, default=50 Maximum number of iterations. init : None | ndarray, shape (n, n), default=None A SPD/HPD matrix used to initialize the gradient descent. If None the weighted Euclidean mean is used. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Wasserstein mean. Notes ----- .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- gmean References ---------- .. [1] `Barycenters in the Wasserstein space <https://hal.science/hal-00637399/file/AC_bary_revis.pdf>`_ M. Agueh and G. Carlier. SIAM Journal on Mathematical Analysis, 2011 .. [2] `Barycenter Estimation of Positive Semi-Definite Matrices with Bures-Wasserstein Distance <https://arxiv.org/abs/2302.14618>`_ J. Zheng, H. Huang, Y. Yi, Y. Li, S.-C. Lin, ArXiv, 2023 """ xp = get_namespace(X) n_matrices, n, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) if init is None: init = mean_euclid(X, sample_weight=sample_weight) else: init = check_init(init, n, like=X) M = init for _ in range(maxiter): X_ts = log_map_wasserstein(X, M) J = xp.tensordot(sample_weight, X_ts, axes=([0], [0])) M = exp_map_wasserstein(J, M) crit = xp.linalg.matrix_norm(J) if crit <= tol: break else: warnings.warn("Convergence not reached", stacklevel=2) return M
############################################################################### mean_functions = { "ale": mean_ale, "alm": mean_alm, "bmp": mean_bmp, "cheap": mean_cheap, "chol": mean_chol, "euclid": mean_euclid, "harmonic": mean_harmonic, "kullback_sym": mean_kullback_sym, "logdet": mean_logdet, "logchol": mean_logchol, "logeuclid": mean_logeuclid, "power": mean_power, "poweuclid": mean_poweuclid, "riemann": mean_riemann, "thompson": mean_thompson, "wasserstein": mean_wasserstein, }
[docs] def gmean(X, *args, metric="riemann", sample_weight=None, **kwargs): """Mean of matrices according to a metric. Compute the mean of a set of matrices according to a metric [1]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of matrices. *args : tuple The arguments passed to the sub function. metric : string | callable, default="riemann" Metric for mean estimation, can be: "ale", "alm", "bmp", "cheap", "chol", "euclid", "harmonic", "kullback_sym", "logchol", "logdet", "logeuclid", "riemann", "thompson", "wasserstein", or a callable function. If an exponent is given in args, it can be "power", "poweuclid". sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. **kwargs : dict The keyword arguments passed to the sub function. Returns ------- M : ndarray, shape (..., n, n) Mean of matrices. Notes ----- .. versionchanged:: 0.11 Rename mean_covariance into gmean. References ---------- .. [1] `Review of Riemannian distances and divergences, applied to SSVEP-based BCI <https://hal.archives-ouvertes.fr/LISV/hal-03015762v1>`_ S. Chevallier, E. K. Kalunga, Q. Barthélemy, E. Monacelli. Neuroinformatics, Springer, 2021, 19 (1), pp.93-106 """ mean_function = check_function(metric, mean_functions) M = mean_function( X, *args, sample_weight=sample_weight, **kwargs, ) return M
@deprecated( "mean_covariance() is deprecated and will be removed in 0.13.0; " "please use gmean()." ) def mean_covariance(X, *args, metric="riemann", sample_weight=None, **kwargs): return gmean(X, *args, metric="riemann", sample_weight=None, **kwargs) ############################################################################### def _get_mask_from_nan(X): xp = get_namespace(X) isnan = xp.isnan(X) nan_col = xp.all(isnan, axis=0) nan_row = xp.all(isnan, axis=1) if not bool(xp.all(nan_col == nan_row)): raise ValueError("NaN values are not symmetric.") keep_col = ~nan_col maskedX = X[keep_col, :][:, keep_col] if bool(xp.any(xp.isnan(maskedX))): raise ValueError("NaN values must fill rows and columns.") eye = xp.eye(X.shape[0], dtype=X.dtype, device=xpd(X)) return eye[:, keep_col] def _get_masks_from_nan(X): return [_get_mask_from_nan(x) for x in X] def _apply_masks(X, masks): return [m.mT @ x @ m for x, m in zip(X, masks)]
[docs] @_vectorize_nd(n_axes=3) def maskedmean_riemann(X, masks, *, tol=10e-9, maxiter=100, init=None, sample_weight=None): """Masked Riemannian mean of SPD/HPD matrices. Given masks defined as semi-orthogonal matrices, the masked Riemannian mean of SPD/HPD matrices is obtained with a gradient descent minimizing the sum of affine-invariant Riemannian distances between masked SPD/HPD matrices and the masked mean [1]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices. masks : list of n_matrices ndarray of shape (n, n_i), \ with different n_i, such that n_i <= n Masks, defined as semi-orthogonal matrices. See [1]_. tol : float, default=10e-9 Tolerance to stop the gradient descent. maxiter : int, default=100 Maximum number of iterations. init : None | ndarray, shape (n, n), default=None A SPD/HPD matrix used to initialize the gradient descent. If None, the Identity is used. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Masked Riemannian mean. Notes ----- .. versionadded:: 0.3 .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- mean_riemann gmean References ---------- .. [1] `Geodesically-convex optimization for averaging partially observed covariance matrices <https://hal.archives-ouvertes.fr/hal-02984423>`_ F. Yger, S. Chevallier, Q. Barthélemy, and S. Sra. Asian Conference on Machine Learning (ACML), Nov 2020, Bangkok, Thailand. pp.417 - 432. """ xp = get_namespace(X) n_matrices, n, _ = X.shape sample_weight = check_weights(sample_weight, n_matrices, like=X) maskedX = _apply_masks(X, masks) if init is None: M = xp.eye(n, dtype=X.dtype, device=xpd(X)) else: M = check_init(init, n, like=X) nu = 1.0 tau = np.finfo(np.float64).max for _ in range(maxiter): maskedM = _apply_masks(xp.broadcast_to(M, (n_matrices, n, n)), masks) J = xp.zeros((n, n), dtype=X.dtype, device=xpd(X)) for i in range(n_matrices): M12, Mm12 = sqrtm(maskedM[i]), invsqrtm(maskedM[i]) tmp = M12 @ logm(Mm12 @ maskedX[i] @ Mm12) @ M12 J += sample_weight[i] * masks[i] @ tmp @ masks[i].T M12, Mm12 = sqrtm(M), invsqrtm(M) M = M12 @ expm(Mm12 @ (nu * J) @ Mm12) @ M12 crit = xp.linalg.matrix_norm(J, ord="fro") h = nu * crit if h < tau: nu = 0.95 * nu tau = h else: nu = 0.5 * nu if crit <= tol or nu <= tol: break else: warnings.warn("Convergence not reached", stacklevel=2) return M
[docs] @_vectorize_nd(n_axes=3) def nanmean_riemann(X, tol=10e-9, maxiter=100, init=None, sample_weight=None): """Riemannian NaN-mean of SPD/HPD matrices. The Riemannian NaN-mean is the masked Riemannian mean applied to SPD/HPD matrices potentially corrupted by symmetric NaN values [1]_. Parameters ---------- X : ndarray, shape (..., n_matrices, n, n) Set of SPD/HPD matrices, corrupted by symmetric NaN values [1]_. tol : float, default=10e-9 Tolerance to stop the gradient descent. maxiter : int, default=100 Maximum number of iterations. init : None | ndarray, shape (n, n), default=None A SPD/HPD matrix used to initialize the gradient descent. If None, a regularized Euclidean NaN-mean is used. sample_weight : None | ndarray, shape (n_matrices,), default=None Weights for each matrix. If None, it uses equal weights. Returns ------- M : ndarray, shape (..., n, n) Riemannian NaN-mean. Notes ----- .. versionadded:: 0.3 .. versionchanged:: 0.12 Add support for NumPy and PyTorch. See Also -------- maskedmean_riemann gmean References ---------- .. [1] `Geodesically-convex optimization for averaging partially observed covariance matrices <https://hal.archives-ouvertes.fr/hal-02984423>`_ F. Yger, S. Chevallier, Q. Barthélemy, and S. Sra. Asian Conference on Machine Learning (ACML), Nov 2020, Bangkok, Thailand. pp.417 - 432. """ xp = get_namespace(X) _, n, _ = X.shape if init is None: eye = xp.eye(n, dtype=X.dtype, device=xpd(X)) init = nanmean(X, axis=0) + 1e-6 * eye else: init = check_init(init, n, like=X) M = maskedmean_riemann( nan_to_num(X), # avoid nan contamination in matmul _get_masks_from_nan(X), tol=tol, maxiter=maxiter, init=init, sample_weight=sample_weight, ) return M