Source code for pyriemann.geometry.mean

"""Means of SPD/HPD matrices."""

import warnings

from array_api_compat import array_namespace as get_namespace, device as xpd
from array_api_extra import create_diagonal, nan_to_num
import numpy as np

from ._backend import diag_indices, nanmean, tril_indices
from ._check import check_function, check_init, check_weights
from ._docs import deprecated
from .ajd import ajd_pham
from .base import ctranspose, sqrtm, invsqrtm, logm, expm, powm, _vectorize_nd
from .distance import distance_riemann
from .geodesic import geodesic_riemann, geodesic_thompson
from .tangentspace import log_map_wasserstein, exp_map_wasserstein



[docs]
@_vectorize_nd(n_axes=3)
def mean_ale(X, *, tol=10e-7, maxiter=50, sample_weight=None, init=None):
    """AJD-based log-Euclidean (ALE) mean of SPD/HPD matrices.

    Approximate joint diagonalization (AJD) based log-Euclidean (ALE) mean of
    SPD/HPD matrices [1]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    tol : float, default=10e-7
        Tolerance to stop the gradient descent.
    maxiter : int, default=50
        Maximum number of iterations.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.
    init : None | ndarray, shape (n, n), default=None
        A SPD/HPD matrix used to initialize the gradient descent.
        If None, the joint diagonalizer of input matrices is used.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        ALE mean.

    Notes
    -----
    .. versionadded:: 0.2.4
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Approximate Joint Diagonalization and Geometric Mean of Symmetric
        Positive Definite Matrices
        <https://arxiv.org/abs/1505.07343>`_
        M. Congedo, B. Afsari, A. Barachant, M. Moakher. PLOS ONE, 2015
    """
    xp = get_namespace(X)
    n_matrices, n, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)
    if init is None:
        B = ajd_pham(X)[0]
    else:
        B = check_init(init, n, like=X)

    eye_n = xp.eye(n, dtype=X.real.dtype, device=xpd(X))
    for _ in range(maxiter):
        J = xp.tensordot(
            sample_weight, logm(B @ X @ ctranspose(B)), axes=([0], [0]),
        )
        delta = xp.real(xp.linalg.diagonal(expm(J)))
        B = (xp.abs(delta) ** -.5)[:, None] * B

        crit = distance_riemann(eye_n, create_diagonal(delta))
        if crit <= tol:
            break
    else:
        warnings.warn("Convergence not reached", stacklevel=2)

    J = xp.tensordot(
        sample_weight, logm(B @ X @ ctranspose(B)), axes=([0], [0]),
    )
    A = xp.linalg.solve(B, eye_n)
    M = A @ expm(J) @ ctranspose(A)
    return M




[docs]
@_vectorize_nd(n_axes=3)
def mean_alm(X, *, tol=1e-14, maxiter=100, sample_weight=None, **kwargs):
    r"""Ando-Li-Mathias (ALM) mean of SPD/HPD matrices.

    Ando-Li-Mathias (ALM) mean is computed recursively, generalizing from [1]_:

    .. math::
        \mathbf{M} = X_1^{\frac{1}{2}} (X_1^{-\frac{1}{2}}X_2^{\frac{1}{2}}
                     X_1^{-\frac{1}{2}})^{\frac{1}{2}} X_1^{\frac{1}{2}}

    and requiring a high number of iterations.
    Extremely slow, due to the recursive formulation.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    tol : float, default=1e-14
        Tolerance to stop the gradient descent.
    maxiter : int, default=100
        Maximum number of iterations.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        ALM mean.

    Notes
    -----
    .. versionadded:: 0.3
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Geometric Means
        <https://www.sciencedirect.com/science/article/pii/S0024379503008693>`_
        T. Ando, C.-K. Li, and R. Mathias. Linear Algebra and its Applications.
        Volume 385, July 2004, Pages 305-334.
    """
    xp = get_namespace(X)
    n_matrices, _, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)

    if n_matrices == 1:
        return X[0]

    if n_matrices == 2:
        alpha = sample_weight[1] / sample_weight[0] / 2
        M = geodesic_riemann(X[0], X[1], alpha=alpha)
        return M

    M = X
    M_iter = xp.zeros_like(M)
    for _ in range(maxiter):
        for h in range(n_matrices):
            s = np.mod(np.arange(h, h + n_matrices - 1) + 1, n_matrices)
            M_iter[h] = mean_alm(M[s], sample_weight=sample_weight[s])

        norm_iter = xp.linalg.matrix_norm(M_iter[0] - M[0], ord=2)
        norm_c = xp.linalg.matrix_norm(M[0], ord=2)
        if norm_iter / norm_c < tol:
            break
        M = M_iter
        M_iter = xp.zeros_like(M)
    else:
        warnings.warn("Convergence not reached", stacklevel=2)

    return xp.mean(M_iter, axis=0)




[docs]
@_vectorize_nd(n_axes=3)
def mean_bmp(X, *, tol=1e-7, maxiter=50, sample_weight=None):
    """Bini-Meini-Poloni (BMP) mean of SPD/HPD matrices.

    Bini-Meini-Poloni (BMP) mean is computed recursively [1]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    tol : float, default=1e-7
        Tolerance to stop the gradient descent.
    maxiter : int, default=50
        Maximum number of iterations.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (n, n)
        BMP mean.

    Notes
    -----
    .. versionadded:: 0.12

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `An effective matrix geometric mean satisfying the Ando–Li–Mathias
        properties
        <https://pages.di.unipi.it/fpoloni/publications/files2/BinMP10_means_final.pdf>`_
        D. Bini, B. Meini and F. Poloni. Mathematics of Computation. 2010
    """
    xp = get_namespace(X)
    n_matrices, _, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)

    if n_matrices == 1:
        return X[0]

    if n_matrices == 2:
        alpha = sample_weight[1] / sample_weight[0] / 2
        M = geodesic_riemann(X[0], X[1], alpha=alpha)
        return M

    M = X
    M_iter = xp.zeros_like(M)
    for _ in range(maxiter):
        for h in range(n_matrices):
            s = np.mod(np.arange(h, h + n_matrices - 1) + 1, n_matrices)
            M_iter[h] = geodesic_riemann(
                M[h],
                mean_bmp(M[s], sample_weight=sample_weight[s]),
                alpha=(n_matrices - 1) / n_matrices,
            )

        norm_iter = xp.linalg.matrix_norm(M_iter[0] - M[0], ord=2)
        norm_c = xp.linalg.matrix_norm(M[0], ord=2)
        if norm_iter / norm_c < tol:
            break
        M = M_iter
        M_iter = xp.zeros_like(M)
    else:
        warnings.warn("Convergence not reached")

    return xp.mean(M_iter, axis=0)




[docs]
@_vectorize_nd(n_axes=3)
def mean_cheap(X, *, tol=1e-7, maxiter=50, sample_weight=None):
    """Cheap mean of SPD/HPD matrices.

    Cheap mean is computed as described in [1]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    tol : float, default=1e-7
        Tolerance to stop the gradient descent.
    maxiter : int, default=50
        Maximum number of iterations.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (n, n)
        Cheap mean.

    Notes
    -----
    .. versionadded:: 0.12

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `A note on computing matrix geometric means
        <https://poisson.phc.dm.unipi.it/~maxreen/bruno/pdf/D.%20Bini%20and%20B.%20Iannazzo%20-%20A%20note%20on%20computing%20Matrix%20Geometric%20Means.pdf>`_
        D. Bini and B. Iannazzo. Advances in Computational Mathematics. 2011
    """  # noqa
    xp = get_namespace(X)
    n_matrices, _, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)

    if n_matrices == 1:
        return X[0]

    if n_matrices == 2:
        alpha = sample_weight[1] / sample_weight[0] / 2
        M = geodesic_riemann(X[0], X[1], alpha=alpha)
        return M

    M = X
    M_iter = xp.zeros_like(M)
    for _ in range(maxiter):
        for h in range(n_matrices):
            M_ = xp.concatenate((M[:h], M[h+1:]), axis=0)
            M_iter[h] = M[h] @ expm(
                xp.sum(logm(xp.linalg.solve(M[h], M_)), axis=0) / n_matrices
            )

        norm_iter = xp.linalg.matrix_norm(M_iter[0] - M[0], ord=2)
        norm_c = xp.linalg.matrix_norm(M[0], ord=2)
        if norm_iter / norm_c < tol:
            break
        M = M_iter
        M_iter = xp.zeros_like(M)
    else:
        warnings.warn("Convergence not reached")

    return xp.mean(M_iter, axis=0)




[docs]
def mean_chol(X, sample_weight=None, **kwargs):
    r"""Mean of SPD/HPD matrices according to the Cholesky metric.

    Cholesky mean :math:`\mathbf{M}` is
    :math:`\mathbf{M} = \mathbf{L} \mathbf{L}^H`,
    where :math:`\mathbf{L}` is computed as [1]_:

    .. math::
        \mathbf{L} = \sum_i w_i \text{chol}(\mathbf{X}_i)

    with :math:`w` being the weights which sum to 1.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Cholesky mean.

    Notes
    -----
    .. versionadded:: 0.10
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Non-Euclidean statistics for covariance matrices, with applications
        to diffusion tensor imaging
        <https://doi.org/10.1214/09-AOAS249>`_
        I.L. Dryden, A. Koloydenko, D. Zhou.
        Ann Appl Stat, 2009, 3(3), pp. 1102-1123.
    """
    xp = get_namespace(X)
    L = mean_euclid(xp.linalg.cholesky(X), sample_weight=sample_weight)
    return L @ ctranspose(L)




[docs]
def mean_euclid(X, sample_weight=None, **kwargs):
    r"""Mean of matrices according to the Euclidean metric.

    Euclidean mean is also called arithmetic:

    .. math::
        \mathbf{M} = \sum_i w_i \ \mathbf{X}_i

    with :math:`w` being the weights which sum to 1.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, m)
        Set of matrices.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, m)
        Euclidean mean.

    Notes
    -----
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean
    """
    xp = get_namespace(X)
    if sample_weight is None:
        return xp.mean(X, axis=-3)
    n_matrices = X.shape[-3]
    sample_weight = check_weights(sample_weight, n_matrices, like=X)
    return xp.tensordot(sample_weight, X, axes=([0], [-3]))




[docs]
def mean_harmonic(X, sample_weight=None, **kwargs):
    r"""Harmonic mean of invertible matrices.

    .. math::
        \mathbf{M} = \left( \sum_i w_i \ {\mathbf{X}_i}^{-1} \right)^{-1}

    with :math:`w` being the weights which sum to 1.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of invertible matrices.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Harmonic mean.

    Notes
    -----
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean
    """
    xp = get_namespace(X)
    eye_n = xp.eye(X.shape[-1], dtype=X.dtype, device=xpd(X))
    X_inv = xp.linalg.solve(X, eye_n)
    M_inv = mean_euclid(X_inv, sample_weight=sample_weight)
    M = xp.linalg.solve(M_inv, eye_n)
    return M




[docs]
def mean_kullback_sym(X, sample_weight=None, **kwargs):
    """Mean of SPD/HPD matrices according to Kullback-Leibler divergence.

    Symmetrized Kullback-Leibler mean is the geometric mean between the
    Euclidean and the harmonic means [1]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Symmetrized Kullback-Leibler mean.

    Notes
    -----
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Symmetric positive-definite matrices: From geometry to applications
        and visualization
        <https://link.springer.com/chapter/10.1007/3-540-31272-2_17>`_
        M. Moakher and P. Batchelor. Visualization and Processing of Tensor
        Fields, pp. 285-298, 2006
    """
    M_euclid = mean_euclid(X, sample_weight=sample_weight)
    M_harmonic = mean_harmonic(X, sample_weight=sample_weight)
    M = geodesic_riemann(M_euclid, M_harmonic, 0.5)
    return M




[docs]
def mean_logchol(X, sample_weight=None, **kwargs):
    r"""Mean of SPD/HPD matrices according to the log-Cholesky metric.

    Log-Cholesky mean :math:`\mathbf{M}` is
    :math:`\mathbf{M} = \mathbf{L} \mathbf{L}^H`,
    where :math:`\mathbf{L}` is computed as Eq(4.3) in [1]_:

    .. math::
        \mathbf{L} = \sum_i w_i \text{lower}(\text{chol}(\mathbf{X}_i)) +
        \exp \left( \sum_i w_i \log(\text{diag}(\text{chol}(\mathbf{X}_i)))
        \right)

    with :math:`w` being the weights which sum to 1.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Log-Cholesky mean.

    Notes
    -----
    .. versionadded:: 0.7
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Riemannian geometry of symmetric positive definite matrices via
        Cholesky decomposition
        <https://arxiv.org/pdf/1908.09326>`_
        Z. Lin. SIAM J Matrix Anal Appl, 2019, 40(4), pp. 1353-1370.
    """
    xp = get_namespace(X)
    n_matrices, n = X.shape[-3], X.shape[-1]
    sample_weight = check_weights(sample_weight, n_matrices, like=X)

    X_chol = xp.linalg.cholesky(X)
    L = xp.zeros(X.shape[:-3] + X.shape[-2:], dtype=X.dtype, device=xpd(X))

    tri0, tri1 = tril_indices(n, -1, like=X)
    L[..., tri0, tri1] = xp.tensordot(
        sample_weight, X_chol[..., tri0, tri1], axes=([0], [-2]),
    )

    diag0, diag1 = diag_indices(n, like=X)
    L[..., diag0, diag1] = xp.exp(xp.tensordot(
        sample_weight, xp.log(X_chol[..., diag0, diag1]), axes=([0], [-2]),
    ))

    return L @ ctranspose(L)




[docs]
@_vectorize_nd(n_axes=3)
def mean_logdet(X, *, tol=10e-5, maxiter=50, init=None, sample_weight=None):
    r"""Mean of SPD/HPD matrices according to the log-det metric.

    Log-det mean is obtained by an iterative procedure where the update is:

    .. math::
        \mathbf{M} = \left( \sum_i w_i \ \left( 0.5 \mathbf{M}
                     + 0.5 \mathbf{X}_i \right)^{-1} \right)^{-1}

    with :math:`w` being the weights which sum to 1.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    tol : float, default=10e-5
        Tolerance to stop the gradient descent.
    maxiter : int, default=50
        Maximum number of iterations.
    init : None | ndarray, shape (n, n), default=None
        A SPD/HPD matrix used to initialize the gradient descent.
        If None, the weighted Euclidean mean is used.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Log-det mean.

    Notes
    -----
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `A new metric on the manifold of kernel matrices with application to
        matrix geometric means
        <https://proceedings.neurips.cc/paper/2012/file/98dce83da57b0395e163467c9dae521b-Paper.pdf>`_
        S. Sra. NeurIPS, 2012
    """  # noqa
    xp = get_namespace(X)
    n_matrices, n, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)
    if init is None:
        M = mean_euclid(X, sample_weight=sample_weight)
    else:
        M = check_init(init, n, like=X)
    eye_n = xp.eye(n, dtype=X.dtype, device=xpd(X))

    for _ in range(maxiter):
        X_inv = xp.linalg.solve(0.5 * X + 0.5 * M, eye_n)
        J = xp.tensordot(sample_weight, X_inv, axes=([0], [0]))
        Mnew = xp.linalg.solve(J, eye_n)

        crit = xp.linalg.matrix_norm(Mnew - M, ord="fro")
        M = Mnew
        if crit <= tol:
            break
    else:
        warnings.warn("Convergence not reached", stacklevel=2)

    return M




[docs]
def mean_logeuclid(X, sample_weight=None, **kwargs):
    r"""Mean of SPD/HPD matrices according to the log-Euclidean metric.

    Log-Euclidean mean is [1]_:

    .. math::
        \mathbf{M} = \exp{ \left( \sum_i w_i \ \log{\mathbf{X}_i} \right) }

    with :math:`w` being the weights which sum to 1.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Log-Euclidean mean.

    Notes
    -----
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Geometric means in a novel vector space structure on symmetric
        positive-definite matrices
        <https://epubs.siam.org/doi/abs/10.1137/050637996?journalCode=sjmael>`_
        V. Arsigny, P. Fillard, X. Pennec, and N. Ayache. SIAM Journal on
        Matrix Analysis and Applications. Volume 29, Issue 1 (2007).
    """
    M = expm(mean_euclid(logm(X), sample_weight=sample_weight))
    return M




[docs]
@_vectorize_nd(n_axes=3)
def mean_power(X, p, *, sample_weight=None, zeta=10e-10, maxiter=100,
               init=None):
    r"""Power mean of SPD/HPD matrices.

    Power mean of order :math:`p` is the solution of [1]_ [2]_:

    .. math::
        \mathbf{M} = \sum_i w_i \ \mathbf{M} \sharp_p \mathbf{X}_i

    with :math:`\mathbf{w}` being the weights which sum to 1, and
    :math:`\mathbf{A} \sharp_p \mathbf{B}` being the geodesic between
    matrices :math:`\mathbf{A}` and :math:`\mathbf{B}`.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    p : float
        Exponent, in [-1,+1]. For p=0, it returns
        :func:`pyriemann.geometry.mean.mean_riemann`.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.
    zeta : float, default=10e-10
        Stopping criterion.
    maxiter : int, default=100
        Maximum number of iterations.
    init : None | ndarray, shape (n, n), default=None
        A SPD/HPD matrix used to initialize the gradient descent.
        If None, the weighted power Euclidean mean is used.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Power mean.

    Notes
    -----
    .. versionadded:: 0.3
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Matrix Power means and the Karcher mean
        <https://www.sciencedirect.com/science/article/pii/S0022123611004101>`_
        Y. Lim and M. Palfia. Journal of Functional Analysis, Volume 262,
        Issue 4, 15 February 2012, Pages 1498-1514.
    .. [2] `Fixed Point Algorithms for Estimating Power Means of Positive
        Definite Matrices
        <https://hal.archives-ouvertes.fr/hal-01500514>`_
        M. Congedo, A. Barachant, and R. Bhatia. IEEE Transactions on Signal
        Processing, Volume 65, Issue 9, pp.2211-2220, May 2017
    """
    xp = get_namespace(X)
    if not isinstance(p, (int, float)):
        raise ValueError(f"Exponent p must be a scalar (Got {type(p)})")
    if p < -1 or 1 < p:
        raise ValueError("Exponent p must be in [-1,+1]")

    if p == 1:
        return mean_euclid(X, sample_weight=sample_weight)
    if p == 0:
        return mean_riemann(
            X,
            sample_weight=sample_weight,
            init=init,
            tol=zeta,
            maxiter=maxiter,
        )
    if p == -1:
        return mean_harmonic(X, sample_weight=sample_weight)

    n_matrices, n, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)
    phi = 0.375 / np.abs(p)
    if init is None:
        G = powm(
            xp.tensordot(sample_weight, powm(X, p), axes=([0], [0])), 1/p,
        )
    else:
        G = check_init(init, n, like=X)
    if p > 0:
        K = invsqrtm(G)
    else:
        K = sqrtm(G)

    eye_n, sqrt_n = xp.eye(n, dtype=X.dtype, device=xpd(X)), np.sqrt(n)
    for _ in range(maxiter):
        H = xp.tensordot(
            sample_weight,
            powm(K @ powm(X, np.sign(p)) @ ctranspose(K), np.abs(p)),
            axes=([0], [0]),
        )
        K = powm(H, -phi) @ K

        crit = xp.linalg.matrix_norm(H - eye_n) / sqrt_n
        if crit <= zeta:
            break
    else:
        warnings.warn("Convergence not reached", stacklevel=2)

    M = ctranspose(K) @ K
    if p > 0:
        M = xp.linalg.solve(M, eye_n)

    return M




[docs]
def mean_poweuclid(X, p, *, sample_weight=None, **kwargs):
    r"""Mean of SPD/HPD matrices according to the power Euclidean metric.

    Power Euclidean mean of order :math:`p` is [1]_:

    .. math::
        \mathbf{M} = \left( \sum_i w_i \ \mathbf{X}_i^p \right)^{1/p}

    with :math:`w` being the weights which sum to 1.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    p : float
        Exponent.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Power Euclidean mean.

    Notes
    -----
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Power Euclidean metrics for covariance matrices with application to
        diffusion tensor imaging
        <https://arxiv.org/abs/1009.3045>`_
        I.L. Dryden, X. Pennec, & J.M. Peyrat. arXiv, 2010.
    """
    if not isinstance(p, (int, float)):
        raise ValueError(f"Exponent p must be a scalar (Got {type(p)})")

    if p == 1:
        return mean_euclid(X, sample_weight=sample_weight)
    if p == 0:
        return mean_logeuclid(X, sample_weight=sample_weight)
    if p == -1:
        return mean_harmonic(X, sample_weight=sample_weight)

    M = powm(mean_euclid(powm(X, p), sample_weight=sample_weight), 1/p)
    return M




[docs]
@_vectorize_nd(n_axes=3)
def mean_riemann(X, *, tol=10e-9, maxiter=50, init=None, sample_weight=None):
    r"""Mean of SPD/HPD matrices according to the Riemannian metric.

    The affine-invariant Riemannian mean minimizes the sum of squared
    affine-invariant Riemannian distances :math:`d_R` to all SPD/HPD matrices
    [1]_:

    .. math::
         \arg \min_{\mathbf{M}} \sum_i w_i \ d_R (\mathbf{M}, \mathbf{X}_i)^2

    with :math:`w` being the weights which sum to 1.

    For the convergence, the implemented stopping criterion comes from [2]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    tol : float, default=10e-9
        Tolerance to stop the gradient descent.
    maxiter : int, default=50
        Maximum number of iterations.
    init : None | ndarray, shape (n, n), default=None
        A SPD/HPD matrix used to initialize the gradient descent.
        If None, the weighted Euclidean mean is used.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Affine-invariant Riemannian mean.

    Notes
    -----
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Principal geodesic analysis for the study of nonlinear statistics
        of shape
        <https://ieeexplore.ieee.org/document/1318725>`_
        P.T. Fletcher, C. Lu, S. M. Pizer, S. Joshi.
        IEEE Trans Med Imaging, 2004, 23(8), pp. 995-1005
    .. [2] `Approximate Joint Diagonalization and Geometric Mean of Symmetric
        Positive Definite Matrices
        <https://arxiv.org/abs/1505.07343>`_
        M. Congedo, B. Afsari, A. Barachant, M. Moakher. PLOS ONE, 2015
    """
    xp = get_namespace(X)
    n_matrices, n, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)
    if init is None:
        M = mean_euclid(X, sample_weight=sample_weight)
    else:
        M = check_init(init, n, like=X)

    nu = 1.0
    tau = np.finfo(np.float64).max
    for _ in range(maxiter):
        M12, Mm12 = sqrtm(M), invsqrtm(M)
        J = xp.tensordot(
            sample_weight, logm(Mm12 @ X @ Mm12), axes=([0], [0]),
        )
        M = M12 @ expm(nu * J) @ M12

        crit = xp.linalg.matrix_norm(J, ord="fro")
        h = nu * crit
        if h < tau:
            nu = 0.95 * nu
            tau = h
        else:
            nu = 0.5 * nu
        if crit <= tol or nu <= tol:
            break
    else:
        warnings.warn("Convergence not reached", stacklevel=2)

    return M




[docs]
@_vectorize_nd(n_axes=3)
def mean_thompson(X, *, tol=1e-6, maxiter=50, init=None, sample_weight=None):
    """Mean of SPD/HPD matrices according to the Thompson metric.

    The Thompson mean of SPD/HPD matrices is described in [1]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    tol : float, default=1e-6
        Tolerance to stop the gradient descent.
    maxiter : int, default=50
        Maximum number of iterations.
    init : None | ndarray, shape (n, n), default=None
        A SPD/HPD matrix used to initialize the gradient descent.
        If None, the weighted Euclidean mean is used.
    sample_weight : None
        Not used.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Thompson mean.

    Notes
    -----
    .. versionadded:: 0.10
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Differential geometry with extreme eigenvalues in the positive
        semidefinite cone
        <https://arxiv.org/pdf/2304.07347>`_
        C. Mostajeran, N. Da Costa, G. Van Goffrier and R. Sepulchre.
        SIAM Journal on Matrix Analysis and Applications, 2024
    """
    xp = get_namespace(X)
    n_matrices, n, _ = X.shape
    if init is None:
        M = mean_euclid(X)
    else:
        M = check_init(init, n, like=X)

    for i in range(maxiter):
        Mnew = geodesic_thompson(M, X[i % n_matrices], 1 / (i + 2))

        crit = xp.linalg.matrix_norm(Mnew - M, ord="fro")
        M = Mnew
        if crit <= tol:
            break
    else:
        warnings.warn("Convergence not reached", stacklevel=2)

    return M




[docs]
@_vectorize_nd(n_axes=3)
def mean_wasserstein(X, tol=10e-9, maxiter=50, init=None, sample_weight=None):
    r"""Mean of SPD/HPD matrices according to the Wasserstein metric.

    Wasserstein mean [1]_ is implemented as the inductive mean [2]_,
    adapted to the same convergence criterion as the Riemannian mean.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    tol : float, default=10e-9
        Tolerance to stop the gradient descent.
    maxiter : int, default=50
        Maximum number of iterations.
    init : None | ndarray, shape (n, n), default=None
        A SPD/HPD matrix used to initialize the gradient descent.
        If None the weighted Euclidean mean is used.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Wasserstein mean.

    Notes
    -----
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    gmean

    References
    ----------
    .. [1] `Barycenters in the Wasserstein space
        <https://hal.science/hal-00637399/file/AC_bary_revis.pdf>`_
        M. Agueh and G. Carlier. SIAM Journal on Mathematical Analysis, 2011
    .. [2] `Barycenter Estimation of Positive Semi-Definite Matrices with
        Bures-Wasserstein Distance
        <https://arxiv.org/abs/2302.14618>`_
        J. Zheng, H. Huang, Y. Yi, Y. Li, S.-C. Lin, ArXiv, 2023
    """
    xp = get_namespace(X)
    n_matrices, n, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)
    if init is None:
        init = mean_euclid(X, sample_weight=sample_weight)
    else:
        init = check_init(init, n, like=X)
    M = init

    for _ in range(maxiter):
        X_ts = log_map_wasserstein(X, M)
        J = xp.tensordot(sample_weight, X_ts, axes=([0], [0]))
        M = exp_map_wasserstein(J, M)
        crit = xp.linalg.matrix_norm(J)
        if crit <= tol:
            break
    else:
        warnings.warn("Convergence not reached", stacklevel=2)

    return M



###############################################################################


mean_functions = {
    "ale": mean_ale,
    "alm": mean_alm,
    "bmp": mean_bmp,
    "cheap": mean_cheap,
    "chol": mean_chol,
    "euclid": mean_euclid,
    "harmonic": mean_harmonic,
    "kullback_sym": mean_kullback_sym,
    "logdet": mean_logdet,
    "logchol": mean_logchol,
    "logeuclid": mean_logeuclid,
    "power": mean_power,
    "poweuclid": mean_poweuclid,
    "riemann": mean_riemann,
    "thompson": mean_thompson,
    "wasserstein": mean_wasserstein,
}



[docs]
def gmean(X, *args, metric="riemann", sample_weight=None, **kwargs):
    """Mean of matrices according to a metric.

    Compute the mean of a set of matrices according to a metric [1]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of matrices.
    *args : tuple
        The arguments passed to the sub function.
    metric : string | callable, default="riemann"
        Metric for mean estimation, can be:
        "ale", "alm", "bmp", "cheap", "chol", "euclid", "harmonic",
        "kullback_sym", "logchol", "logdet", "logeuclid", "riemann",
        "thompson", "wasserstein",
        or a callable function.
        If an exponent is given in args, it can be "power", "poweuclid".
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.
    **kwargs : dict
        The keyword arguments passed to the sub function.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Mean of matrices.

    Notes
    -----
    .. versionchanged:: 0.11
        Rename mean_covariance into gmean.

    References
    ----------
    .. [1] `Review of Riemannian distances and divergences, applied to
        SSVEP-based BCI
        <https://hal.archives-ouvertes.fr/LISV/hal-03015762v1>`_
        S. Chevallier, E. K. Kalunga, Q. Barthélemy, E. Monacelli.
        Neuroinformatics, Springer, 2021, 19 (1), pp.93-106
    """
    mean_function = check_function(metric, mean_functions)
    M = mean_function(
        X,
        *args,
        sample_weight=sample_weight,
        **kwargs,
    )
    return M



@deprecated(
    "mean_covariance() is deprecated and will be removed in 0.13.0; "
    "please use gmean()."
)
def mean_covariance(X, *args, metric="riemann", sample_weight=None, **kwargs):
    return gmean(X, *args, metric="riemann", sample_weight=None, **kwargs)


###############################################################################


def _get_mask_from_nan(X):
    xp = get_namespace(X)
    isnan = xp.isnan(X)
    nan_col = xp.all(isnan, axis=0)
    nan_row = xp.all(isnan, axis=1)
    if not bool(xp.all(nan_col == nan_row)):
        raise ValueError("NaN values are not symmetric.")
    keep_col = ~nan_col
    maskedX = X[keep_col, :][:, keep_col]
    if bool(xp.any(xp.isnan(maskedX))):
        raise ValueError("NaN values must fill rows and columns.")
    eye = xp.eye(X.shape[0], dtype=X.dtype, device=xpd(X))
    return eye[:, keep_col]


def _get_masks_from_nan(X):
    return [_get_mask_from_nan(x) for x in X]


def _apply_masks(X, masks):
    return [m.mT @ x @ m for x, m in zip(X, masks)]



[docs]
@_vectorize_nd(n_axes=3)
def maskedmean_riemann(X, masks, *, tol=10e-9, maxiter=100, init=None,
                       sample_weight=None):
    """Masked Riemannian mean of SPD/HPD matrices.

    Given masks defined as semi-orthogonal matrices, the masked Riemannian mean
    of SPD/HPD matrices is obtained with a gradient descent minimizing the sum
    of affine-invariant Riemannian distances between masked SPD/HPD matrices
    and the masked mean [1]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices.
    masks : list of n_matrices ndarray of shape (n, n_i), \
            with different n_i, such that n_i <= n
        Masks, defined as semi-orthogonal matrices. See [1]_.
    tol : float, default=10e-9
        Tolerance to stop the gradient descent.
    maxiter : int, default=100
        Maximum number of iterations.
    init : None | ndarray, shape (n, n), default=None
        A SPD/HPD matrix used to initialize the gradient descent.
        If None, the Identity is used.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Masked Riemannian mean.

    Notes
    -----
    .. versionadded:: 0.3
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    mean_riemann
    gmean

    References
    ----------
    .. [1] `Geodesically-convex optimization for averaging partially observed
        covariance matrices
        <https://hal.archives-ouvertes.fr/hal-02984423>`_
        F. Yger, S. Chevallier, Q. Barthélemy, and S. Sra. Asian Conference on
        Machine Learning (ACML), Nov 2020, Bangkok, Thailand. pp.417 - 432.
    """
    xp = get_namespace(X)
    n_matrices, n, _ = X.shape
    sample_weight = check_weights(sample_weight, n_matrices, like=X)
    maskedX = _apply_masks(X, masks)
    if init is None:
        M = xp.eye(n, dtype=X.dtype, device=xpd(X))
    else:
        M = check_init(init, n, like=X)

    nu = 1.0
    tau = np.finfo(np.float64).max
    for _ in range(maxiter):
        maskedM = _apply_masks(xp.broadcast_to(M, (n_matrices, n, n)), masks)
        J = xp.zeros((n, n), dtype=X.dtype, device=xpd(X))
        for i in range(n_matrices):
            M12, Mm12 = sqrtm(maskedM[i]), invsqrtm(maskedM[i])
            tmp = M12 @ logm(Mm12 @ maskedX[i] @ Mm12) @ M12
            J += sample_weight[i] * masks[i] @ tmp @ masks[i].T
        M12, Mm12 = sqrtm(M), invsqrtm(M)
        M = M12 @ expm(Mm12 @ (nu * J) @ Mm12) @ M12

        crit = xp.linalg.matrix_norm(J, ord="fro")
        h = nu * crit
        if h < tau:
            nu = 0.95 * nu
            tau = h
        else:
            nu = 0.5 * nu
        if crit <= tol or nu <= tol:
            break
    else:
        warnings.warn("Convergence not reached", stacklevel=2)

    return M




[docs]
@_vectorize_nd(n_axes=3)
def nanmean_riemann(X, tol=10e-9, maxiter=100, init=None, sample_weight=None):
    """Riemannian NaN-mean of SPD/HPD matrices.

    The Riemannian NaN-mean is the masked Riemannian mean applied to SPD/HPD
    matrices potentially corrupted by symmetric NaN values [1]_.

    Parameters
    ----------
    X : ndarray, shape (..., n_matrices, n, n)
        Set of SPD/HPD matrices, corrupted by symmetric NaN values [1]_.
    tol : float, default=10e-9
        Tolerance to stop the gradient descent.
    maxiter : int, default=100
        Maximum number of iterations.
    init : None | ndarray, shape (n, n), default=None
        A SPD/HPD matrix used to initialize the gradient descent.
        If None, a regularized Euclidean NaN-mean is used.
    sample_weight : None | ndarray, shape (n_matrices,), default=None
        Weights for each matrix. If None, it uses equal weights.

    Returns
    -------
    M : ndarray, shape (..., n, n)
        Riemannian NaN-mean.

    Notes
    -----
    .. versionadded:: 0.3
    .. versionchanged:: 0.12
        Add support for NumPy and PyTorch.

    See Also
    --------
    maskedmean_riemann
    gmean

    References
    ----------
    .. [1] `Geodesically-convex optimization for averaging partially observed
        covariance matrices
        <https://hal.archives-ouvertes.fr/hal-02984423>`_
        F. Yger, S. Chevallier, Q. Barthélemy, and S. Sra. Asian Conference on
        Machine Learning (ACML), Nov 2020, Bangkok, Thailand. pp.417 - 432.
    """
    xp = get_namespace(X)
    _, n, _ = X.shape
    if init is None:
        eye = xp.eye(n, dtype=X.dtype, device=xpd(X))
        init = nanmean(X, axis=0) + 1e-6 * eye
    else:
        init = check_init(init, n, like=X)

    M = maskedmean_riemann(
        nan_to_num(X),  # avoid nan contamination in matmul
        _get_masks_from_nan(X),
        tol=tol,
        maxiter=maxiter,
        init=init,
        sample_weight=sample_weight,
    )
    return M