.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/simulated/plot_nanmean.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_simulated_plot_nanmean.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_simulated_plot_nanmean.py:


===============================================================================
Estimate mean of SPD matrices with NaN values
===============================================================================

Estimate the mean of SPD matrices corrupted by NaN values [1]_.

.. GENERATED FROM PYTHON SOURCE LINES 8-22

.. code-block:: Python

    # Authors: Quentin Barthélemy, Sylvain Chevallier and Florian Yger
    #
    # License: BSD (3-clause)

    import numpy as np
    from matplotlib import pyplot as plt
    import pandas as pd
    import seaborn as sns

    from pyriemann.datasets import make_matrices
    from pyriemann.utils.mean import mean_riemann, nanmean_riemann
    from pyriemann.utils.distance import distance_riemann


.. GENERATED FROM PYTHON SOURCE LINES 23-40

.. code-block:: Python


    def corrupt(mats, n_corrup_channels_max, rs):
        n_matrices, n_channels, _ = mats.shape
        all_n_corrup_channels, all_corrup_channels = np.zeros(n_matrices), []
        for i_matrix in range(n_matrices):
            n_corrupt_channels = rs.randint(n_corrup_channels_max + 1, size=1)
            corrup_channels = rs.choice(
                np.arange(0, n_channels), size=n_corrupt_channels, replace=False)
            for i_channel in corrup_channels:
                mats[i_matrix, i_channel] = np.nan
                mats[i_matrix, :, i_channel] = np.nan
                all_corrup_channels.append(i_channel)
            all_n_corrup_channels[i_matrix] = n_corrupt_channels
        return mats, all_n_corrup_channels, all_corrup_channels


.. GENERATED FROM PYTHON SOURCE LINES 41-43

Generate data
-------------

.. GENERATED FROM PYTHON SOURCE LINES 43-67

.. code-block:: Python


    rs = np.random.RandomState(42)
    n_matrices, n_channels = 100, 10
    mats = make_matrices(
        n_matrices, n_channels, "spd", rs, evals_low=50, evals_high=130)

    # Compute the reference, the Riemannian mean on all SPD matrices
    C_ref = mean_riemann(mats)

    # Corrupt data randomly
    n_corrup_channels_max = n_channels // 2
    print("Maximum number of corrupted channels: {} over {}".format(
        n_corrup_channels_max, n_channels))

    mats, all_n_corrup_channels, all_corrup_channels = corrupt(
        mats, n_corrup_channels_max, rs)

    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.set(title='Histogram of the number of corrupted channels',
           xlabel='Channel count', ylabel='Occurrences')
    plt.hist(all_n_corrup_channels, bins=np.arange(n_corrup_channels_max + 2) - .5)
    plt.show()


.. image-sg:: /auto_examples/simulated/images/sphx_glr_plot_nanmean_001.png
   :alt: Histogram of the number of corrupted channels
   :srcset: /auto_examples/simulated/images/sphx_glr_plot_nanmean_001.png
   :class: sphx-glr-single-img


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    Maximum number of corrupted channels: 5 over 10


.. GENERATED FROM PYTHON SOURCE LINES 68-77

.. code-block:: Python


    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.set(title='Histogram of the indices of corrupted channels',
           xlabel='Channel index', ylabel='Occurrences')
    plt.hist(all_corrup_channels, bins=np.arange(n_channels + 1) - .5)
    plt.show()


.. image-sg:: /auto_examples/simulated/images/sphx_glr_plot_nanmean_002.png
   :alt: Histogram of the indices of corrupted channels
   :srcset: /auto_examples/simulated/images/sphx_glr_plot_nanmean_002.png
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 78-87

Estimate means of SPD matrices
------------------------------

Riemannian mean could only be computed on full-rank matrices. A common
strategy is called matrix deletion, that is removing all matrices with
corrupted channels before computing mean.
This results in discarding useful information as uncorrupted channels are
removed from the computation of the mean.
Nan-mean uses as much information as possible to estimate the mean [1]_.

.. GENERATED FROM PYTHON SOURCE LINES 87-102

.. code-block:: Python


    # Euclidean NaN-mean
    C_naneucl = np.nanmean(mats, axis=0)

    # Riemannian NaN-mean
    C_nanriem = nanmean_riemann(mats)

    # Riemannian mean, after matrix deletion: average only uncorrupted matrices
    isnan = np.isnan(np.sum(mats, axis=(1, 2)))
    mats_ = np.delete(mats, np.where(isnan), axis=0)
    perc = len(mats_) / n_matrices * 100
    print("Percentage of uncorrupted matrices: {:.2f} %".format(perc))
    C_mdriem = mean_riemann(mats_)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    /home/docs/checkouts/readthedocs.org/user_builds/pyriemann/checkouts/latest/pyriemann/utils/mean.py:785: UserWarning: Convergence not reached
      warnings.warn("Convergence not reached")
    Percentage of uncorrupted matrices: 11.00 %


.. GENERATED FROM PYTHON SOURCE LINES 103-107

Compare means
-------------

Compare distances between the different means and the reference.

.. GENERATED FROM PYTHON SOURCE LINES 107-121

.. code-block:: Python


    d_naneucl = distance_riemann(C_ref, C_naneucl)
    print(f"Euclidean NaN-mean, distance to ref = {d_naneucl:.3f}")

    d_nanriem = distance_riemann(C_ref, C_nanriem)
    print(f"Riemannian NaN-mean, distance to ref = {d_nanriem:.3f}")

    d_mdriem = distance_riemann(C_ref, C_mdriem)
    print(f"Riemannian mean after deletion, distance to ref = {d_mdriem:.3f}")

    # Riemannian NaN-mean gives the best result, and Riemannian mean after matrix
    # deletion is worst than Euclidean NaN-mean.


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    Euclidean NaN-mean, distance to ref = 0.131
    Riemannian NaN-mean, distance to ref = 0.073
    Riemannian mean after deletion, distance to ref = 0.248


.. GENERATED FROM PYTHON SOURCE LINES 122-127

Evaluate influence of corrupted channels
----------------------------------------

Repeat the previous experiment, varying the maximum number of corrupted
channels [1]_.

.. GENERATED FROM PYTHON SOURCE LINES 127-164

.. code-block:: Python


    mats_orig = make_matrices(
        n_matrices, n_channels, "spd", rs, evals_low=50, evals_high=130)
    C_ref = mean_riemann(mats_orig)

    df = []
    for n_corrup_channels_max in range(0, n_channels // 2 + 1):
        mats = np.copy(mats_orig)
        mats, _, _ = corrupt(mats, n_corrup_channels_max, rs)

        C_naneucl = np.nanmean(mats, axis=0)
        C_nanriem = nanmean_riemann(mats)
        isnan = np.isnan(np.sum(mats, axis=(1, 2)))
        mats_ = np.delete(mats, np.where(isnan), axis=0)
        C_mdriem = mean_riemann(mats_)

        res_naneucl = {'n_corrupt': n_corrup_channels_max,
                       'dist': distance_riemann(C_ref, C_naneucl),
                       'Means': 'Euclidean NaN-mean'}
        res_nanriem = {'n_corrupt': n_corrup_channels_max,
                       'dist': distance_riemann(C_ref, C_nanriem),
                       'Means': 'Riemannian NaN-mean'}
        res_mdriem = {'n_corrupt': n_corrup_channels_max,
                      'dist': distance_riemann(C_ref, C_mdriem),
                      'Means': 'Riemannian mean after deletion'}
        df += [res_naneucl, res_nanriem, res_mdriem]
    df = pd.DataFrame(df)

    g = sns.catplot(data=df, x="n_corrupt", y="dist", hue="Means", kind="point",
                    legend_out=False)
    g.set(title="Influence of corrupted channels")
    g.set_axis_labels("Maximum number of corrupted channels",
                      "Distance to reference")
    plt.tight_layout()
    plt.show()


.. image-sg:: /auto_examples/simulated/images/sphx_glr_plot_nanmean_003.png
   :alt: Influence of corrupted channels
   :srcset: /auto_examples/simulated/images/sphx_glr_plot_nanmean_003.png
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 165-172

References
----------
.. [1] `Geodesically-convex optimization for averaging partially observed
   covariance matrices
   <https://hal.archives-ouvertes.fr/hal-02984423>`_
   F. Yger, S. Chevallier, Q. Barthélemy, and S. Sra. Asian Conference on
   Machine Learning (ACML), Nov 2020, Bangkok, Thailand. pp.417 - 432.


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 15.204 seconds)


.. _sphx_glr_download_auto_examples_simulated_plot_nanmean.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: plot_nanmean.ipynb <plot_nanmean.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: plot_nanmean.py <plot_nanmean.py>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_