Source code for gtda.time_series.multivariate

"""Processing of multivariate time series."""
# License: GNU AGPLv3

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, check_array

from ..utils import validate_params
from ..utils._docs import adapt_fit_transform_docs


[docs]@adapt_fit_transform_docs
class PearsonDissimilarity(BaseEstimator, TransformerMixin):
    """Pearson dissimilarities from collections of multivariate time series.

    The sample Pearson correlation coefficients between pairs of components of
    an :math:`N`-variate time series form an :math:`N \\times N` matrix
    :math:`R` with entries

    .. math:: R_{ij} = \\frac{ C_{ij} }{ \\sqrt{ C_{ii} C_{jj} } },

    where :math:`C` is the covariance matrix. Setting :math:`D_{ij} =
    (1 - R_{ij})/2` or :math:`D_{ij} = 1 - |R_{ij}|` we obtain a dissimilarity
    matrix with entries between 0 and 1.

    This transformer computes one dissimilarity matrix per multivariate time
    series in a collection. Examples of such collections are the outputs of
    :class:`SlidingWindow`.

    Parameters
    ----------
    absolute_value : bool, default: ``False``
        Whether absolute values of the Pearson correlation coefficients should
        be taken. Doing so makes pairs of strongly anti-correlated variables as
        similar as pairs of strongly correlated ones.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    See also
    --------
    SlidingWindow, gtda.homology.VietorisRipsPersistence

    """

    _hyperparameters = {'absolute_value': {'type': bool}}

[docs]    def __init__(self, absolute_value=False, n_jobs=None):
        self.absolute_value = absolute_value
        self.n_jobs = n_jobs

[docs]    def fit(self, X, y=None):
        """Do nothing and return the estimator unchanged.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_observations, n_features)
            Input data. Each entry along axis 0 is a sample of ``n_features``
            different variables, of size ``n_observations``.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        check_array(X, allow_nd=True)
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        self._is_fitted = True
        return self

[docs]    def transform(self, X, y=None):
        """Compute Pearson dissimilarities.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_observations, n_features)
            Input data. Each entry along axis 0 is a sample of ``n_features``
            different variables, of size ``n_observations``.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples, n_features, n_features)
            Array of Pearson dissimilarities.

        """
        check_is_fitted(self, '_is_fitted')
        X = check_array(X, allow_nd=True)

        Xt = np.empty((X.shape[0], X.shape[2], X.shape[2]))
        for i, sample in enumerate(X):
            Xt[i, :, :] = np.corrcoef(sample, rowvar=False)
        Xt = 0.5 - Xt/2 if not self.absolute_value else 1 - np.abs(Xt)

        return Xt