"""Processing of multivariate time series."""
# License: GNU AGPLv3
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, check_array
from ..utils import validate_params
from ..utils._docs import adapt_fit_transform_docs
[docs]@adapt_fit_transform_docs
class PearsonDissimilarity(BaseEstimator, TransformerMixin):
"""Pearson dissimilarities from collections of multivariate time series.
The sample Pearson correlation coefficients between pairs of components of
an :math:`N`-variate time series form an :math:`N \\times N` matrix
:math:`R` with entries
.. math:: R_{ij} = \\frac{ C_{ij} }{ \\sqrt{ C_{ii} C_{jj} } },
where :math:`C` is the covariance matrix. Setting :math:`D_{ij} =
(1 - R_{ij})/2` or :math:`D_{ij} = 1 - |R_{ij}|` we obtain a dissimilarity
matrix with entries between 0 and 1.
This transformer computes one dissimilarity matrix per multivariate time
series in a collection. Examples of such collections are the outputs of
:class:`SlidingWindow`.
Parameters
----------
absolute_value : bool, default: ``False``
Whether absolute values of the Pearson correlation coefficients should
be taken. Doing so makes pairs of strongly anti-correlated variables as
similar as pairs of strongly correlated ones.
n_jobs : int or None, optional, default: ``None``
The number of jobs to use for the computation. ``None`` means 1 unless
in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
processors.
See also
--------
SlidingWindow, gtda.homology.VietorisRipsPersistence
"""
_hyperparameters = {'absolute_value': {'type': bool}}
[docs] def __init__(self, absolute_value=False, n_jobs=None):
self.absolute_value = absolute_value
self.n_jobs = n_jobs
[docs] def fit(self, X, y=None):
"""Do nothing and return the estimator unchanged.
This method is here to implement the usual scikit-learn API and hence
work in pipelines.
Parameters
----------
X : ndarray of shape (n_samples, n_observations, n_features)
Input data. Each entry along axis 0 is a sample of ``n_features``
different variables, of size ``n_observations``.
y : None
There is no need for a target in a transformer, yet the pipeline
API requires this parameter.
Returns
-------
self : object
"""
check_array(X, allow_nd=True)
validate_params(
self.get_params(), self._hyperparameters, exclude=['n_jobs'])
self._is_fitted = True
return self