Source code for gtda.graphs.transition

"""Construct transition graphs from dynamical systems."""
# License: GNU AGPLv3

from types import FunctionType

import numpy as np
from joblib import Parallel, delayed
from scipy.sparse import csr_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted

from ..utils._docs import adapt_fit_transform_docs
from ..utils.validation import validate_params, check_point_clouds


def identity(x):
    """The identity function."""
    return x


[docs]@adapt_fit_transform_docs
class TransitionGraph(BaseEstimator, TransformerMixin):
    """Undirected transition graphs from arrays of time-evolving states.

    Let A be a two-dimensional array viewed as a time series (along the row
    axis) of one-dimensional arrays encoding the "state" of a system. The
    corresponding *undirected transition graph* (or *network*) has as vertex
    set the set of all unique states (rows) in A, and there is an edge between
    vertex i and vertex j≠i if and only if the state corresponding to vertex
    j immediately follows the one corresponding to vertex i, somewhere in A.

    Given a collection of two-dimensional arrays, this transformer performs two
    tasks:

        1. Optionally, it preprocesses the arrays by applying a function row by
           row to them. This can be used e.g. as a "compression" step to reduce
           the size of the state space.
        2. It computes the transition graph of each array as a sparse matrix of
           zeros and ones.

    Parameters
    ----------
    func : None or callable, optional, default: ``numpy.argsort``
        If a callable, it is the function to be applied to each row of each
        array as a preprocessing step. Allowed callables are functions mapping
        1D arrays to 1D arrays of constant length, and must be compatible with
        :func:`numpy.apply_along_axis`. If ``None``, this function is the
        identity (no preprocessing). The default is ``numpy.argsort``, which
        makes the final transition graphs *ordinal partition networks*
        [1]_ [2]_ [3]_.

    func_params : None or dict, optional, default: ``None``
        Additional keyword arguments for `func`.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    effective_func_params_ : dict
        A copy of `func_params` if this was not set to ``None``, otherwise an
        empty dictionary.

    Examples
    --------
    >>> import numpy as np
    >>> from gtda.graphs import TransitionGraph
    >>> X = np.array([[[1, 0], [2, 3], [5, 4]],
    ...               [[5, 4], [5, 4], [5, 4]]])
    >>> X_tg = TransitionGraph().fit_transform(X)
    >>> print(X_tg[0].toarray())
    [[0 1]
     [1 0]]
    >>> print(X_tg[1].toarray())
    [[0]]

    See also
    --------
    KNeighborsGraph, GraphGeodesicDistance

    Notes
    -----
    In general, the shapes of the sparse matrices output by :meth:`transform`
    will be different across samples, and the same row or column index will
    refer to different states in different samples.

    References
    ----------
    .. [1] M. Small, "Complex networks from time series: Capturing dynamics",
           *2013 IEEE International Symposium on Circuits and Systems
           (IS-CAS2013)*, 2013; `DOI: 10.1109/iscas.2013.6572389
           <http://dx.doi.org/10.1109/iscas.2013.6572389>`_.

    .. [2] M. McCullough, M. Small, T. Stemler, and H. Ho-Ching Iu, "Time
           lagged ordinal partition networks for capturing dynamics of
           continuous dynamical systems"; *Chaos: An Interdisciplinary Journal
           of Nonlinear Science* **25** (5), p. 053101, 2015; `DOI:
           10.1063/1.4919075 <http://dx.doi.org/10.1063/1.4919075>`_.

    .. [3] A. Myers, E. Munch, and F. A. Khasawneh, "Persistent homology of
           complex networks for dynamic state detection"; *Phys. Rev. E*
           **100**, 022314, 2019; `DOI: 10.1103/PhysRevE.100.022314
           <http://dx.doi.org/10.1109/CVPR.2015.7299106>`_.

    """

    _hyperparameters = {'func': {'type': (FunctionType, type(None))},
                        'func_params': {'type': (dict, type(None))}}

[docs]    def __init__(self, func=np.argsort, func_params=None, n_jobs=None):
        self.func = func
        self.func_params = func_params
        self.n_jobs = n_jobs

    def _make_adjacency_matrix(self, X):
        Xm = np.apply_along_axis(self._func, 1, X)
        unique_states, Xm = np.unique(Xm, axis=0, return_inverse=True)
        n_unique_states = len(unique_states)
        first = Xm[:-1]
        second = Xm[1:]
        non_diag_idx = first != second
        data = np.full(np.sum(non_diag_idx), 1, dtype=int)
        first = first[non_diag_idx]
        second = second[non_diag_idx]
        Xm = csr_matrix((data, (first, second)),
                        shape=(n_unique_states, n_unique_states))
        return Xm

[docs]    def fit(self, X, y=None):
        """Do nothing and return the estimator unchanged.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : list of length n_samples, or ndarray of shape (n_samples, \
            n_timestamps, n_features)
            Input data: a collection of 2D arrays of shape
            ``(n_timestamps, n_features)``.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        check_point_clouds(X)
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        if self.func is None:
            self._func = identity
        else:
            self._func = self.func

        if self.func_params is None:
            self.effective_func_params_ = {}
        else:
            self.effective_func_params_ = self.func_params.copy()

        return self

[docs]    def transform(self, X, y=None):
        """Create transition graphs from the input data and return their
        adjacency matrices. The graphs are simple and unweighted.

        Parameters
        ----------
        X : list of length n_samples, or ndarray of shape (n_samples, \
            n_timestamps, n_features)
            Input data: a collection of 2D arrays of shape
            ``(n_timestamps, n_features)``.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : list of length n_samples
            Collection of ``n_samples`` transition graphs. Each transition
            graph is encoded by a sparse CSR matrix of ones and zeros.

        """
        check_is_fitted(self)
        Xt = check_point_clouds(X)

        Xt = Parallel(n_jobs=self.n_jobs)(
            delayed(self._make_adjacency_matrix)(x) for x in Xt
            )
        return Xt