Source code for gtda.graphs.transition

"""Construct transition graphs from dynamical systems."""
# License: GNU AGPLv3

from types import FunctionType
import warnings

import numpy as np
from joblib import Parallel, delayed
from scipy import sparse as sp
from scipy.sparse import SparseEfficiencyWarning
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted

from ..utils._docs import adapt_fit_transform_docs
from ..utils.validation import validate_params


def identity(x):
    """The identity function."""
    return x


@adapt_fit_transform_docs
class TransitionGraph(BaseEstimator, TransformerMixin):
    """Undirected transition graphs from arrays of time-evolving states.

    Let A be a two-dimensional array viewed as a time series (along the row
    axis) of one-dimensional arrays encoding the "state" of a system. The
    corresponding *undirected transition graph* (or *network*) has as vertex
    set the set of all unique states (rows) in A, and there is an edge
    between two vertices if and only if one of the corresponding states
    immediately follows the other in A.

    Given a collection of two-dimensional arrays, this transformer performs
    two tasks:

    1. Optionally, it preprocesses the arrays by applying a function row by
       row to them. This can be used e.g. as a "compression" step to reduce
       the size of the state space.
    2. It computes the undirected transition graph of each array as a sparse
       matrix.

    Parameters
    ----------
    func : None or callable, optional, default: ``numpy.argsort``
        If a callable, it is the function to be applied to each row of each
        array as a preprocessing step. Allowed callables are functions
        mapping 1-D arrays to 1-D arrays of constant length, and must be
        compatible with :func:`numpy.apply_along_axis`. If ``None``, this
        function is the identity (no preprocessing). The default is
        ``numpy.argsort``, which makes the final transition graphs *ordinal
        partition networks* [1]_ [2]_ [3]_.

    func_params : None or dict, optional, default: ``None``
        Additional keyword arguments for `func`.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1
        unless in a :obj:`joblib.parallel_backend` context. ``-1`` means
        using all processors.

    Attributes
    ----------
    effective_func_params_ : dict
        A copy of `func_params` if this was not set to ``None``, otherwise
        an empty dictionary.

    Examples
    --------
    >>> import numpy as np
    >>> from gtda.graphs import TransitionGraph
    >>> X = np.array([[[1, 0], [2, 3], [5, 4]],
    ...               [[5, 4], [5, 4], [5, 4]]])
    >>> tg = TransitionGraph()
    >>> tg = tg.fit(X)
    >>> print(tg.transform(X)[0].toarray())
    [[False  True]
     [ True False]]
    >>> print(tg.transform(X)[1].toarray())
    [[False]]

    See also
    --------
    GraphGeodesicDistance, gtda.time_series.TakensEmbedding

    Notes
    -----
    In general, the shapes of the sparse matrices output by
    :meth:`transform` will be different across samples, and the same row or
    column index will refer to different states in different samples.

    References
    ----------
    .. [1] M. Small, "Complex networks from time series: Capturing
           dynamics", *2013 IEEE International Symposium on Circuits and
           Systems (ISCAS2013)*, 2013; doi: `10.1109/iscas.2013.6572389
           <http://dx.doi.org/10.1109/iscas.2013.6572389>`_.

    .. [2] M. McCullough, M. Small, T. Stemler, and H. Ho-Ching Iu, "Time
           lagged ordinal partition networks for capturing dynamics of
           continuous dynamical systems"; *Chaos: An Interdisciplinary
           Journal of Nonlinear Science* **25** (5), p. 053101, 2015; doi:
           `10.1063/1.4919075 <http://dx.doi.org/10.1063/1.4919075>`_.

    .. [3] A. Myers, E. Munch, and F. A. Khasawneh, "Persistent homology of
           complex networks for dynamic state detection"; *Phys. Rev. E*
           **100**, 022314, 2019; doi: `10.1103/PhysRevE.100.022314
           <http://dx.doi.org/10.1103/PhysRevE.100.022314>`_.

    """

    _hyperparameters = {
        'func': {'type': (FunctionType, type(None))},
        'func_params': {'type': (dict, type(None))}
        }

    def __init__(self, func=np.argsort, func_params=None, n_jobs=None):
        self.func = func
        self.func_params = func_params
        self.n_jobs = n_jobs

    def _make_adjacency_matrix(self, X):
        # Apply the preprocessing function row by row, then map each unique
        # preprocessed state (row) to an integer vertex index
        Xm = np.apply_along_axis(self._func, 1, X)
        Xm = np.unique(Xm, axis=0, return_inverse=True)[1]
        n_indices = 2 * (len(Xm) - 1)
        first = Xm[:-1]
        second = Xm[1:]
        # Record each transition in both directions to obtain a symmetric
        # (undirected) boolean adjacency matrix
        Xm = sp.csr_matrix((np.full(n_indices, True),
                            (np.concatenate([first, second]),
                             np.concatenate([second, first]))))
        # Silence sparse warnings TODO: Benchmark
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', SparseEfficiencyWarning)
            # Remove self-loops arising from repeated consecutive states
            sp.csr_matrix.setdiag(Xm, 0)
        return Xm

    def fit(self, X, y=None):
        """Calculate :attr:`effective_func_params_`. Then, return the
        estimator.

        This method is here to implement the usual scikit-learn API and
        hence work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_time_steps, n_features)
            Input data.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        check_array(X, allow_nd=True)
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        if self.func is None:
            self._func = identity
        else:
            self._func = self.func

        if self.func_params is None:
            self.effective_func_params_ = {}
        else:
            self.effective_func_params_ = self.func_params.copy()
        return self

    def transform(self, X, y=None):
        """Create transition graphs from the input data and return their
        adjacency matrices. The graphs are simple, undirected and
        unweighted, and the adjacency matrices are sparse matrices of type
        bool.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_time_steps, n_features)
            Input data.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : array of sparse boolean matrices, shape (n_samples,)
            The collection of ``n_samples`` transition graphs. Each
            transition graph is encoded by a sparse matrix of boolean type.

        """
        check_is_fitted(self)
        Xt = check_array(X, allow_nd=True)

        Xt = Parallel(n_jobs=self.n_jobs)(
            delayed(self._make_adjacency_matrix)(x) for x in Xt)
        Xt = np.asarray(Xt)
        return Xt
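

# Minimal usage sketch, not part of the original module: it exercises the
# transformer on a small random input with the default ``func=np.argsort``
# (so the results are ordinal partition networks), then mirrors step by step
# what ``_make_adjacency_matrix`` computes for the first sample. All names
# below (``rng``, ``X_demo``, ``states``, ``vertices``, ``edges``) are
# illustrative assumptions, not library API.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    # Input of shape (n_samples, n_time_steps, n_features)
    X_demo = rng.normal(size=(2, 8, 3))

    tg = TransitionGraph(n_jobs=1)
    adjacencies = tg.fit_transform(X_demo)
    for i, adj in enumerate(adjacencies):
        # Each adjacency matrix is boolean, symmetric and has a zero
        # diagonal; its size equals the number of distinct ordinal patterns
        print(f"Sample {i}: {adj.shape[0]} states, "
              f"{adj.count_nonzero() // 2} undirected transitions")

    # Manual reconstruction of the graph for the first sample
    states = np.apply_along_axis(np.argsort, 1, X_demo[0])  # ordinal patterns
    vertices = np.ravel(np.unique(states, axis=0, return_inverse=True)[1])
    edges = set()
    for u, v in zip(vertices[:-1], vertices[1:]):
        if u != v:  # self-loops are removed by setdiag(..., 0)
            edges.add(frozenset((u, v)))
    assert len(edges) == adjacencies[0].count_nonzero() // 2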