Source code for gtda.homology.cubical

"""Persistent homology on grids."""
# License: GNU AGPLv3

from numbers import Real

import numpy as np
from joblib import Parallel, delayed
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted

from ._utils import _postprocess_diagrams
from ..base import PlotterMixin
from ..externals.python import CubicalComplex, PeriodicCubicalComplex
from ..plotting import plot_diagram
from ..utils.intervals import Interval
from ..utils.validation import validate_params, check_collection


[docs]class CubicalPersistence(BaseEstimator, TransformerMixin, PlotterMixin):
    """:ref:`Persistence diagrams <persistence_diagram>` resulting from
    :ref:`filtered cubical complexes <cubical_complex>`.

    Given a :ref:`greyscale image <cubical_chains_and_cubical_homology>`,
    information about the appearance and disappearance of topological features
    (technically, :ref:`homology classes <homology_and_cohomology>`) of various
    dimensions and at different scales is summarised in the corresponding
    persistence diagram.

    **Important note**:
        - Persistence diagrams produced by this class must be interpreted with
          care due to the presence of padding triples which carry no
          information. See :meth:`transform` for additional information.

    Parameters
    ----------
    homology_dimensions : list or tuple, optional, default: ``(0, 1)``
        Dimensions (non-negative integers) of the topological features to be
        detected.

    coeff : int prime, optional, default: ``2``
        Compute homology with coefficients in the prime field
        :math:`\\mathbb{F}_p = \\{ 0, \\ldots, p - 1 \\}` where :math:`p`
        equals `coeff`.

    periodic_dimensions : boolean ndarray of shape (n_dimensions,) or None, \
        optional, default: ``None``
        Periodicity of the boundaries along each of the axes, where
        ``n_dimensions`` is the dimension of the images of the collection. The
        boolean in the `d`th position expresses whether the boundaries along
        the `d`th axis are periodic. The default ``None`` is equivalent to
        passing ``numpy.zeros((n_dimensions,), dtype=np.bool)``, i.e. none of
        the boundaries are periodic.

    infinity_values : float or None, default: ``None``
        Which death value to assign to features which are still alive at
        filtration value ``numpy.inf``. ``None`` assigns the maximum pixel
        values within all images passed to :meth:`fit`.

    reduced_homology : bool, optional, default: ``True``
       If ``True``, the earliest-born triple in homology dimension 0 which has
       infinite death is discarded from each diagram computed in
       :meth:`transform`.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    periodic_dimensions_ : boolean ndarray of shape (n_dimensions,)
       Effective periodicity of the boundaries along each of the axes. Set in
       :meth:`fit`.

    infinity_values_ : float
       Effective death value to assign to features which have infinite
       persistence. Set in :meth:`fit`.

    See also
    --------
    images.HeightFiltration, images.RadialFiltration, \
    images.DilationFiltration, images.ErosionFiltration, \
    images.SignedDistanceFiltration

    Notes
    -----
    `GUDHI <https://github.com/GUDHI/gudhi-devel>`_ is used as a C++ backend
    for computing cubical persistent homology [1]_. Python bindings were
    modified for performance.

    References
    ----------
    .. [1] P. Dlotko, "Cubical complex", 2015; `GUDHI User and Reference
           Manual <http://gudhi.gforge.inria.fr/doc/latest/group__cubical__\
           complex.html>`_.

    """

    _hyperparameters = {
        'homology_dimensions': {
            'type': (list, tuple),
            'of': {'type': int, 'in': Interval(0, np.inf, closed='left')}
            },
        'coeff': {'type': int, 'in': Interval(2, np.inf, closed='left')},
        'periodic_dimensions': {'type': (np.ndarray, type(None)),
                                'of': {'type': np.bool_}},
        'infinity_values': {'type': (Real, type(None))},
        'reduced_homology': {'type': bool}
        }

[docs]    def __init__(self, homology_dimensions=(0, 1), coeff=2,
                 periodic_dimensions=None, infinity_values=None,
                 reduced_homology=True, n_jobs=None):
        self.homology_dimensions = homology_dimensions
        self.coeff = coeff
        self.periodic_dimensions = periodic_dimensions
        self.infinity_values = infinity_values
        self.reduced_homology = reduced_homology
        self.n_jobs = n_jobs

    def _gudhi_diagram(self, X):
        cubical_complex = self._filtration(
            dimensions=X.shape,
            top_dimensional_cells=X.flatten(order="F"),
            **self._filtration_kwargs
            )
        Xdgm = cubical_complex.persistence(homology_coeff_field=self.coeff,
                                           min_persistence=0)

        return Xdgm

[docs]    def fit(self, X, y=None):
        """Do nothing and return the estimator unchanged.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_1, ..., n_pixels_d)
            Input data. Array of d-dimensional images.

        y : None
            There is no need of a target in a transformer, yet the pipeline API
            requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_collection(X, force_all_finite=False)
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        self._filtration_kwargs = {}
        if self.periodic_dimensions is None or \
           np.sum(self.periodic_dimensions) == 0:
            self._filtration = CubicalComplex
            self.periodic_dimensions_ = np.zeros(len(X) - 1, dtype=np.bool)
        else:
            self._filtration = PeriodicCubicalComplex
            self.periodic_dimensions_ = np.array(self.periodic_dimensions,
                                                 dtype=np.bool)
            self._filtration_kwargs['periodic_dimensions'] = \
                self.periodic_dimensions_

        if self.infinity_values is None:
            if hasattr(X, 'shape'):
                self.infinity_values_ = np.max(X)
            else:
                self.infinity_values_ = max(map(np.max, X))
        else:
            self.infinity_values_ = self.infinity_values

        self._homology_dimensions = sorted(self.homology_dimensions)
        self._max_homology_dimension = self._homology_dimensions[-1]

        return self

[docs]    def transform(self, X, y=None):
        """For each image in `X`, compute the relevant persistence diagram as
        an array of triples [b, d, q]. Each triple represents a persistent
        topological feature in dimension q (belonging to `homology_dimensions`)
        which is born at b and dies at d. Only triples in which b < d are
        meaningful. Triples in which b and d are equal ("diagonal elements")
        may be artificially introduced during the computation for padding
        purposes, since the number of non-trivial persistent topological
        features is typically not constant across samples. They carry no
        information and hence should be effectively ignored by any further
        computation.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_1, ..., n_pixels_d)
            Input data. Array of d-dimensional images.

        y : None
            There is no need of a target in a transformer, yet the pipeline API
            requires this parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples, n_features, 3)
            Array of persistence diagrams computed from the feature arrays or
            distance matrices in `X`. ``n_features`` equals
            :math:`\\sum_q n_q`, where :math:`n_q` is the maximum number of
            topological features in dimension :math:`q` across all samples in
            `X`.

        """
        check_is_fitted(self)
        Xt = check_collection(X, force_all_finite=False)

        Xt = Parallel(n_jobs=self.n_jobs)(delayed(self._gudhi_diagram)(x)
                                          for x in Xt)

        Xt = _postprocess_diagrams(
            Xt, "gudhi", self._homology_dimensions, self.infinity_values_,
            self.reduced_homology
            )

        return Xt

[docs]    @staticmethod
    def plot(Xt, sample=0, homology_dimensions=None, plotly_params=None):
        """Plot a sample from a collection of persistence diagrams, with
        homology in multiple dimensions.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_features, 3)
            Collection of persistence diagrams, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Index of the sample in `Xt` to be plotted.

        homology_dimensions : list, tuple or None, optional, default: ``None``
            Which homology dimensions to include in the plot. ``None`` means
            plotting all dimensions present in ``Xt[sample]``.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"traces"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to the
            :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        return plot_diagram(
            Xt[sample], homology_dimensions=homology_dimensions,
            plotly_params=plotly_params
            )