"""Persistence diagram preprocessing."""
# License: GNU AGPLv3
from numbers import Real
from types import FunctionType
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted
from ._metrics import _AVAILABLE_AMPLITUDE_METRICS, _parallel_amplitude
from ._utils import _filter, _bin, _homology_dimensions_to_sorted_ints
from ..base import PlotterMixin
from ..plotting.persistence_diagrams import plot_diagram
from ..utils._docs import adapt_fit_transform_docs
from ..utils.intervals import Interval
from ..utils.validation import check_diagrams, validate_params
@adapt_fit_transform_docs
class ForgetDimension(BaseEstimator, TransformerMixin, PlotterMixin):
    """Replaces all homology dimensions in persistence diagrams with
    ``numpy.inf``.

    This is handy whenever a downstream step should treat every topological
    feature of a diagram together, instead of handling each homology
    dimension separately.

    See also
    --------
    PairwiseDistance, Amplitude, Scaler, Filtering

    """

    def __init__(self):
        # There are no hyperparameters to store.
        pass

    def fit(self, X, y=None):
        """Do nothing and return the estimator unchanged.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features, 3)
            Input data. Array of persistence diagrams, each a collection of
            triples [b, d, q] representing persistent topological features
            through their birth (b), death (d) and homology dimension (q).

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        # Nothing is learned from the data: the input is only passed through
        # the diagram checker (its return value is discarded) and a private
        # flag is set to record that `fit` has been called.
        check_diagrams(X)
        self._is_fitted = True
        return self

    @staticmethod
    def plot(Xt, sample=0, plotly_params=None):
        """Plot one persistence diagram taken from a collection.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_points, 3)
            Collection of persistence diagrams, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Position in `Xt` of the diagram to draw.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"traces"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        diagram = Xt[sample]
        # The only homology dimension requested from the plotter is
        # ``numpy.inf``, i.e. the value this class is documented to assign to
        # every point.
        return plot_diagram(
            diagram,
            homology_dimensions=[np.inf],
            plotly_params=plotly_params,
        )
@adapt_fit_transform_docs
class Scaler(BaseEstimator, TransformerMixin, PlotterMixin):
    """Linear scaling of persistence diagrams.

    A positive scale factor :attr:`scale_` is calculated during :meth:`fit` by
    considering all available persistence diagrams partitioned according to
    homology dimensions. During :meth:`transform`, all birth-death pairs are
    divided by :attr:`scale_`.

    The value of :attr:`scale_` depends on two things:

        - A way of computing, for each homology dimension, the :ref:`amplitude
          <vectorization_amplitude_and_kernel>` in that dimension of a
          persistence diagram consisting of birth-death-dimension triples
          [b, d, q]. Together, `metric` and `metric_params` define this in the
          same way as in :class:`Amplitude`.
        - A scalar-valued function which is applied to the resulting
          two-dimensional array of amplitudes (one per diagram and homology
          dimension) to obtain :attr:`scale_`.

    **Important note**:

        - Input collections of persistence diagrams for this transformer must
          satisfy certain requirements, see e.g. :meth:`fit`.

    Parameters
    ----------
    metric : ``'bottleneck'`` | ``'wasserstein'`` | ``'betti'`` | \
        ``'landscape'`` | ``'silhouette'`` | ``'heat'`` | \
        ``'persistence_image'``, optional, default: ``'bottleneck'``
        See the corresponding parameter in :class:`Amplitude`.

    metric_params : dict or None, optional, default: ``None``
        See the corresponding parameter in :class:`Amplitude`.

    function : callable or None, optional, default: ``numpy.max``
        Function used to extract a positive scalar from the collection of
        amplitude vectors in :meth:`fit`. Must map 2D arrays to scalars. Any
        callable is accepted (plain functions, builtins, NumPy functions,
        ``functools.partial`` objects, ...). ``None`` is equivalent to
        ``numpy.max``. A :class:`TypeError` is raised in :meth:`fit` if the
        value is neither callable nor ``None``.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    effective_metric_params_ : dict
        Dictionary containing all information present in `metric_params` as
        well as relevant quantities computed in :meth:`fit`.

    homology_dimensions_ : tuple
        Homology dimensions seen in :meth:`fit`, sorted in ascending order.

    scale_ : float
        Value by which to rescale diagrams.

    See also
    --------
    PairwiseDistance, ForgetDimension, Filtering, Amplitude

    Notes
    -----
    When `metric` is ``'bottleneck'`` and `function` is ``numpy.max``,
    :meth:`fit_transform` has the effect of making the lifetime of the most
    persistent point across all diagrams and homology dimensions equal to 2.

    To compute scaling factors without first splitting the computation between
    different homology dimensions, data should be first transformed by an
    instance of :class:`ForgetDimension`.

    """

    # `function` is deliberately absent from this specification: an
    # ``isinstance(..., FunctionType)`` style check rejects perfectly valid
    # callables (builtins, ``functools.partial`` objects, and NumPy functions
    # such as the default ``numpy.max`` on NumPy versions where they are
    # C-level dispatcher objects). It is checked with ``callable`` in `fit`.
    _hyperparameters = {
        'metric': {'type': str, 'in': _AVAILABLE_AMPLITUDE_METRICS.keys()},
        'metric_params': {'type': (dict, type(None))}
    }

    def __init__(self, metric='bottleneck', metric_params=None,
                 function=np.max, n_jobs=None):
        self.metric = metric
        self.metric_params = metric_params
        self.function = function
        self.n_jobs = n_jobs

    def fit(self, X, y=None):
        """Store all observed homology dimensions in
        :attr:`homology_dimensions_` and compute :attr:`scale_`.
        Then, return the estimator.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features, 3)
            Input data. Array of persistence diagrams, each a collection of
            triples [b, d, q] representing persistent topological features
            through their birth (b), death (d) and homology dimension (q).
            It is important that, for each possible homology dimension, the
            number of triples for which q equals that homology dimension is
            constant across the entries of `X`.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_diagrams(X)
        # `n_jobs` has no entry in `_hyperparameters`, and `function` is
        # validated by hand right below, so both are left out of the generic
        # validation.
        validate_params(
            self.get_params(), self._hyperparameters,
            exclude=['n_jobs', 'function'])

        # Resolve the reducer without mutating the constructor parameter.
        # ``None`` falls back to the documented default instead of failing
        # later with an opaque "'NoneType' object is not callable".
        function = np.max if self.function is None else self.function
        if not callable(function):
            raise TypeError(
                "Parameter `function` must be a callable or None, but an "
                "object of type {} was passed.".format(type(self.function)))

        # Work on a copy so that the user-supplied `metric_params` dictionary
        # is never modified by the additions made below.
        if self.metric_params is None:
            self.effective_metric_params_ = {}
        else:
            self.effective_metric_params_ = self.metric_params.copy()
        validate_params(self.effective_metric_params_,
                        _AVAILABLE_AMPLITUDE_METRICS[self.metric])

        # Find the unique homology dimensions in the 3D array X passed to
        # `fit` assuming that they can all be found in its zero-th entry
        homology_dimensions_fit = np.unique(X[0, :, 2])
        self.homology_dimensions_ = \
            _homology_dimensions_to_sorted_ints(homology_dimensions_fit)

        # `_bin` returns the two quantities stored here under the
        # 'samplings' and 'step_sizes' keys.
        self.effective_metric_params_['samplings'], \
            self.effective_metric_params_['step_sizes'] = \
            _bin(X, self.metric, **self.effective_metric_params_)

        # For persistence images, an unspecified (or ``None``) weight
        # function is replaced by ``numpy.ones_like``.
        if self.metric == 'persistence_image':
            weight_function = self.effective_metric_params_.get(
                'weight_function', None
            )
            weight_function = \
                np.ones_like if weight_function is None else weight_function
            self.effective_metric_params_['weight_function'] = weight_function

        amplitude_array = _parallel_amplitude(X, self.metric,
                                              self.effective_metric_params_,
                                              self.homology_dimensions_,
                                              self.n_jobs)
        # Collapse the array of amplitudes into the single scale factor.
        self.scale_ = function(amplitude_array)

        return self

    def plot(self, Xt, sample=0, homology_dimensions=None,
             plotly_params=None):
        """Plot a sample from a collection of persistence diagrams, with
        homology in multiple dimensions.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_points, 3)
            Collection of persistence diagrams, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Index of the sample in `Xt` to be plotted.

        homology_dimensions : list, tuple or None, optional, default: ``None``
            Which homology dimensions to include in the plot. ``None`` is
            equivalent to passing :attr:`homology_dimensions_`, which is only
            available after :meth:`fit` has been called.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"traces"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        if homology_dimensions is None:
            _homology_dimensions = self.homology_dimensions_
        else:
            _homology_dimensions = homology_dimensions

        return plot_diagram(
            Xt[sample], homology_dimensions=_homology_dimensions,
            plotly_params=plotly_params
        )
@adapt_fit_transform_docs
class Filtering(BaseEstimator, TransformerMixin, PlotterMixin):
    """Filtering of persistence diagrams.

    To filter a diagram is to drop every point [b, d, q] that represents a
    non-trivial topological feature with lifetime d - b less than or equal to
    a cutoff value. Diagonal points (those with b equal to d) can still be
    present in the output as padding, but they carry no information.

    **Important note**:

        - Input collections of persistence diagrams for this transformer must
          satisfy certain requirements, see e.g. :meth:`fit`.

    Parameters
    ----------
    homology_dimensions : list, tuple, or None, optional, default: ``None``
        When set to ``None``, subdiagrams corresponding to all homology
        dimensions seen in :meth:`fit` will be filtered. Otherwise, it
        contains the homology dimensions (as non-negative integers) at which
        filtering should occur.

    epsilon : float, optional, default: ``0.01``
        The cutoff value controlling the amount of filtering. Must be
        non-negative.

    Attributes
    ----------
    homology_dimensions_ : tuple
        If `homology_dimensions` is set to ``None``, contains the homology
        dimensions seen in :meth:`fit`, sorted in ascending order. Otherwise,
        it is a similarly sorted version of `homology_dimensions`.

    See also
    --------
    PairwiseDistance, ForgetDimension, Scaler, Amplitude

    """

    # Specification consumed by `validate_params` in `fit`.
    _hyperparameters = {
        'homology_dimensions': {
            'type': (list, tuple, type(None)),
            'of': {'type': int, 'in': Interval(0, np.inf, closed='left')}
        },
        'epsilon': {'type': Real, 'in': Interval(0, np.inf, closed='left')}
    }

    def __init__(self, homology_dimensions=None, epsilon=0.01):
        self.homology_dimensions = homology_dimensions
        self.epsilon = epsilon

    def fit(self, X, y=None):
        """Store relevant homology dimensions in
        :attr:`homology_dimensions_`. Then, return the estimator.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features, 3)
            Input data. Array of persistence diagrams, each a collection of
            triples [b, d, q] representing persistent topological features
            through their birth (b), death (d) and homology dimension (q).
            It is important that, for each possible homology dimension, the
            number of triples for which q equals that homology dimension is
            constant across the entries of `X`.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_diagrams(X)
        validate_params(self.get_params(), self._hyperparameters)

        # With no explicit choice from the user, read the dimensions off the
        # data: the zero-th diagram is assumed to exhibit every homology
        # dimension present in `X`.
        requested = self.homology_dimensions
        dimensions = np.unique(X[0, :, 2]) if requested is None else requested
        self.homology_dimensions_ = \
            _homology_dimensions_to_sorted_ints(dimensions)

        return self

    def plot(self, Xt, sample=0, homology_dimensions=None,
             plotly_params=None):
        """Plot one persistence diagram taken from a collection, showing
        homology in several dimensions.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_points, 3)
            Collection of persistence diagrams, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Position in `Xt` of the diagram to draw.

        homology_dimensions : list, tuple or None, optional, default: ``None``
            Homology dimensions to show in the figure. ``None`` is equivalent
            to passing :attr:`homology_dimensions_`, which is only available
            after :meth:`fit` has been called.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"traces"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        # The fitted attribute is looked up only when the caller gave no
        # explicit dimensions.
        dimensions = (
            self.homology_dimensions_ if homology_dimensions is None
            else homology_dimensions
        )
        return plot_diagram(
            Xt[sample],
            homology_dimensions=dimensions,
            plotly_params=plotly_params,
        )