"""Filtrations of 2D/3D binary images."""
# License: GNU AGPLv3

from numbers import Real, Integral
from types import FunctionType
import itertools

import numpy as np
from joblib import Parallel, delayed, effective_n_jobs
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import pairwise_distances
from sklearn.utils import gen_even_slices
from sklearn.utils.validation import check_array, check_is_fitted

from ._utils import _dilate, _erode
from .preprocessing import Padder
from ..base import PlotterMixin
from ..plotting import plot_heatmap
from ..utils._docs import adapt_fit_transform_docs
from ..utils.intervals import Interval
from ..utils.validation import validate_params


@adapt_fit_transform_docs
class HeightFiltration(BaseEstimator, TransformerMixin, PlotterMixin):
    """Filtrations of 2D/3D binary images based on distances to lines/planes.

    The height filtration assigns to each activated pixel of a binary image a
    greyscale value equal to the distance between the pixel and the hyperplane
    defined by a direction vector and the first seen edge of the image
    following that direction. Deactivated pixels are assigned the value of the
    maximum distance between any pixel of the image and the hyperplane, plus
    one.

    Parameters
    ----------
    direction : ndarray of shape (n_dimensions,) or None, optional, \
        default: ``None``
        Direction vector of the height filtration in
        ``n_dimensions``-dimensional space, where ``n_dimensions`` is the
        dimension of the images of the collection (2 or 3). ``None`` is
        equivalent to passing ``numpy.ones(n_dimensions)``.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    n_dimensions_ : ``2`` or ``3``
        Dimension of the images. Set in :meth:`fit`.

    direction_ : ndarray of shape (:attr:`n_dimensions_`,)
        Effective direction of the height filtration. Set in :meth:`fit`.

    mesh_ : ndarray of shape (n_pixels_x, n_pixels_y [, n_pixels_z])
        Greyscale image corresponding to the height filtration of a binary
        image where each pixel is activated. Set in :meth:`fit`.

    max_value_ : float
        Maximum pixel value among all pixels in all images of the collection.
        Set in :meth:`fit`.

    See also
    --------
    RadialFiltration, DilationFiltration, ErosionFiltration, \
    SignedDistanceFiltration, DensityFiltration, \
    gtda.homology.CubicalPersistence

    References
    ----------
    .. [1] A. Garin and G. Tauzin, "A topological reading lesson:
           Classification of MNIST using TDA"; 19th International IEEE
           Conference on Machine Learning and Applications (ICMLA 2020), 2019;
           `arXiv:1910.08345 <https://arxiv.org/abs/1910.08345>`_.

    """

    _hyperparameters = {
        'direction': {'type': (np.ndarray, type(None)),
                      'of': {'type': Real}}
        }

    def __init__(self, direction=None, n_jobs=None):
        self.direction = direction
        self.n_jobs = n_jobs

    def _calculate_height(self, X):
        Xh = np.full(X.shape, self.max_value_)

        for i in range(len(Xh)):
            Xh[i][np.where(X[i])] = np.dot(self.mesh_[np.where(X[i])],
                                           self.direction_).reshape((-1,))

        return Xh

    def fit(self, X, y=None):
        """Calculate :attr:`n_dimensions_`, :attr:`direction_`, :attr:`mesh_`
        and :attr:`max_value_` from a collection of binary images. Then,
        return the estimator.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_array(X, allow_nd=True)
        self.n_dimensions_ = X.ndim - 1
        if self.n_dimensions_ > 3:
            raise ValueError(f"Input of `fit` contains arrays of dimension "
                             f"{self.n_dimensions_}.")
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        if self.direction is None:
            self.direction_ = np.ones(self.n_dimensions_,)
        else:
            self.direction_ = np.copy(self.direction)
        self.direction_ = self.direction_ / np.linalg.norm(self.direction_)

        axis_order = [2, 1, 3]
        mesh_range_list = \
            [np.arange(X.shape[order]) if self.direction_[i] >= 0
             else -np.flip(np.arange(X.shape[order]))
             for i, order in enumerate(axis_order[:self.n_dimensions_])]

        self.mesh_ = np.stack(np.meshgrid(*mesh_range_list, indexing='xy'),
                              axis=self.n_dimensions_)

        self.max_value_ = 0.
        self.max_value_ = np.max(self._calculate_height(
            np.ones((1, *X.shape[1:])))) + 1

        return self

    def transform(self, X, y=None):
        """For each binary image in the collection `X`, calculate a
        corresponding greyscale image based on the distance of its pixels to
        the hyperplane defined by the `direction` vector and the first seen
        edge of the images following that `direction`. Return the collection
        of greyscale images.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Transformed collection of images. Each entry along axis 0 is a 2D
            or 3D greyscale image.

        """
        check_is_fitted(self)
        Xt = check_array(X, allow_nd=True)

        Xt = Parallel(n_jobs=self.n_jobs)(
            delayed(self._calculate_height)(Xt[s])
            for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs)))
        Xt = np.concatenate(Xt)

        return Xt

    @staticmethod
    def plot(Xt, sample=0, colorscale='greys', origin='upper',
             plotly_params=None):
        """Plot a sample from a collection of 2D greyscale images.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y)
            Collection of 2D greyscale images, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Index of the sample in `Xt` to be plotted.

        colorscale : str, optional, default: ``'greys'``
            Color scale to be used in the heat map. Can be anything allowed by
            :class:`plotly.graph_objects.Heatmap`.

        origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'``
            Position of the [0, 0] pixel of `data`, in the upper left or lower
            left corner. The convention ``'upper'`` is typically used for
            matrices and images.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"trace"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        return plot_heatmap(
            Xt[sample], colorscale=colorscale, origin=origin,
            title=f"Height filtration of image {sample}",
            plotly_params=plotly_params
            )

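
# Illustrative usage sketch (not part of the library API): a hypothetical
# helper showing how HeightFiltration might be applied to a small collection
# of random binary images. The image shape, random seed and direction below
# are arbitrary choices made for demonstration only.
def _example_height_filtration():
    # Five 10x10 binary images with entries in {0, 1}.
    X = np.random.default_rng(0).integers(0, 2, size=(5, 10, 10))
    # `direction` has one entry per spatial axis of the images.
    hf = HeightFiltration(direction=np.array([1., 0.]))
    Xh = hf.fit_transform(X)
    # Activated pixels carry their distance to the reference hyperplane;
    # deactivated pixels carry the fitted `hf.max_value_`.
    return Xh
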
@adapt_fit_transform_docs
class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin):
    """Filtrations of 2D/3D binary images based on distances to a reference
    pixel.

    The radial filtration assigns to each pixel of a binary image a greyscale
    value computed as follows in terms of a reference pixel, called the
    "center", and of a "radius": if the binary pixel is activated and lies
    within the ball defined by this center and this radius, then the assigned
    value equals the distance between the pixel and the center. In all other
    cases, the assigned value equals the maximum distance between any pixel of
    the image and the center pixel, plus one.

    Parameters
    ----------
    center : ndarray of shape (:attr:`n_dimensions_`,) or None, optional, \
        default: ``None``
        Coordinates of the center pixel, where ``n_dimensions`` is the
        dimension of the images of the collection (2 or 3). ``None`` is
        equivalent to passing ``numpy.zeros(n_dimensions)``.

    radius : float, optional, default: ``numpy.inf``
        The radius of the ball centered in `center` inside which activated
        pixels are included in the filtration.

    metric : string or callable, optional, default: ``'euclidean'``
        Determines a rule with which to calculate distances between pixels and
        the center. If `metric` is a string, it must be one of the options
        allowed by :func:`scipy.spatial.distance.pdist` for its metric
        parameter, or a metric listed in
        :obj:`sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`, including
        "euclidean", "manhattan" or "cosine". If `metric` is a callable
        function, it is called on each pair of pixel coordinates and the
        resulting value recorded.

    metric_params : dict, optional, default: ``{}``
        Additional keyword arguments for the metric function.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    n_dimensions_ : ``2`` or ``3``
        Dimension of the images. Set in :meth:`fit`.

    center_ : ndarray of shape (:attr:`n_dimensions_`,)
        Effective center of the radial filtration. Set in :meth:`fit`.

    mesh_ : ndarray of shape (n_pixels_x, n_pixels_y [, n_pixels_z])
        Greyscale image corresponding to the radial filtration of a binary
        image where each pixel is activated. Set in :meth:`fit`.

    max_value_ : float
        Maximum pixel value among all pixels in all images of the collection.
        Set in :meth:`fit`.

    See also
    --------
    HeightFiltration, DilationFiltration, ErosionFiltration, \
    SignedDistanceFiltration, DensityFiltration, \
    gtda.homology.CubicalPersistence

    References
    ----------
    .. [1] A. Garin and G. Tauzin, "A topological reading lesson:
           Classification of MNIST using TDA"; 19th International IEEE
           Conference on Machine Learning and Applications (ICMLA 2020), 2019;
           `arXiv:1910.08345 <https://arxiv.org/abs/1910.08345>`_.

    """

    _hyperparameters = {
        'center': {'type': (np.ndarray, type(None)),
                   'of': {'type': Integral}},
        'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')},
        'metric': {'type': (str, FunctionType)},
        'metric_params': {'type': dict}
        }

    def __init__(self, center=None, radius=np.inf, metric='euclidean',
                 metric_params={}, n_jobs=None):
        self.center = center
        self.radius = radius
        self.metric = metric
        self.metric_params = metric_params
        self.n_jobs = n_jobs

    def _calculate_radial(self, X):
        Xr = np.nan_to_num(self.mesh_ * X, nan=np.inf, posinf=np.inf)
        Xr = np.nan_to_num(Xr, posinf=-1)
        Xr[X == 0] = self.max_value_
        Xr[Xr == -1] = self.max_value_
        return Xr

    def fit(self, X, y=None):
        """Calculate :attr:`center_`, :attr:`n_dimensions_`, :attr:`mesh_` and
        :attr:`max_value_` from a collection of binary images. Then, return
        the estimator.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_array(X, allow_nd=True)
        self.n_dimensions_ = X.ndim - 1
        if self.n_dimensions_ > 3:
            raise ValueError(f"Input of `fit` contains arrays of dimension "
                             f"{self.n_dimensions_}.")
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        if self.center is None:
            self.center_ = np.zeros(self.n_dimensions_)
        else:
            self.center_ = np.copy(self.center)
        self.center_ = self.center_.reshape((1, -1))

        axis_order = [2, 1, 3]
        mesh_range_list = [np.arange(0, X.shape[i])
                           for i in axis_order[:self.n_dimensions_]]

        self.mesh_ = np.stack(
            np.meshgrid(*mesh_range_list),
            axis=self.n_dimensions_).reshape((-1, self.n_dimensions_))
        self.mesh_ = pairwise_distances(
            self.center_, self.mesh_, metric=self.metric,
            n_jobs=1, **self.metric_params).reshape(X.shape[1:])
        self.mesh_[self.mesh_ > self.radius] = np.inf

        self.max_value_ = 0.
        self.max_value_ = \
            np.max(self._calculate_radial(np.ones((1, *X.shape[1:])))) + 1

        return self

    def transform(self, X, y=None):
        """For each binary image in the collection `X`, calculate a
        corresponding greyscale image based on the distance of its pixels to
        the center. Return the collection of greyscale images.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Transformed collection of images. Each entry along axis 0 is a 2D
            or 3D greyscale image.

        """
        check_is_fitted(self)
        Xt = check_array(X, allow_nd=True)

        Xt = Parallel(n_jobs=self.n_jobs)(
            delayed(self._calculate_radial)(Xt[s])
            for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs)))
        Xt = np.concatenate(Xt)

        return Xt

    @staticmethod
    def plot(Xt, sample=0, colorscale='greys', origin='upper',
             plotly_params=None):
        """Plot a sample from a collection of 2D greyscale images.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y)
            Collection of 2D greyscale images, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Index of the sample in `Xt` to be plotted.

        colorscale : str, optional, default: ``'greys'``
            Color scale to be used in the heat map. Can be anything allowed by
            :class:`plotly.graph_objects.Heatmap`.

        origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'``
            Position of the [0, 0] pixel of `data`, in the upper left or lower
            left corner. The convention ``'upper'`` is typically used for
            matrices and images.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"trace"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        return plot_heatmap(
            Xt[sample], colorscale=colorscale, origin=origin,
            title=f"Radial filtration of image {sample}",
            plotly_params=plotly_params
            )

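
# Illustrative usage sketch (not part of the library API): a hypothetical
# helper showing how RadialFiltration might be configured with an explicit
# center and a finite radius. All numbers are assumptions for demonstration.
def _example_radial_filtration():
    X = np.random.default_rng(1).integers(0, 2, size=(5, 10, 10))
    # Measure Euclidean distances from the pixel at coordinates (4, 4);
    # activated pixels farther than `radius` from the center are assigned
    # the fitted `max_value_`, just like deactivated pixels.
    rf = RadialFiltration(center=np.array([4, 4]), radius=6.)
    return rf.fit_transform(X)
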
@adapt_fit_transform_docs
class DilationFiltration(BaseEstimator, TransformerMixin, PlotterMixin):
    """Filtrations of 2D/3D binary images based on the dilation of activated
    regions.

    Binary dilation is a morphological operator commonly used in image
    processing and relies on the `scipy.ndimage \
    <https://docs.scipy.org/doc/scipy/reference/ndimage.html>`_ module.

    This filtration assigns to each pixel in an image a greyscale value
    calculated as follows. If the minimum Manhattan distance between the pixel
    and any activated pixel in the image is less than or equal to the
    parameter `n_iterations`, the assigned value is this distance – in
    particular, activated pixels are assigned a value of 0. Otherwise, the
    assigned greyscale value is the sum of the lengths along all axes of the
    image – equivalently, it is the maximum Manhattan distance between any two
    pixels in the image. The name of this filtration comes from the fact that
    these values can be computed by iteratively dilating activated regions,
    thickening them by a total amount `n_iterations`.

    Parameters
    ----------
    n_iterations : int or None, optional, default: ``None``
        Number of iterations in the dilation process. ``None`` means dilation
        reaches all deactivated pixels.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    n_dimensions_ : ``2`` or ``3``
        Dimension of the images. Set in :meth:`fit`.

    n_iterations_ : int
        Effective number of iterations in the dilation process. Set in
        :meth:`fit`.

    max_value_ : float
        Maximum pixel value among all pixels in all images of the collection.
        Set in :meth:`fit`.

    See also
    --------
    HeightFiltration, RadialFiltration, ErosionFiltration, \
    SignedDistanceFiltration, DensityFiltration, \
    gtda.homology.CubicalPersistence

    References
    ----------
    .. [1] A. Garin and G. Tauzin, "A topological reading lesson:
           Classification of MNIST using TDA"; 19th International IEEE
           Conference on Machine Learning and Applications (ICMLA 2020), 2019;
           `arXiv:1910.08345 <https://arxiv.org/abs/1910.08345>`_.

    """

    _hyperparameters = {
        'n_iterations': {'type': (int, type(None)),
                         'in': Interval(1, np.inf, closed='left')}
        }

    def __init__(self, n_iterations=None, n_jobs=None):
        self.n_iterations = n_iterations
        self.n_jobs = n_jobs

    def _calculate_dilation(self, X):
        Xd = _dilate(X, 1, self.n_iterations_, 1, self.max_value_)
        mask_undilated = Xd == 0
        Xd -= 1
        Xd[mask_undilated] = self.max_value_
        return Xd

    def fit(self, X, y=None):
        """Calculate :attr:`n_dimensions_`, :attr:`n_iterations_` and
        :attr:`max_value_` from a collection of binary images. Then, return
        the estimator.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_array(X, allow_nd=True)
        self.n_dimensions_ = X.ndim - 1
        if self.n_dimensions_ > 3:
            raise ValueError(f"Input of `fit` contains arrays of dimension "
                             f"{self.n_dimensions_}.")
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        self.max_value_ = np.sum(X.shape[1:])

        if self.n_iterations is None:
            self.n_iterations_ = int(self.max_value_)
        else:
            self.n_iterations_ = self.n_iterations

        return self

    def transform(self, X, y=None):
        """For each binary image in the collection `X`, calculate a
        corresponding greyscale image based on the distance of its pixels to
        their closest activated neighboring pixel. Return the collection of
        greyscale images.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Transformed collection of images. Each entry along axis 0 is a 2D
            or 3D greyscale image.

        """
        check_is_fitted(self)
        Xt = check_array(X, allow_nd=True)

        Xt = Parallel(n_jobs=self.n_jobs)(
            delayed(self._calculate_dilation)(Xt[s])
            for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs)))
        Xt = np.concatenate(Xt)

        return Xt

    @staticmethod
    def plot(Xt, sample=0, colorscale='greys', origin='upper',
             plotly_params=None):
        """Plot a sample from a collection of 2D greyscale images.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y)
            Collection of 2D greyscale images, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Index of the sample in `Xt` to be plotted.

        colorscale : str, optional, default: ``'greys'``
            Color scale to be used in the heat map. Can be anything allowed by
            :class:`plotly.graph_objects.Heatmap`.

        origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'``
            Position of the [0, 0] pixel of `data`, in the upper left or lower
            left corner. The convention ``'upper'`` is typically used for
            matrices and images.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"trace"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        return plot_heatmap(
            Xt[sample], colorscale=colorscale, origin=origin,
            title=f"Dilation filtration of image {sample}",
            plotly_params=plotly_params
            )

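
# Illustrative usage sketch (not part of the library API): a hypothetical
# helper contrasting a capped number of dilation iterations with the default
# behaviour. Shapes and values are assumptions chosen for demonstration only.
def _example_dilation_filtration():
    X = np.random.default_rng(2).integers(0, 2, size=(5, 10, 10))
    # Cap the dilation at 5 iterations: pixels farther (in the Manhattan
    # sense) than 5 from every activated pixel all receive `max_value_`.
    df = DilationFiltration(n_iterations=5)
    Xd = df.fit_transform(X)
    # With `n_iterations=None`, the dilation would instead reach every pixel.
    return Xd
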
@adapt_fit_transform_docs
class ErosionFiltration(BaseEstimator, TransformerMixin, PlotterMixin):
    """Filtrations of 2D/3D binary images based on the erosion of activated
    regions.

    Binary erosion is a morphological operator commonly used in image
    processing and relies on the `scipy.ndimage \
    <https://docs.scipy.org/doc/scipy/reference/ndimage.html>`_ module.

    This filtration assigns to each pixel in an image a greyscale value
    calculated as follows. If the minimum Manhattan distance between the pixel
    and any deactivated pixel in the image is less than or equal to the
    parameter `n_iterations`, the assigned value is this distance – in
    particular, deactivated pixels are assigned a value of 0. Otherwise, the
    assigned greyscale value is the sum of the lengths along all axes of the
    image – equivalently, it is the maximum Manhattan distance between any two
    pixels in the image. The name of this filtration comes from the fact that
    these values can be computed by iteratively eroding activated regions,
    shrinking them by a total amount `n_iterations`.

    Parameters
    ----------
    n_iterations : int or None, optional, default: ``None``
        Number of iterations in the erosion process. ``None`` means erosion
        reaches all activated pixels.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    n_dimensions_ : ``2`` or ``3``
        Dimension of the images. Set in :meth:`fit`.

    n_iterations_ : int
        Effective number of iterations in the erosion process. Set in
        :meth:`fit`.

    max_value_ : float
        Maximum pixel value among all pixels in all images of the collection.
        Set in :meth:`fit`.

    See also
    --------
    HeightFiltration, RadialFiltration, DilationFiltration, \
    SignedDistanceFiltration, DensityFiltration, \
    gtda.homology.CubicalPersistence

    References
    ----------
    .. [1] A. Garin and G. Tauzin, "A topological reading lesson:
           Classification of MNIST using TDA"; 19th International IEEE
           Conference on Machine Learning and Applications (ICMLA 2020), 2019;
           `arXiv:1910.08345 <https://arxiv.org/abs/1910.08345>`_.

    """

    _hyperparameters = {
        'n_iterations': {'type': (int, type(None)),
                         'in': Interval(1, np.inf, closed='left')}
        }

    def __init__(self, n_iterations=None, n_jobs=None):
        self.n_iterations = n_iterations
        self.n_jobs = n_jobs

    def _calculate_erosion(self, X):
        Xe = _erode(X, 1, self.n_iterations_, 1, self.max_value_)
        mask_uneroded = Xe == 0
        Xe -= 1
        Xe[mask_uneroded] = self.max_value_
        return Xe

    def fit(self, X, y=None):
        """Calculate :attr:`n_dimensions_`, :attr:`n_iterations_` and
        :attr:`max_value_` from a collection of binary images. Then, return
        the estimator.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_array(X, allow_nd=True)
        self.n_dimensions_ = X.ndim - 1
        if self.n_dimensions_ > 3:
            raise ValueError(f"Input of `fit` contains arrays of dimension "
                             f"{self.n_dimensions_}.")
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        self.max_value_ = np.sum(X.shape[1:])

        if self.n_iterations is None:
            self.n_iterations_ = int(self.max_value_)
        else:
            self.n_iterations_ = self.n_iterations

        return self

    def transform(self, X, y=None):
        """For each binary image in the collection `X`, calculate a
        corresponding greyscale image based on the distance of its pixels to
        their closest deactivated neighboring pixel. Return the collection of
        greyscale images.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Transformed collection of images. Each entry along axis 0 is a 2D
            or 3D greyscale image.

        """
        check_is_fitted(self)
        Xt = check_array(X, allow_nd=True)

        Xt = Parallel(n_jobs=self.n_jobs)(
            delayed(self._calculate_erosion)(Xt[s])
            for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs)))
        Xt = np.concatenate(Xt)

        return Xt

    @staticmethod
    def plot(Xt, sample=0, colorscale='greys', origin='upper',
             plotly_params=None):
        """Plot a sample from a collection of 2D greyscale images.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y)
            Collection of 2D greyscale images, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Index of the sample in `Xt` to be plotted.

        colorscale : str, optional, default: ``'greys'``
            Color scale to be used in the heat map. Can be anything allowed by
            :class:`plotly.graph_objects.Heatmap`.

        origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'``
            Position of the [0, 0] pixel of `data`, in the upper left or lower
            left corner. The convention ``'upper'`` is typically used for
            matrices and images.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"trace"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        return plot_heatmap(
            Xt[sample], colorscale=colorscale, origin=origin,
            title=f"Erosion filtration of image {sample}",
            plotly_params=plotly_params
            )

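
# Illustrative usage sketch (not part of the library API): a hypothetical
# helper showing ErosionFiltration on a collection containing a single solid
# block of activated pixels. The array construction is an assumption made for
# demonstration only.
def _example_erosion_filtration():
    X = np.zeros((1, 10, 10))
    X[0, 2:8, 2:8] = 1  # a single 6x6 activated square
    ef = ErosionFiltration(n_iterations=3)
    Xe = ef.fit_transform(X)
    # Per the class docstring, pixels deep inside the square are farther from
    # any deactivated pixel and therefore receive larger greyscale values,
    # while deactivated pixels receive 0.
    return Xe
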
@adapt_fit_transform_docs
class SignedDistanceFiltration(BaseEstimator, TransformerMixin, PlotterMixin):
    """Filtrations of 2D/3D binary images based on the dilation and the
    erosion of activated regions.

    This filtration assigns to each pixel in an image a greyscale value
    calculated as follows. For activated pixels, if the minimum Manhattan
    distance between the pixel and any deactivated pixel in the image is less
    than or equal to the parameter `n_iterations`, the assigned value is this
    distance minus 1. Otherwise, the assigned greyscale value is the sum of
    the lengths along all axes of the image – equivalently, it is the maximum
    Manhattan distance between any two pixels in the image, minus 1. For
    deactivated pixels, if the minimum Manhattan distance between the pixel
    and any activated pixel in the image is less than or equal to the
    parameter `n_iterations`, the assigned value is the opposite of this
    distance. Otherwise, the assigned greyscale value is the opposite of the
    maximum Manhattan distance between any two pixels in the image.

    The name of this filtration comes from the fact that it is a negatively
    signed dilation plus a positively signed erosion, minus 1 on the activated
    pixels. Therefore, activated pixels at the boundary of the activated
    regions always have a pixel value of 0.

    Parameters
    ----------
    n_iterations : int or None, optional, default: ``None``
        Number of iterations in the dilation process. ``None`` means dilation
        over the full image.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    n_dimensions_ : ``2`` or ``3``
        Dimension of the images. Set in :meth:`fit`.

    n_iterations_ : int
        Effective number of iterations in the dilation process. Set in
        :meth:`fit`.

    max_value_ : float
        Maximum pixel value among all pixels in all images of the collection.
        Set in :meth:`fit`.

    See also
    --------
    HeightFiltration, RadialFiltration, DilationFiltration, \
    ErosionFiltration, DensityFiltration, gtda.homology.CubicalPersistence

    References
    ----------
    .. [1] A. Garin and G. Tauzin, "A topological reading lesson:
           Classification of MNIST using TDA"; 19th International IEEE
           Conference on Machine Learning and Applications (ICMLA 2020), 2019;
           `arXiv:1910.08345 <https://arxiv.org/abs/1910.08345>`_.

    """

    _hyperparameters = {
        'n_iterations': {'type': (int, type(None)),
                         'in': Interval(1, np.inf, closed='left')}
        }

    def __init__(self, n_iterations=None, n_jobs=None):
        self.n_iterations = n_iterations
        self.n_jobs = n_jobs

    def _calculate_signed_distance(self, X):
        mask = X == 1
        Xd = -_dilate(X, 1, self.n_iterations_, 0, self.max_value_)
        Xe = _erode(X, 0, self.n_iterations_, 0, self.max_value_)
        mask_e = Xe == 0
        mask_d = Xd == 0
        Xe[np.logical_not(mask)] = 0
        Xe[mask] -= 1
        Xd[mask] = 0
        Xd[mask_d] = -self.max_value_
        Xe[mask_e] = self.max_value_
        return (Xd + Xe)

    def fit(self, X, y=None):
        """Calculate :attr:`n_dimensions_`, :attr:`n_iterations_` and
        :attr:`max_value_` from a collection of binary images. Then, return
        the estimator.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_array(X, allow_nd=True)
        self.n_dimensions_ = X.ndim - 1
        if self.n_dimensions_ > 3:
            raise ValueError(f"Input of `fit` contains arrays of dimension "
                             f"{self.n_dimensions_}.")
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        self.max_value_ = np.sum(X.shape[1:])

        if self.n_iterations is None:
            self.n_iterations_ = int(self.max_value_)
        else:
            self.n_iterations_ = self.n_iterations

        return self

    def transform(self, X, y=None):
        """For each binary image in the collection `X`, calculate a
        corresponding greyscale image based on the signed distance of its
        pixels to the boundary of the activated regions. Return the collection
        of greyscale images.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Transformed collection of images. Each entry along axis 0 is a 2D
            or 3D greyscale image.

        """
        check_is_fitted(self)
        Xt = check_array(X, allow_nd=True)

        Xt = Parallel(n_jobs=self.n_jobs)(
            delayed(self._calculate_signed_distance)(Xt[s])
            for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs)))
        Xt = np.concatenate(Xt)

        return Xt

    @staticmethod
    def plot(Xt, sample=0, colorscale='greys', origin='upper',
             plotly_params=None):
        """Plot a sample from a collection of 2D greyscale images.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y)
            Collection of 2D greyscale images, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Index of the sample in `Xt` to be plotted.

        colorscale : str, optional, default: ``'greys'``
            Color scale to be used in the heat map. Can be anything allowed by
            :class:`plotly.graph_objects.Heatmap`.

        origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'``
            Position of the [0, 0] pixel of `data`, in the upper left or lower
            left corner. The convention ``'upper'`` is typically used for
            matrices and images.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"trace"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        return plot_heatmap(
            Xt[sample], colorscale=colorscale, origin=origin,
            title=f"Signed-distance filtration of image {sample}",
            plotly_params=plotly_params
            )

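
# Illustrative usage sketch (not part of the library API): a hypothetical
# helper showing that SignedDistanceFiltration produces non-negative values
# on activated pixels and negative values on deactivated ones. The input is
# an assumption chosen for demonstration only.
def _example_signed_distance_filtration():
    X = np.zeros((1, 10, 10))
    X[0, 3:7, 3:7] = 1  # a single 4x4 activated square
    sf = SignedDistanceFiltration(n_iterations=4)
    Xs = sf.fit_transform(X)
    # Per the class docstring, activated pixels at the boundary of the square
    # map to 0, interior activated pixels to positive values, and deactivated
    # pixels to negative values.
    return Xs
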
@adapt_fit_transform_docs
class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin):
    """Filtrations of 2D/3D binary images based on the number of activated
    neighboring pixels.

    The density filtration assigns to each pixel of a binary image a greyscale
    value equal to the number of activated pixels within a ball centered
    around it.

    Parameters
    ----------
    radius : float, optional, default: ``1.``
        The radius of the ball within which the number of activated pixels is
        considered.

    metric : string or callable, optional, default: ``'euclidean'``
        Determines a rule with which to calculate distances between pairs of
        pixels. If `metric` is a string, it must be one of the options allowed
        by :func:`scipy.spatial.distance.pdist` for its metric parameter, or a
        metric listed in :obj:`sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`,
        including "euclidean", "manhattan", or "cosine". If `metric` is a
        callable function, it is called on each pair of instances and the
        resulting value recorded. The callable should take two arrays from the
        entry in `X` as input, and return a value indicating the distance
        between them.

    metric_params : dict, optional, default: ``{}``
        Additional keyword arguments for the metric function.

    n_jobs : int or None, optional, default: ``None``
        The number of jobs to use for the computation. ``None`` means 1 unless
        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors.

    Attributes
    ----------
    n_dimensions_ : ``2`` or ``3``
        Dimension of the images. Set in :meth:`fit`.

    mask_ : ndarray of shape (radius, radius [, radius])
        The mask applied around each pixel to calculate the weighted number of
        its activated neighbors. Set in :meth:`fit`.

    See also
    --------
    HeightFiltration, RadialFiltration, DilationFiltration, \
    ErosionFiltration, SignedDistanceFiltration, \
    gtda.homology.CubicalPersistence

    References
    ----------
    .. [1] A. Garin and G. Tauzin, "A topological reading lesson:
           Classification of MNIST using TDA"; 19th International IEEE
           Conference on Machine Learning and Applications (ICMLA 2020), 2019;
           `arXiv:1910.08345 <https://arxiv.org/abs/1910.08345>`_.

    """

    _hyperparameters = {
        'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')},
        'metric': {'type': (str, FunctionType)},
        'metric_params': {'type': dict},
        }

    def __init__(self, radius=1., metric='euclidean', metric_params={},
                 n_jobs=None):
        self.radius = radius
        self.metric = metric
        self.metric_params = metric_params
        self.n_jobs = n_jobs

    def _calculate_density(self, X):
        Xd = np.zeros(X.shape)

        # The idea behind this is to sum up pixel values of the image
        # rolled according to the 3D mask
        for i, j, k in self._iterator:
            Xd += np.roll(np.roll(
                np.roll(X, k, axis=3), j, axis=2), i, axis=1) \
                * self.mask_[self._size + i, self._size + j, self._size + k]
        return Xd

    def fit(self, X, y=None):
        """Calculate :attr:`n_dimensions_` and :attr:`mask_` from a collection
        of binary images. Then, return the estimator.

        This method is here to implement the usual scikit-learn API and hence
        work in pipelines.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object

        """
        X = check_array(X, allow_nd=True)
        self.n_dimensions_ = X.ndim - 1
        if self.n_dimensions_ > 3:
            raise ValueError(f"Input of `fit` contains arrays of dimension "
                             f"{self.n_dimensions_}.")
        validate_params(
            self.get_params(), self._hyperparameters, exclude=['n_jobs'])

        # Determine the size of the mask based on the radius and metric
        self._size = int(np.ceil(
            pairwise_distances([[0]], [[self.radius]], metric=self.metric,
                               **self.metric_params)
            ))

        # The mask is always 3D but not the iterator.
        self.mask_ = np.ones(tuple(2 * self._size + 1 for _ in range(3)),
                             dtype=bool)

        # Create an iterator for applying the mask to every pixel at once
        iterator_size_list = \
            [range(-self._size, self._size + 1)] * self.n_dimensions_ + \
            [[0] for _ in range(3 - self.n_dimensions_)]
        self._iterator = tuple(itertools.product(*iterator_size_list))

        # We create a mesh so that we have an array with coordinates and we
        # can calculate the distance of each point to the center
        mesh_size_list = [np.arange(0, 2 * self._size + 1)] * 3
        self.mesh_ = np.stack(
            np.meshgrid(*mesh_size_list), axis=3).reshape((-1, 3))

        # Calculate those distances to the center and use them to set the
        # mask values so that it corresponds to a ball
        center = self._size * np.ones((1, 3))
        self.mask_ = pairwise_distances(
            center, self.mesh_, metric=self.metric, n_jobs=1,
            **self.metric_params).reshape(self.mask_.shape)
        self.mask_ = self.mask_ <= self.radius

        # Instantiate a padder to pad all images with 0 so that the rolling
        # of the mask also works at the boundary of the images
        padding = np.asarray([*[self._size] * self.n_dimensions_,
                              *[0] * (3 - self.n_dimensions_)])
        self._padder = Padder(padding=padding)
        self._padder.fit(X.reshape((*X.shape[:3], -1)))

        return self

    def transform(self, X, y=None):
        """For each binary image in the collection `X`, calculate a
        corresponding greyscale image based on the density of its pixels.
        Return the collection of greyscale images.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Input data. Each entry along axis 0 is interpreted as a 2D or 3D
            binary image.

        y : None
            There is no need of a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \
            [, n_pixels_z])
            Transformed collection of images. Each entry along axis 0 is a 2D
            or 3D greyscale image.

        """
        check_is_fitted(self)
        Xt = check_array(X, allow_nd=True, copy=True)

        # Reshape the images to 3D so that they can be rolled according to
        # the 3D mask
        Xt = Xt.reshape((*X.shape[:3], -1))
        Xt = self._padder.transform(Xt)

        Xt = Parallel(n_jobs=self.n_jobs)(
            delayed(self._calculate_density)(Xt[s])
            for s in gen_even_slices(Xt.shape[0],
                                     effective_n_jobs(self.n_jobs)))
        Xt = np.concatenate(Xt)

        Xt = Xt[:, self._size: -self._size, self._size: -self._size]
        if self.n_dimensions_ == 3:
            Xt = Xt[:, :, :, self._size: -self._size]

        Xt = Xt.reshape(X.shape)

        return Xt

    @staticmethod
    def plot(Xt, sample=0, colorscale='greys', origin='upper',
             plotly_params=None):
        """Plot a sample from a collection of 2D greyscale images.

        Parameters
        ----------
        Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y)
            Collection of 2D greyscale images, such as returned by
            :meth:`transform`.

        sample : int, optional, default: ``0``
            Index of the sample in `Xt` to be plotted.

        colorscale : str, optional, default: ``'greys'``
            Color scale to be used in the heat map. Can be anything allowed by
            :class:`plotly.graph_objects.Heatmap`.

        origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'``
            Position of the [0, 0] pixel of `data`, in the upper left or lower
            left corner. The convention ``'upper'`` is typically used for
            matrices and images.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"trace"`` and ``"layout"``, and the corresponding values should
            be dictionaries containing keyword arguments as would be fed to
            the :meth:`update_traces` and :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        fig : :class:`plotly.graph_objects.Figure` object
            Plotly figure.

        """
        return plot_heatmap(
            Xt[sample], colorscale=colorscale, origin=origin,
            title=f"Density filtration of image {sample}",
            plotly_params=plotly_params
            )

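
# Illustrative usage sketch (not part of the library API): a hypothetical
# helper showing DensityFiltration counting activated neighbours within a
# Euclidean ball of radius 2. All values are assumptions for demonstration.
def _example_density_filtration():
    X = np.random.default_rng(3).integers(0, 2, size=(5, 10, 10))
    dens = DensityFiltration(radius=2.)
    Xd = dens.fit_transform(X)
    # Each output pixel equals the number of activated input pixels lying
    # within distance `radius` of it (the pixel itself included when
    # activated).
    return Xd
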