"""Filter functions commonly used with Mapper."""
# License: GNU AGPLv3
import warnings
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.stats import entropy
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted
from ..utils._docs import adapt_fit_transform_docs
@adapt_fit_transform_docs
class Eccentricity(BaseEstimator, TransformerMixin):
    """Eccentricities of points in a point cloud or abstract metric space.

    Let `D` be a square matrix representing distances between points in a
    point cloud, or directly defining an abstract metric (or metric-like)
    space. The eccentricity of point `i` in the point cloud or abstract
    metric space is the `p`-norm (for some `p`) of row `i` in `D`.

    Parameters
    ----------
    exponent : int or float, optional, default: ``2``
        `p`-norm exponent used to calculate eccentricities from the distance
        matrix.

    metric : str or function, optional, default: ``'euclidean'``
        Metric to use to compute the distance matrix if point cloud data is
        passed as input, or ``'precomputed'`` to specify that the input is
        already a distance matrix. If not ``'precomputed'``, it may be
        anything allowed by :func:`scipy.spatial.distance.pdist`.

    metric_params : dict, optional, default: ``{}``
        Additional keyword arguments for the metric function.

    """

    # NOTE(review): no ``transform`` method is visible in this class, yet
    # ``TransformerMixin.fit_transform`` relies on one -- confirm that it is
    # defined for this estimator.

    def __init__(self, exponent=2, metric='euclidean', metric_params={}):
        # Hyperparameters are stored exactly as received, with no validation
        # or copying. The ``{}`` default is kept for interface compatibility;
        # it is only stored here, never modified.
        self.exponent = exponent
        self.metric = metric
        self.metric_params = metric_params

    def fit(self, X, y=None):
        """Validate `X` and mark the estimator as fitted.

        Nothing is learned from the data. This method exists to implement
        the usual scikit-learn API and hence work in pipelines.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_samples, \
            n_samples)
            Input data.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object
            The estimator itself.

        """
        # TODO: Consider making this transformer stateful so that the
        #  eccentricities of new points relative to the data seen in fit
        #  may be computed. May be useful for supervised tasks with Mapper?
        #  Evaluate performance impact of doing this.

        # Input validation only: the array returned by check_array is
        # discarded.
        check_array(X)

        # Flag recording that fit has been called.
        self._is_fitted = True
        return self
@adapt_fit_transform_docs
class Entropy(BaseEstimator, TransformerMixin):
    """Entropy of rows in a two-dimensional array.

    The rows of the array are interpreted as probability vectors, after taking
    absolute values if necessary and normalizing. Then, their (base 2) Shannon
    entropies are computed and returned.

    """

    # NOTE(review): no ``transform`` method is visible in this class, yet
    # ``TransformerMixin.fit_transform`` relies on one -- confirm that it is
    # defined for this estimator.

    def __init__(self):
        # This estimator has no hyperparameters.
        pass

    def fit(self, X, y=None):
        """Validate `X` and mark the estimator as fitted.

        Nothing is learned from the data. This method exists to implement
        the usual scikit-learn API and hence work in pipelines.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object
            The estimator itself.

        """
        # Input validation only: the array returned by check_array is
        # discarded.
        check_array(X)

        # Flag recording that fit has been called.
        self._is_fitted = True
        return self
@adapt_fit_transform_docs
class Projection(BaseEstimator, TransformerMixin):
    """Projection onto specified columns.

    In practice, this simply means returning a selection of columns of the
    data.

    Parameters
    ----------
    columns : int or list of int, optional, default: ``0``
        The column indices of the array to project onto.

    """

    # NOTE(review): no ``transform`` method is visible in this class, yet
    # ``TransformerMixin.fit_transform`` relies on one -- confirm that it is
    # defined for this estimator.

    def __init__(self, columns=0):
        # Stored exactly as received, with no validation or copying.
        self.columns = columns

    def fit(self, X, y=None):
        """Validate `X` and mark the estimator as fitted.

        Nothing is learned from the data. This method exists to implement
        the usual scikit-learn API and hence work in pipelines.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        y : None
            There is no need for a target in a transformer, yet the pipeline
            API requires this parameter.

        Returns
        -------
        self : object
            The estimator itself.

        """
        # Input validation only: the array returned by check_array is
        # discarded.
        check_array(X)

        # Flag recording that fit has been called.
        self._is_fitted = True
        return self