"""Static and interactive visualisation functions for Mapper graphs."""
# License: GNU AGPLv3
import logging
import traceback
import numpy as np
import plotly.graph_objects as go
from ipywidgets import widgets, Layout, HTML
from sklearn.base import clone
from .utils._logging import OutputWidgetHandler
from .utils._visualization import (
_validate_color_kwargs,
_calculate_graph_data,
_produce_static_figure,
_get_column_color_buttons,
_get_colors_for_vals,
)
def plot_static_mapper_graph(
        pipeline, data, color_data=None, color_features=None,
        node_color_statistic=None, layout="kamada_kawai", layout_dim=2,
        clone_pipeline=True, n_sig_figs=3, node_scale=12, plotly_params=None
):
    """Plot Mapper graphs without interactivity on pipeline parameters.

    The output graph is a rendition of the :class:`igraph.Graph` object
    computed by calling the :meth:`fit_transform` method of the
    :class:`~gtda.mapper.pipeline.MapperPipeline` instance `pipeline` on the
    input `data`. The graph's nodes correspond to subsets of elements (rows) in
    `data`; these subsets are clusters in larger portions of `data` called
    "pullback (cover) sets", which are computed by means of the `pipeline`'s
    "filter function" and "cover" and correspond to the differently-colored
    portions in `this diagram <../../../../_images/mapper_pipeline.svg>`_.
    Two clusters from different pullback cover sets can overlap; if they do, an
    edge between the corresponding nodes in the graph may be drawn.

    Nodes are colored according to `color_features` and `node_color_statistic`
    and are sized according to the number of elements they represent. The
    hovertext on each node displays, in this order:

        - a globally unique ID for the node, which can be used to retrieve
          node information from the :class:`igraph.Graph` object, see
          :class:`~gtda.mapper.nerve.Nerve`;
        - the label of the pullback (cover) set which the node's elements
          form a cluster in;
        - a label identifying the node as a cluster within that pullback set;
        - the number of elements of `data` associated with the node;
        - the value of the summary statistic which determines the node's
          color.

    Parameters
    ----------
    pipeline : :class:`~gtda.mapper.pipeline.MapperPipeline` object
        Mapper pipeline to apply to the data.

    data : array-like of shape (n_samples, n_features)
        Data used to generate the Mapper graph. Can be a pandas dataframe.

    color_data : array-like of length n_samples, or None, optional, \
        default: ``None``
        Data to be used to construct node colors in the Mapper graph (according
        to `color_features` and `node_color_statistic`). Must have the same
        length as `data`. ``None`` is the same as passing
        ``numpy.arange(len(data))``.

    color_features : object or None, optional, default: ``None``
        Specifies one or more feature of interest from `color_data` to be used,
        together with `node_color_statistic`, to determine node colors. Ignored
        if `node_color_statistic` is a numpy array.

            1. ``None`` is equivalent to passing `color_data`.
            2. If an object implementing :meth:`transform` or
               :meth:`fit_transform`, or a callable, it is applied to
               `color_data` to generate the features of interest.
            3. If an index or string, or list of indices/strings, it is
               equivalent to selecting a column or subset of columns from
               `color_data`.

    node_color_statistic : None, callable, or ndarray of shape (n_nodes,) or \
        (n_nodes, 1), optional, default: ``None``
        If a callable, node colors will be computed as summary statistics from
        the feature array ``y`` determined by `color_data` and
        `color_features`. Let ``y`` have ``n`` columns (note: 1d feature arrays
        are converted to column vectors). Then, for a node representing a list
        ``I`` of row indices, there will be ``n`` colors, each computed as
        ``node_color_statistic(y[I, i])`` for ``i`` between ``0`` and ``n``.
        ``None`` is equivalent to passing :func:`numpy.mean`. If a numpy array,
        it must have the same length as the number of nodes in the Mapper graph
        and its values are used directly as node colors (`color_features` is
        ignored).

    layout : None, str or callable, optional, default: ``"kamada_kawai"``
        Layout algorithm for the graph. Can be any accepted value for the
        ``layout`` parameter in the :meth:`layout` method of
        :class:`igraph.Graph` [1]_.

    layout_dim : int, default: ``2``
        The number of dimensions for the layout. Can be 2 or 3.

    clone_pipeline : bool, optional, default: ``True``
        If ``True``, the input `pipeline` is cloned before computing the
        Mapper graph to prevent unexpected side effects from in-place
        parameter updates.

    n_sig_figs : int or None, optional, default: ``3``
        If not ``None``, number of significant figures to which to round node
        summary statistics. If ``None``, no rounding is performed.

    node_scale : int or float, optional, default: ``12``
        Sets the scale factor used to determine the rendered size of the
        nodes. Increase for larger nodes. Implements a formula in the
        `Plotly documentation \
        <https://plotly.com/python/bubble-charts/#scaling-the-size-of-bubble\
        -charts>`_.

    plotly_params : dict or None, optional, default: ``None``
        Custom parameters to configure the plotly figure. Allowed keys are
        ``"node_trace"``, ``"edge_trace"`` and ``"layout"``, and the
        corresponding values should be dictionaries containing keyword
        arguments as would be fed to the :meth:`update_traces` and
        :meth:`update_layout` methods of :class:`plotly.graph_objects.Figure`.

    Returns
    -------
    fig : :class:`plotly.graph_objects.FigureWidget` object
        Figure representing the Mapper graph with appropriate node colouring
        and size.

    Examples
    --------
    Setting a colorscale different from the default one:

    >>> import numpy as np
    >>> np.random.seed(1)
    >>> from gtda.mapper import make_mapper_pipeline, plot_static_mapper_graph
    >>> pipeline = make_mapper_pipeline()
    >>> data = np.random.random((100, 3))
    >>> plotly_params = {"node_trace": {"marker_colorscale": "Blues"}}
    >>> fig = plot_static_mapper_graph(pipeline, data,
    ...                                plotly_params=plotly_params)

    Inspect the composition of a node with "Node ID" displayed as 0 in the
    hovertext:

    >>> graph = pipeline.fit_transform(data)
    >>> graph.vs[0]["node_elements"]
    array([70])

    Write the figure to a file using Plotly:

    >>> fname = "current_figure"
    >>> fig.write_html(fname + ".html")
    >>> fig.write_image(fname + ".svg")  # Requires psutil

    See also
    --------
    MapperInteractivePlotter, plot_interactive_mapper_graph, \
    gtda.mapper.make_mapper_pipeline

    References
    ----------
    .. [1] `igraph.Graph.layout
            <https://igraph.org/python/doc/igraph.Graph-class.html#layout>`_
            documentation.

    """
    # Work on a clone (unless told otherwise) so that fitting does not alter
    # the pipeline object owned by the caller.
    _pipeline = clone(pipeline) if clone_pipeline else pipeline
    graph = _pipeline.fit_transform(data)

    # Validate the colouring arguments; the validator hands back the
    # `node_color_statistic` value to be used from here on.
    (color_data_transformed, column_names_dropdown,
     node_color_statistic) = _validate_color_kwargs(
        graph, data, color_data, color_features, node_color_statistic,
        interactive=False
    )

    # Build the plotly traces for edges and nodes, together with the per-node
    # colour values.
    edge_trace, node_trace, node_colors_color_features = \
        _calculate_graph_data(
            graph, color_data_transformed, node_color_statistic, layout,
            layout_dim, n_sig_figs, node_scale
        )

    figure = _produce_static_figure(
        edge_trace, node_trace, node_colors_color_features,
        column_names_dropdown, layout_dim, n_sig_figs, plotly_params
    )

    return figure
def plot_interactive_mapper_graph(
        pipeline, data, color_data=None, color_features=None,
        node_color_statistic=None, layout="kamada_kawai", layout_dim=2,
        clone_pipeline=True, n_sig_figs=3, node_scale=12, plotly_params=None
):
    """*As of version 0.5.0, we recommend using the object-oriented interface
    provided by :class:`MapperInteractivePlotter` instead of this function.*

    Plot Mapper graphs in a Jupyter session, with interactivity on pipeline
    parameters.

    Extends :func:`~gtda.mapper.visualization.plot_static_mapper_graph` by
    providing functionality to interactively update parameters from the cover,
    clustering and graph construction steps defined in `pipeline`.

    Parameters
    ----------
    pipeline : :class:`~gtda.mapper.pipeline.MapperPipeline` object
        Mapper pipeline to apply to the data.

    data : array-like of shape (n_samples, n_features)
        Data used to generate the Mapper graph. Can be a pandas dataframe.

    color_data : array-like of length n_samples, or None, optional, \
        default: ``None``
        Data to be used to construct node colors in the Mapper graph (according
        to `color_features` and `node_color_statistic`). Must have the same
        length as `data`. ``None`` is the same as passing
        ``numpy.arange(len(data))``.

    color_features : object or None, optional, default: ``None``
        Specifies one or more feature of interest from `color_data` to be used,
        together with `node_color_statistic`, to determine node colors.

            1. ``None`` is equivalent to passing `color_data`.
            2. If an object implementing :meth:`transform` or
               :meth:`fit_transform`, or a callable, it is applied to
               `color_data` to generate the features of interest.
            3. If an index or string, or list of indices/strings, it is
               equivalent to selecting a column or subset of columns from
               `color_data`.

    node_color_statistic : None or callable, optional, default: ``None``
        If a callable, node colors will be computed as summary statistics from
        the feature array ``y`` determined by `color_data` and
        `color_features`. Let ``y`` have ``n`` columns (note: 1d feature arrays
        are converted to column vectors). Then, for a node representing a list
        ``I`` of row indices, there will be ``n`` colors, each computed as
        ``node_color_statistic(y[I, i])`` for ``i`` between ``0`` and ``n``.
        ``None`` is equivalent to passing :func:`numpy.mean`.

    layout : None, str or callable, optional, default: ``"kamada_kawai"``
        Layout algorithm for the graph. Can be any accepted value for the
        ``layout`` parameter in the :meth:`layout` method of
        :class:`igraph.Graph` [1]_.

    layout_dim : int, default: ``2``
        The number of dimensions for the layout. Can be 2 or 3.

    clone_pipeline : bool, optional, default: ``True``
        If ``True``, the input `pipeline` is cloned before computing the
        Mapper graph to prevent unexpected side effects from in-place
        parameter updates.

    n_sig_figs : int or None, optional, default: ``3``
        If not ``None``, number of significant figures to which to round node
        summary statistics. If ``None``, no rounding is performed.

    node_scale : int or float, optional, default: ``12``
        Sets the scale factor used to determine the rendered size of the
        nodes. Increase for larger nodes. Implements a formula in the
        `Plotly documentation \
        <https://plotly.com/python/bubble-charts/#scaling-the-size-of-bubble\
        -charts>`_.

    plotly_params : dict or None, optional, default: ``None``
        Custom parameters to configure the plotly figure. Allowed keys are
        ``"node_trace"``, ``"edge_trace"`` and ``"layout"``, and the
        corresponding values should be dictionaries containing keyword
        arguments as would be fed to the :meth:`update_traces` and
        :meth:`update_layout` methods of :class:`plotly.graph_objects.Figure`.

    Returns
    -------
    box : :class:`ipywidgets.VBox` object
        A box containing the following widgets: parameters of the clustering
        algorithm, parameters for the covering scheme, a Mapper graph arising
        from those parameters, a validation box, and logs.

    See also
    --------
    MapperInteractivePlotter, plot_static_mapper_graph, \
    gtda.mapper.pipeline.make_mapper_pipeline

    References
    ----------
    .. [1] `igraph.Graph.layout
            <https://igraph.org/python/doc/igraph.Graph-class.html#layout>`_
            documentation.

    """
    # Thin functional wrapper: all the work is delegated to the
    # object-oriented interface.
    plotter = MapperInteractivePlotter(pipeline, data, clone_pipeline)
    return plotter.plot(
        color_data=color_data, color_features=color_features,
        node_color_statistic=node_color_statistic, layout=layout,
        layout_dim=layout_dim, n_sig_figs=n_sig_figs, node_scale=node_scale,
        plotly_params=plotly_params
    )
class MapperInteractivePlotter:
    """Plot Mapper graphs in a Jupyter session, with interactivity on pipeline
    parameters.

    Provides functionality to interactively update parameters from the cover,
    clustering and graph construction steps defined in `pipeline`.

    An interactive widget is produced when calling :meth:`plot`. After
    interacting with the widget, the current state of all outputs which may
    have been altered can be retrieved via one of the attributes listed below.

    Parameters
    ----------
    pipeline : :class:`~gtda.mapper.pipeline.MapperPipeline` object
        Mapper pipeline to apply to the data.

    data : array-like of shape (n_samples, n_features)
        Data used to generate the Mapper graph. Can be a pandas dataframe.

    clone_pipeline : bool, optional, default: ``True``
        If ``True``, the input `pipeline` is cloned before computing the
        Mapper graph to prevent unexpected side effects from in-place
        parameter updates.

    Attributes
    ----------
    graph_ : :class:`igraph.Graph` object
        Current state of the graph displayed by the widget.

    pipeline_ : :class:`~gtda.mapper.pipeline.MapperPipeline` object
        Current state of the Mapper pipeline.

    color_features_ : array-like of shape (n_samples, n_features)
        Values of the features of interest for each entry in `data`, as
        produced according to `color_data` and `color_features` when calling
        :meth:`plot`. Not changed by interacting with the widget.

    node_summaries_ : array-like of shape (n_nodes, n_features)
        Current values of the summaries computed for each node and used as
        node colours in the figure. Produced according to
        `node_color_statistic`, see :meth:`plot`.

    figure_ : :class:`plotly.graph_objects.FigureWidget` object
        Current figure representing the Mapper graph with appropriate node
        colouring and size.

    Examples
    --------
    Instantiate the plotter object on a pipeline and data configuration, and
    call :meth:`plot` to display the widget in a Jupyter session:

    >>> import numpy as np
    >>> np.random.seed(1)
    >>> from gtda.mapper import make_mapper_pipeline, MapperInteractivePlotter
    >>> pipeline = make_mapper_pipeline()
    >>> data = np.random.random((100, 3))
    >>> plotter = MapperInteractivePlotter(pipeline, data)
    >>> plotter.plot()

    After interacting with the widget, inspect the composition of a node with
    "Node ID" displayed as 0 in the hovertext:

    >>> plotter.graph_.vs[0]["node_elements"]
    array([70])

    Write the current figure to a file using Plotly:

    >>> fname = "current_figure"
    >>> plotter.figure_.write_html(fname + ".html")
    >>> plotter.figure_.write_image(fname + ".svg")  # Requires psutil

    See also
    --------
    plot_interactive_mapper_graph, plot_static_mapper_graph, \
    gtda.mapper.pipeline.make_mapper_pipeline

    References
    ----------
    .. [1] `igraph.Graph.layout
            <https://igraph.org/python/doc/igraph.Graph-class.html#layout>`_
            documentation.

    """

    def __init__(self, pipeline, data, clone_pipeline=True):
        self.pipeline = pipeline
        self.data = data
        self.clone_pipeline = clone_pipeline

    def plot(self, color_data=None, color_features=None,
             node_color_statistic=None, layout="kamada_kawai", layout_dim=2,
             n_sig_figs=3, node_scale=12, plotly_params=None):
        """Produce the interactive Mapper widget.

        Parameters
        ----------
        color_data : array-like of length n_samples, or None, optional, \
            default: ``None``
            Data to be used to construct node colors in the Mapper graph
            (according to `color_features` and `node_color_statistic`). Must
            have the same length as `data`. ``None`` is the same as passing
            ``numpy.arange(len(data))``.

        color_features : object or None, optional, default: ``None``
            Specifies one or more feature of interest from `color_data` to be
            used, together with `node_color_statistic`, to determine node
            colors.

                1. ``None`` is equivalent to passing `color_data`.
                2. If an object implementing :meth:`transform` or
                   :meth:`fit_transform`, or a callable, it is applied to
                   `color_data` to generate the features of interest.
                3. If an index or string, or list of indices/strings, it is
                   equivalent to selecting a column or subset of columns from
                   `color_data`.

        node_color_statistic : None or callable, optional, default: ``None``
            If a callable, node colors will be computed as summary statistics
            from the feature array ``y`` determined by `color_data` and
            `color_features`. Let ``y`` have ``n`` columns (note: 1d feature
            arrays are converted to column vectors). Then, for a node
            representing a list ``I`` of row indices, there will be ``n``
            colors, each computed as ``node_color_statistic(y[I, i])`` for
            ``i`` between ``0`` and ``n``. ``None`` is equivalent to passing
            :func:`numpy.mean`.

        layout : None, str or callable, optional, default: ``"kamada_kawai"``
            Layout algorithm for the graph. Can be any accepted value for the
            ``layout`` parameter in the :meth:`layout` method of
            :class:`igraph.Graph` [1]_.

        layout_dim : int, default: ``2``
            The number of dimensions for the layout. Can be 2 or 3.

        n_sig_figs : int or None, optional, default: ``3``
            If not ``None``, number of significant figures to which to round
            node summary statistics. If ``None``, no rounding is performed.

        node_scale : int or float, optional, default: ``12``
            Sets the scale factor used to determine the rendered size of the
            nodes. Increase for larger nodes. Implements a formula in the
            `Plotly documentation \
            <https://plotly.com/python/bubble-charts/#scaling-the-size-of-\
            bubble-charts>`_.

        plotly_params : dict or None, optional, default: ``None``
            Custom parameters to configure the plotly figure. Allowed keys are
            ``"node_trace"``, ``"edge_trace"`` and ``"layout"``, and the
            corresponding values should be dictionaries containing keyword
            arguments as would be fed to the :meth:`update_traces` and
            :meth:`update_layout` methods of
            :class:`plotly.graph_objects.Figure`.

        Returns
        -------
        box : :class:`ipywidgets.VBox` object
            A box containing the following widgets: parameters of the
            clustering algorithm, parameters for the covering scheme, a Mapper
            graph arising from those parameters, a validation box, and logs.

        """
        # Clone pipeline to avoid side effects from in-place parameter changes
        if self.clone_pipeline:
            self._pipeline = clone(self.pipeline)
        else:
            self._pipeline = self.pipeline

        def get_widgets_per_param(params):
            # Yield one (key, widget) pair per parameter whose value has a
            # supported type; other parameters get no widget. `bool` must be
            # tested before `int` because `bool` is a subclass of `int`.
            for key, value in params.items():
                style = {'description_width': 'initial'}
                description = key.split("__")[1] if "__" in key else key
                if isinstance(value, float):
                    yield (key, widgets.FloatText(
                        value=value,
                        step=0.05,
                        description=description,
                        continuous_update=False,
                        disabled=False,
                        layout=Layout(width="90%"),
                        style=style
                    ))
                elif isinstance(value, bool):
                    yield (key, widgets.ToggleButton(
                        value=value,
                        description=description,
                        disabled=False,
                        layout=Layout(width="90%"),
                        style=style
                    ))
                elif isinstance(value, int):
                    yield (key, widgets.IntText(
                        value=value,
                        step=1,
                        description=description,
                        continuous_update=False,
                        disabled=False,
                        layout=Layout(width="90%"),
                        style=style
                    ))
                elif isinstance(value, str):
                    yield (key, widgets.Text(
                        value=value,
                        description=description,
                        continuous_update=False,
                        disabled=False,
                        layout=Layout(width="90%"),
                        style=style
                    ))

        def on_parameter_change(change):
            # Callback fired when any observed widget value changes: push the
            # widget values into the pipeline, refit, and refresh the figure.
            handler.clear_logs()
            try:
                for param, value in cover_params.items():
                    if isinstance(value, (int, float, str)):
                        self._pipeline.set_params(
                            **{param: cover_params_widgets[param].value}
                        )
                for param, value in cluster_params.items():
                    if isinstance(value, (int, float, str)):
                        self._pipeline.set_params(
                            **{param: cluster_params_widgets[param].value}
                        )
                for param, value in nerve_params.items():
                    if isinstance(value, (int, bool)):
                        self._pipeline.set_params(
                            **{param: nerve_params_widgets[param].value}
                        )

                logger.info("Updating figure...")
                with self._figure.batch_update():
                    self._graph = self._pipeline.fit_transform(self.data)
                    (edge_trace, node_trace,
                     self._node_colors_color_features) = \
                        _calculate_graph_data(
                            self._graph, self._color_data_transformed,
                            node_color_statistic, layout, layout_dim,
                            n_sig_figs, node_scale
                        )
                    if colorscale_for_hoverlabel is not None:
                        first_column = self._node_colors_color_features[:, 0]
                        min_col = np.min(first_column)
                        max_col = np.max(first_column)
                        hoverlabel_bgcolor = _get_colors_for_vals(
                            first_column, min_col, max_col,
                            colorscale_for_hoverlabel
                        )
                        self._figure.update_traces(
                            hoverlabel_bgcolor=hoverlabel_bgcolor,
                            selector={"name": "node_trace"}
                        )

                    self._figure.update_traces(
                        x=node_trace.x,
                        y=node_trace.y,
                        marker_color=node_trace.marker.color,
                        marker_size=node_trace.marker.size,
                        marker_sizeref=node_trace.marker.sizeref,
                        hovertext=node_trace.hovertext,
                        **({"z": node_trace.z} if layout_dim == 3 else dict()),
                        selector={"name": "node_trace"}
                    )
                    self._figure.update_traces(
                        x=edge_trace.x,
                        y=edge_trace.y,
                        **({"z": edge_trace.z} if layout_dim == 3 else dict()),
                        selector={"name": "edge_trace"}
                    )

                    # Update color by column buttons if relevant
                    if self._node_colors_color_features.shape[1] > 1:
                        hovertext_color_features = node_trace.hovertext
                        column_color_buttons = _get_column_color_buttons(
                            self._node_colors_color_features,
                            hovertext_color_features,
                            colorscale_for_hoverlabel, n_sig_figs,
                            column_names_dropdown
                        )

                        button_height = 1.1
                        self._figure.update_layout(
                            updatemenus=[
                                go.layout.Updatemenu(
                                    buttons=column_color_buttons,
                                    direction="down",
                                    pad={"r": 10, "t": 10},
                                    showactive=True,
                                    x=0.11,
                                    xanchor="left",
                                    y=button_height,
                                    yanchor="top"
                                )
                            ])

                valid.value = True
            except Exception:
                # Deliberately broad: any failure while refitting must not
                # escape the widget callback; it is logged and surfaced
                # through the "valid" indicator instead.
                exception_data = traceback.format_exc().splitlines()
                logger.exception(exception_data[-1])
                valid.value = False

        def observe_widgets(params, param_widgets):
            # Register the update callback on each widget backing a parameter
            # of a supported type.
            for param, value in params.items():
                if isinstance(value, (int, float, str)):
                    param_widgets[param].observe(
                        on_parameter_change, names="value"
                    )

        # Define output widget to capture logs
        out = widgets.Output()

        @out.capture()
        def click_box(change):
            if logs_box.value:
                out.clear_output()
                handler.show_logs()
            else:
                out.clear_output()

        # Initialise logging. The logger is shared at module level, so the
        # handler installed by a previous call to `plot` on this instance is
        # detached first; otherwise handlers pile up on the logger and stale
        # widgets keep receiving log records.
        logger = logging.getLogger(__name__)
        previous_handler = getattr(self, "_log_handler", None)
        if previous_handler is not None:
            logger.removeHandler(previous_handler)
        handler = OutputWidgetHandler()
        handler.setFormatter(logging.Formatter(
            "%(asctime)s - [%(levelname)s] %(message)s"))
        logger.addHandler(handler)
        self._log_handler = handler
        logger.setLevel(logging.INFO)

        # Initialise cover, cluster and nerve dictionaries of parameters and
        # widgets
        mapper_params_items = self._pipeline.get_mapper_params().items()
        cover_params = {key: value for key, value in mapper_params_items
                        if key.startswith("cover__")}
        cover_params_widgets = dict(get_widgets_per_param(cover_params))
        cluster_params = {key: value for key, value in mapper_params_items
                          if key.startswith("clusterer__")}
        cluster_params_widgets = dict(get_widgets_per_param(cluster_params))
        nerve_params = {key: value for key, value in mapper_params_items
                        if key in ["min_intersection", "contract_nodes"]}
        nerve_params_widgets = dict(get_widgets_per_param(nerve_params))

        # Initialise widgets for validating input parameters of pipeline
        valid = widgets.Valid(
            value=True,
            description="Valid parameters",
            style={"description_width": "100px"},
        )

        # Initialise widget for showing the logs
        logs_box = widgets.Checkbox(
            description="Show logs: ",
            value=False,
            indent=False
        )

        # Initialise figure with initial pipeline and config
        self._graph = self._pipeline.fit_transform(self.data)
        (self._color_data_transformed, column_names_dropdown,
         node_color_statistic) = \
            _validate_color_kwargs(self._graph, self.data, color_data,
                                   color_features, node_color_statistic,
                                   interactive=True)
        edge_trace, node_trace, self._node_colors_color_features = \
            _calculate_graph_data(
                self._graph, self._color_data_transformed,
                node_color_statistic, layout, layout_dim, n_sig_figs,
                node_scale
            )
        self._figure = _produce_static_figure(
            edge_trace, node_trace, self._node_colors_color_features,
            column_names_dropdown, layout_dim, n_sig_figs, plotly_params
        )
        colorscale_for_hoverlabel = None
        if layout_dim == 3:
            # In plot_static_mapper_graph, hoverlabel bgcolors are set to white
            # if something goes wrong in computing them according to the
            # colorscale.
            is_bgcolor_not_white = \
                self._figure.data[1].hoverlabel.bgcolor != "white"
            user_hoverlabel_bgcolor = False
            if plotly_params:
                if "node_trace" in plotly_params:
                    if "hoverlabel_bgcolor" in plotly_params["node_trace"]:
                        user_hoverlabel_bgcolor = True
            if is_bgcolor_not_white and not user_hoverlabel_bgcolor:
                colorscale_for_hoverlabel = \
                    self._figure.data[1].marker.colorscale

        observe_widgets(cover_params, cover_params_widgets)
        observe_widgets(cluster_params, cluster_params_widgets)
        observe_widgets(nerve_params, nerve_params_widgets)

        logs_box.observe(click_box, names="value")

        # Define containers for input widgets
        cover_title = HTML(value="<b>Cover parameters</b>")
        container_cover = widgets.VBox(
            children=[cover_title] + list(cover_params_widgets.values())
        )
        container_cover.layout.align_items = 'center'

        cluster_title = HTML(value="<b>Clusterer parameters</b>")
        container_cluster = widgets.VBox(
            children=[cluster_title] + list(cluster_params_widgets.values()),
        )
        container_cluster.layout.align_items = 'center'

        nerve_title = HTML(value="<b>Nerve parameters</b>")
        container_nerve = widgets.VBox(
            children=[nerve_title] + list(nerve_params_widgets.values()),
        )
        container_nerve.layout.align_items = 'center'

        container_parameters = widgets.HBox(
            children=[container_cover, container_cluster, container_nerve]
        )

        box = widgets.VBox([container_parameters, self._figure, valid,
                            logs_box, out])
        return box

    def _get_plot_output(self, private_name, public_name):
        """Return an attribute set by :meth:`plot`, or raise an
        :class:`AttributeError` naming the public attribute if :meth:`plot`
        has not been called yet."""
        try:
            return getattr(self, private_name)
        except AttributeError:
            raise AttributeError(
                "'{}' object has no attribute '{}' yet: it is only available "
                "after calling 'plot'.".format(
                    type(self).__name__, public_name
                )
            ) from None

    @property
    def graph_(self):
        """Current state of the graph displayed by the widget."""
        return self._get_plot_output("_graph", "graph_")

    @property
    def pipeline_(self):
        """Current state of the Mapper pipeline."""
        return self._get_plot_output("_pipeline", "pipeline_")

    @property
    def color_features_(self):
        """Feature values computed from `color_data` and `color_features`."""
        return self._get_plot_output(
            "_color_data_transformed", "color_features_"
        )

    @property
    def node_summaries_(self):
        """Current per-node summaries used as node colours."""
        return self._get_plot_output(
            "_node_colors_color_features", "node_summaries_"
        )

    @property
    def figure_(self):
        """Current figure representing the Mapper graph."""
        return self._get_plot_output("_figure", "figure_")