"""Static and interactive visualisation functions for Mapper graphs."""
# License: GNU AGPLv3
import logging
import traceback
from copy import deepcopy
from warnings import warn
import numpy as np
import plotly.graph_objects as go
from ipywidgets import widgets, Layout, HTML
from sklearn.base import clone
from .utils._logging import OutputWidgetHandler
from .utils._visualization import (
_calculate_graph_data,
_get_column_color_buttons,
_get_colors_for_vals,
PLOT_OPTIONS_LAYOUT_DEFAULTS
)
def plot_static_mapper_graph(
        pipeline, data, layout="kamada_kawai", layout_dim=2,
        color_variable=None, node_color_statistic=None,
        color_by_columns_dropdown=False, clone_pipeline=True, n_sig_figs=3,
        node_scale=12, plotly_params=None
):
    """Plot Mapper graphs without interactivity on pipeline parameters.

    The output graph is a rendition of the :class:`igraph.Graph` object
    computed by calling the :meth:`fit_transform` method of the
    :class:`~gtda.mapper.pipeline.MapperPipeline` instance `pipeline` on the
    input `data`. The graph's nodes correspond to subsets of elements (rows) in
    `data`; these subsets are clusters in larger portions of `data` called
    "pullback (cover) sets", which are computed by means of the `pipeline`'s
    "filter function" and "cover" and correspond to the differently-colored
    portions in `this diagram <../../../../_images/mapper_pipeline.svg>`_.
    Two clusters from different pullback cover sets can overlap; if they do, an
    edge between the corresponding nodes in the graph may be drawn.

    Nodes are colored according to `color_variable` and `node_color_statistic`
    and are sized according to the number of elements they represent. The
    hovertext on each node displays, in this order:

        - a globally unique ID for the node, which can be used to retrieve
          node information from the :class:`igraph.Graph` object, see
          :class:`~gtda.mapper.nerve.Nerve`;
        - the label of the pullback (cover) set which the node's elements
          form a cluster in;
        - a label identifying the node as a cluster within that pullback set;
        - the number of elements of `data` associated with the node;
        - the value of the summary statistic which determines the node's color.

    Parameters
    ----------
    pipeline : :class:`~gtda.mapper.pipeline.MapperPipeline` object
        Mapper pipeline to act onto data.

    data : array-like of shape (n_samples, n_features)
        Data used to generate the Mapper graph. Can be a pandas dataframe.

    layout : None, str or callable, optional, default: ``"kamada_kawai"``
        Layout algorithm for the graph. Can be any accepted value for the
        ``layout`` parameter in the :meth:`layout` method of
        :class:`igraph.Graph` [1]_.

    layout_dim : int, default: ``2``
        The number of dimensions for the layout. Can be 2 or 3.

    color_variable : object or None, optional, default: ``None``
        Specifies a feature of interest to be used, together with
        `node_color_statistic`, to determine node colors.

            1. If a numpy array or pandas dataframe, it must have the same
               length as `data`.
            2. ``None`` is equivalent to passing `data`.
            3. If an object implementing :meth:`transform` or
               :meth:`fit_transform`, it is applied to `data` to generate the
               feature of interest.
            4. If an index or string, or list of indices/strings, it is
               equivalent to selecting a column or subset of columns from
               `data`.

    node_color_statistic : None, callable, or ndarray of shape (n_nodes,) or \
        (n_nodes, 1), optional, default: ``None``
        If a callable, node colors will be computed as summary statistics from
        the feature array ``Y`` determined by `color_variable` – specifically,
        the color of a node representing the entries of `data` whose row
        indices are in ``I`` will be ``node_color_statistic(Y[I])``. ``None``
        is equivalent to passing :func:`numpy.mean`. If a numpy array, it must
        have the same length as the number of nodes in the Mapper graph and its
        values are used directly as node colors (`color_variable` is ignored).

    color_by_columns_dropdown : bool, optional, default: ``False``
        If ``True``, a dropdown widget is generated which allows the user to
        color Mapper nodes according to any column in `data` (still using
        `node_color_statistic`) in addition to `color_variable`.

    clone_pipeline : bool, optional, default: ``True``
        If ``True``, the input `pipeline` is cloned before computing the
        Mapper graph to prevent unexpected side effects from in-place
        parameter updates.

    n_sig_figs : int or None, optional, default: ``3``
       If not ``None``, number of significant figures to which to round node
       summary statistics. If ``None``, no rounding is performed.

    node_scale : int or float, optional, default: ``12``
        Sets the scale factor used to determine the rendered size of the
        nodes. Increase for larger nodes. Implements a formula in the
        `Plotly documentation \
        <https://plotly.com/python/bubble-charts/#scaling-the-size-of-bubble\
        -charts>`_.

    plotly_params : dict or None, optional, default: ``None``
        Custom parameters to configure the plotly figure. Allowed keys are
        ``"node_trace"``, ``"edge_trace"`` and ``"layout"``, and the
        corresponding values should be dictionaries containing keyword
        arguments as would be fed to the :meth:`update_traces` and
        :meth:`update_layout` methods of :class:`plotly.graph_objects.Figure`.
        The input dictionary is deep-copied and never modified.

    Returns
    -------
    fig : :class:`plotly.graph_objects.Figure` object
        Figure representing the Mapper graph with appropriate node colouring
        and size.

    Warns
    -----
    RuntimeWarning
        When `layout_dim` is 3 and data-dependent hoverlabel background
        colors cannot be computed; the background is then set to white.

    Examples
    --------
    Setting a colorscale different from the default one:

    >>> import numpy as np
    >>> np.random.seed(1)
    >>> from gtda.mapper import make_mapper_pipeline, plot_static_mapper_graph
    >>> pipeline = make_mapper_pipeline()
    >>> data = np.random.random((100, 3))
    >>> plotly_params = {"node_trace": {"marker_colorscale": "Blues"}}
    >>> fig = plot_static_mapper_graph(pipeline, data,
    ...                                plotly_params=plotly_params)

    Inspect the composition of a node with "Node ID" displayed as 0 in the
    hovertext:

    >>> graph = pipeline.fit_transform(data)
    >>> graph.vs[0]["node_elements"]
    array([70])

    See also
    --------
    plot_interactive_mapper_graph, gtda.mapper.make_mapper_pipeline

    References
    ----------
    .. [1] `igraph.Graph.layout
            <https://igraph.org/python/doc/igraph.Graph-class.html#layout>`_
            documentation.

    """
    # Compute the graph and fetch the indices of points in each node
    _pipeline = clone(pipeline) if clone_pipeline else pipeline

    # An explicit ``is None`` test is required here: `node_color_statistic`
    # may be a numpy array, and evaluating the truth value of a multi-element
    # array (as ``node_color_statistic or np.mean`` would) raises ValueError.
    if node_color_statistic is None:
        _node_color_statistic = np.mean
    else:
        _node_color_statistic = node_color_statistic

    # Simple duck typing to determine whether data is likely a pandas dataframe
    is_data_dataframe = hasattr(data, "columns")

    edge_trace, node_trace, node_elements, node_colors_color_variable = \
        _calculate_graph_data(
            _pipeline, data, is_data_dataframe, layout, layout_dim,
            color_variable, _node_color_statistic, n_sig_figs, node_scale
        )

    # Define layout options
    layout_options = go.Layout(
        **PLOT_OPTIONS_LAYOUT_DEFAULTS["common"],
        **PLOT_OPTIONS_LAYOUT_DEFAULTS[layout_dim]
    )

    # The node trace is the second trace, i.e. ``fig.data[1]`` below
    fig = go.FigureWidget(data=[edge_trace, node_trace], layout=layout_options)

    # Work on a private copy because entries are popped from it below
    _plotly_params = deepcopy(plotly_params)

    # When laying out the graph in 3D, plotly does not automatically give
    # the background hoverlabel the same color as the respective marker,
    # so we do this by hand here.
    # TODO: Extract logic so as to avoid repetitions in interactive version
    colorscale_for_hoverlabel = None
    if layout_dim == 3:
        compute_hoverlabel_bgcolor = True
        if _plotly_params:
            if "node_trace" in _plotly_params:
                user_node_params = _plotly_params["node_trace"]
                if "hoverlabel_bgcolor" in user_node_params:
                    # A user-supplied background color takes precedence over
                    # the data-dependent one
                    fig.update_traces(
                        hoverlabel_bgcolor=user_node_params.pop(
                            "hoverlabel_bgcolor"
                        ),
                        selector={"name": "node_trace"}
                    )
                    compute_hoverlabel_bgcolor = False
                if "marker_colorscale" in user_node_params:
                    # Apply the colorscale now so that it is the one read
                    # back below when computing hoverlabel colors
                    fig.update_traces(
                        marker_colorscale=user_node_params.pop(
                            "marker_colorscale"
                        ),
                        selector={"name": "node_trace"}
                    )

        if compute_hoverlabel_bgcolor:
            colorscale_for_hoverlabel = fig.data[1].marker.colorscale
            node_colors_color_variable = np.asarray(node_colors_color_variable)
            try:
                # The range computation is inside the ``try`` so that
                # degenerate node colors (e.g. an empty array) fall back to
                # white instead of aborting the whole plot.
                min_col = np.min(node_colors_color_variable)
                max_col = np.max(node_colors_color_variable)
                hoverlabel_bgcolor = _get_colors_for_vals(
                    node_colors_color_variable, min_col, max_col,
                    colorscale_for_hoverlabel
                )
            except Exception as e:
                # Guard against exceptions carrying no arguments, for which
                # ``e.args[0]`` would itself raise IndexError.
                is_unsupported_colorscale = (
                    bool(e.args)
                    and e.args[0] == "This colorscale is not supported."
                )
                if is_unsupported_colorscale:
                    warn("Data-dependent background hoverlabel colors cannot "
                         "be generated with this choice of colorscale. Please "
                         "use a standard hex- or RGB-formatted colorscale.",
                         RuntimeWarning)
                else:
                    warn("Something went wrong in generating data-dependent "
                         "background hoverlabel colors. All background "
                         "hoverlabel colors will be set to white.",
                         RuntimeWarning)
                hoverlabel_bgcolor = "white"
                colorscale_for_hoverlabel = None
            fig.update_traces(
                hoverlabel_bgcolor=hoverlabel_bgcolor,
                selector={"name": "node_trace"}
            )

    # Compute node colors according to data columns only if necessary
    if color_by_columns_dropdown:
        hovertext_color_variable = node_trace.hovertext
        column_color_buttons = _get_column_color_buttons(
            data, is_data_dataframe, node_elements, node_colors_color_variable,
            _node_color_statistic, hovertext_color_variable,
            colorscale_for_hoverlabel, n_sig_figs
        )
        # Avoid recomputing hoverlabel bgcolor for top button
        column_color_buttons[0]["args"][0]["hoverlabel.bgcolor"] = \
            [None, fig.data[1].hoverlabel.bgcolor]
    else:
        column_color_buttons = None

    button_height = 1.1
    fig.update_layout(
        updatemenus=[
            go.layout.Updatemenu(buttons=column_color_buttons,
                                 direction="down",
                                 pad={"r": 10, "t": 10},
                                 showactive=True,
                                 x=0.11,
                                 xanchor="left",
                                 y=button_height,
                                 yanchor="top")
        ]
    )

    if color_by_columns_dropdown:
        fig.add_annotation(
            go.layout.Annotation(text="Color by:",
                                 x=0,
                                 xref="paper",
                                 y=button_height - 0.045,
                                 yref="paper",
                                 align="left",
                                 showarrow=False)
        )

    # Update traces and layout according to user input
    if _plotly_params:
        for key in ["node_trace", "edge_trace"]:
            fig.update_traces(
                _plotly_params.pop(key, None),
                selector={"name": key}
            )
        fig.update_layout(_plotly_params.pop("layout", None))

    return fig
def plot_interactive_mapper_graph(
        pipeline, data, layout="kamada_kawai", layout_dim=2,
        color_variable=None, node_color_statistic=None, clone_pipeline=True,
        color_by_columns_dropdown=False, n_sig_figs=3, node_scale=12,
        plotly_params=None
):
    """Plot Mapper graphs with interactivity on pipeline parameters.

    Extends :func:`~gtda.mapper.visualization.plot_static_mapper_graph` by
    providing functionality to interactively update parameters from the cover,
    clustering and graph construction steps defined in `pipeline`.

    Parameters
    ----------
    pipeline : :class:`~gtda.mapper.pipeline.MapperPipeline` object
        Mapper pipeline to act onto data.

    data : array-like of shape (n_samples, n_features)
        Data used to generate the Mapper graph. Can be a pandas dataframe.

    layout : None, str or callable, optional, default: ``"kamada_kawai"``
        Layout algorithm for the graph. Can be any accepted value for the
        ``layout`` parameter in the :meth:`layout` method of
        :class:`igraph.Graph` [1]_.

    layout_dim : int, default: ``2``
        The number of dimensions for the layout. Can be 2 or 3.

    color_variable : object or None, optional, default: ``None``
        Specifies a feature of interest to be used, together with
        `node_color_statistic`, to determine node colors.

            1. If a numpy array or pandas dataframe, it must have the same
               length as `data`.
            2. ``None`` is equivalent to passing `data`.
            3. If an object implementing :meth:`transform` or
               :meth:`fit_transform`, it is applied to `data` to generate the
               feature of interest.
            4. If an index or string, or list of indices/strings, it is
               equivalent to selecting a column or subset of columns from
               `data`.

    node_color_statistic : callable or None, optional, default: ``None``
        If a callable, node colors will be computed as summary statistics from
        the feature array ``Y`` determined by `color_variable` – specifically,
        the color of a node representing the entries of `data` whose row
        indices are in ``I`` will be ``node_color_statistic(Y[I])``. ``None``
        is equivalent to passing :func:`numpy.mean`.

    color_by_columns_dropdown : bool, optional, default: ``False``
        If ``True``, a dropdown widget is generated which allows the user to
        color Mapper nodes according to any column in `data` (still using
        `node_color_statistic`) in addition to `color_variable`.

    clone_pipeline : bool, optional, default: ``True``
        If ``True``, the input `pipeline` is cloned before computing the
        Mapper graph to prevent unexpected side effects from in-place
        parameter updates.

    n_sig_figs : int or None, optional, default: ``3``
       If not ``None``, number of significant figures to which to round node
       summary statistics. If ``None``, no rounding is performed.

    node_scale : int or float, optional, default: ``12``
        Sets the scale factor used to determine the rendered size of the
        nodes. Increase for larger nodes. Implements a formula in the
        `Plotly documentation \
        <https://plotly.com/python/bubble-charts/#scaling-the-size-of-bubble\
        -charts>`_.

    plotly_params : dict or None, optional, default: ``None``
        Custom parameters to configure the plotly figure. Allowed keys are
        ``"node_trace"``, ``"edge_trace"`` and ``"layout"``, and the
        corresponding values should be dictionaries containing keyword
        arguments as would be fed to the :meth:`update_traces` and
        :meth:`update_layout` methods of :class:`plotly.graph_objects.Figure`.

    Returns
    -------
    box : :class:`ipywidgets.VBox` object
        A box containing the following widgets: parameters of the clustering
        algorithm, parameters for the covering scheme, a Mapper graph arising
        from those parameters, a validation box, and logs.

    See also
    --------
    plot_static_mapper_graph, gtda.mapper.pipeline.make_mapper_pipeline

    References
    ----------
    .. [1] `igraph.Graph.layout
            <https://igraph.org/python/doc/igraph.Graph-class.html#layout>`_
            documentation.

    """
    # Clone pipeline to avoid side effects from in-place parameter changes
    _pipeline = clone(pipeline) if clone_pipeline else pipeline

    # Explicit ``is None`` test rather than ``or``, so that the fallback does
    # not depend on the truthiness of whatever object was passed.
    if node_color_statistic is None:
        _node_color_statistic = np.mean
    else:
        _node_color_statistic = node_color_statistic

    def get_widgets_per_param(params):
        """Yield ``(key, widget)`` pairs for float, bool, int and str values.

        Parameters whose value has any other type yield nothing and hence get
        no widget.
        """
        for key, value in params.items():
            style = {'description_width': 'initial'}
            # Strip only the leading step prefix: splitting on every "__"
            # would give nested parameters such as "a__b__c" and "a__b__d"
            # the same, truncated, description "b".
            description = key.split("__", 1)[1] if "__" in key else key
            if isinstance(value, float):
                yield (key, widgets.FloatText(
                    value=value,
                    step=0.05,
                    description=description,
                    continuous_update=False,
                    disabled=False,
                    layout=Layout(width="90%"),
                    style=style
                ))
            # bool must be tested before int because bool subclasses int
            elif isinstance(value, bool):
                yield (key, widgets.ToggleButton(
                    value=value,
                    description=description,
                    disabled=False,
                    layout=Layout(width="90%"),
                    style=style
                ))
            elif isinstance(value, int):
                yield (key, widgets.IntText(
                    value=value,
                    step=1,
                    description=description,
                    continuous_update=False,
                    disabled=False,
                    layout=Layout(width="90%"),
                    style=style
                ))
            elif isinstance(value, str):
                yield (key, widgets.Text(
                    value=value,
                    description=description,
                    continuous_update=False,
                    disabled=False,
                    layout=Layout(width="90%"),
                    style=style
                ))

    def on_parameter_change(change):
        """Push widget values into the pipeline and redraw the figure.

        Any exception is logged and reflected in the ``valid`` widget instead
        of being propagated.
        """
        handler.clear_logs()
        try:
            # Same order and same type filters for each parameter group
            param_groups = (
                (cover_params, cover_params_widgets, (int, float, str)),
                (cluster_params, cluster_params_widgets, (int, float, str)),
                (nerve_params, nerve_params_widgets, (int, bool)),
            )
            for params, param_widgets, accepted_types in param_groups:
                for param, value in params.items():
                    if isinstance(value, accepted_types):
                        _pipeline.set_params(
                            **{param: param_widgets[param].value}
                        )

            logger.info("Updating figure...")
            with fig.batch_update():
                (edge_trace, node_trace, node_elements,
                 node_colors_color_variable) = _calculate_graph_data(
                    _pipeline, data, is_data_dataframe, layout, layout_dim,
                    color_variable, _node_color_statistic, n_sig_figs,
                    node_scale
                )
                # Only set when data-dependent hoverlabel colors are in use
                # (3D layout, supported colorscale, no user override)
                if colorscale_for_hoverlabel is not None:
                    node_colors_color_variable = \
                        np.asarray(node_colors_color_variable)
                    min_col = np.min(node_colors_color_variable)
                    max_col = np.max(node_colors_color_variable)
                    hoverlabel_bgcolor = _get_colors_for_vals(
                        node_colors_color_variable, min_col, max_col,
                        colorscale_for_hoverlabel
                    )
                    fig.update_traces(hoverlabel_bgcolor=hoverlabel_bgcolor,
                                      selector={"name": "node_trace"})

                # The z coordinate only exists for 3D layouts
                node_z = {"z": node_trace.z} if layout_dim == 3 else {}
                edge_z = {"z": edge_trace.z} if layout_dim == 3 else {}
                fig.update_traces(
                    x=node_trace.x,
                    y=node_trace.y,
                    marker_color=node_trace.marker.color,
                    marker_size=node_trace.marker.size,
                    marker_sizeref=node_trace.marker.sizeref,
                    hovertext=node_trace.hovertext,
                    **node_z,
                    selector={"name": "node_trace"}
                )
                fig.update_traces(
                    x=edge_trace.x,
                    y=edge_trace.y,
                    **edge_z,
                    selector={"name": "edge_trace"}
                )

                # Update color by column buttons
                if color_by_columns_dropdown:
                    hovertext_color_variable = node_trace.hovertext
                    column_color_buttons = _get_column_color_buttons(
                        data, is_data_dataframe, node_elements,
                        node_colors_color_variable, _node_color_statistic,
                        hovertext_color_variable, colorscale_for_hoverlabel,
                        n_sig_figs
                    )
                    # Avoid recomputing hoverlabel bgcolor for top button
                    if colorscale_for_hoverlabel is not None:
                        column_color_buttons[0]["args"][0][
                            "hoverlabel.bgcolor"] = [None, hoverlabel_bgcolor]
                else:
                    column_color_buttons = None

                button_height = 1.1
                fig.update_layout(
                    updatemenus=[
                        go.layout.Updatemenu(
                            buttons=column_color_buttons,
                            direction="down",
                            pad={"r": 10, "t": 10},
                            showactive=True,
                            x=0.11,
                            xanchor="left",
                            y=button_height,
                            yanchor="top"
                        )
                    ])

            valid.value = True
        except Exception:
            # Log only the last traceback line as the message;
            # ``logger.exception`` attaches the full exception info itself.
            exception_data = traceback.format_exc().splitlines()
            logger.exception(exception_data[-1])
            valid.value = False

    def observe_widgets(params, param_widgets):
        """Register ``on_parameter_change`` on int, float and str widgets."""
        # The second argument is deliberately not called ``widgets``, which
        # would shadow the imported ``ipywidgets.widgets`` module.
        for param, value in params.items():
            if isinstance(value, (int, float, str)):
                param_widgets[param].observe(on_parameter_change,
                                             names="value")

    # Define output widget to capture logs
    out = widgets.Output()

    @out.capture()
    def click_box(change):
        """Show the logs when the checkbox is ticked, clear them otherwise."""
        out.clear_output()
        if logs_box.value:
            handler.show_logs()

    # Initialise logging
    # NOTE(review): the module-level logger is shared across calls, so every
    # call to this function attaches one more handler to it — confirm whether
    # handlers from earlier widgets should be detached.
    logger = logging.getLogger(__name__)
    handler = OutputWidgetHandler()
    handler.setFormatter(logging.Formatter(
        "%(asctime)s - [%(levelname)s] %(message)s"))
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    # Initialise cover, cluster and nerve dictionaries of parameters and
    # widgets
    mapper_params_items = _pipeline.get_mapper_params().items()
    cover_params = {key: value for key, value in mapper_params_items
                    if key.startswith("cover__")}
    cover_params_widgets = dict(get_widgets_per_param(cover_params))
    cluster_params = {key: value for key, value in mapper_params_items
                      if key.startswith("clusterer__")}
    cluster_params_widgets = dict(get_widgets_per_param(cluster_params))
    nerve_params = {key: value for key, value in mapper_params_items
                    if key in ["min_intersection", "contract_nodes"]}
    nerve_params_widgets = dict(get_widgets_per_param(nerve_params))

    # Initialise widgets for validating input parameters of pipeline
    valid = widgets.Valid(
        value=True,
        description="Valid parameters",
        style={"description_width": "100px"},
    )

    # Initialise widget for showing the logs
    logs_box = widgets.Checkbox(
        description="Show logs: ",
        value=False,
        indent=False
    )

    # Initialise figure with initial pipeline and config. The pipeline has
    # already been cloned above if requested, hence ``clone_pipeline=False``.
    fig = plot_static_mapper_graph(
        _pipeline, data, layout=layout, layout_dim=layout_dim,
        color_variable=color_variable,
        node_color_statistic=_node_color_statistic,
        color_by_columns_dropdown=color_by_columns_dropdown,
        clone_pipeline=False, n_sig_figs=n_sig_figs, node_scale=node_scale,
        plotly_params=plotly_params
    )

    # Store variables for later updates
    is_data_dataframe = hasattr(data, "columns")

    colorscale_for_hoverlabel = None
    if layout_dim == 3:
        # In plot_static_mapper_graph, hoverlabel bgcolors are set to white if
        # something goes wrong in computing them according to the colorscale.
        is_bgcolor_not_white = fig.data[1].hoverlabel.bgcolor != "white"
        user_hoverlabel_bgcolor = False
        if plotly_params:
            if "node_trace" in plotly_params:
                if "hoverlabel_bgcolor" in plotly_params["node_trace"]:
                    user_hoverlabel_bgcolor = True
        if is_bgcolor_not_white and not user_hoverlabel_bgcolor:
            colorscale_for_hoverlabel = fig.data[1].marker.colorscale

    observe_widgets(cover_params, cover_params_widgets)
    observe_widgets(cluster_params, cluster_params_widgets)
    observe_widgets(nerve_params, nerve_params_widgets)

    logs_box.observe(click_box, names="value")

    # Define containers for input widgets
    cover_title = HTML(value="<b>Cover parameters</b>")
    container_cover = widgets.VBox(
        children=[cover_title] + list(cover_params_widgets.values())
    )
    container_cover.layout.align_items = 'center'

    cluster_title = HTML(value="<b>Clusterer parameters</b>")
    container_cluster = widgets.VBox(
        children=[cluster_title] + list(cluster_params_widgets.values()),
    )
    container_cluster.layout.align_items = 'center'

    nerve_title = HTML(value="<b>Nerve parameters</b>")
    container_nerve = widgets.VBox(
        children=[nerve_title] + list(nerve_params_widgets.values()),
    )
    container_nerve.layout.align_items = 'center'

    container_parameters = widgets.HBox(
        children=[container_cover, container_cluster, container_nerve]
    )

    box = widgets.VBox([container_parameters, fig, valid, logs_box, out])
    return box