diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 39aaf8e2954..e01bdf93b00 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,6 +32,10 @@ New Features - Multi-index levels are now accessible through their own, regular coordinates instead of virtual coordinates (:pull:`5692`). By `Benoît Bovy `_. +- Add a ``display_values_threshold`` option to control the total number of array + elements which trigger summarization rather than full repr in (numpy) array + detailed views of the html repr (:pull:`6400`). + By `Benoît Bovy `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -60,6 +64,8 @@ Bug fixes - Fixed "unhashable type" error trying to read NetCDF file with variable having its 'units' attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`). By `Oleh Khoma `_. +- Fixed the poor html repr performance on large multi-indexes (:pull:`6400`). + By `Benoît Bovy `_. - Allow fancy indexing of duck dask arrays along multiple dimensions. (:pull:`6414`) By `Justus Magin `_. diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 81617ae38f9..e372e3bdd40 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -520,7 +520,11 @@ def short_numpy_repr(array): # default to lower precision so a full (abbreviated) line can fit on # one line with the default display_width - options = {"precision": 6, "linewidth": OPTIONS["display_width"], "threshold": 200} + options = { + "precision": 6, + "linewidth": OPTIONS["display_width"], + "threshold": OPTIONS["display_values_threshold"], + } if array.ndim < 3: edgeitems = 3 elif array.ndim == 3: diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c8851788c29..27bd4954bc4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -5,6 +5,7 @@ from contextlib import suppress from dataclasses import dataclass, field from datetime import timedelta +from html import escape from typing import ( TYPE_CHECKING, Any, @@ -25,6 +26,7 @@ from . 
import duck_array_ops, nputils, utils from .npcompat import DTypeLike +from .options import OPTIONS from .pycompat import dask_version, integer_types, is_duck_dask_array, sparse_array_type from .types import T_Xarray from .utils import either_dict_or_kwargs, get_valid_numpy_dtype @@ -1507,23 +1509,31 @@ def __repr__(self) -> str: ) return f"{type(self).__name__}{props}" - def _repr_inline_(self, max_width) -> str: - # special implementation to speed-up the repr for big multi-indexes + def _get_array_subset(self) -> np.ndarray: + # used to speed-up the repr for big multi-indexes + threshold = max(100, OPTIONS["display_values_threshold"] + 2) + if self.size > threshold: + pos = threshold // 2 + indices = np.concatenate([np.arange(0, pos), np.arange(-pos, 0)]) + subset = self[OuterIndexer((indices,))] + else: + subset = self + + return np.asarray(subset) + + def _repr_inline_(self, max_width: int) -> str: + from .formatting import format_array_flat + if self.level is None: return "MultiIndex" else: - from .formatting import format_array_flat + return format_array_flat(self._get_array_subset(), max_width) - if self.size > 100 and max_width < self.size: - n_values = max_width - indices = np.concatenate( - [np.arange(0, n_values), np.arange(-n_values, 0)] - ) - subset = self[OuterIndexer((indices,))] - else: - subset = self + def _repr_html_(self) -> str: + from .formatting import short_numpy_repr - return format_array_flat(np.asarray(subset), max_width) + array_repr = short_numpy_repr(self._get_array_subset()) + return f"<pre>{escape(array_repr)}</pre>" def copy(self, deep: bool = True) -> "PandasMultiIndexingAdapter": # see PandasIndexingAdapter.copy diff --git a/xarray/core/options.py b/xarray/core/options.py index 0c45e126fe6..399afe90b66 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -15,6 +15,7 @@ class T_Options(TypedDict): cmap_divergent: Union[str, "Colormap"] cmap_sequential: Union[str, "Colormap"] display_max_rows: int + display_values_threshold: int display_style: Literal["text", "html"] display_width: int display_expand_attrs: Literal["default", True, False] @@ -33,6 +34,7 @@ class T_Options(TypedDict): "cmap_divergent": "RdBu_r", "cmap_sequential": "viridis", "display_max_rows": 12, + "display_values_threshold": 200, "display_style": "html", "display_width": 80, "display_expand_attrs": "default", @@ -57,6 +59,7 @@ def _positive_integer(value): _VALIDATORS = { "arithmetic_join": _JOIN_OPTIONS.__contains__, "display_max_rows": _positive_integer, + "display_values_threshold": _positive_integer, "display_style": _DISPLAY_OPTIONS.__contains__, "display_width": _positive_integer, "display_expand_attrs": lambda choice: choice in [True, False, "default"], @@ -154,6 +157,9 @@ class set_options: * ``default`` : to expand unless over a pre-defined limit display_max_rows : int, default: 12 Maximum display rows. + display_values_threshold : int, default: 200 + Total number of array elements which trigger summarization rather + than full repr for variable data views (numpy arrays). display_style : {"text", "html"}, default: "html" Display style to use in jupyter for xarray objects.
display_width : int, default: 80 diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 105cec7e850..efdb8a57288 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -479,6 +479,12 @@ def test_short_numpy_repr() -> None: num_lines = formatting.short_numpy_repr(array).count("\n") + 1 assert num_lines < 30 + # threshold option (default: 200) + array = np.arange(100) + assert "..." not in formatting.short_numpy_repr(array) + with xr.set_options(display_values_threshold=10): + assert "..." in formatting.short_numpy_repr(array) + def test_large_array_repr_length() -> None: