Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed-up multi-index html repr + add display_values_threshold option #6400

Merged
merged 6 commits into from
Mar 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ New Features
- Multi-index levels are now accessible through their own, regular coordinates
instead of virtual coordinates (:pull:`5692`).
By `Benoît Bovy <https://github.com/benbovy>`_.
- Add a ``display_values_threshold`` option to control the total number of array
elements above which the (numpy) array detailed views of the html repr are
summarized rather than shown in full (:pull:`6400`).
By `Benoît Bovy <https://github.com/benbovy>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -60,6 +64,8 @@ Bug fixes
- Fixed "unhashable type" error trying to read NetCDF file with variable having its 'units'
attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`).
By `Oleh Khoma <https://github.com/okhoma>`_.
- Fixed the poor html repr performance on large multi-indexes (:pull:`6400`).
By `Benoît Bovy <https://github.com/benbovy>`_.
- Allow fancy indexing of duck dask arrays along multiple dimensions. (:pull:`6414`)
By `Justus Magin <https://github.com/keewis>`_.

Expand Down
6 changes: 5 additions & 1 deletion xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,11 @@ def short_numpy_repr(array):

# default to lower precision so a full (abbreviated) line can fit on
# one line with the default display_width
options = {"precision": 6, "linewidth": OPTIONS["display_width"], "threshold": 200}
options = {
"precision": 6,
"linewidth": OPTIONS["display_width"],
"threshold": OPTIONS["display_values_threshold"],
}
if array.ndim < 3:
edgeitems = 3
elif array.ndim == 3:
Expand Down
34 changes: 22 additions & 12 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from contextlib import suppress
from dataclasses import dataclass, field
from datetime import timedelta
from html import escape
from typing import (
TYPE_CHECKING,
Any,
Expand All @@ -25,6 +26,7 @@

from . import duck_array_ops, nputils, utils
from .npcompat import DTypeLike
from .options import OPTIONS
from .pycompat import dask_version, integer_types, is_duck_dask_array, sparse_array_type
from .types import T_Xarray
from .utils import either_dict_or_kwargs, get_valid_numpy_dtype
Expand Down Expand Up @@ -1507,23 +1509,31 @@ def __repr__(self) -> str:
)
return f"{type(self).__name__}{props}"

def _repr_inline_(self, max_width) -> str:
# special implementation to speed-up the repr for big multi-indexes
def _get_array_subset(self) -> np.ndarray:
    """Return this adapter's values as a numpy array, truncated for display.

    Used to speed up the repr for big multi-indexes: when the adapter holds
    more elements than a display-derived limit, only the leading and
    trailing elements are materialized instead of the whole index.

    Returns
    -------
    np.ndarray
        All values when ``self.size`` does not exceed the limit; otherwise
        the first ``limit // 2`` values followed by the last ``limit // 2``.
    """
    # The limit is never below 100 and sits 2 above the configured
    # ``display_values_threshold``.
    # NOTE(review): the "+ 2" presumably keeps the truncated array larger
    # than the threshold so the repr still summarizes it; confirm against
    # the formatting helpers that consume this array.
    limit = max(100, OPTIONS["display_values_threshold"] + 2)

    if self.size <= limit:
        return np.asarray(self)

    half = limit // 2
    # Positive positions select the head, negative positions the tail.
    head_and_tail = np.concatenate([np.arange(0, half), np.arange(-half, 0)])
    return np.asarray(self[OuterIndexer((head_and_tail,))])

def _repr_inline_(self, max_width: int) -> str:
from .formatting import format_array_flat

if self.level is None:
return "MultiIndex"
else:
from .formatting import format_array_flat
return format_array_flat(self._get_array_subset(), max_width)

if self.size > 100 and max_width < self.size:
n_values = max_width
indices = np.concatenate(
[np.arange(0, n_values), np.arange(-n_values, 0)]
)
subset = self[OuterIndexer((indices,))]
else:
subset = self
def _repr_html_(self) -> str:
from .formatting import short_numpy_repr

return format_array_flat(np.asarray(subset), max_width)
array_repr = short_numpy_repr(self._get_array_subset())
return f"<pre>{escape(array_repr)}</pre>"

def copy(self, deep: bool = True) -> "PandasMultiIndexingAdapter":
# see PandasIndexingAdapter.copy
Expand Down
6 changes: 6 additions & 0 deletions xarray/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class T_Options(TypedDict):
cmap_divergent: Union[str, "Colormap"]
cmap_sequential: Union[str, "Colormap"]
display_max_rows: int
display_values_threshold: int
display_style: Literal["text", "html"]
display_width: int
display_expand_attrs: Literal["default", True, False]
Expand All @@ -33,6 +34,7 @@ class T_Options(TypedDict):
"cmap_divergent": "RdBu_r",
"cmap_sequential": "viridis",
"display_max_rows": 12,
"display_values_threshold": 200,
"display_style": "html",
"display_width": 80,
"display_expand_attrs": "default",
Expand All @@ -57,6 +59,7 @@ def _positive_integer(value):
_VALIDATORS = {
"arithmetic_join": _JOIN_OPTIONS.__contains__,
"display_max_rows": _positive_integer,
"display_values_threshold": _positive_integer,
"display_style": _DISPLAY_OPTIONS.__contains__,
"display_width": _positive_integer,
"display_expand_attrs": lambda choice: choice in [True, False, "default"],
Expand Down Expand Up @@ -154,6 +157,9 @@ class set_options:
* ``default`` : to expand unless over a pre-defined limit
display_max_rows : int, default: 12
Maximum display rows.
display_values_threshold : int, default: 200
Total number of array elements which trigger summarization rather
than full repr for variable data views (numpy arrays).
display_style : {"text", "html"}, default: "html"
Display style to use in jupyter for xarray objects.
display_width : int, default: 80
Expand Down
6 changes: 6 additions & 0 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,12 @@ def test_short_numpy_repr() -> None:
num_lines = formatting.short_numpy_repr(array).count("\n") + 1
assert num_lines < 30

# threshold option (default: 200)
array = np.arange(100)
assert "..." not in formatting.short_numpy_repr(array)
with xr.set_options(display_values_threshold=10):
assert "..." in formatting.short_numpy_repr(array)


def test_large_array_repr_length() -> None:

Expand Down