Skip to content

Commit a78c1e0

Browse files
authored
dataset __repr__ updates (#5580)
* Delegate `max_rows` from dataset `__repr__` As discussed in #5545 the default setting of `display_max_rows` is sometimes less useful to inspect datasets for completeness, and changing the option is not backwards compatible. In addition, a concise and short output dataset seems to be preferred by most people. The compromise is to keep the dataset's `__repr__` short and tunable via `xr.set_options(display_max_rows=...)`, and at the same time to enable the output of all items by explicitly requesting `ds.coords`, `ds.data_vars`, and `ds.attrs`. These explicit `__repr__`s also restore backwards compatibility in these cases. Slightly changes the internal implementation of `_mapping_repr()`: Setting (leaving) `max_rows` to `None` means "no limits". * tests: Update dataset `__repr__` tests [1/2] Updates the dataset `__repr__` test to assure that the dataset output honours the `display_max_rows` setting, not the `data_vars` output. Discussed in #5545 * tests: Extend dataset `__repr__` tests [2/2] Extends the dataset `__repr__` test to ensure that the output of `ds.coords`, `ds.data_vars`, and `ds.attrs` is of full length as desired. Includes more dimensions and coordinates to cover more cases. Discussed in #5545 * doc: Add what's new entry for `__repr__` changes Sorted as a "breaking change" for 0.18.3 for now. * Revert "doc: Add what's new entry for `__repr__` changes" This reverts commit 3dd645b. * doc: Add what's new entry for `__repr__` changes Sorted as a "breaking change", for 0.19.1 for now. * doc: Remove `attrs` from `__repr__` changes Address comment from @keewis: `.attrs` is a standard python dict, so there's no custom repr. * tests: Remove `ds.attrs` formatting test According to @keewis: I don't think we need to test this because `attrs_repr` will only ever be called by `dataset_repr` / `array_repr`: on its own, the standard python `dict`'s `repr` will be used. * tests: Fix no. 
of coordinates in formatting_repr. The number of coordinates changed to match the number of variables, which was only incidentally fixed at 40. Updates the to-be-tested format string to use the same number of variables instead of the hard-coded one, which might be subject to change.
1 parent 28bdcf0 commit a78c1e0

File tree

3 files changed

+33
-15
lines changed

3 files changed

+33
-15
lines changed

doc/whats-new.rst

+5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ New Features
3333
Breaking changes
3434
~~~~~~~~~~~~~~~~
3535

36+
- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``coords`` and ``data_vars``
37+
ignores ``xarray.set_options(display_max_rows=...)`` and shows the full output
38+
when called directly as, e.g., ``ds.data_vars`` or ``print(ds.data_vars)``
39+
(:issue:`5545`, :pull:`5580`).
40+
By `Stefan Bender <https://github.com/st-bender>`_.
3641

3742
Deprecations
3843
~~~~~~~~~~~~

xarray/core/formatting.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -377,14 +377,12 @@ def _mapping_repr(
377377
):
378378
if col_width is None:
379379
col_width = _calculate_col_width(mapping)
380-
if max_rows is None:
381-
max_rows = OPTIONS["display_max_rows"]
382380
summary = [f"{title}:"]
383381
if mapping:
384382
len_mapping = len(mapping)
385383
if not _get_boolean_with_default(expand_option_name, default=True):
386384
summary = [f"{summary[0]} ({len_mapping})"]
387-
elif len_mapping > max_rows:
385+
elif max_rows is not None and len_mapping > max_rows:
388386
summary = [f"{summary[0]} ({max_rows}/{len_mapping})"]
389387
first_rows = max_rows // 2 + max_rows % 2
390388
keys = list(mapping.keys())
@@ -418,7 +416,7 @@ def _mapping_repr(
418416
)
419417

420418

421-
def coords_repr(coords, col_width=None):
419+
def coords_repr(coords, col_width=None, max_rows=None):
422420
if col_width is None:
423421
col_width = _calculate_col_width(_get_col_items(coords))
424422
return _mapping_repr(
@@ -427,6 +425,7 @@ def coords_repr(coords, col_width=None):
427425
summarizer=summarize_coord,
428426
expand_option_name="display_expand_coords",
429427
col_width=col_width,
428+
max_rows=max_rows,
430429
)
431430

432431

@@ -544,21 +543,22 @@ def dataset_repr(ds):
544543
summary = ["<xarray.{}>".format(type(ds).__name__)]
545544

546545
col_width = _calculate_col_width(_get_col_items(ds.variables))
546+
max_rows = OPTIONS["display_max_rows"]
547547

548548
dims_start = pretty_print("Dimensions:", col_width)
549549
summary.append("{}({})".format(dims_start, dim_summary(ds)))
550550

551551
if ds.coords:
552-
summary.append(coords_repr(ds.coords, col_width=col_width))
552+
summary.append(coords_repr(ds.coords, col_width=col_width, max_rows=max_rows))
553553

554554
unindexed_dims_str = unindexed_dims_repr(ds.dims, ds.coords)
555555
if unindexed_dims_str:
556556
summary.append(unindexed_dims_str)
557557

558-
summary.append(data_vars_repr(ds.data_vars, col_width=col_width))
558+
summary.append(data_vars_repr(ds.data_vars, col_width=col_width, max_rows=max_rows))
559559

560560
if ds.attrs:
561-
summary.append(attrs_repr(ds.attrs))
561+
summary.append(attrs_repr(ds.attrs, max_rows=max_rows))
562562

563563
return "\n".join(summary)
564564

xarray/tests/test_formatting.py

+21-8
Original file line numberDiff line numberDiff line change
@@ -509,41 +509,54 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr):
509509
long_name = "long_name"
510510
a = np.core.defchararray.add(long_name, np.arange(0, n_vars).astype(str))
511511
b = np.core.defchararray.add("attr_", np.arange(0, n_attr).astype(str))
512+
c = np.core.defchararray.add("coord", np.arange(0, n_vars).astype(str))
512513
attrs = {k: 2 for k in b}
513-
coords = dict(time=np.array([0, 1]))
514+
coords = {_c: np.array([0, 1]) for _c in c}
514515
data_vars = dict()
515-
for v in a:
516+
for (v, _c) in zip(a, coords.items()):
516517
data_vars[v] = xr.DataArray(
517518
name=v,
518519
data=np.array([3, 4]),
519-
dims=["time"],
520-
coords=coords,
520+
dims=[_c[0]],
521+
coords=dict([_c]),
521522
)
522523
ds = xr.Dataset(data_vars)
523524
ds.attrs = attrs
524525

525526
with xr.set_options(display_max_rows=display_max_rows):
526527

527528
# Parse the full dataset print and keep only the data_vars rows:
528-
summary = formatting.data_vars_repr(ds.data_vars).split("\n")
529+
summary = formatting.dataset_repr(ds).split("\n")
529530
summary = [v for v in summary if long_name in v]
530-
531531
# The length should be less than or equal to display_max_rows:
532532
len_summary = len(summary)
533533
data_vars_print_size = min(display_max_rows, len_summary)
534534
assert len_summary == data_vars_print_size
535535

536+
summary = formatting.data_vars_repr(ds.data_vars).split("\n")
537+
summary = [v for v in summary if long_name in v]
538+
# The length should be equal to the number of data variables
539+
len_summary = len(summary)
540+
assert len_summary == n_vars
541+
542+
summary = formatting.coords_repr(ds.coords).split("\n")
543+
summary = [v for v in summary if "coord" in v]
544+
# The length should be equal to the number of coordinates (n_vars here)
545+
len_summary = len(summary)
546+
assert len_summary == n_vars
547+
536548
with xr.set_options(
537549
display_expand_coords=False,
538550
display_expand_data_vars=False,
539551
display_expand_attrs=False,
540552
):
541553
actual = formatting.dataset_repr(ds)
554+
coord_s = ", ".join([f"{c}: {len(v)}" for c, v in coords.items()])
542555
expected = dedent(
543556
f"""\
544557
<xarray.Dataset>
545-
Dimensions: (time: 2)
546-
Coordinates: (1)
558+
Dimensions: ({coord_s})
559+
Coordinates: ({n_vars})
547560
Data variables: ({n_vars})
548561
Attributes: ({n_attr})"""
549562
)

0 commit comments

Comments
 (0)