-
-
Notifications
You must be signed in to change notification settings - Fork 19.4k
Shorter MultiIndex representation #21145
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,36 @@ and bug fixes. We recommend that all users upgrade to this version. | |
| New features | ||
| ~~~~~~~~~~~~ | ||
|
|
||
| .. _whatsnew_0231.enhancements.new_multi_index_repr_: | ||
|
|
||
| MultiIndex now limits how many levels/labels are shown when printed | ||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
|
||
| Outputting a :class:`MultiIndex` used to print all levels and labels of the | ||
| multiindex. This could be a problem for large indices as the output could be | ||
| slow to print and make the console output difficult to navigate. | ||
|
|
||
| Outputting of ``MultiIndex`` instances now has limits to the number of levels | ||
| and labels shown (:issue:`21145`): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. extra paren here
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be the issue number |
||
|
|
||
| .. ipython:: python | ||
|
|
||
| index1=range(1000) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can make this section a bit shorter overall. |
||
| pd.MultiIndex.from_arrays([index1, index1]) | ||
|
|
||
| Previously all 1000 index rows would have been shown. | ||
|
|
||
| For a smaller number of values, all values will still be shown: | ||
|
|
||
| .. ipython:: python | ||
|
|
||
| index1=range(30) | ||
| pd.MultiIndex.from_arrays([index1, index1]) | ||
| index1=range(2) | ||
| pd.MultiIndex.from_arrays([index1, index1]) | ||
|
|
||
| You can change the cutoff point for when all values are shown in the outputs | ||
| by changing :attr:`options.display.max_seq_items` (default is 100). | ||
|
|
||
| .. _whatsnew_0231.deprecations: | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,7 +7,7 @@ | |
| import numpy as np | ||
| from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp | ||
|
|
||
| from pandas.compat import range, zip, lrange, lzip, map | ||
| from pandas.compat import range, zip, lrange, lzip, map, u | ||
| from pandas.compat.numpy import function as nv | ||
| from pandas import compat | ||
|
|
||
|
|
@@ -609,11 +609,28 @@ def _format_attrs(self): | |
| """ | ||
| Return a list of tuples of the (attr,formatted_value) | ||
| """ | ||
| def to_string_helper(obj, attr_name): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instead of creating new functions, can you simply override
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This helper is inside Not what you meant, but it could be placed in top level
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok so move this to pandas.io.formats.printing and make as generic as possible. |
||
| """converts obj.attr_name to a string. | ||
| """ | ||
| indices = getattr(obj, attr_name) | ||
| if attr_name == 'labels': | ||
| # self.labels is a list of FrozenNDArray, Index._format_data | ||
| # expects a pd.Index | ||
| indices = [Index(i) for i in indices] | ||
|
|
||
| _name = u("{}({}=").format(obj.__class__.__name__, attr_name) | ||
| attr_string = [idx._format_data(name=_name) | ||
| for idx in indices] | ||
| attr_string = u("").join(attr_string) | ||
| if attr_string.endswith(u(", ")): # else [1, 2, ], want [1, 2] | ||
| attr_string = attr_string[:-2] | ||
|
|
||
| return u("[{}]").format(attr_string) | ||
|
|
||
| attrs = [ | ||
| ('levels', ibase.default_pprint(self._levels, | ||
| max_seq_items=False)), | ||
| ('labels', ibase.default_pprint(self._labels, | ||
| max_seq_items=False))] | ||
| ('levels', to_string_helper(self, attr_name='levels')), | ||
| ('labels', to_string_helper(self, attr_name='labels')), | ||
| ] | ||
| if com._any_not_none(*self.names): | ||
| attrs.append(('names', ibase.default_pprint(self.names))) | ||
| if self.sortorder is not None: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3279,3 +3279,78 @@ def test_duplicate_multiindex_labels(self): | |
| with pytest.raises(ValueError): | ||
| ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], | ||
| inplace=True) | ||
|
|
||
| def test_repr(self): | ||
| # GH21145 | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set the context manager for width here (so need multiple tests for this), e.g. with a small value then a large value to see how it works on different settings. |
||
| # no items | ||
| idx1, idx2 = range(0), [] | ||
| idx = pd.MultiIndex.from_arrays([idx1, idx2]) | ||
| expected = """\ | ||
| MultiIndex(levels=[[], []], | ||
| labels=[[], []])""" | ||
| assert repr(idx) == expected | ||
|
|
||
| # two items | ||
| idx1, idx2 = [3, 4], [5, 6] | ||
| idx = MultiIndex.from_arrays([idx1, idx2]) | ||
| expected = """\ | ||
| MultiIndex(levels=[[3, 4], [5, 6]], | ||
| labels=[[0, 1], [0, 1]])""" | ||
| assert repr(idx) == expected | ||
|
|
||
| # 100 items | ||
| idx1, idx2 = range(100), range(100) | ||
| idx = pd.MultiIndex.from_arrays([idx1, idx2]) | ||
| expected = """\ | ||
| MultiIndex(levels=[[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | ||
| 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | ||
| 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, | ||
| 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, | ||
| 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, | ||
| 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], | ||
| [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | ||
| 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | ||
| 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, | ||
| 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, | ||
| 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, | ||
| 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], | ||
| ], | ||
| labels=[[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | ||
| 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | ||
| 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, | ||
| 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, | ||
| 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, | ||
| 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], | ||
| [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | ||
| 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | ||
| 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, | ||
| 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, | ||
| 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, | ||
| 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], | ||
| ])""" | ||
| assert repr(idx) == expected | ||
|
|
||
| # 1000 items | ||
| idx1, idx2 = range(1000), range(1000) | ||
| idx = pd.MultiIndex.from_arrays([idx1, idx2]) | ||
| expected = """\ | ||
| MultiIndex(levels=[[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | ||
| ... | ||
| 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], | ||
| [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | ||
| ... | ||
| 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], | ||
| ], | ||
| labels=[[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | ||
| ... | ||
| 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], | ||
| [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | ||
| ... | ||
| 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], | ||
| ])""" | ||
| assert repr(idx) == expected | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move to 0.24.0