-
-
Notifications
You must be signed in to change notification settings - Fork 19.2k
Updating _resolve_numeric_only function of GroupBy #43154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
6b0f5e7
116534f
7b5ecb4
7faf1fc
d773e7a
5b4e799
3d2e78e
5836f91
1eb0e25
a0391e6
2a1835a
66ecb96
c80fa9e
95b6533
fa8a8ca
e3f6767
ed668e6
f30855a
65261b4
2477aa3
da24f29
4075353
463a7c9
dd562c5
f400174
56f28d7
4fd254b
a3b09e6
f7e2d59
270549a
17808bf
08b2429
e141123
45f54d6
0012423
a9a63e7
d46091e
a39e5ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ class providing the base-class of operations. | |
|
|
||
| from contextlib import contextmanager | ||
| import datetime | ||
| import numpy as np | ||
| from functools import ( | ||
| partial, | ||
| wraps, | ||
|
|
@@ -1119,7 +1120,24 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: | |
| # i.e. not explicitly passed by user | ||
| if self.obj.ndim == 2: | ||
| # i.e. DataFrameGroupBy | ||
| numeric_only = True | ||
| # Checking if the dataframe has non-numeric features | ||
|
||
|
|
||
| df_cols = self.obj.columns | ||
| num_cols = self.obj.select_dtypes( | ||
| include=[np.number, np.datetime64, np.timedelta64]).columns | ||
| # Removing the key columns | ||
|
|
||
| if set(num_cols).intersection(set(self.keys)) : | ||
| df_cols = set(df_cols) - set(self.keys) | ||
| num_cols = set(num_cols) - set(self.keys) | ||
| else: | ||
| df_cols = set(df_cols) - set(self.keys) | ||
| num_cols = set(num_cols) | ||
|
|
||
| if len(obj_cols - num_cols) > 0: | ||
| numeric_only = False | ||
| else: | ||
| numeric_only = True | ||
| else: | ||
| numeric_only = False | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -113,7 +113,19 @@ def test_groupby_aggregation_mixed_dtype(): | |
| g = df.groupby(["by1", "by2"]) | ||
| result = g[["v1", "v2"]].mean() | ||
| tm.assert_frame_equal(result, expected) | ||
| expected2 = DataFrame( | ||
|
||
| { | ||
| "v1": [15, 7, 9, 3, 3, 5], | ||
| "v2": [165, 77, 99, 33, 33, 55], | ||
| "by2": [293, 194, 0, 'damp', 'dry', 'wetred'] | ||
| }, | ||
| index = Index([1,2,12,'big','blue','red'], | ||
| dtype='object', name='by1'), | ||
| ) | ||
|
|
||
| g = df.groupby(["by1"]) | ||
| result = g.sum() | ||
| tm.assert_frame_equal(result, expected2) | ||
|
|
||
| def test_groupby_aggregation_multi_level_column(): | ||
| # GH 29772 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is already imported below