-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
Gh 36562 typeerror comparison not supported between float and str #37096
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
1320ff1
a1b9385
1f76d21
7076841
225675d
2677166
6f476bc
3688238
aba429c
8ae9279
dd5a38d
7b7d6f8
4226662
8cbfa01
6d71000
92e1e33
3ada9ce
95670f1
d9dcd22
66c30c7
db66528
16ae4f4
9d03dc3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ | |
| """ | ||
| from __future__ import annotations | ||
|
|
||
| import functools | ||
| import operator | ||
| from textwrap import dedent | ||
| from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast | ||
|
|
@@ -2055,13 +2056,52 @@ def sort_mixed(values): | |
| strs = np.sort(values[str_pos]) | ||
| return np.concatenate([nums, np.asarray(strs, dtype=object)]) | ||
|
|
||
| def sort_tuples(values): | ||
|
||
| # sorts tuples with mixed values. can handle nan vs string comparisons. | ||
| def cmp_func(index_x, index_y): | ||
|
||
| x = values[index_x] | ||
| y = values[index_y] | ||
| if x == y: | ||
| return 0 | ||
| len_x = len(x) | ||
|
||
| len_y = len(y) | ||
| for i in range(max(len_x, len_y)): | ||
|
||
| # check if the tuples have different lengths (shorter tuples | ||
| # first) | ||
| if i >= len_x: | ||
| return -1 | ||
| if i >= len_y: | ||
| return +1 | ||
| x_i_na = isna(x[i]) | ||
|
||
| y_i_na = isna(y[i]) | ||
| # values are the same -> resolve tie with next element | ||
| if (x_i_na and y_i_na) or (x[i] == y[i]): | ||
|
||
| continue | ||
| # check for nan values (sort nan to the end which is consistent | ||
|
||
| # with numpy | ||
| if x_i_na and not y_i_na: | ||
| return +1 | ||
| if not x_i_na and y_i_na: | ||
| return -1 | ||
| # normal greater/less than comparison | ||
| if x[i] < y[i]: | ||
| return -1 | ||
| return +1 | ||
| return 0 | ||
|
|
||
| ixs = np.arange(len(values)) | ||
| ixs = sorted(ixs, key=functools.cmp_to_key(cmp_func)) | ||
| return values[ixs] | ||
|
|
||
| sorter = None | ||
| if ( | ||
| not is_extension_array_dtype(values) | ||
| and lib.infer_dtype(values, skipna=False) == "mixed-integer" | ||
| ): | ||
|
|
||
| ext_arr = is_extension_array_dtype(values) | ||
ssche marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if not ext_arr and lib.infer_dtype(values, skipna=False) == "mixed-integer": | ||
| # unorderable in py3 if mixed str/int | ||
ssche marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ordered = sort_mixed(values) | ||
| elif not ext_arr and values.size and isinstance(values[0], tuple): | ||
| # 1-D arrays with tuples of potentially mixed type (solves GH36562) | ||
|
||
| ordered = sort_tuples(values) | ||
| else: | ||
| try: | ||
| sorter = values.argsort() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -91,3 +91,28 @@ def test_multiindex_get_loc_list_raises(self): | |
| msg = "unhashable type" | ||
| with pytest.raises(TypeError, match=msg): | ||
| idx.get_loc([]) | ||
|
|
||
|
|
||
| def test_combine_first_with_nan_index(): | ||
ssche marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| mi1 = pd.MultiIndex.from_arrays( | ||
| [["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"] | ||
| ) | ||
| df = pd.DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1) | ||
| mi2 = pd.MultiIndex.from_arrays( | ||
| [["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"] | ||
| ) | ||
| s = pd.Series([1, 2, 3, 4, 5, 6], index=mi2) | ||
| df_combined = df.combine_first(pd.DataFrame({"col": s})) | ||
| mi_expected = pd.MultiIndex.from_arrays( | ||
| [ | ||
| ["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan], | ||
| [1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6], | ||
| ], | ||
| names=["a", "b"], | ||
| ) | ||
| assert (df_combined.index == mi_expected).all() | ||
ssche marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| exp_col = np.asarray( | ||
| [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan] | ||
| ) | ||
| act_col = df_combined["col"].values | ||
| assert np.allclose(act_col, exp_col, rtol=0, atol=0, equal_nan=True) | ||
|
||
Uh oh!
There was an error while loading. Please reload this page.