Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ Reshaping
- Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`)
- Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
- Bug in :func:`concat` raising a ``ValueError`` when at least one of the inputs had a non-unique index (:issue:`36263`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non-unique

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

-

Sparse
Expand Down
14 changes: 13 additions & 1 deletion pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,19 @@ def get_result(self):
# 1-ax to convert BlockManager axis to DataFrame axis
obj_labels = obj.axes[1 - ax]
if not new_labels.equals(obj_labels):
indexers[ax] = obj_labels.reindex(new_labels)[1]
# We have to remove the duplicates of obj_labels from
# new_labels to make them unique, otherwise we would
# duplicate the duplicates again
obj_labels_duplicates = obj_labels[
obj_labels.duplicated()
].unique()
new_labels_cleared = new_labels[
~(
new_labels.duplicated()
& new_labels.isin(obj_labels_duplicates)
)
]
indexers[ax] = obj_labels.reindex(new_labels_cleared)[1]

mgrs_indexers.append((obj._mgr, indexers))

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2927,3 +2927,15 @@ def test_concat_preserves_extension_int64_dtype():
result = pd.concat([df_a, df_b], ignore_index=True)
expected = pd.DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64")
tm.assert_frame_equal(result, expected)


def test_concat_duplicate_indexes():
# GH 36263 ValueError with non unique indexes
df1 = pd.DataFrame([1, 2, 3, 4], index=[0, 1, 1, 4], columns=["a"])
df2 = pd.DataFrame([6, 7, 8, 9], index=[0, 0, 1, 3], columns=["b"])
result = pd.concat([df1, df2], axis=1)
expected = pd.DataFrame(
{"a": [1, 1, 2, 3, np.nan, 4], "b": [6, 7, 8, 8, 9, np.nan]},
index=pd.Index([0, 0, 1, 1, 3, 4]),
)
tm.assert_frame_equal(result, expected)