Skip to content

Commit c97238c

Browse files
committed
Merge pull request #10179 from sebp/bug_concat_categorical
BUG: concat on axis=0 with categorical (GH10177)
2 parents c8d20a6 + 023fc37 commit c97238c

File tree

3 files changed

+20
-2
lines changed

3 files changed

+20
-2
lines changed

doc/source/whatsnew/v0.17.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,4 @@ Bug Fixes
103103
- Bug that caused segfault when resampling an empty Series (:issue:`10228`)
104104
- Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` where the name was reset on the result but retained in the result's ``Index``. (:issue:`10150`)
105105

106-
106+
- Bug in ``pandas.concat`` with ``axis=0`` when a column is of dtype ``category`` (:issue:`10177`)

pandas/core/internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4133,7 +4133,7 @@ def get_empty_dtype_and_na(join_units):
41334133
else:
41344134
return np.dtype(np.bool_), None
41354135
elif 'category' in upcast_classes:
4136-
return com.CategoricalDtype(), np.nan
4136+
return np.dtype(np.object_), np.nan
41374137
elif 'float' in upcast_classes:
41384138
return np.dtype(np.float64), np.nan
41394139
elif 'datetime' in upcast_classes:

pandas/tests/test_categorical.py

+18
Original file line numberDiff line numberDiff line change
@@ -2967,6 +2967,24 @@ def test_pickle_v0_15_2(self):
29672967
#
29682968
self.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
29692969

2970+
def test_concat_categorical(self):
2971+
# See GH 10177
2972+
df1 = pd.DataFrame(np.arange(18).reshape(6, 3), columns=["a", "b", "c"])
2973+
2974+
df2 = pd.DataFrame(np.arange(14).reshape(7, 2), columns=["a", "c"])
2975+
df2['h'] = pd.Series(pd.Categorical(["one", "one", "two", "one", "two", "two", "one"]))
2976+
2977+
df_concat = pd.concat((df1, df2), axis=0).reset_index(drop=True)
2978+
2979+
df_expected = pd.DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
2980+
'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
2981+
'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13]})
2982+
df_expected['h'] = pd.Series(pd.Categorical([None, None, None, None, None, None,
2983+
"one", "one", "two", "one", "two", "two", "one"]))
2984+
2985+
tm.assert_frame_equal(df_expected, df_concat)
2986+
2987+
29702988

29712989
if __name__ == '__main__':
29722990
import nose

0 commit comments

Comments
 (0)