Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ Reshaping
- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)
- Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`)
- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`)
- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
-

Expand Down
33 changes: 29 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,12 +721,37 @@ def transpose(self, *args, **kwargs):
new_axes = self._construct_axes_dict_from(
self, [self._get_axis(x) for x in axes_names]
)
new_values = self.values.transpose(axes_numbers)
if kwargs.pop("copy", None) or (len(args) and args[-1]):
new_values = new_values.copy()

if (
self._is_homogeneous_type
and len(self._data.blocks)
and is_extension_array_dtype(self._data.blocks[0].dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can avoid self._data references by making this len(self.dtypes) and is_extension_array_dtype(self.dtypes.iloc[0])

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto on 731 with self.dtypes

):
kwargs.pop("copy", None) # by definition, we're copying
dtype = self._data.blocks[0].dtype
arr_type = dtype.construct_array_type()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would move this logic to pandas/core/reshape/reshape.py; it has a lot of similarity to _unstack_extension_series.

# Slow, but unavoidable with 1D EAs.
new_values = []
for i in range(len(self)):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm rethinking this approach. This results in n_rows * n_columns __getitem__s. My intent was to avoid going through a 2D object-dtype ndarray. But we're essentially doing that with lists. So I think it'll be better to just do .values.T and then rebuild the EAs from the object-dtype array.

new_values.append(
arr_type._from_sequence(
[block.values[i] for block in self._data.blocks], dtype=dtype
)
)
columns = new_axes.pop("columns")
new_values = dict(zip(columns, new_values))
result = self._constructor(new_values, **new_axes)

else:
new_values = self.values.transpose(axes_numbers)
if kwargs.pop("copy", None) or (len(args) and args[-1]):
new_values = new_values.copy()

result = self._constructor(new_values, **new_axes)

nv.validate_transpose(tuple(), kwargs)
return self._constructor(new_values, **new_axes).__finalize__(self)
return result.__finalize__(self)

def swapaxes(self, axis1, axis2, copy=True):
"""
Expand Down
19 changes: 0 additions & 19 deletions pandas/tests/arithmetic/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,25 +235,6 @@ def box_df_fail(request):
return request.param


@pytest.fixture(
    params=[
        (pd.Index, False),
        (pd.Series, False),
        (pd.DataFrame, False),
        pytest.param((pd.DataFrame, True), marks=pytest.mark.xfail),
        (tm.to_array, False),
    ],
    ids=id_func,
)
def box_transpose_fail(request):
    """
    Provide ``(box, transpose)`` pairs.

    Similar to the `box` fixture, but covers both transpose cases for
    DataFrame; the ``(pd.DataFrame, True)`` case is marked xfail.
    """
    # GH#23620
    box_and_transpose = request.param
    return box_and_transpose


@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, tm.to_array], ids=id_func)
def box_with_array(request):
"""
Expand Down
12 changes: 5 additions & 7 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,10 +755,10 @@ def test_pi_sub_isub_offset(self):
rng -= pd.offsets.MonthEnd(5)
tm.assert_index_equal(rng, expected)

def test_pi_add_offset_n_gt1(self, box_transpose_fail):
@pytest.mark.parametrize("transpose", [True, False])
def test_pi_add_offset_n_gt1(self, box, transpose):
# GH#23215
# add offset to PeriodIndex with freq.n > 1
box, transpose = box_transpose_fail

per = pd.Period("2016-01", freq="2M")
pi = pd.PeriodIndex([per])
Expand Down Expand Up @@ -984,10 +984,9 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq):
with pytest.raises(IncompatibleFrequency, match=msg):
rng -= other

def test_parr_add_sub_td64_nat(self, box_transpose_fail):
@pytest.mark.parametrize("transpose", [True, False])
def test_parr_add_sub_td64_nat(self, box, transpose):
# GH#23320 special handling for timedelta64("NaT")
box, transpose = box_transpose_fail

pi = pd.period_range("1994-04-01", periods=9, freq="19D")
other = np.timedelta64("NaT")
expected = pd.PeriodIndex(["NaT"] * 9, freq="19D")
Expand All @@ -1011,10 +1010,9 @@ def test_parr_add_sub_td64_nat(self, box_transpose_fail):
TimedeltaArray._from_sequence(["NaT"] * 9),
],
)
def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other):
def test_parr_add_sub_tdt64_nat_array(self, box, other):
# FIXME: DataFrame fails because when operating column-wise
# timedelta64 entries become NaT and are treated like datetimes
box = box_df_fail

pi = pd.period_range("1994-04-01", periods=9, freq="19D")
expected = pd.PeriodIndex(["NaT"] * 9, freq="19D")
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/extension/base/reshaping.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,17 @@ def test_ravel(self, data):
# Check that we have a view, not a copy
result[0] = result[1]
assert data[0] == data[1]

def test_transpose(self, data):
    """
    Transposing a frame whose columns all hold the same extension dtype
    should give columns of that same dtype.

    Builds a 4-row, 2-column frame from the first four elements of `data`
    and compares its transpose against a frame constructed column by
    column with ``_from_sequence``.
    """
    row_labels = ["a", "b", "c", "d"]
    frame = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=row_labels)
    result = frame.T

    array_type = type(data)
    # Each original row becomes a column holding that row's value twice
    # (once per original column "A" and "B").
    expected_columns = {
        label: array_type._from_sequence([data[pos]] * 2, dtype=data.dtype)
        for pos, label in enumerate(row_labels)
    }
    expected = pd.DataFrame(expected_columns, index=["A", "B"])
    self.assert_frame_equal(result, expected)
4 changes: 4 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,10 @@ def test_merge_on_extension_array_duplicates(self, data):
# Fails creating expected
super().test_merge_on_extension_array_duplicates(data)

@skip_nested
def test_transpose(self, data):
    # Delegates unchanged to the base-class test; the only addition is the
    # ``skip_nested`` decorator.
    # NOTE(review): presumably this skips the nested-data parametrization,
    # as for the other ``@skip_nested`` tests here -- confirm against the
    # decorator's definition.
    super().test_transpose(data)


class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
@skip_nested
Expand Down