Skip to content

DataFrame reduction methods broken #174

@coroa

Description

@coroa

With the following package versions:

In [20]: pint_pandas.show_versions()
{'numpy': '1.24.2',
 'pandas': '1.5.3',
 'pint': '0.20.1',
 'pint_pandas': '0.4.dev32+gc58a7fc'}

I am unable to run any DataFrame-wise aggregation (reduction) function along axis=0. For example, sum() fails:

In [19]: pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]").sum()
/Users/coroa/.local/conda/envs/pandas-indexing/lib/python3.11/site-packages/pandas/core/internals/blocks.py:369: UnitStrippedWarning: The unit of the quantity is stripped when downcasting to ndarray.
  res_values = np.array([[result]])
---------------------------------------------------------------------------
DimensionalityError                       Traceback (most recent call last)
File ~/.local/conda/envs/pandas-indexing/lib/python3.11/site-packages/pint/facets/plain/quantity.py:702, in PlainQuantity.__int__(self)
    701     return int(self._convert_magnitude_not_inplace(UnitsContainer()))
--> 702 raise DimensionalityError(self._units, "dimensionless")

DimensionalityError: Cannot convert from 'meter' to 'dimensionless'

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
Cell In[19], line 1
----> 1 pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]").sum()

File ~/.local/conda/envs/pandas-indexing/lib/python3.11/site-packages/pandas/core/generic.py:11797, in NDFrame._add_numeric_operations.<locals>.sum(self, axis, skipna, level, numeric_only, min_count, **kwargs)
  11777 @doc(
  11778     _num_doc,
  11779     desc="Return the sum of the values over the requested axis.\n\n"
   (...)
  11795     **kwargs,
  11796 ):
> 11797     return NDFrame.sum(
  11798         self, axis, skipna, level, numeric_only, min_count, **kwargs
  11799     )

File ~/.local/conda/envs/pandas-indexing/lib/python3.11/site-packages/pandas/core/generic.py:11501, in NDFrame.sum(self, axis, skipna, level, numeric_only, min_count, **kwargs)
  11492 def sum(
  11493     self,
  11494     axis: Axis | None = None,
   (...)
  11499     **kwargs,
  11500 ):
> 11501     return self._min_count_stat_function(
  11502         "sum", nanops.nansum, axis, skipna, level, numeric_only, min_count, **kwargs
  11503     )

File ~/.local/conda/envs/pandas-indexing/lib/python3.11/site-packages/pandas/core/generic.py:11483, in NDFrame._min_count_stat_function(self, name, func, axis, skipna, level, numeric_only, min_count, **kwargs)
  11467     warnings.warn(
  11468         "Using the level keyword in DataFrame and Series aggregations is "
  11469         "deprecated and will be removed in a future version. Use groupby "
   (...)
  11472         stacklevel=find_stack_level(),
  11473     )
  11474     return self._agg_by_level(
  11475         name,
  11476         axis=axis,
   (...)
  11480         numeric_only=numeric_only,
  11481     )
> 11483 return self._reduce(
  11484     func,
  11485     name=name,
  11486     axis=axis,
  11487     skipna=skipna,
  11488     numeric_only=numeric_only,
  11489     min_count=min_count,
  11490 )

File ~/.local/conda/envs/pandas-indexing/lib/python3.11/site-packages/pandas/core/frame.py:10856, in DataFrame._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
  10852 ignore_failures = numeric_only is None
  10854 # After possibly _get_data and transposing, we are now in the
  10855 #  simple case where we can use BlockManager.reduce
> 10856 res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
  10857 out = df._constructor(res).iloc[0]
  10858 if out_dtype is not None:

File ~/.local/conda/envs/pandas-indexing/lib/python3.11/site-packages/pandas/core/internals/managers.py:1569, in BlockManager.reduce(self, func, ignore_failures)
   1567 res_blocks: list[Block] = []
   1568 for blk in self.blocks:
-> 1569     nbs = blk.reduce(func, ignore_failures)
   1570     res_blocks.extend(nbs)
   1572 index = Index([None])  # placeholder

File ~/.local/conda/envs/pandas-indexing/lib/python3.11/site-packages/pandas/core/internals/blocks.py:369, in Block.reduce(self, func, ignore_failures)
    365     raise
    367 if self.values.ndim == 1:
    368     # TODO(EA2D): special case not needed with 2D EAs
--> 369     res_values = np.array([[result]])
    370 else:
    371     res_values = result.reshape(-1, 1)

ValueError: setting an array element with a sequence.

Similarly, df.min(), df.max(), and the other reduction methods fail as well.

This might be related to:
hgrecco/pint#1128 (comment)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions