2424 to_datetime ,
2525)
2626import pandas ._testing as tm
27+ from pandas .core .arrays import (
28+ BooleanArray ,
29+ FloatingArray ,
30+ IntegerArray ,
31+ )
2732from pandas .core .base import SpecificationError
2833import pandas .core .common as com
2934
@@ -1822,17 +1827,23 @@ def test_pivot_table_values_key_error():
18221827)
18231828@pytest .mark .filterwarnings ("ignore:Dropping invalid columns:FutureWarning" )
18241829@pytest .mark .filterwarnings ("ignore:.*Select only valid:FutureWarning" )
1825- def test_empty_groupby (columns , keys , values , method , op , request ):
1830+ def test_empty_groupby (columns , keys , values , method , op , request , using_array_manager ):
18261831 # GH8093 & GH26411
18271832 override_dtype = None
18281833
18291834 if (
18301835 isinstance (values , Categorical )
18311836 and not isinstance (columns , list )
1832- and op in ["sum" , "prod" ]
1837+ and op in ["sum" , "prod" , "skew" , "mad" ]
18331838 ):
18341839 # handled below GH#41291
1835- pass
1840+
1841+ if using_array_manager and op == "mad" :
1842+ right_msg = "Cannot interpret 'CategoricalDtype.* as a data type"
1843+ msg = "Regex pattern \" 'Categorical' does not implement.*" + right_msg
1844+ mark = pytest .mark .xfail (raises = AssertionError , match = msg )
1845+ request .node .add_marker (mark )
1846+
18361847 elif (
18371848 isinstance (values , Categorical )
18381849 and len (keys ) == 1
@@ -1851,11 +1862,7 @@ def test_empty_groupby(columns, keys, values, method, op, request):
18511862 raises = TypeError , match = "'Categorical' does not implement"
18521863 )
18531864 request .node .add_marker (mark )
1854- elif (
1855- isinstance (values , Categorical )
1856- and len (keys ) == 1
1857- and op in ["mad" , "min" , "max" , "sum" , "prod" , "skew" ]
1858- ):
1865+ elif isinstance (values , Categorical ) and len (keys ) == 1 and op in ["sum" , "prod" ]:
18591866 mark = pytest .mark .xfail (
18601867 raises = AssertionError , match = "(DataFrame|Series) are different"
18611868 )
@@ -1869,7 +1876,30 @@ def test_empty_groupby(columns, keys, values, method, op, request):
18691876 raises = AssertionError , match = "(DataFrame|Series) are different"
18701877 )
18711878 request .node .add_marker (mark )
1872- elif isinstance (values , pd .core .arrays .BooleanArray ) and op in ["sum" , "prod" ]:
1879+ elif (
1880+ isinstance (values , (IntegerArray , FloatingArray ))
1881+ and op == "mad"
1882+ and isinstance (columns , list )
1883+ ):
1884+ mark = pytest .mark .xfail (
1885+ raises = TypeError , match = "can only perform ops with numeric values"
1886+ )
1887+ request .node .add_marker (mark )
1888+
1889+ elif (
1890+ op == "mad"
1891+ and not isinstance (columns , list )
1892+ and isinstance (values , pd .DatetimeIndex )
1893+ and values .tz is not None
1894+ and using_array_manager
1895+ ):
1896+ mark = pytest .mark .xfail (
1897+ raises = TypeError ,
1898+ match = r"Cannot interpret 'datetime64\[ns, US/Eastern\]' as a data type" ,
1899+ )
1900+ request .node .add_marker (mark )
1901+
1902+ elif isinstance (values , BooleanArray ) and op in ["sum" , "prod" ]:
18731903 # We expect to get Int64 back for these
18741904 override_dtype = "Int64"
18751905
@@ -1895,19 +1925,29 @@ def get_result():
18951925
18961926 if columns == "C" :
18971927 # i.e. SeriesGroupBy
1898- if op in ["prod" , "sum" ]:
1928+ if op in ["prod" , "sum" , "skew" ]:
18991929 # ops that require more than just ordered-ness
19001930 if df .dtypes [0 ].kind == "M" :
19011931 # GH#41291
19021932 # datetime64 -> prod and sum are invalid
1903- msg = "datetime64 type does not support"
1933+ if op == "skew" :
1934+ msg = "'DatetimeArray' does not implement reduction 'skew'"
1935+ else :
1936+ msg = "datetime64 type does not support"
19041937 with pytest .raises (TypeError , match = msg ):
19051938 get_result ()
19061939
19071940 return
1908- elif isinstance (values , Categorical ):
1941+ if op in ["prod" , "sum" , "skew" , "mad" ]:
1942+ if isinstance (values , Categorical ):
19091943 # GH#41291
1910- msg = "category type does not support"
1944+ if op == "mad" :
1945+ # mad calls mean, which Categorical doesn't implement
1946+ msg = "'Categorical' does not implement reduction 'mean'"
1947+ elif op == "skew" :
1948+ msg = f"'Categorical' does not implement reduction '{ op } '"
1949+ else :
1950+ msg = "category type does not support"
19111951 with pytest .raises (TypeError , match = msg ):
19121952 get_result ()
19131953
@@ -1954,6 +1994,34 @@ def get_result():
19541994 tm .assert_equal (result , expected )
19551995 return
19561996
1997+ if (
1998+ op in ["mad" , "min" , "max" , "skew" ]
1999+ and isinstance (values , Categorical )
2000+ and len (keys ) == 1
2001+ ):
2002+ # Categorical doesn't implement, so with numeric_only=True
2003+ # these are dropped and we get an empty DataFrame back
2004+ result = get_result ()
2005+ expected = df .set_index (keys )[[]]
2006+
2007+ # with numeric_only=True, these are dropped, and we get
2008+ # an empty DataFrame back
2009+ if len (keys ) != 1 :
2010+ # Categorical is special without 'observed=True'
2011+ lev = Categorical ([0 ], dtype = values .dtype )
2012+ mi = MultiIndex .from_product ([lev , lev ], names = keys )
2013+ expected = DataFrame ([], columns = [], index = mi )
2014+ else :
2015+ # all columns are dropped, but we end up with one row
2016+ # Categorical is special without 'observed=True'
2017+ lev = Categorical ([0 ], dtype = values .dtype )
2018+ ci = Index (lev , name = keys [0 ])
2019+ expected = DataFrame ([], columns = [], index = ci )
2020+ # expected = df.set_index(keys)[columns]
2021+
2022+ tm .assert_equal (result , expected )
2023+ return
2024+
19572025 result = get_result ()
19582026 expected = df .set_index (keys )[columns ]
19592027 if override_dtype is not None :
0 commit comments