@@ -1063,6 +1063,85 @@ def test_groupby_get_by_index():
10631063 pd .testing .assert_frame_equal (res , expected )
10641064
10651065
@pytest.mark.parametrize(
    "grp_col_dict, exp_data",
    [
        ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}),
        ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}),
        ({"nr": "min"}, {"nr": [1, 5]}),
    ],
)
def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
    # GH27800: one aggregation per column, where the aggregated columns may
    # include an ordered categorical and the grouping key is categorical.

    # Input frame: both "cat" and "cat_ord" are cast to category dtype, then
    # "cat_ord" alone is switched to an ordered categorical.
    frame = pd.DataFrame(
        {
            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
            "cat_ord": list("aabbccdd"),
            "cat": list("aaaabbbb"),
        }
    ).astype({"cat": "category", "cat_ord": "category"})
    frame["cat_ord"] = frame["cat_ord"].cat.as_ordered()

    # Aggregate per group of "cat" using the parametrized column -> func map.
    result_df = frame.groupby("cat").agg(grp_col_dict)

    # Expected frame: the parametrized data indexed by the unordered
    # categorical group keys, with the index named after the grouping column.
    expected_index = pd.CategoricalIndex(
        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
    )
    expected_df = pd.DataFrame(exp_data, index=expected_index)

    tm.assert_frame_equal(result_df, expected_df)
1098+
1099+
@pytest.mark.parametrize(
    "grp_col_dict, exp_data",
    [
        ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]),
        ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]),
        ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]),
    ],
)
def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
    # GH27800: several aggregations at once (a list of funcs for at least one
    # column), where the aggregated columns may include an ordered categorical.

    # Input frame: both "cat" and "cat_ord" are cast to category dtype, then
    # "cat_ord" alone is switched to an ordered categorical.
    frame = pd.DataFrame(
        {
            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
            "cat_ord": list("aabbccdd"),
            "cat": list("aaaabbbb"),
        }
    ).astype({"cat": "category", "cat_ord": "category"})
    frame["cat_ord"] = frame["cat_ord"].cat.as_ordered()

    # Aggregate per group of "cat" using the parametrized column -> func(s) map.
    result_df = frame.groupby("cat").agg(grp_col_dict)

    # Expected row index: the unordered categorical group keys.
    expected_index = pd.CategoricalIndex(
        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
    )

    # Expected column index: flatten the aggregation spec into
    # [column, func] pairs, in dict order; a bare func name is treated as a
    # one-element list so both spec shapes go through the same path.
    column_pairs = tuple(
        [column, func]
        for column, funcs in grp_col_dict.items()
        for func in (funcs if isinstance(funcs, list) else [funcs])
    )
    expected_columns = pd.MultiIndex.from_tuples(column_pairs)

    expected_df = pd.DataFrame(
        data=exp_data, columns=expected_columns, index=expected_index
    )

    tm.assert_frame_equal(result_df, expected_df)
1143+
1144+
10661145def test_nonagg_agg ():
10671146 # GH 35490 - Single/Multiple agg of non-agg function give same results
10681147 # TODO: agg should raise for functions that don't aggregate
0 commit comments