|
10 | 10 |
|
11 | 11 | import pandas as pd |
12 | 12 | from pandas import ( |
| 13 | + Categorical, |
13 | 14 | DataFrame, |
14 | 15 | Grouper, |
15 | 16 | Index, |
|
18 | 19 | Timestamp, |
19 | 20 | date_range, |
20 | 21 | read_csv, |
| 22 | + to_datetime, |
21 | 23 | ) |
22 | 24 | import pandas._testing as tm |
23 | 25 | from pandas.core.base import SpecificationError |
@@ -1716,15 +1718,48 @@ def test_pivot_table_values_key_error(): |
1716 | 1718 | ) |
1717 | 1719 |
|
1718 | 1720 |
|
1719 | | -def test_empty_dataframe_groupby(): |
1720 | | - # GH8093 |
1721 | | - df = DataFrame(columns=["A", "B", "C"]) |
1722 | | - |
1723 | | - result = df.groupby("A").sum() |
1724 | | - expected = DataFrame(columns=["B", "C"], dtype=np.float64) |
1725 | | - expected.index.name = "A" |
1726 | | - |
1727 | | - tm.assert_frame_equal(result, expected) |
@pytest.mark.parametrize("columns", ["C", ["C"]])
@pytest.mark.parametrize("keys", [["A"], ["A", "B"]])
@pytest.mark.parametrize(
    "values",
    [
        [True],
        [0],
        [0.0],
        ["a"],
        [Categorical([0])],
        [to_datetime(0)],
        [date_range(0, 1, 1, tz="US/Eastern")],
        [pd.array([0], dtype="Int64")],
    ],
)
@pytest.mark.parametrize("method", ["attr", "agg", "apply"])
@pytest.mark.parametrize(
    "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"]
)
def test_empty_groupby(columns, keys, values, method, op):
    """Check that reducing a group-by over a zero-row frame returns an empty result.

    The reduction ``op`` is invoked either as a GroupBy attribute, through
    ``agg`` or through ``apply`` (selected by ``method``), and the outcome is
    compared with the empty frame re-indexed by ``keys`` and restricted to
    ``columns``.
    """
    # GH8093 & GH26411

    # One row with the same value in A, B and C fixes the column dtypes; the
    # row is then sliced away so only the (empty) structure remains.
    # NOTE(review): the extension-array cases are wrapped in a one-element
    # list, so each cell holds the array object itself; presumably those
    # columns end up object dtype rather than categorical / tz-aware / Int64
    # - confirm this is intended.
    template = DataFrame([values * 3], columns=list("ABC"))
    df = template.iloc[:0]

    grouped = df.groupby(keys)[columns]
    if method == "attr":
        result = getattr(grouped, op)()
    else:
        result = getattr(grouped, method)(op)

    expected = df.set_index(keys)[columns]

    # sum/product of bools is an integer (except via ``apply``)
    bool_reduced_to_int = (
        method != "apply" and op in ("prod", "sum") and isinstance(values[0], bool)
    )
    if bool_reduced_to_int:
        expected = expected.astype("int64")

    if len(keys) == 1:
        expected.index.name = keys[0]
    tm.assert_equal(result, expected)
1728 | 1763 |
|
1729 | 1764 |
|
1730 | 1765 | def test_tuple_as_grouping(): |
|
0 commit comments