ttsim-dev · MImmesberger · Mar 23, 2025 · Mar 17, 2025 · Mar 17, 2025 · Mar 17, 2025
diff --git a/src/_gettsim/interface.py b/src/_gettsim/interface.py
@@ -513,7 +513,7 @@ def _fail_if_data_tree_not_valid(data_tree: NestedDataDict) -> None:
     """
     assert_valid_gettsim_pytree(
         tree=data_tree,
-        leaf_checker=lambda leaf: isinstance(leaf, pd.Series | np.ndarray),
+        leaf_checker=lambda leaf: isinstance(leaf, pd.Series | np.ndarray | list),
         tree_name="data_tree",
     )
     _fail_if_pid_is_non_unique(data_tree)

diff --git a/src/_gettsim_tests/_policy_test_utils.py b/src/_gettsim_tests/_policy_test_utils.py
@@ -1,158 +1,111 @@
 from __future__ import annotations
 
 import datetime
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
-import pandas as pd
+import flatten_dict
 import yaml
 
-from _gettsim_tests import TEST_DATA_DIR
-
-_ValueDict = dict[str, list[Any]]
+from _gettsim.shared import merge_trees
 
 if TYPE_CHECKING:
     from pathlib import Path
 
+    from _gettsim.gettsim_typing import NestedDataDict, NestedInputStructureDict
 
-class PolicyTestSet:
-    def __init__(self, policy_name: str, test_data: list[PolicyTestData]):
-        self.policy_name = policy_name
-        self.test_data = test_data
-
-    @property
-    def parametrize_args(self) -> list[tuple[PolicyTestData, str]]:
-        return [(test, column) for test in self.test_data for column in test.output_df]
-
-    def merged_input_df(self) -> pd.DataFrame:
-        return pd.concat([test.input_df for test in self.test_data], ignore_index=True)
-
-    def merged_output_df(self) -> pd.DataFrame:
-        return pd.concat([test.output_df for test in self.test_data], ignore_index=True)
 
-    def filter_test_data(
-        self, *, test_name: str | None = None, date: datetime.date | str | None = None
-    ) -> PolicyTestSet:
-        """
-        Filter the test data in this PolicyTestSet.
+class PolicyTest:
+    """A class for a single policy test."""
 
-        Note that you must pass all arguments of this function by name (and not by
-        position).
-
-        Parameters
-        ----------
-        test_name : str | None
-            If provided, only instances of `PolicyTestData` with this name are included
-            in the result. If None, no filtering is done on test name.
-        date : datetime.date | str | None
-            If provided, only instances of `PolicyTestData` with this date are
-            included in the result. If None, no filtering is done on date.
+    def __init__(
+        self,
+        input_tree: NestedDataDict,
+        expected_output_tree: NestedDataDict,
+        test_file: Path,
+        date: datetime.date,
+    ) -> None:
+        self.input_tree = input_tree
+        self.expected_output_tree = expected_output_tree
+        self.test_file = test_file
+        self.date = date
 
-        Returns
-        -------
-        PolicyTestSet
-            A new PolicyTestSet with the filtered test data.
+    @property
+    def target_structure(self) -> NestedInputStructureDict:
+        flat_target_structure = {
+            k: None for k in flatten_dict.flatten(self.expected_output_tree)
+        }
+        return flatten_dict.unflatten(flat_target_structure)
 
-        Examples
-        --------
-        >>> data = load_policy_test_data("soli_st")
-        >>> filtered_by_name = data.filter_test_data(test_name="hh_id_2")
+    @property
+    def test_name(self) -> str:
+        return self.test_file.stem
 
-        >>> filtered_by_date = data.filter_test_data(date="1991")
-        """
 
-        if isinstance(date, str):
-            date = _parse_date(date)
+def load_policy_test_data(policy_name: str) -> list[PolicyTest]:
+    from _gettsim_tests import TEST_DATA_DIR
 
-        filtered_test_data = [
-            test
-            for test in self.test_data
-            if (test_name is None or test.test_name == test_name)
-            and (date is None or test.date == date)
-        ]
+    root = TEST_DATA_DIR / policy_name
 
-        return PolicyTestSet(self.policy_name, filtered_test_data)
+    out = []
 
+    for path_of_test_file in root.glob("**/*.yaml"):
+        if _is_skipped(path_of_test_file):
+            continue
 
-class PolicyTestData:
-    def __init__(  # noqa: PLR0913
-        self,
-        policy_name: str,
-        test_file: Path,
-        test_name: str,
-        date: str,
-        inputs_provided: _ValueDict,
-        inputs_assumed: _ValueDict,
-        outputs: _ValueDict,
-    ):
-        self.policy_name = policy_name
-        self.test_file = test_file
-        self.test_name = test_name
-        self.date = _parse_date(date)
-        self._inputs_provided = inputs_provided
-        self._inputs_assumed = inputs_assumed
-        self._outputs = outputs
+        with path_of_test_file.open("r", encoding="utf-8") as file:
+            raw_test_data: NestedDataDict = yaml.safe_load(file)
 
-    @property
-    def input_df(self) -> pd.DataFrame:
-        return pd.DataFrame.from_dict(
-            {**self._inputs_provided, **self._inputs_assumed}
-        ).reset_index(drop=True)
+        out.extend(
+            _get_policy_tests_from_raw_test_data(
+                raw_test_data=raw_test_data,
+                path_of_test_file=path_of_test_file,
+            )
+        )
 
-    @property
-    def output_df(self) -> pd.DataFrame:
-        return pd.DataFrame.from_dict(self._outputs).reset_index(drop=True)
+    return out
 
-    def __repr__(self) -> str:
-        return (
-            f"PolicyTestData({self.policy_name}, {self.test_file.name}, "
-            f"{self.test_name})"
-        )
 
-    def __str__(self) -> str:
-        relative_path = self.test_file.relative_to(TEST_DATA_DIR)
-        backslash = "\\"
-        return f"{str(relative_path).replace(backslash, '/')}"
+def _is_skipped(test_file: Path) -> bool:
+    return "skip" in test_file.stem or "skip" in test_file.parent.name
 
 
-def load_policy_test_data(policy_name: str) -> PolicyTestSet:
-    from _gettsim_tests import TEST_DATA_DIR
+def _get_policy_tests_from_raw_test_data(
+    raw_test_data: NestedDataDict, path_of_test_file: Path
+) -> list[PolicyTest]:
+    """Build one PolicyTest per expected output leaf of a raw test file.
 
-    root = TEST_DATA_DIR / policy_name
+    `raw_test_data` is the parsed YAML content and `path_of_test_file` its path;
+    the policy date is parsed from the name of the file's parent directory.
 
+    Returns:
+        A list of PolicyTest objects sharing one input tree, one per output leaf.
+    """
     out = []
 
-    for test_file in root.glob("**/*.yaml"):
-        if _is_skipped(test_file):
-            continue
-
-        with test_file.open("r", encoding="utf-8") as file:
-            test_data: dict[str, dict] = yaml.safe_load(file)
+    inputs: NestedDataDict = raw_test_data.get("inputs", {})
+    input_tree: NestedDataDict = merge_trees(
+        inputs.get("provided", {}), inputs.get("assumed", {})
+    )
+    all_expected_outputs: NestedDataDict = raw_test_data.get("outputs", {})
 
-        date = test_file.parent.name
-        test_name = test_file.stem
+    date: datetime.date = _parse_date(path_of_test_file.parent.name)
 
-        inputs: dict[str, dict] = test_data["inputs"]
-        inputs_provided: _ValueDict = inputs.get("provided", {})
-        inputs_assumed: _ValueDict = inputs.get("assumed", {})
-        outputs: _ValueDict = test_data["outputs"]
+    flat_expected_outputs = flatten_dict.flatten(all_expected_outputs)
 
+    for target_name, test_data in flat_expected_outputs.items():
+        one_expected_output: NestedDataDict = flatten_dict.unflatten(
+            {target_name: test_data}
+        )
         out.append(
-            PolicyTestData(
-                policy_name=policy_name,
-                test_file=test_file,
-                test_name=test_name,
+            PolicyTest(
+                input_tree=input_tree,
+                expected_output_tree=one_expected_output,
+                test_file=path_of_test_file,
                 date=date,
-                inputs_provided=inputs_provided,
-                inputs_assumed=inputs_assumed,
-                outputs=outputs,
             )
         )
 
-    return PolicyTestSet(policy_name, out)
-
-
-def _is_skipped(test_file: Path) -> bool:
-    return "skip" in test_file.stem or "skip" in test_file.parent.name
+    return out
 
 
 def _parse_date(date: str) -> datetime.date:

diff --git a/src/_gettsim_tests/test_aggregate_by_p_id.py b/src/_gettsim_tests/test_aggregate_by_p_id.py
@@ -1,36 +1,51 @@
+from typing import TYPE_CHECKING
+
+import flatten_dict
 import pytest
 from pandas.testing import assert_series_equal
 
 from _gettsim.interface import compute_taxes_and_transfers
 from _gettsim_tests._helpers import cached_set_up_policy_environment
-from _gettsim_tests._policy_test_utils import PolicyTestData, load_policy_test_data
+from _gettsim_tests._policy_test_utils import PolicyTest, load_policy_test_data
+
+if TYPE_CHECKING:
+    import datetime
+
+    from _gettsim.gettsim_typing import NestedDataDict, NestedInputStructureDict
 
 OVERRIDE_COLS = []
 
-data = load_policy_test_data("aggregate_by_p_id")
+test_data = load_policy_test_data("aggregate_by_p_id")
 
 
-@pytest.mark.xfail(reason="Needs renamings PR.")
 @pytest.mark.parametrize(
-    ("test_data", "column"),
-    data.parametrize_args,
-    ids=str,
+    "test",
+    test_data,
 )
 def test_aggregate_by_p_id(
-    test_data: PolicyTestData,
-    column: str,
+    test: PolicyTest,
 ):
-    df = test_data.input_df
-    environment = cached_set_up_policy_environment(date=test_data.date)
+    date: datetime.date = test.date
+    input_tree: NestedDataDict = test.input_tree
+    expected_output_tree: NestedDataDict = test.expected_output_tree
+    target_structure: NestedInputStructureDict = test.target_structure
+
+    environment = cached_set_up_policy_environment(date=date)
 
     result = compute_taxes_and_transfers(
-        data=df, environment=environment, targets=column
+        data_tree=input_tree, environment=environment, targets_tree=target_structure
     )
 
-    assert_series_equal(
-        result[column],
-        test_data.output_df[column],
-        check_dtype=False,
-        atol=1e-1,
-        rtol=0,
-    )
+    flat_result = flatten_dict.flatten(result)
+    flat_expected_output_tree = flatten_dict.flatten(expected_output_tree)
+
+    # Pair by tree path so ordering or extra result leaves cannot mismatch series.
+    for target_path, expected_series in flat_expected_output_tree.items():
+        result_series = flat_result[target_path]
+        assert_series_equal(
+            result_series,
+            expected_series,
+            check_dtype=False,
+            atol=1e-1,
+            rtol=0,
+        )
diff --git a/src/_gettsim_tests/test_interface.py b/src/_gettsim_tests/test_interface.py
@@ -718,16 +718,21 @@ def test_provide_endogenous_groupings(data, functions_overridden):
         (
             {
                 "demographics": {"hh_id": pd.Series([1, "1", 2])},
-                "einkommen": {"bruttolohn_m": pd.Series(["2000", 3000, 4000])},
+                "einkommensteuer": {
+                    "einkünfte": {
+                        "aus_nichtselbstständiger_arbeit": {
+                            "bruttolohn_m": pd.Series(["2000", 3000, 4000])
+                        }
+                    }
+                },
             },
             {},
             "The data types of the following columns are invalid:\n"
             "\n - demographics__hh_id: Conversion from input type object to int failed."
-            " Object\ntype is not supported as input."
-            "\n\n- "
-            "einkommensteuer__einkünfte__aus_nichtselbstständiger_arbeit__bruttolohn_m:"
-            " Conversion from input type object to float failed."
-            "\nObject type is not supported as input.",
+            " Object\ntype is not supported as input.\n"
+            "\n- einkommensteuer__einkünfte__aus_nichtselbstständiger_arbeit__bruttolohn_m:"  # noqa: E501
+            "\nConversion from input type object to float failed. "
+            "Object type is not supported\nas input.",
         ),
     ],
 )