From 2c08d6544cb103c4b4237ecff58f01e04d6c8530 Mon Sep 17 00:00:00 2001 From: Hans-Martin von Gaudecker Date: Mon, 14 Apr 2025 09:45:14 +0200 Subject: [PATCH 1/4] Remove custom type conversions. --- .../test_full_taxes_and_transfers.py | 39 ----- src/ttsim/compute_taxes_and_transfers.py | 119 +-------------- src/ttsim/typing.py | 140 +----------------- 3 files changed, 4 insertions(+), 294 deletions(-) diff --git a/src/_gettsim_tests/test_full_taxes_and_transfers.py b/src/_gettsim_tests/test_full_taxes_and_transfers.py index 0ef23d490..386a2bf01 100644 --- a/src/_gettsim_tests/test_full_taxes_and_transfers.py +++ b/src/_gettsim_tests/test_full_taxes_and_transfers.py @@ -1,4 +1,3 @@ -import dags.tree as dt import pytest from _gettsim.config import FOREIGN_KEYS, SUPPORTED_GROUPINGS @@ -8,8 +7,6 @@ load_policy_test_data, ) from ttsim import compute_taxes_and_transfers -from ttsim.function_types import PolicyInput -from ttsim.typing import check_series_has_expected_type test_data = load_policy_test_data("full_taxes_and_transfers") @@ -27,42 +24,6 @@ def test_full_taxes_transfers(test: PolicyTest): ) -@pytest.mark.parametrize("test", test_data, ids=lambda x: x.test_name) -def test_data_types(test: PolicyTest): - environment = cached_set_up_policy_environment(date=test.date) - - result = compute_taxes_and_transfers( - data_tree=test.input_tree, - environment=environment, - targets_tree=test.target_structure, - foreign_keys=FOREIGN_KEYS, - supported_groupings=SUPPORTED_GROUPINGS, - ) - - flat_types_input_variables = { - n: pi.data_type - for n, pi in dt.flatten_to_qual_names(environment.raw_objects_tree).items() - if isinstance(pi, PolicyInput) - } - flat_functions = dt.flatten_to_qual_names(environment.raw_objects_tree) - - for column_name, result_array in dt.flatten_to_qual_names(result).items(): - if column_name in flat_types_input_variables: - internal_type = flat_types_input_variables[column_name] - elif column_name in flat_functions: - internal_type = flat_functions[column_name].__annotations__["return"] - else: - # TODO (@hmgaudecker): Implement easy way to find out expected type of - # aggregated functions - # https://github.com/iza-institute-of-labor-economics/gettsim/issues/604 - if column_name.endswith(("_sn", "_hh", "_fg", "_bg", "_eg", "_ehe")): - internal_type = None - else: - raise ValueError(f"Column name {column_name} unknown.") - if internal_type: - assert check_series_has_expected_type(result_array, internal_type) - - @pytest.mark.skip( reason="Got rid of DEFAULT_TARGETS, there might not be a replacement." ) diff --git a/src/ttsim/compute_taxes_and_transfers.py b/src/ttsim/compute_taxes_and_transfers.py index e9eec9e6c..d06605818 100644 --- a/src/ttsim/compute_taxes_and_transfers.py +++ b/src/ttsim/compute_taxes_and_transfers.py @@ -3,7 +3,7 @@ import functools import inspect import warnings -from typing import TYPE_CHECKING, Any, get_args +from typing import TYPE_CHECKING, Any import dags import dags.tree as dt @@ -15,9 +15,7 @@ ) from ttsim.config import numpy_or_jax as np from ttsim.function_types import ( - DerivedAggregationFunction, GroupByFunction, - PolicyFunction, PolicyInput, TTSIMFunction, ) @@ -34,10 +32,6 @@ partition_by_reference_dict, ) from ttsim.time_conversion import TIME_UNITS -from ttsim.typing import ( - check_series_has_expected_type, - convert_series_to_internal_type, -) if TYPE_CHECKING: from ttsim.typing import ( @@ -128,10 +122,6 @@ def compute_taxes_and_transfers( ) _warn_if_functions_overridden_by_data(functions_overridden) - # data_with_correct_types = _convert_data_to_correct_types( - # data=data, - # functions_overridden=functions_overridden, - # ) functions_with_rounding_specs = ( _add_rounding_to_functions(functions=functions_to_be_used) @@ -224,113 +214,6 @@ def _get_top_level_namespace( return all_top_level_names -def _convert_data_to_correct_types( - data: QualNameDataDict, functions_overridden: QualNameTTSIMFunctionDict -) -> QualNameDataDict: - """Convert all data columns to the type that is expected by GETTSIM. - - Parameters - ---------- - data - Data provided by the user. - functions_overridden - Functions that are overridden by data. - - Returns - ------- - Data with correct types. - - """ - collected_errors = ["The data types of the following columns are invalid:\n"] - collected_conversions = [ - "The data types of the following input variables have been converted:" - ] - general_warning = ( - "Note that the automatic conversion of data types is unsafe and that" - " its correctness cannot be guaranteed." - " The best solution is to convert all columns to the expected data" - " types yourself." - ) - - data_with_correct_types = {} - - for name, series in data.items(): - internal_type = None - - # Look for column in TYPES_INPUT_VARIABLES - types_qualified_input_variables = dt.flatten_to_qual_names( - TYPES_INPUT_VARIABLES - ) - if name in types_qualified_input_variables: - internal_type = types_qualified_input_variables[name] - # Look for column in functions_tree_overridden - elif name in functions_overridden: - func = functions_overridden[name] - func_is_group_by_function = isinstance( - getattr(func, "__wrapped__", func), GroupByFunction - ) - func_is_policy_function = isinstance( - getattr(func, "__wrapped__", func), PolicyFunction - ) and not isinstance( - getattr(func, "__wrapped__", func), DerivedAggregationFunction - ) - skip_vectorization = ( - func.skip_vectorization if func_is_policy_function else True - ) - return_annotation_is_array = ( - func_is_group_by_function or func_is_policy_function - ) and skip_vectorization - if return_annotation_is_array: - # Assumes that things are annotated with numpy.ndarray([dtype]), might - # require a change if using proper numpy.typing. Not changing for now - # as we will likely switch to JAX completely. - internal_type = get_args(func.__annotations__["return"])[0] - elif "return" in func.__annotations__: - internal_type = func.__annotations__["return"] - else: - pass - else: - pass - - # Make conversion if necessary - if internal_type and not check_series_has_expected_type( - series=series, internal_type=internal_type - ): - try: - converted_leaf = convert_series_to_internal_type( - series=series, internal_type=internal_type - ) - data_with_correct_types[name] = converted_leaf - collected_conversions.append( - f" - {name} from {series.dtype} to {internal_type.__name__}" - ) - except ValueError as e: - collected_errors.append(f"\n - {name}: {e}") - else: - data_with_correct_types[name] = series - - # If any error occured raise Error - if len(collected_errors) > 1: - msg = """ - Note that conversion from floating point to integers or Booleans inherently - suffers from approximation error. It might well be that your data seemingly - obey the restrictions when scrolling through them, but in fact they do not - (for example, because 1e-15 is displayed as 0.0). \n The best solution is to - convert all columns to the expected data types yourself. - """ - collected_errors = "\n".join(collected_errors) - raise ValueError(format_errors_and_warnings(collected_errors + msg)) - # Otherwise raise warning which lists all successful conversions - elif len(collected_conversions) > 1: - collected_conversions = format_list_linewise(collected_conversions) - warnings.warn( - collected_conversions + "\n" + "\n" + general_warning, - stacklevel=2, - ) - - return data_with_correct_types - - def _create_input_data_for_concatenated_function( data: QualNameDataDict, functions: QualNameTTSIMFunctionDict, diff --git a/src/ttsim/typing.py b/src/ttsim/typing.py index 4dd6427d1..0793168ea 100644 --- a/src/ttsim/typing.py +++ b/src/ttsim/typing.py @@ -1,20 +1,10 @@ from typing import TYPE_CHECKING, NewType -import numpy -import pandas as pd -from pandas.api.types import ( - is_bool_dtype, - is_datetime64_any_dtype, - is_float_dtype, - is_integer_dtype, - is_object_dtype, -) - -from ttsim.config import numpy_or_jax as np - if TYPE_CHECKING: from collections.abc import Mapping + import pandas as pd + # Make these available for import from other modules. from dags.tree.typing import ( # noqa: F401 GenericCallable, @@ -24,6 +14,7 @@ ) from ttsim.aggregation import AggregateByGroupSpec, AggregateByPIDSpec + from ttsim.config import numpy_or_jax as np from ttsim.function_types import PolicyInput, TTSIMFunction, TTSIMObject NestedTTSIMObjectDict = Mapping[str, TTSIMObject | "NestedTTSIMObjectDict"] @@ -47,128 +38,3 @@ DashedISOString = NewType("DashedISOString", str) """A string representing a date in the format 'YYYY-MM-DD'.""" - - -def check_series_has_expected_type(series: pd.Series, internal_type: np.dtype) -> bool: - """Checks whether used series has already expected internal type. - - Parameters - ---------- - series : pandas.Series or pandas.DataFrame or dict of pandas.Series - Data provided by the user. - internal_type : TypeVar - One of the internal gettsim types. - - Returns - ------- - Bool - - """ - if ( - (internal_type == float) & (is_float_dtype(series)) - or (internal_type == int) & (is_integer_dtype(series)) - or (internal_type == bool) & (is_bool_dtype(series)) - or (internal_type == numpy.datetime64) & (is_datetime64_any_dtype(series)) - ): - out = True - else: - out = False - - return out - - -def convert_series_to_internal_type( - series: pd.Series, internal_type: np.dtype -) -> pd.Series: - """Check if data type of series fits to the internal type of gettsim and otherwise - convert data type of series to the internal type of gettsim. - - Parameters - ---------- - series : pd.Series - Some data series. - internal_type : TypeVar - One of the internal gettsim types. - - Returns - ------- - out : adjusted pd.Series - - """ - # Copy input series in out - out = series.copy() - - basic_error_msg = ( - f"Conversion from input type {out.dtype} to {internal_type.__name__} failed." - ) - if is_object_dtype(out): - raise ValueError(basic_error_msg + " Object type is not supported as input.") - else: - # Conversion to float - if internal_type == float: - # Conversion from boolean to float fails - if is_bool_dtype(out): - raise ValueError(basic_error_msg + " This conversion is not supported.") - else: - try: - out = out.astype(float) - except ValueError as e: - raise ValueError(basic_error_msg) from e - - # Conversion to int - elif internal_type == int: - if is_float_dtype(out): - # checking if decimal places are equal to 0, if not return error - if np.array_equal(out, out.astype(np.int64)): - out = out.astype(np.int64) - else: - raise ValueError( - basic_error_msg + " This conversion is only supported if all" - " decimal places of input data are equal to 0." - ) - else: - try: - out = out.astype(np.int64) - except ValueError as e: - raise ValueError(basic_error_msg) from e - - # Conversion to boolean - elif internal_type == bool: - # if input data type is integer - if is_integer_dtype(out): - # check if series consists only of 1 or 0 - if len([v for v in out.unique() if v not in [1, 0]]) == 0: - out = out.astype(bool) - else: - raise ValueError( - basic_error_msg + " This conversion is only supported if" - " input data exclusively contains the values 1 and 0." - ) - # if input data type is float - elif is_float_dtype(out): - # check if series consists only of 1.0 or 0.0 - if len([v for v in out.unique() if v not in [1, 0]]) == 0: - out = out.astype(bool) - else: - raise ValueError( - basic_error_msg + " This conversion is only supported if" - " input data exclusively contains the values 1.0 and 0.0." - ) - - else: - raise ValueError( - basic_error_msg + " Conversion to boolean is only supported for" - " int and float columns." - ) - - # Conversion to DateTime - elif internal_type == np.datetime64: - if not is_datetime64_any_dtype(out): - try: - out = out.astype(np.datetime64) - except ValueError as e: - raise ValueError(basic_error_msg) from e - else: - raise ValueError(f"The internal type {internal_type} is not yet supported.") - - return out From 11e2a11af43e799663ca90d29b32653254d10e14 Mon Sep 17 00:00:00 2001 From: Hans-Martin von Gaudecker Date: Mon, 14 Apr 2025 09:55:39 +0200 Subject: [PATCH 2/4] Was too greedy in prior commit, this can stay, but at different location. --- .../test_full_taxes_and_transfers.py | 38 +++++++++++++++++++ src/ttsim/function_types.py | 37 ++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/src/_gettsim_tests/test_full_taxes_and_transfers.py b/src/_gettsim_tests/test_full_taxes_and_transfers.py index 386a2bf01..5cd6579e3 100644 --- a/src/_gettsim_tests/test_full_taxes_and_transfers.py +++ b/src/_gettsim_tests/test_full_taxes_and_transfers.py @@ -1,3 +1,4 @@ +import dags.tree as dt import pytest from _gettsim.config import FOREIGN_KEYS, SUPPORTED_GROUPINGS @@ -7,6 +8,7 @@ load_policy_test_data, ) from ttsim import compute_taxes_and_transfers +from ttsim.function_types import PolicyInput, check_series_has_expected_type test_data = load_policy_test_data("full_taxes_and_transfers") @@ -24,6 +26,42 @@ def test_full_taxes_transfers(test: PolicyTest): ) +@pytest.mark.parametrize("test", test_data, ids=lambda x: x.test_name) +def test_data_types(test: PolicyTest): + environment = cached_set_up_policy_environment(date=test.date) + + result = compute_taxes_and_transfers( + data_tree=test.input_tree, + environment=environment, + targets_tree=test.target_structure, + foreign_keys=FOREIGN_KEYS, + supported_groupings=SUPPORTED_GROUPINGS, + ) + + flat_types_input_variables = { + n: pi.data_type + for n, pi in dt.flatten_to_qual_names(environment.raw_objects_tree).items() + if isinstance(pi, PolicyInput) + } + flat_functions = dt.flatten_to_qual_names(environment.raw_objects_tree) + + for column_name, result_array in dt.flatten_to_qual_names(result).items(): + if column_name in flat_types_input_variables: + internal_type = flat_types_input_variables[column_name] + elif column_name in flat_functions: + internal_type = flat_functions[column_name].__annotations__["return"] + else: + # TODO (@hmgaudecker): Implement easy way to find out expected type of + # aggregated functions + # https://github.com/iza-institute-of-labor-economics/gettsim/issues/604 + if column_name.endswith(("_sn", "_hh", "_fg", "_bg", "_eg", "_ehe")): + internal_type = None + else: + raise ValueError(f"Column name {column_name} unknown.") + if internal_type: + assert check_series_has_expected_type(result_array, internal_type) + + @pytest.mark.skip( reason="Got rid of DEFAULT_TARGETS, there might not be a replacement." ) diff --git a/src/ttsim/function_types.py b/src/ttsim/function_types.py index 6ae2de742..bf4e15ed2 100644 --- a/src/ttsim/function_types.py +++ b/src/ttsim/function_types.py @@ -7,6 +7,12 @@ from typing import TYPE_CHECKING, Literal, TypeVar import numpy +from pandas.api.types import ( + is_bool_dtype, + is_datetime64_any_dtype, + is_float_dtype, + is_integer_dtype, +) from ttsim.rounding import RoundingSpec from ttsim.shared import to_datetime, validate_date_range @@ -14,6 +20,9 @@ if TYPE_CHECKING: from collections.abc import Callable + import pandas as pd + + from ttsim.config import numpy_or_jax as np from ttsim.typing import DashedISOString T = TypeVar("T") @@ -423,3 +432,31 @@ def _convert_and_validate_dates( validate_date_range(start_date, end_date) return start_date, end_date + + +def check_series_has_expected_type(series: pd.Series, internal_type: np.dtype) -> bool: + """Checks whether used series has already expected internal type. + + Parameters + ---------- + series : pandas.Series or pandas.DataFrame or dict of pandas.Series + Data provided by the user. + internal_type : TypeVar + One of the internal gettsim types. + + Returns + ------- + Bool + + """ + if ( + (internal_type == float) & (is_float_dtype(series)) + or (internal_type == int) & (is_integer_dtype(series)) + or (internal_type == bool) & (is_bool_dtype(series)) + or (internal_type == numpy.datetime64) & (is_datetime64_any_dtype(series)) + ): + out = True + else: + out = False + + return out From 8075c331ebfb88e7a1127fb5a2073e77144aa036 Mon Sep 17 00:00:00 2001 From: Hans-Martin von Gaudecker Date: Mon, 14 Apr 2025 09:59:04 +0200 Subject: [PATCH 3/4] Comment out TTSIM tests, which I had missed previously. --- .../ttsim/test_compute_taxes_and_transfers.py | 126 +++++++++--------- 1 file changed, 62 insertions(+), 64 deletions(-) diff --git a/tests/ttsim/test_compute_taxes_and_transfers.py b/tests/ttsim/test_compute_taxes_and_transfers.py index f07c94aeb..ea7eaaf3f 100644 --- a/tests/ttsim/test_compute_taxes_and_transfers.py +++ b/tests/ttsim/test_compute_taxes_and_transfers.py @@ -7,12 +7,10 @@ import pandas as pd import pytest from mettsim.config import FOREIGN_KEYS, SUPPORTED_GROUPINGS -from mettsim.payroll_tax.group_by_ids import fam_id, sp_id from ttsim.aggregation import AggregateByGroupSpec, AggregateByPIDSpec, AggregationType from ttsim.compute_taxes_and_transfers import ( FunctionsAndColumnsOverlapWarning, - _convert_data_to_correct_types, _fail_if_foreign_keys_are_invalid, _fail_if_group_variables_not_constant_within_groups, _fail_if_p_id_is_non_unique, @@ -686,68 +684,68 @@ def test_fail_if_cannot_be_converted_to_internal_type( convert_series_to_internal_type(input_data, expected_type) -@pytest.mark.skip -@pytest.mark.parametrize( - "data, functions_overridden", - [ - ( - {"sp_id": pd.Series([1, 2, 3])}, - {"sp_id": sp_id}, - ), - ( - {"fam_id": pd.Series([1, 2, 3])}, - {"fam_id": fam_id}, - ), - ], -) -def test_provide_endogenous_groupings(data, functions_overridden): - """Test whether GETTSIM handles user-provided grouping IDs, which would otherwise be - set endogenously.""" - _convert_data_to_correct_types(data, functions_overridden) - - -@pytest.mark.skip -@pytest.mark.parametrize( - "data, functions_overridden, error_match", - [ - ( - {"hh_id": pd.Series([1, 1.1, 2])}, - {}, - "- hh_id: Conversion from input type float64 to int", - ), - ( - {"gondorian": pd.Series([1.1, 0.0, 1.0])}, - {}, - "- gondorian: Conversion from input type float64 to bool", - ), - ( - { - "hh_id": pd.Series([1.0, 2.0, 3.0]), - "gondorian": pd.Series([2, 0, 1]), - }, - {}, - "- gondorian: Conversion from input type int64 to bool", - ), - ( - {"gondorian": pd.Series(["True", "False"])}, - {}, - "- gondorian: Conversion from input type object to bool", - ), - ( - { - "hh_id": pd.Series([1, "1", 2]), - "payroll_tax__amount": pd.Series(["2000", 3000, 4000]), - }, - {}, - "- hh_id: Conversion from input type object to int failed.", - ), - ], -) -def test_fail_if_cannot_be_converted_to_correct_type( - data, functions_overridden, error_match -): - with pytest.raises(ValueError, match=error_match): - _convert_data_to_correct_types(data, functions_overridden) +# @pytest.mark.skip +# @pytest.mark.parametrize( +# "data, functions_overridden", +# [ +# ( +# {"sp_id": pd.Series([1, 2, 3])}, +# {"sp_id": sp_id}, +# ), +# ( +# {"fam_id": pd.Series([1, 2, 3])}, +# {"fam_id": fam_id}, +# ), +# ], +# ) +# def test_provide_endogenous_groupings(data, functions_overridden): +# """Test whether TTSIM handles user-provided grouping IDs, which would otherwise be +# set endogenously.""" +# _convert_data_to_correct_types(data, functions_overridden) + + +# @pytest.mark.skip +# @pytest.mark.parametrize( +# "data, functions_overridden, error_match", +# [ +# ( +# {"hh_id": pd.Series([1, 1.1, 2])}, +# {}, +# "- hh_id: Conversion from input type float64 to int", +# ), +# ( +# {"gondorian": pd.Series([1.1, 0.0, 1.0])}, +# {}, +# "- gondorian: Conversion from input type float64 to bool", +# ), +# ( +# { +# "hh_id": pd.Series([1.0, 2.0, 3.0]), +# "gondorian": pd.Series([2, 0, 1]), +# }, +# {}, +# "- gondorian: Conversion from input type int64 to bool", +# ), +# ( +# {"gondorian": pd.Series(["True", "False"])}, +# {}, +# "- gondorian: Conversion from input type object to bool", +# ), +# ( +# { +# "hh_id": pd.Series([1, "1", 2]), +# "payroll_tax__amount": pd.Series(["2000", 3000, 4000]), +# }, +# {}, +# "- hh_id: Conversion from input type object to int failed.", +# ), +# ], +# ) +# def test_fail_if_cannot_be_converted_to_correct_type( +# data, functions_overridden, error_match +# ): +# with pytest.raises(ValueError, match=error_match): +# _convert_data_to_correct_types(data, functions_overridden) @pytest.mark.parametrize( From 7d810fb4e4d6fa2e475a9fde43a85acf18a06078 Mon Sep 17 00:00:00 2001 From: Marvin Immesberger Date: Mon, 14 Apr 2025 13:41:00 +0200 Subject: [PATCH 4/4] Comment out more tests that relate to type conversion. --- .../ttsim/test_compute_taxes_and_transfers.py | 203 +++++++++--------- 1 file changed, 101 insertions(+), 102 deletions(-) diff --git a/tests/ttsim/test_compute_taxes_and_transfers.py b/tests/ttsim/test_compute_taxes_and_transfers.py index ea7eaaf3f..a1d1968ee 100644 --- a/tests/ttsim/test_compute_taxes_and_transfers.py +++ b/tests/ttsim/test_compute_taxes_and_transfers.py @@ -22,7 +22,6 @@ from ttsim.function_types import group_by_function, policy_function, policy_input from ttsim.policy_environment import PolicyEnvironment from ttsim.shared import assert_valid_ttsim_pytree -from ttsim.typing import convert_series_to_internal_type @policy_input() @@ -579,109 +578,109 @@ def source_func(p_id: int) -> int: # noqa: ARG001 numpy.testing.assert_array_almost_equal(out, expected) -@pytest.mark.parametrize( - "input_data, expected_type, expected_output_data", - [ - (pd.Series([0, 1, 0]), bool, pd.Series([False, True, False])), - (pd.Series([1.0, 0.0, 1]), bool, pd.Series([True, False, True])), - (pd.Series([200, 550, 237]), float, pd.Series([200.0, 550.0, 237.0])), - (pd.Series([1.0, 4.0, 10.0]), int, pd.Series([1, 4, 10])), - (pd.Series([200.0, 567.0]), int, pd.Series([200, 567])), - (pd.Series([1.0, 0.0]), bool, pd.Series([True, False])), - ], -) -def test_convert_series_to_internal_types( - input_data, expected_type, expected_output_data -): - adjusted_input = convert_series_to_internal_type(input_data, expected_type) - pd.testing.assert_series_equal(adjusted_input, expected_output_data) +# @pytest.mark.parametrize( +# "input_data, expected_type, expected_output_data", +# [ +# (pd.Series([0, 1, 0]), bool, pd.Series([False, True, False])), +# (pd.Series([1.0, 0.0, 1]), bool, pd.Series([True, False, True])), +# (pd.Series([200, 550, 237]), float, pd.Series([200.0, 550.0, 237.0])), +# (pd.Series([1.0, 4.0, 10.0]), int, pd.Series([1, 4, 10])), +# (pd.Series([200.0, 567.0]), int, pd.Series([200, 567])), +# (pd.Series([1.0, 0.0]), bool, pd.Series([True, False])), +# ], +# ) +# def test_convert_series_to_internal_types( +# input_data, expected_type, expected_output_data +# ): +# adjusted_input = convert_series_to_internal_type(input_data, expected_type) +# pd.testing.assert_series_equal(adjusted_input, expected_output_data) -@pytest.mark.parametrize( - "input_data, expected_type, error_match", - [ - ( - pd.Series(["Hallo", 200, 325]), - float, - "Conversion from input type object to float failed.", - ), - ( - pd.Series([True, False]), - float, - "Conversion from input type bool to float failed.", - ), - ( - pd.Series(["a", "b", "c"]).astype("category"), - float, - "Conversion from input type category to float failed.", - ), - ( - pd.Series(["2.0", "3.0"]), - int, - "Conversion from input type object to int failed.", - ), - ( - pd.Series([1.5, 1.0, 2.9]), - int, - "Conversion from input type float64 to int failed.", - ), - ( - pd.Series(["a", "b", "c"]).astype("category"), - int, - "Conversion from input type category to int failed.", - ), - ( - pd.Series([5, 2, 3]), - bool, - "Conversion from input type int64 to bool failed.", - ), - ( - pd.Series([1.5, 1.0, 35.0]), - bool, - "Conversion from input type float64 to bool failed.", - ), - ( - pd.Series(["a", "b", "c"]).astype("category"), - bool, - "Conversion from input type category to bool failed.", - ), - ( - pd.Series(["richtig"]), - bool, - "Conversion from input type object to bool failed.", - ), - ( - pd.Series(["True", "False", ""]), - bool, - "Conversion from input type object to bool failed.", - ), - ( - pd.Series(["true"]), - bool, - "Conversion from input type object to bool failed.", - ), - ( - pd.Series(["zweitausendzwanzig"]), - numpy.datetime64, - "Conversion from input type object to datetime64 failed.", - ), - ( - pd.Series([True, True]), - numpy.datetime64, - "Conversion from input type bool to datetime64 failed.", - ), - ( - pd.Series([2020]), - str, - "The internal type is not yet supported.", - ), - ], -) -def test_fail_if_cannot_be_converted_to_internal_type( - input_data, expected_type, error_match -): - with pytest.raises(ValueError, match=error_match): - convert_series_to_internal_type(input_data, expected_type) +# @pytest.mark.parametrize( +# "input_data, expected_type, error_match", +# [ +# ( +# pd.Series(["Hallo", 200, 325]), +# float, +# "Conversion from input type object to float failed.", +# ), +# ( +# pd.Series([True, False]), +# float, +# "Conversion from input type bool to float failed.", +# ), +# ( +# pd.Series(["a", "b", "c"]).astype("category"), +# float, +# "Conversion from input type category to float failed.", +# ), +# ( +# pd.Series(["2.0", "3.0"]), +# int, +# "Conversion from input type object to int failed.", +# ), +# ( +# pd.Series([1.5, 1.0, 2.9]), +# int, +# "Conversion from input type float64 to int failed.", +# ), +# ( +# pd.Series(["a", "b", "c"]).astype("category"), +# int, +# "Conversion from input type category to int failed.", +# ), +# ( +# pd.Series([5, 2, 3]), +# bool, +# "Conversion from input type int64 to bool failed.", +# ), +# ( +# pd.Series([1.5, 1.0, 35.0]), +# bool, +# "Conversion from input type float64 to bool failed.", +# ), +# ( +# pd.Series(["a", "b", "c"]).astype("category"), +# bool, +# "Conversion from input type category to bool failed.", +# ), +# ( +# pd.Series(["richtig"]), +# bool, +# "Conversion from input type object to bool failed.", +# ), +# ( +# pd.Series(["True", "False", ""]), +# bool, +# "Conversion from input type object to bool failed.", +# ), +# ( +# pd.Series(["true"]), +# bool, +# "Conversion from input type object to bool failed.", +# ), +# ( +# pd.Series(["zweitausendzwanzig"]), +# numpy.datetime64, +# "Conversion from input type object to datetime64 failed.", +# ), +# ( +# pd.Series([True, True]), +# numpy.datetime64, +# "Conversion from input type bool to datetime64 failed.", +# ), +# ( +# pd.Series([2020]), +# str, +# "The internal type is not yet supported.", +# ), +# ], +# ) +# def test_fail_if_cannot_be_converted_to_internal_type( +# input_data, expected_type, error_match +# ): +# with pytest.raises(ValueError, match=error_match): +# convert_series_to_internal_type(input_data, expected_type) # @pytest.mark.skip