diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 9fd35414..31404768 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -29,7 +29,10 @@ jobs: - name: install-reqs run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt - name: Run pytest - run: pytest tests --cov=dataframe_api_compat --cov=tests --cov-fail-under=100 + run: | + pytest tests --cov=dataframe_api_compat/pandas_standard --cov=tests --cov-append --cov-fail-under=50 --cov-report= --library pandas-numpy + pytest tests --cov=dataframe_api_compat/pandas_standard --cov=tests --cov-append --cov-fail-under=50 --cov-report= --library pandas-nullable + pytest tests --cov=dataframe_api_compat/polars_standard --cov=tests --cov-append --cov-fail-under=100 --library polars-lazy - name: install type-checking reqs run: python -m pip install 'git+https://github.com/data-apis/dataframe-api.git#egg=dataframe_api&subdirectory=spec/API_specification' mypy typing-extensions - name: run mypy diff --git a/tests/column/and_or_test.py b/tests/column/and_or_test.py index 2fe5846c..3cdd98d5 100644 --- a/tests/column/and_or_test.py +++ b/tests/column/and_or_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import compare_column_with_reference -def test_column_and(library: str) -> None: +def test_column_and(library: BaseHandler) -> None: df = bool_dataframe_1(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() ser = df.col("a") @@ -14,7 +15,7 @@ def test_column_and(library: str) -> None: compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) -def test_column_or(library: str) -> None: +def test_column_or(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") @@ -24,7 +25,7 @@ def test_column_or(library: str) -> None: compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) -def test_column_and_with_scalar(library: str) -> None: +def test_column_and_with_scalar(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") @@ -34,7 +35,7 @@ def test_column_and_with_scalar(library: str) -> None: compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) -def test_column_or_with_scalar(library: str) -> None: +def test_column_or_with_scalar(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") diff --git a/tests/column/any_all_test.py b/tests/column/any_all_test.py index a775dfff..9f59786f 100644 --- a/tests/column/any_all_test.py +++ b/tests/column/any_all_test.py @@ -2,10 +2,11 @@ import pytest +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 -def test_expr_any(library: str) -> None: +def test_expr_any(library: BaseHandler) -> None: df = bool_dataframe_1(library) with pytest.raises(RuntimeError): bool(df.col("a").any()) @@ -15,7 +16,7 @@ def test_expr_any(library: str) -> None: assert bool(result.persist()) -def test_expr_all(library: str) -> None: +def test_expr_all(library: BaseHandler) -> None: df = bool_dataframe_1(library).persist() result = df.col("a").all() assert not bool(result) diff --git a/tests/column/cast_test.py b/tests/column/cast_test.py index 111a4300..2e206b04 100644 --- a/tests/column/cast_test.py +++ b/tests/column/cast_test.py @@ -1,8 +1,9 @@ +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_cast_integers(library: str) -> None: +def test_cast_integers(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.assign(df.col("a").cast(ns.Int32())) diff --git a/tests/column/col_sorted_indices_test.py b/tests/column/col_sorted_indices_test.py index bd46afe5..2bcd0850 100644 --- a/tests/column/col_sorted_indices_test.py +++ b/tests/column/col_sorted_indices_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_6 -def test_expression_sorted_indices_ascending(library: str) -> None: +def test_expression_sorted_indices_ascending(library: BaseHandler) -> None: df = integer_dataframe_6(library) ns = df.__dataframe_namespace__() col = df.col @@ -14,7 +15,7 @@ def test_expression_sorted_indices_ascending(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_expression_sorted_indices_descending(library: str) -> None: +def test_expression_sorted_indices_descending(library: BaseHandler) -> None: df = integer_dataframe_6(library) ns = df.__dataframe_namespace__() col = df.col @@ -24,7 +25,7 @@ def test_expression_sorted_indices_descending(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_column_sorted_indices_ascending(library: str) -> None: +def test_column_sorted_indices_ascending(library: BaseHandler) -> None: df = integer_dataframe_6(library) ns = df.__dataframe_namespace__() sorted_indices = df.col("b").sorted_indices() @@ -33,7 +34,7 @@ def test_column_sorted_indices_ascending(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_column_sorted_indices_descending(library: str) -> None: +def test_column_sorted_indices_descending(library: BaseHandler) -> None: df = integer_dataframe_6(library) ns = df.__dataframe_namespace__() sorted_indices = df.col("b").sorted_indices(ascending=False) diff --git a/tests/column/col_to_array_object_test.py b/tests/column/col_to_array_object_test.py index ab87f8aa..7bd50bf8 100644 --- a/tests/column/col_to_array_object_test.py +++ b/tests/column/col_to_array_object_test.py @@ -3,6 +3,7 @@ import numpy as np import pytest +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import integer_dataframe_1 @@ -22,21 +23,21 @@ "float64", ], ) -def test_column_to_array_object(library: str, dtype: str) -> None: # noqa: ARG001 +def test_column_to_array_object(library: BaseHandler, dtype: str) -> None: # noqa: ARG001 ser = integer_dataframe_1(library).col("a").persist() result = np.asarray(ser.to_array()) expected = np.array([1, 2, 3], dtype=np.int64) np.testing.assert_array_equal(result, expected) -def test_column_to_array_object_bool(library: str) -> None: +def test_column_to_array_object_bool(library: BaseHandler) -> None: df = bool_dataframe_1(library).persist().col("a") result = np.asarray(df.to_array()) expected = np.array([True, True, False], dtype="bool") np.testing.assert_array_equal(result, expected) -def test_column_to_array_object_invalid(library: str) -> None: +def test_column_to_array_object_invalid(library: BaseHandler) -> None: df = bool_dataframe_1(library).col("a") with pytest.raises(RuntimeError): _ = np.asarray(df.to_array()) diff --git a/tests/column/comparisons_test.py b/tests/column/comparisons_test.py index 9ba58710..fdf2cdca 100644 --- a/tests/column/comparisons_test.py +++ b/tests/column/comparisons_test.py @@ -4,6 +4,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_7 @@ -28,7 +29,7 @@ ], ) def test_column_comparisons( - library: str, + library: BaseHandler, comparison: str, expected_data: list[object], expected_dtype: str, @@ -40,7 +41,7 @@ def test_column_comparisons( other = df.col("b") result = df.assign(getattr(ser, comparison)(other).rename("result")) expected_ns_dtype = getattr(ns, expected_dtype) - if comparison == "__pow__" and library in ("polars", "polars-lazy"): + if comparison == "__pow__" and library.name in ("polars", "polars-lazy"): # TODO result = result.cast({"result": ns.Int64()}) expected_ns_dtype = ns.Int64 @@ -66,7 +67,7 @@ def test_column_comparisons( ], ) def test_column_comparisons_scalar( - library: str, + library: BaseHandler, comparison: str, expected_data: list[object], expected_dtype: str, @@ -78,7 +79,7 @@ def test_column_comparisons_scalar( other = 3 result = df.assign(getattr(ser, comparison)(other).rename("result")) expected_ns_dtype = getattr(ns, expected_dtype) - if comparison == "__pow__" and library in ("polars", "polars-lazy"): + if comparison == "__pow__" and library.name in ("polars", "polars-lazy"): result = result.cast({"result": ns.Int64()}) expected_ns_dtype = ns.Int64 compare_column_with_reference(result.col("result"), expected_data, expected_ns_dtype) @@ -93,7 +94,7 @@ def test_column_comparisons_scalar( ], ) def test_right_column_comparisons( - library: str, + library: BaseHandler, comparison: str, expected_data: list[object], ) -> None: diff --git a/tests/column/cross_df_comparisons_test.py b/tests/column/cross_df_comparisons_test.py index d7cca76e..26847551 100644 --- a/tests/column/cross_df_comparisons_test.py +++ b/tests/column/cross_df_comparisons_test.py @@ -2,16 +2,17 @@ import pytest +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 -def test_invalid_comparisons(library: str) -> None: +def test_invalid_comparisons(library: BaseHandler) -> None: with pytest.raises(ValueError): _ = integer_dataframe_1(library).col("a") > integer_dataframe_2(library).col("a") -def test_invalid_comparisons_scalar(library: str) -> None: +def test_invalid_comparisons_scalar(library: BaseHandler) -> None: with pytest.raises(ValueError): _ = ( integer_dataframe_1(library).col("a") diff --git a/tests/column/cumulative_test.py b/tests/column/cumulative_test.py index 2c229b82..8405d62a 100644 --- a/tests/column/cumulative_test.py +++ b/tests/column/cumulative_test.py @@ -5,6 +5,7 @@ from packaging.version import Version from packaging.version import parse +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 @@ -19,7 +20,7 @@ ], ) def test_cumulative_functions_column( - library: str, + library: BaseHandler, func: str, expected_data: list[float], ) -> None: @@ -30,7 +31,7 @@ def test_cumulative_functions_column( result = df.assign(getattr(ser, func)().rename("result")) if ( - parse(pd.__version__) < Version("2.0.0") and library == "pandas-nullable" + parse(pd.__version__) < Version("2.0.0") and library.name == "pandas-nullable" ): # pragma: no cover # Upstream bug result = result.cast({"result": ns.Int64()}) diff --git a/tests/column/divmod_test.py b/tests/column/divmod_test.py index dd16fec6..b56416a3 100644 --- a/tests/column/divmod_test.py +++ b/tests/column/divmod_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -def test_expression_divmod(library: str) -> None: +def test_expression_divmod(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") @@ -18,7 +19,7 @@ def test_expression_divmod(library: str) -> None: compare_column_with_reference(result.col("result"), [1, 2, 3], dtype=ns.Int64) -def test_expression_divmod_with_scalar(library: str) -> None: +def test_expression_divmod_with_scalar(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") diff --git a/tests/column/fill_nan_test.py b/tests/column/fill_nan_test.py index 137dd4e0..3b6c37c9 100644 --- a/tests/column/fill_nan_test.py +++ b/tests/column/fill_nan_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import nan_dataframe_1 -def test_column_fill_nan(library: str) -> None: +def test_column_fill_nan(library: BaseHandler) -> None: # TODO: test with nullable pandas, check null isn't filled df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() @@ -14,7 +15,7 @@ def test_column_fill_nan(library: str) -> None: compare_column_with_reference(result.col("result"), expected, dtype=ns.Float64) -def test_column_fill_nan_with_null(library: str) -> None: +def test_column_fill_nan_with_null(library: BaseHandler) -> None: # TODO: test with nullable pandas, check null isn't filled df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() diff --git a/tests/column/fill_null_test.py b/tests/column/fill_null_test.py index 611c9efb..35ef0a26 100644 --- a/tests/column/fill_null_test.py +++ b/tests/column/fill_null_test.py @@ -1,23 +1,24 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import nan_dataframe_1 from tests.utils import null_dataframe_2 -def test_fill_null_column(library: str) -> None: +def test_fill_null_column(library: BaseHandler) -> None: df = null_dataframe_2(library) ser = df.col("a") result = df.assign(ser.fill_null(0).rename("result")).col("result") - assert float(result.get_value(2).persist()) == 0.0 # type:ignore[arg-type] - assert float(result.get_value(1).persist()) != 0.0 # type:ignore[arg-type] - assert float(result.get_value(0).persist()) != 0.0 # type:ignore[arg-type] + assert float(result.get_value(2).persist()) == 0.0 # type: ignore[arg-type] + assert float(result.get_value(1).persist()) != 0.0 # type: ignore[arg-type] + assert float(result.get_value(0).persist()) != 0.0 # type: ignore[arg-type] -def test_fill_null_noop_column(library: str) -> None: +def test_fill_null_noop_column(library: BaseHandler) -> None: df = nan_dataframe_1(library) ser = df.col("a") result = df.assign(ser.fill_null(0).rename("result")).persist().col("result") - if library != "pandas-numpy": + if library.name not in ("pandas-numpy",): # nan should not have changed! assert float(result.get_value(2)) != float( # type: ignore[arg-type] result.get_value(2), # type: ignore[arg-type] diff --git a/tests/column/get_rows_by_mask_test.py b/tests/column/get_rows_by_mask_test.py index 2a170a0e..5e95ce2c 100644 --- a/tests/column/get_rows_by_mask_test.py +++ b/tests/column/get_rows_by_mask_test.py @@ -2,11 +2,12 @@ import pandas as pd +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -def test_column_filter(library: str) -> None: +def test_column_filter(library: BaseHandler) -> None: df = integer_dataframe_1(library) ser = df.col("a") mask = ser > 1 @@ -16,7 +17,7 @@ def test_column_filter(library: str) -> None: pd.testing.assert_series_equal(result_pd, expected) -def test_column_take_by_mask_noop(library: str) -> None: +def test_column_take_by_mask_noop(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") diff --git a/tests/column/get_rows_test.py b/tests/column/get_rows_test.py index 9254ef45..93dd3c8c 100644 --- a/tests/column/get_rows_test.py +++ b/tests/column/get_rows_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -def test_expression_take(library: str) -> None: +def test_expression_take(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") diff --git a/tests/column/get_value_test.py b/tests/column/get_value_test.py index 49a30c89..af333d46 100644 --- a/tests/column/get_value_test.py +++ b/tests/column/get_value_test.py @@ -1,13 +1,14 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_get_value(library: str) -> None: +def test_get_value(library: BaseHandler) -> None: result = integer_dataframe_1(library).persist().col("a").get_value(0) assert int(result) == 1 # type: ignore[call-overload] -def test_mean_scalar(library: str) -> None: +def test_mean_scalar(library: BaseHandler) -> None: result = integer_dataframe_1(library).persist().col("a").max() assert int(result) == 3 # type: ignore[call-overload] diff --git a/tests/column/invert_test.py b/tests/column/invert_test.py index b6003eea..32a39fe5 100644 --- a/tests/column/invert_test.py +++ b/tests/column/invert_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import compare_column_with_reference -def test_expression_invert(library: str) -> None: +def test_expression_invert(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") @@ -13,7 +14,7 @@ def test_expression_invert(library: str) -> None: compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) -def test_column_invert(library: str) -> None: +def test_column_invert(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") diff --git a/tests/column/is_in_test.py b/tests/column/is_in_test.py index f840706b..d26edcf0 100644 --- a/tests/column/is_in_test.py +++ b/tests/column/is_in_test.py @@ -5,6 +5,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import float_dataframe_1 from tests.utils import float_dataframe_2 @@ -24,8 +25,8 @@ ) @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated") def test_is_in( - library: str, - df_factory: Callable[[str], Any], + library: BaseHandler, + df_factory: Callable[[BaseHandler], Any], expected_values: list[bool], ) -> None: df = df_factory(library) @@ -46,8 +47,8 @@ def test_is_in( ) @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated") def test_expr_is_in( - library: str, - df_factory: Callable[[str], Any], + library: BaseHandler, + df_factory: Callable[[BaseHandler], Any], expected_values: list[bool], ) -> None: df = df_factory(library) diff --git a/tests/column/is_nan_test.py b/tests/column/is_nan_test.py index b0d04025..3e0694fb 100644 --- a/tests/column/is_nan_test.py +++ b/tests/column/is_nan_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import nan_dataframe_1 -def test_column_is_nan(library: str) -> None: +def test_column_is_nan(library: BaseHandler) -> None: df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") diff --git a/tests/column/is_null_test.py b/tests/column/is_null_test.py index fdc8e34b..b39cfdf9 100644 --- a/tests/column/is_null_test.py +++ b/tests/column/is_null_test.py @@ -1,23 +1,24 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import nan_dataframe_1 from tests.utils import null_dataframe_1 -def test_column_is_null_1(library: str) -> None: +def test_column_is_null_1(library: BaseHandler) -> None: df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") result = df.assign(ser.is_null().rename("result")) - if library == "pandas-numpy": + if library.name == "pandas-numpy": expected = [False, False, True] else: expected = [False, False, False] compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) -def test_column_is_null_2(library: str) -> None: +def test_column_is_null_2(library: BaseHandler) -> None: df = null_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") diff --git a/tests/column/len_test.py b/tests/column/len_test.py index 7008cf2e..ae61df34 100644 --- a/tests/column/len_test.py +++ b/tests/column/len_test.py @@ -1,8 +1,9 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_column_len(library: str) -> None: +def test_column_len(library: BaseHandler) -> None: result = integer_dataframe_1(library).col("a").len().persist().scalar assert result == 3 diff --git a/tests/column/n_unique_test.py b/tests/column/n_unique_test.py index 3f7d9f36..3f824fc4 100644 --- a/tests/column/n_unique_test.py +++ b/tests/column/n_unique_test.py @@ -1,8 +1,9 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_column_len(library: str) -> None: +def test_column_len(library: BaseHandler) -> None: result = integer_dataframe_1(library).col("a").n_unique().persist().scalar assert result == 3 diff --git a/tests/column/name_test.py b/tests/column/name_test.py index efd5934d..94ba559c 100644 --- a/tests/column/name_test.py +++ b/tests/column/name_test.py @@ -5,11 +5,12 @@ from packaging.version import Version from packaging.version import parse +from tests.utils import BaseHandler from tests.utils import convert_to_standard_compliant_dataframe from tests.utils import integer_dataframe_1 -def test_name(library: str) -> None: +def test_name(library: BaseHandler) -> None: df = integer_dataframe_1(library).persist() name = df.col("a").name assert name == "a" diff --git a/tests/column/parent_dataframe_test.py b/tests/column/parent_dataframe_test.py index 63ca6dd4..7122c675 100644 --- a/tests/column/parent_dataframe_test.py +++ b/tests/column/parent_dataframe_test.py @@ -1,6 +1,7 @@ +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_parent_dataframe(library: str) -> None: +def test_parent_dataframe(library: BaseHandler) -> None: df = integer_dataframe_1(library) assert df.col("a").parent_dataframe is df diff --git a/tests/column/pow_test.py b/tests/column/pow_test.py index 253a7218..c34e3aed 100644 --- a/tests/column/pow_test.py +++ b/tests/column/pow_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_float_powers_column(library: str) -> None: +def test_float_powers_column(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") @@ -15,7 +16,7 @@ def test_float_powers_column(library: str) -> None: compare_dataframe_with_reference(result, expected, expected_dtype) # type: ignore[arg-type] -def test_float_powers_scalar_column(library: str) -> None: +def test_float_powers_scalar_column(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") @@ -26,26 +27,26 @@ def test_float_powers_scalar_column(library: str) -> None: compare_dataframe_with_reference(result, expected, expected_dtype) # type: ignore[arg-type] -def test_int_powers_column(library: str) -> None: +def test_int_powers_column(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") other = df.col("b") * 1 result = df.assign(ser.__pow__(other).rename("result")) - if library in ("polars", "polars-lazy"): + if library.name in ("polars", "polars-lazy"): result = result.cast({name: ns.Int64() for name in ("a", "b", "result")}) expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]} expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")} compare_dataframe_with_reference(result, expected, expected_dtype) -def test_int_powers_scalar_column(library: str) -> None: +def test_int_powers_scalar_column(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") other = 1 result = df.assign(ser.__pow__(other).rename("result")) - if library in ("polars", "polars-lazy"): + if library.name in ("polars", "polars-lazy"): result = result.cast({name: ns.Int64() for name in ("a", "b", "result")}) expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]} expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")} diff --git a/tests/column/reductions_test.py b/tests/column/reductions_test.py index 25d85d8b..107399d4 100644 --- a/tests/column/reductions_test.py +++ b/tests/column/reductions_test.py @@ -2,6 +2,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 @@ -20,7 +21,7 @@ ], ) def test_expression_reductions( - library: str, + library: BaseHandler, reduction: str, expected: float, expected_dtype: str, diff --git a/tests/column/rename_test.py b/tests/column/rename_test.py index 7904fb54..419bc922 100644 --- a/tests/column/rename_test.py +++ b/tests/column/rename_test.py @@ -1,9 +1,10 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_rename(library: str) -> None: +def test_rename(library: BaseHandler) -> None: df = integer_dataframe_1(library).persist() ser = df.col("a") result = ser.rename("new_name") diff --git a/tests/column/schema_test.py b/tests/column/schema_test.py index 20064924..79d2b298 100644 --- a/tests/column/schema_test.py +++ b/tests/column/schema_test.py @@ -3,15 +3,16 @@ import pytest from packaging.version import Version -from tests.utils import PANDAS_VERSION +from tests.utils import BaseHandler from tests.utils import mixed_dataframe_1 +from tests.utils import pandas_version @pytest.mark.skipif( - Version("2.0.0") > PANDAS_VERSION, + Version("2.0.0") > pandas_version(), reason="no pyarrow support", ) -def test_schema(library: str) -> None: +def test_schema(library: BaseHandler) -> None: df = mixed_dataframe_1(library) namespace = df.__dataframe_namespace__() result = df.col("a").dtype diff --git a/tests/column/shift_test.py b/tests/column/shift_test.py index 86c084ce..39feedb7 100644 --- a/tests/column/shift_test.py +++ b/tests/column/shift_test.py @@ -2,31 +2,32 @@ import polars as pl from polars.testing import assert_frame_equal +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import float_dataframe_1 from tests.utils import integer_dataframe_1 -def test_shift_with_fill_value(library: str) -> None: +def test_shift_with_fill_value(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.assign(df.col("a").shift(1).fill_null(999)) - if library == "pandas-numpy": + if library.name == "pandas-numpy": result = result.cast({name: ns.Int64() for name in ("a", "b")}) expected = {"a": [999, 1, 2], "b": [4, 5, 6]} compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_shift_without_fill_value(library: str) -> None: +def test_shift_without_fill_value(library: BaseHandler) -> None: df = float_dataframe_1(library) result = df.assign(df.col("a").shift(-1)) - if library == "pandas-numpy": + if library.name == "pandas-numpy": expected = pd.DataFrame({"a": [3.0, float("nan")]}) pd.testing.assert_frame_equal(result.dataframe, expected) - elif library == "pandas-nullable": + elif library.name == "pandas-nullable": expected = pd.DataFrame({"a": [3.0, None]}, dtype="Float64") pd.testing.assert_frame_equal(result.dataframe, expected) - elif library == "polars-lazy": + elif library.name == "polars-lazy": expected = pl.DataFrame({"a": [3.0, None]}) assert_frame_equal(result.dataframe.collect(), expected) # type: ignore[attr-defined] else: # pragma: no cover @@ -34,11 +35,11 @@ def test_shift_without_fill_value(library: str) -> None: raise AssertionError(msg) -def test_shift_with_fill_value_complicated(library: str) -> None: +def test_shift_with_fill_value_complicated(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.assign(df.col("a").shift(1).fill_null(df.col("a").mean())) - if library == "pandas-nullable": + if library.name == "pandas-nullable": result = result.cast({"a": ns.Float64()}) expected = {"a": [2.0, 1, 2], "b": [4, 5, 6]} expected_dtype = {"a": ns.Float64, "b": ns.Int64} diff --git a/tests/column/slice_rows_test.py b/tests/column/slice_rows_test.py index b37ba783..6047a5de 100644 --- a/tests/column/slice_rows_test.py +++ b/tests/column/slice_rows_test.py @@ -5,6 +5,7 @@ import pandas as pd import pytest +from tests.utils import BaseHandler from tests.utils import integer_dataframe_3 @@ -18,7 +19,7 @@ ], ) def test_column_slice_rows( - library: str, + library: BaseHandler, start: int | None, stop: int | None, step: int | None, diff --git a/tests/column/sort_test.py b/tests/column/sort_test.py index 7cafc5e3..045886a0 100644 --- a/tests/column/sort_test.py +++ b/tests/column/sort_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_6 -def test_expression_sort_ascending(library: str) -> None: +def test_expression_sort_ascending(library: BaseHandler) -> None: df = integer_dataframe_6(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() s_sorted = df.col("b").sort().rename("c") @@ -17,7 +18,7 @@ def test_expression_sort_ascending(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_expression_sort_descending(library: str) -> None: +def test_expression_sort_descending(library: BaseHandler) -> None: df = integer_dataframe_6(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() s_sorted = df.col("b").sort(ascending=False).rename("c") @@ -30,7 +31,7 @@ def test_expression_sort_descending(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_column_sort_ascending(library: str) -> None: +def test_column_sort_ascending(library: BaseHandler) -> None: df = integer_dataframe_6(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() s_sorted = df.col("b").sort().rename("c") @@ -43,7 +44,7 @@ def test_column_sort_ascending(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_column_sort_descending(library: str) -> None: +def test_column_sort_descending(library: BaseHandler) -> None: df = integer_dataframe_6(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() s_sorted = df.col("b").sort(ascending=False).rename("c") diff --git a/tests/column/statistics_test.py b/tests/column/statistics_test.py index b7e84868..02ff1716 100644 --- a/tests/column/statistics_test.py +++ b/tests/column/statistics_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -def test_mean(library: str) -> None: +def test_mean(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.assign((df.col("a") - df.col("a").mean()).rename("result")) diff --git a/tests/column/temporal/components_test.py b/tests/column/temporal/components_test.py index f0fb3bd6..ccc2cf64 100644 --- a/tests/column/temporal/components_test.py +++ b/tests/column/temporal/components_test.py @@ -4,6 +4,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import temporal_dataframe_1 @@ -21,7 +22,7 @@ ("unix_timestamp", [1577840521, 1577934062, 1578027849]), ], ) -def test_col_components(library: str, attr: str, expected: list[int]) -> None: +def test_col_components(library: BaseHandler, attr: str, expected: list[int]) -> None: df = temporal_dataframe_1(library) ns = df.__dataframe_namespace__() for col_name in ("a", "c", "e"): @@ -43,7 +44,11 @@ def test_col_components(library: str, attr: str, expected: list[int]) -> None: ("e", [123543, 321654, 987321]), ], ) -def test_col_microsecond(library: str, col_name: str, expected: list[int]) -> None: +def test_col_microsecond( + library: BaseHandler, + col_name: str, + expected: list[int], +) -> None: df = temporal_dataframe_1(library) ns = df.__dataframe_namespace__() result = ( @@ -64,7 +69,7 @@ def test_col_microsecond(library: str, col_name: str, expected: list[int]) -> No ("e", [123543000, 321654000, 987321000]), ], ) -def test_col_nanosecond(library: str, col_name: str, expected: list[int]) -> None: +def test_col_nanosecond(library: BaseHandler, col_name: str, expected: list[int]) -> None: df = temporal_dataframe_1(library) ns = df.__dataframe_namespace__() result = ( @@ -87,7 +92,7 @@ def test_col_nanosecond(library: str, col_name: str, expected: list[int]) -> Non ], ) def test_col_unix_timestamp_time_units( - library: str, + library: BaseHandler, time_unit: Literal["s", "ms", "us", "ns"], expected: list[int], ) -> None: diff --git a/tests/column/temporal/filter_test.py b/tests/column/temporal/filter_test.py index 27c2d901..a095a46e 100644 --- a/tests/column/temporal/filter_test.py +++ b/tests/column/temporal/filter_test.py @@ -1,8 +1,9 @@ +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import temporal_dataframe_1 -def test_filter_w_date(library: str) -> None: +def test_filter_w_date(library: BaseHandler) -> None: df = temporal_dataframe_1(library).select("a", "index") ns = df.__dataframe_namespace__() result = df.filter(df.col("a") > ns.date(2020, 1, 2)).select("index") diff --git a/tests/column/temporal/floor_test.py b/tests/column/temporal/floor_test.py index b9bf5d85..9ccd08b6 100644 --- a/tests/column/temporal/floor_test.py +++ b/tests/column/temporal/floor_test.py @@ -4,6 +4,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import temporal_dataframe_1 @@ -14,7 +15,7 @@ ("1day", [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)]), ], ) -def test_floor(library: str, freq: str, expected: list[datetime]) -> None: +def test_floor(library: BaseHandler, freq: str, expected: list[datetime]) -> None: df = temporal_dataframe_1(library) ns = df.__dataframe_namespace__() col = df.col diff --git a/tests/conftest.py b/tests/conftest.py index 7f1a5d5f..27d8d49b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,9 @@ import sys from typing import Any +from tests.utils import PandasHandler +from tests.utils import PolarsHandler + LIBRARIES = { (3, 8): ["pandas-numpy", "pandas-nullable", "polars-lazy"], (3, 9): ["pandas-numpy", "pandas-nullable", "polars-lazy"], @@ -11,10 +14,43 @@ (3, 12): ["polars-lazy"], } +LIBRARIES_HANDLERS = { + "pandas-numpy": PandasHandler("pandas-numpy"), + "pandas-nullable": PandasHandler("pandas-nullable"), + "polars-lazy": PolarsHandler("polars-lazy"), +} + + +def pytest_addoption(parser: Any) -> None: + parser.addoption( + "--library", + action="store", + default=None, + type=str, + help="library to test", + ) + + +def pytest_configure(config: Any) -> None: + library = config.option.library + if library is None: # pragma: no cover + # `LIBRARIES` is already initialized + return + else: + assert library in ("pandas-numpy", "pandas-nullable", "polars-lazy") + global LIBRARIES # noqa: PLW0603 + LIBRARIES = { + (3, 8): [library], + (3, 9): [library], + (3, 10): [library], + (3, 11): [library], + (3, 12): [library], + } + def pytest_generate_tests(metafunc: Any) -> None: if "library" in metafunc.fixturenames: - metafunc.parametrize( - "library", - LIBRARIES[sys.version_info[:2]], - ) + libraries = LIBRARIES[sys.version_info[:2]] + lib_handlers = [LIBRARIES_HANDLERS[lib] for lib in libraries] + + metafunc.parametrize("library", lib_handlers, ids=libraries) diff --git a/tests/dataframe/all_rowwise_test.py b/tests/dataframe/all_rowwise_test.py index 92b2df73..60421a5c 100644 --- a/tests/dataframe/all_rowwise_test.py +++ b/tests/dataframe/all_rowwise_test.py @@ -2,11 +2,12 @@ import pytest +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import compare_dataframe_with_reference -def test_all_horizontal(library: str) -> None: +def test_all_horizontal(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() mask = ns.all_horizontal(*[df.col(col_name) for col_name in df.column_names]) @@ -15,7 +16,7 @@ def test_all_horizontal(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Bool) -def test_all_horizontal_invalid(library: str) -> None: +def test_all_horizontal_invalid(library: BaseHandler) -> None: df = bool_dataframe_1(library) namespace = df.__dataframe_namespace__() with pytest.raises(ValueError): diff --git a/tests/dataframe/and_test.py b/tests/dataframe/and_test.py index 2b99778b..9c26ac39 100644 --- a/tests/dataframe/and_test.py +++ b/tests/dataframe/and_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import compare_dataframe_with_reference -def test_and_with_scalar(library: str) -> None: +def test_and_with_scalar(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() other = True @@ -13,7 +14,7 @@ def test_and_with_scalar(library: str) -> None: compare_dataframe_with_reference(result, expected, ns.Bool) -def test_rand_with_scalar(library: str) -> None: +def test_rand_with_scalar(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() other = True diff --git a/tests/dataframe/any_all_test.py b/tests/dataframe/any_all_test.py index 63f9a95d..e97f6a27 100644 --- a/tests/dataframe/any_all_test.py +++ b/tests/dataframe/any_all_test.py @@ -2,6 +2,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import bool_dataframe_3 from tests.utils import compare_dataframe_with_reference @@ -15,7 +16,7 @@ ], ) def test_reductions( - library: str, + library: BaseHandler, reduction: str, expected_data: dict[str, object], ) -> None: @@ -25,7 +26,7 @@ def test_reductions( compare_dataframe_with_reference(result, expected_data, dtype=ns.Bool) # type: ignore[arg-type] -def test_any(library: str) -> None: +def test_any(library: BaseHandler) -> None: df = bool_dataframe_3(library) ns = df.__dataframe_namespace__() result = df.any() @@ -33,7 +34,7 @@ def test_any(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Bool) -def test_all(library: str) -> None: +def test_all(library: BaseHandler) -> None: df = bool_dataframe_3(library) ns = df.__dataframe_namespace__() result = df.all() diff --git a/tests/dataframe/any_rowwise_test.py b/tests/dataframe/any_rowwise_test.py index 6fbb9177..c43fc137 100644 --- a/tests/dataframe/any_rowwise_test.py +++ b/tests/dataframe/any_rowwise_test.py @@ -2,11 +2,12 @@ import pytest +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import compare_dataframe_with_reference -def test_any_horizontal(library: str) -> None: +def test_any_horizontal(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() mask = ns.any_horizontal(*[df.col(col_name) for col_name in df.column_names]) @@ -15,7 +16,7 @@ def test_any_horizontal(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Bool) -def test_any_horizontal_invalid(library: str) -> None: +def test_any_horizontal_invalid(library: BaseHandler) -> None: df = bool_dataframe_1(library) namespace = df.__dataframe_namespace__() with pytest.raises(ValueError): diff --git a/tests/dataframe/assign_test.py b/tests/dataframe/assign_test.py index f6daf5af..02d3ccb4 100644 --- a/tests/dataframe/assign_test.py +++ b/tests/dataframe/assign_test.py @@ -2,11 +2,12 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_insert_columns(library: str) -> None: +def test_insert_columns(library: BaseHandler) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() new_col = (df.col("b") + 3).rename("result") @@ -18,7 +19,7 @@ def test_insert_columns(library: str) -> None: compare_dataframe_with_reference(df, expected, dtype=ns.Int64) -def test_insert_multiple_columns(library: str) -> None: +def test_insert_multiple_columns(library: BaseHandler) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() new_col = (df.col("b") + 3).rename("result") @@ -30,7 +31,7 @@ def test_insert_multiple_columns(library: str) -> None: compare_dataframe_with_reference(df, expected, dtype=ns.Int64) -def test_insert_multiple_columns_invalid(library: str) -> None: +def test_insert_multiple_columns_invalid(library: BaseHandler) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") df.__dataframe_namespace__() new_col = (df.col("b") + 3).rename("result") @@ -38,7 +39,7 @@ def test_insert_multiple_columns_invalid(library: str) -> None: _ = df.assign([new_col.rename("c"), new_col.rename("d")]) # type: ignore[arg-type] -def test_insert_eager_columns(library: str) -> None: +def test_insert_eager_columns(library: BaseHandler) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() new_col = (df.col("b") + 3).rename("result") diff --git a/tests/dataframe/cast_test.py b/tests/dataframe/cast_test.py index 7e3a199d..e5679335 100644 --- a/tests/dataframe/cast_test.py +++ b/tests/dataframe/cast_test.py @@ -1,8 +1,9 @@ +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_cast_integers(library: str) -> None: +def test_cast_integers(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.cast({"a": ns.Int32()}) diff --git a/tests/dataframe/columns_iter_test.py b/tests/dataframe/columns_iter_test.py index f8a02f3b..a65b640d 100644 --- a/tests/dataframe/columns_iter_test.py +++ b/tests/dataframe/columns_iter_test.py @@ -1,8 +1,9 @@ +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_iter_columns(library: str) -> None: +def test_iter_columns(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.assign( diff --git a/tests/dataframe/comparisons_test.py b/tests/dataframe/comparisons_test.py index 6886191f..458c56f3 100644 --- a/tests/dataframe/comparisons_test.py +++ b/tests/dataframe/comparisons_test.py @@ -2,6 +2,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 @@ -25,7 +26,7 @@ ], ) def test_comparisons_with_scalar( - library: str, + library: BaseHandler, comparison: str, expected_data: dict[str, object], expected_dtype: str, @@ -47,7 +48,7 @@ def test_comparisons_with_scalar( ], ) def test_rcomparisons_with_scalar( - library: str, + library: BaseHandler, comparison: str, expected_data: dict[str, object], ) -> None: diff --git a/tests/dataframe/cross_df_comparison_test.py b/tests/dataframe/cross_df_comparison_test.py index 6e96ddc1..fc8cbbef 100644 --- a/tests/dataframe/cross_df_comparison_test.py +++ b/tests/dataframe/cross_df_comparison_test.py @@ -2,11 +2,12 @@ import pytest +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 -def test_invalid_comparisons(library: str) -> None: +def test_invalid_comparisons(library: BaseHandler) -> None: df1 = integer_dataframe_1(library) df2 = integer_dataframe_2(library) mask = df2.col("a") > 1 diff --git a/tests/dataframe/divmod_test.py b/tests/dataframe/divmod_test.py index 0a62d3fe..7ddb61aa 100644 --- a/tests/dataframe/divmod_test.py +++ b/tests/dataframe/divmod_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_divmod_with_scalar(library: str) -> None: +def test_divmod_with_scalar(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() other = 2 diff --git a/tests/dataframe/drop_column_test.py b/tests/dataframe/drop_column_test.py index 9f948245..97e6d205 100644 --- a/tests/dataframe/drop_column_test.py +++ b/tests/dataframe/drop_column_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_drop_column(library: str) -> None: +def test_drop_column(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.drop("a") diff --git a/tests/dataframe/drop_nulls_test.py b/tests/dataframe/drop_nulls_test.py index 2bf6e604..7797fe68 100644 --- a/tests/dataframe/drop_nulls_test.py +++ b/tests/dataframe/drop_nulls_test.py @@ -1,8 +1,9 @@ +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import null_dataframe_1 -def test_drop_nulls(library: str) -> None: +def test_drop_nulls(library: BaseHandler) -> None: df = null_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.drop_nulls() diff --git a/tests/dataframe/fill_nan_test.py b/tests/dataframe/fill_nan_test.py index e21f8af1..b9ada35a 100644 --- a/tests/dataframe/fill_nan_test.py +++ b/tests/dataframe/fill_nan_test.py @@ -2,11 +2,12 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import nan_dataframe_1 -def test_fill_nan(library: str) -> None: +def test_fill_nan(library: BaseHandler) -> None: df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.fill_nan(-1) @@ -15,7 +16,7 @@ def test_fill_nan(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Float64) -def test_fill_nan_with_scalar(library: str) -> None: +def test_fill_nan_with_scalar(library: BaseHandler) -> None: df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.fill_nan(df.col("a").get_value(0)) @@ -24,20 +25,20 @@ def test_fill_nan_with_scalar(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Float64) -def test_fill_nan_with_scalar_invalid(library: str) -> None: +def test_fill_nan_with_scalar_invalid(library: BaseHandler) -> None: df = nan_dataframe_1(library) other = df + 1 with pytest.raises(ValueError): _ = df.fill_nan(other.col("a").get_value(0)) -def test_fill_nan_with_null(library: str) -> None: +def test_fill_nan_with_null(library: BaseHandler) -> None: df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.fill_nan(ns.null) n_nans = result.is_nan().sum() result = n_nans.col("a").persist().get_value(0).scalar - if library == "pandas-numpy": + if library.name in ("pandas-numpy",): # null is nan for pandas-numpy assert result == 1 else: diff --git a/tests/dataframe/fill_null_test.py b/tests/dataframe/fill_null_test.py index 12c24e8d..1217548b 100644 --- a/tests/dataframe/fill_null_test.py +++ b/tests/dataframe/fill_null_test.py @@ -2,6 +2,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import nan_dataframe_1 from tests.utils import null_dataframe_2 @@ -15,7 +16,7 @@ ["b"], ], ) -def test_fill_null(library: str, column_names: list[str] | None) -> None: +def test_fill_null(library: BaseHandler, column_names: list[str] | None) -> None: df = null_dataframe_2(library) df.__dataframe_namespace__() result = df.fill_null(0, column_names=column_names) @@ -32,14 +33,14 @@ def test_fill_null(library: str, column_names: list[str] | None) -> None: assert result.col("b").persist().get_value(2).scalar == 0 -def test_fill_null_noop(library: str) -> None: +def test_fill_null_noop(library: BaseHandler) -> None: df = nan_dataframe_1(library) result_raw = df.fill_null(0) if hasattr(result_raw.dataframe, "collect"): result = result_raw.dataframe.collect() else: result = result_raw.dataframe - if library != "pandas-numpy": + if library.name not in ("pandas-numpy",): # nan should not have changed! assert result["a"][2] != result["a"][2] else: diff --git a/tests/dataframe/get_column_by_name_test.py b/tests/dataframe/get_column_by_name_test.py index 6ddae877..c1b31dc4 100644 --- a/tests/dataframe/get_column_by_name_test.py +++ b/tests/dataframe/get_column_by_name_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_get_column(library: str) -> None: +def test_get_column(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() col = df.col diff --git a/tests/dataframe/get_column_names_test.py b/tests/dataframe/get_column_names_test.py index 86e45ab3..138ade7a 100644 --- a/tests/dataframe/get_column_names_test.py +++ b/tests/dataframe/get_column_names_test.py @@ -1,9 +1,10 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_get_column_names(library: str) -> None: +def test_get_column_names(library: BaseHandler) -> None: df = integer_dataframe_1(library) result = df.column_names assert list(result) == ["a", "b"] diff --git a/tests/dataframe/get_rows_by_mask_test.py b/tests/dataframe/get_rows_by_mask_test.py index a2ae421c..c1048550 100644 --- a/tests/dataframe/get_rows_by_mask_test.py +++ b/tests/dataframe/get_rows_by_mask_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_filter(library: str) -> None: +def test_filter(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() mask = df.col("a") % 2 == 1 diff --git a/tests/dataframe/get_rows_test.py b/tests/dataframe/get_rows_test.py index 16391c64..61b4ddd8 100644 --- a/tests/dataframe/get_rows_test.py +++ b/tests/dataframe/get_rows_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_take(library: str) -> None: +def test_take(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() df = df.assign((df.col("a") - 1).sort(ascending=False).rename("result")) diff --git a/tests/dataframe/invert_test.py b/tests/dataframe/invert_test.py index ed84c32e..91c47412 100644 --- a/tests/dataframe/invert_test.py +++ b/tests/dataframe/invert_test.py @@ -2,12 +2,13 @@ import pytest +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_invert(library: str) -> None: +def test_invert(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() result = ~df @@ -15,7 +16,7 @@ def test_invert(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Bool) -def test_invert_invalid(library: str) -> None: +def test_invert_invalid(library: BaseHandler) -> None: df = integer_dataframe_1(library) with pytest.raises(TypeError): _ = ~df diff --git a/tests/dataframe/is_nan_test.py b/tests/dataframe/is_nan_test.py index 3d82f9dc..e7bbdea7 100644 --- a/tests/dataframe/is_nan_test.py +++ b/tests/dataframe/is_nan_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import nan_dataframe_1 -def test_dataframe_is_nan(library: str) -> None: +def test_dataframe_is_nan(library: BaseHandler) -> None: df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.is_nan() diff --git a/tests/dataframe/is_null_test.py b/tests/dataframe/is_null_test.py index c2a469b2..ac322893 100644 --- a/tests/dataframe/is_null_test.py +++ b/tests/dataframe/is_null_test.py @@ -1,15 +1,16 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import nan_dataframe_2 from tests.utils import null_dataframe_1 -def test_is_null_1(library: str) -> None: +def test_is_null_1(library: BaseHandler) -> None: df = nan_dataframe_2(library) ns = df.__dataframe_namespace__() result = df.is_null() - if library == "pandas-numpy": + if library.name == "pandas-numpy": # nan and null are the same in pandas-numpy expected = {"a": [False, False, True]} else: @@ -17,7 +18,7 @@ def test_is_null_1(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Bool) -def test_is_null_2(library: str) -> None: +def test_is_null_2(library: BaseHandler) -> None: df = null_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.is_null() diff --git a/tests/dataframe/join_test.py b/tests/dataframe/join_test.py index b893e83e..b6b91ce9 100644 --- a/tests/dataframe/join_test.py +++ b/tests/dataframe/join_test.py @@ -3,13 +3,14 @@ import pytest from packaging.version import Version -from tests.utils import PANDAS_VERSION +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 +from tests.utils import pandas_version -def test_join_left(library: str) -> None: +def test_join_left(library: BaseHandler) -> None: left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) result = left.join(right, left_on="a", right_on="a", how="left") @@ -18,19 +19,21 @@ def test_join_left(library: str) -> None: expected_dtype = { "a": ns.Int64, "b": ns.Int64, - "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + "c": ns.Int64 + if library.name in ["pandas-nullable", "polars-lazy"] + else ns.Float64, } compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] -def test_join_overlapping_names(library: str) -> None: +def test_join_overlapping_names(library: BaseHandler) -> None: left = integer_dataframe_1(library) right = integer_dataframe_2(library) with pytest.raises(ValueError): _ = left.join(right, left_on="a", right_on="a", how="left") -def test_join_inner(library: str) -> None: +def test_join_inner(library: BaseHandler) -> None: left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) result = left.join(right, left_on="a", right_on="a", how="inner") @@ -40,13 +43,13 @@ def test_join_inner(library: str) -> None: @pytest.mark.skip(reason="outer join has changed in Polars recently, need to fixup") -def test_join_outer(library: str) -> None: # pragma: no cover +def test_join_outer(library: BaseHandler) -> None: # pragma: no cover left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) result = left.join(right, left_on="a", right_on="a", how="outer").sort("a") ns = result.__dataframe_namespace__() if ( - library == "pandas-nullable" and Version("2.0.0") > PANDAS_VERSION + library.name == "pandas-nullable" and Version("2.0.0") > pandas_version() ): # pragma: no cover # upstream bug result = result.cast({"a": ns.Int64()}) @@ -57,13 +60,17 @@ def test_join_outer(library: str) -> None: # pragma: no cover } expected_dtype = { "a": ns.Int64, - "b": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, - "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + "b": ns.Int64 + if library.name in ["pandas-nullable", "polars-lazy"] + else ns.Float64, + "c": ns.Int64 + if library.name in ["pandas-nullable", "polars-lazy"] + else ns.Float64, } compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] -def test_join_two_keys(library: str) -> None: +def test_join_two_keys(library: BaseHandler) -> None: left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) result = left.join(right, left_on=["a", "b"], right_on=["a", "c"], how="left") @@ -72,12 +79,14 @@ def test_join_two_keys(library: str) -> None: expected_dtype = { "a": ns.Int64, "b": ns.Int64, - "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + "c": ns.Int64 + if library.name in ["pandas-nullable", "polars-lazy"] + else ns.Float64, } compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] -def test_join_invalid(library: str) -> None: +def test_join_invalid(library: BaseHandler) -> None: left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) with pytest.raises(ValueError): diff --git a/tests/dataframe/or_test.py b/tests/dataframe/or_test.py index 1a4a8c95..383cac4f 100644 --- a/tests/dataframe/or_test.py +++ b/tests/dataframe/or_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import bool_dataframe_1 from tests.utils import compare_dataframe_with_reference -def test_or_with_scalar(library: str) -> None: +def test_or_with_scalar(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() other = True @@ -13,7 +14,7 @@ def test_or_with_scalar(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Bool) -def test_ror_with_scalar(library: str) -> None: +def test_ror_with_scalar(library: BaseHandler) -> None: df = bool_dataframe_1(library) ns = df.__dataframe_namespace__() other = True diff --git a/tests/dataframe/pow_test.py b/tests/dataframe/pow_test.py index eff8b95b..1ba1e370 100644 --- a/tests/dataframe/pow_test.py +++ b/tests/dataframe/pow_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_float_scalar_powers(library: str) -> None: +def test_float_scalar_powers(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() other = 1.0 diff --git a/tests/dataframe/reductions_test.py b/tests/dataframe/reductions_test.py index 2055a7ef..9a83bc44 100644 --- a/tests/dataframe/reductions_test.py +++ b/tests/dataframe/reductions_test.py @@ -4,6 +4,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 @@ -22,7 +23,7 @@ ], ) def test_dataframe_reductions( - library: str, + library: BaseHandler, reduction: str, expected: dict[str, Any], expected_dtype: str, diff --git a/tests/dataframe/rename_columns_test.py b/tests/dataframe/rename_columns_test.py index 63081cf5..2e193b11 100644 --- a/tests/dataframe/rename_columns_test.py +++ b/tests/dataframe/rename_columns_test.py @@ -2,11 +2,12 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_rename(library: str) -> None: +def test_rename(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.rename({"a": "c", "b": "e"}) @@ -14,7 +15,7 @@ def test_rename(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_rename_invalid(library: str) -> None: +def test_rename_invalid(library: BaseHandler) -> None: df = integer_dataframe_1(library) with pytest.raises( TypeError, diff --git a/tests/dataframe/schema_test.py b/tests/dataframe/schema_test.py index c8d0538e..8c84b5d5 100644 --- a/tests/dataframe/schema_test.py +++ b/tests/dataframe/schema_test.py @@ -5,15 +5,16 @@ from packaging.version import Version from packaging.version import parse -from tests.utils import PANDAS_VERSION +from tests.utils import BaseHandler from tests.utils import mixed_dataframe_1 +from tests.utils import pandas_version @pytest.mark.skipif( - Version("2.0.0") > PANDAS_VERSION, + Version("2.0.0") > pandas_version(), reason="no pyarrow support", ) -def test_schema(library: str) -> None: +def test_schema(library: BaseHandler) -> None: df = mixed_dataframe_1(library) namespace = df.__dataframe_namespace__() result = df.schema @@ -51,7 +52,8 @@ def test_schema(library: str) -> None: assert isinstance(result["m"], namespace.Datetime) assert isinstance(result["n"], namespace.Datetime) if not ( - library.startswith("pandas") and parse(pd.__version__) < Version("2.0.0") + library.name in ("pandas-numpy", "pandas-nullable") + and parse(pd.__version__) < Version("2.0.0") ): # pragma: no cover (coverage bug?) # pandas non-nanosecond support only came in 2.0 assert result["n"].time_unit == "ms" @@ -60,14 +62,18 @@ def test_schema(library: str) -> None: assert result["n"].time_zone is None assert isinstance(result["o"], namespace.Datetime) if not ( - library.startswith("pandas") and parse(pd.__version__) < Version("2.0.0") + library.name in ("pandas-numpy", "pandas-nullable") + and parse(pd.__version__) < Version("2.0.0") ): # pragma: no cover (coverage bug?) # pandas non-nanosecond support only came in 2.0 assert result["o"].time_unit == "us" else: # pragma: no cover pass assert result["o"].time_zone is None - if not (library.startswith("pandas") and parse(pd.__version__) < Version("2.0.0")): + if not ( + library.name in ("pandas-numpy", "pandas-nullable") + and parse(pd.__version__) < Version("2.0.0") + ): # pandas non-nanosecond support only came in 2.0 - before that, these would be 'float' assert isinstance(result["p"], namespace.Duration) assert result["p"].time_unit == "ms" diff --git a/tests/dataframe/select_test.py b/tests/dataframe/select_test.py index 60bde31f..02c5d52f 100644 --- a/tests/dataframe/select_test.py +++ b/tests/dataframe/select_test.py @@ -2,11 +2,12 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_select(library: str) -> None: +def test_select(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.select("b") @@ -14,7 +15,7 @@ def test_select(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_select_list_of_str(library: str) -> None: +def test_select_list_of_str(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() result = df.select("a", "b") @@ -22,14 +23,14 @@ def test_select_list_of_str(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_select_list_of_str_invalid(library: str) -> None: +def test_select_list_of_str_invalid(library: BaseHandler) -> None: df = integer_dataframe_1(library) with pytest.raises(TypeError): _ = df.select(["a", "b"]) # type: ignore[arg-type] @pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated") -def test_select_empty(library: str) -> None: +def test_select_empty(library: BaseHandler) -> None: df = integer_dataframe_1(library) result = df.select() assert result.column_names == [] diff --git a/tests/dataframe/shape_test.py b/tests/dataframe/shape_test.py index 491ed0ee..b127b982 100644 --- a/tests/dataframe/shape_test.py +++ b/tests/dataframe/shape_test.py @@ -2,10 +2,11 @@ import pytest +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_shape(library: str) -> None: +def test_shape(library: BaseHandler) -> None: df = integer_dataframe_1(library).persist() assert df.shape() == (3, 2) diff --git a/tests/dataframe/slice_rows_test.py b/tests/dataframe/slice_rows_test.py index 027b98df..271e878e 100644 --- a/tests/dataframe/slice_rows_test.py +++ b/tests/dataframe/slice_rows_test.py @@ -4,6 +4,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_3 @@ -18,7 +19,7 @@ ], ) def test_slice_rows( - library: str, + library: BaseHandler, start: int | None, stop: int | None, step: int | None, diff --git a/tests/dataframe/sort_test.py b/tests/dataframe/sort_test.py index 1698e671..b5b8649b 100644 --- a/tests/dataframe/sort_test.py +++ b/tests/dataframe/sort_test.py @@ -2,12 +2,13 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_5 @pytest.mark.parametrize("keys", [["a", "b"], []]) -def test_sort(library: str, keys: list[str]) -> None: +def test_sort(library: BaseHandler, keys: list[str]) -> None: df = integer_dataframe_5(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() result = df.sort(*keys) @@ -17,7 +18,7 @@ def test_sort(library: str, keys: list[str]) -> None: @pytest.mark.parametrize("keys", [["a", "b"], []]) def test_sort_descending( - library: str, + library: BaseHandler, keys: list[str], ) -> None: df = integer_dataframe_5(library, api_version="2023.09-beta") diff --git a/tests/dataframe/to_array_object_test.py b/tests/dataframe/to_array_object_test.py index d9b53ad8..42bdb35f 100644 --- a/tests/dataframe/to_array_object_test.py +++ b/tests/dataframe/to_array_object_test.py @@ -2,10 +2,11 @@ import numpy as np +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_to_array_object(library: str) -> None: +def test_to_array_object(library: BaseHandler) -> None: df = integer_dataframe_1(library).persist() result = np.asarray(df.to_array(dtype="int64")) # type: ignore[call-arg] expected = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.int64) diff --git a/tests/dataframe/update_columns_test.py b/tests/dataframe/update_columns_test.py index 0dfc67fe..2b0ebc5c 100644 --- a/tests/dataframe/update_columns_test.py +++ b/tests/dataframe/update_columns_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_update_columns(library: str) -> None: +def test_update_columns(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() col = df.col @@ -13,7 +14,7 @@ def test_update_columns(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_update_multiple_columns(library: str) -> None: +def test_update_multiple_columns(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() col = df.col diff --git a/tests/dataframe/update_test.py b/tests/dataframe/update_test.py index 3c6b57a7..d5ef2c23 100644 --- a/tests/dataframe/update_test.py +++ b/tests/dataframe/update_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_update_column(library: str) -> None: +def test_update_column(library: BaseHandler) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() new_col = df.col("b") + 3 @@ -16,7 +17,7 @@ def test_update_column(library: str) -> None: compare_dataframe_with_reference(df, expected, dtype=ns.Int64) -def test_update_columns(library: str) -> None: +def test_update_columns(library: BaseHandler) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") ns = df.__dataframe_namespace__() new_col_a = df.col("a") + 1 diff --git a/tests/groupby/aggregate_test.py b/tests/groupby/aggregate_test.py index 25619342..0ad381a2 100644 --- a/tests/groupby/aggregate_test.py +++ b/tests/groupby/aggregate_test.py @@ -1,8 +1,9 @@ +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_4 -def test_aggregate(library: str) -> None: +def test_aggregate(library: BaseHandler) -> None: df = integer_dataframe_4(library) df = df.assign((df.col("b") > 0).rename("d")) ns = df.__dataframe_namespace__() @@ -51,12 +52,12 @@ def test_aggregate(library: str) -> None: "d_any": ns.Bool, "d_all": ns.Bool, } - if library == "polars-lazy": + if library.name == "polars-lazy": result = result.cast({"b_count": ns.Int64()}) compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] -def test_aggregate_only_size(library: str) -> None: +def test_aggregate_only_size(library: BaseHandler) -> None: df = integer_dataframe_4(library) ns = df.__dataframe_namespace__() result = ( @@ -70,12 +71,12 @@ def test_aggregate_only_size(library: str) -> None: "key": [1, 2], "b_count": [2, 2], } - if library == "polars-lazy": + if library.name == "polars-lazy": result = result.cast({"b_count": ns.Int64()}) compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_aggregate_no_size(library: str) -> None: +def test_aggregate_no_size(library: BaseHandler) -> None: df = integer_dataframe_4(library) ns = df.__dataframe_namespace__() result = ( diff --git a/tests/groupby/groupby_any_all_test.py b/tests/groupby/groupby_any_all_test.py index 8ae9eea1..5de8f3fa 100644 --- a/tests/groupby/groupby_any_all_test.py +++ b/tests/groupby/groupby_any_all_test.py @@ -6,6 +6,7 @@ from packaging.version import parse from polars.exceptions import SchemaError +from tests.utils import BaseHandler from tests.utils import bool_dataframe_2 from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_4 @@ -19,7 +20,7 @@ ], ) def test_groupby_boolean( - library: str, + library: BaseHandler, aggregation: str, expected_b: list[bool], expected_c: list[bool], @@ -29,7 +30,7 @@ def test_groupby_boolean( result = getattr(df.group_by("key"), aggregation)() # need to sort result = result.sort("key") - if library == "pandas-nullable" and parse(pd.__version__) < Version( + if library.name == "pandas-nullable" and parse(pd.__version__) < Version( "2.0.0", ): # pragma: no cover # upstream bug @@ -39,7 +40,7 @@ def test_groupby_boolean( compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] -def test_group_by_invalid_any_all(library: str) -> None: +def test_group_by_invalid_any_all(library: BaseHandler) -> None: df = integer_dataframe_4(library).persist() with pytest.raises((TypeError, SchemaError)): df.group_by("key").any() diff --git a/tests/groupby/invalid_test.py b/tests/groupby/invalid_test.py index 679d1acd..ad4be435 100644 --- a/tests/groupby/invalid_test.py +++ b/tests/groupby/invalid_test.py @@ -2,10 +2,11 @@ import pytest +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_groupby_invalid(library: str) -> None: +def test_groupby_invalid(library: BaseHandler) -> None: df = integer_dataframe_1(library).select("a") with pytest.raises((KeyError, TypeError)): df.group_by(0) # type: ignore[arg-type] diff --git a/tests/groupby/numeric_test.py b/tests/groupby/numeric_test.py index 075f1588..7109bc83 100644 --- a/tests/groupby/numeric_test.py +++ b/tests/groupby/numeric_test.py @@ -5,6 +5,7 @@ from packaging.version import Version from packaging.version import parse +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_4 @@ -28,7 +29,7 @@ ], ) def test_group_by_numeric( - library: str, + library: BaseHandler, aggregation: str, expected_b: list[float], expected_c: list[float], @@ -41,7 +42,7 @@ def test_group_by_numeric( expected = {"key": [1, 2], "b": expected_b, "c": expected_c} dtype = getattr(ns, expected_dtype) expected_ns_dtype = {"key": ns.Int64, "b": dtype, "c": dtype} - if library == "pandas-nullable" and parse(pd.__version__) < Version( + if library.name == "pandas-nullable" and parse(pd.__version__) < Version( "2.0.0", ): # pragma: no cover # upstream bug diff --git a/tests/groupby/size_test.py b/tests/groupby/size_test.py index 2d7da647..cf7f4c39 100644 --- a/tests/groupby/size_test.py +++ b/tests/groupby/size_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_4 -def test_group_by_size(library: str) -> None: +def test_group_by_size(library: BaseHandler) -> None: df = integer_dataframe_4(library) ns = df.__dataframe_namespace__() result = df.group_by("key").size() diff --git a/tests/integration/free_vs_w_parent_test.py b/tests/integration/free_vs_w_parent_test.py index dab61de4..d2f6612f 100644 --- a/tests/integration/free_vs_w_parent_test.py +++ b/tests/integration/free_vs_w_parent_test.py @@ -2,10 +2,11 @@ import polars as pl from polars.testing import assert_series_equal +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_free_vs_w_parent(library: str) -> None: +def test_free_vs_w_parent(library: BaseHandler) -> None: df1 = integer_dataframe_1(library) namespace = df1.__dataframe_namespace__() free_ser1 = namespace.column_from_1d_array( # type: ignore[call-arg] @@ -18,7 +19,7 @@ def test_free_vs_w_parent(library: str) -> None: ) result = free_ser1 + free_ser2 - if library == "polars-lazy": + if library.name == "polars-lazy": assert_series_equal( pl.select(result.column)["preds"], pl.Series("preds", [5, 7, 9], dtype=pl.Int64()), diff --git a/tests/integration/persistedness_test.py b/tests/integration/persistedness_test.py index 9d6bf7de..8803e76c 100644 --- a/tests/integration/persistedness_test.py +++ b/tests/integration/persistedness_test.py @@ -1,11 +1,12 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 -def test_within_df_propagation(library: str) -> None: +def test_within_df_propagation(library: BaseHandler) -> None: df1 = integer_dataframe_1(library) df1 = df1 + 1 with pytest.raises(RuntimeError): @@ -46,14 +47,14 @@ def test_within_df_propagation(library: str) -> None: assert int(scalar + 1) == 3 # type: ignore[call-overload] -def test_within_df_within_col_propagation(library: str) -> None: +def test_within_df_within_col_propagation(library: BaseHandler) -> None: df1 = integer_dataframe_1(library) df1 = df1 + 1 df1 = df1.persist() assert int((df1.col("a") + 1).mean()) == 4 # type: ignore[call-overload] -def test_cross_df_propagation(library: str) -> None: +def test_cross_df_propagation(library: BaseHandler) -> None: df1 = integer_dataframe_1(library) df2 = integer_dataframe_2(library) ns = df1.__dataframe_namespace__() @@ -69,12 +70,14 @@ def test_cross_df_propagation(library: str) -> None: expected_dtype = { "a": ns.Int64, "b": ns.Int64, - "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + "c": ns.Int64 + if library.name in ["pandas-nullable", "polars-lazy"] + else ns.Float64, } compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] -def test_multiple_propagations(library: str) -> None: +def test_multiple_propagations(library: BaseHandler) -> None: # This is a bit "ugly", as the user is "required" to call `persist` # multiple times to do things optimally df = integer_dataframe_1(library) @@ -97,7 +100,7 @@ def test_multiple_propagations(library: str) -> None: int(df1.col("a").mean()) # type: ignore[call-overload] -def test_parent_propagations(library: str) -> None: +def test_parent_propagations(library: BaseHandler) -> None: # Set up something like this: # # df diff --git a/tests/namespace/column_from_1d_array_test.py b/tests/namespace/column_from_1d_array_test.py index b2dac631..95b3b8ce 100644 --- a/tests/namespace/column_from_1d_array_test.py +++ b/tests/namespace/column_from_1d_array_test.py @@ -8,10 +8,11 @@ import pytest from packaging.version import Version -from tests.utils import PANDAS_VERSION -from tests.utils import POLARS_VERSION +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 +from tests.utils import pandas_version +from tests.utils import polars_version @pytest.mark.parametrize( @@ -30,7 +31,7 @@ ], ) def test_column_from_1d_array( - library: str, + library: BaseHandler, pandas_dtype: str, column_dtype: str, ) -> None: @@ -52,7 +53,7 @@ def test_column_from_1d_array( def test_column_from_1d_array_string( - library: str, + library: BaseHandler, ) -> None: ser = integer_dataframe_1(library).persist().col("a") ns = ser.__column_namespace__() @@ -68,7 +69,7 @@ def test_column_from_1d_array_string( def test_column_from_1d_array_bool( - library: str, + library: BaseHandler, ) -> None: ser = integer_dataframe_1(library).persist().col("a") ns = ser.__column_namespace__() @@ -83,7 +84,7 @@ def test_column_from_1d_array_bool( compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) -def test_datetime_from_1d_array(library: str) -> None: +def test_datetime_from_1d_array(library: BaseHandler) -> None: ser = integer_dataframe_1(library).persist().col("a") ns = ser.__column_namespace__() arr = np.array([date(2020, 1, 1), date(2020, 1, 2)], dtype="datetime64[ms]") @@ -98,14 +99,14 @@ def test_datetime_from_1d_array(library: str) -> None: @pytest.mark.skipif( - Version("0.19.9") > POLARS_VERSION, + Version("0.19.9") > polars_version(), reason="upstream bug", ) @pytest.mark.skipif( - Version("2.0.0") > PANDAS_VERSION, + Version("2.0.0") > pandas_version(), reason="pandas before non-nano", ) -def test_duration_from_1d_array(library: str) -> None: +def test_duration_from_1d_array(library: BaseHandler) -> None: ser = integer_dataframe_1(library).persist().col("a") ns = ser.__column_namespace__() arr = np.array([timedelta(1), timedelta(2)], dtype="timedelta64[ms]") @@ -115,7 +116,7 @@ def test_duration_from_1d_array(library: str) -> None: name="result", ), ) - if library == "polars-lazy": + if library.name == "polars-lazy": # https://github.com/data-apis/dataframe-api/issues/329 result = result.cast({"result": ns.Duration("ms")}) expected = [timedelta(1), timedelta(2)] diff --git a/tests/namespace/column_from_sequence_test.py b/tests/namespace/column_from_sequence_test.py index e6362e12..f089dd52 100644 --- a/tests/namespace/column_from_sequence_test.py +++ b/tests/namespace/column_from_sequence_test.py @@ -6,6 +6,7 @@ import pytest +from tests.utils import BaseHandler from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 @@ -30,7 +31,7 @@ ], ) def test_column_from_sequence( - library: str, + library: BaseHandler, values: list[Any], dtype: str, kwargs: dict[str, Any], @@ -51,7 +52,7 @@ def test_column_from_sequence( def test_column_from_sequence_no_dtype( - library: str, + library: BaseHandler, ) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() diff --git a/tests/namespace/concat_test.py b/tests/namespace/concat_test.py index 79901d5a..2fd83275 100644 --- a/tests/namespace/concat_test.py +++ b/tests/namespace/concat_test.py @@ -3,13 +3,14 @@ import polars as pl import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 from tests.utils import integer_dataframe_4 -def test_concat(library: str) -> None: +def test_concat(library: BaseHandler) -> None: df1 = integer_dataframe_1(library) df2 = integer_dataframe_2(library) ns = df1.__dataframe_namespace__() @@ -18,7 +19,7 @@ def test_concat(library: str) -> None: compare_dataframe_with_reference(result, expected, dtype=ns.Int64) -def test_concat_mismatch(library: str) -> None: +def test_concat_mismatch(library: BaseHandler) -> None: df1 = integer_dataframe_1(library).persist() df2 = integer_dataframe_4(library).persist() ns = df1.__dataframe_namespace__() diff --git a/tests/namespace/convert_to_standard_column_test.py b/tests/namespace/convert_to_standard_column_test.py index 029a4047..3fdfcaf1 100644 --- a/tests/namespace/convert_to_standard_column_test.py +++ b/tests/namespace/convert_to_standard_column_test.py @@ -5,12 +5,12 @@ import pytest from packaging.version import Version -from tests.utils import PANDAS_VERSION -from tests.utils import POLARS_VERSION +from tests.utils import pandas_version +from tests.utils import polars_version @pytest.mark.skipif( - Version("0.19.0") > POLARS_VERSION or Version("2.1.0") > PANDAS_VERSION, + Version("0.19.0") > polars_version() or Version("2.1.0") > pandas_version(), reason="before consortium standard in polars/pandas", ) def test_convert_to_std_column() -> None: diff --git a/tests/namespace/dataframe_from_2d_array_test.py b/tests/namespace/dataframe_from_2d_array_test.py index 503486da..2b381d96 100644 --- a/tests/namespace/dataframe_from_2d_array_test.py +++ b/tests/namespace/dataframe_from_2d_array_test.py @@ -2,11 +2,12 @@ import numpy as np +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -def test_dataframe_from_2d_array(library: str) -> None: +def test_dataframe_from_2d_array(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() arr = np.array([[1, 4], [2, 5], [3, 6]]) diff --git a/tests/namespace/is_dtype_test.py b/tests/namespace/is_dtype_test.py index 55eb17de..4d2ce868 100644 --- a/tests/namespace/is_dtype_test.py +++ b/tests/namespace/is_dtype_test.py @@ -3,8 +3,9 @@ import pytest from packaging.version import Version -from tests.utils import PANDAS_VERSION +from tests.utils import BaseHandler from tests.utils import mixed_dataframe_1 +from tests.utils import pandas_version @pytest.mark.parametrize( @@ -21,10 +22,10 @@ ], ) @pytest.mark.skipif( - Version("2.0.0") > PANDAS_VERSION, + Version("2.0.0") > pandas_version(), reason="before pandas got non-nano support", ) -def test_is_dtype(library: str, dtype: str, expected: list[str]) -> None: +def test_is_dtype(library: BaseHandler, dtype: str, expected: list[str]) -> None: df = mixed_dataframe_1(library).persist() namespace = df.__dataframe_namespace__() result = [i for i in df.column_names if namespace.is_dtype(df.schema[i], dtype)] diff --git a/tests/namespace/namespace_is_null_test.py b/tests/namespace/namespace_is_null_test.py index 7d25d1c8..758795ba 100644 --- a/tests/namespace/namespace_is_null_test.py +++ b/tests/namespace/namespace_is_null_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 -def test_is_null(library: str) -> None: +def test_is_null(library: BaseHandler) -> None: df = integer_dataframe_1(library) other = integer_dataframe_2(library) # use scalar namespace just for coverage purposes diff --git a/tests/namespace/sorted_indices_test.py b/tests/namespace/sorted_indices_test.py index d99a4585..5e755d72 100644 --- a/tests/namespace/sorted_indices_test.py +++ b/tests/namespace/sorted_indices_test.py @@ -1,10 +1,11 @@ from __future__ import annotations +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_6 -def test_column_sorted_indices_ascending(library: str) -> None: +def test_column_sorted_indices_ascending(library: BaseHandler) -> None: df = integer_dataframe_6(library) ns = df.__dataframe_namespace__() sorted_indices = df.col("b").sorted_indices() @@ -19,7 +20,7 @@ def test_column_sorted_indices_ascending(library: str) -> None: "b": [4, 4, 3, 1, 2], "result": [3, 4, 2, 1, 0], } - if library in ("polars", "polars-lazy"): + if library.name in ("polars", "polars-lazy"): result = result.cast({"result": ns.Int64()}) try: compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64) @@ -28,7 +29,7 @@ def test_column_sorted_indices_ascending(library: str) -> None: compare_dataframe_with_reference(result, expected_2, dtype=ns.Int64) -def test_column_sorted_indices_descending(library: str) -> None: +def test_column_sorted_indices_descending(library: BaseHandler) -> None: df = integer_dataframe_6(library) ns = df.__dataframe_namespace__() sorted_indices = df.col("b").sorted_indices(ascending=False) @@ -43,7 +44,7 @@ def test_column_sorted_indices_descending(library: str) -> None: "b": [4, 4, 3, 1, 2], "result": [0, 1, 2, 4, 3], } - if library in ("polars", "polars-lazy"): + if library.name in ("polars", "polars-lazy"): result = result.cast({"result": ns.Int64()}) try: compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64) diff --git a/tests/namespace/to_array_object_test.py b/tests/namespace/to_array_object_test.py index 302d5738..3b1f63c2 100644 --- a/tests/namespace/to_array_object_test.py +++ b/tests/namespace/to_array_object_test.py @@ -2,17 +2,18 @@ import numpy as np +from tests.utils import BaseHandler from tests.utils import integer_dataframe_1 -def test_to_array_object(library: str) -> None: +def test_to_array_object(library: BaseHandler) -> None: df = integer_dataframe_1(library).persist() result = np.asarray(df.to_array(dtype="int64")) # type: ignore # noqa: PGH003 expected = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.int64) np.testing.assert_array_equal(result, expected) -def test_column_to_array_object(library: str) -> None: +def test_column_to_array_object(library: BaseHandler) -> None: col = integer_dataframe_1(library).col("a") result = np.asarray(col.persist().to_array()) result = np.asarray(col.persist().to_array()) diff --git a/tests/scalars/float_test.py b/tests/scalars/float_test.py index d8d76656..1784f85f 100644 --- a/tests/scalars/float_test.py +++ b/tests/scalars/float_test.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from tests.utils import BaseHandler from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 @@ -31,7 +32,7 @@ "__rtruediv__", ], ) -def test_float_binary(library: str, attr: str) -> None: +def test_float_binary(library: BaseHandler, attr: str) -> None: other = 0.5 df = integer_dataframe_2(library).persist() scalar = df.col("a").mean() @@ -39,14 +40,14 @@ def test_float_binary(library: str, attr: str) -> None: assert getattr(scalar, attr)(other) == getattr(float_scalar, attr)(other) -def test_float_binary_invalid(library: str) -> None: +def test_float_binary_invalid(library: BaseHandler) -> None: lhs = integer_dataframe_2(library).col("a").mean() rhs = integer_dataframe_1(library).col("b").mean() with pytest.raises(ValueError): _ = lhs > rhs -def test_float_binary_lazy_valid(library: str) -> None: +def test_float_binary_lazy_valid(library: BaseHandler) -> None: df = integer_dataframe_2(library).persist() lhs = df.col("a").mean() rhs = df.col("b").mean() @@ -61,7 +62,7 @@ def test_float_binary_lazy_valid(library: str) -> None: "__neg__", ], ) -def test_float_unary(library: str, attr: str) -> None: +def test_float_unary(library: BaseHandler, attr: str) -> None: df = integer_dataframe_2(library).persist() with pytest.warns(UserWarning): scalar = df.col("a").persist().mean() @@ -77,7 +78,7 @@ def test_float_unary(library: str, attr: str) -> None: "__bool__", ], ) -def test_float_unary_invalid(library: str, attr: str) -> None: +def test_float_unary_invalid(library: BaseHandler, attr: str) -> None: df = integer_dataframe_2(library) scalar = df.col("a").mean() float_scalar = float(scalar.persist()) # type: ignore[arg-type] @@ -85,7 +86,7 @@ def test_float_unary_invalid(library: str, attr: str) -> None: assert getattr(scalar, attr)() == getattr(float_scalar, attr)() -def test_free_standing(library: str) -> None: +def test_free_standing(library: BaseHandler) -> None: df = integer_dataframe_1(library) namespace = df.__dataframe_namespace__() ser = namespace.column_from_1d_array( # type: ignore[call-arg] @@ -96,7 +97,7 @@ def test_free_standing(library: str) -> None: assert result == 3.0 -def test_right_comparand(library: str) -> None: +def test_right_comparand(library: BaseHandler) -> None: df = integer_dataframe_1(library) ns = df.__dataframe_namespace__() col = df.col("a") # [1, 2, 3] diff --git a/tests/utils.py b/tests/utils.py index 019e9f3c..1dd03ac6 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,25 +1,120 @@ from __future__ import annotations import math +from abc import abstractmethod from datetime import datetime from datetime import timedelta from typing import TYPE_CHECKING from typing import Any +from typing import ClassVar from typing import Mapping -import pandas as pd -import polars as pl +from packaging.version import Version from packaging.version import parse -import dataframe_api_compat.pandas_standard -import dataframe_api_compat.polars_standard - if TYPE_CHECKING: + import pandas as pd + import polars as pl from dataframe_api import Column from dataframe_api import DataFrame + from dataframe_api.typing import DType + + +def pandas_version() -> Version: + import pandas as pd + + return parse(pd.__version__) + + +def polars_version() -> Version: + import polars as pl + + return parse(pl.__version__) + + +class BaseHandler: + @property + @abstractmethod + def name(self) -> str: + ... + + @abstractmethod + def create_dataframe( + self, + data: Any, + api_version: str | None = None, + ) -> DataFrame: + ... -POLARS_VERSION = parse(pl.__version__) -PANDAS_VERSION = parse(pd.__version__) + +class PandasHandler(BaseHandler): + # for `pandas-nullable` case + # https://pandas.pydata.org/docs/user_guide/basics.html#dtypes + mapping: ClassVar[dict[str, str]] = { + "bool": "boolean", + "int64": "Int64", + "float64": "Float64", + } + + def __init__(self, name: str) -> None: + assert name in ("pandas-numpy", "pandas-nullable") + self._name = name + + @property + def name(self) -> str: + return self._name + + def create_dataframe( + self, + data: Any, + api_version: str | None = None, + ) -> DataFrame: + import pandas as pd + + import dataframe_api_compat.pandas_standard + + df = pd.DataFrame(data) + if self.name == "pandas-nullable": + new_dtypes = { + col_name: self.mapping.get(str(dtype), str(dtype)) + for col_name, dtype in zip(df.columns, df.dtypes) + } + df = df.astype(new_dtypes) + + return ( + dataframe_api_compat.pandas_standard.convert_to_standard_compliant_dataframe( + df, + api_version=api_version or "2023.11-beta", + ) + ) + + +class PolarsHandler(BaseHandler): + def __init__(self, name: str) -> None: + assert name == "polars-lazy" + self._name = name + + @property + def name(self) -> str: + return self._name + + def create_dataframe( + self, + data: Any, + api_version: str | None = None, + ) -> DataFrame: + import polars as pl + + import dataframe_api_compat.polars_standard + + df = pl.DataFrame(data) + + return ( + dataframe_api_compat.polars_standard.convert_to_standard_compliant_dataframe( + df, + api_version=api_version or "2023.11-beta", + ) + ) def convert_to_standard_compliant_dataframe( @@ -27,7 +122,12 @@ def convert_to_standard_compliant_dataframe( api_version: str | None = None, ) -> DataFrame: # TODO: type return + import pandas as pd + import polars as pl + if isinstance(df, pd.DataFrame): + import dataframe_api_compat.pandas_standard + return ( dataframe_api_compat.pandas_standard.convert_to_standard_compliant_dataframe( df, @@ -35,6 +135,8 @@ def convert_to_standard_compliant_dataframe( ) ) elif isinstance(df, (pl.DataFrame, pl.LazyFrame)): + import dataframe_api_compat.polars_standard + df_lazy = df.lazy() if isinstance(df, pl.DataFrame) else df return ( dataframe_api_compat.polars_standard.convert_to_standard_compliant_dataframe( @@ -47,314 +149,159 @@ def convert_to_standard_compliant_dataframe( raise AssertionError(msg) -def integer_dataframe_1(library: str, api_version: str | None = None) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype="int64") - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - if library == "pandas-nullable": - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype="Int64") - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - if library == "polars-lazy": - df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) - - -def integer_dataframe_2(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [1, 2, 4], "b": [4, 2, 6]}, dtype="int64") - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": - df = pd.DataFrame({"a": [1, 2, 4], "b": [4, 2, 6]}, dtype="Int64") - return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": - df = pl.DataFrame({"a": [1, 2, 4], "b": [4, 2, 6]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def integer_dataframe_1( + library: BaseHandler, + api_version: str | None = None, +) -> DataFrame: + return library.create_dataframe( + {"a": [1, 2, 3], "b": [4, 5, 6]}, + api_version=api_version, + ) -def integer_dataframe_3(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame( - {"a": [1, 2, 3, 4, 5, 6, 7], "b": [7, 6, 5, 4, 3, 2, 1]}, - dtype="int64", - ) - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": - df = pd.DataFrame( - {"a": [1, 2, 3, 4, 5, 6, 7], "b": [7, 6, 5, 4, 3, 2, 1]}, - dtype="Int64", - ) - return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": - df = pl.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7], "b": [7, 6, 5, 4, 3, 2, 1]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def integer_dataframe_2(library: BaseHandler) -> DataFrame: + return library.create_dataframe( + {"a": [1, 2, 4], "b": [4, 2, 6]}, + ) -def integer_dataframe_4(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame( - {"key": [1, 1, 2, 2], "b": [1, 2, 3, 4], "c": [4, 5, 6, 7]}, - dtype="int64", - ) - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": - df = pd.DataFrame( - {"key": [1, 1, 2, 2], "b": [1, 2, 3, 4], "c": [4, 5, 6, 7]}, - dtype="Int64", - ) - return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": - df = pl.DataFrame({"key": [1, 1, 2, 2], "b": [1, 2, 3, 4], "c": [4, 5, 6, 7]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def integer_dataframe_3(library: BaseHandler) -> DataFrame: + return library.create_dataframe( + {"a": [1, 2, 3, 4, 5, 6, 7], "b": [7, 6, 5, 4, 3, 2, 1]}, + ) -def integer_dataframe_5(library: str, api_version: str | None = None) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [1, 1], "b": [4, 3]}, dtype="int64") - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - if library == "pandas-nullable": - df = pd.DataFrame({"a": [1, 1], "b": [4, 3]}, dtype="Int64") - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - if library == "polars-lazy": - df = pl.DataFrame({"a": [1, 1], "b": [4, 3]}) - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) - - -def integer_dataframe_6(library: str, api_version: str | None = None) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [1, 1, 1, 2, 2], "b": [4, 4, 3, 1, 2]}, dtype="int64") - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - if library == "pandas-nullable": - df = pd.DataFrame({"a": [1, 1, 1, 2, 2], "b": [4, 4, 3, 1, 2]}, dtype="Int64") - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - if library == "polars-lazy": - df = pl.DataFrame({"a": [1, 1, 1, 2, 2], "b": [4, 4, 3, 1, 2]}) - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) - - -def integer_dataframe_7(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 4]}, dtype="int64") - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": - df = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 4]}, dtype="Int64") - return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": - df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 4]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def integer_dataframe_4(library: BaseHandler) -> DataFrame: + return library.create_dataframe( + {"key": [1, 1, 2, 2], "b": [1, 2, 3, 4], "c": [4, 5, 6, 7]}, + ) -def nan_dataframe_1(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [1.0, 2.0, float("nan")]}, dtype="float64") - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": +def integer_dataframe_5( + library: BaseHandler, + api_version: str | None = None, +) -> DataFrame: + return library.create_dataframe( + {"a": [1, 1], "b": [4, 3]}, + api_version=api_version, + ) + + +def integer_dataframe_6( + library: BaseHandler, + api_version: str | None = None, +) -> DataFrame: + return library.create_dataframe( + {"a": [1, 1, 1, 2, 2], "b": [4, 4, 3, 1, 2]}, + api_version=api_version, + ) + + +def integer_dataframe_7(library: BaseHandler) -> DataFrame: + return library.create_dataframe({"a": [1, 2, 3], "b": [1, 2, 4]}) + + +def nan_dataframe_1(library: BaseHandler) -> DataFrame: + if library.name == "pandas-nullable": + import pandas as pd + df = pd.DataFrame({"a": [1.0, 2.0, 0.0]}, dtype="Float64") other = pd.DataFrame({"a": [1.0, 1.0, 0.0]}, dtype="Float64") return convert_to_standard_compliant_dataframe(df / other) - if library == "polars-lazy": - df = pl.DataFrame({"a": [1.0, 2.0, float("nan")]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) + return library.create_dataframe({"a": [1.0, 2.0, float("nan")]}) -def nan_dataframe_2(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [0.0, 1.0, float("nan")]}, dtype="float64") - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": +def nan_dataframe_2(library: BaseHandler) -> DataFrame: + if library.name == "pandas-nullable": + import pandas as pd + df = pd.DataFrame({"a": [0.0, 1.0, 0.0]}, dtype="Float64") other = pd.DataFrame({"a": [1.0, 1.0, 0.0]}, dtype="Float64") return convert_to_standard_compliant_dataframe(df / other) - if library == "polars-lazy": - df = pl.DataFrame({"a": [0.0, 1.0, float("nan")]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) + return library.create_dataframe({"a": [0.0, 1.0, float("nan")]}) -def null_dataframe_1(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [1.0, 2.0, float("nan")]}, dtype="float64") - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": +def null_dataframe_1(library: BaseHandler) -> DataFrame: + if library.name == "pandas-nullable": + import pandas as pd + df = pd.DataFrame({"a": [1.0, 2.0, pd.NA]}, dtype="Float64") return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": + if library.name == "polars-lazy": + import polars as pl + df = pl.DataFrame({"a": [1.0, 2.0, None]}) return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) + return library.create_dataframe({"a": [1.0, 2.0, float("nan")]}) -def null_dataframe_2(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame( - {"a": [1.0, -1.0, float("nan")], "b": [1.0, -1.0, float("nan")]}, - dtype="float64", - ) - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": +def null_dataframe_2(library: BaseHandler) -> DataFrame: + if library.name == "pandas-nullable": + import pandas as pd + df = pd.DataFrame( {"a": [1.0, 0.0, pd.NA], "b": [1.0, 1.0, pd.NA]}, dtype="Float64", ) return convert_to_standard_compliant_dataframe(df / df) - if library == "polars-lazy": + if library.name == "polars-lazy": + import polars as pl + df = pl.DataFrame({"a": [1.0, float("nan"), None], "b": [1.0, 1.0, None]}) return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) + return library.create_dataframe( + {"a": [1.0, -1.0, float("nan")], "b": [1.0, -1.0, float("nan")]}, + ) -def bool_dataframe_1(library: str, api_version: str = "2023.09-beta") -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame( - {"a": [True, True, False], "b": [True, True, True]}, - dtype="bool", - ) - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - if library == "pandas-nullable": - df = pd.DataFrame( - {"a": [True, True, False], "b": [True, True, True]}, - dtype="boolean", - ) - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - if library == "polars-lazy": - df = pl.DataFrame({"a": [True, True, False], "b": [True, True, True]}) - return convert_to_standard_compliant_dataframe(df, api_version=api_version) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def bool_dataframe_1( + library: BaseHandler, + api_version: str = "2023.09-beta", +) -> DataFrame: + return library.create_dataframe( + {"a": [True, True, False], "b": [True, True, True]}, + api_version=api_version, + ) -def bool_dataframe_2(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame( - { - "key": [1, 1, 2, 2], - "b": [False, True, True, True], - "c": [True, False, False, False], - }, - ).astype({"key": "int64", "b": "bool", "c": "bool"}) - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": - df = pd.DataFrame( - { - "key": [1, 1, 2, 2], - "b": [False, True, True, True], - "c": [True, False, False, False], - }, - ).astype({"key": "Int64", "b": "boolean", "c": "boolean"}) - return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": - df = pl.DataFrame( - { - "key": [1, 1, 2, 2], - "b": [False, True, True, True], - "c": [True, False, False, False], - }, - ) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def bool_dataframe_2(library: BaseHandler) -> DataFrame: + return library.create_dataframe( + { + "key": [1, 1, 2, 2], + "b": [False, True, True, True], + "c": [True, False, False, False], + }, + ) -def bool_dataframe_3(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame( - {"a": [False, False], "b": [False, True], "c": [True, True]}, - dtype="bool", - ) - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": - df = pd.DataFrame( - {"a": [False, False], "b": [False, True], "c": [True, True]}, - dtype="boolean", - ) - return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": - df = pl.DataFrame({"a": [False, False], "b": [False, True], "c": [True, True]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def bool_dataframe_3(library: BaseHandler) -> DataFrame: + return library.create_dataframe( + {"a": [False, False], "b": [False, True], "c": [True, True]}, + ) -def float_dataframe_1(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [2.0, 3.0]}, dtype="float64") - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": - df = pd.DataFrame({"a": [2.0, 3.0]}, dtype="Float64") - return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": - df = pl.DataFrame({"a": [2.0, 3.0]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def float_dataframe_1(library: BaseHandler) -> DataFrame: + return library.create_dataframe({"a": [2.0, 3.0]}) -def float_dataframe_2(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [2.0, 1.0]}, dtype="float64") - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": - df = pd.DataFrame({"a": [2.0, 1.0]}, dtype="Float64") - return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": # pragma: no cover - df = pl.DataFrame({"a": [2.0, 1.0]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) +def float_dataframe_2(library: BaseHandler) -> DataFrame: + return library.create_dataframe({"a": [2.0, 1.0]}) -def float_dataframe_3(library: str) -> DataFrame: - df: Any - if library == "pandas-numpy": - df = pd.DataFrame({"a": [float("nan"), 2.0]}, dtype="float64") - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": +def float_dataframe_3(library: BaseHandler) -> DataFrame: + if library.name == "pandas-nullable": + import pandas as pd + df = pd.DataFrame({"a": [0.0, 2.0]}, dtype="Float64") other = pd.DataFrame({"a": [0.0, 1.0]}, dtype="Float64") return convert_to_standard_compliant_dataframe(df / other) - if library == "polars-lazy": # pragma: no cover - df = pl.DataFrame({"a": [float("nan"), 2.0]}) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) + return library.create_dataframe({"a": [float("nan"), 2.0]}) -def temporal_dataframe_1(library: str) -> DataFrame: - if library in ["pandas-numpy", "pandas-nullable"]: +def temporal_dataframe_1(library: BaseHandler) -> DataFrame: + if library.name in ["pandas-numpy", "pandas-nullable"]: + import pandas as pd + df = pd.DataFrame( { "a": [ @@ -400,7 +347,9 @@ def temporal_dataframe_1(library: str) -> DataFrame: }, ) return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": + else: + import polars as pl + df = pl.DataFrame( { "a": [ @@ -446,8 +395,6 @@ def temporal_dataframe_1(library: str) -> DataFrame: }, ) return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) def compare_column_with_reference( @@ -494,7 +441,7 @@ def compare_dataframe_with_reference( ) -def mixed_dataframe_1(library: str) -> DataFrame: +def mixed_dataframe_1(library: BaseHandler) -> DataFrame: df: Any data = { "a": [1, 2, 3], @@ -515,30 +462,9 @@ def mixed_dataframe_1(library: str) -> DataFrame: "p": [timedelta(days=1), timedelta(days=2), timedelta(days=3)], "q": [timedelta(days=1), timedelta(days=2), timedelta(days=3)], } - if library == "pandas-numpy": - df = pd.DataFrame(data).astype( - { - "a": "int64", - "b": "int32", - "c": "int16", - "d": "int8", - "e": "uint64", - "f": "uint32", - "g": "uint16", - "h": "uint8", - "i": "float64", - "j": "float32", - "k": "bool", - "l": "object", - "m": "datetime64[s]", - "n": "datetime64[ms]", - "o": "datetime64[us]", - "p": "timedelta64[ms]", - "q": "timedelta64[us]", - }, - ) - return convert_to_standard_compliant_dataframe(df) - if library == "pandas-nullable": + if library.name == "pandas-nullable": + import pandas as pd + df = pd.DataFrame(data).astype( { "a": "Int64", @@ -561,29 +487,26 @@ def mixed_dataframe_1(library: str) -> DataFrame: }, ) return convert_to_standard_compliant_dataframe(df) - if library == "polars-lazy": - df = pl.DataFrame( - data, - schema={ - "a": pl.Int64, - "b": pl.Int32, - "c": pl.Int16, - "d": pl.Int8, - "e": pl.UInt64, - "f": pl.UInt32, - "g": pl.UInt16, - "h": pl.UInt8, - "i": pl.Float64, - "j": pl.Float32, - "k": pl.Boolean, - "l": pl.Utf8, - "m": pl.Datetime("ms"), - "n": pl.Datetime("ms"), - "o": pl.Datetime("us"), - "p": pl.Duration("ms"), - "q": pl.Duration("us"), - }, - ) - return convert_to_standard_compliant_dataframe(df) - msg = f"Got unexpected library: {library}" # pragma: no cover - raise AssertionError(msg) + + result = library.create_dataframe(data) + ns = result.__dataframe_namespace__() + dtypes: Mapping[str, DType] = { + "a": ns.Int64(), + "b": ns.Int32(), + "c": ns.Int16(), + "d": ns.Int8(), + "e": ns.UInt64(), + "f": ns.UInt32(), + "g": ns.UInt16(), + "h": ns.UInt8(), + "i": ns.Float64(), + "j": ns.Float32(), + "k": ns.Bool(), + "l": ns.String(), + "m": ns.Datetime("ms"), + "n": ns.Datetime("ms"), + "o": ns.Datetime("us"), + "p": ns.Duration("ms"), + "q": ns.Duration("us"), + } + return result.cast(dtypes)