diff --git a/dataframe_api_compat/pandas_standard/__init__.py b/dataframe_api_compat/pandas_standard/__init__.py index de999bcd..533eb81f 100644 --- a/dataframe_api_compat/pandas_standard/__init__.py +++ b/dataframe_api_compat/pandas_standard/__init__.py @@ -104,12 +104,16 @@ def map_pandas_dtype_to_standard_dtype(dtype: Any) -> DType: return Namespace.Float32() if dtype == "Float32": return Namespace.Float32() - if dtype == "bool": + if dtype in ("bool", "boolean"): + # Also for `pandas.core.arrays.boolean.BooleanDtype` return Namespace.Bool() if dtype == "object": return Namespace.String() if dtype == "string": return Namespace.String() + if hasattr(dtype, "name"): + # For types like `numpy.dtypes.DateTime64DType` + dtype = dtype.name if dtype.startswith("datetime64["): match = re.search(r"datetime64\[(\w{1,2})", dtype) assert match is not None diff --git a/dataframe_api_compat/pandas_standard/column_object.py b/dataframe_api_compat/pandas_standard/column_object.py index aff0c393..1df02a25 100644 --- a/dataframe_api_compat/pandas_standard/column_object.py +++ b/dataframe_api_compat/pandas_standard/column_object.py @@ -35,6 +35,8 @@ "UInt16": "uint16", "UInt8": "uint8", "boolean": "bool", + "Float64": "float64", + "Float32": "float32", } diff --git a/pyproject.toml b/pyproject.toml index 490f735e..6cf0717f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,9 @@ ignore = [ [tool.ruff.isort] force-single-line = true +[tool.black] +line-length = 90 + [tool.pytest.ini_options] filterwarnings = [ "error", diff --git a/tests/column/and_or_test.py b/tests/column/and_or_test.py index 48b73380..2fe5846c 100644 --- a/tests/column/and_or_test.py +++ b/tests/column/and_or_test.py @@ -1,46 +1,44 @@ from __future__ import annotations -import pandas as pd - from tests.utils import bool_dataframe_1 -from tests.utils import interchange_to_pandas +from tests.utils import compare_column_with_reference def test_column_and(library: str) -> None: df = bool_dataframe_1(library, api_version="2023.09-beta") + ns = df.__dataframe_namespace__() ser = df.col("a") other = df.col("b") result = df.assign((ser & other).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([True, True, False], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [True, True, False] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) def test_column_or(library: str) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") other = df.col("b") result = df.assign((ser | other).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([True, True, True], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [True, True, True] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) def test_column_and_with_scalar(library: str) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") other = True result = df.assign((other & ser).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([True, True, False], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [True, True, False] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) def test_column_or_with_scalar(library: str) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") other = True result = df.assign((other | ser).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([True, True, True], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [True, True, True] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) diff --git a/tests/column/cast_test.py b/tests/column/cast_test.py index f8fc16a3..111a4300 100644 --- a/tests/column/cast_test.py +++ b/tests/column/cast_test.py @@ -1,15 +1,11 @@ -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_cast_integers(library: str) -> None: df = integer_dataframe_1(library) - pdx = df.__dataframe_namespace__() - result = df.assign(df.col("a").cast(pdx.Int32())) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).astype( - {"a": "int32", "b": "int64"}, - ) - result_pd = interchange_to_pandas(result) - pd.testing.assert_frame_equal(result_pd, expected) + ns = df.__dataframe_namespace__() + result = df.assign(df.col("a").cast(ns.Int32())) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + expected_dtype = {"a": ns.Int32, "b": ns.Int64} + compare_dataframe_with_reference(result, expected, expected_dtype) diff --git a/tests/column/col_sorted_indices_test.py b/tests/column/col_sorted_indices_test.py index 2d63618c..bd46afe5 100644 --- a/tests/column/col_sorted_indices_test.py +++ b/tests/column/col_sorted_indices_test.py @@ -1,66 +1,42 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_6 -from tests.utils import interchange_to_pandas def test_expression_sorted_indices_ascending(library: str) -> None: df = integer_dataframe_6(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() col = df.col sorted_indices = col("b").sorted_indices() result = df.take(sorted_indices) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [2, 2, 1, 1, 1], - "b": [1, 2, 3, 4, 4], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [2, 2, 1, 1, 1], "b": [1, 2, 3, 4, 4]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_expression_sorted_indices_descending(library: str) -> None: df = integer_dataframe_6(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() col = df.col sorted_indices = col("b").sorted_indices(ascending=False) result = df.take(sorted_indices) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 2, 1], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 1, 1, 2, 2], "b": [4, 4, 3, 2, 1]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_column_sorted_indices_ascending(library: str) -> None: - df = integer_dataframe_6(library).persist() + df = integer_dataframe_6(library) + ns = df.__dataframe_namespace__() sorted_indices = df.col("b").sorted_indices() result = df.take(sorted_indices) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [2, 2, 1, 1, 1], - "b": [1, 2, 3, 4, 4], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [2, 2, 1, 1, 1], "b": [1, 2, 3, 4, 4]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_column_sorted_indices_descending(library: str) -> None: - df = integer_dataframe_6(library).persist() + df = integer_dataframe_6(library) + ns = df.__dataframe_namespace__() sorted_indices = df.col("b").sorted_indices(ascending=False) result = df.take(sorted_indices) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 2, 1], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 1, 1, 2, 2], "b": [4, 4, 3, 2, 1]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/column/comparisons_test.py b/tests/column/comparisons_test.py index 3513b9c7..9ba58710 100644 --- a/tests/column/comparisons_test.py +++ b/tests/column/comparisons_test.py @@ -2,83 +2,86 @@ from typing import Any -import pandas as pd import pytest +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_7 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( - ("comparison", "expected_data"), + ("comparison", "expected_data", "expected_dtype"), [ - ("__eq__", [True, True, False]), - ("__ne__", [False, False, True]), - ("__ge__", [True, True, False]), - ("__gt__", [False, False, False]), - ("__le__", [True, True, True]), - ("__lt__", [False, False, True]), - ("__add__", [2, 4, 7]), - ("__sub__", [0, 0, -1]), - ("__mul__", [1, 4, 12]), - ("__truediv__", [1, 1, 0.75]), - ("__floordiv__", [1, 1, 0]), - ("__pow__", [1, 4, 81]), - ("__mod__", [0, 0, 3]), + ("__eq__", [True, True, False], "Bool"), + ("__ne__", [False, False, True], "Bool"), + ("__ge__", [True, True, False], "Bool"), + ("__gt__", [False, False, False], "Bool"), + ("__le__", [True, True, True], "Bool"), + ("__lt__", [False, False, True], "Bool"), + ("__add__", [2, 4, 7], "Int64"), + ("__sub__", [0, 0, -1], "Int64"), + ("__mul__", [1, 4, 12], "Int64"), + ("__truediv__", [1, 1, 0.75], "Float64"), + ("__floordiv__", [1, 1, 0], "Int64"), + ("__pow__", [1, 4, 81], "Int64"), + ("__mod__", [0, 0, 3], "Int64"), ], ) def test_column_comparisons( library: str, comparison: str, expected_data: list[object], + expected_dtype: str, ) -> None: ser: Any - df = integer_dataframe_7(library).persist() + df = integer_dataframe_7(library) + ns = df.__dataframe_namespace__() ser = df.col("a") other = df.col("b") result = df.assign(getattr(ser, comparison)(other).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series(expected_data, name="result") - if library in ("polars", "polars-lazy") and comparison == "__pow__": + expected_ns_dtype = getattr(ns, expected_dtype) + if comparison == "__pow__" and library in ("polars", "polars-lazy"): # TODO - result_pd = result_pd.astype("int64") - pd.testing.assert_series_equal(result_pd, expected) + result = result.cast({"result": ns.Int64()}) + expected_ns_dtype = ns.Int64 + compare_column_with_reference(result.col("result"), expected_data, expected_ns_dtype) @pytest.mark.parametrize( - ("comparison", "expected_data"), + ("comparison", "expected_data", "expected_dtype"), [ - ("__eq__", [False, False, True]), - ("__ne__", [True, True, False]), - ("__ge__", [False, False, True]), - ("__gt__", [False, False, False]), - ("__le__", [True, True, True]), - ("__lt__", [True, True, False]), - ("__add__", [4, 5, 6]), - ("__sub__", [-2, -1, 0]), - ("__mul__", [3, 6, 9]), - ("__truediv__", [1 / 3, 2 / 3, 1]), - ("__floordiv__", [0, 0, 1]), - ("__pow__", [1, 8, 27]), - ("__mod__", [1, 2, 0]), + ("__eq__", [False, False, True], "Bool"), + ("__ne__", [True, True, False], "Bool"), + ("__ge__", [False, False, True], "Bool"), + ("__gt__", [False, False, False], "Bool"), + ("__le__", [True, True, True], "Bool"), + ("__lt__", [True, True, False], "Bool"), + ("__add__", [4, 5, 6], "Int64"), + ("__sub__", [-2, -1, 0], "Int64"), + ("__mul__", [3, 6, 9], "Int64"), + ("__truediv__", [1 / 3, 2 / 3, 1], "Float64"), + ("__floordiv__", [0, 0, 1], "Int64"), + ("__pow__", [1, 8, 27], "Int64"), + ("__mod__", [1, 2, 0], "Int64"), ], ) def test_column_comparisons_scalar( library: str, comparison: str, expected_data: list[object], + expected_dtype: str, ) -> None: ser: Any - df = integer_dataframe_1(library).persist() + df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") other = 3 result = df.assign(getattr(ser, comparison)(other).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series(expected_data, name="result") + expected_ns_dtype = getattr(ns, expected_dtype) if comparison == "__pow__" and library in ("polars", "polars-lazy"): - result_pd = result_pd.astype("int64") - pd.testing.assert_series_equal(result_pd, expected) + result = result.cast({"result": ns.Int64()}) + expected_ns_dtype = ns.Int64 + compare_column_with_reference(result.col("result"), expected_data, expected_ns_dtype) @pytest.mark.parametrize( @@ -96,10 +99,9 @@ def test_right_column_comparisons( ) -> None: # 1,2,3 ser: Any - df = integer_dataframe_7(library).persist() + df = integer_dataframe_7(library) + ns = df.__dataframe_namespace__() ser = df.col("a") other = 2 result = df.assign(getattr(ser, comparison)(other).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series(expected_data, name="result") - pd.testing.assert_series_equal(result_pd, expected) + compare_column_with_reference(result.col("result"), expected_data, dtype=ns.Int64) diff --git a/tests/column/cumulative_test.py b/tests/column/cumulative_test.py index dd6082c2..2c229b82 100644 --- a/tests/column/cumulative_test.py +++ b/tests/column/cumulative_test.py @@ -2,9 +2,11 @@ import pandas as pd import pytest +from packaging.version import Version +from packaging.version import parse +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( @@ -21,17 +23,16 @@ def test_cumulative_functions_column( func: str, expected_data: list[float], ) -> None: - df = integer_dataframe_1(library).persist() + df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") expected = pd.Series(expected_data, name="result") result = df.assign(getattr(ser, func)().rename("result")) - result_pd = interchange_to_pandas(result)["result"] if ( - tuple(int(v) for v in pd.__version__.split(".")) < (2, 0, 0) - and library == "pandas-nullable" + parse(pd.__version__) < Version("2.0.0") and library == "pandas-nullable" ): # pragma: no cover # Upstream bug - result_pd = result_pd.astype("int64") + result = result.cast({"result": ns.Int64()}) - pd.testing.assert_series_equal(result_pd, expected) + compare_column_with_reference(result.col("result"), expected, dtype=ns.Int64) diff --git a/tests/column/divmod_test.py b/tests/column/divmod_test.py index 5f0a7653..dd16fec6 100644 --- a/tests/column/divmod_test.py +++ b/tests/column/divmod_test.py @@ -1,41 +1,31 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_expression_divmod(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") other = df.col("b") result_quotient, result_remainder = ser.__divmod__(other) # quotient result = df.assign(result_quotient.rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected_quotient = pd.Series([0, 0, 0], name="result") - pd.testing.assert_series_equal(result_pd, expected_quotient) + compare_column_with_reference(result.col("result"), [0, 0, 0], dtype=ns.Int64) # remainder result = df.assign(result_remainder.rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected_remainder = pd.Series([1, 2, 3], name="result") - pd.testing.assert_series_equal(result_pd, expected_remainder) + compare_column_with_reference(result.col("result"), [1, 2, 3], dtype=ns.Int64) def test_expression_divmod_with_scalar(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") result_quotient, result_remainder = ser.__divmod__(2) # quotient result = df.assign(result_quotient.rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected_quotient = pd.Series([0, 1, 1], name="result") - pd.testing.assert_series_equal(result_pd, expected_quotient) + compare_column_with_reference(result.col("result"), [0, 1, 1], dtype=ns.Int64) # remainder result = df.assign(result_remainder.rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected_remainder = pd.Series([1, 0, 1], name="result") - pd.testing.assert_series_equal(result_pd, expected_remainder) + compare_column_with_reference(result.col("result"), [1, 0, 1], dtype=ns.Int64) diff --git a/tests/column/fill_nan_test.py b/tests/column/fill_nan_test.py index de923dea..137dd4e0 100644 --- a/tests/column/fill_nan_test.py +++ b/tests/column/fill_nan_test.py @@ -1,27 +1,24 @@ from __future__ import annotations -import pandas as pd - -from tests.utils import interchange_to_pandas +from tests.utils import compare_column_with_reference from tests.utils import nan_dataframe_1 def test_column_fill_nan(library: str) -> None: # TODO: test with nullable pandas, check null isn't filled - df = nan_dataframe_1(library).persist() + df = nan_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") result = df.assign(ser.fill_nan(-1.0).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([1.0, 2.0, -1.0], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [1.0, 2.0, -1.0] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Float64) def test_column_fill_nan_with_null(library: str) -> None: # TODO: test with nullable pandas, check null isn't filled - df = nan_dataframe_1(library).persist() + df = nan_dataframe_1(library) ns = df.__dataframe_namespace__() ser = df.col("a") result = df.assign(ser.fill_nan(ns.null).is_null().rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([False, False, True], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [False, False, True] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) diff --git a/tests/column/get_rows_by_mask_test.py b/tests/column/get_rows_by_mask_test.py index e22608f0..2a170a0e 100644 --- a/tests/column/get_rows_by_mask_test.py +++ b/tests/column/get_rows_by_mask_test.py @@ -2,8 +2,8 @@ import pandas as pd +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_column_filter(library: str) -> None: @@ -18,11 +18,9 @@ def test_column_filter(library: str) -> None: def test_column_take_by_mask_noop(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") mask = ser > 0 ser = ser.filter(mask) result = df.assign(ser.rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([1, 2, 3], name="result") - pd.testing.assert_series_equal(result_pd, expected) + compare_column_with_reference(result.col("result"), [1, 2, 3], dtype=ns.Int64) diff --git a/tests/column/get_rows_test.py b/tests/column/get_rows_test.py index 10d7a357..9254ef45 100644 --- a/tests/column/get_rows_test.py +++ b/tests/column/get_rows_test.py @@ -1,16 +1,13 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_expression_take(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") indices = df.col("a") - 1 result = df.assign(ser.take(indices).rename("result")).select("result") - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([1, 2, 3], name="result") - pd.testing.assert_series_equal(result_pd, expected) + compare_column_with_reference(result.col("result"), [1, 2, 3], dtype=ns.Int64) diff --git a/tests/column/invert_test.py b/tests/column/invert_test.py index 3a28c4c3..b6003eea 100644 --- a/tests/column/invert_test.py +++ b/tests/column/invert_test.py @@ -1,25 +1,22 @@ from __future__ import annotations -import pandas as pd - from tests.utils import bool_dataframe_1 -from tests.utils import interchange_to_pandas +from tests.utils import compare_column_with_reference def test_expression_invert(library: str) -> None: df = bool_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") result = df.assign((~ser).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([False, False, True], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [False, False, True] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) def test_column_invert(library: str) -> None: - df = bool_dataframe_1(library).persist() + df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") result = df.assign((~ser).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([False, False, True], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [False, False, True] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) diff --git a/tests/column/is_in_test.py b/tests/column/is_in_test.py index b129c673..f840706b 100644 --- a/tests/column/is_in_test.py +++ b/tests/column/is_in_test.py @@ -3,13 +3,12 @@ from typing import TYPE_CHECKING from typing import Any -import pandas as pd import pytest +from tests.utils import compare_column_with_reference from tests.utils import float_dataframe_1 from tests.utils import float_dataframe_2 from tests.utils import float_dataframe_3 -from tests.utils import interchange_to_pandas if TYPE_CHECKING: from collections.abc import Callable @@ -29,13 +28,12 @@ def test_is_in( df_factory: Callable[[str], Any], expected_values: list[bool], ) -> None: - df = df_factory(library).persist() + df = df_factory(library) + ns = df.__dataframe_namespace__() ser = df.col("a") other = ser + 1 result = df.assign(ser.is_in(other).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series(expected_values, name="result") - pd.testing.assert_series_equal(result_pd, expected) + compare_column_with_reference(result.col("result"), expected_values, dtype=ns.Bool) @pytest.mark.parametrize( @@ -53,10 +51,9 @@ def test_expr_is_in( expected_values: list[bool], ) -> None: df = df_factory(library) + ns = df.__dataframe_namespace__() col = df.col ser = col("a") other = ser + 1 result = df.assign(ser.is_in(other).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series(expected_values, name="result") - pd.testing.assert_series_equal(result_pd, expected) + compare_column_with_reference(result.col("result"), expected_values, dtype=ns.Bool) diff --git a/tests/column/is_nan_test.py b/tests/column/is_nan_test.py index 7c33e5cb..b0d04025 100644 --- a/tests/column/is_nan_test.py +++ b/tests/column/is_nan_test.py @@ -1,15 +1,13 @@ from __future__ import annotations -import pandas as pd - -from tests.utils import interchange_to_pandas +from tests.utils import compare_column_with_reference from tests.utils import nan_dataframe_1 def test_column_is_nan(library: str) -> None: - df = nan_dataframe_1(library).persist() + df = nan_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") result = df.assign(ser.is_nan().rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([False, False, True], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [False, False, True] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) diff --git a/tests/column/is_null_test.py b/tests/column/is_null_test.py index 416350a9..fdc8e34b 100644 --- a/tests/column/is_null_test.py +++ b/tests/column/is_null_test.py @@ -1,28 +1,26 @@ from __future__ import annotations -import pandas as pd - -from tests.utils import interchange_to_pandas +from tests.utils import compare_column_with_reference from tests.utils import nan_dataframe_1 from tests.utils import null_dataframe_1 def test_column_is_null_1(library: str) -> None: - df = nan_dataframe_1(library).persist() + df = nan_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") result = df.assign(ser.is_null().rename("result")) - result_pd = interchange_to_pandas(result)["result"] if library == "pandas-numpy": - expected = pd.Series([False, False, True], name="result") + expected = [False, False, True] else: - expected = pd.Series([False, False, False], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [False, False, False] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) def test_column_is_null_2(library: str) -> None: - df = null_dataframe_1(library).persist() + df = null_dataframe_1(library) + ns = df.__dataframe_namespace__() ser = df.col("a") result = df.assign(ser.is_null().rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([False, False, True], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [False, False, True] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) diff --git a/tests/column/name_test.py b/tests/column/name_test.py index ad43a601..efd5934d 100644 --- a/tests/column/name_test.py +++ b/tests/column/name_test.py @@ -2,6 +2,8 @@ import pandas as pd import pytest +from packaging.version import Version +from packaging.version import parse from tests.utils import convert_to_standard_compliant_dataframe from tests.utils import integer_dataframe_1 @@ -20,7 +22,7 @@ def test_pandas_name_if_0_named_column() -> None: @pytest.mark.skipif( - tuple(int(v) for v in pd.__version__.split(".")) < (2, 1, 0), + parse(pd.__version__) < Version("2.1.0"), reason="before consoritum standard", ) def test_invalid_name_pandas() -> None: diff --git a/tests/column/pow_test.py b/tests/column/pow_test.py index ab12dad5..253a7218 100644 --- a/tests/column/pow_test.py +++ b/tests/column/pow_test.py @@ -1,56 +1,52 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_float_powers_column(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") other = df.col("b") * 1.0 result = df.assign(ser.__pow__(other).rename("result")) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 32.0, 729.0]}, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 32.0, 729.0]} + expected_dtype = {"a": ns.Int64, "b": ns.Int64, "result": ns.Float64} + compare_dataframe_with_reference(result, expected, expected_dtype) # type: ignore[arg-type] def test_float_powers_scalar_column(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") other = 1.0 result = df.assign(ser.__pow__(other).rename("result")) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 2.0, 3.0]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1.0, 2.0, 3.0]} + expected_dtype = {"a": ns.Int64, "b": ns.Int64, "result": ns.Float64} + compare_dataframe_with_reference(result, expected, expected_dtype) # type: ignore[arg-type] def test_int_powers_column(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") other = df.col("b") * 1 result = df.assign(ser.__pow__(other).rename("result")) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]}) if library in ("polars", "polars-lazy"): - result_pd = result_pd.astype("int64") - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({name: ns.Int64() for name in ("a", "b", "result")}) + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]} + expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")} + compare_dataframe_with_reference(result, expected, expected_dtype) def test_int_powers_scalar_column(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") other = 1 result = df.assign(ser.__pow__(other).rename("result")) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]}) if library in ("polars", "polars-lazy"): - result_pd = result_pd.astype("int64") - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({name: ns.Int64() for name in ("a", "b", "result")}) + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]} + expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")} + compare_dataframe_with_reference(result, expected, expected_dtype) diff --git a/tests/column/reductions_test.py b/tests/column/reductions_test.py index d9ba4d4a..25d85d8b 100644 --- a/tests/column/reductions_test.py +++ b/tests/column/reductions_test.py @@ -1,36 +1,35 @@ from __future__ import annotations -import pandas as pd import pytest +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( - ("reduction", "expected"), + ("reduction", "expected", "expected_dtype"), [ - ("min", 1), - ("max", 3), - ("sum", 6), - ("prod", 6), - ("median", 2.0), - ("mean", 2.0), - ("std", 1.0), - ("var", 1.0), + ("min", 1, "Int64"), + ("max", 3, "Int64"), + ("sum", 6, "Int64"), + ("prod", 6, "Int64"), + ("median", 2.0, "Float64"), + ("mean", 2.0, "Float64"), + ("std", 1.0, "Float64"), + ("var", 1.0, "Float64"), ], ) def test_expression_reductions( library: str, reduction: str, expected: float, + expected_dtype: str, ) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") ser = ser - getattr(ser, reduction)() result = df.assign(ser.rename("result")) - result_pd = interchange_to_pandas(result)["result"] - ser_pd = interchange_to_pandas(df)["a"].rename("result") - expected_pd = ser_pd - expected - pd.testing.assert_series_equal(result_pd, expected_pd) + reference = list((df.col("a") - expected).persist().to_array()) + expected_ns_dtype = getattr(ns, expected_dtype) + compare_column_with_reference(result.col("result"), reference, expected_ns_dtype) diff --git a/tests/column/shift_test.py b/tests/column/shift_test.py index 1f09b8b6..86c084ce 100644 --- a/tests/column/shift_test.py +++ b/tests/column/shift_test.py @@ -2,24 +2,19 @@ import polars as pl from polars.testing import assert_frame_equal +from tests.utils import compare_dataframe_with_reference from tests.utils import float_dataframe_1 from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_shift_with_fill_value(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.assign(df.col("a").shift(1).fill_null(999)) - expected = pd.DataFrame( - { - "a": [999, 1, 2], - "b": [4, 5, 6], - }, - ) - result_pd = interchange_to_pandas(result) if library == "pandas-numpy": - result_pd = result_pd.astype("int64") - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({name: ns.Int64() for name in ("a", "b")}) + expected = {"a": [999, 1, 2], "b": [4, 5, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_shift_without_fill_value(library: str) -> None: @@ -41,14 +36,10 @@ def test_shift_without_fill_value(library: str) -> None: def test_shift_with_fill_value_complicated(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.assign(df.col("a").shift(1).fill_null(df.col("a").mean())) - expected = pd.DataFrame( - { - "a": [2.0, 1, 2], - "b": [4, 5, 6], - }, - ) - result_pd = interchange_to_pandas(result) if library == "pandas-nullable": - result_pd = result_pd.astype({"a": "float64"}) - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"a": ns.Float64()}) + expected = {"a": [2.0, 1, 2], "b": [4, 5, 6]} + expected_dtype = {"a": ns.Float64, "b": ns.Int64} + compare_dataframe_with_reference(result, expected, expected_dtype) # type: ignore[arg-type] diff --git a/tests/column/sort_test.py b/tests/column/sort_test.py index 74c40b92..7cafc5e3 100644 --- a/tests/column/sort_test.py +++ b/tests/column/sort_test.py @@ -1,68 +1,56 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_6 -from tests.utils import interchange_to_pandas def test_expression_sort_ascending(library: str) -> None: df = integer_dataframe_6(library, api_version="2023.09-beta") - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() s_sorted = df.col("b").sort().rename("c") result = df.assign(s_sorted) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 1, 2], - "c": [1, 2, 3, 4, 4], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = { + "a": [1, 1, 1, 2, 2], + "b": [4, 4, 3, 1, 2], + "c": [1, 2, 3, 4, 4], + } + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_expression_sort_descending(library: str) -> None: df = integer_dataframe_6(library, api_version="2023.09-beta") - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() s_sorted = df.col("b").sort(ascending=False).rename("c") result = df.assign(s_sorted) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 1, 2], - "c": [4, 4, 3, 2, 1], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = { + "a": [1, 1, 1, 2, 2], + "b": [4, 4, 3, 1, 2], + "c": [4, 4, 3, 2, 1], + } + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_column_sort_ascending(library: str) -> None: - df = integer_dataframe_6(library, api_version="2023.09-beta").persist() + df = integer_dataframe_6(library, api_version="2023.09-beta") + ns = df.__dataframe_namespace__() s_sorted = df.col("b").sort().rename("c") result = df.assign(s_sorted) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 1, 2], - "c": [1, 2, 3, 4, 4], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = { + "a": [1, 1, 1, 2, 2], + "b": [4, 4, 3, 1, 2], + "c": [1, 2, 3, 4, 4], + } + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_column_sort_descending(library: str) -> None: - df = integer_dataframe_6(library, api_version="2023.09-beta").persist() + df = integer_dataframe_6(library, api_version="2023.09-beta") + ns = df.__dataframe_namespace__() s_sorted = df.col("b").sort(ascending=False).rename("c") result = df.assign(s_sorted) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 1, 2], - "c": [4, 4, 3, 2, 1], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = { + "a": [1, 1, 1, 2, 2], + "b": [4, 4, 3, 1, 2], + "c": [4, 4, 3, 2, 1], + } + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/column/statistics_test.py b/tests/column/statistics_test.py index cdaac140..b7e84868 100644 --- a/tests/column/statistics_test.py +++ b/tests/column/statistics_test.py @@ -1,15 +1,11 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_mean(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() result = df.assign((df.col("a") - df.col("a").mean()).rename("result")) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([-1, 0, 1.0], name="result") - pd.testing.assert_series_equal(result_pd, expected) + compare_column_with_reference(result.col("result"), [-1, 0, 1.0], dtype=ns.Float64) diff --git a/tests/column/temporal/components_test.py b/tests/column/temporal/components_test.py index 640a819f..f0fb3bd6 100644 --- a/tests/column/temporal/components_test.py +++ b/tests/column/temporal/components_test.py @@ -2,10 +2,9 @@ from typing import Literal -import pandas as pd import pytest -from tests.utils import interchange_to_pandas +from tests.utils import compare_column_with_reference from tests.utils import temporal_dataframe_1 @@ -23,14 +22,17 @@ ], ) def test_col_components(library: str, attr: str, expected: list[int]) -> None: - df = temporal_dataframe_1(library).persist() + df = temporal_dataframe_1(library) + ns = df.__dataframe_namespace__() for col_name in ("a", "c", "e"): - result = df.assign(getattr(df.col(col_name), attr)().rename("result")).select( - "result", + result = ( + df.assign(getattr(df.col(col_name), attr)().rename("result")) + .select( + "result", + ) + .cast({"result": ns.Int64()}) ) - result = interchange_to_pandas(result)["result"].astype("int64") - expected = pd.Series(expected, name="result") - pd.testing.assert_series_equal(result, expected) + compare_column_with_reference(result.col("result"), expected, dtype=ns.Int64) @pytest.mark.parametrize( @@ -42,13 +44,16 @@ def test_col_components(library: str, attr: str, expected: list[int]) -> None: ], ) def test_col_microsecond(library: str, col_name: str, expected: list[int]) -> None: - df = temporal_dataframe_1(library).persist() - result = df.assign(df.col(col_name).microsecond().rename("result")).select( - "result", + df = temporal_dataframe_1(library) + ns = df.__dataframe_namespace__() + result = ( + df.assign(df.col(col_name).microsecond().rename("result")) + .select( + "result", + ) + .cast({"result": ns.Int64()}) ) - result = interchange_to_pandas(result)["result"].astype("int64") - expected = pd.Series(expected, name="result") - pd.testing.assert_series_equal(result, expected) + compare_column_with_reference(result.col("result"), expected, dtype=ns.Int64) @pytest.mark.parametrize( @@ -60,13 +65,16 @@ def test_col_microsecond(library: str, col_name: str, expected: list[int]) -> No ], ) def test_col_nanosecond(library: str, col_name: str, expected: list[int]) -> None: - df = temporal_dataframe_1(library).persist() - result = df.assign(df.col(col_name).nanosecond().rename("result")).select( # type: ignore[attr-defined] - "result", + df = temporal_dataframe_1(library) + ns = df.__dataframe_namespace__() + result = ( + df.assign(df.col(col_name).nanosecond().rename("result")) # type: ignore[attr-defined] + .select( + "result", + ) + .cast({"result": ns.Int64()}) ) - result = interchange_to_pandas(result)["result"].astype("int64") - expected = pd.Series(expected, name="result") - pd.testing.assert_series_equal(result, expected) + compare_column_with_reference(result.col("result"), expected, dtype=ns.Int64) @pytest.mark.parametrize( @@ -84,11 +92,14 @@ def test_col_unix_timestamp_time_units( expected: list[int], ) -> None: df = temporal_dataframe_1(library) - result = df.assign( - df.col("e").unix_timestamp(time_unit=time_unit).rename("result"), - ).select( - "result", + ns = df.__dataframe_namespace__() + result = ( + df.assign( + df.col("e").unix_timestamp(time_unit=time_unit).rename("result"), + ) + .select( + "result", + ) + .cast({"result": ns.Int64()}) ) - result = interchange_to_pandas(result)["result"].astype("int64") - expected = pd.Series(expected, name="result") - pd.testing.assert_series_equal(result, expected, check_exact=True) + compare_column_with_reference(result.col("result"), expected, dtype=ns.Int64) diff --git a/tests/column/temporal/filter_test.py b/tests/column/temporal/filter_test.py index 7da55036..27c2d901 100644 --- a/tests/column/temporal/filter_test.py +++ b/tests/column/temporal/filter_test.py @@ -1,13 +1,9 @@ -import pandas as pd - -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference from tests.utils import temporal_dataframe_1 def test_filter_w_date(library: str) -> None: df = temporal_dataframe_1(library).select("a", "index") - namespace = df.__dataframe_namespace__() - result = df.filter(df.col("a") > namespace.date(2020, 1, 2)).select("index") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"index": [1, 2]}) - pd.testing.assert_frame_equal(result_pd, expected) + ns = df.__dataframe_namespace__() + result = df.filter(df.col("a") > ns.date(2020, 1, 2)).select("index") + compare_dataframe_with_reference(result, {"index": [1, 2]}, dtype=ns.Int64) diff --git a/tests/column/temporal/floor_test.py b/tests/column/temporal/floor_test.py index de9e4519..b9bf5d85 100644 --- a/tests/column/temporal/floor_test.py +++ b/tests/column/temporal/floor_test.py @@ -2,10 +2,9 @@ from datetime import datetime -import pandas as pd import pytest -from tests.utils import interchange_to_pandas +from tests.utils import compare_column_with_reference from tests.utils import temporal_dataframe_1 @@ -17,9 +16,8 @@ ) def test_floor(library: str, freq: str, expected: list[datetime]) -> None: df = temporal_dataframe_1(library) + ns = df.__dataframe_namespace__() col = df.col - result = df.assign(col("a").floor(freq).rename("result")).select("result").persist() # type: ignore[attr-defined] + result = df.assign(col("a").floor(freq).rename("result")).select("result") # type: ignore[attr-defined] # TODO check the resolution - result = interchange_to_pandas(result)["result"].astype("datetime64[ns]") - expected = pd.Series(expected, name="result") - pd.testing.assert_series_equal(result, expected) + compare_column_with_reference(result.col("result"), expected, dtype=ns.Datetime) diff --git a/tests/dataframe/schema_test.py b/tests/dataframe/schema_test.py index 86673377..c8d0538e 100644 --- a/tests/dataframe/schema_test.py +++ b/tests/dataframe/schema_test.py @@ -3,6 +3,7 @@ import pandas as pd import pytest from packaging.version import Version +from packaging.version import parse from tests.utils import PANDAS_VERSION from tests.utils import mixed_dataframe_1 @@ -50,8 +51,7 @@ def test_schema(library: str) -> None: assert isinstance(result["m"], namespace.Datetime) assert isinstance(result["n"], namespace.Datetime) if not ( - library.startswith("pandas") - and tuple(int(v) for v in pd.__version__.split(".")) < (2, 0, 0) + library.startswith("pandas") and parse(pd.__version__) < Version("2.0.0") ): # pragma: no cover (coverage bug?) # pandas non-nanosecond support only came in 2.0 assert result["n"].time_unit == "ms" @@ -60,18 +60,14 @@ def test_schema(library: str) -> None: assert result["n"].time_zone is None assert isinstance(result["o"], namespace.Datetime) if not ( - library.startswith("pandas") - and tuple(int(v) for v in pd.__version__.split(".")) < (2, 0, 0) + library.startswith("pandas") and parse(pd.__version__) < Version("2.0.0") ): # pragma: no cover (coverage bug?) # pandas non-nanosecond support only came in 2.0 assert result["o"].time_unit == "us" else: # pragma: no cover pass assert result["o"].time_zone is None - if not ( - library.startswith("pandas") - and tuple(int(v) for v in pd.__version__.split(".")) < (2, 0, 0) - ): + if not (library.startswith("pandas") and parse(pd.__version__) < Version("2.0.0")): # pandas non-nanosecond support only came in 2.0 - before that, these would be 'float' assert isinstance(result["p"], namespace.Duration) assert result["p"].time_unit == "ms" diff --git a/tests/groupby/groupby_any_all_test.py b/tests/groupby/groupby_any_all_test.py index 7cc427c5..4c739d56 100644 --- a/tests/groupby/groupby_any_all_test.py +++ b/tests/groupby/groupby_any_all_test.py @@ -2,6 +2,8 @@ import pandas as pd import pytest +from packaging.version import Version +from packaging.version import parse from polars.exceptions import SchemaError from tests.utils import bool_dataframe_2 @@ -28,12 +30,8 @@ def test_groupby_boolean( # need to sort result = result.sort("key") result_pd = interchange_to_pandas(result) - if library == "pandas-nullable" and tuple( - int(v) for v in pd.__version__.split(".") - ) < ( - 2, - 0, - 0, + if library == "pandas-nullable" and parse(pd.__version__) < Version( + "2.0.0", ): # pragma: no cover # upstream bug result_pd = result_pd.astype({"key": "int64"}) diff --git a/tests/groupby/numeric_test.py b/tests/groupby/numeric_test.py index ee33a4fe..02c63f0e 100644 --- a/tests/groupby/numeric_test.py +++ b/tests/groupby/numeric_test.py @@ -2,6 +2,8 @@ import pandas as pd import pytest +from packaging.version import Version +from packaging.version import parse from tests.utils import integer_dataframe_4 from tests.utils import interchange_to_pandas @@ -35,12 +37,8 @@ def test_group_by_numeric( result = result.sort("key") result_pd = interchange_to_pandas(result) expected = pd.DataFrame({"key": [1, 2], "b": expected_b, "c": expected_c}) - if library == "pandas-nullable" and tuple( - int(v) for v in pd.__version__.split(".") - ) < ( - 2, - 0, - 0, + if library == "pandas-nullable" and parse(pd.__version__) < Version( + "2.0.0", ): # pragma: no cover # upstream bug result_pd = result_pd.astype({"key": "int64"}) diff --git a/tests/integration/scale_column_test.py b/tests/integration/scale_column_test.py index 325b97f5..4f07c8fd 100644 --- a/tests/integration/scale_column_test.py +++ b/tests/integration/scale_column_test.py @@ -3,11 +3,13 @@ import pandas as pd import polars as pl import pytest +from packaging.version import Version +from packaging.version import parse from polars.testing import assert_series_equal @pytest.mark.skipif( - tuple(int(v) for v in pd.__version__.split(".")) < (2, 1, 0), + parse(pd.__version__) < Version("2.1.0"), reason="pandas doesn't support 3.8", ) def test_scale_column_pandas() -> None: @@ -19,7 +21,7 @@ def test_scale_column_pandas() -> None: @pytest.mark.skipif( - tuple(int(v) for v in pl.__version__.split(".")) < (0, 19, 0), + parse(pl.__version__) < Version("0.19.0"), reason="before consortium standard in polars", ) def test_scale_column_polars() -> None: @@ -31,7 +33,7 @@ def test_scale_column_polars() -> None: @pytest.mark.skipif( - tuple(int(v) for v in pl.__version__.split(".")) < (0, 19, 0), + parse(pl.__version__) < Version("0.19.0"), reason="before consortium standard in polars", ) def test_scale_column_polars_from_persisted_df() -> None: diff --git a/tests/integration/upstream_test.py b/tests/integration/upstream_test.py index d244e7dd..abbfc982 100644 --- a/tests/integration/upstream_test.py +++ b/tests/integration/upstream_test.py @@ -1,17 +1,15 @@ import sys import pytest +from packaging.version import Version +from packaging.version import parse class TestPolars: def test_dataframe(self) -> None: import polars as pl - if tuple(int(v) for v in pl.__version__.split(".")) < ( - 0, - 19, - 0, - ): # pragma: no cover + if parse(pl.__version__) < Version("0.19.0"): # pragma: no cover # before consortium standard in polars return @@ -24,11 +22,7 @@ def test_dataframe(self) -> None: def test_lazyframe(self) -> None: import polars as pl - if tuple(int(v) for v in pl.__version__.split(".")) < ( - 0, - 19, - 0, - ): # pragma: no cover + if parse(pl.__version__) < Version("0.19.0"): # pragma: no cover # before consortium standard in polars return @@ -57,11 +51,7 @@ def test_pandas(self) -> None: """ import pandas as pd - if tuple(int(v) for v in pd.__version__.split(".")) < ( - 2, - 1, - 0, - ): # pragma: no cover + if parse(pd.__version__) < Version("2.1.0"): # pragma: no cover # before consortium standard in pandas return diff --git a/tests/utils.py b/tests/utils.py index 8cc049c4..712d67bf 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -4,7 +4,7 @@ from datetime import timedelta from typing import TYPE_CHECKING from typing import Any -from typing import TypeVar +from typing import Mapping from typing import cast import pandas as pd @@ -14,9 +14,8 @@ import dataframe_api_compat.pandas_standard import dataframe_api_compat.polars_standard -DType = TypeVar("DType") - if TYPE_CHECKING: + from dataframe_api import Column from dataframe_api import DataFrame POLARS_VERSION = parse(pl.__version__) @@ -485,6 +484,41 @@ def interchange_to_pandas(result: Any) -> pd.DataFrame: return cast(pd.DataFrame, df) +def compare_column_with_reference( + column: Column, + reference: list[Any], + dtype: Any, +) -> None: + column = column.persist() + col_len = column.len().scalar + assert col_len == len(reference), f"column length: {col_len} != {len(reference)}" + assert isinstance( + column.dtype, + dtype, + ), f"column dtype: {column.dtype} isn't a instance of {dtype}" + for idx in range(col_len): + assert ( + reference[idx] == column.get_value(idx).scalar + ), f"{reference[idx]} != {column.get_value(idx).scalar}" + + +def compare_dataframe_with_reference( + dataframe: DataFrame, + reference: Mapping[str, list[Any]], + dtype: Any | Mapping[str, Any], +) -> None: + assert dataframe.column_names == list( + reference.keys(), + ), f"dataframe column names: '{dataframe.column_names}' != '{list(reference.keys())}'" + for col_name in dataframe.column_names: + col_dtype = dtype[col_name] if isinstance(dtype, dict) else dtype + compare_column_with_reference( + dataframe.col(col_name), + reference[col_name], + dtype=col_dtype, + ) + + def mixed_dataframe_1(library: str) -> DataFrame: df: Any data = {