diff --git a/tests/dataframe/all_rowwise_test.py b/tests/dataframe/all_rowwise_test.py index f04bf951..92b2df73 100644 --- a/tests/dataframe/all_rowwise_test.py +++ b/tests/dataframe/all_rowwise_test.py @@ -1,20 +1,18 @@ from __future__ import annotations -import pandas as pd import pytest from tests.utils import bool_dataframe_1 -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference def test_all_horizontal(library: str) -> None: df = bool_dataframe_1(library) - namespace = df.__dataframe_namespace__() - mask = namespace.all_horizontal(*[df.col(col_name) for col_name in df.column_names]) + ns = df.__dataframe_namespace__() + mask = ns.all_horizontal(*[df.col(col_name) for col_name in df.column_names]) result = df.filter(mask) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [True, True], "b": [True, True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [True, True], "b": [True, True]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) def test_all_horizontal_invalid(library: str) -> None: diff --git a/tests/dataframe/and_test.py b/tests/dataframe/and_test.py index 5a0d8b65..2b99778b 100644 --- a/tests/dataframe/and_test.py +++ b/tests/dataframe/and_test.py @@ -1,24 +1,22 @@ from __future__ import annotations -import pandas as pd - from tests.utils import bool_dataframe_1 -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference def test_and_with_scalar(library: str) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() other = True result = df & other - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [True, True, False], "b": [True, True, True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [True, True, False], "b": [True, True, True]} + compare_dataframe_with_reference(result, expected, ns.Bool) def 
test_rand_with_scalar(library: str) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() other = True result = other & df - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [True, True, False], "b": [True, True, True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [True, True, False], "b": [True, True, True]} + compare_dataframe_with_reference(result, expected, ns.Bool) diff --git a/tests/dataframe/any_all_test.py b/tests/dataframe/any_all_test.py index 5d034fdb..63f9a95d 100644 --- a/tests/dataframe/any_all_test.py +++ b/tests/dataframe/any_all_test.py @@ -1,11 +1,10 @@ from __future__ import annotations -import pandas as pd import pytest from tests.utils import bool_dataframe_1 from tests.utils import bool_dataframe_3 -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference @pytest.mark.parametrize( @@ -21,23 +20,22 @@ def test_reductions( expected_data: dict[str, object], ) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() result = getattr(df, reduction)() - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame(expected_data) - pd.testing.assert_frame_equal(result_pd, expected) + compare_dataframe_with_reference(result, expected_data, dtype=ns.Bool) # type: ignore[arg-type] def test_any(library: str) -> None: df = bool_dataframe_3(library) + ns = df.__dataframe_namespace__() result = df.any() - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [False], "b": [True], "c": [True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [False], "b": [True], "c": [True]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) def test_all(library: str) -> None: df = bool_dataframe_3(library) + ns = df.__dataframe_namespace__() result = df.all() - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [False], "b": [False], "c": 
[True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [False], "b": [False], "c": [True]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) diff --git a/tests/dataframe/any_rowwise_test.py b/tests/dataframe/any_rowwise_test.py index 0c576e86..6fbb9177 100644 --- a/tests/dataframe/any_rowwise_test.py +++ b/tests/dataframe/any_rowwise_test.py @@ -1,20 +1,18 @@ from __future__ import annotations -import pandas as pd import pytest from tests.utils import bool_dataframe_1 -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference def test_any_horizontal(library: str) -> None: df = bool_dataframe_1(library) - namespace = df.__dataframe_namespace__() - mask = namespace.any_horizontal(*[df.col(col_name) for col_name in df.column_names]) + ns = df.__dataframe_namespace__() + mask = ns.any_horizontal(*[df.col(col_name) for col_name in df.column_names]) result = df.filter(mask) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [True, True, False], "b": [True, True, True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [True, True, False], "b": [True, True, True]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) def test_any_horizontal_invalid(library: str) -> None: diff --git a/tests/dataframe/assign_test.py b/tests/dataframe/assign_test.py index fe86be47..f6daf5af 100644 --- a/tests/dataframe/assign_test.py +++ b/tests/dataframe/assign_test.py @@ -1,40 +1,33 @@ from __future__ import annotations -import pandas as pd import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_insert_columns(library: str) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() new_col = (df.col("b") + 3).rename("result") result = 
df.assign(new_col.rename("c")) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) # check original df didn't change - df_pd = interchange_to_pandas(df) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(df_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + compare_dataframe_with_reference(df, expected, dtype=ns.Int64) def test_insert_multiple_columns(library: str) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() new_col = (df.col("b") + 3).rename("result") result = df.assign(new_col.rename("c"), new_col.rename("d")) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [7, 8, 9]}, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [7, 8, 9]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) # check original df didn't change - df_pd = interchange_to_pandas(df) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(df_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + compare_dataframe_with_reference(df, expected, dtype=ns.Int64) def test_insert_multiple_columns_invalid(library: str) -> None: @@ -47,14 +40,11 @@ def test_insert_multiple_columns_invalid(library: str) -> None: def test_insert_eager_columns(library: str) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") + ns = df.__dataframe_namespace__() new_col = (df.col("b") + 3).rename("result") result = df.assign(new_col.rename("c"), new_col.rename("d")) - result_pd = interchange_to_pandas(result) - 
expected = pd.DataFrame( - {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [7, 8, 9]}, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [7, 8, 9]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) # check original df didn't change - df_pd = interchange_to_pandas(df) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(df_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + compare_dataframe_with_reference(df, expected, dtype=ns.Int64) diff --git a/tests/dataframe/cast_test.py b/tests/dataframe/cast_test.py index a6be8afe..7e3a199d 100644 --- a/tests/dataframe/cast_test.py +++ b/tests/dataframe/cast_test.py @@ -1,15 +1,11 @@ -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_cast_integers(library: str) -> None: df = integer_dataframe_1(library) - pdx = df.__dataframe_namespace__() - result = df.cast({"a": pdx.Int32()}) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).astype( - {"a": "int32", "b": "int64"}, - ) - result_pd = interchange_to_pandas(result) - pd.testing.assert_frame_equal(result_pd, expected) + ns = df.__dataframe_namespace__() + result = df.cast({"a": ns.Int32()}) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + expected_dtype = {"a": ns.Int32, "b": ns.Int64} + compare_dataframe_with_reference(result, expected, dtype=expected_dtype) diff --git a/tests/dataframe/columns_iter_test.py b/tests/dataframe/columns_iter_test.py index 3f127dcc..f8a02f3b 100644 --- a/tests/dataframe/columns_iter_test.py +++ b/tests/dataframe/columns_iter_test.py @@ -1,19 +1,15 @@ -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_iter_columns(library: str) -> None: df 
= integer_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.assign( *[col / col.mean() for col in df.iter_columns()], ) - expected = pd.DataFrame( - { - "a": [0.5, 1.0, 1.5], - "b": [0.8, 1.0, 1.2], - }, - ) - result_pd = interchange_to_pandas(result) - pd.testing.assert_frame_equal(result_pd, expected) + expected = { + "a": [0.5, 1.0, 1.5], + "b": [0.8, 1.0, 1.2], + } + compare_dataframe_with_reference(result, expected, dtype=ns.Float64) diff --git a/tests/dataframe/comparisons_test.py b/tests/dataframe/comparisons_test.py index a8084eb6..6886191f 100644 --- a/tests/dataframe/comparisons_test.py +++ b/tests/dataframe/comparisons_test.py @@ -1,41 +1,41 @@ from __future__ import annotations -import pandas as pd import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( - ("comparison", "expected_data"), + ("comparison", "expected_data", "expected_dtype"), [ - ("__eq__", {"a": [False, True, False], "b": [False, False, False]}), - ("__ne__", {"a": [True, False, True], "b": [True, True, True]}), - ("__ge__", {"a": [False, True, True], "b": [True, True, True]}), - ("__gt__", {"a": [False, False, True], "b": [True, True, True]}), - ("__le__", {"a": [True, True, False], "b": [False, False, False]}), - ("__lt__", {"a": [True, False, False], "b": [False, False, False]}), - ("__add__", {"a": [3, 4, 5], "b": [6, 7, 8]}), - ("__sub__", {"a": [-1, 0, 1], "b": [2, 3, 4]}), - ("__mul__", {"a": [2, 4, 6], "b": [8, 10, 12]}), - ("__truediv__", {"a": [0.5, 1, 1.5], "b": [2, 2.5, 3]}), - ("__floordiv__", {"a": [0, 1, 1], "b": [2, 2, 3]}), - ("__pow__", {"a": [1, 4, 9], "b": [16, 25, 36]}), - ("__mod__", {"a": [1, 0, 1], "b": [0, 1, 0]}), + ("__eq__", {"a": [False, True, False], "b": [False, False, False]}, "Bool"), + ("__ne__", {"a": [True, False, True], "b": [True, True, True]}, "Bool"), + ("__ge__", {"a": [False, True, True], "b": 
[True, True, True]}, "Bool"), + ("__gt__", {"a": [False, False, True], "b": [True, True, True]}, "Bool"), + ("__le__", {"a": [True, True, False], "b": [False, False, False]}, "Bool"), + ("__lt__", {"a": [True, False, False], "b": [False, False, False]}, "Bool"), + ("__add__", {"a": [3, 4, 5], "b": [6, 7, 8]}, "Int64"), + ("__sub__", {"a": [-1, 0, 1], "b": [2, 3, 4]}, "Int64"), + ("__mul__", {"a": [2, 4, 6], "b": [8, 10, 12]}, "Int64"), + ("__truediv__", {"a": [0.5, 1, 1.5], "b": [2, 2.5, 3]}, "Float64"), + ("__floordiv__", {"a": [0, 1, 1], "b": [2, 2, 3]}, "Int64"), + ("__pow__", {"a": [1, 4, 9], "b": [16, 25, 36]}, "Int64"), + ("__mod__", {"a": [1, 0, 1], "b": [0, 1, 0]}, "Int64"), ], ) def test_comparisons_with_scalar( library: str, comparison: str, expected_data: dict[str, object], + expected_dtype: str, ) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() other = 2 result = getattr(df, comparison)(other) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame(expected_data) - pd.testing.assert_frame_equal(result_pd, expected) + expected_ns_dtype = getattr(ns, expected_dtype) + compare_dataframe_with_reference(result, expected_data, dtype=expected_ns_dtype) # type: ignore[arg-type] @pytest.mark.parametrize( @@ -52,8 +52,7 @@ def test_rcomparisons_with_scalar( expected_data: dict[str, object], ) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() other = 2 result = getattr(df, comparison)(other) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame(expected_data) - pd.testing.assert_frame_equal(result_pd, expected) + compare_dataframe_with_reference(result, expected_data, dtype=ns.Int64) # type: ignore[arg-type] diff --git a/tests/dataframe/divmod_test.py b/tests/dataframe/divmod_test.py index 56742a61..0a62d3fe 100644 --- a/tests/dataframe/divmod_test.py +++ b/tests/dataframe/divmod_test.py @@ -1,18 +1,15 @@ from __future__ import annotations -import pandas as pd - +from 
tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_divmod_with_scalar(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() other = 2 result_quotient, result_remainder = df.__divmod__(other) - result_quotient_pd = interchange_to_pandas(result_quotient) - result_remainder_pd = interchange_to_pandas(result_remainder) - expected_quotient = pd.DataFrame({"a": [0, 1, 1], "b": [2, 2, 3]}) - expected_remainder = pd.DataFrame({"a": [1, 0, 1], "b": [0, 1, 0]}) - pd.testing.assert_frame_equal(result_quotient_pd, expected_quotient) - pd.testing.assert_frame_equal(result_remainder_pd, expected_remainder) + expected_quotient = {"a": [0, 1, 1], "b": [2, 2, 3]} + expected_remainder = {"a": [1, 0, 1], "b": [0, 1, 0]} + compare_dataframe_with_reference(result_quotient, expected_quotient, dtype=ns.Int64) + compare_dataframe_with_reference(result_remainder, expected_remainder, dtype=ns.Int64) diff --git a/tests/dataframe/drop_column_test.py b/tests/dataframe/drop_column_test.py index c84fe61d..9f948245 100644 --- a/tests/dataframe/drop_column_test.py +++ b/tests/dataframe/drop_column_test.py @@ -1,14 +1,12 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_drop_column(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.drop("a") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"b": [4, 5, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"b": [4, 5, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/dataframe/drop_nulls_test.py b/tests/dataframe/drop_nulls_test.py index 6efd74fe..2bf6e604 100644 --- a/tests/dataframe/drop_nulls_test.py +++ 
b/tests/dataframe/drop_nulls_test.py @@ -1,12 +1,10 @@ -import pandas as pd - -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference from tests.utils import null_dataframe_1 def test_drop_nulls(library: str) -> None: df = null_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.drop_nulls() - expected = pd.DataFrame({"a": [1.0, 2.0]}) - result_pd = interchange_to_pandas(result) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1.0, 2.0]} + compare_dataframe_with_reference(result, expected, dtype=ns.Float64) diff --git a/tests/dataframe/fill_nan_test.py b/tests/dataframe/fill_nan_test.py index b0b87a44..e21f8af1 100644 --- a/tests/dataframe/fill_nan_test.py +++ b/tests/dataframe/fill_nan_test.py @@ -1,28 +1,27 @@ from __future__ import annotations -import pandas as pd import pytest -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference from tests.utils import nan_dataframe_1 def test_fill_nan(library: str) -> None: df = nan_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.fill_nan(-1) - result_pd = interchange_to_pandas(result) - result_pd = result_pd.astype("float64") - expected = pd.DataFrame({"a": [1.0, 2.0, -1.0]}) - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"a": ns.Float64()}) + expected = {"a": [1.0, 2.0, -1.0]} + compare_dataframe_with_reference(result, expected, dtype=ns.Float64) def test_fill_nan_with_scalar(library: str) -> None: df = nan_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.fill_nan(df.col("a").get_value(0)) - result_pd = interchange_to_pandas(result) - result_pd = result_pd.astype("float64") - expected = pd.DataFrame({"a": [1.0, 2.0, 1.0]}) - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"a": ns.Float64()}) + expected = {"a": [1.0, 2.0, 1.0]} + compare_dataframe_with_reference(result, expected, 
dtype=ns.Float64) def test_fill_nan_with_scalar_invalid(library: str) -> None: @@ -34,12 +33,12 @@ def test_fill_nan_with_scalar_invalid(library: str) -> None: def test_fill_nan_with_null(library: str) -> None: df = nan_dataframe_1(library) - namespace = df.__dataframe_namespace__() - result = df.fill_nan(namespace.null) + ns = df.__dataframe_namespace__() + result = df.fill_nan(ns.null) n_nans = result.is_nan().sum() - n_nans = interchange_to_pandas(n_nans) + result = n_nans.col("a").persist().get_value(0).scalar if library == "pandas-numpy": # null is nan for pandas-numpy - assert n_nans["a"][0] == 1 # type: ignore[index] + assert result == 1 else: - assert n_nans["a"][0] == 0 # type: ignore[index] + assert result == 0 diff --git a/tests/dataframe/fill_null_test.py b/tests/dataframe/fill_null_test.py index 7cf43250..12c24e8d 100644 --- a/tests/dataframe/fill_null_test.py +++ b/tests/dataframe/fill_null_test.py @@ -2,7 +2,6 @@ import pytest -from tests.utils import interchange_to_pandas from tests.utils import nan_dataframe_1 from tests.utils import null_dataframe_2 @@ -26,11 +25,11 @@ def test_fill_null(library: str, column_names: list[str] | None) -> None: # check there no nulls left in the column assert res1.shape()[0] == 0 # check the last element was filled with 0 - assert interchange_to_pandas(result)["a"].iloc[2] == 0 + assert result.col("a").persist().get_value(2).scalar == 0 if column_names is None or "b" in column_names: res1 = result.filter(result.col("b").is_null()).persist() assert res1.shape()[0] == 0 - assert interchange_to_pandas(result)["b"].iloc[2] == 0 + assert result.col("b").persist().get_value(2).scalar == 0 def test_fill_null_noop(library: str) -> None: diff --git a/tests/dataframe/get_column_by_name_test.py b/tests/dataframe/get_column_by_name_test.py index dc081a67..6ddae877 100644 --- a/tests/dataframe/get_column_by_name_test.py +++ b/tests/dataframe/get_column_by_name_test.py @@ -1,17 +1,13 @@ from __future__ import annotations -import 
pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_get_column(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() col = df.col result = df.assign(col("a").rename("_tmp")).drop("a").rename({"_tmp": "a"}) - df.__dataframe_namespace__() - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})[["b", "a"]] - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"b": [4, 5, 6], "a": [1, 2, 3]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/dataframe/get_rows_by_mask_test.py b/tests/dataframe/get_rows_by_mask_test.py index 91e57668..a2ae421c 100644 --- a/tests/dataframe/get_rows_by_mask_test.py +++ b/tests/dataframe/get_rows_by_mask_test.py @@ -1,16 +1,13 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_filter(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() mask = df.col("a") % 2 == 1 result = df.filter(mask) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 3], "b": [4, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 3], "b": [4, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/dataframe/get_rows_test.py b/tests/dataframe/get_rows_test.py index ef0cd398..16391c64 100644 --- a/tests/dataframe/get_rows_test.py +++ b/tests/dataframe/get_rows_test.py @@ -1,17 +1,13 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils 
import interchange_to_pandas def test_take(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() df = df.assign((df.col("a") - 1).sort(ascending=False).rename("result")) - df.__dataframe_namespace__() result = df.take(df.col("result")) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [3, 2, 1], "b": [6, 5, 4], "result": [0, 1, 2]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [3, 2, 1], "b": [6, 5, 4], "result": [0, 1, 2]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/dataframe/invert_test.py b/tests/dataframe/invert_test.py index e3ff00ab..ed84c32e 100644 --- a/tests/dataframe/invert_test.py +++ b/tests/dataframe/invert_test.py @@ -1,19 +1,18 @@ from __future__ import annotations -import pandas as pd import pytest from tests.utils import bool_dataframe_1 +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_invert(library: str) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() result = ~df - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [False, False, True], "b": [False, False, False]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [False, False, True], "b": [False, False, False]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) def test_invert_invalid(library: str) -> None: diff --git a/tests/dataframe/is_nan_test.py b/tests/dataframe/is_nan_test.py index 6d4d9fcb..3d82f9dc 100644 --- a/tests/dataframe/is_nan_test.py +++ b/tests/dataframe/is_nan_test.py @@ -1,14 +1,12 @@ from __future__ import annotations -import pandas as pd - -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference from tests.utils import nan_dataframe_1 def 
test_dataframe_is_nan(library: str) -> None: df = nan_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.is_nan() - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [False, False, True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [False, False, True]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) diff --git a/tests/dataframe/is_null_test.py b/tests/dataframe/is_null_test.py index 48197f7d..c2a469b2 100644 --- a/tests/dataframe/is_null_test.py +++ b/tests/dataframe/is_null_test.py @@ -1,27 +1,25 @@ from __future__ import annotations -import pandas as pd - -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference from tests.utils import nan_dataframe_2 from tests.utils import null_dataframe_1 def test_is_null_1(library: str) -> None: df = nan_dataframe_2(library) + ns = df.__dataframe_namespace__() result = df.is_null() - result_pd = interchange_to_pandas(result) if library == "pandas-numpy": # nan and null are the same in pandas-numpy - expected = pd.DataFrame({"a": [False, False, True]}) + expected = {"a": [False, False, True]} else: - expected = pd.DataFrame({"a": [False, False, False]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [False, False, False]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) def test_is_null_2(library: str) -> None: df = null_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.is_null() - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [False, False, True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [False, False, True]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) diff --git a/tests/dataframe/join_test.py b/tests/dataframe/join_test.py index 48320264..b893e83e 100644 --- a/tests/dataframe/join_test.py +++ b/tests/dataframe/join_test.py @@ -1,24 +1,26 @@ 
from __future__ import annotations -import pandas as pd import pytest from packaging.version import Version from tests.utils import PANDAS_VERSION +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 -from tests.utils import interchange_to_pandas def test_join_left(library: str) -> None: left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) result = left.join(right, left_on="a", right_on="a", how="left") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - {"a": [1, 2, 3], "b": [4, 5, 6], "c": [4.0, 2.0, float("nan")]}, - ) - pd.testing.assert_frame_equal(result_pd, expected) + ns = result.__dataframe_namespace__() + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [4.0, 2.0, float("nan")]} + expected_dtype = { + "a": ns.Int64, + "b": ns.Int64, + "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + } + compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] def test_join_overlapping_names(library: str) -> None: @@ -32,9 +34,9 @@ def test_join_inner(library: str) -> None: left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) result = left.join(right, left_on="a", right_on="a", how="inner") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2], "b": [4, 5], "c": [4, 2]}) - pd.testing.assert_frame_equal(result_pd, expected) + ns = result.__dataframe_namespace__() + expected = {"a": [1, 2], "b": [4, 5], "c": [4, 2]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) @pytest.mark.skip(reason="outer join has changed in Polars recently, need to fixup") @@ -42,31 +44,37 @@ def test_join_outer(library: str) -> None: # pragma: no cover left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) result = left.join(right, left_on="a", 
right_on="a", how="outer").sort("a") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [1, 2, 3, 4], - "b": [4, 5, 6, float("nan")], - "c": [4.0, 2.0, float("nan"), 6.0], - }, - ) + ns = result.__dataframe_namespace__() if ( library == "pandas-nullable" and Version("2.0.0") > PANDAS_VERSION ): # pragma: no cover # upstream bug - result_pd = result_pd.astype({"a": "int64"}) - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"a": ns.Int64()}) + expected = { + "a": [1, 2, 3, 4], + "b": [4, 5, 6, float("nan")], + "c": [4.0, 2.0, float("nan"), 6.0], + } + expected_dtype = { + "a": ns.Int64, + "b": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + } + compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] def test_join_two_keys(library: str) -> None: left = integer_dataframe_1(library) right = integer_dataframe_2(library).rename({"b": "c"}) result = left.join(right, left_on=["a", "b"], right_on=["a", "c"], how="left") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - {"a": [1, 2, 3], "b": [4, 5, 6], "c": [4.0, float("nan"), float("nan")]}, - ) - pd.testing.assert_frame_equal(result_pd, expected) + ns = result.__dataframe_namespace__() + expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [4.0, float("nan"), float("nan")]} + expected_dtype = { + "a": ns.Int64, + "b": ns.Int64, + "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + } + compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] def test_join_invalid(library: str) -> None: diff --git a/tests/dataframe/or_test.py b/tests/dataframe/or_test.py index fb8b00a2..1a4a8c95 100644 --- a/tests/dataframe/or_test.py +++ b/tests/dataframe/or_test.py @@ -1,24 +1,22 @@ from __future__ import annotations -import pandas as pd - from 
tests.utils import bool_dataframe_1 -from tests.utils import interchange_to_pandas +from tests.utils import compare_dataframe_with_reference def test_or_with_scalar(library: str) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() other = True result = df | other - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [True, True, True], "b": [True, True, True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [True, True, True], "b": [True, True, True]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) def test_ror_with_scalar(library: str) -> None: df = bool_dataframe_1(library) + ns = df.__dataframe_namespace__() other = True result = other | df - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [True, True, True], "b": [True, True, True]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [True, True, True], "b": [True, True, True]} + compare_dataframe_with_reference(result, expected, dtype=ns.Bool) diff --git a/tests/dataframe/pow_test.py b/tests/dataframe/pow_test.py index 7bfc916c..eff8b95b 100644 --- a/tests/dataframe/pow_test.py +++ b/tests/dataframe/pow_test.py @@ -1,17 +1,14 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_float_scalar_powers(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() other = 1.0 result = df.__pow__(other) - result_pd = interchange_to_pandas(result).astype( - {"a": "int64", "b": "int64"}, - ) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"a": ns.Int64(), "b": ns.Int64()}) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git 
a/tests/dataframe/reductions_test.py b/tests/dataframe/reductions_test.py index 79bd8c5b..2055a7ef 100644 --- a/tests/dataframe/reductions_test.py +++ b/tests/dataframe/reductions_test.py @@ -1,31 +1,34 @@ from __future__ import annotations -import pandas as pd +from typing import Any + import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( - ("reduction", "expected"), + ("reduction", "expected", "expected_dtype"), [ - ("min", pd.DataFrame({"a": [1], "b": [4]})), - ("max", pd.DataFrame({"a": [3], "b": [6]})), - ("sum", pd.DataFrame({"a": [6], "b": [15]})), - ("prod", pd.DataFrame({"a": [6], "b": [120]})), - ("median", pd.DataFrame({"a": [2.0], "b": [5.0]})), - ("mean", pd.DataFrame({"a": [2.0], "b": [5.0]})), - ("std", pd.DataFrame({"a": [1.0], "b": [1.0]})), - ("var", pd.DataFrame({"a": [1.0], "b": [1.0]})), + ("min", {"a": [1], "b": [4]}, "Int64"), + ("max", {"a": [3], "b": [6]}, "Int64"), + ("sum", {"a": [6], "b": [15]}, "Int64"), + ("prod", {"a": [6], "b": [120]}, "Int64"), + ("median", {"a": [2.0], "b": [5.0]}, "Float64"), + ("mean", {"a": [2.0], "b": [5.0]}, "Float64"), + ("std", {"a": [1.0], "b": [1.0]}, "Float64"), + ("var", {"a": [1.0], "b": [1.0]}, "Float64"), ], ) def test_dataframe_reductions( library: str, reduction: str, - expected: pd.DataFrame, + expected: dict[str, Any], + expected_dtype: str, ) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() result = getattr(df, reduction)() - result_pd = interchange_to_pandas(result) - pd.testing.assert_frame_equal(result_pd, expected) + expected_ns_dtype = getattr(ns, expected_dtype) + compare_dataframe_with_reference(result, expected, dtype=expected_ns_dtype) diff --git a/tests/dataframe/rename_columns_test.py b/tests/dataframe/rename_columns_test.py index 1c88836b..63081cf5 100644 --- a/tests/dataframe/rename_columns_test.py +++ 
b/tests/dataframe/rename_columns_test.py @@ -1,18 +1,17 @@ from __future__ import annotations -import pandas as pd import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_rename(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.rename({"a": "c", "b": "e"}) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"c": [1, 2, 3], "e": [4, 5, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"c": [1, 2, 3], "e": [4, 5, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_rename_invalid(library: str) -> None: diff --git a/tests/dataframe/select_test.py b/tests/dataframe/select_test.py index c1986c50..60bde31f 100644 --- a/tests/dataframe/select_test.py +++ b/tests/dataframe/select_test.py @@ -1,26 +1,25 @@ from __future__ import annotations -import pandas as pd import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_select(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.select("b") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"b": [4, 5, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"b": [4, 5, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_select_list_of_str(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() result = df.select("a", "b") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def 
test_select_list_of_str_invalid(library: str) -> None: diff --git a/tests/dataframe/slice_rows_test.py b/tests/dataframe/slice_rows_test.py index 1b81c71e..027b98df 100644 --- a/tests/dataframe/slice_rows_test.py +++ b/tests/dataframe/slice_rows_test.py @@ -1,19 +1,20 @@ from __future__ import annotations -import pandas as pd +from typing import Any + import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_3 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( ("start", "stop", "step", "expected"), [ - (2, 7, 2, pd.DataFrame({"a": [3, 5, 7], "b": [5, 3, 1]})), - (None, 7, 2, pd.DataFrame({"a": [1, 3, 5, 7], "b": [7, 5, 3, 1]})), - (2, None, 2, pd.DataFrame({"a": [3, 5, 7], "b": [5, 3, 1]})), - (2, None, None, pd.DataFrame({"a": [3, 4, 5, 6, 7], "b": [5, 4, 3, 2, 1]})), + (2, 7, 2, {"a": [3, 5, 7], "b": [5, 3, 1]}), + (None, 7, 2, {"a": [1, 3, 5, 7], "b": [7, 5, 3, 1]}), + (2, None, 2, {"a": [3, 5, 7], "b": [5, 3, 1]}), + (2, None, None, {"a": [3, 4, 5, 6, 7], "b": [5, 4, 3, 2, 1]}), ], ) def test_slice_rows( @@ -21,9 +22,9 @@ def test_slice_rows( start: int | None, stop: int | None, step: int | None, - expected: pd.DataFrame, + expected: dict[str, Any], ) -> None: df = integer_dataframe_3(library) + ns = df.__dataframe_namespace__() result = df.slice_rows(start, stop, step) - result_pd = interchange_to_pandas(result) - pd.testing.assert_frame_equal(result_pd, expected) + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/dataframe/sort_test.py b/tests/dataframe/sort_test.py index 468ec78d..1698e671 100644 --- a/tests/dataframe/sort_test.py +++ b/tests/dataframe/sort_test.py @@ -1,19 +1,18 @@ from __future__ import annotations -import pandas as pd import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_5 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize("keys", [["a", "b"], 
[]]) def test_sort(library: str, keys: list[str]) -> None: df = integer_dataframe_5(library, api_version="2023.09-beta") + ns = df.__dataframe_namespace__() result = df.sort(*keys) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 1], "b": [3, 4]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 1], "b": [3, 4]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) @pytest.mark.parametrize("keys", [["a", "b"], []]) @@ -22,7 +21,7 @@ def test_sort_descending( keys: list[str], ) -> None: df = integer_dataframe_5(library, api_version="2023.09-beta") + ns = df.__dataframe_namespace__() result = df.sort(*keys, ascending=False) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 1], "b": [4, 3]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 1], "b": [4, 3]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/dataframe/update_columns_test.py b/tests/dataframe/update_columns_test.py index 16929667..0dfc67fe 100644 --- a/tests/dataframe/update_columns_test.py +++ b/tests/dataframe/update_columns_test.py @@ -1,26 +1,22 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_update_columns(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() col = df.col result = df.assign(col("a") + 1) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [2, 3, 4], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [2, 3, 4], "b": [4, 5, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_update_multiple_columns(library: str) -> None: df = integer_dataframe_1(library) - df.__dataframe_namespace__() + ns = 
df.__dataframe_namespace__() col = df.col result = df.assign(col("a") + 1, col("b") + 2) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [2, 3, 4], "b": [6, 7, 8]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [2, 3, 4], "b": [6, 7, 8]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/dataframe/update_test.py b/tests/dataframe/update_test.py index e858acc6..3c6b57a7 100644 --- a/tests/dataframe/update_test.py +++ b/tests/dataframe/update_test.py @@ -1,35 +1,29 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_update_column(library: str) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() new_col = df.col("b") + 3 result = df.assign(new_col) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [7, 8, 9]}) - pd.testing.assert_frame_equal(result_pd, expected) + expected = {"a": [1, 2, 3], "b": [7, 8, 9]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) # check original df didn't change - df_pd = interchange_to_pandas(df) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(df_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + compare_dataframe_with_reference(df, expected, dtype=ns.Int64) def test_update_columns(library: str) -> None: df = integer_dataframe_1(library, api_version="2023.09-beta") - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() new_col_a = df.col("a") + 1 new_col_b = df.col("b") + 3 result = df.assign(new_col_a, new_col_b) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [2, 3, 4], "b": [7, 8, 9]}) - pd.testing.assert_frame_equal(result_pd, expected) + 
expected = {"a": [2, 3, 4], "b": [7, 8, 9]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) # check original df didn't change - df_pd = interchange_to_pandas(df) - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(df_pd, expected) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + compare_dataframe_with_reference(df, expected, dtype=ns.Int64) diff --git a/tests/groupby/aggregate_test.py b/tests/groupby/aggregate_test.py index c40119ed..25619342 100644 --- a/tests/groupby/aggregate_test.py +++ b/tests/groupby/aggregate_test.py @@ -1,7 +1,5 @@ -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_4 -from tests.utils import interchange_to_pandas def test_aggregate(library: str) -> None: @@ -25,26 +23,37 @@ def test_aggregate(library: str) -> None: ) .sort("key") ) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "key": [1, 2], - "b_sum": [3, 7], - "b_prod": [2, 12], - "b_mean": [1.5, 3.5], - "b_median": [1.5, 3.5], - "b_min": [1, 3], - "b_max": [2, 4], - "b_std": [0.707107, 0.707107], - "b_var": [0.5, 0.5], - "b_count": [2, 2], - "d_any": [True, True], - "d_all": [True, True], - }, - ) + expected = { + "key": [1, 2], + "b_sum": [3, 7], + "b_prod": [2, 12], + "b_mean": [1.5, 3.5], + "b_median": [1.5, 3.5], + "b_min": [1, 3], + "b_max": [2, 4], + "b_std": [0.707107, 0.707107], + "b_var": [0.5, 0.5], + "b_count": [2, 2], + "d_any": [True, True], + "d_all": [True, True], + } + expected_dtype = { + "key": ns.Int64, + "b_sum": ns.Int64, + "b_prod": ns.Int64, + "b_mean": ns.Float64, + "b_median": ns.Float64, + "b_min": ns.Int64, + "b_max": ns.Int64, + "b_std": ns.Float64, + "b_var": ns.Float64, + "b_count": ns.Int64, + "d_any": ns.Bool, + "d_all": ns.Bool, + } if library == "polars-lazy": - result_pd = result_pd.astype({"b_count": "int64"}) - pd.testing.assert_frame_equal(result_pd, expected) + result = 
result.cast({"b_count": ns.Int64()}) + compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] def test_aggregate_only_size(library: str) -> None: @@ -57,16 +66,13 @@ def test_aggregate_only_size(library: str) -> None: ) .sort("key") ) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "key": [1, 2], - "b_count": [2, 2], - }, - ) + expected = { + "key": [1, 2], + "b_count": [2, 2], + } if library == "polars-lazy": - result_pd = result_pd.astype({"b_count": "int64"}) - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"b_count": ns.Int64()}) + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_aggregate_no_size(library: str) -> None: @@ -84,16 +90,22 @@ def test_aggregate_no_size(library: str) -> None: ) .sort("key") ) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "key": [1, 2], - "b_sum": [3, 7], - "b_mean": [1.5, 3.5], - "b_min": [1, 3], - "b_max": [2, 4], - "b_std": [0.707107, 0.707107], - "b_var": [0.5, 0.5], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = { + "key": [1, 2], + "b_sum": [3, 7], + "b_mean": [1.5, 3.5], + "b_min": [1, 3], + "b_max": [2, 4], + "b_std": [0.707107, 0.707107], + "b_var": [0.5, 0.5], + } + expected_dtype = { + "key": ns.Int64, + "b_sum": ns.Int64, + "b_mean": ns.Float64, + "b_min": ns.Int64, + "b_max": ns.Int64, + "b_std": ns.Float64, + "b_var": ns.Float64, + } + compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] diff --git a/tests/groupby/groupby_any_all_test.py b/tests/groupby/groupby_any_all_test.py index 4c739d56..8ae9eea1 100644 --- a/tests/groupby/groupby_any_all_test.py +++ b/tests/groupby/groupby_any_all_test.py @@ -7,8 +7,8 @@ from polars.exceptions import SchemaError from tests.utils import bool_dataframe_2 +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_4 
-from tests.utils import interchange_to_pandas @pytest.mark.parametrize( @@ -25,20 +25,18 @@ def test_groupby_boolean( expected_c: list[bool], ) -> None: df = bool_dataframe_2(library) - df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() result = getattr(df.group_by("key"), aggregation)() # need to sort result = result.sort("key") - result_pd = interchange_to_pandas(result) if library == "pandas-nullable" and parse(pd.__version__) < Version( "2.0.0", ): # pragma: no cover # upstream bug - result_pd = result_pd.astype({"key": "int64"}) - else: - pass - expected = pd.DataFrame({"key": [1, 2], "b": expected_b, "c": expected_c}) - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"key": ns.Int64()}) + expected = {"key": [1, 2], "b": expected_b, "c": expected_c} + expected_dtype = {"key": ns.Int64, "b": ns.Bool, "c": ns.Bool} + compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] def test_group_by_invalid_any_all(library: str) -> None: diff --git a/tests/groupby/numeric_test.py b/tests/groupby/numeric_test.py index 02c63f0e..075f1588 100644 --- a/tests/groupby/numeric_test.py +++ b/tests/groupby/numeric_test.py @@ -5,25 +5,26 @@ from packaging.version import Version from packaging.version import parse +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_4 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( - ("aggregation", "expected_b", "expected_c"), + ("aggregation", "expected_b", "expected_c", "expected_dtype"), [ - ("min", [1, 3], [4, 6]), - ("max", [2, 4], [5, 7]), - ("sum", [3, 7], [9, 13]), - ("prod", [2, 12], [20, 42]), - ("median", [1.5, 3.5], [4.5, 6.5]), - ("mean", [1.5, 3.5], [4.5, 6.5]), + ("min", [1, 3], [4, 6], "Int64"), + ("max", [2, 4], [5, 7], "Int64"), + ("sum", [3, 7], [9, 13], "Int64"), + ("prod", [2, 12], [20, 42], "Int64"), + ("median", [1.5, 3.5], [4.5, 6.5], "Float64"), + ("mean", [1.5, 3.5], 
[4.5, 6.5], "Float64"), ( "std", [0.7071067811865476, 0.7071067811865476], [0.7071067811865476, 0.7071067811865476], + "Float64", ), - ("var", [0.5, 0.5], [0.5, 0.5]), + ("var", [0.5, 0.5], [0.5, 0.5], "Float64"), ], ) def test_group_by_numeric( @@ -31,17 +32,18 @@ def test_group_by_numeric( aggregation: str, expected_b: list[float], expected_c: list[float], + expected_dtype: str, ) -> None: df = integer_dataframe_4(library) + ns = df.__dataframe_namespace__() result = getattr(df.group_by("key"), aggregation)() result = result.sort("key") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"key": [1, 2], "b": expected_b, "c": expected_c}) + expected = {"key": [1, 2], "b": expected_b, "c": expected_c} + dtype = getattr(ns, expected_dtype) + expected_ns_dtype = {"key": ns.Int64, "b": dtype, "c": dtype} if library == "pandas-nullable" and parse(pd.__version__) < Version( "2.0.0", ): # pragma: no cover # upstream bug - result_pd = result_pd.astype({"key": "int64"}) - else: - pass - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"key": ns.Int64()}) + compare_dataframe_with_reference(result, expected, dtype=expected_ns_dtype) # type: ignore[arg-type] diff --git a/tests/groupby/size_test.py b/tests/groupby/size_test.py index 5c051005..2d7da647 100644 --- a/tests/groupby/size_test.py +++ b/tests/groupby/size_test.py @@ -1,17 +1,15 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_4 -from tests.utils import interchange_to_pandas def test_group_by_size(library: str) -> None: df = integer_dataframe_4(library) + ns = df.__dataframe_namespace__() result = df.group_by("key").size() result = result.sort("key") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"key": [1, 2], "size": [2, 2]}) + expected = {"key": [1, 2], "size": [2, 2]} # TODO polars returns uint32. what do we standardise to? 
- result_pd["size"] = result_pd["size"].astype("int64") - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"size": ns.Int64()}) + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/integration/persistedness_test.py b/tests/integration/persistedness_test.py index e9744e52..9d6bf7de 100644 --- a/tests/integration/persistedness_test.py +++ b/tests/integration/persistedness_test.py @@ -1,14 +1,12 @@ -import pandas as pd import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 -from tests.utils import interchange_to_pandas def test_within_df_propagation(library: str) -> None: df1 = integer_dataframe_1(library) - df1 = df1 df1 = df1 + 1 with pytest.raises(RuntimeError): _ = int(df1.col("a").get_value(0)) # type: ignore[call-overload] @@ -58,18 +56,22 @@ def test_within_df_within_col_propagation(library: str) -> None: def test_cross_df_propagation(library: str) -> None: df1 = integer_dataframe_1(library) df2 = integer_dataframe_2(library) - df1 = (df1 + 1).persist() - df2 = df2.rename({"b": "c"}).persist() + ns = df1.__dataframe_namespace__() + df1 = df1 + 1 + df2 = df2.rename({"b": "c"}) result = df1.join(df2, how="left", left_on="a", right_on="a") - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame( - { - "a": [2, 3, 4], - "b": [5, 6, 7], - "c": [2.0, float("nan"), 6.0], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + ns = result.__dataframe_namespace__() + expected = { + "a": [2, 3, 4], + "b": [5, 6, 7], + "c": [2.0, float("nan"), 6.0], + } + expected_dtype = { + "a": ns.Int64, + "b": ns.Int64, + "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64, + } + compare_dataframe_with_reference(result, expected, dtype=expected_dtype) # type: ignore[arg-type] def test_multiple_propagations(library: str) -> None: diff --git 
a/tests/namespace/column_from_1d_array_test.py b/tests/namespace/column_from_1d_array_test.py index 526487dc..b2dac631 100644 --- a/tests/namespace/column_from_1d_array_test.py +++ b/tests/namespace/column_from_1d_array_test.py @@ -5,100 +5,96 @@ from datetime import timedelta import numpy as np -import pandas as pd import pytest from packaging.version import Version from tests.utils import PANDAS_VERSION from tests.utils import POLARS_VERSION +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( - "pandas_dtype", + ("pandas_dtype", "column_dtype"), [ - "float64", - "float32", - "int64", - "int32", - "int16", - "int8", - "uint64", - "uint32", - "uint16", - "uint8", + ("float64", "Float64"), + ("float32", "Float32"), + ("int64", "Int64"), + ("int32", "Int32"), + ("int16", "Int16"), + ("int8", "Int8"), + ("uint64", "UInt64"), + ("uint32", "UInt32"), + ("uint16", "UInt16"), + ("uint8", "UInt8"), ], ) def test_column_from_1d_array( library: str, pandas_dtype: str, + column_dtype: str, ) -> None: ser = integer_dataframe_1(library).col("a").persist() - namespace = ser.__column_namespace__() + ns = ser.__column_namespace__() arr = np.array([1, 2, 3], dtype=pandas_dtype) - result = namespace.dataframe_from_columns( - namespace.column_from_1d_array( # type: ignore[call-arg] + result = ns.dataframe_from_columns( + ns.column_from_1d_array( # type: ignore[call-arg] arr, name="result", ), ) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([1, 2, 3], name="result", dtype=pandas_dtype) - pd.testing.assert_series_equal(result_pd, expected) + expected = [1, 2, 3] + compare_column_with_reference( + result.col("result"), + expected, + dtype=getattr(ns, column_dtype), + ) def test_column_from_1d_array_string( library: str, ) -> None: ser = integer_dataframe_1(library).persist().col("a") - namespace = ser.__column_namespace__() + ns = 
ser.__column_namespace__() arr = np.array(["a", "b", "c"]) - result = namespace.dataframe_from_columns( - namespace.column_from_1d_array( # type: ignore[call-arg] + result = ns.dataframe_from_columns( + ns.column_from_1d_array( # type: ignore[call-arg] arr, name="result", ), ) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series(["a", "b", "c"], name="result", dtype="object") - pd.testing.assert_series_equal(result_pd, expected) + expected = ["a", "b", "c"] + compare_column_with_reference(result.col("result"), expected, dtype=ns.String) def test_column_from_1d_array_bool( library: str, ) -> None: ser = integer_dataframe_1(library).persist().col("a") - namespace = ser.__column_namespace__() + ns = ser.__column_namespace__() arr = np.array([True, False, True]) - result = namespace.dataframe_from_columns( - namespace.column_from_1d_array( # type: ignore[call-arg] + result = ns.dataframe_from_columns( + ns.column_from_1d_array( # type: ignore[call-arg] arr, name="result", ), ) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([True, False, True], name="result") - pd.testing.assert_series_equal(result_pd, expected) + expected = [True, False, True] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool) def test_datetime_from_1d_array(library: str) -> None: ser = integer_dataframe_1(library).persist().col("a") - namespace = ser.__column_namespace__() + ns = ser.__column_namespace__() arr = np.array([date(2020, 1, 1), date(2020, 1, 2)], dtype="datetime64[ms]") - result = namespace.dataframe_from_columns( - namespace.column_from_1d_array( # type: ignore[call-arg] + result = ns.dataframe_from_columns( + ns.column_from_1d_array( # type: ignore[call-arg] arr, name="result", ), ) - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series( - [datetime(2020, 1, 1), datetime(2020, 1, 2)], - name="result", - dtype="datetime64[ms]", - ) - pd.testing.assert_series_equal(result_pd, expected) + 
expected = [datetime(2020, 1, 1), datetime(2020, 1, 2)] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Datetime) @pytest.mark.skipif( @@ -111,24 +107,16 @@ def test_datetime_from_1d_array(library: str) -> None: ) def test_duration_from_1d_array(library: str) -> None: ser = integer_dataframe_1(library).persist().col("a") - namespace = ser.__column_namespace__() + ns = ser.__column_namespace__() arr = np.array([timedelta(1), timedelta(2)], dtype="timedelta64[ms]") - result = namespace.dataframe_from_columns( - namespace.column_from_1d_array( # type: ignore[call-arg] + result = ns.dataframe_from_columns( + ns.column_from_1d_array( # type: ignore[call-arg] arr, name="result", ), ) if library == "polars-lazy": # https://github.com/data-apis/dataframe-api/issues/329 - result_pd = ( - result.dataframe.collect().to_pandas()["result"].astype("timedelta64[ms]") # type: ignore[attr-defined] - ) - else: - result_pd = result.dataframe["result"] # type: ignore[index] - expected = pd.Series( - [timedelta(1), timedelta(2)], - name="result", - dtype="timedelta64[ms]", - ) - pd.testing.assert_series_equal(result_pd, expected) + result = result.cast({"result": ns.Duration("ms")}) + expected = [timedelta(1), timedelta(2)] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Duration) diff --git a/tests/namespace/column_from_sequence_test.py b/tests/namespace/column_from_sequence_test.py index 99423d4a..e6362e12 100644 --- a/tests/namespace/column_from_sequence_test.py +++ b/tests/namespace/column_from_sequence_test.py @@ -4,68 +4,29 @@ from datetime import timedelta from typing import Any -import pandas as pd import pytest +from tests.utils import compare_column_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( - ("values", "dtype", "kwargs", "expected"), + ("values", "dtype", "kwargs"), [ - ([1, 2, 3], "Int64", {}, pd.Series([1, 2, 3], dtype="int64", 
name="result")), - ([1, 2, 3], "Int32", {}, pd.Series([1, 2, 3], dtype="int32", name="result")), - ([1, 2, 3], "Int16", {}, pd.Series([1, 2, 3], dtype="int16", name="result")), - ([1, 2, 3], "Int8", {}, pd.Series([1, 2, 3], dtype="int8", name="result")), - ([1, 2, 3], "UInt64", {}, pd.Series([1, 2, 3], dtype="uint64", name="result")), - ([1, 2, 3], "UInt32", {}, pd.Series([1, 2, 3], dtype="uint32", name="result")), - ([1, 2, 3], "UInt16", {}, pd.Series([1, 2, 3], dtype="uint16", name="result")), - ([1, 2, 3], "UInt8", {}, pd.Series([1, 2, 3], dtype="uint8", name="result")), - ( - [1.0, 2.0, 3.0], - "Float64", - {}, - pd.Series([1, 2, 3], dtype="float64", name="result"), - ), - ( - [1.0, 2.0, 3.0], - "Float32", - {}, - pd.Series([1, 2, 3], dtype="float32", name="result"), - ), - ( - [True, False, True], - "Bool", - {}, - pd.Series([True, False, True], dtype=bool, name="result"), - ), - ( - ["express", "yourself"], - "String", - {}, - pd.Series(["express", "yourself"], dtype=object, name="result"), - ), - ( - [datetime(2020, 1, 1), datetime(2020, 1, 2)], - "Datetime", - {"time_unit": "us"}, - pd.Series( - [datetime(2020, 1, 1), datetime(2020, 1, 2)], - dtype="datetime64[us]", - name="result", - ), - ), - ( - [timedelta(1), timedelta(2)], - "Duration", - {"time_unit": "us"}, - pd.Series( - [timedelta(1), timedelta(2)], - dtype="timedelta64[us]", - name="result", - ), - ), + ([1, 2, 3], "Int64", {}), + ([1, 2, 3], "Int32", {}), + ([1, 2, 3], "Int16", {}), + ([1, 2, 3], "Int8", {}), + ([1, 2, 3], "UInt64", {}), + ([1, 2, 3], "UInt32", {}), + ([1, 2, 3], "UInt16", {}), + ([1, 2, 3], "UInt8", {}), + ([1.0, 2.0, 3.0], "Float64", {}), + ([1.0, 2.0, 3.0], "Float32", {}), + ([True, False, True], "Bool", {}), + (["express", "yourself"], "String", {}), + ([datetime(2020, 1, 1), datetime(2020, 1, 2)], "Datetime", {"time_unit": "us"}), + ([timedelta(1), timedelta(2)], "Duration", {"time_unit": "us"}), ], ) def test_column_from_sequence( @@ -73,29 +34,27 @@ def 
test_column_from_sequence( values: list[Any], dtype: str, kwargs: dict[str, Any], - expected: pd.Series[Any], ) -> None: df = integer_dataframe_1(library) - namespace = df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() ser = df.col("a") - namespace = ser.__column_namespace__() - result = namespace.dataframe_from_columns( - namespace.column_from_sequence( + ns = ser.__column_namespace__() + expected_dtype = getattr(ns, dtype) + result = ns.dataframe_from_columns( + ns.column_from_sequence( values, - dtype=getattr(namespace, dtype)(**kwargs), + dtype=expected_dtype(**kwargs), name="result", ), ) - result_pd = interchange_to_pandas(result)["result"] - pd.testing.assert_series_equal(result_pd, expected) + compare_column_with_reference(result.col("result"), values, dtype=expected_dtype) def test_column_from_sequence_no_dtype( library: str, ) -> None: df = integer_dataframe_1(library) - namespace = df.__dataframe_namespace__() - result = namespace.dataframe_from_columns(namespace.column_from_sequence([1, 2, 3], name="result")) # type: ignore[call-arg] - result_pd = interchange_to_pandas(result)["result"] - expected = pd.Series([1, 2, 3], dtype="int64", name="result") - pd.testing.assert_series_equal(result_pd, expected) + ns = df.__dataframe_namespace__() + result = ns.dataframe_from_columns(ns.column_from_sequence([1, 2, 3], name="result")) # type: ignore[call-arg] + expected = [1, 2, 3] + compare_column_with_reference(result.col("result"), expected, dtype=ns.Int64) diff --git a/tests/namespace/concat_test.py b/tests/namespace/concat_test.py index 7a41648b..79901d5a 100644 --- a/tests/namespace/concat_test.py +++ b/tests/namespace/concat_test.py @@ -1,29 +1,27 @@ from __future__ import annotations -import pandas as pd import polars as pl import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 from tests.utils import integer_dataframe_4 -from tests.utils 
import interchange_to_pandas def test_concat(library: str) -> None: df1 = integer_dataframe_1(library) df2 = integer_dataframe_2(library) - namespace = df1.__dataframe_namespace__() - result = namespace.concat([df1, df2]) - result_pd = interchange_to_pandas(result) - expected = pd.DataFrame({"a": [1, 2, 3, 1, 2, 4], "b": [4, 5, 6, 4, 2, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + ns = df1.__dataframe_namespace__() + result = ns.concat([df1, df2]) + expected = {"a": [1, 2, 3, 1, 2, 4], "b": [4, 5, 6, 4, 2, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) def test_concat_mismatch(library: str) -> None: df1 = integer_dataframe_1(library).persist() df2 = integer_dataframe_4(library).persist() - namespace = df1.__dataframe_namespace__() + ns = df1.__dataframe_namespace__() # TODO check the error with pytest.raises((ValueError, pl.exceptions.ShapeError)): - _ = namespace.concat([df1, df2]).persist() + _ = ns.concat([df1, df2]).persist() diff --git a/tests/namespace/dataframe_from_2d_array_test.py b/tests/namespace/dataframe_from_2d_array_test.py index c3ab589b..503486da 100644 --- a/tests/namespace/dataframe_from_2d_array_test.py +++ b/tests/namespace/dataframe_from_2d_array_test.py @@ -1,21 +1,20 @@ from __future__ import annotations import numpy as np -import pandas as pd +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 -from tests.utils import interchange_to_pandas def test_dataframe_from_2d_array(library: str) -> None: df = integer_dataframe_1(library) - namespace = df.__dataframe_namespace__() + ns = df.__dataframe_namespace__() arr = np.array([[1, 4], [2, 5], [3, 6]]) - result = namespace.dataframe_from_2d_array( + result = ns.dataframe_from_2d_array( arr, names=["a", "b"], ) # TODO: consistent return type, for windows compat? 
- result_pd = interchange_to_pandas(result).astype("int64") - expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - pd.testing.assert_frame_equal(result_pd, expected) + result = result.cast({"a": ns.Int64(), "b": ns.Int64()}) + expected = {"a": [1, 2, 3], "b": [4, 5, 6]} + compare_dataframe_with_reference(result, expected, dtype=ns.Int64) diff --git a/tests/namespace/sorted_indices_test.py b/tests/namespace/sorted_indices_test.py index 899a7c63..d99a4585 100644 --- a/tests/namespace/sorted_indices_test.py +++ b/tests/namespace/sorted_indices_test.py @@ -1,62 +1,52 @@ from __future__ import annotations -import pandas as pd - +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_6 -from tests.utils import interchange_to_pandas def test_column_sorted_indices_ascending(library: str) -> None: df = integer_dataframe_6(library) + ns = df.__dataframe_namespace__() sorted_indices = df.col("b").sorted_indices() result = df.assign(sorted_indices.rename("result")) - result_pd = interchange_to_pandas(result) - expected_1 = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 1, 2], - "result": [3, 4, 2, 0, 1], - }, - ) - expected_2 = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 1, 2], - "result": [3, 4, 2, 1, 0], - }, - ) + expected_1 = { + "a": [1, 1, 1, 2, 2], + "b": [4, 4, 3, 1, 2], + "result": [3, 4, 2, 0, 1], + } + expected_2 = { + "a": [1, 1, 1, 2, 2], + "b": [4, 4, 3, 1, 2], + "result": [3, 4, 2, 1, 0], + } if library in ("polars", "polars-lazy"): - result_pd["result"] = result_pd["result"].astype("int64") + result = result.cast({"result": ns.Int64()}) try: - pd.testing.assert_frame_equal(result_pd, expected_1) + compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64) except AssertionError: # pragma: no cover # order isn't determinist, so try both - pd.testing.assert_frame_equal(result_pd, expected_2) + compare_dataframe_with_reference(result, expected_2, dtype=ns.Int64) def 
test_column_sorted_indices_descending(library: str) -> None: df = integer_dataframe_6(library) + ns = df.__dataframe_namespace__() sorted_indices = df.col("b").sorted_indices(ascending=False) result = df.assign(sorted_indices.rename("result")) - result_pd = interchange_to_pandas(result) - expected_1 = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 1, 2], - "result": [1, 0, 2, 4, 3], - }, - ) - expected_2 = pd.DataFrame( - { - "a": [1, 1, 1, 2, 2], - "b": [4, 4, 3, 1, 2], - "result": [0, 1, 2, 4, 3], - }, - ) + expected_1 = { + "a": [1, 1, 1, 2, 2], + "b": [4, 4, 3, 1, 2], + "result": [1, 0, 2, 4, 3], + } + expected_2 = { + "a": [1, 1, 1, 2, 2], + "b": [4, 4, 3, 1, 2], + "result": [0, 1, 2, 4, 3], + } if library in ("polars", "polars-lazy"): - result_pd["result"] = result_pd["result"].astype("int64") + result = result.cast({"result": ns.Int64()}) try: - pd.testing.assert_frame_equal(result_pd, expected_1) + compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64) except AssertionError: # order isn't determinist, so try both - pd.testing.assert_frame_equal(result_pd, expected_2) + compare_dataframe_with_reference(result, expected_2, dtype=ns.Int64) diff --git a/tests/scalars/float_test.py b/tests/scalars/float_test.py index bfcfd5d0..d8d76656 100644 --- a/tests/scalars/float_test.py +++ b/tests/scalars/float_test.py @@ -1,10 +1,9 @@ import numpy as np -import pandas as pd import pytest +from tests.utils import compare_dataframe_with_reference from tests.utils import integer_dataframe_1 from tests.utils import integer_dataframe_2 -from tests.utils import interchange_to_pandas @pytest.mark.parametrize( @@ -99,15 +98,13 @@ def test_free_standing(library: str) -> None: def test_right_comparand(library: str) -> None: df = integer_dataframe_1(library) + ns = df.__dataframe_namespace__() col = df.col("a") # [1, 2, 3] scalar = df.col("b").get_value(0) # 4 result = df.assign((scalar - col).rename("c")) - result_pd = interchange_to_pandas(result) - 
expected = pd.DataFrame( - { - "a": [1, 2, 3], - "b": [4, 5, 6], - "c": [3, 2, 1], - }, - ) - pd.testing.assert_frame_equal(result_pd, expected) + expected = { + "a": [1, 2, 3], + "b": [4, 5, 6], + "c": [3, 2, 1], + } + compare_dataframe_with_reference(result, expected, ns.Int64) diff --git a/tests/utils.py b/tests/utils.py index 712d67bf..019e9f3c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,11 +1,11 @@ from __future__ import annotations +import math from datetime import datetime from datetime import timedelta from typing import TYPE_CHECKING from typing import Any from typing import Mapping -from typing import cast import pandas as pd import polars as pl @@ -47,27 +47,6 @@ def convert_to_standard_compliant_dataframe( raise AssertionError(msg) -def convert_dataframe_to_pandas_numpy(df: pd.DataFrame) -> pd.DataFrame: - conversions = { - "boolean": "bool", - "Int64": "int64", - "Float64": "float64", - } - for column in df.columns: - dtype = str(df.dtypes[column]) - if dtype in conversions: - try: - df[column] = df[column].to_numpy( - conversions[dtype], - na_value=float("nan"), - ) - except ValueError: - # cannot convert float NaN to integer - assert dtype == "Int64" - df[column] = df[column].to_numpy("float64", na_value=float("nan")) - return df - - def integer_dataframe_1(library: str, api_version: str | None = None) -> DataFrame: df: Any if library == "pandas-numpy": @@ -471,19 +450,6 @@ def temporal_dataframe_1(library: str) -> DataFrame: raise AssertionError(msg) -def interchange_to_pandas(result: Any) -> pd.DataFrame: - if isinstance(result.dataframe, pl.LazyFrame): - df = result.dataframe.collect() - df = df.to_pandas() - elif isinstance(result.dataframe, pl.DataFrame): - df = result.dataframe - df = df.to_pandas() - else: - df = result.dataframe - df = convert_dataframe_to_pandas_numpy(df) - return cast(pd.DataFrame, df) - - def compare_column_with_reference( column: Column, reference: list[Any], @@ -497,9 +463,18 @@ def 
compare_column_with_reference( dtype, ), f"column dtype: {column.dtype} isn't a instance of {dtype}" for idx in range(col_len): - assert ( - reference[idx] == column.get_value(idx).scalar - ), f"{reference[idx]} != {column.get_value(idx).scalar}" + a, b = reference[idx], column.get_value(idx).scalar + if a == b: + continue  # exact match: move on to the next element instead of ending the whole check + + # copied from pandas + rtol, atol = 1e-5, 1e-8 + assert math.isclose( + a, + b, + rel_tol=rtol, + abs_tol=atol, + ), f"expected {a:.5f} but got {b:.5f}, with rtol={rtol}, atol={atol}" def compare_dataframe_with_reference(