diff --git a/tests/dataframe/all_rowwise_test.py b/tests/dataframe/all_rowwise_test.py
index f04bf951..92b2df73 100644
--- a/tests/dataframe/all_rowwise_test.py
+++ b/tests/dataframe/all_rowwise_test.py
@@ -1,20 +1,18 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
 from tests.utils import bool_dataframe_1
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 
 
 def test_all_horizontal(library: str) -> None:
     df = bool_dataframe_1(library)
-    namespace = df.__dataframe_namespace__()
-    mask = namespace.all_horizontal(*[df.col(col_name) for col_name in df.column_names])
+    ns = df.__dataframe_namespace__()
+    mask = ns.all_horizontal(*[df.col(col_name) for col_name in df.column_names])
     result = df.filter(mask)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [True, True], "b": [True, True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [True, True], "b": [True, True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
 
 
 def test_all_horizontal_invalid(library: str) -> None:
diff --git a/tests/dataframe/and_test.py b/tests/dataframe/and_test.py
index 5a0d8b65..2b99778b 100644
--- a/tests/dataframe/and_test.py
+++ b/tests/dataframe/and_test.py
@@ -1,24 +1,22 @@
 from __future__ import annotations
 
-import pandas as pd
-
 from tests.utils import bool_dataframe_1
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 
 
 def test_and_with_scalar(library: str) -> None:
     df = bool_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     other = True
     result = df & other
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [True, True, False], "b": [True, True, True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [True, True, False], "b": [True, True, True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
 
 
 def test_rand_with_scalar(library: str) -> None:
     df = bool_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     other = True
     result = other & df
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [True, True, False], "b": [True, True, True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [True, True, False], "b": [True, True, True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
diff --git a/tests/dataframe/any_all_test.py b/tests/dataframe/any_all_test.py
index 5d034fdb..63f9a95d 100644
--- a/tests/dataframe/any_all_test.py
+++ b/tests/dataframe/any_all_test.py
@@ -1,11 +1,10 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
 from tests.utils import bool_dataframe_1
 from tests.utils import bool_dataframe_3
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 
 
 @pytest.mark.parametrize(
@@ -21,23 +20,22 @@ def test_reductions(
     expected_data: dict[str, object],
 ) -> None:
     df = bool_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = getattr(df, reduction)()
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(expected_data)
-    pd.testing.assert_frame_equal(result_pd, expected)
+    compare_dataframe_with_reference(result, expected_data, dtype=ns.Bool)  # type: ignore[arg-type]
 
 
 def test_any(library: str) -> None:
     df = bool_dataframe_3(library)
+    ns = df.__dataframe_namespace__()
     result = df.any()
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [False], "b": [True], "c": [True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [False], "b": [True], "c": [True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
 
 
 def test_all(library: str) -> None:
     df = bool_dataframe_3(library)
+    ns = df.__dataframe_namespace__()
     result = df.all()
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [False], "b": [False], "c": [True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [False], "b": [False], "c": [True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
diff --git a/tests/dataframe/any_rowwise_test.py b/tests/dataframe/any_rowwise_test.py
index 0c576e86..6fbb9177 100644
--- a/tests/dataframe/any_rowwise_test.py
+++ b/tests/dataframe/any_rowwise_test.py
@@ -1,20 +1,18 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
 from tests.utils import bool_dataframe_1
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 
 
 def test_any_horizontal(library: str) -> None:
     df = bool_dataframe_1(library)
-    namespace = df.__dataframe_namespace__()
-    mask = namespace.any_horizontal(*[df.col(col_name) for col_name in df.column_names])
+    ns = df.__dataframe_namespace__()
+    mask = ns.any_horizontal(*[df.col(col_name) for col_name in df.column_names])
     result = df.filter(mask)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [True, True, False], "b": [True, True, True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [True, True, False], "b": [True, True, True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
 
 
 def test_any_horizontal_invalid(library: str) -> None:
diff --git a/tests/dataframe/assign_test.py b/tests/dataframe/assign_test.py
index fe86be47..f6daf5af 100644
--- a/tests/dataframe/assign_test.py
+++ b/tests/dataframe/assign_test.py
@@ -1,40 +1,33 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_insert_columns(library: str) -> None:
     df = integer_dataframe_1(library, api_version="2023.09-beta")
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     new_col = (df.col("b") + 3).rename("result")
     result = df.assign(new_col.rename("c"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
     # check original df didn't change
-    df_pd = interchange_to_pandas(df)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(df_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(df, expected, dtype=ns.Int64)
 
 
 def test_insert_multiple_columns(library: str) -> None:
     df = integer_dataframe_1(library, api_version="2023.09-beta")
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     new_col = (df.col("b") + 3).rename("result")
     result = df.assign(new_col.rename("c"), new_col.rename("d"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [7, 8, 9]},
-    )
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [7, 8, 9]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
     # check original df didn't change
-    df_pd = interchange_to_pandas(df)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(df_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(df, expected, dtype=ns.Int64)
 
 
 def test_insert_multiple_columns_invalid(library: str) -> None:
@@ -47,14 +40,11 @@ def test_insert_multiple_columns_invalid(library: str) -> None:
 
 def test_insert_eager_columns(library: str) -> None:
     df = integer_dataframe_1(library, api_version="2023.09-beta")
+    ns = df.__dataframe_namespace__()
     new_col = (df.col("b") + 3).rename("result")
     result = df.assign(new_col.rename("c"), new_col.rename("d"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [7, 8, 9]},
-    )
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [7, 8, 9]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
     # check original df didn't change
-    df_pd = interchange_to_pandas(df)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(df_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(df, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/cast_test.py b/tests/dataframe/cast_test.py
index a6be8afe..7e3a199d 100644
--- a/tests/dataframe/cast_test.py
+++ b/tests/dataframe/cast_test.py
@@ -1,15 +1,11 @@
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_cast_integers(library: str) -> None:
     df = integer_dataframe_1(library)
-    pdx = df.__dataframe_namespace__()
-    result = df.cast({"a": pdx.Int32()})
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).astype(
-        {"a": "int32", "b": "int64"},
-    )
-    result_pd = interchange_to_pandas(result)
-    pd.testing.assert_frame_equal(result_pd, expected)
+    ns = df.__dataframe_namespace__()
+    result = df.cast({"a": ns.Int32()})
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    expected_dtype = {"a": ns.Int32, "b": ns.Int64}
+    compare_dataframe_with_reference(result, expected, dtype=expected_dtype)
diff --git a/tests/dataframe/columns_iter_test.py b/tests/dataframe/columns_iter_test.py
index 3f127dcc..f8a02f3b 100644
--- a/tests/dataframe/columns_iter_test.py
+++ b/tests/dataframe/columns_iter_test.py
@@ -1,19 +1,15 @@
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_iter_columns(library: str) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.assign(
         *[col / col.mean() for col in df.iter_columns()],
     )
-    expected = pd.DataFrame(
-        {
-            "a": [0.5, 1.0, 1.5],
-            "b": [0.8, 1.0, 1.2],
-        },
-    )
-    result_pd = interchange_to_pandas(result)
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {
+        "a": [0.5, 1.0, 1.5],
+        "b": [0.8, 1.0, 1.2],
+    }
+    compare_dataframe_with_reference(result, expected, dtype=ns.Float64)
diff --git a/tests/dataframe/comparisons_test.py b/tests/dataframe/comparisons_test.py
index a8084eb6..6886191f 100644
--- a/tests/dataframe/comparisons_test.py
+++ b/tests/dataframe/comparisons_test.py
@@ -1,41 +1,41 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize(
-    ("comparison", "expected_data"),
+    ("comparison", "expected_data", "expected_dtype"),
     [
-        ("__eq__", {"a": [False, True, False], "b": [False, False, False]}),
-        ("__ne__", {"a": [True, False, True], "b": [True, True, True]}),
-        ("__ge__", {"a": [False, True, True], "b": [True, True, True]}),
-        ("__gt__", {"a": [False, False, True], "b": [True, True, True]}),
-        ("__le__", {"a": [True, True, False], "b": [False, False, False]}),
-        ("__lt__", {"a": [True, False, False], "b": [False, False, False]}),
-        ("__add__", {"a": [3, 4, 5], "b": [6, 7, 8]}),
-        ("__sub__", {"a": [-1, 0, 1], "b": [2, 3, 4]}),
-        ("__mul__", {"a": [2, 4, 6], "b": [8, 10, 12]}),
-        ("__truediv__", {"a": [0.5, 1, 1.5], "b": [2, 2.5, 3]}),
-        ("__floordiv__", {"a": [0, 1, 1], "b": [2, 2, 3]}),
-        ("__pow__", {"a": [1, 4, 9], "b": [16, 25, 36]}),
-        ("__mod__", {"a": [1, 0, 1], "b": [0, 1, 0]}),
+        ("__eq__", {"a": [False, True, False], "b": [False, False, False]}, "Bool"),
+        ("__ne__", {"a": [True, False, True], "b": [True, True, True]}, "Bool"),
+        ("__ge__", {"a": [False, True, True], "b": [True, True, True]}, "Bool"),
+        ("__gt__", {"a": [False, False, True], "b": [True, True, True]}, "Bool"),
+        ("__le__", {"a": [True, True, False], "b": [False, False, False]}, "Bool"),
+        ("__lt__", {"a": [True, False, False], "b": [False, False, False]}, "Bool"),
+        ("__add__", {"a": [3, 4, 5], "b": [6, 7, 8]}, "Int64"),
+        ("__sub__", {"a": [-1, 0, 1], "b": [2, 3, 4]}, "Int64"),
+        ("__mul__", {"a": [2, 4, 6], "b": [8, 10, 12]}, "Int64"),
+        ("__truediv__", {"a": [0.5, 1, 1.5], "b": [2, 2.5, 3]}, "Float64"),
+        ("__floordiv__", {"a": [0, 1, 1], "b": [2, 2, 3]}, "Int64"),
+        ("__pow__", {"a": [1, 4, 9], "b": [16, 25, 36]}, "Int64"),
+        ("__mod__", {"a": [1, 0, 1], "b": [0, 1, 0]}, "Int64"),
     ],
 )
 def test_comparisons_with_scalar(
     library: str,
     comparison: str,
     expected_data: dict[str, object],
+    expected_dtype: str,
 ) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     other = 2
     result = getattr(df, comparison)(other)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(expected_data)
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected_ns_dtype = getattr(ns, expected_dtype)
+    compare_dataframe_with_reference(result, expected_data, dtype=expected_ns_dtype)  # type: ignore[arg-type]
 
 
 @pytest.mark.parametrize(
@@ -52,8 +52,7 @@ def test_rcomparisons_with_scalar(
     expected_data: dict[str, object],
 ) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     other = 2
     result = getattr(df, comparison)(other)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(expected_data)
-    pd.testing.assert_frame_equal(result_pd, expected)
+    compare_dataframe_with_reference(result, expected_data, dtype=ns.Int64)  # type: ignore[arg-type]
diff --git a/tests/dataframe/divmod_test.py b/tests/dataframe/divmod_test.py
index 56742a61..0a62d3fe 100644
--- a/tests/dataframe/divmod_test.py
+++ b/tests/dataframe/divmod_test.py
@@ -1,18 +1,15 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_divmod_with_scalar(library: str) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     other = 2
     result_quotient, result_remainder = df.__divmod__(other)
-    result_quotient_pd = interchange_to_pandas(result_quotient)
-    result_remainder_pd = interchange_to_pandas(result_remainder)
-    expected_quotient = pd.DataFrame({"a": [0, 1, 1], "b": [2, 2, 3]})
-    expected_remainder = pd.DataFrame({"a": [1, 0, 1], "b": [0, 1, 0]})
-    pd.testing.assert_frame_equal(result_quotient_pd, expected_quotient)
-    pd.testing.assert_frame_equal(result_remainder_pd, expected_remainder)
+    expected_quotient = {"a": [0, 1, 1], "b": [2, 2, 3]}
+    expected_remainder = {"a": [1, 0, 1], "b": [0, 1, 0]}
+    compare_dataframe_with_reference(result_quotient, expected_quotient, dtype=ns.Int64)
+    compare_dataframe_with_reference(result_remainder, expected_remainder, dtype=ns.Int64)
diff --git a/tests/dataframe/drop_column_test.py b/tests/dataframe/drop_column_test.py
index c84fe61d..9f948245 100644
--- a/tests/dataframe/drop_column_test.py
+++ b/tests/dataframe/drop_column_test.py
@@ -1,14 +1,12 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_drop_column(library: str) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.drop("a")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"b": [4, 5, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/drop_nulls_test.py b/tests/dataframe/drop_nulls_test.py
index 6efd74fe..2bf6e604 100644
--- a/tests/dataframe/drop_nulls_test.py
+++ b/tests/dataframe/drop_nulls_test.py
@@ -1,12 +1,10 @@
-import pandas as pd
-
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import null_dataframe_1
 
 
 def test_drop_nulls(library: str) -> None:
     df = null_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.drop_nulls()
-    expected = pd.DataFrame({"a": [1.0, 2.0]})
-    result_pd = interchange_to_pandas(result)
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1.0, 2.0]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Float64)
diff --git a/tests/dataframe/fill_nan_test.py b/tests/dataframe/fill_nan_test.py
index b0b87a44..e21f8af1 100644
--- a/tests/dataframe/fill_nan_test.py
+++ b/tests/dataframe/fill_nan_test.py
@@ -1,28 +1,27 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import nan_dataframe_1
 
 
 def test_fill_nan(library: str) -> None:
     df = nan_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.fill_nan(-1)
-    result_pd = interchange_to_pandas(result)
-    result_pd = result_pd.astype("float64")
-    expected = pd.DataFrame({"a": [1.0, 2.0, -1.0]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    result = result.cast({"a": ns.Float64()})
+    expected = {"a": [1.0, 2.0, -1.0]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Float64)
 
 
 def test_fill_nan_with_scalar(library: str) -> None:
     df = nan_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.fill_nan(df.col("a").get_value(0))
-    result_pd = interchange_to_pandas(result)
-    result_pd = result_pd.astype("float64")
-    expected = pd.DataFrame({"a": [1.0, 2.0, 1.0]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    result = result.cast({"a": ns.Float64()})
+    expected = {"a": [1.0, 2.0, 1.0]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Float64)
 
 
 def test_fill_nan_with_scalar_invalid(library: str) -> None:
@@ -34,12 +33,12 @@ def test_fill_nan_with_scalar_invalid(library: str) -> None:
 
 def test_fill_nan_with_null(library: str) -> None:
     df = nan_dataframe_1(library)
-    namespace = df.__dataframe_namespace__()
-    result = df.fill_nan(namespace.null)
+    ns = df.__dataframe_namespace__()
+    result = df.fill_nan(ns.null)
     n_nans = result.is_nan().sum()
-    n_nans = interchange_to_pandas(n_nans)
+    n_nans_value = n_nans.col("a").persist().get_value(0).scalar
     if library == "pandas-numpy":
         # null is nan for pandas-numpy
-        assert n_nans["a"][0] == 1  # type: ignore[index]
+        assert n_nans_value == 1
     else:
-        assert n_nans["a"][0] == 0  # type: ignore[index]
+        assert n_nans_value == 0
diff --git a/tests/dataframe/fill_null_test.py b/tests/dataframe/fill_null_test.py
index 7cf43250..12c24e8d 100644
--- a/tests/dataframe/fill_null_test.py
+++ b/tests/dataframe/fill_null_test.py
@@ -2,7 +2,6 @@
 
 import pytest
 
-from tests.utils import interchange_to_pandas
 from tests.utils import nan_dataframe_1
 from tests.utils import null_dataframe_2
 
@@ -26,11 +25,11 @@ def test_fill_null(library: str, column_names: list[str] | None) -> None:
         # check there no nulls left in the column
         assert res1.shape()[0] == 0
         # check the last element was filled with 0
-        assert interchange_to_pandas(result)["a"].iloc[2] == 0
+        assert result.col("a").persist().get_value(2).scalar == 0
     if column_names is None or "b" in column_names:
         res1 = result.filter(result.col("b").is_null()).persist()
         assert res1.shape()[0] == 0
-        assert interchange_to_pandas(result)["b"].iloc[2] == 0
+        assert result.col("b").persist().get_value(2).scalar == 0
 
 
 def test_fill_null_noop(library: str) -> None:
diff --git a/tests/dataframe/get_column_by_name_test.py b/tests/dataframe/get_column_by_name_test.py
index dc081a67..6ddae877 100644
--- a/tests/dataframe/get_column_by_name_test.py
+++ b/tests/dataframe/get_column_by_name_test.py
@@ -1,17 +1,13 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_get_column(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     col = df.col
     result = df.assign(col("a").rename("_tmp")).drop("a").rename({"_tmp": "a"})
-    df.__dataframe_namespace__()
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})[["b", "a"]]
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"b": [4, 5, 6], "a": [1, 2, 3]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/get_rows_by_mask_test.py b/tests/dataframe/get_rows_by_mask_test.py
index 91e57668..a2ae421c 100644
--- a/tests/dataframe/get_rows_by_mask_test.py
+++ b/tests/dataframe/get_rows_by_mask_test.py
@@ -1,16 +1,13 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_filter(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     mask = df.col("a") % 2 == 1
     result = df.filter(mask)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 3], "b": [4, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1, 3], "b": [4, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/get_rows_test.py b/tests/dataframe/get_rows_test.py
index ef0cd398..16391c64 100644
--- a/tests/dataframe/get_rows_test.py
+++ b/tests/dataframe/get_rows_test.py
@@ -1,17 +1,13 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_take(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     df = df.assign((df.col("a") - 1).sort(ascending=False).rename("result"))
-    df.__dataframe_namespace__()
     result = df.take(df.col("result"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [3, 2, 1], "b": [6, 5, 4], "result": [0, 1, 2]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [3, 2, 1], "b": [6, 5, 4], "result": [0, 1, 2]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/invert_test.py b/tests/dataframe/invert_test.py
index e3ff00ab..ed84c32e 100644
--- a/tests/dataframe/invert_test.py
+++ b/tests/dataframe/invert_test.py
@@ -1,19 +1,18 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
 from tests.utils import bool_dataframe_1
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_invert(library: str) -> None:
     df = bool_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = ~df
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [False, False, True], "b": [False, False, False]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [False, False, True], "b": [False, False, False]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
 
 
 def test_invert_invalid(library: str) -> None:
diff --git a/tests/dataframe/is_nan_test.py b/tests/dataframe/is_nan_test.py
index 6d4d9fcb..3d82f9dc 100644
--- a/tests/dataframe/is_nan_test.py
+++ b/tests/dataframe/is_nan_test.py
@@ -1,14 +1,12 @@
 from __future__ import annotations
 
-import pandas as pd
-
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import nan_dataframe_1
 
 
 def test_dataframe_is_nan(library: str) -> None:
     df = nan_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.is_nan()
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [False, False, True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [False, False, True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
diff --git a/tests/dataframe/is_null_test.py b/tests/dataframe/is_null_test.py
index 48197f7d..c2a469b2 100644
--- a/tests/dataframe/is_null_test.py
+++ b/tests/dataframe/is_null_test.py
@@ -1,27 +1,25 @@
 from __future__ import annotations
 
-import pandas as pd
-
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import nan_dataframe_2
 from tests.utils import null_dataframe_1
 
 
 def test_is_null_1(library: str) -> None:
     df = nan_dataframe_2(library)
+    ns = df.__dataframe_namespace__()
     result = df.is_null()
-    result_pd = interchange_to_pandas(result)
     if library == "pandas-numpy":
         # nan and null are the same in pandas-numpy
-        expected = pd.DataFrame({"a": [False, False, True]})
+        expected = {"a": [False, False, True]}
     else:
-        expected = pd.DataFrame({"a": [False, False, False]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+        expected = {"a": [False, False, False]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
 
 
 def test_is_null_2(library: str) -> None:
     df = null_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.is_null()
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [False, False, True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [False, False, True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
diff --git a/tests/dataframe/join_test.py b/tests/dataframe/join_test.py
index 48320264..b893e83e 100644
--- a/tests/dataframe/join_test.py
+++ b/tests/dataframe/join_test.py
@@ -1,24 +1,26 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 from packaging.version import Version
 
 from tests.utils import PANDAS_VERSION
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
 from tests.utils import integer_dataframe_2
-from tests.utils import interchange_to_pandas
 
 
 def test_join_left(library: str) -> None:
     left = integer_dataframe_1(library)
     right = integer_dataframe_2(library).rename({"b": "c"})
     result = left.join(right, left_on="a", right_on="a", how="left")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [4.0, 2.0, float("nan")]},
-    )
-    pd.testing.assert_frame_equal(result_pd, expected)
+    ns = result.__dataframe_namespace__()
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [4.0, 2.0, float("nan")]}
+    expected_dtype = {
+        "a": ns.Int64,
+        "b": ns.Int64,
+        "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64,
+    }
+    compare_dataframe_with_reference(result, expected, dtype=expected_dtype)  # type: ignore[arg-type]
 
 
 def test_join_overlapping_names(library: str) -> None:
@@ -32,9 +34,9 @@ def test_join_inner(library: str) -> None:
     left = integer_dataframe_1(library)
     right = integer_dataframe_2(library).rename({"b": "c"})
     result = left.join(right, left_on="a", right_on="a", how="inner")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2], "b": [4, 5], "c": [4, 2]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    ns = result.__dataframe_namespace__()
+    expected = {"a": [1, 2], "b": [4, 5], "c": [4, 2]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
 
 
 @pytest.mark.skip(reason="outer join has changed in Polars recently, need to fixup")
@@ -42,31 +44,37 @@ def test_join_outer(library: str) -> None:  # pragma: no cover
     left = integer_dataframe_1(library)
     right = integer_dataframe_2(library).rename({"b": "c"})
     result = left.join(right, left_on="a", right_on="a", how="outer").sort("a")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {
-            "a": [1, 2, 3, 4],
-            "b": [4, 5, 6, float("nan")],
-            "c": [4.0, 2.0, float("nan"), 6.0],
-        },
-    )
+    ns = result.__dataframe_namespace__()
     if (
         library == "pandas-nullable" and Version("2.0.0") > PANDAS_VERSION
     ):  # pragma: no cover
         # upstream bug
-        result_pd = result_pd.astype({"a": "int64"})
-    pd.testing.assert_frame_equal(result_pd, expected)
+        result = result.cast({"a": ns.Int64()})
+    expected = {
+        "a": [1, 2, 3, 4],
+        "b": [4, 5, 6, float("nan")],
+        "c": [4.0, 2.0, float("nan"), 6.0],
+    }
+    expected_dtype = {
+        "a": ns.Int64,
+        "b": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64,
+        "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64,
+    }
+    compare_dataframe_with_reference(result, expected, dtype=expected_dtype)  # type: ignore[arg-type]
 
 
 def test_join_two_keys(library: str) -> None:
     left = integer_dataframe_1(library)
     right = integer_dataframe_2(library).rename({"b": "c"})
     result = left.join(right, left_on=["a", "b"], right_on=["a", "c"], how="left")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [4.0, float("nan"), float("nan")]},
-    )
-    pd.testing.assert_frame_equal(result_pd, expected)
+    ns = result.__dataframe_namespace__()
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [4.0, float("nan"), float("nan")]}
+    expected_dtype = {
+        "a": ns.Int64,
+        "b": ns.Int64,
+        "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64,
+    }
+    compare_dataframe_with_reference(result, expected, dtype=expected_dtype)  # type: ignore[arg-type]
 
 
 def test_join_invalid(library: str) -> None:
diff --git a/tests/dataframe/or_test.py b/tests/dataframe/or_test.py
index fb8b00a2..1a4a8c95 100644
--- a/tests/dataframe/or_test.py
+++ b/tests/dataframe/or_test.py
@@ -1,24 +1,22 @@
 from __future__ import annotations
 
-import pandas as pd
-
 from tests.utils import bool_dataframe_1
-from tests.utils import interchange_to_pandas
+from tests.utils import compare_dataframe_with_reference
 
 
 def test_or_with_scalar(library: str) -> None:
     df = bool_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     other = True
     result = df | other
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [True, True, True], "b": [True, True, True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [True, True, True], "b": [True, True, True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
 
 
 def test_ror_with_scalar(library: str) -> None:
     df = bool_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     other = True
     result = other | df
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [True, True, True], "b": [True, True, True]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [True, True, True], "b": [True, True, True]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Bool)
diff --git a/tests/dataframe/pow_test.py b/tests/dataframe/pow_test.py
index 7bfc916c..eff8b95b 100644
--- a/tests/dataframe/pow_test.py
+++ b/tests/dataframe/pow_test.py
@@ -1,17 +1,14 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_float_scalar_powers(library: str) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     other = 1.0
     result = df.__pow__(other)
-    result_pd = interchange_to_pandas(result).astype(
-        {"a": "int64", "b": "int64"},
-    )
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    result = result.cast({"a": ns.Int64(), "b": ns.Int64()})
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/reductions_test.py b/tests/dataframe/reductions_test.py
index 79bd8c5b..2055a7ef 100644
--- a/tests/dataframe/reductions_test.py
+++ b/tests/dataframe/reductions_test.py
@@ -1,31 +1,34 @@
 from __future__ import annotations
 
-import pandas as pd
+from typing import Any
+
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize(
-    ("reduction", "expected"),
+    ("reduction", "expected", "expected_dtype"),
     [
-        ("min", pd.DataFrame({"a": [1], "b": [4]})),
-        ("max", pd.DataFrame({"a": [3], "b": [6]})),
-        ("sum", pd.DataFrame({"a": [6], "b": [15]})),
-        ("prod", pd.DataFrame({"a": [6], "b": [120]})),
-        ("median", pd.DataFrame({"a": [2.0], "b": [5.0]})),
-        ("mean", pd.DataFrame({"a": [2.0], "b": [5.0]})),
-        ("std", pd.DataFrame({"a": [1.0], "b": [1.0]})),
-        ("var", pd.DataFrame({"a": [1.0], "b": [1.0]})),
+        ("min", {"a": [1], "b": [4]}, "Int64"),
+        ("max", {"a": [3], "b": [6]}, "Int64"),
+        ("sum", {"a": [6], "b": [15]}, "Int64"),
+        ("prod", {"a": [6], "b": [120]}, "Int64"),
+        ("median", {"a": [2.0], "b": [5.0]}, "Float64"),
+        ("mean", {"a": [2.0], "b": [5.0]}, "Float64"),
+        ("std", {"a": [1.0], "b": [1.0]}, "Float64"),
+        ("var", {"a": [1.0], "b": [1.0]}, "Float64"),
     ],
 )
 def test_dataframe_reductions(
     library: str,
     reduction: str,
-    expected: pd.DataFrame,
+    expected: dict[str, Any],
+    expected_dtype: str,
 ) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = getattr(df, reduction)()
-    result_pd = interchange_to_pandas(result)
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected_ns_dtype = getattr(ns, expected_dtype)
+    compare_dataframe_with_reference(result, expected, dtype=expected_ns_dtype)
diff --git a/tests/dataframe/rename_columns_test.py b/tests/dataframe/rename_columns_test.py
index 1c88836b..63081cf5 100644
--- a/tests/dataframe/rename_columns_test.py
+++ b/tests/dataframe/rename_columns_test.py
@@ -1,18 +1,17 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_rename(library: str) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.rename({"a": "c", "b": "e"})
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"c": [1, 2, 3], "e": [4, 5, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"c": [1, 2, 3], "e": [4, 5, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
 
 
 def test_rename_invalid(library: str) -> None:
diff --git a/tests/dataframe/select_test.py b/tests/dataframe/select_test.py
index c1986c50..60bde31f 100644
--- a/tests/dataframe/select_test.py
+++ b/tests/dataframe/select_test.py
@@ -1,26 +1,25 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_select(library: str) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.select("b")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"b": [4, 5, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
 
 
 def test_select_list_of_str(library: str) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     result = df.select("a", "b")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
 
 
 def test_select_list_of_str_invalid(library: str) -> None:
diff --git a/tests/dataframe/slice_rows_test.py b/tests/dataframe/slice_rows_test.py
index 1b81c71e..027b98df 100644
--- a/tests/dataframe/slice_rows_test.py
+++ b/tests/dataframe/slice_rows_test.py
@@ -1,19 +1,20 @@
 from __future__ import annotations
 
-import pandas as pd
+from typing import Any
+
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_3
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize(
     ("start", "stop", "step", "expected"),
     [
-        (2, 7, 2, pd.DataFrame({"a": [3, 5, 7], "b": [5, 3, 1]})),
-        (None, 7, 2, pd.DataFrame({"a": [1, 3, 5, 7], "b": [7, 5, 3, 1]})),
-        (2, None, 2, pd.DataFrame({"a": [3, 5, 7], "b": [5, 3, 1]})),
-        (2, None, None, pd.DataFrame({"a": [3, 4, 5, 6, 7], "b": [5, 4, 3, 2, 1]})),
+        (2, 7, 2, {"a": [3, 5, 7], "b": [5, 3, 1]}),
+        (None, 7, 2, {"a": [1, 3, 5, 7], "b": [7, 5, 3, 1]}),
+        (2, None, 2, {"a": [3, 5, 7], "b": [5, 3, 1]}),
+        (2, None, None, {"a": [3, 4, 5, 6, 7], "b": [5, 4, 3, 2, 1]}),
     ],
 )
 def test_slice_rows(
@@ -21,9 +22,9 @@ def test_slice_rows(
     start: int | None,
     stop: int | None,
     step: int | None,
-    expected: pd.DataFrame,
+    expected: dict[str, Any],
 ) -> None:
     df = integer_dataframe_3(library)
+    ns = df.__dataframe_namespace__()
     result = df.slice_rows(start, stop, step)
-    result_pd = interchange_to_pandas(result)
-    pd.testing.assert_frame_equal(result_pd, expected)
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/sort_test.py b/tests/dataframe/sort_test.py
index 468ec78d..1698e671 100644
--- a/tests/dataframe/sort_test.py
+++ b/tests/dataframe/sort_test.py
@@ -1,19 +1,18 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_5
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize("keys", [["a", "b"], []])
 def test_sort(library: str, keys: list[str]) -> None:
     df = integer_dataframe_5(library, api_version="2023.09-beta")
+    ns = df.__dataframe_namespace__()
     result = df.sort(*keys)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 1], "b": [3, 4]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1, 1], "b": [3, 4]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
 
 
 @pytest.mark.parametrize("keys", [["a", "b"], []])
@@ -22,7 +21,7 @@ def test_sort_descending(
     keys: list[str],
 ) -> None:
     df = integer_dataframe_5(library, api_version="2023.09-beta")
+    ns = df.__dataframe_namespace__()
     result = df.sort(*keys, ascending=False)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 1], "b": [4, 3]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1, 1], "b": [4, 3]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/update_columns_test.py b/tests/dataframe/update_columns_test.py
index 16929667..0dfc67fe 100644
--- a/tests/dataframe/update_columns_test.py
+++ b/tests/dataframe/update_columns_test.py
@@ -1,26 +1,22 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_update_columns(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     col = df.col
     result = df.assign(col("a") + 1)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [2, 3, 4], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [2, 3, 4], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
 
 
 def test_update_multiple_columns(library: str) -> None:
     df = integer_dataframe_1(library)
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     col = df.col
     result = df.assign(col("a") + 1, col("b") + 2)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [2, 3, 4], "b": [6, 7, 8]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [2, 3, 4], "b": [6, 7, 8]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/dataframe/update_test.py b/tests/dataframe/update_test.py
index e858acc6..3c6b57a7 100644
--- a/tests/dataframe/update_test.py
+++ b/tests/dataframe/update_test.py
@@ -1,35 +1,29 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_update_column(library: str) -> None:
     df = integer_dataframe_1(library, api_version="2023.09-beta")
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     new_col = df.col("b") + 3
     result = df.assign(new_col)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [7, 8, 9]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [7, 8, 9]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
     # check original df didn't change
-    df_pd = interchange_to_pandas(df)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(df_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(df, expected, dtype=ns.Int64)
 
 
 def test_update_columns(library: str) -> None:
     df = integer_dataframe_1(library, api_version="2023.09-beta")
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     new_col_a = df.col("a") + 1
     new_col_b = df.col("b") + 3
     result = df.assign(new_col_a, new_col_b)
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [2, 3, 4], "b": [7, 8, 9]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {"a": [2, 3, 4], "b": [7, 8, 9]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
     # check original df didn't change
-    df_pd = interchange_to_pandas(df)
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(df_pd, expected)
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(df, expected, dtype=ns.Int64)
diff --git a/tests/groupby/aggregate_test.py b/tests/groupby/aggregate_test.py
index c40119ed..25619342 100644
--- a/tests/groupby/aggregate_test.py
+++ b/tests/groupby/aggregate_test.py
@@ -1,7 +1,5 @@
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_4
-from tests.utils import interchange_to_pandas
 
 
 def test_aggregate(library: str) -> None:
@@ -25,26 +23,37 @@ def test_aggregate(library: str) -> None:
         )
         .sort("key")
     )
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {
-            "key": [1, 2],
-            "b_sum": [3, 7],
-            "b_prod": [2, 12],
-            "b_mean": [1.5, 3.5],
-            "b_median": [1.5, 3.5],
-            "b_min": [1, 3],
-            "b_max": [2, 4],
-            "b_std": [0.707107, 0.707107],
-            "b_var": [0.5, 0.5],
-            "b_count": [2, 2],
-            "d_any": [True, True],
-            "d_all": [True, True],
-        },
-    )
+    expected = {
+        "key": [1, 2],
+        "b_sum": [3, 7],
+        "b_prod": [2, 12],
+        "b_mean": [1.5, 3.5],
+        "b_median": [1.5, 3.5],
+        "b_min": [1, 3],
+        "b_max": [2, 4],
+        "b_std": [0.707107, 0.707107],
+        "b_var": [0.5, 0.5],
+        "b_count": [2, 2],
+        "d_any": [True, True],
+        "d_all": [True, True],
+    }
+    expected_dtype = {
+        "key": ns.Int64,
+        "b_sum": ns.Int64,
+        "b_prod": ns.Int64,
+        "b_mean": ns.Float64,
+        "b_median": ns.Float64,
+        "b_min": ns.Int64,
+        "b_max": ns.Int64,
+        "b_std": ns.Float64,
+        "b_var": ns.Float64,
+        "b_count": ns.Int64,
+        "d_any": ns.Bool,
+        "d_all": ns.Bool,
+    }
     if library == "polars-lazy":
-        result_pd = result_pd.astype({"b_count": "int64"})
-    pd.testing.assert_frame_equal(result_pd, expected)
+        result = result.cast({"b_count": ns.Int64()})
+    compare_dataframe_with_reference(result, expected, dtype=expected_dtype)  # type: ignore[arg-type]
 
 
 def test_aggregate_only_size(library: str) -> None:
@@ -57,16 +66,13 @@ def test_aggregate_only_size(library: str) -> None:
         )
         .sort("key")
     )
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {
-            "key": [1, 2],
-            "b_count": [2, 2],
-        },
-    )
+    expected = {
+        "key": [1, 2],
+        "b_count": [2, 2],
+    }
     if library == "polars-lazy":
-        result_pd = result_pd.astype({"b_count": "int64"})
-    pd.testing.assert_frame_equal(result_pd, expected)
+        result = result.cast({"b_count": ns.Int64()})
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
 
 
 def test_aggregate_no_size(library: str) -> None:
@@ -84,16 +90,22 @@ def test_aggregate_no_size(library: str) -> None:
         )
         .sort("key")
     )
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {
-            "key": [1, 2],
-            "b_sum": [3, 7],
-            "b_mean": [1.5, 3.5],
-            "b_min": [1, 3],
-            "b_max": [2, 4],
-            "b_std": [0.707107, 0.707107],
-            "b_var": [0.5, 0.5],
-        },
-    )
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {
+        "key": [1, 2],
+        "b_sum": [3, 7],
+        "b_mean": [1.5, 3.5],
+        "b_min": [1, 3],
+        "b_max": [2, 4],
+        "b_std": [0.707107, 0.707107],
+        "b_var": [0.5, 0.5],
+    }
+    expected_dtype = {
+        "key": ns.Int64,
+        "b_sum": ns.Int64,
+        "b_mean": ns.Float64,
+        "b_min": ns.Int64,
+        "b_max": ns.Int64,
+        "b_std": ns.Float64,
+        "b_var": ns.Float64,
+    }
+    compare_dataframe_with_reference(result, expected, dtype=expected_dtype)  # type: ignore[arg-type]
diff --git a/tests/groupby/groupby_any_all_test.py b/tests/groupby/groupby_any_all_test.py
index 4c739d56..8ae9eea1 100644
--- a/tests/groupby/groupby_any_all_test.py
+++ b/tests/groupby/groupby_any_all_test.py
@@ -7,8 +7,8 @@
 from polars.exceptions import SchemaError
 
 from tests.utils import bool_dataframe_2
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_4
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize(
@@ -25,20 +25,18 @@ def test_groupby_boolean(
     expected_c: list[bool],
 ) -> None:
     df = bool_dataframe_2(library)
-    df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     result = getattr(df.group_by("key"), aggregation)()
     # need to sort
     result = result.sort("key")
-    result_pd = interchange_to_pandas(result)
     if library == "pandas-nullable" and parse(pd.__version__) < Version(
         "2.0.0",
     ):  # pragma: no cover
         # upstream bug
-        result_pd = result_pd.astype({"key": "int64"})
-    else:
-        pass
-    expected = pd.DataFrame({"key": [1, 2], "b": expected_b, "c": expected_c})
-    pd.testing.assert_frame_equal(result_pd, expected)
+        result = result.cast({"key": ns.Int64()})
+    expected = {"key": [1, 2], "b": expected_b, "c": expected_c}
+    expected_dtype = {"key": ns.Int64, "b": ns.Bool, "c": ns.Bool}
+    compare_dataframe_with_reference(result, expected, dtype=expected_dtype)  # type: ignore[arg-type]
 
 
 def test_group_by_invalid_any_all(library: str) -> None:
diff --git a/tests/groupby/numeric_test.py b/tests/groupby/numeric_test.py
index 02c63f0e..075f1588 100644
--- a/tests/groupby/numeric_test.py
+++ b/tests/groupby/numeric_test.py
@@ -5,25 +5,26 @@
 from packaging.version import Version
 from packaging.version import parse
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_4
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize(
-    ("aggregation", "expected_b", "expected_c"),
+    ("aggregation", "expected_b", "expected_c", "expected_dtype"),
     [
-        ("min", [1, 3], [4, 6]),
-        ("max", [2, 4], [5, 7]),
-        ("sum", [3, 7], [9, 13]),
-        ("prod", [2, 12], [20, 42]),
-        ("median", [1.5, 3.5], [4.5, 6.5]),
-        ("mean", [1.5, 3.5], [4.5, 6.5]),
+        ("min", [1, 3], [4, 6], "Int64"),
+        ("max", [2, 4], [5, 7], "Int64"),
+        ("sum", [3, 7], [9, 13], "Int64"),
+        ("prod", [2, 12], [20, 42], "Int64"),
+        ("median", [1.5, 3.5], [4.5, 6.5], "Float64"),
+        ("mean", [1.5, 3.5], [4.5, 6.5], "Float64"),
         (
             "std",
             [0.7071067811865476, 0.7071067811865476],
             [0.7071067811865476, 0.7071067811865476],
+            "Float64",
         ),
-        ("var", [0.5, 0.5], [0.5, 0.5]),
+        ("var", [0.5, 0.5], [0.5, 0.5], "Float64"),
     ],
 )
 def test_group_by_numeric(
@@ -31,17 +32,18 @@ def test_group_by_numeric(
     aggregation: str,
     expected_b: list[float],
     expected_c: list[float],
+    expected_dtype: str,
 ) -> None:
     df = integer_dataframe_4(library)
+    ns = df.__dataframe_namespace__()
     result = getattr(df.group_by("key"), aggregation)()
     result = result.sort("key")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"key": [1, 2], "b": expected_b, "c": expected_c})
+    expected = {"key": [1, 2], "b": expected_b, "c": expected_c}
+    dtype = getattr(ns, expected_dtype)
+    expected_ns_dtype = {"key": ns.Int64, "b": dtype, "c": dtype}
     if library == "pandas-nullable" and parse(pd.__version__) < Version(
         "2.0.0",
     ):  # pragma: no cover
         # upstream bug
-        result_pd = result_pd.astype({"key": "int64"})
-    else:
-        pass
-    pd.testing.assert_frame_equal(result_pd, expected)
+        result = result.cast({"key": ns.Int64()})
+    compare_dataframe_with_reference(result, expected, dtype=expected_ns_dtype)  # type: ignore[arg-type]
diff --git a/tests/groupby/size_test.py b/tests/groupby/size_test.py
index 5c051005..2d7da647 100644
--- a/tests/groupby/size_test.py
+++ b/tests/groupby/size_test.py
@@ -1,17 +1,15 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_4
-from tests.utils import interchange_to_pandas
 
 
 def test_group_by_size(library: str) -> None:
     df = integer_dataframe_4(library)
+    ns = df.__dataframe_namespace__()
     result = df.group_by("key").size()
     result = result.sort("key")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"key": [1, 2], "size": [2, 2]})
+    expected = {"key": [1, 2], "size": [2, 2]}
     # TODO polars returns uint32. what do we standardise to?
-    result_pd["size"] = result_pd["size"].astype("int64")
-    pd.testing.assert_frame_equal(result_pd, expected)
+    result = result.cast({"size": ns.Int64()})
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/integration/persistedness_test.py b/tests/integration/persistedness_test.py
index e9744e52..9d6bf7de 100644
--- a/tests/integration/persistedness_test.py
+++ b/tests/integration/persistedness_test.py
@@ -1,14 +1,12 @@
-import pandas as pd
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
 from tests.utils import integer_dataframe_2
-from tests.utils import interchange_to_pandas
 
 
 def test_within_df_propagation(library: str) -> None:
     df1 = integer_dataframe_1(library)
-    df1 = df1
     df1 = df1 + 1
     with pytest.raises(RuntimeError):
         _ = int(df1.col("a").get_value(0))  # type: ignore[call-overload]
@@ -58,18 +56,22 @@ def test_within_df_within_col_propagation(library: str) -> None:
 def test_cross_df_propagation(library: str) -> None:
     df1 = integer_dataframe_1(library)
     df2 = integer_dataframe_2(library)
-    df1 = (df1 + 1).persist()
-    df2 = df2.rename({"b": "c"}).persist()
+    df1 = df1 + 1
+    df2 = df2.rename({"b": "c"})
     result = df1.join(df2, how="left", left_on="a", right_on="a")
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {
-            "a": [2, 3, 4],
-            "b": [5, 6, 7],
-            "c": [2.0, float("nan"), 6.0],
-        },
-    )
-    pd.testing.assert_frame_equal(result_pd, expected)
+    # ns is only needed for expected_dtype; take it once, from the join result.
+    ns = result.__dataframe_namespace__()
+    expected = {
+        "a": [2, 3, 4],
+        "b": [5, 6, 7],
+        "c": [2.0, float("nan"), 6.0],
+    }
+    expected_dtype = {
+        "a": ns.Int64,
+        "b": ns.Int64,
+        "c": ns.Int64 if library in ["pandas-nullable", "polars-lazy"] else ns.Float64,
+    }
+    compare_dataframe_with_reference(result, expected, dtype=expected_dtype)  # type: ignore[arg-type]
 
 
 def test_multiple_propagations(library: str) -> None:
diff --git a/tests/namespace/column_from_1d_array_test.py b/tests/namespace/column_from_1d_array_test.py
index 526487dc..b2dac631 100644
--- a/tests/namespace/column_from_1d_array_test.py
+++ b/tests/namespace/column_from_1d_array_test.py
@@ -5,100 +5,96 @@
 from datetime import timedelta
 
 import numpy as np
-import pandas as pd
 import pytest
 from packaging.version import Version
 
 from tests.utils import PANDAS_VERSION
 from tests.utils import POLARS_VERSION
+from tests.utils import compare_column_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize(
-    "pandas_dtype",
+    ("pandas_dtype", "column_dtype"),
     [
-        "float64",
-        "float32",
-        "int64",
-        "int32",
-        "int16",
-        "int8",
-        "uint64",
-        "uint32",
-        "uint16",
-        "uint8",
+        ("float64", "Float64"),
+        ("float32", "Float32"),
+        ("int64", "Int64"),
+        ("int32", "Int32"),
+        ("int16", "Int16"),
+        ("int8", "Int8"),
+        ("uint64", "UInt64"),
+        ("uint32", "UInt32"),
+        ("uint16", "UInt16"),
+        ("uint8", "UInt8"),
     ],
 )
 def test_column_from_1d_array(
     library: str,
     pandas_dtype: str,
+    column_dtype: str,
 ) -> None:
     ser = integer_dataframe_1(library).col("a").persist()
-    namespace = ser.__column_namespace__()
+    ns = ser.__column_namespace__()
     arr = np.array([1, 2, 3], dtype=pandas_dtype)
-    result = namespace.dataframe_from_columns(
-        namespace.column_from_1d_array(  # type: ignore[call-arg]
+    result = ns.dataframe_from_columns(
+        ns.column_from_1d_array(  # type: ignore[call-arg]
             arr,
             name="result",
         ),
     )
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([1, 2, 3], name="result", dtype=pandas_dtype)
-    pd.testing.assert_series_equal(result_pd, expected)
+    expected = [1, 2, 3]
+    compare_column_with_reference(
+        result.col("result"),
+        expected,
+        dtype=getattr(ns, column_dtype),
+    )
 
 
 def test_column_from_1d_array_string(
     library: str,
 ) -> None:
     ser = integer_dataframe_1(library).persist().col("a")
-    namespace = ser.__column_namespace__()
+    ns = ser.__column_namespace__()
     arr = np.array(["a", "b", "c"])
-    result = namespace.dataframe_from_columns(
-        namespace.column_from_1d_array(  # type: ignore[call-arg]
+    result = ns.dataframe_from_columns(
+        ns.column_from_1d_array(  # type: ignore[call-arg]
             arr,
             name="result",
         ),
     )
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series(["a", "b", "c"], name="result", dtype="object")
-    pd.testing.assert_series_equal(result_pd, expected)
+    expected = ["a", "b", "c"]
+    compare_column_with_reference(result.col("result"), expected, dtype=ns.String)
 
 
 def test_column_from_1d_array_bool(
     library: str,
 ) -> None:
     ser = integer_dataframe_1(library).persist().col("a")
-    namespace = ser.__column_namespace__()
+    ns = ser.__column_namespace__()
     arr = np.array([True, False, True])
-    result = namespace.dataframe_from_columns(
-        namespace.column_from_1d_array(  # type: ignore[call-arg]
+    result = ns.dataframe_from_columns(
+        ns.column_from_1d_array(  # type: ignore[call-arg]
             arr,
             name="result",
         ),
     )
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([True, False, True], name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    expected = [True, False, True]
+    compare_column_with_reference(result.col("result"), expected, dtype=ns.Bool)
 
 
 def test_datetime_from_1d_array(library: str) -> None:
     ser = integer_dataframe_1(library).persist().col("a")
-    namespace = ser.__column_namespace__()
+    ns = ser.__column_namespace__()
     arr = np.array([date(2020, 1, 1), date(2020, 1, 2)], dtype="datetime64[ms]")
-    result = namespace.dataframe_from_columns(
-        namespace.column_from_1d_array(  # type: ignore[call-arg]
+    result = ns.dataframe_from_columns(
+        ns.column_from_1d_array(  # type: ignore[call-arg]
             arr,
             name="result",
         ),
     )
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series(
-        [datetime(2020, 1, 1), datetime(2020, 1, 2)],
-        name="result",
-        dtype="datetime64[ms]",
-    )
-    pd.testing.assert_series_equal(result_pd, expected)
+    expected = [datetime(2020, 1, 1), datetime(2020, 1, 2)]
+    compare_column_with_reference(result.col("result"), expected, dtype=ns.Datetime)
 
 
 @pytest.mark.skipif(
@@ -111,24 +107,16 @@ def test_datetime_from_1d_array(library: str) -> None:
 )
 def test_duration_from_1d_array(library: str) -> None:
     ser = integer_dataframe_1(library).persist().col("a")
-    namespace = ser.__column_namespace__()
+    ns = ser.__column_namespace__()
     arr = np.array([timedelta(1), timedelta(2)], dtype="timedelta64[ms]")
-    result = namespace.dataframe_from_columns(
-        namespace.column_from_1d_array(  # type: ignore[call-arg]
+    result = ns.dataframe_from_columns(
+        ns.column_from_1d_array(  # type: ignore[call-arg]
             arr,
             name="result",
         ),
     )
     if library == "polars-lazy":
         # https://github.com/data-apis/dataframe-api/issues/329
-        result_pd = (
-            result.dataframe.collect().to_pandas()["result"].astype("timedelta64[ms]")  # type: ignore[attr-defined]
-        )
-    else:
-        result_pd = result.dataframe["result"]  # type: ignore[index]
-    expected = pd.Series(
-        [timedelta(1), timedelta(2)],
-        name="result",
-        dtype="timedelta64[ms]",
-    )
-    pd.testing.assert_series_equal(result_pd, expected)
+        result = result.cast({"result": ns.Duration("ms")})
+    expected = [timedelta(1), timedelta(2)]
+    compare_column_with_reference(result.col("result"), expected, dtype=ns.Duration)
diff --git a/tests/namespace/column_from_sequence_test.py b/tests/namespace/column_from_sequence_test.py
index 99423d4a..e6362e12 100644
--- a/tests/namespace/column_from_sequence_test.py
+++ b/tests/namespace/column_from_sequence_test.py
@@ -4,68 +4,29 @@
 from datetime import timedelta
 from typing import Any
 
-import pandas as pd
 import pytest
 
+from tests.utils import compare_column_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize(
-    ("values", "dtype", "kwargs", "expected"),
+    ("values", "dtype", "kwargs"),
     [
-        ([1, 2, 3], "Int64", {}, pd.Series([1, 2, 3], dtype="int64", name="result")),
-        ([1, 2, 3], "Int32", {}, pd.Series([1, 2, 3], dtype="int32", name="result")),
-        ([1, 2, 3], "Int16", {}, pd.Series([1, 2, 3], dtype="int16", name="result")),
-        ([1, 2, 3], "Int8", {}, pd.Series([1, 2, 3], dtype="int8", name="result")),
-        ([1, 2, 3], "UInt64", {}, pd.Series([1, 2, 3], dtype="uint64", name="result")),
-        ([1, 2, 3], "UInt32", {}, pd.Series([1, 2, 3], dtype="uint32", name="result")),
-        ([1, 2, 3], "UInt16", {}, pd.Series([1, 2, 3], dtype="uint16", name="result")),
-        ([1, 2, 3], "UInt8", {}, pd.Series([1, 2, 3], dtype="uint8", name="result")),
-        (
-            [1.0, 2.0, 3.0],
-            "Float64",
-            {},
-            pd.Series([1, 2, 3], dtype="float64", name="result"),
-        ),
-        (
-            [1.0, 2.0, 3.0],
-            "Float32",
-            {},
-            pd.Series([1, 2, 3], dtype="float32", name="result"),
-        ),
-        (
-            [True, False, True],
-            "Bool",
-            {},
-            pd.Series([True, False, True], dtype=bool, name="result"),
-        ),
-        (
-            ["express", "yourself"],
-            "String",
-            {},
-            pd.Series(["express", "yourself"], dtype=object, name="result"),
-        ),
-        (
-            [datetime(2020, 1, 1), datetime(2020, 1, 2)],
-            "Datetime",
-            {"time_unit": "us"},
-            pd.Series(
-                [datetime(2020, 1, 1), datetime(2020, 1, 2)],
-                dtype="datetime64[us]",
-                name="result",
-            ),
-        ),
-        (
-            [timedelta(1), timedelta(2)],
-            "Duration",
-            {"time_unit": "us"},
-            pd.Series(
-                [timedelta(1), timedelta(2)],
-                dtype="timedelta64[us]",
-                name="result",
-            ),
-        ),
+        ([1, 2, 3], "Int64", {}),
+        ([1, 2, 3], "Int32", {}),
+        ([1, 2, 3], "Int16", {}),
+        ([1, 2, 3], "Int8", {}),
+        ([1, 2, 3], "UInt64", {}),
+        ([1, 2, 3], "UInt32", {}),
+        ([1, 2, 3], "UInt16", {}),
+        ([1, 2, 3], "UInt8", {}),
+        ([1.0, 2.0, 3.0], "Float64", {}),
+        ([1.0, 2.0, 3.0], "Float32", {}),
+        ([True, False, True], "Bool", {}),
+        (["express", "yourself"], "String", {}),
+        ([datetime(2020, 1, 1), datetime(2020, 1, 2)], "Datetime", {"time_unit": "us"}),
+        ([timedelta(1), timedelta(2)], "Duration", {"time_unit": "us"}),
     ],
 )
 def test_column_from_sequence(
@@ -73,29 +34,27 @@ def test_column_from_sequence(
     values: list[Any],
     dtype: str,
     kwargs: dict[str, Any],
-    expected: pd.Series[Any],
 ) -> None:
     df = integer_dataframe_1(library)
-    namespace = df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     ser = df.col("a")
-    namespace = ser.__column_namespace__()
-    result = namespace.dataframe_from_columns(
-        namespace.column_from_sequence(
+    ns = ser.__column_namespace__()
+    expected_dtype = getattr(ns, dtype)
+    result = ns.dataframe_from_columns(
+        ns.column_from_sequence(
             values,
-            dtype=getattr(namespace, dtype)(**kwargs),
+            dtype=expected_dtype(**kwargs),
             name="result",
         ),
     )
-    result_pd = interchange_to_pandas(result)["result"]
-    pd.testing.assert_series_equal(result_pd, expected)
+    compare_column_with_reference(result.col("result"), values, dtype=expected_dtype)
 
 
 def test_column_from_sequence_no_dtype(
     library: str,
 ) -> None:
     df = integer_dataframe_1(library)
-    namespace = df.__dataframe_namespace__()
-    result = namespace.dataframe_from_columns(namespace.column_from_sequence([1, 2, 3], name="result"))  # type: ignore[call-arg]
-    result_pd = interchange_to_pandas(result)["result"]
-    expected = pd.Series([1, 2, 3], dtype="int64", name="result")
-    pd.testing.assert_series_equal(result_pd, expected)
+    ns = df.__dataframe_namespace__()
+    result = ns.dataframe_from_columns(ns.column_from_sequence([1, 2, 3], name="result"))  # type: ignore[call-arg]
+    expected = [1, 2, 3]
+    compare_column_with_reference(result.col("result"), expected, dtype=ns.Int64)
diff --git a/tests/namespace/concat_test.py b/tests/namespace/concat_test.py
index 7a41648b..79901d5a 100644
--- a/tests/namespace/concat_test.py
+++ b/tests/namespace/concat_test.py
@@ -1,29 +1,27 @@
 from __future__ import annotations
 
-import pandas as pd
 import polars as pl
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
 from tests.utils import integer_dataframe_2
 from tests.utils import integer_dataframe_4
-from tests.utils import interchange_to_pandas
 
 
 def test_concat(library: str) -> None:
     df1 = integer_dataframe_1(library)
     df2 = integer_dataframe_2(library)
-    namespace = df1.__dataframe_namespace__()
-    result = namespace.concat([df1, df2])
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame({"a": [1, 2, 3, 1, 2, 4], "b": [4, 5, 6, 4, 2, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    ns = df1.__dataframe_namespace__()
+    result = ns.concat([df1, df2])
+    expected = {"a": [1, 2, 3, 1, 2, 4], "b": [4, 5, 6, 4, 2, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
 
 
 def test_concat_mismatch(library: str) -> None:
     df1 = integer_dataframe_1(library).persist()
     df2 = integer_dataframe_4(library).persist()
-    namespace = df1.__dataframe_namespace__()
+    ns = df1.__dataframe_namespace__()
     # TODO check the error
     with pytest.raises((ValueError, pl.exceptions.ShapeError)):
-        _ = namespace.concat([df1, df2]).persist()
+        _ = ns.concat([df1, df2]).persist()
diff --git a/tests/namespace/dataframe_from_2d_array_test.py b/tests/namespace/dataframe_from_2d_array_test.py
index c3ab589b..503486da 100644
--- a/tests/namespace/dataframe_from_2d_array_test.py
+++ b/tests/namespace/dataframe_from_2d_array_test.py
@@ -1,21 +1,20 @@
 from __future__ import annotations
 
 import numpy as np
-import pandas as pd
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
-from tests.utils import interchange_to_pandas
 
 
 def test_dataframe_from_2d_array(library: str) -> None:
     df = integer_dataframe_1(library)
-    namespace = df.__dataframe_namespace__()
+    ns = df.__dataframe_namespace__()
     arr = np.array([[1, 4], [2, 5], [3, 6]])
-    result = namespace.dataframe_from_2d_array(
+    result = ns.dataframe_from_2d_array(
         arr,
         names=["a", "b"],
     )
     # TODO: consistent return type, for windows compat?
-    result_pd = interchange_to_pandas(result).astype("int64")
-    expected = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-    pd.testing.assert_frame_equal(result_pd, expected)
+    result = result.cast({"a": ns.Int64(), "b": ns.Int64()})
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    compare_dataframe_with_reference(result, expected, dtype=ns.Int64)
diff --git a/tests/namespace/sorted_indices_test.py b/tests/namespace/sorted_indices_test.py
index 899a7c63..d99a4585 100644
--- a/tests/namespace/sorted_indices_test.py
+++ b/tests/namespace/sorted_indices_test.py
@@ -1,62 +1,52 @@
 from __future__ import annotations
 
-import pandas as pd
-
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_6
-from tests.utils import interchange_to_pandas
 
 
 def test_column_sorted_indices_ascending(library: str) -> None:
     df = integer_dataframe_6(library)
+    ns = df.__dataframe_namespace__()
     sorted_indices = df.col("b").sorted_indices()
     result = df.assign(sorted_indices.rename("result"))
-    result_pd = interchange_to_pandas(result)
-    expected_1 = pd.DataFrame(
-        {
-            "a": [1, 1, 1, 2, 2],
-            "b": [4, 4, 3, 1, 2],
-            "result": [3, 4, 2, 0, 1],
-        },
-    )
-    expected_2 = pd.DataFrame(
-        {
-            "a": [1, 1, 1, 2, 2],
-            "b": [4, 4, 3, 1, 2],
-            "result": [3, 4, 2, 1, 0],
-        },
-    )
+    expected_1 = {
+        "a": [1, 1, 1, 2, 2],
+        "b": [4, 4, 3, 1, 2],
+        "result": [3, 4, 2, 0, 1],
+    }
+    expected_2 = {
+        "a": [1, 1, 1, 2, 2],
+        "b": [4, 4, 3, 1, 2],
+        "result": [3, 4, 2, 1, 0],
+    }
     if library in ("polars", "polars-lazy"):
-        result_pd["result"] = result_pd["result"].astype("int64")
+        result = result.cast({"result": ns.Int64()})
     try:
-        pd.testing.assert_frame_equal(result_pd, expected_1)
+        compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64)
     except AssertionError:  # pragma: no cover
         # order isn't determinist, so try both
-        pd.testing.assert_frame_equal(result_pd, expected_2)
+        compare_dataframe_with_reference(result, expected_2, dtype=ns.Int64)
 
 
 def test_column_sorted_indices_descending(library: str) -> None:
     df = integer_dataframe_6(library)
+    ns = df.__dataframe_namespace__()
     sorted_indices = df.col("b").sorted_indices(ascending=False)
     result = df.assign(sorted_indices.rename("result"))
-    result_pd = interchange_to_pandas(result)
-    expected_1 = pd.DataFrame(
-        {
-            "a": [1, 1, 1, 2, 2],
-            "b": [4, 4, 3, 1, 2],
-            "result": [1, 0, 2, 4, 3],
-        },
-    )
-    expected_2 = pd.DataFrame(
-        {
-            "a": [1, 1, 1, 2, 2],
-            "b": [4, 4, 3, 1, 2],
-            "result": [0, 1, 2, 4, 3],
-        },
-    )
+    expected_1 = {
+        "a": [1, 1, 1, 2, 2],
+        "b": [4, 4, 3, 1, 2],
+        "result": [1, 0, 2, 4, 3],
+    }
+    expected_2 = {
+        "a": [1, 1, 1, 2, 2],
+        "b": [4, 4, 3, 1, 2],
+        "result": [0, 1, 2, 4, 3],
+    }
     if library in ("polars", "polars-lazy"):
-        result_pd["result"] = result_pd["result"].astype("int64")
+        result = result.cast({"result": ns.Int64()})
     try:
-        pd.testing.assert_frame_equal(result_pd, expected_1)
+        compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64)
     except AssertionError:
         # order isn't determinist, so try both
-        pd.testing.assert_frame_equal(result_pd, expected_2)
+        compare_dataframe_with_reference(result, expected_2, dtype=ns.Int64)
diff --git a/tests/scalars/float_test.py b/tests/scalars/float_test.py
index bfcfd5d0..d8d76656 100644
--- a/tests/scalars/float_test.py
+++ b/tests/scalars/float_test.py
@@ -1,10 +1,9 @@
 import numpy as np
-import pandas as pd
 import pytest
 
+from tests.utils import compare_dataframe_with_reference
 from tests.utils import integer_dataframe_1
 from tests.utils import integer_dataframe_2
-from tests.utils import interchange_to_pandas
 
 
 @pytest.mark.parametrize(
@@ -99,15 +98,13 @@ def test_free_standing(library: str) -> None:
 
 def test_right_comparand(library: str) -> None:
     df = integer_dataframe_1(library)
+    ns = df.__dataframe_namespace__()
     col = df.col("a")  # [1, 2, 3]
     scalar = df.col("b").get_value(0)  # 4
     result = df.assign((scalar - col).rename("c"))
-    result_pd = interchange_to_pandas(result)
-    expected = pd.DataFrame(
-        {
-            "a": [1, 2, 3],
-            "b": [4, 5, 6],
-            "c": [3, 2, 1],
-        },
-    )
-    pd.testing.assert_frame_equal(result_pd, expected)
+    expected = {
+        "a": [1, 2, 3],
+        "b": [4, 5, 6],
+        "c": [3, 2, 1],
+    }
+    compare_dataframe_with_reference(result, expected, ns.Int64)
diff --git a/tests/utils.py b/tests/utils.py
index 712d67bf..019e9f3c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 
+import math
 from datetime import datetime
 from datetime import timedelta
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Mapping
-from typing import cast
 
 import pandas as pd
 import polars as pl
@@ -47,27 +47,6 @@ def convert_to_standard_compliant_dataframe(
         raise AssertionError(msg)
 
 
-def convert_dataframe_to_pandas_numpy(df: pd.DataFrame) -> pd.DataFrame:
-    conversions = {
-        "boolean": "bool",
-        "Int64": "int64",
-        "Float64": "float64",
-    }
-    for column in df.columns:
-        dtype = str(df.dtypes[column])
-        if dtype in conversions:
-            try:
-                df[column] = df[column].to_numpy(
-                    conversions[dtype],
-                    na_value=float("nan"),
-                )
-            except ValueError:
-                # cannot convert float NaN to integer
-                assert dtype == "Int64"
-                df[column] = df[column].to_numpy("float64", na_value=float("nan"))
-    return df
-
-
 def integer_dataframe_1(library: str, api_version: str | None = None) -> DataFrame:
     df: Any
     if library == "pandas-numpy":
@@ -471,19 +450,6 @@ def temporal_dataframe_1(library: str) -> DataFrame:
     raise AssertionError(msg)
 
 
-def interchange_to_pandas(result: Any) -> pd.DataFrame:
-    if isinstance(result.dataframe, pl.LazyFrame):
-        df = result.dataframe.collect()
-        df = df.to_pandas()
-    elif isinstance(result.dataframe, pl.DataFrame):
-        df = result.dataframe
-        df = df.to_pandas()
-    else:
-        df = result.dataframe
-    df = convert_dataframe_to_pandas_numpy(df)
-    return cast(pd.DataFrame, df)
-
-
 def compare_column_with_reference(
     column: Column,
     reference: list[Any],
@@ -497,9 +463,18 @@ def compare_column_with_reference(
         dtype,
     ), f"column dtype: {column.dtype} isn't a instance of {dtype}"
     for idx in range(col_len):
-        assert (
-            reference[idx] == column.get_value(idx).scalar
-        ), f"{reference[idx]} != {column.get_value(idx).scalar}"
+        a, b = reference[idx], column.get_value(idx).scalar
+        if a == b:
+            # exact match: keep checking the remaining rows (do not exit early)
+            continue
+        # not exactly equal: allow float noise, tolerances copied from pandas
+        rtol, atol = 1e-5, 1e-8
+        assert math.isclose(
+            a,
+            b,
+            rel_tol=rtol,
+            abs_tol=atol,
+        ), f"expected {a:.5f} but got {b:.5f}, with rtol={rtol}, atol={atol}"
 
 
 def compare_dataframe_with_reference(