diff --git a/narwhals/functions.py b/narwhals/functions.py index 95c26a0c13..cbeecc0152 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -591,20 +591,33 @@ def read_csv( | 1 2 5 | └──────────────────┘ """ - eager_backend = Implementation.from_backend(backend) - native_namespace = eager_backend.to_native_namespace() + impl = Implementation.from_backend(backend) + native_namespace = impl.to_native_namespace() native_frame: NativeFrame - if eager_backend in { + if impl in { Implementation.POLARS, Implementation.PANDAS, Implementation.MODIN, Implementation.CUDF, }: native_frame = native_namespace.read_csv(source, **kwargs) - elif eager_backend is Implementation.PYARROW: + elif impl is Implementation.PYARROW: from pyarrow import csv # ignore-banned-import native_frame = csv.read_csv(source, **kwargs) + elif impl in { + Implementation.PYSPARK, + Implementation.DASK, + Implementation.DUCKDB, + Implementation.IBIS, + Implementation.SQLFRAME, + Implementation.PYSPARK_CONNECT, + }: + msg = ( + f"Expected eager backend, found {impl}.\n\n" + f"Hint: use nw.scan_csv(source={source}, backend={backend})" + ) + raise ValueError(msg) else: # pragma: no cover try: # implementation is UNKNOWN, Narwhals extension using this feature should @@ -734,22 +747,33 @@ def read_parquet( |c: [[0.2,0.1]] | └──────────────────┘ """ - implementation = Implementation.from_backend(backend) - native_namespace = implementation.to_native_namespace() + impl = Implementation.from_backend(backend) + native_namespace = impl.to_native_namespace() native_frame: NativeFrame - if implementation in { + if impl in { Implementation.POLARS, Implementation.PANDAS, Implementation.MODIN, Implementation.CUDF, - Implementation.DUCKDB, - Implementation.IBIS, }: native_frame = native_namespace.read_parquet(source, **kwargs) - elif implementation is Implementation.PYARROW: + elif impl is Implementation.PYARROW: import pyarrow.parquet as pq # ignore-banned-import native_frame = pq.read_table(source, **kwargs) + elif impl in { + Implementation.PYSPARK, + Implementation.DASK, + Implementation.DUCKDB, + Implementation.IBIS, + Implementation.SQLFRAME, + Implementation.PYSPARK_CONNECT, + }: + msg = ( + f"Expected eager backend, found {impl}.\n\n" + f"Hint: use nw.scan_parquet(source={source}, backend={backend})" + ) + raise ValueError(msg) else: # pragma: no cover try: # implementation is UNKNOWN, Narwhals extension using this feature should diff --git a/tests/read_scan_test.py b/tests/read_scan_test.py index 737f83953e..349f71cbb8 100644 --- a/tests/read_scan_test.py +++ b/tests/read_scan_test.py @@ -46,6 +46,17 @@ def test_read_csv_kwargs(tmpdir: pytest.TempdirFactory) -> None: assert_equal_data(result, data) +@pytest.mark.parametrize("backend", ["duckdb", "ibis", "sqlframe"]) +def test_read_csv_raise_with_lazy(tmpdir: pytest.TempdirFactory, backend: str) -> None: + pytest.importorskip(backend) + df_pl = pl.DataFrame(data) + filepath = str(tmpdir / "file.csv") # type: ignore[operator] + df_pl.write_csv(filepath) + + with pytest.raises(ValueError, match="Expected eager backend, found"): + nw.read_csv(filepath, backend=backend) + + def test_scan_csv(tmpdir: pytest.TempdirFactory, constructor: Constructor) -> None: kwargs: dict[str, Any] if "sqlframe" in str(constructor): @@ -119,6 +130,19 @@ def test_read_parquet_kwargs(tmpdir: pytest.TempdirFactory) -> None: assert_equal_data(result, data) +@pytest.mark.parametrize("backend", ["duckdb", "ibis", "sqlframe"]) +def test_read_parquet_raise_with_lazy( + tmpdir: pytest.TempdirFactory, backend: str +) -> None: + pytest.importorskip(backend) + df_pl = pl.DataFrame(data) + filepath = str(tmpdir / "file.parquet") # type: ignore[operator] + df_pl.write_parquet(filepath) + + with pytest.raises(ValueError, match="Expected eager backend, found"): + nw.read_parquet(filepath, backend=backend) + + @pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="too old for pyarrow") def test_scan_parquet(tmpdir: pytest.TempdirFactory, constructor: Constructor) -> None: kwargs: dict[str, Any]