Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 34 additions & 10 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,20 +591,33 @@ def read_csv(
| 1 2 5 |
└──────────────────┘
"""
eager_backend = Implementation.from_backend(backend)
native_namespace = eager_backend.to_native_namespace()
impl = Implementation.from_backend(backend)
native_namespace = impl.to_native_namespace()
native_frame: NativeFrame
if eager_backend in {
if impl in {
Implementation.POLARS,
Implementation.PANDAS,
Implementation.MODIN,
Implementation.CUDF,
}:
native_frame = native_namespace.read_csv(source, **kwargs)
elif eager_backend is Implementation.PYARROW:
elif impl is Implementation.PYARROW:
from pyarrow import csv # ignore-banned-import

native_frame = csv.read_csv(source, **kwargs)
elif impl in {
Implementation.PYSPARK,
Implementation.DASK,
Implementation.DUCKDB,
Implementation.IBIS,
Implementation.SQLFRAME,
Implementation.PYSPARK_CONNECT,
}:
msg = (
f"Expected eager backend, found {impl}.\n\n"
f"Hint: use nw.scan_csv(source={source}, backend={backend})"
)
raise ValueError(msg)
Comment on lines +608 to +620
Copy link
Member Author

@FBruzzesi FBruzzesi Aug 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For read_csv I simply wanted to improve on the error message

else: # pragma: no cover
try:
# implementation is UNKNOWN, Narwhals extension using this feature should
Expand Down Expand Up @@ -734,22 +747,33 @@ def read_parquet(
|c: [[0.2,0.1]] |
└──────────────────┘
"""
implementation = Implementation.from_backend(backend)
native_namespace = implementation.to_native_namespace()
impl = Implementation.from_backend(backend)
native_namespace = impl.to_native_namespace()
native_frame: NativeFrame
if implementation in {
if impl in {
Implementation.POLARS,
Implementation.PANDAS,
Implementation.MODIN,
Implementation.CUDF,
Implementation.DUCKDB,
Implementation.IBIS,
Comment on lines -745 to -746
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible these were supposed to be supported for v1 only?

They stand out as the impls that had an interchange support for DataFrame

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thaaaat's totally possible! However I find it weird that:

  • read_parquet allows it, but read_csv doesn't
  • from_native in the return statement has the eager_only=True flag, not the eager_or_interchange_only

🤔

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah true, I guess you could investigate 🧐 the blame?

Copy link
Member Author

@FBruzzesi FBruzzesi Aug 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#1725 introduced full support for duckdb, and added duckdb in read_parquet
#2000 introduced full support for ibis, and added ibis in read_parquet

These are way after interchange protocol support, so it seems completely accidental, and we just missed it. Both PRs were 2k+ line changes, and we had no test to prevent that.

}:
native_frame = native_namespace.read_parquet(source, **kwargs)
elif implementation is Implementation.PYARROW:
elif impl is Implementation.PYARROW:
import pyarrow.parquet as pq # ignore-banned-import

native_frame = pq.read_table(source, **kwargs)
elif impl in {
Implementation.PYSPARK,
Implementation.DASK,
Implementation.DUCKDB,
Implementation.IBIS,
Implementation.SQLFRAME,
Implementation.PYSPARK_CONNECT,
}:
msg = (
f"Expected eager backend, found {impl}.\n\n"
f"Hint: use nw.scan_parquet(source={source}, backend={backend})"
)
raise ValueError(msg)
else: # pragma: no cover
try:
# implementation is UNKNOWN, Narwhals extension using this feature should
Expand Down
24 changes: 24 additions & 0 deletions tests/read_scan_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ def test_read_csv_kwargs(tmpdir: pytest.TempdirFactory) -> None:
assert_equal_data(result, data)


@pytest.mark.parametrize("backend", ["duckdb", "ibis", "sqlframe"])
def test_read_csv_raise_with_lazy(tmpdir: pytest.TempdirFactory, backend: str) -> None:
    """Check that `nw.read_csv` raises `ValueError` for each parametrized backend.

    A CSV file is written with Polars first, so the failure exercised here
    comes from the backend choice rather than from a missing file.
    """
    # Skip (rather than fail) when the optional backend is not installed.
    pytest.importorskip(backend)
    csv_path = str(tmpdir / "file.csv")  # type: ignore[operator]
    pl.DataFrame(data).write_csv(csv_path)

    with pytest.raises(ValueError, match="Expected eager backend, found"):
        nw.read_csv(csv_path, backend=backend)


def test_scan_csv(tmpdir: pytest.TempdirFactory, constructor: Constructor) -> None:
kwargs: dict[str, Any]
if "sqlframe" in str(constructor):
Expand Down Expand Up @@ -119,6 +130,19 @@ def test_read_parquet_kwargs(tmpdir: pytest.TempdirFactory) -> None:
assert_equal_data(result, data)


@pytest.mark.parametrize("backend", ["duckdb", "ibis", "sqlframe"])
def test_read_parquet_raise_with_lazy(
    tmpdir: pytest.TempdirFactory, backend: str
) -> None:
    """Check that `nw.read_parquet` raises `ValueError` for each parametrized backend.

    A Parquet file is written with Polars first, so the failure exercised
    here comes from the backend choice rather than from a missing file.
    """
    # Skip (rather than fail) when the optional backend is not installed.
    pytest.importorskip(backend)
    parquet_path = str(tmpdir / "file.parquet")  # type: ignore[operator]
    pl.DataFrame(data).write_parquet(parquet_path)

    with pytest.raises(ValueError, match="Expected eager backend, found"):
        nw.read_parquet(parquet_path, backend=backend)


@pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="too old for pyarrow")
def test_scan_parquet(tmpdir: pytest.TempdirFactory, constructor: Constructor) -> None:
kwargs: dict[str, Any]
Expand Down
Loading