Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
6f7a574
unify ColumnNotFound
EdAbati May 4, 2025
8fe45e6
revert
EdAbati May 4, 2025
45f09e0
Merge branch 'main' into unify-column-not-found-error
EdAbati May 4, 2025
6d58bc2
try except during select
EdAbati May 6, 2025
be834b3
catch correct exception
EdAbati May 6, 2025
19d6e24
coverage
EdAbati May 6, 2025
f0a9821
Merge branch 'main' into unify-column-not-found-error
EdAbati May 6, 2025
c1cabd1
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 9, 2025
917d073
separate lazy and eager tests
EdAbati May 9, 2025
5d2972d
coverage
EdAbati May 9, 2025
4080101
cleanup exception
EdAbati May 9, 2025
8442596
use constructor_id
EdAbati May 9, 2025
937a123
what is going on in pyspark connect?
EdAbati May 9, 2025
0544f30
ignore pyspark connect
EdAbati May 9, 2025
f41c037
Merge branch 'main' into unify-column-not-found-error
EdAbati May 11, 2025
bb17703
Merge branch 'main' into unify-column-not-found-error
EdAbati May 11, 2025
df18f34
Merge branch 'main' into unify-column-not-found-error
EdAbati May 12, 2025
b518a5a
added missing column tests
EdAbati May 13, 2025
bb7254f
move to drop test
EdAbati May 13, 2025
823d356
fix msg regex
EdAbati May 13, 2025
5647f52
update with columns
EdAbati May 13, 2025
1cb3204
update filter
EdAbati May 13, 2025
901bdf7
catch more precise error
EdAbati May 13, 2025
9cc6a3e
Merge branch 'main' into unify-column-not-found-error
EdAbati May 13, 2025
5432521
remove redundant test
EdAbati May 13, 2025
f51bb19
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 19, 2025
d5f51bb
catch duckdb exception func
EdAbati May 22, 2025
b9b0cbf
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 22, 2025
048b130
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 22, 2025
c9e3061
fix aggregate
EdAbati May 22, 2025
1b77d21
ignore ibis
EdAbati May 23, 2025
40d9abe
test aggregate in select
EdAbati May 23, 2025
0841c6a
catch_pyspark_column_not_found_exception
EdAbati May 23, 2025
29441ef
change signature
EdAbati May 23, 2025
975ef48
testing if connect has the same error?
EdAbati May 23, 2025
eae724d
coverage happy
EdAbati May 23, 2025
661655b
fix spark connect
EdAbati May 23, 2025
a2b2889
catch pyspark connect at collect
EdAbati May 23, 2025
c858861
fixes
EdAbati May 23, 2025
7601ef1
fix regex
EdAbati May 23, 2025
977738f
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 23, 2025
49c4806
coverage happier
EdAbati May 23, 2025
76b209d
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 24, 2025
6e23961
Merge branch 'main' into unify-column-not-found-error
EdAbati May 24, 2025
dd7a757
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 24, 2025
cce992b
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 26, 2025
67baf81
fix drop test
EdAbati May 26, 2025
5d1cc71
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati May 26, 2025
be7240f
restore from_available_column_names
EdAbati May 26, 2025
53021e8
Merge branch 'main' into unify-column-not-found-error
EdAbati May 30, 2025
fef9cb3
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati Jun 12, 2025
03cdad2
Merge branch 'main' into unify-column-not-found-error
EdAbati Jun 15, 2025
7c898e1
Merge remote-tracking branch 'upstream/main' into unify-column-not-fo…
EdAbati Jul 8, 2025
5c7e114
remove constructor_lazy
EdAbati Jul 8, 2025
311ed35
Merge branch 'main' into unify-column-not-found-error
MarcoGorelli Jul 13, 2025
aa20974
remove if for only polars
EdAbati Jul 15, 2025
f994666
Merge branch 'main' into unify-column-not-found-error
EdAbati Jul 15, 2025
592ed50
try removing pyspark[connect] from with_columns_missing_column xfails
EdAbati Jul 15, 2025
2240ac0
fix comment
EdAbati Jul 15, 2025
c942c57
fix pyspark[connect] test
EdAbati Jul 15, 2025
710119d
use maybe_collect in more places
EdAbati Jul 15, 2025
1e1edc1
fix comment
EdAbati Jul 15, 2025
b36c866
make coverage happy
EdAbati Jul 15, 2025
00d9cc4
make coverage really happy?
EdAbati Jul 15, 2025
a8ca283
Merge branch 'main' into unify-column-not-found-error
EdAbati Jul 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from narwhals._duckdb.utils import (
DeferredTimeZone,
F,
catch_duckdb_exception,
col,
evaluate_exprs,
lit,
Expand Down Expand Up @@ -167,11 +168,17 @@ def simple_select(self, *column_names: str) -> Self:

def aggregate(self, *exprs: DuckDBExpr) -> Self:
selection = [val.alias(name) for name, val in evaluate_exprs(self, *exprs)]
return self._with_native(self.native.aggregate(selection)) # type: ignore[arg-type]
try:
return self._with_native(self.native.aggregate(selection)) # type: ignore[arg-type]
except Exception as e: # noqa: BLE001
raise catch_duckdb_exception(e, self) from None

def select(self, *exprs: DuckDBExpr) -> Self:
selection = (val.alias(name) for name, val in evaluate_exprs(self, *exprs))
return self._with_native(self.native.select(*selection))
try:
return self._with_native(self.native.select(*selection))
except Exception as e: # noqa: BLE001
raise catch_duckdb_exception(e, self) from None

def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
columns_to_drop = parse_columns_to_drop(self, columns, strict=strict)
Expand All @@ -197,12 +204,18 @@ def with_columns(self, *exprs: DuckDBExpr) -> Self:
for name in self.columns
]
result.extend(value.alias(name) for name, value in new_columns_map.items())
return self._with_native(self.native.select(*result))
try:
return self._with_native(self.native.select(*result))
except Exception as e: # noqa: BLE001
raise catch_duckdb_exception(e, self) from None

def filter(self, predicate: DuckDBExpr) -> Self:
# `[0]` is safe as the predicate's expression only returns a single column
mask = predicate(self)[0]
return self._with_native(self.native.filter(mask))
try:
return self._with_native(self.native.filter(mask))
except Exception as e: # noqa: BLE001
raise catch_duckdb_exception(e, self) from None

@property
def schema(self) -> dict[str, DType]:
Expand Down
20 changes: 20 additions & 0 deletions narwhals/_duckdb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,21 @@
import duckdb

from narwhals._utils import Version, isinstance_or_issubclass
from narwhals.exceptions import ColumnNotFoundError

if TYPE_CHECKING:
from collections.abc import Sequence

from duckdb import DuckDBPyRelation, Expression
from duckdb.typing import DuckDBPyType

from narwhals._compliant.typing import CompliantLazyFrameAny
from narwhals._duckdb.dataframe import DuckDBLazyFrame
from narwhals._duckdb.expr import DuckDBExpr
from narwhals.dtypes import DType
from narwhals.typing import IntoDType


UNITS_DICT = {
"y": "year",
"q": "quarter",
Expand Down Expand Up @@ -349,3 +352,20 @@ def window_expression(

func = f"{str(expr).removesuffix(')')} ignore nulls)" if ignore_nulls else str(expr)
return SQLExpression(f"{func} over ({pb} {ob} {rows})")


def catch_duckdb_exception(
exception: Exception, frame: CompliantLazyFrameAny, /
) -> ColumnNotFoundError | Exception:
if isinstance(exception, duckdb.BinderException) and any(
msg in str(exception)
for msg in (
"not found in FROM clause",
"this column cannot be referenced before it is defined",
)
):
return ColumnNotFoundError.from_available_column_names(
available_columns=frame.columns
)
# Just return exception as-is.
return exception
38 changes: 35 additions & 3 deletions narwhals/_spark_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from narwhals._namespace import is_native_spark_like
from narwhals._spark_like.utils import (
catch_pyspark_connect_exception,
catch_pyspark_sql_exception,
evaluate_exprs,
import_functions,
import_native_dtypes,
Expand Down Expand Up @@ -200,7 +202,7 @@ def columns(self) -> list[str]:
)
return self._cached_columns

def collect(
def _collect(
self, backend: ModuleType | Implementation | str | None, **kwargs: Any
) -> CompliantDataFrameAny:
if backend is Implementation.PANDAS:
Expand Down Expand Up @@ -238,29 +240,59 @@ def collect(
msg = f"Unsupported `backend` value: {backend}" # pragma: no cover
raise ValueError(msg) # pragma: no cover

def collect(
self, backend: ModuleType | Implementation | str | None, **kwargs: Any
) -> CompliantDataFrameAny:
if self._implementation.is_pyspark_connect():
try:
return self._collect(backend, **kwargs)
except Exception as e: # noqa: BLE001
raise catch_pyspark_connect_exception(e) from None
return self._collect(backend, **kwargs)

def simple_select(self, *column_names: str) -> Self:
return self._with_native(self.native.select(*column_names))

def aggregate(self, *exprs: SparkLikeExpr) -> Self:
new_columns = evaluate_exprs(self, *exprs)

new_columns_list = [col.alias(col_name) for col_name, col in new_columns]
if self._implementation.is_pyspark():
try:
return self._with_native(self.native.agg(*new_columns_list))
except Exception as e: # noqa: BLE001
raise catch_pyspark_sql_exception(e, self) from None
return self._with_native(self.native.agg(*new_columns_list))

def select(self, *exprs: SparkLikeExpr) -> Self:
new_columns = evaluate_exprs(self, *exprs)
new_columns_list = [col.alias(col_name) for (col_name, col) in new_columns]
if self._implementation.is_pyspark(): # pragma: no cover
try:
return self._with_native(self.native.select(*new_columns_list))
except Exception as e: # noqa: BLE001
raise catch_pyspark_sql_exception(e, self) from None
return self._with_native(self.native.select(*new_columns_list))

def with_columns(self, *exprs: SparkLikeExpr) -> Self:
new_columns = evaluate_exprs(self, *exprs)
if self._implementation.is_pyspark(): # pragma: no cover
try:
return self._with_native(self.native.withColumns(dict(new_columns)))
except Exception as e: # noqa: BLE001
raise catch_pyspark_sql_exception(e, self) from None

return self._with_native(self.native.withColumns(dict(new_columns)))

def filter(self, predicate: SparkLikeExpr) -> Self:
# `[0]` is safe as the predicate's expression only returns a single column
condition = predicate._call(self)[0]
spark_df = self.native.where(condition)
return self._with_native(spark_df)
if self._implementation.is_pyspark():
try:
return self._with_native(self.native.where(condition))
except Exception as e: # noqa: BLE001
raise catch_pyspark_sql_exception(e, self) from None
return self._with_native(self.native.where(condition))

@property
def schema(self) -> dict[str, DType]:
Expand Down
31 changes: 30 additions & 1 deletion narwhals/_spark_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import TYPE_CHECKING, Any, overload

from narwhals._utils import Implementation, isinstance_or_issubclass
from narwhals.exceptions import UnsupportedDTypeError
from narwhals.exceptions import ColumnNotFoundError, UnsupportedDTypeError

if TYPE_CHECKING:
from types import ModuleType
Expand All @@ -16,6 +16,7 @@
from sqlframe.base.session import _BaseSession as Session
from typing_extensions import TypeAlias

from narwhals._compliant.typing import CompliantLazyFrameAny
from narwhals._spark_like.dataframe import SparkLikeLazyFrame
from narwhals._spark_like.expr import SparkLikeExpr
from narwhals._utils import Version
Expand Down Expand Up @@ -292,3 +293,31 @@ def true_divide(F: Any, left: Column, right: Column) -> Column: # noqa: N803
# PySpark before 3.5 doesn't have `try_divide`, SQLFrame doesn't have it.
divide = getattr(F, "try_divide", operator.truediv)
return divide(left, right)


def catch_pyspark_sql_exception(
exception: Exception, frame: CompliantLazyFrameAny, /
) -> ColumnNotFoundError | Exception: # pragma: no cover
from pyspark.errors import AnalysisException

if isinstance(exception, AnalysisException) and str(exception).startswith(
"[UNRESOLVED_COLUMN.WITH_SUGGESTION]"
):
return ColumnNotFoundError.from_available_column_names(
available_columns=frame.columns
)
# Just return exception as-is.
return exception


def catch_pyspark_connect_exception(
exception: Exception, /
) -> ColumnNotFoundError | Exception: # pragma: no cover
from pyspark.errors.exceptions.connect import AnalysisException

if isinstance(exception, AnalysisException) and str(exception).startswith(
"[UNRESOLVED_COLUMN.WITH_SUGGESTION]"
):
return ColumnNotFoundError(str(exception))
# Just return exception as-is.
return exception
10 changes: 10 additions & 0 deletions narwhals/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ def from_missing_and_available_column_names(
)
return ColumnNotFoundError(message)

@classmethod
def from_available_column_names(
cls, available_columns: Collection[str]
) -> ColumnNotFoundError:
message = (
"The selected columns were not found."
f"\n\nHint: Did you mean one of these columns: {list(available_columns)}?"
)
return ColumnNotFoundError(message)


class ComputeError(NarwhalsError):
"""Exception raised when the underlying computation could not be evaluated."""
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from narwhals._spark_like.dataframe import SQLFrameDataFrame
from narwhals.typing import NativeFrame, NativeLazyFrame
from tests.utils import Constructor, ConstructorEager
from tests.utils import Constructor, ConstructorEager, ConstructorLazy

Data: TypeAlias = "dict[str, list[Any]]"

Expand Down Expand Up @@ -246,7 +246,7 @@ def ibis_lazy_constructor(obj: Data) -> ibis.Table: # pragma: no cover
"cudf": cudf_constructor,
"polars[eager]": polars_eager_constructor,
}
LAZY_CONSTRUCTORS: dict[str, Constructor] = {
LAZY_CONSTRUCTORS: dict[str, ConstructorLazy] = {
"dask": dask_lazy_p2_constructor,
"polars[lazy]": polars_lazy_constructor,
"duckdb": duckdb_lazy_constructor,
Expand Down
24 changes: 15 additions & 9 deletions tests/frame/drop_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,9 @@ def test_drop(constructor: Constructor, to_drop: list[str], expected: list[str])
assert df.drop(*to_drop).collect_schema().names() == expected


@pytest.mark.parametrize(
("strict", "context"),
[(True, pytest.raises(ColumnNotFoundError, match="z")), (False, does_not_raise())],
)
@pytest.mark.parametrize("strict", [True, False])
def test_drop_strict(
request: pytest.FixtureRequest,
constructor: Constructor,
context: Any,
*,
strict: bool,
request: pytest.FixtureRequest, constructor: Constructor, *, strict: bool
) -> None:
if "polars_lazy" in str(constructor) and POLARS_VERSION < (1, 0, 0) and strict:
request.applymarker(pytest.mark.xfail)
Expand All @@ -44,6 +37,19 @@ def test_drop_strict(

df = nw.from_native(constructor(data))

context: Any
if strict:
if "polars_lazy" in str(constructor):
msg = r"^(\"z\"|z)"
else:
msg = (
r"The following columns were not found: \[.*\]"
r"\n\nHint: Did you mean one of these columns: \['a', 'b'\]?"
)
Comment on lines +45 to +48
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use parse_columns_to_drop and therefore raise our ColumnNotFoundError.from_missing_and_available_column_names(missing_columns=missing_columns, available_columns=cols) for every backend

context = pytest.raises(ColumnNotFoundError, match=msg)
else:
context = does_not_raise()

with context:
names_out = df.drop(to_drop, strict=strict).collect_schema().names()
assert names_out == ["b"]
29 changes: 28 additions & 1 deletion tests/frame/filter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

import narwhals as nw
from narwhals.exceptions import InvalidOperationError
from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError
from tests.utils import Constructor, ConstructorEager, assert_equal_data


Expand Down Expand Up @@ -65,3 +65,30 @@ def test_filter_with_constrains(constructor: Constructor) -> None:
expected_expr = {"a": [1, 2], "b": [4, 6]}

assert_equal_data(result_expr, expected_expr)


def test_filter_missing_column(
constructor: Constructor, request: pytest.FixtureRequest
) -> None:
constructor_id = str(request.node.callspec.id)
if any(id_ == constructor_id for id_ in ("sqlframe", "pyspark[connect]", "ibis")):
request.applymarker(pytest.mark.xfail)
data = {"a": [1, 2], "b": [3, 4]}
df = nw.from_native(constructor(data))

if "polars" in str(constructor):
msg = r"^unable to find column \"c\"; valid columns: \[\"a\", \"b\"\]"
elif any(id_ == constructor_id for id_ in ("duckdb", "pyspark")):
msg = r"\n\nHint: Did you mean one of these columns: \['a', 'b'\]?"
else:
msg = (
r"The following columns were not found: \[.*\]"
r"\n\nHint: Did you mean one of these columns: \['a', 'b'\]?"
)

if "polars_lazy" in str(constructor) and isinstance(df, nw.LazyFrame):
with pytest.raises(ColumnNotFoundError, match=msg):
df.filter(c=5).collect()
else:
with pytest.raises(ColumnNotFoundError, match=msg):
df.filter(c=5)
Loading
Loading