diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index a6f8666f06..2c6c575fd4 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -16,12 +16,13 @@ NativeFrameT_co, NativeSeriesT, ) +from narwhals._expression_parsing import is_expr, is_series from narwhals._utils import ( exclude_column_names, get_column_names, passthrough_column_names, ) -from narwhals.dependencies import is_numpy_array_2d +from narwhals.dependencies import is_numpy_array, is_numpy_array_2d if TYPE_CHECKING: from collections.abc import Container, Iterable, Sequence @@ -31,12 +32,15 @@ from narwhals._compliant.selectors import CompliantSelectorNamespace from narwhals._compliant.when_then import CompliantWhen, EagerWhen from narwhals._utils import Implementation, Version + from narwhals.expr import Expr + from narwhals.series import Series from narwhals.typing import ( ConcatMethod, Into1DArray, IntoDType, IntoSchema, NonNestedLiteral, + _1DArray, _2DArray, ) @@ -57,6 +61,16 @@ class CompliantNamespace(Protocol[CompliantFrameT, CompliantExprT]): @property def _expr(self) -> type[CompliantExprT]: ... + def parse_into_expr( + self, data: Expr | NonNestedLiteral | Any, /, *, str_as_lit: bool + ) -> CompliantExprT | NonNestedLiteral: + if is_expr(data): + expr = data._to_compliant_expr(self) + assert isinstance(expr, self._expr) # noqa: S101 + return expr + if isinstance(data, str) and not str_as_lit: + return self.col(data) + return data # NOTE: `polars` def all(self) -> CompliantExprT: @@ -171,6 +185,21 @@ def from_native( msg = f"Unsupported type: {type(data).__name__!r}" raise TypeError(msg) + def parse_into_expr( + self, + data: Expr | Series[NativeSeriesT] | _1DArray | NonNestedLiteral, + /, + *, + str_as_lit: bool, + ) -> EagerExprT | NonNestedLiteral: + if not (is_series(data) or is_numpy_array(data)): + return super().parse_into_expr(data, str_as_lit=str_as_lit) + return self._expr._from_series( + data._compliant_series + if is_series(data) + else self._series.from_numpy(data, context=self) + ) + @overload def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> EagerSeriesT: ... diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 41c7539f0f..561ff3de6d 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -37,7 +37,6 @@ from typing_extensions import NotRequired, Self, TypedDict from narwhals._compliant.dataframe import CompliantDataFrame - from narwhals._compliant.expr import CompliantExpr, EagerExpr from narwhals._compliant.namespace import EagerNamespace from narwhals._utils import Implementation, Version, _LimitedContext from narwhals.dtypes import DType @@ -94,7 +93,6 @@ def from_native(cls, data: NativeSeriesT, /, *, context: _LimitedContext) -> Sel def to_narwhals(self) -> Series[NativeSeriesT]: return self._version.series(self, level="full") - def _to_expr(self) -> CompliantExpr[Any, Self]: ... def _with_native(self, series: Any) -> Self: ... def _with_version(self, version: Version) -> Self: ... @@ -245,9 +243,6 @@ def __narwhals_namespace__( self, ) -> EagerNamespace[Any, Self, Any, Any, NativeSeriesT]: ... - def _to_expr(self) -> EagerExpr[Any, Any]: - return self.__narwhals_namespace__()._expr._from_series(self) # type: ignore[no-any-return] - def _gather(self, rows: SizedMultiIndexSelector[NativeSeriesT]) -> Self: ... def _gather_slice(self, rows: _SliceIndex | range) -> Self: ... def __getitem__(self, item: MultiIndexSelector[Self]) -> Self: diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index e4ad3c5893..b5cbd85f6b 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -6,10 +6,10 @@ from enum import Enum, auto from itertools import chain -from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast +from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar from narwhals._utils import is_compliant_expr, zip_strict -from narwhals.dependencies import is_narwhals_series, is_numpy_array +from narwhals.dependencies import is_narwhals_series, is_numpy_array, is_numpy_array_1d from narwhals.exceptions import InvalidOperationError, MultiOutputExpressionError if TYPE_CHECKING: @@ -23,7 +23,6 @@ CompliantExprAny, CompliantFrameAny, CompliantNamespaceAny, - EagerNamespaceAny, EvalNames, ) from narwhals.expr import Expr @@ -47,6 +46,13 @@ def is_series(obj: Any) -> TypeIs[Series[Any]]: return isinstance(obj, Series) +def is_into_expr_eager(obj: Any) -> TypeIs[Expr | Series[Any] | str | _1DArray]: + from narwhals.expr import Expr + from narwhals.series import Series + + return isinstance(obj, (Series, Expr, str)) or is_numpy_array_1d(obj) + + def combine_evaluate_output_names( *exprs: CompliantExpr[CompliantFrameT, Any], ) -> EvalNames[CompliantFrameT]: @@ -74,24 +80,6 @@ def alias_output_names(names: Sequence[str]) -> Sequence[str]: return alias_output_names -def extract_compliant( - plx: CompliantNamespaceAny, - other: IntoExpr | NonNestedLiteral | _1DArray, - *, - str_as_lit: bool, -) -> CompliantExprAny | NonNestedLiteral: - if is_expr(other): - return other._to_compliant_expr(plx) - if isinstance(other, str) and not str_as_lit: - return plx.col(other) - if is_narwhals_series(other): - return other._compliant_series._to_expr() - if is_numpy_array(other): - ns = cast("EagerNamespaceAny", plx) - return ns._series.from_numpy(other, context=ns)._to_expr() - return other - - def evaluate_output_names_and_aliases( expr: CompliantExprAny, df: CompliantFrameAny, exclude: Sequence[str] ) -> tuple[Sequence[str], Sequence[str]]: @@ -610,10 +598,8 @@ def apply_n_ary_operation( *comparands: IntoExpr | NonNestedLiteral | _1DArray, str_as_lit: bool, ) -> CompliantExprAny: - compliant_exprs = ( - extract_compliant(plx, comparand, str_as_lit=str_as_lit) - for comparand in comparands - ) + parse = plx.parse_into_expr + compliant_exprs = (parse(into, str_as_lit=str_as_lit) for into in comparands) kinds = [ ExprKind.from_into_expr(comparand, str_as_lit=str_as_lit) for comparand in comparands diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 6645aea3f4..51a5646004 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -82,10 +82,6 @@ def __repr__(self) -> str: # pragma: no cover def _with_native(self, expr: pl.Expr) -> Self: return self.__class__(expr, self._version) - @classmethod - def _from_series(cls, series: Any) -> Self: - return cls(series.native, series._version) - def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: # Let Polars do its thing. return self diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 3de70c633e..e41d3c9ae8 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -5,6 +5,7 @@ import polars as pl +from narwhals._expression_parsing import is_expr, is_series from narwhals._polars.expr import PolarsExpr from narwhals._polars.series import PolarsSeries from narwhals._polars.utils import extract_args_kwargs, narwhals_to_native_dtype @@ -20,7 +21,17 @@ from narwhals._polars.dataframe import Method, PolarsDataFrame, PolarsLazyFrame from narwhals._polars.typing import FrameT from narwhals._utils import Version, _LimitedContext - from narwhals.typing import Into1DArray, IntoDType, IntoSchema, TimeUnit, _2DArray + from narwhals.expr import Expr + from narwhals.series import Series + from narwhals.typing import ( + Into1DArray, + IntoDType, + IntoSchema, + NonNestedLiteral, + TimeUnit, + _1DArray, + _2DArray, + ) class PolarsNamespace: @@ -70,6 +81,25 @@ def _expr(self) -> type[PolarsExpr]: def _series(self) -> type[PolarsSeries]: return PolarsSeries + def parse_into_expr( + self, + data: Expr | NonNestedLiteral | Series[pl.Series] | _1DArray, + /, + *, + str_as_lit: bool, + ) -> PolarsExpr | None: + if data is None: + # NOTE: To avoid `pl.lit(None)` failing this `None` check + # https://github.com/pola-rs/polars/blob/58dd8e5770f16a9bef9009a1c05f00e15a5263c7/py-polars/polars/expr/expr.py#L2870-L2872 + return data + if is_expr(data): + expr = data._to_compliant_expr(self) + assert isinstance(expr, self._expr) # noqa: S101 + return expr + if isinstance(data, str) and not str_as_lit: + return self.col(data) + return self.lit(data.to_native() if is_series(data) else data, None) + @overload def from_native(self, data: pl.DataFrame, /) -> PolarsDataFrame: ... @overload diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 5568cc8722..9ac10304dc 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -33,7 +33,6 @@ from typing_extensions import Self, TypeAlias, TypeIs from narwhals._polars.dataframe import Method, PolarsDataFrame - from narwhals._polars.expr import PolarsExpr from narwhals._polars.namespace import PolarsNamespace from narwhals._utils import Version, _LimitedContext from narwhals.dtypes import DType @@ -236,9 +235,6 @@ def _from_native_object( # scalar return series - def _to_expr(self) -> PolarsExpr: - return self.__narwhals_namespace__()._expr._from_series(self) - def __getattr__(self, attr: str) -> Any: if attr not in INHERITED_METHODS: msg = f"{self.__class__.__name__} has not attribute '{attr}'." @@ -509,7 +505,9 @@ def is_close( if self._backend_version < (1, 32, 0): name = self.name ns = self.__narwhals_namespace__() - other_expr = other._to_expr() if isinstance(other, PolarsSeries) else other + other_expr = ( + ns.lit(other.native, None) if isinstance(other, PolarsSeries) else other + ) expr = ns.col(name).is_close( other_expr, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal ) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index b47710e843..0519dab653 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -20,6 +20,7 @@ ExprKind, all_exprs_are_scalar_like, check_expressions_preserve_length, + is_into_expr_eager, is_scalar_like, ) from narwhals._typing import Arrow, Pandas, _LazyAllowedImpl, _LazyFrameCollectImpl @@ -42,12 +43,7 @@ supports_arrow_c_stream, zip_strict, ) -from narwhals.dependencies import ( - get_polars, - is_numpy_array, - is_numpy_array_2d, - is_pyarrow_table, -) +from narwhals.dependencies import is_numpy_array_2d, is_pyarrow_table from narwhals.exceptions import ( ColumnNotFoundError, InvalidIntoExprError, @@ -451,26 +447,9 @@ class DataFrame(BaseFrame[DataFrameT]): _version: ClassVar[Version] = Version.MAIN def _extract_compliant(self, arg: Any) -> Any: - from narwhals.expr import Expr - from narwhals.series import Series - - plx: EagerNamespaceAny = self.__narwhals_namespace__() - if isinstance(arg, Series): - return arg._compliant_series._to_expr() - if isinstance(arg, Expr): - return arg._to_compliant_expr(self.__narwhals_namespace__()) - if isinstance(arg, str): - return plx.col(arg) - if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover - msg = ( - f"Expected Narwhals object, got: {type(arg)}.\n\n" - "Perhaps you:\n" - "- Forgot a `nw.from_native` somewhere?\n" - "- Used `pl.col` instead of `nw.col`?" - ) - raise TypeError(msg) - if is_numpy_array(arg): - return plx._series.from_numpy(arg, context=plx)._to_expr() + if is_into_expr_eager(arg): + plx: EagerNamespaceAny = self.__narwhals_namespace__() + return plx.parse_into_expr(arg, str_as_lit=False) raise InvalidIntoExprError.from_invalid_type(type(arg)) @property @@ -2304,43 +2283,33 @@ def _extract_compliant(self, arg: Any) -> Any: if isinstance(arg, Series): # pragma: no cover msg = "Binary operations between Series and LazyFrame are not supported." raise TypeError(msg) - if isinstance(arg, str): # pragma: no cover - plx = self.__narwhals_namespace__() - return plx.col(arg) - if isinstance(arg, Expr): - if arg._metadata.n_orderable_ops: - msg = ( - "Order-dependent expressions are not supported for use in LazyFrame.\n\n" - "Hint: To make the expression valid, use `.over` with `order_by` specified.\n\n" - "For example, if you wrote `nw.col('price').cum_sum()` and you have a column\n" - "`'date'` which orders your data, then replace:\n\n" - " nw.col('price').cum_sum()\n\n" - " with:\n\n" - " nw.col('price').cum_sum().over(order_by='date')\n" - " ^^^^^^^^^^^^^^^^^^^^^^\n\n" - "See https://narwhals-dev.github.io/narwhals/concepts/order_dependence/." - ) - raise InvalidOperationError(msg) - if arg._metadata.is_filtration: - msg = ( - "Length-changing expressions are not supported for use in LazyFrame, unless\n" - "followed by an aggregation.\n\n" - "Hints:\n" - "- Instead of `lf.select(nw.col('a').head())`, use `lf.select('a').head()\n" - "- Instead of `lf.select(nw.col('a').drop_nulls()).select(nw.sum('a'))`,\n" - " use `lf.select(nw.col('a').drop_nulls().sum())\n" - ) - raise InvalidOperationError(msg) - return arg._to_compliant_expr(self.__narwhals_namespace__()) - if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover - msg = ( - f"Expected Narwhals object, got: {type(arg)}.\n\n" - "Perhaps you:\n" - "- Forgot a `nw.from_native` somewhere?\n" - "- Used `pl.col` instead of `nw.col`?" - ) - raise TypeError(msg) - raise InvalidIntoExprError.from_invalid_type(type(arg)) # pragma: no cover + if isinstance(arg, (Expr, str)): + if isinstance(arg, Expr): + if arg._metadata.n_orderable_ops: + msg = ( + "Order-dependent expressions are not supported for use in LazyFrame.\n\n" + "Hint: To make the expression valid, use `.over` with `order_by` specified.\n\n" + "For example, if you wrote `nw.col('price').cum_sum()` and you have a column\n" + "`'date'` which orders your data, then replace:\n\n" + " nw.col('price').cum_sum()\n\n" + " with:\n\n" + " nw.col('price').cum_sum().over(order_by='date')\n" + " ^^^^^^^^^^^^^^^^^^^^^^\n\n" + "See https://narwhals-dev.github.io/narwhals/concepts/order_dependence/." + ) + raise InvalidOperationError(msg) + if arg._metadata.is_filtration: + msg = ( + "Length-changing expressions are not supported for use in LazyFrame, unless\n" + "followed by an aggregation.\n\n" + "Hints:\n" + "- Instead of `lf.select(nw.col('a').head())`, use `lf.select('a').head()\n" + "- Instead of `lf.select(nw.col('a').drop_nulls()).select(nw.sum('a'))`,\n" + " use `lf.select(nw.col('a').drop_nulls().sum())\n" + ) + raise InvalidOperationError(msg) + return self.__narwhals_namespace__().parse_into_expr(arg, str_as_lit=False) + raise InvalidIntoExprError.from_invalid_type(type(arg)) @property def _dataframe(self) -> type[DataFrame[Any]]: diff --git a/narwhals/expr.py b/narwhals/expr.py index 9b2991d797..e6d1074f65 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -9,7 +9,6 @@ ExprMetadata, apply_n_ary_operation, combine_metadata, - extract_compliant, ) from narwhals._utils import _validate_rolling_arguments, ensure_type, flatten from narwhals.dtypes import _validate_dtype @@ -1194,7 +1193,7 @@ def fill_null( return self.__class__( lambda plx: self._to_compliant_expr(plx).fill_null( - value=extract_compliant(plx, value, str_as_lit=True), + value=plx.parse_into_expr(value, str_as_lit=True), strategy=strategy, limit=limit, ), diff --git a/narwhals/functions.py b/narwhals/functions.py index 1b9cc3b264..f4fad9ed67 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -11,7 +11,6 @@ ExprMetadata, apply_n_ary_operation, combine_metadata, - extract_compliant, is_scalar_like, ) from narwhals._utils import ( @@ -1410,7 +1409,7 @@ def otherwise(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Expr: def func(plx: CompliantNamespace[Any, Any]) -> CompliantExpr[Any, Any]: compliant_expr = self._to_compliant_expr(plx) - compliant_value = extract_compliant(plx, value, str_as_lit=False) + compliant_value = plx.parse_into_expr(value, str_as_lit=False) if ( not self._metadata.is_scalar_like and is_scalar_like(kind) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index b8abd4cc92..923a387288 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -22,7 +22,6 @@ validate_strict_and_pass_though, ) from narwhals.dataframe import DataFrame as NwDataFrame, LazyFrame as NwLazyFrame -from narwhals.dependencies import get_polars from narwhals.exceptions import InvalidIntoExprError from narwhals.expr import Expr as NwExpr from narwhals.functions import _new_series_impl, concat, show_versions @@ -235,28 +234,16 @@ def _dataframe(self) -> type[DataFrame[Any]]: return DataFrame def _extract_compliant(self, arg: Any) -> Any: - # After v1, we raise when passing order-dependent or length-changing - # expressions to LazyFrame + # After v1, we raise when passing order-dependent, length-changing, + # or filtration expressions to LazyFrame from narwhals.expr import Expr from narwhals.series import Series if isinstance(arg, Series): # pragma: no cover msg = "Mixing Series with LazyFrame is not supported." raise TypeError(msg) - if isinstance(arg, Expr): - # After stable.v1, we raise for order-dependent exprs or filtrations - return arg._to_compliant_expr(self.__narwhals_namespace__()) - if isinstance(arg, str): - plx = self.__narwhals_namespace__() - return plx.col(arg) - if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover - msg = ( - f"Expected Narwhals object, got: {type(arg)}.\n\n" - "Perhaps you:\n" - "- Forgot a `nw.from_native` somewhere?\n" - "- Used `pl.col` instead of `nw.col`?" - ) - raise TypeError(msg) + if isinstance(arg, (Expr, str)): + return self.__narwhals_namespace__().parse_into_expr(arg, str_as_lit=False) raise InvalidIntoExprError.from_invalid_type(type(arg)) def collect(