diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 324a160824..a0c56783a2 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -105,8 +105,6 @@ def __narwhals_namespace__(self) -> ArrowNamespace: return ArrowNamespace(version=self._version) - def __narwhals_expr__(self) -> None: ... - def _reuse_series_extra_kwargs( self, *, returns_scalar: bool = False ) -> dict[str, Any]: diff --git a/narwhals/_compliant/dataframe.py b/narwhals/_compliant/dataframe.py index e6d88f3c11..36c70da8d9 100644 --- a/narwhals/_compliant/dataframe.py +++ b/narwhals/_compliant/dataframe.py @@ -111,12 +111,6 @@ def native(self) -> _NativeFrameT: @property def schema(self) -> Mapping[str, DType]: ... - def aggregate(self, *exprs: CompliantExprT_contra) -> Self: - """`select` where all args are aggregations or literals. - - (so, no broadcasting is necessary). - """ - ... def collect_schema(self) -> Mapping[str, DType]: ... def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ... @@ -211,9 +205,6 @@ def __getitem__( MultiColSelector[CompliantSeriesT], ], ) -> Self: ... - def aggregate(self, *exprs: CompliantExprT_contra) -> Self: - # NOTE: Ignore is to avoid an intermittent false positive - return self.select(*exprs) # pyright: ignore[reportArgumentType] @property def shape(self) -> tuple[int, int]: ... @@ -289,7 +280,15 @@ class CompliantLazyFrame( Protocol[CompliantExprT_contra, NativeLazyFrameT, ToNarwhalsT_co], ): def __narwhals_lazyframe__(self) -> Self: ... + # `LazySelectorNamespace._iter_columns` depends def _iter_columns(self) -> Iterator[Any]: ... + def aggregate(self, *exprs: CompliantExprT_contra) -> Self: + """`select` where all args are aggregations or literals. + + (so, no broadcasting is necessary). + """ + ... + def collect( self, backend: _EagerAllowedImpl | None, **kwargs: Any ) -> CompliantDataFrameAny: ... 
@@ -317,6 +316,12 @@ def __narwhals_namespace__( def to_narwhals(self) -> DataFrame[NativeDataFrameT]: return self._version.dataframe(self, level="full") + def aggregate(self, *exprs: EagerExprT) -> Self: + # NOTE: Ignore intermittent [False Positive] + # Argument of type "EagerExprT@EagerDataFrame" cannot be assigned to parameter "exprs" of type "EagerExprT@EagerDataFrame" in function "select" + # Type "EagerExprT@EagerDataFrame" is not assignable to type "EagerExprT@EagerDataFrame" + return self.select(*exprs) # pyright: ignore[reportArgumentType] + def _with_native( self, df: NativeDataFrameT, *, validate_column_names: bool = True ) -> Self: ... @@ -331,7 +336,9 @@ def _evaluate_expr(self, expr: EagerExprT, /) -> EagerSeriesT: return result[0] def _evaluate_into_exprs(self, *exprs: EagerExprT) -> Sequence[EagerSeriesT]: - # NOTE: Ignore is to avoid an intermittent false positive + # NOTE: Ignore intermittent [False Positive] + # Argument of type "EagerExprT@EagerDataFrame" cannot be assigned to parameter "expr" of type "EagerExprT@EagerDataFrame" in function "_evaluate_into_expr" + # Type "EagerExprT@EagerDataFrame" is not assignable to type "EagerExprT@EagerDataFrame" return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs)) # pyright: ignore[reportArgumentType] def _evaluate_into_expr(self, expr: EagerExprT, /) -> Sequence[EagerSeriesT]: diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 905ef8526c..863bd8e6a9 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -81,6 +81,7 @@ def __ne__(self, value: Any, /) -> Self: ... 
# type: ignore[override] class CompliantExpr( CompliantColumn, Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co] ): + # NOTE: `narwhals` _implementation: Implementation _evaluate_output_names: EvalNames[CompliantFrameT] _alias_output_names: AliasNames | None @@ -89,9 +90,15 @@ class CompliantExpr( def __call__( self, df: CompliantFrameT ) -> Sequence[CompliantSeriesOrNativeExprT_co]: ... - def __narwhals_expr__(self) -> None: ... + def __narwhals_expr__(self) -> Self: # pragma: no cover + return self + def __narwhals_namespace__(self) -> CompliantNamespace[CompliantFrameT, Self]: ... @classmethod + def from_column_indices( + cls, *column_indices: int, context: _LimitedContext + ) -> Self: ... + @classmethod def from_column_names( cls, evaluate_column_names: EvalNames[CompliantFrameT], @@ -99,9 +106,8 @@ def from_column_names( *, context: _LimitedContext, ) -> Self: ... - @classmethod - def from_column_indices( - cls, *column_indices: int, context: _LimitedContext + def broadcast( + self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL] ) -> Self: ... @staticmethod def _eval_names_indices(indices: Sequence[int], /) -> EvalNames[CompliantFrameT]: @@ -111,6 +117,7 @@ def fn(df: CompliantFrameT) -> Sequence[str]: return fn + # NOTE: `polars` def all(self) -> Self: ... def any(self) -> Self: ... def count(self) -> Self: ... @@ -137,35 +144,24 @@ def map_batches( *, returns_scalar: bool, ) -> Self: ... - def broadcast( - self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL] - ) -> Self: ... - def _is_multi_output_unnamed(self) -> bool: - """Return `True` for multi-output aggregations without names. - - For example, column `'a'` only appears in the output as a grouping key: - - df.group_by('a').agg(nw.all().sum()) + @property + def name(self) -> NameNamespace[Self]: ... - It does not get included in: - nw.all().sum(). 
- """ - assert self._metadata is not None # noqa: S101 - return self._metadata.expansion_kind.is_multi_unnamed() - - def _evaluate_aliases( - self: CompliantExpr[CompliantFrameT, Any], frame: CompliantFrameT, / - ) -> Sequence[str]: - names = self._evaluate_output_names(frame) +class ImplExpr( + CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co], + Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co], +): + def _evaluate_aliases(self, frame: CompliantFrameT, /) -> Sequence[str]: + # NOTE: Ignore intermittent [False Negative] + # Argument of type "CompliantFrameT@ImplExpr" cannot be assigned to parameter of type "CompliantFrameT@ImplExpr" + # Type "CompliantFrameT@ImplExpr" is not assignable to type "CompliantFrameT@ImplExpr" + names = self._evaluate_output_names(frame) # pyright: ignore[reportArgumentType] return alias(names) if (alias := self._alias_output_names) else names - @property - def name(self) -> NameNamespace[Self]: ... - class DepthTrackingExpr( - CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co], + ImplExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co], Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co], ): _depth: int @@ -228,8 +224,6 @@ def __call__(self, df: EagerDataFrameT) -> Sequence[EagerSeriesT]: def __narwhals_namespace__( self, ) -> EagerNamespace[EagerDataFrameT, EagerSeriesT, Self, Any, Any]: ... - def __narwhals_expr__(self) -> None: ... - @classmethod def _from_callable( cls, @@ -888,8 +882,7 @@ def struct(self) -> EagerExprStructNamespace[Self]: # mypy thinks `NativeExprT` should be covariant, pyright thinks it should be invariant class LazyExpr( # type: ignore[misc] - CompliantExpr[CompliantLazyFrameT, NativeExprT], - Protocol[CompliantLazyFrameT, NativeExprT], + ImplExpr[CompliantLazyFrameT, NativeExprT], Protocol[CompliantLazyFrameT, NativeExprT] ): def _with_alias_output_names(self, func: AliasNames | None, /) -> Self: ... 
def alias(self, name: str) -> Self: diff --git a/narwhals/_compliant/group_by.py b/narwhals/_compliant/group_by.py index 47a7ba434a..f9529cd442 100644 --- a/narwhals/_compliant/group_by.py +++ b/narwhals/_compliant/group_by.py @@ -13,6 +13,7 @@ DepthTrackingExprAny, DepthTrackingExprT_contra, EagerExprT_contra, + ImplExprT_contra, NarwhalsAggregation, ) from narwhals._utils import is_sequence_of, zip_strict @@ -20,7 +21,7 @@ if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping, Sequence - from narwhals._compliant.expr import CompliantExpr + from narwhals._compliant.expr import ImplExpr __all__ = ["CompliantGroupBy", "DepthTrackingGroupBy", "EagerGroupBy"] @@ -34,7 +35,7 @@ def _evaluate_aliases( - frame: CompliantFrameT, exprs: Iterable[CompliantExpr[CompliantFrameT, Any]], / + frame: CompliantFrameT, exprs: Iterable[ImplExpr[CompliantFrameT, Any]], / ) -> list[str]: it = (expr._evaluate_aliases(frame) for expr in exprs) return list(chain.from_iterable(it)) @@ -67,13 +68,13 @@ def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ... class ParseKeysGroupBy( - CompliantGroupBy[CompliantFrameT, CompliantExprT_contra], - Protocol[CompliantFrameT, CompliantExprT_contra], + CompliantGroupBy[CompliantFrameT, ImplExprT_contra], + Protocol[CompliantFrameT, ImplExprT_contra], ): def _parse_keys( self, compliant_frame: CompliantFrameT, - keys: Sequence[CompliantExprT_contra] | Sequence[str], + keys: Sequence[ImplExprT_contra] | Sequence[str], ) -> tuple[CompliantFrameT, list[str], list[str]]: if is_sequence_of(keys, str): keys_str = list(keys) @@ -82,7 +83,7 @@ def _parse_keys( @staticmethod def _parse_expr_keys( - compliant_frame: CompliantFrameT, keys: Sequence[CompliantExprT_contra] + compliant_frame: CompliantFrameT, keys: Sequence[ImplExprT_contra] ) -> tuple[CompliantFrameT, list[str], list[str]]: """Parses key expressions to set up `.agg` operation with correct information. 
diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index 83a9ecf0eb..2c6c575fd4 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -16,12 +16,13 @@ NativeFrameT_co, NativeSeriesT, ) +from narwhals._expression_parsing import is_expr, is_series from narwhals._utils import ( exclude_column_names, get_column_names, passthrough_column_names, ) -from narwhals.dependencies import is_numpy_array_2d +from narwhals.dependencies import is_numpy_array, is_numpy_array_2d if TYPE_CHECKING: from collections.abc import Container, Iterable, Sequence @@ -31,12 +32,15 @@ from narwhals._compliant.selectors import CompliantSelectorNamespace from narwhals._compliant.when_then import CompliantWhen, EagerWhen from narwhals._utils import Implementation, Version + from narwhals.expr import Expr + from narwhals.series import Series from narwhals.typing import ( ConcatMethod, Into1DArray, IntoDType, IntoSchema, NonNestedLiteral, + _1DArray, _2DArray, ) @@ -51,9 +55,24 @@ class CompliantNamespace(Protocol[CompliantFrameT, CompliantExprT]): + # NOTE: `narwhals` _implementation: Implementation _version: Version + @property + def _expr(self) -> type[CompliantExprT]: ... + def parse_into_expr( + self, data: Expr | NonNestedLiteral | Any, /, *, str_as_lit: bool + ) -> CompliantExprT | NonNestedLiteral: + if is_expr(data): + expr = data._to_compliant_expr(self) + assert isinstance(expr, self._expr) # noqa: S101 + return expr + if isinstance(data, str) and not str_as_lit: + return self.col(data) + return data + + # NOTE: `polars` def all(self) -> CompliantExprT: return self._expr.from_column_names(get_column_names, context=self) @@ -93,8 +112,6 @@ def concat_str( ) -> CompliantExprT: ... @property def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ... - @property - def _expr(self) -> type[CompliantExprT]: ... def coalesce(self, *exprs: CompliantExprT) -> CompliantExprT: ... 
@@ -168,6 +185,21 @@ def from_native( msg = f"Unsupported type: {type(data).__name__!r}" raise TypeError(msg) + def parse_into_expr( + self, + data: Expr | Series[NativeSeriesT] | _1DArray | NonNestedLiteral, + /, + *, + str_as_lit: bool, + ) -> EagerExprT | NonNestedLiteral: + if not (is_series(data) or is_numpy_array(data)): + return super().parse_into_expr(data, str_as_lit=str_as_lit) + return self._expr._from_series( + data._compliant_series + if is_series(data) + else self._series.from_numpy(data, context=self) + ) + @overload def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> EagerSeriesT: ... diff --git a/narwhals/_compliant/selectors.py b/narwhals/_compliant/selectors.py index 4f892134c4..515d0ccd15 100644 --- a/narwhals/_compliant/selectors.py +++ b/narwhals/_compliant/selectors.py @@ -56,9 +56,12 @@ class CompliantSelectorNamespace(Protocol[FrameT, SeriesOrExprT]): + # NOTE: `narwhals` _implementation: Implementation _version: Version + @property + def _selector(self) -> type[CompliantSelector[FrameT, SeriesOrExprT]]: ... @classmethod def from_namespace(cls, context: _LimitedContext, /) -> Self: obj = cls.__new__(cls) @@ -66,17 +69,11 @@ def from_namespace(cls, context: _LimitedContext, /) -> Self: obj._version = context._version return obj - @property - def _selector(self) -> type[CompliantSelector[FrameT, SeriesOrExprT]]: ... - def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesOrExprT]: ... - def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]: ... - def _iter_columns_dtypes( self, df: FrameT, / ) -> Iterator[tuple[SeriesOrExprT, DType]]: ... 
- def _iter_columns_names(self, df: FrameT, /) -> Iterator[tuple[SeriesOrExprT, str]]: yield from zip_strict(self._iter_columns(df), df.columns) @@ -93,6 +90,7 @@ def names(df: FrameT) -> Sequence[str]: return self._selector.from_callables(series, names, context=self) + # NOTE: `polars` def by_dtype( self, dtypes: Collection[DType | type[DType]] ) -> CompliantSelector[FrameT, SeriesOrExprT]: diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index b49157e1f7..561ff3de6d 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -37,7 +37,6 @@ from typing_extensions import NotRequired, Self, TypedDict from narwhals._compliant.dataframe import CompliantDataFrame - from narwhals._compliant.expr import CompliantExpr, EagerExpr from narwhals._compliant.namespace import EagerNamespace from narwhals._utils import Implementation, Version, _LimitedContext from narwhals.dtypes import DType @@ -80,18 +79,28 @@ class CompliantSeries( CompliantColumn, Protocol[NativeSeriesT], ): + # NOTE: `narwhals` _implementation: Implementation - @property - def dtype(self) -> DType: ... - @property - def name(self) -> str: ... @property def native(self) -> NativeSeriesT: ... def __narwhals_series__(self) -> Self: return self def __native_namespace__(self) -> ModuleType: ... + @classmethod + def from_native(cls, data: NativeSeriesT, /, *, context: _LimitedContext) -> Self: ... + def to_narwhals(self) -> Series[NativeSeriesT]: + return self._version.series(self, level="full") + + def _with_native(self, series: Any) -> Self: ... + def _with_version(self, version: Version) -> Self: ... + + # NOTE: `polars` + @property + def dtype(self) -> DType: ... + @property + def name(self) -> str: ... def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray: ... def __contains__(self, other: Any) -> bool: ... def __getitem__(self, item: MultiIndexSelector[Self]) -> Any: ... @@ -99,11 +108,6 @@ def __iter__(self) -> Iterator[Any]: ... 
def __len__(self) -> int: return len(self.native) - def _with_native(self, series: Any) -> Self: ... - def _with_version(self, version: Version) -> Self: ... - def _to_expr(self) -> CompliantExpr[Any, Self]: ... - @classmethod - def from_native(cls, data: NativeSeriesT, /, *, context: _LimitedContext) -> Self: ... @classmethod def from_numpy(cls, data: Into1DArray, /, *, context: _LimitedContext) -> Self: ... @classmethod @@ -116,9 +120,6 @@ def from_iterable( name: str = "", dtype: IntoDType | None = None, ) -> Self: ... - def to_narwhals(self) -> Series[NativeSeriesT]: - return self._version.series(self, level="full") - def __radd__(self, other: Any) -> Self: ... def __rand__(self, other: Any) -> Self: ... def __rmul__(self, other: Any) -> Self: ... @@ -177,6 +178,8 @@ def value_counts( ) -> CompliantDataFrame[Self, Any, Any, Any]: ... def var(self, *, ddof: int) -> float: ... def zip_with(self, mask: Any, other: Any) -> Self: ... + + # NOTE: *Technically* `polars` @unstable def hist_from_bins( self, bins: list[float], *, include_breakpoint: bool @@ -240,9 +243,6 @@ def __narwhals_namespace__( self, ) -> EagerNamespace[Any, Self, Any, Any, NativeSeriesT]: ... - def _to_expr(self) -> EagerExpr[Any, Any]: - return self.__narwhals_namespace__()._expr._from_series(self) # type: ignore[no-any-return] - def _gather(self, rows: SizedMultiIndexSelector[NativeSeriesT]) -> Self: ... def _gather_slice(self, rows: _SliceIndex | range) -> Self: ... 
def __getitem__(self, item: MultiIndexSelector[Self]) -> Self: diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index e3719061be..6f7f45d548 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -16,6 +16,7 @@ CompliantExpr, DepthTrackingExpr, EagerExpr, + ImplExpr, LazyExpr, NativeExpr, ) @@ -78,6 +79,8 @@ class ScalarKwargs(TypedDict, total=False): CompliantFrameAny: TypeAlias = "CompliantFrame[Any, Any, Any]" CompliantNamespaceAny: TypeAlias = "CompliantNamespace[Any, Any]" +ImplExprAny: TypeAlias = "ImplExpr[Any, Any]" + DepthTrackingExprAny: TypeAlias = "DepthTrackingExpr[Any, Any]" EagerDataFrameAny: TypeAlias = "EagerDataFrame[Any, Any, Any, Any]" @@ -136,6 +139,8 @@ class ScalarKwargs(TypedDict, total=False): "CompliantNamespaceT_co", bound=CompliantNamespaceAny, covariant=True ) +ImplExprT_contra = TypeVar("ImplExprT_contra", bound=ImplExprAny, contravariant=True) + DepthTrackingExprT = TypeVar("DepthTrackingExprT", bound=DepthTrackingExprAny) DepthTrackingExprT_contra = TypeVar( "DepthTrackingExprT_contra", bound=DepthTrackingExprAny, contravariant=True diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 454472b614..2ff2958d43 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -73,8 +73,6 @@ def __init__( def __call__(self, df: DaskLazyFrame) -> Sequence[dx.Series]: return self._call(df) - def __narwhals_expr__(self) -> None: ... - def __narwhals_namespace__(self) -> DaskNamespace: # pragma: no cover from narwhals._dask.namespace import DaskNamespace diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 9698d5e6ab..cc0112159d 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -93,8 +93,6 @@ def _window_expression( nulls_last=nulls_last, ) - def __narwhals_expr__(self) -> None: ... 
- def __narwhals_namespace__(self) -> DuckDBNamespace: # pragma: no cover from narwhals._duckdb.namespace import DuckDBNamespace diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index e4ad3c5893..b5cbd85f6b 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -6,10 +6,10 @@ from enum import Enum, auto from itertools import chain -from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast +from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar from narwhals._utils import is_compliant_expr, zip_strict -from narwhals.dependencies import is_narwhals_series, is_numpy_array +from narwhals.dependencies import is_narwhals_series, is_numpy_array, is_numpy_array_1d from narwhals.exceptions import InvalidOperationError, MultiOutputExpressionError if TYPE_CHECKING: @@ -23,7 +23,6 @@ CompliantExprAny, CompliantFrameAny, CompliantNamespaceAny, - EagerNamespaceAny, EvalNames, ) from narwhals.expr import Expr @@ -47,6 +46,13 @@ def is_series(obj: Any) -> TypeIs[Series[Any]]: return isinstance(obj, Series) +def is_into_expr_eager(obj: Any) -> TypeIs[Expr | Series[Any] | str | _1DArray]: + from narwhals.expr import Expr + from narwhals.series import Series + + return isinstance(obj, (Series, Expr, str)) or is_numpy_array_1d(obj) + + def combine_evaluate_output_names( *exprs: CompliantExpr[CompliantFrameT, Any], ) -> EvalNames[CompliantFrameT]: @@ -74,24 +80,6 @@ def alias_output_names(names: Sequence[str]) -> Sequence[str]: return alias_output_names -def extract_compliant( - plx: CompliantNamespaceAny, - other: IntoExpr | NonNestedLiteral | _1DArray, - *, - str_as_lit: bool, -) -> CompliantExprAny | NonNestedLiteral: - if is_expr(other): - return other._to_compliant_expr(plx) - if isinstance(other, str) and not str_as_lit: - return plx.col(other) - if is_narwhals_series(other): - return other._compliant_series._to_expr() - if is_numpy_array(other): - ns = cast("EagerNamespaceAny", plx) - return 
ns._series.from_numpy(other, context=ns)._to_expr() - return other - - def evaluate_output_names_and_aliases( expr: CompliantExprAny, df: CompliantFrameAny, exclude: Sequence[str] ) -> tuple[Sequence[str], Sequence[str]]: @@ -610,10 +598,8 @@ def apply_n_ary_operation( *comparands: IntoExpr | NonNestedLiteral | _1DArray, str_as_lit: bool, ) -> CompliantExprAny: - compliant_exprs = ( - extract_compliant(plx, comparand, str_as_lit=str_as_lit) - for comparand in comparands - ) + parse = plx.parse_into_expr + compliant_exprs = (parse(into, str_as_lit=str_as_lit) for into in comparands) kinds = [ ExprKind.from_into_expr(comparand, str_as_lit=str_as_lit) for comparand in comparands diff --git a/narwhals/_ibis/expr.py b/narwhals/_ibis/expr.py index 7e7e8d1770..17e372c3bb 100644 --- a/narwhals/_ibis/expr.py +++ b/narwhals/_ibis/expr.py @@ -101,8 +101,6 @@ def _window_expression( ) return expr.over(window) - def __narwhals_expr__(self) -> None: ... - def __narwhals_namespace__(self) -> IbisNamespace: # pragma: no cover from narwhals._ibis.namespace import IbisNamespace diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 5705eedc8c..6cab84e3a3 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -134,8 +134,6 @@ def __narwhals_namespace__(self) -> PandasLikeNamespace: return PandasLikeNamespace(self._implementation, version=self._version) - def __narwhals_expr__(self) -> None: ... 
- @classmethod def from_column_names( cls: type[Self], diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index df79180c9a..5a9fbbe670 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -113,7 +113,9 @@ class PolarsBaseFrame(Generic[NativePolarsFrame]): unique: Method[Self] with_columns: Method[Self] + _native_frame: NativePolarsFrame _implementation = Implementation.POLARS + _version: Version def __init__( self, @@ -122,7 +124,7 @@ def __init__( version: Version, validate_backend_version: bool = False, ) -> None: - self._native_frame: NativePolarsFrame = df + self._native_frame = df self._version = version if validate_backend_version: self._validate_backend_version() diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 9a238947ab..c03b6dca97 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -22,21 +22,48 @@ from typing_extensions import Self - from narwhals._compliant.typing import EvalNames from narwhals._expression_parsing import ExprKind, ExprMetadata - from narwhals._polars.dataframe import Method, PolarsDataFrame + from narwhals._polars.dataframe import Method from narwhals._polars.namespace import PolarsNamespace - from narwhals._utils import Version, _LimitedContext + from narwhals._utils import Version from narwhals.typing import IntoDType, ModeKeepStrategy, NumericLiteral class PolarsExpr: - _implementation = Implementation.POLARS + # CompliantExpr + _implementation: Implementation = Implementation.POLARS + _version: Version + _native_expr: pl.Expr + _metadata: ExprMetadata | None = None + _evaluate_output_names: Any + _alias_output_names: Any + __call__: Any + + # CompliantExpr + builtin descriptor + # TODO @dangotbanned: Remove in #2713 + @classmethod + def from_column_names(cls, *_: Any, **__: Any) -> Self: + raise NotImplementedError + + @classmethod + def from_column_indices(cls, *_: Any, **__: Any) -> Self: + raise NotImplementedError + + @staticmethod + 
def _eval_names_indices(*_: Any) -> Any: + raise NotImplementedError + + def __narwhals_expr__(self) -> Self: # pragma: no cover + return self + + def __narwhals_namespace__(self) -> PolarsNamespace: # pragma: no cover + from narwhals._polars.namespace import PolarsNamespace + + return PolarsNamespace(version=self._version) def __init__(self, expr: pl.Expr, version: Version) -> None: self._native_expr = expr self._version = version - self._metadata: ExprMetadata | None = None @property def _backend_version(self) -> tuple[int, ...]: @@ -52,10 +79,6 @@ def __repr__(self) -> str: # pragma: no cover def _with_native(self, expr: pl.Expr) -> Self: return self.__class__(expr, self._version) - @classmethod - def _from_series(cls, series: Any) -> Self: - return cls(series.native, series._version) - def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self: # Let Polars do its thing. return self @@ -234,12 +257,6 @@ def __invert__(self) -> Self: def cum_count(self, *, reverse: bool) -> Self: return self._with_native(self.native.cum_count(reverse=reverse)) - def __narwhals_expr__(self) -> None: ... 
- def __narwhals_namespace__(self) -> PolarsNamespace: # pragma: no cover - from narwhals._polars.namespace import PolarsNamespace - - return PolarsNamespace(version=self._version) - def is_close( self, other: Self | NumericLiteral, @@ -308,33 +325,6 @@ def list(self) -> PolarsExprListNamespace: def struct(self) -> PolarsExprStructNamespace: return PolarsExprStructNamespace(self) - # CompliantExpr - _alias_output_names: Any - _evaluate_aliases: Any - _evaluate_output_names: Any - _is_multi_output_unnamed: Any - __call__: Any - - # CompliantExpr + builtin descriptor - # TODO @dangotbanned: Remove in #2713 - @classmethod - def from_column_names( - cls, - evaluate_column_names: EvalNames[PolarsDataFrame], - /, - *, - context: _LimitedContext, - ) -> Self: - raise NotImplementedError - - @classmethod - def from_column_indices(cls, *column_indices: int, context: _LimitedContext) -> Self: - raise NotImplementedError - - @staticmethod - def _eval_names_indices(indices: Sequence[int], /) -> EvalNames[PolarsDataFrame]: - raise NotImplementedError - # Polars abs: Method[Self] all: Method[Self] diff --git a/narwhals/_polars/group_by.py b/narwhals/_polars/group_by.py index 74e4efc47d..e0722565bb 100644 --- a/narwhals/_polars/group_by.py +++ b/narwhals/_polars/group_by.py @@ -17,8 +17,6 @@ class PolarsGroupBy: _compliant_frame: PolarsDataFrame _grouped: NativeGroupBy - _drop_null_keys: bool - _output_names: Sequence[str] @property def compliant(self) -> PolarsDataFrame: @@ -52,8 +50,6 @@ def __iter__(self) -> Iterator[tuple[tuple[str, ...], PolarsDataFrame]]: class PolarsLazyGroupBy: _compliant_frame: PolarsLazyFrame _grouped: NativeLazyGroupBy - _drop_null_keys: bool - _output_names: Sequence[str] @property def compliant(self) -> PolarsLazyFrame: diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 3de70c633e..08eb56dbf2 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -5,6 +5,7 @@ import polars as pl +from 
narwhals._expression_parsing import is_expr, is_series from narwhals._polars.expr import PolarsExpr from narwhals._polars.series import PolarsSeries from narwhals._polars.utils import extract_args_kwargs, narwhals_to_native_dtype @@ -20,7 +21,17 @@ from narwhals._polars.dataframe import Method, PolarsDataFrame, PolarsLazyFrame from narwhals._polars.typing import FrameT from narwhals._utils import Version, _LimitedContext - from narwhals.typing import Into1DArray, IntoDType, IntoSchema, TimeUnit, _2DArray + from narwhals.expr import Expr + from narwhals.series import Series + from narwhals.typing import ( + Into1DArray, + IntoDType, + IntoSchema, + NonNestedLiteral, + TimeUnit, + _1DArray, + _2DArray, + ) class PolarsNamespace: @@ -35,6 +46,7 @@ class PolarsNamespace: when: Method[CompliantWhen[PolarsDataFrame, PolarsSeries, PolarsExpr]] _implementation: Implementation = Implementation.POLARS + _version: Version @property def _backend_version(self) -> tuple[int, ...]: @@ -70,6 +82,25 @@ def _expr(self) -> type[PolarsExpr]: def _series(self) -> type[PolarsSeries]: return PolarsSeries + def parse_into_expr( + self, + data: Expr | NonNestedLiteral | Series[pl.Series] | _1DArray, + /, + *, + str_as_lit: bool, + ) -> PolarsExpr | None: + if data is None: + # NOTE: To avoid `pl.lit(None)` failing this `None` check + # https://github.com/pola-rs/polars/blob/58dd8e5770f16a9bef9009a1c05f00e15a5263c7/py-polars/polars/expr/expr.py#L2870-L2872 + return data + if is_expr(data): + expr = data._to_compliant_expr(self) + assert isinstance(expr, self._expr) # noqa: S101 + return expr + if isinstance(data, str) and not str_as_lit: + return self.col(data) + return self.lit(data.to_native() if is_series(data) else data, None) + @overload def from_native(self, data: pl.DataFrame, /) -> PolarsDataFrame: ... 
@overload diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 5568cc8722..8da184e335 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -33,7 +33,6 @@ from typing_extensions import Self, TypeAlias, TypeIs from narwhals._polars.dataframe import Method, PolarsDataFrame - from narwhals._polars.expr import PolarsExpr from narwhals._polars.namespace import PolarsNamespace from narwhals._utils import Version, _LimitedContext from narwhals.dtypes import DType @@ -138,7 +137,9 @@ class PolarsSeries: - _implementation = Implementation.POLARS + _implementation: Implementation = Implementation.POLARS + _native_series: pl.Series + _version: Version _HIST_EMPTY_SCHEMA: ClassVar[Mapping[IncludeBreakpoint, Sequence[str]]] = { True: ["breakpoint", "count"], @@ -146,7 +147,7 @@ class PolarsSeries: } def __init__(self, series: pl.Series, *, version: Version) -> None: - self._native_series: pl.Series = series + self._native_series = series self._version = version @property @@ -236,9 +237,6 @@ def _from_native_object( # scalar return series - def _to_expr(self) -> PolarsExpr: - return self.__narwhals_namespace__()._expr._from_series(self) - def __getattr__(self, attr: str) -> Any: if attr not in INHERITED_METHODS: msg = f"{self.__class__.__name__} has not attribute '{attr}'." 
@@ -509,7 +507,9 @@ def is_close( if self._backend_version < (1, 32, 0): name = self.name ns = self.__narwhals_namespace__() - other_expr = other._to_expr() if isinstance(other, PolarsSeries) else other + other_expr = ( + ns.lit(other.native, None) if isinstance(other, PolarsSeries) else other + ) expr = ns.col(name).is_close( other_expr, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal ) diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 73b3df80a5..3b5d1c1653 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -149,8 +149,6 @@ def partition_by(self, *cols: Column | str) -> WindowSpec: """Wraps `Window().partitionBy`, with default and `WindowInputs` handling.""" return self._Window.partitionBy(*cols or [self._F.lit(1)]) - def __narwhals_expr__(self) -> None: ... - def __narwhals_namespace__(self) -> SparkLikeNamespace: # pragma: no cover from narwhals._spark_like.namespace import SparkLikeNamespace diff --git a/narwhals/_sql/expr.py b/narwhals/_sql/expr.py index 279479d0b1..db6c5dff9d 100644 --- a/narwhals/_sql/expr.py +++ b/narwhals/_sql/expr.py @@ -332,6 +332,20 @@ def window_function( implementation=context._implementation, ) + def _is_multi_output_unnamed(self) -> bool: + """Return `True` for multi-output aggregations without names. + + For example, column `'a'` only appears in the output as a grouping key: + + df.group_by('a').agg(nw.all().sum()) + + It does not get included in: + + nw.all().sum(). 
+ """ + assert self._metadata is not None # noqa: S101 + return self._metadata.expansion_kind.is_multi_unnamed() + # Binary def __eq__(self, other: Self) -> Self: # type: ignore[override] return self._with_binary(lambda expr, other: expr.__eq__(other), other) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index b47710e843..0519dab653 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -20,6 +20,7 @@ ExprKind, all_exprs_are_scalar_like, check_expressions_preserve_length, + is_into_expr_eager, is_scalar_like, ) from narwhals._typing import Arrow, Pandas, _LazyAllowedImpl, _LazyFrameCollectImpl @@ -42,12 +43,7 @@ supports_arrow_c_stream, zip_strict, ) -from narwhals.dependencies import ( - get_polars, - is_numpy_array, - is_numpy_array_2d, - is_pyarrow_table, -) +from narwhals.dependencies import is_numpy_array_2d, is_pyarrow_table from narwhals.exceptions import ( ColumnNotFoundError, InvalidIntoExprError, @@ -451,26 +447,9 @@ class DataFrame(BaseFrame[DataFrameT]): _version: ClassVar[Version] = Version.MAIN def _extract_compliant(self, arg: Any) -> Any: - from narwhals.expr import Expr - from narwhals.series import Series - - plx: EagerNamespaceAny = self.__narwhals_namespace__() - if isinstance(arg, Series): - return arg._compliant_series._to_expr() - if isinstance(arg, Expr): - return arg._to_compliant_expr(self.__narwhals_namespace__()) - if isinstance(arg, str): - return plx.col(arg) - if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover - msg = ( - f"Expected Narwhals object, got: {type(arg)}.\n\n" - "Perhaps you:\n" - "- Forgot a `nw.from_native` somewhere?\n" - "- Used `pl.col` instead of `nw.col`?" 
- ) - raise TypeError(msg) - if is_numpy_array(arg): - return plx._series.from_numpy(arg, context=plx)._to_expr() + if is_into_expr_eager(arg): + plx: EagerNamespaceAny = self.__narwhals_namespace__() + return plx.parse_into_expr(arg, str_as_lit=False) raise InvalidIntoExprError.from_invalid_type(type(arg)) @property @@ -2304,43 +2283,33 @@ def _extract_compliant(self, arg: Any) -> Any: if isinstance(arg, Series): # pragma: no cover msg = "Binary operations between Series and LazyFrame are not supported." raise TypeError(msg) - if isinstance(arg, str): # pragma: no cover - plx = self.__narwhals_namespace__() - return plx.col(arg) - if isinstance(arg, Expr): - if arg._metadata.n_orderable_ops: - msg = ( - "Order-dependent expressions are not supported for use in LazyFrame.\n\n" - "Hint: To make the expression valid, use `.over` with `order_by` specified.\n\n" - "For example, if you wrote `nw.col('price').cum_sum()` and you have a column\n" - "`'date'` which orders your data, then replace:\n\n" - " nw.col('price').cum_sum()\n\n" - " with:\n\n" - " nw.col('price').cum_sum().over(order_by='date')\n" - " ^^^^^^^^^^^^^^^^^^^^^^\n\n" - "See https://narwhals-dev.github.io/narwhals/concepts/order_dependence/." - ) - raise InvalidOperationError(msg) - if arg._metadata.is_filtration: - msg = ( - "Length-changing expressions are not supported for use in LazyFrame, unless\n" - "followed by an aggregation.\n\n" - "Hints:\n" - "- Instead of `lf.select(nw.col('a').head())`, use `lf.select('a').head()\n" - "- Instead of `lf.select(nw.col('a').drop_nulls()).select(nw.sum('a'))`,\n" - " use `lf.select(nw.col('a').drop_nulls().sum())\n" - ) - raise InvalidOperationError(msg) - return arg._to_compliant_expr(self.__narwhals_namespace__()) - if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover - msg = ( - f"Expected Narwhals object, got: {type(arg)}.\n\n" - "Perhaps you:\n" - "- Forgot a `nw.from_native` somewhere?\n" - "- Used `pl.col` instead of `nw.col`?" 
- ) - raise TypeError(msg) - raise InvalidIntoExprError.from_invalid_type(type(arg)) # pragma: no cover + if isinstance(arg, (Expr, str)): + if isinstance(arg, Expr): + if arg._metadata.n_orderable_ops: + msg = ( + "Order-dependent expressions are not supported for use in LazyFrame.\n\n" + "Hint: To make the expression valid, use `.over` with `order_by` specified.\n\n" + "For example, if you wrote `nw.col('price').cum_sum()` and you have a column\n" + "`'date'` which orders your data, then replace:\n\n" + " nw.col('price').cum_sum()\n\n" + " with:\n\n" + " nw.col('price').cum_sum().over(order_by='date')\n" + " ^^^^^^^^^^^^^^^^^^^^^^\n\n" + "See https://narwhals-dev.github.io/narwhals/concepts/order_dependence/." + ) + raise InvalidOperationError(msg) + if arg._metadata.is_filtration: + msg = ( + "Length-changing expressions are not supported for use in LazyFrame, unless\n" + "followed by an aggregation.\n\n" + "Hints:\n" + "- Instead of `lf.select(nw.col('a').head())`, use `lf.select('a').head()\n" + "- Instead of `lf.select(nw.col('a').drop_nulls()).select(nw.sum('a'))`,\n" + " use `lf.select(nw.col('a').drop_nulls().sum())\n" + ) + raise InvalidOperationError(msg) + return self.__narwhals_namespace__().parse_into_expr(arg, str_as_lit=False) + raise InvalidIntoExprError.from_invalid_type(type(arg)) @property def _dataframe(self) -> type[DataFrame[Any]]: diff --git a/narwhals/expr.py b/narwhals/expr.py index 9b2991d797..e6d1074f65 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -9,7 +9,6 @@ ExprMetadata, apply_n_ary_operation, combine_metadata, - extract_compliant, ) from narwhals._utils import _validate_rolling_arguments, ensure_type, flatten from narwhals.dtypes import _validate_dtype @@ -1194,7 +1193,7 @@ def fill_null( return self.__class__( lambda plx: self._to_compliant_expr(plx).fill_null( - value=extract_compliant(plx, value, str_as_lit=True), + value=plx.parse_into_expr(value, str_as_lit=True), strategy=strategy, limit=limit, ), diff --git 
a/narwhals/functions.py b/narwhals/functions.py index 1b9cc3b264..f4fad9ed67 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -11,7 +11,6 @@ ExprMetadata, apply_n_ary_operation, combine_metadata, - extract_compliant, is_scalar_like, ) from narwhals._utils import ( @@ -1410,7 +1409,7 @@ def otherwise(self, value: IntoExpr | NonNestedLiteral | _1DArray) -> Expr: def func(plx: CompliantNamespace[Any, Any]) -> CompliantExpr[Any, Any]: compliant_expr = self._to_compliant_expr(plx) - compliant_value = extract_compliant(plx, value, str_as_lit=False) + compliant_value = plx.parse_into_expr(value, str_as_lit=False) if ( not self._metadata.is_scalar_like and is_scalar_like(kind) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index b8abd4cc92..923a387288 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -22,7 +22,6 @@ validate_strict_and_pass_though, ) from narwhals.dataframe import DataFrame as NwDataFrame, LazyFrame as NwLazyFrame -from narwhals.dependencies import get_polars from narwhals.exceptions import InvalidIntoExprError from narwhals.expr import Expr as NwExpr from narwhals.functions import _new_series_impl, concat, show_versions @@ -235,28 +234,16 @@ def _dataframe(self) -> type[DataFrame[Any]]: return DataFrame def _extract_compliant(self, arg: Any) -> Any: - # After v1, we raise when passing order-dependent or length-changing - # expressions to LazyFrame + # After v1, we raise when passing order-dependent, length-changing, + # or filtration expressions to LazyFrame from narwhals.expr import Expr from narwhals.series import Series if isinstance(arg, Series): # pragma: no cover msg = "Mixing Series with LazyFrame is not supported." 
raise TypeError(msg) - if isinstance(arg, Expr): - # After stable.v1, we raise for order-dependent exprs or filtrations - return arg._to_compliant_expr(self.__narwhals_namespace__()) - if isinstance(arg, str): - plx = self.__narwhals_namespace__() - return plx.col(arg) - if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover - msg = ( - f"Expected Narwhals object, got: {type(arg)}.\n\n" - "Perhaps you:\n" - "- Forgot a `nw.from_native` somewhere?\n" - "- Used `pl.col` instead of `nw.col`?" - ) - raise TypeError(msg) + if isinstance(arg, (Expr, str)): + return self.__narwhals_namespace__().parse_into_expr(arg, str_as_lit=False) raise InvalidIntoExprError.from_invalid_type(type(arg)) def collect(