diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index c80238ec4f..b97b50ac0f 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING from typing import Any -from typing import Callable from typing import Literal from typing import Sequence @@ -22,6 +21,9 @@ from narwhals._arrow.dataframe import ArrowDataFrame from narwhals._arrow.namespace import ArrowNamespace + from narwhals._compliant.typing import AliasNames + from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalSeries from narwhals._expression_parsing import ExprMetadata from narwhals.utils import Version from narwhals.utils import _FullContext @@ -32,12 +34,12 @@ class ArrowExpr(EagerExpr["ArrowDataFrame", ArrowSeries]): def __init__( self: Self, - call: Callable[[ArrowDataFrame], Sequence[ArrowSeries]], + call: EvalSeries[ArrowDataFrame, ArrowSeries], *, depth: int, function_name: str, - evaluate_output_names: Callable[[ArrowDataFrame], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + evaluate_output_names: EvalNames[ArrowDataFrame], + alias_output_names: AliasNames | None, backend_version: tuple[int, ...], version: Version, call_kwargs: dict[str, Any] | None = None, @@ -57,7 +59,7 @@ def __init__( @classmethod def from_column_names( cls: type[Self], - evaluate_column_names: Callable[[ArrowDataFrame], Sequence[str]], + evaluate_column_names: EvalNames[ArrowDataFrame], /, *, context: _FullContext, @@ -139,11 +141,7 @@ def cum_sum(self: Self, *, reverse: bool) -> Self: def shift(self: Self, n: int) -> Self: return self._reuse_series("shift", n=n) - def over( - self: Self, - partition_by: Sequence[str], - order_by: Sequence[str] | None, - ) -> Self: + def over(self, partition_by: Sequence[str], order_by: Sequence[str] | None) -> Self: assert self._metadata is not None # noqa: S101 if partition_by and not is_scalar_like(self._metadata.kind): msg = "Only 
aggregation or literal operations are supported in grouped `over` context for PyArrow." diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 3e3f938410..b54ce6acd5 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -58,6 +58,9 @@ from narwhals._compliant.namespace import CompliantNamespace from narwhals._compliant.namespace import EagerNamespace from narwhals._compliant.series import CompliantSeries + from narwhals._compliant.typing import AliasNames + from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalSeries from narwhals._expression_parsing import ExprKind from narwhals._expression_parsing import ExprMetadata from narwhals.dtypes import DType @@ -84,21 +87,19 @@ class CompliantExpr(Protocol38[CompliantFrameT, CompliantSeriesOrNativeExprT_co] _implementation: Implementation _backend_version: tuple[int, ...] _version: Version - _evaluate_output_names: Callable[[CompliantFrameT], Sequence[str]] - _alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None + _evaluate_output_names: EvalNames[CompliantFrameT] + _alias_output_names: AliasNames | None _metadata: ExprMetadata | None def __call__( self, df: CompliantFrameT ) -> Sequence[CompliantSeriesOrNativeExprT_co]: ... def __narwhals_expr__(self) -> None: ... - def __narwhals_namespace__( - self, - ) -> CompliantNamespace[CompliantFrameT, Self]: ... + def __narwhals_namespace__(self) -> CompliantNamespace[CompliantFrameT, Self]: ... 
@classmethod def from_column_names( cls, - evaluate_column_names: Callable[[CompliantFrameT], Sequence[str]], + evaluate_column_names: EvalNames[CompliantFrameT], /, *, context: _FullContext, @@ -298,7 +299,7 @@ class DepthTrackingExpr( @classmethod def from_column_names( cls: type[Self], - evaluate_column_names: Callable[[CompliantFrameT], Sequence[str]], + evaluate_column_names: EvalNames[CompliantFrameT], /, *, context: _FullContext, @@ -330,17 +331,17 @@ class EagerExpr( DepthTrackingExpr[EagerDataFrameT, EagerSeriesT], Protocol38[EagerDataFrameT, EagerSeriesT], ): - _call: Callable[[EagerDataFrameT], Sequence[EagerSeriesT]] + _call: EvalSeries[EagerDataFrameT, EagerSeriesT] _call_kwargs: dict[str, Any] def __init__( self: Self, - call: Callable[[EagerDataFrameT], Sequence[EagerSeriesT]], + call: EvalSeries[EagerDataFrameT, EagerSeriesT], *, depth: int, function_name: str, - evaluate_output_names: Callable[[EagerDataFrameT], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + evaluate_output_names: EvalNames[EagerDataFrameT], + alias_output_names: AliasNames | None, implementation: Implementation, backend_version: tuple[int, ...], version: Version, @@ -358,12 +359,12 @@ def __narwhals_expr__(self) -> None: ... 
@classmethod def _from_callable( cls, - func: Callable[[EagerDataFrameT], Sequence[EagerSeriesT]], + func: EvalSeries[EagerDataFrameT, EagerSeriesT], *, depth: int, function_name: str, - evaluate_output_names: Callable[[EagerDataFrameT], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + evaluate_output_names: EvalNames[EagerDataFrameT], + alias_output_names: AliasNames | None, context: _FullContext, call_kwargs: dict[str, Any] | None = None, ) -> Self: diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 41779df685..906d17570c 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -17,6 +17,7 @@ from narwhals._compliant.expr import EagerExpr from narwhals._compliant.expr import LazyExpr from narwhals._compliant.expr import NativeExpr + from narwhals._compliant.namespace import CompliantNamespace from narwhals._compliant.namespace import EagerNamespace from narwhals._compliant.series import CompliantSeries from narwhals._compliant.series import EagerSeries @@ -42,6 +43,7 @@ CompliantDataFrameAny: TypeAlias = "CompliantDataFrame[Any, Any, Any]" CompliantLazyFrameAny: TypeAlias = "CompliantLazyFrame[Any, Any]" CompliantFrameAny: TypeAlias = "CompliantDataFrameAny | CompliantLazyFrameAny" +CompliantNamespaceAny: TypeAlias = "CompliantNamespace[Any, Any]" DepthTrackingExprAny: TypeAlias = "DepthTrackingExpr[Any, Any]" @@ -110,8 +112,18 @@ LazyExprT_contra = TypeVar("LazyExprT_contra", bound=LazyExprAny, contravariant=True) AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]] +"""A function aliasing a *sequence* of column names.""" + AliasName: TypeAlias = Callable[[str], str] +"""A function aliasing a *single* column name.""" + EvalSeries: TypeAlias = Callable[ [CompliantFrameT], Sequence[CompliantSeriesOrNativeExprT] ] +"""A function from a `Frame` to a sequence of `Series`*. + +See [underwater unicorn magic](https://narwhals-dev.github.io/narwhals/how_it_works/). 
+""" + EvalNames: TypeAlias = Callable[[CompliantFrameT], Sequence[str]] +"""A function from a `Frame` to a sequence of column names *before* any aliasing takes place.""" diff --git a/narwhals/_compliant/when_then.py b/narwhals/_compliant/when_then.py index 4a27cb77b5..696acfba41 100644 --- a/narwhals/_compliant/when_then.py +++ b/narwhals/_compliant/when_then.py @@ -24,6 +24,7 @@ from typing_extensions import Self from typing_extensions import TypeAlias + from narwhals._compliant.typing import EvalSeries from narwhals.utils import Implementation from narwhals.utils import Version from narwhals.utils import _FullContext @@ -82,7 +83,7 @@ def from_expr(cls, condition: ExprT, /, *, context: _FullContext) -> Self: class CompliantThen(CompliantExpr[FrameT, SeriesT], Protocol38[FrameT, SeriesT, ExprT]): - _call: Callable[[FrameT], Sequence[SeriesT]] + _call: EvalSeries[FrameT, SeriesT] _when_value: CompliantWhen[FrameT, SeriesT, ExprT] _function_name: str _depth: int diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index bfcadcf240..addc27f322 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -34,6 +34,9 @@ from typing_extensions import Self + from narwhals._compliant.typing import AliasNames + from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalSeries from narwhals._dask.dataframe import DaskLazyFrame from narwhals._dask.namespace import DaskNamespace from narwhals._expression_parsing import ExprMetadata @@ -50,12 +53,12 @@ class DaskExpr( def __init__( self: Self, - call: Callable[[DaskLazyFrame], Sequence[dx.Series]], + call: EvalSeries[DaskLazyFrame, dx.Series], *, depth: int, function_name: str, - evaluate_output_names: Callable[[DaskLazyFrame], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + evaluate_output_names: EvalNames[DaskLazyFrame], + alias_output_names: AliasNames | None, backend_version: tuple[int, ...], version: Version, # Kwargs with metadata which we may need in group-by agg @@ -101,7 +104,7
@@ def func(df: DaskLazyFrame) -> list[dx.Series]: @classmethod def from_column_names( cls: type[Self], - evaluate_column_names: Callable[[DaskLazyFrame], Sequence[str]], + evaluate_column_names: EvalNames[DaskLazyFrame], /, *, context: _FullContext, diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 5972b8f81b..dd54ed7bd9 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -34,6 +34,8 @@ from typing_extensions import Self from narwhals._compliant.typing import AliasNames + from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalSeries from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.namespace import DuckDBNamespace from narwhals._duckdb.typing import WindowFunction @@ -51,10 +53,10 @@ class DuckDBExpr(LazyExpr["DuckDBLazyFrame", "duckdb.Expression"]): def __init__( self: Self, - call: Callable[[DuckDBLazyFrame], Sequence[duckdb.Expression]], + call: EvalSeries[DuckDBLazyFrame, duckdb.Expression], *, - evaluate_output_names: Callable[[DuckDBLazyFrame], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + evaluate_output_names: EvalNames[DuckDBLazyFrame], + alias_output_names: AliasNames | None, backend_version: tuple[int, ...], version: Version, ) -> None: @@ -168,7 +170,7 @@ def func(df: DuckDBLazyFrame) -> Sequence[duckdb.Expression]: @classmethod def from_column_names( cls: type[Self], - evaluate_column_names: Callable[[DuckDBLazyFrame], Sequence[str]], + evaluate_column_names: EvalNames[DuckDBLazyFrame], /, *, context: _FullContext, @@ -243,10 +245,7 @@ def _with_alias_output_names(self, func: AliasNames | None, /) -> Self: version=self._version, ) - def _with_window_function( - self: Self, - window_function: WindowFunction, - ) -> Self: + def _with_window_function(self, window_function: WindowFunction) -> Self: result = self.__class__( self._call, evaluate_output_names=self._evaluate_output_names, diff --git 
a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index d10f090728..f82ef55839 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -8,7 +8,6 @@ from itertools import chain from typing import TYPE_CHECKING from typing import Any -from typing import Callable from typing import Literal from typing import Sequence from typing import TypeVar @@ -29,10 +28,13 @@ from narwhals._compliant import CompliantExprT from narwhals._compliant import CompliantFrameT from narwhals._compliant import CompliantNamespace + from narwhals._compliant.typing import AliasNames + from narwhals._compliant.typing import CompliantExprAny + from narwhals._compliant.typing import CompliantFrameAny + from narwhals._compliant.typing import CompliantNamespaceAny from narwhals._compliant.typing import EagerNamespaceAny + from narwhals._compliant.typing import EvalNames from narwhals.expr import Expr - from narwhals.typing import CompliantDataFrame - from narwhals.typing import CompliantLazyFrame from narwhals.typing import IntoExpr from narwhals.typing import _1DArray @@ -48,7 +50,7 @@ def is_expr(obj: Any) -> TypeIs[Expr]: def combine_evaluate_output_names( *exprs: CompliantExpr[CompliantFrameT, Any], -) -> Callable[[CompliantFrameT], Sequence[str]]: +) -> EvalNames[CompliantFrameT]: # Follow left-hand-rule for naming. E.g. `nw.sum_horizontal(expr1, expr2)` takes the # first name of `expr1`. if not is_compliant_expr(exprs[0]): # pragma: no cover @@ -61,9 +63,7 @@ def evaluate_output_names(df: CompliantFrameT) -> Sequence[str]: return evaluate_output_names -def combine_alias_output_names( - *exprs: CompliantExpr[Any, Any], -) -> Callable[[Sequence[str]], Sequence[str]] | None: +def combine_alias_output_names(*exprs: CompliantExprAny) -> AliasNames | None: # Follow left-hand-rule for naming. E.g. `nw.sum_horizontal(expr1.alias(alias), expr2)` takes the # aliasing function of `expr1` and apply it to the first output name of `expr1`. 
if exprs[0]._alias_output_names is None: @@ -91,9 +91,7 @@ def extract_compliant( def evaluate_output_names_and_aliases( - expr: CompliantExpr[Any, Any], - df: CompliantDataFrame[Any, Any, Any] | CompliantLazyFrame[Any, Any], - exclude: Sequence[str], + expr: CompliantExprAny, df: CompliantFrameAny, exclude: Sequence[str] ) -> tuple[Sequence[str], Sequence[str]]: output_names = expr._evaluate_output_names(df) if not output_names: @@ -410,11 +408,8 @@ def infer_kind(obj: IntoExpr | _1DArray | object, *, str_as_lit: bool) -> ExprKi def apply_n_ary_operation( - plx: CompliantNamespace[Any, Any], - function: Any, - *comparands: IntoExpr, - str_as_lit: bool, -) -> CompliantExpr[Any, Any]: + plx: CompliantNamespaceAny, function: Any, *comparands: IntoExpr, str_as_lit: bool +) -> CompliantExprAny: compliant_exprs = ( extract_compliant(plx, comparand, str_as_lit=str_as_lit) for comparand in comparands diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 3f04123450..a2664598d2 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING from typing import Any -from typing import Callable from typing import Literal from typing import Sequence @@ -16,6 +15,9 @@ if TYPE_CHECKING: from typing_extensions import Self + from narwhals._compliant.typing import AliasNames + from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalSeries from narwhals._expression_parsing import ExprMetadata from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.namespace import PandasLikeNamespace @@ -71,12 +73,12 @@ def window_kwargs_to_pandas_equivalent( class PandasLikeExpr(EagerExpr["PandasLikeDataFrame", PandasLikeSeries]): def __init__( self: Self, - call: Callable[[PandasLikeDataFrame], Sequence[PandasLikeSeries]], + call: EvalSeries[PandasLikeDataFrame, PandasLikeSeries], *, depth: int, function_name: str, - 
evaluate_output_names: Callable[[PandasLikeDataFrame], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + evaluate_output_names: EvalNames[PandasLikeDataFrame], + alias_output_names: AliasNames | None, implementation: Implementation, backend_version: tuple[int, ...], version: Version, @@ -105,7 +107,7 @@ def __narwhals_expr__(self) -> None: ... @classmethod def from_column_names( cls: type[Self], - evaluate_column_names: Callable[[PandasLikeDataFrame], Sequence[str]], + evaluate_column_names: EvalNames[PandasLikeDataFrame], /, *, context: _FullContext, diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 406c9b4db3..968a903d8d 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -30,6 +30,8 @@ from typing_extensions import Self from narwhals._compliant.typing import AliasNames + from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalSeries from narwhals._expression_parsing import ExprMetadata from narwhals._spark_like.dataframe import SparkLikeLazyFrame from narwhals._spark_like.namespace import SparkLikeNamespace @@ -42,10 +44,10 @@ class SparkLikeExpr(LazyExpr["SparkLikeLazyFrame", "Column"]): def __init__( self: Self, - call: Callable[[SparkLikeLazyFrame], Sequence[Column]], + call: EvalSeries[SparkLikeLazyFrame, Column], *, - evaluate_output_names: Callable[[SparkLikeLazyFrame], Sequence[str]], - alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None, + evaluate_output_names: EvalNames[SparkLikeLazyFrame], + alias_output_names: AliasNames | None, backend_version: tuple[int, ...], version: Version, implementation: Implementation, @@ -119,10 +121,7 @@ def __narwhals_namespace__(self: Self) -> SparkLikeNamespace: # pragma: no cove implementation=self._implementation, ) - def _with_window_function( - self: Self, - window_function: WindowFunction, - ) -> Self: + def _with_window_function(self, window_function: WindowFunction) 
-> Self: result = self.__class__( self._call, evaluate_output_names=self._evaluate_output_names, @@ -213,7 +212,7 @@ def func(window_inputs: WindowInputs) -> Column: @classmethod def from_column_names( cls: type[Self], - evaluate_column_names: Callable[[SparkLikeLazyFrame], Sequence[str]], + evaluate_column_names: EvalNames[SparkLikeLazyFrame], /, *, context: _FullContext, diff --git a/narwhals/utils.py b/narwhals/utils.py index b2024f3d7d..3d8a7dfae6 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -70,6 +70,7 @@ from narwhals._compliant import CompliantSeriesT from narwhals._compliant import NativeFrameT_co from narwhals._compliant import NativeSeriesT_co + from narwhals._compliant.typing import EvalNames from narwhals._dask.namespace import DaskNamespace from narwhals._duckdb.namespace import DuckDBNamespace from narwhals._pandas_like.namespace import PandasLikeNamespace @@ -1516,7 +1517,7 @@ def exclude_column_names(frame: _StoresColumns, names: Container[str]) -> Sequen return [col_name for col_name in frame.columns if col_name not in names] -def passthrough_column_names(names: Sequence[str], /) -> Callable[[Any], Sequence[str]]: +def passthrough_column_names(names: Sequence[str], /) -> EvalNames[Any]: def fn(_frame: Any, /) -> Sequence[str]: return names