From afdbcb4695851458ffa6b305a772d76ed189a381 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 1 Apr 2025 19:25:02 +0100 Subject: [PATCH 1/5] refactor: Add `SelectorNamespace.from_namespace` https://github.com/narwhals-dev/narwhals/pull/2294#discussion_r2014534830 --- narwhals/_arrow/namespace.py | 2 +- narwhals/_arrow/selectors.py | 6 ------ narwhals/_compliant/selectors.py | 9 +++++++++ narwhals/_dask/namespace.py | 2 +- narwhals/_dask/selectors.py | 6 ------ narwhals/_duckdb/namespace.py | 2 +- narwhals/_duckdb/selectors.py | 6 ------ narwhals/_pandas_like/namespace.py | 2 +- narwhals/_pandas_like/selectors.py | 6 ------ narwhals/_spark_like/namespace.py | 2 +- narwhals/_spark_like/selectors.py | 6 ------ 11 files changed, 14 insertions(+), 35 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index acd4d98385..3f8a84aaf2 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -236,7 +236,7 @@ def concat( @property def selectors(self: Self) -> ArrowSelectorNamespace: - return ArrowSelectorNamespace(self) + return ArrowSelectorNamespace.from_namespace(self) def when(self: Self, predicate: ArrowExpr) -> ArrowWhen: return ArrowWhen.from_expr(predicate, context=self) diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py index 4cf50535db..33292da7ef 100644 --- a/narwhals/_arrow/selectors.py +++ b/narwhals/_arrow/selectors.py @@ -13,7 +13,6 @@ from narwhals._arrow.series import ArrowSeries from narwhals._compliant import EvalNames from narwhals._compliant import EvalSeries - from narwhals.utils import _FullContext class ArrowSelectorNamespace(EagerSelectorNamespace["ArrowDataFrame", "ArrowSeries"]): @@ -33,11 +32,6 @@ def _selector( version=self._version, ) - def __init__(self: Self, context: _FullContext, /) -> None: - self._implementation = context._implementation - self._backend_version = context._backend_version - self._version = context._version - class ArrowSelector(CompliantSelector["ArrowDataFrame", "ArrowSeries"], ArrowExpr): # type: ignore[misc] def _to_expr(self: Self) -> ArrowExpr: diff --git a/narwhals/_compliant/selectors.py b/narwhals/_compliant/selectors.py index 6d74340b4a..3c1b8c122a 100644 --- a/narwhals/_compliant/selectors.py +++ b/narwhals/_compliant/selectors.py @@ -50,6 +50,7 @@ from narwhals.typing import TimeUnit from narwhals.utils import Implementation from narwhals.utils import Version + from narwhals.utils import _FullContext __all__ = [ "CompliantSelector", @@ -81,6 +82,14 @@ class CompliantSelectorNamespace(Protocol[FrameT, SeriesOrExprT]): _backend_version: tuple[int, ...] _version: Version + @classmethod + def from_namespace(cls, context: _FullContext, /) -> Self: + obj = cls.__new__(cls) + obj._implementation = context._implementation + obj._backend_version = context._backend_version + obj._version = context._version + return obj + def _selector( self, call: EvalSeries[FrameT, SeriesOrExprT], diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 17b4482ed8..8a186a516d 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -43,7 +43,7 @@ class DaskNamespace(DepthTrackingNamespace[DaskLazyFrame, "DaskExpr"]): @property def selectors(self: Self) -> DaskSelectorNamespace: - return DaskSelectorNamespace(self) + return DaskSelectorNamespace.from_namespace(self) @property def _expr(self) -> type[DaskExpr]: diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py index 1d3b311352..bacbd0d779 100644 --- a/narwhals/_dask/selectors.py +++ b/narwhals/_dask/selectors.py @@ -17,7 +17,6 @@ from narwhals._compliant import EvalNames from narwhals._compliant import EvalSeries from narwhals._dask.dataframe import DaskLazyFrame - from narwhals.utils import _FullContext class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]): # pyright: ignore[reportInvalidTypeArguments] @@ -37,11 +36,6 @@ def _selector( version=self._version, ) - def __init__(self: Self, context: _FullContext, /) -> None: - self._implementation = context._implementation - self._backend_version = context._backend_version - self._version = context._version - class DaskSelector(CompliantSelector["DaskLazyFrame", "dx.Series"], DaskExpr): # type: ignore[misc] def _to_expr(self: Self) -> DaskExpr: diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 93a011d6cc..1e22cbddf7 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -47,7 +47,7 @@ def __init__( @property def selectors(self: Self) -> DuckDBSelectorNamespace: - return DuckDBSelectorNamespace(self) + return DuckDBSelectorNamespace.from_namespace(self) @property def _expr(self) -> type[DuckDBExpr]: diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py index dcfd3113b1..1b75ca0a79 100644 --- a/narwhals/_duckdb/selectors.py +++ b/narwhals/_duckdb/selectors.py @@ -13,7 +13,6 @@ from narwhals._compliant import EvalNames from narwhals._compliant import EvalSeries from narwhals._duckdb.dataframe import DuckDBLazyFrame - from narwhals.utils import _FullContext class DuckDBSelectorNamespace( @@ -33,11 +32,6 @@ def _selector( version=self._version, ) - def __init__(self: Self, context: _FullContext, /) -> None: - self._implementation = context._implementation - self._backend_version = context._backend_version - self._version = context._version - class DuckDBSelector( # type: ignore[misc] CompliantSelector["DuckDBLazyFrame", "duckdb.Expression"], DuckDBExpr diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 27b5ff96b3..363f3282b1 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -48,7 +48,7 @@ def _series(self) -> type[PandasLikeSeries]: @property def selectors(self: Self) -> PandasSelectorNamespace: - return PandasSelectorNamespace(self) + return PandasSelectorNamespace.from_namespace(self) # --- not in spec --- def __init__( diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py index 69109f0cec..4bba569670 100644 --- a/narwhals/_pandas_like/selectors.py +++ b/narwhals/_pandas_like/selectors.py @@ -15,7 +15,6 @@ from narwhals._compliant import EvalSeries from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.series import PandasLikeSeries - from narwhals.utils import _FullContext class PandasSelectorNamespace( @@ -38,11 +37,6 @@ def _selector( version=self._version, ) - def __init__(self: Self, context: _FullContext, /) -> None: - self._implementation = context._implementation - self._backend_version = context._backend_version - self._version = context._version - class PandasSelector( # type: ignore[misc] CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index 729edd963a..20a34836aa 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -40,7 +40,7 @@ def __init__( @property def selectors(self: Self) -> SparkLikeSelectorNamespace: - return SparkLikeSelectorNamespace(self) + return SparkLikeSelectorNamespace.from_namespace(self) @property def _expr(self) -> type[SparkLikeExpr]: diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py index 9b84b53761..cba86ac284 100644 --- a/narwhals/_spark_like/selectors.py +++ b/narwhals/_spark_like/selectors.py @@ -13,7 +13,6 @@ from narwhals._compliant import EvalNames from narwhals._compliant import EvalSeries from narwhals._spark_like.dataframe import SparkLikeLazyFrame - from narwhals.utils import _FullContext class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]): @@ -32,11 +31,6 @@ def _selector( implementation=self._implementation, ) - def __init__(self: Self, context: _FullContext, /) -> None: - self._backend_version = context._backend_version - self._version = context._version - self._implementation = context._implementation - class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr): # type: ignore[misc] def _to_expr(self: Self) -> SparkLikeExpr: From 04b9395e11c36e5a17bc1c6ef6b64714a6bedbb9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 1 Apr 2025 20:01:21 +0100 Subject: [PATCH 2/5] refactor: Reuse pattern from `when_then` --- narwhals/_arrow/selectors.py | 28 ++++----------- narwhals/_compliant/selectors.py | 58 +++++++++++++++++++++--------- narwhals/_dask/selectors.py | 39 ++++++-------------- narwhals/_duckdb/selectors.py | 25 ++++--------- narwhals/_pandas_like/selectors.py | 31 ++++------------ narwhals/_spark_like/selectors.py | 26 ++++---------- 6 files changed, 75 insertions(+), 132 deletions(-) diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py index 33292da7ef..8becb07cb1 100644 --- a/narwhals/_arrow/selectors.py +++ b/narwhals/_arrow/selectors.py @@ -7,34 +7,18 @@ from narwhals._compliant import EagerSelectorNamespace if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._arrow.dataframe import ArrowDataFrame - from narwhals._arrow.series import ArrowSeries - from narwhals._compliant import EvalNames - from narwhals._compliant import EvalSeries + from narwhals._arrow.dataframe import ArrowDataFrame # noqa: F401 + from narwhals._arrow.series import ArrowSeries # noqa: F401 class ArrowSelectorNamespace(EagerSelectorNamespace["ArrowDataFrame", "ArrowSeries"]): - def _selector( - self, - call: EvalSeries[ArrowDataFrame, ArrowSeries], - evaluate_output_names: EvalNames[ArrowDataFrame], - /, - ) -> ArrowSelector: - return ArrowSelector( - call, - depth=0, - function_name="selector", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, - backend_version=self._backend_version, - version=self._version, - ) + @property + def _selector(self) -> type[ArrowSelector]: + return ArrowSelector class ArrowSelector(CompliantSelector["ArrowDataFrame", "ArrowSeries"], ArrowExpr): # type: ignore[misc] - def _to_expr(self: Self) -> ArrowExpr: + def _to_expr(self) -> ArrowExpr: return ArrowExpr( self._call, depth=self._depth, diff --git a/narwhals/_compliant/selectors.py b/narwhals/_compliant/selectors.py index 3c1b8c122a..d569c52745 100644 --- a/narwhals/_compliant/selectors.py +++ b/narwhals/_compliant/selectors.py @@ -90,12 +90,8 @@ def from_namespace(cls, context: _FullContext, /) -> Self: obj._version = context._version return obj - def _selector( - self, - call: EvalSeries[FrameT, SeriesOrExprT], - evaluate_output_names: EvalNames[FrameT], - /, - ) -> CompliantSelector[FrameT, SeriesOrExprT]: ... + @property + def _selector(self) -> type[CompliantSelector[FrameT, SeriesOrExprT]]: ... def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesOrExprT]: ... @@ -119,7 +115,7 @@ def series(df: FrameT) -> Sequence[SeriesOrExprT]: def names(df: FrameT) -> Sequence[str]: return [name for name, tp in self._iter_schema(df) if isinstance(tp, dtype)] - return self._selector(series, names) + return self._selector.from_callables(series, names, context=self) def by_dtype( self: Self, dtypes: Collection[DType | type[DType]] @@ -130,7 +126,7 @@ def series(df: FrameT) -> Sequence[SeriesOrExprT]: def names(df: FrameT) -> Sequence[str]: return [name for name, tp in self._iter_schema(df) if tp in dtypes] - return self._selector(series, names) + return self._selector.from_callables(series, names, context=self) def matches(self: Self, pattern: str) -> CompliantSelector[FrameT, SeriesOrExprT]: p = re.compile(pattern) @@ -144,7 +140,7 @@ def series(df: FrameT) -> Sequence[SeriesOrExprT]: def names(df: FrameT) -> Sequence[str]: return [col for col in df.columns if p.search(col)] - return self._selector(series, names) + return self._selector.from_callables(series, names, context=self) def numeric(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: def series(df: FrameT) -> Sequence[SeriesOrExprT]: @@ -153,7 +149,7 @@ def series(df: FrameT) -> Sequence[SeriesOrExprT]: def names(df: FrameT) -> Sequence[str]: return [name for name, tp in self._iter_schema(df) if tp.is_numeric()] - return self._selector(series, names) + return self._selector.from_callables(series, names, context=self) def categorical(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: return self._is_dtype(import_dtypes_module(self._version).Categorical) @@ -168,7 +164,7 @@ def all(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: def series(df: FrameT) -> Sequence[SeriesOrExprT]: return list(self._iter_columns(df)) - return self._selector(series, get_column_names) + return self._selector.from_callables(series, get_column_names, context=self) def datetime( self: Self, @@ -189,7 +185,7 @@ def series(df: FrameT) -> Sequence[SeriesOrExprT]: def names(df: FrameT) -> Sequence[str]: return [name for name, tp in self._iter_schema(df) if matches(tp)] - return self._selector(series, names) + return self._selector.from_callables(series, names, context=self) class EagerSelectorNamespace( @@ -225,6 +221,34 @@ def _iter_columns_dtypes(self, df: LazyFrameT, /) -> Iterator[tuple[ExprT, DType class CompliantSelector( CompliantExpr[FrameT, SeriesOrExprT], Protocol[FrameT, SeriesOrExprT] ): + _call: EvalSeries[FrameT, SeriesOrExprT] + _function_name: str + _depth: int + _implementation: Implementation + _backend_version: tuple[int, ...] + _version: Version + _call_kwargs: dict[str, Any] + + @classmethod + def from_callables( + cls, + call: EvalSeries[FrameT, SeriesOrExprT], + evaluate_output_names: EvalNames[FrameT], + *, + context: _FullContext, + ) -> Self: + obj = cls.__new__(cls) + obj._call = call + obj._depth = 0 + obj._function_name = "selector" + obj._evaluate_output_names = evaluate_output_names + obj._alias_output_names = None + obj._implementation = context._implementation + obj._backend_version = context._backend_version + obj._version = context._version + obj._call_kwargs = {} + return obj + @property def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesOrExprT]: return self.__narwhals_namespace__().selectors @@ -257,7 +281,7 @@ def names(df: FrameT) -> Sequence[str]: lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) return [x for x in lhs_names if x not in rhs_names] - return self.selectors._selector(series, names) + return self.selectors._selector.from_callables(series, names, context=self) return self._to_expr() - other @overload @@ -271,18 +295,18 @@ def __or__( ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: if self._is_selector(other): - def names(df: FrameT) -> Sequence[SeriesOrExprT]: + def series(df: FrameT) -> Sequence[SeriesOrExprT]: lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) return [ *(x for x, name in zip(self(df), lhs_names) if name not in rhs_names), *other(df), ] - def series(df: FrameT) -> Sequence[str]: + def names(df: FrameT) -> Sequence[str]: lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) return [*(x for x in lhs_names if x not in rhs_names), *rhs_names] - return self.selectors._selector(names, series) + return self.selectors._selector.from_callables(series, names, context=self) return self._to_expr() | other @overload @@ -304,7 +328,7 @@ def names(df: FrameT) -> Sequence[str]: lhs_names, rhs_names = _eval_lhs_rhs(df, self, other) return [x for x in lhs_names if x in rhs_names] - return self.selectors._selector(series, names) + return self.selectors._selector.from_callables(series, names, context=self) return self._to_expr() & other def __invert__(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py index bacbd0d779..067b9deee8 100644 --- a/narwhals/_dask/selectors.py +++ b/narwhals/_dask/selectors.py @@ -7,38 +7,19 @@ from narwhals._dask.expr import DaskExpr if TYPE_CHECKING: - try: - import dask.dataframe.dask_expr as dx - except ModuleNotFoundError: - import dask_expr as dx - - from typing_extensions import Self - - from narwhals._compliant import EvalNames - from narwhals._compliant import EvalSeries - from narwhals._dask.dataframe import DaskLazyFrame - - -class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]): # pyright: ignore[reportInvalidTypeArguments] - def _selector( - self, - call: EvalSeries[DaskLazyFrame, dx.Series], # pyright: ignore[reportInvalidTypeForm] - evaluate_output_names: EvalNames[DaskLazyFrame], - /, - ) -> DaskSelector: - return DaskSelector( - call, - depth=0, - function_name="selector", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, - backend_version=self._backend_version, - version=self._version, - ) + import dask.dataframe.dask_expr as dx # noqa: F401 + + from narwhals._dask.dataframe import DaskLazyFrame # noqa: F401 + + +class DaskSelectorNamespace(LazySelectorNamespace["DaskLazyFrame", "dx.Series"]): + @property + def _selector(self) -> type[DaskSelector]: + return DaskSelector class DaskSelector(CompliantSelector["DaskLazyFrame", "dx.Series"], DaskExpr): # type: ignore[misc] - def _to_expr(self: Self) -> DaskExpr: + def _to_expr(self) -> DaskExpr: return DaskExpr( self._call, depth=self._depth, diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py index 1b75ca0a79..a4e354c160 100644 --- a/narwhals/_duckdb/selectors.py +++ b/narwhals/_duckdb/selectors.py @@ -7,36 +7,23 @@ from narwhals._duckdb.expr import DuckDBExpr if TYPE_CHECKING: - import duckdb - from typing_extensions import Self + import duckdb # noqa: F401 - from narwhals._compliant import EvalNames - from narwhals._compliant import EvalSeries - from narwhals._duckdb.dataframe import DuckDBLazyFrame + from narwhals._duckdb.dataframe import DuckDBLazyFrame # noqa: F401 class DuckDBSelectorNamespace( LazySelectorNamespace["DuckDBLazyFrame", "duckdb.Expression"] ): - def _selector( - self, - call: EvalSeries[DuckDBLazyFrame, duckdb.Expression], - evaluate_output_names: EvalNames[DuckDBLazyFrame], - /, - ) -> DuckDBSelector: - return DuckDBSelector( - call, - evaluate_output_names=evaluate_output_names, - alias_output_names=None, - backend_version=self._backend_version, - version=self._version, - ) + @property + def _selector(self) -> type[DuckDBSelector]: + return DuckDBSelector class DuckDBSelector( # type: ignore[misc] CompliantSelector["DuckDBLazyFrame", "duckdb.Expression"], DuckDBExpr ): - def _to_expr(self: Self) -> DuckDBExpr: + def _to_expr(self) -> DuckDBExpr: return DuckDBExpr( self._call, evaluate_output_names=self._evaluate_output_names, diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py index 4bba569670..87719b1cb3 100644 --- a/narwhals/_pandas_like/selectors.py +++ b/narwhals/_pandas_like/selectors.py @@ -4,44 +4,25 @@ from narwhals._compliant import CompliantSelector from narwhals._compliant import EagerSelectorNamespace -from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.expr import PandasLikeExpr -from narwhals._pandas_like.series import PandasLikeSeries if TYPE_CHECKING: - from typing_extensions import Self - - from narwhals._compliant import EvalNames - from narwhals._compliant import EvalSeries - from narwhals._pandas_like.dataframe import PandasLikeDataFrame - from narwhals._pandas_like.series import PandasLikeSeries + from narwhals._pandas_like.dataframe import PandasLikeDataFrame # noqa: F401 + from narwhals._pandas_like.series import PandasLikeSeries # noqa: F401 class PandasSelectorNamespace( EagerSelectorNamespace["PandasLikeDataFrame", "PandasLikeSeries"] ): - def _selector( - self, - call: EvalSeries[PandasLikeDataFrame, PandasLikeSeries], - evaluate_output_names: EvalNames[PandasLikeDataFrame], - /, - ) -> PandasSelector: - return PandasSelector( - call, - depth=0, - function_name="selector", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - ) + @property + def _selector(self) -> type[PandasSelector]: + return PandasSelector class PandasSelector( # type: ignore[misc] CompliantSelector["PandasLikeDataFrame", "PandasLikeSeries"], PandasLikeExpr ): - def _to_expr(self: Self) -> PandasLikeExpr: + def _to_expr(self) -> PandasLikeExpr: return PandasLikeExpr( self._call, depth=self._depth, diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py index cba86ac284..1c6489de44 100644 --- a/narwhals/_spark_like/selectors.py +++ b/narwhals/_spark_like/selectors.py @@ -7,33 +7,19 @@ from narwhals._spark_like.expr import SparkLikeExpr if TYPE_CHECKING: - from sqlframe.base.column import Column - from typing_extensions import Self + from sqlframe.base.column import Column # noqa: F401 - from narwhals._compliant import EvalNames - from narwhals._compliant import EvalSeries - from narwhals._spark_like.dataframe import SparkLikeLazyFrame + from narwhals._spark_like.dataframe import SparkLikeLazyFrame # noqa: F401 class SparkLikeSelectorNamespace(LazySelectorNamespace["SparkLikeLazyFrame", "Column"]): - def _selector( - self, - call: EvalSeries[SparkLikeLazyFrame, Column], - evaluate_output_names: EvalNames[SparkLikeLazyFrame], - /, - ) -> SparkLikeSelector: - return SparkLikeSelector( - call, - evaluate_output_names=evaluate_output_names, - alias_output_names=None, - backend_version=self._backend_version, - version=self._version, - implementation=self._implementation, - ) + @property + def _selector(self) -> type[SparkLikeSelector]: + return SparkLikeSelector class SparkLikeSelector(CompliantSelector["SparkLikeLazyFrame", "Column"], SparkLikeExpr): # type: ignore[misc] - def _to_expr(self: Self) -> SparkLikeExpr: + def _to_expr(self) -> SparkLikeExpr: return SparkLikeExpr( self._call, evaluate_output_names=self._evaluate_output_names, From 2be746d04ebb480e31f8ce0d142d4999615f31e1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 1 Apr 2025 20:08:02 +0100 Subject: [PATCH 3/5] refactor: Only use `Protocol38` in sub `CompliantExpr` Still need to untangle `CompliantExpr` + friends https://github.com/narwhals-dev/narwhals/pull/2294#discussion_r2014534830 --- narwhals/_compliant/selectors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/narwhals/_compliant/selectors.py b/narwhals/_compliant/selectors.py index d569c52745..054553da8d 100644 --- a/narwhals/_compliant/selectors.py +++ b/narwhals/_compliant/selectors.py @@ -10,6 +10,7 @@ from typing import Collection from typing import Iterable from typing import Iterator +from typing import Protocol from typing import Sequence from typing import TypeVar from typing import overload @@ -27,13 +28,12 @@ import sys if sys.version_info >= (3, 9): - from typing import Protocol + from typing import Protocol as Protocol38 else: - from typing import Generic + from typing import Generic as Protocol38 - Protocol = Generic else: # pragma: no cover - from typing import Protocol + from typing import Protocol as Protocol38 if TYPE_CHECKING: from datetime import timezone @@ -219,7 +219,7 @@ def _iter_columns_dtypes(self, df: LazyFrameT, /) -> Iterator[tuple[ExprT, DType class CompliantSelector( - CompliantExpr[FrameT, SeriesOrExprT], Protocol[FrameT, SeriesOrExprT] + CompliantExpr[FrameT, SeriesOrExprT], Protocol38[FrameT, SeriesOrExprT] ): _call: EvalSeries[FrameT, SeriesOrExprT] _function_name: str From 7a7acc710ed3a0089476f22ddf420bcdb82bef62 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 1 Apr 2025 20:20:00 +0100 Subject: [PATCH 4/5] refactor: Simplify typing --- narwhals/_compliant/selectors.py | 73 +++++++++++++++----------------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/narwhals/_compliant/selectors.py b/narwhals/_compliant/selectors.py index 054553da8d..37b197f3e0 100644 --- a/narwhals/_compliant/selectors.py +++ b/narwhals/_compliant/selectors.py @@ -42,10 +42,13 @@ from typing_extensions import TypeAlias from typing_extensions import TypeIs - from narwhals._compliant.dataframe import CompliantDataFrame - from narwhals._compliant.dataframe import CompliantLazyFrame from narwhals._compliant.expr import NativeExpr - from narwhals._compliant.series import CompliantSeries + from narwhals._compliant.typing import CompliantDataFrameAny + from narwhals._compliant.typing import CompliantExprAny + from narwhals._compliant.typing import CompliantFrameAny + from narwhals._compliant.typing import CompliantLazyFrameAny + from narwhals._compliant.typing import CompliantSeriesAny + from narwhals._compliant.typing import CompliantSeriesOrNativeExprAny from narwhals.dtypes import DType from narwhals.typing import TimeUnit from narwhals.utils import Implementation @@ -62,14 +65,12 @@ ] -SeriesOrExprT = TypeVar("SeriesOrExprT", bound="CompliantSeries[Any] | NativeExpr") -SeriesT = TypeVar("SeriesT", bound="CompliantSeries[Any]") +SeriesOrExprT = TypeVar("SeriesOrExprT", bound="CompliantSeriesOrNativeExprAny") +SeriesT = TypeVar("SeriesT", bound="CompliantSeriesAny") ExprT = TypeVar("ExprT", bound="NativeExpr") -FrameT = TypeVar( - "FrameT", bound="CompliantDataFrame[Any, Any, Any] | CompliantLazyFrame[Any, Any]" -) -DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrame[Any, Any, Any]") -LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrame[Any, Any]") +FrameT = TypeVar("FrameT", bound="CompliantFrameAny") +DataFrameT = TypeVar("DataFrameT", bound="CompliantDataFrameAny") +LazyFrameT = TypeVar("LazyFrameT", bound="CompliantLazyFrameAny") SelectorOrExpr: TypeAlias = ( "CompliantSelector[FrameT, SeriesOrExprT] | CompliantExpr[FrameT, SeriesOrExprT]" ) @@ -118,7 +119,7 @@ def names(df: FrameT) -> Sequence[str]: return self._selector.from_callables(series, names, context=self) def by_dtype( - self: Self, dtypes: Collection[DType | type[DType]] + self, dtypes: Collection[DType | type[DType]] ) -> CompliantSelector[FrameT, SeriesOrExprT]: def series(df: FrameT) -> Sequence[SeriesOrExprT]: return [ser for ser, tp in self._iter_columns_dtypes(df) if tp in dtypes] @@ -128,7 +129,7 @@ def names(df: FrameT) -> Sequence[str]: return self._selector.from_callables(series, names, context=self) - def matches(self: Self, pattern: str) -> CompliantSelector[FrameT, SeriesOrExprT]: + def matches(self, pattern: str) -> CompliantSelector[FrameT, SeriesOrExprT]: p = re.compile(pattern) def series(df: FrameT) -> Sequence[SeriesOrExprT]: @@ -142,7 +143,7 @@ def names(df: FrameT) -> Sequence[str]: return self._selector.from_callables(series, names, context=self) - def numeric(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def numeric(self) -> CompliantSelector[FrameT, SeriesOrExprT]: def series(df: FrameT) -> Sequence[SeriesOrExprT]: return [ser for ser, tp in self._iter_columns_dtypes(df) if tp.is_numeric()] @@ -151,23 +152,23 @@ def names(df: FrameT) -> Sequence[str]: return self._selector.from_callables(series, names, context=self) - def categorical(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def categorical(self) -> CompliantSelector[FrameT, SeriesOrExprT]: return self._is_dtype(import_dtypes_module(self._version).Categorical) - def string(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def string(self) -> CompliantSelector[FrameT, SeriesOrExprT]: return self._is_dtype(import_dtypes_module(self._version).String) - def boolean(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def boolean(self) -> CompliantSelector[FrameT, SeriesOrExprT]: return self._is_dtype(import_dtypes_module(self._version).Boolean) - def all(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: + def all(self) -> CompliantSelector[FrameT, SeriesOrExprT]: def series(df: FrameT) -> Sequence[SeriesOrExprT]: return list(self._iter_columns(df)) return self._selector.from_callables(series, get_column_names, context=self) def datetime( - self: Self, + self, time_unit: TimeUnit | Iterable[TimeUnit] | None, time_zone: str | timezone | Iterable[str | timezone | None] | None, ) -> CompliantSelector[FrameT, SeriesOrExprT]: @@ -189,8 +190,7 @@ def names(df: FrameT) -> Sequence[str]: class EagerSelectorNamespace( - CompliantSelectorNamespace[DataFrameT, SeriesT], - Protocol[DataFrameT, SeriesT], + CompliantSelectorNamespace[DataFrameT, SeriesT], Protocol[DataFrameT, SeriesT] ): def _iter_schema(self, df: DataFrameT, /) -> Iterator[tuple[str, DType]]: for ser in self._iter_columns(df): @@ -205,8 +205,7 @@ def _iter_columns_dtypes(self, df: DataFrameT, /) -> Iterator[tuple[SeriesT, DTy class LazySelectorNamespace( - CompliantSelectorNamespace[LazyFrameT, ExprT], - Protocol[LazyFrameT, ExprT], + CompliantSelectorNamespace[LazyFrameT, ExprT], Protocol[LazyFrameT, ExprT] ): def _iter_schema(self, df: LazyFrameT) -> Iterator[tuple[str, DType]]: yield from df.schema.items() @@ -253,21 +252,21 @@ def from_callables( def selectors(self) -> CompliantSelectorNamespace[FrameT, SeriesOrExprT]: return self.__narwhals_namespace__().selectors - def _to_expr(self: Self) -> CompliantExpr[FrameT, SeriesOrExprT]: ... + def _to_expr(self) -> CompliantExpr[FrameT, SeriesOrExprT]: ... def _is_selector( - self: Self, other: Self | CompliantExpr[FrameT, SeriesOrExprT] + self, other: Self | CompliantExpr[FrameT, SeriesOrExprT] ) -> TypeIs[CompliantSelector[FrameT, SeriesOrExprT]]: return isinstance(other, type(self)) @overload - def __sub__(self: Self, other: Self) -> Self: ... + def __sub__(self, other: Self) -> Self: ... @overload def __sub__( - self: Self, other: CompliantExpr[FrameT, SeriesOrExprT] + self, other: CompliantExpr[FrameT, SeriesOrExprT] ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... def __sub__( - self: Self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + self, other: SelectorOrExpr[FrameT, SeriesOrExprT] ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: if self._is_selector(other): @@ -285,13 +284,13 @@ def names(df: FrameT) -> Sequence[str]: return self._to_expr() - other @overload - def __or__(self: Self, other: Self) -> Self: ... + def __or__(self, other: Self) -> Self: ... @overload def __or__( - self: Self, other: CompliantExpr[FrameT, SeriesOrExprT] + self, other: CompliantExpr[FrameT, SeriesOrExprT] ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... def __or__( - self: Self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + self, other: SelectorOrExpr[FrameT, SeriesOrExprT] ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: if self._is_selector(other): @@ -310,13 +309,13 @@ def names(df: FrameT) -> Sequence[str]: return self._to_expr() | other @overload - def __and__(self: Self, other: Self) -> Self: ... + def __and__(self, other: Self) -> Self: ... @overload def __and__( - self: Self, other: CompliantExpr[FrameT, SeriesOrExprT] + self, other: CompliantExpr[FrameT, SeriesOrExprT] ) -> CompliantExpr[FrameT, SeriesOrExprT]: ... def __and__( - self: Self, other: SelectorOrExpr[FrameT, SeriesOrExprT] + self, other: SelectorOrExpr[FrameT, SeriesOrExprT] ) -> SelectorOrExpr[FrameT, SeriesOrExprT]: if self._is_selector(other): @@ -331,13 +330,11 @@ def names(df: FrameT) -> Sequence[str]: return self.selectors._selector.from_callables(series, names, context=self) return self._to_expr() & other - def __invert__(self: Self) -> CompliantSelector[FrameT, SeriesOrExprT]: - return self.selectors.all() - self # type: ignore[no-any-return] + def __invert__(self) -> CompliantSelector[FrameT, SeriesOrExprT]: + return self.selectors.all() - self def _eval_lhs_rhs( - df: CompliantDataFrame[Any, Any, Any] | CompliantLazyFrame[Any, Any], - lhs: CompliantExpr[Any, Any], - rhs: CompliantExpr[Any, Any], + df: CompliantFrameAny, lhs: CompliantExprAny, rhs: CompliantExprAny ) -> tuple[Sequence[str], Sequence[str]]: return lhs._evaluate_output_names(df), rhs._evaluate_output_names(df) From f164f24485fd37837ed868c6089c825b68951bdf Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 1 Apr 2025 20:25:12 +0100 Subject: [PATCH 5/5] refactor: Move `Eval*` aliases to `_compliant.typing` --- narwhals/_compliant/__init__.py | 4 ++-- narwhals/_compliant/selectors.py | 7 ++----- narwhals/_compliant/typing.py | 6 ++++++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/narwhals/_compliant/__init__.py b/narwhals/_compliant/__init__.py index 570aaad17b..39da5e7994 100644 --- a/narwhals/_compliant/__init__.py +++ b/narwhals/_compliant/__init__.py @@ -15,8 +15,6 @@ from narwhals._compliant.selectors import CompliantSelector from narwhals._compliant.selectors import CompliantSelectorNamespace from narwhals._compliant.selectors import EagerSelectorNamespace -from narwhals._compliant.selectors import EvalNames -from narwhals._compliant.selectors import EvalSeries from narwhals._compliant.selectors import LazySelectorNamespace from narwhals._compliant.series import CompliantSeries from narwhals._compliant.series import EagerSeries @@ -26,6 +24,8 @@ from narwhals._compliant.typing import CompliantSeriesT from narwhals._compliant.typing import EagerDataFrameT from narwhals._compliant.typing import EagerSeriesT +from narwhals._compliant.typing import EvalNames +from narwhals._compliant.typing import EvalSeries from narwhals._compliant.typing import IntoCompliantExpr from narwhals._compliant.typing import NativeFrameT_co from narwhals._compliant.typing import NativeSeriesT_co diff --git a/narwhals/_compliant/selectors.py b/narwhals/_compliant/selectors.py index 37b197f3e0..08a279bab4 100644 --- a/narwhals/_compliant/selectors.py +++ b/narwhals/_compliant/selectors.py @@ -6,7 +6,6 @@ from functools import partial from typing import TYPE_CHECKING from typing import Any -from typing import Callable from typing import Collection from typing import Iterable from typing import Iterator @@ -49,6 +48,8 @@ from narwhals._compliant.typing import CompliantLazyFrameAny from narwhals._compliant.typing import CompliantSeriesAny from narwhals._compliant.typing import CompliantSeriesOrNativeExprAny + from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import EvalSeries from narwhals.dtypes import DType from narwhals.typing import TimeUnit from narwhals.utils import Implementation @@ -59,8 +60,6 @@ "CompliantSelector", "CompliantSelectorNamespace", "EagerSelectorNamespace", - "EvalNames", - "EvalSeries", "LazySelectorNamespace", ] @@ -74,8 +73,6 @@ SelectorOrExpr: TypeAlias = ( "CompliantSelector[FrameT, SeriesOrExprT] | CompliantExpr[FrameT, SeriesOrExprT]" ) -EvalSeries: TypeAlias = Callable[[FrameT], Sequence[SeriesOrExprT]] -EvalNames: TypeAlias = Callable[[FrameT], Sequence[str]] class CompliantSelectorNamespace(Protocol[FrameT, SeriesOrExprT]): diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 63e275ed76..4c59c2ebb6 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -30,6 +30,8 @@ "CompliantFrameT", "CompliantLazyFrameT", "CompliantSeriesT", + "EvalNames", + "EvalSeries", "IntoCompliantExpr", "NativeFrameT_co", "NativeSeriesT_co", @@ -107,3 +109,7 @@ AliasNames: TypeAlias = Callable[[Sequence[str]], Sequence[str]] AliasName: TypeAlias = Callable[[str], str] +EvalSeries: TypeAlias = Callable[ + [CompliantFrameT], Sequence[CompliantSeriesOrNativeExprT] +] +EvalNames: TypeAlias = Callable[[CompliantFrameT], Sequence[str]]