From 6c21874b4d041062e5e23951bcd6402b1c6d4b2d Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 2 Mar 2025 23:07:07 +0100 Subject: [PATCH 1/7] refactor: tweak from_column_names to reuse in exclude --- narwhals/_arrow/expr.py | 13 ++++++------ narwhals/_arrow/namespace.py | 32 +++++++++++------------------- narwhals/_dask/expr.py | 13 +++++++----- narwhals/_dask/namespace.py | 22 ++++++++++---------- narwhals/_duckdb/expr.py | 11 +++++----- narwhals/_duckdb/namespace.py | 21 ++++++++++---------- narwhals/_pandas_like/expr.py | 11 +++++----- narwhals/_pandas_like/namespace.py | 29 ++++++++++----------------- narwhals/_spark_like/expr.py | 11 +++++----- narwhals/_spark_like/namespace.py | 21 ++++++++++---------- 10 files changed, 85 insertions(+), 99 deletions(-) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index a712b458f3..171acb9de4 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -87,12 +87,11 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: @classmethod def from_column_names( cls: type[Self], - *column_names: str, + get_column_names: Callable[[ArrowDataFrame], Sequence[str]], + function_name: str, backend_version: tuple[int, ...], version: Version, ) -> Self: - from narwhals._arrow.series import ArrowSeries - def func(df: ArrowDataFrame) -> list[ArrowSeries]: try: return [ @@ -102,10 +101,10 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: backend_version=df._backend_version, version=df._version, ) - for column_name in column_names + for column_name in get_column_names(df) ] except KeyError as e: - missing_columns = [x for x in column_names if x not in df.columns] + missing_columns = [x for x in get_column_names(df) if x not in df.columns] raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns=missing_columns, available_columns=df.columns ) from e @@ -113,8 +112,8 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: return cls( func, depth=0, - function_name="col", - 
evaluate_output_names=lambda _df: column_names, + function_name=function_name, + evaluate_output_names=get_column_names, alias_output_names=None, backend_version=backend_version, version=version, diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index e740b85929..5b8aec6c7a 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -117,37 +117,29 @@ def __init__( def col(self: Self, *column_names: str) -> ArrowExpr: from narwhals._arrow.expr import ArrowExpr + def get_column_names(_: ArrowDataFrame) -> Sequence[str]: + return column_names + return ArrowExpr.from_column_names( - *column_names, backend_version=self._backend_version, version=self._version + get_column_names=get_column_names, + function_name="col", + backend_version=self._backend_version, + version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> ArrowExpr: - from narwhals._arrow.series import ArrowSeries - - def evaluate_output_names(df: ArrowDataFrame) -> Sequence[str]: + def get_column_names(df: ArrowDataFrame) -> Sequence[str]: return [ column_name for column_name in df.columns if column_name not in excluded_names ] - def func(df: ArrowDataFrame) -> list[ArrowSeries]: - return [ - ArrowSeries( - df._native_frame[column_name], - name=column_name, - backend_version=df._backend_version, - version=df._version, - ) - for column_name in evaluate_output_names(df) - ] - - return self._create_expr_from_callable( - func, - depth=0, + return ArrowExpr.from_column_names( + get_column_names=get_column_names, function_name="exclude", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, + backend_version=self._backend_version, + version=self._version, ) def nth(self: Self, *column_indices: int) -> ArrowExpr: diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index c6a861c3d8..f386118b11 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -90,15 +90,18 @@ def func(df: DaskLazyFrame) -> 
list[dx.Series]: @classmethod def from_column_names( cls: type[Self], - *column_names: str, + get_column_names: Callable[[DaskLazyFrame], Sequence[str]], + function_name: str, backend_version: tuple[int, ...], version: Version, ) -> Self: def func(df: DaskLazyFrame) -> list[dx.Series]: try: - return [df._native_frame[column_name] for column_name in column_names] + return [ + df._native_frame[column_name] for column_name in get_column_names(df) + ] except KeyError as e: - missing_columns = [x for x in column_names if x not in df.columns] + missing_columns = [x for x in get_column_names(df) if x not in df.columns] raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns=missing_columns, available_columns=df.columns, @@ -107,8 +110,8 @@ def func(df: DaskLazyFrame) -> list[dx.Series]: return cls( func, depth=0, - function_name="col", - evaluate_output_names=lambda _df: column_names, + function_name=function_name, + evaluate_output_names=get_column_names, alias_output_names=None, backend_version=backend_version, version=version, diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 8d64cf9d11..0bfdf287df 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -67,29 +67,27 @@ def func(df: DaskLazyFrame) -> list[dx.Series]: ) def col(self: Self, *column_names: str) -> DaskExpr: + def get_column_names(_: DaskLazyFrame) -> Sequence[str]: + return column_names + return DaskExpr.from_column_names( - *column_names, backend_version=self._backend_version, version=self._version + get_column_names=get_column_names, + function_name="col", + backend_version=self._backend_version, + version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> DaskExpr: - def evaluate_output_names(df: DaskLazyFrame) -> Sequence[str]: + def get_column_names(df: DaskLazyFrame) -> Sequence[str]: return [ column_name for column_name in df.columns if column_name not in excluded_names ] - def func(df: 
DaskLazyFrame) -> list[dx.Series]: - return [ - df._native_frame[column_name] for column_name in evaluate_output_names(df) - ] - - return DaskExpr( - func, - depth=0, + return DaskExpr.from_column_names( + get_column_names=get_column_names, function_name="exclude", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, backend_version=self._backend_version, version=self._version, ) diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 51f4ac6e47..03273558c9 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -79,17 +79,18 @@ def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Se @classmethod def from_column_names( cls: type[Self], - *column_names: str, + get_column_names: Callable[[DuckDBLazyFrame], Sequence[str]], + function_name: str, backend_version: tuple[int, ...], version: Version, ) -> Self: - def func(_: DuckDBLazyFrame) -> list[duckdb.Expression]: - return [ColumnExpression(col_name) for col_name in column_names] + def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]: + return [ColumnExpression(col_name) for col_name in get_column_names(df)] return cls( func, - function_name="col", - evaluate_output_names=lambda _df: column_names, + function_name=function_name, + evaluate_output_names=get_column_names, alias_output_names=None, backend_version=backend_version, version=version, diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 45c877965e..837be5b476 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -237,28 +237,27 @@ def when(self: Self, predicate: DuckDBExpr) -> DuckDBWhen: ) def col(self: Self, *column_names: str) -> DuckDBExpr: + def get_column_names(_: DuckDBLazyFrame) -> Sequence[str]: + return column_names + return DuckDBExpr.from_column_names( - *column_names, backend_version=self._backend_version, version=self._version + get_column_names=get_column_names, + function_name="col", + 
backend_version=self._backend_version, + version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> DuckDBExpr: - def evaluate_output_names(df: DuckDBLazyFrame) -> Sequence[str]: + def get_column_names(df: DuckDBLazyFrame) -> Sequence[str]: return [ column_name for column_name in df.columns if column_name not in excluded_names ] - def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]: - return [ - ColumnExpression(column_name) for column_name in evaluate_output_names(df) - ] - - return DuckDBExpr( - func, + return DuckDBExpr.from_column_names( + get_column_names=get_column_names, function_name="exclude", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, backend_version=self._backend_version, version=self._version, ) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 857f1afaad..3118fa1a64 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -115,7 +115,8 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: @classmethod def from_column_names( cls: type[Self], - *column_names: str, + get_column_names: Callable[[PandasLikeDataFrame], Sequence[str]], + function_name: str, implementation: Implementation, backend_version: tuple[int, ...], version: Version, @@ -129,10 +130,10 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: backend_version=df._backend_version, version=df._version, ) - for column_name in column_names + for column_name in get_column_names(df) ] except KeyError as e: - missing_columns = [x for x in column_names if x not in df.columns] + missing_columns = [x for x in get_column_names(df) if x not in df.columns] raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns=missing_columns, available_columns=df.columns, @@ -141,8 +142,8 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: return cls( func, depth=0, - function_name="col", - evaluate_output_names=lambda _df: column_names, + 
function_name=function_name, + evaluate_output_names=get_column_names, alias_output_names=None, implementation=implementation, backend_version=backend_version, diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 75b5cec617..0bb2d2b854 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -109,38 +109,31 @@ def _create_compliant_series(self: Self, value: Any) -> PandasLikeSeries: # --- selection --- def col(self: Self, *column_names: str) -> PandasLikeExpr: + def get_column_names(_: PandasLikeDataFrame) -> Sequence[str]: + return column_names + return PandasLikeExpr.from_column_names( - *column_names, + get_column_names=get_column_names, + function_name="col", implementation=self._implementation, backend_version=self._backend_version, version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> PandasLikeExpr: - def evaluate_output_names(df: PandasLikeDataFrame) -> Sequence[str]: + def get_column_names(df: PandasLikeDataFrame) -> Sequence[str]: return [ column_name for column_name in df.columns if column_name not in excluded_names ] - def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - return [ - PandasLikeSeries( - df._native_frame[column_name], - implementation=df._implementation, - backend_version=df._backend_version, - version=df._version, - ) - for column_name in evaluate_output_names(df) - ] - - return self._create_expr_from_callable( - func, - depth=0, - evaluate_output_names=evaluate_output_names, + return PandasLikeExpr.from_column_names( + get_column_names=get_column_names, function_name="exclude", - alias_output_names=None, + implementation=self._implementation, + backend_version=self._backend_version, + version=self._version, ) def nth(self: Self, *column_indices: int) -> PandasLikeExpr: diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 41bbe46c5e..3e0d3fce36 100644 --- a/narwhals/_spark_like/expr.py +++ 
b/narwhals/_spark_like/expr.py @@ -131,18 +131,19 @@ def __narwhals_namespace__(self: Self) -> SparkLikeNamespace: # pragma: no cove @classmethod def from_column_names( cls: type[Self], - *column_names: str, + get_column_names: Callable[[SparkLikeLazyFrame], Sequence[str]], + function_name: str, + implementation: Implementation, backend_version: tuple[int, ...], version: Version, - implementation: Implementation, ) -> Self: def func(df: SparkLikeLazyFrame) -> list[Column]: - return [df._F.col(col_name) for col_name in column_names] + return [df._F.col(col_name) for col_name in get_column_names(df)] return cls( func, - function_name="col", - evaluate_output_names=lambda _df: column_names, + function_name=function_name, + evaluate_output_names=get_column_names, alias_output_names=None, backend_version=backend_version, version=version, diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index 91cbd4f30c..d208648fff 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -62,32 +62,31 @@ def _all(df: SparkLikeLazyFrame) -> list[Column]: ) def col(self: Self, *column_names: str) -> SparkLikeExpr: + def get_column_names(_: SparkLikeLazyFrame) -> Sequence[str]: + return column_names + return SparkLikeExpr.from_column_names( - *column_names, + get_column_names=get_column_names, + function_name="col", + implementation=self._implementation, backend_version=self._backend_version, version=self._version, - implementation=self._implementation, ) def exclude(self: Self, excluded_names: Container[str]) -> SparkLikeExpr: - def evaluate_output_names(df: SparkLikeLazyFrame) -> Sequence[str]: + def get_column_names(df: SparkLikeLazyFrame) -> Sequence[str]: return [ column_name for column_name in df.columns if column_name not in excluded_names ] - def func(df: SparkLikeLazyFrame) -> list[Column]: - return [df._F.col(column_name) for column_name in evaluate_output_names(df)] - - return SparkLikeExpr( - func, + return 
SparkLikeExpr.from_column_names( + get_column_names=get_column_names, function_name="exclude", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, + implementation=self._implementation, backend_version=self._backend_version, version=self._version, - implementation=self._implementation, ) def nth(self: Self, *column_indices: int) -> SparkLikeExpr: From 932c3e1faa2b898fa39be41ffa805aaa9d463b86 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 2 Mar 2025 23:30:44 +0100 Subject: [PATCH 2/7] get_column_names -> evaluate_column_names --- narwhals/_arrow/expr.py | 10 ++++++---- narwhals/_arrow/namespace.py | 8 ++++---- narwhals/_dask/expr.py | 11 +++++++---- narwhals/_dask/namespace.py | 8 ++++---- narwhals/_duckdb/expr.py | 6 +++--- narwhals/_duckdb/namespace.py | 8 ++++---- narwhals/_pandas_like/expr.py | 10 ++++++---- narwhals/_pandas_like/namespace.py | 8 ++++---- narwhals/_spark_like/expr.py | 6 +++--- narwhals/_spark_like/namespace.py | 8 ++++---- 10 files changed, 45 insertions(+), 38 deletions(-) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 171acb9de4..59d3425bec 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -87,7 +87,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: @classmethod def from_column_names( cls: type[Self], - get_column_names: Callable[[ArrowDataFrame], Sequence[str]], + evaluate_column_names: Callable[[ArrowDataFrame], Sequence[str]], function_name: str, backend_version: tuple[int, ...], version: Version, @@ -101,10 +101,12 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: backend_version=df._backend_version, version=df._version, ) - for column_name in get_column_names(df) + for column_name in evaluate_column_names(df) ] except KeyError as e: - missing_columns = [x for x in get_column_names(df) if x not in df.columns] + missing_columns = [ + x for x in evaluate_column_names(df) if x not in df.columns + ] raise ColumnNotFoundError.from_missing_and_available_column_names( 
missing_columns=missing_columns, available_columns=df.columns ) from e @@ -113,7 +115,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: func, depth=0, function_name=function_name, - evaluate_output_names=get_column_names, + evaluate_output_names=evaluate_column_names, alias_output_names=None, backend_version=backend_version, version=version, diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 5b8aec6c7a..fd67514645 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -117,18 +117,18 @@ def __init__( def col(self: Self, *column_names: str) -> ArrowExpr: from narwhals._arrow.expr import ArrowExpr - def get_column_names(_: ArrowDataFrame) -> Sequence[str]: + def evaluate_column_names(_: ArrowDataFrame) -> Sequence[str]: return column_names return ArrowExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="col", backend_version=self._backend_version, version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> ArrowExpr: - def get_column_names(df: ArrowDataFrame) -> Sequence[str]: + def evaluate_column_names(df: ArrowDataFrame) -> Sequence[str]: return [ column_name for column_name in df.columns @@ -136,7 +136,7 @@ def get_column_names(df: ArrowDataFrame) -> Sequence[str]: ] return ArrowExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index f386118b11..75a4e729dc 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -90,7 +90,7 @@ def func(df: DaskLazyFrame) -> list[dx.Series]: @classmethod def from_column_names( cls: type[Self], - get_column_names: Callable[[DaskLazyFrame], Sequence[str]], + evaluate_column_names: Callable[[DaskLazyFrame], Sequence[str]], function_name: str, backend_version: tuple[int, 
...], version: Version, @@ -98,10 +98,13 @@ def from_column_names( def func(df: DaskLazyFrame) -> list[dx.Series]: try: return [ - df._native_frame[column_name] for column_name in get_column_names(df) + df._native_frame[column_name] + for column_name in evaluate_column_names(df) ] except KeyError as e: - missing_columns = [x for x in get_column_names(df) if x not in df.columns] + missing_columns = [ + x for x in evaluate_column_names(df) if x not in df.columns + ] raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns=missing_columns, available_columns=df.columns, @@ -111,7 +114,7 @@ def func(df: DaskLazyFrame) -> list[dx.Series]: func, depth=0, function_name=function_name, - evaluate_output_names=get_column_names, + evaluate_output_names=evaluate_column_names, alias_output_names=None, backend_version=backend_version, version=version, diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 0bfdf287df..50482622d8 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -67,18 +67,18 @@ def func(df: DaskLazyFrame) -> list[dx.Series]: ) def col(self: Self, *column_names: str) -> DaskExpr: - def get_column_names(_: DaskLazyFrame) -> Sequence[str]: + def evaluate_column_names(_: DaskLazyFrame) -> Sequence[str]: return column_names return DaskExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="col", backend_version=self._backend_version, version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> DaskExpr: - def get_column_names(df: DaskLazyFrame) -> Sequence[str]: + def evaluate_column_names(df: DaskLazyFrame) -> Sequence[str]: return [ column_name for column_name in df.columns @@ -86,7 +86,7 @@ def get_column_names(df: DaskLazyFrame) -> Sequence[str]: ] return DaskExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="exclude", 
backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 03273558c9..34757a7cc1 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -79,18 +79,18 @@ def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Se @classmethod def from_column_names( cls: type[Self], - get_column_names: Callable[[DuckDBLazyFrame], Sequence[str]], + evaluate_column_names: Callable[[DuckDBLazyFrame], Sequence[str]], function_name: str, backend_version: tuple[int, ...], version: Version, ) -> Self: def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]: - return [ColumnExpression(col_name) for col_name in get_column_names(df)] + return [ColumnExpression(col_name) for col_name in evaluate_column_names(df)] return cls( func, function_name=function_name, - evaluate_output_names=get_column_names, + evaluate_output_names=evaluate_column_names, alias_output_names=None, backend_version=backend_version, version=version, diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 837be5b476..92f2b4bd24 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -237,18 +237,18 @@ def when(self: Self, predicate: DuckDBExpr) -> DuckDBWhen: ) def col(self: Self, *column_names: str) -> DuckDBExpr: - def get_column_names(_: DuckDBLazyFrame) -> Sequence[str]: + def evaluate_column_names(_: DuckDBLazyFrame) -> Sequence[str]: return column_names return DuckDBExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="col", backend_version=self._backend_version, version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> DuckDBExpr: - def get_column_names(df: DuckDBLazyFrame) -> Sequence[str]: + def evaluate_column_names(df: DuckDBLazyFrame) -> Sequence[str]: return [ column_name for column_name in df.columns @@ -256,7 +256,7 @@ def get_column_names(df: 
DuckDBLazyFrame) -> Sequence[str]: ] return DuckDBExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 3118fa1a64..a7a484eebf 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -115,7 +115,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: @classmethod def from_column_names( cls: type[Self], - get_column_names: Callable[[PandasLikeDataFrame], Sequence[str]], + evaluate_column_names: Callable[[PandasLikeDataFrame], Sequence[str]], function_name: str, implementation: Implementation, backend_version: tuple[int, ...], @@ -130,10 +130,12 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: backend_version=df._backend_version, version=df._version, ) - for column_name in get_column_names(df) + for column_name in evaluate_column_names(df) ] except KeyError as e: - missing_columns = [x for x in get_column_names(df) if x not in df.columns] + missing_columns = [ + x for x in evaluate_column_names(df) if x not in df.columns + ] raise ColumnNotFoundError.from_missing_and_available_column_names( missing_columns=missing_columns, available_columns=df.columns, @@ -143,7 +145,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: func, depth=0, function_name=function_name, - evaluate_output_names=get_column_names, + evaluate_output_names=evaluate_column_names, alias_output_names=None, implementation=implementation, backend_version=backend_version, diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 0bb2d2b854..16b63b7779 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -109,11 +109,11 @@ def _create_compliant_series(self: Self, value: Any) -> PandasLikeSeries: # --- selection --- def col(self: Self, *column_names: 
str) -> PandasLikeExpr: - def get_column_names(_: PandasLikeDataFrame) -> Sequence[str]: + def evaluate_column_names(_: PandasLikeDataFrame) -> Sequence[str]: return column_names return PandasLikeExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="col", implementation=self._implementation, backend_version=self._backend_version, @@ -121,7 +121,7 @@ def get_column_names(_: PandasLikeDataFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> PandasLikeExpr: - def get_column_names(df: PandasLikeDataFrame) -> Sequence[str]: + def evaluate_column_names(df: PandasLikeDataFrame) -> Sequence[str]: return [ column_name for column_name in df.columns @@ -129,7 +129,7 @@ def get_column_names(df: PandasLikeDataFrame) -> Sequence[str]: ] return PandasLikeExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="exclude", implementation=self._implementation, backend_version=self._backend_version, diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 3e0d3fce36..07b63b179a 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -131,19 +131,19 @@ def __narwhals_namespace__(self: Self) -> SparkLikeNamespace: # pragma: no cove @classmethod def from_column_names( cls: type[Self], - get_column_names: Callable[[SparkLikeLazyFrame], Sequence[str]], + evaluate_column_names: Callable[[SparkLikeLazyFrame], Sequence[str]], function_name: str, implementation: Implementation, backend_version: tuple[int, ...], version: Version, ) -> Self: def func(df: SparkLikeLazyFrame) -> list[Column]: - return [df._F.col(col_name) for col_name in get_column_names(df)] + return [df._F.col(col_name) for col_name in evaluate_column_names(df)] return cls( func, function_name=function_name, - evaluate_output_names=get_column_names, + evaluate_output_names=evaluate_column_names, 
alias_output_names=None, backend_version=backend_version, version=version, diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index d208648fff..71e952d78d 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -62,11 +62,11 @@ def _all(df: SparkLikeLazyFrame) -> list[Column]: ) def col(self: Self, *column_names: str) -> SparkLikeExpr: - def get_column_names(_: SparkLikeLazyFrame) -> Sequence[str]: + def evaluate_column_names(_: SparkLikeLazyFrame) -> Sequence[str]: return column_names return SparkLikeExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="col", implementation=self._implementation, backend_version=self._backend_version, @@ -74,7 +74,7 @@ def get_column_names(_: SparkLikeLazyFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> SparkLikeExpr: - def get_column_names(df: SparkLikeLazyFrame) -> Sequence[str]: + def evaluate_column_names(df: SparkLikeLazyFrame) -> Sequence[str]: return [ column_name for column_name in df.columns @@ -82,7 +82,7 @@ def get_column_names(df: SparkLikeLazyFrame) -> Sequence[str]: ] return SparkLikeExpr.from_column_names( - get_column_names=get_column_names, + evaluate_column_names=evaluate_column_names, function_name="exclude", implementation=self._implementation, backend_version=self._backend_version, From 8e41111d15b7bf1d24ba743aae6b9eaf344b7075 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 2 Mar 2025 23:46:48 +0100 Subject: [PATCH 3/7] factor out --- narwhals/_arrow/namespace.py | 11 +++-------- narwhals/_dask/namespace.py | 11 +++-------- narwhals/_duckdb/namespace.py | 16 ++++------------ narwhals/_pandas_like/namespace.py | 11 +++-------- narwhals/_spark_like/namespace.py | 11 +++-------- narwhals/utils.py | 5 +++++ 6 files changed, 21 insertions(+), 44 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 
fd67514645..cc00def0ad 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -1,6 +1,7 @@ from __future__ import annotations import operator +from functools import partial from functools import reduce from typing import TYPE_CHECKING from typing import Any @@ -27,6 +28,7 @@ from narwhals._expression_parsing import combine_evaluate_output_names from narwhals.typing import CompliantNamespace from narwhals.utils import Implementation +from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names from narwhals.utils import import_dtypes_module @@ -128,15 +130,8 @@ def evaluate_column_names(_: ArrowDataFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> ArrowExpr: - def evaluate_column_names(df: ArrowDataFrame) -> Sequence[str]: - return [ - column_name - for column_name in df.columns - if column_name not in excluded_names - ] - return ArrowExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=partial(exclude_column_names, names=excluded_names), function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 50482622d8..962e4052c6 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -1,6 +1,7 @@ from __future__ import annotations import operator +from functools import partial from functools import reduce from typing import TYPE_CHECKING from typing import Any @@ -25,6 +26,7 @@ from narwhals._expression_parsing import combine_evaluate_output_names from narwhals.typing import CompliantNamespace from narwhals.utils import Implementation +from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names if TYPE_CHECKING: @@ -78,15 +80,8 @@ def evaluate_column_names(_: DaskLazyFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> DaskExpr: - def 
evaluate_column_names(df: DaskLazyFrame) -> Sequence[str]: - return [ - column_name - for column_name in df.columns - if column_name not in excluded_names - ] - return DaskExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=partial(exclude_column_names, names=excluded_names), function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 92f2b4bd24..15ad9614a0 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -1,7 +1,7 @@ from __future__ import annotations -import functools import operator +from functools import partial from functools import reduce from typing import TYPE_CHECKING from typing import Any @@ -26,6 +26,7 @@ from narwhals._expression_parsing import combine_evaluate_output_names from narwhals.typing import CompliantNamespace from narwhals.utils import Implementation +from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names if TYPE_CHECKING: @@ -80,9 +81,7 @@ def concat( if how == "vertical" and not all(x.schema == schema for x in items[1:]): msg = "inputs should all have the same schema" raise TypeError(msg) - res = functools.reduce( - lambda x, y: x.union(y), (item._native_frame for item in items) - ) + res = reduce(lambda x, y: x.union(y), (item._native_frame for item in items)) return first._from_native_frame(res) def concat_str( @@ -248,15 +247,8 @@ def evaluate_column_names(_: DuckDBLazyFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> DuckDBExpr: - def evaluate_column_names(df: DuckDBLazyFrame) -> Sequence[str]: - return [ - column_name - for column_name in df.columns - if column_name not in excluded_names - ] - return DuckDBExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=partial(exclude_column_names, names=excluded_names), function_name="exclude", 
backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 16b63b7779..2678746745 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -1,6 +1,7 @@ from __future__ import annotations import operator +from functools import partial from functools import reduce from typing import TYPE_CHECKING from typing import Any @@ -23,6 +24,7 @@ from narwhals._pandas_like.utils import horizontal_concat from narwhals._pandas_like.utils import vertical_concat from narwhals.typing import CompliantNamespace +from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names from narwhals.utils import import_dtypes_module @@ -121,15 +123,8 @@ def evaluate_column_names(_: PandasLikeDataFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> PandasLikeExpr: - def evaluate_column_names(df: PandasLikeDataFrame) -> Sequence[str]: - return [ - column_name - for column_name in df.columns - if column_name not in excluded_names - ] - return PandasLikeExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=partial(exclude_column_names, names=excluded_names), function_name="exclude", implementation=self._implementation, backend_version=self._backend_version, diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index 71e952d78d..e742606b7c 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -1,6 +1,7 @@ from __future__ import annotations import operator +from functools import partial from functools import reduce from typing import TYPE_CHECKING from typing import Any @@ -19,6 +20,7 @@ from narwhals._spark_like.utils import maybe_evaluate_expr from narwhals._spark_like.utils import narwhals_to_native_dtype from narwhals.typing import CompliantNamespace +from narwhals.utils import exclude_column_names from 
narwhals.utils import get_column_names if TYPE_CHECKING: @@ -74,15 +76,8 @@ def evaluate_column_names(_: SparkLikeLazyFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> SparkLikeExpr: - def evaluate_column_names(df: SparkLikeLazyFrame) -> Sequence[str]: - return [ - column_name - for column_name in df.columns - if column_name not in excluded_names - ] - return SparkLikeExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=partial(exclude_column_names, names=excluded_names), function_name="exclude", implementation=self._implementation, backend_version=self._backend_version, diff --git a/narwhals/utils.py b/narwhals/utils.py index f5959bb550..3e815f884e 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -9,6 +9,7 @@ from secrets import token_hex from typing import TYPE_CHECKING from typing import Any +from typing import Container from typing import Iterable from typing import Literal from typing import Sequence @@ -1356,6 +1357,10 @@ def get_column_names(frame: _StoresColumns, /) -> Sequence[str]: return frame.columns +def exclude_column_names(frame: _StoresColumns, names: Container[str]) -> Sequence[str]: + return [col_name for col_name in frame.columns if col_name not in names] + + def _hasattr_static(obj: Any, attr: str) -> bool: sentinel = object() return getattr_static(obj, attr, sentinel) is not sentinel From 9d3bf986563e799efe5b6c57192c9a33075e170f Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 2 Mar 2025 23:55:18 +0100 Subject: [PATCH 4/7] simplify namespace.all() --- narwhals/_arrow/namespace.py | 18 ++---------------- narwhals/_dask/namespace.py | 10 ++-------- narwhals/_duckdb/namespace.py | 10 ++-------- narwhals/_pandas_like/namespace.py | 15 ++------------- narwhals/_spark_like/namespace.py | 11 +++-------- 5 files changed, 11 insertions(+), 53 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index cc00def0ad..11c409e849 100644 
--- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -164,23 +164,9 @@ def len(self: Self) -> ArrowExpr: ) def all(self: Self) -> ArrowExpr: - from narwhals._arrow.expr import ArrowExpr - from narwhals._arrow.series import ArrowSeries - - return ArrowExpr( - lambda df: [ - ArrowSeries( - df._native_frame[column_name], - name=column_name, - backend_version=df._backend_version, - version=df._version, - ) - for column_name in df.columns - ], - depth=0, + return ArrowExpr.from_column_names( + evaluate_column_names=get_column_names, function_name="all", - evaluate_output_names=get_column_names, - alias_output_names=None, backend_version=self._backend_version, version=self._version, ) diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 962e4052c6..f3ed364dda 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -55,15 +55,9 @@ def __init__( self._version = version def all(self: Self) -> DaskExpr: - def func(df: DaskLazyFrame) -> list[dx.Series]: - return [df._native_frame[column_name] for column_name in df.columns] - - return DaskExpr( - func, - depth=0, + return DaskExpr.from_column_names( + evaluate_column_names=get_column_names, function_name="all", - evaluate_output_names=get_column_names, - alias_output_names=None, backend_version=self._backend_version, version=self._version, ) diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 15ad9614a0..9581572994 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -12,7 +12,6 @@ from duckdb import CaseExpression from duckdb import CoalesceOperator -from duckdb import ColumnExpression from duckdb import FunctionExpression from duckdb.typing import BIGINT from duckdb.typing import VARCHAR @@ -52,14 +51,9 @@ def selectors(self: Self) -> DuckDBSelectorNamespace: return DuckDBSelectorNamespace(self) def all(self: Self) -> DuckDBExpr: - def _all(df: DuckDBLazyFrame) -> list[duckdb.Expression]: - return 
[ColumnExpression(col_name) for col_name in df.columns] - - return DuckDBExpr( - call=_all, + return DuckDBExpr.from_column_names( + evaluate_column_names=get_column_names, function_name="all", - evaluate_output_names=get_column_names, - alias_output_names=None, backend_version=self._backend_version, version=self._version, ) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 2678746745..3c412cf8a2 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -140,20 +140,9 @@ def nth(self: Self, *column_indices: int) -> PandasLikeExpr: ) def all(self: Self) -> PandasLikeExpr: - return PandasLikeExpr( - lambda df: [ - PandasLikeSeries( - df._native_frame[column_name], - implementation=self._implementation, - backend_version=self._backend_version, - version=self._version, - ) - for column_name in df.columns - ], - depth=0, + return PandasLikeExpr.from_column_names( + evaluate_column_names=get_column_names, function_name="all", - evaluate_output_names=get_column_names, - alias_output_names=None, implementation=self._implementation, backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index e742606b7c..670a0f437c 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -50,17 +50,12 @@ def selectors(self: Self) -> SparkLikeSelectorNamespace: return SparkLikeSelectorNamespace(self) def all(self: Self) -> SparkLikeExpr: - def _all(df: SparkLikeLazyFrame) -> list[Column]: - return [df._F.col(col_name) for col_name in df.columns] - - return SparkLikeExpr( - call=_all, + return SparkLikeExpr.from_column_names( + evaluate_column_names=get_column_names, function_name="all", - evaluate_output_names=get_column_names, - alias_output_names=None, + implementation=self._implementation, backend_version=self._backend_version, version=self._version, - implementation=self._implementation, ) def 
col(self: Self, *column_names: str) -> SparkLikeExpr: From efd5a227a082adebffd11a36fe3149445edacf3a Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 3 Mar 2025 00:01:16 +0100 Subject: [PATCH 5/7] partial out of binding --- narwhals/_arrow/namespace.py | 3 ++- narwhals/_dask/namespace.py | 3 ++- narwhals/_duckdb/namespace.py | 3 ++- narwhals/_pandas_like/namespace.py | 3 ++- narwhals/_spark_like/namespace.py | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 11c409e849..5abc300a4d 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -130,8 +130,9 @@ def evaluate_column_names(_: ArrowDataFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> ArrowExpr: + evaluate_column_names = partial(exclude_column_names, names=excluded_names) return ArrowExpr.from_column_names( - evaluate_column_names=partial(exclude_column_names, names=excluded_names), + evaluate_column_names=evaluate_column_names, function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index f3ed364dda..774fc26865 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -74,8 +74,9 @@ def evaluate_column_names(_: DaskLazyFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> DaskExpr: + evaluate_column_names = partial(exclude_column_names, names=excluded_names) return DaskExpr.from_column_names( - evaluate_column_names=partial(exclude_column_names, names=excluded_names), + evaluate_column_names=evaluate_column_names, function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 9581572994..83c385a61e 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -241,8 +241,9 @@ def 
evaluate_column_names(_: DuckDBLazyFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> DuckDBExpr: + evaluate_column_names = partial(exclude_column_names, names=excluded_names) return DuckDBExpr.from_column_names( - evaluate_column_names=partial(exclude_column_names, names=excluded_names), + evaluate_column_names=evaluate_column_names, function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 3c412cf8a2..b5a29ff4ad 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -123,8 +123,9 @@ def evaluate_column_names(_: PandasLikeDataFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> PandasLikeExpr: + evaluate_column_names = partial(exclude_column_names, names=excluded_names) return PandasLikeExpr.from_column_names( - evaluate_column_names=partial(exclude_column_names, names=excluded_names), + evaluate_column_names=evaluate_column_names, function_name="exclude", implementation=self._implementation, backend_version=self._backend_version, diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index 670a0f437c..130393144f 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -71,8 +71,9 @@ def evaluate_column_names(_: SparkLikeLazyFrame) -> Sequence[str]: ) def exclude(self: Self, excluded_names: Container[str]) -> SparkLikeExpr: + evaluate_column_names = partial(exclude_column_names, names=excluded_names) return SparkLikeExpr.from_column_names( - evaluate_column_names=partial(exclude_column_names, names=excluded_names), + evaluate_column_names=evaluate_column_names, function_name="exclude", implementation=self._implementation, backend_version=self._backend_version, From bcb5621480bbade1fa2e2956268394f6ba13167c Mon Sep 17 00:00:00 2001 From: dangotbanned 
<125183946+dangotbanned@users.noreply.github.com> Date: Mon, 3 Mar 2025 16:10:51 +0000 Subject: [PATCH 6/7] refactor: Extract repeated inner function to `utils.passthrough_column_names` --- narwhals/_arrow/namespace.py | 6 ++---- narwhals/_dask/namespace.py | 6 ++---- narwhals/_duckdb/namespace.py | 6 ++---- narwhals/_pandas_like/namespace.py | 6 ++---- narwhals/_spark_like/namespace.py | 6 ++---- narwhals/utils.py | 8 ++++++++ 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 5abc300a4d..865c99966f 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -31,6 +31,7 @@ from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names from narwhals.utils import import_dtypes_module +from narwhals.utils import passthrough_column_names if TYPE_CHECKING: from typing import Callable @@ -119,11 +120,8 @@ def __init__( def col(self: Self, *column_names: str) -> ArrowExpr: from narwhals._arrow.expr import ArrowExpr - def evaluate_column_names(_: ArrowDataFrame) -> Sequence[str]: - return column_names - return ArrowExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=passthrough_column_names(column_names), function_name="col", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 774fc26865..13f2382858 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -28,6 +28,7 @@ from narwhals.utils import Implementation from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names +from narwhals.utils import passthrough_column_names if TYPE_CHECKING: from typing_extensions import Self @@ -63,11 +64,8 @@ def all(self: Self) -> DaskExpr: ) def col(self: Self, *column_names: str) -> DaskExpr: - def evaluate_column_names(_: DaskLazyFrame) -> Sequence[str]: - return column_names - 
return DaskExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=passthrough_column_names(column_names), function_name="col", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 83c385a61e..d95202d3a9 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -27,6 +27,7 @@ from narwhals.utils import Implementation from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names +from narwhals.utils import passthrough_column_names if TYPE_CHECKING: import duckdb @@ -230,11 +231,8 @@ def when(self: Self, predicate: DuckDBExpr) -> DuckDBWhen: ) def col(self: Self, *column_names: str) -> DuckDBExpr: - def evaluate_column_names(_: DuckDBLazyFrame) -> Sequence[str]: - return column_names - return DuckDBExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=passthrough_column_names(column_names), function_name="col", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index b5a29ff4ad..3f5b15883d 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -27,6 +27,7 @@ from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names from narwhals.utils import import_dtypes_module +from narwhals.utils import passthrough_column_names if TYPE_CHECKING: from typing_extensions import Self @@ -111,11 +112,8 @@ def _create_compliant_series(self: Self, value: Any) -> PandasLikeSeries: # --- selection --- def col(self: Self, *column_names: str) -> PandasLikeExpr: - def evaluate_column_names(_: PandasLikeDataFrame) -> Sequence[str]: - return column_names - return PandasLikeExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=passthrough_column_names(column_names), 
function_name="col", implementation=self._implementation, backend_version=self._backend_version, diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index 130393144f..1263e1f2b3 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -22,6 +22,7 @@ from narwhals.typing import CompliantNamespace from narwhals.utils import exclude_column_names from narwhals.utils import get_column_names +from narwhals.utils import passthrough_column_names if TYPE_CHECKING: from pyspark.sql import Column @@ -59,11 +60,8 @@ def all(self: Self) -> SparkLikeExpr: ) def col(self: Self, *column_names: str) -> SparkLikeExpr: - def evaluate_column_names(_: SparkLikeLazyFrame) -> Sequence[str]: - return column_names - return SparkLikeExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + evaluate_column_names=passthrough_column_names(column_names), function_name="col", implementation=self._implementation, backend_version=self._backend_version, diff --git a/narwhals/utils.py b/narwhals/utils.py index 3e815f884e..a75ead5f3d 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -9,6 +9,7 @@ from secrets import token_hex from typing import TYPE_CHECKING from typing import Any +from typing import Callable from typing import Container from typing import Iterable from typing import Literal @@ -1361,6 +1362,13 @@ def exclude_column_names(frame: _StoresColumns, names: Container[str]) -> Sequen return [col_name for col_name in frame.columns if col_name not in names] +def passthrough_column_names(names: Sequence[str], /) -> Callable[[Any], Sequence[str]]: + def fn(_frame: Any, /) -> Sequence[str]: + return names + + return fn + + def _hasattr_static(obj: Any, attr: str) -> bool: sentinel = object() return getattr_static(obj, attr, sentinel) is not sentinel From 6d3f3a7b01f0006a76ff76e509124a3fb0ba9bfc Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 3 Mar 2025 
16:22:50 +0000 Subject: [PATCH 7/7] refactor: reduce number of "column_names" occurrences - `*column_names` was already positional-only - The `from_column_names` name is already explicit enough in what the first argument should be IMO --- narwhals/_arrow/expr.py | 2 ++ narwhals/_arrow/namespace.py | 7 +++---- narwhals/_dask/expr.py | 2 ++ narwhals/_dask/namespace.py | 7 +++---- narwhals/_duckdb/expr.py | 2 ++ narwhals/_duckdb/namespace.py | 7 +++---- narwhals/_pandas_like/expr.py | 2 ++ narwhals/_pandas_like/namespace.py | 7 +++---- narwhals/_spark_like/expr.py | 2 ++ narwhals/_spark_like/namespace.py | 7 +++---- 10 files changed, 25 insertions(+), 20 deletions(-) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 59d3425bec..ab1e0e54b0 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -88,6 +88,8 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: def from_column_names( cls: type[Self], evaluate_column_names: Callable[[ArrowDataFrame], Sequence[str]], + /, + *, function_name: str, backend_version: tuple[int, ...], version: Version, diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 865c99966f..ac49998066 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -121,16 +121,15 @@ def col(self: Self, *column_names: str) -> ArrowExpr: from narwhals._arrow.expr import ArrowExpr return ArrowExpr.from_column_names( - evaluate_column_names=passthrough_column_names(column_names), + passthrough_column_names(column_names), function_name="col", backend_version=self._backend_version, version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> ArrowExpr: - evaluate_column_names = partial(exclude_column_names, names=excluded_names) return ArrowExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + partial(exclude_column_names, names=excluded_names), function_name="exclude", backend_version=self._backend_version, version=self._version, @@ -164,7
+163,7 @@ def len(self: Self) -> ArrowExpr: def all(self: Self) -> ArrowExpr: return ArrowExpr.from_column_names( - evaluate_column_names=get_column_names, + get_column_names, function_name="all", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 75a4e729dc..b1930e5628 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -91,6 +91,8 @@ def func(df: DaskLazyFrame) -> list[dx.Series]: def from_column_names( cls: type[Self], evaluate_column_names: Callable[[DaskLazyFrame], Sequence[str]], + /, + *, function_name: str, backend_version: tuple[int, ...], version: Version, diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 13f2382858..eddbe7925f 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -57,7 +57,7 @@ def __init__( def all(self: Self) -> DaskExpr: return DaskExpr.from_column_names( - evaluate_column_names=get_column_names, + get_column_names, function_name="all", backend_version=self._backend_version, version=self._version, @@ -65,16 +65,15 @@ def all(self: Self) -> DaskExpr: def col(self: Self, *column_names: str) -> DaskExpr: return DaskExpr.from_column_names( - evaluate_column_names=passthrough_column_names(column_names), + passthrough_column_names(column_names), function_name="col", backend_version=self._backend_version, version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> DaskExpr: - evaluate_column_names = partial(exclude_column_names, names=excluded_names) return DaskExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + partial(exclude_column_names, names=excluded_names), function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 34757a7cc1..2f47942b55 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -80,6 +80,8 @@ def broadcast(self, kind: 
Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Se def from_column_names( cls: type[Self], evaluate_column_names: Callable[[DuckDBLazyFrame], Sequence[str]], + /, + *, function_name: str, backend_version: tuple[int, ...], version: Version, diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index d95202d3a9..f8780fbc44 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -53,7 +53,7 @@ def selectors(self: Self) -> DuckDBSelectorNamespace: def all(self: Self) -> DuckDBExpr: return DuckDBExpr.from_column_names( - evaluate_column_names=get_column_names, + get_column_names, function_name="all", backend_version=self._backend_version, version=self._version, @@ -232,16 +232,15 @@ def when(self: Self, predicate: DuckDBExpr) -> DuckDBWhen: def col(self: Self, *column_names: str) -> DuckDBExpr: return DuckDBExpr.from_column_names( - evaluate_column_names=passthrough_column_names(column_names), + passthrough_column_names(column_names), function_name="col", backend_version=self._backend_version, version=self._version, ) def exclude(self: Self, excluded_names: Container[str]) -> DuckDBExpr: - evaluate_column_names = partial(exclude_column_names, names=excluded_names) return DuckDBExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + partial(exclude_column_names, names=excluded_names), function_name="exclude", backend_version=self._backend_version, version=self._version, diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index a7a484eebf..3caf6c9828 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -116,6 +116,8 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: def from_column_names( cls: type[Self], evaluate_column_names: Callable[[PandasLikeDataFrame], Sequence[str]], + /, + *, function_name: str, implementation: Implementation, backend_version: tuple[int, ...], diff --git a/narwhals/_pandas_like/namespace.py 
b/narwhals/_pandas_like/namespace.py index 3f5b15883d..4b4994c6d0 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -113,7 +113,7 @@ def _create_compliant_series(self: Self, value: Any) -> PandasLikeSeries: # --- selection --- def col(self: Self, *column_names: str) -> PandasLikeExpr: return PandasLikeExpr.from_column_names( - evaluate_column_names=passthrough_column_names(column_names), + passthrough_column_names(column_names), function_name="col", implementation=self._implementation, backend_version=self._backend_version, @@ -121,9 +121,8 @@ def col(self: Self, *column_names: str) -> PandasLikeExpr: ) def exclude(self: Self, excluded_names: Container[str]) -> PandasLikeExpr: - evaluate_column_names = partial(exclude_column_names, names=excluded_names) return PandasLikeExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + partial(exclude_column_names, names=excluded_names), function_name="exclude", implementation=self._implementation, backend_version=self._backend_version, @@ -140,7 +139,7 @@ def nth(self: Self, *column_indices: int) -> PandasLikeExpr: def all(self: Self) -> PandasLikeExpr: return PandasLikeExpr.from_column_names( - evaluate_column_names=get_column_names, + get_column_names, function_name="all", implementation=self._implementation, backend_version=self._backend_version, diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 07b63b179a..f2c09ea8b9 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -132,6 +132,8 @@ def __narwhals_namespace__(self: Self) -> SparkLikeNamespace: # pragma: no cove def from_column_names( cls: type[Self], evaluate_column_names: Callable[[SparkLikeLazyFrame], Sequence[str]], + /, + *, function_name: str, implementation: Implementation, backend_version: tuple[int, ...], diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index 1263e1f2b3..91cad2bbb6 100644 --- 
a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -52,7 +52,7 @@ def selectors(self: Self) -> SparkLikeSelectorNamespace: def all(self: Self) -> SparkLikeExpr: return SparkLikeExpr.from_column_names( - evaluate_column_names=get_column_names, + get_column_names, function_name="all", implementation=self._implementation, backend_version=self._backend_version, @@ -61,7 +61,7 @@ def all(self: Self) -> SparkLikeExpr: def col(self: Self, *column_names: str) -> SparkLikeExpr: return SparkLikeExpr.from_column_names( - evaluate_column_names=passthrough_column_names(column_names), + passthrough_column_names(column_names), function_name="col", implementation=self._implementation, backend_version=self._backend_version, @@ -69,9 +69,8 @@ def col(self: Self, *column_names: str) -> SparkLikeExpr: ) def exclude(self: Self, excluded_names: Container[str]) -> SparkLikeExpr: - evaluate_column_names = partial(exclude_column_names, names=excluded_names) return SparkLikeExpr.from_column_names( - evaluate_column_names=evaluate_column_names, + partial(exclude_column_names, names=excluded_names), function_name="exclude", implementation=self._implementation, backend_version=self._backend_version,