Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/narwhals.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Here are the top-level functions available in Narwhals.
- col
- concat
- concat_str
- exclude
- from_arrow
- from_dict
- from_native
Expand Down
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from narwhals.functions import col
from narwhals.functions import concat
from narwhals.functions import concat_str
from narwhals.functions import exclude
from narwhals.functions import from_arrow
from narwhals.functions import from_dict
from narwhals.functions import from_numpy
Expand Down Expand Up @@ -123,6 +124,7 @@
"dependencies",
"dtypes",
"exceptions",
"exclude",
"from_arrow",
"from_dict",
"from_native",
Expand Down
30 changes: 30 additions & 0 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Container
from typing import Iterable
from typing import Literal
from typing import Sequence
Expand Down Expand Up @@ -120,6 +121,35 @@ def col(self: Self, *column_names: str) -> ArrowExpr:
*column_names, backend_version=self._backend_version, version=self._version
)

def exclude(self: Self, excluded_names: Container[str]) -> ArrowExpr:
from narwhals._arrow.series import ArrowSeries

def evaluate_output_names(df: ArrowDataFrame) -> Sequence[str]:
return [
column_name
for column_name in df.columns
if column_name not in excluded_names
]

def func(df: ArrowDataFrame) -> list[ArrowSeries]:
return [
ArrowSeries(
df._native_frame[column_name],
name=column_name,
backend_version=df._backend_version,
version=df._version,
)
for column_name in evaluate_output_names(df)
]
Comment on lines +134 to +143
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we could tweak ArrowExpr.from_column_names and use it here 🤔 It might be a follow-up refactor

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The refactor would be to accept a callable that returns the names.

@classmethod
def from_column_names(
    cls,
    get_column_names,  # callable
    function_name,
    backend_version,
    version,
):
    def func(df):
        return [... for column_name in get_column_names(df)]

    return cls(
        func,
        function_name=function_name,
        evaluate_output_names=get_column_names
    )

Then exclude and col get refactored to:

def col(self: Self, *column_names: str):
    return ArrowExpr.from_column_names(
        lambda _: column_names, function_name="col", ...
    )

def exclude(self: Self, *column_names: str):
    def get_column_names(df) -> Sequence[str]:
        exclude_names = set(column_names)
        return [
            column_name
            for column_name in df.columns
            if column_name not in exclude_names
        ]

    return ArrowExpr.from_column_names(
        get_column_names,
        function_name="exclude", ...
    )

I did not want to increase the scope of this PR by changing the signature of from_column_names and changing col. I'm okay with doing a quick follow up.

Copy link
Member

@FBruzzesi FBruzzesi Mar 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @thomasjpfan

The refactor would be to accept a callable that returns the names.

Yes I think in the long term that's desirable and we should aim for that

I did not want to increase the scope of this PR by changing the signature of from_column_names and changing col. I'm okay with doing a quick follow up.

Happy to keep it as follow up


return self._create_expr_from_callable(
func,
depth=0,
function_name="exclude",
evaluate_output_names=evaluate_output_names,
alias_output_names=None,
)

def nth(self: Self, *column_indices: int) -> ArrowExpr:
from narwhals._arrow.expr import ArrowExpr

Expand Down
24 changes: 24 additions & 0 deletions narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Container
from typing import Iterable
from typing import Literal
from typing import Sequence
Expand Down Expand Up @@ -67,6 +68,29 @@ def col(self: Self, *column_names: str) -> DaskExpr:
*column_names, backend_version=self._backend_version, version=self._version
)

def exclude(self: Self, excluded_names: Container[str]) -> DaskExpr:
    """Create an expression selecting every column not in `excluded_names`.

    Arguments:
        excluded_names: Names of the columns to leave out. Only membership
            tests (`in`) are performed on it.

    Returns:
        A `DaskExpr` whose output columns are resolved from `df.columns`
        each time the expression is evaluated against a frame.
    """

    def kept_names(df: DaskLazyFrame) -> Sequence[str]:
        # The surviving columns depend on the frame the expression is applied
        # to, so they are computed per frame rather than once up front.
        kept = []
        for name in df.columns:
            if name in excluded_names:
                continue
            kept.append(name)
        return kept

    def select_kept(df: DaskLazyFrame) -> list[dx.Series]:
        return [df._native_frame[name] for name in kept_names(df)]

    return DaskExpr(
        select_kept,
        depth=0,
        function_name="exclude",
        evaluate_output_names=kept_names,
        alias_output_names=None,
        backend_version=self._backend_version,
        version=self._version,
    )

def nth(self: Self, *column_indices: int) -> DaskExpr:
return DaskExpr.from_column_indices(
*column_indices, backend_version=self._backend_version, version=self._version
Expand Down
23 changes: 23 additions & 0 deletions narwhals/_duckdb/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Container
from typing import Literal
from typing import Sequence

Expand Down Expand Up @@ -237,6 +238,28 @@ def col(self: Self, *column_names: str) -> DuckDBExpr:
*column_names, backend_version=self._backend_version, version=self._version
)

def exclude(self: Self, excluded_names: Container[str]) -> DuckDBExpr:
    """Create an expression selecting every column not in `excluded_names`.

    Arguments:
        excluded_names: Names of the columns to leave out. Only membership
            tests (`in`) are performed on it.

    Returns:
        A `DuckDBExpr` that yields one `ColumnExpression` per remaining
        column of the frame it is evaluated against.
    """

    def remaining_names(df: DuckDBLazyFrame) -> Sequence[str]:
        # Filter in frame order so the output keeps the original column order.
        return list(filter(lambda name: name not in excluded_names, df.columns))

    def remaining_columns(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
        expressions = []
        for name in remaining_names(df):
            expressions.append(ColumnExpression(name))
        return expressions

    return DuckDBExpr(
        remaining_columns,
        function_name="exclude",
        evaluate_output_names=remaining_names,
        alias_output_names=None,
        backend_version=self._backend_version,
        version=self._version,
    )

def nth(self: Self, *column_indices: int) -> DuckDBExpr:
return DuckDBExpr.from_column_indices(
*column_indices, backend_version=self._backend_version, version=self._version
Expand Down
28 changes: 28 additions & 0 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Container
from typing import Iterable
from typing import Literal
from typing import Sequence
Expand Down Expand Up @@ -115,6 +116,33 @@ def col(self: Self, *column_names: str) -> PandasLikeExpr:
version=self._version,
)

def exclude(self: Self, excluded_names: Container[str]) -> PandasLikeExpr:
    """Create an expression selecting every column not in `excluded_names`.

    Arguments:
        excluded_names: Names of the columns to leave out. Only membership
            tests (`in`) are performed on it.

    Returns:
        A `PandasLikeExpr` that wraps each remaining native column in a
        `PandasLikeSeries` carrying the frame's implementation and versions.
    """

    def output_names(df: PandasLikeDataFrame) -> Sequence[str]:
        return [name for name in df.columns if name not in excluded_names]

    def wrap_remaining(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
        wrapped = []
        for name in output_names(df):
            # Series metadata is taken from the frame being evaluated,
            # not from this namespace.
            series = PandasLikeSeries(
                df._native_frame[name],
                implementation=df._implementation,
                backend_version=df._backend_version,
                version=df._version,
            )
            wrapped.append(series)
        return wrapped

    return self._create_expr_from_callable(
        wrap_remaining,
        depth=0,
        evaluate_output_names=output_names,
        function_name="exclude",
        alias_output_names=None,
    )

def nth(self: Self, *column_indices: int) -> PandasLikeExpr:
return PandasLikeExpr.from_column_indices(
*column_indices,
Expand Down
22 changes: 22 additions & 0 deletions narwhals/_spark_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Container
from typing import Iterable
from typing import Literal
from typing import Sequence
Expand Down Expand Up @@ -68,6 +69,27 @@ def col(self: Self, *column_names: str) -> SparkLikeExpr:
implementation=self._implementation,
)

def exclude(self: Self, excluded_names: Container[str]) -> SparkLikeExpr:
    """Create an expression selecting every column not in `excluded_names`.

    Arguments:
        excluded_names: Names of the columns to leave out. Only membership
            tests (`in`) are performed on it.

    Returns:
        A `SparkLikeExpr` that yields `df._F.col(name)` for each remaining
        column of the frame it is evaluated against.
    """

    def names_to_keep(df: SparkLikeLazyFrame) -> Sequence[str]:
        keep = []
        for name in df.columns:
            if name not in excluded_names:
                keep.append(name)
        return keep

    def columns_to_keep(df: SparkLikeLazyFrame) -> list[Column]:
        # Column references are built through the frame's own functions
        # module (`df._F`).
        selected = []
        for name in names_to_keep(df):
            selected.append(df._F.col(name))
        return selected

    return SparkLikeExpr(
        columns_to_keep,
        function_name="exclude",
        evaluate_output_names=names_to_keep,
        alias_output_names=None,
        backend_version=self._backend_version,
        version=self._version,
        implementation=self._implementation,
    )

def nth(self: Self, *column_indices: int) -> SparkLikeExpr:
return SparkLikeExpr.from_column_indices(
*column_indices,
Expand Down
37 changes: 37 additions & 0 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,43 @@ def func(plx: Any) -> Any:
return Expr(func, ExprMetadata.selector())


def exclude(*names: str | Iterable[str]) -> Expr:
    """Creates an expression that excludes columns by their name(s).

    Arguments:
        names: Name(s) of the columns to exclude. May be passed as separate
            strings, as iterables of strings, or a mix of both.

    Returns:
        A new expression.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>>
        >>> df_native = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": ["x", "z"]})
        >>> nw.from_native(df_native).select(nw.exclude("c", "a"))
        ┌──────────────────┐
        |Narwhals DataFrame|
        |------------------|
        |  shape: (2, 1)   |
        |  ┌─────┐         |
        |  │ b   │         |
        |  │ --- │         |
        |  │ i64 │         |
        |  ╞═════╡         |
        |  │ 3   │         |
        |  │ 4   │         |
        |  └─────┘         |
        └──────────────────┘
    """
    # Flatten and freeze the names once, at construction time, so each
    # backend's `exclude` receives a single container to test membership
    # against.
    exclude_names = frozenset(flatten(names))

    def func(plx: Any) -> Any:
        return plx.exclude(exclude_names)

    return Expr(func, ExprMetadata.selector())


def nth(*indices: int | Sequence[int]) -> Expr:
"""Creates an expression that references one or more columns by their index(es).

Expand Down
13 changes: 13 additions & 0 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1826,6 +1826,18 @@ def col(*names: str | Iterable[str]) -> Expr:
return _stableify(nw.col(*names))


def exclude(*names: str | Iterable[str]) -> Expr:
    """Creates an expression that excludes columns by their name(s).

    Builds the expression with `nw.exclude` and passes it through
    `_stableify` before returning it.

    Arguments:
        names: Name(s) of the columns to exclude.

    Returns:
        A new expression.
    """
    main_namespace_expr = nw.exclude(*names)
    return _stableify(main_namespace_expr)


def nth(*indices: int | Sequence[int]) -> Expr:
"""Creates an expression that references one or more columns by their index(es).

Expand Down Expand Up @@ -2417,6 +2429,7 @@ def scan_parquet(
"dependencies",
"dtypes",
"exceptions",
"exclude",
"from_arrow",
"from_dict",
"from_native",
Expand Down
32 changes: 32 additions & 0 deletions tests/expr_and_series/exclude_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import pytest

import narwhals.stable.v1 as nw
from tests.utils import assert_equal_data

if TYPE_CHECKING:
from tests.utils import Constructor


@pytest.mark.parametrize(
    ("exclude_selector", "expected_cols"),
    [
        (nw.exclude("a"), ["b", "z"]),
        (nw.exclude("b", "z"), ["a"]),
        (nw.exclude(["a"]), ["b", "z"]),
        (nw.exclude(["b", "z"]), ["a"]),
    ],
)
def test_exclude(
    constructor: Constructor, exclude_selector: nw.Expr, expected_cols: list[str]
) -> None:
    """Selecting with `nw.exclude(...)` keeps exactly the non-excluded columns.

    Covers names passed as separate positional strings and as a single list.
    """
    data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]}

    frame = nw.from_native(constructor(data))
    selected = frame.select(exclude_selector)

    # The expected frame is the input data restricted to the surviving columns.
    expected = {}
    for name in expected_cols:
        expected[name] = data[name]
    assert_equal_data(selected, expected)
Loading