Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
3bfefb0
refactor: Move `CompliantExpr._is_multi_output_named`
dangotbanned Aug 27, 2025
bc6e890
refactor: Move `CompliantExpr._evaluate_aliases`
dangotbanned Aug 27, 2025
1e8a770
refactor(typing): Correctly label `[False Negative]`, document message
dangotbanned Aug 27, 2025
a5f4432
refactor: Label `polars` vs `narwhals`
dangotbanned Aug 27, 2025
bd3f691
refactor: Remove `CompliantDataFrame.aggregate`
dangotbanned Aug 27, 2025
8fa84e8
refactor: Move/remove some `PolarsExpr`
dangotbanned Aug 27, 2025
29a3038
refactor: Remove unused from `Polars*GroupBy`
dangotbanned Aug 27, 2025
2c31971
Merge branch 'main' into trim-compliant
dangotbanned Aug 27, 2025
f727dcf
Merge branch 'main' into trim-compliant
dangotbanned Aug 28, 2025
7eb1a27
Merge remote-tracking branch 'upstream/main' into trim-compliant
dangotbanned Aug 28, 2025
4c24c9e
Merge remote-tracking branch 'upstream/main' into trim-compliant
dangotbanned Aug 28, 2025
60706de
Merge branch 'main' into trim-compliant
dangotbanned Aug 29, 2025
2525fc1
Merge remote-tracking branch 'upstream/main' into trim-compliant
dangotbanned Aug 29, 2025
c67dd85
fix merge conflict
dangotbanned Aug 29, 2025
04dfd11
refactor: Split up `BaseFrame._extract_compliant` (#3054)
dangotbanned Aug 29, 2025
2464524
feat: return `self` from `__narwhals_expr__`
dangotbanned Aug 30, 2025
31fa908
fix(typing): Move annotations that broke protocols
dangotbanned Aug 30, 2025
f76b328
refactor(typing): Minimise temp `PolarsExpr` fix
dangotbanned Aug 30, 2025
b20c1e1
cov
dangotbanned Aug 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,6 @@ def __narwhals_namespace__(self) -> ArrowNamespace:

return ArrowNamespace(version=self._version)

def __narwhals_expr__(self) -> None: ...

def _reuse_series_extra_kwargs(
self, *, returns_scalar: bool = False
) -> dict[str, Any]:
Expand Down
27 changes: 17 additions & 10 deletions narwhals/_compliant/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,6 @@ def native(self) -> _NativeFrameT:

@property
def schema(self) -> Mapping[str, DType]: ...
def aggregate(self, *exprs: CompliantExprT_contra) -> Self:
"""`select` where all args are aggregations or literals.

(so, no broadcasting is necessary).
"""
...

def collect_schema(self) -> Mapping[str, DType]: ...
def drop(self, columns: Sequence[str], *, strict: bool) -> Self: ...
Expand Down Expand Up @@ -211,9 +205,6 @@ def __getitem__(
MultiColSelector[CompliantSeriesT],
],
) -> Self: ...
def aggregate(self, *exprs: CompliantExprT_contra) -> Self:
# NOTE: Ignore is to avoid an intermittent false positive
return self.select(*exprs) # pyright: ignore[reportArgumentType]

@property
def shape(self) -> tuple[int, int]: ...
Expand Down Expand Up @@ -289,7 +280,15 @@ class CompliantLazyFrame(
Protocol[CompliantExprT_contra, NativeLazyFrameT, ToNarwhalsT_co],
):
def __narwhals_lazyframe__(self) -> Self: ...
# `LazySelectorNamespace._iter_columns` depends
def _iter_columns(self) -> Iterator[Any]: ...
def aggregate(self, *exprs: CompliantExprT_contra) -> Self:
"""`select` where all args are aggregations or literals.

(so, no broadcasting is necessary).
"""
...

def collect(
self, backend: _EagerAllowedImpl | None, **kwargs: Any
) -> CompliantDataFrameAny: ...
Expand Down Expand Up @@ -317,6 +316,12 @@ def __narwhals_namespace__(
def to_narwhals(self) -> DataFrame[NativeDataFrameT]:
return self._version.dataframe(self, level="full")

def aggregate(self, *exprs: EagerExprT) -> Self:
# Eager frames implement `aggregate` by forwarding every expression, unchanged, to `select`.
# NOTE: Ignore intermittent [False Negative]
# Argument of type "EagerExprT@EagerDataFrame" cannot be assigned to parameter "exprs" of type "EagerExprT@EagerDataFrame" in function "select"
# Type "EagerExprT@EagerDataFrame" is not assignable to type "EagerExprT@EagerDataFrame"
return self.select(*exprs) # pyright: ignore[reportArgumentType]

def _with_native(
self, df: NativeDataFrameT, *, validate_column_names: bool = True
) -> Self: ...
Expand All @@ -331,7 +336,9 @@ def _evaluate_expr(self, expr: EagerExprT, /) -> EagerSeriesT:
return result[0]

def _evaluate_into_exprs(self, *exprs: EagerExprT) -> Sequence[EagerSeriesT]:
# NOTE: Ignore is to avoid an intermittent false positive
# NOTE: Ignore intermittent [False Negative]
# Argument of type "EagerExprT@EagerDataFrame" cannot be assigned to parameter "expr" of type "EagerExprT@EagerDataFrame" in function "_evaluate_into_expr"
# Type "EagerExprT@EagerDataFrame" is not assignable to type "EagerExprT@EagerDataFrame"
return list(chain.from_iterable(self._evaluate_into_expr(expr) for expr in exprs)) # pyright: ignore[reportArgumentType]

def _evaluate_into_expr(self, expr: EagerExprT, /) -> Sequence[EagerSeriesT]:
Expand Down
55 changes: 24 additions & 31 deletions narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def __ne__(self, value: Any, /) -> Self: ... # type: ignore[override]
class CompliantExpr(
CompliantColumn, Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co]
):
# NOTE: `narwhals`
_implementation: Implementation
_evaluate_output_names: EvalNames[CompliantFrameT]
_alias_output_names: AliasNames | None
Expand All @@ -89,19 +90,24 @@ class CompliantExpr(
def __call__(
self, df: CompliantFrameT
) -> Sequence[CompliantSeriesOrNativeExprT_co]: ...
def __narwhals_expr__(self) -> None: ...
def __narwhals_expr__(self) -> Self: # pragma: no cover
return self

def __narwhals_namespace__(self) -> CompliantNamespace[CompliantFrameT, Self]: ...
@classmethod
def from_column_indices(
cls, *column_indices: int, context: _LimitedContext
) -> Self: ...
@classmethod
def from_column_names(
cls,
evaluate_column_names: EvalNames[CompliantFrameT],
/,
*,
context: _LimitedContext,
) -> Self: ...
Comment on lines 97 to 108
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm somewhat torn on these CompliantExpr constructors.

I like that all you need to do is implement these two:

  • from_column_indices
  • from_column_names
And then you get these 4 for "free"

class CompliantNamespace(Protocol[CompliantFrameT, CompliantExprT]):
# NOTE: `narwhals`
_implementation: Implementation
_version: Version
@property
def _expr(self) -> type[CompliantExprT]: ...
# NOTE: `polars`
def all(self) -> CompliantExprT:
return self._expr.from_column_names(get_column_names, context=self)
def col(self, *column_names: str) -> CompliantExprT:
return self._expr.from_column_names(
passthrough_column_names(column_names), context=self
)
def exclude(self, excluded_names: Container[str]) -> CompliantExprT:
return self._expr.from_column_names(
partial(exclude_column_names, names=excluded_names), context=self
)
def nth(self, *column_indices: int) -> CompliantExprT:
return self._expr.from_column_indices(*column_indices, context=self)

What I don't like is that _LimitedContext specifies you need to pass an object with:

  • _implementation: Implementation
  • _version: Version

Recently, this PR removed the requirement of also having _backend_version:

I'd like to continue that trend and work towards removing _implementation as well 😏

I've been wanting to drop down to 1x Implementation per class for a while:

Which would just mean defining it on the class like e.g. Arrow*

class ArrowExpr(EagerExpr["ArrowDataFrame", ArrowSeries]):
_implementation: Implementation = Implementation.PYARROW

If we do that, then:

  1. We don't need it included in any signatures
    i. ArrowExpr still requires a sink in __init__ even now 😒

    implementation: Implementation | None = None,
    ) -> None:

  2. *Like classes don't need to branch on Implementation inside methods
    i. We just override things in subclasses

  3. We might be able to do something different for extensions (enh: Implementation for plugins #3042)
    i. Where Implementation.UNKNOWN is just a constant to satisfy typing atm

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also then make __native_namespace__ a @classmethod 🤯

That doesn't depend on nw.Version, so we don't need to initialize a Compliant object to access it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also then make __native_namespace__ a @classmethod 🤯

sure, if that works, why not

@classmethod
def from_column_indices(
cls, *column_indices: int, context: _LimitedContext
def broadcast(
self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]
) -> Self: ...
@staticmethod
def _eval_names_indices(indices: Sequence[int], /) -> EvalNames[CompliantFrameT]:
Expand All @@ -111,6 +117,7 @@ def fn(df: CompliantFrameT) -> Sequence[str]:

return fn

# NOTE: `polars`
def all(self) -> Self: ...
def any(self) -> Self: ...
def count(self) -> Self: ...
Expand All @@ -137,35 +144,24 @@ def map_batches(
*,
returns_scalar: bool,
) -> Self: ...
def broadcast(
self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]
) -> Self: ...
def _is_multi_output_unnamed(self) -> bool:
"""Return `True` for multi-output aggregations without names.

For example, column `'a'` only appears in the output as a grouping key:

df.group_by('a').agg(nw.all().sum())
@property
def name(self) -> NameNamespace[Self]: ...

It does not get included in:

nw.all().sum().
"""
assert self._metadata is not None # noqa: S101
return self._metadata.expansion_kind.is_multi_unnamed()

def _evaluate_aliases(
self: CompliantExpr[CompliantFrameT, Any], frame: CompliantFrameT, /
) -> Sequence[str]:
names = self._evaluate_output_names(frame)
class ImplExpr(
CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
):
def _evaluate_aliases(self, frame: CompliantFrameT, /) -> Sequence[str]:
# Resolve the final output names of this expression for `frame`.
# NOTE: Ignore intermittent [False Negative]
# Argument of type "CompliantFrameT@ImplExpr" cannot be assigned to parameter of type "CompliantFrameT@ImplExpr"
# Type "CompliantFrameT@ImplExpr" is not assignable to type "CompliantFrameT@ImplExpr"
names = self._evaluate_output_names(frame) # pyright: ignore[reportArgumentType]
# Apply the alias function only when one is set (truthy); otherwise keep the evaluated names as-is.
return alias(names) if (alias := self._alias_output_names) else names

@property
def name(self) -> NameNamespace[Self]: ...


class DepthTrackingExpr(
CompliantExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
ImplExpr[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co],
):
_depth: int
Expand Down Expand Up @@ -228,8 +224,6 @@ def __call__(self, df: EagerDataFrameT) -> Sequence[EagerSeriesT]:
def __narwhals_namespace__(
self,
) -> EagerNamespace[EagerDataFrameT, EagerSeriesT, Self, Any, Any]: ...
def __narwhals_expr__(self) -> None: ...

@classmethod
def _from_callable(
cls,
Expand Down Expand Up @@ -888,8 +882,7 @@ def struct(self) -> EagerExprStructNamespace[Self]:

# mypy thinks `NativeExprT` should be covariant, pyright thinks it should be invariant
class LazyExpr( # type: ignore[misc]
CompliantExpr[CompliantLazyFrameT, NativeExprT],
Protocol[CompliantLazyFrameT, NativeExprT],
ImplExpr[CompliantLazyFrameT, NativeExprT], Protocol[CompliantLazyFrameT, NativeExprT]
):
def _with_alias_output_names(self, func: AliasNames | None, /) -> Self: ...
def alias(self, name: str) -> Self:
Expand Down
13 changes: 7 additions & 6 deletions narwhals/_compliant/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
DepthTrackingExprAny,
DepthTrackingExprT_contra,
EagerExprT_contra,
ImplExprT_contra,
NarwhalsAggregation,
)
from narwhals._utils import is_sequence_of, zip_strict

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping, Sequence

from narwhals._compliant.expr import CompliantExpr
from narwhals._compliant.expr import ImplExpr


__all__ = ["CompliantGroupBy", "DepthTrackingGroupBy", "EagerGroupBy"]
Expand All @@ -34,7 +35,7 @@


def _evaluate_aliases(
frame: CompliantFrameT, exprs: Iterable[CompliantExpr[CompliantFrameT, Any]], /
frame: CompliantFrameT, exprs: Iterable[ImplExpr[CompliantFrameT, Any]], /
) -> list[str]:
it = (expr._evaluate_aliases(frame) for expr in exprs)
return list(chain.from_iterable(it))
Expand Down Expand Up @@ -67,13 +68,13 @@ def __iter__(self) -> Iterator[tuple[Any, CompliantDataFrameT_co]]: ...


class ParseKeysGroupBy(
CompliantGroupBy[CompliantFrameT, CompliantExprT_contra],
Protocol[CompliantFrameT, CompliantExprT_contra],
CompliantGroupBy[CompliantFrameT, ImplExprT_contra],
Protocol[CompliantFrameT, ImplExprT_contra],
):
def _parse_keys(
self,
compliant_frame: CompliantFrameT,
keys: Sequence[CompliantExprT_contra] | Sequence[str],
keys: Sequence[ImplExprT_contra] | Sequence[str],
) -> tuple[CompliantFrameT, list[str], list[str]]:
if is_sequence_of(keys, str):
keys_str = list(keys)
Expand All @@ -82,7 +83,7 @@ def _parse_keys(

@staticmethod
def _parse_expr_keys(
compliant_frame: CompliantFrameT, keys: Sequence[CompliantExprT_contra]
compliant_frame: CompliantFrameT, keys: Sequence[ImplExprT_contra]
) -> tuple[CompliantFrameT, list[str], list[str]]:
"""Parses key expressions to set up `.agg` operation with correct information.

Expand Down
38 changes: 35 additions & 3 deletions narwhals/_compliant/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
NativeFrameT_co,
NativeSeriesT,
)
from narwhals._expression_parsing import is_expr, is_series
from narwhals._utils import (
exclude_column_names,
get_column_names,
passthrough_column_names,
)
from narwhals.dependencies import is_numpy_array_2d
from narwhals.dependencies import is_numpy_array, is_numpy_array_2d

if TYPE_CHECKING:
from collections.abc import Container, Iterable, Sequence
Expand All @@ -31,12 +32,15 @@
from narwhals._compliant.selectors import CompliantSelectorNamespace
from narwhals._compliant.when_then import CompliantWhen, EagerWhen
from narwhals._utils import Implementation, Version
from narwhals.expr import Expr
from narwhals.series import Series
from narwhals.typing import (
ConcatMethod,
Into1DArray,
IntoDType,
IntoSchema,
NonNestedLiteral,
_1DArray,
_2DArray,
)

Expand All @@ -51,9 +55,24 @@


class CompliantNamespace(Protocol[CompliantFrameT, CompliantExprT]):
# NOTE: `narwhals`
_implementation: Implementation
_version: Version

@property
def _expr(self) -> type[CompliantExprT]: ...
def parse_into_expr(
self, data: Expr | NonNestedLiteral | Any, /, *, str_as_lit: bool
) -> CompliantExprT | NonNestedLiteral:
# Convert `data` into a compliant expression where possible; anything else is returned unchanged.
if is_expr(data):
# Convert the expression via its own `_to_compliant_expr`, passing this namespace.
expr = data._to_compliant_expr(self)
# Sanity check (also narrows the type): the result must be an instance of this namespace's `_expr` class.
assert isinstance(expr, self._expr) # noqa: S101
return expr
# A string is treated as a column name unless the caller asked for strings to be kept as literals.
if isinstance(data, str) and not str_as_lit:
return self.col(data)
# Fallthrough: `data` is handed back as-is.
return data

# NOTE: `polars`
def all(self) -> CompliantExprT:
return self._expr.from_column_names(get_column_names, context=self)

Expand Down Expand Up @@ -93,8 +112,6 @@ def concat_str(
) -> CompliantExprT: ...
@property
def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ...
@property
def _expr(self) -> type[CompliantExprT]: ...
def coalesce(self, *exprs: CompliantExprT) -> CompliantExprT: ...


Expand Down Expand Up @@ -168,6 +185,21 @@ def from_native(
msg = f"Unsupported type: {type(data).__name__!r}"
raise TypeError(msg)

def parse_into_expr(
self,
data: Expr | Series[NativeSeriesT] | _1DArray | NonNestedLiteral,
/,
*,
str_as_lit: bool,
) -> EagerExprT | NonNestedLiteral:
# Eager override: additionally handles inputs recognised by `is_series` / `is_numpy_array`.
# Everything else is delegated to the parent implementation.
if not (is_series(data) or is_numpy_array(data)):
return super().parse_into_expr(data, str_as_lit=str_as_lit)
# Wrap in an expression via `_from_series`: a Series contributes its `_compliant_series`,
# while an array is first converted with `self._series.from_numpy`.
return self._expr._from_series(
data._compliant_series
if is_series(data)
else self._series.from_numpy(data, context=self)
)

@overload
def from_numpy(self, data: Into1DArray, /, schema: None = ...) -> EagerSeriesT: ...

Expand Down
10 changes: 4 additions & 6 deletions narwhals/_compliant/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,27 +56,24 @@


class CompliantSelectorNamespace(Protocol[FrameT, SeriesOrExprT]):
# NOTE: `narwhals`
_implementation: Implementation
_version: Version

@property
def _selector(self) -> type[CompliantSelector[FrameT, SeriesOrExprT]]: ...
@classmethod
def from_namespace(cls, context: _LimitedContext, /) -> Self:
# Alternate constructor: `cls.__new__` creates the instance without calling `__init__`,
# then `_implementation` and `_version` are copied over from `context`.
obj = cls.__new__(cls)
obj._implementation = context._implementation
obj._version = context._version
return obj

@property
def _selector(self) -> type[CompliantSelector[FrameT, SeriesOrExprT]]: ...

def _iter_columns(self, df: FrameT, /) -> Iterator[SeriesOrExprT]: ...

def _iter_schema(self, df: FrameT, /) -> Iterator[tuple[str, DType]]: ...

def _iter_columns_dtypes(
self, df: FrameT, /
) -> Iterator[tuple[SeriesOrExprT, DType]]: ...

def _iter_columns_names(self, df: FrameT, /) -> Iterator[tuple[SeriesOrExprT, str]]:
# Yield (column, name) pairs by zipping `_iter_columns(df)` with `df.columns`.
# NOTE(review): `zip_strict` presumably enforces equal lengths — confirm against `narwhals._utils`.
yield from zip_strict(self._iter_columns(df), df.columns)

Expand All @@ -93,6 +90,7 @@ def names(df: FrameT) -> Sequence[str]:

return self._selector.from_callables(series, names, context=self)

# NOTE: `polars`
def by_dtype(
self, dtypes: Collection[DType | type[DType]]
) -> CompliantSelector[FrameT, SeriesOrExprT]:
Expand Down
Loading
Loading