Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
cb470b4
refactor: Use `temp.column_name(s)` some more
dangotbanned Oct 1, 2025
23e9d43
fix(typing): Resolve some cases for `flatten_hash_safe`
dangotbanned Oct 1, 2025
f77bb4c
feat(expr-ir): Impl `acero.sort_by`
dangotbanned Oct 2, 2025
36ddce0
test: Port over `is_first_distinct` tests
dangotbanned Oct 2, 2025
0e49f57
chore: Add `Compliant{Expr,Scalar}.is_{first,last}_distinct`
dangotbanned Oct 2, 2025
a5f192c
test: Update to cover `is_last_distinct` as well
dangotbanned Oct 2, 2025
6a1b08a
feat(DRAFT): Initial `is_first_distinct` impl
dangotbanned Oct 2, 2025
1c026bf
test: Port over more cases
dangotbanned Oct 3, 2025
e7e8a04
refactor: Generalize `is_first_distinct` impl
dangotbanned Oct 3, 2025
2d46521
feat: Add `is_last_distinct`
dangotbanned Oct 3, 2025
cfb775d
refactor: Make both `is_*_distinct` methods, aliases
dangotbanned Oct 3, 2025
9db603b
feat: (Properly) add `get_column`, `to_series`
dangotbanned Oct 3, 2025
f8255d3
chore: Add `pc.is_in` wrapper
dangotbanned Oct 3, 2025
6fe2a0a
docs: Add detail to `FunctionFlags.LENGTH_PRESERVING`
dangotbanned Oct 3, 2025
938befb
test: More test porting
dangotbanned Oct 3, 2025
516f4a6
typo
dangotbanned Oct 3, 2025
ead4e62
feat(DRAFT): Some progress on `hashjoin` port
dangotbanned Oct 4, 2025
273bdcc
fix: Correctly pass down join keys
dangotbanned Oct 5, 2025
ce37617
test: Port over inner, left & clean up
dangotbanned Oct 5, 2025
18ef26a
test: Add `test_suffix`
dangotbanned Oct 5, 2025
94baf1e
test: Add `how="cross"` tests
dangotbanned Oct 5, 2025
733b45a
test: Add `how={"anti","semi"}` tests
dangotbanned Oct 5, 2025
ce321e0
test: replace `"antananarivo"`->`"a"`, `"bob"`->`"b"`
dangotbanned Oct 5, 2025
cc0d379
test: Port the other duplicate test
dangotbanned Oct 5, 2025
dd40e3a
test: Make all the xfails more visible
dangotbanned Oct 5, 2025
d1a1785
feat(DRAFT): Initial acero cross-join impl
dangotbanned Oct 5, 2025
77e55b3
refactor: Only expose `acero.join_tables`
dangotbanned Oct 5, 2025
8f7d2f3
chore: Start factoring-out `Table` dependency
dangotbanned Oct 5, 2025
b0c2a4d
Merge branch 'oh-nodes' into expr-ir/acero-order-by
dangotbanned Oct 6, 2025
d42f5de
refactor(typing): Use `IntoExprColumn` some more
dangotbanned Oct 6, 2025
b8a58c1
refactor: Split up `_parse_sort_by`
dangotbanned Oct 6, 2025
05c63fd
Make a start on `DataFrame.filter`
dangotbanned Oct 6, 2025
025213d
fill out slightly more `filter`
dangotbanned Oct 6, 2025
3e94449
get typing working again (kinda)
dangotbanned Oct 6, 2025
a611bc9
feat(DRAFT): Support `filter(list[bool])`
dangotbanned Oct 6, 2025
d514ad0
feat: Support single `Series` as well
dangotbanned Oct 6, 2025
d452920
test: Use `parametrize`
dangotbanned Oct 6, 2025
4c7c23d
feat: Add predicate expansion
dangotbanned Oct 6, 2025
2ebca30
feat(expr-ir): Full `DataFrame.filter` support
dangotbanned Oct 6, 2025
1b66786
test: Merge the anti/semi tests
dangotbanned Oct 6, 2025
fd38911
test: parametrize exception messages
dangotbanned Oct 6, 2025
3537cac
test: relax more error messages
dangotbanned Oct 6, 2025
b5ef86b
typo
dangotbanned Oct 7, 2025
8433b2d
test: Add `test_filter_mask_mixed`
dangotbanned Oct 7, 2025
7668abb
fix: Raise on duplicate column names
dangotbanned Oct 7, 2025
3ca43d1
cov
dangotbanned Oct 7, 2025
0f06479
perf: Avoid multiple collections during cross join
dangotbanned Oct 7, 2025
7e9ee74
test: Stop repeating the same data so many times
dangotbanned Oct 7, 2025
1523dbb
test: Add some cases from polars
dangotbanned Oct 8, 2025
a479f32
fix: typing mypy
dangotbanned Oct 8, 2025
8e840e0
feat(expr-ir): Full-er `DataFrame.filter` support
dangotbanned Oct 8, 2025
af26916
refactor: Simplify the `NonCrossJoinStrategy` split
dangotbanned Oct 8, 2025
6aaf75d
test: Convert raising test into a conformance test
dangotbanned Oct 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions narwhals/_plan/_expr_ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,7 @@ def is_column(self, *, allow_aliasing: bool = False) -> bool:

ir = self.expr
return isinstance(ir, Column) and ((self.name == ir.name) or allow_aliasing)


def named_ir(name: str, expr: ExprIRT, /) -> NamedIR[ExprIRT]:
    """Build a `NamedIR` from a name and an expression node.

    Both parameters are positional-only and are forwarded to the `NamedIR`
    constructor by keyword.

    Arguments:
        name: Value passed to `NamedIR` as `name`.
        expr: Value passed to `NamedIR` as `expr`.
    """
    return NamedIR(name=name, expr=expr)
6 changes: 5 additions & 1 deletion narwhals/_plan/_guards.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from narwhals._plan.compliant.series import CompliantSeries
from narwhals._plan.expr import Expr
from narwhals._plan.series import Series
from narwhals._plan.typing import NativeSeriesT, Seq
from narwhals._plan.typing import IntoExprColumn, NativeSeriesT, Seq
from narwhals.typing import NonNestedLiteral

T = TypeVar("T")
Expand Down Expand Up @@ -67,6 +67,10 @@ def is_series(obj: Series[NativeSeriesT] | Any) -> TypeIs[Series[NativeSeriesT]]
return isinstance(obj, _series().Series)


def is_into_expr_column(obj: Any) -> TypeIs[IntoExprColumn]:
    """Return `True` when `obj` is a `str`, an `Expr` or a `Series`.

    The `Expr` and `Series` classes are resolved at call time through the
    `_expr()` and `_series()` accessors.
    """
    accepted_types = (str, _expr().Expr, _series().Series)
    return isinstance(obj, accepted_types)


def is_compliant_series(
obj: CompliantSeries[NativeSeriesT] | Any,
) -> TypeIs[CompliantSeries[NativeSeriesT]]:
Expand Down
95 changes: 80 additions & 15 deletions narwhals/_plan/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
from itertools import chain
from typing import TYPE_CHECKING

from narwhals._plan._guards import is_expr, is_iterable_reject
from narwhals._plan._guards import is_expr, is_into_expr_column, is_iterable_reject
from narwhals._plan.exceptions import (
invalid_into_expr_error,
is_iterable_pandas_error,
is_iterable_polars_error,
)
from narwhals.dependencies import get_polars, is_pandas_dataframe, is_pandas_series
from narwhals.exceptions import InvalidOperationError

if TYPE_CHECKING:
from collections.abc import Iterator
Expand All @@ -22,7 +23,13 @@
from typing_extensions import TypeAlias, TypeIs

from narwhals._plan.expressions import ExprIR
from narwhals._plan.typing import IntoExpr, IntoExprColumn, OneOrIterable, Seq
from narwhals._plan.typing import (
IntoExpr,
IntoExprColumn,
OneOrIterable,
PartialSeries,
Seq,
)
from narwhals.typing import IntoDType

T = TypeVar("T")
Expand Down Expand Up @@ -85,15 +92,33 @@


def parse_into_expr_ir(
input: IntoExpr, *, str_as_lit: bool = False, dtype: IntoDType | None = None
input: IntoExpr | list[Any],
*,
str_as_lit: bool = False,
list_as_series: PartialSeries | None = None,
dtype: IntoDType | None = None,
) -> ExprIR:
"""Parse a single input into an `ExprIR` node."""
"""Parse a single input into an `ExprIR` node.

Arguments:
input: The input to be parsed as an expression.
str_as_lit: Interpret string input as a string literal. If set to `False` (default),
strings are parsed as column names.
list_as_series: Interpret list input as a Series literal, using the provided constructor.
If set to `None` (default), lists will raise when passed to `lit`.
dtype: If the input is expected to resolve to a literal with a known dtype, pass
this to the `lit` constructor.
"""
from narwhals._plan import col, lit

if is_expr(input):
expr = input
elif isinstance(input, str) and not str_as_lit:
expr = col(input)
elif isinstance(input, list):
if list_as_series is None:
raise TypeError(input)
expr = lit(list_as_series(input))
else:
expr = lit(input, dtype=dtype)
return expr._ir
Expand All @@ -105,50 +130,90 @@ def parse_into_seq_of_expr_ir(
**named_inputs: IntoExpr,
) -> Seq[ExprIR]:
"""Parse variadic inputs into a flat sequence of `ExprIR` nodes."""
return tuple(_parse_into_iter_expr_ir(first_input, *more_inputs, **named_inputs))
return tuple(
_parse_into_iter_expr_ir(
first_input, *more_inputs, _list_as_series=None, **named_inputs
)
)


def parse_predicates_constraints_into_expr_ir(
first_predicate: OneOrIterable[IntoExprColumn] = (),
*more_predicates: IntoExprColumn | _RaisesInvalidIntoExprError,
first_predicate: OneOrIterable[IntoExprColumn] | list[bool] = (),
*more_predicates: IntoExprColumn | list[bool] | _RaisesInvalidIntoExprError,
_list_as_series: PartialSeries | None = None,
**constraints: IntoExpr,
) -> ExprIR:
"""Parse variadic predicates and constraints into an `ExprIR` node.

The result is an AND-reduction of all inputs.
"""
all_predicates = _parse_into_iter_expr_ir(first_predicate, *more_predicates)
all_predicates = _parse_into_iter_expr_ir(
first_predicate, *more_predicates, _list_as_series=_list_as_series
)
if constraints:
chained = chain(all_predicates, _parse_constraints(constraints))
return _combine_predicates(chained)
return _combine_predicates(all_predicates)


def parse_sort_by_into_seq_of_expr_ir(
    by: OneOrIterable[IntoExprColumn] = (), *more_by: IntoExprColumn
) -> Seq[ExprIR]:
    """Parse `DataFrame.sort` and `Expr.sort_by` keys into a flat sequence of `ExprIR` nodes.

    Arguments:
        by: A single sort key, or an iterable of sort keys.
        *more_by: Additional sort keys, given positionally.

    Parsing and validation are delegated to `_parse_sort_by_into_iter_expr_ir`;
    its output is collected into a tuple before returning.
    """
    sort_keys = _parse_sort_by_into_iter_expr_ir(by, more_by)
    return tuple(sort_keys)


# TODO @dangotbanned: Review the rejection predicate
# It doesn't cover all length-changing expressions, only aggregations/literals
def _parse_sort_by_into_iter_expr_ir(
    by: OneOrIterable[IntoExprColumn], more_by: Iterable[IntoExprColumn]
) -> Iterator[ExprIR]:
    """Lazily parse sort keys into `ExprIR` nodes, rejecting scalar expressions.

    Arguments:
        by: A single sort key, or an iterable of sort keys.
        more_by: Further sort keys, unpacked as extra positional inputs.

    Raises:
        InvalidOperationError: As soon as a parsed node reports `is_scalar`.
            Nodes yielded before the offending one have already been produced.
    """
    for sort_key in _parse_into_iter_expr_ir(by, *more_by):
        if not sort_key.is_scalar:
            yield sort_key
            continue
        msg = f"All expressions sort keys must preserve length, but got:\n{sort_key!r}"
        raise InvalidOperationError(msg)


def _parse_into_iter_expr_ir(
first_input: OneOrIterable[IntoExpr], *more_inputs: IntoExpr, **named_inputs: IntoExpr
first_input: OneOrIterable[IntoExpr],
*more_inputs: IntoExpr | list[Any],
_list_as_series: PartialSeries | None = None,
**named_inputs: IntoExpr,
) -> Iterator[ExprIR]:
if not _is_empty_sequence(first_input):
# NOTE: These need to be separated to introduce an intersection type
# Otherwise, `str | bytes` always passes through typing
if _is_iterable(first_input) and not is_iterable_reject(first_input):
if more_inputs:
if more_inputs and (
_list_as_series is None or not isinstance(first_input, list)
):
raise invalid_into_expr_error(first_input, more_inputs, named_inputs)
# NOTE: Ensures `first_input = [False, True, True] -> lit(Series([False, True, True]))`
elif (
_list_as_series is not None
and isinstance(first_input, list)
and not is_into_expr_column(first_input[0])
):
yield parse_into_expr_ir(first_input, list_as_series=_list_as_series)
else:
yield from _parse_positional_inputs(first_input)
yield from _parse_positional_inputs(first_input, _list_as_series)
else:
yield parse_into_expr_ir(first_input)
yield parse_into_expr_ir(first_input, list_as_series=_list_as_series)
else:
# NOTE: Passthrough case for no inputs - but gets skipped when calling next
yield from ()
if more_inputs:
yield from _parse_positional_inputs(more_inputs)
yield from _parse_positional_inputs(more_inputs, _list_as_series)
if named_inputs:
yield from _parse_named_inputs(named_inputs)


def _parse_positional_inputs(inputs: Iterable[IntoExpr], /) -> Iterator[ExprIR]:
def _parse_positional_inputs(
inputs: Iterable[IntoExpr | list[Any]], /, list_as_series: PartialSeries | None = None
) -> Iterator[ExprIR]:
for into in inputs:
yield parse_into_expr_ir(into)
yield parse_into_expr_ir(into, list_as_series=list_as_series)


def _parse_named_inputs(named_inputs: dict[str, IntoExpr], /) -> Iterator[ExprIR]:
Expand Down
Loading
Loading