Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
72017e0
Refactored Select decomposition - IR rewrites only - No new task-grap…
rjzamora Apr 14, 2025
a3494b7
Merge branch 'branch-25.06' into complex-aggregations-ir
rjzamora Apr 14, 2025
b4d4507
Merge remote-tracking branch 'upstream/branch-25.06' into complex-agg…
rjzamora Apr 15, 2025
657ebae
improve coverage
rjzamora Apr 15, 2025
121ec5f
Merge branch 'branch-25.06' into complex-aggregations-ir
rjzamora Apr 15, 2025
e7ed92a
Merge remote-tracking branch 'upstream/branch-25.06' into complex-agg…
rjzamora Apr 16, 2025
0238052
Merge branch 'branch-25.06' into complex-aggregations-ir
rjzamora Apr 18, 2025
0c26572
Merge branch 'branch-25.06' into complex-aggregations-ir
rjzamora Apr 22, 2025
51eb142
Merge remote-tracking branch 'upstream/branch-25.06' into complex-agg…
rjzamora Apr 23, 2025
69e6168
address partial code review
rjzamora Apr 23, 2025
2ed1e26
remove _maybe_shuffle and refactor _decompose_expr_node
rjzamora Apr 23, 2025
7f25e4b
fix unique_input_irs
rjzamora Apr 23, 2025
77b372f
tweak comments
rjzamora Apr 23, 2025
9764a65
remove comment
rjzamora Apr 23, 2025
69642c1
Merge remote-tracking branch 'upstream/branch-25.06' into complex-agg…
rjzamora Apr 23, 2025
02343a3
Merge branch 'branch-25.06' into complex-aggregations-ir
rjzamora Apr 24, 2025
36c8551
Merge remote-tracking branch 'upstream/branch-25.06' into complex-agg…
rjzamora Apr 25, 2025
ffa5964
Merge remote-tracking branch 'upstream/branch-25.06' into complex-agg…
rjzamora Apr 28, 2025
45ab83b
high-level notes
rjzamora Apr 28, 2025
a6e8ea6
Merge branch 'branch-25.06' into complex-aggregations-ir
rjzamora Apr 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion python/cudf_polars/cudf_polars/dsl/expressions/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
# TODO: remove need for this
# ruff: noqa: D101
Expand All @@ -18,6 +18,8 @@
if TYPE_CHECKING:
from collections.abc import Mapping

from typing_extensions import Self

from cudf_polars.containers import Column, DataFrame

__all__ = ["AggInfo", "Col", "ColRef", "ExecutionContext", "Expr", "NamedExpr"]
Expand Down Expand Up @@ -237,6 +239,24 @@ def collect_agg(self, *, depth: int) -> AggInfo:
"""Collect information about aggregations in groupbys."""
return self.value.collect_agg(depth=depth)

def reconstruct(self, expr: Expr) -> Self:
"""
Rebuild with a new `Expr` value.

Parameters
----------
expr
New `Expr` value

Returns
-------
New `NamedExpr` with `expr` as the underlying expression.
The name of the original `NamedExpr` is preserved.
"""
if expr is self.value:
return self
return type(self)(self.name, expr)


class Col(Expr):
__slots__ = ("name",)
Expand Down
45 changes: 40 additions & 5 deletions python/cudf_polars/cudf_polars/dsl/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"ConditionalJoin",
"DataFrameScan",
"Distinct",
"Empty",
"ErrorNode",
"Filter",
"GroupBy",
Expand Down Expand Up @@ -1953,12 +1954,18 @@ def do_evaluate(cls, zlice: Zlice | None, *dfs: DataFrame) -> DataFrame:
class HConcat(IR):
"""Concatenate dataframes horizontally."""

__slots__ = ()
_non_child = ("schema",)
__slots__ = ("should_broadcast",)
_non_child = ("schema", "should_broadcast")

def __init__(self, schema: Schema, *children: IR):
def __init__(
self,
schema: Schema,
should_broadcast: bool, # noqa: FBT001
*children: IR,
):
self.schema = schema
self._non_child_args = ()
self.should_broadcast = should_broadcast
self._non_child_args = (should_broadcast,)
self.children = children

@staticmethod
Expand Down Expand Up @@ -1990,8 +1997,19 @@ def _extend_with_nulls(table: plc.Table, *, nrows: int) -> plc.Table:
)

@classmethod
def do_evaluate(cls, *dfs: DataFrame) -> DataFrame:
def do_evaluate(
cls,
should_broadcast: bool, # noqa: FBT001
*dfs: DataFrame,
) -> DataFrame:
"""Evaluate and return a dataframe."""
# Special should_broadcast case.
# Used to recombine decomposed expressions
if should_broadcast:
return DataFrame(
broadcast(*itertools.chain.from_iterable(df.columns for df in dfs))
)

max_rows = max(df.num_rows for df in dfs)
# Horizontal concatenation extends shorter tables with nulls
return DataFrame(
Expand All @@ -2008,3 +2026,20 @@ def do_evaluate(cls, *dfs: DataFrame) -> DataFrame:
)
)
)


class Empty(IR):
"""Represents an empty DataFrame."""

__slots__ = ()
_non_child = ()

def __init__(self) -> None:
self.schema = {}
self._non_child_args = ()
self.children = ()

@classmethod
def do_evaluate(cls) -> DataFrame:
"""Evaluate and return a dataframe."""
return DataFrame([])
6 changes: 5 additions & 1 deletion python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,11 @@ def _(
def _(
node: pl_ir.HConcat, translator: Translator, schema: dict[str, plc.DataType]
) -> ir.IR:
return ir.HConcat(schema, *(translator.translate_ir(n=n) for n in node.inputs))
return ir.HConcat(
schema,
False, # noqa: FBT003
*(translator.translate_ir(n=n) for n in node.inputs),
)


def translate_named_expr(
Expand Down
8 changes: 8 additions & 0 deletions python/cudf_polars/cudf_polars/dsl/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

"""DSL utilities."""

from __future__ import annotations

__all__: list[str] = []
34 changes: 34 additions & 0 deletions python/cudf_polars/cudf_polars/dsl/utils/naming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

"""Name generation utilities."""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from collections.abc import Generator, Iterable


__all__ = ["unique_names"]


def unique_names(names: Iterable[str]) -> Generator[str, None, None]:
"""
Generate unique names relative to some known names.

Parameters
----------
names
Names we should be unique with respect to.

Yields
------
Unique names (just using sequence numbers)
"""
prefix = "_" * max(map(len, names))
i = 0
while True:
yield f"{prefix}{i}"
i += 1
Loading
Loading