-
Notifications
You must be signed in to change notification settings.
Fork 7k
[Data] - Unify Project Operator to use Expressions #57076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
fb4cb18
f99ae1c
275d919
4c273e8
7aed3af
40f724e
c879ad6
c140ad7
f35cfd0
30a7ab5
6a626e8
f1b5d97
73cb01b
dbf609a
73115b2
7120d7f
4ec2d14
15bbe1e
762f27f
a142194
f488532
5ac304c
282c3d5
eb52554
ebe2441
5f814f9
fb9d380
f46a40f
cd84a20
8867156
c675dd6
aa2a9b5
29feb0f
638b9c6
62f354b
b869cfe
3df14b3
6ded04a
File filter
Filter by extension
Conversations
Jump to
Diff view
There are no files selected for viewing
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -160,6 +160,24 @@ def _table_from_pydict(columns: Dict[str, List[Any]]) -> Block: | |
| @staticmethod | ||
| def _combine_tables(tables: List[Block]) -> Block: | ||
| if len(tables) > 1: | ||
| # Check if we have 0-column tables to avoid losing rows during concat | ||
goutamvenkat-anyscale marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # PyArrow's concat on 0-column tables returns a 0-row table | ||
| if all(table.num_columns == 0 for table in tables): | ||
goutamvenkat-anyscale marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # Add stub column to preserve rows during concatenation | ||
| import pyarrow as pa | ||
|
|
||
| tables_with_stub = [] | ||
| for table in tables: | ||
| if table.num_rows > 0: | ||
| table = table.append_column( | ||
| "__concat_stub", pa.nulls(table.num_rows) | ||
| ) | ||
| tables_with_stub.append(table) | ||
| result = transform_pyarrow.concat(tables_with_stub, promote_types=True) | ||
| # Remove stub column after concatenation | ||
| if result.num_columns > 0: | ||
| result = result.select([]) | ||
| return result | ||
cursor[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return transform_pyarrow.concat(tables, promote_types=True) | ||
| else: | ||
| return tables[0] | ||
|
|
@@ -221,7 +239,7 @@ def fill_column(self, name: str, value: Any) -> Block: | |
|
|
||
| array = pyarrow.nulls(len(self._table), type=type) | ||
| array = pc.fill_null(array, value) | ||
| return self._table.append_column(name, array) | ||
| return self.upsert_column(name, array) | ||
goutamvenkat-anyscale marked this conversation as resolved.
Show resolved
Hide resolved
goutamvenkat-anyscale marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| @classmethod | ||
| def from_bytes(cls, data: bytes) -> "ArrowBlockAccessor": | ||
|
|
@@ -321,9 +339,8 @@ def to_arrow(self) -> "pyarrow.Table": | |
| return self._table | ||
|
|
||
| def num_rows(self) -> int: | ||
| # Arrow may represent an empty table via an N > 0 row, 0-column table, e.g. when | ||
| # slicing an empty table, so we return 0 if num_columns == 0. | ||
| return self._table.num_rows if self._table.num_columns > 0 else 0 | ||
| # Arrow may represent an empty table via an N > 0 row, 0-column table | ||
| return self._table.num_rows | ||
|
||
|
|
||
| def size_bytes(self) -> int: | ||
| return self._table.nbytes | ||
|
|
@@ -469,7 +486,9 @@ def filter(self, predicate_expr: "Expr") -> "pyarrow.Table": | |
| if self._table.num_rows == 0: | ||
| return self._table | ||
|
|
||
| from ray.data._expression_evaluator import eval_expr | ||
| from ray.data._internal.planner.plan_expression.expression_evaluator import ( | ||
| eval_expr, | ||
| ) | ||
|
|
||
| # Evaluate the expression to get a boolean mask | ||
| mask = eval_expr(predicate_expr, self._table) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.