From 65f5c2a96edd12070a1339e7e1ac805f9df7e852 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Wed, 7 Feb 2024 10:57:43 -0900 Subject: [PATCH 1/4] test: parameterize test_array_map/filter more This is in prep for adding in Deferreds --- ibis/backends/tests/test_array.py | 38 ++++++++++++++++++------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index b3e5e99cbaaf..6b6057158d93 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -448,23 +448,26 @@ def test_array_slice(backend, start, stop): param({"a": [[1, 2], [4]]}, {"a": [[2, 3], [5]]}, id="no_nulls"), ], ) +@pytest.mark.parametrize( + "func", + [ + lambda x: x + 1, + functools.partial(lambda x, y: x + y, y=1), + ], +) @pytest.mark.broken( ["risingwave"], raises=AssertionError, reason="TODO(Kexiang): seems a bug", ) -def test_array_map(con, input, output): +def test_array_map(con, input, output, func): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.Series(output["a"]) - expr = t.select(a=t.a.map(lambda x: x + 1)) - result = con.execute(expr.a) - assert frozenset(map(tuple, result.values)) == frozenset( - map(tuple, expected.values) - ) - - expr = t.select(a=t.a.map(functools.partial(lambda x, y: x + y, y=1))) + result = t.a.map(func) + assert result.get_name() == "ArrayMap(a, Add(x, 1))" + expr = t.select(a=result) result = con.execute(expr.a) assert frozenset(map(tuple, result.values)) == frozenset( map(tuple, expected.values) @@ -512,17 +515,20 @@ def test_array_map(con, input, output): param({"a": [[1, 2], [4]]}, {"a": [[2], [4]]}, id="no_nulls"), ], ) -def test_array_filter(con, input, output): +@pytest.mark.parametrize( + "predicate", + [ + lambda x: x > 1, + functools.partial(lambda x, y: x > y, y=1), + ], +) +def test_array_filter(con, input, output, predicate): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.Series(output["a"]) - expr = t.select(a=t.a.filter(lambda x: x > 1)) - result = con.execute(expr.a) - assert frozenset(map(tuple, result.values)) == frozenset( - map(tuple, expected.values) - ) - - expr = t.select(a=t.a.filter(functools.partial(lambda x, y: x > y, y=1))) + result = t.a.filter(predicate) + assert result.get_name() == "ArrayFilter(a, Greater(x, 1))" + expr = t.select(a=result) result = con.execute(expr.a) assert frozenset(map(tuple, result.values)) == frozenset( map(tuple, expected.values) From e8d7aa3d4ae2b669f928b6dc98ea5731c6e24e55 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Wed, 7 Feb 2024 11:17:34 -0900 Subject: [PATCH 2/4] feat(common): support Deferreds in Array.map() and .filter() fixes https://github.com/ibis-project/ibis/issues/8180 --- ibis/backends/tests/test_array.py | 22 ++++----- ibis/expr/types/arrays.py | 74 ++++++++++++++++++++++++------- 2 files changed, 71 insertions(+), 25 deletions(-) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 6b6057158d93..7f67fda8244b 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -449,10 +449,11 @@ def test_array_slice(backend, start, stop): ], ) @pytest.mark.parametrize( - "func", + "func,result_name", [ - lambda x: x + 1, - functools.partial(lambda x, y: x + y, y=1), + (lambda x: x + 1, "ArrayMap(a, Add(x, 1))"), + (functools.partial(lambda x, y: x + y, y=1), "ArrayMap(a, Add(x, 1))"), + (ibis._ + 1, "ArrayMap(a, Add(_, 1))"), ], ) @pytest.mark.broken( @@ -460,13 +461,13 @@ def test_array_slice(backend, start, stop): raises=AssertionError, reason="TODO(Kexiang): seems a bug", ) -def test_array_map(con, input, output, func): +def test_array_map(con, input, output, func, result_name): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.Series(output["a"]) result = t.a.map(func) - assert result.get_name() == "ArrayMap(a, Add(x, 1))" + assert result.get_name() == result_name expr = t.select(a=result) result = con.execute(expr.a) assert frozenset(map(tuple, result.values)) == frozenset( @@ -516,18 +517,19 @@ def test_array_map(con, input, output, func): ], ) @pytest.mark.parametrize( - "predicate", + ("predicate,result_name"), [ - lambda x: x > 1, - functools.partial(lambda x, y: x > y, y=1), + (lambda x: x > 1, "ArrayFilter(a, Greater(x, 1))"), + (functools.partial(lambda x, y: x > y, y=1), "ArrayFilter(a, Greater(x, 1))"), + (ibis._ > 1, "ArrayFilter(a, Greater(_, 1))"), ], ) -def test_array_filter(con, input, output, predicate): +def test_array_filter(con, input, output, predicate, result_name): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.Series(output["a"]) result = t.a.filter(predicate) - assert result.get_name() == "ArrayFilter(a, Greater(x, 1))" + assert result.get_name() == result_name expr = t.select(a=result) result = con.execute(expr.a) assert frozenset(map(tuple, result.values)) == frozenset( diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 17a1c84e6f3a..add931bfeb2d 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -6,7 +6,7 @@ from public import public import ibis.expr.operations as ops -from ibis.common.deferred import deferrable +from ibis.common.deferred import Deferred, deferrable from ibis.expr.types.generic import Column, Scalar, Value if TYPE_CHECKING: @@ -358,13 +358,13 @@ def join(self, sep: str | ir.StringValue) -> ir.StringValue: """ return ops.ArrayStringJoin(sep, self).to_expr() - def map(self, func: Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: - """Apply a callable `func` to each element of this array expression. + def map(self, func: Deferred | Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: + """Apply a `func` or `Deferred` to each element of this array expression. Parameters ---------- func - Function to apply to each element of this array + Function or `Deferred` to apply to each element of this array. Returns ------- @@ -374,6 +374,7 @@ def map(self, func: Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: Examples -------- >>> import ibis + >>> from ibis import _ >>> ibis.options.interactive = True >>> t = ibis.memtable({"a": [[1, None, 2], [4], []]}) >>> t @@ -386,6 +387,22 @@ def map(self, func: Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: │ [4] │ │ [] │ └──────────────────────┘ + + The most succinct way to use `map` is with `Deferred` expressions: + + >>> t.a.map((_ + 100).cast("float")) + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ ArrayMap(a, Cast(Add(_, 100), float64)) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ array │ + ├─────────────────────────────────────────┤ + │ [101.0, None, ... +1] │ + │ [104.0] │ + │ [] │ + └─────────────────────────────────────────┘ + + You can also use `map` with a lambda function: + >>> t.a.map(lambda x: (x + 100).cast("float")) ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ArrayMap(a, Cast(Add(x, 100), float64)) ┃ @@ -426,23 +443,28 @@ def map(self, func: Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: │ [] │ └────────────────────────┘ """ - name = next(iter(inspect.signature(func).parameters.keys())) + if isinstance(func, Deferred): + name = "_" + else: + name = next(iter(inspect.signature(func).parameters.keys())) parameter = ops.Argument( name=name, shape=self.op().shape, dtype=self.type().value_type ) - return ops.ArrayMap( - self, param=parameter.param, body=func(parameter.to_expr()) - ).to_expr() + if isinstance(func, Deferred): + body = func.resolve(parameter.to_expr()) + else: + body = func(parameter.to_expr()) + return ops.ArrayMap(self, param=parameter.param, body=body).to_expr() def filter( - self, predicate: Callable[[ir.Value], bool | ir.BooleanValue] + self, predicate: Deferred | Callable[[ir.Value], bool | ir.BooleanValue] ) -> ir.ArrayValue: - """Filter array elements using `predicate`. + """Filter array elements using `predicate` function or `Deferred`. Parameters ---------- predicate - Function to use to filter array elements + Function or `Deferred` to use to filter array elements Returns ------- @@ -452,6 +474,7 @@ def filter( Examples -------- >>> import ibis + >>> from ibis import _ >>> ibis.options.interactive = True >>> t = ibis.memtable({"a": [[1, None, 2], [4], []]}) >>> t @@ -464,6 +487,22 @@ def filter( │ [4] │ │ [] │ └──────────────────────┘ + + The most succinct way to use `filter` is with `Deferred` expressions: + + >>> t.a.filter(_ > 1) + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ ArrayFilter(a, Greater(_, 1)) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ array │ + ├───────────────────────────────┤ + │ [2] │ + │ [4] │ + │ [] │ + └───────────────────────────────┘ + + You can also use `map` with a lambda function: + >>> t.a.filter(lambda x: x > 1) ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ArrayFilter(a, Greater(x, 1)) ┃ @@ -504,15 +543,20 @@ def filter( │ [] │ └───────────────────────────────┘ """ - name = next(iter(inspect.signature(predicate).parameters.keys())) + if isinstance(predicate, Deferred): + name = "_" + else: + name = next(iter(inspect.signature(predicate).parameters.keys())) parameter = ops.Argument( name=name, shape=self.op().shape, dtype=self.type().value_type, ) - return ops.ArrayFilter( - self, param=parameter.param, body=predicate(parameter.to_expr()) - ).to_expr() + if isinstance(predicate, Deferred): + body = predicate.resolve(parameter.to_expr()) + else: + body = predicate(parameter.to_expr()) + return ops.ArrayFilter(self, param=parameter.param, body=body).to_expr() def contains(self, other: ir.Value) -> ir.BooleanValue: """Return whether the array contains `other`. From 7c616b39a69e58602b812e4058c66785dad4e0f4 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Wed, 7 Feb 2024 11:50:49 -0900 Subject: [PATCH 3/4] chore: apply code review suggestions --- ibis/backends/tests/test_array.py | 28 ++++++++++++---------------- ibis/expr/types/arrays.py | 4 ++-- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 7f67fda8244b..f67661e54396 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -449,11 +449,11 @@ def test_array_slice(backend, start, stop): ], ) @pytest.mark.parametrize( - "func,result_name", + "func", [ - (lambda x: x + 1, "ArrayMap(a, Add(x, 1))"), - (functools.partial(lambda x, y: x + y, y=1), "ArrayMap(a, Add(x, 1))"), - (ibis._ + 1, "ArrayMap(a, Add(_, 1))"), + lambda x: x + 1, + functools.partial(lambda x, y: x + y, y=1), + ibis._ + 1, ], ) @pytest.mark.broken( @@ -461,14 +461,12 @@ def test_array_slice(backend, start, stop): raises=AssertionError, reason="TODO(Kexiang): seems a bug", ) -def test_array_map(con, input, output, func, result_name): +def test_array_map(con, input, output, func): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.Series(output["a"]) - result = t.a.map(func) - assert result.get_name() == result_name - expr = t.select(a=result) + expr = t.select(a=t.a.map(func)) result = con.execute(expr.a) assert frozenset(map(tuple, result.values)) == frozenset( map(tuple, expected.values) @@ -517,20 +515,18 @@ def test_array_map(con, input, output, func, result_name): ], ) @pytest.mark.parametrize( - ("predicate,result_name"), + "predicate", [ - (lambda x: x > 1, "ArrayFilter(a, Greater(x, 1))"), - (functools.partial(lambda x, y: x > y, y=1), "ArrayFilter(a, Greater(x, 1))"), - (ibis._ > 1, "ArrayFilter(a, Greater(_, 1))"), + lambda x: x > 1, + functools.partial(lambda x, y: x > y, y=1), + ibis._ > 1, ], ) -def test_array_filter(con, input, output, predicate, result_name): +def test_array_filter(con, input, output, predicate): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.Series(output["a"]) - result = t.a.filter(predicate) - assert result.get_name() == result_name - expr = t.select(a=result) + expr = t.select(a=t.a.filter(predicate)) result = con.execute(expr.a) assert frozenset(map(tuple, result.values)) == frozenset( map(tuple, expected.values) diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index add931bfeb2d..6518b5372827 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -444,7 +444,7 @@ def map(self, func: Deferred | Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: └────────────────────────┘ """ if isinstance(func, Deferred): - name = "_" + name = str(func) else: name = next(iter(inspect.signature(func).parameters.keys())) parameter = ops.Argument( @@ -544,7 +544,7 @@ def filter( └───────────────────────────────┘ """ if isinstance(predicate, Deferred): - name = "_" + name = str(predicate) else: name = next(iter(inspect.signature(predicate).parameters.keys())) parameter = ops.Argument( From 31bb0dd18046f9404b766d48b349ca73aacd53ba Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 8 Feb 2024 07:52:36 -0500 Subject: [PATCH 4/4] chore: bring back singular parameter name for deferred inputs to array map and filter --- ibis/expr/types/arrays.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 6518b5372827..add931bfeb2d 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -444,7 +444,7 @@ def map(self, func: Deferred | Callable[[ir.Value], ir.Value]) -> ir.ArrayValue: └────────────────────────┘ """ if isinstance(func, Deferred): - name = str(func) + name = "_" else: name = next(iter(inspect.signature(func).parameters.keys())) parameter = ops.Argument( @@ -544,7 +544,7 @@ def filter( └───────────────────────────────┘ """ if isinstance(predicate, Deferred): - name = str(predicate) + name = "_" else: name = next(iter(inspect.signature(predicate).parameters.keys())) parameter = ops.Argument(