Skip to content

Commit

Permalink
fix(backends): preserve order_by position in window function when s…
Browse files Browse the repository at this point in the history
…ubsequent expressions are duplicated (#7943)

## Description of changes

This PR fixes an issue where duplicate window expressions were given the
position of the new (duplicated value)
of the expression, rather than preserving the original order.

This was because we were iterating over the new keys first and putting
those expressions into a `dict`.

BY iterating over the old keys first, we preserve the original order.

## Issues closed

Closes #7940.
  • Loading branch information
cpcloud committed Jan 9, 2024
1 parent a46bd4a commit 89056b9
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 2 deletions.
33 changes: 33 additions & 0 deletions ibis/backends/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,3 +1251,36 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df):
)

backend.assert_series_equal(result, expected)


@pytest.mark.notyet(
["pandas", "dask"],
reason="multiple ordering keys in a window function not supported for ranking",
raises=ValueError,
)
@pytest.mark.notyet(
["mssql"],
reason="IS NULL not valid syntax for mssql",
raises=sa.exc.ProgrammingError,
)
@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError)
@pytest.mark.notyet(["flink"], raises=com.UnsupportedOperationError)
@pytest.mark.broken(
["pyspark"], reason="pyspark requires CURRENT ROW", raises=PySparkAnalysisException
)
def test_ordering_order(con):
table = ibis.memtable({"bool_col": [True, False, False, None, True]})
window = ibis.window(
order_by=[
ibis.asc(table["bool_col"].isnull()),
ibis.asc(table["bool_col"]),
],
)
expr = table.select(
rank=table["bool_col"].rank().over(window),
bool_col=table["bool_col"],
)

result = con.execute(expr)
value = result.bool_col.loc[result["rank"] == 4].item()
assert pd.isna(value)
9 changes: 8 additions & 1 deletion ibis/expr/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,14 @@ def merge_windows(_, default_frame):
group_by = tuple(toolz.unique(_.frame.group_by + default_frame.group_by))

order_by = {}
for sort_key in _.frame.order_by + default_frame.order_by:
# iterate in the order of the existing keys followed by the new keys
#
# this allows duplicates to be overridden with no effect on the original
# position
#
# see https://github.com/ibis-project/ibis/issues/7940 for how this
# originally manifested
for sort_key in default_frame.order_by + _.frame.order_by:
order_by[sort_key.expr] = sort_key.ascending
order_by = tuple(ops.SortKey(k, v) for k, v in order_by.items())

Expand Down
1 change: 0 additions & 1 deletion ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,6 @@ def bind(table):
)
return expr

op = self.op()
if isinstance(window, bl.WindowBuilder):
if table := an.find_first_base_table(self.op()):
return bind(table)
Expand Down

0 comments on commit 89056b9

Please sign in to comment.