Skip to content

Commit 6f8b478

Browse files
authored
perf: Ensure only nodes that are not changed are cached in collapse optimizer (pola-rs#17791)
1 parent 9df5929 commit 6f8b478

File tree

3 files changed

+58
-11
lines changed

3 files changed

+58
-11
lines changed

Diff for: crates/polars-plan/src/plans/optimizer/collapse_and_project.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use super::*;
1212
///
1313
/// The schema reported after this optimization is also
1414
pub(super) struct SimpleProjectionAndCollapse {
15-
/// keep track of nodes that are already processed when they
15+
/// Keep track of nodes that are already processed when they
1616
/// can be expensive. Schema materialization can be for instance.
1717
processed: BTreeSet<Node>,
1818
eager: bool,
@@ -39,12 +39,14 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
3939

4040
match lp {
4141
Select { input, expr, .. } => {
42-
if !matches!(lp_arena.get(*input), ExtContext { .. }) && self.processed.insert(node)
42+
if !matches!(lp_arena.get(*input), ExtContext { .. })
43+
&& !self.processed.contains(&node)
4344
{
4445
// First check if we can apply the optimization before we allocate.
4546
if !expr.iter().all(|e| {
4647
matches!(expr_arena.get(e.node()), AExpr::Column(_)) && !e.has_alias()
4748
}) {
49+
self.processed.insert(node);
4850
return None;
4951
}
5052

@@ -59,6 +61,7 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
5961

6062
Some(alp)
6163
} else {
64+
self.processed.insert(node);
6265
None
6366
}
6467
},
@@ -73,7 +76,7 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
7376
}),
7477
// Cleanup projections set in projection pushdown just above caches
7578
// they are not needed.
76-
cache_lp @ Cache { .. } if self.processed.insert(node) => {
79+
cache_lp @ Cache { .. } if self.processed.contains(&node) => {
7780
let cache_schema = cache_lp.schema(lp_arena);
7881
if cache_schema.len() == columns.len()
7982
&& cache_schema.iter_names().zip(columns.iter_names()).all(
@@ -92,6 +95,7 @@ impl OptimizationRule for SimpleProjectionAndCollapse {
9295
if *input_schema.as_ref() == *columns {
9396
Some(other.clone())
9497
} else {
98+
self.processed.insert(node);
9599
None
96100
}
97101
},

Diff for: crates/polars-plan/src/plans/optimizer/projection_pushdown/joins.rs

+15-8
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ pub(super) fn process_asof_join(
181181
expr_arena,
182182
)?;
183183

184-
Ok(resolve_join_suffixes(
184+
resolve_join_suffixes(
185185
input_left,
186186
input_right,
187187
left_on,
@@ -190,7 +190,7 @@ pub(super) fn process_asof_join(
190190
lp_arena,
191191
expr_arena,
192192
&local_projection,
193-
))
193+
)
194194
}
195195

196196
#[allow(clippy::too_many_arguments)]
@@ -365,7 +365,7 @@ pub(super) fn process_join(
365365
expr_arena,
366366
)?;
367367

368-
Ok(resolve_join_suffixes(
368+
resolve_join_suffixes(
369369
input_left,
370370
input_right,
371371
left_on,
@@ -374,7 +374,7 @@ pub(super) fn process_join(
374374
lp_arena,
375375
expr_arena,
376376
&local_projection,
377-
))
377+
)
378378
}
379379

380380
fn process_projection(
@@ -469,13 +469,14 @@ fn resolve_join_suffixes(
469469
lp_arena: &mut Arena<IR>,
470470
expr_arena: &mut Arena<AExpr>,
471471
local_projection: &[ColumnNode],
472-
) -> IR {
472+
) -> PolarsResult<IR> {
473473
let suffix = options.args.suffix();
474474
let alp = IRBuilder::new(input_left, expr_arena, lp_arena)
475475
.join(input_right, left_on, right_on, options.clone())
476476
.build();
477477
let schema_after_join = alp.schema(lp_arena);
478478

479+
let mut all_columns = true;
479480
let projections = local_projection
480481
.iter()
481482
.map(|proj| {
@@ -484,14 +485,20 @@ fn resolve_join_suffixes(
484485
let downstream_name = &name.as_ref()[..name.len() - suffix.len()];
485486
let col = AExpr::Column(ColumnName::from(downstream_name));
486487
let node = expr_arena.add(col);
488+
all_columns = false;
487489
ExprIR::new(node, OutputName::Alias(name.clone()))
488490
} else {
489491
ExprIR::new(proj.0, OutputName::ColumnLhs(name.clone()))
490492
}
491493
})
492494
.collect::<Vec<_>>();
493495

494-
IRBuilder::from_lp(alp, expr_arena, lp_arena)
495-
.project(projections, Default::default())
496-
.build()
496+
let builder = IRBuilder::from_lp(alp, expr_arena, lp_arena);
497+
Ok(if all_columns {
498+
builder
499+
.project_simple(projections.iter().map(|e| e.output_name()))?
500+
.build()
501+
} else {
502+
builder.project(projections, Default::default()).build()
503+
})
497504
}

Diff for: py-polars/tests/unit/test_projections.py

+36
Original file line numberDiff line numberDiff line change
@@ -529,3 +529,39 @@ def test_projection_literal_no_alias_17739() -> None:
529529
assert df.select(pl.lit(False)).select("literal").collect().to_dict(
530530
as_series=False
531531
) == {"literal": [False]}
532+
533+
534+
def test_projections_collapse_17781() -> None:
535+
frame1 = pl.LazyFrame(
536+
{
537+
"index": [0],
538+
"data1": [0],
539+
"data2": [0],
540+
}
541+
)
542+
frame2 = pl.LazyFrame(
543+
{
544+
"index": [0],
545+
"label1": [True],
546+
"label2": [False],
547+
"label3": [False],
548+
},
549+
schema=[
550+
("index", pl.Int64),
551+
("label1", pl.Boolean),
552+
("label2", pl.Boolean),
553+
("label3", pl.Boolean),
554+
],
555+
)
556+
cols = ["index", "data1", "label1", "label2"]
557+
558+
lf = None
559+
for lfj in [frame1, frame2]:
560+
use_columns = [c for c in cols if c in lfj.collect_schema().names()]
561+
lfj = lfj.select(use_columns)
562+
lfj = lfj.select(use_columns)
563+
if lf is None:
564+
lf = lfj
565+
else:
566+
lf = lf.join(lfj, on="index", how="left")
567+
assert "SELECT " not in lf.explain() # type: ignore[union-attr]

0 commit comments

Comments
 (0)