From 6b3719a0e15916aca80cd65e7d73def29c0d7426 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 12 Jul 2024 16:46:54 -0500 Subject: [PATCH] ISLE: Make block codegen non-recursive (#8935) (#8947) * ISLE: Make block codegen non-recursive In #8919 we learned that `Codegen::emit_block` can overrun the stack when generating Rust source for deeply-nested blocks. Stack usage can be worse than one might expect because ISLE is called from a build script, and those are always built in debug mode. This commit avoids the problem by using an explicit heap-allocated stack instead of recursion. I recommend turning on the "ignore whitespace" option in your diff tool of choice when reviewing this commit. The amount of stack space that this recursive function uses is affected by the version of rustc used to compile it, and the amount of stack space available depends on the host platform. As a result, this was only observed on Windows with a recent nightly version of rustc, but it could eventually affect other platforms or compiler versions, especially as ISLE rules grow in complexity. Note that there are several other places in ISLE which recurse over the user-provided rules. This commit does not change those, but they could also become a problem in the future. * Review comments: make stack manipulation local to emit_block Co-authored-by: Jamey Sharp --- cranelift/isle/isle/src/codegen.rs | 381 ++++++++++++++++------------- 1 file changed, 206 insertions(+), 175 deletions(-) diff --git a/cranelift/isle/isle/src/codegen.rs b/cranelift/isle/isle/src/codegen.rs index 5230d96e07a1..7deb391b12fa 100644 --- a/cranelift/isle/isle/src/codegen.rs +++ b/cranelift/isle/isle/src/codegen.rs @@ -5,6 +5,7 @@ use crate::serialize::{Block, ControlFlow, EvalStep, MatchArm}; use crate::stablemapset::StableSet; use crate::trie_again::{Binding, BindingId, Constraint, RuleSet}; use std::fmt::Write; +use std::slice::Iter; /// Options for code generation. #[derive(Clone, Debug, Default)] @@ -31,6 +32,11 @@ struct Codegen<'a> { terms: &'a [(TermId, RuleSet)], } +enum Nested<'a> { + Cases(Iter<'a, EvalStep>), + Arms(BindingId, Iter<'a, MatchArm>), +} + struct BodyContext<'a, W> { out: &'a mut W, ruleset: &'a RuleSet, @@ -60,7 +66,10 @@ impl<'a, W: Write> BodyContext<'a, W> { writeln!(self.out, " {{") } - fn end_block(&mut self, scope: StableSet) -> std::fmt::Result { + fn end_block(&mut self, last_line: &str, scope: StableSet) -> std::fmt::Result { + if !last_line.is_empty() { + writeln!(self.out, "{}{}", &self.indent, last_line)?; + } self.is_bound = scope; self.end_block_without_newline()?; writeln!(self.out) @@ -433,37 +442,29 @@ impl Length for ContextIterWrapper {{ ReturnKind::Plain => write!(ctx.out, "{}", ret)?, }; - let scope = ctx.enter_scope(); - ctx.begin_block()?; - - self.emit_block(&mut ctx, &root, sig.ret_kind)?; - - match (sig.ret_kind, root.steps.last()) { - (ReturnKind::Iterator, _) => { - writeln!( - ctx.out, - "{}return;", - &ctx.indent - )?; - } - (_, Some(EvalStep { check: ControlFlow::Return { .. }, .. })) => { - // If there's an outermost fallback, no need for another `return` statement. - } - (ReturnKind::Option, _) => { - writeln!(ctx.out, "{}None", &ctx.indent)? - } - (ReturnKind::Plain, _) => { - writeln!(ctx.out, - "unreachable!(\"no rule matched for term {{}} at {{}}; should it be partial?\", {:?}, {:?})", - term_name, - termdata - .decl_pos - .pretty_print_line(&self.typeenv.filenames[..]) - )? - } + let last_expr = if let Some(EvalStep { + check: ControlFlow::Return { .. }, + .. + }) = root.steps.last() + { + // If there's an outermost fallback, no need for another `return` statement. + String::new() + } else { + match sig.ret_kind { + ReturnKind::Iterator => String::new(), + ReturnKind::Option => "None".to_string(), + ReturnKind::Plain => format!( + "unreachable!(\"no rule matched for term {{}} at {{}}; should it be partial?\", {:?}, {:?})", + term_name, + termdata + .decl_pos + .pretty_print_line(&self.typeenv.filenames[..]) + ), } + }; - ctx.end_block(scope)?; + let scope = ctx.enter_scope(); + self.emit_block(&mut ctx, &root, sig.ret_kind, &last_expr, scope)?; } Ok(()) } @@ -475,12 +476,7 @@ impl Length for ContextIterWrapper {{ } } - fn emit_block( - &self, - ctx: &mut BodyContext, - block: &Block, - ret_kind: ReturnKind, - ) -> std::fmt::Result { + fn validate_block(ret_kind: ReturnKind, block: &Block) -> Nested { if !matches!(ret_kind, ReturnKind::Iterator) { // Loops are only allowed if we're returning an iterator. assert!(!block @@ -499,167 +495,202 @@ impl Length for ContextIterWrapper {{ } } - for case in block.steps.iter() { - for &expr in case.bind_order.iter() { - let iter_return = match &ctx.ruleset.bindings[expr.index()] { - Binding::Extractor { term, .. } => { - let termdata = &self.termenv.terms[term.index()]; - let sig = termdata.extractor_sig(self.typeenv).unwrap(); - if sig.ret_kind == ReturnKind::Iterator { - if termdata.has_external_extractor() { - Some(format!("C::{}_returns", sig.func_name)) - } else { - Some(format!("ContextIterWrapper::, _>")) + Nested::Cases(block.steps.iter()) + } + + fn emit_block( + &self, + ctx: &mut BodyContext, + block: &Block, + ret_kind: ReturnKind, + last_expr: &str, + scope: StableSet, + ) -> std::fmt::Result { + let mut stack = Vec::new(); + ctx.begin_block()?; + stack.push((Self::validate_block(ret_kind, block), last_expr, scope)); + + while let Some((mut nested, last_line, scope)) = stack.pop() { + match &mut nested { + Nested::Cases(cases) => { + let Some(case) = cases.next() else { + ctx.end_block(last_line, scope)?; + continue; + }; + // Iterator isn't done, put it back on the stack. + stack.push((nested, last_line, scope)); + + for &expr in case.bind_order.iter() { + let iter_return = match &ctx.ruleset.bindings[expr.index()] { + Binding::Extractor { term, .. } => { + let termdata = &self.termenv.terms[term.index()]; + let sig = termdata.extractor_sig(self.typeenv).unwrap(); + if sig.ret_kind == ReturnKind::Iterator { + if termdata.has_external_extractor() { + Some(format!("C::{}_returns", sig.func_name)) + } else { + Some(format!("ContextIterWrapper::, _>")) + } + } else { + None + } + } + Binding::Constructor { term, .. } => { + let termdata = &self.termenv.terms[term.index()]; + let sig = termdata.constructor_sig(self.typeenv).unwrap(); + if sig.ret_kind == ReturnKind::Iterator { + if termdata.has_external_constructor() { + Some(format!("C::{}_returns", sig.func_name)) + } else { + Some(format!("ContextIterWrapper::, _>")) + } + } else { + None + } } + _ => None, + }; + if let Some(ty) = iter_return { + writeln!( + ctx.out, + "{}let mut v{} = {}::default();", + &ctx.indent, + expr.index(), + ty + )?; + write!(ctx.out, "{}", &ctx.indent)?; } else { - None + write!(ctx.out, "{}let v{} = ", &ctx.indent, expr.index())?; } + self.emit_expr(ctx, expr)?; + writeln!(ctx.out, ";")?; + ctx.is_bound.insert(expr); } - Binding::Constructor { term, .. } => { - let termdata = &self.termenv.terms[term.index()]; - let sig = termdata.constructor_sig(self.typeenv).unwrap(); - if sig.ret_kind == ReturnKind::Iterator { - if termdata.has_external_constructor() { - Some(format!("C::{}_returns", sig.func_name)) - } else { - Some(format!("ContextIterWrapper::, _>")) + + match &case.check { + // Use a shorthand notation if there's only one match arm. + ControlFlow::Match { source, arms } if arms.len() == 1 => { + let arm = &arms[0]; + let scope = ctx.enter_scope(); + match arm.constraint { + Constraint::ConstInt { .. } | Constraint::ConstPrim { .. } => { + write!(ctx.out, "{}if ", &ctx.indent)?; + self.emit_expr(ctx, *source)?; + write!(ctx.out, " == ")?; + self.emit_constraint(ctx, *source, arm)?; + } + Constraint::Variant { .. } | Constraint::Some => { + write!(ctx.out, "{}if let ", &ctx.indent)?; + self.emit_constraint(ctx, *source, arm)?; + write!(ctx.out, " = ")?; + self.emit_source(ctx, *source, arm.constraint)?; + } } - } else { - None + ctx.begin_block()?; + stack.push((Self::validate_block(ret_kind, &arm.body), "", scope)); } - } - _ => None, - }; - if let Some(ty) = iter_return { - writeln!( - ctx.out, - "{}let mut v{} = {}::default();", - &ctx.indent, - expr.index(), - ty - )?; - write!(ctx.out, "{}", &ctx.indent)?; - } else { - write!(ctx.out, "{}let v{} = ", &ctx.indent, expr.index())?; - } - self.emit_expr(ctx, expr)?; - writeln!(ctx.out, ";")?; - ctx.is_bound.insert(expr); - } - match &case.check { - // Use a shorthand notation if there's only one match arm. - ControlFlow::Match { source, arms } if arms.len() == 1 => { - let arm = &arms[0]; - let scope = ctx.enter_scope(); - match arm.constraint { - Constraint::ConstInt { .. } | Constraint::ConstPrim { .. } => { + ControlFlow::Match { source, arms } => { + let scope = ctx.enter_scope(); + write!(ctx.out, "{}match ", &ctx.indent)?; + self.emit_source(ctx, *source, arms[0].constraint)?; + ctx.begin_block()?; + + // Always add a catchall arm, because we + // don't do exhaustiveness checking on the + // match arms. + stack.push((Nested::Arms(*source, arms.iter()), "_ => {}", scope)); + } + + ControlFlow::Equal { a, b, body } => { + let scope = ctx.enter_scope(); write!(ctx.out, "{}if ", &ctx.indent)?; - self.emit_expr(ctx, *source)?; + self.emit_expr(ctx, *a)?; write!(ctx.out, " == ")?; - self.emit_constraint(ctx, *source, arm)?; + self.emit_expr(ctx, *b)?; + ctx.begin_block()?; + stack.push((Self::validate_block(ret_kind, body), "", scope)); } - Constraint::Variant { .. } | Constraint::Some => { - write!(ctx.out, "{}if let ", &ctx.indent)?; - self.emit_constraint(ctx, *source, arm)?; - write!(ctx.out, " = ")?; - self.emit_source(ctx, *source, arm.constraint)?; - } - } - ctx.begin_block()?; - self.emit_block(ctx, &arm.body, ret_kind)?; - ctx.end_block(scope)?; - } - ControlFlow::Match { source, arms } => { - let scope = ctx.enter_scope(); - write!(ctx.out, "{}match ", &ctx.indent)?; - self.emit_source(ctx, *source, arms[0].constraint)?; - ctx.begin_block()?; - for arm in arms.iter() { - let scope = ctx.enter_scope(); - write!(ctx.out, "{}", &ctx.indent)?; - self.emit_constraint(ctx, *source, arm)?; - write!(ctx.out, " =>")?; - ctx.begin_block()?; - self.emit_block(ctx, &arm.body, ret_kind)?; - ctx.end_block(scope)?; - } - // Always add a catchall, because we don't do exhaustiveness checking on the - // match arms. - writeln!(ctx.out, "{}_ => {{}}", &ctx.indent)?; - ctx.end_block(scope)?; - } + ControlFlow::Loop { result, body } => { + let source = match &ctx.ruleset.bindings[result.index()] { + Binding::Iterator { source } => source, + _ => unreachable!("Loop from a non-Iterator"), + }; + let scope = ctx.enter_scope(); - ControlFlow::Equal { a, b, body } => { - let scope = ctx.enter_scope(); - write!(ctx.out, "{}if ", &ctx.indent)?; - self.emit_expr(ctx, *a)?; - write!(ctx.out, " == ")?; - self.emit_expr(ctx, *b)?; - ctx.begin_block()?; - self.emit_block(ctx, body, ret_kind)?; - ctx.end_block(scope)?; - } - - ControlFlow::Loop { result, body } => { - let source = match &ctx.ruleset.bindings[result.index()] { - Binding::Iterator { source } => source, - _ => unreachable!("Loop from a non-Iterator"), - }; - let scope = ctx.enter_scope(); + writeln!( + ctx.out, + "{}let mut v{} = v{}.into_context_iter();", + &ctx.indent, + source.index(), + source.index(), + )?; - writeln!( - ctx.out, - "{}let mut v{} = v{}.into_context_iter();", - &ctx.indent, - source.index(), - source.index(), - )?; - - write!( - ctx.out, - "{}while let Some(v{}) = v{}.next(ctx)", - &ctx.indent, - result.index(), - source.index() - )?; - ctx.is_bound.insert(*result); - ctx.begin_block()?; - self.emit_block(ctx, body, ret_kind)?; - ctx.end_block(scope)?; - } + write!( + ctx.out, + "{}while let Some(v{}) = v{}.next(ctx)", + &ctx.indent, + result.index(), + source.index() + )?; + ctx.is_bound.insert(*result); + ctx.begin_block()?; + stack.push((Self::validate_block(ret_kind, body), "", scope)); + } - &ControlFlow::Return { pos, result } => { - writeln!( - ctx.out, - "{}// Rule at {}.", - &ctx.indent, - pos.pretty_print_line(&self.typeenv.filenames) - )?; - write!(ctx.out, "{}", &ctx.indent)?; - match ret_kind { - ReturnKind::Plain | ReturnKind::Option => write!(ctx.out, "return ")?, - ReturnKind::Iterator => write!(ctx.out, "returns.extend(Some(")?, - } - self.emit_expr(ctx, result)?; - if ctx.is_ref.contains(&result) { - write!(ctx.out, ".clone()")?; - } - match ret_kind { - ReturnKind::Plain | ReturnKind::Option => writeln!(ctx.out, ";")?, - ReturnKind::Iterator => { - writeln!(ctx.out, "));")?; + &ControlFlow::Return { pos, result } => { writeln!( ctx.out, - "{}if returns.len() >= MAX_ISLE_RETURNS {{ return; }}", - ctx.indent + "{}// Rule at {}.", + &ctx.indent, + pos.pretty_print_line(&self.typeenv.filenames) )?; + write!(ctx.out, "{}", &ctx.indent)?; + match ret_kind { + ReturnKind::Plain | ReturnKind::Option => { + write!(ctx.out, "return ")? + } + ReturnKind::Iterator => write!(ctx.out, "returns.extend(Some(")?, + } + self.emit_expr(ctx, result)?; + if ctx.is_ref.contains(&result) { + write!(ctx.out, ".clone()")?; + } + match ret_kind { + ReturnKind::Plain | ReturnKind::Option => writeln!(ctx.out, ";")?, + ReturnKind::Iterator => { + writeln!(ctx.out, "));")?; + writeln!( + ctx.out, + "{}if returns.len() >= MAX_ISLE_RETURNS {{ return; }}", + ctx.indent + )?; + } + } } } } + + Nested::Arms(source, arms) => { + let Some(arm) = arms.next() else { + ctx.end_block(last_line, scope)?; + continue; + }; + let source = *source; + // Iterator isn't done, put it back on the stack. + stack.push((nested, last_line, scope)); + + let scope = ctx.enter_scope(); + write!(ctx.out, "{}", &ctx.indent)?; + self.emit_constraint(ctx, source, arm)?; + write!(ctx.out, " =>")?; + ctx.begin_block()?; + stack.push((Self::validate_block(ret_kind, &arm.body), "", scope)); + } } } + Ok(()) }