diff --git a/Cargo.lock b/Cargo.lock index fe16eb9cc98..74c95956cc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -275,6 +275,15 @@ version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +[[package]] +name = "arbtest" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3be567977128c0f71ad1462d9624ccda712193d124e944252f0c5789a06d46" +dependencies = [ + "arbitrary", +] + [[package]] name = "ark-bls12-381" version = "0.5.0" @@ -3295,6 +3304,7 @@ dependencies = [ "acir", "acvm", "arbitrary", + "arbtest", "bn254_blackbox_solver", "build-data", "color-eyre", diff --git a/Cargo.toml b/Cargo.toml index 1077dcfd12b..857a7e8d443 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -153,6 +153,7 @@ rmp = "0.8.14" rmp-serde = "1.3.0" arbitrary = "1.4.1" +arbtest = "0.3.2" cfg-if = "1.0.0" dirs = "4" serde = { version = "1.0.136", features = ["derive"] } @@ -225,4 +226,4 @@ panic = "abort" opt-level = "z" [profile.bench] -lto = true \ No newline at end of file +lto = true diff --git a/cspell.json b/cspell.json index 484858ac3a9..cd5a5706749 100644 --- a/cspell.json +++ b/cspell.json @@ -11,6 +11,7 @@ "aeiou", "appender", "Arbitrum", + "arbtest", "arithmetization", "arity", "arkworks", diff --git a/tooling/ast_fuzzer/Cargo.toml b/tooling/ast_fuzzer/Cargo.toml index 91bb35bed7d..016a2db7302 100644 --- a/tooling/ast_fuzzer/Cargo.toml +++ b/tooling/ast_fuzzer/Cargo.toml @@ -31,4 +31,5 @@ noirc_frontend.workspace = true noir_fuzzer.workspace = true [dev-dependencies] +arbtest.workspace = true rand.workspace = true diff --git a/tooling/ast_fuzzer/src/lib.rs b/tooling/ast_fuzzer/src/lib.rs index 78886eb3557..2329846a5b7 100644 --- a/tooling/ast_fuzzer/src/lib.rs +++ b/tooling/ast_fuzzer/src/lib.rs @@ -6,6 +6,7 @@ mod program; pub use abi::program_abi; pub use input::arb_inputs; use program::freq::Freqs; +pub use program::visitor::{visit_expr, visit_expr_mut}; pub use program::{DisplayAstAsNoir, arb_program}; /// AST generation configuration. @@ -31,32 +32,42 @@ pub struct Config { pub max_loop_size: usize, /// Maximum call depth for recursive calls. pub max_call_depth: usize, - /// Frequency of expressions that produce a value. + /// Frequency of expressions, which produce a value. pub expr_freqs: Freqs, - /// Frequency of statements that don't produce a value. - pub stmt_freqs: Freqs, + /// Frequency of statements in ACIR functions. + pub stmt_freqs_acir: Freqs, + /// Frequency of statements in Brillig functions. + pub stmt_freqs_brillig: Freqs, } impl Default for Config { fn default() -> Self { let expr_freqs = Freqs::new(&[ - ("unary", 5), + ("unary", 10), ("binary", 20), ("if", 15), ("block", 30), ("vars", 25), ("literal", 5), + ("call", 15), + ]); + let stmt_freqs_acir = Freqs::new(&[ + ("drop", 3), + ("assign", 30), + ("if", 10), + ("for", 18), + ("let", 25), ("call", 5), ]); - let stmt_freqs = Freqs::new(&[ + let stmt_freqs_brillig = Freqs::new(&[ ("drop", 5), - ("break", 5), - ("continue", 5), + ("break", 20), + ("continue", 20), ("assign", 30), ("if", 10), - ("for", 10), - ("loop", 10), - ("while", 10), + ("for", 15), + ("loop", 15), + ("while", 15), ("let", 20), ("call", 5), ]); @@ -72,7 +83,8 @@ impl Default for Config { max_loop_size: 10, max_call_depth: 5, expr_freqs, - stmt_freqs, + stmt_freqs_acir, + stmt_freqs_brillig, } } } diff --git a/tooling/ast_fuzzer/src/program/func.rs b/tooling/ast_fuzzer/src/program/func.rs index 26ac9113738..3593b160269 100644 --- a/tooling/ast_fuzzer/src/program/func.rs +++ b/tooling/ast_fuzzer/src/program/func.rs @@ -278,8 +278,9 @@ impl<'a> FunctionContext<'a> { } // Function calls returning a value. - if freq.enabled_when("call", self.budget > 0) { - if let Some(expr) = self.gen_call(u, typ, max_depth)? { + if freq.enabled_when("call", allow_nested && self.budget > 0) { + // Decreasing the max depth in expression position because it can be very difficult to read. + if let Some(expr) = self.gen_call(u, typ, max_depth.saturating_sub(1))? { return Ok(expr); } } @@ -575,7 +576,11 @@ impl<'a> FunctionContext<'a> { /// Generate a statement, which is an expression that doesn't return anything, /// for example loops, variable declarations, etc. fn gen_stmt(&mut self, u: &mut Unstructured) -> arbitrary::Result { - let mut freq = Freq::new(u, &self.ctx.config.stmt_freqs)?; + let mut freq = if self.unconstrained() { + Freq::new(u, &self.ctx.config.stmt_freqs_brillig)? + } else { + Freq::new(u, &self.ctx.config.stmt_freqs_acir)? + }; // TODO(#7926): Match // TODO(#7931): print // TODO(#7932): Constrain @@ -587,23 +592,6 @@ impl<'a> FunctionContext<'a> { } } - // Get loop out of the way quick, as it's always disabled for ACIR. - if freq.enabled_when("loop", self.budget > 1 && self.unconstrained()) { - return self.gen_loop(u); - } - - if freq.enabled_when("while", self.budget > 1 && self.unconstrained()) { - return self.gen_while(u); - } - - if freq.enabled_when("break", self.in_loop && self.unconstrained()) { - return Ok(Expression::Break); - } - - if freq.enabled_when("continue", self.in_loop && self.unconstrained()) { - return Ok(Expression::Continue); - } - // Require a positive budget, so that we have some for the block itself and its contents. if freq.enabled_when("if", self.budget > 1) { return self.gen_if(u, &Type::Unit, self.max_depth(), Flags::TOP); @@ -619,6 +607,25 @@ impl<'a> FunctionContext<'a> { } } + if self.unconstrained() { + // Get loop out of the way quick, as it's always disabled for ACIR. + if freq.enabled_when("loop", self.budget > 1) { + return self.gen_loop(u); + } + + if freq.enabled_when("while", self.budget > 1) { + return self.gen_while(u); + } + + if freq.enabled_when("break", self.in_loop) { + return Ok(Expression::Break); + } + + if freq.enabled_when("continue", self.in_loop) { + return Ok(Expression::Continue); + } + } + if freq.enabled("assign") { if let Some(e) = self.gen_assign(u)? { return Ok(e); diff --git a/tooling/ast_fuzzer/src/program/mod.rs b/tooling/ast_fuzzer/src/program/mod.rs index e7128dd2716..8877db80ae5 100644 --- a/tooling/ast_fuzzer/src/program/mod.rs +++ b/tooling/ast_fuzzer/src/program/mod.rs @@ -22,7 +22,7 @@ mod func; mod rewrite; mod scope; mod types; -mod visitor; +pub(crate) mod visitor; /// Generate an arbitrary monomorphized AST. pub fn arb_program(u: &mut Unstructured, config: Config) -> arbitrary::Result { diff --git a/tooling/ast_fuzzer/src/program/visitor.rs b/tooling/ast_fuzzer/src/program/visitor.rs index 9c58755d7ef..5ee2d25782d 100644 --- a/tooling/ast_fuzzer/src/program/visitor.rs +++ b/tooling/ast_fuzzer/src/program/visitor.rs @@ -6,7 +6,7 @@ use noirc_frontend::monomorphization::ast::{Expression, LValue, Literal}; /// of the visited expression. /// /// Gets mutable references so it can manipulate the expressions if needed. -pub(crate) fn visit_expr_mut(expr: &mut Expression, visit: &mut V) +pub fn visit_expr_mut(expr: &mut Expression, visit: &mut V) where V: FnMut(&mut Expression) -> bool, { @@ -141,7 +141,7 @@ where /// /// This is a read-only version, for cases where we don't have/need /// a mutable reference to the AST. -pub(crate) fn visit_expr(expr: &Expression, visit: &mut V) +pub fn visit_expr(expr: &Expression, visit: &mut V) where V: FnMut(&Expression) -> bool, { diff --git a/tooling/ast_fuzzer/tests/calibration.rs b/tooling/ast_fuzzer/tests/calibration.rs new file mode 100644 index 00000000000..bcfbe5b2fed --- /dev/null +++ b/tooling/ast_fuzzer/tests/calibration.rs @@ -0,0 +1,111 @@ +//! Calibration test for the AST program generator, which generates a bunch of random programs, +//! visits all the expressions in the AST, and counts the appearance of the labels we put in +//! the `Freqs` in `ast_fuzzer/src/lib.rs`. Then, we assert that the relative frequency of +//! the different labels is within an acceptable range. +//! +//! We can use this to calibrate the frequency values with some statistical feedback. +//! +//! ```shell +//! cargo test -p noir_ast_fuzzer --test calibration -- --nocapture +//! ``` +use std::collections::BTreeMap; + +use arbtest::arbtest; +use noir_ast_fuzzer::{Config, arb_program, visit_expr}; +use noirc_frontend::monomorphization::ast::{Expression, Type}; + +#[test] +fn arb_program_freqs_in_expected_range() { + // Counting labels separately for ACIR and Brillig, and then whether it's an expression or a statement. + let mut counts: BTreeMap>> = Default::default(); + let mut program_count = 0; + + arbtest(|u| { + let program = arb_program(u, Config::default())?; + for func in program.functions { + visit_expr(&func.body, &mut |expr| { + let Some((group, key)) = classify(expr) else { + return true; + }; + let count = counts + .entry(func.unconstrained) + .or_default() + .entry(group) + .or_default() + .entry(key) + .or_default(); + *count += 1; + true + }); + } + program_count += 1; + Ok(()) + }) + .budget_ms(1000) + .size_min(1 << 12) + .size_max(1 << 20); + + println!("Generated {program_count} programs."); + for (unconstrained, counts) in &counts { + println!("{} frequencies:", if *unconstrained { "Brillig" } else { "ACIR" }); + for (group, counts) in counts { + let total = counts.values().sum::(); + println!("\t{group} (total {total}):"); + for (key, count) in counts { + println!( + "\t\t{key}:{} {count}\t({}/100)", + std::iter::repeat_n(" ", 15 - key.len()).collect::(), + count * 100 / total + ); + } + } + } + + let freq_100 = |unconstrained, group: &str, keys: &[&str]| { + keys.iter().map(|key| counts[&unconstrained][group][key]).sum::() * 100 + / counts[&unconstrained][group].values().sum::() + }; + + // Assert relative frequencies + let loops_a = freq_100(false, "stmt", &["for"]); + let loops_b = freq_100(true, "stmt", &["for", "loop", "while"]); + let break_b = freq_100(true, "stmt", &["break"]); + + assert!((9..=11).contains(&loops_a), "ACIR loops: {loops_a}"); + assert!((loops_a - 1..=loops_a + 1).contains(&loops_b), "Brillig loops: {loops_b}"); + assert!(break_b >= loops_b, "Brillig should break out of loops: {break_b}"); +} + +/// Classify the expression into "expr" or "stmt" for frequency settings. +fn classify(expr: &Expression) -> Option<(&'static str, &'static str)> { + let cat = match expr { + Expression::Ident(_) + | Expression::Cast(_) + | Expression::Tuple(_) + | Expression::ExtractTupleField(_, _) + | Expression::Index(_) + | Expression::Semi(_) + | Expression::Clone(_) => { + return None; + } + Expression::Literal(_) => ("expr", "literal"), + Expression::Block(xs) => { + (xs.last().and_then(classify).map(|(c, _)| c).unwrap_or("stmt"), "block") + } + Expression::Unary(_) => ("expr", "unary"), + Expression::Binary(_) => ("expr", "binary"), + Expression::For(_) => ("stmt", "for"), + Expression::Loop(_) => ("stmt", "loop"), + Expression::While(_) => ("stmt", "while"), + Expression::If(x) => (if x.typ == Type::Unit { "stmt" } else { "expr" }, "if"), + Expression::Match(_) => todo!("match"), + Expression::Call(x) => (if x.return_type == Type::Unit { "stmt" } else { "expr" }, "call"), + Expression::Let(_) => ("stmt", "let"), + Expression::Constrain(_, _, _) => ("stmt", "constrain"), + Expression::Assign(_) => ("stmt", "assign"), + Expression::Drop(_) => ("stmt", "drop"), + Expression::Break => ("stmt", "break"), + Expression::Continue => ("stmt", "continue"), + }; + Some(cat) +}