diff --git a/tooling/ast_fuzzer/src/compare/compiled.rs b/tooling/ast_fuzzer/src/compare/compiled.rs index 79a99e68720..e3973ce260c 100644 --- a/tooling/ast_fuzzer/src/compare/compiled.rs +++ b/tooling/ast_fuzzer/src/compare/compiled.rs @@ -13,7 +13,7 @@ use noirc_frontend::monomorphization::ast::Program; use crate::{Config, arb_inputs, arb_program, program_abi}; -use super::{Comparable, CompareOptions, CompareResult, ExecOutput, HasPrograms}; +use super::{Comparable, CompareOptions, CompareResult, FailedOutput, HasPrograms, PassedOutput}; pub struct CompareArtifact { pub options: CompareOptions, @@ -39,6 +39,7 @@ type SsaErrorTypes = BTreeMap; /// The execution result is the value returned from the circuit and any output from `println`. type ExecResult = (Result, NargoError>, String); +#[derive(Debug)] pub struct NargoErrorWithTypes(NargoError, SsaErrorTypes); impl NargoErrorWithTypes { @@ -91,23 +92,25 @@ impl CompareCompiledResult { Ok(r) }; + let failed = |e, ets: &SsaErrorTypes, p: String| FailedOutput { + error: NargoErrorWithTypes(e, ets.clone()), + print_output: p, + }; + + let passed = |ws, p| decode(ws).map(|r| PassedOutput { return_value: r, print_output: p }); + match (res1, res2) { - (Err(e1), Err(e2)) => Ok(CompareResult::BothFailed( - NargoErrorWithTypes(e1, ets1.clone()), - NargoErrorWithTypes(e2, ets2.clone()), - )), - (Err(e1), Ok(ws2)) => Ok(CompareResult::LeftFailed( - NargoErrorWithTypes(e1, ets1.clone()), - ExecOutput { return_value: decode(ws2)?, print_output: print2 }, - )), - (Ok(ws1), Err(e2)) => Ok(CompareResult::RightFailed( - ExecOutput { return_value: decode(ws1)?, print_output: print1 }, - NargoErrorWithTypes(e2, ets2.clone()), - )), + (Err(e1), Err(e2)) => { + Ok(CompareResult::BothFailed(failed(e1, ets1, print1), failed(e2, ets2, print2))) + } + (Err(e1), Ok(ws2)) => { + Ok(CompareResult::LeftFailed(failed(e1, ets1, print1), passed(ws2, print2)?)) + } + (Ok(ws1), Err(e2)) => { + Ok(CompareResult::RightFailed(passed(ws1, print1)?, failed(e2, ets2, print2))) + } (Ok(ws1), Ok(ws2)) => { - let o1 = ExecOutput { return_value: decode(ws1)?, print_output: print1 }; - let o2 = ExecOutput { return_value: decode(ws2)?, print_output: print2 }; - Ok(CompareResult::BothPassed(o1, o2)) + Ok(CompareResult::BothPassed(passed(ws1, print1)?, passed(ws2, print2)?)) } } } @@ -125,10 +128,19 @@ impl Comparable for NargoErrorWithTypes { return false; }; + // We have a notion of treating errors as equivalents as long as the side effects + // of the failed program are otherwise the same. For this reason we compare the + // prints in `return_value_or_err`. Here we have the option to tweak which errors + // we consider equivalents, but that's really just to stay on the conservative + // side and give us a chance to inspect new kinds of test failures. + let msg1 = e1.user_defined_failure_message(); let msg2 = e2.user_defined_failure_message(); - let is_same_msg = msg1.is_some() && msg2.is_some() && msg1 == msg2; - + let equiv_msgs = if let (Some(msg1), Some(msg2)) = (msg1, msg2) { + msg1 == msg2 || msg1.contains("overflow") && msg2.contains("overflow") + } else { + false + }; match (ee1, ee2) { ( AssertionFailed(ResolvedAssertionPayload::String(c), _, _), @@ -137,7 +149,7 @@ impl Comparable for NargoErrorWithTypes { // Looks like the workaround we have for comptime failures originating from overflows and similar assertion failures. true } - (AssertionFailed(p1, _, _), AssertionFailed(p2, _, _)) => p1 == p2 || is_same_msg, + (AssertionFailed(p1, _, _), AssertionFailed(p2, _, _)) => p1 == p2 || equiv_msgs, (SolvingError(s1, _), SolvingError(s2, _)) => format!("{s1}") == format!("{s2}"), (SolvingError(s, _), AssertionFailed(p, _, _)) | (AssertionFailed(p, _, _), SolvingError(s, _)) => match (s, p) { @@ -145,7 +157,7 @@ impl Comparable for NargoErrorWithTypes { OpcodeResolutionError::UnsatisfiedConstrain { .. }, ResolvedAssertionPayload::String(s), ) => s == "Attempted to divide by zero", - _ => is_same_msg, + _ => equiv_msgs, }, } } @@ -163,12 +175,6 @@ impl std::fmt::Display for NargoErrorWithTypes { } } -impl std::fmt::Debug for NargoErrorWithTypes { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Debug::fmt(&self.0, f) - } -} - /// Compare the execution of equivalent programs, compiled in different ways. pub struct CompareCompiled

{ pub program: P, diff --git a/tooling/ast_fuzzer/src/compare/comptime.rs b/tooling/ast_fuzzer/src/compare/comptime.rs index c318575ca2a..8f05c69ca3e 100644 --- a/tooling/ast_fuzzer/src/compare/comptime.rs +++ b/tooling/ast_fuzzer/src/compare/comptime.rs @@ -47,7 +47,7 @@ fn prepare_and_compile_snippet( source: String, force_brillig: bool, output: W, -) -> CompilationResult<(CompiledProgram, W)> { +) -> (CompilationResult, W) { let output = Rc::new(RefCell::new(output)); let (mut context, root_crate_id) = prepare_snippet(source); context.set_comptime_printing(output.clone()); @@ -58,10 +58,10 @@ fn prepare_and_compile_snippet( skip_brillig_constraints_check: true, ..Default::default() }; - let (program, warnings) = compile_main(&mut context, root_crate_id, &options, None)?; + let res = compile_main(&mut context, root_crate_id, &options, None); drop(context); let output = Rc::into_inner(output).expect("context is gone").into_inner(); - Ok(((program, output), warnings)) + (res, output) } /// Compare the execution of a Noir program in pure comptime (via interpreter) @@ -82,13 +82,15 @@ impl CompareComptime { // These comptime programs have no inputs. let initial_witness = self.abi.encode(&BTreeMap::new(), None).wrap_err("abi::encode")?; + let decode_print = |print| String::from_utf8(print).expect("should be valid utf8 string"); + // Execute a compiled Program. let do_exec = |program| { - let mut print = Vec::new(); + let mut output = Vec::new(); let mut foreign_call_executor = DefaultForeignCallBuilder::default() .with_mocks(false) - .with_output(&mut print) + .with_output(&mut output) .build(); let res = nargo::ops::execute_program( @@ -97,8 +99,8 @@ impl CompareComptime { &blackbox_solver, &mut foreign_call_executor, ); + let print = decode_print(output); - let print = String::from_utf8(print).expect("should be valid utf8 string"); (res, print) }; @@ -112,8 +114,8 @@ impl CompareComptime { self.force_brillig, Vec::new(), ) { - Ok(((program, output), _)) => (program, output), - Err(e) => { + (Ok((program, _)), output) => (program, output), + (Err(e), output) => { // If the comptime code failed to compile, it could be because it executed the code // and encountered an overflow, which would be a runtime error in Brillig. let is_assertion = e.iter().any(|e| { @@ -129,11 +131,12 @@ impl CompareComptime { None, ); let res1 = Err(NargoError::ExecutionError(err)); + let print1 = decode_print(output); return CompareCompiledResult::new( &self.abi, &Default::default(), // We failed to compile the program, so no error types. &self.ssa.artifact.error_types, - (res1, "".to_string()), + (res1, print1), (res2, print2), ); } else { diff --git a/tooling/ast_fuzzer/src/compare/interpreted.rs b/tooling/ast_fuzzer/src/compare/interpreted.rs index 1e098430f78..93afc202711 100644 --- a/tooling/ast_fuzzer/src/compare/interpreted.rs +++ b/tooling/ast_fuzzer/src/compare/interpreted.rs @@ -13,7 +13,7 @@ use regex::Regex; use crate::{Config, arb_program, input::arb_inputs_from_ssa, program_abi}; -use super::{Comparable, CompareOptions, CompareResult, ExecOutput}; +use super::{Comparable, CompareOptions, CompareResult, FailedOutput, PassedOutput}; /// The state of the SSA after a particular pass in the pipeline. pub struct ComparePass { @@ -74,7 +74,7 @@ impl CompareInterpreted { } pub fn exec(&self) -> eyre::Result { - // Debug prints up fron tin case the interpreter panics. Turn them on with `RUST_LOG=debug cargo test ...` + // Debug prints up frontin case the interpreter panics. Turn them on with `RUST_LOG=debug cargo test ...` log::debug!("Program: \n{}\n", crate::DisplayAstAsNoir(&self.program)); log::debug!( "ABI inputs: \n{}\n", @@ -102,28 +102,43 @@ impl CompareInterpreted { impl CompareInterpretedResult { pub fn new(res1: InterpretResult, res2: InterpretResult) -> Self { - let out = |ret| ExecOutput { return_value: Some(ret), print_output: Default::default() }; + // Currently the SSA interpreter `call_print` doesn't do anything, so we cannot capture the print output. + let failed = |e| FailedOutput { error: e, print_output: Default::default() }; + let passed = + |ret| PassedOutput { return_value: Some(ret), print_output: Default::default() }; + match (res1, res2) { - (Ok(r1), Ok(e2)) => Self::BothPassed(out(r1), out(e2)), - (Ok(r1), Err(e2)) => Self::RightFailed(out(r1), e2), - (Err(e1), Ok(r2)) => Self::LeftFailed(e1, out(r2)), - (Err(e1), Err(e2)) => Self::BothFailed(e1, e2), + (Ok(r1), Ok(r2)) => Self::BothPassed(passed(r1), passed(r2)), + (Ok(r1), Err(e2)) => Self::RightFailed(passed(r1), failed(e2)), + (Err(e1), Ok(r2)) => Self::LeftFailed(failed(e1), passed(r2)), + (Err(e1), Err(e2)) => Self::BothFailed(failed(e1), failed(e2)), } } } impl Comparable for ssa::interpreter::errors::InterpreterError { fn equivalent(e1: &Self, e2: &Self) -> bool { + use ssa::interpreter::errors::InternalError; use ssa::interpreter::errors::InterpreterError::*; match (e1, e2) { + ( + Internal(InternalError::ConstantDoesNotFitInType { constant: c1, typ: t1 }), + Internal(InternalError::ConstantDoesNotFitInType { constant: c2, typ: t2 }), + ) => { + // The interpreter represents values in types where the result of some casts cannot be represented, while the ACIR and + // Brillig runtime can fit them into Fields, and defer validation later. We could promote this error to a non-internal one, + // but the fact remains that the interpreter would fail earlier than ACIR or Brillig. + // To deal with this we ignore these errors as long as both passes fail the same way. + c1 == c2 && t1 == t2 + } (Internal(_), _) | (_, Internal(_)) => { // We should not get, or ignore, internal errors. // They mean the interpreter got something unexpected that we need to fix. false } (Overflow { instruction: i1 }, Overflow { instruction: i2 }) => { - // Overflows can occur or uncomparable instructions, but in a parentheses it contains the values that caused it. + // Overflows can occur or instructions with different IDs, but in a parentheses it contains the values that caused it. fn details(s: &str) -> Option<&str> { let start = s.find("(")?; let end = s.find(")")?; diff --git a/tooling/ast_fuzzer/src/compare/mod.rs b/tooling/ast_fuzzer/src/compare/mod.rs index 6573e3c491b..5d0da0b063e 100644 --- a/tooling/ast_fuzzer/src/compare/mod.rs +++ b/tooling/ast_fuzzer/src/compare/mod.rs @@ -21,12 +21,6 @@ pub use interpreted::{ input_values_to_ssa, }; -#[derive(Clone, Debug, PartialEq)] -pub struct ExecOutput { - pub return_value: Option, - pub print_output: String, -} - /// Help iterate over the program(s) in the comparable artifact. pub trait HasPrograms { fn programs(&self) -> Vec<&Program>; @@ -83,15 +77,32 @@ impl Comparable for Shared { } } +/// Optional return value with the tracked side effects. +#[derive(Clone, Debug, PartialEq)] +pub struct PassedOutput { + pub return_value: Option, + pub print_output: String, +} + +/// Error returned from the circuit, with tracked side effects. +/// +/// We want to inspect side effects even on failures, so we can treat different failures +/// as equivalent as long as the other side effects are equivalent. +#[derive(Clone, Debug, PartialEq)] +pub struct FailedOutput { + pub error: E, + pub print_output: String, +} + /// Possible outcomes of the differential execution of two equivalent programs. /// /// Use [CompareResult::return_value_or_err] to do the final comparison between /// the execution result. pub enum CompareResult { - BothFailed(E, E), - LeftFailed(E, ExecOutput), - RightFailed(ExecOutput, E), - BothPassed(ExecOutput, ExecOutput), + BothFailed(FailedOutput, FailedOutput), + LeftFailed(FailedOutput, PassedOutput), + RightFailed(PassedOutput, FailedOutput), + BothPassed(PassedOutput, PassedOutput), } impl CompareResult @@ -105,17 +116,30 @@ where pub fn return_value_or_err(&self) -> eyre::Result> { match self { CompareResult::BothFailed(e1, e2) => { - if Comparable::equivalent(e1, e2) { - // Both programs failed the same way. - Ok(None) + if !Comparable::equivalent(&e1.error, &e2.error) { + let e1 = &e1.error; + let e2 = &e2.error; + bail!( + "both programs failed in non-equivalent ways:\n{e1}\n!~\n{e2}\n\n{e1:?}\n{e2:?}" + ); } else { - bail!("both programs failed:\n{e1}\n!=\n{e2}\n\n{e1:?}\n{e2:?}") + let p1 = &e1.print_output; + let p2 = &e2.print_output; + if p1 != p2 { + bail!( + "both programs failed, but disagree on printed output:\n---\n{p1}\n--- != ---\n{p2}\n---", + ); + } else { + Ok(None) + } } } CompareResult::LeftFailed(e, _) => { + let e = &e.error; bail!("first program failed: {e}\n{e:?}") } CompareResult::RightFailed(_, e) => { + let e = &e.error; bail!("second program failed: {e}\n{e:?}") } CompareResult::BothPassed(o1, o2) => match (&o1.return_value, &o2.return_value) { @@ -129,11 +153,11 @@ where bail!("only the second program returned a value: {r2:?}",) } (r1, _) => { - if o1.print_output != o2.print_output { + let p1 = &o1.print_output; + let p2 = &o2.print_output; + if p1 != p2 { bail!( - "programs disagree on printed output:\n---\n{}\n--- != ---\n{}\n---", - o1.print_output, - o2.print_output + "both programs passed, but disagree on printed output:\n---\n{p1}\n--- != ---\n{p2}\n---", ) } Ok(r1.as_ref())