diff --git a/compiler/noirc_errors/src/reporter.rs b/compiler/noirc_errors/src/reporter.rs index d817b48691f..ea7c97b1f25 100644 --- a/compiler/noirc_errors/src/reporter.rs +++ b/compiler/noirc_errors/src/reporter.rs @@ -17,6 +17,7 @@ pub struct CustomDiagnostic { #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum DiagnosticKind { Error, + Bug, Warning, } @@ -62,6 +63,19 @@ impl CustomDiagnostic { } } + pub fn simple_bug( + primary_message: String, + secondary_message: String, + secondary_span: Span, + ) -> CustomDiagnostic { + CustomDiagnostic { + message: primary_message, + secondaries: vec![CustomLabel::new(secondary_message, secondary_span)], + notes: Vec::new(), + kind: DiagnosticKind::Bug, + } + } + pub fn in_file(self, file_id: fm::FileId) -> FileDiagnostic { FileDiagnostic::new(file_id, self) } @@ -81,6 +95,10 @@ impl CustomDiagnostic { pub fn is_warning(&self) -> bool { matches!(self.kind, DiagnosticKind::Warning) } + + pub fn is_bug(&self) -> bool { + matches!(self.kind, DiagnosticKind::Bug) + } } impl std::fmt::Display for CustomDiagnostic { @@ -120,10 +138,13 @@ pub fn report_all<'files>( silence_warnings: bool, ) -> ReportedErrors { // Report warnings before any errors - let (warnings, mut errors): (Vec<_>, _) = - diagnostics.iter().partition(|item| item.diagnostic.is_warning()); + let (warnings_and_bugs, mut errors): (Vec<_>, _) = + diagnostics.iter().partition(|item| !item.diagnostic.is_error()); + let (warnings, mut bugs): (Vec<_>, _) = + warnings_and_bugs.iter().partition(|item| item.diagnostic.is_warning()); let mut diagnostics = if silence_warnings { Vec::new() } else { warnings }; + diagnostics.append(&mut bugs); diagnostics.append(&mut errors); let error_count = @@ -170,6 +191,7 @@ fn convert_diagnostic( ) -> Diagnostic { let diagnostic = match (cd.kind, deny_warnings) { (DiagnosticKind::Warning, false) => Diagnostic::warning(), + (DiagnosticKind::Bug, ..) => Diagnostic::bug(), _ => Diagnostic::error(), }; diff --git a/compiler/noirc_evaluator/src/errors.rs b/compiler/noirc_evaluator/src/errors.rs index 0879056ad36..2725abb1f63 100644 --- a/compiler/noirc_evaluator/src/errors.rs +++ b/compiler/noirc_evaluator/src/errors.rs @@ -56,6 +56,7 @@ pub enum RuntimeError { #[derive(Debug, Clone, Serialize, Deserialize)] pub enum SsaReport { Warning(InternalWarning), + Bug(InternalBug), } impl From for FileDiagnostic { @@ -78,6 +79,19 @@ impl From for FileDiagnostic { Diagnostic::simple_warning(message, secondary_message, location.span); diagnostic.in_file(file_id).with_call_stack(call_stack) } + SsaReport::Bug(bug) => { + let message = bug.to_string(); + let (secondary_message, call_stack) = match bug { + InternalBug::IndependentSubgraph { call_stack } => { + ("There is no path from the output of this brillig call to either return values or inputs of the circuit, which creates an independent subgraph. This is quite likely a soundness vulnerability".to_string(),call_stack) + } + }; + let call_stack = vecmap(call_stack, |location| location); + let file_id = call_stack.last().map(|location| location.file).unwrap_or_default(); + let location = call_stack.last().expect("Expected RuntimeError to have a location"); + let diagnostic = Diagnostic::simple_bug(message, secondary_message, location.span); + diagnostic.in_file(file_id).with_call_stack(call_stack) + } } } } @@ -90,6 +104,12 @@ pub enum InternalWarning { VerifyProof { call_stack: CallStack }, } +#[derive(Debug, PartialEq, Eq, Clone, Error, Serialize, Deserialize)] +pub enum InternalBug { + #[error("Input to brillig function is in a separate subgraph to output")] + IndependentSubgraph { call_stack: CallStack }, +} + #[derive(Debug, PartialEq, Eq, Clone, Error)] pub enum InternalError { #[error("ICE: Both expressions should have degree<=1")] diff --git a/compiler/noirc_evaluator/src/ssa.rs b/compiler/noirc_evaluator/src/ssa.rs index 0e0c3f1e631..e1182f17bcd 100644 --- a/compiler/noirc_evaluator/src/ssa.rs +++ b/compiler/noirc_evaluator/src/ssa.rs @@ -41,6 +41,8 @@ pub mod ir; mod opt; pub mod ssa_gen; +pub(crate) struct ArtifactsAndWarnings(Artifacts, Vec); + /// Optimize the given program by converting it into SSA /// form and performing optimizations there. When finished, /// convert the final SSA into an ACIR program and return it. @@ -49,10 +51,10 @@ pub mod ssa_gen; pub(crate) fn optimize_into_acir( program: Program, options: &SsaEvaluatorOptions, -) -> Result { +) -> Result { let ssa_gen_span = span!(Level::TRACE, "ssa_generation"); let ssa_gen_span_guard = ssa_gen_span.enter(); - let ssa = SsaBuilder::new( + let mut ssa = SsaBuilder::new( program, options.enable_ssa_logging, options.force_brillig_output, @@ -89,13 +91,15 @@ pub(crate) fn optimize_into_acir( .run_pass(Ssa::array_set_optimization, "After Array Set Optimizations:") .finish(); + let ssa_level_warnings = ssa.check_for_underconstrained_values(); let brillig = time("SSA to Brillig", options.print_codegen_timings, || { ssa.to_brillig(options.enable_brillig_logging) }); drop(ssa_gen_span_guard); - time("SSA to ACIR", options.print_codegen_timings, || ssa.into_acir(&brillig)) + let artifacts = time("SSA to ACIR", options.print_codegen_timings, || ssa.into_acir(&brillig))?; + Ok(ArtifactsAndWarnings(artifacts, ssa_level_warnings)) } // Helper to time SSA passes @@ -149,6 +153,10 @@ impl SsaProgramArtifact { } self.names.push(circuit_artifact.name); } + + fn add_warnings(&mut self, mut warnings: Vec) { + self.warnings.append(&mut warnings); + } } pub struct SsaEvaluatorOptions { @@ -179,7 +187,8 @@ pub fn create_program( let func_sigs = program.function_signatures.clone(); let recursive = program.recursive; - let (generated_acirs, generated_brillig, error_types) = optimize_into_acir(program, options)?; + let ArtifactsAndWarnings((generated_acirs, generated_brillig, error_types), ssa_level_warnings) = + optimize_into_acir(program, options)?; assert_eq!( generated_acirs.len(), func_sigs.len(), @@ -187,6 +196,9 @@ pub fn create_program( ); let mut program_artifact = SsaProgramArtifact::new(generated_brillig, error_types); + + // Add warnings collected at the Ssa stage + program_artifact.add_warnings(ssa_level_warnings); // For setting up the ABI we need separately specify main's input and return witnesses let mut is_main = true; for (acir, func_sig) in generated_acirs.into_iter().zip(func_sigs) { diff --git a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs index aaa483b91c9..3b7d2c1025f 100644 --- a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs @@ -431,6 +431,8 @@ impl<'a> Context<'a> { } warnings.extend(return_warnings); + + // Add the warnings from the alter Ssa passes Ok(self.acir_context.finish(input_witness, return_witnesses, warnings)) } diff --git a/compiler/noirc_evaluator/src/ssa/opt/check_for_underconstrained_values.rs b/compiler/noirc_evaluator/src/ssa/opt/check_for_underconstrained_values.rs new file mode 100644 index 00000000000..6dac99d7a09 --- /dev/null +++ b/compiler/noirc_evaluator/src/ssa/opt/check_for_underconstrained_values.rs @@ -0,0 +1,411 @@ +//! This module defines an SSA pass that detects if the final function has any subgraphs independent from inputs and outputs. +//! If this is the case, then part of the final circuit can be completely replaced by any other passing circuit, since there are no constraints ensuring connections. +//! So the compiler informs the developer of this as a bug +use im::HashMap; + +use crate::errors::{InternalBug, SsaReport}; +use crate::ssa::ir::basic_block::BasicBlockId; +use crate::ssa::ir::function::RuntimeType; +use crate::ssa::ir::function::{Function, FunctionId}; +use crate::ssa::ir::instruction::{Instruction, InstructionId, Intrinsic}; +use crate::ssa::ir::value::{Value, ValueId}; +use crate::ssa::ssa_gen::Ssa; +use std::collections::{BTreeMap, HashSet}; + +impl Ssa { + /// Go through each top-level non-brillig function and detect if it has independent subgraphs + #[tracing::instrument(level = "trace", skip(self))] + pub(crate) fn check_for_underconstrained_values(&mut self) -> Vec { + let mut warnings: Vec = Vec::new(); + for function in self.functions.values() { + match function.runtime() { + RuntimeType::Acir { .. } => { + warnings.extend(check_for_underconstrained_values_within_function( + function, + &self.functions, + )); + } + RuntimeType::Brillig => (), + } + } + warnings + } +} + +/// Detect independent subgraphs (not connected to function inputs or outputs) and return a vector of bug reports if some are found +fn check_for_underconstrained_values_within_function( + function: &Function, + all_functions: &BTreeMap, +) -> Vec { + let mut context = Context::default(); + let mut warnings: Vec = Vec::new(); + + context.compute_sets_of_connected_value_ids(function, all_functions); + + let all_brillig_generated_values: HashSet = + context.brillig_return_to_argument.keys().copied().collect(); + + let connected_sets_indices = + context.find_sets_connected_to_function_inputs_or_outputs(function); + + // Go through each disconnected set, find brillig calls that caused it and form warnings + for set_index in + HashSet::from_iter(0..(context.value_sets.len())).difference(&connected_sets_indices) + { + let current_set = &context.value_sets[*set_index]; + warnings.append(&mut context.find_disconnecting_brillig_calls_with_results_in_set( + current_set, + &all_brillig_generated_values, + function, + )); + } + warnings +} +#[derive(Default)] +struct Context { + visited_blocks: HashSet, + block_queue: Vec, + value_sets: Vec>, + brillig_return_to_argument: HashMap>, + brillig_return_to_instruction_id: HashMap, +} + +impl Context { + /// Compute sets of variable ValueIds that are connected with constraints + /// + /// Additionally, store information about brillig calls in the context + fn compute_sets_of_connected_value_ids( + &mut self, + function: &Function, + all_functions: &BTreeMap, + ) { + // Go through each block in the function and create a list of sets of ValueIds connected by instructions + self.block_queue.push(function.entry_block()); + while let Some(block) = self.block_queue.pop() { + if self.visited_blocks.contains(&block) { + continue; + } + self.visited_blocks.insert(block); + self.connect_value_ids_in_block(function, block, all_functions); + } + + // Merge ValueIds into sets, where each original small set of ValueIds is merged with another set if they intersect + self.merge_sets(); + } + + /// Find sets that contain input or output value of the function + /// + /// Goes through each set of connected ValueIds and see if function arguments or return values are in the set + fn find_sets_connected_to_function_inputs_or_outputs( + &mut self, + function: &Function, + ) -> HashSet { + let variable_parameters_and_return_values = function + .parameters() + .iter() + .chain(function.returns()) + .filter(|id| function.dfg.get_numeric_constant(**id).is_none()) + .copied(); + + let mut connected_sets_indices: HashSet = HashSet::new(); + + // Go through each parameter and each set and check if the set contains the parameter + // If it's the case, then that set doesn't present an issue + for parameter_or_return_value in variable_parameters_and_return_values { + for (set_index, final_set) in self.value_sets.iter().enumerate() { + if final_set.contains(¶meter_or_return_value) { + connected_sets_indices.insert(set_index); + } + } + } + connected_sets_indices + } + + /// Find which brillig calls separate this set from others and return bug warnings about them + fn find_disconnecting_brillig_calls_with_results_in_set( + &self, + current_set: &HashSet, + all_brillig_generated_values: &HashSet, + function: &Function, + ) -> Vec { + let mut warnings = Vec::new(); + // Find brillig-generated values in the set + let intersection = all_brillig_generated_values.intersection(current_set).copied(); + + // Go through all brillig outputs in the set + for brillig_output_in_set in intersection { + // Get the inputs that correspond to the output + let inputs: HashSet = + self.brillig_return_to_argument[&brillig_output_in_set].iter().copied().collect(); + + // Check if any of them are not in the set + let unused_inputs = inputs.difference(current_set).next().is_some(); + + // There is a value not in the set, which means that the inputs/outputs of this call have not been properly constrained + if unused_inputs { + warnings.push(SsaReport::Bug(InternalBug::IndependentSubgraph { + call_stack: function.dfg.get_call_stack( + self.brillig_return_to_instruction_id[&brillig_output_in_set], + ), + })); + } + } + warnings + } + /// Go through each instruction in the block and add a set of ValueIds connected through that instruction + /// + /// Additionally, this function adds mappings of brillig return values to call arguments and instruction ids from calls to brillig functions in the block + fn connect_value_ids_in_block( + &mut self, + function: &Function, + block: BasicBlockId, + all_functions: &BTreeMap, + ) { + let instructions = function.dfg[block].instructions(); + + for instruction in instructions.iter() { + let mut instruction_arguments_and_results = HashSet::new(); + + // Insert non-constant instruction arguments + function.dfg[*instruction].for_each_value(|value_id| { + if function.dfg.get_numeric_constant(value_id).is_none() { + instruction_arguments_and_results.insert(value_id); + } + }); + // And non-constant results + for value_id in function.dfg.instruction_results(*instruction).iter() { + if function.dfg.get_numeric_constant(*value_id).is_none() { + instruction_arguments_and_results.insert(*value_id); + } + } + + // For most instructions we just connect inputs and outputs + match &function.dfg[*instruction] { + Instruction::ArrayGet { .. } + | Instruction::ArraySet { .. } + | Instruction::Binary(..) + | Instruction::Cast(..) + | Instruction::Constrain(..) + | Instruction::IfElse { .. } + | Instruction::Load { .. } + | Instruction::Not(..) + | Instruction::Store { .. } + | Instruction::Truncate { .. } => { + self.value_sets.push(instruction_arguments_and_results); + } + + Instruction::Call { func: func_id, arguments: argument_ids } => { + match &function.dfg[*func_id] { + Value::Intrinsic(intrinsic) => match intrinsic { + Intrinsic::ApplyRangeConstraint + | Intrinsic::AssertConstant + | Intrinsic::AsWitness + | Intrinsic::IsUnconstrained => {} + Intrinsic::ArrayLen + | Intrinsic::AsField + | Intrinsic::AsSlice + | Intrinsic::BlackBox(..) + | Intrinsic::DerivePedersenGenerators + | Intrinsic::FromField + | Intrinsic::SlicePushBack + | Intrinsic::SlicePushFront + | Intrinsic::SlicePopBack + | Intrinsic::SlicePopFront + | Intrinsic::SliceInsert + | Intrinsic::SliceRemove + | Intrinsic::StaticAssert + | Intrinsic::StrAsBytes + | Intrinsic::ToBits(..) + | Intrinsic::ToRadix(..) => { + self.value_sets.push(instruction_arguments_and_results); + } + }, + Value::Function(callee) => match all_functions[&callee].runtime() { + RuntimeType::Brillig => { + // For calls to brillig functions we memorize the mapping of results to argument ValueId's and InstructionId's + // The latter are needed to produce the callstack later + for result in + function.dfg.instruction_results(*instruction).iter().filter( + |value_id| { + function.dfg.get_numeric_constant(**value_id).is_none() + }, + ) + { + self.brillig_return_to_argument + .insert(*result, argument_ids.clone()); + self.brillig_return_to_instruction_id + .insert(*result, *instruction); + } + } + RuntimeType::Acir(..) => { + self.value_sets.push(instruction_arguments_and_results); + } + }, + Value::ForeignFunction(..) => { + panic!("Should not be able to reach foreign function from non-brillig functions"); + } + Value::Array { .. } + | Value::Instruction { .. } + | Value::NumericConstant { .. } + | Value::Param { .. } => { + panic!("At the point we are running disconnect there shouldn't be any other values as arguments") + } + } + } + Instruction::Allocate { .. } + | Instruction::DecrementRc { .. } + | Instruction::EnableSideEffects { .. } + | Instruction::IncrementRc { .. } + | Instruction::RangeCheck { .. } => {} + } + } + + self.block_queue.extend(function.dfg[block].successors()); + } + + /// Merge all small sets into larger ones based on whether the sets intersect or not + /// + /// If two small sets have a common ValueId, we merge them into one + fn merge_sets(&mut self) { + let mut new_set_id: usize = 0; + let mut updated_sets: HashMap> = HashMap::new(); + let mut value_dictionary: HashMap = HashMap::new(); + let mut parsed_value_set: HashSet = HashSet::new(); + + // Go through each set + for set in self.value_sets.iter() { + // Check if the set has any of the ValueIds we've encountered at previous iterations + let intersection: HashSet = + set.intersection(&parsed_value_set).copied().collect(); + parsed_value_set.extend(set.iter()); + + // If there is no intersection, add the new set to updated sets + if intersection.is_empty() { + updated_sets.insert(new_set_id, set.clone()); + + // Add each entry to a dictionary for quick lookups of which ValueId is in which updated set + for entry in set.iter() { + value_dictionary.insert(*entry, new_set_id); + } + new_set_id += 1; + continue; + } + + // If there is an intersection, we have to join the sets + let mut joining_sets_ids: HashSet = + intersection.iter().map(|x| value_dictionary[x]).collect(); + let mut largest_set_size = usize::MIN; + let mut largest_set_index = usize::MAX; + + // We need to find the largest set to move as few elements as possible + for set_id in joining_sets_ids.iter() { + if updated_sets[set_id].len() > largest_set_size { + (largest_set_index, largest_set_size) = (*set_id, updated_sets[set_id].len()); + } + } + joining_sets_ids.remove(&largest_set_index); + + let mut largest_set = + updated_sets.extract(&largest_set_index).expect("Set should be in the hashmap").0; + + // For each of other sets that need to be joined + for set_id in joining_sets_ids.iter() { + // Map each element to the largest set and insert it + for element in updated_sets[set_id].iter() { + value_dictionary[element] = largest_set_index; + largest_set.insert(*element); + } + // Remove the old set + updated_sets.remove(set_id); + } + + // Join the new set with the largest sets + for element in set.iter() { + value_dictionary.insert(*element, largest_set_index); + largest_set.insert(*element); + } + updated_sets.insert(largest_set_index, largest_set); + } + self.value_sets = updated_sets.values().cloned().collect(); + } +} +#[cfg(test)] +mod test { + use crate::ssa::{ + function_builder::FunctionBuilder, + ir::{instruction::BinaryOp, map::Id, types::Type}, + }; + + #[test] + /// Test that a connected function raises no warnings + fn test_simple_connected_function() { + // fn main { + // b0(v0: Field, v1: Field): + // v2 = add v0, 1 + // v3 = mul v1, 2 + // v4 = eq v2, v3 + // return v2 + // } + let main_id = Id::test_new(0); + let mut builder = FunctionBuilder::new("main".into(), main_id); + let v0 = builder.add_parameter(Type::field()); + let v1 = builder.add_parameter(Type::field()); + + let one = builder.field_constant(1u128); + let two = builder.field_constant(2u128); + + let v2 = builder.insert_binary(v0, BinaryOp::Add, one); + let v3 = builder.insert_binary(v1, BinaryOp::Mul, two); + let _v4 = builder.insert_binary(v2, BinaryOp::Eq, v3); + builder.terminate_with_return(vec![v2]); + + let mut ssa = builder.finish(); + let ssa_level_warnings = ssa.check_for_underconstrained_values(); + assert_eq!(ssa_level_warnings.len(), 0); + } + + #[test] + /// Test where the results of a call to a brillig function are not connected to main function inputs or outputs + /// This should be detected. + fn test_simple_function_with_disconnected_part() { + // unconstrained fn br(v0: Field, v1: Field){ + // v2 = add v0, v1 + // return v2 + // } + // + // fn main { + // b0(v0: Field, v1: Field): + // v2 = add v0, 1 + // v3 = mul v1, 2 + // v4 = call br(v2, v3) + // v5 = add v4, 2 + // return + // } + let main_id = Id::test_new(0); + let mut builder = FunctionBuilder::new("main".into(), main_id); + let v0 = builder.add_parameter(Type::field()); + let v1 = builder.add_parameter(Type::field()); + + let one = builder.field_constant(1u128); + let two = builder.field_constant(2u128); + + let v2 = builder.insert_binary(v0, BinaryOp::Add, one); + let v3 = builder.insert_binary(v1, BinaryOp::Mul, two); + + let br_function_id = Id::test_new(1); + let br_function = builder.import_function(br_function_id); + let v4 = builder.insert_call(br_function, vec![v2, v3], vec![Type::field()])[0]; + let v5 = builder.insert_binary(v4, BinaryOp::Add, two); + builder.insert_constrain(v5, one, None); + builder.terminate_with_return(vec![]); + + builder.new_brillig_function("br".into(), br_function_id); + let v0 = builder.add_parameter(Type::field()); + let v1 = builder.add_parameter(Type::field()); + let v2 = builder.insert_binary(v0, BinaryOp::Add, v1); + builder.terminate_with_return(vec![v2]); + let mut ssa = builder.finish(); + let ssa_level_warnings = ssa.check_for_underconstrained_values(); + assert_eq!(ssa_level_warnings.len(), 1); + } +} diff --git a/compiler/noirc_evaluator/src/ssa/opt/mod.rs b/compiler/noirc_evaluator/src/ssa/opt/mod.rs index 4e5fa262696..56484ced290 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/mod.rs @@ -7,6 +7,7 @@ mod array_set; mod as_slice_length; mod assert_constant; mod bubble_up_constrains; +mod check_for_underconstrained_values; mod constant_folding; mod defunctionalize; mod die; diff --git a/tooling/lsp/src/notifications/mod.rs b/tooling/lsp/src/notifications/mod.rs index da3b95ce8e0..ed0ac13fae5 100644 --- a/tooling/lsp/src/notifications/mod.rs +++ b/tooling/lsp/src/notifications/mod.rs @@ -190,6 +190,7 @@ fn process_noir_document( let severity = match diagnostic.kind { DiagnosticKind::Error => DiagnosticSeverity::ERROR, DiagnosticKind::Warning => DiagnosticSeverity::WARNING, + DiagnosticKind::Bug => DiagnosticSeverity::WARNING, }; Some(Diagnostic { range,