diff --git a/acvm-repo/acvm/src/compiler/mod.rs b/acvm-repo/acvm/src/compiler/mod.rs index a51ae38c950..c9c6ceb0552 100644 --- a/acvm-repo/acvm/src/compiler/mod.rs +++ b/acvm-repo/acvm/src/compiler/mod.rs @@ -1,3 +1,19 @@ +//! The `compiler` module contains several pass to transform an ACIR program. +//! Roughly, the passes are spearated into the `optimizers` which try to reduce the number of opcodes +//! and the `transformers` which adapt the opcodes to the proving backend. +//! +//! # Optimizers +//! - GeneralOptimizer: simple pass which simplify AssertZero opcodes when possible (e.g remove terms with null coeficient) +//! - UnusedMemoryOptimizer: simple pass which removes MemoryInit opcodes when they are not used (e.g no corresponding MemoryOp opcode) +//! - RangeOptimizer: forward pass to collect range check information, and backward pass to remove the ones that are redundant. +//! +//! # Transformers +//! - CSAT: create intermediate variables so that AssertZero opcodes have the correct Circuit's `ExpressionWidth`. +//! +//! ACIR generation is performed by calling the `Ssa::into_acir` method, providing any necessary brillig bytecode. +//! The compiled program will be returned as an `Artifacts` type. +//! + use std::collections::{BTreeMap, HashMap}; use acir::{ @@ -19,8 +35,12 @@ pub use simulator::CircuitSimulator; use transformers::transform_internal; pub use transformers::{MIN_EXPRESSION_WIDTH, transform}; -/// This module moves and decomposes acir opcodes. The transformation map allows consumers of this module to map +/// This module can move and decompose acir opcodes into multiple opcodes. The transformation map allows consumers of this module to map /// metadata they had about the opcodes to the new opcode structure generated after the transformation. +/// ACIR opcodes are stored inside a vector of opcodes. A transformation pass will generate a new vector of opcodes, +/// but each opcode is the result of the transformation of an opcode in the original vector. +/// So we simply keep track of the relation: index of the original opcode -> index of the new opcode in the new vector +/// However we need a vector of new indexes for the map values in the case the old opcode is decomposed into multiple opcodes. #[derive(Debug)] pub struct AcirTransformationMap { /// Maps the old acir indices to the new acir indices @@ -31,6 +51,15 @@ impl AcirTransformationMap { /// Builds a map from a vector of pointers to the old acir opcodes. /// The index in the vector is the new opcode index. /// The value of the vector is the old opcode index pointed. + /// E.g: If acir_opcode_positions = 0,1,2,4,5,5,6 + /// that means that old index 0,1,2,4,5,5,6 are mapped to the new indexes: 0,1,2,3,4,5,6 + /// This gives the following map: + /// 0 -> 0 + /// 1 -> 1 + /// 2 -> 2 + /// 4 -> 3 + /// 5 -> [4, 5] + /// 6 -> 6 fn new(acir_opcode_positions: &[usize]) -> Self { let mut old_indices_to_new_indices = HashMap::with_capacity(acir_opcode_positions.len()); for (new_index, old_index) in acir_opcode_positions.iter().copied().enumerate() { @@ -40,6 +69,11 @@ impl AcirTransformationMap { } /// Returns the new opcode location(s) corresponding to the old opcode. + /// An OpcodeLocation contains the index of the opcode in the vector of opcodes + /// This function returns the new OpcodeLocation by 'updating' the index within the given OpcodeLocation + /// using the AcirTransformationMap. In fact it does not update the given OpcodeLocation 'in-memory' but rather + /// returns a new one, and even a vector of OpcodeLocation in case there are multiple new indexes corresponding + /// to the old opcode index. pub fn new_locations( &self, old_location: OpcodeLocation, @@ -61,6 +95,8 @@ impl AcirTransformationMap { ) } + /// This function is similar to `new_locations()`, but only deal with + /// the AcirOpcodeLocation variant pub fn new_acir_locations( &self, old_location: AcirOpcodeLocation, @@ -91,7 +127,15 @@ fn transform_assert_messages( /// Applies backend specific optimizations to a [`Circuit`]. /// -/// Runs multiple passes until the output stabilizes. +/// optimize_internal: +/// - General optimizer: canonalize AssertZero opcodes. +/// - Unused Memory: remove unused MemoryInit opcodes. +/// - Redundant Ranges: remove RANGE opcodes that are redundant. +/// +/// transform_internal: run multiple times (up to 3) until the output stabilizes. +/// - CSAT: limit AssertZero opcodes to the Circuit's width. +/// - Eliminate intermediate variables: Combine AssertZero opcodes used only once. +/// - Redundant Ranges: some RANGEs may be redundant as a side effect of the previous pass. pub fn compile( acir: Circuit, expression_width: ExpressionWidth, diff --git a/acvm-repo/acvm/src/compiler/optimizers/general.rs b/acvm-repo/acvm/src/compiler/optimizers/general.rs index 788981b33af..aaf0b8761c6 100644 --- a/acvm-repo/acvm/src/compiler/optimizers/general.rs +++ b/acvm-repo/acvm/src/compiler/optimizers/general.rs @@ -7,6 +7,8 @@ use indexmap::IndexMap; /// The `GeneralOptimizer` processes all [`Expression`]s to: /// - remove any zero-coefficient terms. /// - merge any quadratic terms containing the same two witnesses. +/// +/// This pass does not depend on any other pass and should be the first one in a set of optimizing passes. pub(crate) struct GeneralOptimizer; impl GeneralOptimizer { @@ -17,8 +19,12 @@ impl GeneralOptimizer { } } -// Simplifies all mul terms with the same bi-variate variables while also removing -// terms that end up with a zero coefficient. +/// Simplifies all mul terms of the form `(scale, w_l, w_r)` with the same bi-variate variables +/// while also removing terms that end up with a zero coefficient. +/// +/// For instance, mul terms `(0,1,1), (2,2,1) (-1,2,1) (-1,1,2)` will return an +/// empty vector, because: (2,1) and (1,2) are the same bi-variate variable +/// and the resulting scale is `2-1-1 = 0` fn simplify_mul_terms(mut gate: Expression) -> Expression { let mut hash_map: IndexMap<(Witness, Witness), F> = IndexMap::new(); diff --git a/acvm-repo/acvm/src/compiler/optimizers/merge_expressions.rs b/acvm-repo/acvm/src/compiler/optimizers/merge_expressions.rs index 7d13dfbe427..e7e278e8d92 100644 --- a/acvm-repo/acvm/src/compiler/optimizers/merge_expressions.rs +++ b/acvm-repo/acvm/src/compiler/optimizers/merge_expressions.rs @@ -27,26 +27,29 @@ impl MergeExpressionsOptimizer { } } /// This pass analyzes the circuit and identifies intermediate variables that are - /// only used in two arithmetic opcodes. It then merges the opcode which produces the + /// only used in two AssertZero opcodes. It then merges the opcode which produces the /// intermediate variable into the second one that uses it - /// Note: This pass is only relevant for backends that can handle unlimited width /// /// The first pass maps witnesses to the index of the opcodes using them. /// Public inputs are not considered because they cannot be simplified. /// Witnesses used by MemoryInit opcodes are put in a separate map and marked as used by a Brillig call /// if the memory block is an input to the call. /// - /// The second pass looks for arithmetic opcodes having a witness which is only used by another arithmetic opcode. + /// The second pass looks for AssertZero opcodes having a witness which is only used by another arithmetic opcode. /// In that case, the opcode with the smallest index is merged into the other one via Gaussian elimination. /// For instance, if we have 'w1' used only by these two opcodes, - /// where `_{value}` refers to a witness and `{value}` refers to a constant: - /// [(1, w2,w3), (2, w2), (2, w1), (1, w3)] - /// [(2, w3, w4), (2,w1), (1, w4)] + /// where `(5,w2,w3)` refers the expression `5*w2*w3` and `(2, w1)` refers to the expression `2*w1`: + /// [(1, w2,w3), (2, w2), (2, w1), (1, w3)] // This opcode 'defines' the variable w1 + /// [(2, w3, w4), (2,w1), (1, w4)] // which is only used here /// We will remove the first one and modify the second one like this: /// [(2, w3, w4), (1, w4), (-1, w2), (-1/2, w3), (-1/2, w2, w3)] /// /// This transformation is relevant for Plonk-ish backends although they have a limited width because /// they can potentially handle expressions with large linear combinations using 'big-add' gates. + /// + /// Pre-condition: + /// - This pass is only relevant for backends that can handle unlimited width + /// - CSAT pass should have been run prior to this one. pub(crate) fn eliminate_intermediate_variable( &mut self, circuit: &Circuit, diff --git a/acvm-repo/acvm/src/compiler/optimizers/mod.rs b/acvm-repo/acvm/src/compiler/optimizers/mod.rs index fbd79dd2354..0aea1072694 100644 --- a/acvm-repo/acvm/src/compiler/optimizers/mod.rs +++ b/acvm-repo/acvm/src/compiler/optimizers/mod.rs @@ -26,8 +26,14 @@ pub fn optimize( ) -> (Circuit, AcirTransformationMap) { // Track original acir opcode positions throughout the transformation passes of the compilation // by applying the modifications done to the circuit opcodes and also to the opcode_positions (delete and insert) + // For instance, here before any transformation, the old acir opcode positions have not changed. + // So acir_opcode_positions = 0, 1,...,n-1, representing the index of the opcode in Circuit.opcodes vector. let acir_opcode_positions = (0..acir.opcodes.len()).collect(); + // `optimize_internal()` may change the circuit, and it returns a new one, as well the new_opcode_positions + // In the new circuit, the opcode at index `i` corresponds to the opcode at index `new_opcode_positions[i]` in the original circuit. + // For instance let's say it removed the opcode at index 3, and replaced the one at index 5 by two new opcodes + // The new_opcode_positions is now: 0,1,2,4,5,5,6,....n-1 let (mut acir, new_opcode_positions) = optimize_internal(acir, acir_opcode_positions, brillig_side_effects); @@ -41,6 +47,10 @@ pub fn optimize( /// Applies backend independent optimizations to a [`Circuit`]. /// /// Accepts an injected `acir_opcode_positions` to allow optimizations to be applied in a loop. +/// It run the following passes: +/// - General optimizer +/// - Unused Memory optimization +/// - Redundant Ranges optimization #[tracing::instrument(level = "trace", name = "optimize_acir" skip(acir, acir_opcode_positions))] pub(super) fn optimize_internal( acir: Circuit, diff --git a/acvm-repo/acvm/src/compiler/optimizers/redundant_range.rs b/acvm-repo/acvm/src/compiler/optimizers/redundant_range.rs index 6579b45dd29..01fe6dffc79 100644 --- a/acvm-repo/acvm/src/compiler/optimizers/redundant_range.rs +++ b/acvm-repo/acvm/src/compiler/optimizers/redundant_range.rs @@ -71,12 +71,13 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; /// Information gathered about witnesses which are subject to range constraints. struct RangeInfo { - /// Opcode positions at which stricter bit size information becomes available. + /// Opcode positions which updated this RangeInfo, i.e + /// at which stricter bit size information becomes available. switch_points: BTreeSet, /// Strictest constraint on bit size so far. num_bits: u32, - /// Indicate whether the bit size comes from an assertion, in which case we - /// can save an equivalent range constraint. + /// Indicate whether the bit size comes from an assertion or from array indexing, + /// in which cases we can save an equivalent range constraint. is_implied: bool, } @@ -181,6 +182,13 @@ impl<'a, F: AcirField> RangeOptimizer<'a, F> { /// Returns a `Circuit` where each Witness is only range constrained /// a minimal number of times that still allows us to avoid executing /// any new side effects due to their removal. + /// + /// The idea is to keep only the RANGE opcodes that have stricly smaller bit-size requirements + /// than before, i.e the ones that are at a 'switch point'. + /// Furthermore, we only keep the switch points that are last before + /// a 'side-effect' opcode (i.e a Brillig call). + /// As a result, we simply do a backward pass on the opcodes, so that the last Brillig call + /// is known before reaching a RANGE opcode. pub(crate) fn replace_redundant_ranges( self, order_list: Vec, diff --git a/acvm-repo/acvm/src/compiler/optimizers/unused_memory.rs b/acvm-repo/acvm/src/compiler/optimizers/unused_memory.rs index 02f9699328d..c332c64dc7d 100644 --- a/acvm-repo/acvm/src/compiler/optimizers/unused_memory.rs +++ b/acvm-repo/acvm/src/compiler/optimizers/unused_memory.rs @@ -73,3 +73,79 @@ impl UnusedMemoryOptimizer { (Circuit { opcodes: optimized_opcodes, ..self.circuit }, new_order_list) } } + +#[cfg(test)] +mod tests { + use super::*; + use acir::{ + FieldElement, + circuit::opcodes::BlockType, + native_types::{Expression, Witness}, + }; + + #[test] + fn unused_memory_is_removed() { + let src = " + current witness: w3 + private parameters: [w0, w1] + public parameters: [] + return values: [w2] + "; + let mut circuit = Circuit::from_str(src).unwrap(); + // MEMORYINIT [w0, w1] + // EXPR [ (1, w0) (-1, w1) (-1, w2) 0 ] + circuit.opcodes = vec![ + Opcode::MemoryInit { + block_id: BlockId(0), + init: vec![Witness(0), Witness(1)], + block_type: BlockType::Memory, + }, + Opcode::AssertZero(Expression { + mul_terms: Vec::new(), + linear_combinations: vec![ + (FieldElement::from(1_u128), Witness(0)), + (FieldElement::from(-1_i128), Witness(1)), + (FieldElement::from(-1_i128), Witness(2)), + ], + q_c: FieldElement::from(0u128), + }), + ]; + let unused_memory = UnusedMemoryOptimizer::new(circuit); + assert_eq!(unused_memory.unused_memory_initializations.len(), 1); + let (circuit, _) = unused_memory.remove_unused_memory_initializations(vec![0, 1]); + assert_eq!(circuit.opcodes.len(), 1); + } + + #[test] + fn databus_is_not_removed() { + let src = " + current witness: w3 + private parameters: [w0, w1] + public parameters: [] + return values: [w2] + "; + let mut circuit = Circuit::from_str(src).unwrap(); + // MEMORYINIT [w0, w1] + // EXPR [ (1, w0) (-1, w1) (-1, w2) 0 ] + circuit.opcodes = vec![ + Opcode::MemoryInit { + block_id: BlockId(0), + init: vec![Witness(0), Witness(1)], + block_type: BlockType::ReturnData, + }, + Opcode::AssertZero(Expression { + mul_terms: Vec::new(), + linear_combinations: vec![ + (FieldElement::from(1_u128), Witness(0)), + (FieldElement::from(-1_i128), Witness(1)), + (FieldElement::from(-1_i128), Witness(2)), + ], + q_c: FieldElement::from(0u128), + }), + ]; + let unused_memory = UnusedMemoryOptimizer::new(circuit); + assert_eq!(unused_memory.unused_memory_initializations.len(), 1); + let (circuit, _) = unused_memory.remove_unused_memory_initializations(vec![0, 1]); + assert_eq!(circuit.opcodes.len(), 2); + } +} diff --git a/acvm-repo/acvm/src/compiler/simulator.rs b/acvm-repo/acvm/src/compiler/simulator.rs index d916dd19eda..380a78e77ad 100644 --- a/acvm-repo/acvm/src/compiler/simulator.rs +++ b/acvm-repo/acvm/src/compiler/simulator.rs @@ -10,6 +10,12 @@ use acir::{ use std::collections::{BTreeSet, HashSet}; /// Simulate a symbolic solve for a circuit +/// Instead of evaluating witness values from the inputs, like the PWG module is doing, +/// this pass simply mark the witness that can be evaluated, from the known inputs, +/// and incrementally from the previously marked witnesses. +/// This avoid any computation on a big field which makes the process efficient. +/// When all the witness of an opcode are marked as solvable, it means that the +/// opcode is solvable. #[derive(Default)] pub struct CircuitSimulator { /// Track the witnesses that can be solved diff --git a/acvm-repo/acvm/src/compiler/transformers/csat.rs b/acvm-repo/acvm/src/compiler/transformers/csat.rs index 8a2bf9876e4..cef969107b9 100644 --- a/acvm-repo/acvm/src/compiler/transformers/csat.rs +++ b/acvm-repo/acvm/src/compiler/transformers/csat.rs @@ -16,8 +16,9 @@ pub const MIN_EXPRESSION_WIDTH: usize = 3; /// /// This is done by creating intermediate variables to hold partial calculations and then combining them /// to calculate the original expression. -// Should we give it all of the opcodes? -// Have a single transformer that you instantiate with a width, then pass many opcodes through +/// +/// Pre-Condition: +/// - General Optimizer must run before this pass pub(crate) struct CSatTransformer { width: usize, /// Track the witness that can be solved diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs index 0029bbf5e1f..c6c03167ebd 100644 --- a/acvm-repo/acvm/src/compiler/transformers/mod.rs +++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs @@ -1,3 +1,30 @@ +/// This module applies backend specific transformation to a [`Circuit`]. +/// +/// ## CSAT: transforms AssertZero opcodes into AssertZero opcodes having the required width. +/// +/// For instance, if the width is 4, the AssertZero opcode x1 + x2 + x3 + x4 + x5 - y = 0 will be transformed using 2 intermediate variables (z1,z2): +/// x1 + x2 + x3 = z1 +/// x4 + x5 = z2 +/// z1 + z2 - y = 0 +/// If x1,..x5 are inputs to the program, they are taggeg as 'solvable', and would be used to compute the value of y. +/// If we generate the intermediate variable x4 + x5 - y = z3, we get an unsolvable circuit because this AssertZero opcode will have two unkwnon values: y and z3 +/// So the CSAT transformation keep track of which witness would be solved for each opcode in order to only generate solvable intermediat variables. +/// +/// ## eliminate intermediate variables +/// The 'eliminate intermediate variables' pass will remove any intermediate variables (for instance created by the previous transformation) +/// that are used in exactly two AssertZero opcodes. +/// This results in arithmetic opcodes having linear combinations of potentially large width. +/// For instance if the intermediate variable is z1 is only used in y: +/// z1 = x1 + x2 +x3 +/// y = z1 + x4 +/// We remove it, undoing the work done during the CSAT transformation: y = x1 + x2 + x3 + x4 +/// +/// We do this because the backend is expected to handle linear combinations of 'unbounded width' in a more efficient way +/// than the 'CSAT transformation'. +/// However, it is worth to compute intermediate variables if they are used in more than one other opcode. +/// +/// ## redundant_range +/// The 'range optimization' pass, from the optimizers module will remove any redundant range opcodes again. use std::collections::BTreeMap; use acir::{ @@ -96,19 +123,12 @@ pub(super) fn transform_internal( (acir, acir_opcode_positions) } -/// Applies backend specific optimizations to a [`Circuit`]. -/// /// Accepts an injected `acir_opcode_positions` to allow transformations to be applied directly after optimizations. /// /// If the width is unbounded, it does nothing. /// If it is bounded, it first performs the 'CSAT transformation' in one pass, by creating intermediate variables when necessary. -/// This results in arithmetic opcodes having the required width. -/// Then the 'eliminate intermediate variables' pass will remove any intermediate variables (for instance created by the previous transformation) -/// that are used in exactly two arithmetic opcodes. -/// This results in arithmetic opcodes having linear combinations of potentially large width. -/// Finally, the 'range optimization' pass will remove any redundant range opcodes. -/// The backend is expected to handle linear combinations of 'unbounded width' in a more efficient way -/// than the 'CSAT transformation'. +/// Then it performs `eliminate_intermediate_variable()` which (re-)combine intermediate variables used only once. +/// It concludes with a round of `replace_redundant_ranges()` which removes range checks made redundant by the previous pass. #[tracing::instrument( level = "trace", name = "transform_acir_once",