diff --git a/acvm-repo/acvm/src/compiler/mod.rs b/acvm-repo/acvm/src/compiler/mod.rs
index a51ae38c950..c9c6ceb0552 100644
--- a/acvm-repo/acvm/src/compiler/mod.rs
+++ b/acvm-repo/acvm/src/compiler/mod.rs
@@ -1,3 +1,19 @@
+//! The `compiler` module contains several pass to transform an ACIR program.
+//! Roughly, the passes are spearated into the `optimizers` which try to reduce the number of opcodes
+//! and the `transformers` which adapt the opcodes to the proving backend.
+//!
+//! # Optimizers
+//! - GeneralOptimizer: simple pass which simplify AssertZero opcodes when possible (e.g remove terms with null coeficient)
+//! - UnusedMemoryOptimizer: simple pass which removes MemoryInit opcodes when they are not used (e.g no corresponding MemoryOp opcode)
+//! - RangeOptimizer: forward pass to collect range check information, and backward pass to remove the ones that are redundant.
+//!
+//! # Transformers
+//! - CSAT: create intermediate variables so that AssertZero opcodes have the correct Circuit's `ExpressionWidth`.
+//!
+//! ACIR generation is performed by calling the `Ssa::into_acir` method, providing any necessary brillig bytecode.
+//! The compiled program will be returned as an `Artifacts` type.
+//!
+
 use std::collections::{BTreeMap, HashMap};
 
 use acir::{
@@ -19,8 +35,12 @@ pub use simulator::CircuitSimulator;
 use transformers::transform_internal;
 pub use transformers::{MIN_EXPRESSION_WIDTH, transform};
 
-/// This module moves and decomposes acir opcodes. The transformation map allows consumers of this module to map
+/// This module can move and decompose acir opcodes into multiple opcodes. The transformation map allows consumers of this module to map
 /// metadata they had about the opcodes to the new opcode structure generated after the transformation.
+/// ACIR opcodes are stored inside a vector of opcodes. A transformation pass will generate a new vector of opcodes,
+/// but each opcode is the result of the transformation of an opcode in the original vector.
+/// So we simply keep track of the relation:  index of the original opcode -> index of the new opcode in the new vector
+/// However we need a vector of new indexes for the map values in the case the old opcode is decomposed into multiple opcodes.
 #[derive(Debug)]
 pub struct AcirTransformationMap {
     /// Maps the old acir indices to the new acir indices
@@ -31,6 +51,15 @@ impl AcirTransformationMap {
     /// Builds a map from a vector of pointers to the old acir opcodes.
     /// The index in the vector is the new opcode index.
     /// The value of the vector is the old opcode index pointed.
+    /// E.g: If acir_opcode_positions = 0,1,2,4,5,5,6
+    /// that means that old index  0,1,2,4,5,5,6 are mapped to the new indexes: 0,1,2,3,4,5,6
+    /// This gives the following map:
+    /// 0 -> 0
+    /// 1 -> 1
+    /// 2 -> 2
+    /// 4 -> 3
+    /// 5 -> [4, 5]
+    /// 6 -> 6
     fn new(acir_opcode_positions: &[usize]) -> Self {
         let mut old_indices_to_new_indices = HashMap::with_capacity(acir_opcode_positions.len());
         for (new_index, old_index) in acir_opcode_positions.iter().copied().enumerate() {
@@ -40,6 +69,11 @@ impl AcirTransformationMap {
     }
 
     /// Returns the new opcode location(s) corresponding to the old opcode.
+    /// An OpcodeLocation contains the index of the opcode in the vector of opcodes
+    /// This function returns the new OpcodeLocation by 'updating' the index within the given OpcodeLocation
+    /// using the AcirTransformationMap. In fact it does not update the given OpcodeLocation 'in-memory' but rather
+    /// returns a new one, and even a vector of OpcodeLocation in case there are multiple new indexes corresponding
+    /// to the old opcode index.
     pub fn new_locations(
         &self,
         old_location: OpcodeLocation,
@@ -61,6 +95,8 @@ impl AcirTransformationMap {
         )
     }
 
+    /// This function is similar to `new_locations()`, but only deal with
+    /// the AcirOpcodeLocation variant
     pub fn new_acir_locations(
         &self,
         old_location: AcirOpcodeLocation,
@@ -91,7 +127,15 @@ fn transform_assert_messages<F: Clone>(
 
 /// Applies backend specific optimizations to a [`Circuit`].
 ///
-/// Runs multiple passes until the output stabilizes.
+/// optimize_internal:
+/// - General optimizer: canonalize AssertZero opcodes.
+/// - Unused Memory: remove unused MemoryInit opcodes.
+/// - Redundant Ranges: remove RANGE opcodes that are redundant.
+///
+/// transform_internal: run multiple times (up to 3) until the output stabilizes.
+/// - CSAT: limit AssertZero opcodes to the Circuit's width.
+/// - Eliminate intermediate variables: Combine AssertZero opcodes used only once.
+/// - Redundant Ranges: some RANGEs may be redundant as a side effect of the previous pass.
 pub fn compile<F: AcirField>(
     acir: Circuit<F>,
     expression_width: ExpressionWidth,
diff --git a/acvm-repo/acvm/src/compiler/optimizers/general.rs b/acvm-repo/acvm/src/compiler/optimizers/general.rs
index 788981b33af..aaf0b8761c6 100644
--- a/acvm-repo/acvm/src/compiler/optimizers/general.rs
+++ b/acvm-repo/acvm/src/compiler/optimizers/general.rs
@@ -7,6 +7,8 @@ use indexmap::IndexMap;
 /// The `GeneralOptimizer` processes all [`Expression`]s to:
 /// - remove any zero-coefficient terms.
 /// - merge any quadratic terms containing the same two witnesses.
+///
+/// This pass does not depend on any other pass and should be the first one in a set of optimizing passes.
 pub(crate) struct GeneralOptimizer;
 
 impl GeneralOptimizer {
@@ -17,8 +19,12 @@ impl GeneralOptimizer {
     }
 }
 
-// Simplifies all mul terms with the same bi-variate variables while also removing
-// terms that end up with a zero coefficient.
+/// Simplifies all mul terms of the form `(scale, w_l, w_r)` with the same bi-variate variables
+/// while also removing terms that end up with a zero coefficient.
+///
+/// For instance, mul terms `(0,1,1), (2,2,1) (-1,2,1) (-1,1,2)` will return an
+/// empty vector, because: (2,1) and (1,2) are the same bi-variate variable
+/// and the resulting scale is `2-1-1 = 0`
 fn simplify_mul_terms<F: AcirField>(mut gate: Expression<F>) -> Expression<F> {
     let mut hash_map: IndexMap<(Witness, Witness), F> = IndexMap::new();
 
diff --git a/acvm-repo/acvm/src/compiler/optimizers/merge_expressions.rs b/acvm-repo/acvm/src/compiler/optimizers/merge_expressions.rs
index 7d13dfbe427..e7e278e8d92 100644
--- a/acvm-repo/acvm/src/compiler/optimizers/merge_expressions.rs
+++ b/acvm-repo/acvm/src/compiler/optimizers/merge_expressions.rs
@@ -27,26 +27,29 @@ impl<F: AcirField> MergeExpressionsOptimizer<F> {
         }
     }
     /// This pass analyzes the circuit and identifies intermediate variables that are
-    /// only used in two arithmetic opcodes. It then merges the opcode which produces the
+    /// only used in two AssertZero opcodes. It then merges the opcode which produces the
     /// intermediate variable into the second one that uses it
-    /// Note: This pass is only relevant for backends that can handle unlimited width
     ///
     /// The first pass maps witnesses to the index of the opcodes using them.
     /// Public inputs are not considered because they cannot be simplified.
     /// Witnesses used by MemoryInit opcodes are put in a separate map and marked as used by a Brillig call
     /// if the memory block is an input to the call.
     ///
-    /// The second pass looks for arithmetic opcodes having a witness which is only used by another arithmetic opcode.
+    /// The second pass looks for AssertZero opcodes having a witness which is only used by another arithmetic opcode.
     /// In that case, the opcode with the smallest index is merged into the other one via Gaussian elimination.
     /// For instance, if we have 'w1' used only by these two opcodes,
-    /// where `_{value}` refers to a witness and `{value}` refers to a constant:
-    /// [(1, w2,w3), (2, w2), (2, w1), (1, w3)]
-    /// [(2, w3, w4), (2,w1), (1, w4)]
+    /// where `(5,w2,w3)` refers the expression `5*w2*w3` and `(2, w1)` refers to the expression `2*w1`:
+    /// [(1, w2,w3), (2, w2), (2, w1), (1, w3)] // This opcode 'defines' the variable w1
+    /// [(2, w3, w4), (2,w1), (1, w4)]          // which is only used here
     /// We will remove the first one and modify the second one like this:
     /// [(2, w3, w4), (1, w4), (-1, w2), (-1/2, w3), (-1/2, w2, w3)]
     ///
     /// This transformation is relevant for Plonk-ish backends although they have a limited width because
     /// they can potentially handle expressions with large linear combinations using 'big-add' gates.
+    ///
+    /// Pre-condition:
+    /// - This pass is only relevant for backends that can handle unlimited width
+    /// - CSAT pass should have been run prior to this one.
     pub(crate) fn eliminate_intermediate_variable(
         &mut self,
         circuit: &Circuit<F>,
diff --git a/acvm-repo/acvm/src/compiler/optimizers/mod.rs b/acvm-repo/acvm/src/compiler/optimizers/mod.rs
index fbd79dd2354..0aea1072694 100644
--- a/acvm-repo/acvm/src/compiler/optimizers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/optimizers/mod.rs
@@ -26,8 +26,14 @@ pub fn optimize<F: AcirField>(
 ) -> (Circuit<F>, AcirTransformationMap) {
     // Track original acir opcode positions throughout the transformation passes of the compilation
     // by applying the modifications done to the circuit opcodes and also to the opcode_positions (delete and insert)
+    // For instance, here before any transformation, the old acir opcode positions have not changed.
+    // So acir_opcode_positions = 0, 1,...,n-1, representing the index of the opcode in Circuit.opcodes vector.
     let acir_opcode_positions = (0..acir.opcodes.len()).collect();
 
+    // `optimize_internal()` may change the circuit, and it returns a new one, as well the new_opcode_positions
+    // In the new circuit, the opcode at index `i` corresponds to the opcode at index `new_opcode_positions[i]` in the original circuit.
+    // For instance let's say it removed the opcode at index 3, and replaced the one at index 5 by two new opcodes
+    // The new_opcode_positions is now: 0,1,2,4,5,5,6,....n-1
     let (mut acir, new_opcode_positions) =
         optimize_internal(acir, acir_opcode_positions, brillig_side_effects);
 
@@ -41,6 +47,10 @@ pub fn optimize<F: AcirField>(
 /// Applies backend independent optimizations to a [`Circuit`].
 ///
 /// Accepts an injected `acir_opcode_positions` to allow optimizations to be applied in a loop.
+/// It run the following passes:
+/// - General optimizer
+/// - Unused Memory optimization
+/// - Redundant Ranges optimization
 #[tracing::instrument(level = "trace", name = "optimize_acir" skip(acir, acir_opcode_positions))]
 pub(super) fn optimize_internal<F: AcirField>(
     acir: Circuit<F>,
diff --git a/acvm-repo/acvm/src/compiler/optimizers/redundant_range.rs b/acvm-repo/acvm/src/compiler/optimizers/redundant_range.rs
index 6579b45dd29..01fe6dffc79 100644
--- a/acvm-repo/acvm/src/compiler/optimizers/redundant_range.rs
+++ b/acvm-repo/acvm/src/compiler/optimizers/redundant_range.rs
@@ -71,12 +71,13 @@ use std::collections::{BTreeMap, BTreeSet, HashMap};
 
 /// Information gathered about witnesses which are subject to range constraints.
 struct RangeInfo {
-    /// Opcode positions at which stricter bit size information becomes available.
+    /// Opcode positions which updated this RangeInfo, i.e
+    /// at which stricter bit size information becomes available.
     switch_points: BTreeSet<usize>,
     /// Strictest constraint on bit size so far.
     num_bits: u32,
-    /// Indicate whether the bit size comes from an assertion, in which case we
-    /// can save an equivalent range constraint.
+    /// Indicate whether the bit size comes from an assertion or from array indexing,
+    /// in which cases we can save an equivalent range constraint.
     is_implied: bool,
 }
 
@@ -181,6 +182,13 @@ impl<'a, F: AcirField> RangeOptimizer<'a, F> {
     /// Returns a `Circuit` where each Witness is only range constrained
     /// a minimal number of times that still allows us to avoid executing
     /// any new side effects due to their removal.
+    ///
+    /// The idea is to keep only the RANGE opcodes that have stricly smaller bit-size requirements
+    /// than before, i.e the ones that are at a 'switch point'.
+    /// Furthermore, we only keep the switch points that are last before
+    /// a 'side-effect' opcode (i.e a Brillig call).
+    /// As a result, we simply do a backward pass on the opcodes, so that the last Brillig call
+    /// is known before reaching a RANGE opcode.
     pub(crate) fn replace_redundant_ranges(
         self,
         order_list: Vec<usize>,
diff --git a/acvm-repo/acvm/src/compiler/optimizers/unused_memory.rs b/acvm-repo/acvm/src/compiler/optimizers/unused_memory.rs
index 02f9699328d..c332c64dc7d 100644
--- a/acvm-repo/acvm/src/compiler/optimizers/unused_memory.rs
+++ b/acvm-repo/acvm/src/compiler/optimizers/unused_memory.rs
@@ -73,3 +73,79 @@ impl<F: AcirField> UnusedMemoryOptimizer<F> {
         (Circuit { opcodes: optimized_opcodes, ..self.circuit }, new_order_list)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use acir::{
+        FieldElement,
+        circuit::opcodes::BlockType,
+        native_types::{Expression, Witness},
+    };
+
+    #[test]
+    fn unused_memory_is_removed() {
+        let src = "
+        current witness: w3
+        private parameters: [w0, w1]
+        public parameters: []
+        return values: [w2]
+        ";
+        let mut circuit = Circuit::from_str(src).unwrap();
+        // MEMORYINIT [w0, w1]
+        // EXPR [ (1, w0) (-1, w1) (-1, w2) 0 ]
+        circuit.opcodes = vec![
+            Opcode::MemoryInit {
+                block_id: BlockId(0),
+                init: vec![Witness(0), Witness(1)],
+                block_type: BlockType::Memory,
+            },
+            Opcode::AssertZero(Expression {
+                mul_terms: Vec::new(),
+                linear_combinations: vec![
+                    (FieldElement::from(1_u128), Witness(0)),
+                    (FieldElement::from(-1_i128), Witness(1)),
+                    (FieldElement::from(-1_i128), Witness(2)),
+                ],
+                q_c: FieldElement::from(0u128),
+            }),
+        ];
+        let unused_memory = UnusedMemoryOptimizer::new(circuit);
+        assert_eq!(unused_memory.unused_memory_initializations.len(), 1);
+        let (circuit, _) = unused_memory.remove_unused_memory_initializations(vec![0, 1]);
+        assert_eq!(circuit.opcodes.len(), 1);
+    }
+
+    #[test]
+    fn databus_is_not_removed() {
+        let src = "
+        current witness: w3
+        private parameters: [w0, w1]
+        public parameters: []
+        return values: [w2]
+        ";
+        let mut circuit = Circuit::from_str(src).unwrap();
+        // MEMORYINIT [w0, w1]
+        // EXPR [ (1, w0) (-1, w1) (-1, w2) 0 ]
+        circuit.opcodes = vec![
+            Opcode::MemoryInit {
+                block_id: BlockId(0),
+                init: vec![Witness(0), Witness(1)],
+                block_type: BlockType::ReturnData,
+            },
+            Opcode::AssertZero(Expression {
+                mul_terms: Vec::new(),
+                linear_combinations: vec![
+                    (FieldElement::from(1_u128), Witness(0)),
+                    (FieldElement::from(-1_i128), Witness(1)),
+                    (FieldElement::from(-1_i128), Witness(2)),
+                ],
+                q_c: FieldElement::from(0u128),
+            }),
+        ];
+        let unused_memory = UnusedMemoryOptimizer::new(circuit);
+        assert_eq!(unused_memory.unused_memory_initializations.len(), 1);
+        let (circuit, _) = unused_memory.remove_unused_memory_initializations(vec![0, 1]);
+        assert_eq!(circuit.opcodes.len(), 2);
+    }
+}
diff --git a/acvm-repo/acvm/src/compiler/simulator.rs b/acvm-repo/acvm/src/compiler/simulator.rs
index d916dd19eda..380a78e77ad 100644
--- a/acvm-repo/acvm/src/compiler/simulator.rs
+++ b/acvm-repo/acvm/src/compiler/simulator.rs
@@ -10,6 +10,12 @@ use acir::{
 use std::collections::{BTreeSet, HashSet};
 
 /// Simulate a symbolic solve for a circuit
+/// Instead of evaluating witness values from the inputs, like the PWG module is doing,
+/// this pass simply mark the witness that can be evaluated, from the known inputs,
+/// and incrementally from the previously marked witnesses.
+/// This avoid any computation on a big field which makes the process efficient.
+/// When all the witness of an opcode are marked as solvable, it means that the
+/// opcode is solvable.
 #[derive(Default)]
 pub struct CircuitSimulator {
     /// Track the witnesses that can be solved
diff --git a/acvm-repo/acvm/src/compiler/transformers/csat.rs b/acvm-repo/acvm/src/compiler/transformers/csat.rs
index 8a2bf9876e4..cef969107b9 100644
--- a/acvm-repo/acvm/src/compiler/transformers/csat.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/csat.rs
@@ -16,8 +16,9 @@ pub const MIN_EXPRESSION_WIDTH: usize = 3;
 ///
 /// This is done by creating intermediate variables to hold partial calculations and then combining them
 /// to calculate the original expression.
-// Should we give it all of the opcodes?
-// Have a single transformer that you instantiate with a width, then pass many opcodes through
+///
+/// Pre-Condition:
+/// - General Optimizer must run before this pass
 pub(crate) struct CSatTransformer {
     width: usize,
     /// Track the witness that can be solved
diff --git a/acvm-repo/acvm/src/compiler/transformers/mod.rs b/acvm-repo/acvm/src/compiler/transformers/mod.rs
index 0029bbf5e1f..c6c03167ebd 100644
--- a/acvm-repo/acvm/src/compiler/transformers/mod.rs
+++ b/acvm-repo/acvm/src/compiler/transformers/mod.rs
@@ -1,3 +1,30 @@
+/// This module applies backend specific transformation to a [`Circuit`].
+///
+/// ## CSAT: transforms AssertZero opcodes into  AssertZero opcodes having the required width.
+///
+/// For instance, if the width is 4, the AssertZero opcode x1 + x2 + x3 + x4 + x5 - y = 0 will be transformed using 2 intermediate variables (z1,z2):
+/// x1 + x2 + x3 = z1
+/// x4 + x5 = z2
+/// z1 + z2 - y = 0
+/// If x1,..x5 are inputs to the program, they are taggeg as 'solvable', and would be used to compute the value of y.
+/// If we generate the intermediate variable x4 + x5 - y = z3, we get an unsolvable circuit because this AssertZero opcode will have two unkwnon values: y and z3
+/// So the CSAT transformation keep track of which witness would be solved for each opcode in order to only generate solvable intermediat variables.
+///
+/// ## eliminate intermediate variables
+/// The 'eliminate intermediate variables' pass will remove any intermediate variables (for instance created by the previous transformation)
+/// that are used in exactly two AssertZero opcodes.
+/// This results in arithmetic opcodes having linear combinations of potentially large width.
+/// For instance if the intermediate variable is z1 is only used in y:
+/// z1 = x1 + x2 +x3
+/// y = z1 + x4
+/// We remove it, undoing the work done during the CSAT transformation: y = x1 + x2 + x3 + x4
+///
+/// We do this because the backend is expected to handle linear combinations of 'unbounded width' in a more efficient way
+/// than the 'CSAT transformation'.
+/// However, it is worth to compute intermediate variables if they are used in more than one other opcode.
+///
+/// ## redundant_range
+/// The 'range optimization' pass, from the optimizers module will remove any redundant range opcodes again.
 use std::collections::BTreeMap;
 
 use acir::{
@@ -96,19 +123,12 @@ pub(super) fn transform_internal<F: AcirField>(
     (acir, acir_opcode_positions)
 }
 
-/// Applies backend specific optimizations to a [`Circuit`].
-///
 /// Accepts an injected `acir_opcode_positions` to allow transformations to be applied directly after optimizations.
 ///
 /// If the width is unbounded, it does nothing.
 /// If it is bounded, it first performs the 'CSAT transformation' in one pass, by creating intermediate variables when necessary.
-/// This results in arithmetic opcodes having the required width.
-/// Then the 'eliminate intermediate variables' pass will remove any intermediate variables (for instance created by the previous transformation)
-/// that are used in exactly two arithmetic opcodes.
-/// This results in arithmetic opcodes having linear combinations of potentially large width.
-/// Finally, the 'range optimization' pass will remove any redundant range opcodes.
-/// The backend is expected to handle linear combinations of 'unbounded width' in a more efficient way
-/// than the 'CSAT transformation'.
+/// Then it performs `eliminate_intermediate_variable()` which (re-)combine intermediate variables used only once.
+/// It concludes with a round of `replace_redundant_ranges()` which removes range checks made redundant by the previous pass.
 #[tracing::instrument(
     level = "trace",
     name = "transform_acir_once",