diff --git a/.noir-sync-commit b/.noir-sync-commit
index 9bbde85e56b5..e2c51b55bd5a 100644
--- a/.noir-sync-commit
+++ b/.noir-sync-commit
@@ -1 +1 @@
-68c32b4ffd9b069fe4b119327dbf4018c17ab9d4
+dace07849aa28795abb30b3f9d979ffc6b6487e6
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs
index 45d10323b06a..97c1760d87c1 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs
@@ -141,6 +141,23 @@ pub(crate) fn optimize_into_acir(
         ssa.to_brillig(options.enable_brillig_logging)
     });
 
+    let ssa_gen_span = span!(Level::TRACE, "ssa_generation");
+    let ssa_gen_span_guard = ssa_gen_span.enter();
+
+    let ssa = SsaBuilder {
+        ssa,
+        print_ssa_passes: options.enable_ssa_logging,
+        print_codegen_timings: options.print_codegen_timings,
+    }
+    .run_pass(
+        |ssa| ssa.fold_constants_with_brillig(&brillig),
+        "After Constant Folding with Brillig:",
+    )
+    .run_pass(Ssa::dead_instruction_elimination, "After Dead Instruction Elimination:")
+    .finish();
+
+    drop(ssa_gen_span_guard);
+
     let artifacts = time("SSA to ACIR", options.print_codegen_timings, || {
         ssa.into_acir(&brillig, options.expression_width)
     })?;
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs
index f606fffbf91e..6737b335b7da 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs
@@ -11,7 +11,7 @@ use fxhash::FxHasher64;
 use iter_extended::vecmap;
 use noirc_frontend::hir_def::types::Type as HirType;
 
-use crate::ssa::opt::flatten_cfg::value_merger::ValueMerger;
+use crate::ssa::{ir::function::RuntimeType, opt::flatten_cfg::value_merger::ValueMerger};
 
 use super::{
     basic_block::BasicBlockId,
@@ -478,8 +478,19 @@ impl Instruction {
             | ArraySet { .. }
             | MakeArray { .. } => true,
 
+            // Store instructions must be removed by DIE in acir code; any load
+            // instructions should already be unused by that point.
+            //
+            // Note that this check assumes that it is being performed after the flattening
+            // pass and after the last mem2reg pass. This is currently the case for the DIE
+            // pass where this check is done, but does mean that we cannot perform mem2reg
+            // after the DIE pass.
+            Store { .. } => {
+                matches!(function.runtime(), RuntimeType::Acir(_))
+                    && function.reachable_blocks().len() == 1
+            }
+
             Constrain(..)
-            | Store { .. }
             | EnableSideEffectsIf { .. }
             | IncrementRc { .. }
             | DecrementRc { .. }
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs
index 9ee9a52b5adb..3fb0f485c2a7 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs
@@ -6,7 +6,7 @@
 //!   by the [`DataFlowGraph`] automatically as new instructions are pushed.
 //! - Check whether any input values have been constrained to be equal to a value of a simpler form
 //!   by a [constrain instruction][Instruction::Constrain]. If so, replace the input value with the simpler form.
-//! - Check whether the instruction [can_be_replaced][Instruction::can_be_replaced()]
+//! - Check whether the instruction [can_be_deduplicated][Instruction::can_be_deduplicated()]
 //!   by duplicate instruction earlier in the same block.
 //!
 //! These operations are done in parallel so that they can each benefit from each other
@@ -19,33 +19,49 @@
 //!
 //! This is the only pass which removes duplicated pure [`Instruction`]s however and so is needed when
 //! different blocks are merged, i.e. after the [`flatten_cfg`][super::flatten_cfg] pass.
-use std::collections::{HashSet, VecDeque};
+use std::collections::{BTreeMap, HashSet, VecDeque};
 
-use acvm::{acir::AcirField, FieldElement};
+use acvm::{
+    acir::AcirField,
+    brillig_vm::{MemoryValue, VMStatus, VM},
+    FieldElement,
+};
+use bn254_blackbox_solver::Bn254BlackBoxSolver;
+use im::Vector;
 use iter_extended::vecmap;
 
-use crate::ssa::{
-    ir::{
-        basic_block::BasicBlockId,
-        dfg::{DataFlowGraph, InsertInstructionResult},
-        dom::DominatorTree,
-        function::Function,
-        instruction::{Instruction, InstructionId},
-        types::Type,
-        value::{Value, ValueId},
+use crate::{
+    brillig::{
+        brillig_gen::gen_brillig_for,
+        brillig_ir::{artifact::BrilligParameter, brillig_variable::get_bit_size_from_ssa_type},
+        Brillig,
+    },
+    ssa::{
+        ir::{
+            basic_block::BasicBlockId,
+            dfg::{DataFlowGraph, InsertInstructionResult},
+            dom::DominatorTree,
+            function::{Function, FunctionId, RuntimeType},
+            instruction::{Instruction, InstructionId},
+            types::Type,
+            value::{Value, ValueId},
+        },
+        ssa_gen::Ssa,
     },
-    ssa_gen::Ssa,
 };
 use fxhash::FxHashMap as HashMap;
 
 impl Ssa {
     /// Performs constant folding on each instruction.
     ///
+    /// It will not look at constraints to inform simplifications
+    /// based on the stated equivalence of two instructions.
+    ///
     /// See [`constant_folding`][self] module for more information.
     #[tracing::instrument(level = "trace", skip(self))]
     pub(crate) fn fold_constants(mut self) -> Ssa {
         for function in self.functions.values_mut() {
-            function.constant_fold(false);
+            function.constant_fold(false, None);
         }
         self
     }
@@ -58,8 +74,69 @@ impl Ssa {
     #[tracing::instrument(level = "trace", skip(self))]
     pub(crate) fn fold_constants_using_constraints(mut self) -> Ssa {
         for function in self.functions.values_mut() {
-            function.constant_fold(true);
+            function.constant_fold(true, None);
+        }
+        self
+    }
+
+    /// Performs constant folding on each instruction while also replacing calls to brillig functions
+    /// with all constant arguments by trying to evaluate those calls.
+    #[tracing::instrument(level = "trace", skip(self, brillig))]
+    pub(crate) fn fold_constants_with_brillig(mut self, brillig: &Brillig) -> Ssa {
+        // Collect all brillig functions so that later we can find them when processing a call instruction
+        let mut brillig_functions: BTreeMap<FunctionId, Function> = BTreeMap::new();
+        for (func_id, func) in &self.functions {
+            if let RuntimeType::Brillig(..) = func.runtime() {
+                let cloned_function = Function::clone_with_id(*func_id, func);
+                brillig_functions.insert(*func_id, cloned_function);
+            };
+        }
+
+        let brillig_info = Some(BrilligInfo { brillig, brillig_functions: &brillig_functions });
+
+        for function in self.functions.values_mut() {
+            function.constant_fold(false, brillig_info);
+        }
+
+        // It could happen that we inlined all calls to a given brillig function.
+        // In that case it's unused so we can remove it. This is what we check next.
+        self.remove_unused_brillig_functions(brillig_functions)
+    }
+
+    fn remove_unused_brillig_functions(
+        mut self,
+        mut brillig_functions: BTreeMap<FunctionId, Function>,
+    ) -> Ssa {
+        // Remove from the above map functions that are called
+        for function in self.functions.values() {
+            for block_id in function.reachable_blocks() {
+                for instruction_id in function.dfg[block_id].instructions() {
+                    let instruction = &function.dfg[*instruction_id];
+                    let Instruction::Call { func: func_id, arguments: _ } = instruction else {
+                        continue;
+                    };
+
+                    let func_value = &function.dfg[*func_id];
+                    let Value::Function(func_id) = func_value else { continue };
+
+                    brillig_functions.remove(func_id);
+                }
+            }
+        }
+
+        // The ones that remain are never called: let's remove them.
+        for func_id in brillig_functions.keys() {
+            // We never want to remove the main function (it could be `unconstrained` or it
+            // could have been turned into brillig if `--force-brillig` was given).
+            // We also don't want to remove entry points.
+            if self.main_id == *func_id || self.entry_point_to_generated_index.contains_key(func_id)
+            {
+                continue;
+            }
+
+            self.functions.remove(func_id);
         }
+
         self
     }
 }
@@ -67,8 +144,13 @@ impl Ssa {
 impl Function {
     /// The structure of this pass is simple:
     /// Go through each block and re-insert all instructions.
-    pub(crate) fn constant_fold(&mut self, use_constraint_info: bool) {
-        let mut context = Context::new(self, use_constraint_info);
+    pub(crate) fn constant_fold(
+        &mut self,
+        use_constraint_info: bool,
+        brillig_info: Option<BrilligInfo>,
+    ) {
+        let mut context = Context::new(use_constraint_info, brillig_info);
+        let mut dom = DominatorTree::with_function(self);
         context.block_queue.push_back(self.entry_block());
 
         while let Some(block) = context.block_queue.pop_front() {
@@ -77,40 +159,87 @@ impl Function {
             }
 
             context.visited_blocks.insert(block);
-            context.fold_constants_in_block(self, block);
+            context.fold_constants_in_block(&mut self.dfg, &mut dom, block);
         }
     }
 }
 
-struct Context {
+struct Context<'a> {
     use_constraint_info: bool,
+    brillig_info: Option<BrilligInfo<'a>>,
     /// Maps pre-folded ValueIds to the new ValueIds obtained by re-inserting the instruction.
     visited_blocks: HashSet<BasicBlockId>,
     block_queue: VecDeque<BasicBlockId>,
 
     /// Contains sets of values which are constrained to be equivalent to each other.
     ///
-    /// The mapping's structure is `side_effects_enabled_var => (constrained_value => [(block, simplified_value)])`.
+    /// The mapping's structure is `side_effects_enabled_var => (constrained_value => simplified_value)`.
     ///
     /// We partition the maps of constrained values according to the side-effects flag at the point
     /// at which the values are constrained. This prevents constraints which are only sometimes enforced
     /// being used to modify the rest of the program.
-    ///
-    /// We also keep track of how a value was simplified to other values per block. That is,
-    /// a same ValueId could have been simplified to one value in one block and to another value
-    /// in another block.
-    constraint_simplification_mappings:
-        HashMap<ValueId, HashMap<ValueId, Vec<(BasicBlockId, ValueId)>>>,
+    constraint_simplification_mappings: ConstraintSimplificationCache,
 
     // Cache of instructions without any side-effects along with their outputs.
     cached_instruction_results: InstructionResultCache,
+}
 
-    dom: DominatorTree,
+#[derive(Copy, Clone)]
+pub(crate) struct BrilligInfo<'a> {
+    brillig: &'a Brillig,
+    brillig_functions: &'a BTreeMap<FunctionId, Function>,
 }
 
-/// HashMap from (Instruction, side_effects_enabled_var) to the results of the instruction.
+/// Records simplified equivalents of an [`Instruction`] in the blocks
+/// where the constraint that advised the simplification has been encountered.
+///
+/// For more information see [`ConstraintSimplificationCache`].
+#[derive(Default)]
+struct SimplificationCache {
+    /// Simplified expressions where we found them.
+    ///
+    /// It will always have at least one value because `add` is called
+    /// after the default is constructed.
+    simplifications: HashMap<BasicBlockId, ValueId>,
+}
+
+impl SimplificationCache {
+    /// Called with a newly encountered simplification.
+    fn add(&mut self, dfg: &DataFlowGraph, simple: ValueId, block: BasicBlockId) {
+        self.simplifications
+            .entry(block)
+            .and_modify(|existing| {
+                // `SimplificationCache` may already hold a simplification in this block
+                // so we check whether `simple` is a better simplification than the current one.
+                if let Some((_, simpler)) = simplify(dfg, *existing, simple) {
+                    *existing = simpler;
+                };
+            })
+            .or_insert(simple);
+    }
+
+    /// Try to find a simplification in a visible block.
+    fn get(&self, block: BasicBlockId, dom: &DominatorTree) -> Option<ValueId> {
+        // Deterministically walk up the dominator chain until we encounter a block that contains a simplification.
+        dom.find_map_dominator(block, |b| self.simplifications.get(&b).cloned())
+    }
+}
+
+/// HashMap from `(side_effects_enabled_var, Instruction)` to a simplified expression that it can
+/// be replaced with based on constraints that testify to their equivalence, stored together
+/// with the set of blocks at which this constraint has been observed.
+///
+/// Only blocks dominated by one in the cache should have access to this information, otherwise
+/// we create a sort of time paradox where we replace an instruction with a constant we believe
+/// it _should_ be equal to, without ever actually producing and asserting the value.
+type ConstraintSimplificationCache = HashMap<ValueId, HashMap<ValueId, SimplificationCache>>;
+
+/// HashMap from `(Instruction, side_effects_enabled_var)` to the results of the instruction.
 /// Stored as a two-level map to avoid cloning Instructions during the `.get` call.
 ///
+/// The `side_effects_enabled_var` is optional because we only use it when `Instruction::requires_acir_gen_predicate`
+/// is true _and_ the constraint information is also taken into account.
+///
 /// In addition to each result, the original BasicBlockId is stored as well. This allows us
 /// to deduplicate instructions across blocks as long as the new block dominates the original.
 type InstructionResultCache = HashMap<Instruction, HashMap<Option<ValueId>, ResultCache>>;
@@ -120,66 +249,97 @@ type InstructionResultCache = HashMap<Instruction, HashMap<Option<ValueId>, Resu
 /// For more information see [`InstructionResultCache`].
 #[derive(Default)]
 struct ResultCache {
-    results: Vec<(BasicBlockId, Vec<ValueId>)>,
+    result: Option<(BasicBlockId, Vec<ValueId>)>,
 }
 
-impl Context {
-    fn new(function: &Function, use_constraint_info: bool) -> Self {
+impl<'brillig> Context<'brillig> {
+    fn new(use_constraint_info: bool, brillig_info: Option<BrilligInfo<'brillig>>) -> Self {
         Self {
             use_constraint_info,
+            brillig_info,
             visited_blocks: Default::default(),
             block_queue: Default::default(),
             constraint_simplification_mappings: Default::default(),
             cached_instruction_results: Default::default(),
-            dom: DominatorTree::with_function(function),
         }
     }
 
-    fn fold_constants_in_block(&mut self, function: &mut Function, block: BasicBlockId) {
-        let instructions = function.dfg[block].take_instructions();
+    fn fold_constants_in_block(
+        &mut self,
+        dfg: &mut DataFlowGraph,
+        dom: &mut DominatorTree,
+        block: BasicBlockId,
+    ) {
+        let instructions = dfg[block].take_instructions();
 
-        let mut side_effects_enabled_var =
-            function.dfg.make_constant(FieldElement::one(), Type::bool());
+        // Default side effect condition variable with an enabled state.
+        let mut side_effects_enabled_var = dfg.make_constant(FieldElement::one(), Type::bool());
 
         for instruction_id in instructions {
             self.fold_constants_into_instruction(
-                &mut function.dfg,
+                dfg,
+                dom,
                 block,
                 instruction_id,
                 &mut side_effects_enabled_var,
             );
         }
-        self.block_queue.extend(function.dfg[block].successors());
+        self.block_queue.extend(dfg[block].successors());
     }
 
     fn fold_constants_into_instruction(
         &mut self,
         dfg: &mut DataFlowGraph,
-        block: BasicBlockId,
+        dom: &mut DominatorTree,
+        mut block: BasicBlockId,
         id: InstructionId,
         side_effects_enabled_var: &mut ValueId,
     ) {
-        let constraint_simplification_mapping =
-            self.constraint_simplification_mappings.get(side_effects_enabled_var);
-        let instruction = Self::resolve_instruction(
-            id,
-            block,
-            dfg,
-            &mut self.dom,
-            constraint_simplification_mapping,
-        );
+        let constraint_simplification_mapping = self.get_constraint_map(*side_effects_enabled_var);
+
+        let instruction =
+            Self::resolve_instruction(id, block, dfg, dom, constraint_simplification_mapping);
+
         let old_results = dfg.instruction_results(id).to_vec();
 
         // If a copy of this instruction exists earlier in the block, then reuse the previous results.
-        if let Some(cached_results) =
-            self.get_cached(dfg, &instruction, *side_effects_enabled_var, block)
+        if let Some(cache_result) =
+            self.get_cached(dfg, dom, &instruction, *side_effects_enabled_var, block)
         {
-            Self::replace_result_ids(dfg, &old_results, cached_results);
-            return;
+            match cache_result {
+                CacheResult::Cached(cached) => {
+                    Self::replace_result_ids(dfg, &old_results, cached);
+                    return;
+                }
+                CacheResult::NeedToHoistToCommonBlock(dominator) => {
+                    // Just change the block to insert in the common dominator instead.
+                    // This will only move the current instance of the instruction right now.
+                    // When constant folding is run a second time later on, it'll catch
+                    // that the previous instance can be deduplicated to this instance.
+                    block = dominator;
+                }
+            }
         }
 
-        // Otherwise, try inserting the instruction again to apply any optimizations using the newly resolved inputs.
-        let new_results = Self::push_instruction(id, instruction.clone(), &old_results, block, dfg);
+        let new_results =
+        // First try to inline a call to a brillig function with all constant arguments.
+        Self::try_inline_brillig_call_with_all_constants(
+            &instruction,
+            &old_results,
+            block,
+            dfg,
+            self.brillig_info,
+        )
+        .unwrap_or_else(|| {
+            // Otherwise, try inserting the instruction again to apply any optimizations using the newly resolved inputs.
+            Self::push_instruction(
+                id,
+                instruction.clone(),
+                &old_results,
+                block,
+                dfg,
+            )
+        });
 
         Self::replace_result_ids(dfg, &old_results, &new_results);
 
@@ -204,7 +364,7 @@ impl Context {
         block: BasicBlockId,
         dfg: &DataFlowGraph,
         dom: &mut DominatorTree,
-        constraint_simplification_mapping: Option<&HashMap<ValueId, Vec<(BasicBlockId, ValueId)>>>,
+        constraint_simplification_mapping: &HashMap<ValueId, SimplificationCache>,
     ) -> Instruction {
         let instruction = dfg[instruction_id].clone();
 
@@ -214,30 +374,28 @@ impl Context {
         // This allows us to reach a stable final `ValueId` for each instruction input as we add more
         // constraints to the cache.
         fn resolve_cache(
+            block: BasicBlockId,
             dfg: &DataFlowGraph,
             dom: &mut DominatorTree,
-            cache: Option<&HashMap<ValueId, Vec<(BasicBlockId, ValueId)>>>,
+            cache: &HashMap<ValueId, SimplificationCache>,
             value_id: ValueId,
-            block: BasicBlockId,
         ) -> ValueId {
             let resolved_id = dfg.resolve(value_id);
-            let Some(cached_values) = cache.and_then(|cache| cache.get(&resolved_id)) else {
-                return resolved_id;
-            };
-
-            for (cached_block, cached_value) in cached_values {
-                // We can only use the simplified value if it was simplified in a block that dominates the current one
-                if dom.dominates(*cached_block, block) {
-                    return resolve_cache(dfg, dom, cache, *cached_value, block);
+            match cache.get(&resolved_id) {
+                Some(simplification_cache) => {
+                    if let Some(simplified) = simplification_cache.get(block, dom) {
+                        resolve_cache(block, dfg, dom, cache, simplified)
+                    } else {
+                        resolved_id
+                    }
                 }
+                None => resolved_id,
             }
-
-            resolved_id
         }
 
         // Resolve any inputs to ensure that we're comparing like-for-like instructions.
         instruction.map_values(|value_id| {
-            resolve_cache(dfg, dom, constraint_simplification_mapping, value_id, block)
+            resolve_cache(block, dfg, dom, constraint_simplification_mapping, value_id)
         })
     }
 
@@ -288,13 +446,14 @@ impl Context {
                     self.get_constraint_map(side_effects_enabled_var)
                         .entry(complex)
                         .or_default()
-                        .push((block, simple));
+                        .add(dfg, simple, block);
                 }
             }
         }
 
         // If the instruction doesn't have side-effects and if it won't interact with enable_side_effects during acir_gen,
         // we cache the results so we can reuse them if the same instruction appears again later in the block.
+        // Others have side effects representing failure, which are implicit in the ACIR code and can also be deduplicated.
         if instruction.can_be_deduplicated(dfg, self.use_constraint_info) {
             let use_predicate =
                 self.use_constraint_info && instruction.requires_acir_gen_predicate(dfg);
@@ -309,10 +468,12 @@ impl Context {
         }
     }
 
+    /// Get the simplification mapping from complex to simpler instructions,
+    /// which all depend on the same side effect condition variable.
     fn get_constraint_map(
         &mut self,
         side_effects_enabled_var: ValueId,
-    ) -> &mut HashMap<ValueId, Vec<(BasicBlockId, ValueId)>> {
+    ) -> &mut HashMap<ValueId, SimplificationCache> {
         self.constraint_simplification_mappings.entry(side_effects_enabled_var).or_default()
     }
 
@@ -327,26 +488,190 @@ impl Context {
         }
     }
 
-    fn get_cached<'a>(
-        &'a mut self,
+    /// Get a cached result if it can be used in this context.
+    fn get_cached(
+        &self,
         dfg: &DataFlowGraph,
+        dom: &mut DominatorTree,
         instruction: &Instruction,
         side_effects_enabled_var: ValueId,
         block: BasicBlockId,
-    ) -> Option<&'a [ValueId]> {
+    ) -> Option<CacheResult> {
         let results_for_instruction = self.cached_instruction_results.get(instruction)?;
 
         let predicate = self.use_constraint_info && instruction.requires_acir_gen_predicate(dfg);
         let predicate = predicate.then_some(side_effects_enabled_var);
 
-        results_for_instruction.get(&predicate)?.get(block, &mut self.dom)
+        results_for_instruction.get(&predicate)?.get(block, dom, instruction.has_side_effects(dfg))
+    }
+
+    /// Checks if the given instruction is a call to a brillig function with all constant arguments.
+    /// If so, we can try to evaluate that function and replace the results with the evaluation results.
+    fn try_inline_brillig_call_with_all_constants(
+        instruction: &Instruction,
+        old_results: &[ValueId],
+        block: BasicBlockId,
+        dfg: &mut DataFlowGraph,
+        brillig_info: Option<BrilligInfo>,
+    ) -> Option<Vec<ValueId>> {
+        let evaluation_result = Self::evaluate_const_brillig_call(
+            instruction,
+            brillig_info?.brillig,
+            brillig_info?.brillig_functions,
+            dfg,
+        );
+
+        match evaluation_result {
+            EvaluationResult::NotABrilligCall | EvaluationResult::CannotEvaluate(_) => None,
+            EvaluationResult::Evaluated(memory_values) => {
+                let mut memory_index = 0;
+                let new_results = vecmap(old_results, |old_result| {
+                    let typ = dfg.type_of_value(*old_result);
+                    Self::new_value_for_type_and_memory_values(
+                        typ,
+                        block,
+                        &memory_values,
+                        &mut memory_index,
+                        dfg,
+                    )
+                });
+                Some(new_results)
+            }
+        }
+    }
+
+    /// Tries to evaluate an instruction if it's a call that points to a brillig function,
+    /// and all its arguments are constant.
+    /// We do this by directly executing the function with a brillig VM.
+    fn evaluate_const_brillig_call(
+        instruction: &Instruction,
+        brillig: &Brillig,
+        brillig_functions: &BTreeMap<FunctionId, Function>,
+        dfg: &mut DataFlowGraph,
+    ) -> EvaluationResult {
+        let Instruction::Call { func: func_id, arguments } = instruction else {
+            return EvaluationResult::NotABrilligCall;
+        };
+
+        let func_value = &dfg[*func_id];
+        let Value::Function(func_id) = func_value else {
+            return EvaluationResult::NotABrilligCall;
+        };
+
+        let Some(func) = brillig_functions.get(func_id) else {
+            return EvaluationResult::NotABrilligCall;
+        };
+
+        if !arguments.iter().all(|argument| dfg.is_constant(*argument)) {
+            return EvaluationResult::CannotEvaluate(*func_id);
+        }
+
+        let mut brillig_arguments = Vec::new();
+        for argument in arguments {
+            let typ = dfg.type_of_value(*argument);
+            let Some(parameter) = type_to_brillig_parameter(&typ) else {
+                return EvaluationResult::CannotEvaluate(*func_id);
+            };
+            brillig_arguments.push(parameter);
+        }
+
+        // Check that return value types are supported by brillig
+        for return_id in func.returns().iter() {
+            let typ = func.dfg.type_of_value(*return_id);
+            if type_to_brillig_parameter(&typ).is_none() {
+                return EvaluationResult::CannotEvaluate(*func_id);
+            }
+        }
+
+        let Ok(generated_brillig) = gen_brillig_for(func, brillig_arguments, brillig) else {
+            return EvaluationResult::CannotEvaluate(*func_id);
+        };
+
+        let mut calldata = Vec::new();
+        for argument in arguments {
+            value_id_to_calldata(*argument, dfg, &mut calldata);
+        }
+
+        let bytecode = &generated_brillig.byte_code;
+        let foreign_call_results = Vec::new();
+        let black_box_solver = Bn254BlackBoxSolver;
+        let profiling_active = false;
+        let mut vm =
+            VM::new(calldata, bytecode, foreign_call_results, &black_box_solver, profiling_active);
+        let vm_status: VMStatus<_> = vm.process_opcodes();
+        let VMStatus::Finished { return_data_offset, return_data_size } = vm_status else {
+            return EvaluationResult::CannotEvaluate(*func_id);
+        };
+
+        let memory =
+            vm.get_memory()[return_data_offset..(return_data_offset + return_data_size)].to_vec();
+
+        EvaluationResult::Evaluated(memory)
+    }
+
+    /// Creates a new value inside this function by reading it from `memory_values` starting at
+    /// `memory_index` depending on the given Type: if it's an array multiple values will be read
+    /// and a new `make_array` instruction will be created.
+    fn new_value_for_type_and_memory_values(
+        typ: Type,
+        block_id: BasicBlockId,
+        memory_values: &[MemoryValue<FieldElement>],
+        memory_index: &mut usize,
+        dfg: &mut DataFlowGraph,
+    ) -> ValueId {
+        match typ {
+            Type::Numeric(_) => {
+                let memory = memory_values[*memory_index];
+                *memory_index += 1;
+
+                let field_value = match memory {
+                    MemoryValue::Field(field_value) => field_value,
+                    MemoryValue::Integer(u128_value, _) => u128_value.into(),
+                };
+                dfg.make_constant(field_value, typ)
+            }
+            Type::Array(types, length) => {
+                let mut new_array_values = Vector::new();
+                for _ in 0..length {
+                    for typ in types.iter() {
+                        let new_value = Self::new_value_for_type_and_memory_values(
+                            typ.clone(),
+                            block_id,
+                            memory_values,
+                            memory_index,
+                            dfg,
+                        );
+                        new_array_values.push_back(new_value);
+                    }
+                }
+
+                let instruction = Instruction::MakeArray {
+                    elements: new_array_values,
+                    typ: Type::Array(types, length),
+                };
+                let instruction_id = dfg.make_instruction(instruction, None);
+                dfg[block_id].instructions_mut().push(instruction_id);
+                *dfg.instruction_results(instruction_id).first().unwrap()
+            }
+            Type::Reference(_) => {
+                panic!("Unexpected reference type in brillig function result")
+            }
+            Type::Slice(_) => {
+                panic!("Unexpected slice type in brillig function result")
+            }
+            Type::Function => {
+                panic!("Unexpected function type in brillig function result")
+            }
+        }
     }
 }
 
 impl ResultCache {
     /// Records that an `Instruction` in block `block` produced the result values `results`.
     fn cache(&mut self, block: BasicBlockId, results: Vec<ValueId>) {
-        self.results.push((block, results));
+        if self.result.is_none() {
+            self.result = Some((block, results));
+        }
     }
 
     /// Returns a set of [`ValueId`]s produced from a copy of this [`Instruction`] which sits
@@ -355,16 +680,75 @@ impl ResultCache {
     /// We require that the cached instruction's block dominates `block` in order to avoid
     /// cycles causing issues (e.g. two instructions being replaced with the results of each other
     /// such that neither instruction exists anymore.)
-    fn get(&self, block: BasicBlockId, dom: &mut DominatorTree) -> Option<&[ValueId]> {
-        for (origin_block, results) in &self.results {
+    fn get(
+        &self,
+        block: BasicBlockId,
+        dom: &mut DominatorTree,
+        has_side_effects: bool,
+    ) -> Option<CacheResult> {
+        self.result.as_ref().and_then(|(origin_block, results)| {
             if dom.dominates(*origin_block, block) {
-                return Some(results);
+                Some(CacheResult::Cached(results))
+            } else if !has_side_effects {
+                // Insert a copy of this instruction in the common dominator
+                let dominator = dom.common_dominator(*origin_block, block);
+                Some(CacheResult::NeedToHoistToCommonBlock(dominator))
+            } else {
+                None
+            }
+        })
+    }
+}
+
+enum CacheResult<'a> {
+    Cached(&'a [ValueId]),
+    NeedToHoistToCommonBlock(BasicBlockId),
+}
+
+/// Result of trying to evaluate an instruction (any instruction) in this pass.
+enum EvaluationResult {
+    /// Nothing was done because the instruction wasn't a call to a brillig function,
+    /// or some arguments to it were not constants.
+    NotABrilligCall,
+    /// The instruction was a call to a brillig function, but we couldn't evaluate it.
+    /// This can occur in the situation where the brillig function reaches a "trap" or a foreign call opcode.
+    CannotEvaluate(FunctionId),
+    /// The instruction was a call to a brillig function and we were able to evaluate it,
+    /// returning evaluation memory values.
+    Evaluated(Vec<MemoryValue<FieldElement>>),
+}
+
+/// Similar to FunctionContext::ssa_type_to_parameter but never panics and disallows reference types.
+pub(crate) fn type_to_brillig_parameter(typ: &Type) -> Option<BrilligParameter> {
+    match typ {
+        Type::Numeric(_) => Some(BrilligParameter::SingleAddr(get_bit_size_from_ssa_type(typ))),
+        Type::Array(item_type, size) => {
+            let mut parameters = Vec::with_capacity(item_type.len());
+            for item_typ in item_type.iter() {
+                parameters.push(type_to_brillig_parameter(item_typ)?);
             }
+            Some(BrilligParameter::Array(parameters, *size))
         }
-        None
+        _ => None,
     }
 }
 
+fn value_id_to_calldata(value_id: ValueId, dfg: &DataFlowGraph, calldata: &mut Vec<FieldElement>) {
+    if let Some(value) = dfg.get_numeric_constant(value_id) {
+        calldata.push(value);
+        return;
+    }
+
+    if let Some((values, _type)) = dfg.get_array_constant(value_id) {
+        for value in values {
+            value_id_to_calldata(value, dfg, calldata);
+        }
+        return;
+    }
+
+    panic!("Expected ValueId to be numeric constant or array constant");
+}
+
 /// Check if one expression is simpler than the other.
 /// Returns `Some((complex, simple))` if a simplification was found, otherwise `None`.
 /// Expects the `ValueId`s to be fully resolved.
@@ -620,22 +1004,32 @@ mod test {
     // Regression for #4600
     #[test]
     fn array_get_regression() {
+        // fn main f0 {
+        //   b0(v0: u1, v1: u64):
+        //     enable_side_effects_if v0
+        //     v2 = make_array [Field 0, Field 1]
+        //     v3 = array_get v2, index v1
+        //     v4 = not v0
+        //     enable_side_effects_if v4
+        //     v5 = array_get v2, index v1
+        // }
+        //
         // We want to make sure after constant folding both array_gets remain since they are
         // under different enable_side_effects_if contexts and thus one may be disabled while
         // the other is not. If one is removed, it is possible e.g. v4 is replaced with v2 which
         // is disabled (only gets from index 0) and thus returns the wrong result.
         let src = "
-             acir(inline) fn main f0 {
-               b0(v0: u1, v1: u64):
-                 enable_side_effects v0
-                 v4 = make_array [Field 0, Field 1] : [Field; 2]
-                 v5 = array_get v4, index v1 -> Field
-                 v6 = not v0
-                 enable_side_effects v6
-                 v7 = array_get v4, index v1 -> Field
-                 return
-             }
-             ";
+            acir(inline) fn main f0 {
+              b0(v0: u1, v1: u64):
+                enable_side_effects v0
+                v4 = make_array [Field 0, Field 1] : [Field; 2]
+                v5 = array_get v4, index v1 -> Field
+                v6 = not v0
+                enable_side_effects v6
+                v7 = array_get v4, index v1 -> Field
+                return
+            }
+            ";
         let ssa = Ssa::from_str(src).unwrap();
 
         // Expected output is unchanged
@@ -693,14 +1087,14 @@ mod test {
         assert_normalized_ssa_equals(ssa, expected);
     }
 
-    // This test currently fails. It being fixed will address the issue https://github.com/noir-lang/noir/issues/5756
     #[test]
-    #[should_panic]
     fn constant_array_deduplication() {
         // fn main f0 {
         //   b0(v0: u64):
-        //     v5 = call keccakf1600([v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0])
-        //     v6 = call keccakf1600([v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0])
+        //     v1 = make_array [v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0]
+        //     v2 = make_array [v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0]
+        //     v5 = call keccakf1600(v1)
+        //     v6 = call keccakf1600(v2)
         // }
         //
         // Here we're checking a situation where two identical arrays are being initialized twice and being assigned separate `ValueId`s.
@@ -720,12 +1114,13 @@ mod test {
         let array1 = builder.insert_make_array(array_contents.clone(), typ.clone());
         let array2 = builder.insert_make_array(array_contents, typ.clone());
 
-        assert_eq!(array1, array2, "arrays were assigned different value ids");
+        assert_ne!(array1, array2, "arrays were not assigned different value ids");
 
         let keccakf1600 =
             builder.import_intrinsic("keccakf1600").expect("keccakf1600 intrinsic should exist");
         let _v10 = builder.insert_call(keccakf1600, vec![array1], vec![typ.clone()]);
         let _v11 = builder.insert_call(keccakf1600, vec![array2], vec![typ.clone()]);
+        builder.terminate_with_return(Vec::new());
 
         let mut ssa = builder.finish();
         ssa.normalize_ids();
@@ -735,8 +1130,13 @@ mod test {
         let main = ssa.main();
         let instructions = main.dfg[main.entry_block()].instructions();
         let starting_instruction_count = instructions.len();
-        assert_eq!(starting_instruction_count, 2);
+        assert_eq!(starting_instruction_count, 4);
 
+        // fn main f0 {
+        //   b0(v0: u64):
+        //     v1 = make_array [v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0]
+        //     v5 = call keccakf1600(v1)
+        // }
         let ssa = ssa.fold_constants();
 
         println!("{ssa}");
@@ -744,7 +1144,368 @@ mod test {
         let main = ssa.main();
         let instructions = main.dfg[main.entry_block()].instructions();
         let ending_instruction_count = instructions.len();
-        assert_eq!(ending_instruction_count, 1);
+        assert_eq!(ending_instruction_count, 2);
+    }
+
+    #[test]
+    fn deduplicate_across_blocks() {
+        // fn main f0 {
+        //   b0(v0: u1):
+        //     v1 = not v0
+        //     jmp b1()
+        //   b1():
+        //     v2 = not v0
+        //     return v2
+        // }
+        let main_id = Id::test_new(0);
+
+        // Compiling main
+        let mut builder = FunctionBuilder::new("main".into(), main_id);
+        let b1 = builder.insert_block();
+
+        let v0 = builder.add_parameter(Type::bool());
+        let _v1 = builder.insert_not(v0);
+        builder.terminate_with_jmp(b1, Vec::new());
+
+        builder.switch_to_block(b1);
+        let v2 = builder.insert_not(v0);
+        builder.terminate_with_return(vec![v2]);
+
+        let ssa = builder.finish();
+        let main = ssa.main();
+        assert_eq!(main.dfg[main.entry_block()].instructions().len(), 1);
+        assert_eq!(main.dfg[b1].instructions().len(), 1);
+
+        // Expected output:
+        //
+        // fn main f0 {
+        //   b0(v0: u1):
+        //     v1 = not v0
+        //     jmp b1()
+        //   b1():
+        //     return v1
+        // }
+        let ssa = ssa.fold_constants_using_constraints();
+        let main = ssa.main();
+        assert_eq!(main.dfg[main.entry_block()].instructions().len(), 1);
+        assert_eq!(main.dfg[b1].instructions().len(), 0);
+    }
+
+    #[test]
+    fn deduplicate_across_non_dominated_blocks() {
+        let src = "
+            brillig(inline) fn main f0 {
+              b0(v0: u32):
+                v2 = lt u32 1000, v0
+                jmpif v2 then: b1, else: b2
+              b1():
+                v4 = add v0, u32 1
+                v5 = lt v0, v4
+                constrain v5 == u1 1
+                jmp b2()
+              b2():
+                v7 = lt u32 1000, v0
+                jmpif v7 then: b3, else: b4
+              b3():
+                v8 = add v0, u32 1
+                v9 = lt v0, v8
+                constrain v9 == u1 1
+                jmp b4()
+              b4():
+                return
+            }
+        ";
+        let ssa = Ssa::from_str(src).unwrap();
+
+        // v4 has been hoisted, although:
+        // - v5 has not yet been removed since it was encountered earlier in the program
+        // - v8 hasn't been recognized as a duplicate of v6 yet since they still reference v4 and
+        //   v5 respectively
+        let expected = "
+            brillig(inline) fn main f0 {
+              b0(v0: u32):
+                v2 = lt u32 1000, v0
+                v4 = add v0, u32 1
+                jmpif v2 then: b1, else: b2
+              b1():
+                v5 = add v0, u32 1
+                v6 = lt v0, v5
+                constrain v6 == u1 1
+                jmp b2()
+              b2():
+                jmpif v2 then: b3, else: b4
+              b3():
+                v8 = lt v0, v4
+                constrain v8 == u1 1
+                jmp b4()
+              b4():
+                return
+            }
+        ";
+
+        let ssa = ssa.fold_constants_using_constraints();
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_without_arguments() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1() -> Field
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0():
+                v0 = add Field 2, Field 3
+                return v0
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                return Field 5
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_two_field_arguments() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1(Field 2, Field 3) -> Field
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: Field, v1: Field):
+                v2 = add v0, v1
+                return v2
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                return Field 5
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_two_i32_arguments() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1(i32 2, i32 3) -> i32
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: i32, v1: i32):
+                v2 = add v0, v1
+                return v2
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                return i32 5
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_array_return() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1(Field 2, Field 3, Field 4) -> [Field; 3]
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: Field, v1: Field, v2: Field):
+                v3 = make_array [v0, v1, v2] : [Field; 3]
+                return v3
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                v3 = make_array [Field 2, Field 3, Field 4] : [Field; 3]
+                return v3
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_composite_array_return() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1(Field 2, i32 3, Field 4, i32 5) -> [(Field, i32); 2]
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: Field, v1: i32, v2: i32, v3: Field):
+                v4 = make_array [v0, v1, v2, v3] : [(Field, i32); 2]
+                return v4
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                v4 = make_array [Field 2, i32 3, Field 4, i32 5] : [(Field, i32); 2]
+                return v4
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_array_arguments() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = make_array [Field 2, Field 3] : [Field; 2]
+                v1 = call f1(v0) -> Field
+                return v1
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: [Field; 2]):
+                inc_rc v0
+                v2 = array_get v0, index u32 0 -> Field
+                v4 = array_get v0, index u32 1 -> Field
+                v5 = add v2, v4
+                dec_rc v0
+                return v5
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                v2 = make_array [Field 2, Field 3] : [Field; 2]
+                return Field 5
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn does_not_use_cached_constrain_in_block_that_is_not_dominated() {
+        let src = "
+            brillig(inline) fn main f0 {
+              b0(v0: Field, v1: Field):
+                v3 = eq v0, Field 0
+                jmpif v3 then: b1, else: b2
+              b1():
+                v5 = eq v1, Field 1
+                constrain v1 == Field 1
+                jmp b2()
+              b2():
+                v6 = eq v1, Field 0
+                constrain v1 == Field 0
+                return
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let ssa = ssa.fold_constants_using_constraints();
+        assert_normalized_ssa_equals(ssa, src);
+    }
+
+    #[test]
+    fn does_not_hoist_constrain_to_common_ancestor() {
+        let src = "
+            brillig(inline) fn main f0 {
+              b0(v0: Field, v1: Field):
+                v3 = eq v0, Field 0
+                jmpif v3 then: b1, else: b2
+              b1():
+                constrain v1 == Field 1
+                jmp b2()
+              b2():
+                jmpif v0 then: b3, else: b4
+              b3():
+                constrain v1 == Field 1 // This was incorrectly hoisted to b0 but this condition is not valid when going b0 -> b2 -> b4
+                jmp b4()
+              b4():
+                return
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let ssa = ssa.fold_constants_using_constraints();
+        assert_normalized_ssa_equals(ssa, src);
+    }
+
+    #[test]
+    fn deduplicates_side_effecting_intrinsics() {
+        let src = "
+        // After EnableSideEffectsIf removal:
+        acir(inline) fn main f0 {
+          b0(v0: Field, v1: Field, v2: u1):
+            v4 = call is_unconstrained() -> u1
+            v7 = call to_be_radix(v0, u32 256) -> [u8; 1]    // `a.to_be_radix(256)`;
+            inc_rc v7
+            v8 = call to_be_radix(v0, u32 256) -> [u8; 1]    // duplicate load of `a`
+            inc_rc v8
+            v9 = cast v2 as Field                            // `if c { a.to_be_radix(256) }`
+            v10 = mul v0, v9                                 // attaching `c` to `a`
+            v11 = call to_be_radix(v10, u32 256) -> [u8; 1]  // calling `to_radix(c * a)`
+            inc_rc v11
+            enable_side_effects v2                           // side effect var for `c` shifted down by removal
+            return
+        }
+        ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let expected = "
+        acir(inline) fn main f0 {
+          b0(v0: Field, v1: Field, v2: u1):
+            v4 = call is_unconstrained() -> u1
+            v7 = call to_be_radix(v0, u32 256) -> [u8; 1]
+            inc_rc v7
+            inc_rc v7
+            v8 = cast v2 as Field
+            v9 = mul v0, v8
+            v10 = call to_be_radix(v9, u32 256) -> [u8; 1]
+            inc_rc v10
+            enable_side_effects v2
+            return
+        }
+        ";
+        let ssa = ssa.fold_constants_using_constraints();
+        assert_normalized_ssa_equals(ssa, expected);
     }
 
     #[test]
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs
index 5d114672a556..0470f8320a5e 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs
@@ -131,8 +131,7 @@
 //!   v11 = mul v4, Field 12
 //!   v12 = add v10, v11
 //!   store v12 at v5         (new store)
-use fxhash::FxHashMap as HashMap;
-use std::collections::{BTreeMap, HashSet};
+use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet};
 
 use acvm::{acir::AcirField, acir::BlackBoxFunc, FieldElement};
 use iter_extended::vecmap;
@@ -186,18 +185,6 @@ struct Context<'f> {
     /// Maps start of branch -> end of branch
     branch_ends: HashMap<BasicBlockId, BasicBlockId>,
 
-    /// Maps an address to the old and new value of the element at that address
-    /// These only hold stores for one block at a time and is cleared
-    /// between inlining of branches.
-    store_values: HashMap<ValueId, Store>,
-
-    /// Stores all allocations local to the current branch.
-    /// Since these branches are local to the current branch (ie. only defined within one branch of
-    /// an if expression), they should not be merged with their previous value or stored value in
-    /// the other branch since there is no such value. The ValueId here is that which is returned
-    /// by the allocate instruction.
-    local_allocations: HashSet<ValueId>,
-
     /// A stack of each jmpif condition that was taken to reach a particular point in the program.
     /// When two branches are merged back into one, this constitutes a join point, and is analogous
     /// to the rest of the program after an if statement. When such a join point / end block is
@@ -214,13 +201,15 @@ struct Context<'f> {
     /// When processing a block, we pop this stack to get its arguments
     /// and at the end we push the arguments for his successor
     arguments_stack: Vec<Vec<ValueId>>,
-}
 
-#[derive(Clone)]
-pub(crate) struct Store {
-    old_value: ValueId,
-    new_value: ValueId,
-    call_stack: CallStack,
+    /// Stores all allocations local to the current branch.
+    ///
+    /// Since these allocations are local to the current branch (i.e. only defined within one
+    /// branch of an if expression), they should not be merged with their previous value or stored
+    /// value in the other branch since there is no such value.
+    ///
+    /// The `ValueId` here is that which is returned by the allocate instruction.
+    local_allocations: HashSet<ValueId>,
 }
 
 #[derive(Clone)]
@@ -231,8 +220,6 @@ struct ConditionalBranch {
     old_condition: ValueId,
     // The condition of the branch
     condition: ValueId,
-    // The store values accumulated when processing the branch
-    store_values: HashMap<ValueId, Store>,
     // The allocations accumulated when processing the branch
     local_allocations: HashSet<ValueId>,
 }
@@ -263,12 +250,11 @@ fn flatten_function_cfg(function: &mut Function, no_predicates: &HashMap<Functio
     let mut context = Context {
         inserter: FunctionInserter::new(function),
         cfg,
-        store_values: HashMap::default(),
-        local_allocations: HashSet::new(),
         branch_ends,
         slice_sizes: HashMap::default(),
         condition_stack: Vec::new(),
         arguments_stack: Vec::new(),
+        local_allocations: HashSet::default(),
     };
     context.flatten(no_predicates);
 }
@@ -343,6 +329,7 @@ impl<'f> Context<'f> {
         // If this is not a separate variable, clippy gets confused and says the to_vec is
         // unnecessary, when removing it actually causes an aliasing/mutability error.
         let instructions = self.inserter.function.dfg[block].instructions().to_vec();
+
         for instruction in instructions.iter() {
             if self.is_no_predicate(no_predicates, instruction) {
                 // disable side effect for no_predicate functions
@@ -429,14 +416,12 @@ impl<'f> Context<'f> {
         let old_condition = *condition;
         let then_condition = self.inserter.resolve(old_condition);
 
-        let old_stores = std::mem::take(&mut self.store_values);
         let old_allocations = std::mem::take(&mut self.local_allocations);
         let branch = ConditionalBranch {
             old_condition,
             condition: self.link_condition(then_condition),
-            store_values: old_stores,
-            local_allocations: old_allocations,
             last_block: *then_destination,
+            local_allocations: old_allocations,
         };
         let cond_context = ConditionalContext {
             condition: then_condition,
@@ -473,19 +458,12 @@ impl<'f> Context<'f> {
         );
         let else_condition = self.link_condition(else_condition);
 
-        // Make sure the else branch sees the previous values of each store
-        // rather than any values created in the 'then' branch.
-        let old_stores = std::mem::take(&mut cond_context.then_branch.store_values);
-        cond_context.then_branch.store_values = std::mem::take(&mut self.store_values);
-        self.undo_stores_in_then_branch(&cond_context.then_branch.store_values);
-
         let old_allocations = std::mem::take(&mut self.local_allocations);
         let else_branch = ConditionalBranch {
             old_condition: cond_context.then_branch.old_condition,
             condition: else_condition,
-            store_values: old_stores,
-            local_allocations: old_allocations,
             last_block: *block,
+            local_allocations: old_allocations,
         };
         cond_context.then_branch.local_allocations.clear();
         cond_context.else_branch = Some(else_branch);
@@ -509,10 +487,8 @@ impl<'f> Context<'f> {
         }
 
         let mut else_branch = cond_context.else_branch.unwrap();
-        let stores_in_branch = std::mem::replace(&mut self.store_values, else_branch.store_values);
         self.local_allocations = std::mem::take(&mut else_branch.local_allocations);
         else_branch.last_block = *block;
-        else_branch.store_values = stores_in_branch;
         cond_context.else_branch = Some(else_branch);
 
         // We must remember to reset whether side effects are enabled when both branches
@@ -580,8 +556,6 @@ impl<'f> Context<'f> {
                 .first()
         });
 
-        let call_stack = cond_context.call_stack;
-        self.merge_stores(cond_context.then_branch, cond_context.else_branch, call_stack);
         self.arguments_stack.pop();
         self.arguments_stack.pop();
         self.arguments_stack.push(args);
@@ -636,120 +610,37 @@ impl<'f> Context<'f> {
         self.insert_instruction_with_typevars(enable_side_effects, None, call_stack);
     }
 
-    /// Merge any store instructions found in each branch.
-    ///
-    /// This function relies on the 'then' branch being merged before the 'else' branch of a jmpif
-    /// instruction. If this ordering is changed, the ordering that store values are merged within
-    /// this function also needs to be changed to reflect that.
-    fn merge_stores(
-        &mut self,
-        then_branch: ConditionalBranch,
-        else_branch: Option<ConditionalBranch>,
-        call_stack: CallStack,
-    ) {
-        // Address -> (then_value, else_value, value_before_the_if)
-        let mut new_map = BTreeMap::new();
-
-        for (address, store) in then_branch.store_values {
-            new_map.insert(address, (store.new_value, store.old_value, store.old_value));
-        }
-
-        if else_branch.is_some() {
-            for (address, store) in else_branch.clone().unwrap().store_values {
-                if let Some(entry) = new_map.get_mut(&address) {
-                    entry.1 = store.new_value;
-                } else {
-                    new_map.insert(address, (store.old_value, store.new_value, store.old_value));
-                }
-            }
-        }
-
-        let then_condition = then_branch.condition;
-        let block = self.inserter.function.entry_block();
-
-        // Merging must occur in a separate loop as we cannot borrow `self` as mutable while `value_merger` does
-        let mut new_values = HashMap::default();
-        for (address, (then_case, else_case, _)) in &new_map {
-            let instruction = Instruction::IfElse {
-                then_condition,
-                then_value: *then_case,
-                else_value: *else_case,
-            };
-            let dfg = &mut self.inserter.function.dfg;
-            let value = dfg
-                .insert_instruction_and_results(instruction, block, None, call_stack.clone())
-                .first();
-
-            new_values.insert(address, value);
-        }
-
-        // Replace stores with new merged values
-        for (address, (_, _, old_value)) in &new_map {
-            let value = new_values[address];
-            let address = *address;
-            self.insert_instruction_with_typevars(
-                Instruction::Store { address, value },
-                None,
-                call_stack.clone(),
-            );
-
-            if let Some(store) = self.store_values.get_mut(&address) {
-                store.new_value = value;
-            } else {
-                self.store_values.insert(
-                    address,
-                    Store {
-                        old_value: *old_value,
-                        new_value: value,
-                        call_stack: call_stack.clone(),
-                    },
-                );
-            }
-        }
-    }
-
-    fn remember_store(&mut self, address: ValueId, new_value: ValueId, call_stack: CallStack) {
-        if !self.local_allocations.contains(&address) {
-            if let Some(store_value) = self.store_values.get_mut(&address) {
-                store_value.new_value = new_value;
-            } else {
-                let load = Instruction::Load { address };
-
-                let load_type = Some(vec![self.inserter.function.dfg.type_of_value(new_value)]);
-                let old_value = self
-                    .insert_instruction_with_typevars(load.clone(), load_type, call_stack.clone())
-                    .first();
-
-                self.store_values.insert(address, Store { old_value, new_value, call_stack });
-            }
-        }
-    }
-
     /// Push the given instruction to the end of the entry block of the current function.
     ///
     /// Note that each ValueId of the instruction will be mapped via self.inserter.resolve.
     /// As a result, the instruction that will be pushed will actually be a new instruction
     /// with a different InstructionId from the original. The results of the given instruction
     /// will also be mapped to the results of the new instruction.
-    fn push_instruction(&mut self, id: InstructionId) -> Vec<ValueId> {
+    ///
+    /// If the pushed instruction is an `Allocate`, its result is recorded in
+    /// `self.local_allocations` so that stores to it within the current branch are not merged
+    /// with a previous value.
+    fn push_instruction(&mut self, id: InstructionId) {
         let (instruction, call_stack) = self.inserter.map_instruction(id);
         let instruction = self.handle_instruction_side_effects(instruction, call_stack.clone());
-        let is_allocate = matches!(instruction, Instruction::Allocate);
 
+        let instruction_is_allocate = matches!(&instruction, Instruction::Allocate);
         let entry = self.inserter.function.entry_block();
         let results = self.inserter.push_instruction_value(instruction, id, entry, call_stack);
 
         // Remember an allocate was created local to this branch so that we do not try to merge store
         // values across branches for it later.
-        if is_allocate {
+        if instruction_is_allocate {
             self.local_allocations.insert(results.first());
         }
-
-        results.results().into_owned()
     }
 
     /// If we are currently in a branch, we need to modify constrain instructions
     /// to multiply them by the branch's condition (see optimization #1 in the module comment).
+    ///
+    /// Stores are rewritten as well: unless the address is in `self.local_allocations`, the
+    /// stored value is replaced by an `IfElse` on the current condition that selects between the
+    /// new value and the value loaded from the address just before the store.
     fn handle_instruction_side_effects(
         &mut self,
         instruction: Instruction,
@@ -782,8 +673,32 @@ impl<'f> Context<'f> {
                     Instruction::Constrain(lhs, rhs, message)
                 }
                 Instruction::Store { address, value } => {
-                    self.remember_store(address, value, call_stack);
-                    Instruction::Store { address, value }
+                    // If this store targets an address allocated within the current branch,
+                    // there is no previous value to load and we don't need a merge anyway.
+                    if self.local_allocations.contains(&address) {
+                        Instruction::Store { address, value }
+                    } else {
+                        // Instead of storing `value`, store `if condition { value } else { previous_value }`
+                        let typ = self.inserter.function.dfg.type_of_value(value);
+                        let load = Instruction::Load { address };
+                        let previous_value = self
+                            .insert_instruction_with_typevars(
+                                load,
+                                Some(vec![typ]),
+                                call_stack.clone(),
+                            )
+                            .first();
+
+                        let instruction = Instruction::IfElse {
+                            then_condition: condition,
+                            then_value: value,
+
+                            else_value: previous_value,
+                        };
+
+                        let updated_value = self.insert_instruction(instruction, call_stack);
+                        Instruction::Store { address, value: updated_value }
+                    }
                 }
                 Instruction::RangeCheck { value, max_bit_size, assert_message } => {
                     // Replace value with `value * predicate` to zero out value when predicate is inactive.
@@ -905,22 +820,10 @@ impl<'f> Context<'f> {
             call_stack,
         )
     }
-
-    fn undo_stores_in_then_branch(&mut self, store_values: &HashMap<ValueId, Store>) {
-        for (address, store) in store_values {
-            let address = *address;
-            let value = store.old_value;
-            let instruction = Instruction::Store { address, value };
-            // Considering the location of undoing a store to be the same as the original store.
-            self.insert_instruction_with_typevars(instruction, None, store.call_stack.clone());
-        }
-    }
 }
 
 #[cfg(test)]
 mod test {
-    use std::sync::Arc;
-
     use acvm::{acir::AcirField, FieldElement};
 
     use crate::ssa::{
@@ -1023,15 +926,13 @@ mod test {
               b0(v0: u1, v1: &mut Field):
                 enable_side_effects v0
                 v2 = load v1 -> Field
-                store Field 5 at v1
-                v4 = not v0
-                store v2 at v1
+                v3 = cast v0 as Field
+                v5 = sub Field 5, v2
+                v6 = mul v3, v5
+                v7 = add v2, v6
+                store v7 at v1
+                v8 = not v0
                 enable_side_effects u1 1
-                v6 = cast v0 as Field
-                v7 = sub Field 5, v2
-                v8 = mul v6, v7
-                v9 = add v2, v8
-                store v9 at v1
                 return
             }
             ";
@@ -1062,17 +963,20 @@ mod test {
               b0(v0: u1, v1: &mut Field):
                 enable_side_effects v0
                 v2 = load v1 -> Field
-                store Field 5 at v1
-                v4 = not v0
-                store v2 at v1
-                enable_side_effects v4
-                v5 = load v1 -> Field
-                store Field 6 at v1
+                v3 = cast v0 as Field
+                v5 = sub Field 5, v2
+                v6 = mul v3, v5
+                v7 = add v2, v6
+                store v7 at v1
+                v8 = not v0
+                enable_side_effects v8
+                v9 = load v1 -> Field
+                v10 = cast v8 as Field
+                v12 = sub Field 6, v9
+                v13 = mul v10, v12
+                v14 = add v9, v13
+                store v14 at v1
                 enable_side_effects u1 1
-                v8 = cast v0 as Field
-                v10 = mul v8, Field -1
-                v11 = add Field 6, v10
-                store v11 at v1
                 return
             }
             ";
@@ -1203,7 +1107,12 @@ mod test {
         let merged_values = get_all_constants_reachable_from_instruction(&main.dfg, ret);
         assert_eq!(
             merged_values,
-            vec![FieldElement::from(3u128), FieldElement::from(6u128), -FieldElement::from(1u128)]
+            vec![
+                FieldElement::from(1u128),
+                FieldElement::from(3u128),
+                FieldElement::from(5u128),
+                FieldElement::from(6u128)
+            ]
         );
 
         assert_normalized_ssa_equals(ssa, expected);
@@ -1344,63 +1253,73 @@ mod test {
     fn should_not_merge_incorrectly_to_false() {
         // Regression test for #1792
         // Tests that it does not simplify a true constraint an always-false constraint
-        // acir(inline) fn main f1 {
-        //     b0(v0: [u8; 2]):
-        //       v5 = array_get v0, index u8 0
-        //       v6 = cast v5 as u32
-        //       v8 = truncate v6 to 1 bits, max_bit_size: 32
-        //       v9 = cast v8 as u1
-        //       v10 = allocate
-        //       store u8 0 at v10
-        //       jmpif v9 then: b2, else: b3
-        //     b2():
-        //       v12 = cast v5 as Field
-        //       v13 = add v12, Field 1
-        //       store v13 at v10
-        //       jmp b4()
-        //     b4():
-        //       constrain v9 == u1 1
-        //       return
-        //     b3():
-        //       store u8 0 at v10
-        //       jmp b4()
-        //   }
-        let main_id = Id::test_new(1);
-        let mut builder = FunctionBuilder::new("main".into(), main_id);
-        builder.insert_block(); // b0
-        let b1 = builder.insert_block();
-        let b2 = builder.insert_block();
-        let b3 = builder.insert_block();
-        let element_type = Arc::new(vec![Type::unsigned(8)]);
-        let array_type = Type::Array(element_type.clone(), 2);
-        let array = builder.add_parameter(array_type);
-        let zero = builder.numeric_constant(0_u128, Type::unsigned(8));
-        let v5 = builder.insert_array_get(array, zero, Type::unsigned(8));
-        let v6 = builder.insert_cast(v5, Type::unsigned(32));
-        let i_two = builder.numeric_constant(2_u128, Type::unsigned(32));
-        let v8 = builder.insert_binary(v6, BinaryOp::Mod, i_two);
-        let v9 = builder.insert_cast(v8, Type::bool());
-        let v10 = builder.insert_allocate(Type::field());
-        builder.insert_store(v10, zero);
-        builder.terminate_with_jmpif(v9, b1, b2);
-        builder.switch_to_block(b1);
-        let one = builder.field_constant(1_u128);
-        let v5b = builder.insert_cast(v5, Type::field());
-        let v13: Id<Value> = builder.insert_binary(v5b, BinaryOp::Add, one);
-        let v14 = builder.insert_cast(v13, Type::unsigned(8));
-        builder.insert_store(v10, v14);
-        builder.terminate_with_jmp(b3, vec![]);
-        builder.switch_to_block(b2);
-        builder.insert_store(v10, zero);
-        builder.terminate_with_jmp(b3, vec![]);
-        builder.switch_to_block(b3);
-        let v_true = builder.numeric_constant(true, Type::bool());
-        let v12 = builder.insert_binary(v9, BinaryOp::Eq, v_true);
-        builder.insert_constrain(v12, v_true, None);
-        builder.terminate_with_return(vec![]);
-        let ssa = builder.finish();
+        let src = "
+        acir(inline) fn main f0 {
+          b0(v0: [u8; 2]):
+            v2 = array_get v0, index u8 0 -> u8
+            v3 = cast v2 as u32
+            v4 = truncate v3 to 1 bits, max_bit_size: 32
+            v5 = cast v4 as u1
+            v6 = allocate -> &mut Field
+            store u8 0 at v6
+            jmpif v5 then: b2, else: b1
+          b2():
+            v7 = cast v2 as Field
+            v9 = add v7, Field 1
+            v10 = cast v9 as u8
+            store v10 at v6
+            jmp b3()
+          b3():
+            constrain v5 == u1 1
+            return
+          b1():
+            store u8 0 at v6
+            jmp b3()
+        }
+        ";
+
+        let ssa = Ssa::from_str(src).unwrap();
+
+        let expected = "
+        acir(inline) fn main f0 {
+          b0(v0: [u8; 2]):
+            v2 = array_get v0, index u8 0 -> u8
+            v3 = cast v2 as u32
+            v4 = truncate v3 to 1 bits, max_bit_size: 32
+            v5 = cast v4 as u1
+            v6 = allocate -> &mut Field
+            store u8 0 at v6
+            enable_side_effects v5
+            v7 = cast v2 as Field
+            v9 = add v7, Field 1
+            v10 = cast v9 as u8
+            v11 = load v6 -> u8
+            v12 = cast v4 as Field
+            v13 = cast v11 as Field
+            v14 = sub v9, v13
+            v15 = mul v12, v14
+            v16 = add v13, v15
+            v17 = cast v16 as u8
+            store v17 at v6
+            v18 = not v5
+            enable_side_effects v18
+            v19 = load v6 -> u8
+            v20 = cast v18 as Field
+            v21 = cast v19 as Field
+            v23 = sub Field 0, v21
+            v24 = mul v20, v23
+            v25 = add v21, v24
+            v26 = cast v25 as u8
+            store v26 at v6
+            enable_side_effects u1 1
+            constrain v5 == u1 1
+            return
+        }
+        ";
+
         let flattened_ssa = ssa.flatten_cfg();
         let main = flattened_ssa.main();
+
         // Now assert that there is not an always-false constraint after flattening:
         let mut constrain_count = 0;
         for instruction in main.dfg[main.entry_block()].instructions() {
@@ -1414,6 +1333,8 @@ mod test {
             }
         }
         assert_eq!(constrain_count, 1);
+
+        assert_normalized_ssa_equals(flattened_ssa, expected);
     }
 
     #[test]
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs
index 0690dbbf2042..53a31ae57c19 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs
@@ -18,6 +18,7 @@
 //!   - A reference with 0 aliases means we were unable to find which reference this reference
 //!     refers to. If such a reference is stored to, we must conservatively invalidate every
 //!     reference in the current block.
+//! - We also track the last load instruction to each address per block.
 //!
 //! From there, to figure out the value of each reference at the end of block, iterate each instruction:
 //! - On `Instruction::Allocate`:
@@ -28,6 +29,13 @@
 //!   - Furthermore, if the result of the load is a reference, mark the result as an alias
 //!     of the reference it dereferences to (if known).
 //!     - If which reference it dereferences to is not known, this load result has no aliases.
+//!   - We also track the last instance of a load instruction to each address in a block.
+//!     If we see that the last load instruction was from the same address as the current load instruction,
+//!     we move to replace the result of the current load with the result of the previous load.
+//!     This removal requires a couple conditions:
+//!     - No store occurs to that address before the next load,
+//!     - The address is not used as an argument to a call
+//!     This optimization helps us remove repeated loads for which there are not known values.
 //! - On `Instruction::Store { address, value }`:
 //!   - If the address of the store is known:
 //!     - If the address has exactly 1 alias:
@@ -40,11 +48,13 @@
 //!     - Conservatively mark every alias in the block to `Unknown`.
 //!   - Additionally, if there were no Loads to any alias of the address between this Store and
 //!     the previous Store to the same address, the previous store can be removed.
+//!   - Remove the instance of the last load instruction to the address and its aliases
 //! - On `Instruction::Call { arguments }`:
 //!   - If any argument of the call is a reference, set the value of each alias of that
 //!     reference to `Unknown`
 //!   - Any builtin functions that may return aliases if their input also contains a
 //!     reference should be tracked. Examples: `slice_push_back`, `slice_insert`, `slice_remove`, etc.
+//!   - Remove the instance of the last load instruction for any reference arguments and their aliases
 //!
 //! On a terminator instruction:
 //! - If the terminator is a `Jmp`:
@@ -274,6 +284,9 @@ impl<'f> PerFunctionContext<'f> {
         if let Some(first_predecessor) = predecessors.next() {
             let mut first = self.blocks.get(&first_predecessor).cloned().unwrap_or_default();
             first.last_stores.clear();
+            // Last loads are tracked per block. During unification we are creating a new block from the current one,
+            // so we must clear the last loads of the current block before we return the new block.
+            first.last_loads.clear();
 
             // Note that we have to start folding with the first block as the accumulator.
             // If we started with an empty block, an empty block union'd with any other block
@@ -410,6 +423,28 @@ impl<'f> PerFunctionContext<'f> {
 
                     self.last_loads.insert(address, (instruction, block_id));
                 }
+
+                // Check whether the block has a repeat load from the same address (w/ no calls or stores in between the loads).
+                // If we do have a repeat load, we can remove the current load and map its result to the previous load's result.
+                if let Some(last_load) = references.last_loads.get(&address) {
+                    let Instruction::Load { address: previous_address } =
+                        &self.inserter.function.dfg[*last_load]
+                    else {
+                        panic!("Expected a Load instruction here");
+                    };
+                    let result = self.inserter.function.dfg.instruction_results(instruction)[0];
+                    let previous_result =
+                        self.inserter.function.dfg.instruction_results(*last_load)[0];
+                    if *previous_address == address {
+                        self.inserter.map_value(result, previous_result);
+                        self.instructions_to_remove.insert(instruction);
+                    }
+                }
+                // We want to set the load for every load even if the address has a known value
+                // and the previous load instruction was removed.
+                // We are safe to still remove a repeat load in this case as we are mapping from the current load's
+                // result to the previous load, which if it was removed should already have a mapping to the known value.
+                references.set_last_load(address, instruction);
             }
             Instruction::Store { address, value } => {
                 let address = self.inserter.function.dfg.resolve(*address);
@@ -435,6 +470,8 @@ impl<'f> PerFunctionContext<'f> {
                 }
 
                 references.set_known_value(address, value);
+                // If we see a store to an address, the last load to that address needs to remain.
+                references.keep_last_load_for(address, self.inserter.function);
                 references.last_stores.insert(address, instruction);
             }
             Instruction::Allocate => {
@@ -542,6 +579,9 @@ impl<'f> PerFunctionContext<'f> {
                 let value = self.inserter.function.dfg.resolve(*value);
                 references.set_unknown(value);
                 references.mark_value_used(value, self.inserter.function);
+
+                // If a reference is an argument to a call, the last load to that address and its aliases needs to remain.
+                references.keep_last_load_for(value, self.inserter.function);
             }
         }
     }
@@ -572,6 +612,12 @@ impl<'f> PerFunctionContext<'f> {
                 let destination_parameters = self.inserter.function.dfg[*destination].parameters();
                 assert_eq!(destination_parameters.len(), arguments.len());
 
+                // If we have multiple parameters that alias the same argument value,
+                // then those parameters also alias each other.
+                // We save parameters with repeat arguments to later mark those
+                // parameters as aliasing one another.
+                let mut arg_set: HashMap<ValueId, BTreeSet<ValueId>> = HashMap::default();
+
                 // Add an alias for each reference parameter
                 for (parameter, argument) in destination_parameters.iter().zip(arguments) {
                     if self.inserter.function.dfg.value_is_reference(*parameter) {
@@ -581,10 +627,27 @@ impl<'f> PerFunctionContext<'f> {
                             if let Some(aliases) = references.aliases.get_mut(expression) {
                                 // The argument reference is possibly aliased by this block parameter
                                 aliases.insert(*parameter);
+
+                                // Check if we have seen the same argument
+                                let seen_parameters = arg_set.entry(argument).or_default();
+                                // Add the current parameter to the parameters we have seen for this argument.
+                                // The previous parameters and the current one alias one another.
+                                seen_parameters.insert(*parameter);
                             }
                         }
                     }
                 }
+
+                // Set the aliases of the parameters
+                for (_, aliased_params) in arg_set {
+                    for param in aliased_params.iter() {
+                        self.set_aliases(
+                            references,
+                            *param,
+                            AliasSet::known_multiple(aliased_params.clone()),
+                        );
+                    }
+                }
             }
             TerminatorInstruction::Return { return_values, .. } => {
                 // Removing all `last_stores` for each returned reference is more important here
@@ -612,6 +675,8 @@ mod tests {
             map::Id,
             types::Type,
         },
+        opt::assert_normalized_ssa_equals,
+        Ssa,
     };
 
     #[test]
@@ -822,88 +887,53 @@ mod tests {
     // is later stored in a successor block
     #[test]
     fn load_aliases_in_predecessor_block() {
-        // fn main {
-        //     b0():
-        //       v0 = allocate
-        //       store Field 0 at v0
-        //       v2 = allocate
-        //       store v0 at v2
-        //       v3 = load v2
-        //       v4 = load v2
-        //       jmp b1()
-        //     b1():
-        //       store Field 1 at v3
-        //       store Field 2 at v4
-        //       v7 = load v3
-        //       v8 = eq v7, Field 2
-        //       return
-        // }
-        let main_id = Id::test_new(0);
-        let mut builder = FunctionBuilder::new("main".into(), main_id);
-
-        let v0 = builder.insert_allocate(Type::field());
-
-        let zero = builder.field_constant(0u128);
-        builder.insert_store(v0, zero);
-
-        let v2 = builder.insert_allocate(Type::Reference(Arc::new(Type::field())));
-        builder.insert_store(v2, v0);
-
-        let v3 = builder.insert_load(v2, Type::field());
-        let v4 = builder.insert_load(v2, Type::field());
-        let b1 = builder.insert_block();
-        builder.terminate_with_jmp(b1, vec![]);
-
-        builder.switch_to_block(b1);
-
-        let one = builder.field_constant(1u128);
-        builder.insert_store(v3, one);
-
-        let two = builder.field_constant(2u128);
-        builder.insert_store(v4, two);
-
-        let v8 = builder.insert_load(v3, Type::field());
-        let _ = builder.insert_binary(v8, BinaryOp::Eq, two);
-
-        builder.terminate_with_return(vec![]);
-
-        let ssa = builder.finish();
-        assert_eq!(ssa.main().reachable_blocks().len(), 2);
+        let src = "
+        acir(inline) fn main f0 {
+          b0():
+            v0 = allocate -> &mut Field
+            store Field 0 at v0
+            v2 = allocate -> &mut &mut Field
+            store v0 at v2
+            v3 = load v2 -> &mut Field
+            v4 = load v2 -> &mut Field
+            jmp b1()
+          b1():
+            store Field 1 at v3
+            store Field 2 at v4
+            v7 = load v3 -> Field
+            v8 = eq v7, Field 2
+            return
+        }
+        ";
 
-        // Expected result:
-        // acir fn main f0 {
-        //   b0():
-        //     v9 = allocate
-        //     store Field 0 at v9
-        //     v10 = allocate
-        //     jmp b1()
-        //   b1():
-        //     return
-        // }
-        let ssa = ssa.mem2reg();
-        println!("{}", ssa);
+        let mut ssa = Ssa::from_str(src).unwrap();
+        let main = ssa.main_mut();
 
-        let main = ssa.main();
-        assert_eq!(main.reachable_blocks().len(), 2);
+        let instructions = main.dfg[main.entry_block()].instructions();
+        assert_eq!(instructions.len(), 6); // The final return is not counted
 
         // All loads should be removed
-        assert_eq!(count_loads(main.entry_block(), &main.dfg), 0);
-        assert_eq!(count_loads(b1, &main.dfg), 0);
-
         // The first store is not removed as it is used as a nested reference in another store.
-        // We would need to track whether the store where `v9` is the store value gets removed to know whether
+        // We would need to track whether the store where `v0` is the store value gets removed to know whether
         // to remove it.
-        assert_eq!(count_stores(main.entry_block(), &main.dfg), 1);
         // The first store in b1 is removed since there is another store to the same reference
         // in the same block, and the store is not needed before the later store.
         // The rest of the stores are also removed as no loads are done within any blocks
         // to the stored values.
-        assert_eq!(count_stores(b1, &main.dfg), 0);
-
-        let b1_instructions = main.dfg[b1].instructions();
+        let expected = "
+        acir(inline) fn main f0 {
+          b0():
+            v0 = allocate -> &mut Field
+            store Field 0 at v0
+            v2 = allocate -> &mut &mut Field
+            jmp b1()
+          b1():
+            return
+        }
+        ";
 
-        // We expect the last eq to be optimized out
-        assert_eq!(b1_instructions.len(), 0);
+        let ssa = ssa.mem2reg();
+        assert_normalized_ssa_equals(ssa, expected);
     }
 
     #[test]
@@ -933,7 +963,7 @@ mod tests {
         //       v10 = eq v9, Field 2
         //       constrain v9 == Field 2
         //       v11 = load v2
-        //       v12 = load v10
+        //       v12 = load v11
         //       v13 = eq v12, Field 2
         //       constrain v11 == Field 2
         //       return
@@ -992,7 +1022,7 @@ mod tests {
         let main = ssa.main();
         assert_eq!(main.reachable_blocks().len(), 4);
 
-        // The store from the original SSA should remain
+        // The stores from the original SSA should remain
         assert_eq!(count_stores(main.entry_block(), &main.dfg), 2);
         assert_eq!(count_stores(b2, &main.dfg), 1);
 
@@ -1039,4 +1069,160 @@ mod tests {
         let main = ssa.main();
         assert_eq!(count_loads(main.entry_block(), &main.dfg), 1);
     }
+
+    #[test]
+    fn remove_repeat_loads() {
+        // This test starts with two loads from the same address, whose value is unknown.
+        // Specifically you should look for `load v2` in `b3`.
+        // We should be able to remove the second repeated load.
+        let src = "
+        acir(inline) fn main f0 {
+          b0():
+            v0 = allocate -> &mut Field
+            store Field 0 at v0
+            v2 = allocate -> &mut &mut Field
+            store v0 at v2
+            jmp b1(Field 0)
+          b1(v3: Field):
+            v4 = eq v3, Field 0
+            jmpif v4 then: b2, else: b3
+          b2():
+            v5 = load v2 -> &mut Field
+            store Field 2 at v5
+            v8 = add v3, Field 1
+            jmp b1(v8)
+          b3():
+            v9 = load v0 -> Field
+            v10 = eq v9, Field 2
+            constrain v9 == Field 2
+            v11 = load v2 -> &mut Field
+            v12 = load v2 -> &mut Field
+            v13 = load v12 -> Field
+            v14 = eq v13, Field 2
+            constrain v13 == Field 2
+            return
+        }
+        ";
+
+        let ssa = Ssa::from_str(src).unwrap();
+
+        // The repeated load from `v2` in `src` (`v3` in `expected`) should be removed.
+        // `b3` in `src` (`b2` in `expected`) should only have three loads now rather than four.
+        //
+        // All stores are expected to remain.
+        let expected = "
+        acir(inline) fn main f0 {
+          b0():
+            v1 = allocate -> &mut Field
+            store Field 0 at v1
+            v3 = allocate -> &mut &mut Field
+            store v1 at v3
+            jmp b1(Field 0)
+          b1(v0: Field):
+            v4 = eq v0, Field 0
+            jmpif v4 then: b3, else: b2
+          b3():
+            v11 = load v3 -> &mut Field
+            store Field 2 at v11
+            v13 = add v0, Field 1
+            jmp b1(v13)
+          b2():
+            v5 = load v1 -> Field
+            v7 = eq v5, Field 2
+            constrain v5 == Field 2
+            v8 = load v3 -> &mut Field
+            v9 = load v8 -> Field
+            v10 = eq v9, Field 2
+            constrain v9 == Field 2
+            return
+        }
+        ";
+
+        let ssa = ssa.mem2reg();
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn keep_repeat_loads_passed_to_a_call() {
+        // The test is the exact same as `remove_repeat_loads` above except with the call
+        // to `f1` between the repeated loads.
+        let src = "
+        acir(inline) fn main f0 {
+          b0():
+            v1 = allocate -> &mut Field
+            store Field 0 at v1
+            v3 = allocate -> &mut &mut Field
+            store v1 at v3
+            jmp b1(Field 0)
+          b1(v0: Field):
+            v4 = eq v0, Field 0
+            jmpif v4 then: b3, else: b2
+          b3():
+            v13 = load v3 -> &mut Field
+            store Field 2 at v13
+            v15 = add v0, Field 1
+            jmp b1(v15)
+          b2():
+            v5 = load v1 -> Field
+            v7 = eq v5, Field 2
+            constrain v5 == Field 2
+            v8 = load v3 -> &mut Field
+            call f1(v3)
+            v10 = load v3 -> &mut Field
+            v11 = load v10 -> Field
+            v12 = eq v11, Field 2
+            constrain v11 == Field 2
+            return
+        }
+        acir(inline) fn foo f1 {
+          b0(v0: &mut Field):
+            return
+        }  
+        ";
+
+        let ssa = Ssa::from_str(src).unwrap();
+
+        let ssa = ssa.mem2reg();
+        // We expect the program to be unchanged
+        assert_normalized_ssa_equals(ssa, src);
+    }
+
+    #[test]
+    fn keep_repeat_loads_with_alias_store() {
+        // v1, v2, and v3 alias one another. We want to make sure that a repeat load to v1 with a store
+        // to its aliases in between the repeat loads does not remove those loads.
+        let src = "
+        acir(inline) fn main f0 {
+          b0(v0: u1):
+            jmpif v0 then: b2, else: b1
+          b2():
+            v6 = allocate -> &mut Field
+            store Field 0 at v6
+            jmp b3(v6, v6, v6)
+          b3(v1: &mut Field, v2: &mut Field, v3: &mut Field):
+            v8 = load v1 -> Field
+            store Field 2 at v2
+            v10 = load v1 -> Field
+            store Field 1 at v3
+            v11 = load v1 -> Field
+            store Field 3 at v3
+            v13 = load v1 -> Field
+            constrain v8 == Field 0
+            constrain v10 == Field 2
+            constrain v11 == Field 1
+            constrain v13 == Field 3
+            return
+          b1():
+            v4 = allocate -> &mut Field
+            store Field 1 at v4
+            jmp b3(v4, v4, v4)
+        }
+        ";
+
+        let ssa = Ssa::from_str(src).unwrap();
+
+        let ssa = ssa.mem2reg();
+        // We expect the program to be unchanged
+        assert_normalized_ssa_equals(ssa, src);
+    }
 }
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs
index 0c6041029dab..ddc3365b5519 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs
@@ -172,6 +172,7 @@ impl<'a> FunctionContext<'a> {
     /// Always returns a Value::Mutable wrapping the allocate instruction.
     pub(super) fn new_mutable_variable(&mut self, value_to_store: ValueId) -> Value {
         let element_type = self.builder.current_function.dfg.type_of_value(value_to_store);
+        self.builder.increment_array_reference_count(value_to_store);
         let alloc = self.builder.insert_allocate(element_type);
         self.builder.insert_store(alloc, value_to_store);
         let typ = self.builder.type_of_value(value_to_store);
@@ -735,7 +736,6 @@ impl<'a> FunctionContext<'a> {
             // Reference counting in brillig relies on us incrementing reference
             // counts when arrays/slices are constructed or indexed.
             // Thus, if we dereference an lvalue which happens to be array/slice we should increment its reference counter.
-            self.builder.increment_array_reference_count(reference);
             self.builder.insert_load(reference, element_type).into()
         })
     }
@@ -916,7 +916,10 @@ impl<'a> FunctionContext<'a> {
         let parameters = self.builder.current_function.dfg.block_parameters(entry).to_vec();
 
         for parameter in parameters {
-            self.builder.increment_array_reference_count(parameter);
+            // Avoid reference counts for immutable arrays that aren't behind references.
+            if self.builder.current_function.dfg.value_is_reference(parameter) {
+                self.builder.increment_array_reference_count(parameter);
+            }
         }
 
         entry
@@ -933,7 +936,9 @@ impl<'a> FunctionContext<'a> {
         dropped_parameters.retain(|parameter| !terminator_args.contains(parameter));
 
         for parameter in dropped_parameters {
-            self.builder.decrement_array_reference_count(parameter);
+            if self.builder.current_function.dfg.value_is_reference(parameter) {
+                self.builder.decrement_array_reference_count(parameter);
+            }
         }
     }
 
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs
index c50f0a7f45c5..d28236bd3608 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs
@@ -665,12 +665,11 @@ impl<'a> FunctionContext<'a> {
         values = values.map(|value| {
             let value = value.eval(self);
 
-            // Make sure to increment array reference counts on each let binding
-            self.builder.increment_array_reference_count(value);
-
             Tree::Leaf(if let_expr.mutable {
                 self.new_mutable_variable(value)
             } else {
+                // `new_mutable_variable` already increments rcs internally
+                self.builder.increment_array_reference_count(value);
                 value::Value::Normal(value)
             })
         });