perf: cache decoded instructions (#944)

Oppen · web-flow · commit c035797fedbb · 2023-05-05T22:27:05.000Z
Decoding instructions (including fetching them from memory, converting them
to `u64`, and later parsing them) takes up a significant portion of the
time spent executing the main loop. Caching them as they get decoded
alleviates this, reducing runtime in proof mode benchmarks by up to 9%.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -199,6 +199,9 @@
     %{ ids.full_word = int(ids.n_bytes >= 8) %}
     ```
 
+* perf: cache decoded instructions [#944](https://github.com/lambdaclass/cairo-rs/pull/944)
+    * Creates a new cache field in `VirtualMachine` that stores the `Instruction` instances as they get decoded from memory, significantly reducing decoding overhead, with gains up to 9% in runtime according to benchmarks in the performance server
+
 * Add alternative hint code for nondet_bigint3 hint [#1071](https://github.com/lambdaclass/cairo-rs/pull/1071)
 
     `BuiltinHintProcessor` now supports the following hint:
diff --git a/src/types/instruction.rs b/src/types/instruction.rs
@@ -4,13 +4,13 @@ use serde::{Deserialize, Serialize};
 
 use crate::vm::decoding::decoder::decode_instruction;
 
-#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
+#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq)]
 pub enum Register {
     AP,
     FP,
 }
 
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub struct Instruction {
     pub off0: isize,
     pub off1: isize,
@@ -25,46 +25,46 @@ pub struct Instruction {
     pub opcode: Opcode,
 }
 
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum Op1Addr {
     Imm,
     AP,
     FP,
     Op0,
 }
 
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum Res {
     Op1,
     Add,
     Mul,
     Unconstrained,
 }
 
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum PcUpdate {
     Regular,
     Jump,
     JumpRel,
     Jnz,
 }
 
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum ApUpdate {
     Regular,
     Add,
     Add1,
     Add2,
 }
 
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum FpUpdate {
     Regular,
     APPlus2,
     Dst,
 }
 
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum Opcode {
     NOp,
     AssertEq,
diff --git a/src/vm/vm_core.rs b/src/vm/vm_core.rs
@@ -81,6 +81,7 @@ pub struct VirtualMachine {
     trace_relocated: bool,
     skip_instruction_execution: bool,
     run_finished: bool,
+    instruction_cache: Vec<Option<Instruction>>,
     #[cfg(feature = "hooks")]
     pub(crate) hooks: crate::vm::hooks::Hooks,
 }
@@ -108,6 +109,7 @@ impl VirtualMachine {
             segments: MemorySegmentManager::new(),
             run_finished: false,
             trace_relocated: false,
+            instruction_cache: Vec::new(),
             #[cfg(feature = "hooks")]
             hooks: Default::default(),
         }
@@ -183,12 +185,12 @@ impl VirtualMachine {
 
     fn update_registers(
         &mut self,
-        instruction: Instruction,
+        instruction: &Instruction,
         operands: Operands,
     ) -> Result<(), VirtualMachineError> {
-        self.update_fp(&instruction, &operands)?;
-        self.update_ap(&instruction, &operands)?;
-        self.update_pc(&instruction, &operands)?;
+        self.update_fp(instruction, &operands)?;
+        self.update_ap(instruction, &operands)?;
+        self.update_pc(instruction, &operands)?;
         Ok(())
     }
 
@@ -381,11 +383,11 @@ impl VirtualMachine {
         Ok(())
     }
 
-    fn run_instruction(&mut self, instruction: Instruction) -> Result<(), VirtualMachineError> {
+    fn run_instruction(&mut self, instruction: &Instruction) -> Result<(), VirtualMachineError> {
         let (operands, operands_addresses, deduced_operands) =
-            self.compute_operands(&instruction)?;
+            self.compute_operands(instruction)?;
         self.insert_deduced_operands(deduced_operands, &operands, &operands_addresses)?;
-        self.opcode_assertions(&instruction, &operands)?;
+        self.opcode_assertions(instruction, &operands)?;
 
         if let Some(ref mut trace) = &mut self.trace {
             trace.push(TraceEntry {
@@ -438,13 +440,23 @@ impl VirtualMachine {
     }
 
     pub fn step_instruction(&mut self) -> Result<(), VirtualMachineError> {
-        let instruction = self.decode_current_instruction()?;
+        let pc = self.run_context.pc.offset;
+
+        let mut inst_cache = core::mem::take(&mut self.instruction_cache);
+        inst_cache.resize((pc + 1).max(inst_cache.len()), None);
+
+        let instruction = inst_cache.get_mut(pc).unwrap();
+        if instruction.is_none() {
+            *instruction = Some(self.decode_current_instruction()?);
+        }
+        let instruction = instruction.as_ref().unwrap();
         if !self.skip_instruction_execution {
             self.run_instruction(instruction)?;
         } else {
             self.run_context.pc += instruction.size();
             self.skip_instruction_execution = false;
         }
+        self.instruction_cache = inst_cache;
         Ok(())
     }
 
@@ -792,6 +804,9 @@ impl VirtualMachine {
         ptr: Relocatable,
         data: &Vec<MaybeRelocatable>,
     ) -> Result<Relocatable, MemoryError> {
+        if ptr.segment_index == 0 {
+            self.instruction_cache.resize(data.len(), None);
+        }
         self.segments.load_data(ptr, data)
     }
 
@@ -1067,6 +1082,7 @@ impl VirtualMachineBuilder {
             segments: self.segments,
             run_finished: self.run_finished,
             trace_relocated: false,
+            instruction_cache: Vec::new(),
             #[cfg(feature = "hooks")]
             hooks: self.hooks,
         }
@@ -1750,7 +1766,7 @@ mod tests {
         vm.run_context.fp = 6;
 
         assert_matches!(
-            vm.update_registers(instruction, operands),
+            vm.update_registers(&instruction, operands),
             Ok::<(), VirtualMachineError>(())
         );
         assert_eq!(vm.run_context.pc, Relocatable::from((0, 5)));
@@ -1786,7 +1802,7 @@ mod tests {
         run_context!(vm, 4, 5, 6);
 
         assert_matches!(
-            vm.update_registers(instruction, operands),
+            vm.update_registers(&instruction, operands),
             Ok::<(), VirtualMachineError>(())
         );
         assert_eq!(vm.run_context.pc, Relocatable::from((0, 12)));