diff --git a/bins/revme/src/cmd/bytecode.rs b/bins/revme/src/cmd/bytecode.rs index 352e3519de..69e6eccd93 100644 --- a/bins/revme/src/cmd/bytecode.rs +++ b/bins/revme/src/cmd/bytecode.rs @@ -39,9 +39,9 @@ impl Cmd { /// Runs statetest command. pub fn run(&self) { let container_kind = if self.eof_initcode { - Some(CodeType::ReturnContract) + Some(CodeType::Initcode) } else if self.eof_runtime { - Some(CodeType::ReturnOrStop) + Some(CodeType::Runtime) } else { None }; diff --git a/bins/revme/src/cmd/eofvalidation.rs b/bins/revme/src/cmd/eofvalidation.rs index 7561ba2ad2..626ee9ed16 100644 --- a/bins/revme/src/cmd/eofvalidation.rs +++ b/bins/revme/src/cmd/eofvalidation.rs @@ -77,9 +77,9 @@ pub fn run_test(path: &Path) -> Result<(), Error> { } test_sum += 1; let kind = if test_vector.container_kind.is_some() { - Some(CodeType::ReturnContract) + Some(CodeType::Initcode) } else { - Some(CodeType::ReturnOrStop) + Some(CodeType::Runtime) }; // In future this can be generalized to cover multiple forks, Not just Osaka. let Some(test_result) = test_vector.results.get("Osaka") else { diff --git a/crates/bytecode/src/bytecode.rs b/crates/bytecode/src/bytecode.rs index 00a9fb1aac..4e27228497 100644 --- a/crates/bytecode/src/bytecode.rs +++ b/crates/bytecode/src/bytecode.rs @@ -7,7 +7,7 @@ use core::fmt::Debug; use primitives::{keccak256, Address, Bytes, B256, KECCAK_EMPTY}; use std::sync::Arc; -/// State of the [`Bytecode`] analysis +/// Main bytecode structure with all variants. #[derive(Clone, Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum Bytecode { @@ -81,7 +81,7 @@ impl Bytecode { /// /// # Panics /// - /// Panics if bytecode is in incorrect format. + /// Panics if bytecode is in incorrect format. If you want to handle errors use [`Self::new_raw_checked`]. 
#[inline] pub fn new_raw(bytecode: Bytes) -> Self { Self::new_raw_checked(bytecode).expect("Expect correct EOF bytecode") @@ -114,15 +114,10 @@ impl Bytecode { /// Create new checked bytecode. /// - /// # Safety + /// # Panics /// - /// Bytecode needs to end with `STOP` (`0x00`) opcode as checked bytecode assumes - /// that it is safe to iterate over bytecode without checking lengths. - pub unsafe fn new_analyzed( - bytecode: Bytes, - original_len: usize, - jump_table: JumpTable, - ) -> Self { + /// For possible panics see [`LegacyAnalyzedBytecode::new`]. + pub fn new_analyzed(bytecode: Bytes, original_len: usize, jump_table: JumpTable) -> Self { Self::LegacyAnalyzed(LegacyAnalyzedBytecode::new( bytecode, original_len, @@ -156,7 +151,7 @@ impl Bytecode { self.bytes_ref().clone() } - /// Returns bytes. + /// Returns raw bytes reference. #[inline] pub fn bytes_ref(&self) -> &Bytes { match self { @@ -166,13 +161,13 @@ impl Bytecode { } } - /// Returns bytes slice. + /// Returns raw bytes slice. #[inline] pub fn bytes_slice(&self) -> &[u8] { self.bytes_ref() } - /// Returns a reference to the original bytecode. + /// Returns the original bytecode. #[inline] pub fn original_bytes(&self) -> Bytes { match self { diff --git a/crates/bytecode/src/eof.rs b/crates/bytecode/src/eof.rs index 2e167664f3..101df12105 100644 --- a/crates/bytecode/src/eof.rs +++ b/crates/bytecode/src/eof.rs @@ -1,13 +1,13 @@ mod body; +mod code_info; mod decode_helpers; mod header; pub mod printer; -mod types_section; pub mod verification; pub use body::EofBody; +pub use code_info::CodeInfo; pub use header::EofHeader; -pub use types_section::TypesSection; pub use verification::*; use core::cmp::min; @@ -39,7 +39,7 @@ impl Default for Eof { fn default() -> Self { let body = EofBody { // Types section with zero inputs, zero outputs and zero max stack size. 
- types_section: vec![TypesSection::default()], + code_info: vec![CodeInfo::default()], code_section: vec![1], // One code section with a STOP byte. code: Bytes::from_static(&[0x00]), @@ -136,10 +136,10 @@ pub enum EofDecodeError { MissingBodyWithoutData, /// Body size is more than specified in the header DanglingData, - /// Invalid types section data - InvalidTypesSection, - /// Invalid types section size - InvalidTypesSectionSize, + /// Invalid code info data + InvalidCodeInfo, + /// Invalid code info size + InvalidCodeInfoSize, /// Invalid EOF magic number InvalidEOFMagicNumber, /// Invalid EOF version @@ -154,8 +154,8 @@ pub enum EofDecodeError { InvalidDataKind, /// Invalid kind after code InvalidKindAfterCode, - /// Mismatch of code and types sizes - MismatchCodeAndTypesSize, + /// Mismatch of code and info sizes + MismatchCodeAndInfoSize, /// There should be at least one size NonSizes, /// Missing size @@ -178,8 +178,8 @@ impl fmt::Display for EofDecodeError { Self::MissingInput => "Short input while processing EOF", Self::MissingBodyWithoutData => "Short body while processing EOF", Self::DanglingData => "Body size is more than specified in the header", - Self::InvalidTypesSection => "Invalid types section data", - Self::InvalidTypesSectionSize => "Invalid types section size", + Self::InvalidCodeInfo => "Invalid types section data", + Self::InvalidCodeInfoSize => "Invalid types section size", Self::InvalidEOFMagicNumber => "Invalid EOF magic number", Self::InvalidEOFVersion => "Invalid EOF version", Self::InvalidTypesKind => "Invalid number for types kind", @@ -187,7 +187,7 @@ impl fmt::Display for EofDecodeError { Self::InvalidTerminalByte => "Invalid terminal code", Self::InvalidDataKind => "Invalid data kind", Self::InvalidKindAfterCode => "Invalid kind after code", - Self::MismatchCodeAndTypesSize => "Mismatch of code and types sizes", + Self::MismatchCodeAndInfoSize => "Mismatch of code and types sizes", Self::NonSizes => "There should be at least one 
size", Self::ShortInputForSizes => "Missing size", Self::ZeroSize => "Size cant be zero", diff --git a/crates/bytecode/src/eof/body.rs b/crates/bytecode/src/eof/body.rs index 4174d45f42..aa1c1c48e4 100644 --- a/crates/bytecode/src/eof/body.rs +++ b/crates/bytecode/src/eof/body.rs @@ -1,4 +1,4 @@ -use super::{Eof, EofDecodeError, EofHeader, TypesSection}; +use super::{CodeInfo, Eof, EofDecodeError, EofHeader}; use primitives::Bytes; use std::vec::Vec; @@ -11,7 +11,7 @@ use std::vec::Vec; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct EofBody { /// Code information - pub types_section: Vec, + pub code_info: Vec, /// Index of the last byte of each code section pub code_section: Vec, pub code: Bytes, @@ -37,7 +37,7 @@ impl EofBody { pub fn into_eof(self) -> Eof { let mut prev_value = 0; let header = EofHeader { - types_size: self.types_section.len() as u16 * 4, + types_size: self.code_info.len() as u16 * 4, code_sizes: self .code_section .iter() @@ -76,8 +76,8 @@ impl EofBody { /// Encodes this body into the given buffer. 
pub fn encode(&self, buffer: &mut Vec) { - for types_section in &self.types_section { - types_section.encode(buffer); + for code_info in &self.code_info { + code_info.encode(buffer); } buffer.extend_from_slice(&self.code); @@ -108,9 +108,9 @@ impl EofBody { let mut types_input = &input[header_len..]; for _ in 0..header.types_count() { - let (types_section, local_input) = TypesSection::decode(types_input)?; + let (code_info, local_input) = CodeInfo::decode(types_input)?; types_input = local_input; - body.types_section.push(types_section); + body.code_info.push(code_info); } // Extract code section diff --git a/crates/bytecode/src/eof/types_section.rs b/crates/bytecode/src/eof/code_info.rs similarity index 90% rename from crates/bytecode/src/eof/types_section.rs rename to crates/bytecode/src/eof/code_info.rs index c4c4ff50fa..31adfb21ab 100644 --- a/crates/bytecode/src/eof/types_section.rs +++ b/crates/bytecode/src/eof/code_info.rs @@ -10,7 +10,7 @@ const EOF_NON_RETURNING_FUNCTION: u8 = 0x80; /// Types section that contains stack information for matching code section #[derive(Debug, Clone, Default, Hash, PartialEq, Eq, Copy, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct TypesSection { +pub struct CodeInfo { /// `inputs` - 1 byte - `0x00-0x7F` /// /// Number of stack elements the code section consumes @@ -25,8 +25,8 @@ pub struct TypesSection { pub max_stack_size: u16, } -impl TypesSection { - /// Returns new `TypesSection` with the given inputs, outputs, and max_stack_size. +impl CodeInfo { + /// Returns new `CodeInfo` with the given inputs, outputs, and max_stack_size. pub fn new(inputs: u8, outputs: u8, max_stack_size: u16) -> Self { Self { inputs, @@ -72,10 +72,10 @@ impl TypesSection { /// Validates the section. 
pub fn validate(&self) -> Result<(), EofDecodeError> { if self.inputs > 0x7f || self.outputs > 0x80 || self.max_stack_size > 0x03FF { - return Err(EofDecodeError::InvalidTypesSection); + return Err(EofDecodeError::InvalidCodeInfo); } if self.inputs as u16 > self.max_stack_size { - return Err(EofDecodeError::InvalidTypesSection); + return Err(EofDecodeError::InvalidCodeInfo); } Ok(()) } diff --git a/crates/bytecode/src/eof/decode_helpers.rs b/crates/bytecode/src/eof/decode_helpers.rs index d2d2eb555d..5b033af852 100644 --- a/crates/bytecode/src/eof/decode_helpers.rs +++ b/crates/bytecode/src/eof/decode_helpers.rs @@ -1,6 +1,9 @@ use super::EofDecodeError; -/// Consumes a u8 from the input. +/// Consumes a single byte from the input slice and returns a tuple containing the remaining input slice +/// and the consumed byte as a u8. +/// +/// Returns `EofDecodeError::MissingInput` if the input slice is empty. #[inline] pub(crate) fn consume_u8(input: &[u8]) -> Result<(&[u8], u8), EofDecodeError> { if input.is_empty() { @@ -10,6 +13,8 @@ pub(crate) fn consume_u8(input: &[u8]) -> Result<(&[u8], u8), EofDecodeError> { } /// Consumes a u16 from the input. +/// +/// Returns `EofDecodeError::MissingInput` if the input slice is less than 2 bytes. 
#[inline] pub(crate) fn consume_u16(input: &[u8]) -> Result<(&[u8], u16), EofDecodeError> { if input.len() < 2 { diff --git a/crates/bytecode/src/eof/header.rs b/crates/bytecode/src/eof/header.rs index 37b40226bb..023c496c93 100644 --- a/crates/bytecode/src/eof/header.rs +++ b/crates/bytecode/src/eof/header.rs @@ -4,7 +4,7 @@ use super::{ }; use std::vec::Vec; -/// EOF Header containing +/// EOF header structure that contains section sizes and metadata #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct EofHeader { @@ -22,14 +22,14 @@ pub struct EofHeader { pub container_sizes: Vec, /// EOF data size pub data_size: u16, - /// Sum code sizes + /// Sum of code sizes pub sum_code_sizes: usize, - /// Sum container sizes + /// Sum of container sizes pub sum_container_sizes: usize, } const KIND_TERMINAL: u8 = 0; -const KIND_TYPES: u8 = 1; +const KIND_CODE_INFO: u8 = 1; const KIND_CODE: u8 = 2; const KIND_CONTAINER: u8 = 3; const KIND_DATA: u8 = 4; @@ -113,7 +113,7 @@ impl EofHeader { // `version` 1 byte 0x01 EOF version buffer.push(0x01); // `kind_types` 1 byte 0x01 kind marker for types size section - buffer.push(KIND_TYPES); + buffer.push(KIND_CODE_INFO); // `types_size` 2 bytes 0x0004-0xFFFF buffer.extend_from_slice(&self.types_size.to_be_bytes()); // `kind_code` 1 byte 0x02 kind marker for code size section @@ -159,8 +159,8 @@ impl EofHeader { } // `kind_types` 1 byte 0x01 kind marker for types size section - let (input, kind_types) = consume_u8(input)?; - if kind_types != KIND_TYPES { + let (input, kind_code_info) = consume_u8(input)?; + if kind_code_info != KIND_CODE_INFO { return Err(EofDecodeError::InvalidTypesKind); } @@ -170,12 +170,12 @@ impl EofHeader { header.types_size = types_size; if header.types_size % 4 != 0 { - return Err(EofDecodeError::InvalidTypesSection); + return Err(EofDecodeError::InvalidCodeInfo); } // `kind_code` 1 byte 0x02 kind marker 
for code size section - let (input, kind_types) = consume_u8(input)?; - if kind_types != KIND_CODE { + let (input, kind_code) = consume_u8(input)?; + if kind_code != KIND_CODE { return Err(EofDecodeError::InvalidCodeKind); } @@ -192,7 +192,7 @@ impl EofHeader { } if sizes.len() != (types_size / 4) as usize { - return Err(EofDecodeError::MismatchCodeAndTypesSize); + return Err(EofDecodeError::MismatchCodeAndInfoSize); } header.code_sizes = sizes; diff --git a/crates/bytecode/src/eof/verification.rs b/crates/bytecode/src/eof/verification.rs index 58bb953319..5160aec854 100644 --- a/crates/bytecode/src/eof/verification.rs +++ b/crates/bytecode/src/eof/verification.rs @@ -1,5 +1,5 @@ use crate::{ - eof::{Eof, EofDecodeError, TypesSection}, + eof::{CodeInfo, Eof, EofDecodeError}, opcode::{self, OPCODE_INFO}, utils::{read_i16, read_u16}, }; @@ -11,7 +11,7 @@ use std::{borrow::Cow, fmt, vec, vec::Vec}; /// Decodes `raw` into an [`Eof`] container and validates it. pub fn validate_raw_eof(raw: Bytes) -> Result { - validate_raw_eof_inner(raw, Some(CodeType::ReturnContract)) + validate_raw_eof_inner(raw, Some(CodeType::Initcode)) } /// Decodes `raw` into an [`Eof`] container and validates it. @@ -32,11 +32,11 @@ pub fn validate_raw_eof_inner( /// /// Only place where validation happen is in Creating Transaction. /// -/// Because of that we are assuming [CodeType] is [ReturnContract][CodeType::ReturnContract]. +/// Because of that we are assuming [CodeType] is [Initcode][CodeType::Initcode]. /// -/// Note: If needed we can make a flag that would assume [ReturnContract][CodeType::ReturnContract].. +/// Note: If needed we can make a flag that would assume [Initcode][CodeType::Initcode].
pub fn validate_eof(eof: &Eof) -> Result<(), EofError> { - validate_eof_inner(eof, Some(CodeType::ReturnContract)) + validate_eof_inner(eof, Some(CodeType::Initcode)) } #[inline] @@ -78,8 +78,8 @@ pub fn validate_eof_codes( eof: &Eof, this_code_type: Option, ) -> Result, EofValidationError> { - if eof.body.code_section.len() != eof.body.types_section.len() { - return Err(EofValidationError::InvalidTypesSection); + if eof.body.code_section.len() != eof.body.code_info.len() { + return Err(EofValidationError::InvalidCodeInfo); } if eof.body.code_section.is_empty() { @@ -89,9 +89,9 @@ pub fn validate_eof_codes( // The first code section must have a type signature // (0, 0x80, max_stack_height) (0 inputs non-returning function) - let first_types = &eof.body.types_section[0]; + let first_types = &eof.body.code_info[0]; if first_types.inputs != 0 || !first_types.is_non_returning() { - return Err(EofValidationError::InvalidTypesSection); + return Err(EofValidationError::InvalidCodeInfo); } // Tracking access of code and sub containers. 
@@ -109,7 +109,7 @@ pub fn validate_eof_codes( eof.header.data_size as usize, index, eof.body.container_section.len(), - &eof.body.types_section, + &eof.body.code_info, &mut tracker, )?; } @@ -123,9 +123,7 @@ pub fn validate_eof_codes( return Err(EofValidationError::SubContainerNotAccessed); } - if tracker.this_container_code_type == Some(CodeType::ReturnContract) - && !eof.body.is_data_filled - { + if tracker.this_container_code_type == Some(CodeType::Initcode) && !eof.body.is_data_filled { return Err(EofValidationError::DataNotFilled); } @@ -212,8 +210,6 @@ pub enum EofValidationError { RETFBiggestStackNumMoreThenOutputs, /// Stack requirement is more than smallest stack items StackUnderflow, - /// Smallest stack items is more than types output - TypesStackUnderflow, /// Jump out of bounds JumpUnderflow, /// Jump to out of bounds @@ -227,10 +223,10 @@ pub enum EofValidationError { /// Code section not accessed CodeSectionNotAccessed, /// Types section invalid - InvalidTypesSection, + InvalidCodeInfo, /// First types section is invalid /// It should have inputs 0 and outputs `0x80` - InvalidFirstTypesSection, + InvalidFirstCodeInfo, /// Max stack element mismatch MaxStackMismatch, /// No code sections present @@ -241,7 +237,7 @@ pub enum EofValidationError { SubContainerCalledInTwoModes, /// Sub container not accessed SubContainerNotAccessed, - /// Data size needs to be filled for [ReturnContract][CodeType::ReturnContract] type + /// Data size needs to be filled for [Initcode][CodeType::Initcode] type DataNotFilled, /// Section is marked as non-returning but has either RETF or /// JUMPF to returning section opcodes @@ -266,9 +262,8 @@ pub struct AccessTracker { } impl AccessTracker { - /// Returns a new instance of `CodeSubContainerAccess`. - /// - /// Mark first code section as accessed and push first it to the stack. + /// Creates a new instance with the given container type and section sizes.
+ /// The first code section is marked as accessed and added to the processing stack. /// /// # Panics /// @@ -292,11 +287,11 @@ impl AccessTracker { this } - /// Mark code as accessed. + /// Marks a code section as accessed and adds it to the processing stack if not previously accessed. /// - /// If code was not accessed before, it will be added to the processing stack. + /// # Panics /// - /// Assumes that index is valid. + /// Panics if the index is out of bounds. pub fn access_code(&mut self, index: usize) { let was_accessed = mem::replace(&mut self.codes[index], true); if !was_accessed { @@ -325,22 +320,21 @@ impl AccessTracker { } } -/// Types of code sections +/// Types of code sections in EOF container /// -/// It is a error if container to contain -/// both RETURNCONTRACT and either of RETURN or STOP. +/// Container cannot mix RETURNCONTRACT with RETURN/STOP opcodes #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum CodeType { - /// Return contract code - ReturnContract, - /// Return or Stop opcodes - ReturnOrStop, + /// Code that initializes and returns a contract. + Initcode, + /// Runtime code that ends with RETURN or STOP opcodes. + Runtime, } impl CodeType { /// Returns `true` of the code is initcode. 
pub fn is_initcode(&self) -> bool { - matches!(self, CodeType::ReturnContract) + matches!(self, CodeType::Initcode) } } @@ -368,7 +362,6 @@ impl fmt::Display for EofValidationError { "RETF biggest stack num is more than outputs" } Self::StackUnderflow => "Stack requirement is above smallest stack items", - Self::TypesStackUnderflow => "Smallest stack items is more than output type", Self::JumpUnderflow => "Jump destination is too low", Self::JumpOverflow => "Jump destination is too high", Self::BackwardJumpBiggestNumMismatch => { @@ -381,8 +374,8 @@ impl fmt::Display for EofValidationError { "Last instruction of bytecode is not terminating" } Self::CodeSectionNotAccessed => "Code section was not accessed", - Self::InvalidTypesSection => "Invalid types section", - Self::InvalidFirstTypesSection => "Invalid first types section", + Self::InvalidCodeInfo => "Invalid types section", + Self::InvalidFirstCodeInfo => "Invalid first types section", Self::MaxStackMismatch => "Max stack element mismatches", Self::NoCodeSections => "No code sections", Self::SubContainerCalledInTwoModes => "Sub container called in two modes", @@ -407,7 +400,7 @@ pub fn validate_eof_code( data_size: usize, this_types_index: usize, num_of_containers: usize, - types: &[TypesSection], + types: &[CodeInfo], tracker: &mut AccessTracker, ) -> Result<(), EofValidationError> { let this_types = &types[this_types_index]; @@ -621,7 +614,7 @@ pub fn validate_eof_code( // Code section out of bounds. 
return Err(EofValidationError::EOFCREATEInvalidIndex); } - tracker.set_subcontainer_type(index, CodeType::ReturnContract)?; + tracker.set_subcontainer_type(index, CodeType::Initcode)?; } opcode::RETURNCONTRACT => { let index = code[i + 1] as usize; @@ -632,19 +625,19 @@ pub fn validate_eof_code( } if *tracker .this_container_code_type - .get_or_insert(CodeType::ReturnContract) - != CodeType::ReturnContract + .get_or_insert(CodeType::Initcode) + != CodeType::Initcode { // TODO : Make custom error return Err(EofValidationError::SubContainerCalledInTwoModes); } - tracker.set_subcontainer_type(index, CodeType::ReturnOrStop)?; + tracker.set_subcontainer_type(index, CodeType::Runtime)?; } opcode::RETURN | opcode::STOP => { if *tracker .this_container_code_type - .get_or_insert(CodeType::ReturnOrStop) - != CodeType::ReturnOrStop + .get_or_insert(CodeType::Runtime) + != CodeType::Runtime { return Err(EofValidationError::SubContainerCalledInTwoModes); } @@ -821,7 +814,7 @@ mod test { fn size_limit() { let eof = validate_raw_eof_inner( hex!("ef00010100040200010003040001000080000130500000").into(), - Some(CodeType::ReturnOrStop), + Some(CodeType::Runtime), ); assert!(eof.is_ok()); } @@ -858,7 +851,7 @@ mod test { let eof = validate_raw_eof_inner( hex!("ef000101000c02000300040001000304000000008000000080000000000000e300020000e50001") .into(), - Some(CodeType::ReturnOrStop), + Some(CodeType::Runtime), ); assert_eq!( eof, @@ -873,7 +866,7 @@ mod test { let eof = validate_raw_eof_inner( hex!("ef000101000402000100060300010014040000000080000260006000ee00ef00010100040200010001040000000080000000") .into(), - Some(CodeType::ReturnOrStop), + Some(CodeType::Runtime), ); assert_eq!( eof, diff --git a/crates/bytecode/src/legacy.rs b/crates/bytecode/src/legacy.rs index e9afe03790..71a6117bb3 100644 --- a/crates/bytecode/src/legacy.rs +++ b/crates/bytecode/src/legacy.rs @@ -1,7 +1,9 @@ +mod analysis; mod analyzed; mod jump_map; mod raw; +pub use analysis::analyze_legacy; pub use 
analyzed::LegacyAnalyzedBytecode; pub use jump_map::JumpTable; -pub use raw::{analyze_legacy, LegacyRawBytecode}; +pub use raw::LegacyRawBytecode; diff --git a/crates/bytecode/src/legacy/analysis.rs b/crates/bytecode/src/legacy/analysis.rs new file mode 100644 index 0000000000..d3a23b2aeb --- /dev/null +++ b/crates/bytecode/src/legacy/analysis.rs @@ -0,0 +1,38 @@ +use super::JumpTable; +use crate::opcode; +use bitvec::{bitvec, order::Lsb0, vec::BitVec}; +use std::sync::Arc; + +/// Analyze the bytecode to find the jumpdests. Used to create a jump table +/// that is needed for [`crate::LegacyAnalyzedBytecode`]. +/// This function contains a hot loop and should be optimized as much as possible. +/// +/// Undefined behavior if the bytecode does not end with a valid STOP opcode. Please check +/// [`crate::LegacyAnalyzedBytecode::new`] for details on how the bytecode is validated. +pub fn analyze_legacy(bytetecode: &[u8]) -> JumpTable { + let mut jumps: BitVec = bitvec![u8, Lsb0; 0; bytetecode.len()]; + + let range = bytetecode.as_ptr_range(); + let start = range.start; + let mut iterator = start; + let end = range.end; + while iterator < end { + let opcode = unsafe { *iterator }; + if opcode::JUMPDEST == opcode { + // SAFETY: Jumps are max length of the code + unsafe { jumps.set_unchecked(iterator.offset_from(start) as usize, true) } + iterator = unsafe { iterator.offset(1) }; + } else { + let push_offset = opcode.wrapping_sub(opcode::PUSH1); + if push_offset < 32 { + // SAFETY: Iterator access range is checked in the while loop + iterator = unsafe { iterator.offset((push_offset + 2) as isize) }; + } else { + // SAFETY: Iterator access range is checked in the while loop + iterator = unsafe { iterator.offset(1) }; + } + } + } + + JumpTable(Arc::new(jumps)) +} diff --git a/crates/bytecode/src/legacy/analyzed.rs b/crates/bytecode/src/legacy/analyzed.rs index c326142aff..ca5c4637c6 100644 --- a/crates/bytecode/src/legacy/analyzed.rs +++ 
b/crates/bytecode/src/legacy/analyzed.rs @@ -1,13 +1,37 @@ use super::JumpTable; +use crate::opcode; use bitvec::{bitvec, order::Lsb0}; use primitives::Bytes; use std::sync::Arc; -// Legacy analyzed +/// Legacy analyzed bytecode represents the original bytecode format used in Ethereum. +/// +/// # Jump Table +/// +/// A jump table maps valid jump destinations in the bytecode. +/// +/// While other EVM implementations typically analyze bytecode and cache jump tables at runtime, +/// Revm requires the jump table to be pre-computed and contained alongside the code, +/// and present with the bytecode when executing. +/// +/// # Bytecode Padding +/// +/// All legacy bytecode is padded with 33 zero bytes at the end. This padding ensures the +/// bytecode always ends with a valid STOP (0x00) opcode. The reason for 33 bytes padding (and not one byte) +/// is handling the edge cases where a PUSH32 opcode appears at the end of the original +/// bytecode without enough remaining bytes for its immediate data. Original bytecode length +/// is stored in order to be able to copy original bytecode. +/// +/// # Gas safety +/// +/// When bytecode is created through CREATE, CREATE2, or contract creation transactions, it undergoes +/// analysis to generate its jump table. This analysis is O(n) in the size of the bytecode, which is expensive, +/// but the gas cost required to store bytecode in the database is high enough to cover the +/// expense of doing the analysis and generating the jump table. #[derive(Clone, Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct LegacyAnalyzedBytecode { - /// Bytecode with 32 zero bytes padding + /// Bytecode with 33 zero bytes padding bytecode: Bytes, /// Original bytes length original_len: usize, @@ -28,7 +52,32 @@ impl Default for LegacyAnalyzedBytecode { impl LegacyAnalyzedBytecode { /// Creates new analyzed bytecode.
+ /// + /// # Panics + /// + /// * If `original_len` is greater than `bytecode.len()` + /// * If jump table length is not equal to `bytecode.len()`. + /// * If last bytecode byte is not `0x00` or if bytecode is empty. pub fn new(bytecode: Bytes, original_len: usize, jump_table: JumpTable) -> Self { + if original_len > bytecode.len() { + panic!("original_len is greater than bytecode length"); + } + if jump_table.0.len() != bytecode.len() { + panic!( + "jump table length {} is not equal to bytecode length {}", + jump_table.0.len(), + bytecode.len() + ); + } + + if bytecode.is_empty() { + panic!("bytecode cannot be empty"); + } + + if bytecode.last() != Some(&opcode::STOP) { + panic!("last bytecode byte should be STOP (0x00)"); + } + Self { bytecode, original_len, @@ -63,3 +112,54 @@ impl LegacyAnalyzedBytecode { &self.jump_table } } + +#[cfg(test)] +mod tests { + use crate::{opcode, LegacyRawBytecode}; + + use super::*; + + #[test] + fn test_bytecode_new() { + let bytecode = Bytes::from_static(&[opcode::PUSH1, 0x01]); + let bytecode = LegacyRawBytecode(bytecode).into_analyzed(); + let _ = LegacyAnalyzedBytecode::new( + bytecode.bytecode, + bytecode.original_len, + bytecode.jump_table, + ); + } + + #[test] + #[should_panic(expected = "original_len is greater than bytecode length")] + fn test_panic_on_large_original_len() { + let bytecode = Bytes::from_static(&[opcode::PUSH1, 0x01]); + let bytecode = LegacyRawBytecode(bytecode).into_analyzed(); + let _ = LegacyAnalyzedBytecode::new(bytecode.bytecode, 100, bytecode.jump_table); + } + + #[test] + #[should_panic(expected = "jump table length 34 is not equal to bytecode length 35")] + fn test_panic_on_custom_jump_table() { + let bytecode = Bytes::from_static(&[opcode::PUSH1, 0x01]); + let bytecode = LegacyRawBytecode(bytecode).into_analyzed(); + let jump_table = JumpTable(Arc::new(bitvec![u8, Lsb0; 0; 34])); + let _ = LegacyAnalyzedBytecode::new(bytecode.bytecode, bytecode.original_len, jump_table); + } + + #[test] + 
#[should_panic(expected = "last bytecode byte should be STOP (0x00)")] + fn test_panic_on_non_stop_bytecode() { + let bytecode = Bytes::from_static(&[opcode::PUSH1, 0x01]); + let jump_table = JumpTable(Arc::new(bitvec![u8, Lsb0; 0; 2])); + let _ = LegacyAnalyzedBytecode::new(bytecode, 2, jump_table); + } + + #[test] + #[should_panic(expected = "bytecode cannot be empty")] + fn test_panic_on_empty_bytecode() { + let bytecode = Bytes::from_static(&[]); + let jump_table = JumpTable(Arc::new(bitvec![u8, Lsb0; 0; 0])); + let _ = LegacyAnalyzedBytecode::new(bytecode, 0, jump_table); + } +} diff --git a/crates/bytecode/src/legacy/jump_map.rs b/crates/bytecode/src/legacy/jump_map.rs index 49e3639052..59577ce4c0 100644 --- a/crates/bytecode/src/legacy/jump_map.rs +++ b/crates/bytecode/src/legacy/jump_map.rs @@ -2,7 +2,7 @@ use bitvec::vec::BitVec; use primitives::hex; use std::{fmt::Debug, sync::Arc}; -/// A map of valid `jump` destinations +/// A table of valid `jump` destinations. Cheap to clone and memory efficient, one bit per opcode. #[derive(Clone, Default, PartialEq, Eq, Hash, Ord, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct JumpTable(pub Arc>); diff --git a/crates/bytecode/src/legacy/raw.rs b/crates/bytecode/src/legacy/raw.rs index d8be15e157..8398198449 100644 --- a/crates/bytecode/src/legacy/raw.rs +++ b/crates/bytecode/src/legacy/raw.rs @@ -1,25 +1,24 @@ -use super::{JumpTable, LegacyAnalyzedBytecode}; -use crate::opcode; -use bitvec::{bitvec, order::Lsb0, vec::BitVec}; +use super::{analyze_legacy, LegacyAnalyzedBytecode}; use core::ops::Deref; use primitives::Bytes; -use std::{sync::Arc, vec::Vec}; +use std::vec::Vec; +/// Used only as intermediate representation for legacy bytecode. +/// Please check [`LegacyAnalyzedBytecode`] for the main structure that is used in Revm. 
#[derive(Clone, Debug, PartialEq, Eq, Hash, Ord, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct LegacyRawBytecode(pub Bytes); impl LegacyRawBytecode { - pub fn analysis(&self) -> JumpTable { - analyze_legacy(&self.0) - } - + /// Converts the raw bytecode into an analyzed bytecode. + /// + /// It extends the bytecode with 33 zero bytes and analyzes it to find the jumpdests. pub fn into_analyzed(self) -> LegacyAnalyzedBytecode { - let jump_table = self.analysis(); let len = self.0.len(); let mut padded_bytecode = Vec::with_capacity(len + 33); padded_bytecode.extend_from_slice(&self.0); padded_bytecode.resize(len + 33, 0); + let jump_table = analyze_legacy(&padded_bytecode); LegacyAnalyzedBytecode::new(padded_bytecode.into(), len, jump_table) } } @@ -43,32 +42,3 @@ impl Deref for LegacyRawBytecode { &self.0 } } - -/// Analyze the bytecode to find the jumpdests -pub fn analyze_legacy(bytetecode: &[u8]) -> JumpTable { - let mut jumps: BitVec = bitvec![u8, Lsb0; 0; bytetecode.len()]; - - let range = bytetecode.as_ptr_range(); - let start = range.start; - let mut iterator = start; - let end = range.end; - while iterator < end { - let opcode = unsafe { *iterator }; - if opcode::JUMPDEST == opcode { - // SAFETY: Jumps are max length of the code - unsafe { jumps.set_unchecked(iterator.offset_from(start) as usize, true) } - iterator = unsafe { iterator.offset(1) }; - } else { - let push_offset = opcode.wrapping_sub(opcode::PUSH1); - if push_offset < 32 { - // SAFETY: Iterator access range is checked in the while loop - iterator = unsafe { iterator.offset((push_offset + 2) as isize) }; - } else { - // SAFETY: Iterator access range is checked in the while loop - iterator = unsafe { iterator.offset(1) }; - } - } - } - - JumpTable(Arc::new(jumps)) -} diff --git a/crates/bytecode/src/lib.rs b/crates/bytecode/src/lib.rs index ac667c956e..3412b33462 100644 --- a/crates/bytecode/src/lib.rs +++ b/crates/bytecode/src/lib.rs @@ -1,4 
+1,10 @@ -//! Optimism-specific constants, types, and helpers. +//! Crate that contains bytecode types and opcode constants. +//! +//! EOF bytecode contains its verification logic and only valid EOF bytecode can be created. +//! +//! Legacy bytecode will always contain a jump table. +//! +//! EIP-7702 bytecode must contain an `Address`. #![cfg_attr(not(test), warn(unused_crate_dependencies))] #![cfg_attr(not(feature = "std"), no_std)] diff --git a/crates/bytecode/src/opcode.rs b/crates/bytecode/src/opcode.rs index 47d7abe0e6..80a5d04b16 100644 --- a/crates/bytecode/src/opcode.rs +++ b/crates/bytecode/src/opcode.rs @@ -1,4 +1,4 @@ -//! EVM opcode definitions and utilities. +//! EVM opcode definitions and utilities. It contains opcode information and utilities to work with opcodes. #[cfg(feature = "parse")] pub mod parse; @@ -14,6 +14,7 @@ use core::{fmt, ptr::NonNull}; pub struct OpCode(u8); impl fmt::Display for OpCode { + /// Formats the opcode as a string fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let n = self.get(); if let Some(val) = OPCODE_INFO[n as usize] { @@ -26,6 +27,8 @@ impl fmt::Display for OpCode { impl OpCode { /// Instantiates a new opcode from a u8. + /// + /// Returns None if the opcode is not valid. #[inline] pub const fn new(opcode: u8) -> Option { match OPCODE_INFO[opcode as usize] { @@ -129,6 +132,7 @@ impl OpCode { } /// Returns the opcode information for the given opcode. + /// Check [OpCodeInfo] for more information. #[inline] pub const fn info_by_op(opcode: u8) -> Option { if let Some(opcode) = Self::new(opcode) { @@ -259,7 +263,7 @@ impl OpCodeInfo { // SAFETY: `self.name_*` can only be initialized with a valid `&'static str`. unsafe { // TODO : Use `str::from_raw_parts` when it's stable. 
- let slice = core::slice::from_raw_parts(self.name_ptr.as_ptr(), self.name_len as usize); + let slice = std::slice::from_raw_parts(self.name_ptr.as_ptr(), self.name_len as usize); core::str::from_utf8_unchecked(slice) } } @@ -308,7 +312,7 @@ pub const fn not_eof(mut op: OpCodeInfo) -> OpCodeInfo { op } -/// Sets the immediate bytes number. +/// Used for [`OPCODE_INFO`] to set the immediate bytes number in the [`OpCodeInfo`]. /// /// RJUMPV is special case where the bytes len is depending on bytecode value, /// for RJUMPV size will be set to one byte while minimum is two. @@ -318,14 +322,14 @@ pub const fn immediate_size(mut op: OpCodeInfo, n: u8) -> OpCodeInfo { op } -/// Sets the terminating flag to true. +/// Used for [`OPCODE_INFO`] to set the terminating flag to true in the [`OpCodeInfo`]. #[inline] pub const fn terminating(mut op: OpCodeInfo) -> OpCodeInfo { op.terminating = true; op } -/// Sets the number of stack inputs and outputs. +/// Used for [`OPCODE_INFO`] to set the number of stack inputs and outputs in the [`OpCodeInfo`]. #[inline] pub const fn stack_io(mut op: OpCodeInfo, inputs: u8, outputs: u8) -> OpCodeInfo { op.inputs = inputs; @@ -336,6 +340,9 @@ pub const fn stack_io(mut op: OpCodeInfo, inputs: u8, outputs: u8) -> OpCodeInfo /// Alias for the [`JUMPDEST`] opcode pub const NOP: u8 = JUMPDEST; +/// Creates all opcode constants and two maps: +/// * `OPCODE_INFO` maps opcode number to the opcode info +/// * `NAME_TO_OPCODE` maps opcode name to the opcode number. macro_rules! opcodes { ($($val:literal => $name:ident => $($modifier:ident $(( $($modifier_arg:expr),* ))?),*);* $(;)?) => { // Constants for each opcode. This also takes care of duplicate names. diff --git a/crates/bytecode/src/opcode/parse.rs b/crates/bytecode/src/opcode/parse.rs index b9fea29274..aee2a7a47c 100644 --- a/crates/bytecode/src/opcode/parse.rs +++ b/crates/bytecode/src/opcode/parse.rs @@ -1,3 +1,8 @@ +//! Parsing opcodes from strings. +//! +//! 
This module provides a function to parse opcodes from strings. +//! It is a utility function that needs to be enabled with `parse` feature. + use super::OpCode; use crate::opcode::NAME_TO_OPCODE; use core::fmt; diff --git a/crates/context/interface/src/result.rs b/crates/context/interface/src/result.rs index bb0b67e14a..aeb12b4e29 100644 --- a/crates/context/interface/src/result.rs +++ b/crates/context/interface/src/result.rs @@ -507,7 +507,7 @@ pub enum HaltReason { /// Aux data overflow, new aux data is larger than [u16] max size. EofAuxDataOverflow, - /// Aud data is smaller then already present data size. + /// Aux data is smaller than already present data size. EofAuxDataTooSmall, /// EOF Subroutine stack overflow SubRoutineStackOverflow, diff --git a/crates/interpreter/src/instruction_result.rs b/crates/interpreter/src/instruction_result.rs index f440724c68..03a266f87f 100644 --- a/crates/interpreter/src/instruction_result.rs +++ b/crates/interpreter/src/instruction_result.rs @@ -94,7 +94,7 @@ pub enum InstructionResult { SubRoutineStackOverflow, /// Aux data overflow, new aux data is larger than `u16` max size. EofAuxDataOverflow, - /// Aux data is smaller then already present data size. + /// Aux data is smaller than already present data size. EofAuxDataTooSmall, /// `EXT*CALL` target address needs to be padded with 0s. 
InvalidEXTCALLTarget, diff --git a/crates/interpreter/src/instructions/control.rs b/crates/interpreter/src/instructions/control.rs index c55041e785..904b04ff77 100644 --- a/crates/interpreter/src/instructions/control.rs +++ b/crates/interpreter/src/instructions/control.rs @@ -109,7 +109,7 @@ pub fn callf( let idx = interpreter.bytecode.read_u16() as usize; // Get target types - let Some(types) = interpreter.bytecode.code_section_info(idx) else { + let Some(types) = interpreter.bytecode.code_info(idx) else { panic!("Invalid EOF in execution, expecting correct intermediate in callf") }; @@ -166,7 +166,7 @@ pub fn jumpf( // Get target types let types = interpreter .bytecode - .code_section_info(idx) + .code_info(idx) .expect("Invalid code section index"); // Check max stack height for target code section. @@ -278,7 +278,7 @@ mod test { use crate::{table::make_instruction_table, DummyHost, Gas}; use bytecode::opcode::{CALLF, JUMPF, NOP, RETF, RJUMP, RJUMPI, RJUMPV, STOP}; use bytecode::{ - eof::{Eof, TypesSection}, + eof::{Eof, CodeInfo}, Bytecode, }; use primitives::bytes; @@ -382,11 +382,11 @@ mod test { } fn eof_setup(bytes1: Bytes, bytes2: Bytes) -> Interpreter { - eof_setup_with_types(bytes1, bytes2, TypesSection::default()) + eof_setup_with_types(bytes1, bytes2, CodeInfo::default()) } /// Two code section and types section is for last code. 
- fn eof_setup_with_types(bytes1: Bytes, bytes2: Bytes, types: TypesSection) -> Interpreter { + fn eof_setup_with_types(bytes1: Bytes, bytes2: Bytes, types: CodeInfo) -> Interpreter { let mut eof = dummy_eof(); eof.body.code_section.clear(); @@ -395,7 +395,7 @@ mod test { eof.header.code_sizes.push(bytes1.len() as u16); eof.body.code_section.push(bytes1.len()); - eof.body.types_section.push(TypesSection::new(0, 0, 11)); + eof.body.types_section.push(CodeInfo::new(0, 0, 11)); eof.header.code_sizes.push(bytes2.len() as u16); eof.body.code_section.push(bytes2.len() + bytes1.len()); @@ -472,7 +472,7 @@ mod test { let bytes1 = Bytes::from([CALLF, 0x00, 0x01]); let bytes2 = Bytes::from([STOP]); let mut interp = - eof_setup_with_types(bytes1, bytes2.clone(), TypesSection::new(0, 0, 1025)); + eof_setup_with_types(bytes1, bytes2.clone(), CodeInfo::new(0, 0, 1025)); // CALLF interp.step(&table, &mut host); @@ -510,7 +510,7 @@ mod test { let bytes1 = Bytes::from([JUMPF, 0x00, 0x01]); let bytes2 = Bytes::from([STOP]); let mut interp = - eof_setup_with_types(bytes1, bytes2.clone(), TypesSection::new(0, 0, 1025)); + eof_setup_with_types(bytes1, bytes2.clone(), CodeInfo::new(0, 0, 1025)); // JUMPF interp.step(&table, &mut host); diff --git a/crates/interpreter/src/interpreter/ext_bytecode.rs b/crates/interpreter/src/interpreter/ext_bytecode.rs index f80155ff8d..a66eaf4bc1 100644 --- a/crates/interpreter/src/interpreter/ext_bytecode.rs +++ b/crates/interpreter/src/interpreter/ext_bytecode.rs @@ -1,7 +1,7 @@ use core::ops::Deref; use bytecode::{ - eof::TypesSection, + eof::CodeInfo, utils::{read_i16, read_u16}, Bytecode, }; @@ -147,10 +147,8 @@ impl Immediates for ExtBytecode { } impl EofCodeInfo for ExtBytecode { - fn code_section_info(&self, idx: usize) -> Option<&TypesSection> { - self.base - .eof() - .and_then(|eof| eof.body.types_section.get(idx)) + fn code_info(&self, idx: usize) -> Option<&CodeInfo> { + self.base.eof().and_then(|eof| eof.body.code_info.get(idx)) } fn 
code_section_pc(&self, idx: usize) -> Option { diff --git a/crates/interpreter/src/interpreter_types.rs b/crates/interpreter/src/interpreter_types.rs index 83475be0a8..eac4da053d 100644 --- a/crates/interpreter/src/interpreter_types.rs +++ b/crates/interpreter/src/interpreter_types.rs @@ -1,4 +1,4 @@ -use bytecode::eof::TypesSection; +use bytecode::eof::CodeInfo; use specification::hardfork::SpecId; use crate::{Gas, InstructionResult, InterpreterAction}; @@ -96,7 +96,7 @@ pub trait SubRoutineStack { fn pop(&mut self) -> Option; // /// Returns code info from EOF body. - // fn eof_code_info(&self, idx: usize) -> Option<&TypesSection>; + // fn eof_code_info(&self, idx: usize) -> Option<&CodeInfo>; } pub trait StackTr { @@ -172,7 +172,7 @@ pub trait EofData { pub trait EofCodeInfo { /// Returns code information containing stack information. - fn code_section_info(&self, idx: usize) -> Option<&TypesSection>; + fn code_info(&self, idx: usize) -> Option<&CodeInfo>; /// Returns program counter at the start of code section. fn code_section_pc(&self, idx: usize) -> Option;