Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 79 additions & 57 deletions crates/common/types/block_execution_witness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,47 +140,30 @@ impl TryFrom<ExecutionWitness> for RpcExecutionWitness {
impl RpcExecutionWitness {
/// Convert an RPC execution witness into the internal [`ExecutionWitness`]
/// format by rebuilding trie structures from the flat node list.
/// `decoded_headers` is reused (typically from [`decode_witness_headers`])
/// to avoid a second RLP decode.
pub fn into_execution_witness(
self,
chain_config: ChainConfig,
first_block_number: u64,
decoded_headers: &[BlockHeader],
) -> Result<ExecutionWitness, GuestProgramStateError> {
if first_block_number == 0 {
return Err(GuestProgramStateError::Custom(
"first_block_number must be > 0 (need parent header)".to_string(),
));
}

let mut initial_state_root = None;

for h in &self.headers {
let header = BlockHeader::decode(h)?;
if header.number == first_block_number - 1 {
initial_state_root = Some(header.state_root);
break;
}
}

let initial_state_root = initial_state_root.ok_or_else(|| {
GuestProgramStateError::Custom(format!(
"header for block {} not found",
first_block_number - 1
))
})?;

let initial_state_root = find_parent_state_root(decoded_headers, first_block_number)?;
// Drop the `0x80` Null-node sentinel some `debug_executionWitness` producers emit.
// Undecodable/unused nodes are dropped per EELS
// `test_validation_state_extra_unused_trie_node`; missing needed nodes surface
// later as `RootNotFound` from `get_embedded_root`.
let nodes: BTreeMap<H256, Node> = self
.state
.into_iter()
.filter_map(|b| {
if b == Bytes::from_static(&[0x80]) {
// other implementations of debug_executionWitness allow for a `Null` node,
// which would fail to decode in ours
return None;
}
let hash = keccak(&b);
Some(Node::decode(&b).map(|node| (hash, node)))
let node = Node::decode(&b).ok()?;
Some((keccak(&b), node))
})
.collect::<Result<_, RLPDecodeError>>()?;
.collect();

// get state trie root and embed the rest of the trie into it
let state_trie_root = if let NodeRef::Node(state_trie_root, _) =
Expand Down Expand Up @@ -231,6 +214,58 @@ impl RpcExecutionWitness {
}
}

/// RLP-decode the raw header byte slices into a `Vec<BlockHeader>`.
pub fn decode_witness_headers<B: AsRef<[u8]>>(
headers_bytes: &[B],
) -> Result<Vec<BlockHeader>, GuestProgramStateError> {
headers_bytes
.iter()
.map(|b| BlockHeader::decode(b.as_ref()).map_err(GuestProgramStateError::from))
.collect()
}

/// Locate the parent block's state root from a slice of decoded headers.
/// Returns an error if `first_block_number == 0` (no parent possible) or if the
/// parent header is not in the slice.
fn find_parent_state_root(
headers: &[BlockHeader],
first_block_number: u64,
) -> Result<H256, GuestProgramStateError> {
let parent_number = first_block_number
.checked_sub(1)
.ok_or(GuestProgramStateError::Custom(
"first_block_number must be > 0 (need parent header)".to_string(),
))?;
headers
.iter()
.find(|h| h.number == parent_number)
.map(|h| h.state_root)
.ok_or_else(|| {
GuestProgramStateError::Custom(format!("header for block {parent_number} not found"))
})
}

/// Check that `headers[1..]` link via `parent_hash == keccak(RLP(prev))` AND
/// `number == prev.number + 1`, in input order. The first header is
/// intentionally unanchored here; the parent end is bound by the post-execution
/// state-root check in `execute_blocks`.
///
/// Call before any sort/dedup, since reordering hides violations.
pub fn validate_witness_headers_chain(
headers: &[BlockHeader],
crypto: &dyn Crypto,
) -> Result<(), GuestProgramStateError> {
for pair in headers.windows(2) {
let (prev, next) = (&pair[0], &pair[1]);
if next.number != prev.number.saturating_add(1)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: prev.number.saturating_add(1) silently lets a contiguous chain pass at u64::MAX → u64::MAX. Astronomically irrelevant, but the safer expression is prev.number.checked_add(1) which returns None and lets the comparison fail loudly — matches the spirit of the function ("validate chain linkage, surface violations").

|| next.parent_hash != prev.compute_block_hash(crypto)
{
return Err(GuestProgramStateError::NoncontiguousBlockHeaders);
}
}
Ok(())
}

/// Recursively walks an embedded state trie node and collects
/// `(hashed_address, storage_root)` pairs from leaf nodes.
fn collect_accounts_from_trie(
Expand Down Expand Up @@ -308,8 +343,10 @@ pub enum GuestProgramStateError {
NoBlockHeaders,
#[error("Parent block header of block {0} was not found")]
MissingParentHeaderOf(u64),
#[error("Non-contiguous block headers (there's a gap in the block headers list)")]
#[error("Non-contiguous block headers")]
NoncontiguousBlockHeaders,
#[error("Bytecode for code hash {0} was not found in witness")]
MissingBytecode(H256),
#[error("Trie error: {0}")]
Trie(#[from] TrieError),
#[error("RLP Decode: {0}")]
Expand Down Expand Up @@ -589,24 +626,18 @@ impl GuestProgramState {
}

/// Retrieves the account code for a specific account.
/// Returns an Err if the code is not found.
///
/// A missing code hash is always an error: ethrex's own witness producers
/// include every byte of code the VM reads, and EIP-8025 stateless
/// validation requires the same.
pub fn get_account_code(&self, code_hash: H256) -> Result<Code, GuestProgramStateError> {
if code_hash == *EMPTY_KECCACK_HASH {
return Ok(Code::default());
}
match self.codes_hashed.get(&code_hash) {
Some(code) => Ok(code.clone()),
None => {
// We do this because what usually happens is that the Witness doesn't have the code we asked for but it is because it isn't relevant for that particular case.
// In client implementations there are differences and it's natural for some clients to access more/less information in some edge cases.
// Sidenote: logger doesn't work inside SP1, that's why we use println!
println!(
"Missing bytecode for hash {} in witness. Defaulting to empty code.", // If there's a state root mismatch and this prints we have to see if it's the cause or not.
hex::encode(code_hash)
);
Ok(Code::default())
}
}
self.codes_hashed
.get(&code_hash)
.cloned()
.ok_or(GuestProgramStateError::MissingBytecode(code_hash))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_account_code now hard-errors with MissingBytecode instead of silently returning empty. Looks correct for stateless validation, but can you confirm no non-stateless caller relied on the prior silent-empty fallback? A quick rg get_account_code over the workspace would lock it down. Test matrix is green but worth eyeballing.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confirmed get_account_code isn't used elsewhere. Other methods (RPC, etc) use the Store and not the VM to access bytecode.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the silent "default to empty Code" fallback is the right call — it was masking exactly the kind of bug the system.rs CALL-reorder is fixing (witness misses a code entry; we silently default to empty; downstream execution diverges from EELS).

Worth a sanity check that every code-hash path the VM walks during execution is now preceded by an addressed-cold check or the witness invariant guarantees inclusion. If get_account_code can be reached for a code hash that wasn't already established as touched, this change converts the previous silent-but-wrong behavior into a hard MissingBytecode failure — possibly during stateless validation of an otherwise-valid block.

Based on the PR description ("unskipping tests"), the unskip is the regression net here. If those tests cover the once-fallback cases, we're good. Non-blocking.

}

/// Retrieves code metadata (length) for a specific code hash.
Expand All @@ -615,24 +646,15 @@ impl GuestProgramState {
&self,
code_hash: H256,
) -> Result<CodeMetadata, GuestProgramStateError> {
use crate::constants::EMPTY_KECCACK_HASH;

if code_hash == *EMPTY_KECCACK_HASH {
return Ok(CodeMetadata { length: 0 });
}
match self.codes_hashed.get(&code_hash) {
Some(code) => Ok(CodeMetadata {
self.codes_hashed
.get(&code_hash)
.map(|code| CodeMetadata {
length: code.bytecode.len() as u64,
}),
None => {
// Same as get_account_code - default to empty for missing bytecode
println!(
"Missing bytecode for hash {} in witness. Defaulting to empty code metadata.",
hex::encode(code_hash)
);
Ok(CodeMetadata { length: 0 })
}
}
})
.ok_or(GuestProgramStateError::MissingBytecode(code_hash))
}

/// When executing multiple blocks in the L2 it happens that the headers in block_headers correspond to the same block headers that we have in the blocks array. The main goal is to hash these only once and set them in both places.
Expand Down
118 changes: 113 additions & 5 deletions crates/guest-program/src/l1/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use ethrex_common::types::Block;
use ethrex_common::types::block_execution_witness::ExecutionWitness;

/// Input for the L1 stateless validation program.
#[cfg(not(feature = "eip-8025"))]
#[derive(
Clone,
Default,
Expand All @@ -18,6 +19,7 @@ pub struct ProgramInput {
pub execution_witness: ExecutionWitness,
}

#[cfg(not(feature = "eip-8025"))]
impl ProgramInput {
/// Creates a new ProgramInput with the given blocks and execution witness.
pub fn new(blocks: Vec<Block>, execution_witness: ExecutionWitness) -> Self {
Expand All @@ -28,6 +30,45 @@ impl ProgramInput {
}
}

/// Input for the L1 stateless validation program (EIP-8025 build).
///
/// `Direct` carries in-memory blocks + witness (test path). `Wire` carries an
/// already-decoded EIP-8025 stateless input from spec wire bytes.
#[cfg(feature = "eip-8025")]
pub enum ProgramInput {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same identifier ProgramInput is a struct (line 15) without the eip-8025 feature and an enum (here) with it. Every consumer of ProgramInput::new(blocks, witness) works under both because both have a matching new method — but ProgramInput::default() returns different shapes (struct-default vs Direct { ..Default::default() }), and any code that pattern-matches on ProgramInput::Wire(...) or ProgramInput::Direct { .. } only compiles under one cfg.

The shape is consistent enough that this is probably fine, but a one-line module-level doc note ("this type's kind is feature-gated; cross-feature destructuring will not compile in the disabled branch") would save the next contributor a confused 10 minutes. Non-blocking.

Direct {
blocks: Vec<Block>,
execution_witness: ExecutionWitness,
},
Wire(DecodedEip8025),
}

#[cfg(feature = "eip-8025")]
impl Default for ProgramInput {
fn default() -> Self {
Self::Direct {
blocks: Vec::new(),
execution_witness: ExecutionWitness::default(),
}
}
}

#[cfg(feature = "eip-8025")]
impl ProgramInput {
/// Creates a `Direct` ProgramInput from in-memory blocks and execution witness.
pub fn new(blocks: Vec<Block>, execution_witness: ExecutionWitness) -> Self {
Self::Direct {
blocks,
execution_witness,
}
}

/// Creates a `Wire` ProgramInput from an already-decoded EIP-8025 payload.
pub fn wire(decoded: DecodedEip8025) -> Self {
Self::Wire(decoded)
}
}

/// Wire-format version byte for the legacy EIP-8025 framing.
#[cfg(feature = "eip-8025")]
pub const EIP8025_VERSION_LEGACY: u8 = 0x00;
Expand Down Expand Up @@ -79,13 +120,57 @@ const MAX_BYTES_PER_HEADER: usize = 1 << 10;
#[cfg(feature = "eip-8025")]
const MAX_PUBLIC_KEYS: usize = 1 << 20;
#[cfg(feature = "eip-8025")]
const MAX_BYTES_PER_PUBLIC_KEY: usize = 65;
const BYTES_PER_PUBLIC_KEY: usize = 65;

/// SSZ shape of the per-tx public key list in `CanonicalStatelessInput`:
/// one fixed-size 65-byte uncompressed secp256k1 key per transaction.
#[cfg(feature = "eip-8025")]
pub type PublicKeysList =
libssz_types::SszList<libssz_types::SszVector<u8, BYTES_PER_PUBLIC_KEY>, MAX_PUBLIC_KEYS>;
#[cfg(feature = "eip-8025")]
const MAX_OPTIONAL_FORK_ACTIVATION_VALUES: usize = 1;
#[cfg(feature = "eip-8025")]
const MAX_BLOB_SCHEDULES_PER_FORK: usize = 1;

/// Big-endian schema-id prefix on canonical `SszStatelessInput` wire bytes.
#[cfg(feature = "eip-8025")]
pub const STATELESS_INPUT_SCHEMA_ID: u16 = 0x0001;
/// Byte length of [`STATELESS_INPUT_SCHEMA_ID`] on the wire.
#[cfg(feature = "eip-8025")]
pub const STATELESS_INPUT_SCHEMA_ID_SIZE: usize = 2;

/// Mirrors `SszBlobSchedule` from the Amsterdam stateless-validation spec.
#[cfg(feature = "eip-8025")]
#[derive(Debug, Clone, PartialEq, Eq, libssz_derive::SszEncode, libssz_derive::SszDecode)]
pub struct CanonicalBlobSchedule {
pub target: u64,
pub max: u64,
pub base_fee_update_fraction: u64,
}

/// Mirrors `SszForkActivation` from the Amsterdam stateless-validation spec.
#[cfg(feature = "eip-8025")]
#[derive(Debug, Clone, PartialEq, Eq, libssz_derive::SszEncode, libssz_derive::SszDecode)]
pub struct CanonicalForkActivation {
pub block_number: libssz_types::SszList<u64, MAX_OPTIONAL_FORK_ACTIVATION_VALUES>,
pub timestamp: libssz_types::SszList<u64, MAX_OPTIONAL_FORK_ACTIVATION_VALUES>,
}

/// Mirrors `SszForkConfig` from the Amsterdam stateless-validation spec.
#[cfg(feature = "eip-8025")]
#[derive(Debug, Clone, PartialEq, Eq, libssz_derive::SszEncode, libssz_derive::SszDecode)]
pub struct CanonicalForkConfig {
pub fork: u64,
pub activation: CanonicalForkActivation,
pub blob_schedule: libssz_types::SszList<CanonicalBlobSchedule, MAX_BLOB_SCHEDULES_PER_FORK>,
}

/// Mirrors `SszChainConfig` from the Amsterdam stateless-validation spec.
#[cfg(feature = "eip-8025")]
#[derive(Debug, Clone, PartialEq, Eq, libssz_derive::SszEncode, libssz_derive::SszDecode)]
pub struct CanonicalChainConfig {
pub chain_id: u64,
pub active_fork: CanonicalForkConfig,
}

/// Mirrors `SszExecutionWitness` from the Amsterdam stateless-validation spec.
Expand All @@ -109,10 +194,9 @@ pub struct CanonicalStatelessInput {
pub new_payload_request: ethrex_common::types::eip8025_ssz::NewPayloadRequestAmsterdam,
pub witness: CanonicalExecutionWitness,
pub chain_config: CanonicalChainConfig,
// Currently the specs do not include proper values for this field,
// but it is planned to be supported in the next release.
pub public_keys:
libssz_types::SszList<libssz_types::SszList<u8, MAX_BYTES_PER_PUBLIC_KEY>, MAX_PUBLIC_KEYS>,
/// Per-transaction public keys (uncompressed secp256k1, 65 bytes each).
/// Mirrors `SszList[ByteVector[PUBLIC_KEY_BYTES], MAX_PUBLIC_KEYS]` in the spec.
pub public_keys: PublicKeysList,
}

/// Decoded EIP-8025 wire payload, dispatched by version byte.
Expand Down Expand Up @@ -173,6 +257,26 @@ pub fn decode_eip8025(bytes: &[u8]) -> Result<DecodedEip8025, ProgramInputDecode
}
}

/// Decode a spec-format canonical stateless input blob:
/// `[BE u16 STATELESS_INPUT_SCHEMA_ID][SSZ-encoded CanonicalStatelessInput]`.
/// Caller supplies `chain_config` out-of-band (unlike [`decode_eip8025`]).
#[cfg(feature = "eip-8025")]
pub fn decode_canonical_stateless_input_bytes(
bytes: &[u8],
) -> Result<CanonicalStatelessInput, ProgramInputDecodeError> {
use libssz::SszDecode;

if bytes.len() < STATELESS_INPUT_SCHEMA_ID_SIZE {
return Err(ProgramInputDecodeError::TooShort);
}
let (schema_bytes, ssz_bytes) = bytes.split_at(STATELESS_INPUT_SCHEMA_ID_SIZE);
let schema_id = u16::from_be_bytes([schema_bytes[0], schema_bytes[1]]);
if schema_id != STATELESS_INPUT_SCHEMA_ID {
return Err(ProgramInputDecodeError::UnknownSchemaId(schema_id));
}
CanonicalStatelessInput::from_ssz_bytes(ssz_bytes).map_err(ProgramInputDecodeError::Ssz)
}

#[cfg(feature = "eip-8025")]
fn decode_eip8025_legacy(
bytes: &[u8],
Expand Down Expand Up @@ -269,6 +373,7 @@ pub enum ProgramInputDecodeError {
Ssz(libssz::DecodeError),
Rkyv(String),
UnknownVersion(u8),
UnknownSchemaId(u16),
}

#[cfg(feature = "eip-8025")]
Expand All @@ -279,6 +384,9 @@ impl core::fmt::Display for ProgramInputDecodeError {
Self::Ssz(e) => write!(f, "SSZ decode error: {e}"),
Self::Rkyv(e) => write!(f, "rkyv decode error: {e}"),
Self::UnknownVersion(v) => write!(f, "unknown EIP-8025 wire version: {v:#04x}"),
Self::UnknownSchemaId(v) => {
write!(f, "unknown stateless input schema id: {v:#06x}")
}
}
}
}
5 changes: 4 additions & 1 deletion crates/guest-program/src/l1/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@ pub use input::ProgramInput;
#[cfg(feature = "eip-8025")]
pub use input::{
CanonicalChainConfig, CanonicalExecutionWitness, CanonicalStatelessInput, DecodedEip8025,
EIP8025_VERSION_CANONICAL, EIP8025_VERSION_LEGACY, decode_eip8025, encode_eip8025,
EIP8025_VERSION_CANONICAL, EIP8025_VERSION_LEGACY, decode_canonical_stateless_input_bytes,
decode_eip8025, encode_eip8025,
};
#[cfg(feature = "eip-8025")]
pub use input::{ProgramInputDecodeError, ProgramInputEncodeError};
pub use output::ProgramOutput;
#[cfg(feature = "eip-8025")]
pub use program::execute_decoded;
pub use program::execution_program;
Loading
Loading