foundry-rs · onbjerg · Mar 21, 2022 · Mar 19, 2022 · Mar 19, 2022 · Mar 20, 2022
diff --git a/evm/src/executor/mod.rs b/evm/src/executor/mod.rs
@@ -41,6 +41,9 @@ use revm::{
 };
 use std::collections::BTreeMap;
 
+/// A mapping of addresses to their changed state.
+///
+/// This is the type of the `state_changeset` field on [`CallResult`] and [`RawCallResult`].
+pub type StateChangeset = HashMap<Address, Account>;
+
 #[derive(thiserror::Error, Debug)]
 pub enum EvmError {
     /// Error which occurred during execution of a transaction
@@ -54,7 +57,7 @@ pub enum EvmError {
         traces: Option<CallTraceArena>,
         debug: Option<DebugArena>,
         labels: BTreeMap<Address, String>,
-        state_changeset: HashMap<Address, Account>,
+        state_changeset: StateChangeset,
     },
     /// Error which occurred during ABI encoding/decoding
     #[error(transparent)]
@@ -102,7 +105,7 @@ pub struct CallResult<D: Detokenize> {
     ///
     /// This is only present if the changed state was not committed to the database (i.e. if you
     /// used `call` and `call_raw` not `call_committing` or `call_raw_committing`).
-    pub state_changeset: HashMap<Address, Account>,
+    pub state_changeset: StateChangeset,
 }
 
 /// The result of a raw call.
@@ -130,7 +133,7 @@ pub struct RawCallResult {
     ///
     /// This is only present if the changed state was not committed to the database (i.e. if you
     /// used `call` and `call_raw` not `call_committing` or `call_raw_committing`).
-    pub state_changeset: HashMap<Address, Account>,
+    pub state_changeset: StateChangeset,
 }
 
 impl Default for RawCallResult {
@@ -145,7 +148,7 @@ impl Default for RawCallResult {
             labels: BTreeMap::new(),
             traces: None,
             debug: None,
-            state_changeset: HashMap::new(),
+            state_changeset: StateChangeset::new(),
         }
     }
 }
@@ -159,7 +162,7 @@ pub struct Executor<DB: DatabaseRef> {
     // Also, if we stored the VM here we would still need to
     // take `&mut self` when we are not committing to the database, since
     // we need to set `evm.env`.
-    db: CacheDB<DB>,
+    pub(crate) db: CacheDB<DB>,
     env: Env,
     inspector_config: InspectorStackConfig,
 }
@@ -411,7 +414,7 @@ where
         &self,
         address: Address,
         reverted: bool,
-        state_changeset: HashMap<Address, Account>,
+        state_changeset: StateChangeset,
         should_fail: bool,
     ) -> bool {
         // Construct a new VM with the state changeset

diff --git a/evm/src/fuzz/mod.rs b/evm/src/fuzz/mod.rs
@@ -14,7 +14,10 @@ use proptest::test_runner::{TestCaseError, TestError, TestRunner};
 use revm::db::DatabaseRef;
 use serde::{Deserialize, Serialize};
 use std::{cell::RefCell, collections::BTreeMap, fmt};
-use strategies::fuzz_calldata;
+use strategies::{
+    build_initial_state, collect_state_from_call, fuzz_calldata, fuzz_calldata_from_state,
+    EvmFuzzState,
+};
 
 /// Magic return code for the `assume` cheatcode
 pub const ASSUME_MAGIC_RETURN_CODE: &[u8] = "FOUNDRY::ASSUME".as_bytes();
@@ -54,14 +57,21 @@ where
         should_fail: bool,
         errors: Option<&Abi>,
     ) -> FuzzTestResult {
-        let strat = fuzz_calldata(func);
-
         // Stores the consumed gas and calldata of every successful fuzz call
         let cases: RefCell<Vec<FuzzCase>> = RefCell::new(Default::default());
 
         // Stores the result of the last call
         let call: RefCell<RawCallResult> = RefCell::new(Default::default());
 
+        // Stores fuzz state for use with [fuzz_calldata_from_state]
+        let state: EvmFuzzState = build_initial_state(&self.executor.db);
+
+        // TODO: We should have a `FuzzerOpts` struct where we can configure the fuzzer. When we
+        // have that, we should add a way to configure strategy weights
+        let strat = proptest::strategy::Union::new_weighted(vec![
+            (60, fuzz_calldata(func.clone())),
+            (40, fuzz_calldata_from_state(func.clone(), state.clone())),
+        ]);
         tracing::debug!(func = ?func.name, should_fail, "fuzzing");
         let run_result = self.runner.clone().run(&strat, |calldata| {
             *call.borrow_mut() = self
@@ -70,6 +80,9 @@ where
                 .expect("could not make raw evm call");
             let call = call.borrow();
 
+            // Build fuzzer state
+            collect_state_from_call(&call.logs, &call.state_changeset, state.clone());
+
             // When assume cheat code is triggered return a special string "FOUNDRY::ASSUME"
             if call.result.as_ref() == ASSUME_MAGIC_RETURN_CODE {
                 return Err(TestCaseError::reject("ASSUME: Too many rejects"))

diff --git a/evm/src/fuzz/strategies/calldata.rs b/evm/src/fuzz/strategies/calldata.rs
@@ -1,16 +1,18 @@
 use super::fuzz_param;
 use ethers::{abi::Function, types::Bytes};
-use proptest::prelude::Strategy;
+use proptest::prelude::{BoxedStrategy, Strategy};
 
-/// Given a function, it returns a proptest strategy which generates valid abi-encoded calldata
+/// Given a function, it returns a strategy which generates valid calldata
 /// for that function's input types.
+///
+/// The strategy is boxed, which gives it the same type as the strategy returned by
+/// `fuzz_calldata_from_state`.
-pub fn fuzz_calldata(func: &Function) -> impl Strategy<Value = Bytes> + '_ {
+pub fn fuzz_calldata(func: Function) -> BoxedStrategy<Bytes> {
     // We need to compose all the strategies generated for each parameter in all
     // possible combinations
     let strats = func.inputs.iter().map(|input| fuzz_param(&input.kind)).collect::<Vec<_>>();
 
-    strats.prop_map(move |tokens| {
-        tracing::trace!(input = ?tokens);
-        func.encode_input(&tokens).unwrap().into()
-    })
+    strats
+        .prop_map(move |tokens| {
+            tracing::trace!(input = ?tokens);
+            func.encode_input(&tokens).unwrap().into()
+        })
+        .boxed()
 }
diff --git a/evm/src/fuzz/strategies/mod.rs b/evm/src/fuzz/strategies/mod.rs
@@ -2,7 +2,12 @@ mod uint;
 pub use uint::UintStrategy;
 
 mod param;
-pub use param::fuzz_param;
+pub use param::{fuzz_param, fuzz_param_from_state};
 
 mod calldata;
 pub use calldata::fuzz_calldata;
+
+mod state;
+pub use state::{
+    build_initial_state, collect_state_from_call, fuzz_calldata_from_state, EvmFuzzState,
+};
diff --git a/evm/src/fuzz/strategies/param.rs b/evm/src/fuzz/strategies/param.rs
@@ -4,11 +4,14 @@ use ethers::{
 };
 use proptest::prelude::*;
 
+use super::state::EvmFuzzState;
+
 /// The max length of arrays we fuzz for is 256.
 pub const MAX_ARRAY_LEN: usize = 256;
 
-/// Given an ethabi parameter type, returns a proptest strategy for generating values for that
-/// datatype. Works with ABI Encoder v2 tuples.
+/// Given a parameter type, returns a strategy for generating values for that type.
+///
+/// Works with ABI Encoder v2 tuples.
 pub fn fuzz_param(param: &ParamType) -> impl Strategy<Value = Token> {
     match param {
         ParamType::Address => {
@@ -63,3 +66,77 @@ pub fn fuzz_param(param: &ParamType) -> impl Strategy<Value = Token> {
         }
     }
 }
+
+/// Given a parameter type, returns a strategy for generating values for that type, given some EVM
+/// fuzz state.
+///
+/// Every generated value is derived from a single 32 byte word that is sampled from `state` at
+/// the moment the value is produced, so words added to the state after the strategy was built
+/// can be selected as well. If `state` is empty, the all-zero word is used instead.
+///
+/// Works with ABI Encoder v2 tuples.
+///
+/// # Panics
+///
+/// Panics for `int`/`uint` types whose bit width divided by 8 is outside of `1..=32`.
+pub fn fuzz_param_from_state(param: &ParamType, state: EvmFuzzState) -> BoxedStrategy<Token> {
+    // The sampling closure has to own its handle to the shared state
+    let shared = state.clone();
+
+    // Select a value from the state.
+    //
+    // The size of the set is read on every draw instead of once up front: the set changes while
+    // the fuzzer runs, so a length captured here would go stale, and indexing into a set that
+    // started out empty would panic.
+    let value = any::<prop::sample::Index>().prop_map(move |index| {
+        let words = shared.borrow();
+        let word: [u8; 32] = if words.is_empty() {
+            [0; 32]
+        } else {
+            *words.iter().nth(index.index(words.len())).expect("index is within the set")
+        };
+        word
+    });
+
+    // Convert the value based on the parameter type
+    match param {
+        ParamType::Address => {
+            value.prop_map(move |value| Address::from_slice(&value[12..]).into_token()).boxed()
+        }
+        ParamType::Bytes => value.prop_map(move |value| Bytes::from(value).into_token()).boxed(),
+        ParamType::Int(n) => match n / 8 {
+            32 => {
+                value.prop_map(move |value| I256::from_raw(U256::from(value)).into_token()).boxed()
+            }
+            y @ 1..=31 => value
+                .prop_map(move |value| {
+                    // Generate a uintN in the correct range, then shift it to the range of intN
+                    // by subtracting 2^(N-1)
+                    let uint = U256::from(value) % U256::from(2usize).pow(U256::from(y * 8));
+                    let max_int_plus1 = U256::from(2usize).pow(U256::from(y * 8 - 1));
+                    let num = I256::from_raw(uint.overflowing_sub(max_int_plus1).0);
+                    num.into_token()
+                })
+                .boxed(),
+            _ => panic!("unsupported solidity type int{}", n),
+        },
+        ParamType::Uint(n) => match n / 8 {
+            32 => value.prop_map(move |value| U256::from(value).into_token()).boxed(),
+            y @ 1..=31 => value
+                .prop_map(move |value| {
+                    (U256::from(value) % (U256::from(2usize).pow(U256::from(y * 8)))).into_token()
+                })
+                .boxed(),
+            _ => panic!("unsupported solidity type uint{}", n),
+        },
+        ParamType::Bool => value.prop_map(move |value| Token::Bool(value[31] == 1)).boxed(),
+        // State words are arbitrary bytes and not necessarily valid UTF-8, so they are converted
+        // lossily: creating a `str` from invalid UTF-8 without checking is undefined behavior
+        ParamType::String => value
+            .prop_map(move |value| {
+                Token::String(String::from_utf8_lossy(&value[..]).into_owned())
+            })
+            .boxed(),
+        ParamType::Array(param) => {
+            proptest::collection::vec(fuzz_param_from_state(param, state), 0..MAX_ARRAY_LEN)
+                .prop_map(Token::Array)
+                .boxed()
+        }
+        ParamType::FixedBytes(size) => {
+            let size = *size;
+            value.prop_map(move |value| Token::FixedBytes(value[32 - size..].to_vec())).boxed()
+        }
+        ParamType::FixedArray(param, size) => {
+            // A fixed array always holds exactly `size` elements, never fewer
+            proptest::collection::vec(fuzz_param_from_state(param, state), *size)
+                .prop_map(Token::FixedArray)
+                .boxed()
+        }
+        ParamType::Tuple(params) => params
+            .iter()
+            .map(|p| fuzz_param_from_state(p, state.clone()))
+            .collect::<Vec<_>>()
+            .prop_map(Token::Tuple)
+            .boxed(),
+    }
+}
diff --git a/evm/src/fuzz/strategies/state.rs b/evm/src/fuzz/strategies/state.rs
@@ -0,0 +1,151 @@
+use super::fuzz_param_from_state;
+use crate::executor::StateChangeset;
+use bytes::Bytes;
+use ethers::{
+    abi::{Function, RawLog},
+    types::{H256, U256},
+};
+use proptest::prelude::{BoxedStrategy, Strategy};
+use revm::{
+    db::{CacheDB, DatabaseRef},
+    opcode, spec_opcode_gas, SpecId,
+};
+use std::{cell::RefCell, collections::HashSet, io::Write, rc::Rc};
+
+/// A set of arbitrary 32 byte data from the VM used to generate values for the strategy.
+///
+/// Wrapped in a shareable container.
+pub type EvmFuzzState = Rc<RefCell<HashSet<[u8; 32]>>>;
+
+/// Builds a strategy which generates valid calldata for `func`, with the arguments being
+/// derived from state taken from the EVM.
+///
+/// One strategy is created per input of `func` using [fuzz_param_from_state], and the generated
+/// tokens are encoded as the input of `func`. Shrinking is disabled for the returned strategy.
+pub fn fuzz_calldata_from_state(
+    func: Function,
+    state: EvmFuzzState,
+) -> BoxedStrategy<ethers::types::Bytes> {
+    // One strategy per input parameter, each holding its own handle to the shared state
+    let mut param_strategies = Vec::with_capacity(func.inputs.len());
+    for input in &func.inputs {
+        param_strategies.push(fuzz_param_from_state(&input.kind, state.clone()));
+    }
+
+    let calldata = param_strategies.prop_map(move |tokens| {
+        tracing::trace!(input = ?tokens);
+        ethers::types::Bytes::from(func.encode_input(&tokens).unwrap())
+    });
+
+    calldata.no_shrink().boxed()
+}
+
+/// Creates the starting [EvmFuzzState] from the contents of a database.
+///
+/// For every address yielded by the database's storage, the address itself, the balance and
+/// nonce reported by `db.basic`, and all of the storage slots and values are added to the set,
+/// each as a 32 byte word.
+pub fn build_initial_state<DB: DatabaseRef>(db: &CacheDB<DB>) -> EvmFuzzState {
+    let mut words: HashSet<[u8; 32]> = HashSet::new();
+    let mut record = |word: H256| {
+        words.insert(word.into());
+    };
+
+    for (address, slots) in db.storage() {
+        let account = db.basic(*address);
+
+        // Basic account information
+        record(H256::from(*address));
+        record(u256_to_h256(account.balance));
+        record(u256_to_h256(U256::from(account.nonce)));
+
+        // Storage keys and values
+        for (slot, value) in slots {
+            record(u256_to_h256(*slot));
+            record(u256_to_h256(*value));
+        }
+    }
+
+    Rc::new(RefCell::new(words))
+}
+
+/// Collects state changes from a [StateChangeset] and logs into an [EvmFuzzState].
+///
+/// For every account in the changeset, the address, balance, nonce, storage slots and values, and
+/// the push bytes of the account's code (if it has any) are inserted into `state`.
+///
+/// Log topics and log data (split into chunks of 32 bytes) are inserted once per call,
+/// regardless of how many accounts are part of the changeset.
+pub fn collect_state_from_call(
+    logs: &[RawLog],
+    state_changeset: &StateChangeset,
+    state: EvmFuzzState,
+) {
+    let state = &mut *state.borrow_mut();
+
+    for (address, account) in state_changeset {
+        // Insert basic account information
+        state.insert(H256::from(*address).into());
+        state.insert(u256_to_h256(account.info.balance).into());
+        state.insert(u256_to_h256(U256::from(account.info.nonce)).into());
+
+        // Insert storage
+        for (slot, value) in &account.storage {
+            state.insert(u256_to_h256(*slot).into());
+            state.insert(u256_to_h256(*value).into());
+        }
+
+        // Insert push bytes
+        if let Some(code) = &account.info.code {
+            state.extend(collect_push_bytes(code.clone()));
+        }
+    }
+
+    // Insert log topics and data.
+    //
+    // The logs do not depend on the account being visited, so they are handled outside of the
+    // account loop instead of being inserted again for every changed account
+    for log in logs {
+        state.extend(log.topics.iter().map(|topic| topic.0));
+
+        for chunk in log.data.chunks(32) {
+            // A trailing chunk shorter than 32 bytes leaves the rest of the buffer zeroed
+            let mut buffer: [u8; 32] = [0; 32];
+            let _ = (&mut buffer[..])
+                .write(chunk)
+                .expect("log data chunk was larger than 32 bytes");
+            state.insert(buffer);
+        }
+    }
+}
+
+/// The maximum number of bytes we will look at in bytecodes to find push bytes (24 KiB).
+///
+/// This is to limit the performance impact of fuzz tests that might deploy arbitrarily sized
+/// bytecode (as is the case with Solmate).
+const PUSH_BYTE_ANALYSIS_LIMIT: usize = 24 * 1024;
+
+/// Collects all push bytes from the given bytecode.
+///
+/// Each value is returned as a 32 byte word with the pushed bytes right-aligned (padded with
+/// zeroes on the left). This is the layout the fuzz strategies expect when they turn a word back
+/// into a number or take an address from its trailing 20 bytes.
+///
+/// Only the first [PUSH_BYTE_ANALYSIS_LIMIT] bytes of the bytecode are scanned for push opcodes.
+fn collect_push_bytes(code: Bytes) -> Vec<[u8; 32]> {
+    let mut bytes: Vec<[u8; 32]> = Vec::new();
+
+    // We use [SpecId::LATEST] since we do not really care what spec it is - we are not interested
+    // in gas costs.
+    let opcode_infos = spec_opcode_gas(SpecId::LATEST);
+
+    let mut i = 0;
+    while i < code.len().min(PUSH_BYTE_ANALYSIS_LIMIT) {
+        let op = code[i];
+        if opcode_infos[op as usize].is_push {
+            let push_size = (op - opcode::PUSH1 + 1) as usize;
+            let push_start = i + 1;
+            let push_end = push_start + push_size;
+
+            // As a precaution, if a fuzz test deploys malformed bytecode (such as using `CREATE2`)
+            // this will terminate the loop early. `push_end` is always past `push_start`, so
+            // checking the end of the range is sufficient.
+            if push_end > code.len() {
+                return bytes
+            }
+
+            // Right-align the pushed bytes within the word
+            let mut buffer: [u8; 32] = [0; 32];
+            buffer[32 - push_size..].copy_from_slice(&code[push_start..push_end]);
+            bytes.push(buffer);
+
+            // Skip over the push data so it is not interpreted as opcodes
+            i += push_size;
+        }
+        i += 1;
+    }
+
+    bytes
+}
+
+/// Small helper function to convert [U256] into [H256].
+///
+/// The number is written in big-endian byte order, i.e. small numbers end up in the trailing
+/// bytes of the hash. This matches how the fuzz strategies read words back (`U256::from` on a
+/// 32 byte array, and addresses taken from the last 20 bytes of a word).
+fn u256_to_h256(u: U256) -> H256 {
+    let mut h = H256::default();
+    u.to_big_endian(h.as_mut());
+    h
+}