From 0f2c0ae35d019ad7f09cb4d25f4c9326109e29b0 Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:25:28 +0530 Subject: [PATCH 01/16] feat(evm): SharedCorpus for multiple worker threads --- crates/evm/evm/src/executors/corpus.rs | 16 +- crates/evm/evm/src/executors/fuzz/mod.rs | 22 +- crates/evm/evm/src/executors/mod.rs | 1 + crates/evm/evm/src/executors/shared_corpus.rs | 567 ++++++++++++++++++ 4 files changed, 586 insertions(+), 20 deletions(-) create mode 100644 crates/evm/evm/src/executors/shared_corpus.rs diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 0f2a0505d9d1f..823b6d1d5f93b 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -30,7 +30,7 @@ const COVERAGE_MAP_SIZE: usize = 65536; /// Possible mutation strategies to apply on a call sequence. #[derive(Debug, Clone)] -enum MutationType { +pub(crate) enum MutationType { /// Splice original call sequence. Splice, /// Repeat selected call several times. @@ -47,19 +47,19 @@ enum MutationType { /// Holds Corpus information. #[derive(Serialize)] -struct CorpusEntry { +pub(crate) struct CorpusEntry { // Unique corpus identifier. - uuid: Uuid, + pub(crate) uuid: Uuid, // Total mutations of corpus as primary source. - total_mutations: usize, + pub(crate) total_mutations: usize, // New coverage found as a result of mutating this corpus. - new_finds_produced: usize, + pub(crate) new_finds_produced: usize, // Corpus call sequence. #[serde(skip_serializing)] - tx_seq: Vec, + pub(crate) tx_seq: Vec, // Whether this corpus is favored, i.e. producing new finds more often than // `FAVORABILITY_THRESHOLD`. - is_favored: bool, + pub(crate) is_favored: bool, } impl CorpusEntry { @@ -92,7 +92,7 @@ pub(crate) struct CorpusMetrics { // Number of features (new hitcount bin of previously hit edge) seen during the invariant run. cumulative_features_seen: usize, // Number of corpus entries. - corpus_count: usize, + pub(crate) corpus_count: usize, // Number of corpus entries that are favored. favored_items: usize, } diff --git a/crates/evm/evm/src/executors/fuzz/mod.rs b/crates/evm/evm/src/executors/fuzz/mod.rs index 9d2a56557d9b8..89ce46b480576 100644 --- a/crates/evm/evm/src/executors/fuzz/mod.rs +++ b/crates/evm/evm/src/executors/fuzz/mod.rs @@ -1,5 +1,6 @@ use crate::executors::{ DURATION_BETWEEN_METRICS_REPORT, Executor, FailFast, FuzzTestTimer, RawCallResult, + shared_corpus::{CorpusWorker, SharedCorpus}, }; use alloy_dyn_abi::JsonAbiExt; use alloy_json_abi::Function; @@ -27,7 +28,6 @@ use serde_json::json; use std::time::{Instant, SystemTime, UNIX_EPOCH}; mod types; -use crate::executors::corpus::CorpusManager; pub use types::{CaseOutcome, CounterExampleOutcome, FuzzOutcome}; /// Contains data collected during fuzz test runs. @@ -118,13 +118,10 @@ impl FuzzedExecutor { // We want to collect at least one trace which will be displayed to user. let max_traces_to_collect = std::cmp::max(1, self.config.gas_report_samples) as usize; - let mut corpus_manager = CorpusManager::new( - self.config.corpus.clone(), - strategy.boxed(), - &self.executor, - Some(func), - None, - )?; + let shared_corpus = + SharedCorpus::new(self.config.corpus.clone(), &self.executor, Some(func), None)?; + + let mut corpus_manager = shared_corpus.new_worker(strategy.boxed()); // Start timer for this fuzz test. let timer = FuzzTestTimer::new(self.config.timeout); @@ -144,11 +141,12 @@ impl FuzzedExecutor { failure.calldata } else { // If running with progress, then increment current run. + let metrics_read = shared_corpus.metrics.read(); if let Some(progress) = progress { progress.inc(1); // Display metrics in progress bar. if self.config.corpus.collect_edge_coverage() { - progress.set_message(format!("{}", &corpus_manager.metrics)); + progress.set_message(format!("{}", &metrics_read)); } } else if self.config.corpus.collect_edge_coverage() && last_metrics_report.elapsed() > DURATION_BETWEEN_METRICS_REPORT @@ -159,7 +157,7 @@ impl FuzzedExecutor { .duration_since(UNIX_EPOCH)? .as_secs(), "test": func.name, - "metrics": &corpus_manager.metrics, + "metrics": &*metrics_read, }); let _ = sh_println!("{}", serde_json::to_string(&metrics)?); last_metrics_report = Instant::now(); @@ -257,7 +255,7 @@ impl FuzzedExecutor { gas_report_traces: traces.into_iter().map(|a| a.arena).collect(), line_coverage: test_data.coverage, deprecated_cheatcodes: test_data.deprecated_cheatcodes, - failed_corpus_replays: corpus_manager.failed_replays(), + failed_corpus_replays: shared_corpus.failed_replays(), }; match test_data.failure { @@ -298,7 +296,7 @@ impl FuzzedExecutor { &mut self, address: Address, calldata: Bytes, - coverage_metrics: &mut CorpusManager, + coverage_metrics: &mut CorpusWorker, ) -> Result { let mut call = self .executor diff --git a/crates/evm/evm/src/executors/mod.rs b/crates/evm/evm/src/executors/mod.rs index d519f1617cd5f..1d5e8c4e1bb75 100644 --- a/crates/evm/evm/src/executors/mod.rs +++ b/crates/evm/evm/src/executors/mod.rs @@ -61,6 +61,7 @@ pub mod invariant; pub use invariant::InvariantExecutor; mod corpus; +mod shared_corpus; mod trace; pub use trace::TracingExecutor; diff --git a/crates/evm/evm/src/executors/shared_corpus.rs b/crates/evm/evm/src/executors/shared_corpus.rs new file mode 100644 index 0000000000000..c204f852e047d --- /dev/null +++ b/crates/evm/evm/src/executors/shared_corpus.rs @@ -0,0 +1,567 @@ +use crate::executors::{ + Executor, RawCallResult, + corpus::{CorpusEntry, CorpusMetrics, MutationType}, +}; +use alloy_dyn_abi::JsonAbiExt; +use alloy_json_abi::Function; +use alloy_primitives::{Bytes, U256}; +use eyre::eyre; +use foundry_config::FuzzCorpusConfig; +use foundry_evm_fuzz::{ + BasicTxDetails, + invariant::FuzzRunIdentifiedContracts, + strategies::{EvmFuzzState, mutate_param_value}, +}; +use parking_lot::RwLock; +use proptest::{ + prelude::{Just, Rng, Strategy}, + prop_oneof, + strategy::{BoxedStrategy, ValueTree}, + test_runner::TestRunner, +}; +use std::{ + sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }, + time::{SystemTime, UNIX_EPOCH}, +}; +use uuid::Uuid; + +const METADATA_SUFFIX: &str = "metadata.json"; +const JSON_EXTENSION: &str = ".json"; +const FAVORABILITY_THRESHOLD: f64 = 0.3; +const COVERAGE_MAP_SIZE: usize = 65536; + +/// Shared corpus for coverage guided fuzzing campaigns to be used by multiple [`CorpusWorker`]'s in +/// parallel. +#[derive(Clone)] +pub(crate) struct SharedCorpus { + // Corpus configuration. + config: Arc, + /// Shared in-memory corpus, populated from the persisted files and runs across multiple + /// workers. Mutation is performed on these. + /// + /// Map of corpus [`Uuid`] to [`CorpusEntry`]. + in_memory_corpus: Arc>>, + /// Number of failed replays from persisted corpus. + failed_replays: Arc, + /// History of binned hitcount of edges seen during fuzzing + history_map: Arc>>, + /// Corpus metrics. + pub(crate) metrics: Arc>, +} + +/// Operates on the [`SharedCorpus`] for coverage guided fuzzing and generating fuzz inputs using +/// [`CorpusWorker::new_input`] for stateless tests, [`CorpusWorker::new_inputs`] for stateful +/// tests. +pub(crate) struct CorpusWorker { + /// Shared Corpus + corpus: SharedCorpus, + /// Fuzzed calls generator. + tx_generator: BoxedStrategy, + /// Call sequence mutation strategy type generator. + mutation_generator: BoxedStrategy, + /// Identifier of current mutated entry for this worker. + current_mutated: Option, +} + +impl CorpusWorker { + pub fn new(corpus: SharedCorpus, tx_generator: BoxedStrategy) -> Self { + let mutation_generator = prop_oneof![ + Just(MutationType::Splice), + Just(MutationType::Repeat), + Just(MutationType::Interleave), + Just(MutationType::Prefix), + Just(MutationType::Suffix), + Just(MutationType::Abi), + ] + .boxed(); + + Self { corpus, tx_generator, mutation_generator, current_mutated: None } + } + + /// Updates stats for the given call sequence, if new coverage produced. + /// Persists the call sequence (if corpus directory is configured and new coverage) and updates + /// in-memory corpus. + pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) { + // Early return if corpus dir / coverage guided fuzzing is not configured. + let Some(corpus_dir) = &self.corpus.config.corpus_dir else { + return; + }; + + let mut in_mem_write = self.corpus.in_memory_corpus.write(); + let mut metrics_write = self.corpus.metrics.write(); + // Update stats of current mutated primary corpus. + if let Some(uuid) = &self.current_mutated { + if let Some(corpus) = in_mem_write.iter_mut().find(|corpus| corpus.uuid.eq(uuid)) { + corpus.total_mutations += 1; + if new_coverage { + corpus.new_finds_produced += 1 + } + let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64) + < FAVORABILITY_THRESHOLD; + metrics_write.update_favored(is_favored, corpus.is_favored); + corpus.is_favored = is_favored; + + trace!( + target: "corpus", + "updated corpus {}, total mutations: {}, new finds: {}", + corpus.uuid, corpus.total_mutations, corpus.new_finds_produced + ); + } + + self.current_mutated = None; + } + + // Collect inputs only if current run produced new coverage. + if !new_coverage { + return; + } + + let corpus = CorpusEntry::from_tx_seq(inputs); + let corpus_uuid = corpus.uuid; + + // Persist to disk if corpus dir is configured. + let write_result = if self.corpus.config.corpus_gzip { + foundry_common::fs::write_json_gzip_file( + corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(), + &corpus.tx_seq, + ) + } else { + foundry_common::fs::write_json_file( + corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}")).as_path(), + &corpus.tx_seq, + ) + }; + + if let Err(err) = write_result { + debug!(target: "corpus", %err, "Failed to record call sequence {:?}", &corpus.tx_seq); + } else { + trace!( + target: "corpus", + "persisted {} inputs for new coverage in {corpus_uuid} corpus", + &corpus.tx_seq.len() + ); + } + // This includes reverting txs in the corpus and `can_continue` removes + // them. We want this as it is new coverage and may help reach the other branch. + metrics_write.corpus_count += 1; + in_mem_write.push(corpus); + } + + /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than + /// configured max mutations value. Used by invariant test campaigns. + pub fn new_inputs( + &mut self, + test_runner: &mut TestRunner, + fuzz_state: &EvmFuzzState, + targeted_contracts: &FuzzRunIdentifiedContracts, + ) -> eyre::Result> { + let mut new_seq = vec![]; + + // Early return with first_input only if corpus dir / coverage guided fuzzing not + // configured. + if !self.corpus.config.is_coverage_guided() { + new_seq.push(self.new_tx(test_runner)?); + return Ok(new_seq); + }; + + let in_mem_read = self.corpus.in_memory_corpus.read(); + if !in_mem_read.is_empty() { + self.evict_oldest_corpus()?; + + let mutation_type = self + .mutation_generator + .new_tree(test_runner) + .map_err(|err| eyre!("Could not generate mutation type {err}"))? + .current(); + let rng = test_runner.rng(); + let corpus_len = in_mem_read.len(); + let primary = &in_mem_read[rng.random_range(0..corpus_len)]; + let secondary = &in_mem_read[rng.random_range(0..corpus_len)]; + + match mutation_type { + MutationType::Splice => { + trace!(target: "corpus", "splice {} and {}", primary.uuid, secondary.uuid); + + self.current_mutated = Some(primary.uuid); + + let start1 = rng.random_range(0..primary.tx_seq.len()); + let end1 = rng.random_range(start1..primary.tx_seq.len()); + + let start2 = rng.random_range(0..secondary.tx_seq.len()); + let end2 = rng.random_range(start2..secondary.tx_seq.len()); + + for tx in primary.tx_seq.iter().take(end1).skip(start1) { + new_seq.push(tx.clone()); + } + for tx in secondary.tx_seq.iter().take(end2).skip(start2) { + new_seq.push(tx.clone()); + } + } + MutationType::Repeat => { + let corpus = if rng.random::() { primary } else { secondary }; + trace!(target: "corpus", "repeat {}", corpus.uuid); + + self.current_mutated = Some(corpus.uuid); + + new_seq = corpus.tx_seq.clone(); + let start = rng.random_range(0..corpus.tx_seq.len()); + let end = rng.random_range(start..corpus.tx_seq.len()); + let item_idx = rng.random_range(0..corpus.tx_seq.len()); + let repeated = vec![new_seq[item_idx].clone(); end - start]; + new_seq.splice(start..end, repeated); + } + MutationType::Interleave => { + trace!(target: "corpus", "interleave {} with {}", primary.uuid, secondary.uuid); + + self.current_mutated = Some(primary.uuid); + + for (tx1, tx2) in primary.tx_seq.iter().zip(secondary.tx_seq.iter()) { + // chunks? + let tx = if rng.random::() { tx1.clone() } else { tx2.clone() }; + new_seq.push(tx); + } + } + MutationType::Prefix => { + let corpus = if rng.random::() { primary } else { secondary }; + trace!(target: "corpus", "overwrite prefix of {}", corpus.uuid); + + self.current_mutated = Some(corpus.uuid); + + new_seq = corpus.tx_seq.clone(); + for i in 0..rng.random_range(0..=new_seq.len()) { + new_seq[i] = self.new_tx(test_runner)?; + } + } + MutationType::Suffix => { + let corpus = if rng.random::() { primary } else { secondary }; + trace!(target: "corpus", "overwrite suffix of {}", corpus.uuid); + + self.current_mutated = Some(corpus.uuid); + + new_seq = corpus.tx_seq.clone(); + for i in new_seq.len() - rng.random_range(0..new_seq.len())..corpus.tx_seq.len() + { + new_seq[i] = self.new_tx(test_runner)?; + } + } + MutationType::Abi => { + let targets = targeted_contracts.targets.lock(); + let corpus = if rng.random::() { primary } else { secondary }; + trace!(target: "corpus", "ABI mutate args of {}", corpus.uuid); + + self.current_mutated = Some(corpus.uuid); + + new_seq = corpus.tx_seq.clone(); + + let idx = rng.random_range(0..new_seq.len()); + let tx = new_seq.get_mut(idx).unwrap(); + if let (_, Some(function)) = targets.fuzzed_artifacts(tx) { + // TODO add call_value to call details and mutate it as well as sender some + // of the time + if !function.inputs.is_empty() { + self.abi_mutate(tx, function, test_runner, fuzz_state)?; + } + } + } + } + } + + // Make sure the new sequence contains at least one tx to start fuzzing from. + if new_seq.is_empty() { + new_seq.push(self.new_tx(test_runner)?); + } + trace!(target: "corpus", "new sequence of {} calls generated", new_seq.len()); + + Ok(new_seq) + } + + /// Generates a new input from the shared in memory corpus. Evicts oldest corpus mutated more + /// than configured max mutations value. Used by fuzz test campaigns. + pub fn new_input( + &mut self, + test_runner: &mut TestRunner, + fuzz_state: &EvmFuzzState, + function: &Function, + ) -> eyre::Result { + // Early return if not running with coverage guided fuzzing. + if !self.corpus.config.is_coverage_guided() { + return Ok(self.new_tx(test_runner)?.call_details.calldata); + } + + self.evict_oldest_corpus()?; + + let in_mem_read = self.corpus.in_memory_corpus.read(); + let tx = if !in_mem_read.is_empty() { + let corpus = &in_mem_read[test_runner.rng().random_range(0..in_mem_read.len())]; + self.current_mutated = Some(corpus.uuid); + let new_seq = corpus.tx_seq.clone(); + let mut tx = new_seq.first().unwrap().clone(); + self.abi_mutate(&mut tx, function, test_runner, fuzz_state)?; + tx + } else { + self.new_tx(test_runner)? + }; + + Ok(tx.call_details.calldata) + } + + /// Generates single call from corpus strategy. + pub fn new_tx(&self, test_runner: &mut TestRunner) -> eyre::Result { + Ok(self + .tx_generator + .new_tree(test_runner) + .map_err(|_| eyre!("Could not generate case"))? + .current()) + } + + /// Returns the next call to be used in call sequence. + /// If coverage guided fuzzing is not configured or if previous input was discarded then this is + /// a new tx from strategy. + /// If running with coverage guided fuzzing it returns a new call only when sequence + /// does not have enough entries, or randomly. Otherwise, returns the next call from initial + /// sequence. + pub fn generate_next_input( + &mut self, + test_runner: &mut TestRunner, + sequence: &[BasicTxDetails], + discarded: bool, + depth: usize, + ) -> eyre::Result { + // Early return with new input if corpus dir / coverage guided fuzzing not configured or if + // call was discarded. + if self.corpus.config.corpus_dir.is_none() || discarded { + return self.new_tx(test_runner); + } + + // When running with coverage guided fuzzing enabled then generate new sequence if initial + // sequence's length is less than depth or randomly, to occasionally intermix new txs. + if depth > sequence.len().saturating_sub(1) || test_runner.rng().random_ratio(1, 10) { + return self.new_tx(test_runner); + } + + // Continue with the next call initial sequence + Ok(sequence[depth].clone()) + } + + /// Flush the oldest corpus mutated more than configured max mutations unless they are + /// favored. + fn evict_oldest_corpus(&self) -> eyre::Result<()> { + let mut in_mem_write = self.corpus.in_memory_corpus.write(); + if in_mem_write.len() > self.corpus.config.corpus_min_size.max(1) + && let Some(index) = in_mem_write.iter().position(|corpus| { + corpus.total_mutations > self.corpus.config.corpus_min_mutations + && !corpus.is_favored + }) + { + let corpus = in_mem_write.get(index).unwrap(); + + let uuid = corpus.uuid; + debug!(target: "corpus", "evict corpus {uuid}"); + + // Flush to disk the seed metadata at the time of eviction. + let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); + foundry_common::fs::write_json_file( + self.corpus + .config + .corpus_dir + .clone() + .unwrap() + .join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}")) + .as_path(), + &corpus, + )?; + + // Remove corpus from memory. + in_mem_write.remove(index); + } + Ok(()) + } + + /// Mutates calldata of provided tx by abi decoding current values and randomly selecting the + /// inputs to change. + fn abi_mutate( + &self, + tx: &mut BasicTxDetails, + function: &Function, + test_runner: &mut TestRunner, + fuzz_state: &EvmFuzzState, + ) -> eyre::Result<()> { + // let rng = test_runner.rng(); + let mut arg_mutation_rounds = + test_runner.rng().random_range(0..=function.inputs.len()).max(1); + let round_arg_idx: Vec = if function.inputs.len() <= 1 { + vec![0] + } else { + (0..arg_mutation_rounds) + .map(|_| test_runner.rng().random_range(0..function.inputs.len())) + .collect() + }; + let mut prev_inputs = function + .abi_decode_input(&tx.call_details.calldata[4..]) + .map_err(|err| eyre!("failed to load previous inputs: {err}"))?; + + while arg_mutation_rounds > 0 { + let idx = round_arg_idx[arg_mutation_rounds - 1]; + prev_inputs[idx] = mutate_param_value( + &function + .inputs + .get(idx) + .expect("Could not get input to mutate") + .selector_type() + .parse()?, + prev_inputs[idx].clone(), + test_runner, + fuzz_state, + ); + arg_mutation_rounds -= 1; + } + + tx.call_details.calldata = + function.abi_encode_input(&prev_inputs).map_err(|e| eyre!(e.to_string()))?.into(); + Ok(()) + } + + /// Collects coverage from call result and updates metrics. + pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool { + if !self.corpus.config.collect_edge_coverage() { + return false; + } + + let mut history_map_write = self.corpus.history_map.write(); + let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut history_map_write); + if new_coverage { + self.corpus.metrics.write().update_seen(is_edge); + } + new_coverage + } +} + +impl SharedCorpus { + pub fn new( + config: FuzzCorpusConfig, + executor: &Executor, + fuzzed_function: Option<&Function>, + fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, + ) -> eyre::Result { + let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; + let mut metrics = CorpusMetrics::default(); + let mut in_memory_corpus = vec![]; + let failed_replays = AtomicUsize::new(0); + + // Early return if corpus dir / coverage guided fuzzing not configured. + let Some(corpus_dir) = &config.corpus_dir else { + return Ok(Self { + config: config.into(), + in_memory_corpus: Arc::new(RwLock::new(in_memory_corpus)), + failed_replays: failed_replays.into(), + history_map: Arc::new(RwLock::new(history_map)), + metrics: Arc::new(RwLock::new(metrics)), + }); + }; + + // Ensure corpus dir for current test is created. + if !corpus_dir.is_dir() { + foundry_common::fs::create_dir_all(corpus_dir)?; + } + + let can_replay_tx = |tx: &BasicTxDetails| -> bool { + fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) + || fuzzed_function.is_some_and(|function| { + tx.call_details + .calldata + .get(..4) + .is_some_and(|selector| function.selector() == selector) + }) + }; + + 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { + let path = entry?.path(); + if path.is_file() + && let Some(name) = path.file_name().and_then(|s| s.to_str()) + && name.contains(METADATA_SUFFIX) + { + // Ignore metadata files + continue; + } + + let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { + Some("gz") => foundry_common::fs::read_json_gzip_file::>(&path), + _ => foundry_common::fs::read_json_file::>(&path), + }; + + let Ok(tx_seq) = read_corpus_result else { + trace!(target: "corpus", "failed to load corpus from {}", path.display()); + continue; + }; + + if !tx_seq.is_empty() { + // Warm up history map from loaded sequences. + let mut executor = executor.clone(); + for tx in &tx_seq { + if can_replay_tx(tx) { + let mut call_result = executor + .call_raw( + tx.sender, + tx.call_details.target, + tx.call_details.calldata.clone(), + U256::ZERO, + ) + .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; + + let (new_coverage, is_edge) = + call_result.merge_edge_coverage(&mut history_map); + if new_coverage { + metrics.update_seen(is_edge); + } + + // Commit only when running invariant / stateful tests. + if fuzzed_contracts.is_some() { + executor.commit(&mut call_result); + } + } else { + failed_replays.fetch_add(1, Ordering::Relaxed); + + // If the only input for fuzzed function cannot be replied, then move to + // next one without adding it in memory. + if fuzzed_function.is_some() { + continue 'corpus_replay; + } + } + } + + metrics.corpus_count += 1; + + trace!( + target: "corpus", + "load sequence with len {} from corpus file {}", + tx_seq.len(), + path.display() + ); + + // Populate in memory corpus with the sequence from corpus file. + + in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); + } + } + + Ok(Self { + config: config.into(), + in_memory_corpus: Arc::new(RwLock::new(in_memory_corpus)), + failed_replays: failed_replays.into(), + history_map: Arc::new(RwLock::new(history_map)), + metrics: Arc::new(RwLock::new(metrics)), + }) + } + + pub fn new_worker(&self, tx_generator: BoxedStrategy) -> CorpusWorker { + CorpusWorker::new(self.clone(), tx_generator) + } + + /// Returns campaign failed replays. + pub fn failed_replays(self) -> usize { + self.failed_replays.load(Ordering::Relaxed) + } +} From c7d9e60a770596087474ddc103a52507b4d5b5ba Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Tue, 23 Sep 2025 17:14:31 +0530 Subject: [PATCH 02/16] use SharedCorpus + CorpusWorker in InvariantExecutor --- crates/evm/evm/src/executors/invariant/mod.rs | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/crates/evm/evm/src/executors/invariant/mod.rs b/crates/evm/evm/src/executors/invariant/mod.rs index 9361dca926549..fdc9916998f51 100644 --- a/crates/evm/evm/src/executors/invariant/mod.rs +++ b/crates/evm/evm/src/executors/invariant/mod.rs @@ -1,5 +1,8 @@ use crate::{ - executors::{Executor, RawCallResult}, + executors::{ + Executor, RawCallResult, + shared_corpus::{CorpusWorker, SharedCorpus}, + }, inspectors::Fuzzer, }; use alloy_primitives::{ @@ -51,9 +54,7 @@ use serde::{Deserialize, Serialize}; use serde_json::json; mod shrink; -use crate::executors::{ - DURATION_BETWEEN_METRICS_REPORT, EvmError, FailFast, FuzzTestTimer, corpus::CorpusManager, -}; +use crate::executors::{DURATION_BETWEEN_METRICS_REPORT, EvmError, FailFast, FuzzTestTimer}; pub use shrink::check_sequence; sol! { @@ -337,7 +338,7 @@ impl<'a> InvariantExecutor<'a> { return Err(eyre!("Invariant test function should have no inputs")); } - let (mut invariant_test, mut corpus_manager) = + let (mut invariant_test, shared_corpus, mut corpus_manager) = self.prepare_test(&invariant_contract, fuzz_fixtures, deployed_libs)?; // Start timer for this invariant test. @@ -510,12 +511,13 @@ impl<'a> InvariantExecutor<'a> { // End current invariant test run. invariant_test.end_run(current_run, self.config.gas_report_samples as usize); + let metrics_read = shared_corpus.metrics.read(); if let Some(progress) = progress { // If running with progress then increment completed runs. progress.inc(1); // Display metrics in progress bar. if edge_coverage_enabled { - progress.set_message(format!("{}", &corpus_manager.metrics)); + progress.set_message(format!("{}", &metrics_read)); } } else if edge_coverage_enabled && last_metrics_report.elapsed() > DURATION_BETWEEN_METRICS_REPORT @@ -526,7 +528,7 @@ impl<'a> InvariantExecutor<'a> { .duration_since(UNIX_EPOCH)? .as_secs(), "invariant": invariant_contract.invariant_function.name, - "metrics": &corpus_manager.metrics, + "metrics": &*metrics_read, }); let _ = sh_println!("{}", serde_json::to_string(&metrics)?); last_metrics_report = Instant::now(); @@ -547,7 +549,7 @@ impl<'a> InvariantExecutor<'a> { gas_report_traces: result.gas_report_traces, line_coverage: result.line_coverage, metrics: result.metrics, - failed_corpus_replays: corpus_manager.failed_replays(), + failed_corpus_replays: shared_corpus.failed_replays(), }) } @@ -559,7 +561,7 @@ impl<'a> InvariantExecutor<'a> { invariant_contract: &InvariantContract<'_>, fuzz_fixtures: &FuzzFixtures, deployed_libs: &[Address], - ) -> Result<(InvariantTest, CorpusManager)> { + ) -> Result<(InvariantTest, SharedCorpus, CorpusWorker)> { // Finds out the chosen deployed contracts and/or senders. self.select_contract_artifacts(invariant_contract.address)?; let (targeted_senders, targeted_contracts) = @@ -633,13 +635,14 @@ impl<'a> InvariantExecutor<'a> { return Err(eyre!(error.revert_reason().unwrap_or_default())); } - let corpus_manager = CorpusManager::new( + let shared_corpus = SharedCorpus::new( self.config.corpus.clone(), - strategy.boxed(), &self.executor, None, Some(&targeted_contracts), )?; + + let corpus_worker = shared_corpus.new_worker(strategy.boxed()); let invariant_test = InvariantTest::new( fuzz_state, targeted_contracts, @@ -648,7 +651,7 @@ impl<'a> InvariantExecutor<'a> { self.runner.clone(), ); - Ok((invariant_test, corpus_manager)) + Ok((invariant_test, shared_corpus, corpus_worker)) } /// Fills the `InvariantExecutor` with the artifact identifier filters (in `path:name` string From 0aceb16162c75990fddea178d5382c84666bbb4a Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Tue, 23 Sep 2025 18:10:30 +0530 Subject: [PATCH 03/16] remove CorpusManager --- crates/evm/evm/src/executors/corpus.rs | 425 +++++++------ crates/evm/evm/src/executors/fuzz/mod.rs | 2 +- crates/evm/evm/src/executors/invariant/mod.rs | 2 +- crates/evm/evm/src/executors/mod.rs | 1 - crates/evm/evm/src/executors/shared_corpus.rs | 567 ------------------ 5 files changed, 227 insertions(+), 770 deletions(-) delete mode 100644 crates/evm/evm/src/executors/shared_corpus.rs diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 823b6d1d5f93b..32c4ec3a45554 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -9,6 +9,7 @@ use foundry_evm_fuzz::{ invariant::FuzzRunIdentifiedContracts, strategies::{EvmFuzzState, mutate_param_value}, }; +use parking_lot::RwLock; use proptest::{ prelude::{Just, Rng, Strategy}, prop_oneof, @@ -19,6 +20,10 @@ use serde::Serialize; use std::{ fmt, path::PathBuf, + sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }, time::{SystemTime, UNIX_EPOCH}, }; use uuid::Uuid; @@ -30,7 +35,7 @@ const COVERAGE_MAP_SIZE: usize = 65536; /// Possible mutation strategies to apply on a call sequence. #[derive(Debug, Clone)] -pub(crate) enum MutationType { +enum MutationType { /// Splice original call sequence. Splice, /// Repeat selected call several times. @@ -47,19 +52,19 @@ pub(crate) enum MutationType { /// Holds Corpus information. #[derive(Serialize)] -pub(crate) struct CorpusEntry { +struct CorpusEntry { // Unique corpus identifier. - pub(crate) uuid: Uuid, + uuid: Uuid, // Total mutations of corpus as primary source. - pub(crate) total_mutations: usize, + total_mutations: usize, // New coverage found as a result of mutating this corpus. - pub(crate) new_finds_produced: usize, + new_finds_produced: usize, // Corpus call sequence. #[serde(skip_serializing)] - pub(crate) tx_seq: Vec, + tx_seq: Vec, // Whether this corpus is favored, i.e. producing new finds more often than // `FAVORABILITY_THRESHOLD`. - pub(crate) is_favored: bool, + is_favored: bool, } impl CorpusEntry { @@ -92,7 +97,7 @@ pub(crate) struct CorpusMetrics { // Number of features (new hitcount bin of previously hit edge) seen during the invariant run. cumulative_features_seen: usize, // Number of corpus entries. - pub(crate) corpus_count: usize, + corpus_count: usize, // Number of corpus entries that are favored. favored_items: usize, } @@ -128,35 +133,40 @@ impl CorpusMetrics { } } -/// Fuzz corpus manager, used in coverage guided fuzzing mode by both stateless and stateful tests. -pub(crate) struct CorpusManager { - // Fuzzed calls generator. +/// Shared corpus used for coverage guided fuzzing campaigns by both stateless and stateful tests. +#[derive(Clone)] +pub(crate) struct SharedCorpus { + // Corpus configuration. + config: Arc, + /// Shared in-memory corpus, populated from the persisted files and runs across multiple + /// workers. Mutation is performed on these. + /// + /// Map of corpus [`Uuid`] to [`CorpusEntry`]. + in_memory_corpus: Arc>>, + /// Number of failed replays from persisted corpus. + failed_replays: Arc, + /// History of binned hitcount of edges seen during fuzzing + history_map: Arc>>, + /// Corpus metrics. + pub(crate) metrics: Arc>, +} + +/// Operates on the [`SharedCorpus`] for coverage guided fuzzing and generating fuzz inputs using +/// [`CorpusWorker::new_input`] for stateless tests, [`CorpusWorker::new_inputs`] for stateful +/// tests. +pub(crate) struct CorpusWorker { + /// Shared Corpus + corpus: SharedCorpus, + /// Fuzzed calls generator. tx_generator: BoxedStrategy, - // Call sequence mutation strategy type generator. + /// Call sequence mutation strategy type generator. mutation_generator: BoxedStrategy, - // Corpus configuration. - config: FuzzCorpusConfig, - // In-memory corpus, populated from persisted files and current runs. - // Mutation is performed on these. - in_memory_corpus: Vec, - // Identifier of current mutated entry. + /// Identifier of current mutated entry for this worker. current_mutated: Option, - // Number of failed replays from persisted corpus. - failed_replays: usize, - // History of binned hitcount of edges seen during fuzzing. - history_map: Vec, - // Corpus metrics. - pub(crate) metrics: CorpusMetrics, } -impl CorpusManager { - pub fn new( - config: FuzzCorpusConfig, - tx_generator: BoxedStrategy, - executor: &Executor, - fuzzed_function: Option<&Function>, - fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, - ) -> eyre::Result { +impl CorpusWorker { + pub fn new(corpus: SharedCorpus, tx_generator: BoxedStrategy) -> Self { let mutation_generator = prop_oneof![ Just(MutationType::Splice), Just(MutationType::Repeat), @@ -166,119 +176,8 @@ impl CorpusManager { Just(MutationType::Abi), ] .boxed(); - let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; - let mut metrics = CorpusMetrics::default(); - let mut in_memory_corpus = vec![]; - let mut failed_replays = 0; - // Early return if corpus dir / coverage guided fuzzing not configured. - let Some(corpus_dir) = &config.corpus_dir else { - return Ok(Self { - tx_generator, - mutation_generator, - config, - in_memory_corpus, - current_mutated: None, - failed_replays, - history_map, - metrics, - }); - }; - - // Ensure corpus dir for current test is created. - if !corpus_dir.is_dir() { - foundry_common::fs::create_dir_all(corpus_dir)?; - } - - let can_replay_tx = |tx: &BasicTxDetails| -> bool { - fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) - || fuzzed_function.is_some_and(|function| { - tx.call_details - .calldata - .get(..4) - .is_some_and(|selector| function.selector() == selector) - }) - }; - - 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { - let path = entry?.path(); - if path.is_file() - && let Some(name) = path.file_name().and_then(|s| s.to_str()) - && name.contains(METADATA_SUFFIX) - { - // Ignore metadata files - continue; - } - - let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { - Some("gz") => foundry_common::fs::read_json_gzip_file::>(&path), - _ => foundry_common::fs::read_json_file::>(&path), - }; - - let Ok(tx_seq) = read_corpus_result else { - trace!(target: "corpus", "failed to load corpus from {}", path.display()); - continue; - }; - - if !tx_seq.is_empty() { - // Warm up history map from loaded sequences. - let mut executor = executor.clone(); - for tx in &tx_seq { - if can_replay_tx(tx) { - let mut call_result = executor - .call_raw( - tx.sender, - tx.call_details.target, - tx.call_details.calldata.clone(), - U256::ZERO, - ) - .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; - - let (new_coverage, is_edge) = - call_result.merge_edge_coverage(&mut history_map); - if new_coverage { - metrics.update_seen(is_edge); - } - - // Commit only when running invariant / stateful tests. - if fuzzed_contracts.is_some() { - executor.commit(&mut call_result); - } - } else { - failed_replays += 1; - - // If the only input for fuzzed function cannot be replied, then move to - // next one without adding it in memory. - if fuzzed_function.is_some() { - continue 'corpus_replay; - } - } - } - - metrics.corpus_count += 1; - - trace!( - target: "corpus", - "load sequence with len {} from corpus file {}", - tx_seq.len(), - path.display() - ); - - // Populate in memory corpus with the sequence from corpus file. - in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); - } - } - - Ok(Self { - tx_generator, - mutation_generator, - config, - in_memory_corpus, - current_mutated: None, - failed_replays, - history_map, - metrics, - }) + Self { corpus, tx_generator, mutation_generator, current_mutated: None } } /// Updates stats for the given call sequence, if new coverage produced. @@ -286,22 +185,22 @@ impl CorpusManager { /// in-memory corpus. pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) { // Early return if corpus dir / coverage guided fuzzing is not configured. - let Some(corpus_dir) = &self.config.corpus_dir else { + let Some(corpus_dir) = &self.corpus.config.corpus_dir else { return; }; + let mut in_mem_write = self.corpus.in_memory_corpus.write(); + let mut metrics_write = self.corpus.metrics.write(); // Update stats of current mutated primary corpus. if let Some(uuid) = &self.current_mutated { - if let Some(corpus) = - self.in_memory_corpus.iter_mut().find(|corpus| corpus.uuid.eq(uuid)) - { + if let Some(corpus) = in_mem_write.iter_mut().find(|corpus| corpus.uuid.eq(uuid)) { corpus.total_mutations += 1; if new_coverage { corpus.new_finds_produced += 1 } let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64) < FAVORABILITY_THRESHOLD; - self.metrics.update_favored(is_favored, corpus.is_favored); + metrics_write.update_favored(is_favored, corpus.is_favored); corpus.is_favored = is_favored; trace!( @@ -323,7 +222,7 @@ impl CorpusManager { let corpus_uuid = corpus.uuid; // Persist to disk if corpus dir is configured. - let write_result = if self.config.corpus_gzip { + let write_result = if self.corpus.config.corpus_gzip { foundry_common::fs::write_json_gzip_file( corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(), &corpus.tx_seq, @@ -344,11 +243,10 @@ impl CorpusManager { &corpus.tx_seq.len() ); } - // This includes reverting txs in the corpus and `can_continue` removes // them. We want this as it is new coverage and may help reach the other branch. - self.metrics.corpus_count += 1; - self.in_memory_corpus.push(corpus); + metrics_write.corpus_count += 1; + in_mem_write.push(corpus); } /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than @@ -363,12 +261,13 @@ impl CorpusManager { // Early return with first_input only if corpus dir / coverage guided fuzzing not // configured. - if !self.config.is_coverage_guided() { + if !self.corpus.config.is_coverage_guided() { new_seq.push(self.new_tx(test_runner)?); return Ok(new_seq); }; - if !self.in_memory_corpus.is_empty() { + let in_mem_read = self.corpus.in_memory_corpus.read(); + if !in_mem_read.is_empty() { self.evict_oldest_corpus()?; let mutation_type = self @@ -377,9 +276,9 @@ impl CorpusManager { .map_err(|err| eyre!("Could not generate mutation type {err}"))? .current(); let rng = test_runner.rng(); - let corpus_len = self.in_memory_corpus.len(); - let primary = &self.in_memory_corpus[rng.random_range(0..corpus_len)]; - let secondary = &self.in_memory_corpus[rng.random_range(0..corpus_len)]; + let corpus_len = in_mem_read.len(); + let primary = &in_mem_read[rng.random_range(0..corpus_len)]; + let secondary = &in_mem_read[rng.random_range(0..corpus_len)]; match mutation_type { MutationType::Splice => { @@ -478,8 +377,8 @@ impl CorpusManager { Ok(new_seq) } - /// Generates new input from in memory corpus. Evicts oldest corpus mutated more than - /// configured max mutations value. Used by fuzz test campaigns. + /// Generates a new input from the shared in memory corpus. Evicts oldest corpus mutated more + /// than configured max mutations value. Used by fuzz test campaigns. pub fn new_input( &mut self, test_runner: &mut TestRunner, @@ -487,15 +386,15 @@ impl CorpusManager { function: &Function, ) -> eyre::Result { // Early return if not running with coverage guided fuzzing. - if !self.config.is_coverage_guided() { + if !self.corpus.config.is_coverage_guided() { return Ok(self.new_tx(test_runner)?.call_details.calldata); } - let tx = if !self.in_memory_corpus.is_empty() { - self.evict_oldest_corpus()?; + self.evict_oldest_corpus()?; - let corpus = &self.in_memory_corpus - [test_runner.rng().random_range(0..self.in_memory_corpus.len())]; + let in_mem_read = self.corpus.in_memory_corpus.read(); + let tx = if !in_mem_read.is_empty() { + let corpus = &in_mem_read[test_runner.rng().random_range(0..in_mem_read.len())]; self.current_mutated = Some(corpus.uuid); let new_seq = corpus.tx_seq.clone(); let mut tx = new_seq.first().unwrap().clone(); @@ -508,6 +407,15 @@ impl CorpusManager { Ok(tx.call_details.calldata) } + /// Generates single call from corpus strategy. + pub fn new_tx(&self, test_runner: &mut TestRunner) -> eyre::Result { + Ok(self + .tx_generator + .new_tree(test_runner) + .map_err(|_| eyre!("Could not generate case"))? + .current()) + } + /// Returns the next call to be used in call sequence. /// If coverage guided fuzzing is not configured or if previous input was discarded then this is /// a new tx from strategy. @@ -523,7 +431,7 @@ impl CorpusManager { ) -> eyre::Result { // Early return with new input if corpus dir / coverage guided fuzzing not configured or if // call was discarded. - if self.config.corpus_dir.is_none() || discarded { + if self.corpus.config.corpus_dir.is_none() || discarded { return self.new_tx(test_runner); } @@ -537,42 +445,17 @@ impl CorpusManager { Ok(sequence[depth].clone()) } - /// Generates single call from corpus strategy. - pub fn new_tx(&mut self, test_runner: &mut TestRunner) -> eyre::Result { - Ok(self - .tx_generator - .new_tree(test_runner) - .map_err(|_| eyre!("Could not generate case"))? - .current()) - } - - /// Returns campaign failed replays. - pub fn failed_replays(self) -> usize { - self.failed_replays - } - - /// Collects coverage from call result and updates metrics. - pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool { - if !self.config.collect_edge_coverage() { - return false; - } - - let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut self.history_map); - if new_coverage { - self.metrics.update_seen(is_edge); - } - new_coverage - } - /// Flush the oldest corpus mutated more than configured max mutations unless they are /// favored. - fn evict_oldest_corpus(&mut self) -> eyre::Result<()> { - if self.in_memory_corpus.len() > self.config.corpus_min_size.max(1) - && let Some(index) = self.in_memory_corpus.iter().position(|corpus| { - corpus.total_mutations > self.config.corpus_min_mutations && !corpus.is_favored + fn evict_oldest_corpus(&self) -> eyre::Result<()> { + let mut in_mem_write = self.corpus.in_memory_corpus.write(); + if in_mem_write.len() > self.corpus.config.corpus_min_size.max(1) + && let Some(index) = in_mem_write.iter().position(|corpus| { + corpus.total_mutations > self.corpus.config.corpus_min_mutations + && !corpus.is_favored }) { - let corpus = self.in_memory_corpus.get(index).unwrap(); + let corpus = in_mem_write.get(index).unwrap(); let uuid = corpus.uuid; debug!(target: "corpus", "evict corpus {uuid}"); @@ -580,7 +463,8 @@ impl CorpusManager { // Flush to disk the seed metadata at the time of eviction. let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); foundry_common::fs::write_json_file( - self.config + self.corpus + .config .corpus_dir .clone() .unwrap() @@ -590,7 +474,7 @@ impl CorpusManager { )?; // Remove corpus from memory. - self.in_memory_corpus.remove(index); + in_mem_write.remove(index); } Ok(()) } @@ -638,4 +522,145 @@ impl CorpusManager { function.abi_encode_input(&prev_inputs).map_err(|e| eyre!(e.to_string()))?.into(); Ok(()) } + + /// Collects coverage from call result and updates metrics. + pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool { + if !self.corpus.config.collect_edge_coverage() { + return false; + } + + let mut history_map_write = self.corpus.history_map.write(); + let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut history_map_write); + if new_coverage { + self.corpus.metrics.write().update_seen(is_edge); + } + new_coverage + } +} + +impl SharedCorpus { + pub fn new( + config: FuzzCorpusConfig, + executor: &Executor, + fuzzed_function: Option<&Function>, + fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, + ) -> eyre::Result { + let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; + let mut metrics = CorpusMetrics::default(); + let mut in_memory_corpus = vec![]; + let failed_replays = AtomicUsize::new(0); + + // Early return if corpus dir / coverage guided fuzzing not configured. + let Some(corpus_dir) = &config.corpus_dir else { + return Ok(Self { + config: config.into(), + in_memory_corpus: Arc::new(RwLock::new(in_memory_corpus)), + failed_replays: failed_replays.into(), + history_map: Arc::new(RwLock::new(history_map)), + metrics: Arc::new(RwLock::new(metrics)), + }); + }; + + // Ensure corpus dir for current test is created. + if !corpus_dir.is_dir() { + foundry_common::fs::create_dir_all(corpus_dir)?; + } + + let can_replay_tx = |tx: &BasicTxDetails| -> bool { + fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) + || fuzzed_function.is_some_and(|function| { + tx.call_details + .calldata + .get(..4) + .is_some_and(|selector| function.selector() == selector) + }) + }; + + 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { + let path = entry?.path(); + if path.is_file() + && let Some(name) = path.file_name().and_then(|s| s.to_str()) + && name.contains(METADATA_SUFFIX) + { + // Ignore metadata files + continue; + } + + let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { + Some("gz") => foundry_common::fs::read_json_gzip_file::>(&path), + _ => foundry_common::fs::read_json_file::>(&path), + }; + + let Ok(tx_seq) = read_corpus_result else { + trace!(target: "corpus", "failed to load corpus from {}", path.display()); + continue; + }; + + if !tx_seq.is_empty() { + // Warm up history map from loaded sequences. + let mut executor = executor.clone(); + for tx in &tx_seq { + if can_replay_tx(tx) { + let mut call_result = executor + .call_raw( + tx.sender, + tx.call_details.target, + tx.call_details.calldata.clone(), + U256::ZERO, + ) + .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; + + let (new_coverage, is_edge) = + call_result.merge_edge_coverage(&mut history_map); + if new_coverage { + metrics.update_seen(is_edge); + } + + // Commit only when running invariant / stateful tests. + if fuzzed_contracts.is_some() { + executor.commit(&mut call_result); + } + } else { + failed_replays.fetch_add(1, Ordering::Relaxed); + + // If the only input for fuzzed function cannot be replied, then move to + // next one without adding it in memory. + if fuzzed_function.is_some() { + continue 'corpus_replay; + } + } + } + + metrics.corpus_count += 1; + + trace!( + target: "corpus", + "load sequence with len {} from corpus file {}", + tx_seq.len(), + path.display() + ); + + // Populate in memory corpus with the sequence from corpus file. + + in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); + } + } + + Ok(Self { + config: config.into(), + in_memory_corpus: Arc::new(RwLock::new(in_memory_corpus)), + failed_replays: failed_replays.into(), + history_map: Arc::new(RwLock::new(history_map)), + metrics: Arc::new(RwLock::new(metrics)), + }) + } + + pub fn new_worker(&self, tx_generator: BoxedStrategy) -> CorpusWorker { + CorpusWorker::new(self.clone(), tx_generator) + } + + /// Returns campaign failed replays. + pub fn failed_replays(self) -> usize { + self.failed_replays.load(Ordering::Relaxed) + } } diff --git a/crates/evm/evm/src/executors/fuzz/mod.rs b/crates/evm/evm/src/executors/fuzz/mod.rs index 89ce46b480576..147483acb2374 100644 --- a/crates/evm/evm/src/executors/fuzz/mod.rs +++ b/crates/evm/evm/src/executors/fuzz/mod.rs @@ -1,6 +1,6 @@ use crate::executors::{ DURATION_BETWEEN_METRICS_REPORT, Executor, FailFast, FuzzTestTimer, RawCallResult, - shared_corpus::{CorpusWorker, SharedCorpus}, + corpus::{CorpusWorker, SharedCorpus}, }; use alloy_dyn_abi::JsonAbiExt; use alloy_json_abi::Function; diff --git a/crates/evm/evm/src/executors/invariant/mod.rs b/crates/evm/evm/src/executors/invariant/mod.rs index fdc9916998f51..cd8f6b6904b47 100644 --- a/crates/evm/evm/src/executors/invariant/mod.rs +++ b/crates/evm/evm/src/executors/invariant/mod.rs @@ -1,7 +1,7 @@ use crate::{ executors::{ Executor, RawCallResult, - shared_corpus::{CorpusWorker, SharedCorpus}, + corpus::{CorpusWorker, SharedCorpus}, }, inspectors::Fuzzer, }; diff --git a/crates/evm/evm/src/executors/mod.rs b/crates/evm/evm/src/executors/mod.rs index 1d5e8c4e1bb75..d519f1617cd5f 100644 --- a/crates/evm/evm/src/executors/mod.rs +++ b/crates/evm/evm/src/executors/mod.rs @@ -61,7 +61,6 @@ pub mod invariant; pub use invariant::InvariantExecutor; mod corpus; -mod shared_corpus; mod trace; pub use trace::TracingExecutor; diff --git a/crates/evm/evm/src/executors/shared_corpus.rs b/crates/evm/evm/src/executors/shared_corpus.rs deleted file mode 100644 index c204f852e047d..0000000000000 --- a/crates/evm/evm/src/executors/shared_corpus.rs +++ /dev/null @@ -1,567 +0,0 @@ -use crate::executors::{ - Executor, RawCallResult, - corpus::{CorpusEntry, CorpusMetrics, MutationType}, -}; -use alloy_dyn_abi::JsonAbiExt; -use alloy_json_abi::Function; -use alloy_primitives::{Bytes, U256}; -use eyre::eyre; -use foundry_config::FuzzCorpusConfig; -use foundry_evm_fuzz::{ - BasicTxDetails, - invariant::FuzzRunIdentifiedContracts, - strategies::{EvmFuzzState, mutate_param_value}, -}; -use parking_lot::RwLock; -use proptest::{ - prelude::{Just, Rng, Strategy}, - prop_oneof, - strategy::{BoxedStrategy, ValueTree}, - test_runner::TestRunner, -}; -use std::{ - sync::{ - Arc, - atomic::{AtomicUsize, Ordering}, - }, - time::{SystemTime, UNIX_EPOCH}, -}; -use uuid::Uuid; - -const METADATA_SUFFIX: &str = "metadata.json"; -const JSON_EXTENSION: &str = ".json"; -const FAVORABILITY_THRESHOLD: f64 = 0.3; -const COVERAGE_MAP_SIZE: usize = 65536; - -/// Shared corpus for coverage guided fuzzing campaigns to be used by multiple [`CorpusWorker`]'s in -/// parallel. -#[derive(Clone)] -pub(crate) struct SharedCorpus { - // Corpus configuration. - config: Arc, - /// Shared in-memory corpus, populated from the persisted files and runs across multiple - /// workers. Mutation is performed on these. - /// - /// Map of corpus [`Uuid`] to [`CorpusEntry`]. - in_memory_corpus: Arc>>, - /// Number of failed replays from persisted corpus. - failed_replays: Arc, - /// History of binned hitcount of edges seen during fuzzing - history_map: Arc>>, - /// Corpus metrics. - pub(crate) metrics: Arc>, -} - -/// Operates on the [`SharedCorpus`] for coverage guided fuzzing and generating fuzz inputs using -/// [`CorpusWorker::new_input`] for stateless tests, [`CorpusWorker::new_inputs`] for stateful -/// tests. -pub(crate) struct CorpusWorker { - /// Shared Corpus - corpus: SharedCorpus, - /// Fuzzed calls generator. - tx_generator: BoxedStrategy, - /// Call sequence mutation strategy type generator. - mutation_generator: BoxedStrategy, - /// Identifier of current mutated entry for this worker. - current_mutated: Option, -} - -impl CorpusWorker { - pub fn new(corpus: SharedCorpus, tx_generator: BoxedStrategy) -> Self { - let mutation_generator = prop_oneof![ - Just(MutationType::Splice), - Just(MutationType::Repeat), - Just(MutationType::Interleave), - Just(MutationType::Prefix), - Just(MutationType::Suffix), - Just(MutationType::Abi), - ] - .boxed(); - - Self { corpus, tx_generator, mutation_generator, current_mutated: None } - } - - /// Updates stats for the given call sequence, if new coverage produced. - /// Persists the call sequence (if corpus directory is configured and new coverage) and updates - /// in-memory corpus. - pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) { - // Early return if corpus dir / coverage guided fuzzing is not configured. - let Some(corpus_dir) = &self.corpus.config.corpus_dir else { - return; - }; - - let mut in_mem_write = self.corpus.in_memory_corpus.write(); - let mut metrics_write = self.corpus.metrics.write(); - // Update stats of current mutated primary corpus. - if let Some(uuid) = &self.current_mutated { - if let Some(corpus) = in_mem_write.iter_mut().find(|corpus| corpus.uuid.eq(uuid)) { - corpus.total_mutations += 1; - if new_coverage { - corpus.new_finds_produced += 1 - } - let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64) - < FAVORABILITY_THRESHOLD; - metrics_write.update_favored(is_favored, corpus.is_favored); - corpus.is_favored = is_favored; - - trace!( - target: "corpus", - "updated corpus {}, total mutations: {}, new finds: {}", - corpus.uuid, corpus.total_mutations, corpus.new_finds_produced - ); - } - - self.current_mutated = None; - } - - // Collect inputs only if current run produced new coverage. - if !new_coverage { - return; - } - - let corpus = CorpusEntry::from_tx_seq(inputs); - let corpus_uuid = corpus.uuid; - - // Persist to disk if corpus dir is configured. - let write_result = if self.corpus.config.corpus_gzip { - foundry_common::fs::write_json_gzip_file( - corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(), - &corpus.tx_seq, - ) - } else { - foundry_common::fs::write_json_file( - corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}")).as_path(), - &corpus.tx_seq, - ) - }; - - if let Err(err) = write_result { - debug!(target: "corpus", %err, "Failed to record call sequence {:?}", &corpus.tx_seq); - } else { - trace!( - target: "corpus", - "persisted {} inputs for new coverage in {corpus_uuid} corpus", - &corpus.tx_seq.len() - ); - } - // This includes reverting txs in the corpus and `can_continue` removes - // them. We want this as it is new coverage and may help reach the other branch. - metrics_write.corpus_count += 1; - in_mem_write.push(corpus); - } - - /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than - /// configured max mutations value. Used by invariant test campaigns. - pub fn new_inputs( - &mut self, - test_runner: &mut TestRunner, - fuzz_state: &EvmFuzzState, - targeted_contracts: &FuzzRunIdentifiedContracts, - ) -> eyre::Result> { - let mut new_seq = vec![]; - - // Early return with first_input only if corpus dir / coverage guided fuzzing not - // configured. - if !self.corpus.config.is_coverage_guided() { - new_seq.push(self.new_tx(test_runner)?); - return Ok(new_seq); - }; - - let in_mem_read = self.corpus.in_memory_corpus.read(); - if !in_mem_read.is_empty() { - self.evict_oldest_corpus()?; - - let mutation_type = self - .mutation_generator - .new_tree(test_runner) - .map_err(|err| eyre!("Could not generate mutation type {err}"))? - .current(); - let rng = test_runner.rng(); - let corpus_len = in_mem_read.len(); - let primary = &in_mem_read[rng.random_range(0..corpus_len)]; - let secondary = &in_mem_read[rng.random_range(0..corpus_len)]; - - match mutation_type { - MutationType::Splice => { - trace!(target: "corpus", "splice {} and {}", primary.uuid, secondary.uuid); - - self.current_mutated = Some(primary.uuid); - - let start1 = rng.random_range(0..primary.tx_seq.len()); - let end1 = rng.random_range(start1..primary.tx_seq.len()); - - let start2 = rng.random_range(0..secondary.tx_seq.len()); - let end2 = rng.random_range(start2..secondary.tx_seq.len()); - - for tx in primary.tx_seq.iter().take(end1).skip(start1) { - new_seq.push(tx.clone()); - } - for tx in secondary.tx_seq.iter().take(end2).skip(start2) { - new_seq.push(tx.clone()); - } - } - MutationType::Repeat => { - let corpus = if rng.random::() { primary } else { secondary }; - trace!(target: "corpus", "repeat {}", corpus.uuid); - - self.current_mutated = Some(corpus.uuid); - - new_seq = corpus.tx_seq.clone(); - let start = rng.random_range(0..corpus.tx_seq.len()); - let end = rng.random_range(start..corpus.tx_seq.len()); - let item_idx = rng.random_range(0..corpus.tx_seq.len()); - let repeated = vec![new_seq[item_idx].clone(); end - start]; - new_seq.splice(start..end, repeated); - } - MutationType::Interleave => { - trace!(target: "corpus", "interleave {} with {}", primary.uuid, secondary.uuid); - - self.current_mutated = Some(primary.uuid); - - for (tx1, tx2) in primary.tx_seq.iter().zip(secondary.tx_seq.iter()) { - // chunks? - let tx = if rng.random::() { tx1.clone() } else { tx2.clone() }; - new_seq.push(tx); - } - } - MutationType::Prefix => { - let corpus = if rng.random::() { primary } else { secondary }; - trace!(target: "corpus", "overwrite prefix of {}", corpus.uuid); - - self.current_mutated = Some(corpus.uuid); - - new_seq = corpus.tx_seq.clone(); - for i in 0..rng.random_range(0..=new_seq.len()) { - new_seq[i] = self.new_tx(test_runner)?; - } - } - MutationType::Suffix => { - let corpus = if rng.random::() { primary } else { secondary }; - trace!(target: "corpus", "overwrite suffix of {}", corpus.uuid); - - self.current_mutated = Some(corpus.uuid); - - new_seq = corpus.tx_seq.clone(); - for i in new_seq.len() - rng.random_range(0..new_seq.len())..corpus.tx_seq.len() - { - new_seq[i] = self.new_tx(test_runner)?; - } - } - MutationType::Abi => { - let targets = targeted_contracts.targets.lock(); - let corpus = if rng.random::() { primary } else { secondary }; - trace!(target: "corpus", "ABI mutate args of {}", corpus.uuid); - - self.current_mutated = Some(corpus.uuid); - - new_seq = corpus.tx_seq.clone(); - - let idx = rng.random_range(0..new_seq.len()); - let tx = new_seq.get_mut(idx).unwrap(); - if let (_, Some(function)) = targets.fuzzed_artifacts(tx) { - // TODO add call_value to call details and mutate it as well as sender some - // of the time - if !function.inputs.is_empty() { - self.abi_mutate(tx, function, test_runner, fuzz_state)?; - } - } - } - } - } - - // Make sure the new sequence contains at least one tx to start fuzzing from. - if new_seq.is_empty() { - new_seq.push(self.new_tx(test_runner)?); - } - trace!(target: "corpus", "new sequence of {} calls generated", new_seq.len()); - - Ok(new_seq) - } - - /// Generates a new input from the shared in memory corpus. Evicts oldest corpus mutated more - /// than configured max mutations value. Used by fuzz test campaigns. - pub fn new_input( - &mut self, - test_runner: &mut TestRunner, - fuzz_state: &EvmFuzzState, - function: &Function, - ) -> eyre::Result { - // Early return if not running with coverage guided fuzzing. - if !self.corpus.config.is_coverage_guided() { - return Ok(self.new_tx(test_runner)?.call_details.calldata); - } - - self.evict_oldest_corpus()?; - - let in_mem_read = self.corpus.in_memory_corpus.read(); - let tx = if !in_mem_read.is_empty() { - let corpus = &in_mem_read[test_runner.rng().random_range(0..in_mem_read.len())]; - self.current_mutated = Some(corpus.uuid); - let new_seq = corpus.tx_seq.clone(); - let mut tx = new_seq.first().unwrap().clone(); - self.abi_mutate(&mut tx, function, test_runner, fuzz_state)?; - tx - } else { - self.new_tx(test_runner)? - }; - - Ok(tx.call_details.calldata) - } - - /// Generates single call from corpus strategy. - pub fn new_tx(&self, test_runner: &mut TestRunner) -> eyre::Result { - Ok(self - .tx_generator - .new_tree(test_runner) - .map_err(|_| eyre!("Could not generate case"))? - .current()) - } - - /// Returns the next call to be used in call sequence. - /// If coverage guided fuzzing is not configured or if previous input was discarded then this is - /// a new tx from strategy. - /// If running with coverage guided fuzzing it returns a new call only when sequence - /// does not have enough entries, or randomly. Otherwise, returns the next call from initial - /// sequence. - pub fn generate_next_input( - &mut self, - test_runner: &mut TestRunner, - sequence: &[BasicTxDetails], - discarded: bool, - depth: usize, - ) -> eyre::Result { - // Early return with new input if corpus dir / coverage guided fuzzing not configured or if - // call was discarded. - if self.corpus.config.corpus_dir.is_none() || discarded { - return self.new_tx(test_runner); - } - - // When running with coverage guided fuzzing enabled then generate new sequence if initial - // sequence's length is less than depth or randomly, to occasionally intermix new txs. - if depth > sequence.len().saturating_sub(1) || test_runner.rng().random_ratio(1, 10) { - return self.new_tx(test_runner); - } - - // Continue with the next call initial sequence - Ok(sequence[depth].clone()) - } - - /// Flush the oldest corpus mutated more than configured max mutations unless they are - /// favored. - fn evict_oldest_corpus(&self) -> eyre::Result<()> { - let mut in_mem_write = self.corpus.in_memory_corpus.write(); - if in_mem_write.len() > self.corpus.config.corpus_min_size.max(1) - && let Some(index) = in_mem_write.iter().position(|corpus| { - corpus.total_mutations > self.corpus.config.corpus_min_mutations - && !corpus.is_favored - }) - { - let corpus = in_mem_write.get(index).unwrap(); - - let uuid = corpus.uuid; - debug!(target: "corpus", "evict corpus {uuid}"); - - // Flush to disk the seed metadata at the time of eviction. - let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); - foundry_common::fs::write_json_file( - self.corpus - .config - .corpus_dir - .clone() - .unwrap() - .join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}")) - .as_path(), - &corpus, - )?; - - // Remove corpus from memory. - in_mem_write.remove(index); - } - Ok(()) - } - - /// Mutates calldata of provided tx by abi decoding current values and randomly selecting the - /// inputs to change. - fn abi_mutate( - &self, - tx: &mut BasicTxDetails, - function: &Function, - test_runner: &mut TestRunner, - fuzz_state: &EvmFuzzState, - ) -> eyre::Result<()> { - // let rng = test_runner.rng(); - let mut arg_mutation_rounds = - test_runner.rng().random_range(0..=function.inputs.len()).max(1); - let round_arg_idx: Vec = if function.inputs.len() <= 1 { - vec![0] - } else { - (0..arg_mutation_rounds) - .map(|_| test_runner.rng().random_range(0..function.inputs.len())) - .collect() - }; - let mut prev_inputs = function - .abi_decode_input(&tx.call_details.calldata[4..]) - .map_err(|err| eyre!("failed to load previous inputs: {err}"))?; - - while arg_mutation_rounds > 0 { - let idx = round_arg_idx[arg_mutation_rounds - 1]; - prev_inputs[idx] = mutate_param_value( - &function - .inputs - .get(idx) - .expect("Could not get input to mutate") - .selector_type() - .parse()?, - prev_inputs[idx].clone(), - test_runner, - fuzz_state, - ); - arg_mutation_rounds -= 1; - } - - tx.call_details.calldata = - function.abi_encode_input(&prev_inputs).map_err(|e| eyre!(e.to_string()))?.into(); - Ok(()) - } - - /// Collects coverage from call result and updates metrics. - pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool { - if !self.corpus.config.collect_edge_coverage() { - return false; - } - - let mut history_map_write = self.corpus.history_map.write(); - let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut history_map_write); - if new_coverage { - self.corpus.metrics.write().update_seen(is_edge); - } - new_coverage - } -} - -impl SharedCorpus { - pub fn new( - config: FuzzCorpusConfig, - executor: &Executor, - fuzzed_function: Option<&Function>, - fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, - ) -> eyre::Result { - let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; - let mut metrics = CorpusMetrics::default(); - let mut in_memory_corpus = vec![]; - let failed_replays = AtomicUsize::new(0); - - // Early return if corpus dir / coverage guided fuzzing not configured. - let Some(corpus_dir) = &config.corpus_dir else { - return Ok(Self { - config: config.into(), - in_memory_corpus: Arc::new(RwLock::new(in_memory_corpus)), - failed_replays: failed_replays.into(), - history_map: Arc::new(RwLock::new(history_map)), - metrics: Arc::new(RwLock::new(metrics)), - }); - }; - - // Ensure corpus dir for current test is created. - if !corpus_dir.is_dir() { - foundry_common::fs::create_dir_all(corpus_dir)?; - } - - let can_replay_tx = |tx: &BasicTxDetails| -> bool { - fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) - || fuzzed_function.is_some_and(|function| { - tx.call_details - .calldata - .get(..4) - .is_some_and(|selector| function.selector() == selector) - }) - }; - - 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { - let path = entry?.path(); - if path.is_file() - && let Some(name) = path.file_name().and_then(|s| s.to_str()) - && name.contains(METADATA_SUFFIX) - { - // Ignore metadata files - continue; - } - - let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { - Some("gz") => foundry_common::fs::read_json_gzip_file::>(&path), - _ => foundry_common::fs::read_json_file::>(&path), - }; - - let Ok(tx_seq) = read_corpus_result else { - trace!(target: "corpus", "failed to load corpus from {}", path.display()); - continue; - }; - - if !tx_seq.is_empty() { - // Warm up history map from loaded sequences. - let mut executor = executor.clone(); - for tx in &tx_seq { - if can_replay_tx(tx) { - let mut call_result = executor - .call_raw( - tx.sender, - tx.call_details.target, - tx.call_details.calldata.clone(), - U256::ZERO, - ) - .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; - - let (new_coverage, is_edge) = - call_result.merge_edge_coverage(&mut history_map); - if new_coverage { - metrics.update_seen(is_edge); - } - - // Commit only when running invariant / stateful tests. - if fuzzed_contracts.is_some() { - executor.commit(&mut call_result); - } - } else { - failed_replays.fetch_add(1, Ordering::Relaxed); - - // If the only input for fuzzed function cannot be replied, then move to - // next one without adding it in memory. - if fuzzed_function.is_some() { - continue 'corpus_replay; - } - } - } - - metrics.corpus_count += 1; - - trace!( - target: "corpus", - "load sequence with len {} from corpus file {}", - tx_seq.len(), - path.display() - ); - - // Populate in memory corpus with the sequence from corpus file. - - in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); - } - } - - Ok(Self { - config: config.into(), - in_memory_corpus: Arc::new(RwLock::new(in_memory_corpus)), - failed_replays: failed_replays.into(), - history_map: Arc::new(RwLock::new(history_map)), - metrics: Arc::new(RwLock::new(metrics)), - }) - } - - pub fn new_worker(&self, tx_generator: BoxedStrategy) -> CorpusWorker { - CorpusWorker::new(self.clone(), tx_generator) - } - - /// Returns campaign failed replays. - pub fn failed_replays(self) -> usize { - self.failed_replays.load(Ordering::Relaxed) - } -} From 41944cdd2208b7ec73b1e63f4855166c8cb4344e Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Wed, 24 Sep 2025 16:35:06 +0530 Subject: [PATCH 04/16] feat: Master-Worker corpus basic setup --- crates/evm/evm/src/executors/corpus.rs | 24 +- crates/evm/evm/src/executors/fuzz/mod.rs | 16 +- crates/evm/evm/src/executors/mod.rs | 1 + crates/evm/evm/src/executors/worker_corpus.rs | 594 ++++++++++++++++++ 4 files changed, 615 insertions(+), 20 deletions(-) create mode 100644 crates/evm/evm/src/executors/worker_corpus.rs diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 32c4ec3a45554..56f0e29f8ec86 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -35,7 +35,7 @@ const COVERAGE_MAP_SIZE: usize = 65536; /// Possible mutation strategies to apply on a call sequence. #[derive(Debug, Clone)] -enum MutationType { +pub(crate) enum MutationType { /// Splice original call sequence. Splice, /// Repeat selected call several times. @@ -51,20 +51,20 @@ enum MutationType { } /// Holds Corpus information. -#[derive(Serialize)] -struct CorpusEntry { +#[derive(Clone, Serialize)] +pub(crate) struct CorpusEntry { // Unique corpus identifier. - uuid: Uuid, + pub(crate) uuid: Uuid, // Total mutations of corpus as primary source. - total_mutations: usize, + pub(crate) total_mutations: usize, // New coverage found as a result of mutating this corpus. - new_finds_produced: usize, + pub(crate) new_finds_produced: usize, // Corpus call sequence. #[serde(skip_serializing)] - tx_seq: Vec, + pub(crate) tx_seq: Vec, // Whether this corpus is favored, i.e. producing new finds more often than // `FAVORABILITY_THRESHOLD`. - is_favored: bool, + pub(crate) is_favored: bool, } impl CorpusEntry { @@ -93,13 +93,13 @@ impl CorpusEntry { #[derive(Serialize, Default)] pub(crate) struct CorpusMetrics { // Number of edges seen during the invariant run. - cumulative_edges_seen: usize, + pub(crate) cumulative_edges_seen: usize, // Number of features (new hitcount bin of previously hit edge) seen during the invariant run. - cumulative_features_seen: usize, + pub(crate) cumulative_features_seen: usize, // Number of corpus entries. - corpus_count: usize, + pub(crate) corpus_count: usize, // Number of corpus entries that are favored. - favored_items: usize, + pub(crate) favored_items: usize, } impl fmt::Display for CorpusMetrics { diff --git a/crates/evm/evm/src/executors/fuzz/mod.rs b/crates/evm/evm/src/executors/fuzz/mod.rs index 147483acb2374..5854777246d5c 100644 --- a/crates/evm/evm/src/executors/fuzz/mod.rs +++ b/crates/evm/evm/src/executors/fuzz/mod.rs @@ -1,6 +1,7 @@ use crate::executors::{ DURATION_BETWEEN_METRICS_REPORT, Executor, FailFast, FuzzTestTimer, RawCallResult, corpus::{CorpusWorker, SharedCorpus}, + worker_corpus::{MasterCorpus, WorkerCorpus}, }; use alloy_dyn_abi::JsonAbiExt; use alloy_json_abi::Function; @@ -118,10 +119,10 @@ impl FuzzedExecutor { // We want to collect at least one trace which will be displayed to user. let max_traces_to_collect = std::cmp::max(1, self.config.gas_report_samples) as usize; - let shared_corpus = - SharedCorpus::new(self.config.corpus.clone(), &self.executor, Some(func), None)?; + let master_corpus = + MasterCorpus::new(self.config.corpus.clone(), &self.executor, Some(func), None)?; - let mut corpus_manager = shared_corpus.new_worker(strategy.boxed()); + let mut corpus_manager = WorkerCorpus::new(0, &master_corpus, strategy.boxed())?; // Start timer for this fuzz test. let timer = FuzzTestTimer::new(self.config.timeout); @@ -141,12 +142,11 @@ impl FuzzedExecutor { failure.calldata } else { // If running with progress, then increment current run. - let metrics_read = shared_corpus.metrics.read(); if let Some(progress) = progress { progress.inc(1); // Display metrics in progress bar. if self.config.corpus.collect_edge_coverage() { - progress.set_message(format!("{}", &metrics_read)); + progress.set_message(format!("{}", &corpus_manager.metrics)); } } else if self.config.corpus.collect_edge_coverage() && last_metrics_report.elapsed() > DURATION_BETWEEN_METRICS_REPORT @@ -157,7 +157,7 @@ impl FuzzedExecutor { .duration_since(UNIX_EPOCH)? .as_secs(), "test": func.name, - "metrics": &*metrics_read, + "metrics": &corpus_manager.metrics, }); let _ = sh_println!("{}", serde_json::to_string(&metrics)?); last_metrics_report = Instant::now(); @@ -255,7 +255,7 @@ impl FuzzedExecutor { gas_report_traces: traces.into_iter().map(|a| a.arena).collect(), line_coverage: test_data.coverage, deprecated_cheatcodes: test_data.deprecated_cheatcodes, - failed_corpus_replays: shared_corpus.failed_replays(), + failed_corpus_replays: master_corpus.failed_replays, }; match test_data.failure { @@ -296,7 +296,7 @@ impl FuzzedExecutor { &mut self, address: Address, calldata: Bytes, - coverage_metrics: &mut CorpusWorker, + coverage_metrics: &mut WorkerCorpus, ) -> Result { let mut call = self .executor diff --git a/crates/evm/evm/src/executors/mod.rs b/crates/evm/evm/src/executors/mod.rs index d519f1617cd5f..43952c4e1cd9f 100644 --- a/crates/evm/evm/src/executors/mod.rs +++ b/crates/evm/evm/src/executors/mod.rs @@ -62,6 +62,7 @@ pub use invariant::InvariantExecutor; mod corpus; mod trace; +mod worker_corpus; pub use trace::TracingExecutor; diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs new file mode 100644 index 0000000000000..79c8046cb4348 --- /dev/null +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -0,0 +1,594 @@ +use std::{ + sync::Arc, + time::{SystemTime, UNIX_EPOCH}, +}; + +use alloy_dyn_abi::JsonAbiExt; +use alloy_json_abi::Function; +use alloy_primitives::{Bytes, U256}; +use eyre::eyre; +use foundry_config::FuzzCorpusConfig; +use foundry_evm_fuzz::{ + BasicTxDetails, + invariant::FuzzRunIdentifiedContracts, + strategies::{EvmFuzzState, mutate_param_value}, +}; +use proptest::{ + prelude::{BoxedStrategy, Just, Rng, Strategy}, + prop_oneof, + test_runner::TestRunner, +}; +use uuid::Uuid; + +use crate::executors::{ + Executor, RawCallResult, + corpus::{CorpusEntry, CorpusMetrics, MutationType}, +}; + +const METADATA_SUFFIX: &str = "metadata.json"; +const JSON_EXTENSION: &str = ".json"; +const FAVORABILITY_THRESHOLD: f64 = 0.3; +const COVERAGE_MAP_SIZE: usize = 65536; +const WORKER: &str = "worker"; + +/// Per-worker corpus manager. +pub struct WorkerCorpus { + /// Worker Id + id: u32, + /// In-memory corpus entries populated from the persisted files and + /// runs administered by this worker. + in_memory_corpus: Vec, + /// History of binned hitcount of edges seen during fuzzing + history_map: Vec, + /// Worker Metrics + pub(crate) metrics: CorpusMetrics, + /// Fuzzed calls generator. + tx_generator: BoxedStrategy, + /// Call sequence mutation strategy type generator used by stateful fuzzing. + mutation_generator: BoxedStrategy, + /// Identifier of current mutated entry for this worker. + current_mutated: Option, + /// Config + config: Arc, + /// Indices of new entries added to [`WorkerCorpus::in_memory_corpus`] since last sync. + new_entry_indices: Vec, +} + +impl WorkerCorpus { + pub fn new( + id: u32, + master: &MasterCorpus, + tx_generator: BoxedStrategy, + ) -> eyre::Result { + let config = master.config.clone(); + let mutation_generator = prop_oneof![ + Just(MutationType::Splice), + Just(MutationType::Repeat), + Just(MutationType::Interleave), + Just(MutationType::Prefix), + Just(MutationType::Suffix), + Just(MutationType::Abi), + ] + .boxed(); + + if let Some(corpus_dir) = &config.corpus_dir { + let worker_dir = corpus_dir.join(format!("{WORKER}{id}")); + + if !worker_dir.is_dir() { + foundry_common::fs::create_dir_all(worker_dir)?; + } + } + + Ok(Self { + id, + in_memory_corpus: master.in_memory_corpus.clone(), + // TODO: This clones the history_map with size COVERAGE_MAP_SIZE + // history_map size per worker should be dependent on the total number of workers? + history_map: master.history_map.clone(), + metrics: Default::default(), + tx_generator, + mutation_generator, + current_mutated: None, + config, + new_entry_indices: Default::default(), + }) + } + + /// Updates stats for the given call sequence, if new coverage produced. + /// Persists the call sequence (if corpus directory is configured and new coverage) and updates + /// in-memory corpus. + pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) { + // Early return if corpus dir / coverage guided fuzzing is not configured. + let worker_dir = if let Some(corpus_dir) = &self.config.corpus_dir { + corpus_dir.join(format!("{WORKER}{}", self.id)) + } else { + return; + }; + + // Update stats of current mutated primary corpus. + if let Some(uuid) = &self.current_mutated { + if let Some(corpus) = + self.in_memory_corpus.iter_mut().find(|corpus| corpus.uuid.eq(uuid)) + { + corpus.total_mutations += 1; + if new_coverage { + corpus.new_finds_produced += 1 + } + let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64) + < FAVORABILITY_THRESHOLD; + self.metrics.update_favored(is_favored, corpus.is_favored); + corpus.is_favored = is_favored; + + trace!( + target: "corpus", + "updated worker {} corpus {}, total mutations: {}, new finds: {}", + self.id, corpus.uuid, corpus.total_mutations, corpus.new_finds_produced + ); + } + + self.current_mutated = None; + } + + // Collect inputs only if current run produced new coverage. + if !new_coverage { + return; + } + + let corpus = CorpusEntry::from_tx_seq(inputs); + let corpus_uuid = corpus.uuid; + + // Persist to disk if corpus dir is configured. + let write_result = if self.config.corpus_gzip { + foundry_common::fs::write_json_gzip_file( + worker_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(), + &corpus.tx_seq, + ) + } else { + foundry_common::fs::write_json_file( + worker_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}")).as_path(), + &corpus.tx_seq, + ) + }; + + if let Err(err) = write_result { + debug!(target: "corpus", %err, "Failed to record call sequence {:?} in worker {}", &corpus.tx_seq, self.id); + } else { + trace!( + target: "corpus", + "persisted {} inputs for new coverage in worker {} for {corpus_uuid} corpus", + self.id, &corpus.tx_seq.len() + ); + } + + // Track in-memory corpus changes to update MasterWorker on sync + let new_index = self.in_memory_corpus.len(); + self.new_entry_indices.push(new_index); + + // This includes reverting txs in the corpus and `can_continue` removes + // them. We want this as it is new coverage and may help reach the other branch. + self.metrics.corpus_count += 1; + self.in_memory_corpus.push(corpus); + } + + /// Collects coverage from call result and updates metrics. + pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool { + if !self.config.collect_edge_coverage() { + return false; + } + + let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut self.history_map); + if new_coverage { + self.metrics.update_seen(is_edge); + } + new_coverage + } + + /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than + /// configured max mutations value. Used by invariant test campaigns. + pub fn new_inputs( + &mut self, + test_runner: &mut TestRunner, + fuzz_state: &EvmFuzzState, + targeted_contracts: &FuzzRunIdentifiedContracts, + ) -> eyre::Result> { + let mut new_seq = vec![]; + + // Early return with first_input only if corpus dir / coverage guided fuzzing not + // configured. + if !self.config.is_coverage_guided() { + new_seq.push(self.new_tx(test_runner)?); + return Ok(new_seq); + }; + + if !self.in_memory_corpus.is_empty() { + self.evict_oldest_corpus()?; + + let mutation_type = self + .mutation_generator + .new_tree(test_runner) + .map_err(|err| eyre!("Could not generate mutation type {err}"))? + .current(); + + let rng = test_runner.rng(); + let corpus_len = self.in_memory_corpus.len(); + let primary = &self.in_memory_corpus[rng.random_range(0..corpus_len)]; + let secondary = &self.in_memory_corpus[rng.random_range(0..corpus_len)]; + + match mutation_type { + MutationType::Splice => { + trace!(target: "corpus", "splice {} and {}", primary.uuid, secondary.uuid); + + self.current_mutated = Some(primary.uuid); + + let start1 = rng.random_range(0..primary.tx_seq.len()); + let end1 = rng.random_range(start1..primary.tx_seq.len()); + + let start2 = rng.random_range(0..secondary.tx_seq.len()); + let end2 = rng.random_range(start2..secondary.tx_seq.len()); + + for tx in primary.tx_seq.iter().take(end1).skip(start1) { + new_seq.push(tx.clone()); + } + for tx in secondary.tx_seq.iter().take(end2).skip(start2) { + new_seq.push(tx.clone()); + } + } + MutationType::Repeat => { + let corpus = if rng.random::() { primary } else { secondary }; + trace!(target: "corpus", "repeat {}", corpus.uuid); + + self.current_mutated = Some(corpus.uuid); + + new_seq = corpus.tx_seq.clone(); + let start = rng.random_range(0..corpus.tx_seq.len()); + let end = rng.random_range(start..corpus.tx_seq.len()); + let item_idx = rng.random_range(0..corpus.tx_seq.len()); + let repeated = vec![new_seq[item_idx].clone(); end - start]; + new_seq.splice(start..end, repeated); + } + MutationType::Interleave => { + trace!(target: "corpus", "interleave {} with {}", primary.uuid, secondary.uuid); + + self.current_mutated = Some(primary.uuid); + + for (tx1, tx2) in primary.tx_seq.iter().zip(secondary.tx_seq.iter()) { + // chunks? + let tx = if rng.random::() { tx1.clone() } else { tx2.clone() }; + new_seq.push(tx); + } + } + MutationType::Prefix => { + let corpus = if rng.random::() { primary } else { secondary }; + trace!(target: "corpus", "overwrite prefix of {}", corpus.uuid); + + self.current_mutated = Some(corpus.uuid); + + new_seq = corpus.tx_seq.clone(); + for i in 0..rng.random_range(0..=new_seq.len()) { + new_seq[i] = self.new_tx(test_runner)?; + } + } + MutationType::Suffix => { + let corpus = if rng.random::() { primary } else { secondary }; + trace!(target: "corpus", "overwrite suffix of {}", corpus.uuid); + + self.current_mutated = Some(corpus.uuid); + + new_seq = corpus.tx_seq.clone(); + for i in new_seq.len() - rng.random_range(0..new_seq.len())..corpus.tx_seq.len() + { + new_seq[i] = self.new_tx(test_runner)?; + } + } + MutationType::Abi => { + let targets = targeted_contracts.targets.lock(); + let corpus = if rng.random::() { primary } else { secondary }; + trace!(target: "corpus", "ABI mutate args of {}", corpus.uuid); + + self.current_mutated = Some(corpus.uuid); + + new_seq = corpus.tx_seq.clone(); + + let idx = rng.random_range(0..new_seq.len()); + let tx = new_seq.get_mut(idx).unwrap(); + if let (_, Some(function)) = targets.fuzzed_artifacts(tx) { + // TODO add call_value to call details and mutate it as well as sender some + // of the time + if !function.inputs.is_empty() { + self.abi_mutate(tx, function, test_runner, fuzz_state)?; + } + } + } + } + } + + // Make sure the new sequence contains at least one tx to start fuzzing from. + if new_seq.is_empty() { + new_seq.push(self.new_tx(test_runner)?); + } + trace!(target: "corpus", "new sequence of {} calls generated", new_seq.len()); + + Ok(new_seq) + } + + /// Generates a new input from the shared in memory corpus. Evicts oldest corpus mutated more + /// than configured max mutations value. Used by fuzz (stateless) test campaigns. + pub fn new_input( + &mut self, + test_runner: &mut TestRunner, + fuzz_state: &EvmFuzzState, + function: &Function, + ) -> eyre::Result { + // Early return if not running with coverage guided fuzzing. + if !self.config.is_coverage_guided() { + return Ok(self.new_tx(test_runner)?.call_details.calldata); + } + + self.evict_oldest_corpus()?; + + let tx = if !self.in_memory_corpus.is_empty() { + let corpus = &self.in_memory_corpus + [test_runner.rng().random_range(0..self.in_memory_corpus.len())]; + self.current_mutated = Some(corpus.uuid); + let new_seq = corpus.tx_seq.clone(); + let mut tx = new_seq.first().unwrap().clone(); + self.abi_mutate(&mut tx, function, test_runner, fuzz_state)?; + tx + } else { + self.new_tx(test_runner)? + }; + + Ok(tx.call_details.calldata) + } + + /// Generates single call from corpus strategy. + pub fn new_tx(&self, test_runner: &mut TestRunner) -> eyre::Result { + Ok(self + .tx_generator + .new_tree(test_runner) + .map_err(|_| eyre!("Could not generate case"))? + .current()) + } + + /// Returns the next call to be used in call sequence. + /// If coverage guided fuzzing is not configured or if previous input was discarded then this is + /// a new tx from strategy. + /// If running with coverage guided fuzzing it returns a new call only when sequence + /// does not have enough entries, or randomly. Otherwise, returns the next call from initial + /// sequence. + pub fn generate_next_input( + &mut self, + test_runner: &mut TestRunner, + sequence: &[BasicTxDetails], + discarded: bool, + depth: usize, + ) -> eyre::Result { + // Early return with new input if corpus dir / coverage guided fuzzing not configured or if + // call was discarded. + if self.config.corpus_dir.is_none() || discarded { + return self.new_tx(test_runner); + } + + // When running with coverage guided fuzzing enabled then generate new sequence if initial + // sequence's length is less than depth or randomly, to occasionally intermix new txs. + if depth > sequence.len().saturating_sub(1) || test_runner.rng().random_ratio(1, 10) { + return self.new_tx(test_runner); + } + + // Continue with the next call initial sequence + Ok(sequence[depth].clone()) + } + + /// Flush the oldest corpus mutated more than configured max mutations unless they are + /// favored. + fn evict_oldest_corpus(&mut self) -> eyre::Result<()> { + if self.in_memory_corpus.len() > self.config.corpus_min_size.max(1) + && let Some(index) = self.in_memory_corpus.iter().position(|corpus| { + corpus.total_mutations > self.config.corpus_min_mutations && !corpus.is_favored + }) + { + let corpus = self.in_memory_corpus.get(index).unwrap(); + + let uuid = corpus.uuid; + debug!(target: "corpus", "evict corpus {uuid} in worker {}", self.id); + + // Flush to disk the seed metadata at the time of eviction. + let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); + foundry_common::fs::write_json_file( + self.config + .corpus_dir + .clone() + .unwrap() + .join(format!("{WORKER}{}", self.id)) // Worker dir + .join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}")) + .as_path(), + &corpus, + )?; + + // Remove corpus from memory. + self.in_memory_corpus.remove(index); + + // Adjust the tracked indices + self.new_entry_indices.retain_mut(|i| { + if *i > index { + *i -= 1; // Shift indices down + true // Keep this index + } else { + *i != index // Remove if it's the deleted index, keep otherwise + } + }); + } + Ok(()) + } + + /// Mutates calldata of provided tx by abi decoding current values and randomly selecting the + /// inputs to change. + fn abi_mutate( + &self, + tx: &mut BasicTxDetails, + function: &Function, + test_runner: &mut TestRunner, + fuzz_state: &EvmFuzzState, + ) -> eyre::Result<()> { + // let rng = test_runner.rng(); + let mut arg_mutation_rounds = + test_runner.rng().random_range(0..=function.inputs.len()).max(1); + let round_arg_idx: Vec = if function.inputs.len() <= 1 { + vec![0] + } else { + (0..arg_mutation_rounds) + .map(|_| test_runner.rng().random_range(0..function.inputs.len())) + .collect() + }; + let mut prev_inputs = function + .abi_decode_input(&tx.call_details.calldata[4..]) + .map_err(|err| eyre!("failed to load previous inputs: {err}"))?; + + while arg_mutation_rounds > 0 { + let idx = round_arg_idx[arg_mutation_rounds - 1]; + prev_inputs[idx] = mutate_param_value( + &function + .inputs + .get(idx) + .expect("Could not get input to mutate") + .selector_type() + .parse()?, + prev_inputs[idx].clone(), + test_runner, + fuzz_state, + ); + arg_mutation_rounds -= 1; + } + + tx.call_details.calldata = + function.abi_encode_input(&prev_inputs).map_err(|e| eyre!(e.to_string()))?.into(); + Ok(()) + } +} + +/// Global corpus across workers to share coverage updates +pub struct MasterCorpus { + /// Config + config: Arc, + /// In-memory corpus entries populated from the persisted files. This is global corpus entry + /// across workers. + in_memory_corpus: Vec, + /// Number of failed replays from the persisted files. + pub(crate) failed_replays: usize, + /// History of binned hitcount of edges seen during fuzzing + history_map: Vec, + /// Master Metrics + metrics: CorpusMetrics, +} + +impl MasterCorpus { + pub fn new( + config: FuzzCorpusConfig, + executor: &Executor, + fuzzed_function: Option<&Function>, + fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, + ) -> eyre::Result { + let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; + let mut metrics = CorpusMetrics::default(); + let mut in_memory_corpus = vec![]; + let mut failed_replays = 0; + + // Early return if corpus dir / coverage guided fuzzing not configured. + let Some(corpus_dir) = &config.corpus_dir else { + return Ok(Self { + config: config.into(), + in_memory_corpus, + failed_replays, + history_map, + metrics, + }); + }; + + // Ensure corpus dir for current test is created. + if !corpus_dir.is_dir() { + foundry_common::fs::create_dir_all(corpus_dir)?; + } + + let can_replay_tx = |tx: &BasicTxDetails| -> bool { + fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) + || fuzzed_function.is_some_and(|function| { + tx.call_details + .calldata + .get(..4) + .is_some_and(|selector| function.selector() == selector) + }) + }; + + 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { + let path = entry?.path(); + if path.is_file() + && let Some(name) = path.file_name().and_then(|s| s.to_str()) + && name.contains(METADATA_SUFFIX) + { + // Ignore metadata files + continue; + } + + let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { + Some("gz") => foundry_common::fs::read_json_gzip_file::>(&path), + _ => foundry_common::fs::read_json_file::>(&path), + }; + + let Ok(tx_seq) = read_corpus_result else { + trace!(target: "corpus", "failed to load corpus from {}", path.display()); + continue; + }; + + if !tx_seq.is_empty() { + // Warm up history map from loaded sequences. + let mut executor = executor.clone(); + for tx in &tx_seq { + if can_replay_tx(tx) { + let mut call_result = executor + .call_raw( + tx.sender, + tx.call_details.target, + tx.call_details.calldata.clone(), + U256::ZERO, + ) + .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; + + let (new_coverage, is_edge) = + call_result.merge_edge_coverage(&mut history_map); + if new_coverage { + metrics.update_seen(is_edge); + } + + // Commit only when running invariant / stateful tests. + if fuzzed_contracts.is_some() { + executor.commit(&mut call_result); + } + } else { + failed_replays += 1; + + // If the only input for fuzzed function cannot be replied, then move to + // next one without adding it in memory. + if fuzzed_function.is_some() { + continue 'corpus_replay; + } + } + } + + metrics.corpus_count += 1; + + trace!( + target: "corpus", + "load sequence with len {} from corpus file {}", + tx_seq.len(), + path.display() + ); + + // Populate in memory corpus with the sequence from corpus file. + + in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); + } + } + + Ok(Self { config: config.into(), in_memory_corpus, failed_replays, history_map, metrics }) + } +} From 2bb34c438c23a0476f68138d39792ea538e52f29 Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:37:53 +0530 Subject: [PATCH 05/16] worker_dir + last_sync_timestamp fields. Write corpus to worker/corpus --- crates/evm/evm/src/executors/worker_corpus.rs | 46 +++++++++++++------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index 79c8046cb4348..0d85418be09b6 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -1,4 +1,5 @@ use std::{ + path::PathBuf, sync::Arc, time::{SystemTime, UNIX_EPOCH}, }; @@ -30,6 +31,8 @@ const JSON_EXTENSION: &str = ".json"; const FAVORABILITY_THRESHOLD: f64 = 0.3; const COVERAGE_MAP_SIZE: usize = 65536; const WORKER: &str = "worker"; +const CORPUS_DIR: &str = "corpus"; +const SYNC_DIR: &str = "sync"; /// Per-worker corpus manager. pub struct WorkerCorpus { @@ -52,6 +55,10 @@ pub struct WorkerCorpus { config: Arc, /// Indices of new entries added to [`WorkerCorpus::in_memory_corpus`] since last sync. new_entry_indices: Vec, + /// Last sync timestamp in seconds. + last_sync_timestamp: u64, + /// Worker Dir + worker_dir: Option, } impl WorkerCorpus { @@ -71,13 +78,23 @@ impl WorkerCorpus { ] .boxed(); - if let Some(corpus_dir) = &config.corpus_dir { + let worker_dir = if let Some(corpus_dir) = &config.corpus_dir { let worker_dir = corpus_dir.join(format!("{WORKER}{id}")); + let worker_corpus = &worker_dir.join(CORPUS_DIR); + let sync_dir = &worker_dir.join(SYNC_DIR); - if !worker_dir.is_dir() { - foundry_common::fs::create_dir_all(worker_dir)?; + if !worker_corpus.is_dir() { + foundry_common::fs::create_dir_all(worker_corpus)?; } - } + + if !sync_dir.is_dir() { + foundry_common::fs::create_dir_all(sync_dir)?; + } + + Some(worker_dir) + } else { + None + }; Ok(Self { id, @@ -91,6 +108,8 @@ impl WorkerCorpus { current_mutated: None, config, new_entry_indices: Default::default(), + last_sync_timestamp: 0, + worker_dir, }) } @@ -98,10 +117,7 @@ impl WorkerCorpus { /// Persists the call sequence (if corpus directory is configured and new coverage) and updates /// in-memory corpus. pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) { - // Early return if corpus dir / coverage guided fuzzing is not configured. - let worker_dir = if let Some(corpus_dir) = &self.config.corpus_dir { - corpus_dir.join(format!("{WORKER}{}", self.id)) - } else { + let Some(worker_corpus) = &self.worker_dir else { return; }; @@ -136,16 +152,19 @@ impl WorkerCorpus { let corpus = CorpusEntry::from_tx_seq(inputs); let corpus_uuid = corpus.uuid; - + // TODO: Remove unwrap + let timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(); // Persist to disk if corpus dir is configured. let write_result = if self.config.corpus_gzip { foundry_common::fs::write_json_gzip_file( - worker_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(), + worker_corpus + .join(format!("{corpus_uuid}-{timestamp}{JSON_EXTENSION}.gz")) + .as_path(), &corpus.tx_seq, ) } else { foundry_common::fs::write_json_file( - worker_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}")).as_path(), + worker_corpus.join(format!("{corpus_uuid}-{timestamp}{JSON_EXTENSION}")).as_path(), &corpus.tx_seq, ) }; @@ -395,8 +414,7 @@ impl WorkerCorpus { // Flush to disk the seed metadata at the time of eviction. let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); foundry_common::fs::write_json_file( - self.config - .corpus_dir + self.worker_dir .clone() .unwrap() .join(format!("{WORKER}{}", self.id)) // Worker dir @@ -464,6 +482,8 @@ impl WorkerCorpus { function.abi_encode_input(&prev_inputs).map_err(|e| eyre!(e.to_string()))?.into(); Ok(()) } + + // Sync Methods } /// Global corpus across workers to share coverage updates From 089f30b52a2c5edb56e9928e183616e742dab272 Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:04:19 +0530 Subject: [PATCH 06/16] feat: export worker corpus to master and import from sync dir --- crates/evm/evm/src/executors/corpus.rs | 18 ++- crates/evm/evm/src/executors/worker_corpus.rs | 111 +++++++++++++++++- 2 files changed, 124 insertions(+), 5 deletions(-) diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 56f0e29f8ec86..1c9b59b600d5e 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -16,7 +16,7 @@ use proptest::{ strategy::{BoxedStrategy, ValueTree}, test_runner::TestRunner, }; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use std::{ fmt, path::PathBuf, @@ -51,7 +51,7 @@ pub(crate) enum MutationType { } /// Holds Corpus information. -#[derive(Clone, Serialize)] +#[derive(Clone, Serialize, Deserialize)] pub(crate) struct CorpusEntry { // Unique corpus identifier. pub(crate) uuid: Uuid, @@ -65,17 +65,28 @@ pub(crate) struct CorpusEntry { // Whether this corpus is favored, i.e. producing new finds more often than // `FAVORABILITY_THRESHOLD`. pub(crate) is_favored: bool, + /// Timestamp of when this entry was written to disk in seconds. + #[serde(skip_serializing)] + pub(crate) timestamp: u64, } impl CorpusEntry { /// New corpus from given call sequence and corpus path to read uuid. pub fn new(tx_seq: Vec, path: PathBuf) -> eyre::Result { let uuid = if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) { + // TODO: Account for "-timestamp" Uuid::try_from(stem.strip_suffix(JSON_EXTENSION).unwrap_or(stem).to_string())? } else { Uuid::new_v4() }; - Ok(Self { uuid, total_mutations: 0, new_finds_produced: 0, tx_seq, is_favored: false }) + Ok(Self { + uuid, + total_mutations: 0, + new_finds_produced: 0, + tx_seq, + is_favored: false, + timestamp: SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(), + }) } /// New corpus with given call sequence and new uuid. @@ -86,6 +97,7 @@ impl CorpusEntry { new_finds_produced: 0, tx_seq: tx_seq.into(), is_favored: false, + timestamp: SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(), } } } diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index 0d85418be09b6..ffbe0befae436 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -1,4 +1,5 @@ use std::{ + ffi::{OsStr, OsString}, path::PathBuf, sync::Arc, time::{SystemTime, UNIX_EPOCH}, @@ -152,8 +153,7 @@ impl WorkerCorpus { let corpus = CorpusEntry::from_tx_seq(inputs); let corpus_uuid = corpus.uuid; - // TODO: Remove unwrap - let timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(); + let timestamp = corpus.timestamp; // Persist to disk if corpus dir is configured. let write_result = if self.config.corpus_gzip { foundry_common::fs::write_json_gzip_file( @@ -484,6 +484,113 @@ impl WorkerCorpus { } // Sync Methods + + /// Exports the new corpus entries to the master workers (id = 0) sync dir. + pub fn export(&self) -> eyre::Result<()> { + // Early return if no new entries or corpus dir not configured + if self.new_entry_indices.is_empty() || self.worker_dir.is_none() { + return Ok(()); + } + + let worker_dir = self.worker_dir.as_ref().unwrap(); + + // Master doesn't export (it only receives from others) + if self.id == 0 { + return Ok(()); + } + + let Some(master_sync_dir) = self + .config + .corpus_dir + .as_ref() + .map(|dir| dir.join(format!("{WORKER}0")).join(SYNC_DIR)) + else { + return Ok(()); + }; + + let mut exported = 0; + let corpus_dir = worker_dir.join(CORPUS_DIR); + + for &index in &self.new_entry_indices { + if let Some(entry) = self.in_memory_corpus.get(index) { + let ext = if self.config.corpus_gzip { + format!("{JSON_EXTENSION}.gz") + } else { + JSON_EXTENSION.to_string() + }; + let file_name = format!("{}-{}{ext}", entry.uuid, entry.timestamp); + let file_path = corpus_dir.join(&file_name); + let sync_path = master_sync_dir.join(&file_name); + + let Ok(_) = foundry_common::fs::copy(file_path, sync_path) else { + debug!(target: "corpus", "failed to export corpus {} from worker {}", entry.uuid, self.id); + continue; + }; + + exported += 1; + } + } + + trace!(target: "corpus", "exported {exported} new corpus entries from worker {}", self.id); + + Ok(()) + } + + /// Imports the new corpus entries that were written to the workers sync dir. + pub fn import(&self) -> eyre::Result> { + let Some(worker_dir) = &self.worker_dir else { + return Ok(vec![]); + }; + + let sync_dir = worker_dir.join(SYNC_DIR); + if !sync_dir.is_dir() { + return Ok(vec![]); + } + + let mut imports = vec![]; + for entry in std::fs::read_dir(sync_dir)? { + let Ok(entry) = entry else { + continue; + }; + + // Get the uuid and timestamp from the filename + let timestamp = if let Some(name) = entry.file_name().to_str() + && let Ok((_, timestamp)) = parse_corpus_filename(name) + { + timestamp + } else { + continue; + }; + + if timestamp <= self.last_sync_timestamp { + // TODO: Delete synced file + continue; + } + + // TODO: This is not useful right as `tx_seq` of CorpusEntry are not serialized i.e we + // can replay the corpus. + let corpus = if self.config.corpus_gzip { + foundry_common::fs::read_json_gzip_file::(&entry.path())? + } else { + foundry_common::fs::read_json_file::(&entry.path())? + }; + + imports.push(corpus); + } + + Ok(imports) + } +} + +/// Parses the corpus filename and returns the uuid and timestamp associated with it. +fn parse_corpus_filename(name: &str) -> eyre::Result<(Uuid, u64)> { + let name = name.trim_end_matches(".gz").trim_end_matches(JSON_EXTENSION); + + let parts = name.rsplitn(2, "-").collect::>(); + let uuid = Uuid::parse_str(parts[0])?; + let timestamp = parts[1].parse()?; + + Ok((uuid, timestamp)) } /// Global corpus across workers to share coverage updates From 5cdff6421939db8ffc71675d0b0fe50e48a4fa1c Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:10:19 +0530 Subject: [PATCH 07/16] fix: deser as tx_seq in import --- crates/evm/evm/src/executors/corpus.rs | 2 +- crates/evm/evm/src/executors/worker_corpus.rs | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 1c9b59b600d5e..6bb862754977d 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -51,7 +51,7 @@ pub(crate) enum MutationType { } /// Holds Corpus information. -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize)] pub(crate) struct CorpusEntry { // Unique corpus identifier. pub(crate) uuid: Uuid, diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index ffbe0befae436..e27d4da27a73f 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -536,8 +536,9 @@ impl WorkerCorpus { Ok(()) } - /// Imports the new corpus entries that were written to the workers sync dir. - pub fn import(&self) -> eyre::Result> { + /// Imports the new corpus entries tx sequence which will be used to replay and update history + /// map. + pub fn import(&self) -> eyre::Result>> { let Some(worker_dir) = &self.worker_dir else { return Ok(vec![]); }; @@ -567,12 +568,10 @@ impl WorkerCorpus { continue; } - // TODO: This is not useful right as `tx_seq` of CorpusEntry are not serialized i.e we - // can replay the corpus. let corpus = if self.config.corpus_gzip { - foundry_common::fs::read_json_gzip_file::(&entry.path())? + foundry_common::fs::read_json_gzip_file::>(&entry.path())? } else { - foundry_common::fs::read_json_file::(&entry.path())? + foundry_common::fs::read_json_file::>(&entry.path())? }; imports.push(corpus); From 488d09dd4b4ab59aee2cbc151fc660d000d0bf7c Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 15:20:25 +0530 Subject: [PATCH 08/16] feat: calibrate - update the in_memory_corpus + history_map for entries that provided in new coverage --- crates/evm/evm/src/executors/corpus.rs | 1 - crates/evm/evm/src/executors/worker_corpus.rs | 113 +++++++++++++++++- 2 files changed, 107 insertions(+), 7 deletions(-) diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 6bb862754977d..5d812347bea4c 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -74,7 +74,6 @@ impl CorpusEntry { /// New corpus from given call sequence and corpus path to read uuid. pub fn new(tx_seq: Vec, path: PathBuf) -> eyre::Result { let uuid = if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) { - // TODO: Account for "-timestamp" Uuid::try_from(stem.strip_suffix(JSON_EXTENSION).unwrap_or(stem).to_string())? } else { Uuid::new_v4() diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index e27d4da27a73f..66356145159eb 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -513,11 +513,11 @@ impl WorkerCorpus { for &index in &self.new_entry_indices { if let Some(entry) = self.in_memory_corpus.get(index) { - let ext = if self.config.corpus_gzip { - format!("{JSON_EXTENSION}.gz") - } else { - JSON_EXTENSION.to_string() - }; + let ext = self + .config + .corpus_gzip + .then_some(format!("{JSON_EXTENSION}.gz")) + .unwrap_or(JSON_EXTENSION.to_string()); let file_name = format!("{}-{}{ext}", entry.uuid, entry.timestamp); let file_path = corpus_dir.join(&file_name); let sync_path = master_sync_dir.join(&file_name); @@ -538,7 +538,7 @@ impl WorkerCorpus { /// Imports the new corpus entries tx sequence which will be used to replay and update history /// map. - pub fn import(&self) -> eyre::Result>> { + fn import(&self) -> eyre::Result>> { let Some(worker_dir) = &self.worker_dir else { return Ok(vec![]); }; @@ -579,6 +579,107 @@ impl WorkerCorpus { Ok(imports) } + + pub fn calibrate( + &mut self, + executor: &Executor, + fuzzed_function: Option<&Function>, + fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, + ) -> eyre::Result<()> { + let Some(worker_dir) = &self.worker_dir else { + return Ok(()); + }; + + // Helper to check if tx can be replayed + let can_replay_tx = |tx: &BasicTxDetails| -> bool { + fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) + || fuzzed_function.is_some_and(|function| { + tx.call_details + .calldata + .get(..4) + .is_some_and(|selector| function.selector() == selector) + }) + }; + + let sync_dir = worker_dir.join(SYNC_DIR); + let corpus_dir = worker_dir.join(CORPUS_DIR); + + let mut executor = executor.clone(); + for tx_seq in self.import()? { + if !tx_seq.is_empty() { + let mut new_coverage_on_sync = false; + for tx in &tx_seq { + if can_replay_tx(tx) { + let mut call_result = executor.call_raw( + tx.sender, + tx.call_details.target, + tx.call_details.calldata.clone(), + U256::ZERO, + )?; + + // Check if this provides new coverage + let (new_coverage, is_edge) = + call_result.merge_edge_coverage(&mut self.history_map); + + if new_coverage { + self.metrics.update_seen(is_edge); + new_coverage_on_sync = true; + } + + // Commit only for stateful tests + if fuzzed_contracts.is_some() { + executor.commit(&mut call_result); + } + + trace!( + target: "corpus", + %new_coverage, + "replayed tx for syncing worker {}: {:?}", + self.id, &tx + ); + } + } + + if new_coverage_on_sync { + let corpus_entry = CorpusEntry::from_tx_seq(&tx_seq); + let ext = self + .config + .corpus_gzip + .then_some(format!("{JSON_EXTENSION}.gz")) + .unwrap_or(JSON_EXTENSION.to_string()); + + let file_name = + format!("{}-{}{ext}", corpus_entry.uuid, corpus_entry.timestamp); + + // Move file from sync/ to corpus/ directory + let sync_path = sync_dir.join(&file_name); + let corpus_path = corpus_dir.join(&file_name); + + let Ok(_) = std::fs::rename(&sync_path, &corpus_path) else { + debug!(target: "corpus", "failed to move synced corpus {} from {sync_path:?} to {corpus_path:?} dir in worker {}", corpus_entry.uuid, self.id); + continue; + }; + + trace!( + target: "corpus", + "moved synced corpus {} to corpus dir in worker {}", + corpus_entry.uuid, self.id + ); + + self.in_memory_corpus.push(corpus_entry); + } + } + } + + let last_sync = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); + trace!(target: "corpus", "sync complete for worker {}, updating last sync time to {}", + self.id, + last_sync + ); + self.last_sync_timestamp = last_sync; + + Ok(()) + } } /// Parses the corpus filename and returns the uuid and timestamp associated with it. From cdfb4fc35ba5bfd657a1003a4f465fbc00e9dab9 Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 15:30:35 +0530 Subject: [PATCH 09/16] docs --- crates/evm/evm/src/executors/worker_corpus.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index 66356145159eb..aa3e15ad2b0fd 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -580,6 +580,8 @@ impl WorkerCorpus { Ok(imports) } + /// Syncs and calibrates the in memory corpus and updates the history_map if new coverage is + /// found from the corpus findings of other workers. pub fn calibrate( &mut self, executor: &Executor, From d4200e4b4fe26d284c8be48cf776cc00a610485f Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:13:21 +0530 Subject: [PATCH 10/16] feat: distribute corpus from master to workers --- crates/evm/evm/src/executors/worker_corpus.rs | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index aa3e15ad2b0fd..8975d8710a28f 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -682,6 +682,60 @@ impl WorkerCorpus { Ok(()) } + + /// To be run by the master worker (id = 0) to distribute the global corpus to sync/ directories + /// of other workers. + pub fn distribute(&mut self, num_workers: usize) -> eyre::Result<()> { + if self.id == 0 || self.worker_dir.is_none() { + return Ok(()); + } + + let worker_dir = self.worker_dir.as_ref().unwrap(); + let master_corpus_dir = worker_dir.join(CORPUS_DIR); + + for target_worker in 1..num_workers { + let target_dir = self + .config + .corpus_dir + .as_ref() + .unwrap() + .join(format!("{WORKER}{target_worker}")) + .join(SYNC_DIR); + + if !target_dir.is_dir() { + foundry_common::fs::create_dir_all(&target_dir)?; + } + + for entry in std::fs::read_dir(&master_corpus_dir)? { + let Ok(entry) = entry else { + continue; + }; + + let path = entry.path(); + if path.is_file() + && let Some(name) = path.file_name().and_then(|s| s.to_str()) + && !name.contains(METADATA_SUFFIX) + { + let sync_path = target_dir.join(name); + + let Ok((_, timestamp)) = parse_corpus_filename(name) else { + continue; + }; + + if timestamp > self.last_sync_timestamp { + let Ok(_) = foundry_common::fs::copy(&path, &sync_path) else { + debug!(target: "corpus", "failed to distribute corpus {} from worker {} to {target_dir:?}", name, self.id); + continue; + }; + + trace!(target: "corpus", "distributed corpus {} from worker {} to {target_dir:?}", name, self.id); + } + } + } + } + + Ok(()) + } } /// Parses the corpus filename and returns the uuid and timestamp associated with it. From e9d8d3ce06f85fb3be43ac773084f5a388eeda26 Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:27:06 +0530 Subject: [PATCH 11/16] sync --- crates/evm/evm/src/executors/worker_corpus.rs | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index 8975d8710a28f..21c0352b09bb2 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -486,7 +486,7 @@ impl WorkerCorpus { // Sync Methods /// Exports the new corpus entries to the master workers (id = 0) sync dir. - pub fn export(&self) -> eyre::Result<()> { + fn export(&self) -> eyre::Result<()> { // Early return if no new entries or corpus dir not configured if self.new_entry_indices.is_empty() || self.worker_dir.is_none() { return Ok(()); @@ -582,7 +582,7 @@ impl WorkerCorpus { /// Syncs and calibrates the in memory corpus and updates the history_map if new coverage is /// found from the corpus findings of other workers. - pub fn calibrate( + fn calibrate( &mut self, executor: &Executor, fuzzed_function: Option<&Function>, @@ -685,8 +685,8 @@ impl WorkerCorpus { /// To be run by the master worker (id = 0) to distribute the global corpus to sync/ directories /// of other workers. - pub fn distribute(&mut self, num_workers: usize) -> eyre::Result<()> { - if self.id == 0 || self.worker_dir.is_none() { + fn distribute(&mut self, num_workers: usize) -> eyre::Result<()> { + if self.id != 0 || self.worker_dir.is_none() { return Ok(()); } @@ -736,6 +736,38 @@ impl WorkerCorpus { Ok(()) } + + /// Syncs the workers in_memory_corpus and history_map with the findings from other workers. + pub fn sync( + &mut self, + num_workers: usize, + executor: &Executor, + fuzzed_function: Option<&Function>, + fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, + ) -> eyre::Result<()> { + if self.id == 0 { + // Master worker + self.calibrate(executor, fuzzed_function, fuzzed_contracts)?; + + self.distribute(num_workers)?; + + self.new_entry_indices.clear(); + + trace!(target: "corpus", "master worker synced"); + + return Ok(()); + } + + self.export()?; + + self.calibrate(executor, fuzzed_function, fuzzed_contracts)?; + + self.new_entry_indices.clear(); + + trace!(target: "corpus", "synced worker {}", self.id); + + Ok(()) + } } /// Parses the corpus filename and returns the uuid and timestamp associated with it. From 312225b50d28adcd429155923ea510693de5dca5 Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:57:46 +0530 Subject: [PATCH 12/16] cleanup: remove MasterCorpus --- crates/evm/evm/src/executors/worker_corpus.rs | 240 ++++++++---------- 1 file changed, 105 insertions(+), 135 deletions(-) diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index 21c0352b09bb2..26314e9ac600a 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -44,6 +44,8 @@ pub struct WorkerCorpus { in_memory_corpus: Vec, /// History of binned hitcount of edges seen during fuzzing history_map: Vec, + /// Number of failed replays from initial corpus + failed_replays: usize, /// Worker Metrics pub(crate) metrics: CorpusMetrics, /// Fuzzed calls generator. @@ -65,10 +67,13 @@ pub struct WorkerCorpus { impl WorkerCorpus { pub fn new( id: u32, - master: &MasterCorpus, + config: FuzzCorpusConfig, tx_generator: BoxedStrategy, + // Only required by master worker (id = 0) to replay existing corpus + executor: Option<&Executor>, + fuzzed_function: Option<&Function>, + fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, ) -> eyre::Result { - let config = master.config.clone(); let mutation_generator = prop_oneof![ Just(MutationType::Splice), Just(MutationType::Repeat), @@ -80,6 +85,7 @@ impl WorkerCorpus { .boxed(); let worker_dir = if let Some(corpus_dir) = &config.corpus_dir { + // Create the necessary directories for the worker let worker_dir = corpus_dir.join(format!("{WORKER}{id}")); let worker_corpus = &worker_dir.join(CORPUS_DIR); let sync_dir = &worker_dir.join(SYNC_DIR); @@ -97,17 +103,108 @@ impl WorkerCorpus { None }; + let mut in_memory_corpus = vec![]; + let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; + let mut metrics = CorpusMetrics::default(); + let mut failed_replays = 0; + + if id == 0 && config.corpus_dir.is_some() { + // Master worker loads the initial corpus if it exists + let corpus_dir = config.corpus_dir.as_ref().unwrap(); + + let can_replay_tx = |tx: &BasicTxDetails| -> bool { + fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) + || fuzzed_function.is_some_and(|function| { + tx.call_details + .calldata + .get(..4) + .is_some_and(|selector| function.selector() == selector) + }) + }; + + let executor = executor.expect("Executor required for master worker"); + 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { + let path = entry?.path(); + if path.is_file() + && let Some(name) = path.file_name().and_then(|s| s.to_str()) + && name.contains(METADATA_SUFFIX) + { + // Ignore metadata files + continue; + } + + let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { + Some("gz") => { + foundry_common::fs::read_json_gzip_file::>(&path) + } + _ => foundry_common::fs::read_json_file::>(&path), + }; + + let Ok(tx_seq) = read_corpus_result else { + trace!(target: "corpus", "failed to load corpus from {}", path.display()); + continue; + }; + + if !tx_seq.is_empty() { + // Warm up history map from loaded sequences. + let mut executor = executor.clone(); + for tx in &tx_seq { + if can_replay_tx(tx) { + let mut call_result = executor + .call_raw( + tx.sender, + tx.call_details.target, + tx.call_details.calldata.clone(), + U256::ZERO, + ) + .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; + + let (new_coverage, is_edge) = + call_result.merge_edge_coverage(&mut history_map); + if new_coverage { + metrics.update_seen(is_edge); + } + + // Commit only when running invariant / stateful tests. + if fuzzed_contracts.is_some() { + executor.commit(&mut call_result); + } + } else { + failed_replays += 1; + + // If the only input for fuzzed function cannot be replied, then move to + // next one without adding it in memory. + if fuzzed_function.is_some() { + continue 'corpus_replay; + } + } + } + + metrics.corpus_count += 1; + + trace!( + target: "corpus", + "load sequence with len {} from corpus file {}", + tx_seq.len(), + path.display() + ); + + // Populate in memory corpus with the sequence from corpus file. + in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); + } + } + } + Ok(Self { id, - in_memory_corpus: master.in_memory_corpus.clone(), - // TODO: This clones the history_map with size COVERAGE_MAP_SIZE - // history_map size per worker should be dependent on the total number of workers? - history_map: master.history_map.clone(), - metrics: Default::default(), + in_memory_corpus, + history_map, + failed_replays, + metrics, tx_generator, mutation_generator, current_mutated: None, - config, + config: config.into(), new_entry_indices: Default::default(), last_sync_timestamp: 0, worker_dir, @@ -780,130 +877,3 @@ fn parse_corpus_filename(name: &str) -> eyre::Result<(Uuid, u64)> { Ok((uuid, timestamp)) } - -/// Global corpus across workers to share coverage updates -pub struct MasterCorpus { - /// Config - config: Arc, - /// In-memory corpus entries populated from the persisted files. This is global corpus entry - /// across workers. - in_memory_corpus: Vec, - /// Number of failed replays from the persisted files. - pub(crate) failed_replays: usize, - /// History of binned hitcount of edges seen during fuzzing - history_map: Vec, - /// Master Metrics - metrics: CorpusMetrics, -} - -impl MasterCorpus { - pub fn new( - config: FuzzCorpusConfig, - executor: &Executor, - fuzzed_function: Option<&Function>, - fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, - ) -> eyre::Result { - let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; - let mut metrics = CorpusMetrics::default(); - let mut in_memory_corpus = vec![]; - let mut failed_replays = 0; - - // Early return if corpus dir / coverage guided fuzzing not configured. - let Some(corpus_dir) = &config.corpus_dir else { - return Ok(Self { - config: config.into(), - in_memory_corpus, - failed_replays, - history_map, - metrics, - }); - }; - - // Ensure corpus dir for current test is created. - if !corpus_dir.is_dir() { - foundry_common::fs::create_dir_all(corpus_dir)?; - } - - let can_replay_tx = |tx: &BasicTxDetails| -> bool { - fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) - || fuzzed_function.is_some_and(|function| { - tx.call_details - .calldata - .get(..4) - .is_some_and(|selector| function.selector() == selector) - }) - }; - - 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { - let path = entry?.path(); - if path.is_file() - && let Some(name) = path.file_name().and_then(|s| s.to_str()) - && name.contains(METADATA_SUFFIX) - { - // Ignore metadata files - continue; - } - - let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { - Some("gz") => foundry_common::fs::read_json_gzip_file::>(&path), - _ => foundry_common::fs::read_json_file::>(&path), - }; - - let Ok(tx_seq) = read_corpus_result else { - trace!(target: "corpus", "failed to load corpus from {}", path.display()); - continue; - }; - - if !tx_seq.is_empty() { - // Warm up history map from loaded sequences. - let mut executor = executor.clone(); - for tx in &tx_seq { - if can_replay_tx(tx) { - let mut call_result = executor - .call_raw( - tx.sender, - tx.call_details.target, - tx.call_details.calldata.clone(), - U256::ZERO, - ) - .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; - - let (new_coverage, is_edge) = - call_result.merge_edge_coverage(&mut history_map); - if new_coverage { - metrics.update_seen(is_edge); - } - - // Commit only when running invariant / stateful tests. - if fuzzed_contracts.is_some() { - executor.commit(&mut call_result); - } - } else { - failed_replays += 1; - - // If the only input for fuzzed function cannot be replied, then move to - // next one without adding it in memory. - if fuzzed_function.is_some() { - continue 'corpus_replay; - } - } - } - - metrics.corpus_count += 1; - - trace!( - target: "corpus", - "load sequence with len {} from corpus file {}", - tx_seq.len(), - path.display() - ); - - // Populate in memory corpus with the sequence from corpus file. - - in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); - } - } - - Ok(Self { config: config.into(), in_memory_corpus, failed_replays, history_map, metrics }) - } -} From 8a477d3625f9b3e8d5ef4fdf1f329d72c5d7e25a Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:07:05 +0530 Subject: [PATCH 13/16] integrate WorkerCorpus in existing sequential impls of fuzz and invariants --- crates/evm/evm/src/executors/corpus.rs | 2 +- crates/evm/evm/src/executors/fuzz/mod.rs | 17 ++++++++------ crates/evm/evm/src/executors/invariant/mod.rs | 22 +++++++++---------- crates/evm/evm/src/executors/worker_corpus.rs | 3 +-- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 5d812347bea4c..d557d7c80e90a 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -16,7 +16,7 @@ use proptest::{ strategy::{BoxedStrategy, ValueTree}, test_runner::TestRunner, }; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::{ fmt, path::PathBuf, diff --git a/crates/evm/evm/src/executors/fuzz/mod.rs b/crates/evm/evm/src/executors/fuzz/mod.rs index 5854777246d5c..4d2f0b35ea780 100644 --- a/crates/evm/evm/src/executors/fuzz/mod.rs +++ b/crates/evm/evm/src/executors/fuzz/mod.rs @@ -1,7 +1,6 @@ use crate::executors::{ DURATION_BETWEEN_METRICS_REPORT, Executor, FailFast, FuzzTestTimer, RawCallResult, - corpus::{CorpusWorker, SharedCorpus}, - worker_corpus::{MasterCorpus, WorkerCorpus}, + worker_corpus::WorkerCorpus, }; use alloy_dyn_abi::JsonAbiExt; use alloy_json_abi::Function; @@ -119,10 +118,14 @@ impl FuzzedExecutor { // We want to collect at least one trace which will be displayed to user. let max_traces_to_collect = std::cmp::max(1, self.config.gas_report_samples) as usize; - let master_corpus = - MasterCorpus::new(self.config.corpus.clone(), &self.executor, Some(func), None)?; - - let mut corpus_manager = WorkerCorpus::new(0, &master_corpus, strategy.boxed())?; + let mut corpus_manager = WorkerCorpus::new( + 0, // Id of the Master + self.config.corpus.clone(), + strategy.boxed(), + Some(&self.executor), + Some(func), + None, + )?; // Start timer for this fuzz test. let timer = FuzzTestTimer::new(self.config.timeout); @@ -255,7 +258,7 @@ impl FuzzedExecutor { gas_report_traces: traces.into_iter().map(|a| a.arena).collect(), line_coverage: test_data.coverage, deprecated_cheatcodes: test_data.deprecated_cheatcodes, - failed_corpus_replays: master_corpus.failed_replays, + failed_corpus_replays: corpus_manager.failed_replays, }; match test_data.failure { diff --git a/crates/evm/evm/src/executors/invariant/mod.rs b/crates/evm/evm/src/executors/invariant/mod.rs index cd8f6b6904b47..911d0f8525d0e 100644 --- a/crates/evm/evm/src/executors/invariant/mod.rs +++ b/crates/evm/evm/src/executors/invariant/mod.rs @@ -1,7 +1,7 @@ use crate::{ executors::{ Executor, RawCallResult, - corpus::{CorpusWorker, SharedCorpus}, + worker_corpus::WorkerCorpus, }, inspectors::Fuzzer, }; @@ -338,7 +338,7 @@ impl<'a> InvariantExecutor<'a> { return Err(eyre!("Invariant test function should have no inputs")); } - let (mut invariant_test, shared_corpus, mut corpus_manager) = + let (mut invariant_test, mut corpus_manager) = self.prepare_test(&invariant_contract, fuzz_fixtures, deployed_libs)?; // Start timer for this invariant test. @@ -511,13 +511,12 @@ impl<'a> InvariantExecutor<'a> { // End current invariant test run. invariant_test.end_run(current_run, self.config.gas_report_samples as usize); - let metrics_read = shared_corpus.metrics.read(); if let Some(progress) = progress { // If running with progress then increment completed runs. progress.inc(1); // Display metrics in progress bar. if edge_coverage_enabled { - progress.set_message(format!("{}", &metrics_read)); + progress.set_message(format!("{}", &corpus_manager.metrics)); } } else if edge_coverage_enabled && last_metrics_report.elapsed() > DURATION_BETWEEN_METRICS_REPORT @@ -528,7 +527,7 @@ impl<'a> InvariantExecutor<'a> { .duration_since(UNIX_EPOCH)? .as_secs(), "invariant": invariant_contract.invariant_function.name, - "metrics": &*metrics_read, + "metrics": &corpus_manager.metrics, }); let _ = sh_println!("{}", serde_json::to_string(&metrics)?); last_metrics_report = Instant::now(); @@ -549,7 +548,7 @@ impl<'a> InvariantExecutor<'a> { gas_report_traces: result.gas_report_traces, line_coverage: result.line_coverage, metrics: result.metrics, - failed_corpus_replays: shared_corpus.failed_replays(), + failed_corpus_replays: corpus_manager.failed_replays, }) } @@ -561,7 +560,7 @@ impl<'a> InvariantExecutor<'a> { invariant_contract: &InvariantContract<'_>, fuzz_fixtures: &FuzzFixtures, deployed_libs: &[Address], - ) -> Result<(InvariantTest, SharedCorpus, CorpusWorker)> { + ) -> Result<(InvariantTest, WorkerCorpus)> { // Finds out the chosen deployed contracts and/or senders. self.select_contract_artifacts(invariant_contract.address)?; let (targeted_senders, targeted_contracts) = @@ -635,14 +634,15 @@ impl<'a> InvariantExecutor<'a> { return Err(eyre!(error.revert_reason().unwrap_or_default())); } - let shared_corpus = SharedCorpus::new( + let worker = WorkerCorpus::new( + 0, self.config.corpus.clone(), - &self.executor, + strategy.boxed(), + Some(&self.executor), None, Some(&targeted_contracts), )?; - let corpus_worker = shared_corpus.new_worker(strategy.boxed()); let invariant_test = InvariantTest::new( fuzz_state, targeted_contracts, @@ -651,7 +651,7 @@ impl<'a> InvariantExecutor<'a> { self.runner.clone(), ); - Ok((invariant_test, shared_corpus, corpus_worker)) + Ok((invariant_test, worker)) } /// Fills the `InvariantExecutor` with the artifact identifier filters (in `path:name` string diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs index 26314e9ac600a..0544ec8900e46 100644 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ b/crates/evm/evm/src/executors/worker_corpus.rs @@ -1,5 +1,4 @@ use std::{ - ffi::{OsStr, OsString}, path::PathBuf, sync::Arc, time::{SystemTime, UNIX_EPOCH}, @@ -45,7 +44,7 @@ pub struct WorkerCorpus { /// History of binned hitcount of edges seen during fuzzing history_map: Vec, /// Number of failed replays from initial corpus - failed_replays: usize, + pub(crate) failed_replays: usize, /// Worker Metrics pub(crate) metrics: CorpusMetrics, /// Fuzzed calls generator. From 5ddc908c9df6fed8b7fe7320c868cc5e3c8d0f21 Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:12:41 +0530 Subject: [PATCH 14/16] cleanup: remove SharedCorpus and CorpusWorker --- crates/evm/evm/src/executors/corpus.rs | 671 +++++++++---- crates/evm/evm/src/executors/fuzz/mod.rs | 2 +- crates/evm/evm/src/executors/invariant/mod.rs | 5 +- crates/evm/evm/src/executors/mod.rs | 1 - crates/evm/evm/src/executors/worker_corpus.rs | 878 ------------------ 5 files changed, 493 insertions(+), 1064 deletions(-) delete mode 100644 crates/evm/evm/src/executors/worker_corpus.rs diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index d557d7c80e90a..f84190c138d5c 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -9,7 +9,6 @@ use foundry_evm_fuzz::{ invariant::FuzzRunIdentifiedContracts, strategies::{EvmFuzzState, mutate_param_value}, }; -use parking_lot::RwLock; use proptest::{ prelude::{Just, Rng, Strategy}, prop_oneof, @@ -20,10 +19,7 @@ use serde::Serialize; use std::{ fmt, path::PathBuf, - sync::{ - Arc, - atomic::{AtomicUsize, Ordering}, - }, + sync::Arc, time::{SystemTime, UNIX_EPOCH}, }; use uuid::Uuid; @@ -32,10 +28,13 @@ const METADATA_SUFFIX: &str = "metadata.json"; const JSON_EXTENSION: &str = ".json"; const FAVORABILITY_THRESHOLD: f64 = 0.3; const COVERAGE_MAP_SIZE: usize = 65536; +const WORKER: &str = "worker"; +const CORPUS_DIR: &str = "corpus"; +const SYNC_DIR: &str = "sync"; /// Possible mutation strategies to apply on a call sequence. #[derive(Debug, Clone)] -pub(crate) enum MutationType { +enum MutationType { /// Splice original call sequence. Splice, /// Repeat selected call several times. @@ -52,22 +51,22 @@ pub(crate) enum MutationType { /// Holds Corpus information. #[derive(Clone, Serialize)] -pub(crate) struct CorpusEntry { +struct CorpusEntry { // Unique corpus identifier. - pub(crate) uuid: Uuid, + uuid: Uuid, // Total mutations of corpus as primary source. - pub(crate) total_mutations: usize, + total_mutations: usize, // New coverage found as a result of mutating this corpus. - pub(crate) new_finds_produced: usize, + new_finds_produced: usize, // Corpus call sequence. #[serde(skip_serializing)] - pub(crate) tx_seq: Vec, + tx_seq: Vec, // Whether this corpus is favored, i.e. producing new finds more often than // `FAVORABILITY_THRESHOLD`. - pub(crate) is_favored: bool, + is_favored: bool, /// Timestamp of when this entry was written to disk in seconds. #[serde(skip_serializing)] - pub(crate) timestamp: u64, + timestamp: u64, } impl CorpusEntry { @@ -104,13 +103,13 @@ impl CorpusEntry { #[derive(Serialize, Default)] pub(crate) struct CorpusMetrics { // Number of edges seen during the invariant run. - pub(crate) cumulative_edges_seen: usize, + cumulative_edges_seen: usize, // Number of features (new hitcount bin of previously hit edge) seen during the invariant run. - pub(crate) cumulative_features_seen: usize, + cumulative_features_seen: usize, // Number of corpus entries. - pub(crate) corpus_count: usize, + corpus_count: usize, // Number of corpus entries that are favored. - pub(crate) favored_items: usize, + favored_items: usize, } impl fmt::Display for CorpusMetrics { @@ -144,40 +143,46 @@ impl CorpusMetrics { } } -/// Shared corpus used for coverage guided fuzzing campaigns by both stateless and stateful tests. -#[derive(Clone)] -pub(crate) struct SharedCorpus { - // Corpus configuration. - config: Arc, - /// Shared in-memory corpus, populated from the persisted files and runs across multiple - /// workers. Mutation is performed on these. - /// - /// Map of corpus [`Uuid`] to [`CorpusEntry`]. - in_memory_corpus: Arc>>, - /// Number of failed replays from persisted corpus. - failed_replays: Arc, +/// Per-worker corpus manager. +pub struct WorkerCorpus { + /// Worker Id + id: u32, + /// In-memory corpus entries populated from the persisted files and + /// runs administered by this worker. + in_memory_corpus: Vec, /// History of binned hitcount of edges seen during fuzzing - history_map: Arc>>, - /// Corpus metrics. - pub(crate) metrics: Arc>, -} - -/// Operates on the [`SharedCorpus`] for coverage guided fuzzing and generating fuzz inputs using -/// [`CorpusWorker::new_input`] for stateless tests, [`CorpusWorker::new_inputs`] for stateful -/// tests. -pub(crate) struct CorpusWorker { - /// Shared Corpus - corpus: SharedCorpus, + history_map: Vec, + /// Number of failed replays from initial corpus + pub(crate) failed_replays: usize, + /// Worker Metrics + pub(crate) metrics: CorpusMetrics, /// Fuzzed calls generator. tx_generator: BoxedStrategy, - /// Call sequence mutation strategy type generator. + /// Call sequence mutation strategy type generator used by stateful fuzzing. mutation_generator: BoxedStrategy, /// Identifier of current mutated entry for this worker. current_mutated: Option, + /// Config + config: Arc, + /// Indices of new entries added to [`WorkerCorpus::in_memory_corpus`] since last sync. + new_entry_indices: Vec, + /// Last sync timestamp in seconds. + last_sync_timestamp: u64, + /// Worker Dir + /// corpus_dir/worker1/ + worker_dir: Option, } -impl CorpusWorker { - pub fn new(corpus: SharedCorpus, tx_generator: BoxedStrategy) -> Self { +impl WorkerCorpus { + pub fn new( + id: u32, + config: FuzzCorpusConfig, + tx_generator: BoxedStrategy, + // Only required by master worker (id = 0) to replay existing corpus + executor: Option<&Executor>, + fuzzed_function: Option<&Function>, + fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, + ) -> eyre::Result { let mutation_generator = prop_oneof![ Just(MutationType::Splice), Just(MutationType::Repeat), @@ -188,36 +193,159 @@ impl CorpusWorker { ] .boxed(); - Self { corpus, tx_generator, mutation_generator, current_mutated: None } + let worker_dir = if let Some(corpus_dir) = &config.corpus_dir { + // Create the necessary directories for the worker + let worker_dir = corpus_dir.join(format!("{WORKER}{id}")); + let worker_corpus = &worker_dir.join(CORPUS_DIR); + let sync_dir = &worker_dir.join(SYNC_DIR); + + if !worker_corpus.is_dir() { + foundry_common::fs::create_dir_all(worker_corpus)?; + } + + if !sync_dir.is_dir() { + foundry_common::fs::create_dir_all(sync_dir)?; + } + + Some(worker_dir) + } else { + None + }; + + let mut in_memory_corpus = vec![]; + let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; + let mut metrics = CorpusMetrics::default(); + let mut failed_replays = 0; + + if id == 0 && config.corpus_dir.is_some() { + // Master worker loads the initial corpus if it exists + let corpus_dir = config.corpus_dir.as_ref().unwrap(); + + let can_replay_tx = |tx: &BasicTxDetails| -> bool { + fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) + || fuzzed_function.is_some_and(|function| { + tx.call_details + .calldata + .get(..4) + .is_some_and(|selector| function.selector() == selector) + }) + }; + + let executor = executor.expect("Executor required for master worker"); + 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { + let path = entry?.path(); + if path.is_file() + && let Some(name) = path.file_name().and_then(|s| s.to_str()) + && name.contains(METADATA_SUFFIX) + { + // Ignore metadata files + continue; + } + + let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { + Some("gz") => { + foundry_common::fs::read_json_gzip_file::>(&path) + } + _ => foundry_common::fs::read_json_file::>(&path), + }; + + let Ok(tx_seq) = read_corpus_result else { + trace!(target: "corpus", "failed to load corpus from {}", path.display()); + continue; + }; + + if !tx_seq.is_empty() { + // Warm up history map from loaded sequences. + let mut executor = executor.clone(); + for tx in &tx_seq { + if can_replay_tx(tx) { + let mut call_result = executor + .call_raw( + tx.sender, + tx.call_details.target, + tx.call_details.calldata.clone(), + U256::ZERO, + ) + .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; + + let (new_coverage, is_edge) = + call_result.merge_edge_coverage(&mut history_map); + if new_coverage { + metrics.update_seen(is_edge); + } + + // Commit only when running invariant / stateful tests. + if fuzzed_contracts.is_some() { + executor.commit(&mut call_result); + } + } else { + failed_replays += 1; + + // If the only input for fuzzed function cannot be replied, then move to + // next one without adding it in memory. + if fuzzed_function.is_some() { + continue 'corpus_replay; + } + } + } + + metrics.corpus_count += 1; + + trace!( + target: "corpus", + "load sequence with len {} from corpus file {}", + tx_seq.len(), + path.display() + ); + + // Populate in memory corpus with the sequence from corpus file. + in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); + } + } + } + + Ok(Self { + id, + in_memory_corpus, + history_map, + failed_replays, + metrics, + tx_generator, + mutation_generator, + current_mutated: None, + config: config.into(), + new_entry_indices: Default::default(), + last_sync_timestamp: 0, + worker_dir, + }) } /// Updates stats for the given call sequence, if new coverage produced. /// Persists the call sequence (if corpus directory is configured and new coverage) and updates /// in-memory corpus. pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) { - // Early return if corpus dir / coverage guided fuzzing is not configured. - let Some(corpus_dir) = &self.corpus.config.corpus_dir else { + let Some(worker_corpus) = &self.worker_dir else { return; }; - let mut in_mem_write = self.corpus.in_memory_corpus.write(); - let mut metrics_write = self.corpus.metrics.write(); // Update stats of current mutated primary corpus. if let Some(uuid) = &self.current_mutated { - if let Some(corpus) = in_mem_write.iter_mut().find(|corpus| corpus.uuid.eq(uuid)) { + if let Some(corpus) = + self.in_memory_corpus.iter_mut().find(|corpus| corpus.uuid.eq(uuid)) + { corpus.total_mutations += 1; if new_coverage { corpus.new_finds_produced += 1 } let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64) < FAVORABILITY_THRESHOLD; - metrics_write.update_favored(is_favored, corpus.is_favored); + self.metrics.update_favored(is_favored, corpus.is_favored); corpus.is_favored = is_favored; trace!( target: "corpus", - "updated corpus {}, total mutations: {}, new finds: {}", - corpus.uuid, corpus.total_mutations, corpus.new_finds_produced + "updated worker {} corpus {}, total mutations: {}, new finds: {}", + self.id, corpus.uuid, corpus.total_mutations, corpus.new_finds_produced ); } @@ -231,33 +359,53 @@ impl CorpusWorker { let corpus = CorpusEntry::from_tx_seq(inputs); let corpus_uuid = corpus.uuid; - + let timestamp = corpus.timestamp; // Persist to disk if corpus dir is configured. - let write_result = if self.corpus.config.corpus_gzip { + let write_result = if self.config.corpus_gzip { foundry_common::fs::write_json_gzip_file( - corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}.gz")).as_path(), + worker_corpus + .join(format!("{corpus_uuid}-{timestamp}{JSON_EXTENSION}.gz")) + .as_path(), &corpus.tx_seq, ) } else { foundry_common::fs::write_json_file( - corpus_dir.join(format!("{corpus_uuid}{JSON_EXTENSION}")).as_path(), + worker_corpus.join(format!("{corpus_uuid}-{timestamp}{JSON_EXTENSION}")).as_path(), &corpus.tx_seq, ) }; if let Err(err) = write_result { - debug!(target: "corpus", %err, "Failed to record call sequence {:?}", &corpus.tx_seq); + debug!(target: "corpus", %err, "Failed to record call sequence {:?} in worker {}", &corpus.tx_seq, self.id); } else { trace!( target: "corpus", - "persisted {} inputs for new coverage in {corpus_uuid} corpus", - &corpus.tx_seq.len() + "persisted {} inputs for new coverage in worker {} for {corpus_uuid} corpus", + self.id, &corpus.tx_seq.len() ); } + + // Track in-memory corpus changes to update MasterWorker on sync + let new_index = self.in_memory_corpus.len(); + self.new_entry_indices.push(new_index); + // This includes reverting txs in the corpus and `can_continue` removes // them. We want this as it is new coverage and may help reach the other branch. - metrics_write.corpus_count += 1; - in_mem_write.push(corpus); + self.metrics.corpus_count += 1; + self.in_memory_corpus.push(corpus); + } + + /// Collects coverage from call result and updates metrics. + pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool { + if !self.config.collect_edge_coverage() { + return false; + } + + let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut self.history_map); + if new_coverage { + self.metrics.update_seen(is_edge); + } + new_coverage } /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than @@ -272,13 +420,12 @@ impl CorpusWorker { // Early return with first_input only if corpus dir / coverage guided fuzzing not // configured. - if !self.corpus.config.is_coverage_guided() { + if !self.config.is_coverage_guided() { new_seq.push(self.new_tx(test_runner)?); return Ok(new_seq); }; - let in_mem_read = self.corpus.in_memory_corpus.read(); - if !in_mem_read.is_empty() { + if !self.in_memory_corpus.is_empty() { self.evict_oldest_corpus()?; let mutation_type = self @@ -286,10 +433,11 @@ impl CorpusWorker { .new_tree(test_runner) .map_err(|err| eyre!("Could not generate mutation type {err}"))? .current(); + let rng = test_runner.rng(); - let corpus_len = in_mem_read.len(); - let primary = &in_mem_read[rng.random_range(0..corpus_len)]; - let secondary = &in_mem_read[rng.random_range(0..corpus_len)]; + let corpus_len = self.in_memory_corpus.len(); + let primary = &self.in_memory_corpus[rng.random_range(0..corpus_len)]; + let secondary = &self.in_memory_corpus[rng.random_range(0..corpus_len)]; match mutation_type { MutationType::Splice => { @@ -389,7 +537,7 @@ impl CorpusWorker { } /// Generates a new input from the shared in memory corpus. Evicts oldest corpus mutated more - /// than configured max mutations value. Used by fuzz test campaigns. + /// than configured max mutations value. Used by fuzz (stateless) test campaigns. pub fn new_input( &mut self, test_runner: &mut TestRunner, @@ -397,15 +545,15 @@ impl CorpusWorker { function: &Function, ) -> eyre::Result { // Early return if not running with coverage guided fuzzing. - if !self.corpus.config.is_coverage_guided() { + if !self.config.is_coverage_guided() { return Ok(self.new_tx(test_runner)?.call_details.calldata); } self.evict_oldest_corpus()?; - let in_mem_read = self.corpus.in_memory_corpus.read(); - let tx = if !in_mem_read.is_empty() { - let corpus = &in_mem_read[test_runner.rng().random_range(0..in_mem_read.len())]; + let tx = if !self.in_memory_corpus.is_empty() { + let corpus = &self.in_memory_corpus + [test_runner.rng().random_range(0..self.in_memory_corpus.len())]; self.current_mutated = Some(corpus.uuid); let new_seq = corpus.tx_seq.clone(); let mut tx = new_seq.first().unwrap().clone(); @@ -442,7 +590,7 @@ impl CorpusWorker { ) -> eyre::Result { // Early return with new input if corpus dir / coverage guided fuzzing not configured or if // call was discarded. - if self.corpus.config.corpus_dir.is_none() || discarded { + if self.config.corpus_dir.is_none() || discarded { return self.new_tx(test_runner); } @@ -458,34 +606,41 @@ impl CorpusWorker { /// Flush the oldest corpus mutated more than configured max mutations unless they are /// favored. - fn evict_oldest_corpus(&self) -> eyre::Result<()> { - let mut in_mem_write = self.corpus.in_memory_corpus.write(); - if in_mem_write.len() > self.corpus.config.corpus_min_size.max(1) - && let Some(index) = in_mem_write.iter().position(|corpus| { - corpus.total_mutations > self.corpus.config.corpus_min_mutations - && !corpus.is_favored + fn evict_oldest_corpus(&mut self) -> eyre::Result<()> { + if self.in_memory_corpus.len() > self.config.corpus_min_size.max(1) + && let Some(index) = self.in_memory_corpus.iter().position(|corpus| { + corpus.total_mutations > self.config.corpus_min_mutations && !corpus.is_favored }) { - let corpus = in_mem_write.get(index).unwrap(); + let corpus = self.in_memory_corpus.get(index).unwrap(); let uuid = corpus.uuid; - debug!(target: "corpus", "evict corpus {uuid}"); + debug!(target: "corpus", "evict corpus {uuid} in worker {}", self.id); // Flush to disk the seed metadata at the time of eviction. let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); foundry_common::fs::write_json_file( - self.corpus - .config - .corpus_dir + self.worker_dir .clone() .unwrap() + .join(format!("{WORKER}{}", self.id)) // Worker dir .join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}")) .as_path(), &corpus, )?; // Remove corpus from memory. - in_mem_write.remove(index); + self.in_memory_corpus.remove(index); + + // Adjust the tracked indices + self.new_entry_indices.retain_mut(|i| { + if *i > index { + *i -= 1; // Shift indices down + true // Keep this index + } else { + *i != index // Remove if it's the deleted index, keep otherwise + } + }); } Ok(()) } @@ -534,49 +689,116 @@ impl CorpusWorker { Ok(()) } - /// Collects coverage from call result and updates metrics. - pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool { - if !self.corpus.config.collect_edge_coverage() { - return false; + // Sync Methods + + /// Exports the new corpus entries to the master workers (id = 0) sync dir. + fn export(&self) -> eyre::Result<()> { + // Early return if no new entries or corpus dir not configured + if self.new_entry_indices.is_empty() || self.worker_dir.is_none() { + return Ok(()); } - let mut history_map_write = self.corpus.history_map.write(); - let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut history_map_write); - if new_coverage { - self.corpus.metrics.write().update_seen(is_edge); + let worker_dir = self.worker_dir.as_ref().unwrap(); + + // Master doesn't export (it only receives from others) + if self.id == 0 { + return Ok(()); } - new_coverage + + let Some(master_sync_dir) = self + .config + .corpus_dir + .as_ref() + .map(|dir| dir.join(format!("{WORKER}0")).join(SYNC_DIR)) + else { + return Ok(()); + }; + + let mut exported = 0; + let corpus_dir = worker_dir.join(CORPUS_DIR); + + for &index in &self.new_entry_indices { + if let Some(entry) = self.in_memory_corpus.get(index) { + let ext = self + .config + .corpus_gzip + .then_some(format!("{JSON_EXTENSION}.gz")) + .unwrap_or(JSON_EXTENSION.to_string()); + let file_name = format!("{}-{}{ext}", entry.uuid, entry.timestamp); + let file_path = corpus_dir.join(&file_name); + let sync_path = master_sync_dir.join(&file_name); + + let Ok(_) = foundry_common::fs::copy(file_path, sync_path) else { + debug!(target: "corpus", "failed to export corpus {} from worker {}", entry.uuid, self.id); + continue; + }; + + exported += 1; + } + } + + trace!(target: "corpus", "exported {exported} new corpus entries from worker {}", self.id); + + Ok(()) } -} -impl SharedCorpus { - pub fn new( - config: FuzzCorpusConfig, + /// Imports the new corpus entries tx sequence which will be used to replay and update history + /// map. + fn import(&self) -> eyre::Result>> { + let Some(worker_dir) = &self.worker_dir else { + return Ok(vec![]); + }; + + let sync_dir = worker_dir.join(SYNC_DIR); + if !sync_dir.is_dir() { + return Ok(vec![]); + } + + let mut imports = vec![]; + for entry in std::fs::read_dir(sync_dir)? { + let Ok(entry) = entry else { + continue; + }; + + // Get the uuid and timestamp from the filename + let timestamp = if let Some(name) = entry.file_name().to_str() + && let Ok((_, timestamp)) = parse_corpus_filename(name) + { + timestamp + } else { + continue; + }; + + if timestamp <= self.last_sync_timestamp { + // TODO: Delete synced file + continue; + } + + let corpus = if self.config.corpus_gzip { + foundry_common::fs::read_json_gzip_file::>(&entry.path())? + } else { + foundry_common::fs::read_json_file::>(&entry.path())? + }; + + imports.push(corpus); + } + + Ok(imports) + } + + /// Syncs and calibrates the in memory corpus and updates the history_map if new coverage is + /// found from the corpus findings of other workers. + fn calibrate( + &mut self, executor: &Executor, fuzzed_function: Option<&Function>, fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, - ) -> eyre::Result { - let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; - let mut metrics = CorpusMetrics::default(); - let mut in_memory_corpus = vec![]; - let failed_replays = AtomicUsize::new(0); - - // Early return if corpus dir / coverage guided fuzzing not configured. - let Some(corpus_dir) = &config.corpus_dir else { - return Ok(Self { - config: config.into(), - in_memory_corpus: Arc::new(RwLock::new(in_memory_corpus)), - failed_replays: failed_replays.into(), - history_map: Arc::new(RwLock::new(history_map)), - metrics: Arc::new(RwLock::new(metrics)), - }); + ) -> eyre::Result<()> { + let Some(worker_dir) = &self.worker_dir else { + return Ok(()); }; - // Ensure corpus dir for current test is created. - if !corpus_dir.is_dir() { - foundry_common::fs::create_dir_all(corpus_dir)?; - } - + // Helper to check if tx can be replayed let can_replay_tx = |tx: &BasicTxDetails| -> bool { fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) || fuzzed_function.is_some_and(|function| { @@ -587,91 +809,180 @@ impl SharedCorpus { }) }; - 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { - let path = entry?.path(); - if path.is_file() - && let Some(name) = path.file_name().and_then(|s| s.to_str()) - && name.contains(METADATA_SUFFIX) - { - // Ignore metadata files - continue; - } - - let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { - Some("gz") => foundry_common::fs::read_json_gzip_file::>(&path), - _ => foundry_common::fs::read_json_file::>(&path), - }; - - let Ok(tx_seq) = read_corpus_result else { - trace!(target: "corpus", "failed to load corpus from {}", path.display()); - continue; - }; + let sync_dir = worker_dir.join(SYNC_DIR); + let corpus_dir = worker_dir.join(CORPUS_DIR); + let mut executor = executor.clone(); + for tx_seq in self.import()? { if !tx_seq.is_empty() { - // Warm up history map from loaded sequences. - let mut executor = executor.clone(); + let mut new_coverage_on_sync = false; for tx in &tx_seq { if can_replay_tx(tx) { - let mut call_result = executor - .call_raw( - tx.sender, - tx.call_details.target, - tx.call_details.calldata.clone(), - U256::ZERO, - ) - .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; - + let mut call_result = executor.call_raw( + tx.sender, + tx.call_details.target, + tx.call_details.calldata.clone(), + U256::ZERO, + )?; + + // Check if this provides new coverage let (new_coverage, is_edge) = - call_result.merge_edge_coverage(&mut history_map); + call_result.merge_edge_coverage(&mut self.history_map); + if new_coverage { - metrics.update_seen(is_edge); + self.metrics.update_seen(is_edge); + new_coverage_on_sync = true; } - // Commit only when running invariant / stateful tests. + // Commit only for stateful tests if fuzzed_contracts.is_some() { executor.commit(&mut call_result); } - } else { - failed_replays.fetch_add(1, Ordering::Relaxed); - // If the only input for fuzzed function cannot be replied, then move to - // next one without adding it in memory. - if fuzzed_function.is_some() { - continue 'corpus_replay; - } + trace!( + target: "corpus", + %new_coverage, + "replayed tx for syncing worker {}: {:?}", + self.id, &tx + ); } } - metrics.corpus_count += 1; + if new_coverage_on_sync { + let corpus_entry = CorpusEntry::from_tx_seq(&tx_seq); + let ext = self + .config + .corpus_gzip + .then_some(format!("{JSON_EXTENSION}.gz")) + .unwrap_or(JSON_EXTENSION.to_string()); + + let file_name = + format!("{}-{}{ext}", corpus_entry.uuid, corpus_entry.timestamp); + + // Move file from sync/ to corpus/ directory + let sync_path = sync_dir.join(&file_name); + let corpus_path = corpus_dir.join(&file_name); + + let Ok(_) = std::fs::rename(&sync_path, &corpus_path) else { + debug!(target: "corpus", "failed to move synced corpus {} from {sync_path:?} to {corpus_path:?} dir in worker {}", corpus_entry.uuid, self.id); + continue; + }; + + trace!( + target: "corpus", + "moved synced corpus {} to corpus dir in worker {}", + corpus_entry.uuid, self.id + ); + + self.in_memory_corpus.push(corpus_entry); + } + } + } + + let last_sync = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); + trace!(target: "corpus", "sync complete for worker {}, updating last sync time to {}", + self.id, + last_sync + ); + self.last_sync_timestamp = last_sync; - trace!( - target: "corpus", - "load sequence with len {} from corpus file {}", - tx_seq.len(), - path.display() - ); + Ok(()) + } + + /// To be run by the master worker (id = 0) to distribute the global corpus to sync/ directories + /// of other workers. + fn distribute(&mut self, num_workers: usize) -> eyre::Result<()> { + if self.id != 0 || self.worker_dir.is_none() { + return Ok(()); + } - // Populate in memory corpus with the sequence from corpus file. + let worker_dir = self.worker_dir.as_ref().unwrap(); + let master_corpus_dir = worker_dir.join(CORPUS_DIR); - in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); + for target_worker in 1..num_workers { + let target_dir = self + .config + .corpus_dir + .as_ref() + .unwrap() + .join(format!("{WORKER}{target_worker}")) + .join(SYNC_DIR); + + if !target_dir.is_dir() { + foundry_common::fs::create_dir_all(&target_dir)?; + } + + for entry in std::fs::read_dir(&master_corpus_dir)? { + let Ok(entry) = entry else { + continue; + }; + + let path = entry.path(); + if path.is_file() + && let Some(name) = path.file_name().and_then(|s| s.to_str()) + && !name.contains(METADATA_SUFFIX) + { + let sync_path = target_dir.join(name); + + let Ok((_, timestamp)) = parse_corpus_filename(name) else { + continue; + }; + + if timestamp > self.last_sync_timestamp { + let Ok(_) = foundry_common::fs::copy(&path, &sync_path) else { + debug!(target: "corpus", "failed to distribute corpus {} from worker {} to {target_dir:?}", name, self.id); + continue; + }; + + trace!(target: "corpus", "distributed corpus {} from worker {} to {target_dir:?}", name, self.id); + } + } } } - Ok(Self { - config: config.into(), - in_memory_corpus: Arc::new(RwLock::new(in_memory_corpus)), - failed_replays: failed_replays.into(), - history_map: Arc::new(RwLock::new(history_map)), - metrics: Arc::new(RwLock::new(metrics)), - }) + Ok(()) } - pub fn new_worker(&self, tx_generator: BoxedStrategy) -> CorpusWorker { - CorpusWorker::new(self.clone(), tx_generator) - } + /// Syncs the workers in_memory_corpus and history_map with the findings from other workers. + pub fn sync( + &mut self, + num_workers: usize, + executor: &Executor, + fuzzed_function: Option<&Function>, + fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, + ) -> eyre::Result<()> { + if self.id == 0 { + // Master worker + self.calibrate(executor, fuzzed_function, fuzzed_contracts)?; + + self.distribute(num_workers)?; + + self.new_entry_indices.clear(); + + trace!(target: "corpus", "master worker synced"); + + return Ok(()); + } - /// Returns campaign failed replays. - pub fn failed_replays(self) -> usize { - self.failed_replays.load(Ordering::Relaxed) + self.export()?; + + self.calibrate(executor, fuzzed_function, fuzzed_contracts)?; + + self.new_entry_indices.clear(); + + trace!(target: "corpus", "synced worker {}", self.id); + + Ok(()) } } + +/// Parses the corpus filename and returns the uuid and timestamp associated with it. +fn parse_corpus_filename(name: &str) -> eyre::Result<(Uuid, u64)> { + let name = name.trim_end_matches(".gz").trim_end_matches(JSON_EXTENSION); + + let parts = name.rsplitn(2, "-").collect::>(); + let uuid = Uuid::parse_str(parts[0])?; + let timestamp = parts[1].parse()?; + + Ok((uuid, timestamp)) +} diff --git a/crates/evm/evm/src/executors/fuzz/mod.rs b/crates/evm/evm/src/executors/fuzz/mod.rs index 4d2f0b35ea780..2541b9fd4c4f8 100644 --- a/crates/evm/evm/src/executors/fuzz/mod.rs +++ b/crates/evm/evm/src/executors/fuzz/mod.rs @@ -1,6 +1,6 @@ use crate::executors::{ DURATION_BETWEEN_METRICS_REPORT, Executor, FailFast, FuzzTestTimer, RawCallResult, - worker_corpus::WorkerCorpus, + corpus::WorkerCorpus, }; use alloy_dyn_abi::JsonAbiExt; use alloy_json_abi::Function; diff --git a/crates/evm/evm/src/executors/invariant/mod.rs b/crates/evm/evm/src/executors/invariant/mod.rs index 911d0f8525d0e..183355421e565 100644 --- a/crates/evm/evm/src/executors/invariant/mod.rs +++ b/crates/evm/evm/src/executors/invariant/mod.rs @@ -1,8 +1,5 @@ use crate::{ - executors::{ - Executor, RawCallResult, - worker_corpus::WorkerCorpus, - }, + executors::{Executor, RawCallResult, corpus::WorkerCorpus}, inspectors::Fuzzer, }; use alloy_primitives::{ diff --git a/crates/evm/evm/src/executors/mod.rs b/crates/evm/evm/src/executors/mod.rs index 43952c4e1cd9f..d519f1617cd5f 100644 --- a/crates/evm/evm/src/executors/mod.rs +++ b/crates/evm/evm/src/executors/mod.rs @@ -62,7 +62,6 @@ pub use invariant::InvariantExecutor; mod corpus; mod trace; -mod worker_corpus; pub use trace::TracingExecutor; diff --git a/crates/evm/evm/src/executors/worker_corpus.rs b/crates/evm/evm/src/executors/worker_corpus.rs deleted file mode 100644 index 0544ec8900e46..0000000000000 --- a/crates/evm/evm/src/executors/worker_corpus.rs +++ /dev/null @@ -1,878 +0,0 @@ -use std::{ - path::PathBuf, - sync::Arc, - time::{SystemTime, UNIX_EPOCH}, -}; - -use alloy_dyn_abi::JsonAbiExt; -use alloy_json_abi::Function; -use alloy_primitives::{Bytes, U256}; -use eyre::eyre; -use foundry_config::FuzzCorpusConfig; -use foundry_evm_fuzz::{ - BasicTxDetails, - invariant::FuzzRunIdentifiedContracts, - strategies::{EvmFuzzState, mutate_param_value}, -}; -use proptest::{ - prelude::{BoxedStrategy, Just, Rng, Strategy}, - prop_oneof, - test_runner::TestRunner, -}; -use uuid::Uuid; - -use crate::executors::{ - Executor, RawCallResult, - corpus::{CorpusEntry, CorpusMetrics, MutationType}, -}; - -const METADATA_SUFFIX: &str = "metadata.json"; -const JSON_EXTENSION: &str = ".json"; -const FAVORABILITY_THRESHOLD: f64 = 0.3; -const COVERAGE_MAP_SIZE: usize = 65536; -const WORKER: &str = "worker"; -const CORPUS_DIR: &str = "corpus"; -const SYNC_DIR: &str = "sync"; - -/// Per-worker corpus manager. -pub struct WorkerCorpus { - /// Worker Id - id: u32, - /// In-memory corpus entries populated from the persisted files and - /// runs administered by this worker. - in_memory_corpus: Vec, - /// History of binned hitcount of edges seen during fuzzing - history_map: Vec, - /// Number of failed replays from initial corpus - pub(crate) failed_replays: usize, - /// Worker Metrics - pub(crate) metrics: CorpusMetrics, - /// Fuzzed calls generator. - tx_generator: BoxedStrategy, - /// Call sequence mutation strategy type generator used by stateful fuzzing. - mutation_generator: BoxedStrategy, - /// Identifier of current mutated entry for this worker. - current_mutated: Option, - /// Config - config: Arc, - /// Indices of new entries added to [`WorkerCorpus::in_memory_corpus`] since last sync. - new_entry_indices: Vec, - /// Last sync timestamp in seconds. - last_sync_timestamp: u64, - /// Worker Dir - worker_dir: Option, -} - -impl WorkerCorpus { - pub fn new( - id: u32, - config: FuzzCorpusConfig, - tx_generator: BoxedStrategy, - // Only required by master worker (id = 0) to replay existing corpus - executor: Option<&Executor>, - fuzzed_function: Option<&Function>, - fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, - ) -> eyre::Result { - let mutation_generator = prop_oneof![ - Just(MutationType::Splice), - Just(MutationType::Repeat), - Just(MutationType::Interleave), - Just(MutationType::Prefix), - Just(MutationType::Suffix), - Just(MutationType::Abi), - ] - .boxed(); - - let worker_dir = if let Some(corpus_dir) = &config.corpus_dir { - // Create the necessary directories for the worker - let worker_dir = corpus_dir.join(format!("{WORKER}{id}")); - let worker_corpus = &worker_dir.join(CORPUS_DIR); - let sync_dir = &worker_dir.join(SYNC_DIR); - - if !worker_corpus.is_dir() { - foundry_common::fs::create_dir_all(worker_corpus)?; - } - - if !sync_dir.is_dir() { - foundry_common::fs::create_dir_all(sync_dir)?; - } - - Some(worker_dir) - } else { - None - }; - - let mut in_memory_corpus = vec![]; - let mut history_map = vec![0u8; COVERAGE_MAP_SIZE]; - let mut metrics = CorpusMetrics::default(); - let mut failed_replays = 0; - - if id == 0 && config.corpus_dir.is_some() { - // Master worker loads the initial corpus if it exists - let corpus_dir = config.corpus_dir.as_ref().unwrap(); - - let can_replay_tx = |tx: &BasicTxDetails| -> bool { - fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) - || fuzzed_function.is_some_and(|function| { - tx.call_details - .calldata - .get(..4) - .is_some_and(|selector| function.selector() == selector) - }) - }; - - let executor = executor.expect("Executor required for master worker"); - 'corpus_replay: for entry in std::fs::read_dir(corpus_dir)? { - let path = entry?.path(); - if path.is_file() - && let Some(name) = path.file_name().and_then(|s| s.to_str()) - && name.contains(METADATA_SUFFIX) - { - // Ignore metadata files - continue; - } - - let read_corpus_result = match path.extension().and_then(|ext| ext.to_str()) { - Some("gz") => { - foundry_common::fs::read_json_gzip_file::>(&path) - } - _ => foundry_common::fs::read_json_file::>(&path), - }; - - let Ok(tx_seq) = read_corpus_result else { - trace!(target: "corpus", "failed to load corpus from {}", path.display()); - continue; - }; - - if !tx_seq.is_empty() { - // Warm up history map from loaded sequences. - let mut executor = executor.clone(); - for tx in &tx_seq { - if can_replay_tx(tx) { - let mut call_result = executor - .call_raw( - tx.sender, - tx.call_details.target, - tx.call_details.calldata.clone(), - U256::ZERO, - ) - .map_err(|e| eyre!(format!("Could not make raw evm call: {e}")))?; - - let (new_coverage, is_edge) = - call_result.merge_edge_coverage(&mut history_map); - if new_coverage { - metrics.update_seen(is_edge); - } - - // Commit only when running invariant / stateful tests. - if fuzzed_contracts.is_some() { - executor.commit(&mut call_result); - } - } else { - failed_replays += 1; - - // If the only input for fuzzed function cannot be replied, then move to - // next one without adding it in memory. - if fuzzed_function.is_some() { - continue 'corpus_replay; - } - } - } - - metrics.corpus_count += 1; - - trace!( - target: "corpus", - "load sequence with len {} from corpus file {}", - tx_seq.len(), - path.display() - ); - - // Populate in memory corpus with the sequence from corpus file. - in_memory_corpus.push(CorpusEntry::new(tx_seq, path)?); - } - } - } - - Ok(Self { - id, - in_memory_corpus, - history_map, - failed_replays, - metrics, - tx_generator, - mutation_generator, - current_mutated: None, - config: config.into(), - new_entry_indices: Default::default(), - last_sync_timestamp: 0, - worker_dir, - }) - } - - /// Updates stats for the given call sequence, if new coverage produced. - /// Persists the call sequence (if corpus directory is configured and new coverage) and updates - /// in-memory corpus. - pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) { - let Some(worker_corpus) = &self.worker_dir else { - return; - }; - - // Update stats of current mutated primary corpus. - if let Some(uuid) = &self.current_mutated { - if let Some(corpus) = - self.in_memory_corpus.iter_mut().find(|corpus| corpus.uuid.eq(uuid)) - { - corpus.total_mutations += 1; - if new_coverage { - corpus.new_finds_produced += 1 - } - let is_favored = (corpus.new_finds_produced as f64 / corpus.total_mutations as f64) - < FAVORABILITY_THRESHOLD; - self.metrics.update_favored(is_favored, corpus.is_favored); - corpus.is_favored = is_favored; - - trace!( - target: "corpus", - "updated worker {} corpus {}, total mutations: {}, new finds: {}", - self.id, corpus.uuid, corpus.total_mutations, corpus.new_finds_produced - ); - } - - self.current_mutated = None; - } - - // Collect inputs only if current run produced new coverage. - if !new_coverage { - return; - } - - let corpus = CorpusEntry::from_tx_seq(inputs); - let corpus_uuid = corpus.uuid; - let timestamp = corpus.timestamp; - // Persist to disk if corpus dir is configured. - let write_result = if self.config.corpus_gzip { - foundry_common::fs::write_json_gzip_file( - worker_corpus - .join(format!("{corpus_uuid}-{timestamp}{JSON_EXTENSION}.gz")) - .as_path(), - &corpus.tx_seq, - ) - } else { - foundry_common::fs::write_json_file( - worker_corpus.join(format!("{corpus_uuid}-{timestamp}{JSON_EXTENSION}")).as_path(), - &corpus.tx_seq, - ) - }; - - if let Err(err) = write_result { - debug!(target: "corpus", %err, "Failed to record call sequence {:?} in worker {}", &corpus.tx_seq, self.id); - } else { - trace!( - target: "corpus", - "persisted {} inputs for new coverage in worker {} for {corpus_uuid} corpus", - self.id, &corpus.tx_seq.len() - ); - } - - // Track in-memory corpus changes to update MasterWorker on sync - let new_index = self.in_memory_corpus.len(); - self.new_entry_indices.push(new_index); - - // This includes reverting txs in the corpus and `can_continue` removes - // them. We want this as it is new coverage and may help reach the other branch. - self.metrics.corpus_count += 1; - self.in_memory_corpus.push(corpus); - } - - /// Collects coverage from call result and updates metrics. - pub fn merge_edge_coverage(&mut self, call_result: &mut RawCallResult) -> bool { - if !self.config.collect_edge_coverage() { - return false; - } - - let (new_coverage, is_edge) = call_result.merge_edge_coverage(&mut self.history_map); - if new_coverage { - self.metrics.update_seen(is_edge); - } - new_coverage - } - - /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than - /// configured max mutations value. Used by invariant test campaigns. - pub fn new_inputs( - &mut self, - test_runner: &mut TestRunner, - fuzz_state: &EvmFuzzState, - targeted_contracts: &FuzzRunIdentifiedContracts, - ) -> eyre::Result> { - let mut new_seq = vec![]; - - // Early return with first_input only if corpus dir / coverage guided fuzzing not - // configured. - if !self.config.is_coverage_guided() { - new_seq.push(self.new_tx(test_runner)?); - return Ok(new_seq); - }; - - if !self.in_memory_corpus.is_empty() { - self.evict_oldest_corpus()?; - - let mutation_type = self - .mutation_generator - .new_tree(test_runner) - .map_err(|err| eyre!("Could not generate mutation type {err}"))? - .current(); - - let rng = test_runner.rng(); - let corpus_len = self.in_memory_corpus.len(); - let primary = &self.in_memory_corpus[rng.random_range(0..corpus_len)]; - let secondary = &self.in_memory_corpus[rng.random_range(0..corpus_len)]; - - match mutation_type { - MutationType::Splice => { - trace!(target: "corpus", "splice {} and {}", primary.uuid, secondary.uuid); - - self.current_mutated = Some(primary.uuid); - - let start1 = rng.random_range(0..primary.tx_seq.len()); - let end1 = rng.random_range(start1..primary.tx_seq.len()); - - let start2 = rng.random_range(0..secondary.tx_seq.len()); - let end2 = rng.random_range(start2..secondary.tx_seq.len()); - - for tx in primary.tx_seq.iter().take(end1).skip(start1) { - new_seq.push(tx.clone()); - } - for tx in secondary.tx_seq.iter().take(end2).skip(start2) { - new_seq.push(tx.clone()); - } - } - MutationType::Repeat => { - let corpus = if rng.random::() { primary } else { secondary }; - trace!(target: "corpus", "repeat {}", corpus.uuid); - - self.current_mutated = Some(corpus.uuid); - - new_seq = corpus.tx_seq.clone(); - let start = rng.random_range(0..corpus.tx_seq.len()); - let end = rng.random_range(start..corpus.tx_seq.len()); - let item_idx = rng.random_range(0..corpus.tx_seq.len()); - let repeated = vec![new_seq[item_idx].clone(); end - start]; - new_seq.splice(start..end, repeated); - } - MutationType::Interleave => { - trace!(target: "corpus", "interleave {} with {}", primary.uuid, secondary.uuid); - - self.current_mutated = Some(primary.uuid); - - for (tx1, tx2) in primary.tx_seq.iter().zip(secondary.tx_seq.iter()) { - // chunks? - let tx = if rng.random::() { tx1.clone() } else { tx2.clone() }; - new_seq.push(tx); - } - } - MutationType::Prefix => { - let corpus = if rng.random::() { primary } else { secondary }; - trace!(target: "corpus", "overwrite prefix of {}", corpus.uuid); - - self.current_mutated = Some(corpus.uuid); - - new_seq = corpus.tx_seq.clone(); - for i in 0..rng.random_range(0..=new_seq.len()) { - new_seq[i] = self.new_tx(test_runner)?; - } - } - MutationType::Suffix => { - let corpus = if rng.random::() { primary } else { secondary }; - trace!(target: "corpus", "overwrite suffix of {}", corpus.uuid); - - self.current_mutated = Some(corpus.uuid); - - new_seq = corpus.tx_seq.clone(); - for i in new_seq.len() - rng.random_range(0..new_seq.len())..corpus.tx_seq.len() - { - new_seq[i] = self.new_tx(test_runner)?; - } - } - MutationType::Abi => { - let targets = targeted_contracts.targets.lock(); - let corpus = if rng.random::() { primary } else { secondary }; - trace!(target: "corpus", "ABI mutate args of {}", corpus.uuid); - - self.current_mutated = Some(corpus.uuid); - - new_seq = corpus.tx_seq.clone(); - - let idx = rng.random_range(0..new_seq.len()); - let tx = new_seq.get_mut(idx).unwrap(); - if let (_, Some(function)) = targets.fuzzed_artifacts(tx) { - // TODO add call_value to call details and mutate it as well as sender some - // of the time - if !function.inputs.is_empty() { - self.abi_mutate(tx, function, test_runner, fuzz_state)?; - } - } - } - } - } - - // Make sure the new sequence contains at least one tx to start fuzzing from. - if new_seq.is_empty() { - new_seq.push(self.new_tx(test_runner)?); - } - trace!(target: "corpus", "new sequence of {} calls generated", new_seq.len()); - - Ok(new_seq) - } - - /// Generates a new input from the shared in memory corpus. Evicts oldest corpus mutated more - /// than configured max mutations value. Used by fuzz (stateless) test campaigns. - pub fn new_input( - &mut self, - test_runner: &mut TestRunner, - fuzz_state: &EvmFuzzState, - function: &Function, - ) -> eyre::Result { - // Early return if not running with coverage guided fuzzing. - if !self.config.is_coverage_guided() { - return Ok(self.new_tx(test_runner)?.call_details.calldata); - } - - self.evict_oldest_corpus()?; - - let tx = if !self.in_memory_corpus.is_empty() { - let corpus = &self.in_memory_corpus - [test_runner.rng().random_range(0..self.in_memory_corpus.len())]; - self.current_mutated = Some(corpus.uuid); - let new_seq = corpus.tx_seq.clone(); - let mut tx = new_seq.first().unwrap().clone(); - self.abi_mutate(&mut tx, function, test_runner, fuzz_state)?; - tx - } else { - self.new_tx(test_runner)? - }; - - Ok(tx.call_details.calldata) - } - - /// Generates single call from corpus strategy. - pub fn new_tx(&self, test_runner: &mut TestRunner) -> eyre::Result { - Ok(self - .tx_generator - .new_tree(test_runner) - .map_err(|_| eyre!("Could not generate case"))? - .current()) - } - - /// Returns the next call to be used in call sequence. - /// If coverage guided fuzzing is not configured or if previous input was discarded then this is - /// a new tx from strategy. - /// If running with coverage guided fuzzing it returns a new call only when sequence - /// does not have enough entries, or randomly. Otherwise, returns the next call from initial - /// sequence. - pub fn generate_next_input( - &mut self, - test_runner: &mut TestRunner, - sequence: &[BasicTxDetails], - discarded: bool, - depth: usize, - ) -> eyre::Result { - // Early return with new input if corpus dir / coverage guided fuzzing not configured or if - // call was discarded. - if self.config.corpus_dir.is_none() || discarded { - return self.new_tx(test_runner); - } - - // When running with coverage guided fuzzing enabled then generate new sequence if initial - // sequence's length is less than depth or randomly, to occasionally intermix new txs. - if depth > sequence.len().saturating_sub(1) || test_runner.rng().random_ratio(1, 10) { - return self.new_tx(test_runner); - } - - // Continue with the next call initial sequence - Ok(sequence[depth].clone()) - } - - /// Flush the oldest corpus mutated more than configured max mutations unless they are - /// favored. - fn evict_oldest_corpus(&mut self) -> eyre::Result<()> { - if self.in_memory_corpus.len() > self.config.corpus_min_size.max(1) - && let Some(index) = self.in_memory_corpus.iter().position(|corpus| { - corpus.total_mutations > self.config.corpus_min_mutations && !corpus.is_favored - }) - { - let corpus = self.in_memory_corpus.get(index).unwrap(); - - let uuid = corpus.uuid; - debug!(target: "corpus", "evict corpus {uuid} in worker {}", self.id); - - // Flush to disk the seed metadata at the time of eviction. - let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); - foundry_common::fs::write_json_file( - self.worker_dir - .clone() - .unwrap() - .join(format!("{WORKER}{}", self.id)) // Worker dir - .join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}")) - .as_path(), - &corpus, - )?; - - // Remove corpus from memory. - self.in_memory_corpus.remove(index); - - // Adjust the tracked indices - self.new_entry_indices.retain_mut(|i| { - if *i > index { - *i -= 1; // Shift indices down - true // Keep this index - } else { - *i != index // Remove if it's the deleted index, keep otherwise - } - }); - } - Ok(()) - } - - /// Mutates calldata of provided tx by abi decoding current values and randomly selecting the - /// inputs to change. - fn abi_mutate( - &self, - tx: &mut BasicTxDetails, - function: &Function, - test_runner: &mut TestRunner, - fuzz_state: &EvmFuzzState, - ) -> eyre::Result<()> { - // let rng = test_runner.rng(); - let mut arg_mutation_rounds = - test_runner.rng().random_range(0..=function.inputs.len()).max(1); - let round_arg_idx: Vec = if function.inputs.len() <= 1 { - vec![0] - } else { - (0..arg_mutation_rounds) - .map(|_| test_runner.rng().random_range(0..function.inputs.len())) - .collect() - }; - let mut prev_inputs = function - .abi_decode_input(&tx.call_details.calldata[4..]) - .map_err(|err| eyre!("failed to load previous inputs: {err}"))?; - - while arg_mutation_rounds > 0 { - let idx = round_arg_idx[arg_mutation_rounds - 1]; - prev_inputs[idx] = mutate_param_value( - &function - .inputs - .get(idx) - .expect("Could not get input to mutate") - .selector_type() - .parse()?, - prev_inputs[idx].clone(), - test_runner, - fuzz_state, - ); - arg_mutation_rounds -= 1; - } - - tx.call_details.calldata = - function.abi_encode_input(&prev_inputs).map_err(|e| eyre!(e.to_string()))?.into(); - Ok(()) - } - - // Sync Methods - - /// Exports the new corpus entries to the master workers (id = 0) sync dir. - fn export(&self) -> eyre::Result<()> { - // Early return if no new entries or corpus dir not configured - if self.new_entry_indices.is_empty() || self.worker_dir.is_none() { - return Ok(()); - } - - let worker_dir = self.worker_dir.as_ref().unwrap(); - - // Master doesn't export (it only receives from others) - if self.id == 0 { - return Ok(()); - } - - let Some(master_sync_dir) = self - .config - .corpus_dir - .as_ref() - .map(|dir| dir.join(format!("{WORKER}0")).join(SYNC_DIR)) - else { - return Ok(()); - }; - - let mut exported = 0; - let corpus_dir = worker_dir.join(CORPUS_DIR); - - for &index in &self.new_entry_indices { - if let Some(entry) = self.in_memory_corpus.get(index) { - let ext = self - .config - .corpus_gzip - .then_some(format!("{JSON_EXTENSION}.gz")) - .unwrap_or(JSON_EXTENSION.to_string()); - let file_name = format!("{}-{}{ext}", entry.uuid, entry.timestamp); - let file_path = corpus_dir.join(&file_name); - let sync_path = master_sync_dir.join(&file_name); - - let Ok(_) = foundry_common::fs::copy(file_path, sync_path) else { - debug!(target: "corpus", "failed to export corpus {} from worker {}", entry.uuid, self.id); - continue; - }; - - exported += 1; - } - } - - trace!(target: "corpus", "exported {exported} new corpus entries from worker {}", self.id); - - Ok(()) - } - - /// Imports the new corpus entries tx sequence which will be used to replay and update history - /// map. - fn import(&self) -> eyre::Result>> { - let Some(worker_dir) = &self.worker_dir else { - return Ok(vec![]); - }; - - let sync_dir = worker_dir.join(SYNC_DIR); - if !sync_dir.is_dir() { - return Ok(vec![]); - } - - let mut imports = vec![]; - for entry in std::fs::read_dir(sync_dir)? { - let Ok(entry) = entry else { - continue; - }; - - // Get the uuid and timestamp from the filename - let timestamp = if let Some(name) = entry.file_name().to_str() - && let Ok((_, timestamp)) = parse_corpus_filename(name) - { - timestamp - } else { - continue; - }; - - if timestamp <= self.last_sync_timestamp { - // TODO: Delete synced file - continue; - } - - let corpus = if self.config.corpus_gzip { - foundry_common::fs::read_json_gzip_file::>(&entry.path())? - } else { - foundry_common::fs::read_json_file::>(&entry.path())? - }; - - imports.push(corpus); - } - - Ok(imports) - } - - /// Syncs and calibrates the in memory corpus and updates the history_map if new coverage is - /// found from the corpus findings of other workers. - fn calibrate( - &mut self, - executor: &Executor, - fuzzed_function: Option<&Function>, - fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, - ) -> eyre::Result<()> { - let Some(worker_dir) = &self.worker_dir else { - return Ok(()); - }; - - // Helper to check if tx can be replayed - let can_replay_tx = |tx: &BasicTxDetails| -> bool { - fuzzed_contracts.is_some_and(|contracts| contracts.targets.lock().can_replay(tx)) - || fuzzed_function.is_some_and(|function| { - tx.call_details - .calldata - .get(..4) - .is_some_and(|selector| function.selector() == selector) - }) - }; - - let sync_dir = worker_dir.join(SYNC_DIR); - let corpus_dir = worker_dir.join(CORPUS_DIR); - - let mut executor = executor.clone(); - for tx_seq in self.import()? { - if !tx_seq.is_empty() { - let mut new_coverage_on_sync = false; - for tx in &tx_seq { - if can_replay_tx(tx) { - let mut call_result = executor.call_raw( - tx.sender, - tx.call_details.target, - tx.call_details.calldata.clone(), - U256::ZERO, - )?; - - // Check if this provides new coverage - let (new_coverage, is_edge) = - call_result.merge_edge_coverage(&mut self.history_map); - - if new_coverage { - self.metrics.update_seen(is_edge); - new_coverage_on_sync = true; - } - - // Commit only for stateful tests - if fuzzed_contracts.is_some() { - executor.commit(&mut call_result); - } - - trace!( - target: "corpus", - %new_coverage, - "replayed tx for syncing worker {}: {:?}", - self.id, &tx - ); - } - } - - if new_coverage_on_sync { - let corpus_entry = CorpusEntry::from_tx_seq(&tx_seq); - let ext = self - .config - .corpus_gzip - .then_some(format!("{JSON_EXTENSION}.gz")) - .unwrap_or(JSON_EXTENSION.to_string()); - - let file_name = - format!("{}-{}{ext}", corpus_entry.uuid, corpus_entry.timestamp); - - // Move file from sync/ to corpus/ directory - let sync_path = sync_dir.join(&file_name); - let corpus_path = corpus_dir.join(&file_name); - - let Ok(_) = std::fs::rename(&sync_path, &corpus_path) else { - debug!(target: "corpus", "failed to move synced corpus {} from {sync_path:?} to {corpus_path:?} dir in worker {}", corpus_entry.uuid, self.id); - continue; - }; - - trace!( - target: "corpus", - "moved synced corpus {} to corpus dir in worker {}", - corpus_entry.uuid, self.id - ); - - self.in_memory_corpus.push(corpus_entry); - } - } - } - - let last_sync = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); - trace!(target: "corpus", "sync complete for worker {}, updating last sync time to {}", - self.id, - last_sync - ); - self.last_sync_timestamp = last_sync; - - Ok(()) - } - - /// To be run by the master worker (id = 0) to distribute the global corpus to sync/ directories - /// of other workers. - fn distribute(&mut self, num_workers: usize) -> eyre::Result<()> { - if self.id != 0 || self.worker_dir.is_none() { - return Ok(()); - } - - let worker_dir = self.worker_dir.as_ref().unwrap(); - let master_corpus_dir = worker_dir.join(CORPUS_DIR); - - for target_worker in 1..num_workers { - let target_dir = self - .config - .corpus_dir - .as_ref() - .unwrap() - .join(format!("{WORKER}{target_worker}")) - .join(SYNC_DIR); - - if !target_dir.is_dir() { - foundry_common::fs::create_dir_all(&target_dir)?; - } - - for entry in std::fs::read_dir(&master_corpus_dir)? { - let Ok(entry) = entry else { - continue; - }; - - let path = entry.path(); - if path.is_file() - && let Some(name) = path.file_name().and_then(|s| s.to_str()) - && !name.contains(METADATA_SUFFIX) - { - let sync_path = target_dir.join(name); - - let Ok((_, timestamp)) = parse_corpus_filename(name) else { - continue; - }; - - if timestamp > self.last_sync_timestamp { - let Ok(_) = foundry_common::fs::copy(&path, &sync_path) else { - debug!(target: "corpus", "failed to distribute corpus {} from worker {} to {target_dir:?}", name, self.id); - continue; - }; - - trace!(target: "corpus", "distributed corpus {} from worker {} to {target_dir:?}", name, self.id); - } - } - } - } - - Ok(()) - } - - /// Syncs the workers in_memory_corpus and history_map with the findings from other workers. - pub fn sync( - &mut self, - num_workers: usize, - executor: &Executor, - fuzzed_function: Option<&Function>, - fuzzed_contracts: Option<&FuzzRunIdentifiedContracts>, - ) -> eyre::Result<()> { - if self.id == 0 { - // Master worker - self.calibrate(executor, fuzzed_function, fuzzed_contracts)?; - - self.distribute(num_workers)?; - - self.new_entry_indices.clear(); - - trace!(target: "corpus", "master worker synced"); - - return Ok(()); - } - - self.export()?; - - self.calibrate(executor, fuzzed_function, fuzzed_contracts)?; - - self.new_entry_indices.clear(); - - trace!(target: "corpus", "synced worker {}", self.id); - - Ok(()) - } -} - -/// Parses the corpus filename and returns the uuid and timestamp associated with it. -fn parse_corpus_filename(name: &str) -> eyre::Result<(Uuid, u64)> { - let name = name.trim_end_matches(".gz").trim_end_matches(JSON_EXTENSION); - - let parts = name.rsplitn(2, "-").collect::>(); - let uuid = Uuid::parse_str(parts[0])?; - let timestamp = parts[1].parse()?; - - Ok((uuid, timestamp)) -} From fced74c66961708bc2a8cc294bcd473bd98ed8f3 Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Fri, 3 Oct 2025 19:51:26 +0530 Subject: [PATCH 15/16] fix tests --- crates/evm/evm/src/executors/corpus.rs | 28 ++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 6724e1ffc76f9..03de74df2c8d9 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -623,7 +623,6 @@ impl WorkerCorpus { self.worker_dir .clone() .unwrap() - .join(format!("{WORKER}{}", self.id)) // Worker dir .join(format!("{uuid}-{eviction_time}-{METADATA_SUFFIX}")) .as_path(), &corpus, @@ -1009,7 +1008,7 @@ mod tests { dir } - fn new_manager_with_single_corpus() -> (CorpusManager, Uuid) { + fn new_manager_with_single_corpus() -> (WorkerCorpus, Uuid) { let tx_gen = Just(basic_tx()).boxed(); let config = FuzzCorpusConfig { corpus_dir: Some(temp_corpus_dir()), @@ -1023,15 +1022,24 @@ mod tests { let corpus = CorpusEntry::from_tx_seq(&tx_seq); let seed_uuid = corpus.uuid; - let manager = CorpusManager { + // Create corpus root dir and worker subdirectory + let corpus_root = config.corpus_dir.clone().unwrap(); + let worker_subdir = corpus_root.join("worker0"); + let _ = fs::create_dir_all(&worker_subdir); + + let manager = WorkerCorpus { + id: 0, tx_generator: tx_gen, mutation_generator: Just(MutationType::Repeat).boxed(), - config, + config: config.into(), in_memory_corpus: vec![corpus], current_mutated: Some(seed_uuid), failed_replays: 0, history_map: vec![0u8; COVERAGE_MAP_SIZE], metrics: CorpusMetrics::default(), + new_entry_indices: Default::default(), + last_sync_timestamp: 0, + worker_dir: Some(corpus_root), }; (manager, seed_uuid) @@ -1121,15 +1129,23 @@ mod tests { non_favored.is_favored = false; let non_favored_uuid = non_favored.uuid; - let mut manager = CorpusManager { + let corpus_root = temp_corpus_dir(); + let worker_subdir = corpus_root.join("worker0"); + fs::create_dir_all(&worker_subdir).unwrap(); + + let mut manager = WorkerCorpus { + id: 0, tx_generator: tx_gen, mutation_generator: Just(MutationType::Repeat).boxed(), - config, + config: config.into(), in_memory_corpus: vec![favored, non_favored], current_mutated: None, failed_replays: 0, history_map: vec![0u8; COVERAGE_MAP_SIZE], metrics: CorpusMetrics::default(), + new_entry_indices: Default::default(), + last_sync_timestamp: 0, + worker_dir: Some(corpus_root), }; // First eviction should remove the non-favored one From 62a4b57a36dc481321004c506a2fd2e06e900a8a Mon Sep 17 00:00:00 2001 From: Yash Atreya <44857776+yash-atreya@users.noreply.github.com> Date: Mon, 6 Oct 2025 17:32:11 +0530 Subject: [PATCH 16/16] tracing span --- crates/evm/evm/src/executors/corpus.rs | 46 ++++++++++++++------------ 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/crates/evm/evm/src/executors/corpus.rs b/crates/evm/evm/src/executors/corpus.rs index 03de74df2c8d9..0b49d6904e63c 100644 --- a/crates/evm/evm/src/executors/corpus.rs +++ b/crates/evm/evm/src/executors/corpus.rs @@ -323,6 +323,7 @@ impl WorkerCorpus { /// Updates stats for the given call sequence, if new coverage produced. /// Persists the call sequence (if corpus directory is configured and new coverage) and updates /// in-memory corpus. + #[tracing::instrument(skip_all, fields(worker_id = self.id))] pub fn process_inputs(&mut self, inputs: &[BasicTxDetails], new_coverage: bool) { let Some(worker_corpus) = &self.worker_dir else { return; @@ -344,8 +345,8 @@ impl WorkerCorpus { trace!( target: "corpus", - "updated worker {} corpus {}, total mutations: {}, new finds: {}", - self.id, corpus.uuid, corpus.total_mutations, corpus.new_finds_produced + "updated corpus {}, total mutations: {}, new finds: {}", + corpus.uuid, corpus.total_mutations, corpus.new_finds_produced ); } @@ -376,12 +377,12 @@ impl WorkerCorpus { }; if let Err(err) = write_result { - debug!(target: "corpus", %err, "Failed to record call sequence {:?} in worker {}", &corpus.tx_seq, self.id); + debug!(target: "corpus", %err, "Failed to record call sequence {:?}", &corpus.tx_seq); } else { trace!( target: "corpus", - "persisted {} inputs for new coverage in worker {} for {corpus_uuid} corpus", - self.id, &corpus.tx_seq.len() + "persisted {} inputs for new coverage for {corpus_uuid} corpus", + &corpus.tx_seq.len() ); } @@ -410,6 +411,7 @@ impl WorkerCorpus { /// Generates new call sequence from in memory corpus. Evicts oldest corpus mutated more than /// configured max mutations value. Used by invariant test campaigns. + #[tracing::instrument(skip_all, fields(worker_id = self.id))] pub fn new_inputs( &mut self, test_runner: &mut TestRunner, @@ -538,6 +540,7 @@ impl WorkerCorpus { /// Generates a new input from the shared in memory corpus. Evicts oldest corpus mutated more /// than configured max mutations value. Used by fuzz (stateless) test campaigns. + #[tracing::instrument(skip_all, fields(worker_id = self.id))] pub fn new_input( &mut self, test_runner: &mut TestRunner, @@ -615,7 +618,7 @@ impl WorkerCorpus { let corpus = self.in_memory_corpus.get(index).unwrap(); let uuid = corpus.uuid; - debug!(target: "corpus", "evict corpus {uuid} in worker {}", self.id); + debug!(target: "corpus", "evict corpus {uuid}"); // Flush to disk the seed metadata at the time of eviction. let eviction_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); @@ -691,6 +694,7 @@ impl WorkerCorpus { // Sync Methods /// Exports the new corpus entries to the master workers (id = 0) sync dir. + #[tracing::instrument(skip_all, fields(worker_id = self.id))] fn export(&self) -> eyre::Result<()> { // Early return if no new entries or corpus dir not configured if self.new_entry_indices.is_empty() || self.worker_dir.is_none() { @@ -728,7 +732,7 @@ impl WorkerCorpus { let sync_path = master_sync_dir.join(&file_name); let Ok(_) = foundry_common::fs::copy(file_path, sync_path) else { - debug!(target: "corpus", "failed to export corpus {} from worker {}", entry.uuid, self.id); + debug!(target: "corpus", "failed to export corpus {}", entry.uuid); continue; }; @@ -736,7 +740,7 @@ impl WorkerCorpus { } } - trace!(target: "corpus", "exported {exported} new corpus entries from worker {}", self.id); + trace!(target: "corpus", "exported {exported} new corpus entries"); Ok(()) } @@ -787,6 +791,7 @@ impl WorkerCorpus { /// Syncs and calibrates the in memory corpus and updates the history_map if new coverage is /// found from the corpus findings of other workers. + #[tracing::instrument(skip_all, fields(worker_id = self.id))] fn calibrate( &mut self, executor: &Executor, @@ -841,8 +846,8 @@ impl WorkerCorpus { trace!( target: "corpus", %new_coverage, - "replayed tx for syncing worker {}: {:?}", - self.id, &tx + "replayed tx for syncing: {:?}", + &tx ); } } @@ -863,14 +868,14 @@ impl WorkerCorpus { let corpus_path = corpus_dir.join(&file_name); let Ok(_) = std::fs::rename(&sync_path, &corpus_path) else { - debug!(target: "corpus", "failed to move synced corpus {} from {sync_path:?} to {corpus_path:?} dir in worker {}", corpus_entry.uuid, self.id); + debug!(target: "corpus", "failed to move synced corpus {} from {sync_path:?} to {corpus_path:?} dir", corpus_entry.uuid); continue; }; trace!( target: "corpus", - "moved synced corpus {} to corpus dir in worker {}", - corpus_entry.uuid, self.id + "moved synced corpus {} to corpus dir", + corpus_entry.uuid ); self.in_memory_corpus.push(corpus_entry); @@ -879,10 +884,7 @@ impl WorkerCorpus { } let last_sync = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); - trace!(target: "corpus", "sync complete for worker {}, updating last sync time to {}", - self.id, - last_sync - ); + trace!(target: "corpus", "sync complete, updating last sync time to {}", last_sync); self.last_sync_timestamp = last_sync; Ok(()) @@ -890,6 +892,7 @@ impl WorkerCorpus { /// To be run by the master worker (id = 0) to distribute the global corpus to sync/ directories /// of other workers. + #[tracing::instrument(skip_all, fields(worker_id = self.id))] fn distribute(&mut self, num_workers: usize) -> eyre::Result<()> { if self.id != 0 || self.worker_dir.is_none() { return Ok(()); @@ -929,11 +932,11 @@ impl WorkerCorpus { if timestamp > self.last_sync_timestamp { let Ok(_) = foundry_common::fs::copy(&path, &sync_path) else { - debug!(target: "corpus", "failed to distribute corpus {} from worker {} to {target_dir:?}", name, self.id); + debug!(target: "corpus", "failed to distribute corpus {} to {target_dir:?}", name); continue; }; - trace!(target: "corpus", "distributed corpus {} from worker {} to {target_dir:?}", name, self.id); + trace!(target: "corpus", "distributed corpus {} to {target_dir:?}", name); } } } @@ -943,6 +946,7 @@ impl WorkerCorpus { } /// Syncs the workers in_memory_corpus and history_map with the findings from other workers. + #[tracing::instrument(skip_all, fields(worker_id = self.id))] pub fn sync( &mut self, num_workers: usize, @@ -958,7 +962,7 @@ impl WorkerCorpus { self.new_entry_indices.clear(); - trace!(target: "corpus", "master worker synced"); + trace!(target: "corpus", "master synced"); return Ok(()); } @@ -969,7 +973,7 @@ impl WorkerCorpus { self.new_entry_indices.clear(); - trace!(target: "corpus", "synced worker {}", self.id); + trace!(target: "corpus", "synced"); Ok(()) }