From 08fa37bc2849bcff7a40c0e41ea8910b818f00d4 Mon Sep 17 00:00:00 2001 From: qdzzyb2015 Date: Fri, 13 Feb 2026 09:59:30 +0800 Subject: [PATCH 01/16] Optimize event selector execution and branch probing --- src/events/mod.rs | 707 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 707 insertions(+) create mode 100644 src/events/mod.rs diff --git a/src/events/mod.rs b/src/events/mod.rs new file mode 100644 index 0000000..109431f --- /dev/null +++ b/src/events/mod.rs @@ -0,0 +1,707 @@ +use std::{cmp::Ordering, collections::hash_map::DefaultHasher, hash::Hasher}; + +use crate::collections::{HashMap, HashSet}; +use crate::evm::{op, vm::Vm}; +use crate::utils::execute_until_function_start; + +mod calldata; +use calldata::CallDataImpl; + +/// Event selector is a 32-byte keccak256 hash of the event signature +pub type EventSelector = [u8; 32]; + +#[derive(Clone, Debug, PartialEq, Eq)] +enum Label {} + +const PROBE_STEP_LIMIT: u16 = 12; +const PROBE_GAS_LIMIT: u32 = 2_500; +const STACK_FINGERPRINT_ELEMS: usize = 10; +const MEMORY_FINGERPRINT_WRITES: usize = 6; +const MEMORY_FINGERPRINT_BYTES: usize = 8; +const MAX_PENDING_STATES: usize = 4_096; +const MAX_VISITED_STATES: usize = 50_000; +const EXEC_ROUNDS: [(u32, u8, u32); 3] = [ + // (gas_limit, max_fork_depth, max_steps_per_state) + (80_000, 2, 2_000), + (150_000, 4, 5_000), + (260_000, 5, 10_000), +]; + +/// Checks if a 32-byte value looks like a keccak256 hash (event selector). +fn is_plausible_event_hash(val: &[u8; 32]) -> bool { + if val == &[0u8; 32] { + return false; + } + if val[..6] == [0u8; 6] { + return false; + } + if val[26..] 
== [0u8; 6] { + return false; + } + let mut zero_run = 0u8; + let mut ff_run = 0u8; + for &b in val { + if b == 0 { + zero_run += 1; + if zero_run >= 4 { + return false; + } + } else { + zero_run = 0; + } + if b == 0xff { + ff_run += 1; + if ff_run >= 4 { + return false; + } + } else { + ff_run = 0; + } + } + true +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct StateKey { + pc: usize, + stack_len: usize, + memory_writes: usize, + stack_hash: u64, + memory_hash: u64, +} + +fn state_key(vm: &Vm) -> StateKey { + let mut stack_hasher = DefaultHasher::new(); + let stack_start = vm.stack.data.len().saturating_sub(STACK_FINGERPRINT_ELEMS); + for el in &vm.stack.data[stack_start..] { + stack_hasher.write(&el.data); + } + + let mut memory_hasher = DefaultHasher::new(); + for (offset, mem) in vm.memory.data.iter().rev().take(MEMORY_FINGERPRINT_WRITES) { + memory_hasher.write_u32(*offset); + memory_hasher.write_usize(mem.data.len()); + let n = std::cmp::min(MEMORY_FINGERPRINT_BYTES, mem.data.len()); + memory_hasher.write(&mem.data[..n]); + if mem.data.len() > n { + memory_hasher.write(&mem.data[mem.data.len() - n..]); + } + } + + StateKey { + pc: vm.pc, + stack_len: vm.stack.data.len(), + memory_writes: vm.memory.data.len(), + stack_hash: stack_hasher.finish(), + memory_hash: memory_hasher.finish(), + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum ProbeOutcome { + DeadEnd, + Terminated, + Alive, + HitsLog, +} + +impl ProbeOutcome { + fn score(self) -> u8 { + match self { + ProbeOutcome::DeadEnd => 0, + ProbeOutcome::Terminated => 1, + ProbeOutcome::Alive => 2, + ProbeOutcome::HitsLog => 3, + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum JumpDecision { + KeepCurrent, + SwitchOther, + ForkBoth, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct JumpClassify { + decision: JumpDecision, + needs_more: bool, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +struct ProbeCacheKey { + from_pc: usize, + to_pc: usize, + 
stack_top: [u8; 32], + stack_len: usize, +} + +fn probe_cache_key(vm: &Vm, to_pc: usize) -> ProbeCacheKey { + ProbeCacheKey { + from_pc: vm.pc, + to_pc, + stack_top: vm.stack.peek().map_or([0u8; 32], |v| v.data), + stack_len: vm.stack.data.len(), + } +} + +fn is_static_dead_end(code: &[u8], pc: usize) -> bool { + if pc >= code.len() { + return true; + } + + let mut cur = pc; + for _ in 0..10 { + if cur >= code.len() { + return false; + } + + let op = code[cur]; + match op { + op::REVERT | op::INVALID => return true, + + // Common prelude before revert branch bodies. + op::JUMPDEST + | op::PUSH0..=op::PUSH32 + | op::DUP1..=op::DUP16 + | op::SWAP1..=op::SWAP16 + | op::POP => { + cur += op::info(op).size; + } + + _ => return false, + } + } + + false +} + +fn probe_branch_cached( + vm: &Vm, + start_pc: usize, + step_limit: u16, + gas_limit: u32, + cache: &mut HashMap, +) -> ProbeOutcome { + let key = probe_cache_key(vm, start_pc); + if let Some(outcome) = cache.get(&key) { + return *outcome; + } + + let mut branch = vm.fork(); + branch.pc = start_pc; + let outcome = probe_branch(branch, start_pc, step_limit, gas_limit); + cache.insert(key, outcome); + outcome +} + +fn probe_branch( + mut vm: Vm, + start_pc: usize, + step_limit: u16, + gas_limit: u32, +) -> ProbeOutcome { + if start_pc >= vm.code.len() { + return ProbeOutcome::DeadEnd; + } + + vm.pc = start_pc; + let mut gas_used = 0u32; + + for _ in 0..step_limit { + if vm.stopped { + return ProbeOutcome::Terminated; + } + + let ret = match vm.step() { + Ok(v) => v, + Err(_) => return ProbeOutcome::DeadEnd, + }; + + gas_used = gas_used.saturating_add(ret.gas_used); + if gas_used > gas_limit { + return ProbeOutcome::Alive; + } + + match ret.op { + op::LOG1..=op::LOG4 => return ProbeOutcome::HitsLog, + op::REVERT | op::INVALID => return ProbeOutcome::DeadEnd, + op::STOP | op::RETURN | op::SELFDESTRUCT => return ProbeOutcome::Terminated, + _ => {} + } + } + + if vm.stopped { + ProbeOutcome::Terminated + } else { + 
ProbeOutcome::Alive + } +} + +fn classify_jump( + vm: &Vm, + other_pc: usize, + can_fork: bool, + probe_steps: u16, + probe_gas: u32, + probe_cache: &mut HashMap, +) -> JumpClassify { + if other_pc == vm.pc { + return JumpClassify { + decision: JumpDecision::KeepCurrent, + needs_more: false, + }; + } + + // Solidity require() revert branches are often statically obvious: + // JUMPDEST -> PUSH* ... -> REVERT/INVALID. + let other_static_dead = is_static_dead_end(vm.code, other_pc); + if other_static_dead { + return JumpClassify { + decision: JumpDecision::KeepCurrent, + needs_more: false, + }; + } + + let current_static_dead = is_static_dead_end(vm.code, vm.pc); + if current_static_dead { + return JumpClassify { + decision: JumpDecision::SwitchOther, + needs_more: false, + }; + } + + if can_fork { + return JumpClassify { + decision: JumpDecision::ForkBoth, + needs_more: false, + }; + } + + let other = probe_branch_cached(vm, other_pc, probe_steps, probe_gas, probe_cache); + if other == ProbeOutcome::DeadEnd { + return JumpClassify { + decision: JumpDecision::KeepCurrent, + needs_more: false, + }; + } + + let current = probe_branch_cached(vm, vm.pc, probe_steps, probe_gas, probe_cache); + if current == ProbeOutcome::DeadEnd { + return JumpClassify { + decision: JumpDecision::SwitchOther, + needs_more: false, + }; + } + + match other.score().cmp(&current.score()) { + Ordering::Greater => JumpClassify { + decision: JumpDecision::SwitchOther, + needs_more: false, + }, + Ordering::Less => JumpClassify { + decision: JumpDecision::KeepCurrent, + needs_more: false, + }, + Ordering::Equal => JumpClassify { + decision: JumpDecision::KeepCurrent, + // Only keep escalating when both branches look equally open-ended. 
+ needs_more: current == ProbeOutcome::Alive, + }, + } +} + +fn collect_event( + events: &mut Vec, + seen: &mut HashSet, + topic0: EventSelector, +) { + if is_plausible_event_hash(&topic0) && seen.insert(topic0) { + events.push(topic0); + } +} + +fn execute_paths<'a>( + start_vm: Vm<'a, Label, CallDataImpl>, + initial_gas: u32, + events: &mut Vec, + seen: &mut HashSet, + gas_limit: u32, + max_depth: u8, + max_steps: u32, +) -> bool { + if initial_gas > gas_limit { + return true; + } + + let mut needs_more = false; + let mut visited: HashSet = HashSet::default(); + let mut probe_cache: HashMap = HashMap::default(); + let mut queue: Vec<(Vm, u32, u8, u32)> = Vec::new(); + queue.push((start_vm, initial_gas, 0, 0)); + + while let Some((mut vm, mut gas_used, depth, mut steps)) = queue.pop() { + while !vm.stopped { + if gas_used >= gas_limit || steps >= max_steps { + needs_more = true; + break; + } + + let step_pc = vm.pc; + let ret = match vm.step() { + Ok(v) => v, + Err(_) => break, + }; + + gas_used = gas_used.saturating_add(ret.gas_used); + steps += 1; + + if gas_used > gas_limit { + break; + } + + match ret.op { + op::LOG1..=op::LOG4 => collect_event(events, seen, ret.args[0].data), + op::JUMPI => { + if visited.len() >= MAX_VISITED_STATES { + needs_more = true; + break; + } + if !visited.insert(state_key(&vm)) { + break; + } + + let cond_zero = ret.args[1].data == [0u8; 32]; + let other_pc = if cond_zero { + usize::try_from(&ret.args[0]).ok() + } else { + step_pc.checked_add(1) + }; + + let Some(other_pc) = other_pc else { + continue; + }; + + let other_is_valid = if cond_zero { + other_pc < vm.code.len() && vm.code[other_pc] == op::JUMPDEST + } else { + other_pc < vm.code.len() + }; + + if !other_is_valid { + continue; + } + + let probe_gas = gas_limit.saturating_sub(gas_used).min(PROBE_GAS_LIMIT); + let can_fork = depth < max_depth && queue.len() < MAX_PENDING_STATES; + let jump = classify_jump( + &vm, + other_pc, + can_fork, + PROBE_STEP_LIMIT, + probe_gas, + 
&mut probe_cache, + ); + if jump.needs_more { + needs_more = true; + } + + match jump.decision { + JumpDecision::KeepCurrent => {} + JumpDecision::SwitchOther => vm.pc = other_pc, + JumpDecision::ForkBoth => { + if can_fork { + let mut forked = vm.fork(); + forked.pc = other_pc; + queue.push((forked, gas_used, depth + 1, steps)); + } + } + } + } + op::JUMP => { + if visited.len() >= MAX_VISITED_STATES { + needs_more = true; + break; + } + if !visited.insert(state_key(&vm)) { + break; + } + } + _ => {} + } + } + } + needs_more +} + +fn execute_function( + code: &[u8], + calldata: &CallDataImpl, + events: &mut Vec, + seen: &mut HashSet, + gas_limit: u32, + max_depth: u8, + max_steps: u32, +) -> bool { + let mut vm = Vm::new(code, calldata); + let Some(initial_gas) = execute_until_function_start(&mut vm, gas_limit) else { + return true; + }; + + execute_paths( + vm, + initial_gas, + events, + seen, + gas_limit, + max_depth, + max_steps, + ) +} + +fn execute_function_from_offset( + code: &[u8], + calldata: &CallDataImpl, + offset: usize, + events: &mut Vec, + seen: &mut HashSet, + gas_limit: u32, + max_depth: u8, + max_steps: u32, +) -> bool { + if offset < code.len() && code[offset] == op::JUMPDEST { + let mut vm = Vm::new(code, calldata); + vm.pc = offset; + execute_paths(vm, 0, events, seen, gas_limit, max_depth, max_steps) + } else { + execute_function( + code, calldata, events, seen, gas_limit, max_depth, max_steps, + ) + } +} + +fn execute_from_entry( + code: &[u8], + calldata: &CallDataImpl, + events: &mut Vec, + seen: &mut HashSet, + gas_limit: u32, + max_depth: u8, + max_steps: u32, +) -> bool { + let vm = Vm::new(code, calldata); + execute_paths(vm, 0, events, seen, gas_limit, max_depth, max_steps) +} + +/// Extracts all event selectors from contract bytecode. 
+pub fn contract_events(code: &[u8]) -> Vec { + if code.is_empty() { + return Vec::new(); + } + + let (selectors, _) = crate::selectors::function_selectors(code, 0); + let mut events = Vec::::new(); + let mut seen = HashSet::::default(); + let mut stable_rounds = 0u8; + + if selectors.is_empty() { + let calldata = CallDataImpl { selector: [0; 4] }; + let mut pending = true; + for &(gas_limit, max_depth, max_steps) in &EXEC_ROUNDS { + if !pending { + break; + } + let before = seen.len(); + pending = execute_from_entry( + code, + &calldata, + &mut events, + &mut seen, + gas_limit, + max_depth, + max_steps, + ); + if seen.len() == before { + stable_rounds += 1; + if stable_rounds >= 2 { + break; + } + } else { + stable_rounds = 0; + } + } + } else { + let mut pending: Vec<([u8; 4], usize)> = selectors + .iter() + .map(|(selector, offset)| (*selector, *offset)) + .collect(); + + for &(gas_limit, max_depth, max_steps) in &EXEC_ROUNDS { + if pending.is_empty() { + break; + } + let before = seen.len(); + let mut next_pending = Vec::with_capacity(pending.len()); + + for (selector, offset) in pending.into_iter() { + let calldata = CallDataImpl { selector }; + let needs_more = execute_function_from_offset( + code, + &calldata, + offset, + &mut events, + &mut seen, + gas_limit, + max_depth, + max_steps, + ); + if needs_more { + next_pending.push((selector, offset)); + } + } + pending = next_pending; + + if seen.len() == before { + stable_rounds += 1; + if stable_rounds >= 2 { + break; + } + } else { + stable_rounds = 0; + } + } + } + + events.sort_unstable(); + events +} + +#[cfg(test)] +mod tests { + use std::collections::BTreeSet; + + use super::*; + use crate::evm::op; + + fn append_log1(code: &mut Vec, selector: [u8; 32]) { + code.push(op::PUSH32); + code.extend_from_slice(&selector); + code.extend_from_slice(&[op::PUSH1, 0x00, op::PUSH1, 0x00, op::LOG1]); + } + + fn append_single_selector_dispatch(code: &mut Vec, selector: [u8; 4]) -> usize { + code.extend_from_slice(&[ 
+ op::PUSH1, + 0x00, + op::CALLDATALOAD, + op::PUSH1, + 0xE0, + op::SHR, + op::PUSH4, + ]); + code.extend_from_slice(&selector); + code.push(op::EQ); + code.extend_from_slice(&[op::PUSH1, 0x00]); + let entry_patch = code.len() - 1; + code.push(op::JUMPI); + code.push(op::STOP); + entry_patch + } + + #[test] + fn test_simple_log1() { + let selector = [0xab; 32]; + let mut code = Vec::new(); + append_log1(&mut code, selector); + code.push(op::STOP); + + let events = contract_events(&code); + assert_eq!(events, vec![selector]); + } + + #[test] + fn test_require_guarded_event() { + let function_selector = [0xaa, 0xbb, 0xcc, 0xdd]; + let event_selector = [0x42; 32]; + + let mut code = Vec::new(); + let entry_patch = append_single_selector_dispatch(&mut code, function_selector); + + let function_entry = code.len(); + code[entry_patch] = u8::try_from(function_entry).unwrap(); + code.push(op::JUMPDEST); + + // Emulate a require guard: + // if (!cond) revert(); else emit LOG1(topic0) + code.extend_from_slice(&[op::PUSH1, 0x00]); // cond = 0 + code.extend_from_slice(&[op::PUSH1, 0x00]); // destination (patched below) + let emit_patch = code.len() - 1; + code.extend_from_slice(&[op::JUMPI, op::PUSH1, 0x00, op::PUSH1, 0x00, op::REVERT]); + let emit_pc = code.len(); + code[emit_patch] = u8::try_from(emit_pc).unwrap(); + + code.push(op::JUMPDEST); + append_log1(&mut code, event_selector); + code.push(op::STOP); + + let events = contract_events(&code); + assert_eq!(events, vec![event_selector]); + } + + #[test] + fn test_forks_when_both_branches_are_alive() { + let function_selector = [0xaa, 0xbb, 0xcc, 0xdd]; + let event_true = [0x11; 32]; + let event_false = [0x22; 32]; + + let mut code = Vec::new(); + let entry_patch = append_single_selector_dispatch(&mut code, function_selector); + + let function_entry = code.len(); + code[entry_patch] = u8::try_from(function_entry).unwrap(); + code.push(op::JUMPDEST); + + // Always-false condition. 
VM takes fallthrough branch, but both branches emit, + // so branch classifier should fork and collect both events. + code.extend_from_slice(&[op::PUSH1, 0x00]); // cond = 0 + code.extend_from_slice(&[op::PUSH1, 0x00]); // true destination (patched below) + let true_patch = code.len() - 1; + code.push(op::JUMPI); + + code.push(op::JUMPDEST); + append_log1(&mut code, event_false); + code.push(op::STOP); + + let true_pc = code.len(); + code[true_patch] = u8::try_from(true_pc).unwrap(); + + code.push(op::JUMPDEST); + append_log1(&mut code, event_true); + code.push(op::STOP); + + let events = contract_events(&code); + let found: BTreeSet<_> = events.into_iter().collect(); + let expected: BTreeSet<_> = [event_true, event_false].into_iter().collect(); + assert_eq!(found, expected); + } + + #[test] + fn test_no_events() { + let code = alloy_primitives::hex::decode("6080604052348015600e575f80fd5b50").unwrap(); + let events = contract_events(&code); + assert!(events.is_empty()); + } + + #[test] + fn test_push32_no_log() { + let mut code = Vec::new(); + code.push(op::PUSH32); + code.extend_from_slice(&[0xab; 32]); + code.push(op::POP); + code.push(op::STOP); + + let events = contract_events(&code); + assert!(events.is_empty()); + } +} From 6817d7324077a737604a610f230f3b1f1cd9f58a Mon Sep 17 00:00:00 2001 From: qdzzyb2015 Date: Fri, 13 Feb 2026 10:05:01 +0800 Subject: [PATCH 02/16] Merge selector execution paths for shared-state event extraction --- src/events/mod.rs | 231 ++++++++++++++++++++++++++++++---------------- 1 file changed, 150 insertions(+), 81 deletions(-) diff --git a/src/events/mod.rs b/src/events/mod.rs index 109431f..eb3fce9 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -63,6 +63,7 @@ fn is_plausible_event_hash(val: &[u8; 32]) -> bool { #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] struct StateKey { + context: usize, pc: usize, stack_len: usize, memory_writes: usize, @@ -70,7 +71,7 @@ struct StateKey { memory_hash: u64, } -fn state_key(vm: 
&Vm) -> StateKey { +fn state_key(vm: &Vm, context: usize) -> StateKey { let mut stack_hasher = DefaultHasher::new(); let stack_start = vm.stack.data.len().saturating_sub(STACK_FINGERPRINT_ELEMS); for el in &vm.stack.data[stack_start..] { @@ -89,6 +90,7 @@ fn state_key(vm: &Vm) -> StateKey { } StateKey { + context, pc: vm.pc, stack_len: vm.stack.data.len(), memory_writes: vm.memory.data.len(), @@ -131,14 +133,16 @@ struct JumpClassify { #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] struct ProbeCacheKey { + context: usize, from_pc: usize, to_pc: usize, stack_top: [u8; 32], stack_len: usize, } -fn probe_cache_key(vm: &Vm, to_pc: usize) -> ProbeCacheKey { +fn probe_cache_key(vm: &Vm, to_pc: usize, context: usize) -> ProbeCacheKey { ProbeCacheKey { + context, from_pc: vm.pc, to_pc, stack_top: vm.stack.peek().map_or([0u8; 32], |v| v.data), @@ -182,9 +186,10 @@ fn probe_branch_cached( start_pc: usize, step_limit: u16, gas_limit: u32, + context: usize, cache: &mut HashMap, ) -> ProbeOutcome { - let key = probe_cache_key(vm, start_pc); + let key = probe_cache_key(vm, start_pc, context); if let Some(outcome) = cache.get(&key) { return *outcome; } @@ -241,6 +246,7 @@ fn probe_branch( fn classify_jump( vm: &Vm, + context: usize, other_pc: usize, can_fork: bool, probe_steps: u16, @@ -279,7 +285,7 @@ fn classify_jump( }; } - let other = probe_branch_cached(vm, other_pc, probe_steps, probe_gas, probe_cache); + let other = probe_branch_cached(vm, other_pc, probe_steps, probe_gas, context, probe_cache); if other == ProbeOutcome::DeadEnd { return JumpClassify { decision: JumpDecision::KeepCurrent, @@ -287,7 +293,7 @@ fn classify_jump( }; } - let current = probe_branch_cached(vm, vm.pc, probe_steps, probe_gas, probe_cache); + let current = probe_branch_cached(vm, vm.pc, probe_steps, probe_gas, context, probe_cache); if current == ProbeOutcome::DeadEnd { return JumpClassify { decision: JumpDecision::SwitchOther, @@ -322,6 +328,15 @@ fn collect_event( } } +struct BatchState<'a> { 
+ idx: usize, + context: usize, + vm: Vm<'a, Label, CallDataImpl>, + gas_used: u32, + depth: u8, + steps: u32, +} + fn execute_paths<'a>( start_vm: Vm<'a, Label, CallDataImpl>, initial_gas: u32, @@ -331,20 +346,66 @@ fn execute_paths<'a>( max_depth: u8, max_steps: u32, ) -> bool { - if initial_gas > gas_limit { - return true; + let needs_more = execute_paths_batch( + vec![BatchState { + idx: 0, + context: 0, + vm: start_vm, + gas_used: initial_gas, + depth: 0, + steps: 0, + }], + 1, + events, + seen, + gas_limit, + max_depth, + max_steps, + ); + needs_more.into_iter().next().unwrap_or(false) +} + +fn execute_paths_batch<'a>( + initial_states: Vec>, + states_count: usize, + events: &mut Vec, + seen: &mut HashSet, + gas_limit: u32, + max_depth: u8, + max_steps: u32, +) -> Vec { + let mut needs_more = vec![false; states_count]; + if initial_states.is_empty() { + return needs_more; + } + + let mut queue = initial_states; + queue.retain(|s| { + if s.gas_used > gas_limit { + needs_more[s.idx] = true; + false + } else { + true + } + }); + + if queue.is_empty() { + return needs_more; } - let mut needs_more = false; let mut visited: HashSet = HashSet::default(); let mut probe_cache: HashMap = HashMap::default(); - let mut queue: Vec<(Vm, u32, u8, u32)> = Vec::new(); - queue.push((start_vm, initial_gas, 0, 0)); + while let Some(state) = queue.pop() { + let idx = state.idx; + let context = state.context; + let mut vm = state.vm; + let mut gas_used = state.gas_used; + let depth = state.depth; + let mut steps = state.steps; - while let Some((mut vm, mut gas_used, depth, mut steps)) = queue.pop() { while !vm.stopped { if gas_used >= gas_limit || steps >= max_steps { - needs_more = true; + needs_more[idx] = true; break; } @@ -365,10 +426,10 @@ fn execute_paths<'a>( op::LOG1..=op::LOG4 => collect_event(events, seen, ret.args[0].data), op::JUMPI => { if visited.len() >= MAX_VISITED_STATES { - needs_more = true; + needs_more[idx] = true; break; } - if !visited.insert(state_key(&vm)) 
{ + if !visited.insert(state_key(&vm, context)) { break; } @@ -397,6 +458,7 @@ fn execute_paths<'a>( let can_fork = depth < max_depth && queue.len() < MAX_PENDING_STATES; let jump = classify_jump( &vm, + context, other_pc, can_fork, PROBE_STEP_LIMIT, @@ -404,7 +466,7 @@ fn execute_paths<'a>( &mut probe_cache, ); if jump.needs_more { - needs_more = true; + needs_more[idx] = true; } match jump.decision { @@ -414,17 +476,24 @@ fn execute_paths<'a>( if can_fork { let mut forked = vm.fork(); forked.pc = other_pc; - queue.push((forked, gas_used, depth + 1, steps)); + queue.push(BatchState { + idx, + context, + vm: forked, + gas_used, + depth: depth + 1, + steps, + }); } } } } op::JUMP => { if visited.len() >= MAX_VISITED_STATES { - needs_more = true; + needs_more[idx] = true; break; } - if !visited.insert(state_key(&vm)) { + if !visited.insert(state_key(&vm, context)) { break; } } @@ -435,52 +504,6 @@ fn execute_paths<'a>( needs_more } -fn execute_function( - code: &[u8], - calldata: &CallDataImpl, - events: &mut Vec, - seen: &mut HashSet, - gas_limit: u32, - max_depth: u8, - max_steps: u32, -) -> bool { - let mut vm = Vm::new(code, calldata); - let Some(initial_gas) = execute_until_function_start(&mut vm, gas_limit) else { - return true; - }; - - execute_paths( - vm, - initial_gas, - events, - seen, - gas_limit, - max_depth, - max_steps, - ) -} - -fn execute_function_from_offset( - code: &[u8], - calldata: &CallDataImpl, - offset: usize, - events: &mut Vec, - seen: &mut HashSet, - gas_limit: u32, - max_depth: u8, - max_steps: u32, -) -> bool { - if offset < code.len() && code[offset] == op::JUMPDEST { - let mut vm = Vm::new(code, calldata); - vm.pc = offset; - execute_paths(vm, 0, events, seen, gas_limit, max_depth, max_steps) - } else { - execute_function( - code, calldata, events, seen, gas_limit, max_depth, max_steps, - ) - } -} - fn execute_from_entry( code: &[u8], calldata: &CallDataImpl, @@ -542,25 +565,71 @@ pub fn contract_events(code: &[u8]) -> Vec { break; } 
let before = seen.len(); - let mut next_pending = Vec::with_capacity(pending.len()); - - for (selector, offset) in pending.into_iter() { - let calldata = CallDataImpl { selector }; - let needs_more = execute_function_from_offset( - code, - &calldata, - offset, - &mut events, - &mut seen, - gas_limit, - max_depth, - max_steps, - ); - if needs_more { - next_pending.push((selector, offset)); + + let calldatas: Vec = pending + .iter() + .map(|(selector, _)| CallDataImpl { + selector: *selector, + }) + .collect(); + + let mut initial_states = Vec::with_capacity(pending.len()); + let mut round_needs_more = vec![false; pending.len()]; + + for (idx, ((_, offset), calldata)) in pending.iter().zip(calldatas.iter()).enumerate() { + if *offset < code.len() && code[*offset] == op::JUMPDEST { + let mut vm = Vm::new(code, calldata); + vm.pc = *offset; + initial_states.push(BatchState { + idx, + context: *offset, + vm, + gas_used: 0, + depth: 0, + steps: 0, + }); + } else { + let mut vm = Vm::new(code, calldata); + if let Some(initial_gas) = execute_until_function_start(&mut vm, gas_limit) { + initial_states.push(BatchState { + idx, + context: *offset, + vm, + gas_used: initial_gas, + depth: 0, + steps: 0, + }); + } else { + round_needs_more[idx] = true; + } } } - pending = next_pending; + + let batch_needs_more = execute_paths_batch( + initial_states, + pending.len(), + &mut events, + &mut seen, + gas_limit, + max_depth, + max_steps, + ); + + for (dst, src) in round_needs_more.iter_mut().zip(batch_needs_more.iter()) { + *dst |= *src; + } + + pending = pending + .into_iter() + .enumerate() + .filter_map(|(idx, item)| { + if round_needs_more[idx] { + Some(item) + } else { + None + } + }) + .collect(); if seen.len() == before { stable_rounds += 1; From ca24557c3c2d51b457326a8a99ce79b997cc3671 Mon Sep 17 00:00:00 2001 From: qdzzyb2015 Date: Mon, 16 Feb 2026 10:15:50 +0800 Subject: [PATCH 03/16] feat(events): add profiling debug path and mutability-based selector pruning Summary: - 
add event execution profiling in Rust via EventExecutionProfile and contract_events_with_profile - keep contract_events_with_stats API and extend extraction stats (selector pruning and runtime counters) - add src/bin/events_debug.rs for single-contract deep profiling (hot JUMPI PCs, queue/branch metrics) - prune event exploration selectors by state mutability (skip view/pure selectors; keep payable/nonpayable) - wire Python interface stats updates and type stubs for event_selectors_with_stats Validation: - cargo test --lib events:: -- --nocapture (10 passed) - cargo test --lib --features python -- --nocapture (34 passed) - cargo run --release --bin events_debug -- --code-file benchmark/results/row96.hex --iters 3 --warmup 1 --profile --top-pc 8 -> code_len=11099, events=7, selectors pruned 35 -> 15, avg=475.282ms (min=435.249, max=510.231) - release benchmark (first 20 contracts): total=2.39s, avg=119.14ms/contract, recall=0.9254, precision=1.0000 --- evmole.pyi | 33 + src/bin/events_debug.rs | 356 +++++++++++ src/events/calldata.rs | 44 ++ src/events/mod.rs | 1336 ++++++++++++++++++++++++++++++++++++--- src/interface_py.rs | 111 +++- src/lib.rs | 5 + 6 files changed, 1778 insertions(+), 107 deletions(-) create mode 100644 src/bin/events_debug.rs create mode 100644 src/events/calldata.rs diff --git a/evmole.pyi b/evmole.pyi index 7f755a7..f3b7b6d 100644 --- a/evmole.pyi +++ b/evmole.pyi @@ -103,6 +103,8 @@ class Contract: Attributes: functions (Optional[List[Function]]): List of detected contract functions. None if no functions were extracted + events (Optional[List[str]]): List of event selectors found in the contract bytecode as hex strings. + None if events were not extracted storage (Optional[List[StorageRecord]]): List of contract storage records. None if storage layout was not extracted disassembled (Optional[List[Tuple[int, str]]]): List of bytecode instructions, where each element is [offset, instruction]. 
@@ -114,17 +116,38 @@ class Contract: """ functions: Optional[List[Function]] + events: Optional[List[str]] storage: Optional[List[StorageRecord]] disassembled: Optional[List[Tuple[int, str]]] basic_blocks: Optional[List[Tuple[int, int]]] control_flow_graph: Optional[ControlFlowGraph] +class EventExtractionStats: + """ + Cache hit/miss statistics collected during event selector extraction. + """ + + jump_classify_cache_hits: int + jump_classify_cache_misses: int + jump_classify_cache_hit_rate: float + entry_state_cache_hits: int + entry_state_cache_misses: int + entry_state_cache_hit_rate: float + jump_classify_can_fork_true: int + jump_classify_can_fork_false: int + probe_cache_hits: int + probe_cache_misses: int + probe_cache_hit_rate: float + static_dead_other_prunes: int + static_dead_current_prunes: int + def contract_info( code: Union[bytes, str], *, selectors: bool = False, arguments: bool = False, state_mutability: bool = False, + events: bool = False, storage: bool = False, disassemble: bool = False, basic_blocks: bool = False, @@ -140,6 +163,8 @@ def contract_info( arguments (bool, optional): When True, extracts function arguments. Defaults to False. state_mutability (bool, optional): When True, extracts function state mutability. Defaults to False. + events (bool, optional): When True, extracts event selectors found in the contract bytecode. + Defaults to False. storage (bool, optional): When True, extracts the contract's storage layout. Defaults to False. disassemble (bool, optional): When True, includes disassembled bytecode. @@ -154,3 +179,11 @@ def contract_info( weren't requested to be extracted will be None. """ ... + +def event_selectors_with_stats( + code: Union[bytes, str], +) -> Tuple[List[str], EventExtractionStats]: + """ + Extracts event selectors and returns cache hit/miss statistics for the run. + """ + ... 
diff --git a/src/bin/events_debug.rs b/src/bin/events_debug.rs
new file mode 100644
index 0000000..b97f8e2
--- /dev/null
+++ b/src/bin/events_debug.rs
@@ -0,0 +1,355 @@
+//! Single-contract profiling harness for event selector extraction.
+//!
+//! Loads EVM bytecode from a hex string, a hex text file or a raw file, runs the extractor
+//! for the requested number of iterations and prints timing, cache statistics and, with
+//! `--profile`, the detailed execution profile.
+
+use std::{collections::BTreeMap, fs, path::PathBuf, time::Instant};
+
+use evmole::{
+    EventExecutionProfile, EventSelector, contract_events_with_profile, contract_events_with_stats,
+};
+
+/// Command-line options accepted by the tool.
+#[derive(Debug, Default)]
+struct Args {
+    /// Bytecode passed inline as a hex string (`--code-hex`).
+    code_hex: Option<String>,
+    /// Text file containing bytecode hex (`--code-file`).
+    code_file: Option<PathBuf>,
+    /// File containing raw bytecode bytes (`--raw-file`).
+    raw_file: Option<PathBuf>,
+    /// Number of timed iterations (`--iters`).
+    iters: usize,
+    /// Number of untimed warmup iterations (`--warmup`).
+    warmup: usize,
+    /// Run the profiling entry point and print its counters (`--profile`).
+    profile: bool,
+    /// Number of entries printed per per-PC table (`--top-pc`).
+    top_pc: usize,
+    /// Print the extracted event selectors (`--show-events`).
+    show_events: bool,
+}
+
+/// Returns the help text printed for `--help` and after argument errors.
+fn usage() -> &'static str {
+    "Usage:
+  cargo run --release --bin events_debug -- [OPTIONS]
+
+Options:
+  --code-hex <HEX>     Bytecode hex string (with or without 0x prefix)
+  --code-file <PATH>   Text file containing bytecode hex
+  --raw-file <PATH>    Raw bytecode file
+  --iters <N>          Timed iterations (default: 1)
+  --warmup <N>         Warmup iterations (default: 0)
+  --profile            Enable detailed execution profile logs
+  --top-pc <N>         Top N PCs to print in profile mode (default: 12)
+  --show-events        Print extracted event selectors
+  -h, --help           Show this help
+
+Exactly one of --code-hex / --code-file / --raw-file must be provided."
+}
+
+/// Parses `std::env::args()` into [`Args`].
+///
+/// Prints the usage text and exits with status 0 on `-h`/`--help`.
+///
+/// # Errors
+///
+/// Returns a message when a flag is unknown, a value is missing or not a number, the number
+/// of bytecode inputs is not exactly one, or `--iters` / `--top-pc` is zero.
+fn parse_args() -> Result<Args, String> {
+    let mut args = Args {
+        iters: 1,
+        warmup: 0,
+        profile: false,
+        top_pc: 12,
+        show_events: false,
+        ..Default::default()
+    };
+
+    let mut it = std::env::args().skip(1);
+    while let Some(arg) = it.next() {
+        match arg.as_str() {
+            "-h" | "--help" => {
+                println!("{}", usage());
+                std::process::exit(0);
+            }
+            "--code-hex" => {
+                args.code_hex = Some(it.next().ok_or("--code-hex requires a value")?);
+            }
+            "--code-file" => {
+                args.code_file = Some(PathBuf::from(
+                    it.next().ok_or("--code-file requires a value")?,
+                ));
+            }
+            "--raw-file" => {
+                args.raw_file = Some(PathBuf::from(
+                    it.next().ok_or("--raw-file requires a value")?,
+                ));
+            }
+            "--iters" => {
+                let v = it.next().ok_or("--iters requires a value")?;
+                args.iters = v.parse().map_err(|_| format!("invalid --iters: {v}"))?;
+            }
+            "--warmup" => {
+                let v = it.next().ok_or("--warmup requires a value")?;
+                args.warmup = v.parse().map_err(|_| format!("invalid --warmup: {v}"))?;
+            }
+            "--profile" => args.profile = true,
+            "--top-pc" => {
+                let v = it.next().ok_or("--top-pc requires a value")?;
+                args.top_pc = v.parse().map_err(|_| format!("invalid --top-pc: {v}"))?;
+            }
+            "--show-events" => args.show_events = true,
+            _ => return Err(format!("unknown argument: {arg}")),
+        }
+    }
+
+    let inputs = [
+        args.code_hex.is_some(),
+        args.code_file.is_some(),
+        args.raw_file.is_some(),
+    ]
+    .into_iter()
+    .filter(|v| *v)
+    .count();
+    if inputs != 1 {
+        return Err("provide exactly one of --code-hex / --code-file / --raw-file".to_string());
+    }
+    if args.iters == 0 {
+        return Err("--iters must be >= 1".to_string());
+    }
+    if args.top_pc == 0 {
+        return Err("--top-pc must be >= 1".to_string());
+    }
+    Ok(args)
+}
+
+/// Decodes a hex string, ignoring surrounding whitespace and an optional `0x` prefix.
+fn decode_hex(input: &str) -> Result<Vec<u8>, String> {
+    let s = input.trim();
+    let s = s.strip_prefix("0x").unwrap_or(s);
+    alloy_primitives::hex::decode(s).map_err(|e| format!("hex decode failed: {e}"))
+}
+
+/// Loads the bytecode from whichever input option was supplied.
+fn load_code(args: &Args) -> Result<Vec<u8>, String> {
+    if let Some(hex) = &args.code_hex {
+        return decode_hex(hex);
+    }
+    if let Some(path) = &args.code_file {
+        let text = fs::read_to_string(path)
+            .map_err(|e| format!("failed to read code file '{}': {e}", path.display()))?;
+        return decode_hex(&text);
+    }
+    if let Some(path) = &args.raw_file {
+        return fs::read(path)
+            .map_err(|e| format!("failed to read raw file '{}': {e}", path.display()));
+    }
+    Err("no input provided".to_string())
+}
+
+/// Hex-encodes an event selector for display.
+fn fmt_selector(s: &EventSelector) -> String {
+    alloy_primitives::hex::encode(s)
+}
+
+/// Returns `part / total`, or `0.0` when `total` is zero.
+fn ratio(part: f64, total: f64) -> f64 {
+    if total > 0.0 { part / total } else { 0.0 }
+}
+
+/// Prints the `top_n` entries of `map`, highest count first; ties are ordered by ascending PC.
+fn print_top_pc(title: &str, map: &BTreeMap<usize, u64>, top_n: usize) {
+    if map.is_empty() {
+        println!("{title}: (empty)");
+        return;
+    }
+    let mut pairs: Vec<(usize, u64)> = map.iter().map(|(k, v)| (*k, *v)).collect();
+    pairs.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
+    println!("{title}:");
+    for (pc, cnt) in pairs.into_iter().take(top_n) {
+        println!("  pc=0x{pc:x} ({pc}) count={cnt}");
+    }
+}
+
+/// Runs the extractor on the supplied bytecode and prints timing, statistics and, when
+/// requested, the execution profile and the extracted selectors.
+///
+/// Exits with status 2 when the arguments are invalid or the bytecode cannot be loaded.
+fn main() {
+    let args = match parse_args() {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("error: {e}");
+            eprintln!("{}", usage());
+            std::process::exit(2);
+        }
+    };
+
+    let code = match load_code(&args) {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("error: {e}");
+            std::process::exit(2);
+        }
+    };
+    if code.is_empty() {
+        eprintln!("error: empty bytecode");
+        std::process::exit(2);
+    }
+
+    // Warm up through the same entry point that the timed loop uses.
+    for _ in 0..args.warmup {
+        if args.profile {
+            let _ = contract_events_with_profile(&code);
+        } else {
+            let _ = contract_events_with_stats(&code);
+        }
+    }
+
+    let mut last_events = Vec::new();
+    let mut last_stats = evmole::EventExtractionStats::default();
+    let mut last_profile = EventExecutionProfile::default();
+    let mut elapsed_ms = Vec::with_capacity(args.iters);
+
+    for _ in 0..args.iters {
+        let t0 = Instant::now();
+        let (events, stats, profile) = if args.profile {
+            contract_events_with_profile(&code)
+        } else {
+            let (events, stats) = contract_events_with_stats(&code);
+            (events, stats, EventExecutionProfile::default())
+        };
+        let dt = t0.elapsed().as_secs_f64() * 1000.0;
+        elapsed_ms.push(dt);
+        last_events = events;
+        last_stats = stats;
+        last_profile = profile;
+    }
+
+    let total_ms: f64 = elapsed_ms.iter().sum();
+    let avg_ms = total_ms / elapsed_ms.len() as f64;
+    let min_ms = elapsed_ms.iter().copied().fold(f64::INFINITY, f64::min);
+    let max_ms = elapsed_ms.iter().copied().fold(0.0, f64::max);
+
+    let jump_total = last_stats
+        .jump_classify_cache_hits
+        .saturating_add(last_stats.jump_classify_cache_misses);
+    let jump_hit_rate = ratio(last_stats.jump_classify_cache_hits as f64, jump_total as f64);
+
+    let entry_total = last_stats
+        .entry_state_cache_hits
+        .saturating_add(last_stats.entry_state_cache_misses);
+    let entry_hit_rate = ratio(last_stats.entry_state_cache_hits as f64, entry_total as f64);
+
+    let probe_total = last_stats
+        .probe_cache_hits
+        .saturating_add(last_stats.probe_cache_misses);
+    let probe_hit_rate = ratio(last_stats.probe_cache_hits as f64, probe_total as f64);
+
+    let can_fork_total = last_stats
+        .jump_classify_can_fork_true
+        .saturating_add(last_stats.jump_classify_can_fork_false);
+    let can_fork_true_rate = ratio(
+        last_stats.jump_classify_can_fork_true as f64,
+        can_fork_total as f64,
+    );
+
+    println!("code_len: {}", code.len());
+    println!(
+        "time_ms: avg={avg_ms:.3} min={min_ms:.3} max={max_ms:.3} (iters={})",
+        args.iters
+    );
+    println!("events: {}", last_events.len());
+    println!(
+        "selectors: total={} after_mutability_prune={} pruned_view_or_pure={}",
+        last_stats.selectors_total,
+        last_stats.selectors_after_mutability_prune,
+        last_stats.selectors_pruned_view_or_pure
+    );
+    println!(
+        "jump_cache: hit={} miss={} rate={:.2}%",
+        last_stats.jump_classify_cache_hits,
+        last_stats.jump_classify_cache_misses,
+        jump_hit_rate * 100.0
+    );
+    println!(
+        "entry_cache: hit={} miss={} rate={:.2}%",
+        last_stats.entry_state_cache_hits,
+        last_stats.entry_state_cache_misses,
+        entry_hit_rate * 100.0
+    );
+    println!(
+        "probe_cache: hit={} miss={} rate={:.2}%",
+        last_stats.probe_cache_hits,
+        last_stats.probe_cache_misses,
+        probe_hit_rate * 100.0
+    );
+    println!(
+        "jump_can_fork_true: {}/{} ({:.2}%)",
+        last_stats.jump_classify_can_fork_true,
+        can_fork_total,
+        can_fork_true_rate * 100.0
+    );
+    let static_dead_total = last_stats
+        .static_dead_other_prunes
+        .saturating_add(last_stats.static_dead_current_prunes);
+    println!(
+        "static_dead_prunes: other={} current={} total={}",
+        last_stats.static_dead_other_prunes,
+        last_stats.static_dead_current_prunes,
+        static_dead_total
+    );
+
+    if args.profile {
+        println!(
+            "states: pushed={} popped={} queue_peak={} state_limit_breaks={}",
+            last_profile.states_pushed,
+            last_profile.states_popped,
+            last_profile.queue_peak,
+            last_profile.state_limit_breaks
+        );
+        println!(
+            "jump ops: jump_total={} jump_visited_breaks={} jumpi_total={} jumpi_visited_breaks={} visited_cap_hits={}",
+            last_profile.jump_total,
+            last_profile.jump_visited_breaks,
+            last_profile.jumpi_total,
+            last_profile.jumpi_visited_breaks,
+            last_profile.visited_cap_hits
+        );
+        println!(
+            "jumpi outcomes: keep={} switch={} fork={} invalid_other_pc={} unreachable_both={} unreachable_current={} unreachable_other={}",
+            last_profile.jumpi_decision_keep,
+            last_profile.jumpi_decision_switch,
+            last_profile.jumpi_decision_fork,
+            last_profile.jumpi_invalid_other_pc,
+            last_profile.jumpi_unreachable_both,
+            last_profile.jumpi_unreachable_current,
+            last_profile.jumpi_unreachable_other
+        );
+        // Per-PC tables, printed in a fixed order.
+        let tables = [
+            ("top context starts", &last_profile.context_start_by_pc),
+            ("top jumpi", &last_profile.jumpi_by_pc),
+            ("top jumpi can_fork=true", &last_profile.jumpi_can_fork_true_by_pc),
+            ("top jumpi can_fork=false", &last_profile.jumpi_can_fork_false_by_pc),
+            ("top jumpi cache miss", &last_profile.jumpi_cache_miss_by_pc),
+            ("top jumpi decision fork", &last_profile.jumpi_decision_fork_by_pc),
+            ("top jumpi decision switch", &last_profile.jumpi_decision_switch_by_pc),
+            ("top jumpi unreachable_both", &last_profile.jumpi_unreachable_both_by_pc),
+            ("top jumpi invalid_other_pc", &last_profile.jumpi_invalid_other_pc_by_pc),
+        ];
+        for (title, map) in tables {
+            print_top_pc(title, map, args.top_pc);
+        }
+    }
+
+    if args.show_events {
+        last_events.sort_unstable();
+        for evt in &last_events {
+            println!("event: {}", fmt_selector(evt));
+        }
+    }
+}
diff --git a/src/events/calldata.rs b/src/events/calldata.rs new file mode 100644 index 0000000..849b0d9 --- /dev/null +++ b/src/events/calldata.rs @@ -0,0 +1,44 @@ +use super::Label; +use crate::evm::{U256, VAL_4, VAL_131072, calldata::CallData, element::Element}; +use std::error; + +pub(super) struct CallDataImpl { + pub selector: [u8; 4], +} + +impl CallData