Skip to content

Commit

Permalink
perf: Fix a performance regression caused by the refactoring work
Browse files Browse the repository at this point in the history
In the original refactoring work, the verifying context data is kept in
`Arc<SgData>` structure, which contains `Arc<TxData>`, which then contains
`Arc<ResolvedTransaction>`. Accessing a specific field on a CKB transaction
requires traversing through 3 `Arc` structures, which would be 3 levels of
indirection.

Benchmarks show that those nested memory indirections cost us ~3% of transaction
verifying performance, our best guess is that the nested indirections result in
enough cache misses to cause the differences.

This commit changes the code so instead of nested `Arc`s, we now use flattened
structs that then contain a series of `Arc`s. Now at most one `Arc` would get in
the way to access a specific field of the CKB transaction to verify.

Note that `consensus` and `tx_env` still hids in 2 nested levels of `Arc`, here we
are balancing the nested levels of `Arc`, and the number of `Arc`s contained in the
flattend `TxData` and `SgData` structure. Right now we work under the assumption that
if consensus parameters are required indeed, a nested level of indirection won't be
the bottleneck. However, this might or might not change in the future, when it becomes
a problem, we will revisit the tradeoff then.
  • Loading branch information
xxuejie committed Feb 13, 2025
1 parent 19c0b3c commit 8b6654a
Show file tree
Hide file tree
Showing 18 changed files with 319 additions and 290 deletions.
14 changes: 6 additions & 8 deletions script/src/scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ where
DL: CellDataProvider,
{
/// Immutable context data for current running transaction & script.
pub sg_data: Arc<SgData<DL>>,
pub sg_data: SgData<DL>,

/// Mutable context data used by current scheduler
pub debug_context: DebugContext,
Expand Down Expand Up @@ -113,7 +113,7 @@ where
/// Create a new scheduler from empty state
pub fn new(sg_data: SgData<DL>, debug_context: DebugContext) -> Self {
Self {
sg_data: Arc::new(sg_data),
sg_data,
debug_context,
total_cycles: Arc::new(AtomicU64::new(0)),
iteration_cycles: 0,
Expand Down Expand Up @@ -153,7 +153,7 @@ where
full: FullSuspendedState,
) -> Self {
let mut scheduler = Self {
sg_data: Arc::new(sg_data),
sg_data,
debug_context,
total_cycles: Arc::new(AtomicU64::new(full.total_cycles)),
iteration_cycles: 0,
Expand Down Expand Up @@ -232,7 +232,7 @@ where
pub fn run(&mut self, mode: RunMode) -> Result<(i8, Cycle), Error> {
if self.states.is_empty() {
// Booting phase, we will need to initialize the first VM.
let program_id = self.sg_data.program_data_piece_id.clone();
let program_id = self.sg_data.sg_info.program_data_piece_id.clone();
assert_eq!(
self.boot_vm(
&DataLocation {
Expand Down Expand Up @@ -877,7 +877,7 @@ where
// The code here looks slightly weird, since I don't want to copy over all syscall
// impls here again. Ideally, this scheduler package should be merged with ckb-script,
// or simply replace ckb-script. That way, the quirks here will be eliminated.
let version = &self.sg_data.script_version;
let version = &self.sg_data.sg_info.script_version;
let core_machine = CoreMachineType::new(
version.vm_isa(),
version.vm_version(),
Expand All @@ -887,9 +887,7 @@ where
let vm_context = VmContext {
base_cycles: Arc::clone(&self.total_cycles),
message_box: Arc::clone(&self.message_box),
snapshot2_context: Arc::new(Mutex::new(Snapshot2Context::new(Arc::clone(
&self.sg_data,
)))),
snapshot2_context: Arc::new(Mutex::new(Snapshot2Context::new(self.sg_data.clone()))),
};

let machine_builder = DefaultMachineBuilder::new(core_machine)
Expand Down
18 changes: 10 additions & 8 deletions script/src/syscalls/debugger.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
use crate::types::{DebugContext, DebugPrinter, SgData};
use crate::types::{
DebugContext, DebugPrinter, {SgData, SgInfo},
};
use crate::{cost_model::transferred_byte_cycles, syscalls::DEBUG_PRINT_SYSCALL_NUMBER};
use ckb_vm::{
registers::{A0, A7},
Error as VMError, Memory, Register, SupportMachine, Syscalls,
};
use std::sync::Arc;

pub struct Debugger<DL> {
sg_data: Arc<SgData<DL>>,
pub struct Debugger {
sg_info: Arc<SgInfo>,
printer: DebugPrinter,
}

impl<DL> Debugger<DL> {
pub fn new(sg_data: &Arc<SgData<DL>>, debug_context: &DebugContext) -> Debugger<DL> {
impl Debugger {
pub fn new<DL>(sg_data: &SgData<DL>, debug_context: &DebugContext) -> Debugger {
Debugger {
sg_data: Arc::clone(sg_data),
sg_info: Arc::clone(&sg_data.sg_info),
printer: Arc::clone(&debug_context.debug_printer),
}
}
}

impl<Mac: SupportMachine, DL: Send + Sync> Syscalls<Mac> for Debugger<DL> {
impl<Mac: SupportMachine> Syscalls<Mac> for Debugger {
fn initialize(&mut self, _machine: &mut Mac) -> Result<(), VMError> {
Ok(())
}
Expand Down Expand Up @@ -49,7 +51,7 @@ impl<Mac: SupportMachine, DL: Send + Sync> Syscalls<Mac> for Debugger<DL> {
machine.add_cycles_no_checking(transferred_byte_cycles(buffer.len() as u64))?;
let s = String::from_utf8(buffer)
.map_err(|e| VMError::External(format!("String from buffer {e:?}")))?;
(self.printer)(self.sg_data.current_script_hash(), s.as_str());
(self.printer)(&self.sg_info.script_hash, s.as_str());

Ok(true)
}
Expand Down
17 changes: 8 additions & 9 deletions script/src/syscalls/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,32 @@ use ckb_vm::{
Error as VMError, Register, SupportMachine, Syscalls,
};
use ckb_vm::{DEFAULT_STACK_SIZE, RISCV_MAX_MEMORY};
use std::sync::Arc;

#[derive(Debug)]
pub struct Exec<DL> {
sg_data: Arc<SgData<DL>>,
sg_data: SgData<DL>,
}

impl<DL: CellDataProvider> Exec<DL> {
pub fn new(sg_data: &Arc<SgData<DL>>) -> Exec<DL> {
impl<DL: CellDataProvider + Clone> Exec<DL> {
pub fn new(sg_data: &SgData<DL>) -> Exec<DL> {
Exec {
sg_data: Arc::clone(sg_data),
sg_data: sg_data.clone(),
}
}

#[inline]
fn resolved_inputs(&self) -> &Vec<CellMeta> {
&self.sg_data.rtx().resolved_inputs
&self.sg_data.rtx.resolved_inputs
}

#[inline]
fn resolved_cell_deps(&self) -> &Vec<CellMeta> {
&self.sg_data.rtx().resolved_cell_deps
&self.sg_data.rtx.resolved_cell_deps
}

#[inline]
fn witnesses(&self) -> BytesVec {
self.sg_data.rtx().transaction.witnesses()
self.sg_data.rtx.transaction.witnesses()
}

fn fetch_cell(&self, source: Source, index: usize) -> Result<&CellMeta, u8> {
Expand Down Expand Up @@ -92,7 +91,7 @@ impl<DL: CellDataProvider> Exec<DL> {
}
}

impl<Mac: SupportMachine, DL: CellDataProvider + Send + Sync> Syscalls<Mac> for Exec<DL> {
impl<Mac: SupportMachine, DL: CellDataProvider + Send + Sync + Clone> Syscalls<Mac> for Exec<DL> {
fn initialize(&mut self, _machine: &mut Mac) -> Result<(), VMError> {
Ok(())
}
Expand Down
11 changes: 5 additions & 6 deletions script/src/syscalls/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@ use crate::{
};
use ckb_traits::{CellDataProvider, ExtensionProvider, HeaderProvider};
use ckb_vm::Syscalls;
use std::sync::Arc;

/// Generate RISC-V syscalls in CKB environment
pub fn generate_ckb_syscalls<DL>(
vm_id: &VmId,
sg_data: &Arc<SgData<DL>>,
sg_data: &SgData<DL>,
vm_context: &VmContext<DL>,
debug_context: &DebugContext,
) -> Vec<Box<(dyn Syscalls<CoreMachine>)>>
Expand All @@ -31,7 +30,7 @@ where
Box::new(LoadCellData::new(vm_context)),
Box::new(Debugger::new(sg_data, debug_context)),
];
let script_version = &sg_data.script_version;
let script_version = &sg_data.sg_info.script_version;
if script_version >= &ScriptVersion::V1 {
syscalls.append(&mut vec![
Box::new(VMVersion::new()),
Expand All @@ -56,8 +55,8 @@ where
]);
}
#[cfg(test)]
syscalls.push(Box::new(crate::syscalls::Pause::new(Arc::clone(
&debug_context.skip_pause,
))));
syscalls.push(Box::new(crate::syscalls::Pause::new(
std::sync::Arc::clone(&debug_context.skip_pause),
)));
syscalls
}
17 changes: 8 additions & 9 deletions script/src/syscalls/load_block_extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,32 @@ use ckb_vm::{
registers::{A0, A3, A4, A7},
Error as VMError, Register, SupportMachine, Syscalls,
};
use std::sync::Arc;

#[derive(Debug)]
pub struct LoadBlockExtension<DL> {
sg_data: Arc<SgData<DL>>,
sg_data: SgData<DL>,
}

impl<DL: ExtensionProvider> LoadBlockExtension<DL> {
pub fn new(sg_data: &Arc<SgData<DL>>) -> LoadBlockExtension<DL> {
impl<DL: ExtensionProvider + Clone> LoadBlockExtension<DL> {
pub fn new(sg_data: &SgData<DL>) -> LoadBlockExtension<DL> {
LoadBlockExtension {
sg_data: Arc::clone(sg_data),
sg_data: sg_data.clone(),
}
}

#[inline]
fn header_deps(&self) -> Byte32Vec {
self.sg_data.rtx().transaction.header_deps()
self.sg_data.rtx.transaction.header_deps()
}

#[inline]
fn resolved_inputs(&self) -> &Vec<CellMeta> {
&self.sg_data.rtx().resolved_inputs
&self.sg_data.rtx.resolved_inputs
}

#[inline]
fn resolved_cell_deps(&self) -> &Vec<CellMeta> {
&self.sg_data.rtx().resolved_cell_deps
&self.sg_data.rtx.resolved_cell_deps
}

fn load_block_extension(&self, cell_meta: &CellMeta) -> Option<packed::Bytes> {
Expand Down Expand Up @@ -102,7 +101,7 @@ impl<DL: ExtensionProvider> LoadBlockExtension<DL> {
}
}

impl<DL: ExtensionProvider + Send + Sync, Mac: SupportMachine> Syscalls<Mac>
impl<DL: ExtensionProvider + Send + Sync + Clone, Mac: SupportMachine> Syscalls<Mac>
for LoadBlockExtension<DL>
{
fn initialize(&mut self, _machine: &mut Mac) -> Result<(), VMError> {
Expand Down
29 changes: 15 additions & 14 deletions script/src/syscalls/load_cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{
utils::store_data, CellField, Source, SourceEntry, INDEX_OUT_OF_BOUND, ITEM_MISSING,
LOAD_CELL_BY_FIELD_SYSCALL_NUMBER, LOAD_CELL_SYSCALL_NUMBER, SUCCESS,
},
types::{SgData, TxData},
types::{SgData, TxInfo},
};
use byteorder::{LittleEndian, WriteBytesExt};
use ckb_traits::CellDataProvider;
Expand All @@ -17,32 +17,31 @@ use ckb_vm::{
registers::{A0, A3, A4, A5, A7},
Error as VMError, Register, SupportMachine, Syscalls,
};
use std::sync::Arc;

pub struct LoadCell<DL> {
sg_data: Arc<SgData<DL>>,
sg_data: SgData<DL>,
}

impl<DL: CellDataProvider> LoadCell<DL> {
pub fn new(sg_data: &Arc<SgData<DL>>) -> LoadCell<DL> {
impl<DL: CellDataProvider + Clone> LoadCell<DL> {
pub fn new(sg_data: &SgData<DL>) -> LoadCell<DL> {
LoadCell {
sg_data: Arc::clone(sg_data),
sg_data: sg_data.clone(),
}
}

#[inline]
fn tx_data(&self) -> &TxData<DL> {
&self.sg_data.tx_data
fn tx_info(&self) -> &TxInfo<DL> {
&self.sg_data.tx_info
}

#[inline]
fn resolved_inputs(&self) -> &Vec<CellMeta> {
&self.sg_data.rtx().resolved_inputs
&self.sg_data.rtx.resolved_inputs
}

#[inline]
fn resolved_cell_deps(&self) -> &Vec<CellMeta> {
&self.sg_data.rtx().resolved_cell_deps
&self.sg_data.rtx.resolved_cell_deps
}

fn fetch_cell(&self, source: Source, index: usize) -> Result<&CellMeta, u8> {
Expand All @@ -51,7 +50,7 @@ impl<DL: CellDataProvider> LoadCell<DL> {
self.resolved_inputs().get(index).ok_or(INDEX_OUT_OF_BOUND)
}
Source::Transaction(SourceEntry::Output) => {
self.tx_data().outputs.get(index).ok_or(INDEX_OUT_OF_BOUND)
self.tx_info().outputs.get(index).ok_or(INDEX_OUT_OF_BOUND)
}
Source::Transaction(SourceEntry::CellDep) => self
.resolved_cell_deps()
Expand All @@ -74,7 +73,7 @@ impl<DL: CellDataProvider> LoadCell<DL> {
.get(index)
.ok_or(INDEX_OUT_OF_BOUND)
.and_then(|actual_index| {
self.tx_data()
self.tx_info()
.outputs
.get(*actual_index)
.ok_or(INDEX_OUT_OF_BOUND)
Expand Down Expand Up @@ -110,7 +109,7 @@ impl<DL: CellDataProvider> LoadCell<DL> {
(SUCCESS, store_data(machine, &buffer)?)
}
CellField::DataHash => {
if let Some(bytes) = self.tx_data().data_loader.load_cell_data_hash(cell) {
if let Some(bytes) = self.tx_info().data_loader.load_cell_data_hash(cell) {
(SUCCESS, store_data(machine, &bytes.as_bytes())?)
} else {
(ITEM_MISSING, 0)
Expand Down Expand Up @@ -160,7 +159,9 @@ impl<DL: CellDataProvider> LoadCell<DL> {
}
}

impl<Mac: SupportMachine, DL: CellDataProvider + Send + Sync> Syscalls<Mac> for LoadCell<DL> {
impl<Mac: SupportMachine, DL: CellDataProvider + Send + Sync + Clone> Syscalls<Mac>
for LoadCell<DL>
{
fn initialize(&mut self, _machine: &mut Mac) -> Result<(), VMError> {
Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion script/src/syscalls/load_cell_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub struct LoadCellData<DL>
where
DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
{
snapshot2_context: Arc<Mutex<Snapshot2Context<DataPieceId, Arc<SgData<DL>>>>>,
snapshot2_context: Arc<Mutex<Snapshot2Context<DataPieceId, SgData<DL>>>>,
}

impl<DL> LoadCellData<DL>
Expand Down
Loading

0 comments on commit 8b6654a

Please sign in to comment.