diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml
index 8b25ecd..d0f6fa8 100644
--- a/crates/storage/Cargo.toml
+++ b/crates/storage/Cargo.toml
@@ -15,10 +15,26 @@ bytes = "1.11.0"
 reth.workspace = true
 reth-db = { workspace = true, features = ["test-utils"] }
 reth-db-api.workspace = true
-reth-libmdbx.workspace = true
+signet-db.workspace = true
 thiserror.workspace = true
+tokio.workspace = true
+tokio-util = { version = "0.7", features = ["rt"] }
+tracing.workspace = true
 trevm.workspace = true
+reth-libmdbx = { workspace = true, optional = true }
+page_size = { version = "0.6.0", optional = true }
+dashmap = "6.1.0"
+tempfile = { workspace = true, optional = true }
+
 
 [dev-dependencies]
 serial_test = "3.3.1"
 tempfile.workspace = true
+
+[features]
+default = ["mdbx", "in-mem"]
+mdbx = ["dep:reth-libmdbx", "impls", "dep:page_size"]
+in-mem = ["impls"]
+test-utils = ["in-mem", "dep:tempfile"]
+impls = []
diff --git a/crates/storage/README.md b/crates/storage/README.md
index a7d6488..72e63ce 100644
--- a/crates/storage/README.md
+++ b/crates/storage/README.md
@@ -1,17 +1,31 @@
 # Signet Storage
 
-High-level API for Signet's storage layer
+High-level APIs for Signet's storage layer.
 
-This library contains the following:
+## Design Overview
 
-- Traits for serializing and deserializing Signet data structures as DB keys/
-  values.
-- Traits for hot and cold storage operations.
-- Relevant KV table definitions.
+We divide the storage system into two main components:
 
-## Significant Traits
+1. Hot storage, used in the critical consensus path.
+2. Cold storage, used for historical data, RPC queries, and archival.
 
-- `HotKv` - Encapsulates logic for reading and writing to hot storage.
-- `ColdKv` - Encapsulates logic for reading and writing to cold storage.
-- `KeySer` - Provides methods for serializing a type as a DB key.
-- `ValueSer` - Provides methods for serializing a type as a DB value.
+Hot and cold storage have different designs because they serve different
+purposes:
+
+- **Mutability**: Hot state changes constantly during block execution; cold
+  data is finalized history that only grows (or truncates during reorgs).
+- **Access patterns**: State execution requires fast point lookups; historical
+  queries are block-centric and sequential.
+- **Consistency**: Hot storage needs ACID transactions to maintain consistent
+  state mid-block; cold storage can use eventual consistency via async ops.
+
+This separation allows us to optimize each layer for its specific access
+patterns and performance requirements. Hot storage needs to be fast and
+mutable, while cold storage can be optimized for bulk writes and asynchronous
+access.
+
+See the module documentation for `hot` and `cold` for more details on each
+design.
+
+```bash
+cargo doc --no-deps --open -p signet-storage
+```
diff --git a/crates/storage/src/cold/conformance.rs b/crates/storage/src/cold/conformance.rs
new file mode 100644
index 0000000..862fa61
--- /dev/null
+++ b/crates/storage/src/cold/conformance.rs
@@ -0,0 +1,173 @@
+//! Conformance tests for ColdStorage backends.
+//!
+//! These tests verify that any backend implementation behaves correctly
+//! according to the ColdStorage trait contract. To use these tests with
+//! a custom backend, call the test functions with your backend instance.
+
+use crate::cold::{BlockData, BlockTag, ColdResult, ColdStorage, HeaderSpecifier};
+use alloy::primitives::{B256, BlockNumber};
+use reth::primitives::Header;
+
+/// Run all conformance tests against a backend.
+///
+/// This is the main entry point for testing a custom backend implementation.
+pub async fn conformance<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    test_empty_storage(backend).await?;
+    test_append_and_read_header(backend).await?;
+    test_header_hash_lookup(backend).await?;
+    test_header_tag_lookup(backend).await?;
+    test_transaction_lookups(backend).await?;
+    test_receipt_lookups(backend).await?;
+    test_truncation(backend).await?;
+    test_batch_append(backend).await?;
+    test_latest_block_tracking(backend).await?;
+    Ok(())
+}
+
+/// Create test block data for conformance tests.
+///
+/// Creates a minimal valid block with the given block number.
+pub fn make_test_block(block_number: BlockNumber) -> BlockData {
+    let header = Header { number: block_number, ..Default::default() };
+
+    BlockData::new(header, vec![], vec![], vec![], None)
+}
+
+/// Test that empty storage returns None/empty for all lookups.
+pub async fn test_empty_storage<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    assert!(backend.get_header(HeaderSpecifier::Number(0)).await?.is_none());
+    assert!(backend.get_header(HeaderSpecifier::Hash(B256::ZERO)).await?.is_none());
+    assert!(backend.get_header(HeaderSpecifier::Tag(BlockTag::Latest)).await?.is_none());
+    assert!(backend.get_latest_block().await?.is_none());
+    assert!(backend.get_transactions_in_block(0).await?.is_empty());
+    assert!(backend.get_receipts_in_block(0).await?.is_empty());
+    assert_eq!(backend.get_transaction_count(0).await?, 0);
+    Ok(())
+}
+
+/// Test basic append and read for headers.
+pub async fn test_append_and_read_header<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    let block_data = make_test_block(100);
+    let expected_header = block_data.header.clone();
+
+    backend.append_block(block_data).await?;
+
+    let retrieved = backend.get_header(HeaderSpecifier::Number(100)).await?;
+    assert!(retrieved.is_some());
+    assert_eq!(retrieved.unwrap(), expected_header);
+
+    Ok(())
+}
+
+/// Test header lookup by hash.
+pub async fn test_header_hash_lookup<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    let block_data = make_test_block(101);
+    let header_hash = block_data.header.hash_slow();
+
+    backend.append_block(block_data).await?;
+
+    let retrieved = backend.get_header(HeaderSpecifier::Hash(header_hash)).await?;
+    assert!(retrieved.is_some());
+
+    // Non-existent hash should return None
+    let missing = backend.get_header(HeaderSpecifier::Hash(B256::ZERO)).await?;
+    assert!(missing.is_none());
+
+    Ok(())
+}
+
+/// Test header lookup by tag.
+pub async fn test_header_tag_lookup<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    backend.append_block(make_test_block(50)).await?;
+    backend.append_block(make_test_block(51)).await?;
+    backend.append_block(make_test_block(52)).await?;
+
+    // Latest should return block 52
+    let latest = backend.get_header(HeaderSpecifier::Tag(BlockTag::Latest)).await?;
+    assert!(latest.is_some());
+
+    // Earliest should return block 50
+    let earliest = backend.get_header(HeaderSpecifier::Tag(BlockTag::Earliest)).await?;
+    assert!(earliest.is_some());
+
+    Ok(())
+}
+
+/// Test transaction lookups by hash and by block+index.
+pub async fn test_transaction_lookups<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    // Create block with empty transactions for now
+    let block_data = make_test_block(200);
+
+    backend.append_block(block_data).await?;
+
+    let txs = backend.get_transactions_in_block(200).await?;
+    let count = backend.get_transaction_count(200).await?;
+    assert_eq!(txs.len() as u64, count);
+
+    Ok(())
+}
+
+/// Test receipt lookups.
+pub async fn test_receipt_lookups<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    let block_data = make_test_block(201);
+
+    backend.append_block(block_data).await?;
+
+    let receipts = backend.get_receipts_in_block(201).await?;
+    // Empty receipts for now
+    assert!(receipts.is_empty());
+
+    Ok(())
+}
+
+/// Test truncation removes data correctly.
+pub async fn test_truncation<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    // Append blocks 300, 301, 302
+    backend.append_block(make_test_block(300)).await?;
+    backend.append_block(make_test_block(301)).await?;
+    backend.append_block(make_test_block(302)).await?;
+
+    // Truncate above 300 (removes 301, 302)
+    backend.truncate_above(300).await?;
+
+    // Block 300 should still exist
+    assert!(backend.get_header(HeaderSpecifier::Number(300)).await?.is_some());
+
+    // Blocks 301, 302 should be gone
+    assert!(backend.get_header(HeaderSpecifier::Number(301)).await?.is_none());
+    assert!(backend.get_header(HeaderSpecifier::Number(302)).await?.is_none());
+
+    // Latest should now be 300
+    assert_eq!(backend.get_latest_block().await?, Some(300));
+
+    Ok(())
+}
+
+/// Test batch append.
+pub async fn test_batch_append<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    let blocks = vec![make_test_block(400), make_test_block(401), make_test_block(402)];
+
+    backend.append_blocks(blocks).await?;
+
+    assert!(backend.get_header(HeaderSpecifier::Number(400)).await?.is_some());
+    assert!(backend.get_header(HeaderSpecifier::Number(401)).await?.is_some());
+    assert!(backend.get_header(HeaderSpecifier::Number(402)).await?.is_some());
+
+    Ok(())
+}
+
+/// Test latest block tracking.
+pub async fn test_latest_block_tracking<B: ColdStorage>(backend: &B) -> ColdResult<()> {
+    // Append out of order
+    backend.append_block(make_test_block(502)).await?;
+    assert_eq!(backend.get_latest_block().await?, Some(502));
+
+    backend.append_block(make_test_block(500)).await?;
+    // Latest should still be 502
+    assert_eq!(backend.get_latest_block().await?, Some(502));
+
+    backend.append_block(make_test_block(505)).await?;
+    assert_eq!(backend.get_latest_block().await?, Some(505));
+
+    Ok(())
+}
diff --git a/crates/storage/src/cold/error.rs b/crates/storage/src/cold/error.rs
new file mode 100644
index 0000000..7421fb5
--- /dev/null
+++ b/crates/storage/src/cold/error.rs
@@ -0,0 +1,30 @@
+//! Error types for cold storage operations.
+
+/// Result type alias for cold storage operations.
+pub type ColdResult<T> = Result<T, ColdStorageError>;
+
+/// Error type for cold storage operations.
+#[derive(Debug, thiserror::Error)]
+pub enum ColdStorageError {
+    /// An error occurred in the storage backend.
+    #[error("Backend error: {0}")]
+    Backend(#[from] Box<dyn core::error::Error + Send + Sync>),
+
+    /// The requested resource was not found.
+    #[error("Not found: {0}")]
+    NotFound(String),
+
+    /// The storage task was cancelled.
+    #[error("Task cancelled")]
+    Cancelled,
+}
+
+impl ColdStorageError {
+    /// Create a new backend error from any error type.
+    pub fn backend<E>(error: E) -> Self
+    where
+        E: core::error::Error + Send + Sync + 'static,
+    {
+        Self::Backend(Box::new(error))
+    }
+}
diff --git a/crates/storage/src/cold/impls/mem.rs b/crates/storage/src/cold/impls/mem.rs
new file mode 100644
index 0000000..50c33df
--- /dev/null
+++ b/crates/storage/src/cold/impls/mem.rs
@@ -0,0 +1,310 @@
+//! In-memory cold storage backend for testing.
+//!
+//! This backend stores all data in memory using standard Rust collections.
+//! It is primarily intended for testing and development.
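+//!
+//! A minimal usage sketch (assumes a Tokio runtime; in production the backend
+//! would normally be driven through a `ColdStorageTask` rather than called
+//! directly):
+//!
+//! ```ignore
+//! use reth::primitives::Header;
+//! use signet_storage::cold::{BlockData, ColdStorage, impls::mem::MemColdBackend};
+//!
+//! # async fn demo() -> signet_storage::cold::ColdResult<()> {
+//! let backend = MemColdBackend::new();
+//!
+//! // Append a minimal block and read it back through the trait methods.
+//! let header = Header { number: 1, ..Default::default() };
+//! backend.append_block(BlockData::new(header, vec![], vec![], vec![], None)).await?;
+//! assert_eq!(backend.get_latest_block().await?, Some(1));
+//! # Ok(())
+//! # }
+//! ```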
+
+use crate::cold::{
+    BlockData, BlockTag, ColdResult, ColdStorage, HeaderSpecifier, ReceiptSpecifier,
+    SignetEventsSpecifier, TransactionSpecifier, ZenithHeaderSpecifier,
+};
+use alloy::primitives::{B256, BlockNumber};
+use reth::primitives::{Header, Receipt, TransactionSigned};
+use signet_db::{DbSignetEvent, DbZenithHeader};
+use std::{
+    collections::{BTreeMap, HashMap},
+    sync::Arc,
+};
+use tokio::sync::RwLock;
+
+/// Inner storage state.
+#[derive(Default)]
+struct MemColdBackendInner {
+    /// Headers indexed by block number.
+    headers: BTreeMap<BlockNumber, Header>,
+    /// Header hash to block number index.
+    header_hashes: HashMap<B256, BlockNumber>,
+
+    /// Transactions indexed by block number.
+    transactions: BTreeMap<BlockNumber, Vec<TransactionSigned>>,
+    /// Transaction hash to (block number, tx index) index.
+    tx_hashes: HashMap<B256, (BlockNumber, u64)>,
+
+    /// Receipts indexed by block number.
+    receipts: BTreeMap<BlockNumber, Vec<Receipt>>,
+    /// Transaction hash to (block number, receipt index) index for receipts.
+    receipt_tx_hashes: HashMap<B256, (BlockNumber, u64)>,
+
+    /// Signet events indexed by block number.
+    signet_events: BTreeMap<BlockNumber, Vec<DbSignetEvent>>,
+
+    /// Zenith headers indexed by block number.
+    zenith_headers: BTreeMap<BlockNumber, DbZenithHeader>,
+
+    /// The latest (highest) block number in storage.
+    latest_block: Option<BlockNumber>,
+}
+
+/// In-memory cold storage backend.
+///
+/// This backend is thread-safe and suitable for concurrent access.
+/// All operations are protected by an async read-write lock.
+#[derive(Default)]
+pub struct MemColdBackend {
+    inner: Arc<RwLock<MemColdBackendInner>>,
+}
+
+impl MemColdBackend {
+    /// Create a new empty in-memory backend.
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+impl std::fmt::Debug for MemColdBackend {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("MemColdBackend").finish_non_exhaustive()
+    }
+}
+
+impl ColdStorage for MemColdBackend {
+    async fn get_header(&self, spec: HeaderSpecifier) -> ColdResult<Option<Header>> {
+        let inner = self.inner.read().await;
+        match spec {
+            HeaderSpecifier::Number(n) => Ok(inner.headers.get(&n).cloned()),
+            HeaderSpecifier::Hash(h) => {
+                let block = inner.header_hashes.get(&h).copied();
+                Ok(block.and_then(|n| inner.headers.get(&n).cloned()))
+            }
+            HeaderSpecifier::Tag(tag) => match tag {
+                BlockTag::Latest | BlockTag::Finalized | BlockTag::Safe => {
+                    Ok(inner.latest_block.and_then(|n| inner.headers.get(&n).cloned()))
+                }
+                BlockTag::Earliest => Ok(inner.headers.first_key_value().map(|(_, h)| h.clone())),
+            },
+        }
+    }
+
+    async fn get_headers(&self, specs: Vec<HeaderSpecifier>) -> ColdResult<Vec<Option<Header>>> {
+        let mut results = Vec::with_capacity(specs.len());
+        for spec in specs {
+            results.push(self.get_header(spec).await?);
+        }
+        Ok(results)
+    }
+
+    async fn get_transaction(
+        &self,
+        spec: TransactionSpecifier,
+    ) -> ColdResult<Option<TransactionSigned>> {
+        let inner = self.inner.read().await;
+        match spec {
+            TransactionSpecifier::Hash(h) => {
+                let loc = inner.tx_hashes.get(&h).copied();
+                Ok(loc.and_then(|(block, idx)| {
+                    inner.transactions.get(&block).and_then(|txs| txs.get(idx as usize).cloned())
+                }))
+            }
+            TransactionSpecifier::BlockAndIndex { block, index } => {
+                Ok(inner.transactions.get(&block).and_then(|txs| txs.get(index as usize).cloned()))
+            }
+            TransactionSpecifier::BlockHashAndIndex { block_hash, index } => {
+                let block = inner.header_hashes.get(&block_hash).copied();
+                Ok(block.and_then(|n| {
+                    inner.transactions.get(&n).and_then(|txs| txs.get(index as usize).cloned())
+                }))
+            }
+        }
+    }
+
+    async fn get_transactions_in_block(
+        &self,
+        block: BlockNumber,
+    ) -> ColdResult<Vec<TransactionSigned>> {
+        let inner = self.inner.read().await;
+        Ok(inner.transactions.get(&block).cloned().unwrap_or_default())
+    }
+
+    async fn get_transaction_count(&self, block: BlockNumber) -> ColdResult<u64> {
+        let inner = self.inner.read().await;
+        Ok(inner.transactions.get(&block).map(|txs| txs.len() as u64).unwrap_or(0))
+    }
+
+    async fn get_receipt(&self, spec: ReceiptSpecifier) -> ColdResult<Option<Receipt>> {
+        let inner = self.inner.read().await;
+        match spec {
+            ReceiptSpecifier::TxHash(h) => {
+                let loc = inner.receipt_tx_hashes.get(&h).copied();
+                Ok(loc.and_then(|(block, idx)| {
+                    inner.receipts.get(&block).and_then(|rs| rs.get(idx as usize).cloned())
+                }))
+            }
+            ReceiptSpecifier::BlockAndIndex { block, index } => {
+                Ok(inner.receipts.get(&block).and_then(|rs| rs.get(index as usize).cloned()))
+            }
+        }
+    }
+
+    async fn get_receipts_in_block(&self, block: BlockNumber) -> ColdResult<Vec<Receipt>> {
+        let inner = self.inner.read().await;
+        Ok(inner.receipts.get(&block).cloned().unwrap_or_default())
+    }
+
+    async fn get_signet_events(
+        &self,
+        spec: SignetEventsSpecifier,
+    ) -> ColdResult<Vec<DbSignetEvent>> {
+        let inner = self.inner.read().await;
+        match spec {
+            SignetEventsSpecifier::Block(block) => {
+                Ok(inner.signet_events.get(&block).cloned().unwrap_or_default())
+            }
+            SignetEventsSpecifier::BlockRange { start, end } => {
+                let mut results = Vec::new();
+                for (_, events) in inner.signet_events.range(start..=end) {
+                    results.extend(events.iter().cloned());
+                }
+                Ok(results)
+            }
+        }
+    }
+
+    async fn get_zenith_header(
+        &self,
+        spec: ZenithHeaderSpecifier,
+    ) -> ColdResult<Option<DbZenithHeader>> {
+        let inner = self.inner.read().await;
+        match spec {
+            ZenithHeaderSpecifier::Number(n) => Ok(inner.zenith_headers.get(&n).cloned()),
+            ZenithHeaderSpecifier::Range { start, .. } => {
+                // For single lookup via range, return first in range
+                Ok(inner.zenith_headers.get(&start).cloned())
+            }
+        }
+    }
+
+    async fn get_zenith_headers(
+        &self,
+        spec: ZenithHeaderSpecifier,
+    ) -> ColdResult<Vec<DbZenithHeader>> {
+        let inner = self.inner.read().await;
+        match spec {
+            ZenithHeaderSpecifier::Number(n) => {
+                Ok(inner.zenith_headers.get(&n).cloned().into_iter().collect())
+            }
+            ZenithHeaderSpecifier::Range { start, end } => {
+                Ok(inner.zenith_headers.range(start..=end).map(|(_, h)| *h).collect())
+            }
+        }
+    }
+
+    async fn get_latest_block(&self) -> ColdResult<Option<BlockNumber>> {
+        let inner = self.inner.read().await;
+        Ok(inner.latest_block)
+    }
+
+    async fn append_block(&self, data: BlockData) -> ColdResult<()> {
+        let mut inner = self.inner.write().await;
+
+        let block = data.block_number();
+
+        // Store header and index by hash
+        let header_hash = data.header.hash_slow();
+        inner.headers.insert(block, data.header);
+        inner.header_hashes.insert(header_hash, block);
+
+        // Build tx hash list for indexing before moving transactions
+        let tx_hashes: Vec<_> = data.transactions.iter().map(|tx| *tx.hash()).collect();
+
+        // Store transactions and index by hash
+        for (idx, tx_hash) in tx_hashes.iter().enumerate() {
+            inner.tx_hashes.insert(*tx_hash, (block, idx as u64));
+        }
+
+        inner.transactions.insert(block, data.transactions);
+
+        // Store receipts and index by tx hash
+        for (idx, tx_hash) in tx_hashes.iter().enumerate() {
+            inner.receipt_tx_hashes.insert(*tx_hash, (block, idx as u64));
+        }
+        inner.receipts.insert(block, data.receipts);
+
+        // Store signet events
+        inner.signet_events.insert(block, data.signet_events);
+
+        // Store zenith header if present
+        if let Some(zh) = data.zenith_header {
+            inner.zenith_headers.insert(block, zh);
+        }
+
+        // Update latest block
+        inner.latest_block = Some(inner.latest_block.map_or(block, |prev| prev.max(block)));
+
+        Ok(())
+    }
+
+    async fn append_blocks(&self, data: Vec<BlockData>) -> ColdResult<()> {
+        for block_data in data {
+            self.append_block(block_data).await?;
+        }
+        Ok(())
+    }
+
+    async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> {
+        let mut inner = self.inner.write().await;
+
+        // Collect keys to remove
+        let to_remove: Vec<_> = inner.headers.range((block + 1)..).map(|(k, _)| *k).collect();
+
+        // Remove headers above block
+        for k in &to_remove {
+            if let Some(header) = inner.headers.remove(k) {
+                inner.header_hashes.remove(&header.hash_slow());
+            }
+        }
+
+        // Remove transactions above block
+        for k in &to_remove {
+            if let Some(txs) = inner.transactions.remove(k) {
+                for tx in txs {
+                    inner.tx_hashes.remove(tx.hash());
+                }
+            }
+        }
+
+        // Remove receipts above block
+        for k in &to_remove {
+            if inner.receipts.remove(k).is_some() {
+                // Also remove from receipt_tx_hashes
+                inner.receipt_tx_hashes.retain(|_, (b, _)| *b <= block);
+            }
+        }
+
+        // Remove signet events above block
+        for k in &to_remove {
+            inner.signet_events.remove(k);
+        }
+
+        // Remove zenith headers above block
+        for k in &to_remove {
+            inner.zenith_headers.remove(k);
+        }
+
+        // Update latest block
+        inner.latest_block = inner.headers.last_key_value().map(|(k, _)| *k);
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    use crate::cold::conformance::conformance;
+
+    #[tokio::test]
+    async fn mem_backend_conformance() {
+        let backend = MemColdBackend::new();
+        conformance(&backend).await.unwrap();
+    }
+}
diff --git a/crates/storage/src/cold/impls/mod.rs b/crates/storage/src/cold/impls/mod.rs
new file mode 100644
index 0000000..54b671b
--- /dev/null
+++ b/crates/storage/src/cold/impls/mod.rs
@@ -0,0 +1,7 @@
+//! Cold storage backend implementations.
+//!
+//! This module contains implementations of the [`ColdStorage`] trait
+//! for various backends.
+
+#[cfg(any(test, feature = "in-mem"))]
+pub mod mem;
diff --git a/crates/storage/src/cold/mod.rs b/crates/storage/src/cold/mod.rs
index 9b9d9be..1a908e0 100644
--- a/crates/storage/src/cold/mod.rs
+++ b/crates/storage/src/cold/mod.rs
@@ -1 +1,60 @@
-//! Placeholder module for cold storage implementation.
+//! Async cold storage engine for historical Ethereum data.
+//!
+//! This module provides an abstraction over various backend storage systems
+//! for historical blockchain data. Unlike hot storage, which uses transaction
+//! semantics for mutable state, cold storage is optimized for:
+//!
+//! - **Append-only writes** with block-ordered data
+//! - **Efficient bulk reads** by block number or index
+//! - **Truncation** (reorg handling) that removes data beyond a certain block
+//! - **Index maintenance** for hash-based lookups
+//!
+//! # Architecture
+//!
+//! The cold storage engine uses a task-based architecture:
+//!
+//! - [`ColdStorage`] trait defines the backend interface
+//! - [`ColdStorageTask`] processes requests from a channel
+//! - [`ColdStorageHandle`] provides an ergonomic API for sending requests
+//!
+//! # Example
+//!
+//! ```ignore
+//! use tokio_util::sync::CancellationToken;
+//! use signet_storage::cold::{ColdStorageTask, impls::mem::MemColdBackend};
+//!
+//! let cancel = CancellationToken::new();
+//! let handle = ColdStorageTask::spawn(MemColdBackend::new(), cancel);
+//!
+//! // Use the handle to interact with cold storage
+//! let header = handle.get_header_by_number(100).await?;
+//! ```
+
+mod error;
+pub use error::{ColdResult, ColdStorageError};
+
+mod request;
+pub use request::{
+    AppendBlockRequest, ColdReadRequest, ColdStorageRequest, ColdWriteRequest, Responder,
+};
+
+mod specifier;
+pub use specifier::{
+    BlockTag, HeaderSpecifier, ReceiptSpecifier, SignetEventsSpecifier, TransactionSpecifier,
+    ZenithHeaderSpecifier,
+};
+
+mod traits;
+pub use traits::{BlockData, ColdStorage};
+
+/// Task module containing the storage task runner and handle.
+pub mod task;
+pub use task::{ColdStorageHandle, ColdStorageTask};
+
+/// Backend implementations.
+#[cfg(feature = "impls")]
+pub mod impls;
+
+/// Conformance tests for cold storage backends.
+#[cfg(any(test, feature = "test-utils"))]
+pub mod conformance;
diff --git a/crates/storage/src/cold/request.rs b/crates/storage/src/cold/request.rs
new file mode 100644
index 0000000..a8d7ed2
--- /dev/null
+++ b/crates/storage/src/cold/request.rs
@@ -0,0 +1,157 @@
+//! Request and response types for the cold storage task.
+//!
+//! These types define the messages sent over channels to the cold storage task.
+
+use crate::cold::{
+    BlockData, ColdStorageError, HeaderSpecifier, ReceiptSpecifier, SignetEventsSpecifier,
+    TransactionSpecifier, ZenithHeaderSpecifier,
+};
+use alloy::primitives::BlockNumber;
+use reth::primitives::{Header, Receipt, TransactionSigned};
+use signet_db::{DbSignetEvent, DbZenithHeader};
+use tokio::sync::oneshot;
+
+/// Response sender type alias that propagates Result types.
+pub type Responder<T> = oneshot::Sender<Result<T, ColdStorageError>>;
+
+/// Block append request data (wrapper struct).
+#[derive(Debug)]
+pub struct AppendBlockRequest {
+    /// The block data to append.
+    pub data: BlockData,
+    /// The response channel.
+    pub resp: Responder<()>,
+}
+
+/// Read requests for cold storage.
+#[derive(Debug)]
+pub enum ColdReadRequest {
+    // --- Headers ---
+    /// Get a single header by specifier.
+    GetHeader {
+        /// The header specifier.
+        spec: HeaderSpecifier,
+        /// The response channel.
+        resp: Responder<Option<Header>>,
+    },
+    /// Get multiple headers by specifiers.
+    GetHeaders {
+        /// The header specifiers.
+        specs: Vec<HeaderSpecifier>,
+        /// The response channel.
+        resp: Responder<Vec<Option<Header>>>,
+    },
+
+    // --- Transactions ---
+    /// Get a single transaction by specifier.
+    GetTransaction {
+        /// The transaction specifier.
+        spec: TransactionSpecifier,
+        /// The response channel.
+        resp: Responder<Option<TransactionSigned>>,
+    },
+    /// Get all transactions in a block.
+    GetTransactionsInBlock {
+        /// The block number.
+        block: BlockNumber,
+        /// The response channel.
+        resp: Responder<Vec<TransactionSigned>>,
+    },
+    /// Get the transaction count for a block.
+    GetTransactionCount {
+        /// The block number.
+        block: BlockNumber,
+        /// The response channel.
+        resp: Responder<u64>,
+    },
+
+    // --- Receipts ---
+    /// Get a single receipt by specifier.
+    GetReceipt {
+        /// The receipt specifier.
+        spec: ReceiptSpecifier,
+        /// The response channel.
+        resp: Responder<Option<Receipt>>,
+    },
+    /// Get all receipts in a block.
+    GetReceiptsInBlock {
+        /// The block number.
+        block: BlockNumber,
+        /// The response channel.
+        resp: Responder<Vec<Receipt>>,
+    },
+
+    // --- SignetEvents ---
+    /// Get signet events by specifier.
+    GetSignetEvents {
+        /// The signet events specifier.
+        spec: SignetEventsSpecifier,
+        /// The response channel.
+        resp: Responder<Vec<DbSignetEvent>>,
+    },
+
+    // --- ZenithHeaders ---
+    /// Get a single zenith header by specifier.
+    GetZenithHeader {
+        /// The zenith header specifier.
+        spec: ZenithHeaderSpecifier,
+        /// The response channel.
+        resp: Responder<Option<DbZenithHeader>>,
+    },
+    /// Get multiple zenith headers by specifier.
+    GetZenithHeaders {
+        /// The zenith header specifier.
+        spec: ZenithHeaderSpecifier,
+        /// The response channel.
+        resp: Responder<Vec<DbZenithHeader>>,
+    },
+
+    // --- Metadata ---
+    /// Get the latest block number.
+    GetLatestBlock {
+        /// The response channel.
+        resp: Responder<Option<BlockNumber>>,
+    },
+}
+
+/// Write requests for cold storage.
+#[derive(Debug)]
+pub enum ColdWriteRequest {
+    /// Append a single block.
+    AppendBlock(Box<AppendBlockRequest>),
+    /// Append multiple blocks.
+    AppendBlocks {
+        /// The block data to append.
+        data: Vec<BlockData>,
+        /// The response channel.
+        resp: Responder<()>,
+    },
+    /// Truncate all data above the given block.
+    TruncateAbove {
+        /// The block number to truncate above.
+        block: BlockNumber,
+        /// The response channel.
+        resp: Responder<()>,
+    },
+}
+
+/// Combined request enum for the cold storage task.
+#[derive(Debug)]
+pub enum ColdStorageRequest {
+    /// A read request.
+    Read(ColdReadRequest),
+    /// A write request.
+    Write(ColdWriteRequest),
+}
+
+impl From<ColdReadRequest> for ColdStorageRequest {
+    fn from(req: ColdReadRequest) -> Self {
+        Self::Read(req)
+    }
+}
+
+impl From<ColdWriteRequest> for ColdStorageRequest {
+    fn from(req: ColdWriteRequest) -> Self {
+        Self::Write(req)
+    }
+}
diff --git a/crates/storage/src/cold/specifier.rs b/crates/storage/src/cold/specifier.rs
new file mode 100644
index 0000000..2f26dbe
--- /dev/null
+++ b/crates/storage/src/cold/specifier.rs
@@ -0,0 +1,151 @@
+//! Specifier enums for cold storage lookups.
+//!
+//! These types define how to locate data in cold storage, supporting
+//! the standard Ethereum JSON-RPC lookup patterns.
+
+use alloy::{
+    primitives::{B256, BlockNumber},
+    rpc::types::eth::BlockNumberOrTag,
+};
+
+/// Block tag for semantic block lookups.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum BlockTag {
+    /// The most recent block.
+    Latest,
+    /// The most recent finalized block.
+    Finalized,
+    /// The most recent safe block.
+    Safe,
+    /// The earliest/genesis block.
+    Earliest,
+}
+
+/// Specifier for header lookups.
+#[derive(Debug, Clone, Copy)]
+pub enum HeaderSpecifier {
+    /// Lookup by block number.
+    Number(BlockNumber),
+    /// Lookup by block hash.
+    Hash(B256),
+    /// Lookup by semantic tag.
+    Tag(BlockTag),
+}
+
+impl From<BlockNumberOrTag> for HeaderSpecifier {
+    fn from(value: BlockNumberOrTag) -> Self {
+        match value {
+            BlockNumberOrTag::Number(num) => Self::Number(num),
+            BlockNumberOrTag::Latest => Self::Tag(BlockTag::Latest),
+            BlockNumberOrTag::Finalized => Self::Tag(BlockTag::Finalized),
+            BlockNumberOrTag::Safe => Self::Tag(BlockTag::Safe),
+            BlockNumberOrTag::Earliest => Self::Tag(BlockTag::Earliest),
+            BlockNumberOrTag::Pending => Self::Tag(BlockTag::Latest), // Treat pending as latest
+        }
+    }
+}
+
+impl From<BlockNumber> for HeaderSpecifier {
+    fn from(number: BlockNumber) -> Self {
+        Self::Number(number)
+    }
+}
+
+impl From<B256> for HeaderSpecifier {
+    fn from(hash: B256) -> Self {
+        Self::Hash(hash)
+    }
+}
+
+impl From<BlockTag> for HeaderSpecifier {
+    fn from(tag: BlockTag) -> Self {
+        Self::Tag(tag)
+    }
+}
+
+/// Specifier for transaction lookups.
+#[derive(Debug, Clone, Copy)]
+pub enum TransactionSpecifier {
+    /// Lookup by transaction hash.
+    Hash(B256),
+    /// Lookup by block number and transaction index within the block.
+    BlockAndIndex {
+        /// The block number.
+        block: BlockNumber,
+        /// The transaction index within the block.
+        index: u64,
+    },
+    /// Lookup by block hash and transaction index within the block.
+    BlockHashAndIndex {
+        /// The block hash.
+        block_hash: B256,
+        /// The transaction index within the block.
+ index: u64, + }, +} + +impl From for TransactionSpecifier { + fn from(hash: B256) -> Self { + Self::Hash(hash) + } +} + +/// Specifier for receipt lookups. +#[derive(Debug, Clone, Copy)] +pub enum ReceiptSpecifier { + /// Lookup by transaction hash. + TxHash(B256), + /// Lookup by block number and transaction index within the block. + BlockAndIndex { + /// The block number. + block: BlockNumber, + /// The transaction index within the block. + index: u64, + }, +} + +impl From for ReceiptSpecifier { + fn from(tx_hash: B256) -> Self { + Self::TxHash(tx_hash) + } +} + +/// Specifier for SignetEvents lookups. +#[derive(Debug, Clone, Copy)] +pub enum SignetEventsSpecifier { + /// Lookup all events in a single block. + Block(BlockNumber), + /// Lookup all events in a range of blocks (inclusive). + BlockRange { + /// The start block number (inclusive). + start: BlockNumber, + /// The end block number (inclusive). + end: BlockNumber, + }, +} + +impl From for SignetEventsSpecifier { + fn from(block: BlockNumber) -> Self { + Self::Block(block) + } +} + +/// Specifier for ZenithHeader lookups. +#[derive(Debug, Clone, Copy)] +pub enum ZenithHeaderSpecifier { + /// Lookup by block number. + Number(BlockNumber), + /// Lookup a range of blocks (inclusive). + Range { + /// The start block number (inclusive). + start: BlockNumber, + /// The end block number (inclusive). + end: BlockNumber, + }, +} + +impl From for ZenithHeaderSpecifier { + fn from(number: BlockNumber) -> Self { + Self::Number(number) + } +} diff --git a/crates/storage/src/cold/task/handle.rs b/crates/storage/src/cold/task/handle.rs new file mode 100644 index 0000000..6ec5d87 --- /dev/null +++ b/crates/storage/src/cold/task/handle.rs @@ -0,0 +1,254 @@ +//! Ergonomic handle for interacting with cold storage. +//! +//! The [`ColdStorageHandle`] provides a convenient API for sending requests +//! to the cold storage task without needing to construct request types manually. + +use crate::cold::{ + AppendBlockRequest, BlockData, ColdReadRequest, ColdResult, ColdStorageError, + ColdStorageRequest, ColdWriteRequest, HeaderSpecifier, ReceiptSpecifier, SignetEventsSpecifier, + TransactionSpecifier, ZenithHeaderSpecifier, +}; +use alloy::primitives::{B256, BlockNumber}; +use reth::primitives::{Header, Receipt, TransactionSigned}; +use signet_db::{DbSignetEvent, DbZenithHeader}; +use tokio::sync::{mpsc, oneshot}; + +/// Handle for interacting with the cold storage task. +/// +/// This handle can be cloned and shared across tasks. It provides an ergonomic +/// API for sending requests to the storage task and receiving responses. +#[derive(Clone, Debug)] +pub struct ColdStorageHandle { + sender: mpsc::Sender, +} + +impl ColdStorageHandle { + /// Create a new handle with the given sender. + pub(crate) const fn new(sender: mpsc::Sender) -> Self { + Self { sender } + } + + /// Send a request and wait for the response. + async fn send( + &self, + req: ColdStorageRequest, + rx: oneshot::Receiver>, + ) -> ColdResult { + self.sender.send(req).await.map_err(|_| ColdStorageError::Cancelled)?; + rx.await.map_err(|_| ColdStorageError::Cancelled)? + } + + // ========================================================================== + // Headers + // ========================================================================== + + /// Get a header by specifier. 
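+    ///
+    /// `HeaderSpecifier` has `From` impls for block numbers, hashes, and
+    /// tags, so callers can usually pass a key directly (a sketch; `block_hash`
+    /// is an illustrative variable):
+    ///
+    /// ```ignore
+    /// let by_number = handle.get_header(100u64.into()).await?;
+    /// let by_hash = handle.get_header(block_hash.into()).await?;
+    /// let latest = handle.get_header(BlockTag::Latest.into()).await?;
+    /// ```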
+    pub async fn get_header(&self, spec: HeaderSpecifier) -> ColdResult<Option<Header>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetHeader { spec, resp }.into(), rx).await
+    }
+
+    /// Get a header by block number.
+    pub async fn get_header_by_number(&self, block: BlockNumber) -> ColdResult<Option<Header>> {
+        self.get_header(HeaderSpecifier::Number(block)).await
+    }
+
+    /// Get a header by block hash.
+    pub async fn get_header_by_hash(&self, hash: B256) -> ColdResult<Option<Header>> {
+        self.get_header(HeaderSpecifier::Hash(hash)).await
+    }
+
+    /// Get multiple headers by specifiers.
+    pub async fn get_headers(
+        &self,
+        specs: Vec<HeaderSpecifier>,
+    ) -> ColdResult<Vec<Option<Header>>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetHeaders { specs, resp }.into(), rx).await
+    }
+
+    // ==========================================================================
+    // Transactions
+    // ==========================================================================
+
+    /// Get a transaction by specifier.
+    pub async fn get_transaction(
+        &self,
+        spec: TransactionSpecifier,
+    ) -> ColdResult<Option<TransactionSigned>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetTransaction { spec, resp }.into(), rx).await
+    }
+
+    /// Get a transaction by hash.
+    pub async fn get_tx_by_hash(&self, hash: B256) -> ColdResult<Option<TransactionSigned>> {
+        self.get_transaction(TransactionSpecifier::Hash(hash)).await
+    }
+
+    /// Get a transaction by block number and index.
+    pub async fn get_tx_by_block_and_index(
+        &self,
+        block: BlockNumber,
+        index: u64,
+    ) -> ColdResult<Option<TransactionSigned>> {
+        self.get_transaction(TransactionSpecifier::BlockAndIndex { block, index }).await
+    }
+
+    /// Get a transaction by block hash and index.
+    pub async fn get_tx_by_block_hash_and_index(
+        &self,
+        block_hash: B256,
+        index: u64,
+    ) -> ColdResult<Option<TransactionSigned>> {
+        self.get_transaction(TransactionSpecifier::BlockHashAndIndex { block_hash, index }).await
+    }
+
+    /// Get all transactions in a block.
+    pub async fn get_transactions_in_block(
+        &self,
+        block: BlockNumber,
+    ) -> ColdResult<Vec<TransactionSigned>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetTransactionsInBlock { block, resp }.into(), rx).await
+    }
+
+    /// Get the transaction count for a block.
+    pub async fn get_transaction_count(&self, block: BlockNumber) -> ColdResult<u64> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetTransactionCount { block, resp }.into(), rx).await
+    }
+
+    // ==========================================================================
+    // Receipts
+    // ==========================================================================
+
+    /// Get a receipt by specifier.
+    pub async fn get_receipt(&self, spec: ReceiptSpecifier) -> ColdResult<Option<Receipt>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetReceipt { spec, resp }.into(), rx).await
+    }
+
+    /// Get a receipt by transaction hash.
+    pub async fn get_receipt_by_tx_hash(&self, hash: B256) -> ColdResult<Option<Receipt>> {
+        self.get_receipt(ReceiptSpecifier::TxHash(hash)).await
+    }
+
+    /// Get a receipt by block number and index.
+    pub async fn get_receipt_by_block_and_index(
+        &self,
+        block: BlockNumber,
+        index: u64,
+    ) -> ColdResult<Option<Receipt>> {
+        self.get_receipt(ReceiptSpecifier::BlockAndIndex { block, index }).await
+    }
+
+    /// Get all receipts in a block.
+    pub async fn get_receipts_in_block(&self, block: BlockNumber) -> ColdResult<Vec<Receipt>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetReceiptsInBlock { block, resp }.into(), rx).await
+    }
+
+    // ==========================================================================
+    // SignetEvents
+    // ==========================================================================
+
+    /// Get signet events by specifier.
+    pub async fn get_signet_events(
+        &self,
+        spec: SignetEventsSpecifier,
+    ) -> ColdResult<Vec<DbSignetEvent>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetSignetEvents { spec, resp }.into(), rx).await
+    }
+
+    /// Get signet events in a block.
+    pub async fn get_signet_events_in_block(
+        &self,
+        block: BlockNumber,
+    ) -> ColdResult<Vec<DbSignetEvent>> {
+        self.get_signet_events(SignetEventsSpecifier::Block(block)).await
+    }
+
+    /// Get signet events in a range of blocks.
+    pub async fn get_signet_events_in_range(
+        &self,
+        start: BlockNumber,
+        end: BlockNumber,
+    ) -> ColdResult<Vec<DbSignetEvent>> {
+        self.get_signet_events(SignetEventsSpecifier::BlockRange { start, end }).await
+    }
+
+    // ==========================================================================
+    // ZenithHeaders
+    // ==========================================================================
+
+    /// Get a zenith header by block number.
+    pub async fn get_zenith_header(
+        &self,
+        block: BlockNumber,
+    ) -> ColdResult<Option<DbZenithHeader>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(
+            ColdReadRequest::GetZenithHeader { spec: ZenithHeaderSpecifier::Number(block), resp }
+                .into(),
+            rx,
+        )
+        .await
+    }
+
+    /// Get zenith headers by specifier.
+    pub async fn get_zenith_headers(
+        &self,
+        spec: ZenithHeaderSpecifier,
+    ) -> ColdResult<Vec<DbZenithHeader>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetZenithHeaders { spec, resp }.into(), rx).await
+    }
+
+    /// Get zenith headers in a range of blocks.
+    pub async fn get_zenith_headers_in_range(
+        &self,
+        start: BlockNumber,
+        end: BlockNumber,
+    ) -> ColdResult<Vec<DbZenithHeader>> {
+        self.get_zenith_headers(ZenithHeaderSpecifier::Range { start, end }).await
+    }
+
+    // ==========================================================================
+    // Metadata
+    // ==========================================================================
+
+    /// Get the latest block number in storage.
+    pub async fn get_latest_block(&self) -> ColdResult<Option<BlockNumber>> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdReadRequest::GetLatestBlock { resp }.into(), rx).await
+    }
+
+    // ==========================================================================
+    // Write Operations
+    // ==========================================================================
+
+    /// Append a single block to cold storage.
+    pub async fn append_block(&self, data: BlockData) -> ColdResult<()> {
+        let (resp, rx) = oneshot::channel();
+        self.send(
+            ColdWriteRequest::AppendBlock(Box::new(AppendBlockRequest { data, resp })).into(),
+            rx,
+        )
+        .await
+    }
+
+    /// Append multiple blocks to cold storage.
+    pub async fn append_blocks(&self, data: Vec<BlockData>) -> ColdResult<()> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdWriteRequest::AppendBlocks { data, resp }.into(), rx).await
+    }
+
+    /// Truncate all data above the given block number.
+    ///
+    /// This removes block N+1 and higher from all tables.
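+    ///
+    /// For example, a reorg handler might rewind to the last common ancestor
+    /// and then re-append the new canonical chain (variable names here are
+    /// illustrative):
+    ///
+    /// ```ignore
+    /// handle.truncate_above(common_ancestor).await?;
+    /// handle.append_blocks(new_canonical_blocks).await?;
+    /// ```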
+    pub async fn truncate_above(&self, block: BlockNumber) -> ColdResult<()> {
+        let (resp, rx) = oneshot::channel();
+        self.send(ColdWriteRequest::TruncateAbove { block, resp }.into(), rx).await
+    }
+}
diff --git a/crates/storage/src/cold/task/mod.rs b/crates/storage/src/cold/task/mod.rs
new file mode 100644
index 0000000..8e3b4ab
--- /dev/null
+++ b/crates/storage/src/cold/task/mod.rs
@@ -0,0 +1,12 @@
+//! Cold storage task and handle.
+//!
+//! This module provides the task-based architecture for cold storage:
+//!
+//! - [`ColdStorageTask`] processes requests from a channel
+//! - [`ColdStorageHandle`] provides an ergonomic API for sending requests
+
+mod handle;
+pub use handle::ColdStorageHandle;
+
+mod runner;
+pub use runner::ColdStorageTask;
diff --git a/crates/storage/src/cold/task/runner.rs b/crates/storage/src/cold/task/runner.rs
new file mode 100644
index 0000000..e922673
--- /dev/null
+++ b/crates/storage/src/cold/task/runner.rs
@@ -0,0 +1,187 @@
+//! Cold storage task runner.
+//!
+//! The [`ColdStorageTask`] processes requests from a channel and dispatches
+//! them to the storage backend.
+
+use crate::cold::{
+    ColdReadRequest, ColdStorage, ColdStorageHandle, ColdStorageRequest, ColdWriteRequest,
+};
+use std::sync::Arc;
+use tokio::sync::{Semaphore, mpsc};
+use tokio_util::{sync::CancellationToken, task::TaskTracker};
+use tracing::{debug, instrument};
+
+/// Channel size for cold storage requests.
+const COLD_STORAGE_CHANNEL_SIZE: usize = 256;
+
+/// Maximum concurrent request handlers.
+const MAX_CONCURRENT_HANDLERS: usize = 64;
+
+/// The cold storage task that processes requests.
+///
+/// This task receives requests over a channel and dispatches them to the
+/// storage backend. It supports graceful shutdown via a cancellation token.
+pub struct ColdStorageTask<B> {
+    backend: Arc<B>,
+    receiver: mpsc::Receiver<ColdStorageRequest>,
+    cancel_token: CancellationToken,
+    task_tracker: TaskTracker,
+    handler_permits: Arc<Semaphore>,
+}
+
+impl<B> std::fmt::Debug for ColdStorageTask<B> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ColdStorageTask").finish_non_exhaustive()
+    }
+}
+
+impl<B: ColdStorage> ColdStorageTask<B> {
+    /// Create a new cold storage task and return its handle.
+    pub fn new(backend: B, cancel_token: CancellationToken) -> (Self, ColdStorageHandle) {
+        let (sender, receiver) = mpsc::channel(COLD_STORAGE_CHANNEL_SIZE);
+        let task = Self {
+            backend: Arc::new(backend),
+            receiver,
+            cancel_token,
+            task_tracker: TaskTracker::new(),
+            handler_permits: Arc::new(Semaphore::new(MAX_CONCURRENT_HANDLERS)),
+        };
+        let handle = ColdStorageHandle::new(sender);
+        (task, handle)
+    }
+
+    /// Spawn the task and return the handle.
+    ///
+    /// The task will run until the cancellation token is triggered or the
+    /// channel is closed.
+    pub fn spawn(backend: B, cancel_token: CancellationToken) -> ColdStorageHandle {
+        let (task, handle) = Self::new(backend, cancel_token);
+        tokio::spawn(task.run());
+        handle
+    }
+
+    /// Run the task, processing requests until shutdown.
+    #[instrument(skip(self), name = "cold_storage_task")]
+    pub async fn run(mut self) {
+        debug!("Cold storage task started");
+
+        loop {
+            tokio::select! {
+                // Check for cancellation
+                _ = self.cancel_token.cancelled() => {
+                    debug!("Cold storage task received cancellation signal");
+                    break;
+                }
+
+                // Process incoming requests
+                maybe_request = self.receiver.recv() => {
+                    match maybe_request {
+                        Some(request) => {
+                            // Backpressure: acquire a permit before spawning,
+                            // bounding the number of concurrent handlers.
+                            let permit = tokio::select! {
+                                _ = self.cancel_token.cancelled() => {
+                                    debug!("Cancellation while waiting for a handler slot");
+                                    break;
+                                }
+                                permit = Arc::clone(&self.handler_permits).acquire_owned() => {
+                                    permit.expect("semaphore is never closed")
+                                }
+                            };
+
+                            let backend = Arc::clone(&self.backend);
+                            self.task_tracker.spawn(async move {
+                                // Hold the permit for the lifetime of the handler.
+                                let _permit = permit;
+                                Self::handle_request(backend, request).await;
+                            });
+                        }
+                        None => {
+                            debug!("Cold storage channel closed");
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        // Graceful shutdown: wait for in-progress tasks to complete
+        debug!("Waiting for in-progress handlers to complete");
+        self.task_tracker.close();
+        self.task_tracker.wait().await;
+        debug!("Cold storage task shut down gracefully");
+    }
+
+    async fn handle_request(backend: Arc<B>, request: ColdStorageRequest) {
+        match request {
+            ColdStorageRequest::Read(read_req) => {
+                Self::handle_read(backend, read_req).await;
+            }
+            ColdStorageRequest::Write(write_req) => {
+                Self::handle_write(backend, write_req).await;
+            }
+        }
+    }
+
+    async fn handle_read(backend: Arc<B>, req: ColdReadRequest) {
+        match req {
+            ColdReadRequest::GetHeader { spec, resp } => {
+                let result = backend.get_header(spec).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetHeaders { specs, resp } => {
+                let result = backend.get_headers(specs).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetTransaction { spec, resp } => {
+                let result = backend.get_transaction(spec).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetTransactionsInBlock { block, resp } => {
+                let result = backend.get_transactions_in_block(block).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetTransactionCount { block, resp } => {
+                let result = backend.get_transaction_count(block).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetReceipt { spec, resp } => {
+                let result = backend.get_receipt(spec).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetReceiptsInBlock { block, resp } => {
+                let result = backend.get_receipts_in_block(block).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetSignetEvents { spec, resp } => {
+                let result = backend.get_signet_events(spec).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetZenithHeader { spec, resp } => {
+                let result = backend.get_zenith_header(spec).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetZenithHeaders { spec, resp } => {
+                let result = backend.get_zenith_headers(spec).await;
+                let _ = resp.send(result);
+            }
+            ColdReadRequest::GetLatestBlock { resp } => {
+                let result = backend.get_latest_block().await;
+                let _ = resp.send(result);
+            }
+        }
+    }
+
+    async fn handle_write(backend: Arc<B>, req: ColdWriteRequest) {
+        match req {
+            ColdWriteRequest::AppendBlock(boxed) => {
+                let result = backend.append_block(boxed.data).await;
+                let _ = boxed.resp.send(result);
+            }
+            ColdWriteRequest::AppendBlocks { data, resp } => {
+                let result = backend.append_blocks(data).await;
+                let _ = resp.send(result);
+            }
+            ColdWriteRequest::TruncateAbove { block, resp } => {
+                let result = backend.truncate_above(block).await;
+                let _ = resp.send(result);
+            }
+        }
+    }
+}
diff --git a/crates/storage/src/cold/traits.rs b/crates/storage/src/cold/traits.rs
new file mode 100644
index 0000000..ba001a4
--- /dev/null
+++ b/crates/storage/src/cold/traits.rs
@@ -0,0 +1,168 @@
+//! Core trait definition for cold storage backends.
+//!
+//! The [`ColdStorage`] trait defines the interface that all cold storage
+//! backends must implement. Backends are responsible for data organization,
+//! indexing, and keying; the trait is agnostic to these implementation details.
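+//!
+//! A sketch of how downstream code can stay backend-agnostic by bounding on
+//! [`ColdStorage`] (the helper function below is illustrative, not part of
+//! the crate):
+//!
+//! ```ignore
+//! use alloy::primitives::B256;
+//! use reth::primitives::Header;
+//! use signet_storage::cold::{ColdResult, ColdStorage, HeaderSpecifier};
+//!
+//! /// Resolve a header, preferring the hash index and falling back to the
+//! /// block number.
+//! async fn find_header<B: ColdStorage>(
+//!     backend: &B,
+//!     hash: B256,
+//!     number: u64,
+//! ) -> ColdResult<Option<Header>> {
+//!     if let Some(header) = backend.get_header(HeaderSpecifier::Hash(hash)).await? {
+//!         return Ok(Some(header));
+//!     }
+//!     backend.get_header(HeaderSpecifier::Number(number)).await
+//! }
+//! ```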
+
+use alloy::primitives::BlockNumber;
+use reth::primitives::{Header, Receipt, TransactionSigned};
+use signet_db::{DbSignetEvent, DbZenithHeader};
+use std::future::Future;
+
+use super::{
+    ColdResult, HeaderSpecifier, ReceiptSpecifier, SignetEventsSpecifier, TransactionSpecifier,
+    ZenithHeaderSpecifier,
+};
+
+/// Data for appending a complete block to cold storage.
+#[derive(Debug, Clone)]
+pub struct BlockData {
+    /// The block header.
+    pub header: Header,
+    /// The transactions in the block.
+    pub transactions: Vec<TransactionSigned>,
+    /// The receipts for the transactions.
+    pub receipts: Vec<Receipt>,
+    /// The signet events in the block.
+    pub signet_events: Vec<DbSignetEvent>,
+    /// The zenith header for the block, if present.
+    pub zenith_header: Option<DbZenithHeader>,
+}
+
+impl BlockData {
+    /// Create new block data.
+    pub const fn new(
+        header: Header,
+        transactions: Vec<TransactionSigned>,
+        receipts: Vec<Receipt>,
+        signet_events: Vec<DbSignetEvent>,
+        zenith_header: Option<DbZenithHeader>,
+    ) -> Self {
+        Self { header, transactions, receipts, signet_events, zenith_header }
+    }
+
+    /// Get the block number of the block.
+    pub const fn block_number(&self) -> BlockNumber {
+        self.header.number
+    }
+}
+
+/// Unified cold storage backend trait.
+///
+/// The backend is responsible for all data organization, indexing, and keying.
+/// The trait is agnostic to how the backend stores or indexes data.
+///
+/// All methods are async and return futures that are `Send`.
+///
+/// # Implementation Guide
+///
+/// Implementers must ensure:
+///
+/// - **Append ordering**: blocks normally arrive with monotonically
+///   increasing block numbers, but implementations must tolerate out-of-order
+///   appends; `get_latest_block` always reports the highest block number
+///   appended so far (see the conformance tests).
+///
+/// - **Atomic truncation**: `truncate_above` must remove all data for blocks
+///   N+1 and higher atomically. Partial truncation is not acceptable.
+///
+/// - **Index maintenance**: Hash-based lookups (e.g., header by hash,
+///   transaction by hash) require the implementation to maintain appropriate
+///   indexes. These indexes must be updated during `append_block` and cleaned
+///   during `truncate_above`.
+///
+/// - **Consistent reads**: Read operations should return consistent snapshots.
+///   A read started before a write completes should not see partial data from
+///   that write.
+///
+/// - **Tag resolution**: `HeaderSpecifier::Tag` variants (Latest, Finalized,
+///   Safe, Earliest) must be resolved by the implementation. For simple
+///   backends, Latest/Finalized/Safe may all resolve to the same block.
+pub trait ColdStorage: Send + Sync + 'static {
+    // --- Headers ---
+
+    /// Get a header by specifier.
+    fn get_header(
+        &self,
+        spec: HeaderSpecifier,
+    ) -> impl Future<Output = ColdResult<Option<Header>>> + Send;
+
+    /// Get multiple headers by specifiers.
+    fn get_headers(
+        &self,
+        specs: Vec<HeaderSpecifier>,
+    ) -> impl Future<Output = ColdResult<Vec<Option<Header>>>> + Send;
+
+    // --- Transactions ---
+
+    /// Get a transaction by specifier.
+    fn get_transaction(
+        &self,
+        spec: TransactionSpecifier,
+    ) -> impl Future<Output = ColdResult<Option<TransactionSigned>>> + Send;
+
+    /// Get all transactions in a block.
+    fn get_transactions_in_block(
+        &self,
+        block: BlockNumber,
+    ) -> impl Future<Output = ColdResult<Vec<TransactionSigned>>> + Send;
+
+    /// Get the number of transactions in a block.
+    fn get_transaction_count(
+        &self,
+        block: BlockNumber,
+    ) -> impl Future<Output = ColdResult<u64>> + Send;
+
+    // --- Receipts ---
+
+    /// Get a receipt by specifier.
+    fn get_receipt(
+        &self,
+        spec: ReceiptSpecifier,
+    ) -> impl Future<Output = ColdResult<Option<Receipt>>> + Send;
+
+    /// Get all receipts in a block.
+    fn get_receipts_in_block(
+        &self,
+        block: BlockNumber,
+    ) -> impl Future<Output = ColdResult<Vec<Receipt>>> + Send;
+
+    // --- SignetEvents ---
+
+    /// Get signet events by specifier.
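+    ///
+    /// Range lookups return the events of every block in the inclusive
+    /// range, concatenated in block order (sketch):
+    ///
+    /// ```ignore
+    /// let events = backend
+    ///     .get_signet_events(SignetEventsSpecifier::BlockRange { start: 10, end: 20 })
+    ///     .await?;
+    /// ```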
+    fn get_signet_events(
+        &self,
+        spec: SignetEventsSpecifier,
+    ) -> impl Future<Output = ColdResult<Vec<DbSignetEvent>>> + Send;
+
+    // --- ZenithHeaders ---
+
+    /// Get a zenith header by specifier.
+    fn get_zenith_header(
+        &self,
+        spec: ZenithHeaderSpecifier,
+    ) -> impl Future<Output = ColdResult<Option<DbZenithHeader>>> + Send;
+
+    /// Get multiple zenith headers by specifier.
+    fn get_zenith_headers(
+        &self,
+        spec: ZenithHeaderSpecifier,
+    ) -> impl Future<Output = ColdResult<Vec<DbZenithHeader>>> + Send;
+
+    // --- Metadata ---
+
+    /// Get the latest block number in storage.
+    fn get_latest_block(&self) -> impl Future<Output = ColdResult<Option<BlockNumber>>> + Send;
+
+    // --- Write operations ---
+
+    /// Append a single block to cold storage.
+    fn append_block(&self, data: BlockData) -> impl Future<Output = ColdResult<()>> + Send;
+
+    /// Append multiple blocks to cold storage.
+    fn append_blocks(&self, data: Vec<BlockData>) -> impl Future<Output = ColdResult<()>> + Send;
+
+    /// Truncate all data above the given block number (exclusive).
+    ///
+    /// This removes block N+1 and higher from all tables. Used for reorg handling.
+    fn truncate_above(&self, block: BlockNumber) -> impl Future<Output = ColdResult<()>> + Send;
+}
diff --git a/crates/storage/src/hot/conformance.rs b/crates/storage/src/hot/conformance.rs
new file mode 100644
index 0000000..0101f92
--- /dev/null
+++ b/crates/storage/src/hot/conformance.rs
@@ -0,0 +1,2571 @@
+#![allow(dead_code)]
+
+use crate::hot::{
+    db::{HistoryError, HistoryRead, HistoryWrite, HotDbRead, UnsafeDbWrite, UnsafeHistoryWrite},
+    model::{
+        DualKeyValue, DualTableTraverse, HotKv, HotKvRead, HotKvWrite, KeyValue, TableTraverse,
+    },
+    tables::{self, DualKey, SingleKey},
+};
+use alloy::primitives::{Address, B256, Bytes, U256, address, b256};
+use reth::primitives::{Account, Bytecode, Header, SealedHeader};
+use reth_db::BlockNumberList;
+use std::collections::HashMap;
+use std::fmt::Debug;
+use trevm::revm::{
+    bytecode::Bytecode as RevmBytecode,
+    database::{
+        AccountStatus, BundleAccount, BundleState,
+        states::{
+            StorageSlot,
+            reverts::{AccountInfoRevert, AccountRevert, RevertToSlot, Reverts},
+        },
+    },
+    primitives::map::DefaultHashBuilder,
+    state::AccountInfo,
+};
+
+/// Run all conformance tests against a [`HotKv`] implementation.
+pub fn conformance<T: HotKv>(hot_kv: &T) {
+    test_header_roundtrip(hot_kv);
+    test_account_roundtrip(hot_kv);
+    test_storage_roundtrip(hot_kv);
+    test_storage_update_replaces(hot_kv);
+    test_bytecode_roundtrip(hot_kv);
+    test_account_history(hot_kv);
+    test_storage_history(hot_kv);
+    test_account_changes(hot_kv);
+    test_storage_changes(hot_kv);
+    test_missing_reads(hot_kv);
+}
+
+// /// Run append and unwind conformance tests.
+// ///
+// /// This test requires a fresh database (no prior state) to properly test
+// /// the append/unwind functionality.
+// pub fn conformance_append_unwind<T: HotKv>(hot_kv: &T) {
+//     test_append_and_unwind_blocks(hot_kv);
+// }
+
+/// Test writing and reading headers via HotDbWrite/HotDbRead
+fn test_header_roundtrip<T: HotKv>(hot_kv: &T) {
+    let header = Header { number: 42, gas_limit: 1_000_000, ..Default::default() };
+    let sealed = SealedHeader::seal_slow(header.clone());
+    let hash = sealed.hash();
+
+    // Write header
+    {
+        let writer = hot_kv.writer().unwrap();
+        writer.put_header(&sealed).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Read header by number
+    {
+        let reader = hot_kv.reader().unwrap();
+        let read_header = reader.get_header(42).unwrap();
+        assert!(read_header.is_some());
+        assert_eq!(read_header.unwrap().number, 42);
+    }
+
+    // Read header number by hash
+    {
+        let reader = hot_kv.reader().unwrap();
+        let read_number = reader.get_header_number(&hash).unwrap();
+        assert!(read_number.is_some());
+        assert_eq!(read_number.unwrap(), 42);
+    }
+
+    // Read header by hash
+    {
+        let reader = hot_kv.reader().unwrap();
+        let read_header = reader.header_by_hash(&hash).unwrap();
+        assert!(read_header.is_some());
+        assert_eq!(read_header.unwrap().number, 42);
+    }
+}
+
+/// Test writing and reading accounts via HotDbWrite/HotDbRead
+fn test_account_roundtrip<T: HotKv>(hot_kv: &T) {
+    let addr = address!("0x1234567890123456789012345678901234567890");
+    let account = Account { nonce: 5, balance: U256::from(1000), bytecode_hash: Some(B256::ZERO) };
+
+    // Write account
+    {
+        let writer = hot_kv.writer().unwrap();
+        writer.put_account(&addr, &account).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Read account
+    {
+        let reader = hot_kv.reader().unwrap();
+        let read_account = reader.get_account(&addr).unwrap();
+        assert!(read_account.is_some());
+        let read_account = read_account.unwrap();
+        assert_eq!(read_account.nonce, 5);
+        assert_eq!(read_account.balance, U256::from(1000));
+    }
+}
+
+/// Test writing and reading storage via HotDbWrite/HotDbRead
+fn test_storage_roundtrip<T: HotKv>(hot_kv: &T) {
+    let addr = address!("0xabcdef0123456789abcdef0123456789abcdef01");
+    let slot = U256::from(42);
+    let value = U256::from(999);
+
+    // Write storage
+    {
+        let writer = hot_kv.writer().unwrap();
+        writer.put_storage(&addr, &slot, &value).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Read storage
+    {
+        let reader = hot_kv.reader().unwrap();
+        let read_value = reader.get_storage(&addr, &slot).unwrap();
+        assert!(read_value.is_some());
+        assert_eq!(read_value.unwrap(), U256::from(999));
+    }
+
+    // Read storage entry
+    {
+        let reader = hot_kv.reader().unwrap();
+        let read_entry = reader.get_storage_entry(&addr, &slot).unwrap();
+        assert!(read_entry.is_some());
+        let entry = read_entry.unwrap();
+        assert_eq!(entry.key, B256::new(slot.to_be_bytes()));
+        assert_eq!(entry.value, U256::from(999));
+    }
+}
+
+/// Test that updating a storage slot replaces the value (no duplicates).
+///
+/// This test verifies that DUPSORT tables properly handle updates by deleting
+/// existing entries before inserting new ones.
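+///
+/// The expected upsert sequence for a DUPSORT storage table is roughly
+/// (a sketch of the write path this test exercises):
+///
+/// ```ignore
+/// // 1. Seek to the exact (address, slot) entry.
+/// // 2. Delete the existing entry, if any.
+/// // 3. Insert the new (slot, value) entry.
+/// writer.put_storage(&addr, &slot, &value)?;
+/// ```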
+fn test_storage_update_replaces(hot_kv: &T) { + let addr = address!("0x2222222222222222222222222222222222222222"); + let slot = U256::from(1); + + // Write initial value + { + let writer = hot_kv.writer().unwrap(); + writer.put_storage(&addr, &slot, &U256::from(10)).unwrap(); + writer.commit().unwrap(); + } + + // Update to new value + { + let writer = hot_kv.writer().unwrap(); + writer.put_storage(&addr, &slot, &U256::from(20)).unwrap(); + writer.commit().unwrap(); + } + + // Verify: only ONE entry exists with the NEW value + let reader = hot_kv.reader().unwrap(); + let mut cursor = reader.traverse_dual::().unwrap(); + + let mut count = 0; + let mut found_value = None; + while let Some((k, k2, v)) = cursor.read_next().unwrap() { + if k == addr && k2 == slot { + count += 1; + found_value = Some(v); + } + } + + assert_eq!(count, 1, "Should have exactly one entry, not duplicates"); + assert_eq!(found_value, Some(U256::from(20)), "Value should be 20"); +} + +/// Test writing and reading bytecode via HotDbWrite/HotDbRead +fn test_bytecode_roundtrip(hot_kv: &T) { + let code = Bytes::from_static(&[0x60, 0x00, 0x60, 0x00, 0xf3]); // Simple EVM bytecode + let bytecode = Bytecode::new_raw(code); + let code_hash = bytecode.hash_slow(); + + // Write bytecode + { + let writer = hot_kv.writer().unwrap(); + writer.put_bytecode(&code_hash, &bytecode).unwrap(); + writer.commit().unwrap(); + } + + // Read bytecode + { + let reader = hot_kv.reader().unwrap(); + let read_bytecode = reader.get_bytecode(&code_hash).unwrap(); + assert!(read_bytecode.is_some()); + } +} + +/// Test account history via HotHistoryWrite/HotHistoryRead +fn test_account_history(hot_kv: &T) { + let addr = address!("0x1111111111111111111111111111111111111111"); + let touched_blocks = BlockNumberList::new([10, 20, 30]).unwrap(); + let latest_height = 100u64; + + // Write account history + { + let writer = hot_kv.writer().unwrap(); + writer.write_account_history(&addr, latest_height, &touched_blocks).unwrap(); + writer.commit().unwrap(); + } + + // Read account history + { + let reader = hot_kv.reader().unwrap(); + let read_history = reader.get_account_history(&addr, latest_height).unwrap(); + assert!(read_history.is_some()); + let history = read_history.unwrap(); + assert_eq!(history.iter().collect::>(), vec![10, 20, 30]); + } +} + +/// Test storage history via HotHistoryWrite/HotHistoryRead +fn test_storage_history(hot_kv: &T) { + let addr = address!("0x2222222222222222222222222222222222222222"); + let slot = U256::from(42); + let touched_blocks = BlockNumberList::new([5, 15, 25]).unwrap(); + let highest_block = 50u64; + + // Write storage history + { + let writer = hot_kv.writer().unwrap(); + writer.write_storage_history(&addr, slot, highest_block, &touched_blocks).unwrap(); + writer.commit().unwrap(); + } + + // Read storage history + { + let reader = hot_kv.reader().unwrap(); + let read_history = reader.get_storage_history(&addr, slot, highest_block).unwrap(); + assert!(read_history.is_some()); + let history = read_history.unwrap(); + assert_eq!(history.iter().collect::>(), vec![5, 15, 25]); + } +} + +/// Test account change sets via HotHistoryWrite/HotHistoryRead +fn test_account_changes(hot_kv: &T) { + let addr = address!("0x3333333333333333333333333333333333333333"); + let pre_state = Account { nonce: 10, balance: U256::from(5000), bytecode_hash: None }; + let block_number = 100u64; + + // Write account change + { + let writer = hot_kv.writer().unwrap(); + writer.write_account_prestate(block_number, addr, &pre_state).unwrap(); + 
+        writer.commit().unwrap();
+    }
+
+    // Read account change
+    {
+        let reader = hot_kv.reader().unwrap();
+
+        let read_change = reader.get_account_change(block_number, &addr).unwrap();
+
+        assert!(read_change.is_some());
+        let change = read_change.unwrap();
+        assert_eq!(change.nonce, 10);
+        assert_eq!(change.balance, U256::from(5000));
+    }
+}
+
+/// Test storage change sets via HotHistoryWrite/HotHistoryRead
+fn test_storage_changes<T: HotKv>(hot_kv: &T) {
+    let addr = address!("0x4444444444444444444444444444444444444444");
+    let slot = U256::from(153);
+    let pre_value = U256::from(12345);
+    let block_number = 200u64;
+
+    // Write storage change
+    {
+        let writer = hot_kv.writer().unwrap();
+        writer.write_storage_prestate(block_number, addr, &slot, &pre_value).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Read storage change
+    {
+        let reader = hot_kv.reader().unwrap();
+        let read_change = reader.get_storage_change(block_number, &addr, &slot).unwrap();
+        assert!(read_change.is_some());
+        assert_eq!(read_change.unwrap(), U256::from(12345));
+    }
+}
+
+/// Test that missing reads return None
+fn test_missing_reads<T: HotKv>(hot_kv: &T) {
+    let missing_addr = address!("0x9999999999999999999999999999999999999999");
+    let missing_hash = b256!("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff");
+    let missing_slot = U256::from(99999);
+
+    let reader = hot_kv.reader().unwrap();
+
+    // Missing header
+    assert!(reader.get_header(999999).unwrap().is_none());
+
+    // Missing header number
+    assert!(reader.get_header_number(&missing_hash).unwrap().is_none());
+
+    // Missing account
+    assert!(reader.get_account(&missing_addr).unwrap().is_none());
+
+    // Missing storage
+    assert!(reader.get_storage(&missing_addr, &missing_slot).unwrap().is_none());
+
+    // Missing bytecode
+    assert!(reader.get_bytecode(&missing_hash).unwrap().is_none());
+
+    // Missing header by hash
+    assert!(reader.header_by_hash(&missing_hash).unwrap().is_none());
+
+    // Missing account history
+    assert!(reader.get_account_history(&missing_addr, 1000).unwrap().is_none());
+
+    // Missing storage history
+    assert!(reader.get_storage_history(&missing_addr, missing_slot, 1000).unwrap().is_none());
+
+    // Missing account change
+    assert!(reader.get_account_change(999999, &missing_addr).unwrap().is_none());
+
+    // Missing storage change
+    assert!(reader.get_storage_change(999999, &missing_addr, &missing_slot).unwrap().is_none());
+}
+
+/// Helper to create a sealed header at a given height with a specific parent
+fn make_header(number: u64, parent_hash: B256) -> SealedHeader {
+    let header = Header { number, parent_hash, gas_limit: 1_000_000, ..Default::default() };
+    SealedHeader::seal_slow(header)
+}
+
+/// Test update_history_indices_inconsistent for account history.
+///
+/// This test verifies that:
+/// 1. Account change sets are correctly indexed into account history
+/// 2. Appending to existing history works correctly
+/// 3.
Old shards are deleted when appending +pub fn test_update_history_indices_account(hot_kv: &T) { + let addr1 = address!("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + let addr2 = address!("0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + + // Phase 1: Write account change sets for blocks 1-3 + { + let writer = hot_kv.writer().unwrap(); + + // Block 1: addr1 changed + let pre_acc = Account::default(); + writer.write_account_prestate(1, addr1, &pre_acc).unwrap(); + + // Block 2: addr1 and addr2 changed + let acc1 = Account { nonce: 1, balance: U256::from(100), bytecode_hash: None }; + writer.write_account_prestate(2, addr1, &acc1).unwrap(); + writer.write_account_prestate(2, addr2, &pre_acc).unwrap(); + + // Block 3: addr2 changed + let acc2 = Account { nonce: 1, balance: U256::from(200), bytecode_hash: None }; + writer.write_account_prestate(3, addr2, &acc2).unwrap(); + + writer.commit().unwrap(); + } + + // Phase 2: Run update_history_indices_inconsistent for blocks 1-3 + { + let writer = hot_kv.writer().unwrap(); + writer.update_history_indices_inconsistent(1..=3).unwrap(); + writer.commit().unwrap(); + } + + // Phase 3: Verify account history was created correctly + { + let reader = hot_kv.reader().unwrap(); + + // addr1 should have history at blocks 1, 2 + let (_, history1) = + reader.last_account_history(addr1).unwrap().expect("addr1 should have history"); + let blocks1: Vec = history1.iter().collect(); + assert_eq!(blocks1, vec![1, 2], "addr1 history mismatch"); + + // addr2 should have history at blocks 2, 3 + let (_, history2) = + reader.last_account_history(addr2).unwrap().expect("addr2 should have history"); + let blocks2: Vec = history2.iter().collect(); + assert_eq!(blocks2, vec![2, 3], "addr2 history mismatch"); + } + + // Phase 4: Write more change sets for blocks 4-5 + { + let writer = hot_kv.writer().unwrap(); + + // Block 4: addr1 changed + let acc1 = Account { nonce: 2, balance: U256::from(300), bytecode_hash: None }; + writer.write_account_prestate(4, addr1, &acc1).unwrap(); + + // Block 5: addr1 changed again + let acc1_v2 = Account { nonce: 3, balance: U256::from(400), bytecode_hash: None }; + writer.write_account_prestate(5, addr1, &acc1_v2).unwrap(); + + writer.commit().unwrap(); + } + + // Phase 5: Run update_history_indices_inconsistent for blocks 4-5 + { + let writer = hot_kv.writer().unwrap(); + writer.update_history_indices_inconsistent(4..=5).unwrap(); + writer.commit().unwrap(); + } + + // Phase 6: Verify history was appended correctly + { + let reader = hot_kv.reader().unwrap(); + + // addr1 should now have history at blocks 1, 2, 4, 5 + let (_, history1) = + reader.last_account_history(addr1).unwrap().expect("addr1 should have history"); + let blocks1: Vec = history1.iter().collect(); + assert_eq!(blocks1, vec![1, 2, 4, 5], "addr1 history mismatch after append"); + + // addr2 should still have history at blocks 2, 3 (unchanged) + let (_, history2) = + reader.last_account_history(addr2).unwrap().expect("addr2 should have history"); + let blocks2: Vec = history2.iter().collect(); + assert_eq!(blocks2, vec![2, 3], "addr2 history should be unchanged"); + } +} + +/// Test update_history_indices_inconsistent for storage history. +/// +/// This test verifies that: +/// 1. Storage change sets are correctly indexed into storage history +/// 2. Appending to existing history works correctly +/// 3. Old shards are deleted when appending +/// 4. 
Different slots for the same address are tracked separately +pub fn test_update_history_indices_storage(hot_kv: &T) { + let addr1 = address!("0xcccccccccccccccccccccccccccccccccccccccc"); + let slot1 = U256::from(1); + let slot2 = U256::from(2); + + // Phase 1: Write storage change sets for blocks 1-3 + { + let writer = hot_kv.writer().unwrap(); + + // Block 1: addr1.slot1 changed + writer.write_storage_prestate(1, addr1, &slot1, &U256::ZERO).unwrap(); + + // Block 2: addr1.slot1 and addr1.slot2 changed + writer.write_storage_prestate(2, addr1, &slot1, &U256::from(100)).unwrap(); + writer.write_storage_prestate(2, addr1, &slot2, &U256::ZERO).unwrap(); + + // Block 3: addr1.slot2 changed + writer.write_storage_prestate(3, addr1, &slot2, &U256::from(200)).unwrap(); + + writer.commit().unwrap(); + } + + // Phase 2: Run update_history_indices_inconsistent for blocks 1-3 + { + let writer = hot_kv.writer().unwrap(); + writer.update_history_indices_inconsistent(1..=3).unwrap(); + writer.commit().unwrap(); + } + + // Phase 3: Verify storage history was created correctly + { + let reader = hot_kv.reader().unwrap(); + + // addr1.slot1 should have history at blocks 1, 2 + let (_, history1) = reader + .last_storage_history(&addr1, &slot1) + .unwrap() + .expect("addr1.slot1 should have history"); + let blocks1: Vec = history1.iter().collect(); + assert_eq!(blocks1, vec![1, 2], "addr1.slot1 history mismatch"); + + // addr1.slot2 should have history at blocks 2, 3 + let (_, history2) = reader + .last_storage_history(&addr1, &slot2) + .unwrap() + .expect("addr1.slot2 should have history"); + let blocks2: Vec = history2.iter().collect(); + assert_eq!(blocks2, vec![2, 3], "addr1.slot2 history mismatch"); + } + + // Phase 4: Write more change sets for blocks 4-5 + { + let writer = hot_kv.writer().unwrap(); + + // Block 4: addr1.slot1 changed + writer.write_storage_prestate(4, addr1, &slot1, &U256::from(300)).unwrap(); + + // Block 5: addr1.slot1 changed again + writer.write_storage_prestate(5, addr1, &slot1, &U256::from(400)).unwrap(); + + writer.commit().unwrap(); + } + + // Phase 5: Run update_history_indices_inconsistent for blocks 4-5 + { + let writer = hot_kv.writer().unwrap(); + writer.update_history_indices_inconsistent(4..=5).unwrap(); + writer.commit().unwrap(); + } + + // Phase 6: Verify history was appended correctly + { + let reader = hot_kv.reader().unwrap(); + + // addr1.slot1 should now have history at blocks 1, 2, 4, 5 + let (_, history1) = reader + .last_storage_history(&addr1, &slot1) + .unwrap() + .expect("addr1.slot1 should have history"); + let blocks1: Vec = history1.iter().collect(); + assert_eq!(blocks1, vec![1, 2, 4, 5], "addr1.slot1 history mismatch after append"); + + // addr1.slot2 should still have history at blocks 2, 3 (unchanged) + let (_, history2) = reader + .last_storage_history(&addr1, &slot2) + .unwrap() + .expect("addr1.slot2 should have history"); + let blocks2: Vec = history2.iter().collect(); + assert_eq!(blocks2, vec![2, 3], "addr1.slot2 history should be unchanged"); + } +} + +/// Test that appending to history correctly removes old entries at same k1,k2. +/// +/// This test specifically verifies that when we append new indices to an existing +/// shard, the old shard is properly deleted so we don't end up with duplicate data. 
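+///
+/// The append path this exercises, sketched with a hypothetical `HotKv`
+/// handle `kv` (`addr` and `account` stand in for any test values):
+///
+/// ```ignore
+/// // Record a change set for block 40 ...
+/// let writer = kv.writer()?;
+/// writer.write_account_prestate(40, addr, &account)?;
+/// writer.commit()?;
+/// // ... then fold it into the sharded history index for that block range.
+/// let writer = kv.writer()?;
+/// writer.update_history_indices_inconsistent(40..=40)?;
+/// writer.commit()?;
+/// ```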
+pub fn test_history_append_removes_old_entries<T: HotKv>(hot_kv: &T) {
+    let addr = address!("0xdddddddddddddddddddddddddddddddddddddddd");
+
+    // Phase 1: Manually write account history
+    {
+        let writer = hot_kv.writer().unwrap();
+        let initial_history = BlockNumberList::new([10, 20, 30]).unwrap();
+        writer.write_account_history(&addr, u64::MAX, &initial_history).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Verify initial state
+    {
+        let reader = hot_kv.reader().unwrap();
+        let (key, history) =
+            reader.last_account_history(addr).unwrap().expect("should have history");
+        assert_eq!(key, u64::MAX);
+        let blocks: Vec<u64> = history.iter().collect();
+        assert_eq!(blocks, vec![10, 20, 30]);
+    }
+
+    // Phase 2: Write account change set for block 40
+    {
+        let writer = hot_kv.writer().unwrap();
+        let acc = Account { nonce: 1, balance: U256::from(100), bytecode_hash: None };
+        writer.write_account_prestate(40, addr, &acc).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Phase 3: Run update_history_indices_inconsistent
+    {
+        let writer = hot_kv.writer().unwrap();
+        writer.update_history_indices_inconsistent(40..=40).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Phase 4: Verify history was correctly appended
+    {
+        let reader = hot_kv.reader().unwrap();
+        let (key, history) =
+            reader.last_account_history(addr).unwrap().expect("should have history");
+        assert_eq!(key, u64::MAX, "key should still be u64::MAX");
+        let blocks: Vec<u64> = history.iter().collect();
+        assert_eq!(blocks, vec![10, 20, 30, 40], "history should include appended block");
+    }
+}
+
+/// Test deleting dual-keyed account history entries.
+///
+/// This test verifies that:
+/// 1. Writing dual-keyed entries works correctly
+/// 2. Deleting specific dual-keyed entries removes only that entry
+/// 3. Other entries for the same k1 remain intact
+/// 4.
Traversal after deletion shows the entry is gone +pub fn test_delete_dual_account_history(hot_kv: &T) { + let addr1 = address!("0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"); + let addr2 = address!("0xffffffffffffffffffffffffffffffffffffffff"); + + // Phase 1: Write account history entries for multiple addresses + { + let writer = hot_kv.writer().unwrap(); + + // Write history for addr1 at two different shard keys + let history1_a = BlockNumberList::new([1, 2, 3]).unwrap(); + let history1_b = BlockNumberList::new([4, 5, 6]).unwrap(); + writer.write_account_history(&addr1, 100, &history1_a).unwrap(); + writer.write_account_history(&addr1, u64::MAX, &history1_b).unwrap(); + + // Write history for addr2 + let history2 = BlockNumberList::new([10, 20, 30]).unwrap(); + writer.write_account_history(&addr2, u64::MAX, &history2).unwrap(); + + writer.commit().unwrap(); + } + + // Phase 2: Verify all entries exist + { + let reader = hot_kv.reader().unwrap(); + + // Check addr1 entries + let hist1_a = reader.get_account_history(&addr1, 100).unwrap(); + assert!(hist1_a.is_some(), "addr1 shard 100 should exist"); + assert_eq!(hist1_a.unwrap().iter().collect::>(), vec![1, 2, 3]); + + let hist1_b = reader.get_account_history(&addr1, u64::MAX).unwrap(); + assert!(hist1_b.is_some(), "addr1 shard u64::MAX should exist"); + assert_eq!(hist1_b.unwrap().iter().collect::>(), vec![4, 5, 6]); + + // Check addr2 entry + let hist2 = reader.get_account_history(&addr2, u64::MAX).unwrap(); + assert!(hist2.is_some(), "addr2 should exist"); + assert_eq!(hist2.unwrap().iter().collect::>(), vec![10, 20, 30]); + } + + // Phase 3: Delete addr1's u64::MAX entry + { + let writer = hot_kv.writer().unwrap(); + writer.queue_delete_dual::(&addr1, &u64::MAX).unwrap(); + writer.commit().unwrap(); + } + + // Phase 4: Verify only the deleted entry is gone + { + let reader = hot_kv.reader().unwrap(); + + // addr1 shard 100 should still exist + let hist1_a = reader.get_account_history(&addr1, 100).unwrap(); + assert!(hist1_a.is_some(), "addr1 shard 100 should still exist after delete"); + assert_eq!(hist1_a.unwrap().iter().collect::>(), vec![1, 2, 3]); + + // addr1 shard u64::MAX should be gone + let hist1_b = reader.get_account_history(&addr1, u64::MAX).unwrap(); + assert!(hist1_b.is_none(), "addr1 shard u64::MAX should be deleted"); + + // addr2 should be unaffected + let hist2 = reader.get_account_history(&addr2, u64::MAX).unwrap(); + assert!(hist2.is_some(), "addr2 should be unaffected by delete"); + assert_eq!(hist2.unwrap().iter().collect::>(), vec![10, 20, 30]); + + // Verify last_account_history now returns shard 100 for addr1 + let (key, _) = + reader.last_account_history(addr1).unwrap().expect("addr1 should still have history"); + assert_eq!(key, 100, "last shard for addr1 should now be 100"); + } +} + +/// Test deleting dual-keyed storage history entries. +/// +/// This test verifies that: +/// 1. Writing storage history entries works correctly +/// 2. Deleting specific (address, slot, shard) entries removes only that entry +/// 3. Other slots for the same address remain intact +/// 4. 
Traversal after deletion shows the entry is gone
+pub fn test_delete_dual_storage_history<T: HotKv>(hot_kv: &T) {
+    use reth_db::models::ShardedKey;
+
+    let addr = address!("0x1111111111111111111111111111111111111111");
+    let slot1 = U256::from(100);
+    let slot2 = U256::from(200);
+
+    // Phase 1: Write storage history entries for multiple slots
+    {
+        let writer = hot_kv.writer().unwrap();
+
+        // Write history for slot1
+        let history1 = BlockNumberList::new([1, 2, 3]).unwrap();
+        writer.write_storage_history(&addr, slot1, u64::MAX, &history1).unwrap();
+
+        // Write history for slot2
+        let history2 = BlockNumberList::new([10, 20, 30]).unwrap();
+        writer.write_storage_history(&addr, slot2, u64::MAX, &history2).unwrap();
+
+        writer.commit().unwrap();
+    }
+
+    // Phase 2: Verify both entries exist
+    {
+        let reader = hot_kv.reader().unwrap();
+
+        let hist1 = reader.get_storage_history(&addr, slot1, u64::MAX).unwrap();
+        assert!(hist1.is_some(), "slot1 should exist");
+        assert_eq!(hist1.unwrap().iter().collect::<Vec<_>>(), vec![1, 2, 3]);
+
+        let hist2 = reader.get_storage_history(&addr, slot2, u64::MAX).unwrap();
+        assert!(hist2.is_some(), "slot2 should exist");
+        assert_eq!(hist2.unwrap().iter().collect::<Vec<_>>(), vec![10, 20, 30]);
+    }
+
+    // Phase 3: Delete slot1's entry
+    {
+        let writer = hot_kv.writer().unwrap();
+        let key_to_delete = ShardedKey::new(slot1, u64::MAX);
+        writer.queue_delete_dual::<StorageHistory>(&addr, &key_to_delete).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Phase 4: Verify only slot1 is gone
+    {
+        let reader = hot_kv.reader().unwrap();
+
+        // slot1 should be gone
+        let hist1 = reader.get_storage_history(&addr, slot1, u64::MAX).unwrap();
+        assert!(hist1.is_none(), "slot1 should be deleted");
+
+        // slot2 should be unaffected
+        let hist2 = reader.get_storage_history(&addr, slot2, u64::MAX).unwrap();
+        assert!(hist2.is_some(), "slot2 should be unaffected");
+        assert_eq!(hist2.unwrap().iter().collect::<Vec<_>>(), vec![10, 20, 30]);
+
+        // last_storage_history for slot1 should return None
+        let last1 = reader.last_storage_history(&addr, &slot1).unwrap();
+        assert!(last1.is_none(), "last_storage_history for slot1 should return None");
+
+        // last_storage_history for slot2 should still work
+        let last2 = reader.last_storage_history(&addr, &slot2).unwrap();
+        assert!(last2.is_some(), "last_storage_history for slot2 should still work");
+    }
+}
+
+/// Test deleting and re-adding dual-keyed entries.
+///
+/// This test verifies that after deleting an entry, we can write a new entry
+/// with the same key and it works correctly.
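+///
+/// The delete/rewrite cycle, sketched with a hypothetical `HotKv` handle
+/// `kv` (`new_history` stands in for any replacement `BlockNumberList`):
+///
+/// ```ignore
+/// // Drop the existing shard under (addr, u64::MAX) ...
+/// let writer = kv.writer()?;
+/// writer.queue_delete_dual::<AccountsHistory>(&addr, &u64::MAX)?;
+/// writer.commit()?;
+/// // ... then a fresh write under the same dual key must succeed.
+/// let writer = kv.writer()?;
+/// writer.write_account_history(&addr, u64::MAX, &new_history)?;
+/// writer.commit()?;
+/// ```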
+pub fn test_delete_and_rewrite_dual(hot_kv: &T) { + let addr = address!("0x2222222222222222222222222222222222222222"); + + // Phase 1: Write initial entry + { + let writer = hot_kv.writer().unwrap(); + let history = BlockNumberList::new([1, 2, 3]).unwrap(); + writer.write_account_history(&addr, u64::MAX, &history).unwrap(); + writer.commit().unwrap(); + } + + // Verify initial state + { + let reader = hot_kv.reader().unwrap(); + let hist = reader.get_account_history(&addr, u64::MAX).unwrap(); + assert_eq!(hist.unwrap().iter().collect::>(), vec![1, 2, 3]); + } + + // Phase 2: Delete the entry + { + let writer = hot_kv.writer().unwrap(); + writer.queue_delete_dual::(&addr, &u64::MAX).unwrap(); + writer.commit().unwrap(); + } + + // Verify deleted + { + let reader = hot_kv.reader().unwrap(); + let hist = reader.get_account_history(&addr, u64::MAX).unwrap(); + assert!(hist.is_none(), "entry should be deleted"); + } + + // Phase 3: Write new entry with same key but different value + { + let writer = hot_kv.writer().unwrap(); + let new_history = BlockNumberList::new([100, 200, 300]).unwrap(); + writer.write_account_history(&addr, u64::MAX, &new_history).unwrap(); + writer.commit().unwrap(); + } + + // Verify new value + { + let reader = hot_kv.reader().unwrap(); + let hist = reader.get_account_history(&addr, u64::MAX).unwrap(); + assert!(hist.is_some(), "new entry should exist"); + assert_eq!(hist.unwrap().iter().collect::>(), vec![100, 200, 300]); + } +} + +/// Test clear_range on a single-keyed table. +/// +/// This test verifies that: +/// 1. Keys within the range are deleted +/// 2. Keys outside the range remain intact +/// 3. Edge cases like adjacent keys and boundary conditions work correctly +pub fn test_clear_range(hot_kv: &T) { + // Phase 1: Write 15 headers with block numbers 0-14 + { + let writer = hot_kv.writer().unwrap(); + for i in 0u64..15 { + let header = Header { number: i, gas_limit: 1_000_000, ..Default::default() }; + writer.put_header_inconsistent(&header).unwrap(); + } + writer.commit().unwrap(); + } + + // Verify all headers exist + { + let reader = hot_kv.reader().unwrap(); + for i in 0u64..15 { + assert!(reader.get_header(i).unwrap().is_some(), "header {} should exist", i); + } + } + + // Phase 2: Clear range 5..=9 (middle range) + { + let writer = hot_kv.writer().unwrap(); + writer.clear_range::(5..=9).unwrap(); + writer.commit().unwrap(); + } + + // Verify: 0-4 and 10-14 should exist, 5-9 should be gone + { + let reader = hot_kv.reader().unwrap(); + + // Keys before range should exist + for i in 0u64..5 { + assert!(reader.get_header(i).unwrap().is_some(), "header {} should still exist", i); + } + + // Keys in range should be deleted + for i in 5u64..10 { + assert!(reader.get_header(i).unwrap().is_none(), "header {} should be deleted", i); + } + + // Keys after range should exist + for i in 10u64..15 { + assert!(reader.get_header(i).unwrap().is_some(), "header {} should still exist", i); + } + } + + // Phase 3: Test corner case - clear adjacent keys at the boundary + { + let writer = hot_kv.writer().unwrap(); + // Clear keys 3 and 4 (adjacent to the already cleared range) + writer.clear_range::(3..=4).unwrap(); + writer.commit().unwrap(); + } + + // Verify: 0-2 and 10-14 should exist, 3-9 should be gone + { + let reader = hot_kv.reader().unwrap(); + + // Keys 0-2 should exist + for i in 0u64..3 { + assert!(reader.get_header(i).unwrap().is_some(), "header {} should still exist", i); + } + + // Keys 3-9 should all be deleted now + for i in 3u64..10 { + 
assert!(reader.get_header(i).unwrap().is_none(), "header {} should be deleted", i); + } + + // Keys 10-14 should exist + for i in 10u64..15 { + assert!(reader.get_header(i).unwrap().is_some(), "header {} should still exist", i); + } + } + + // Phase 4: Test clearing a range that includes the first key + { + let writer = hot_kv.writer().unwrap(); + writer.clear_range::(0..=1).unwrap(); + writer.commit().unwrap(); + } + + { + let reader = hot_kv.reader().unwrap(); + assert!(reader.get_header(0).unwrap().is_none(), "header 0 should be deleted"); + assert!(reader.get_header(1).unwrap().is_none(), "header 1 should be deleted"); + assert!(reader.get_header(2).unwrap().is_some(), "header 2 should still exist"); + } + + // Phase 5: Test clearing a range that includes the last key + { + let writer = hot_kv.writer().unwrap(); + writer.clear_range::(13..=14).unwrap(); + writer.commit().unwrap(); + } + + { + let reader = hot_kv.reader().unwrap(); + assert!(reader.get_header(12).unwrap().is_some(), "header 12 should still exist"); + assert!(reader.get_header(13).unwrap().is_none(), "header 13 should be deleted"); + assert!(reader.get_header(14).unwrap().is_none(), "header 14 should be deleted"); + } + + // Phase 6: Test clearing a single key + { + let writer = hot_kv.writer().unwrap(); + writer.clear_range::(11..=11).unwrap(); + writer.commit().unwrap(); + } + + { + let reader = hot_kv.reader().unwrap(); + assert!(reader.get_header(10).unwrap().is_some(), "header 10 should still exist"); + assert!(reader.get_header(11).unwrap().is_none(), "header 11 should be deleted"); + assert!(reader.get_header(12).unwrap().is_some(), "header 12 should still exist"); + } + + // Phase 7: Test clearing a range where nothing exists (should be no-op) + { + let writer = hot_kv.writer().unwrap(); + writer.clear_range::(100..=200).unwrap(); + writer.commit().unwrap(); + } + + // Verify remaining keys are still intact + { + let reader = hot_kv.reader().unwrap(); + assert!(reader.get_header(2).unwrap().is_some(), "header 2 should still exist"); + assert!(reader.get_header(10).unwrap().is_some(), "header 10 should still exist"); + assert!(reader.get_header(12).unwrap().is_some(), "header 12 should still exist"); + } +} + +/// Test take_range on a single-keyed table. +/// +/// Similar to clear_range but also returns the removed keys. 
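+///
+/// Expected call shape, as a sketch (hypothetical `HotKv` handle `kv`,
+/// with headers already stored at blocks 0-9):
+///
+/// ```ignore
+/// let writer = kv.writer()?;
+/// // Removes keys 3..=6 and returns the removed (key, value) pairs in key order.
+/// let removed = writer.take_range::<Headers>(3..=6)?;
+/// assert_eq!(removed.len(), 4);
+/// writer.commit()?;
+/// ```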
+pub fn test_take_range(hot_kv: &T) { + let headers = (0..10u64) + .map(|i| Header { number: i, gas_limit: 1_000_000, ..Default::default() }) + .collect::>(); + + // Phase 1: Write 10 headers with block numbers 0-9 + { + let writer = hot_kv.writer().unwrap(); + for header in headers.iter() { + writer.put_header_inconsistent(header).unwrap(); + } + writer.commit().unwrap(); + } + + // Phase 2: Take range 3..=6 and verify returned keys + { + let writer = hot_kv.writer().unwrap(); + let removed = writer.take_range::(3..=6).unwrap(); + writer.commit().unwrap(); + + // Should return keys 3, 4, 5, 6 in order + assert_eq!(removed.len(), 4); + + for i in 0..4 { + assert_eq!(removed[i].0, (i as u64) + 3); + assert_eq!(&removed[i].1, &headers[i + 3]); + } + } + + // Verify the keys are actually removed + { + let reader = hot_kv.reader().unwrap(); + for i in 0u64..3 { + assert!(reader.get_header(i).unwrap().is_some(), "header {} should exist", i); + } + for i in 3u64..7 { + assert!(reader.get_header(i).unwrap().is_none(), "header {} should be gone", i); + } + for i in 7u64..10 { + assert!(reader.get_header(i).unwrap().is_some(), "header {} should exist", i); + } + } + + // Phase 3: Take empty range (nothing to remove) + { + let writer = hot_kv.writer().unwrap(); + let removed = writer.take_range::(100..=200).unwrap(); + writer.commit().unwrap(); + + assert!(removed.is_empty(), "should return empty vec for non-existent range"); + } + + // Phase 4: Take single key + { + let writer = hot_kv.writer().unwrap(); + let removed = writer.take_range::(8..=8).unwrap(); + writer.commit().unwrap(); + + assert_eq!(removed.len(), 1); + assert_eq!(removed[0].0, 8); + assert_eq!(&removed[0].1, &headers[8]); + } + + { + let reader = hot_kv.reader().unwrap(); + assert!(reader.get_header(7).unwrap().is_some()); + assert!(reader.get_header(8).unwrap().is_none()); + assert!(reader.get_header(9).unwrap().is_some()); + } +} + +/// Test clear_range_dual on a dual-keyed table. +/// +/// This test verifies that: +/// 1. All k2 entries for k1 values within the range are deleted +/// 2. k1 values outside the range remain intact +/// 3. 
Edge cases work correctly +pub fn test_clear_range_dual(hot_kv: &T) { + let addr1 = address!("0x1000000000000000000000000000000000000001"); + let addr2 = address!("0x2000000000000000000000000000000000000002"); + let addr3 = address!("0x3000000000000000000000000000000000000003"); + let addr4 = address!("0x4000000000000000000000000000000000000004"); + let addr5 = address!("0x5000000000000000000000000000000000000005"); + + // Phase 1: Write account history entries for multiple addresses with multiple shards + { + let writer = hot_kv.writer().unwrap(); + + // addr1: two shards + let history1_a = BlockNumberList::new([1, 2, 3]).unwrap(); + let history1_b = BlockNumberList::new([4, 5, 6]).unwrap(); + writer.write_account_history(&addr1, 100, &history1_a).unwrap(); + writer.write_account_history(&addr1, u64::MAX, &history1_b).unwrap(); + + // addr2: one shard + let history2 = BlockNumberList::new([10, 20]).unwrap(); + writer.write_account_history(&addr2, u64::MAX, &history2).unwrap(); + + // addr3: one shard + let history3 = BlockNumberList::new([30, 40]).unwrap(); + writer.write_account_history(&addr3, u64::MAX, &history3).unwrap(); + + // addr4: two shards + let history4_a = BlockNumberList::new([50, 60]).unwrap(); + let history4_b = BlockNumberList::new([70, 80]).unwrap(); + writer.write_account_history(&addr4, 200, &history4_a).unwrap(); + writer.write_account_history(&addr4, u64::MAX, &history4_b).unwrap(); + + // addr5: one shard + let history5 = BlockNumberList::new([90, 100]).unwrap(); + writer.write_account_history(&addr5, u64::MAX, &history5).unwrap(); + + writer.commit().unwrap(); + } + + // Verify all entries exist + { + let reader = hot_kv.reader().unwrap(); + assert!(reader.get_account_history(&addr1, 100).unwrap().is_some()); + assert!(reader.get_account_history(&addr1, u64::MAX).unwrap().is_some()); + assert!(reader.get_account_history(&addr2, u64::MAX).unwrap().is_some()); + assert!(reader.get_account_history(&addr3, u64::MAX).unwrap().is_some()); + assert!(reader.get_account_history(&addr4, 200).unwrap().is_some()); + assert!(reader.get_account_history(&addr4, u64::MAX).unwrap().is_some()); + assert!(reader.get_account_history(&addr5, u64::MAX).unwrap().is_some()); + } + + // Phase 2: Clear range addr2..=addr3 (middle range) + { + let writer = hot_kv.writer().unwrap(); + writer.clear_range_dual::((addr2, 0)..=(addr3, u64::MAX)).unwrap(); + writer.commit().unwrap(); + } + + // Verify: addr1 and addr4, addr5 should exist, addr2 and addr3 should be gone + { + let reader = hot_kv.reader().unwrap(); + + // addr1 entries should still exist + assert!( + reader.get_account_history(&addr1, 100).unwrap().is_some(), + "addr1 shard 100 should exist" + ); + assert!( + reader.get_account_history(&addr1, u64::MAX).unwrap().is_some(), + "addr1 shard max should exist" + ); + + // addr2 and addr3 should be deleted + assert!( + reader.get_account_history(&addr2, u64::MAX).unwrap().is_none(), + "addr2 should be deleted" + ); + assert!( + reader.get_account_history(&addr3, u64::MAX).unwrap().is_none(), + "addr3 should be deleted" + ); + + // addr4 and addr5 entries should still exist + assert!( + reader.get_account_history(&addr4, 200).unwrap().is_some(), + "addr4 shard 200 should exist" + ); + assert!( + reader.get_account_history(&addr4, u64::MAX).unwrap().is_some(), + "addr4 shard max should exist" + ); + assert!( + reader.get_account_history(&addr5, u64::MAX).unwrap().is_some(), + "addr5 should exist" + ); + } +} + +/// Test take_range_dual on a dual-keyed table. 
+///
+/// Similar to clear_range_dual but also returns the removed (k1, k2) pairs.
+pub fn test_take_range_dual<T: HotKv>(hot_kv: &T) {
+    let addr1 = address!("0xa000000000000000000000000000000000000001");
+    let addr2 = address!("0xb000000000000000000000000000000000000002");
+    let addr3 = address!("0xc000000000000000000000000000000000000003");
+
+    // Phase 1: Write account history entries
+    {
+        let writer = hot_kv.writer().unwrap();
+
+        // addr1: two shards
+        let history1_a = BlockNumberList::new([1, 2]).unwrap();
+        let history1_b = BlockNumberList::new([3, 4]).unwrap();
+        writer.write_account_history(&addr1, 50, &history1_a).unwrap();
+        writer.write_account_history(&addr1, u64::MAX, &history1_b).unwrap();
+
+        // addr2: one shard
+        let history2 = BlockNumberList::new([10, 20]).unwrap();
+        writer.write_account_history(&addr2, u64::MAX, &history2).unwrap();
+
+        // addr3: one shard
+        let history3 = BlockNumberList::new([30, 40]).unwrap();
+        writer.write_account_history(&addr3, u64::MAX, &history3).unwrap();
+
+        writer.commit().unwrap();
+    }
+
+    // Phase 2: Take range addr1..=addr2 and verify returned pairs
+    {
+        let writer = hot_kv.writer().unwrap();
+        let removed = writer
+            .take_range_dual::<AccountsHistory>((addr1, 0)..=(addr2, u64::MAX))
+            .unwrap();
+        writer.commit().unwrap();
+
+        // Should return (addr1, 50), (addr1, max), (addr2, max)
+        assert_eq!(removed.len(), 3, "should have removed 3 entries");
+        assert_eq!(removed[0].0, addr1);
+        assert_eq!(removed[0].1, 50);
+        assert_eq!(removed[1].0, addr1);
+        assert_eq!(removed[1].1, u64::MAX);
+        assert_eq!(removed[2].0, addr2);
+        assert_eq!(removed[2].1, u64::MAX);
+    }
+
+    // Verify only addr3 remains
+    {
+        let reader = hot_kv.reader().unwrap();
+        assert!(reader.get_account_history(&addr1, 50).unwrap().is_none());
+        assert!(reader.get_account_history(&addr1, u64::MAX).unwrap().is_none());
+        assert!(reader.get_account_history(&addr2, u64::MAX).unwrap().is_none());
+        assert!(reader.get_account_history(&addr3, u64::MAX).unwrap().is_some());
+    }
+
+    // Phase 3: Take empty range
+    {
+        let writer = hot_kv.writer().unwrap();
+        let removed = writer
+            .take_range_dual::<AccountsHistory>(
+                (address!("0xf000000000000000000000000000000000000000"), 0)
+                    ..=(address!("0xff00000000000000000000000000000000000000"), u64::MAX),
+            )
+            .unwrap();
+        writer.commit().unwrap();
+
+        assert!(removed.is_empty(), "should return empty vec for non-existent range");
+    }
+}
+
+// ============================================================================
+// Unwind Conformance Test
+// ============================================================================
+
+/// Collect all entries from a single-keyed table.
+fn collect_single_table<T, R>(reader: &R) -> Vec<(T::Key, T::Value)>
+where
+    T: SingleKey,
+    T::Key: Ord,
+    R: HotKvRead,
+{
+    let mut cursor = reader.traverse::<T>().unwrap();
+    let mut entries = Vec::new();
+    if let Some(first) = TableTraverse::<T>::first(&mut *cursor.inner_mut()).unwrap() {
+        entries.push(first);
+        while let Some(next) = TableTraverse::<T>::read_next(&mut *cursor.inner_mut()).unwrap() {
+            entries.push(next);
+        }
+    }
+    entries.sort_by(|a, b| a.0.cmp(&b.0));
+    entries
+}
+
+/// Collect all entries from a dual-keyed table.
+fn collect_dual_table<T, R>(reader: &R) -> Vec<(T::Key, T::Key2, T::Value)>
+where
+    T: DualKey,
+    T::Key: Ord,
+    T::Key2: Ord,
+    R: HotKvRead,
+{
+    let mut cursor = reader.traverse_dual::<T>().unwrap();
+    let mut entries = Vec::new();
+    if let Some(first) = DualTableTraverse::<T>::first(&mut *cursor.inner_mut()).unwrap() {
+        entries.push(first);
+        while let Some(next) =
+            DualTableTraverse::<T>::read_next(&mut *cursor.inner_mut()).unwrap()
+        {
+            entries.push(next);
+        }
+    }
+    entries.sort_by(|a, b| (&a.0, &a.1).cmp(&(&b.0, &b.1)));
+    entries
+}
+
+/// Assert two single-keyed table contents are equal.
+fn assert_single_tables_equal<T>(
+    table_name: &str,
+    a: Vec<(T::Key, T::Value)>,
+    b: Vec<(T::Key, T::Value)>,
+) where
+    T: SingleKey,
+    T::Key: Debug + PartialEq,
+    T::Value: Debug + PartialEq,
+{
+    assert_eq!(
+        a.len(),
+        b.len(),
+        "{} table entry count mismatch: {} vs {}",
+        table_name,
+        a.len(),
+        b.len()
+    );
+    for (i, (entry_a, entry_b)) in a.iter().zip(b.iter()).enumerate() {
+        assert_eq!(
+            entry_a, entry_b,
+            "{} table entry {} mismatch:\n  A: {:?}\n  B: {:?}",
+            table_name, i, entry_a, entry_b
+        );
+    }
+}
+
+/// Assert two dual-keyed table contents are equal.
+fn assert_dual_tables_equal<T>(
+    table_name: &str,
+    a: Vec<(T::Key, T::Key2, T::Value)>,
+    b: Vec<(T::Key, T::Key2, T::Value)>,
+) where
+    T: DualKey,
+    T::Key: Debug + PartialEq,
+    T::Key2: Debug + PartialEq,
+    T::Value: Debug + PartialEq,
+{
+    assert_eq!(
+        a.len(),
+        b.len(),
+        "{} table entry count mismatch: {} vs {}",
+        table_name,
+        a.len(),
+        b.len()
+    );
+    for (i, (entry_a, entry_b)) in a.iter().zip(b.iter()).enumerate() {
+        assert_eq!(
+            entry_a, entry_b,
+            "{} table entry {} mismatch:\n  A: {:?}\n  B: {:?}",
+            table_name, i, entry_a, entry_b
+        );
+    }
+}
+
+/// Create a BundleState with account and storage changes.
+///
+/// This function creates a proper BundleState with reverts populated so that
+/// `to_plain_state_and_reverts` will produce the expected output.
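+///
+/// Example invocation, mirroring how the unwind test below builds block 0
+/// (`addr1`/`slot1` are that test's fixtures):
+///
+/// ```ignore
+/// // addr1 is created (no original info) with nonce 1 / balance 100,
+/// // and its slot1 transitions from ZERO to 10.
+/// let bundle = make_bundle_state(
+///     vec![(addr1, None, Some(make_account_info(1, U256::from(100), None)))],
+///     vec![(addr1, vec![(slot1, U256::ZERO, U256::from(10))])],
+///     vec![],
+/// );
+/// ```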
+#[allow(clippy::type_complexity)]
+fn make_bundle_state(
+    accounts: Vec<(Address, Option<AccountInfo>, Option<AccountInfo>)>,
+    storage: Vec<(Address, Vec<(U256, U256, U256)>)>, // (addr, [(slot, old, new)])
+    _contracts: Vec<(B256, RevmBytecode)>,
+) -> BundleState {
+    let mut state: HashMap<Address, BundleAccount> = Default::default();
+
+    // Build account reverts for this block
+    let mut block_reverts: Vec<(Address, AccountRevert)> = Vec::new();
+
+    for (addr, original, info) in &accounts {
+        let account_storage: HashMap<U256, StorageSlot> = Default::default();
+        state.insert(
+            *addr,
+            BundleAccount {
+                info: info.clone(),
+                original_info: original.clone(),
+                storage: account_storage,
+                status: AccountStatus::Changed,
+            },
+        );
+
+        // Create account revert - this stores what to restore to when unwinding
+        let account_info_revert = match original {
+            Some(orig) => AccountInfoRevert::RevertTo(orig.clone()),
+            None => AccountInfoRevert::DeleteIt,
+        };
+
+        block_reverts.push((
+            *addr,
+            AccountRevert {
+                account: account_info_revert,
+                storage: Default::default(), // Storage reverts added below
+                previous_status: AccountStatus::Changed,
+                wipe_storage: false,
+            },
+        ));
+    }
+
+    // Process storage changes
+    for (addr, slots) in &storage {
+        let account = state.entry(*addr).or_insert_with(|| BundleAccount {
+            info: None,
+            original_info: None,
+            storage: Default::default(),
+            status: AccountStatus::Changed,
+        });
+
+        // Find or create the account revert entry
+        let revert_entry = block_reverts.iter_mut().find(|(a, _)| a == addr);
+        let account_revert = if let Some((_, revert)) = revert_entry {
+            revert
+        } else {
+            block_reverts.push((
+                *addr,
+                AccountRevert {
+                    account: AccountInfoRevert::DoNothing,
+                    storage: Default::default(),
+                    previous_status: AccountStatus::Changed,
+                    wipe_storage: false,
+                },
+            ));
+            &mut block_reverts.last_mut().unwrap().1
+        };
+
+        for (slot, old_value, new_value) in slots {
+            account.storage.insert(
+                *slot,
+                StorageSlot { previous_or_original_value: *old_value, present_value: *new_value },
+            );
+
+            // Add storage revert entry
+            account_revert.storage.insert(*slot, RevertToSlot::Some(*old_value));
+        }
+    }
+
+    // Create Reverts with one block's worth of reverts
+    let reverts = Reverts::new(vec![block_reverts]);
+
+    BundleState { state, contracts: Default::default(), reverts, state_size: 0, reverts_size: 0 }
+}
+
+/// Create a simple AccountInfo for testing.
+fn make_account_info(nonce: u64, balance: U256, code_hash: Option<B256>) -> AccountInfo {
+    AccountInfo { nonce, balance, code_hash: code_hash.unwrap_or(B256::ZERO), code: None }
+}
+
+/// Test that unwinding produces the exact same state as never having appended.
+///
+/// This test:
+/// 1. Creates 5 blocks with complex state changes
+/// 2. Appends all 5 blocks to store_a, then unwinds to block 1 (keeping blocks 0, 1)
+/// 3. Appends only blocks 0, 1 to store_b
+/// 4.
Compares ALL tables between the two stores - they must be exactly equal +/// +/// This proves that `unwind_above` correctly reverses all state changes including: +/// - Plain account state +/// - Plain storage state +/// - Headers and header number mappings +/// - Account and storage change sets +/// - Account and storage history indices +pub fn test_unwind_conformance(store_a: &Kv, store_b: &Kv) { + // Test addresses + let addr1 = address!("0x1111111111111111111111111111111111111111"); + let addr2 = address!("0x2222222222222222222222222222222222222222"); + let addr3 = address!("0x3333333333333333333333333333333333333333"); + let addr4 = address!("0x4444444444444444444444444444444444444444"); + + // Storage slots + let slot1 = U256::from(1); + let slot2 = U256::from(2); + let slot3 = U256::from(3); + + // Create bytecode + let code = Bytes::from_static(&[0x60, 0x00, 0x60, 0x00, 0xf3]); + let bytecode = RevmBytecode::new_raw(code); + let code_hash = bytecode.hash_slow(); + + // Create 5 blocks with complex state + let mut blocks: Vec<(SealedHeader, BundleState)> = Vec::new(); + let mut prev_hash = B256::ZERO; + + // Block 0: Create addr1, addr2, addr3 with different states + { + let header = Header { + number: 0, + parent_hash: prev_hash, + gas_limit: 1_000_000, + ..Default::default() + }; + let sealed = SealedHeader::seal_slow(header); + prev_hash = sealed.hash(); + + let bundle = make_bundle_state( + vec![ + (addr1, None, Some(make_account_info(1, U256::from(100), None))), + (addr2, None, Some(make_account_info(1, U256::from(200), None))), + (addr3, None, Some(make_account_info(1, U256::from(300), None))), + ], + vec![(addr1, vec![(slot1, U256::ZERO, U256::from(10))])], + vec![], + ); + blocks.push((sealed, bundle)); + } + + // Block 1: Update addr1, addr2; add storage to addr2 + { + let header = Header { + number: 1, + parent_hash: prev_hash, + gas_limit: 1_000_000, + ..Default::default() + }; + let sealed = SealedHeader::seal_slow(header); + prev_hash = sealed.hash(); + + let bundle = make_bundle_state( + vec![ + ( + addr1, + Some(make_account_info(1, U256::from(100), None)), + Some(make_account_info(2, U256::from(150), None)), + ), + ( + addr2, + Some(make_account_info(1, U256::from(200), None)), + Some(make_account_info(2, U256::from(250), None)), + ), + ], + vec![ + (addr1, vec![(slot1, U256::from(10), U256::from(20))]), + (addr2, vec![(slot1, U256::ZERO, U256::from(100))]), + ], + vec![], + ); + blocks.push((sealed, bundle)); + } + + // Block 2: Update addr3, add bytecode (this is the boundary - will be unwound) + { + let header = Header { + number: 2, + parent_hash: prev_hash, + gas_limit: 1_000_000, + ..Default::default() + }; + let sealed = SealedHeader::seal_slow(header); + prev_hash = sealed.hash(); + + let bundle = make_bundle_state( + vec![( + addr3, + Some(make_account_info(1, U256::from(300), None)), + Some(make_account_info(2, U256::from(350), Some(code_hash))), + )], + vec![(addr3, vec![(slot1, U256::ZERO, U256::from(1000))])], + vec![(code_hash, bytecode.clone())], + ); + blocks.push((sealed, bundle)); + } + + // Block 3: Create addr4, update existing storage + { + let header = Header { + number: 3, + parent_hash: prev_hash, + gas_limit: 1_000_000, + ..Default::default() + }; + let sealed = SealedHeader::seal_slow(header); + prev_hash = sealed.hash(); + + let bundle = make_bundle_state( + vec![ + (addr4, None, Some(make_account_info(1, U256::from(400), None))), + ( + addr1, + Some(make_account_info(2, U256::from(150), None)), + Some(make_account_info(3, U256::from(175), 
None)), + ), + ], + vec![ + ( + addr1, + vec![ + (slot1, U256::from(20), U256::from(30)), + (slot2, U256::ZERO, U256::from(50)), + ], + ), + (addr4, vec![(slot1, U256::ZERO, U256::from(500))]), + ], + vec![], + ); + blocks.push((sealed, bundle)); + } + + // Block 4: Update multiple addresses and storage + { + let header = Header { + number: 4, + parent_hash: prev_hash, + gas_limit: 1_000_000, + ..Default::default() + }; + let sealed = SealedHeader::seal_slow(header); + + let bundle = make_bundle_state( + vec![ + ( + addr1, + Some(make_account_info(3, U256::from(175), None)), + Some(make_account_info(4, U256::from(200), None)), + ), + ( + addr2, + Some(make_account_info(2, U256::from(250), None)), + Some(make_account_info(3, U256::from(275), None)), + ), + ( + addr4, + Some(make_account_info(1, U256::from(400), None)), + Some(make_account_info(2, U256::from(450), None)), + ), + ], + vec![ + ( + addr1, + vec![ + (slot1, U256::from(30), U256::from(40)), + (slot3, U256::ZERO, U256::from(60)), + ], + ), + ( + addr2, + vec![ + (slot1, U256::from(100), U256::from(150)), + (slot2, U256::ZERO, U256::from(200)), + ], + ), + ], + vec![], + ); + blocks.push((sealed, bundle)); + } + + // Store A: Append all 5 blocks, then unwind to block 1 + { + let writer = store_a.writer().unwrap(); + writer.append_blocks(&blocks).unwrap(); + writer.commit().unwrap(); + } + { + let writer = store_a.writer().unwrap(); + writer.unwind_above(1).unwrap(); + writer.commit().unwrap(); + } + + // Store B: Append only blocks 0, 1 + { + let writer = store_b.writer().unwrap(); + writer.append_blocks(&blocks[0..2]).unwrap(); + writer.commit().unwrap(); + } + + // Compare all tables + let reader_a = store_a.reader().unwrap(); + let reader_b = store_b.reader().unwrap(); + + // Single-keyed tables + assert_single_tables_equal::( + "Headers", + collect_single_table::(&reader_a), + collect_single_table::(&reader_b), + ); + + assert_single_tables_equal::( + "HeaderNumbers", + collect_single_table::(&reader_a), + collect_single_table::(&reader_b), + ); + + assert_single_tables_equal::( + "PlainAccountState", + collect_single_table::(&reader_a), + collect_single_table::(&reader_b), + ); + + // Note: Bytecodes are not removed on unwind (they're content-addressed), + // so store_a may have more bytecodes than store_b. We skip this comparison. + // assert_single_tables_equal::(...) + + // Dual-keyed tables + assert_dual_tables_equal::( + "PlainStorageState", + collect_dual_table::(&reader_a), + collect_dual_table::(&reader_b), + ); + + assert_dual_tables_equal::( + "AccountChangeSets", + collect_dual_table::(&reader_a), + collect_dual_table::(&reader_b), + ); + + assert_dual_tables_equal::( + "StorageChangeSets", + collect_dual_table::(&reader_a), + collect_dual_table::(&reader_b), + ); + + assert_dual_tables_equal::( + "AccountsHistory", + collect_dual_table::(&reader_a), + collect_dual_table::(&reader_b), + ); + + assert_dual_tables_equal::( + "StorageHistory", + collect_dual_table::(&reader_a), + collect_dual_table::(&reader_b), + ); +} + +// ============================================================================ +// Value Edge Case Tests +// ============================================================================ + +/// Test that zero storage values are correctly stored and retrieved. +/// +/// This verifies that U256::ZERO is not confused with "not set" or deleted. 
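+///
+/// In short, the semantics under test (hypothetical `reader`):
+///
+/// ```ignore
+/// // A slot explicitly written as zero is still present...
+/// assert_eq!(reader.get_storage(&addr, &slot)?, Some(U256::ZERO));
+/// // ...while a slot that was never written reads as absent.
+/// assert_eq!(reader.get_storage(&addr, &untouched_slot)?, None);
+/// ```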
+pub fn test_zero_storage_value(hot_kv: &T) { + let addr = address!("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1"); + let slot = U256::from(1); + + // Write zero value + { + let writer = hot_kv.writer().unwrap(); + writer.put_storage(&addr, &slot, &U256::ZERO).unwrap(); + writer.commit().unwrap(); + } + + // Read zero value - should return Some(ZERO), not None + { + let reader = hot_kv.reader().unwrap(); + let value = reader.get_storage(&addr, &slot).unwrap(); + assert!(value.is_some(), "Zero storage value should be Some, not None"); + assert_eq!(value.unwrap(), U256::ZERO, "Zero storage value should be U256::ZERO"); + } + + // Verify via traversal that the entry exists + { + let reader = hot_kv.reader().unwrap(); + let mut cursor = reader.traverse_dual::().unwrap(); + let mut found = false; + while let Some((k1, k2, v)) = cursor.read_next().unwrap() { + if k1 == addr && k2 == slot { + found = true; + assert_eq!(v, U256::ZERO); + } + } + assert!(found, "Zero value entry should exist in table"); + } +} + +/// Test that empty accounts (all zero fields) are correctly stored and retrieved. +/// +/// This verifies that an account with nonce=0, balance=0, no code is not +/// confused with a non-existent account. +pub fn test_empty_account(hot_kv: &T) { + let addr = address!("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa2"); + let empty_account = Account { nonce: 0, balance: U256::ZERO, bytecode_hash: None }; + + // Write empty account + { + let writer = hot_kv.writer().unwrap(); + writer.put_account(&addr, &empty_account).unwrap(); + writer.commit().unwrap(); + } + + // Read empty account - should return Some, not None + { + let reader = hot_kv.reader().unwrap(); + let account = reader.get_account(&addr).unwrap(); + assert!(account.is_some(), "Empty account should be Some, not None"); + let account = account.unwrap(); + assert_eq!(account.nonce, 0); + assert_eq!(account.balance, U256::ZERO); + assert!(account.bytecode_hash.is_none()); + } +} + +/// Test that maximum storage values (U256::MAX) are correctly stored and retrieved. +pub fn test_max_storage_value(hot_kv: &T) { + let addr = address!("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa3"); + let slot = U256::from(1); + + // Write max value + { + let writer = hot_kv.writer().unwrap(); + writer.put_storage(&addr, &slot, &U256::MAX).unwrap(); + writer.commit().unwrap(); + } + + // Read max value + { + let reader = hot_kv.reader().unwrap(); + let value = reader.get_storage(&addr, &slot).unwrap(); + assert!(value.is_some()); + assert_eq!(value.unwrap(), U256::MAX, "Max storage value should be preserved"); + } +} + +/// Test that maximum block numbers (u64::MAX) work correctly in headers. +pub fn test_max_block_number(hot_kv: &T) { + let header = Header { number: u64::MAX, gas_limit: 1_000_000, ..Default::default() }; + let sealed = SealedHeader::seal_slow(header.clone()); + + // Write header at max block number + { + let writer = hot_kv.writer().unwrap(); + writer.put_header(&sealed).unwrap(); + writer.commit().unwrap(); + } + + // Read header + { + let reader = hot_kv.reader().unwrap(); + let read_header = reader.get_header(u64::MAX).unwrap(); + assert!(read_header.is_some()); + assert_eq!(read_header.unwrap().number, u64::MAX); + } +} + +// ============================================================================ +// Cursor Operation Tests +// ============================================================================ + +/// Test cursor operations on an empty table. 
+/// +/// Verifies that first(), last(), exact(), lower_bound() return None on empty tables. +pub fn test_cursor_empty_table(hot_kv: &T) { + // Use a table that we haven't written to in this test + // We'll use HeaderNumbers which should be empty if we haven't written headers with hashes + let reader = hot_kv.reader().unwrap(); + + // Create a fresh address that definitely doesn't exist + let missing_addr = address!("0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb01"); + + // Test single-key cursor on PlainAccountState for a non-existent key + { + let mut cursor = reader.traverse::().unwrap(); + + // exact() for non-existent key should return None + let exact_result = cursor.exact(&missing_addr).unwrap(); + assert!(exact_result.is_none(), "exact() on non-existent key should return None"); + + // lower_bound for a key beyond all existing should return None + let lb_result = + cursor.lower_bound(&address!("0xffffffffffffffffffffffffffffffffffffff99")).unwrap(); + // This might return something if there are entries, but for a truly empty table it would be None + // We're mainly testing that it doesn't panic + let _ = lb_result; + } + + // Test dual-key cursor + { + let mut cursor = reader.traverse_dual::().unwrap(); + + // exact_dual for non-existent keys should return None + let exact_result = cursor.exact_dual(&missing_addr, &U256::from(999)).unwrap(); + assert!(exact_result.is_none(), "exact_dual() on non-existent key should return None"); + } +} + +/// Test cursor exact() match semantics. +/// +/// Verifies that exact() returns only exact matches, not lower_bound semantics. +pub fn test_cursor_exact_match(hot_kv: &T) { + // Write headers at block numbers 10, 20, 30 + { + let writer = hot_kv.writer().unwrap(); + for i in [10u64, 20, 30] { + let header = Header { number: i, gas_limit: 1_000_000, ..Default::default() }; + writer.put_header_inconsistent(&header).unwrap(); + } + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + let mut cursor = reader.traverse::().unwrap(); + + // exact() for existing key should return value + let exact_10 = cursor.exact(&10u64).unwrap(); + assert!(exact_10.is_some(), "exact(10) should find the header"); + assert_eq!(exact_10.unwrap().number, 10); + + // exact() for non-existing key should return None, not the next key + let exact_15 = cursor.exact(&15u64).unwrap(); + assert!(exact_15.is_none(), "exact(15) should return None, not header 20"); + + // Verify lower_bound would have found something at 15 + let lb_15 = cursor.lower_bound(&15u64).unwrap(); + assert!(lb_15.is_some(), "lower_bound(15) should find header 20"); + assert_eq!(lb_15.unwrap().0, 20); +} + +/// Test cursor backward iteration with read_prev(). 
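+///
+/// The traversal pattern, sketched (hypothetical `reader`; this test stores
+/// headers at blocks 100..=104):
+///
+/// ```ignore
+/// let mut cursor = reader.traverse::<Headers>()?;
+/// let mut entry = cursor.last()?;        // Some((104, header))
+/// while let Some((number, _header)) = entry {
+///     // visits 104, 103, ..., 100; read_prev() past the start yields None
+///     entry = cursor.read_prev()?;
+/// }
+/// ```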
+pub fn test_cursor_backward_iteration(hot_kv: &T) { + // Write headers at block numbers 100, 101, 102, 103, 104 + { + let writer = hot_kv.writer().unwrap(); + for i in 100u64..105 { + let header = Header { number: i, gas_limit: 1_000_000, ..Default::default() }; + writer.put_header_inconsistent(&header).unwrap(); + } + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + let mut cursor = reader.traverse::().unwrap(); + + // Position at last entry + let last = cursor.last().unwrap(); + assert!(last.is_some()); + let (num, _) = last.unwrap(); + assert_eq!(num, 104); + + // Iterate backward + let prev1 = cursor.read_prev().unwrap(); + assert!(prev1.is_some()); + assert_eq!(prev1.unwrap().0, 103); + + let prev2 = cursor.read_prev().unwrap(); + assert!(prev2.is_some()); + assert_eq!(prev2.unwrap().0, 102); + + let prev3 = cursor.read_prev().unwrap(); + assert!(prev3.is_some()); + assert_eq!(prev3.unwrap().0, 101); + + let prev4 = cursor.read_prev().unwrap(); + assert!(prev4.is_some()); + assert_eq!(prev4.unwrap().0, 100); + + // Should hit beginning + let prev5 = cursor.read_prev().unwrap(); + assert!(prev5.is_none(), "read_prev() past beginning should return None"); +} + +/// Test dual-key cursor navigation between k1 values. +pub fn test_cursor_dual_navigation(hot_kv: &T) { + let addr1 = address!("0xcccccccccccccccccccccccccccccccccccccc01"); + let addr2 = address!("0xcccccccccccccccccccccccccccccccccccccc02"); + let addr3 = address!("0xcccccccccccccccccccccccccccccccccccccc03"); + + // Write storage for multiple addresses with multiple slots + { + let writer = hot_kv.writer().unwrap(); + + // addr1: slots 1, 2, 3 + writer.put_storage(&addr1, &U256::from(1), &U256::from(10)).unwrap(); + writer.put_storage(&addr1, &U256::from(2), &U256::from(20)).unwrap(); + writer.put_storage(&addr1, &U256::from(3), &U256::from(30)).unwrap(); + + // addr2: slots 1, 2 + writer.put_storage(&addr2, &U256::from(1), &U256::from(100)).unwrap(); + writer.put_storage(&addr2, &U256::from(2), &U256::from(200)).unwrap(); + + // addr3: slot 1 + writer.put_storage(&addr3, &U256::from(1), &U256::from(1000)).unwrap(); + + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + let mut cursor = reader.traverse_dual::().unwrap(); + + // Position at first entry + let first = + DualTableTraverse::::first(&mut *cursor.inner_mut()).unwrap(); + assert!(first.is_some()); + let (k1, k2, _) = first.unwrap(); + assert_eq!(k1, addr1); + assert_eq!(k2, U256::from(1)); + + // next_k1() should jump to addr2 + let next_addr = cursor.next_k1().unwrap(); + assert!(next_addr.is_some()); + let (k1, k2, _) = next_addr.unwrap(); + assert_eq!(k1, addr2, "next_k1() should jump to addr2"); + assert_eq!(k2, U256::from(1), "Should be at first slot of addr2"); + + // next_k1() again should jump to addr3 + let next_addr = cursor.next_k1().unwrap(); + assert!(next_addr.is_some()); + let (k1, _, _) = next_addr.unwrap(); + assert_eq!(k1, addr3, "next_k1() should jump to addr3"); + + // next_k1() again should return None (no more k1 values) + let next_addr = cursor.next_k1().unwrap(); + assert!(next_addr.is_none(), "next_k1() at end should return None"); + + // Test previous_k1() + // First position at addr3 + cursor.last_of_k1(&addr3).unwrap(); + let prev_addr = cursor.previous_k1().unwrap(); + assert!(prev_addr.is_some()); + let (k1, _, _) = prev_addr.unwrap(); + assert_eq!(k1, addr2, "previous_k1() from addr3 should go to addr2"); +} + +/// Test cursor on table with single entry. 
+pub fn test_cursor_single_entry(hot_kv: &T) { + let addr = address!("0xdddddddddddddddddddddddddddddddddddddd01"); + let account = Account { nonce: 42, balance: U256::from(1000), bytecode_hash: None }; + + // Write single account + { + let writer = hot_kv.writer().unwrap(); + writer.put_account(&addr, &account).unwrap(); + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + let mut cursor = reader.traverse::().unwrap(); + + // first() and last() should return the same entry + let first = cursor.first().unwrap(); + assert!(first.is_some()); + let (first_addr, _) = first.unwrap(); + + let last = cursor.last().unwrap(); + assert!(last.is_some()); + let (last_addr, _) = last.unwrap(); + + assert_eq!(first_addr, last_addr, "first() and last() should be same for single entry"); + + // read_next() after first() should return None + cursor.first().unwrap(); + let next = cursor.read_next().unwrap(); + assert!(next.is_none(), "read_next() after first() on single entry should return None"); +} + +// ============================================================================ +// Batch Operation Tests +// ============================================================================ + +/// Test get_many batch retrieval. +pub fn test_get_many(hot_kv: &T) { + let addr1 = address!("0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee01"); + let addr2 = address!("0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee02"); + let addr3 = address!("0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee03"); + let addr4 = address!("0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee04"); // non-existent + + let acc1 = Account { nonce: 1, balance: U256::from(100), bytecode_hash: None }; + let acc2 = Account { nonce: 2, balance: U256::from(200), bytecode_hash: None }; + let acc3 = Account { nonce: 3, balance: U256::from(300), bytecode_hash: None }; + + // Write accounts + { + let writer = hot_kv.writer().unwrap(); + writer.put_account(&addr1, &acc1).unwrap(); + writer.put_account(&addr2, &acc2).unwrap(); + writer.put_account(&addr3, &acc3).unwrap(); + writer.commit().unwrap(); + } + + // Batch retrieve + { + let reader = hot_kv.reader().unwrap(); + let keys = [addr1, addr2, addr3, addr4]; + let results = reader.get_many::(&keys).unwrap(); + + assert_eq!(results.len(), 4); + + // Build a map for easier checking (order not guaranteed) + let result_map: HashMap<&Address, Option> = + results.iter().map(|(k, v)| (*k, *v)).collect(); + + assert!(result_map[&addr1].is_some()); + assert_eq!(result_map[&addr1].as_ref().unwrap().nonce, 1); + + assert!(result_map[&addr2].is_some()); + assert_eq!(result_map[&addr2].as_ref().unwrap().nonce, 2); + + assert!(result_map[&addr3].is_some()); + assert_eq!(result_map[&addr3].as_ref().unwrap().nonce, 3); + + assert!(result_map[&addr4].is_none(), "Non-existent key should return None"); + } +} + +/// Test queue_put_many batch writes. 
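+///
+/// The batch-write call shape, sketched (hypothetical `kv` handle;
+/// `entries` is any `Vec<(u64, Header)>`):
+///
+/// ```ignore
+/// // queue_put_many takes (key, value) reference pairs for a single table.
+/// let refs: Vec<(&u64, &Header)> = entries.iter().map(|(k, v)| (k, v)).collect();
+/// let writer = kv.writer()?;
+/// writer.queue_put_many::<Headers>(refs)?;
+/// writer.commit()?;
+/// ```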
+pub fn test_queue_put_many<T: HotKv>(hot_kv: &T) {
+    let entries: Vec<(u64, Header)> = (200u64..210)
+        .map(|i| (i, Header { number: i, gas_limit: 1_000_000, ..Default::default() }))
+        .collect();
+
+    // Batch write using queue_put_many
+    {
+        let writer = hot_kv.writer().unwrap();
+        let refs: Vec<(&u64, &Header)> = entries.iter().map(|(k, v)| (k, v)).collect();
+        writer.queue_put_many::<Headers>(refs).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Verify all entries exist
+    {
+        let reader = hot_kv.reader().unwrap();
+        for i in 200u64..210 {
+            let header = reader.get_header(i).unwrap();
+            assert!(header.is_some(), "Header {} should exist after batch write", i);
+            assert_eq!(header.unwrap().number, i);
+        }
+    }
+}
+
+/// Test that queue_clear clears all entries in a table.
+pub fn test_queue_clear<T: HotKv>(hot_kv: &T) {
+    // Write some headers
+    {
+        let writer = hot_kv.writer().unwrap();
+        for i in 300u64..310 {
+            let header = Header { number: i, gas_limit: 1_000_000, ..Default::default() };
+            writer.put_header_inconsistent(&header).unwrap();
+        }
+        writer.commit().unwrap();
+    }
+
+    // Verify entries exist
+    {
+        let reader = hot_kv.reader().unwrap();
+        for i in 300u64..310 {
+            assert!(reader.get_header(i).unwrap().is_some());
+        }
+    }
+
+    // Clear the table
+    {
+        let writer = hot_kv.writer().unwrap();
+        writer.queue_clear::<Headers>().unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Verify all entries are gone
+    {
+        let reader = hot_kv.reader().unwrap();
+        for i in 300u64..310 {
+            assert!(
+                reader.get_header(i).unwrap().is_none(),
+                "Header {} should be gone after clear",
+                i
+            );
+        }
+    }
+}
+
+// ============================================================================
+// Transaction Ordering Tests
+// ============================================================================
+
+/// Test that put-then-delete in the same transaction results in deletion.
+pub fn test_put_then_delete_same_key<T: HotKv>(hot_kv: &T) {
+    let addr = address!("0xffffffffffffffffffffffffffffffffffff0001");
+    let account = Account { nonce: 99, balance: U256::from(9999), bytecode_hash: None };
+
+    // In a single transaction: put then delete
+    {
+        let writer = hot_kv.writer().unwrap();
+        writer.put_account(&addr, &account).unwrap();
+        writer.queue_delete::<PlainAccountState>(&addr).unwrap();
+        writer.commit().unwrap();
+    }
+
+    // Account should not exist
+    {
+        let reader = hot_kv.reader().unwrap();
+        let result = reader.get_account(&addr).unwrap();
+        assert!(result.is_none(), "Put-then-delete should result in no entry");
+    }
+}
+
+/// Test that delete-then-put in the same transaction results in the put value.
+pub fn test_delete_then_put_same_key(hot_kv: &T) { + let addr = address!("0xffffffffffffffffffffffffffffffffffff0002"); + let old_account = Account { nonce: 1, balance: U256::from(100), bytecode_hash: None }; + let new_account = Account { nonce: 2, balance: U256::from(200), bytecode_hash: None }; + + // First, write an account + { + let writer = hot_kv.writer().unwrap(); + writer.put_account(&addr, &old_account).unwrap(); + writer.commit().unwrap(); + } + + // In a single transaction: delete then put new value + { + let writer = hot_kv.writer().unwrap(); + writer.queue_delete::(&addr).unwrap(); + writer.put_account(&addr, &new_account).unwrap(); + writer.commit().unwrap(); + } + + // Should have the new value + { + let reader = hot_kv.reader().unwrap(); + let result = reader.get_account(&addr).unwrap(); + assert!(result.is_some(), "Delete-then-put should result in entry existing"); + let account = result.unwrap(); + assert_eq!(account.nonce, 2, "Should have the new nonce"); + assert_eq!(account.balance, U256::from(200), "Should have the new balance"); + } +} + +/// Test that multiple puts to the same key in one transaction use last value. +pub fn test_multiple_puts_same_key(hot_kv: &T) { + let addr = address!("0xffffffffffffffffffffffffffffffffffff0003"); + + // In a single transaction: put three different values + { + let writer = hot_kv.writer().unwrap(); + writer + .put_account( + &addr, + &Account { nonce: 1, balance: U256::from(100), bytecode_hash: None }, + ) + .unwrap(); + writer + .put_account( + &addr, + &Account { nonce: 2, balance: U256::from(200), bytecode_hash: None }, + ) + .unwrap(); + writer + .put_account( + &addr, + &Account { nonce: 3, balance: U256::from(300), bytecode_hash: None }, + ) + .unwrap(); + writer.commit().unwrap(); + } + + // Should have the last value + { + let reader = hot_kv.reader().unwrap(); + let result = reader.get_account(&addr).unwrap(); + assert!(result.is_some()); + let account = result.unwrap(); + assert_eq!(account.nonce, 3, "Should have the last nonce (3)"); + assert_eq!(account.balance, U256::from(300), "Should have the last balance (300)"); + } +} + +/// Test that abandoned transaction (dropped without commit) makes no changes. +pub fn test_abandoned_transaction(hot_kv: &T) { + let addr = address!("0xffffffffffffffffffffffffffffffffffff0004"); + let account = Account { nonce: 42, balance: U256::from(4200), bytecode_hash: None }; + + // Start a transaction, write, but don't commit (drop it) + { + let writer = hot_kv.writer().unwrap(); + writer.put_account(&addr, &account).unwrap(); + // writer is dropped here without commit + } + + // Account should not exist + { + let reader = hot_kv.reader().unwrap(); + let result = reader.get_account(&addr).unwrap(); + assert!(result.is_none(), "Abandoned transaction should not persist changes"); + } +} + +// ============================================================================ +// Chain Validation Error Tests +// ============================================================================ + +/// Test that validate_chain_extension rejects non-contiguous blocks. 
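+///
+/// A sketch of how a caller might distinguish the two chain-extension
+/// failures (the recovery actions in the comments are illustrative):
+///
+/// ```ignore
+/// match writer.append_blocks(&blocks) {
+///     Ok(()) => {}
+///     Err(HistoryError::NonContiguousBlock { expected, got }) => {
+///         // a gap: blocks [expected..got) are missing
+///     }
+///     Err(HistoryError::ParentHashMismatch { .. }) => {
+///         // same height, different ancestry: likely a reorg
+///     }
+///     Err(e) => return Err(e.into()),
+/// }
+/// ```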
+pub fn test_validate_noncontiguous_blocks(hot_kv: &Kv) { + // First, append a genesis block + let genesis = make_header(0, B256::ZERO); + { + let writer = hot_kv.writer().unwrap(); + let bundle = make_bundle_state(vec![], vec![], vec![]); + writer.append_blocks(&[(genesis.clone(), bundle)]).unwrap(); + writer.commit().unwrap(); + } + + // Try to append block 2 (skipping block 1) + let block2 = make_header(2, genesis.hash()); + { + let writer = hot_kv.writer().unwrap(); + let bundle = make_bundle_state(vec![], vec![], vec![]); + let result = writer.append_blocks(&[(block2, bundle)]); + + match result { + Err(HistoryError::NonContiguousBlock { expected, got }) => { + assert_eq!(expected, 1, "Expected block should be 1"); + assert_eq!(got, 2, "Got block should be 2"); + } + Err(e) => panic!("Expected NonContiguousBlock error, got: {:?}", e), + Ok(_) => panic!("Expected error for non-contiguous blocks"), + } + } +} + +/// Test that validate_chain_extension rejects wrong parent hash. +pub fn test_validate_parent_hash_mismatch(hot_kv: &Kv) { + // Append genesis block + let genesis = make_header(0, B256::ZERO); + { + let writer = hot_kv.writer().unwrap(); + let bundle = make_bundle_state(vec![], vec![], vec![]); + writer.append_blocks(&[(genesis.clone(), bundle)]).unwrap(); + writer.commit().unwrap(); + } + + // Try to append block 1 with wrong parent hash + let wrong_parent = b256!("0x1111111111111111111111111111111111111111111111111111111111111111"); + let block1 = make_header(1, wrong_parent); + { + let writer = hot_kv.writer().unwrap(); + let bundle = make_bundle_state(vec![], vec![], vec![]); + let result = writer.append_blocks(&[(block1, bundle)]); + + match result { + Err(HistoryError::ParentHashMismatch { expected, got }) => { + assert_eq!(expected, genesis.hash(), "Expected parent should be genesis hash"); + assert_eq!(got, wrong_parent, "Got parent should be wrong_parent"); + } + Err(e) => panic!("Expected ParentHashMismatch error, got: {:?}", e), + Ok(_) => panic!("Expected error for parent hash mismatch"), + } + } +} + +/// Test appending genesis block (block 0) to empty database. +pub fn test_append_genesis_block(hot_kv: &Kv) { + let addr = address!("0x0000000000000000000000000000000000000001"); + + // Create genesis block with initial state + let genesis = make_header(0, B256::ZERO); + let bundle = make_bundle_state( + vec![(addr, None, Some(make_account_info(0, U256::from(1_000_000), None)))], + vec![], + vec![], + ); + + // Append genesis + { + let writer = hot_kv.writer().unwrap(); + writer.append_blocks(&[(genesis.clone(), bundle)]).unwrap(); + writer.commit().unwrap(); + } + + // Verify genesis exists + { + let reader = hot_kv.reader().unwrap(); + let header = reader.get_header(0).unwrap(); + assert!(header.is_some(), "Genesis header should exist"); + assert_eq!(header.unwrap().number, 0); + + // Verify chain tip + let tip = reader.get_chain_tip().unwrap(); + assert!(tip.is_some()); + let (num, hash) = tip.unwrap(); + assert_eq!(num, 0); + assert_eq!(hash, genesis.hash()); + } +} + +/// Test unwinding to block 0 (keeping only genesis). 
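+///
+/// A sketch of the reorg-handling pattern this exercises (`common_ancestor`
+/// and `new_fork` are illustrative names, not APIs from this crate):
+///
+/// ```ignore
+/// let writer = hot_kv.writer()?;
+/// // Drop everything above the common ancestor, then re-append the new fork.
+/// writer.unwind_above(common_ancestor)?;
+/// writer.append_blocks(&new_fork)?;
+/// writer.commit()?;
+/// ```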
+pub fn test_unwind_to_zero(hot_kv: &Kv) { + let addr = address!("0x1111111111111111111111111111111111111111"); + + // Build a chain of 5 blocks + let mut blocks = Vec::new(); + let mut prev_hash = B256::ZERO; + + for i in 0u64..5 { + let header = make_header(i, prev_hash); + prev_hash = header.hash(); + + let bundle = make_bundle_state( + vec![( + addr, + if i == 0 { + None + } else { + Some(make_account_info(i - 1, U256::from(i * 100), None)) + }, + Some(make_account_info(i, U256::from((i + 1) * 100), None)), + )], + vec![], + vec![], + ); + blocks.push((header, bundle)); + } + + // Append all blocks + { + let writer = hot_kv.writer().unwrap(); + writer.append_blocks(&blocks).unwrap(); + writer.commit().unwrap(); + } + + // Verify chain tip is at block 4 + { + let reader = hot_kv.reader().unwrap(); + let tip = reader.last_block_number().unwrap(); + assert_eq!(tip, Some(4)); + } + + // Unwind to block 0 (keep only genesis) + { + let writer = hot_kv.writer().unwrap(); + writer.unwind_above(0).unwrap(); + writer.commit().unwrap(); + } + + // Verify only genesis remains + { + let reader = hot_kv.reader().unwrap(); + let tip = reader.last_block_number().unwrap(); + assert_eq!(tip, Some(0), "Only genesis should remain after unwind to 0"); + + // Verify blocks 1-4 are gone + for i in 1u64..5 { + assert!(reader.get_header(i).unwrap().is_none(), "Block {} should be gone", i); + } + + // Verify genesis account state (nonce=0 from block 0) + let account = reader.get_account(&addr).unwrap(); + assert!(account.is_some()); + assert_eq!(account.unwrap().nonce, 0, "Account should have genesis state"); + } +} + +// ============================================================================ +// History Sharding Tests +// ============================================================================ + +/// Test history at exactly the shard boundary. +/// +/// NUM_OF_INDICES_IN_SHARD is typically 1000. This test writes exactly that many +/// entries to verify boundary handling. +pub fn test_history_shard_boundary(hot_kv: &T) { + use reth_db::models::sharded_key; + + let addr = address!("0xaaaabbbbccccddddeeeeffffaaaabbbbccccdddd"); + let shard_size = sharded_key::NUM_OF_INDICES_IN_SHARD; + + // Write exactly shard_size account changes + { + let writer = hot_kv.writer().unwrap(); + for i in 1..=shard_size { + let acc = Account { nonce: i as u64, balance: U256::from(i), bytecode_hash: None }; + writer.write_account_prestate(i as u64, addr, &acc).unwrap(); + } + writer.commit().unwrap(); + } + + // Build history indices + { + let writer = hot_kv.writer().unwrap(); + writer.update_history_indices_inconsistent(1..=(shard_size as u64)).unwrap(); + writer.commit().unwrap(); + } + + // Verify history - should fit in exactly one shard + { + let reader = hot_kv.reader().unwrap(); + let (key, history) = + reader.last_account_history(addr).unwrap().expect("Should have history"); + + // With exactly shard_size entries, it should be stored with key = u64::MAX + assert_eq!(key, u64::MAX, "Shard key should be u64::MAX for single full shard"); + + let blocks: Vec = history.iter().collect(); + assert_eq!(blocks.len(), shard_size, "Should have exactly {} blocks", shard_size); + } +} + +/// Test history overflow into multiple shards. 
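+///
+/// Sharding layout, as implied by the index-append path: each full shard is
+/// keyed by its highest contained block number, and the final (possibly
+/// partial) shard is keyed by `u64::MAX`. A sketch with a shard size of 3:
+///
+/// ```ignore
+/// // blocks touched: 1, 2, 3, 4, 5
+/// // shard (addr, 3)        -> [1, 2, 3]
+/// // shard (addr, u64::MAX) -> [4, 5]
+/// let (key, last) = reader.last_account_history(addr)?.unwrap();
+/// assert_eq!(key, u64::MAX);
+/// ```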
+pub fn test_history_multi_shard(hot_kv: &T) { + use reth_db::models::sharded_key; + + let addr = address!("0xbbbbccccddddeeeeffffaaaabbbbccccddddeee1"); + let shard_size = sharded_key::NUM_OF_INDICES_IN_SHARD; + let total_entries = shard_size + 100; // Overflow into second shard + + // Write more than shard_size account changes + { + let writer = hot_kv.writer().unwrap(); + for i in 1..=total_entries { + let acc = Account { nonce: i as u64, balance: U256::from(i), bytecode_hash: None }; + writer.write_account_prestate(i as u64, addr, &acc).unwrap(); + } + writer.commit().unwrap(); + } + + // Build history indices + { + let writer = hot_kv.writer().unwrap(); + writer.update_history_indices_inconsistent(1..=(total_entries as u64)).unwrap(); + writer.commit().unwrap(); + } + + // Verify we have multiple shards + { + let reader = hot_kv.reader().unwrap(); + + // Count shards by traversing + let mut cursor = reader.traverse_dual::().unwrap(); + let mut shard_count = 0; + let mut total_blocks = 0; + + // Find entries for our address + if let Some((k1, _, list)) = cursor.next_dual_above(&addr, &0u64).unwrap() + && k1 == addr + { + shard_count += 1; + total_blocks += list.iter().count(); + + // Continue reading for same address + while let Some((k1, _, list)) = cursor.read_next().unwrap() { + if k1 != addr { + break; + } + shard_count += 1; + total_blocks += list.iter().count(); + } + } + + assert!(shard_count >= 2, "Should have at least 2 shards, got {}", shard_count); + assert_eq!(total_blocks, total_entries, "Total blocks across shards should match"); + } +} + +// ============================================================================ +// HistoryRead Method Tests +// ============================================================================ + +/// Test get_headers_range retrieves headers in range. +pub fn test_get_headers_range(hot_kv: &T) { + // Write headers 500-509 + { + let writer = hot_kv.writer().unwrap(); + for i in 500u64..510 { + let header = Header { number: i, gas_limit: 1_000_000, ..Default::default() }; + writer.put_header_inconsistent(&header).unwrap(); + } + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + + // Get range 502-506 + let headers = reader.get_headers_range(502, 506).unwrap(); + assert_eq!(headers.len(), 5, "Should get 5 headers (502, 503, 504, 505, 506)"); + for (i, header) in headers.iter().enumerate() { + assert_eq!(header.number, 502 + i as u64); + } + + // Get range that starts before existing entries + let headers = reader.get_headers_range(498, 502).unwrap(); + // Should get 500, 501, 502 (498 and 499 don't exist) + assert_eq!(headers.len(), 3); + + // Get range with no entries + let headers = reader.get_headers_range(600, 610).unwrap(); + assert!(headers.is_empty(), "Should get empty vec for non-existent range"); +} + +/// Test first_header and last_header. +pub fn test_first_last_header(hot_kv: &T) { + // Write headers 1000, 1005, 1010 + { + let writer = hot_kv.writer().unwrap(); + for i in [1000u64, 1005, 1010] { + let header = Header { number: i, gas_limit: 1_000_000, ..Default::default() }; + writer.put_header_inconsistent(&header).unwrap(); + } + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + + let first = reader.first_header().unwrap(); + assert!(first.is_some()); + assert_eq!(first.unwrap().number, 1000); + + let last = reader.last_header().unwrap(); + assert!(last.is_some()); + assert_eq!(last.unwrap().number, 1010); +} + +/// Test has_block returns correct boolean. 
+pub fn test_has_block(hot_kv: &T) { + // Write header at block 2000 + { + let writer = hot_kv.writer().unwrap(); + let header = Header { number: 2000, gas_limit: 1_000_000, ..Default::default() }; + writer.put_header_inconsistent(&header).unwrap(); + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + + assert!(reader.has_block(2000).unwrap(), "Block 2000 should exist"); + assert!(!reader.has_block(2001).unwrap(), "Block 2001 should not exist"); + assert!(!reader.has_block(1999).unwrap(), "Block 1999 should not exist"); +} + +/// Test get_execution_range returns first and last block numbers. +pub fn test_get_execution_range(hot_kv: &T) { + // Write headers 3000, 3001, 3002 + { + let writer = hot_kv.writer().unwrap(); + for i in [3000u64, 3001, 3002] { + let header = Header { number: i, gas_limit: 1_000_000, ..Default::default() }; + writer.put_header_inconsistent(&header).unwrap(); + } + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + + let range = reader.get_execution_range().unwrap(); + assert!(range.is_some()); + let (first, last) = range.unwrap(); + assert_eq!(first, 3000); + assert_eq!(last, 3002); +} + +/// Test get_chain_tip returns highest block number and hash. +pub fn test_get_chain_tip(hot_kv: &T) { + let header = Header { number: 4000, gas_limit: 1_000_000, ..Default::default() }; + let expected_hash = header.hash_slow(); + + { + let writer = hot_kv.writer().unwrap(); + writer.put_header_inconsistent(&header).unwrap(); + writer.commit().unwrap(); + } + + let reader = hot_kv.reader().unwrap(); + + let tip = reader.get_chain_tip().unwrap(); + assert!(tip.is_some()); + let (num, hash) = tip.unwrap(); + assert_eq!(num, 4000); + assert_eq!(hash, expected_hash); +} diff --git a/crates/storage/src/hot/db/consistent.rs b/crates/storage/src/hot/db/consistent.rs new file mode 100644 index 0000000..0af75af --- /dev/null +++ b/crates/storage/src/hot/db/consistent.rs @@ -0,0 +1,220 @@ +use crate::hot::{ + db::{HistoryError, UnsafeDbWrite, UnsafeHistoryWrite}, + tables, +}; +use alloy::primitives::{Address, BlockNumber, U256, address}; +use reth::primitives::SealedHeader; +use reth_db::{BlockNumberList, models::BlockNumberAddress}; +use std::collections::HashSet; +use trevm::revm::database::BundleState; + +/// Maximum address value (all bits set to 1). +const ADDRESS_MAX: Address = address!("0xffffffffffffffffffffffffffffffffffffffff"); + +/// Trait for database write operations on hot history tables. This trait +/// maintains a consistent state of the database. +pub trait HistoryWrite: UnsafeDbWrite + UnsafeHistoryWrite { + /// Validate that a range of headers forms a valid chain extension. + /// + /// Headers must be in order and each must extend the previous. + /// The first header must extend the current database tip (or be the first + /// block if the database is empty). + /// + /// Returns `Ok(())` if valid, or an error describing the inconsistency. + fn validate_chain_extension<'a, I>(&self, headers: I) -> Result<(), HistoryError> + where + I: IntoIterator, + { + let headers: Vec<_> = headers.into_iter().collect(); + if headers.is_empty() { + return Err(HistoryError::EmptyRange); + } + + // Validate first header against current DB tip + let first = headers[0]; + match self.get_chain_tip().map_err(HistoryError::Db)? 
{ + None => { + // Empty DB - first block is valid as genesis + } + Some((tip_number, tip_hash)) => { + let expected_number = tip_number + 1; + if first.number != expected_number { + return Err(HistoryError::NonContiguousBlock { + expected: expected_number, + got: first.number, + }); + } + if first.parent_hash != tip_hash { + return Err(HistoryError::ParentHashMismatch { + expected: tip_hash, + got: first.parent_hash, + }); + } + } + } + + // Validate each subsequent header extends the previous + for window in headers.windows(2) { + let prev = window[0]; + let curr = window[1]; + + let expected_number = prev.number + 1; + if curr.number != expected_number { + return Err(HistoryError::NonContiguousBlock { + expected: expected_number, + got: curr.number, + }); + } + + let expected_hash = prev.hash(); + if curr.parent_hash != expected_hash { + return Err(HistoryError::ParentHashMismatch { + expected: expected_hash, + got: curr.parent_hash, + }); + } + } + + Ok(()) + } + + /// Append a range of blocks and their associated state to the database. + fn append_blocks( + &self, + blocks: &[(SealedHeader, BundleState)], + ) -> Result<(), HistoryError> { + self.validate_chain_extension(blocks.iter().map(|(h, _)| h))?; + + let Some(first_num) = blocks.first().map(|(h, _)| h.number) else { return Ok(()) }; + let last_num = blocks.last().map(|(h, _)| h.number).expect("non-empty; qed"); + self.append_blocks_inconsistent(blocks)?; + + self.update_history_indices_inconsistent(first_num..=last_num) + } + + /// Unwind all data above the given block number. + /// + /// This completely reverts the database state to what it was at block `block`, + /// including: + /// - Plain account state + /// - Plain storage state + /// - Headers and header number mappings + /// - Account and storage change sets + /// - Account and storage history indices + fn unwind_above(&self, block: BlockNumber) -> Result<(), HistoryError> { + let first_block_number = block + 1; + let Some(last_block_number) = self.last_block_number()? else { + return Ok(()); + }; + + if first_block_number > last_block_number { + return Ok(()); + } + + // 1. Take and process changesets (reverts plain state) + let storage_range = (BlockNumberAddress((first_block_number, Address::ZERO)), U256::ZERO) + ..=(BlockNumberAddress((last_block_number, ADDRESS_MAX)), U256::MAX); + let acct_range = (first_block_number, Address::ZERO)..=(last_block_number, ADDRESS_MAX); + + let storage_changeset = self.take_range_dual::(storage_range)?; + let account_changeset = self.take_range_dual::(acct_range)?; + + // Collect affected addresses and slots for history cleanup + let mut affected_addresses: HashSet
= HashSet::new(); + let mut affected_storage: HashSet<(Address, U256)> = HashSet::new(); + + for (_, address, _) in &account_changeset { + affected_addresses.insert(*address); + } + for (block_addr, slot, _) in &storage_changeset { + affected_storage.insert((block_addr.address(), *slot)); + } + + // Revert plain state using existing logic + let mut plain_accounts_cursor = self.traverse_mut::()?; + let mut plain_storage_cursor = self.traverse_dual_mut::()?; + + let state = self.populate_bundle_state( + account_changeset, + storage_changeset, + &mut plain_accounts_cursor, + &mut plain_storage_cursor, + )?; + + for (address, (old_account, new_account, storage)) in &state { + if old_account != new_account { + let existing_entry = plain_accounts_cursor.lower_bound(address)?; + if let Some(account) = old_account { + // Check if the old account is effectively empty (account didn't exist before) + // An empty account has nonce=0, balance=0, no bytecode + let is_empty = account.nonce == 0 + && account.balance.is_zero() + && account.bytecode_hash.is_none(); + + if is_empty { + // Account was created - delete it + if existing_entry.is_some_and(|(k, _)| k == *address) { + plain_accounts_cursor.delete_current()?; + } + } else { + // Account existed before - restore it + self.put_account(address, account)?; + } + } else if existing_entry.is_some_and(|(k, _)| k == *address) { + plain_accounts_cursor.delete_current()?; + } + } + + for (storage_key_b256, (old_storage_value, _)) in storage { + let storage_key = U256::from_be_bytes(storage_key_b256.0); + + if plain_storage_cursor + .next_dual_above(address, &storage_key)? + .is_some_and(|(k, k2, _)| k == *address && k2 == storage_key) + { + plain_storage_cursor.delete_current()?; + } + + if !old_storage_value.is_zero() { + self.put_storage(address, &storage_key, old_storage_value)?; + } + } + } + + // 2. Remove headers and header number mappings + let removed_headers = + self.take_range::(first_block_number..=last_block_number)?; + for (_, header) in removed_headers { + let hash = header.hash_slow(); + self.delete_header_number(&hash)?; + } + + // 3. Clean up account history indices + for address in affected_addresses { + if let Some((shard_key, list)) = self.last_account_history(address)? { + let filtered: Vec = list.iter().filter(|&bn| bn <= block).collect(); + self.queue_delete_dual::(&address, &shard_key)?; + if !filtered.is_empty() { + let new_list = BlockNumberList::new_pre_sorted(filtered); + self.write_account_history(&address, u64::MAX, &new_list)?; + } + } + } + + // 4. Clean up storage history indices + for (address, slot) in affected_storage { + if let Some((shard_key, list)) = self.last_storage_history(&address, &slot)? { + let filtered: Vec = list.iter().filter(|&bn| bn <= block).collect(); + self.queue_delete_dual::(&address, &shard_key)?; + if !filtered.is_empty() { + let new_list = BlockNumberList::new_pre_sorted(filtered); + self.write_storage_history(&address, slot, u64::MAX, &new_list)?; + } + } + } + + Ok(()) + } +} + +impl HistoryWrite for T where T: UnsafeDbWrite + UnsafeHistoryWrite {} diff --git a/crates/storage/src/hot/db/errors.rs b/crates/storage/src/hot/db/errors.rs new file mode 100644 index 0000000..a35726e --- /dev/null +++ b/crates/storage/src/hot/db/errors.rs @@ -0,0 +1,49 @@ +use alloy::primitives::B256; +use reth_db::models::integer_list::IntegerListError; + +/// A result type for history operations. +pub type HistoryResult = Result>; + +/// Error type for history operations. 
+///
+/// This error is returned by methods that append or unwind history,
+/// and includes both chain consistency errors and database errors.
+#[derive(Debug, thiserror::Error)]
+pub enum HistoryError<E> {
+    /// Block number doesn't extend the chain contiguously.
+    #[error("non-contiguous block: expected {expected}, got {got}")]
+    NonContiguousBlock {
+        /// The expected block number (current tip + 1).
+        expected: u64,
+        /// The actual block number provided.
+        got: u64,
+    },
+    /// Parent hash doesn't match current tip or previous block in range.
+    #[error("parent hash mismatch: expected {expected}, got {got}")]
+    ParentHashMismatch {
+        /// The expected parent hash.
+        expected: B256,
+        /// The actual parent hash provided.
+        got: B256,
+    },
+
+    /// Empty header range provided to a method that requires at least one header.
+    #[error("empty header range provided")]
+    EmptyRange,
+
+    /// Database error.
+    #[error("{0}")]
+    Db(#[from] E),
+
+    /// Integer list error.
+    #[error(transparent)]
+    IntList(IntegerListError),
+}
+
+impl<E> HistoryError<E> {
+    /// Helper to create an integer list error.
+    pub const fn intlist(err: IntegerListError) -> Self {
+        HistoryError::IntList(err)
+    }
+}
diff --git a/crates/storage/src/hot/db/inconsistent.rs b/crates/storage/src/hot/db/inconsistent.rs
new file mode 100644
index 0000000..473728e
--- /dev/null
+++ b/crates/storage/src/hot/db/inconsistent.rs
@@ -0,0 +1,562 @@
+use std::{
+    collections::{BTreeMap, HashMap, hash_map},
+    ops::RangeInclusive,
+};
+
+use crate::hot::{
+    db::{HistoryError, HistoryRead},
+    model::{DualKeyTraverse, DualTableCursor, HotKvWrite, KvTraverse, TableCursor},
+    tables,
+};
+use alloy::primitives::{Address, B256, BlockNumber, U256};
+use reth::{
+    primitives::{Account, Header, SealedHeader},
+    revm::db::BundleState,
+};
+use reth_db::{
+    BlockNumberList,
+    models::{BlockNumberAddress, sharded_key},
+};
+use reth_db_api::models::ShardedKey;
+use trevm::revm::{
+    bytecode::Bytecode,
+    database::{
+        OriginalValuesKnown,
+        states::{PlainStateReverts, PlainStorageChangeset, PlainStorageRevert, StateChangeset},
+    },
+    state::AccountInfo,
+};
+
+/// Bundle state initialization type.
+/// Maps address -> (old_account, new_account, storage_changes)
+/// where storage_changes maps slot (B256) -> (old_value, new_value)
+pub type BundleInit =
+    HashMap<Address, (Option<Account>, Option<Account>, HashMap<B256, (U256, U256)>)>;
+
+/// Trait for database write operations on standard hot tables.
+///
+/// This trait is low-level, and usage may leave the database in an
+/// inconsistent state if not used carefully. Users should prefer
+/// [`HotHistoryWrite`] or higher-level abstractions when possible.
+///
+/// [`HotHistoryWrite`]: crate::hot::db::HistoryWrite
+pub trait UnsafeDbWrite: HotKvWrite + super::sealed::Sealed {
+    /// Write a block header. This will leave the DB in an inconsistent state
+    /// until the corresponding header number is also written. Users should
+    /// prefer [`Self::put_header`] instead.
+    fn put_header_inconsistent(&self, header: &Header) -> Result<(), Self::Error> {
+        self.queue_put::<tables::Headers>(&header.number, header)
+    }
+
+    /// Write a block number by its hash. This will leave the DB in an
+    /// inconsistent state until the corresponding header is also written.
+    /// Users should prefer [`Self::put_header`] instead.
+    fn put_header_number_inconsistent(&self, hash: &B256, number: u64) -> Result<(), Self::Error> {
+        self.queue_put::<tables::HeaderNumbers>(hash, &number)
+    }
+
+    /// Write contract bytecode by its hash.
+ fn put_bytecode(&self, code_hash: &B256, bytecode: &Bytecode) -> Result<(), Self::Error> { + self.queue_put::(code_hash, bytecode) + } + + /// Write an account by its address. + fn put_account(&self, address: &Address, account: &Account) -> Result<(), Self::Error> { + self.queue_put::(address, account) + } + + /// Write a storage entry by its address and key. + fn put_storage(&self, address: &Address, key: &U256, entry: &U256) -> Result<(), Self::Error> { + self.queue_put_dual::(address, key, entry) + } + + /// Write a sealed block header (header + number). + fn put_header(&self, header: &SealedHeader) -> Result<(), Self::Error> { + self.put_header_inconsistent(header.header()) + .and_then(|_| self.put_header_number_inconsistent(&header.hash(), header.number)) + } + + /// Delete a header by block number. + fn delete_header(&self, number: u64) -> Result<(), Self::Error> { + self.queue_delete::(&number) + } + + /// Delete a header number mapping by hash. + fn delete_header_number(&self, hash: &B256) -> Result<(), Self::Error> { + self.queue_delete::(hash) + } + + /// Commit the write transaction. + fn commit(self) -> Result<(), Self::Error> + where + Self: Sized, + { + HotKvWrite::raw_commit(self) + } +} + +impl UnsafeDbWrite for T where T: HotKvWrite {} + +/// Trait for history write operations. +/// +/// These tables maintain historical information about accounts and storage +/// changes, and their contents can be used to reconstruct past states or +/// roll back changes. +pub trait UnsafeHistoryWrite: UnsafeDbWrite + HistoryRead { + /// Maintain a list of block numbers where an account was touched. + /// + /// Accounts are keyed + fn write_account_history( + &self, + address: &Address, + latest_height: u64, + touched: &BlockNumberList, + ) -> Result<(), Self::Error> { + self.queue_put_dual::(address, &latest_height, touched) + } + + /// Write an account change (pre-state) for an account at a specific + /// block. + fn write_account_prestate( + &self, + block_number: u64, + address: Address, + pre_state: &Account, + ) -> Result<(), Self::Error> { + self.queue_put_dual::(&block_number, &address, pre_state) + } + + /// Write storage history, by highest block number and touched block + /// numbers. + fn write_storage_history( + &self, + address: &Address, + slot: U256, + highest_block_number: u64, + touched: &BlockNumberList, + ) -> Result<(), Self::Error> { + let sharded_key = ShardedKey::new(slot, highest_block_number); + self.queue_put_dual::(address, &sharded_key, touched) + } + + /// Write a storage change (before state) for an account at a specific + /// block. + fn write_storage_prestate( + &self, + block_number: u64, + address: Address, + slot: &U256, + prestate: &U256, + ) -> Result<(), Self::Error> { + let block_number_address = BlockNumberAddress((block_number, address)); + self.queue_put_dual::(&block_number_address, slot, prestate) + } + + /// Write a pre-state for every storage key that exists for an account at a + /// specific block. + fn write_wipe(&self, block_number: u64, address: &Address) -> Result<(), Self::Error> { + let mut cursor = self.traverse_dual::()?; + + cursor.for_each_k2(address, &U256::ZERO, |_addr, slot, value| { + self.write_storage_prestate(block_number, *address, &slot, &value) + }) + } + + /// Write a block's plain state revert information. 
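+    ///
+    /// Roughly, each revert entry becomes one prestate row per touched key;
+    /// a sketch using the helpers defined above (values illustrative):
+    ///
+    /// ```ignore
+    /// // account `addr` had nonce 7 before block `n`:
+    /// writer.write_account_prestate(n, addr, &old_account)?;
+    /// // slot `slot` of `addr` held 5 before block `n`:
+    /// writer.write_storage_prestate(n, addr, &slot, &U256::from(5))?;
+    /// ```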
+    fn write_plain_revert(
+        &self,
+        block_number: u64,
+        accounts: &[(Address, Option<AccountInfo>)],
+        storage: &[PlainStorageRevert],
+    ) -> Result<(), Self::Error> {
+        for (address, info) in accounts {
+            let account = info.as_ref().map(Account::from).unwrap_or_default();
+
+            if let Some(bytecode) = info.as_ref().and_then(|info| info.code.clone()) {
+                let code_hash = account.bytecode_hash.expect("info has bytecode; hash must exist");
+                self.put_bytecode(&code_hash, &bytecode)?;
+            }
+
+            self.write_account_prestate(block_number, *address, &account)?;
+        }
+
+        for entry in storage {
+            if entry.wiped {
+                // A wipe records the pre-state of every existing slot for this
+                // address; continue to the next entry rather than returning
+                // early, which would skip the remaining reverts.
+                self.write_wipe(block_number, &entry.address)?;
+                continue;
+            }
+            for (key, old_value) in entry.storage_revert.iter() {
+                self.write_storage_prestate(
+                    block_number,
+                    entry.address,
+                    key,
+                    &old_value.to_previous_value(),
+                )?;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Write multiple blocks' plain state revert information.
+    fn write_plain_reverts(
+        &self,
+        first_block_number: u64,
+        PlainStateReverts { accounts, storage }: &PlainStateReverts,
+    ) -> Result<(), Self::Error> {
+        accounts.iter().zip(storage.iter()).enumerate().try_for_each(|(idx, (acc, sto))| {
+            self.write_plain_revert(first_block_number + idx as u64, acc, sto)
+        })
+    }
+
+    /// Write changed accounts from a [`StateChangeset`].
+    fn write_changed_account(
+        &self,
+        address: &Address,
+        account: &Option<AccountInfo>,
+    ) -> Result<(), Self::Error> {
+        let Some(info) = account.as_ref() else {
+            // Account removal
+            return self.queue_delete::<tables::PlainAccountState>(address);
+        };
+
+        let account = Account::from(info.clone());
+        if let Some(bytecode) = info.code.clone() {
+            let code_hash = account.bytecode_hash.expect("info has bytecode; hash must exist");
+            self.put_bytecode(&code_hash, &bytecode)?;
+        }
+        self.put_account(address, &account)
+    }
+
+    /// Write changed storage from a [`StateChangeset`].
+    fn write_changed_storage(
+        &self,
+        PlainStorageChangeset { address, wipe_storage, storage }: &PlainStorageChangeset,
+    ) -> Result<(), Self::Error> {
+        if *wipe_storage {
+            let mut cursor = self.traverse_dual_mut::<tables::PlainStorageState>()?;
+
+            // Position the cursor at this address first; a fresh cursor is
+            // unpositioned, so iterating with next_k2() alone would not start
+            // at this address's entries.
+            if cursor
+                .next_dual_above(address, &U256::ZERO)?
+                .is_some_and(|(key, _, _)| key == *address)
+            {
+                cursor.delete_current()?;
+                while let Some((key, _, _)) = cursor.next_k2()? {
+                    if key != *address {
+                        break;
+                    }
+                    cursor.delete_current()?;
+                }
+            }
+
+            return Ok(());
+        }
+
+        storage.iter().try_for_each(|(key, value)| self.put_storage(address, key, value))
+    }
+
+    /// Write changed contract bytecode from a [`StateChangeset`].
+    fn write_changed_contracts(
+        &self,
+        code_hash: &B256,
+        bytecode: &Bytecode,
+    ) -> Result<(), Self::Error> {
+        self.put_bytecode(code_hash, bytecode)
+    }
+
+    /// Write a state changeset for a specific block.
+    fn write_state_changes(
+        &self,
+        StateChangeset { accounts, storage, contracts }: &StateChangeset,
+    ) -> Result<(), Self::Error> {
+        contracts.iter().try_for_each(|(code_hash, bytecode)| {
+            self.write_changed_contracts(code_hash, bytecode)
+        })?;
+        accounts
+            .iter()
+            .try_for_each(|(address, account)| self.write_changed_account(address, account))?;
+        storage
+            .iter()
+            .try_for_each(|storage_changeset| self.write_changed_storage(storage_changeset))?;
+        Ok(())
+    }
+
+    /// Get all changed accounts with the list of block numbers in the given
+    /// range.
+    ///
+    /// Note: iteration starts at the first change set entry at or above
+    /// `range.start()` and advances across block numbers until the
+    /// while-predicate fails, so changes from every block in the range are
+    /// collected.
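+    ///
+    /// A sketch of the result shape for a two-block range where `addr_a`
+    /// changed in both blocks and `addr_b` only in the second (addresses are
+    /// illustrative):
+    ///
+    /// ```ignore
+    /// let map = writer.changed_accounts_with_range(10..=11)?;
+    /// // map[&addr_a] == vec![10, 11]
+    /// // map[&addr_b] == vec![11]
+    /// ```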
+    fn changed_accounts_with_range(
+        &self,
+        range: RangeInclusive<BlockNumber>,
+    ) -> Result<BTreeMap<Address, Vec<u64>>, Self::Error> {
+        let mut changeset_cursor = self.traverse_dual::<tables::AccountChangeSets>()?;
+        let mut result: BTreeMap<Address, Vec<u64>> = BTreeMap::new();
+
+        changeset_cursor.for_each_while_k2(
+            range.start(),
+            &Address::ZERO,
+            |num, _, _| range.contains(num),
+            |num, addr, _| {
+                result.entry(addr).or_default().push(num);
+                Ok(())
+            },
+        )?;
+
+        Ok(result)
+    }
+
+    /// Append account history indices for multiple accounts.
+    fn append_account_history_index(
+        &self,
+        index_updates: impl IntoIterator<Item = (Address, Vec<u64>)>,
+    ) -> Result<(), HistoryError<Self::Error>> {
+        for (acct, indices) in index_updates {
+            // Get the existing last shard (if any) and remember its key so we can
+            // delete it before writing new shards
+            let existing = self.last_account_history(acct)?;
+            // Save the old key before taking ownership of the list
+            let old_key = existing.as_ref().map(|(key, _)| *key);
+            // Take ownership instead of cloning
+            let mut last_shard = existing.map(|(_, list)| list).unwrap_or_default();
+
+            last_shard.append(indices).map_err(HistoryError::IntList)?;
+
+            // Delete the existing shard before writing new ones to avoid duplicates
+            if let Some(old_key) = old_key {
+                self.queue_delete_dual::<tables::AccountsHistory>(&acct, &old_key)?;
+            }
+
+            // fast path: all indices fit in one shard
+            if last_shard.len() <= sharded_key::NUM_OF_INDICES_IN_SHARD as u64 {
+                self.write_account_history(&acct, u64::MAX, &last_shard)?;
+                continue;
+            }
+
+            // slow path: rechunk into multiple shards
+            // Reuse a single buffer to avoid allocating a new Vec per chunk
+            let mut chunk_buf = Vec::with_capacity(sharded_key::NUM_OF_INDICES_IN_SHARD);
+            let mut iter = last_shard.iter().peekable();
+
+            while iter.peek().is_some() {
+                chunk_buf.clear();
+                chunk_buf.extend(iter.by_ref().take(sharded_key::NUM_OF_INDICES_IN_SHARD));
+
+                let highest_block_number = if iter.peek().is_some() {
+                    *chunk_buf.last().expect("chunk_buf is non-empty")
+                } else {
+                    // Insert last list with `u64::MAX`.
+                    u64::MAX
+                };
+
+                let shard = BlockNumberList::new_pre_sorted(chunk_buf.iter().copied());
+                self.write_account_history(&acct, highest_block_number, &shard)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Get all changed storages with the list of block numbers in the given
+    /// range.
+    ///
+    /// Note: iteration starts at the first change set entry at or above
+    /// `(range.start(), Address::ZERO)` and advances across (block number,
+    /// address) keys until the while-predicate fails, so changes from every
+    /// block in the range are collected.
+    #[allow(clippy::type_complexity)]
+    fn changed_storages_with_range(
+        &self,
+        range: RangeInclusive<BlockNumber>,
+    ) -> Result<BTreeMap<(Address, U256), Vec<u64>>, Self::Error> {
+        let mut changeset_cursor = self.traverse_dual::<tables::StorageChangeSets>()?;
+        let mut result: BTreeMap<(Address, U256), Vec<u64>> = BTreeMap::new();
+
+        changeset_cursor.for_each_while_k2(
+            &BlockNumberAddress((*range.start(), Address::ZERO)),
+            &U256::ZERO,
+            |num_addr, _, _| range.contains(&num_addr.block_number()),
+            |num_addr, slot, _| {
+                result.entry((num_addr.address(), slot)).or_default().push(num_addr.block_number());
+                Ok(())
+            },
+        )?;
+
+        Ok(result)
+    }
+
+    /// Append storage history indices for multiple (address, slot) pairs.
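+    ///
+    /// A sketch of the expected layout after a small append; the
+    /// `highest_block_number` field name on [`ShardedKey`] is assumed from
+    /// reth's model:
+    ///
+    /// ```ignore
+    /// writer.append_storage_history_index([((addr, slot), vec![100, 101])])?;
+    /// let (key, list) = reader.last_storage_history(&addr, &slot)?.unwrap();
+    /// // the open (last) shard is always keyed with u64::MAX
+    /// assert_eq!(key.highest_block_number, u64::MAX);
+    /// ```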
+ fn append_storage_history_index( + &self, + index_updates: impl IntoIterator)>, + ) -> Result<(), HistoryError> { + for ((addr, slot), indices) in index_updates { + // Get the existing last shard (if any) and remember its key so we can + // delete it before writing new shards + let existing = self.last_storage_history(&addr, &slot)?; + // Save the old key before taking ownership of the list (clone is cheap for ShardedKey) + let old_key = existing.as_ref().map(|(key, _)| key.clone()); + // Take ownership instead of cloning the BlockNumberList + let mut last_shard = existing.map(|(_, list)| list).unwrap_or_default(); + + last_shard.append(indices).map_err(HistoryError::IntList)?; + + // Delete the existing shard before writing new ones to avoid duplicates + if let Some(old_key) = old_key { + self.queue_delete_dual::(&addr, &old_key)?; + } + + // fast path: all indices fit in one shard + if last_shard.len() <= sharded_key::NUM_OF_INDICES_IN_SHARD as u64 { + self.write_storage_history(&addr, slot, u64::MAX, &last_shard)?; + continue; + } + + // slow path: rechunk into multiple shards + // Reuse a single buffer to avoid allocating a new Vec per chunk + let mut chunk_buf = Vec::with_capacity(sharded_key::NUM_OF_INDICES_IN_SHARD); + let mut iter = last_shard.iter().peekable(); + + while iter.peek().is_some() { + chunk_buf.clear(); + chunk_buf.extend(iter.by_ref().take(sharded_key::NUM_OF_INDICES_IN_SHARD)); + + let highest_block_number = if iter.peek().is_some() { + *chunk_buf.last().expect("chunk_buf is non-empty") + } else { + // Insert last list with `u64::MAX`. + u64::MAX + }; + + let shard = BlockNumberList::new_pre_sorted(chunk_buf.iter().copied()); + self.write_storage_history(&addr, slot, highest_block_number, &shard)?; + } + } + Ok(()) + } + + /// Update the history indices for accounts and storage in the given block + /// range. + fn update_history_indices_inconsistent( + &self, + range: RangeInclusive, + ) -> Result<(), HistoryError> { + // account history stage + { + let indices = self.changed_accounts_with_range(range.clone())?; + self.append_account_history_index(indices)?; + } + + // storage history stage + { + let indices = self.changed_storages_with_range(range)?; + self.append_storage_history_index(indices)?; + } + + Ok(()) + } + + /// Append a block's header and state changes in an inconsistent manner. + /// + /// This may leave the database in an inconsistent state. Users should + /// prefer higher-level abstractions when possible. + /// + /// 1. It MUST be checked that the header is the child of the current chain + /// tip before calling this method. + /// 2. After calling this method, the caller MUST call + /// `update_history_indices`. + fn append_block_inconsistent( + &self, + header: &SealedHeader, + state_changes: &BundleState, + ) -> Result<(), Self::Error> { + self.put_header_inconsistent(header.header())?; + self.put_header_number_inconsistent(&header.hash(), header.number)?; + + let (state_changes, reverts) = + state_changes.to_plain_state_and_reverts(OriginalValuesKnown::No); + + self.write_state_changes(&state_changes)?; + self.write_plain_reverts(header.number, &reverts) + } + + /// Append multiple blocks' headers and state changes in an inconsistent + /// manner. + /// + /// This may leave the database in an inconsistent state. Users should + /// prefer higher-level abstractions when possible. + /// 1. It MUST be checked that the first header is the child of the current + /// chain tip before calling this method. + /// 2. 
After calling this method, the caller MUST call + /// `update_history_indices`. + fn append_blocks_inconsistent( + &self, + blocks: &[(SealedHeader, BundleState)], + ) -> Result<(), Self::Error> { + blocks.iter().try_for_each(|(header, state)| self.append_block_inconsistent(header, state)) + } + + /// Populate a [`BundleInit`] using cursors over the + /// [`tables::PlainAccountState`] and [`tables::PlainStorageState`] tables, + /// based on the given storage and account changesets. + /// + /// Returns a map of address -> (old_account, new_account, storage_changes) + /// where storage_changes maps slot -> (old_value, new_value). + fn populate_bundle_state( + &self, + account_changeset: Vec<(u64, Address, Account)>, + storage_changeset: Vec<(BlockNumberAddress, U256, U256)>, + plain_accounts_cursor: &mut TableCursor, + plain_storage_cursor: &mut DualTableCursor, + ) -> Result + where + C: KvTraverse, + D: DualKeyTraverse, + { + // iterate previous value and get plain state value to create changeset + // Double option around Account represent if Account state is known (first option) and + // account is removed (second option) + let mut state: BundleInit = Default::default(); + + // add account changeset changes in reverse order + for (_block_number, address, old_account) in account_changeset.into_iter().rev() { + match state.entry(address) { + hash_map::Entry::Vacant(entry) => { + let new_account = plain_accounts_cursor.exact(&address)?; + entry.insert((Some(old_account), new_account, HashMap::default())); + } + hash_map::Entry::Occupied(mut entry) => { + // overwrite old account state. + entry.get_mut().0 = Some(old_account); + } + } + } + + // add storage changeset changes + for (block_and_address, storage_key, old_value) in storage_changeset.into_iter().rev() { + let BlockNumberAddress((_block_number, address)) = block_and_address; + // get account state or insert from plain state. + let account_state = match state.entry(address) { + hash_map::Entry::Vacant(entry) => { + let present_account = plain_accounts_cursor.exact(&address)?; + entry.insert((present_account, present_account, HashMap::default())) + } + hash_map::Entry::Occupied(entry) => entry.into_mut(), + }; + + // Convert U256 storage key to B256 for the BundleInit map + let storage_key_b256 = B256::from(storage_key); + + // match storage. + match account_state.2.entry(storage_key_b256) { + hash_map::Entry::Vacant(entry) => { + let new_value = plain_storage_cursor + .next_dual_above(&address, &storage_key)? + .filter(|(k, k2, _)| *k == address && *k2 == storage_key) + .map(|(_, _, v)| v) + .unwrap_or_default(); + entry.insert((old_value, new_value)); + } + hash_map::Entry::Occupied(mut entry) => { + entry.get_mut().0 = old_value; + } + }; + } + + Ok(state) + } +} + +impl UnsafeHistoryWrite for T where T: UnsafeDbWrite + HotKvWrite {} diff --git a/crates/storage/src/hot/db/mod.rs b/crates/storage/src/hot/db/mod.rs new file mode 100644 index 0000000..cc9bf29 --- /dev/null +++ b/crates/storage/src/hot/db/mod.rs @@ -0,0 +1,22 @@ +//! Primary access traits for hot storage backends. + +mod consistent; +pub use consistent::HistoryWrite; + +mod errors; +pub use errors::{HistoryError, HistoryResult}; + +mod inconsistent; +pub use inconsistent::{BundleInit, UnsafeDbWrite, UnsafeHistoryWrite}; + +mod read; +pub use read::{HistoryRead, HotDbRead}; + +pub(crate) mod sealed { + use crate::hot::model::HotKvRead; + + /// Sealed trait to prevent external implementations of hot database traits. 
+    #[allow(dead_code, unreachable_pub)]
+    pub trait Sealed {}
+    impl<T> Sealed for T where T: HotKvRead {}
+}
diff --git a/crates/storage/src/hot/db/read.rs b/crates/storage/src/hot/db/read.rs
new file mode 100644
index 0000000..50e3439
--- /dev/null
+++ b/crates/storage/src/hot/db/read.rs
@@ -0,0 +1,252 @@
+use crate::hot::{model::HotKvRead, tables};
+use alloy::primitives::{Address, B256, U256};
+use reth::primitives::{Account, Header, StorageEntry};
+use reth_db::{BlockNumberList, models::BlockNumberAddress};
+use reth_db_api::models::ShardedKey;
+use trevm::revm::bytecode::Bytecode;
+
+/// Trait for database read operations on standard hot tables.
+///
+/// This is a high-level trait that provides convenient methods for reading
+/// common data types from predefined hot storage tables. It builds upon the
+/// lower-level [`HotKvRead`] trait, which provides raw key-value access.
+///
+/// Users should prefer this trait unless customizations are needed to the
+/// table set.
+pub trait HotDbRead: HotKvRead + super::sealed::Sealed {
+    /// Read a block header by its number.
+    fn get_header(&self, number: u64) -> Result<Option<Header>, Self::Error> {
+        self.get::<tables::Headers>(&number)
+    }
+
+    /// Read a block number by its hash.
+    fn get_header_number(&self, hash: &B256) -> Result<Option<u64>, Self::Error> {
+        self.get::<tables::HeaderNumbers>(hash)
+    }
+
+    /// Read contract bytecode by its hash.
+    fn get_bytecode(&self, code_hash: &B256) -> Result<Option<Bytecode>, Self::Error> {
+        self.get::<tables::Bytecodes>(code_hash)
+    }
+
+    /// Read an account by its address.
+    fn get_account(&self, address: &Address) -> Result<Option<Account>, Self::Error> {
+        self.get::<tables::PlainAccountState>(address)
+    }
+
+    /// Read a storage slot by its address and key.
+    fn get_storage(&self, address: &Address, key: &U256) -> Result<Option<U256>, Self::Error> {
+        self.get_dual::<tables::PlainStorageState>(address, key)
+    }
+
+    /// Read a [`StorageEntry`] by its address and key.
+    fn get_storage_entry(
+        &self,
+        address: &Address,
+        key: &U256,
+    ) -> Result<Option<StorageEntry>, Self::Error> {
+        let opt = self.get_storage(address, key)?;
+        Ok(opt.map(|value| StorageEntry { key: B256::new(key.to_be_bytes()), value }))
+    }
+
+    /// Read a block header by its hash.
+    fn header_by_hash(&self, hash: &B256) -> Result<Option<Header>, Self::Error> {
+        let Some(number) = self.get_header_number(hash)? else {
+            return Ok(None);
+        };
+        self.get_header(number)
+    }
+}
+
+impl<T> HotDbRead for T where T: HotKvRead {}
+
+/// Trait for history read operations.
+///
+/// These tables maintain historical information about accounts and storage
+/// changes, and their contents can be used to reconstruct past states or
+/// roll back changes.
+///
+/// This is a high-level trait that provides convenient methods for reading
+/// common data types from predefined hot storage history tables. It builds
+/// upon the [`HotDbRead`] trait.
+///
+/// Users should prefer this trait unless customizations are needed to the
+/// table set.
+pub trait HistoryRead: HotDbRead {
+    /// Get the list of block numbers where an account was touched.
+    fn get_account_history(
+        &self,
+        address: &Address,
+        latest_height: u64,
+    ) -> Result<Option<BlockNumberList>, Self::Error> {
+        self.get_dual::<tables::AccountsHistory>(address, &latest_height)
+    }
+
+    /// Get the last (highest) account history entry for an address.
+    fn last_account_history(
+        &self,
+        address: Address,
+    ) -> Result<Option<(u64, BlockNumberList)>, Self::Error> {
+        let mut cursor = self.traverse_dual::<tables::AccountsHistory>()?;
+
+        // Move the cursor to the last entry for the given address
+        let Some(res) = cursor.last_of_k1(&address)?
else { + return Ok(None); + }; + + Ok(Some((res.1, res.2))) + } + + /// Get the account change (pre-state) for an account at a specific block. + /// + /// If the return value is `None`, the account was not changed in that + /// block. + fn get_account_change( + &self, + block_number: u64, + address: &Address, + ) -> Result, Self::Error> { + self.get_dual::(&block_number, address) + } + + /// Get the storage history for an account and storage slot. The returned + /// list will contain block numbers where the storage slot was changed. + fn get_storage_history( + &self, + address: &Address, + slot: U256, + highest_block_number: u64, + ) -> Result, Self::Error> { + let sharded_key = ShardedKey::new(slot, highest_block_number); + self.get_dual::(address, &sharded_key) + } + + /// Get the last (highest) storage history entry for an address and slot. + fn last_storage_history( + &self, + address: &Address, + slot: &U256, + ) -> Result, BlockNumberList)>, Self::Error> { + let mut cursor = self.traverse_dual::()?; + + // Seek to the highest possible key for this (address, slot) combination. + // ShardedKey encodes as slot || highest_block_number, so seeking to + // (address, ShardedKey::new(slot, u64::MAX)) positions us at or after + // the last shard for this slot. + let target = ShardedKey::new(*slot, u64::MAX); + let result = cursor.next_dual_above(address, &target)?; + + // Check if we found an exact match for this address and slot + if let Some((addr, sharded_key, list)) = result + && addr == *address + && sharded_key.key == *slot + { + return Ok(Some((sharded_key, list))); + } + + // The cursor is positioned at or after our target. Go backwards to find + // the last entry for this (address, slot). + let Some((addr, sharded_key, list)) = cursor.previous_k2()? else { + return Ok(None); + }; + + if addr == *address && sharded_key.key == *slot { + Ok(Some((sharded_key, list))) + } else { + Ok(None) + } + } + + /// Get the storage change (before state) for a specific storage slot at a + /// specific block. + /// + /// If the return value is `None`, the storage slot was not changed in that + /// block. If the return value is `Some(value)`, the value is the pre-state + /// of the storage slot before the change in that block. If the value is + /// `U256::ZERO`, that indicates that the storage slot was not set before + /// the change. + fn get_storage_change( + &self, + block_number: u64, + address: &Address, + slot: &U256, + ) -> Result, Self::Error> { + let block_number_address = BlockNumberAddress((block_number, *address)); + self.get_dual::(&block_number_address, slot) + } + + /// Get the last (highest) header in the database. + /// Returns None if the database is empty. + fn last_header(&self) -> Result, Self::Error> { + let mut cursor = self.traverse::()?; + Ok(cursor.last()?.map(|(_, header)| header)) + } + + /// Get the last (highest) block number in the database. + /// Returns None if the database is empty. + fn last_block_number(&self) -> Result, Self::Error> { + let mut cursor = self.traverse::()?; + Ok(cursor.last()?.map(|(number, _)| number)) + } + + /// Get the first (lowest) header in the database. + /// Returns None if the database is empty. + fn first_header(&self) -> Result, Self::Error> { + let mut cursor = self.traverse::()?; + Ok(cursor.first()?.map(|(_, header)| header)) + } + + /// Get the current chain tip (highest block number and hash). + /// Returns None if the database is empty. 
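+    ///
+    /// A small usage sketch:
+    ///
+    /// ```ignore
+    /// if let Some((number, hash)) = reader.get_chain_tip()? {
+    ///     tracing::info!(number, %hash, "current tip");
+    /// }
+    /// ```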
+    fn get_chain_tip(&self) -> Result<Option<(u64, B256)>, Self::Error> {
+        let mut cursor = self.traverse::<tables::Headers>()?;
+        let Some((number, header)) = cursor.last()? else {
+            return Ok(None);
+        };
+        let hash = header.hash_slow();
+        Ok(Some((number, hash)))
+    }
+
+    /// Get the execution range (first and last block numbers with headers).
+    /// Returns None if the database is empty.
+    fn get_execution_range(&self) -> Result<Option<(u64, u64)>, Self::Error> {
+        let mut cursor = self.traverse::<tables::Headers>()?;
+        let Some((first, _)) = cursor.first()? else {
+            return Ok(None);
+        };
+        let Some((last, _)) = cursor.last()? else {
+            return Ok(None);
+        };
+        Ok(Some((first, last)))
+    }
+
+    /// Check if a specific block number exists in history.
+    fn has_block(&self, number: u64) -> Result<bool, Self::Error> {
+        self.get_header(number).map(|opt| opt.is_some())
+    }
+
+    /// Get headers in a range (inclusive).
+    fn get_headers_range(&self, start: u64, end: u64) -> Result<Vec<Header>, Self::Error> {
+        let mut cursor = self.traverse::<tables::Headers>()?;
+        let mut headers = Vec::new();
+
+        // lower_bound() positions the cursor at the first entry >= start and
+        // returns it; keep that entry, since read_next() advances past it.
+        let Some((num, header)) = cursor.lower_bound(&start)? else {
+            return Ok(headers);
+        };
+        if num > end {
+            return Ok(headers);
+        }
+        headers.push(header);
+
+        loop {
+            match cursor.read_next()? {
+                Some((num, header)) if num <= end => {
+                    headers.push(header);
+                }
+                _ => break,
+            }
+        }
+
+        Ok(headers)
+    }
+}
+
+impl<T> HistoryRead for T where T: HotDbRead {}
diff --git a/crates/storage/src/hot/impls/mdbx/cursor.rs b/crates/storage/src/hot/impls/mdbx/cursor.rs
new file mode 100644
index 0000000..ef19237
--- /dev/null
+++ b/crates/storage/src/hot/impls/mdbx/cursor.rs
@@ -0,0 +1,385 @@
+//! Cursor wrapper for libmdbx-sys.
+
+use std::{
+    borrow::Cow,
+    ops::{Deref, DerefMut},
+};
+
+use crate::hot::{
+    MAX_FIXED_VAL_SIZE, MAX_KEY_SIZE,
+    impls::mdbx::{DbInfo, MdbxError},
+    model::{DualKeyTraverse, KvTraverse, KvTraverseMut, RawDualKeyValue, RawKeyValue, RawValue},
+};
+use dashmap::mapref::one::Ref;
+use reth_libmdbx::{RO, RW, TransactionKind};
+
+/// Read only cursor.
+pub type CursorRO<'a> = Cursor<'a, RO>;
+
+/// Read write cursor.
+pub type CursorRW<'a> = Cursor<'a, RW>;
+
+/// Cursor wrapper to access KV items.
+pub struct Cursor<'a, K: TransactionKind> {
+    /// Inner `libmdbx` cursor.
+    pub(crate) inner: reth_libmdbx::Cursor<K>,
+
+    /// Database flags that were used to open the database.
+    db_info: Ref<'a, &'static str, DbInfo>,
+
+    /// Scratch buffer for key2 operations in DUPSORT tables.
+    /// Sized to hold key2 + fixed value for DUP_FIXED tables.
+    buf: [u8; MAX_KEY_SIZE + MAX_FIXED_VAL_SIZE],
+}
+
+impl<K: TransactionKind> std::fmt::Debug for Cursor<'_, K> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let flag_names =
+            self.db_info.flags().iter_names().map(|t| t.0).collect::<Vec<_>>().join("|");
+        f.debug_struct("Cursor")
+            .field("inner", &self.inner)
+            .field("db_flags", &flag_names)
+            .field("buf", &self.buf)
+            .finish()
+    }
+}
+
+impl<K: TransactionKind> Deref for Cursor<'_, K> {
+    type Target = reth_libmdbx::Cursor<K>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<'a> DerefMut for Cursor<'a, RW> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
+
+impl<'a, K: TransactionKind> Cursor<'a, K> {
+    /// Creates a new `Cursor` wrapping the given `libmdbx` cursor.
+    pub const fn new(inner: reth_libmdbx::Cursor<K>, db: Ref<'a, &'static str, DbInfo>) -> Self {
+        Self { inner, db_info: db, buf: [0u8; MAX_KEY_SIZE + MAX_FIXED_VAL_SIZE] }
+    }
+
+    /// Returns the database info for this cursor.
+ pub fn db_info(&self) -> &DbInfo { + &self.db_info + } +} + +impl KvTraverse for Cursor<'_, K> +where + K: TransactionKind, +{ + fn first<'a>(&'a mut self) -> Result>, MdbxError> { + self.inner.first().map_err(MdbxError::Mdbx) + } + + fn last<'a>(&'a mut self) -> Result>, MdbxError> { + self.inner.last().map_err(MdbxError::Mdbx) + } + + fn exact<'a>(&'a mut self, key: &[u8]) -> Result>, MdbxError> { + self.inner.set(key).map_err(MdbxError::Mdbx) + } + + fn lower_bound<'a>(&'a mut self, key: &[u8]) -> Result>, MdbxError> { + self.inner.set_range(key).map_err(MdbxError::Mdbx) + } + + fn read_next<'a>(&'a mut self) -> Result>, MdbxError> { + self.inner.next().map_err(MdbxError::Mdbx) + } + + fn read_prev<'a>(&'a mut self) -> Result>, MdbxError> { + self.inner.prev().map_err(MdbxError::Mdbx) + } +} + +impl KvTraverseMut for Cursor<'_, RW> { + fn delete_current(&mut self) -> Result<(), MdbxError> { + self.inner.del(Default::default()).map_err(MdbxError::Mdbx) + } +} + +/// Splits a [`Cow`] slice at the given index, preserving borrowed status. +/// +/// When the input is `Cow::Borrowed`, both outputs will be `Cow::Borrowed` +/// referencing subslices of the original data. When the input is `Cow::Owned`, +/// both outputs will be `Cow::Owned` with newly allocated vectors. +#[inline] +fn split_cow_at(cow: Cow<'_, [u8]>, at: usize) -> (Cow<'_, [u8]>, Cow<'_, [u8]>) { + match cow { + Cow::Borrowed(slice) => (Cow::Borrowed(&slice[..at]), Cow::Borrowed(&slice[at..])), + Cow::Owned(mut vec) => { + let right = vec.split_off(at); + (Cow::Owned(vec), Cow::Owned(right)) + } + } +} + +impl DualKeyTraverse for Cursor<'_, K> +where + K: TransactionKind, +{ + fn first<'a>(&'a mut self) -> Result>, MdbxError> { + if !self.db_info.is_dupsort() { + return Err(MdbxError::NotDupSort); + } + + match self.inner.first::, Cow<'_, [u8]>>()? { + Some((k1, v)) => { + // For DUPSORT, the value contains key2 || actual_value. + let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else { + return Err(MdbxError::UnknownFixedSize); + }; + let (k2, val) = split_cow_at(v, key2_size); + Ok(Some((k1, k2, val))) + } + None => Ok(None), + } + } + + fn last<'a>(&'a mut self) -> Result>, MdbxError> { + if !self.db_info.is_dupsort() { + return Err(MdbxError::NotDupSort); + } + + match self.inner.last::, Cow<'_, [u8]>>()? { + Some((k1, v)) => { + // For DUPSORT, the value contains key2 || actual_value. + let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else { + return Err(MdbxError::UnknownFixedSize); + }; + let (k2, val) = split_cow_at(v, key2_size); + Ok(Some((k1, k2, val))) + } + None => Ok(None), + } + } + + fn read_next<'a>(&'a mut self) -> Result>, MdbxError> { + if !self.db_info.is_dupsort() { + return Err(MdbxError::NotDupSort); + } + + match self.inner.next::, Cow<'_, [u8]>>()? { + Some((k1, v)) => { + // For DUPSORT, the value contains key2 || actual_value. + let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else { + return Err(MdbxError::UnknownFixedSize); + }; + let (k2, val) = split_cow_at(v, key2_size); + Ok(Some((k1, k2, val))) + } + None => Ok(None), + } + } + + fn read_prev<'a>(&'a mut self) -> Result>, MdbxError> { + if !self.db_info.is_dupsort() { + return Err(MdbxError::NotDupSort); + } + + match self.inner.prev::, Cow<'_, [u8]>>()? { + Some((k1, v)) => { + // For DUPSORT, the value contains key2 || actual_value. 
+                let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else {
+                    return Err(MdbxError::UnknownFixedSize);
+                };
+                let (k2, val) = split_cow_at(v, key2_size);
+                Ok(Some((k1, k2, val)))
+            }
+            None => Ok(None),
+        }
+    }
+
+    fn exact_dual<'a>(
+        &'a mut self,
+        key1: &[u8],
+        key2: &[u8],
+    ) -> Result<Option<RawValue<'a>>, MdbxError> {
+        if !self.db_info.is_dupsort() {
+            return Err(MdbxError::NotDupSort);
+        }
+
+        // For DUPSORT tables, we use get_both, which finds the exact (key1, key2) match.
+        // The "value" in MDBX DUPSORT is key2 || actual_value, so we return that.
+        // Prepare key2 (it may need zero-padding for DUP_FIXED).
+        let fsi = self.db_info.dup_fixed_val_size();
+        let key2_prepared = if let Some(total_size) = fsi.total_size() {
+            // Copy key2 to the scratch buffer and zero-pad to the total fixed size
+            self.buf[..key2.len()].copy_from_slice(key2);
+            self.buf[key2.len()..total_size].fill(0);
+            &self.buf[..total_size]
+        } else {
+            key2
+        };
+        self.inner.get_both(key1, key2_prepared).map_err(MdbxError::Mdbx)
+    }
+
+    fn next_dual_above<'a>(
+        &'a mut self,
+        key1: &[u8],
+        key2: &[u8],
+    ) -> Result<Option<RawDualKeyValue<'a>>, MdbxError> {
+        if !self.db_info.is_dupsort() {
+            return Err(MdbxError::NotDupSort);
+        }
+
+        let fsi = self.db_info.dup_fixed_val_size();
+        let key2_size = fsi.key2_size().unwrap_or(key2.len());
+
+        // Use set_range to find the first entry with key1 >= search_key1
+        let Some((found_k1, v)) =
+            self.inner.set_range::<Cow<'_, [u8]>, Cow<'_, [u8]>>(key1)?
+        else {
+            return Ok(None);
+        };
+
+        // If found_k1 > search_key1, we have our answer (first entry in the next key1)
+        if found_k1.as_ref() > key1 {
+            let (k2, val) = split_cow_at(v, key2_size);
+            return Ok(Some((found_k1, k2, val)));
+        }
+
+        // found_k1 == search_key1, so we need to filter by key2 >= search_key2.
+        // Use get_both_range to find an entry with exact key1 and value >= key2.
+        let key2_prepared = if let Some(total_size) = fsi.total_size() {
+            // Copy key2 to the scratch buffer and zero-pad to the total fixed size
+            self.buf[..key2.len()].copy_from_slice(key2);
+            self.buf[key2.len()..total_size].fill(0);
+            &self.buf[..total_size]
+        } else {
+            key2
+        };
+
+        match self.inner.get_both_range::<Cow<'_, [u8]>>(key1, key2_prepared)? {
+            Some(v) => {
+                let (k2, val) = split_cow_at(v, key2_size);
+                // key1 must be owned here since we're returning a reference to the input
+                Ok(Some((Cow::Owned(key1.to_vec()), k2, val)))
+            }
+            None => {
+                // No entry with key2 >= search_key2 under this key1; try the next key1
+                match self.inner.next_nodup::<Cow<'_, [u8]>, Cow<'_, [u8]>>()? {
+                    Some((k1, v)) => {
+                        let (k2, val) = split_cow_at(v, key2_size);
+                        Ok(Some((k1, k2, val)))
+                    }
+                    None => Ok(None),
+                }
+            }
+        }
+    }
+
+    fn next_k1<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, MdbxError> {
+        // Move to the next distinct key1 (skip remaining duplicates for the current key1)
+        if self.db_info.is_dupsort() {
+            match self.inner.next_nodup::<Cow<'_, [u8]>, Cow<'_, [u8]>>()? {
+                Some((k1, v)) => {
+                    // For DUPSORT, the value contains key2 || actual_value.
+                    // Split using the known key2 size.
+                    let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else {
+                        return Err(MdbxError::UnknownFixedSize);
+                    };
+                    let (k2, val) = split_cow_at(v, key2_size);
+                    Ok(Some((k1, k2, val)))
+                }
+                None => Ok(None),
+            }
+        } else {
+            // Not a DUPSORT table - just get the next entry
+            match self.inner.next()? {
+                Some((k, v)) => Ok(Some((k, Cow::Borrowed(&[] as &[u8]), v))),
+                None => Ok(None),
+            }
+        }
+    }
+
+    fn next_k2<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, MdbxError> {
+        // Move to the next duplicate (same key1, next key2)
+        if self.db_info.is_dupsort() {
+            match self.inner.next_dup::<Cow<'_, [u8]>, Cow<'_, [u8]>>()? {
+                Some((k1, v)) => {
+                    // For DUPSORT, the value contains key2 || actual_value.
+                    // Split using the known key2 size.
+                    let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else {
+                        return Err(MdbxError::UnknownFixedSize);
+                    };
+                    let (k2, val) = split_cow_at(v, key2_size);
+                    Ok(Some((k1, k2, val)))
+                }
+                None => Ok(None),
+            }
+        } else {
+            // Not a DUPSORT table - no concept of "next duplicate"
+            Ok(None)
+        }
+    }
+
+    fn last_of_k1<'a>(&'a mut self, key1: &[u8]) -> Result<Option<RawDualKeyValue<'a>>, MdbxError> {
+        if !self.db_info.is_dupsort() {
+            return Err(MdbxError::NotDupSort);
+        }
+
+        // First, position at key1 (any duplicate)
+        let Some(_) = self.inner.set::<Cow<'_, [u8]>>(key1)? else {
+            return Ok(None);
+        };
+
+        // Then move to the last duplicate for this key1
+        let Some(v) = self.inner.last_dup::<Cow<'_, [u8]>>()? else {
+            return Ok(None);
+        };
+
+        // Split the value into key2 and the actual value
+        let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else {
+            return Err(MdbxError::UnknownFixedSize);
+        };
+        let (k2, val) = split_cow_at(v, key2_size);
+
+        // key1 must be owned here since we're returning a reference to the input
+        Ok(Some((Cow::Owned(key1.to_vec()), k2, val)))
+    }
+
+    fn previous_k1<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, MdbxError> {
+        if !self.db_info.is_dupsort() {
+            return Err(MdbxError::NotDupSort);
+        }
+
+        // prev_nodup positions at the last data item of the previous key
+        match self.inner.prev_nodup::<Cow<'_, [u8]>, Cow<'_, [u8]>>()? {
+            Some((k1, v)) => {
+                // For DUPSORT, prev_nodup already positions at the last duplicate
+                // of the previous key. Split the value.
+                let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else {
+                    return Err(MdbxError::UnknownFixedSize);
+                };
+                let (k2, val) = split_cow_at(v, key2_size);
+                Ok(Some((k1, k2, val)))
+            }
+            None => Ok(None),
+        }
+    }
+
+    fn previous_k2<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, MdbxError> {
+        if !self.db_info.is_dupsort() {
+            return Err(MdbxError::NotDupSort);
+        }
+
+        // prev_dup positions at the previous duplicate of the current key
+        match self.inner.prev_dup::<Cow<'_, [u8]>, Cow<'_, [u8]>>()? {
+            Some((k1, v)) => {
+                let Some(key2_size) = self.db_info.dup_fixed_val_size().key2_size() else {
+                    return Err(MdbxError::UnknownFixedSize);
+                };
+                let (k2, val) = split_cow_at(v, key2_size);
+                Ok(Some((k1, k2, val)))
+            }
+            None => Ok(None),
+        }
+    }
+}
diff --git a/crates/storage/src/hot/impls/mdbx/db_info.rs b/crates/storage/src/hot/impls/mdbx/db_info.rs
new file mode 100644
index 0000000..a6a927a
--- /dev/null
+++ b/crates/storage/src/hot/impls/mdbx/db_info.rs
@@ -0,0 +1,202 @@
+use crate::hot::ValSer;
+use reth_libmdbx::DatabaseFlags;
+
+/// Type alias for the database info cache.
+pub type DbCache = std::sync::Arc<dashmap::DashMap<&'static str, DbInfo>>;
+
+/// Information about fixed size values in a database.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum FixedSizeInfo {
+    /// Not a DUPSORT table.
+    None,
+    /// DUPSORT table without DUP_FIXED (variable value size).
+    DupSort {
+        /// Size of key2 in bytes.
+        key2_size: usize,
+    },
+    /// DUP_FIXED table with known key2 and total size.
+    DupFixed {
+        /// Size of key2 in bytes.
+        key2_size: usize,
+        /// Total fixed size (key2 + value).
+        total_size: usize,
+    },
+}
+
+impl FixedSizeInfo {
+    /// Returns true if this is a DUP_FIXED table with known total size.
+    pub const fn is_dup_fixed(&self) -> bool {
+        matches!(self, Self::DupFixed { .. })
+    }
+
+    /// Returns true if there is no fixed size (not a DUPSORT table).
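+    //
+    // A hedged sketch of how these variants are consumed by the cursor code
+    // above (the sizes are illustrative, not taken from any real table):
+    //
+    //     // DUP_FIXED: every stored entry is exactly `total_size` bytes,
+    //     // laid out as key2 || value (here: 32-byte key2, 32-byte value).
+    //     let info = FixedSizeInfo::DupFixed { key2_size: 32, total_size: 64 };
+    //     assert_eq!(info.key2_size(), Some(32));
+    //     assert_eq!(info.total_size(), Some(64));
+    //
+    //     // Plain DUPSORT: key2 size is known, but the value length varies,
+    //     // so there is no total size to zero-pad against.
+    //     let info = FixedSizeInfo::DupSort { key2_size: 32 };
+    //     assert_eq!(info.total_size(), None);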
+    pub const fn is_none(&self) -> bool {
+        matches!(self, Self::None)
+    }
+
+    /// Returns true if this is a DUPSORT table (with or without DUP_FIXED).
+    pub const fn is_dupsort(&self) -> bool {
+        matches!(self, Self::DupSort { .. } | Self::DupFixed { .. })
+    }
+
+    /// Returns the key2 size if known (for DUPSORT tables).
+    pub const fn key2_size(&self) -> Option<usize> {
+        match self {
+            Self::DupSort { key2_size } => Some(*key2_size),
+            Self::DupFixed { key2_size, .. } => Some(*key2_size),
+            Self::None => None,
+        }
+    }
+
+    /// Returns the total stored size (key2 + value) if known (only for DUP_FIXED tables).
+    pub const fn total_size(&self) -> Option<usize> {
+        match self {
+            Self::DupFixed { total_size, .. } => Some(*total_size),
+            _ => None,
+        }
+    }
+}
+
+/// Information about an MDBX database.
+pub struct DbInfo {
+    flags: DatabaseFlags,
+    dup_fixed_val_size: FixedSizeInfo,
+    dbi: u32,
+}
+
+impl std::fmt::Debug for DbInfo {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let flags = self.flags.iter_names().map(|t| t.0).collect::<Vec<_>>().join("|");
+
+        f.debug_struct("DbInfo")
+            .field("flags", &flags)
+            .field("dbi", &self.dbi)
+            .field("fixed_val_size", &self.dup_fixed_val_size)
+            .finish()
+    }
+}
+
+impl Clone for DbInfo {
+    fn clone(&self) -> Self {
+        Self {
+            flags: DatabaseFlags::from_bits(self.flags.bits()).unwrap(),
+            dbi: self.dbi,
+            dup_fixed_val_size: self.dup_fixed_val_size,
+        }
+    }
+}
+
+impl DbInfo {
+    /// Creates a new `DbInfo`.
+    pub(crate) const fn new(
+        flags: DatabaseFlags,
+        dbi: u32,
+        dup_fixed_val_size: FixedSizeInfo,
+    ) -> Self {
+        Self { flags, dbi, dup_fixed_val_size }
+    }
+
+    /// Returns the flags of the database.
+    pub const fn flags(&self) -> DatabaseFlags {
+        DatabaseFlags::from_bits(self.flags.bits()).unwrap()
+    }
+
+    /// Returns true if the database has the INTEGER_KEY flag.
+    pub const fn is_integerkey(&self) -> bool {
+        self.flags.contains(DatabaseFlags::INTEGER_KEY)
+    }
+
+    /// Returns true if the database has the DUP_SORT flag.
+    pub const fn is_dupsort(&self) -> bool {
+        self.flags.contains(DatabaseFlags::DUP_SORT)
+    }
+
+    /// Returns true if the database has the DUP_FIXED flag.
+    pub const fn is_dupfixed(&self) -> bool {
+        self.flags.contains(DatabaseFlags::DUP_FIXED)
+    }
+
+    /// Returns the fixed value size of the database, if any. This will be the
+    /// size of the values in a DUP_FIXED database.
+    ///
+    /// This will be set to the SUM of the sizes of key2 and the value for
+    /// dual-keyed tables.
+    pub const fn dup_fixed_val_size(&self) -> FixedSizeInfo {
+        self.dup_fixed_val_size
+    }
+
+    /// Returns the dbi of the database.
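+    //
+    // For reference, the `ValSer` impls below persist this metadata in the
+    // default MDBX table as fixed-width fields (a sketch of the layout):
+    //
+    //     DbInfo is 16 bytes:       [dbi: u32][flags: u32][key2_size: u32][total_size: u32]
+    //     FixedSizeInfo is 8 bytes: key2_size == 0                  => None
+    //                               key2_size > 0, total_size == 0  => DupSort { key2_size }
+    //                               key2_size > 0, total_size > 0   => DupFixed { key2_size, total_size }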
+    pub const fn dbi(&self) -> u32 {
+        self.dbi
+    }
+}
+
+impl ValSer for FixedSizeInfo {
+    fn encoded_size(&self) -> usize {
+        8 // two u32s: key2_size and total_size
+    }
+
+    fn encode_value_to<B>(&self, buf: &mut B)
+    where
+        B: bytes::BufMut + AsMut<[u8]>,
+    {
+        match self {
+            FixedSizeInfo::None => {
+                buf.put_u32(0);
+                buf.put_u32(0);
+            }
+            FixedSizeInfo::DupSort { key2_size } => {
+                buf.put_u32(*key2_size as u32);
+                buf.put_u32(0); // total_size = 0 means variable value
+            }
+            FixedSizeInfo::DupFixed { key2_size, total_size } => {
+                buf.put_u32(*key2_size as u32);
+                buf.put_u32(*total_size as u32);
+            }
+        }
+    }
+
+    fn decode_value(data: &[u8]) -> Result<Self, crate::hot::DeserError>
+    where
+        Self: Sized,
+    {
+        // NB: `put_u32` writes big-endian, so decode big-endian here to match.
+        let key2_size = u32::from_be_bytes(data[0..4].try_into().unwrap()) as usize;
+        let total_size = u32::from_be_bytes(data[4..8].try_into().unwrap()) as usize;
+        if key2_size == 0 {
+            Ok(FixedSizeInfo::None)
+        } else if total_size == 0 {
+            Ok(FixedSizeInfo::DupSort { key2_size })
+        } else {
+            Ok(FixedSizeInfo::DupFixed { key2_size, total_size })
+        }
+    }
+}
+
+impl ValSer for DbInfo {
+    fn encoded_size(&self) -> usize {
+        // 4 u32s: dbi + flags + key2_size + total_size
+        4 + 4 + 8
+    }
+
+    fn encode_value_to<B>(&self, buf: &mut B)
+    where
+        B: bytes::BufMut + AsMut<[u8]>,
+    {
+        self.dbi.encode_value_to(buf);
+        self.flags.bits().encode_value_to(buf);
+        self.dup_fixed_val_size.encode_value_to(buf);
+    }
+
+    fn decode_value(data: &[u8]) -> Result<Self, crate::hot::DeserError>
+    where
+        Self: Sized,
+    {
+        let dbi = u32::decode_value(&data[0..4])?;
+        let flags_bits = u32::decode_value(&data[4..8])?;
+        let flags = DatabaseFlags::from_bits(flags_bits).ok_or_else(|| {
+            crate::hot::DeserError::String("Invalid database flags bits".to_string())
+        })?;
+        let dup_fixed_val_size = FixedSizeInfo::decode_value(&data[8..16])?;
+        Ok(Self { flags, dbi, dup_fixed_val_size })
+    }
+}
diff --git a/crates/storage/src/hot/impls/mdbx/error.rs b/crates/storage/src/hot/impls/mdbx/error.rs
new file mode 100644
index 0000000..e9ec4a6
--- /dev/null
+++ b/crates/storage/src/hot/impls/mdbx/error.rs
@@ -0,0 +1,60 @@
+use reth::providers::errors::lockfile::StorageLockError;
+
+use crate::hot::{
+    DeserError,
+    model::{HotKvError, HotKvReadError},
+};
+
+/// Error type for reth-libmdbx based hot storage.
+#[derive(Debug, thiserror::Error)]
+pub enum MdbxError {
+    /// Inner error
+    #[error(transparent)]
+    Mdbx(#[from] reth_libmdbx::Error),
+
+    /// Error when a raw value does not conform to the expected fixed size.
+    #[error("Error with dup fixed value size: expected {expected} bytes, found {found} bytes")]
+    DupFixedErr {
+        /// Expected size
+        expected: usize,
+        /// Found size
+        found: usize,
+    },
+
+    /// Tried to invoke a DUPSORT operation on a table that is not flagged
+    /// DUPSORT.
+    #[error("tried to invoke a DUPSORT operation on a table that is not flagged DUPSORT")]
+    NotDupSort,
+
+    /// The key2 size is unknown, so a DUPSORT value cannot be split.
+    /// This error occurs when using raw cursor methods on a DUP_FIXED table
+    /// without first setting the key2/value sizes via typed methods.
+    /// Use typed methods instead of raw methods when working with dual-key tables.
+    #[error(
+        "fixed size for DUPSORT value is unknown. Hint: use typed methods instead of raw methods when working with dual-key tables"
+    )]
+    UnknownFixedSize,
+
+    /// Table not found.
+    #[error("table not found: {0}")]
+    UnknownTable(&'static str),
+
+    /// Storage lock error.
+    #[error(transparent)]
+    Locked(#[from] StorageLockError),
+
+    /// Deser.
+    #[error(transparent)]
+    Deser(#[from] DeserError),
+}
+
+impl trevm::revm::database::DBErrorMarker for MdbxError {}
+
+impl HotKvReadError for MdbxError {
+    fn into_hot_kv_error(self) -> HotKvError {
+        match self {
+            MdbxError::Deser(e) => HotKvError::Deser(e),
+            _ => HotKvError::from_err(self),
+        }
+    }
+}
diff --git a/crates/storage/src/hot/impls/mdbx/mod.rs b/crates/storage/src/hot/impls/mdbx/mod.rs
new file mode 100644
index 0000000..196859e
--- /dev/null
+++ b/crates/storage/src/hot/impls/mdbx/mod.rs
@@ -0,0 +1,415 @@
+//! Implementation of the hot key-value storage using MDBX as the underlying
+//! database.
+//!
+//! ## Notes on implementation
+//!
+//! This module provides an implementation of the [`HotKv`] trait using MDBX as
+//! the underlying database. It includes functionality for opening and
+//! managing the MDBX environment, handling read-only and read-write
+//! transactions, and managing database tables.
+//!
+//! The [`DatabaseEnv`] struct encapsulates the MDBX environment and provides
+//! methods for starting transactions. The [`DatabaseArguments`] struct
+//! allows for configuring various parameters of the database environment,
+//! such as geometry, sync mode, and maximum readers.
+//!
+//! ### Table Metadata
+//!
+//! This implementation uses the default MDBX table to store metadata about
+//! each table, including whether it uses dual keys or fixed-size values. This
+//! metadata is cached in memory for efficient access during the lifetime of
+//! the environment. Each time a table is opened, its metadata is checked
+//! against the cached values to ensure consistency.
+//!
+//! Rough edges:
+//! - The cache does not respect dropped transactions. Creating multiple tables
+//!   with the same name but different metadata in different transactions
+//!   may lead to inconsistencies.
+//! - Tables created outside of this implementation (e.g., via external tools)
+//!   will not have their metadata cached, which may lead to inconsistencies if
+//!   the same table is later opened with different metadata.
+//!
+//! Overall, we do NOT recommend using this to open existing databases that
+//! were not created and managed by this implementation.

+use reth_db::lockfile::StorageLock;
+use reth_libmdbx::{
+    Environment, EnvironmentFlags, Geometry, HandleSlowReadersReturnCode,
+    MaxReadTransactionDuration, Mode, PageSize, RO, RW, SyncMode, ffi,
+};
+use std::{
+    ops::{Deref, Range},
+    path::Path,
+    sync::Arc,
+};
+
+mod cursor;
+pub use cursor::{Cursor, CursorRO, CursorRW};
+
+mod db_info;
+pub use db_info::{DbCache, DbInfo, FixedSizeInfo};
+
+mod error;
+pub use error::MdbxError;
+
+#[cfg(any(test, feature = "test-utils"))]
+pub mod test_utils;
+
+mod tx;
+pub use tx::Tx;
+
+use crate::hot::model::{HotKv, HotKvError};
+
+mod utils;
+
+/// 1 KB in bytes
+pub const KILOBYTE: usize = 1024;
+/// 1 MB in bytes
+pub const MEGABYTE: usize = KILOBYTE * 1024;
+/// 1 GB in bytes
+pub const GIGABYTE: usize = MEGABYTE * 1024;
+/// 1 TB in bytes
+pub const TERABYTE: usize = GIGABYTE * 1024;
+
+/// MDBX allows up to 32767 readers (`MDBX_READERS_LIMIT`), but we limit it to slightly below that.
+const DEFAULT_MAX_READERS: u64 = 32_000;
+
+/// Space that a read-only transaction can occupy until the warning is emitted.
+/// See [`reth_libmdbx::EnvironmentBuilder::set_handle_slow_readers`] for more information.
+const MAX_SAFE_READER_SPACE: usize = 10 * GIGABYTE;
+
+/// Kind of environment used when opening an MDBX environment: read-only or read-write.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum DatabaseEnvKind {
+    /// Read-only MDBX environment.
+    RO,
+    /// Read-write MDBX environment.
+    RW,
+}
+
+impl DatabaseEnvKind {
+    /// Returns `true` if the environment is read-write.
+    pub const fn is_rw(&self) -> bool {
+        matches!(self, Self::RW)
+    }
+}
+
+/// Arguments for database initialization.
+#[derive(Clone, Debug)]
+pub struct DatabaseArguments {
+    /// Database geometry settings.
+    geometry: Geometry<Range<usize>>,
+
+    /// Maximum duration of a read transaction. If [None], the default value is used.
+    max_read_transaction_duration: Option<MaxReadTransactionDuration>,
+    /// Open environment in exclusive/monopolistic mode. If [None], the default value is used.
+    ///
+    /// This can be used as a replacement for `MDB_NOLOCK`, which is not supported by MDBX. In
+    /// this way, you get minimal overhead, but with correct multi-process and multi-thread
+    /// locking.
+    ///
+    /// If `true` = open the environment in exclusive/monopolistic mode, or return `MDBX_BUSY` if
+    /// the environment is already used by another process. The main feature of the exclusive mode
+    /// is the ability to open the environment placed on a network share.
+    ///
+    /// If `false` = open the environment in cooperative mode, i.e. for multi-process
+    /// access/interaction/cooperation. The main requirements of the cooperative mode are:
+    /// - Data files MUST be placed in the LOCAL file system, but NOT on a network share.
+    /// - The environment MUST be opened only by LOCAL processes, but NOT over a network.
+    /// - The OS kernel (i.e. file system and memory mapping implementation) and all processes
+    ///   that open the given environment MUST be running on physically single RAM with
+    ///   cache coherency. The only exception to the cache-consistency requirement is Linux on the
+    ///   MIPS architecture (but this case has not been tested for a long time).
+    ///
+    /// This flag takes effect only when the environment is opened and can't be changed after.
+    exclusive: Option<bool>,
+    /// MDBX allows up to 32767 readers (`MDBX_READERS_LIMIT`). This argument configures the
+    /// maximum number of readers.
+    max_readers: Option<u64>,
+    /// Defines the synchronization strategy used by the MDBX database when writing data to disk.
+    ///
+    /// This determines how aggressively MDBX ensures data durability versus prioritizing
+    /// performance. The available modes are:
+    ///
+    /// - [`SyncMode::Durable`]: Ensures all transactions are fully flushed to disk before they are
+    ///   considered committed. This provides the highest level of durability and crash safety
+    ///   but may have a performance cost.
+    /// - [`SyncMode::SafeNoSync`]: Skips certain fsync operations to improve write performance.
+    ///   This mode still maintains database integrity but may lose the most recent transactions if
+    ///   the system crashes unexpectedly.
+    ///
+    /// Choose `Durable` if consistency and crash safety are critical (e.g., production
+    /// environments). Choose `SafeNoSync` if performance is more important and occasional data
+    /// loss is acceptable (e.g., testing or ephemeral data).
+    sync_mode: SyncMode,
+}
+
+impl Default for DatabaseArguments {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DatabaseArguments {
+    /// Create new database arguments with default settings.
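+    //
+    // A hedged usage sketch of this builder (the path and sizes are
+    // illustrative only; error handling elided):
+    //
+    //     let env = DatabaseArguments::new()
+    //         .with_geometry_max_size(Some(TERABYTE))
+    //         .with_max_readers(Some(1_024))
+    //         .with_sync_mode(Some(SyncMode::Durable))
+    //         .open_rw(Path::new("/tmp/signet-hot"))?;
+    //
+    //     // `DatabaseEnv` implements `HotKv`, so transactions come from there:
+    //     let reader = env.reader()?;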
+    pub fn new() -> Self {
+        Self {
+            geometry: Geometry {
+                size: Some(0..(8 * TERABYTE)),
+                growth_step: Some(4 * GIGABYTE as isize),
+                shrink_threshold: Some(0),
+                page_size: Some(PageSize::Set(utils::default_page_size())),
+            },
+            max_read_transaction_duration: None,
+            exclusive: None,
+            max_readers: None,
+            sync_mode: SyncMode::Durable,
+        }
+    }
+
+    /// Sets the upper size limit of the db environment, the maximum database size in bytes.
+    pub const fn with_geometry_max_size(mut self, max_size: Option<usize>) -> Self {
+        if let Some(max_size) = max_size {
+            self.geometry.size = Some(0..max_size);
+        }
+        self
+    }
+
+    /// Sets the database page size value.
+    pub const fn with_geometry_page_size(mut self, page_size: Option<usize>) -> Self {
+        if let Some(size) = page_size {
+            self.geometry.page_size = Some(reth_libmdbx::PageSize::Set(size));
+        }
+
+        self
+    }
+
+    /// Sets the database sync mode.
+    pub const fn with_sync_mode(mut self, sync_mode: Option<SyncMode>) -> Self {
+        if let Some(sync_mode) = sync_mode {
+            self.sync_mode = sync_mode;
+        }
+
+        self
+    }
+
+    /// Configures the database growth step in bytes.
+    pub const fn with_growth_step(mut self, growth_step: Option<usize>) -> Self {
+        if let Some(growth_step) = growth_step {
+            self.geometry.growth_step = Some(growth_step as isize);
+        }
+        self
+    }
+
+    /// Set the maximum duration of a read transaction.
+    pub const fn max_read_transaction_duration(
+        &mut self,
+        max_read_transaction_duration: Option<MaxReadTransactionDuration>,
+    ) {
+        self.max_read_transaction_duration = max_read_transaction_duration;
+    }
+
+    /// Set the maximum duration of a read transaction.
+    pub const fn with_max_read_transaction_duration(
+        mut self,
+        max_read_transaction_duration: Option<MaxReadTransactionDuration>,
+    ) -> Self {
+        self.max_read_transaction_duration(max_read_transaction_duration);
+        self
+    }
+
+    /// Set the MDBX exclusive flag.
+    pub const fn with_exclusive(mut self, exclusive: Option<bool>) -> Self {
+        self.exclusive = exclusive;
+        self
+    }
+
+    /// Set the maximum number of readers.
+    pub const fn with_max_readers(mut self, max_readers: Option<u64>) -> Self {
+        self.max_readers = max_readers;
+        self
+    }
+
+    /// Open a read-only database at `path` with the current arguments.
+    pub fn open_ro(self, path: &Path) -> Result<DatabaseEnv, MdbxError> {
+        DatabaseEnv::open(path, DatabaseEnvKind::RO, self)
+    }
+
+    /// Open a read-write database at `path` with the current arguments.
+    pub fn open_rw(self, path: &Path) -> Result<DatabaseEnv, MdbxError> {
+        DatabaseEnv::open(path, DatabaseEnvKind::RW, self)
+    }
+}
+
+/// MDBX database environment. Wraps the low-level [Environment], and
+/// implements the [`HotKv`] trait.
+#[derive(Debug)]
+pub struct DatabaseEnv {
+    /// Libmdbx-sys environment.
+    inner: Environment,
+    /// Opened db info for reuse.
+    ///
+    /// Important: Do not manually close these DBIs, like via `mdbx_dbi_close`.
+    /// More generally, do not dynamically create, re-open, or drop tables at
+    /// runtime. It's better to perform table creation and migration only once
+    /// at startup.
+    db_cache: DbCache,
+
+    /// Write lock for when dealing with a read-write environment.
+    _lock_file: Option<StorageLock>,
+}
+
+impl DatabaseEnv {
+    /// Opens the database at the specified path with the given
+    /// [`DatabaseEnvKind`]. Acquires a lock file if opening in read-write mode.
+    pub fn open(
+        path: &Path,
+        kind: DatabaseEnvKind,
+        args: DatabaseArguments,
+    ) -> Result<Self, MdbxError> {
+        let _lock_file = if kind.is_rw() { Some(StorageLock::try_acquire(path)?) } else { None };
+
+        let mut inner_env = Environment::builder();
+
+        let mode = match kind {
+            DatabaseEnvKind::RO => Mode::ReadOnly,
+            DatabaseEnvKind::RW => {
+                // enable writemap mode in RW mode
+                inner_env.write_map();
+                Mode::ReadWrite { sync_mode: args.sync_mode }
+            }
+        };
+
+        inner_env.set_max_dbs(256);
+        inner_env.set_geometry(args.geometry);
+
+        fn is_current_process(id: u32) -> bool {
+            #[cfg(unix)]
+            {
+                id == std::os::unix::process::parent_id() || id == std::process::id()
+            }
+
+            #[cfg(not(unix))]
+            {
+                id == std::process::id()
+            }
+        }
+
+        extern "C" fn handle_slow_readers(
+            _env: *const ffi::MDBX_env,
+            _txn: *const ffi::MDBX_txn,
+            process_id: ffi::mdbx_pid_t,
+            thread_id: ffi::mdbx_tid_t,
+            read_txn_id: u64,
+            gap: std::ffi::c_uint,
+            space: usize,
+            retry: std::ffi::c_int,
+        ) -> HandleSlowReadersReturnCode {
+            if space > MAX_SAFE_READER_SPACE {
+                let message = if is_current_process(process_id as u32) {
+                    "Current process has a long-lived database transaction that grows the database file."
+                } else {
+                    "External process has a long-lived database transaction that grows the database file. \
+                     Use shorter-lived read transactions or shut down the node."
+                };
+                tracing::warn!(
+                    target: "storage::db::mdbx",
+                    ?process_id,
+                    ?thread_id,
+                    ?read_txn_id,
+                    ?gap,
+                    ?space,
+                    ?retry,
+                    "{message}"
+                )
+            }
+
+            reth_libmdbx::HandleSlowReadersReturnCode::ProceedWithoutKillingReader
+        }
+        inner_env.set_handle_slow_readers(handle_slow_readers);
+
+        inner_env.set_flags(EnvironmentFlags {
+            mode,
+            // We disable readahead because it improves performance for linear scans, but
+            // worsens it for random access (which is our access pattern outside of sync)
+            no_rdahead: true,
+            coalesce: true,
+            exclusive: args.exclusive.unwrap_or_default(),
+            ..Default::default()
+        });
+        // Configure more readers
+        inner_env.set_max_readers(args.max_readers.unwrap_or(DEFAULT_MAX_READERS));
+        // This parameter sets the maximum size of the "reclaimed list", and the unit of
+        // measurement is "pages". The reclaimed list is the list of freed pages that is populated
+        // during the lifetime of a DB transaction, and through which MDBX searches when it needs
+        // to insert a new record with overflow pages. The flow is roughly the following:
+        // 0. We need to insert a record that requires N overflow pages (in a consecutive
+        //    sequence inside the DB file).
+        // 1. Get some pages from the freelist, put them into the reclaimed list.
+        // 2. Search through the reclaimed list for the sequence of size N.
+        // 3. a. If found, return the sequence.
+        // 3. b. If not found, repeat steps 1-3. If the reclaimed list size is larger than
+        //    the `rp augment limit`, stop the search and allocate new pages at the end of the file:
+        //    https://github.com/paradigmxyz/reth/blob/2a4c78759178f66e30c8976ec5d243b53102fc9a/crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.c#L11479-L11480.
+        //
+        // Basically, this parameter controls how long we search through the freelist before
+        // trying to allocate new pages. A smaller value makes MDBX fall back to allocation
+        // sooner, while a higher value forces MDBX to search through the freelist longer until
+        // the sequence of pages is found.
+        //
+        // The default value of this parameter is set depending on the DB size. The bigger the
+        // database, the larger the `rp augment limit`.
+        // https://github.com/paradigmxyz/reth/blob/2a4c78759178f66e30c8976ec5d243b53102fc9a/crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.c#L10018-L10024.
+        //
+        // Previously, MDBX set this value as a `256 * 1024` constant. Let's fall back to this,
+        // because we want to prioritize freelist lookup speed over database growth.
+        // https://github.com/paradigmxyz/reth/blob/fa2b9b685ed9787636d962f4366caf34a9186e66/crates/storage/libmdbx-rs/mdbx-sys/libmdbx/mdbx.c#L16017.
+        inner_env.set_rp_augment_limit(256 * 1024);
+
+        if let Some(max_read_transaction_duration) = args.max_read_transaction_duration {
+            inner_env.set_max_read_transaction_duration(max_read_transaction_duration);
+        }
+
+        let env = Self { inner: inner_env.open(path)?, db_cache: Arc::default(), _lock_file };
+
+        Ok(env)
+    }
+
+    /// Start a new read-only transaction.
+    fn tx(&self) -> Result<Tx<RO>, MdbxError> {
+        self.inner
+            .begin_ro_txn()
+            .map(|tx| Tx::new(tx, self.db_cache.clone()))
+            .map_err(MdbxError::Mdbx)
+    }
+
+    /// Start a new read-write transaction.
+    fn tx_mut(&self) -> Result<Tx<RW>, MdbxError> {
+        self.inner
+            .begin_rw_txn()
+            .map(|tx| Tx::new(tx, self.db_cache.clone()))
+            .map_err(MdbxError::Mdbx)
+    }
+}
+
+impl Deref for DatabaseEnv {
+    type Target = Environment;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl HotKv for DatabaseEnv {
+    type RoTx = Tx<RO>;
+    type RwTx = Tx<RW>;
+
+    fn reader(&self) -> Result<Self::RoTx, HotKvError> {
+        self.tx().map_err(HotKvError::from_err)
+    }
+
+    fn writer(&self) -> Result<Self::RwTx, HotKvError> {
+        self.tx_mut().map_err(HotKvError::from_err)
+    }
+}
diff --git a/crates/storage/src/hot/impls/mdbx.rs b/crates/storage/src/hot/impls/mdbx/test_utils.rs
similarity index 60%
rename from crates/storage/src/hot/impls/mdbx.rs
rename to crates/storage/src/hot/impls/mdbx/test_utils.rs
index 31c642e..cec09dc 100644
--- a/crates/storage/src/hot/impls/mdbx.rs
+++ b/crates/storage/src/hot/impls/mdbx/test_utils.rs
@@ -1,402 +1,80 @@
-use crate::{
-    hot::model::{
-        DualKeyValue, DualKeyedTraverse, DualTableTraverse, HotKv, HotKvError, HotKvRead,
-        HotKvReadError, HotKvWrite, KvTraverse, KvTraverseMut, RawDualKeyValue, RawKeyValue,
-        RawValue,
-    },
-    ser::{DeserError, KeySer, MAX_KEY_SIZE, ValSer},
-    tables::{DualKeyed, MAX_FIXED_VAL_SIZE},
-};
-use bytes::{BufMut, BytesMut};
-use reth_db::{
-    Database, DatabaseEnv,
-    mdbx::{RW, TransactionKind, WriteFlags, tx::Tx},
-};
-use reth_db_api::DatabaseError;
-use reth_libmdbx::{Cursor, DatabaseFlags, RO};
-use std::borrow::Cow;
-
-/// Error type for reth-libmdbx based hot storage.
-#[derive(Debug, thiserror::Error)]
-pub enum MdbxError {
-    /// Inner error
-    #[error(transparent)]
-    Mdbx(#[from] reth_libmdbx::Error),
-
-    /// Reth error.
-    #[error(transparent)]
-    Reth(#[from] DatabaseError),
-
-    /// Deser.
- #[error(transparent)] - Deser(#[from] DeserError), -} - -impl HotKvReadError for MdbxError { - fn into_hot_kv_error(self) -> HotKvError { - match self { - MdbxError::Mdbx(e) => HotKvError::from_err(e), - MdbxError::Deser(e) => HotKvError::Deser(e), - MdbxError::Reth(e) => HotKvError::from_err(e), - } - } -} - -impl From for DatabaseError { - fn from(value: DeserError) -> Self { - DatabaseError::Other(value.to_string()) - } -} - -impl HotKv for DatabaseEnv { - type RoTx = Tx; - type RwTx = Tx; - - fn reader(&self) -> Result { - self.tx().map_err(HotKvError::from_err) - } - - fn writer(&self) -> Result { - self.tx_mut().map_err(HotKvError::from_err) - } -} - -impl HotKvRead for Tx -where - K: TransactionKind, -{ - type Error = MdbxError; - - type Traverse<'a> = Cursor; - - fn raw_traverse<'a>(&'a self, table: &str) -> Result, Self::Error> { - let dbi = self.get_dbi_raw(table)?; - let cursor = self.inner.cursor(dbi)?; - - Ok(cursor) - } - - fn raw_get<'a>( - &'a self, - table: &str, - key: &[u8], - ) -> Result>, Self::Error> { - let dbi = self.get_dbi_raw(table)?; - - self.inner.get(dbi, key.as_ref()).map_err(MdbxError::Mdbx) - } - - fn raw_get_dual<'a>( - &'a self, - _table: &str, - _key1: &[u8], - _key2: &[u8], - ) -> Result>, Self::Error> { - unimplemented!("Not implemented: raw_get_dual. Use get_dual instead."); - } - - fn get_dual( - &self, - key1: &T::Key, - key2: &T::Key2, - ) -> Result, Self::Error> { - let dbi = self.get_dbi_raw(T::NAME)?; - let mut cursor = self.inner.cursor(dbi)?; - - DualTableTraverse::::exact_dual(&mut cursor, key1, key2) - } -} - -impl HotKvWrite for Tx { - type TraverseMut<'a> = Cursor; - - fn raw_traverse_mut<'a>( - &'a mut self, - table: &str, - ) -> Result, Self::Error> { - let dbi = self.get_dbi_raw(table)?; - let cursor = self.inner.cursor(dbi)?; - - Ok(cursor) - } +//! Utilities for testing MDBX storage implementation. - fn queue_raw_put(&mut self, table: &str, key: &[u8], value: &[u8]) -> Result<(), Self::Error> { - let dbi = self.get_dbi_raw(table)?; - - self.inner.put(dbi, key, value, WriteFlags::UPSERT).map(|_| ()).map_err(MdbxError::Mdbx) - } - - fn queue_raw_put_dual( - &mut self, - _table: &str, - _key1: &[u8], - _key2: &[u8], - _value: &[u8], - ) -> Result<(), Self::Error> { - unimplemented!("Not implemented: queue_raw_put_dual. Use queue_put_dual instead."); - } - - // Specialized put for dual-keyed tables. - fn queue_put_dual( - &mut self, - key1: &T::Key, - key2: &T::Key2, - value: &T::Value, - ) -> Result<(), Self::Error> { - let k2_size = ::SIZE; - let mut scratch = [0u8; MAX_KEY_SIZE]; - - // This will be the total length of key2 + value, reserved in mdbx - let encoded_len = k2_size + value.encoded_size(); - - // Prepend the value with k2. 
- let mut buf = BytesMut::with_capacity(encoded_len); - let encoded_k2 = key2.encode_key(&mut scratch); - buf.put_slice(encoded_k2); - value.encode_value_to(&mut buf); - - let encoded_k1 = key1.encode_key(&mut scratch); - // NB: DUPSORT and RESERVE are incompatible :( - let dbi = self.get_dbi_raw(T::NAME)?; - self.inner.put(dbi, encoded_k1, &buf, Default::default())?; - - Ok(()) - } - - fn queue_raw_delete(&mut self, table: &str, key: &[u8]) -> Result<(), Self::Error> { - let dbi = self.get_dbi_raw(table)?; - self.inner.del(dbi, key, None).map(|_| ()).map_err(MdbxError::Mdbx) - } - - fn queue_raw_clear(&mut self, table: &str) -> Result<(), Self::Error> { - let dbi = self.get_dbi_raw(table)?; - self.inner.clear_db(dbi).map(|_| ()).map_err(MdbxError::Mdbx) - } - - fn queue_raw_create( - &mut self, - table: &str, - dual_key: bool, - fixed_val: bool, - ) -> Result<(), Self::Error> { - let mut flags = DatabaseFlags::default(); - - if dual_key { - flags.set(reth_libmdbx::DatabaseFlags::DUP_SORT, true); - if fixed_val { - flags.set(reth_libmdbx::DatabaseFlags::DUP_FIXED, true); - } - } - - self.inner.create_db(Some(table), flags).map(|_| ()).map_err(MdbxError::Mdbx) - } - - fn raw_commit(self) -> Result<(), Self::Error> { - // when committing, mdbx returns true on failure - self.inner.commit().map(drop).map_err(MdbxError::Mdbx) - } -} - -impl KvTraverse for Cursor -where - K: TransactionKind, -{ - fn first<'a>(&'a mut self) -> Result>, MdbxError> { - Cursor::first(self).map_err(MdbxError::Mdbx) - } - - fn last<'a>(&'a mut self) -> Result>, MdbxError> { - Cursor::last(self).map_err(MdbxError::Mdbx) - } - - fn exact<'a>(&'a mut self, key: &[u8]) -> Result>, MdbxError> { - Cursor::set(self, key).map_err(MdbxError::Mdbx) - } - - fn lower_bound<'a>(&'a mut self, key: &[u8]) -> Result>, MdbxError> { - Cursor::set_range(self, key).map_err(MdbxError::Mdbx) - } - - fn read_next<'a>(&'a mut self) -> Result>, MdbxError> { - Cursor::next(self).map_err(MdbxError::Mdbx) - } - - fn read_prev<'a>(&'a mut self) -> Result>, MdbxError> { - Cursor::prev(self).map_err(MdbxError::Mdbx) - } -} - -impl KvTraverseMut for Cursor { - fn delete_current(&mut self) -> Result<(), MdbxError> { - Cursor::del(self, Default::default()).map_err(MdbxError::Mdbx) - } +use crate::hot::{ + db::UnsafeDbWrite, + impls::mdbx::{DatabaseArguments, DatabaseEnv, DatabaseEnvKind}, + model::{HotKv, HotKvWrite}, + tables::{self, SingleKey, Table}, +}; +use alloy::primitives::Bytes; +use reth_libmdbx::MaxReadTransactionDuration; +use tempfile::{TempDir, tempdir}; + +// Test table definitions for traversal tests +#[derive(Debug)] +struct TestTable; + +impl Table for TestTable { + const NAME: &'static str = "mdbx_test_table"; + type Key = u64; + type Value = Bytes; } -impl DualKeyedTraverse for Cursor -where - K: TransactionKind, -{ - fn exact_dual<'a>( - &'a mut self, - _key1: &[u8], - _key2: &[u8], - ) -> Result>, MdbxError> { - unimplemented!("Use DualTableTraverse for exact_dual"); - } +impl SingleKey for TestTable {} - fn next_dual_above<'a>( - &'a mut self, - _key1: &[u8], - _key2: &[u8], - ) -> Result>, MdbxError> { - unimplemented!("Use DualTableTraverse for next_dual_above"); - } +/// Creates a temporary MDBX database for testing that will be automatically +/// cleaned up when the TempDir is dropped. 
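+//
+// A hedged usage sketch (the table and data are illustrative; `TestTable` is
+// the test-only table defined above):
+//
+//     let (_dir, db) = create_test_rw_db();
+//     let writer = db.writer().unwrap();
+//     writer.queue_put::<TestTable>(&1u64, &Bytes::from(vec![0xde, 0xad])).unwrap();
+//     writer.raw_commit().unwrap();
+//
+// Keep the returned `TempDir` alive as long as the env is in use; dropping it
+// deletes the database directory.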
+pub fn create_test_rw_db() -> (TempDir, DatabaseEnv) { + let dir = tempdir().unwrap(); - fn next_k1<'a>(&'a mut self) -> Result>, MdbxError> { - unimplemented!("Use DualTableTraverse for next_k1"); - } + let args = DatabaseArguments::new() + .with_max_read_transaction_duration(Some(MaxReadTransactionDuration::Unbounded)); - fn next_k2<'a>(&'a mut self) -> Result>, MdbxError> { - unimplemented!("Use DualTableTraverse for next_k2"); - } -} + let db = DatabaseEnv::open(dir.path(), DatabaseEnvKind::RW, args).unwrap(); -impl DualTableTraverse for Cursor -where - T: DualKeyed, - K: TransactionKind, -{ - fn next_dual_above( - &mut self, - key1: &T::Key, - key2: &T::Key2, - ) -> Result>, MdbxError> { - Ok(get_both_range_helper::(self, key1, key2)? - .map(T::decode_prepended_value) - .transpose()? - .map(|(k2, v)| (key1.clone(), k2, v))) - } + // Create tables from the `crate::tables::hot` module + let writer = db.writer().unwrap(); - fn next_k1(&mut self) -> Result>, MdbxError> { - let Some((k, v)) = self.next_nodup::, Cow<'_, [u8]>>()? else { - return Ok(None); - }; + writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); - let k1 = T::Key::decode_key(&k)?; - let (k2, v) = T::decode_prepended_value(v)?; + writer.queue_create::().unwrap(); - Ok(Some((k1, k2, v))) - } + writer.commit().expect("Failed to commit table creation"); - fn next_k2(&mut self) -> Result>, MdbxError> { - let Some((k, v)) = self.next_dup::, Cow<'_, [u8]>>()? else { - return Ok(None); - }; - - let k = T::Key::decode_key(&k)?; - let (k2, v) = T::decode_prepended_value(v)?; - - Ok(Some((k, k2, v))) - } -} - -/// Helper to handle dup fixed value tables -fn dup_fixed_helper( - cursor: &mut Cursor, - key1: &T::Key, - key2: &T::Key2, - f: impl FnOnce(&mut Cursor, &[u8], &[u8]) -> Result, -) -> Result -where - T: DualKeyed, - K: TransactionKind, -{ - let mut key1_buf = [0u8; MAX_KEY_SIZE]; - let mut key2_buf = [0u8; MAX_KEY_SIZE]; - let key1_bytes = key1.encode_key(&mut key1_buf); - let key2_bytes = key2.encode_key(&mut key2_buf); - - // K2 slice must be EXACTLY the size of the fixed value size, if the - // table has one. This is a bit ugly, and results in an extra - // allocation for fixed-size values. This could be avoided using - // max value size. 
- if T::IS_FIXED_VAL { - let mut buf = [0u8; MAX_KEY_SIZE + MAX_FIXED_VAL_SIZE]; - buf[..::SIZE].copy_from_slice(key2_bytes); - - let kvs: usize = ::SIZE + T::FIXED_VAL_SIZE.unwrap(); - - f(cursor, key1_bytes, &buf[..kvs]) - } else { - f(cursor, key1_bytes, key2_bytes) - } -} - -// Helper to call get_both_range with dup fixed handling -fn get_both_range_helper<'a, T, K>( - cursor: &'a mut Cursor, - key1: &T::Key, - key2: &T::Key2, -) -> Result>, MdbxError> -where - T: DualKeyed, - K: TransactionKind, -{ - dup_fixed_helper::>>( - cursor, - key1, - key2, - |cursor, key1_bytes, key2_bytes| { - cursor.get_both_range(key1_bytes, key2_bytes).map_err(MdbxError::Mdbx) - }, - ) + (dir, db) } #[cfg(test)] mod tests { use super::*; - use crate::{ - hot::model::{HotDbWrite, HotKv, HotKvRead, HotKvWrite, TableTraverse, TableTraverseMut}, - tables::{SingleKey, Table, hot}, + use crate::hot::{ + conformance::{conformance, test_unwind_conformance}, + db::UnsafeDbWrite, + impls::mdbx::Tx, + model::{DualTableTraverse, HotKv, HotKvRead, HotKvWrite, TableTraverse, TableTraverseMut}, + tables, }; use alloy::primitives::{Address, B256, BlockNumber, Bytes, U256}; - use reth::primitives::{Account, Bytecode, Header, SealedHeader}; - use reth_db::DatabaseEnv; + use reth::primitives::{Account, Header, SealedHeader}; + use reth_libmdbx::{RO, RW}; use serial_test::serial; - - // Test table definitions for traversal tests - #[derive(Debug)] - struct TestTable; - - impl Table for TestTable { - const NAME: &'static str = "mdbx_test_table"; - type Key = u64; - type Value = Bytes; - } - - impl SingleKey for TestTable {} + use trevm::revm::bytecode::Bytecode; /// Create a temporary MDBX database for testing that will be automatically cleaned up fn run_test(f: F) { - let db = reth_db::test_utils::create_test_rw_db(); - - // Create tables from the `crate::tables::hot` module - let mut writer = db.db().writer().unwrap(); + let (dir, db) = create_test_rw_db(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); + f(&db); - writer.commit().expect("Failed to commit table creation"); - - f(db.db()); + drop(dir); } /// Create test data @@ -412,7 +90,7 @@ mod tests { fn create_test_bytecode() -> (B256, Bytecode) { let hash = B256::from_slice(&[0x2; 32]); - let code = reth::primitives::Bytecode::new_raw(vec![0x60, 0x80, 0x60, 0x40].into()); + let code = Bytecode::new_raw(vec![0x60, 0x80, 0x60, 0x40].into()); (hash, code) } @@ -442,14 +120,14 @@ mod tests { // Test HotKv::writer() and basic write operations { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); // Create tables first - writer.queue_create::().unwrap(); + writer.queue_create::().unwrap(); // Write account data - writer.queue_put::(&address, &account).unwrap(); - writer.queue_put::(&hash, &bytecode).unwrap(); + writer.queue_put::(&address, &account).unwrap(); + writer.queue_put::(&hash, &bytecode).unwrap(); // Commit the transaction writer.raw_commit().unwrap(); @@ -461,17 +139,17 @@ mod tests { // Read account data let read_account: Option = - reader.get::(&address).unwrap(); + reader.get::(&address).unwrap(); assert_eq!(read_account, Some(account)); // Read bytecode - let read_bytecode: Option = reader.get::(&hash).unwrap(); + let 
read_bytecode: Option = reader.get::(&hash).unwrap(); assert_eq!(read_bytecode, Some(bytecode)); // Test non-existent data let nonexistent_addr = Address::from_slice(&[0xff; 20]); let nonexistent_account: Option = - reader.get::(&nonexistent_addr).unwrap(); + reader.get::(&nonexistent_addr).unwrap(); assert_eq!(nonexistent_account, None); } } @@ -489,10 +167,10 @@ mod tests { // Test raw write operations { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); // Create table - writer.queue_raw_create(table_name, false, false).unwrap(); + writer.queue_raw_create(table_name, None, None).unwrap(); // Put raw data writer.queue_raw_put(table_name, key, value).unwrap(); @@ -514,7 +192,7 @@ mod tests { // Test raw delete { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); writer.queue_raw_delete(table_name, key).unwrap(); writer.raw_commit().unwrap(); @@ -536,16 +214,16 @@ mod tests { fn test_dual_keyed_operations_inner(db: &DatabaseEnv) { let address = Address::from_slice(&[0x1; 20]); - let storage_key = B256::from_slice(&[0x5; 32]); + let storage_key = U256::from(5); let storage_value = U256::from(999u64); // Test dual-keyed table operations { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); // Put storage data using dual keys writer - .queue_put_dual::(&address, &storage_key, &storage_value) + .queue_put_dual::(&address, &storage_key, &storage_value) .unwrap(); writer.raw_commit().unwrap(); @@ -556,8 +234,10 @@ mod tests { let reader: Tx = db.reader().unwrap(); // Read storage using dual key lookup - let read_value = - reader.get_dual::(&address, &storage_key).unwrap().unwrap(); + let read_value = reader + .get_dual::(&address, &storage_key) + .unwrap() + .unwrap(); assert_eq!(read_value, storage_value); } @@ -573,29 +253,29 @@ mod tests { // Add some data let (block_number, header) = create_test_header(); { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_put::(&block_number, &header).unwrap(); + let writer: Tx = db.writer().unwrap(); + writer.queue_put::(&block_number, &header).unwrap(); writer.raw_commit().unwrap(); } // Verify data exists { let reader: Tx = db.reader().unwrap(); - let read_header: Option
= reader.get::(&block_number).unwrap(); + let read_header: Option
= reader.get::(&block_number).unwrap(); assert_eq!(read_header, Some(header.clone())); } // Clear the table { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_clear::().unwrap(); + let writer: Tx = db.writer().unwrap(); + writer.queue_clear::().unwrap(); writer.raw_commit().unwrap(); } // Verify table is empty { let reader: Tx = db.reader().unwrap(); - let read_header: Option
= reader.get::(&block_number).unwrap(); + let read_header: Option
= reader.get::(&block_number).unwrap(); assert_eq!(read_header, None); } } @@ -623,11 +303,11 @@ mod tests { // Test batch writes { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); // Write multiple accounts for (address, account) in &accounts { - writer.queue_put::(address, account).unwrap(); + writer.queue_put::(address, account).unwrap(); } writer.raw_commit().unwrap(); @@ -639,7 +319,7 @@ mod tests { for (address, expected_account) in &accounts { let read_account: Option = - reader.get::(address).unwrap(); + reader.get::(address).unwrap(); assert_eq!(read_account.as_ref(), Some(expected_account)); } } @@ -649,7 +329,7 @@ mod tests { let reader: Tx = db.reader().unwrap(); let addresses: Vec
= accounts.iter().map(|(addr, _)| *addr).collect(); let read_accounts: Vec<(_, Option)> = - reader.get_many::(addresses.iter()).unwrap(); + reader.get_many::(addresses.iter()).unwrap(); for (i, (_, expected_account)) in accounts.iter().enumerate() { assert_eq!(read_accounts[i].1.as_ref(), Some(expected_account)); @@ -667,8 +347,8 @@ mod tests { // Setup initial data { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_put::(&address, &account).unwrap(); + let writer: Tx = db.writer().unwrap(); + writer.queue_put::(&address, &account).unwrap(); writer.raw_commit().unwrap(); } @@ -677,17 +357,17 @@ mod tests { // Modify data in a writer transaction { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); let modified_account = Account { nonce: 999, balance: U256::from(9999u64), bytecode_hash: None }; - writer.queue_put::(&address, &modified_account).unwrap(); + writer.queue_put::(&address, &modified_account).unwrap(); writer.raw_commit().unwrap(); } // Reader should still see original data (snapshot isolation) { let read_account: Option = - reader.get::(&address).unwrap(); + reader.get::(&address).unwrap(); assert_eq!(read_account, Some(account)); } @@ -695,7 +375,7 @@ mod tests { { let new_reader: Tx = db.reader().unwrap(); let read_account: Option = - new_reader.get::(&address).unwrap(); + new_reader.get::(&address).unwrap(); assert_eq!(read_account.unwrap().nonce, 999); } } @@ -710,8 +390,8 @@ mod tests { // Setup data { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_put::(&address, &account).unwrap(); + let writer: Tx = db.writer().unwrap(); + writer.queue_put::(&address, &account).unwrap(); writer.raw_commit().unwrap(); } @@ -721,9 +401,9 @@ mod tests { let reader3: Tx = db.reader().unwrap(); // All readers should see the same data - let account1: Option = reader1.get::(&address).unwrap(); - let account2: Option = reader2.get::(&address).unwrap(); - let account3: Option = reader3.get::(&address).unwrap(); + let account1: Option = reader1.get::(&address).unwrap(); + let account2: Option = reader2.get::(&address).unwrap(); + let account3: Option = reader3.get::(&address).unwrap(); assert_eq!(account1, Some(account)); assert_eq!(account2, Some(account)); @@ -751,11 +431,11 @@ mod tests { // Test writing to a table without creating it first { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); let (address, account) = create_test_account(); // This should handle the case where table doesn't exist - let result = writer.queue_put::(&address, &account); + let result = writer.queue_put::(&address, &account); match result { Ok(_) => { // If it succeeds, commit should work @@ -779,7 +459,7 @@ mod tests { let header = SealedHeader::new_unhashed(header); { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); // Write different types writer.put_header(&header).unwrap(); @@ -791,10 +471,11 @@ mod tests { let reader: Tx = db.reader().unwrap(); // Read and verify - let read_header: Option
= reader.get::(&block_number).unwrap(); + let read_header: Option
= reader.get::(&block_number).unwrap(); assert_eq!(read_header.as_ref(), Some(header.header())); - let read_hash: Option = reader.get::(&header.hash()).unwrap(); + let read_hash: Option = + reader.get::(&header.hash()).unwrap(); assert_eq!(read_hash, Some(header.number)); } } @@ -811,15 +492,15 @@ mod tests { let large_bytecode = Bytecode::new_raw(large_code_vec.clone().into()); { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_put::(&hash, &large_bytecode).unwrap(); + let writer: Tx = db.writer().unwrap(); + writer.queue_create::().unwrap(); + writer.queue_put::(&hash, &large_bytecode).unwrap(); writer.raw_commit().unwrap(); } { let reader: Tx = db.reader().unwrap(); - let read_bytecode: Option = reader.get::(&hash).unwrap(); + let read_bytecode: Option = reader.get::(&hash).unwrap(); assert_eq!(read_bytecode, Some(large_bytecode)); } } @@ -845,7 +526,7 @@ mod tests { // Insert test data { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -855,8 +536,7 @@ mod tests { // Test cursor traversal { let tx: Tx = db.reader().unwrap(); - let dbi = tx.get_dbi_raw(TestTable::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = tx.new_cursor::().unwrap(); // Test first() let first_result = TableTraverse::::first(&mut cursor).unwrap(); @@ -910,7 +590,7 @@ mod tests { // Insert test data { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -920,8 +600,7 @@ mod tests { // Test sequential navigation { let tx: Tx = db.reader().unwrap(); - let dbi = tx.get_dbi_raw(TestTable::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = tx.new_cursor::().unwrap(); // Start from first and traverse forward let mut current_idx = 0; @@ -978,7 +657,7 @@ mod tests { // Insert test data { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -988,8 +667,7 @@ mod tests { { let tx: Tx = db.writer().unwrap(); - let dbi = tx.get_dbi_raw(TestTable::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = tx.new_cursor::().unwrap(); // Navigate to middle entry let first = TableTraverse::::first(&mut cursor).unwrap().unwrap(); @@ -1001,14 +679,14 @@ mod tests { // Delete current entry (key 2) TableTraverseMut::::delete_current(&mut cursor).unwrap(); + drop(cursor); tx.raw_commit().unwrap(); } // Verify deletion { let tx: Tx = db.reader().unwrap(); - let dbi = tx.get_dbi_raw(TestTable::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = tx.new_cursor::().unwrap(); // Should only have first and third entries let first = TableTraverse::::first(&mut cursor).unwrap().unwrap(); @@ -1051,10 +729,10 @@ mod tests { // Insert test data { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); for (address, account) in &test_accounts { - writer.queue_put::(address, account).unwrap(); + writer.queue_put::(address, account).unwrap(); } writer.raw_commit().unwrap(); @@ -1063,18 +741,19 @@ mod tests { // Test typed table traversal { let tx: Tx = db.reader().unwrap(); - let dbi = tx.get_dbi_raw(hot::PlainAccountState::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = 
tx.new_cursor::().unwrap(); // Test first with type-safe operations - let first_raw = TableTraverse::::first(&mut cursor).unwrap(); + let first_raw = + TableTraverse::::first(&mut cursor).unwrap(); assert!(first_raw.is_some()); let (first_key, first_account) = first_raw.unwrap(); assert_eq!(first_key, test_accounts[0].0); assert_eq!(first_account, test_accounts[0].1); // Test last - let last_raw = TableTraverse::::last(&mut cursor).unwrap(); + let last_raw = + TableTraverse::::last(&mut cursor).unwrap(); assert!(last_raw.is_some()); let (last_key, last_account) = last_raw.unwrap(); assert_eq!(last_key, test_accounts.last().unwrap().0); @@ -1083,7 +762,7 @@ mod tests { // Test exact lookup let target_address = &test_accounts[2].0; let exact_account = - TableTraverse::::exact(&mut cursor, target_address) + TableTraverse::::exact(&mut cursor, target_address) .unwrap(); assert!(exact_account.is_some()); assert_eq!(exact_account.unwrap(), test_accounts[2].1); @@ -1093,9 +772,11 @@ mod tests { partial_addr[19] = 3; // Between entries 2 and 3 let range_addr = Address::from_slice(&partial_addr); - let range_result = - TableTraverse::::lower_bound(&mut cursor, &range_addr) - .unwrap(); + let range_result = TableTraverse::::lower_bound( + &mut cursor, + &range_addr, + ) + .unwrap(); assert!(range_result.is_some()); let (found_addr, found_account) = range_result.unwrap(); assert_eq!(found_addr, test_accounts[3].0); @@ -1112,9 +793,9 @@ mod tests { let one_addr = Address::repeat_byte(0x01); let two_addr = Address::repeat_byte(0x02); - let one_slot = B256::with_last_byte(0x01); - let two_slot = B256::with_last_byte(0x06); - let three_slot = B256::with_last_byte(0x09); + let one_slot = U256::from(0x01); + let two_slot = U256::from(0x06); + let three_slot = U256::from(0x09); let one_value = U256::from(0x100); let two_value = U256::from(0x200); @@ -1123,7 +804,7 @@ mod tests { let five_value = U256::from(0x500); // Setup test storage data - let test_storage: Vec<(Address, B256, U256)> = vec![ + let test_storage: Vec<(Address, U256, U256)> = vec![ (one_addr, one_slot, one_value), (one_addr, two_slot, two_value), (one_addr, three_slot, three_value), @@ -1133,11 +814,11 @@ mod tests { // Insert test data { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); for (address, storage_key, value) in &test_storage { writer - .queue_put_dual::(address, storage_key, value) + .queue_put_dual::(address, storage_key, value) .unwrap(); } @@ -1147,15 +828,14 @@ mod tests { // Test dual-keyed traversal { let tx: Tx = db.reader().unwrap(); - let dbi = tx.get_dbi_raw(hot::PlainStorageState::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = tx.new_cursor::().unwrap(); // Test exact dual lookup let address = &test_storage[1].0; let storage_key = &test_storage[1].1; let expected_value = &test_storage[1].2; - let exact_result = DualTableTraverse::::exact_dual( + let exact_result = DualTableTraverse::::exact_dual( &mut cursor, address, storage_key, @@ -1165,8 +845,8 @@ mod tests { assert_eq!(exact_result, *expected_value); // Test range lookup for dual keys - let search_key = B256::with_last_byte(0x02); - let range_result = DualTableTraverse::::next_dual_above( + let search_key = U256::from(0x02); + let range_result = DualTableTraverse::::next_dual_above( &mut cursor, &test_storage[0].0, // Address 0x01 &search_key, @@ -1181,7 +861,7 @@ mod tests { // Test next_k1 (move to next primary key) // First position cursor at first entry of first address - 
DualTableTraverse::::exact_dual( + DualTableTraverse::::exact_dual( &mut cursor, &test_storage[0].0, &test_storage[0].1, @@ -1190,7 +870,7 @@ mod tests { // Move to next primary key (different address) let next_k1_result = - DualTableTraverse::::next_k1(&mut cursor).unwrap(); + DualTableTraverse::::next_k1(&mut cursor).unwrap(); assert!(next_k1_result.is_some()); let (next_addr, next_storage_key, next_value) = next_k1_result.unwrap(); assert_eq!(next_addr, test_storage[3].0); // Address 0x02 @@ -1207,27 +887,26 @@ mod tests { fn test_dual_table_traverse_empty_results_inner(db: &DatabaseEnv) { // Setup minimal test data let address = Address::from_slice(&[0x01; 20]); - let storage_key = B256::from_slice(&[0x01; 32]); + let storage_key = U256::from(1); let value = U256::from(100); { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); writer - .queue_put_dual::(&address, &storage_key, &value) + .queue_put_dual::(&address, &storage_key, &value) .unwrap(); writer.raw_commit().unwrap(); } { let tx: Tx = db.reader().unwrap(); - let dbi = tx.get_dbi_raw(hot::PlainStorageState::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = tx.new_cursor::().unwrap(); // Test exact lookup for non-existent dual key let missing_addr = Address::from_slice(&[0xFF; 20]); - let missing_key = B256::from_slice(&[0xFF; 32]); + let missing_key = U256::from(0xFF); - let exact_missing = DualTableTraverse::::exact_dual( + let exact_missing = DualTableTraverse::::exact_dual( &mut cursor, &missing_addr, &missing_key, @@ -1236,8 +915,8 @@ mod tests { assert!(exact_missing.is_none()); // Test range lookup beyond all data - let beyond_key = B256::from_slice(&[0xFF; 32]); - let range_missing = DualTableTraverse::::next_dual_above( + let beyond_key = U256::MAX; + let range_missing = DualTableTraverse::::next_dual_above( &mut cursor, &address, &beyond_key, @@ -1246,7 +925,7 @@ mod tests { assert!(range_missing.is_none()); // Position at the only entry, then try next_k1 - DualTableTraverse::::exact_dual( + DualTableTraverse::::exact_dual( &mut cursor, &address, &storage_key, @@ -1254,7 +933,7 @@ mod tests { .unwrap(); let next_k1_missing = - DualTableTraverse::::next_k1(&mut cursor).unwrap(); + DualTableTraverse::::next_k1(&mut cursor).unwrap(); assert!(next_k1_missing.is_none()); } } @@ -1268,8 +947,7 @@ mod tests { // TestTable is already created but empty { let tx: Tx = db.reader().unwrap(); - let dbi = tx.get_dbi_raw(TestTable::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = tx.new_cursor::().unwrap(); // All operations should return None on empty table assert!(TableTraverse::::first(&mut cursor).unwrap().is_none()); @@ -1296,7 +974,7 @@ mod tests { ]; { - let mut writer: Tx = db.writer().unwrap(); + let writer: Tx = db.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -1305,8 +983,7 @@ mod tests { { let tx: Tx = db.reader().unwrap(); - let dbi = tx.get_dbi_raw(TestTable::NAME).unwrap(); - let mut cursor = tx.inner.cursor(dbi).unwrap(); + let mut cursor = tx.new_cursor::().unwrap(); // Test that cursor operations maintain state correctly @@ -1349,4 +1026,81 @@ mod tests { assert_eq!(next_after_range.0, test_data[1].0); } } + + #[test] + fn mdbx_conformance() { + run_test(conformance) + } + + #[test] + fn test_cache_db_info() { + run_test(test_cache_db_info_inner) + } + + fn test_cache_db_info_inner(db: &DatabaseEnv) { + // Tables are already created in create_test_rw_db() + // 
Try to get cache_db_info for an existing table + let reader: Tx = db.reader().unwrap(); + + // This should work - Headers table was created in setup + reader.cache_db_info::().unwrap(); + + // Try with TestTable which was also created + reader.cache_db_info::().unwrap(); + + // Use a DUP_FIXED table and assert the result contains the expected + // flags + let result3 = reader.cache_db_info::().unwrap(); + assert!(result3.is_dupfixed()); + } + + #[test] + fn test_storage_roundtrip_debug() { + run_test(test_storage_roundtrip_debug_inner) + } + + fn test_storage_roundtrip_debug_inner(db: &DatabaseEnv) { + use alloy::primitives::address; + + let addr = address!("0xabcdef0123456789abcdef0123456789abcdef01"); + let slot = U256::from(1); + let value = U256::from(999); + + // Write storage + { + let writer: Tx = db.writer().unwrap(); + + // Check db_info before write + { + let db_info = writer.cache_db_info::().unwrap(); + assert!(db_info.is_dupfixed()); + } + + writer.queue_put_dual::(&addr, &slot, &value).unwrap(); + writer.raw_commit().unwrap(); + } + + // Read storage + { + let reader: Tx = db.reader().unwrap(); + + // Check db_info after write + { + let db_info = reader.cache_db_info::().unwrap(); + assert!(db_info.is_dupfixed()); + } + + let read_value = reader.get_dual::(&addr, &slot).unwrap(); + assert!(read_value.is_some()); + assert_eq!(read_value.unwrap(), U256::from(999)); + } + } + + #[test] + #[serial] + fn mdbx_unwind_conformance() { + let (_dir_a, db_a) = create_test_rw_db(); + let (_dir_b, db_b) = create_test_rw_db(); + test_unwind_conformance(&db_a, &db_b); + } } diff --git a/crates/storage/src/hot/impls/mdbx/tx.rs b/crates/storage/src/hot/impls/mdbx/tx.rs new file mode 100644 index 0000000..9385955 --- /dev/null +++ b/crates/storage/src/hot/impls/mdbx/tx.rs @@ -0,0 +1,353 @@ +//! Transaction wrapper for libmdbx-sys. +use crate::hot::{ + KeySer, MAX_FIXED_VAL_SIZE, MAX_KEY_SIZE, ValSer, + impls::mdbx::{Cursor, DbCache, DbInfo, FixedSizeInfo, MdbxError}, + model::{DualTableTraverse, HotKvRead, HotKvWrite}, + tables::{DualKey, SingleKey, Table}, +}; +use alloy::primitives::B256; +use dashmap::mapref::one::Ref; +use reth_libmdbx::{DatabaseFlags, RW, Transaction, TransactionKind, WriteFlags}; +use std::borrow::Cow; + +const TX_BUFFER_SIZE: usize = MAX_KEY_SIZE + MAX_FIXED_VAL_SIZE; + +/// Wrapper for the libmdbx transaction. +#[derive(Debug)] +pub struct Tx { + /// Libmdbx-sys transaction. + pub inner: Transaction, + + /// Cached MDBX DBIs for reuse. + dbs: DbCache, +} + +impl Tx { + /// Creates new `Tx` object with a `RO` or `RW` transaction and optionally enables metrics. + #[inline] + pub(crate) const fn new(inner: Transaction, dbis: DbCache) -> Self { + Self { inner, dbs: dbis } + } + + /// Gets the database handle for the DbInfo table. + fn db_info_table_dbi(&self) -> Result { + self.inner.open_db(None).map(|db| db.dbi()).map_err(MdbxError::Mdbx) + } + + fn read_db_info_table(&self, name: &'static str) -> Result { + let mut key = B256::ZERO; + let to_copy = core::cmp::min(32, name.len()); + key[..to_copy].copy_from_slice(&name.as_bytes()[..to_copy]); + + let db_info_dbi = self.db_info_table_dbi()?; + self.inner + .get::>(db_info_dbi, key.as_slice())? + .as_deref() + .map(DbInfo::decode_value) + .transpose() + .map_err(MdbxError::Deser)? + .ok_or(MdbxError::UnknownTable(name)) + } + + /// Cache the database info for a specific table by name. 
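+    //
+    // A hedged usage sketch (the `tables::Headers` table is assumed here, as
+    // in the test setup elsewhere in this crate):
+    //
+    //     let tx = env.reader()?;
+    //     let info = tx.cache_db_info::<tables::Headers>()?;
+    //     let dbi = info.dbi();
+    //
+    // The first call reads the metadata from the default MDBX table; later
+    // calls for the same table are served from the shared in-memory cache.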
+    /// Cache the database info for a specific table by name.
+    pub fn cache_db_info_raw(
+        &self,
+        table: &'static str,
+    ) -> Result<Ref<'_, &'static str, DbInfo>, MdbxError> {
+        if let Some(info) = self.dbs.get(table) {
+            return Ok(info);
+        }
+
+        let db_info = self.read_db_info_table(table)?;
+
+        self.dbs.insert(table, db_info);
+        Ok(self.dbs.get(table).expect("Just inserted"))
+    }
+
+    /// Caches the database info for a specific table.
+    pub fn cache_db_info<T: Table>(&self) -> Result<Ref<'_, &'static str, DbInfo>, MdbxError> {
+        self.cache_db_info_raw(T::NAME)
+    }
+
+    /// Gets the database handle for the given table name.
+    pub fn get_dbi_raw(&self, table: &'static str) -> Result<reth_libmdbx::ffi::MDBX_dbi, MdbxError> {
+        self.cache_db_info_raw(table).map(|info| info.dbi())
+    }
+
+    /// Gets the database handle for the given table.
+    pub fn get_dbi<T: Table>(&self) -> Result<reth_libmdbx::ffi::MDBX_dbi, MdbxError> {
+        self.get_dbi_raw(T::NAME)
+    }
+
+    /// Gets this transaction ID.
+    pub fn id(&self) -> Result<u64, MdbxError> {
+        self.inner.id().map_err(MdbxError::Mdbx)
+    }
+
+    /// Create [`Cursor`] for raw table name.
+    pub fn new_cursor_raw<'a>(&'a self, name: &'static str) -> Result<Cursor<'a, K>, MdbxError> {
+        let info = self.cache_db_info_raw(name)?;
+
+        let inner = self.inner.cursor_with_dbi(info.dbi())?;
+
+        Ok(Cursor::new(inner, info))
+    }
+
+    /// Create a [`Cursor`] for the given table.
+    pub fn new_cursor<'a, T: Table>(&'a self) -> Result<Cursor<'a, K>, MdbxError> {
+        Self::new_cursor_raw(self, T::NAME)
+    }
+}
+
+impl Tx<RW> {
+    fn store_db_info(&self, table: &'static str, db_info: DbInfo) -> Result<(), MdbxError> {
+        // Write through the raw main-DB handle: the DbInfo table itself
+        // cannot be resolved through the cache it populates.
+        let dbi = self.db_info_table_dbi()?;
+
+        // Encode into small stack buffers: the key is the table name
+        // zero-padded to 32 bytes, the value is the encoded DbInfo.
+        let mut key_buf = [0u8; MAX_KEY_SIZE];
+        let mut value_buf: &mut [u8] = &mut [0u8; MAX_FIXED_VAL_SIZE];
+
+        {
+            let to_copy = core::cmp::min(32, table.len());
+            key_buf[..to_copy].copy_from_slice(&table.as_bytes()[..to_copy]);
+            key_buf[to_copy..32].fill(0);
+        }
+        {
+            db_info.encode_value_to(&mut value_buf);
+        }
+
+        self.inner
+            .put(dbi, key_buf, &value_buf[..db_info.encoded_size()], WriteFlags::UPSERT)
+            .map(|_| ())
+            .map_err(MdbxError::Mdbx)?;
+        self.dbs.insert(table, db_info);
+
+        Ok(())
+    }
+}
+
+impl<K> HotKvRead for Tx<K>
+where
+    K: TransactionKind,
+{
+    type Error = MdbxError;
+
+    type Traverse<'a> = Cursor<'a, K>;
+
+    fn raw_traverse<'a>(&'a self, table: &'static str) -> Result<Self::Traverse<'a>, Self::Error> {
+        self.new_cursor_raw(table)
+    }
+
+    fn raw_get<'a>(
+        &'a self,
+        table: &'static str,
+        key: &[u8],
+    ) -> Result<Option<Cow<'a, [u8]>>, Self::Error> {
+        let dbi = self.get_dbi_raw(table)?;
+
+        self.inner.get(dbi, key.as_ref()).map_err(MdbxError::Mdbx)
+    }
+
+    fn raw_get_dual<'a>(
+        &'a self,
+        _table: &'static str,
+        _key1: &[u8],
+        _key2: &[u8],
+    ) -> Result<Option<Cow<'a, [u8]>>, Self::Error> {
+        unimplemented!("Use DualTableTraverse for raw_get_dual");
+    }
+
+    fn get_dual<T: DualKey>(
+        &self,
+        key1: &T::Key,
+        key2: &T::Key2,
+    ) -> Result<Option<T::Value>, Self::Error> {
+        let mut cursor = self.new_cursor::<T>()?;
+
+        DualTableTraverse::<T>::exact_dual(&mut cursor, key1, key2)
+    }
+}
+
+impl HotKvWrite for Tx<RW> {
+    type TraverseMut<'a> = Cursor<'a, RW>;
+
+    fn raw_traverse_mut<'a>(
+        &'a self,
+        table: &'static str,
+    ) -> Result<Self::TraverseMut<'a>, Self::Error> {
+        self.new_cursor_raw(table)
+    }
+
+    fn queue_raw_put(
+        &self,
+        table: &'static str,
+        key: &[u8],
+        value: &[u8],
+    ) -> Result<(), Self::Error> {
+        let dbi = self.get_dbi_raw(table)?;
+
+        self.inner.put(dbi, key, value, WriteFlags::UPSERT).map(|_| ()).map_err(MdbxError::Mdbx)
+    }
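+
+    // For DUPSORT tables MDBX keeps a sorted set of duplicate "values" per
+    // key1; this crate stores `key2 || value` as that duplicate. Updating a
+    // (key1, key2) pair therefore means deleting the old duplicate and
+    // inserting a new one, which is what the method below does.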
+    fn queue_raw_put_dual(
+        &self,
+        table: &'static str,
+        key1: &[u8],
+        key2: &[u8],
+        value: &[u8],
+    ) -> Result<(), Self::Error> {
+        // Get the DBI and release the borrow, allowing us to write to buf
+        let db_info = self.cache_db_info_raw(table)?;
+        let fsi = db_info.dup_fixed_val_size();
+        let dbi = db_info.dbi();
+        drop(db_info);
+
+        // For DUPSORT tables, we must delete any existing entry with the same
+        // (key1, key2) before inserting, because MDBX stores key2 as part of
+        // the value (key2||actual_value). Without deletion, putting a new value
+        // for the same key2 creates a duplicate entry instead of replacing.
+        if fsi.is_dupsort() {
+            // Prepare search value (key2, optionally padded for DUP_FIXED)
+            let mut search_buf = [0u8; TX_BUFFER_SIZE];
+            let search_val = if let Some(ts) = fsi.total_size() {
+                search_buf[..key2.len()].copy_from_slice(key2);
+                search_buf[key2.len()..ts].fill(0);
+                &search_buf[..ts]
+            } else {
+                key2
+            };
+
+            // get_both_range finds entry where key=key1 and value >= search_val
+            // If found and the key2 portion matches, delete it
+            let mut cursor = self.inner.cursor_with_dbi(dbi).map_err(MdbxError::Mdbx)?;
+            if let Some(found_val) =
+                cursor.get_both_range::<Cow<'_, [u8]>>(key1, search_val).map_err(MdbxError::Mdbx)?
+            {
+                // Check if found value starts with our key2
+                if found_val.starts_with(key2) {
+                    cursor.del(Default::default()).map_err(MdbxError::Mdbx)?;
+                }
+            }
+        }
+
+        // For DUPSORT tables, the "value" is key2 concatenated with the actual
+        // value. If the combined value fits, we can write via our scratch
+        // buffer; otherwise we need to allocate.
+        //
+        // NB: DUPSORT and RESERVE are incompatible :(
+        if key2.len() + value.len() > TX_BUFFER_SIZE {
+            // Allocate a buffer for the combined value
+            let mut combined = Vec::with_capacity(key2.len() + value.len());
+            combined.extend_from_slice(key2);
+            combined.extend_from_slice(value);
+            return self
+                .inner
+                .put(dbi, key1, &combined, WriteFlags::default())
+                .map(|_| ())
+                .map_err(MdbxError::Mdbx);
+        } else {
+            // Use the scratch buffer
+            let mut buffer = [0u8; TX_BUFFER_SIZE];
+            let buf = &mut buffer[..key2.len() + value.len()];
+            buf[..key2.len()].copy_from_slice(key2);
+            buf[key2.len()..].copy_from_slice(value);
+            self.inner.put(dbi, key1, buf, Default::default())?;
+        }
+
+        Ok(())
+    }
+
+    fn queue_raw_delete(&self, table: &'static str, key: &[u8]) -> Result<(), Self::Error> {
+        let dbi = self.get_dbi_raw(table)?;
+        self.inner.del(dbi, key, None).map(|_| ()).map_err(MdbxError::Mdbx)
+    }
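+
+    // Deleting one duplicate must present the value in its stored form
+    // (key2, zero-padded to the fixed size on DUP_FIXED tables) so MDBX can
+    // locate the exact duplicate to remove.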
+    fn queue_raw_delete_dual(
+        &self,
+        table: &'static str,
+        key1: &[u8],
+        key2: &[u8],
+    ) -> Result<(), Self::Error> {
+        // Get the table info, then release the borrow
+        let db_info = self.cache_db_info_raw(table)?;
+        let fixed_val = db_info.dup_fixed_val_size();
+        let dbi = db_info.dbi();
+        drop(db_info);
+
+        // For DUPSORT tables, the "value" is key2 concatenated with the actual
+        // value. If the table is ALSO dupfixed, we need to pad key2 to the
+        // fixed size
+        if let Some(total_size) = fixed_val.total_size() {
+            // Copy key2 to scratch buffer and zero-pad to total fixed size
+            let mut buffer = [0u8; TX_BUFFER_SIZE];
+            buffer[..key2.len()].copy_from_slice(key2);
+            buffer[key2.len()..total_size].fill(0);
+            let k2 = &buffer[..total_size];
+
+            self.inner.del(dbi, key1, Some(k2)).map(|_| ()).map_err(MdbxError::Mdbx)
+        } else {
+            self.inner.del(dbi, key1, Some(key2)).map(|_| ()).map_err(MdbxError::Mdbx)
+        }
+    }
+
+    fn queue_raw_clear(&self, table: &'static str) -> Result<(), Self::Error> {
+        let dbi = self.get_dbi_raw(table)?;
+        self.inner.clear_db(dbi).map(|_| ()).map_err(MdbxError::Mdbx)
+    }
+
+    fn queue_raw_create(
+        &self,
+        table: &'static str,
+        dual_key: Option<usize>,
+        fixed_val: Option<usize>,
+    ) -> Result<(), Self::Error> {
+        let mut flags = DatabaseFlags::default();
+
+        let mut fsi = FixedSizeInfo::None;
+
+        if let Some(key2_size) = dual_key {
+            flags.set(reth_libmdbx::DatabaseFlags::DUP_SORT, true);
+            if let Some(value_size) = fixed_val {
+                flags.set(reth_libmdbx::DatabaseFlags::DUP_FIXED, true);
+                fsi = FixedSizeInfo::DupFixed { key2_size, total_size: key2_size + value_size };
+            } else {
+                // DUPSORT without DUP_FIXED - variable value size
+                fsi = FixedSizeInfo::DupSort { key2_size };
+            }
+        }
+
+        // no clone. sad.
+        let flags2 = DatabaseFlags::from_bits(flags.bits()).unwrap();
+
+        self.inner.create_db(Some(table), flags2).map(|_| ())?;
+        let dbi = self.inner.open_db(Some(table))?.dbi();
+
+        let db_info = DbInfo::new(flags, dbi, fsi);
+
+        self.store_db_info(table, db_info)?;
+
+        Ok(())
+    }
+
+    fn queue_put<T: SingleKey>(&self, key: &T::Key, value: &T::Value) -> Result<(), Self::Error> {
+        let dbi = self.get_dbi::<T>()?;
+        let mut key_buf = [0u8; MAX_KEY_SIZE];
+        let key_bytes = key.encode_key(&mut key_buf);
+
+        self.inner
+            .reserve(dbi, key_bytes, value.encoded_size(), WriteFlags::UPSERT)
+            .map_err(MdbxError::Mdbx)
+            .map(|mut reserved| value.encode_value_to(&mut reserved))
+    }
+
+    fn raw_commit(self) -> Result<(), Self::Error> {
+        // when committing, mdbx returns true on failure
+        self.inner.commit().map(drop).map_err(MdbxError::Mdbx)
+    }
+}
diff --git a/crates/storage/src/hot/impls/mdbx/utils.rs b/crates/storage/src/hot/impls/mdbx/utils.rs
new file mode 100644
index 0000000..6ca8dab
--- /dev/null
+++ b/crates/storage/src/hot/impls/mdbx/utils.rs
@@ -0,0 +1,13 @@
+/// Returns the default page size that can be used in this OS.
+pub(crate) fn default_page_size() -> usize {
+    let os_page_size = page_size::get();
+
+    // source: https://gitflic.ru/project/erthink/libmdbx/blob?file=mdbx.h#line-num-821
+    let libmdbx_max_page_size = 0x10000;
+
+    // May lead to errors if it's reduced further because of the potential size of the
+    // data.
+    let min_page_size = 4096;
+
+    os_page_size.clamp(min_page_size, libmdbx_max_page_size)
+}
diff --git a/crates/storage/src/hot/impls/mem.rs b/crates/storage/src/hot/impls/mem.rs
index ed43cd2..e747388 100644
--- a/crates/storage/src/hot/impls/mem.rs
+++ b/crates/storage/src/hot/impls/mem.rs
@@ -1,17 +1,20 @@
-use crate::{
-    hot::model::{
-        DualKeyValue, DualKeyedTraverse, DualTableTraverse, HotKv, HotKvError, HotKvRead,
-        HotKvReadError, HotKvWrite, KvTraverse, KvTraverseMut, RawDualKeyValue, RawKeyValue,
-        RawValue,
+//! In-memory key-value store implementation.
+//!
+//! This is not a good implementation for production use, but is useful for
+//! testing.
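Since this backend is driven entirely through the `HotKv` family of traits, a small end-to-end sketch may help. It is assembled from call patterns the tests later in this diff already use (`MemKv::new`, `writer`/`reader`, `queue_raw_put`, `raw_commit`, `raw_get`); the `signet_storage` import paths are an assumption based on this diff's module layout, not confirmed public API.

```rust
// Illustrative sketch only; paths assumed from the module layout in this diff.
use signet_storage::hot::{
    impls::mem::MemKv,
    model::{HotKv, HotKvRead, HotKvWrite},
};

fn main() {
    let store = MemKv::new();

    // Writes are queued on the transaction and stay invisible to readers
    // until raw_commit applies them to the shared store.
    let writer = store.writer().unwrap();
    writer.queue_raw_put("table1", &[1], b"value1").unwrap();
    writer.raw_commit().unwrap();

    // A fresh read transaction observes the committed value.
    let reader = store.reader().unwrap();
    let value = reader.raw_get("table1", &[1]).unwrap();
    assert_eq!(value.as_deref(), Some(b"value1".as_slice()));
}
```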
+ +use crate::hot::{ + model::{ + DualKeyTraverse, HotKv, HotKvError, HotKvRead, HotKvReadError, HotKvWrite, KvTraverse, + KvTraverseMut, RawDualKeyValue, RawKeyValue, RawValue, }, - ser::{DeserError, KeySer, MAX_KEY_SIZE}, - tables::DualKeyed, + ser::{DeserError, MAX_KEY_SIZE}, }; use bytes::Bytes; use std::{ borrow::Cow, collections::BTreeMap, - sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}, + sync::{Arc, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}, }; // Type aliases for store structure @@ -69,15 +72,15 @@ impl MemKv { buf } - /// SAFETY: - /// Caller must ensure that `key` lives long enough. + /// Split a dual key into its two component keys. + /// Returns owned copies of the key parts to avoid lifetime issues. #[track_caller] - fn split_dual_key<'a>(key: &[u8]) -> (Cow<'a, [u8]>, Cow<'a, [u8]>) { + fn split_dual_key(key: &[u8]) -> (Cow<'static, [u8]>, Cow<'static, [u8]>) { assert_eq!(key.len(), MAX_KEY_SIZE * 2, "Key length does not match expected dual key size"); - let k1 = &key[..MAX_KEY_SIZE]; - let k2 = &key[MAX_KEY_SIZE..]; + let k1 = key[..MAX_KEY_SIZE].to_vec(); + let k2 = key[MAX_KEY_SIZE..].to_vec(); - unsafe { std::mem::transmute((Cow::Borrowed(k1), Cow::Borrowed(k2))) } + (Cow::Owned(k1), Cow::Owned(k2)) } } @@ -108,7 +111,7 @@ unsafe impl Sync for MemKvRoTx {} /// Read-write transaction for MemKv. pub struct MemKvRwTx { guard: RwLockWriteGuard<'static, Store>, - queued_ops: OpStore, + queued_ops: Mutex, // Keep the store alive while the transaction exists _store: Arc>, @@ -116,7 +119,7 @@ pub struct MemKvRwTx { impl MemKvRwTx { fn commit_inner(&mut self) { - let ops = std::mem::take(&mut self.queued_ops); + let ops = std::mem::take(&mut *self.queued_ops.lock().unwrap()); for (table, table_op) in ops.into_iter() { table_op.apply(&table, &mut self.guard); @@ -171,13 +174,13 @@ impl QueuedKvOp { /// Queued table operation #[derive(Debug)] enum QueuedTableOp { - Modify { ops: TableOp }, - Clear { new_table: TableOp }, + Modify { ops: Mutex }, + Clear { new_table: Mutex }, } impl Default for QueuedTableOp { fn default() -> Self { - QueuedTableOp::Modify { ops: TableOp::new() } + QueuedTableOp::Modify { ops: Mutex::new(TableOp::new()) } } } @@ -186,41 +189,55 @@ impl QueuedTableOp { matches!(self, QueuedTableOp::Clear { .. 
})
    }

-    fn get(&self, key: &MemStoreKey) -> Option<&QueuedKvOp> {
+    fn get(&self, key: &MemStoreKey) -> Option<QueuedKvOp> {
         match self {
-            QueuedTableOp::Modify { ops } => ops.get(key),
-            QueuedTableOp::Clear { new_table } => new_table.get(key),
+            QueuedTableOp::Modify { ops } => ops.lock().unwrap().get(key).cloned(),
+            QueuedTableOp::Clear { new_table } => new_table.lock().unwrap().get(key).cloned(),
         }
     }

-    fn put(&mut self, key: MemStoreKey, op: QueuedKvOp) {
+    fn put(&self, key: MemStoreKey, op: QueuedKvOp) {
         match self {
-            QueuedTableOp::Modify { ops } | QueuedTableOp::Clear { new_table: ops } => {
-                ops.insert(key, op);
+            QueuedTableOp::Modify { ops } => {
+                ops.lock().unwrap().insert(key, op);
+            }
+            QueuedTableOp::Clear { new_table } => {
+                new_table.lock().unwrap().insert(key, op);
             }
         }
     }

-    fn delete(&mut self, key: MemStoreKey) {
+    fn delete(&self, key: MemStoreKey) {
         match self {
-            QueuedTableOp::Modify { ops } | QueuedTableOp::Clear { new_table: ops } => {
-                ops.insert(key, QueuedKvOp::Delete);
+            QueuedTableOp::Modify { ops } => {
+                ops.lock().unwrap().insert(key, QueuedKvOp::Delete);
+            }
+            QueuedTableOp::Clear { new_table } => {
+                new_table.lock().unwrap().insert(key, QueuedKvOp::Delete);
             }
         }
     }

-    /// Get mutable reference to the inner ops if applicable
+    /// Get the inner ops mutex
+    const fn ops_mutex(&self) -> &Mutex<TableOp> {
+        match self {
+            QueuedTableOp::Modify { ops } => ops,
+            QueuedTableOp::Clear { new_table } => new_table,
+        }
+    }
+
+    /// Apply the queued operations to the store
     fn apply(self, key: &str, store: &mut Store) {
         match self {
             QueuedTableOp::Modify { ops } => {
                 let table = store.entry(key.to_owned()).or_default();
-                for (key, op) in ops {
+                for (key, op) in ops.into_inner().unwrap() {
                     op.apply(table, key);
                 }
             }
             QueuedTableOp::Clear { new_table } => {
                 let mut table = StoreTable::new();
-                for (k, op) in new_table {
+                for (k, op) in new_table.into_inner().unwrap() {
                     op.apply(&mut table, k);
                 }
@@ -260,7 +277,7 @@ impl HotKvReadError for MemKvError {
 /// Memory cursor for traversing a BTreeMap
 pub struct MemKvCursor<'a> {
     table: &'a StoreTable,
-    current_key: Option<MemStoreKey>,
+    current_key: Mutex<Option<MemStoreKey>>,
 }

 impl core::fmt::Debug for MemKvCursor<'_> {
@@ -272,27 +289,29 @@ impl<'a> MemKvCursor<'a> {
     /// Create a new cursor for the given table
     pub const fn new(table: &'a StoreTable) -> Self {
-        Self { table, current_key: None }
+        Self { table, current_key: Mutex::new(None) }
     }

     /// Get the current key the cursor is positioned at
     pub fn current_key(&self) -> MemStoreKey {
-        self.current_key.unwrap_or([0u8; MAX_KEY_SIZE * 2])
+        self.current_key.lock().unwrap().unwrap_or([0u8; MAX_KEY_SIZE * 2])
     }

     /// Set the current key the cursor is positioned at
-    pub const fn set_current_key(&mut self, key: MemStoreKey) {
-        self.current_key = Some(key);
+    pub fn set_current_key(&self, key: MemStoreKey) {
+        *self.current_key.lock().unwrap() = Some(key);
     }

     /// Clear the current key the cursor is positioned at
-    pub const fn clear_current_key(&mut self) {
-        self.current_key = None;
+    pub fn clear_current_key(&self) {
+        *self.current_key.lock().unwrap() = None;
     }

     /// Get the current k1 the cursor is positioned at
     fn current_k1(&self) -> [u8; MAX_KEY_SIZE] {
         self.current_key
+            .lock()
+            .unwrap()
             .map(|key| key[..MAX_KEY_SIZE].try_into().unwrap())
             .unwrap_or([0u8; MAX_KEY_SIZE])
     }
@@ -304,7 +323,7 @@ impl<'a> KvTraverse for MemKvCursor<'a> {
             self.clear_current_key();
             return Ok(None);
         };
-        self.current_key = Some(*key);
+        self.set_current_key(*key);

         Ok(Some((Cow::Borrowed(key),
Cow::Borrowed(value.as_ref())))) } @@ -313,7 +332,7 @@ impl<'a> KvTraverse for MemKvCursor<'a> { self.clear_current_key(); return Ok(None); }; - self.current_key = Some(*key); + self.set_current_key(*key); Ok(Some((Cow::Borrowed(key), Cow::Borrowed(value.as_ref())))) } @@ -335,7 +354,7 @@ impl<'a> KvTraverse for MemKvCursor<'a> { self.set_current_key(*found_key); Ok(Some((Cow::Borrowed(found_key), Cow::Borrowed(value.as_ref())))) } else { - self.current_key = self.table.last_key_value().map(|(k, _)| *k); + *self.current_key.lock().unwrap() = self.table.last_key_value().map(|(k, _)| *k); Ok(None) } } @@ -365,7 +384,52 @@ impl<'a> KvTraverse for MemKvCursor<'a> { } // Implement DualKeyedTraverse (basic implementation - delegates to raw methods) -impl<'a> DualKeyedTraverse for MemKvCursor<'a> { +impl<'a> DualKeyTraverse for MemKvCursor<'a> { + fn first<'b>(&'b mut self) -> Result>, MemKvError> { + let Some((key, value)) = self.table.first_key_value() else { + self.clear_current_key(); + return Ok(None); + }; + self.set_current_key(*key); + let (k1, k2) = MemKv::split_dual_key(key); + Ok(Some((k1, k2, Cow::Borrowed(value.as_ref())))) + } + + fn last<'b>(&'b mut self) -> Result>, MemKvError> { + let Some((key, value)) = self.table.last_key_value() else { + self.clear_current_key(); + return Ok(None); + }; + self.set_current_key(*key); + let (k1, k2) = MemKv::split_dual_key(key); + Ok(Some((k1, k2, Cow::Borrowed(value.as_ref())))) + } + + fn read_next<'b>(&'b mut self) -> Result>, MemKvError> { + use core::ops::Bound; + let current = self.current_key(); + // Use Excluded bound to find strictly greater than current key + let Some((found_key, value)) = + self.table.range((Bound::Excluded(current), Bound::Unbounded)).next() + else { + return Ok(None); + }; + self.set_current_key(*found_key); + let (k1, k2) = MemKv::split_dual_key(found_key); + Ok(Some((k1, k2, Cow::Borrowed(value.as_ref())))) + } + + fn read_prev<'b>(&'b mut self) -> Result>, MemKvError> { + let current = self.current_key(); + let Some((k, v)) = self.table.range(..current).next_back() else { + self.clear_current_key(); + return Ok(None); + }; + self.set_current_key(*k); + let (k1, k2) = MemKv::split_dual_key(k); + Ok(Some((k1, k2, Cow::Borrowed(v.as_ref())))) + } + fn exact_dual<'b>( &'b mut self, key1: &[u8], @@ -392,7 +456,7 @@ impl<'a> DualKeyedTraverse for MemKvCursor<'a> { // scan forward until finding a new k1 let last_k1 = self.current_k1(); - DualKeyedTraverse::next_dual_above(self, &last_k1, &[0xffu8; MAX_KEY_SIZE]) + DualKeyTraverse::next_dual_above(self, &last_k1, &[0xffu8; MAX_KEY_SIZE]) } fn next_k2<'b>(&'b mut self) -> Result>, MemKvError> { @@ -400,46 +464,64 @@ impl<'a> DualKeyedTraverse for MemKvCursor<'a> { let (current_k1, current_k2) = MemKv::split_dual_key(¤t_key); // scan forward until finding a new k2 for the same k1 - DualKeyedTraverse::next_dual_above(self, ¤t_k1, ¤t_k2) + DualKeyTraverse::next_dual_above(self, ¤t_k1, ¤t_k2) } -} - -// Implement DualTableTraverse for typed dual-keyed table access -impl<'a, T> DualTableTraverse for MemKvCursor<'a> -where - T: DualKeyed, -{ - fn next_dual_above( - &mut self, - key1: &T::Key, - key2: &T::Key2, - ) -> Result>, MemKvError> { - let mut key1_buf = [0u8; MAX_KEY_SIZE]; - let mut key2_buf = [0u8; MAX_KEY_SIZE]; - let key1_bytes = key1.encode_key(&mut key1_buf); - let key2_bytes = key2.encode_key(&mut key2_buf); - DualKeyedTraverse::next_dual_above(self, key1_bytes, key2_bytes)? 
- .map(T::decode_kkv_tuple) - .transpose() - .map_err(Into::into) + fn last_of_k1<'b>( + &'b mut self, + key1: &[u8], + ) -> Result>, MemKvError> { + // Search for (k1, 0xff...) and find last entry <= that + let search_key = MemKv::dual_key(key1, &[0xffu8; MAX_KEY_SIZE]); + let Some((found_key, value)) = self.table.range(..=search_key).next_back() else { + self.clear_current_key(); + return Ok(None); + }; + let (found_k1, found_k2) = MemKv::split_dual_key(found_key); + // Compare only the relevant prefix of found_k1 with key1 + // found_k1 is MAX_KEY_SIZE bytes, key1 may be shorter + if &found_k1.as_ref()[..key1.len()] != key1 { + self.clear_current_key(); + return Ok(None); + } + self.set_current_key(*found_key); + Ok(Some((found_k1, found_k2, Cow::Borrowed(value.as_ref())))) } - fn next_k1(&mut self) -> Result>, MemKvError> { - DualKeyedTraverse::next_k1(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into) + fn previous_k1<'b>(&'b mut self) -> Result>, MemKvError> { + let current_k1 = self.current_k1(); + // Find entries before start of current k1 + let search_start = MemKv::dual_key(¤t_k1, &[0u8; MAX_KEY_SIZE]); + let Some((found_key, value)) = self.table.range(..search_start).next_back() else { + self.clear_current_key(); + return Ok(None); + }; + self.set_current_key(*found_key); + let (k1, k2) = MemKv::split_dual_key(found_key); + Ok(Some((k1, k2, Cow::Borrowed(value.as_ref())))) } - fn next_k2(&mut self) -> Result>, MemKvError> { - DualKeyedTraverse::next_k2(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into) + fn previous_k2<'b>(&'b mut self) -> Result>, MemKvError> { + let current_key = self.current_key(); + let (current_k1, _) = MemKv::split_dual_key(¤t_key); + let Some((found_key, value)) = self.table.range(..current_key).next_back() else { + return Ok(None); + }; + let (found_k1, found_k2) = MemKv::split_dual_key(found_key); + if found_k1.as_ref() != current_k1.as_ref() { + return Ok(None); // Previous entry has different k1 + } + self.set_current_key(*found_key); + Ok(Some((found_k1, found_k2, Cow::Borrowed(value.as_ref())))) } } /// Memory cursor for read-write operations pub struct MemKvCursorMut<'a> { table: &'a StoreTable, - queued_ops: &'a mut TableOp, + queued_ops: &'a Mutex, is_cleared: bool, - current_key: Option, + current_key: Mutex>, } impl core::fmt::Debug for MemKvCursorMut<'_> { @@ -450,35 +532,38 @@ impl core::fmt::Debug for MemKvCursorMut<'_> { impl<'a> MemKvCursorMut<'a> { /// Create a new mutable cursor for the given table and queued operations - const fn new(table: &'a StoreTable, queued_ops: &'a mut TableOp, is_cleared: bool) -> Self { - Self { table, queued_ops, is_cleared, current_key: None } + const fn new(table: &'a StoreTable, queued_ops: &'a Mutex, is_cleared: bool) -> Self { + Self { table, queued_ops, is_cleared, current_key: Mutex::new(None) } } /// Get the current key the cursor is positioned at pub fn current_key(&self) -> MemStoreKey { - self.current_key.unwrap_or([0u8; MAX_KEY_SIZE * 2]) + self.current_key.lock().unwrap().unwrap_or([0u8; MAX_KEY_SIZE * 2]) } /// Set the current key the cursor is positioned at - pub const fn set_current_key(&mut self, key: MemStoreKey) { - self.current_key = Some(key); + pub fn set_current_key(&self, key: MemStoreKey) { + *self.current_key.lock().unwrap() = Some(key); } /// Clear the current key the cursor is positioned at - pub const fn clear_current_key(&mut self) { - self.current_key = None; + pub fn clear_current_key(&self) { + *self.current_key.lock().unwrap() = None; } /// Get the 
current k1 the cursor is positioned at fn current_k1(&self) -> [u8; MAX_KEY_SIZE] { self.current_key + .lock() + .unwrap() .map(|key| key[..MAX_KEY_SIZE].try_into().unwrap()) .unwrap_or([0u8; MAX_KEY_SIZE]) } /// Get value for a key, returning owned bytes fn get_owned(&self, key: &MemStoreKey) -> Option { - if let Some(op) = self.queued_ops.get(key) { + let queued_ops = self.queued_ops.lock().unwrap(); + if let Some(op) = queued_ops.get(key) { match op { QueuedKvOp::Put { value } => Some(value.clone()), QueuedKvOp::Delete => None, @@ -490,20 +575,28 @@ impl<'a> MemKvCursorMut<'a> { } } - /// Get the first key-value pair >= key, returning owned data + /// Get the first key-value pair >= key, returning owned data. + /// + /// Merges queued operations with committed data, giving precedence to queued + /// ops for read-your-writes consistency. fn get_range_owned(&self, key: &MemStoreKey) -> Option<(MemStoreKey, Bytes)> { - let q = self.queued_ops.range(*key..).next(); + // Find the first candidate from both queued ops and committed storage. + let queued_ops = self.queued_ops.lock().unwrap(); + let q = queued_ops.range(*key..).next(); let c = if !self.is_cleared { self.table.range(*key..).next() } else { None }; match (q, c) { (None, None) => None, + + // Both sources have candidates - pick the smaller key, preferring + // queued ops on ties for read-your-writes consistency. (Some((qk, queued)), Some((ck, current))) => { if qk <= ck { - // Queued operation takes precedence match queued { QueuedKvOp::Put { value } => Some((*qk, value.clone())), QueuedKvOp::Delete => { - // Skip deleted entry and look for next + // This key is marked deleted; increment to skip it + // and recurse to find the next valid entry. let mut next_key = *qk; for i in (0..next_key.len()).rev() { if next_key[i] < u8::MAX { @@ -512,6 +605,7 @@ impl<'a> MemKvCursorMut<'a> { } next_key[i] = 0; } + drop(queued_ops); self.get_range_owned(&next_key) } } @@ -519,9 +613,12 @@ impl<'a> MemKvCursorMut<'a> { Some((*ck, current.clone())) } } + + // Only queued ops have a candidate. (Some((qk, queued)), None) => match queued { QueuedKvOp::Put { value } => Some((*qk, value.clone())), QueuedKvOp::Delete => { + // Increment past the deleted key and recurse. let mut next_key = *qk; for i in (0..next_key.len()).rev() { if next_key[i] < u8::MAX { @@ -530,18 +627,26 @@ impl<'a> MemKvCursorMut<'a> { } next_key[i] = 0; } + drop(queued_ops); self.get_range_owned(&next_key) } }, + + // Only committed storage has a candidate. (None, Some((ck, current))) => Some((*ck, current.clone())), } } - /// Get the first key-value pair > key (strictly greater), returning owned data + /// Get the first key-value pair > key (strictly greater), returning owned data. + /// + /// Similar to `get_range_owned` but uses exclusive bounds for cursor + /// navigation (read_next). fn get_range_exclusive_owned(&self, key: &MemStoreKey) -> Option<(MemStoreKey, Bytes)> { use core::ops::Bound; - let q = self.queued_ops.range((Bound::Excluded(*key), Bound::Unbounded)).next(); + // Find candidates strictly greater than the given key. + let queued_ops = self.queued_ops.lock().unwrap(); + let q = queued_ops.range((Bound::Excluded(*key), Bound::Unbounded)).next(); let c = if !self.is_cleared { self.table.range((Bound::Excluded(*key), Bound::Unbounded)).next() } else { @@ -550,33 +655,47 @@ impl<'a> MemKvCursorMut<'a> { match (q, c) { (None, None) => None, + + // Both sources have candidates. 
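+            // On a tie (qk == ck) the queued op shadows the committed row,
+            // which is what preserves read-your-writes during iteration.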
(Some((qk, queued)), Some((ck, current))) => { if qk <= ck { - // Queued operation takes precedence match queued { QueuedKvOp::Put { value } => Some((*qk, value.clone())), + // Deleted in queue; skip and recurse. QueuedKvOp::Delete => { - // This key is deleted, recurse to find the next one - self.get_range_exclusive_owned(qk) + let next_key = *qk; + drop(queued_ops); + self.get_range_exclusive_owned(&next_key) } } } else { - // Check if the current key has a delete queued - if let Some(QueuedKvOp::Delete) = self.queued_ops.get(ck) { - self.get_range_exclusive_owned(ck) + // Committed key is smaller, but check if it's been deleted. + if let Some(QueuedKvOp::Delete) = queued_ops.get(ck) { + let next_key = *ck; + drop(queued_ops); + self.get_range_exclusive_owned(&next_key) } else { Some((*ck, current.clone())) } } } + + // Only queued ops have a candidate. (Some((qk, queued)), None) => match queued { QueuedKvOp::Put { value } => Some((*qk, value.clone())), - QueuedKvOp::Delete => self.get_range_exclusive_owned(qk), + QueuedKvOp::Delete => { + let next_key = *qk; + drop(queued_ops); + self.get_range_exclusive_owned(&next_key) + } }, + + // Only committed storage has a candidate; verify not deleted. (None, Some((ck, current))) => { - // Check if the current key has a delete queued - if let Some(QueuedKvOp::Delete) = self.queued_ops.get(ck) { - self.get_range_exclusive_owned(ck) + if let Some(QueuedKvOp::Delete) = queued_ops.get(ck) { + let next_key = *ck; + drop(queued_ops); + self.get_range_exclusive_owned(&next_key) } else { Some((*ck, current.clone())) } @@ -584,28 +703,101 @@ impl<'a> MemKvCursorMut<'a> { } } - /// Get the last key-value pair < key, returning owned data + /// Get the last key-value pair < key, returning owned data. + /// + /// Reverse iteration for cursor navigation (read_prev). Merges queued ops + /// with committed data, preferring the larger key (closest to search key). fn get_range_reverse_owned(&self, key: &MemStoreKey) -> Option<(MemStoreKey, Bytes)> { - let q = self.queued_ops.range(..*key).next_back(); + // Find candidates strictly less than the given key, scanning backwards. + let queued_ops = self.queued_ops.lock().unwrap(); + let q = queued_ops.range(..*key).next_back(); let c = if !self.is_cleared { self.table.range(..*key).next_back() } else { None }; match (q, c) { (None, None) => None, + + // Both sources have candidates - pick the larger key (closest to + // search position), preferring queued ops on ties. (Some((qk, queued)), Some((ck, current))) => { if qk >= ck { - // Queued operation takes precedence match queued { QueuedKvOp::Put { value } => Some((*qk, value.clone())), - QueuedKvOp::Delete => self.get_range_reverse_owned(qk), + // Deleted; recurse to find the previous valid entry. + QueuedKvOp::Delete => { + let next_key = *qk; + drop(queued_ops); + self.get_range_reverse_owned(&next_key) + } } } else { Some((*ck, current.clone())) } } + + // Only queued ops have a candidate. (Some((qk, queued)), None) => match queued { QueuedKvOp::Put { value } => Some((*qk, value.clone())), - QueuedKvOp::Delete => self.get_range_reverse_owned(qk), + QueuedKvOp::Delete => { + let next_key = *qk; + drop(queued_ops); + self.get_range_reverse_owned(&next_key) + } }, + + // Only committed storage has a candidate. + (None, Some((ck, current))) => Some((*ck, current.clone())), + } + } + + /// Get the last key-value pair <= key, returning owned data. + /// + /// Reverse iteration for cursor navigation (last_of_k1). 
Merges queued ops + /// with committed data, preferring the larger key (closest to search key). + fn get_range_reverse_inclusive_owned(&self, key: &MemStoreKey) -> Option<(MemStoreKey, Bytes)> { + // Find candidates <= the given key, scanning backwards. + let queued_ops = self.queued_ops.lock().unwrap(); + let q = queued_ops.range(..=*key).next_back(); + let c = if !self.is_cleared { self.table.range(..=*key).next_back() } else { None }; + + match (q, c) { + (None, None) => None, + + // Both sources have candidates - pick the larger key (closest to + // search position), preferring queued ops on ties. + (Some((qk, queued)), Some((ck, current))) => { + if qk >= ck { + match queued { + QueuedKvOp::Put { value } => Some((*qk, value.clone())), + // Deleted; recurse to find the previous valid entry. + QueuedKvOp::Delete => { + let next_key = *qk; + drop(queued_ops); + self.get_range_reverse_owned(&next_key) + } + } + } else { + // Check if the committed key was deleted + if let Some(QueuedKvOp::Delete) = queued_ops.get(ck) { + let next_key = *ck; + drop(queued_ops); + self.get_range_reverse_owned(&next_key) + } else { + Some((*ck, current.clone())) + } + } + } + + // Only queued ops have a candidate. + (Some((qk, queued)), None) => match queued { + QueuedKvOp::Put { value } => Some((*qk, value.clone())), + QueuedKvOp::Delete => { + let next_key = *qk; + drop(queued_ops); + self.get_range_reverse_owned(&next_key) + } + }, + + // Only committed storage has a candidate. (None, Some((ck, current))) => Some((*ck, current.clone())), } } @@ -617,10 +809,10 @@ impl<'a> KvTraverse for MemKvCursorMut<'a> { // Get the first effective key-value pair if let Some((key, value)) = self.get_range_owned(&start_key) { - self.current_key = Some(key); + self.set_current_key(key); Ok(Some((Cow::Owned(key.to_vec()), Cow::Owned(value.to_vec())))) } else { - self.current_key = None; + self.clear_current_key(); Ok(None) } } @@ -629,17 +821,17 @@ impl<'a> KvTraverse for MemKvCursorMut<'a> { let end_key = [0xffu8; MAX_KEY_SIZE * 2]; if let Some((key, value)) = self.get_range_reverse_owned(&end_key) { - self.current_key = Some(key); + self.set_current_key(key); Ok(Some((Cow::Owned(key.to_vec()), Cow::Owned(value.to_vec())))) } else { - self.current_key = None; + self.clear_current_key(); Ok(None) } } fn exact<'b>(&'b mut self, key: &[u8]) -> Result>, MemKvError> { let search_key = MemKv::key(key); - self.current_key = Some(search_key); + self.set_current_key(search_key); if let Some(value) = self.get_owned(&search_key) { Ok(Some(Cow::Owned(value.to_vec()))) @@ -652,10 +844,10 @@ impl<'a> KvTraverse for MemKvCursorMut<'a> { let search_key = MemKv::key(key); if let Some((found_key, value)) = self.get_range_owned(&search_key) { - self.current_key = Some(found_key); + self.set_current_key(found_key); Ok(Some((Cow::Owned(found_key.to_vec()), Cow::Owned(value.to_vec())))) } else { - self.current_key = None; + self.clear_current_key(); Ok(None) } } @@ -665,10 +857,10 @@ impl<'a> KvTraverse for MemKvCursorMut<'a> { // Use exclusive range to find strictly greater than current key if let Some((found_key, value)) = self.get_range_exclusive_owned(¤t) { - self.current_key = Some(found_key); + self.set_current_key(found_key); Ok(Some((Cow::Owned(found_key.to_vec()), Cow::Owned(value.to_vec())))) } else { - self.current_key = None; + self.clear_current_key(); Ok(None) } } @@ -677,10 +869,10 @@ impl<'a> KvTraverse for MemKvCursorMut<'a> { let current = self.current_key(); if let Some((found_key, value)) = self.get_range_reverse_owned(¤t) 
{ - self.current_key = Some(found_key); + self.set_current_key(found_key); Ok(Some((Cow::Owned(found_key.to_vec()), Cow::Owned(value.to_vec())))) } else { - self.current_key = None; + self.clear_current_key(); Ok(None) } } @@ -688,9 +880,10 @@ impl<'a> KvTraverse for MemKvCursorMut<'a> { impl<'a> KvTraverseMut for MemKvCursorMut<'a> { fn delete_current(&mut self) -> Result<(), MemKvError> { - if let Some(key) = self.current_key { + let current_key = *self.current_key.lock().unwrap(); + if let Some(key) = current_key { // Queue a delete operation - self.queued_ops.insert(key, QueuedKvOp::Delete); + self.queued_ops.lock().unwrap().insert(key, QueuedKvOp::Delete); Ok(()) } else { Err(MemKvError::HotKv(HotKvError::Inner("No current key to delete".into()))) @@ -698,7 +891,61 @@ impl<'a> KvTraverseMut for MemKvCursorMut<'a> { } } -impl<'a> DualKeyedTraverse for MemKvCursorMut<'a> { +impl<'a> DualKeyTraverse for MemKvCursorMut<'a> { + fn first<'b>(&'b mut self) -> Result>, MemKvError> { + let start_key = [0u8; MAX_KEY_SIZE * 2]; + + // Get the first effective key-value pair + if let Some((key, value)) = self.get_range_owned(&start_key) { + self.set_current_key(key); + let (k1, k2) = MemKv::split_dual_key(&key); + Ok(Some((Cow::Owned(k1.to_vec()), Cow::Owned(k2.to_vec()), Cow::Owned(value.to_vec())))) + } else { + self.clear_current_key(); + Ok(None) + } + } + + fn last<'b>(&'b mut self) -> Result>, MemKvError> { + let end_key = [0xffu8; MAX_KEY_SIZE * 2]; + + if let Some((key, value)) = self.get_range_reverse_owned(&end_key) { + self.set_current_key(key); + let (k1, k2) = MemKv::split_dual_key(&key); + Ok(Some((Cow::Owned(k1.to_vec()), Cow::Owned(k2.to_vec()), Cow::Owned(value.to_vec())))) + } else { + self.clear_current_key(); + Ok(None) + } + } + + fn read_next<'b>(&'b mut self) -> Result>, MemKvError> { + let current = self.current_key(); + + // Use exclusive range to find strictly greater than current key + if let Some((found_key, value)) = self.get_range_exclusive_owned(¤t) { + self.set_current_key(found_key); + let (k1, k2) = MemKv::split_dual_key(&found_key); + Ok(Some((Cow::Owned(k1.to_vec()), Cow::Owned(k2.to_vec()), Cow::Owned(value.to_vec())))) + } else { + self.clear_current_key(); + Ok(None) + } + } + + fn read_prev<'b>(&'b mut self) -> Result>, MemKvError> { + let current = self.current_key(); + + if let Some((found_key, value)) = self.get_range_reverse_owned(¤t) { + self.set_current_key(found_key); + let (k1, k2) = MemKv::split_dual_key(&found_key); + Ok(Some((Cow::Owned(k1.to_vec()), Cow::Owned(k2.to_vec()), Cow::Owned(value.to_vec())))) + } else { + self.clear_current_key(); + Ok(None) + } + } + fn exact_dual<'b>( &'b mut self, key1: &[u8], @@ -718,15 +965,15 @@ impl<'a> DualKeyedTraverse for MemKvCursorMut<'a> { return Ok(None); }; - let (key1, key2) = MemKv::split_dual_key(found_key.as_ref()); - Ok(Some((key1, key2, value))) + let (split_k1, split_k2) = MemKv::split_dual_key(found_key.as_ref()); + Ok(Some((split_k1, split_k2, value))) } fn next_k1<'b>(&'b mut self) -> Result>, MemKvError> { // scan forward until finding a new k1 let last_k1 = self.current_k1(); - DualKeyedTraverse::next_dual_above(self, &last_k1, &[0xffu8; MAX_KEY_SIZE]) + DualKeyTraverse::next_dual_above(self, &last_k1, &[0xffu8; MAX_KEY_SIZE]) } fn next_k2<'b>(&'b mut self) -> Result>, MemKvError> { @@ -734,37 +981,61 @@ impl<'a> DualKeyedTraverse for MemKvCursorMut<'a> { let (current_k1, current_k2) = MemKv::split_dual_key(¤t_key); // scan forward until finding a new k2 for the same k1 - 
DualKeyedTraverse::next_dual_above(self, ¤t_k1, ¤t_k2) + DualKeyTraverse::next_dual_above(self, ¤t_k1, ¤t_k2) } -} - -// Implement DualTableTraverse for typed dual-keyed table access -impl<'a, T> DualTableTraverse for MemKvCursorMut<'a> -where - T: DualKeyed, -{ - fn next_dual_above( - &mut self, - key1: &T::Key, - key2: &T::Key2, - ) -> Result>, MemKvError> { - let mut key1_buf = [0u8; MAX_KEY_SIZE]; - let mut key2_buf = [0u8; MAX_KEY_SIZE]; - let key1_bytes = key1.encode_key(&mut key1_buf); - let key2_bytes = key2.encode_key(&mut key2_buf); - DualKeyedTraverse::next_dual_above(self, key1_bytes, key2_bytes)? - .map(T::decode_kkv_tuple) - .transpose() - .map_err(Into::into) + fn last_of_k1<'b>( + &'b mut self, + key1: &[u8], + ) -> Result>, MemKvError> { + // Search for (k1, 0xff...) and find last entry <= that + let search_key = MemKv::dual_key(key1, &[0xffu8; MAX_KEY_SIZE]); + let Some((found_key, value)) = self.get_range_reverse_inclusive_owned(&search_key) else { + self.clear_current_key(); + return Ok(None); + }; + let (found_k1, found_k2) = MemKv::split_dual_key(&found_key); + if found_k1.as_ref() != key1 { + self.clear_current_key(); + return Ok(None); + } + self.set_current_key(found_key); + Ok(Some(( + Cow::Owned(found_k1.to_vec()), + Cow::Owned(found_k2.to_vec()), + Cow::Owned(value.to_vec()), + ))) } - fn next_k1(&mut self) -> Result>, MemKvError> { - DualKeyedTraverse::next_k1(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into) + fn previous_k1<'b>(&'b mut self) -> Result>, MemKvError> { + let current_k1 = self.current_k1(); + // Find entries before start of current k1 + let search_start = MemKv::dual_key(¤t_k1, &[0u8; MAX_KEY_SIZE]); + let Some((found_key, value)) = self.get_range_reverse_owned(&search_start) else { + self.clear_current_key(); + return Ok(None); + }; + self.set_current_key(found_key); + let (k1, k2) = MemKv::split_dual_key(&found_key); + Ok(Some((Cow::Owned(k1.to_vec()), Cow::Owned(k2.to_vec()), Cow::Owned(value.to_vec())))) } - fn next_k2(&mut self) -> Result>, MemKvError> { - DualKeyedTraverse::next_k2(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into) + fn previous_k2<'b>(&'b mut self) -> Result>, MemKvError> { + let current_key = self.current_key(); + let (current_k1, _) = MemKv::split_dual_key(¤t_key); + let Some((found_key, value)) = self.get_range_reverse_owned(¤t_key) else { + return Ok(None); + }; + let (found_k1, found_k2) = MemKv::split_dual_key(&found_key); + if found_k1.as_ref() != current_k1.as_ref() { + return Ok(None); // Previous entry has different k1 + } + self.set_current_key(found_key); + Ok(Some(( + Cow::Owned(found_k1.to_vec()), + Cow::Owned(found_k2.to_vec()), + Cow::Owned(value.to_vec()), + ))) } } @@ -792,7 +1063,7 @@ impl HotKv for MemKv { // the guard is also dropped. 
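        // NB: the transmute only erases the lock lifetime; the `_store` field
        // keeps the backing Arc alive for the life of the transaction, so the
        // 'static guard never outlives the data it protects.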
let guard: RwLockWriteGuard<'static, Store> = unsafe { std::mem::transmute(guard) }; - Ok(MemKvRwTx { guard, _store: self.map.clone(), queued_ops: OpStore::new() }) + Ok(MemKvRwTx { guard, _store: self.map.clone(), queued_ops: Mutex::new(OpStore::new()) }) } } @@ -808,7 +1079,7 @@ impl HotKvRead for MemKvRoTx { fn raw_get<'a>( &'a self, - table: &str, + table: &'static str, key: &[u8], ) -> Result>, Self::Error> { // Check queued operations first (read-your-writes consistency) @@ -825,7 +1096,7 @@ impl HotKvRead for MemKvRoTx { fn raw_get_dual<'a>( &'a self, - table: &str, + table: &'static str, key1: &[u8], key2: &[u8], ) -> Result>, Self::Error> { @@ -856,20 +1127,21 @@ impl HotKvRead for MemKvRwTx { fn raw_get<'a>( &'a self, - table: &str, + table: &'static str, key: &[u8], ) -> Result>, Self::Error> { // Check queued operations first (read-your-writes consistency) let key = MemKv::key(key); - if let Some(table) = self.queued_ops.get(table) { - if table.is_clear() { + let queued_ops = self.queued_ops.lock().unwrap(); + if let Some(table_op) = queued_ops.get(table) { + if table_op.is_clear() { return Ok(None); } - match table.get(&key) { + match table_op.get(&key) { Some(QueuedKvOp::Put { value }) => { - return Ok(Some(Cow::Borrowed(value.as_ref()))); + return Ok(Some(Cow::Owned(value.to_vec()))); } Some(QueuedKvOp::Delete) => { return Ok(None); @@ -877,6 +1149,7 @@ impl HotKvRead for MemKvRwTx { None => {} } } + drop(queued_ops); // If not found in queued ops, check the underlying map Ok(self @@ -888,7 +1161,7 @@ impl HotKvRead for MemKvRwTx { fn raw_get_dual<'a>( &'a self, - table: &str, + table: &'static str, key1: &[u8], key2: &[u8], ) -> Result>, Self::Error> { @@ -910,22 +1183,29 @@ impl MemKvRwTx { /// Get a mutable cursor for the specified table /// This cursor will see both committed data and pending writes from this transaction - pub fn cursor_mut<'a>(&'a mut self, table: &str) -> Result, MemKvError> { - // Get or create the table data - let table_data = self.guard.entry(table.to_owned()).or_default(); + pub fn cursor_mut<'a>(&'a self, table: &str) -> Result, MemKvError> { + // Get the table data (use EMPTY_TABLE if not present) + let table_data = self.guard.get(table).unwrap_or(&EMPTY_TABLE); // Get or create the queued operations for this table - let table_ops = self.queued_ops.entry(table.to_owned()).or_default(); + let mut queued_ops = self.queued_ops.lock().unwrap(); + let table_ops = queued_ops.entry(table.to_owned()).or_default(); let is_cleared = table_ops.is_clear(); - // Extract the inner TableOp from QueuedTableOp - let ops = match table_ops { - QueuedTableOp::Modify { ops } => ops, - QueuedTableOp::Clear { new_table } => new_table, - }; + // Get reference to the inner ops mutex + let ops_mutex = table_ops.ops_mutex(); - Ok(MemKvCursorMut::new(table_data, ops, is_cleared)) + // SAFETY: We need to return a reference that outlives the lock guard. + // This is safe because: + // 1. The Mutex is owned by the QueuedTableOp in the OpStore + // 2. The OpStore is owned by the MemKvRwTx (inside its Mutex) + // 3. 
The cursor only lives as long as 'a which is tied to &'a self + let ops_mutex: &'a Mutex = unsafe { std::mem::transmute(ops_mutex) }; + + drop(queued_ops); + + Ok(MemKvCursorMut::new(table_data, ops_mutex, is_cleared)) } } @@ -936,18 +1216,25 @@ impl HotKvWrite for MemKvRwTx { Self: 'a; fn raw_traverse_mut<'a>( - &'a mut self, - table: &str, + &'a self, + table: &'static str, ) -> Result, Self::Error> { self.cursor_mut(table) } - fn queue_raw_put(&mut self, table: &str, key: &[u8], value: &[u8]) -> Result<(), Self::Error> { + fn queue_raw_put( + &self, + table: &'static str, + key: &[u8], + value: &[u8], + ) -> Result<(), Self::Error> { let key = MemKv::key(key); let value_bytes = Bytes::copy_from_slice(value); self.queued_ops + .lock() + .unwrap() .entry(table.to_owned()) .or_default() .put(key, QueuedKvOp::Put { value: value_bytes }); @@ -955,8 +1242,8 @@ impl HotKvWrite for MemKvRwTx { } fn queue_raw_put_dual( - &mut self, - table: &str, + &self, + table: &'static str, key1: &[u8], key2: &[u8], value: &[u8], @@ -965,24 +1252,36 @@ impl HotKvWrite for MemKvRwTx { self.queue_raw_put(table, &key, value) } - fn queue_raw_delete(&mut self, table: &str, key: &[u8]) -> Result<(), Self::Error> { + fn queue_raw_delete(&self, table: &'static str, key: &[u8]) -> Result<(), Self::Error> { let key = MemKv::key(key); - self.queued_ops.entry(table.to_owned()).or_default().delete(key); + self.queued_ops.lock().unwrap().entry(table.to_owned()).or_default().delete(key); Ok(()) } - fn queue_raw_clear(&mut self, table: &str) -> Result<(), Self::Error> { - self.queued_ops - .insert(table.to_owned(), QueuedTableOp::Clear { new_table: TableOp::new() }); + fn queue_raw_delete_dual( + &self, + table: &'static str, + key1: &[u8], + key2: &[u8], + ) -> Result<(), Self::Error> { + let key = MemKv::dual_key(key1, key2); + self.queue_raw_delete(table, &key) + } + + fn queue_raw_clear(&self, table: &str) -> Result<(), Self::Error> { + self.queued_ops.lock().unwrap().insert( + table.to_owned(), + QueuedTableOp::Clear { new_table: Mutex::new(TableOp::new()) }, + ); Ok(()) } fn queue_raw_create( - &mut self, - _table: &str, - _dual_key: bool, - _dual_fixed: bool, + &self, + _table: &'static str, + _dual_key: Option, + _dual_fixed: Option, ) -> Result<(), Self::Error> { Ok(()) } @@ -999,9 +1298,10 @@ impl HotKvWrite for MemKvRwTx { #[cfg(test)] mod tests { use super::*; - use crate::{ - hot::model::{DualTableTraverse, TableTraverse, TableTraverseMut}, - tables::{SingleKey, Table}, + use crate::hot::{ + conformance::{conformance, test_unwind_conformance}, + model::{DualTableTraverse, TableTraverse, TableTraverseMut}, + tables::{DualKey, SingleKey, Table}, }; use alloy::primitives::{Address, U256}; use bytes::Bytes; @@ -1039,7 +1339,7 @@ mod tests { type Value = Bytes; } - impl crate::tables::DualKeyed for DualTestTable { + impl DualKey for DualTestTable { type Key2 = u32; } @@ -1058,7 +1358,7 @@ mod tests { // Write some data { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1, 2, 3], b"value1").unwrap(); writer.queue_raw_put("table1", &[4, 5, 6], b"value2").unwrap(); writer.raw_commit().unwrap(); @@ -1083,7 +1383,7 @@ mod tests { // Write to different tables { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"table1_value").unwrap(); writer.queue_raw_put("table2", &[1], b"table2_value").unwrap(); writer.raw_commit().unwrap(); @@ -1106,14 +1406,14 @@ mod tests { // Write initial 
value { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"original").unwrap(); writer.raw_commit().unwrap(); } // Overwrite with new value { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"updated").unwrap(); writer.raw_commit().unwrap(); } @@ -1129,7 +1429,7 @@ mod tests { #[test] fn test_read_your_writes() { let store = MemKv::new(); - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); // Queue some operations but don't commit yet writer.queue_raw_put("table1", &[1], b"queued_value").unwrap(); @@ -1154,7 +1454,7 @@ mod tests { // Write using typed interface { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_put::(&42u64, &Bytes::from_static(b"hello world")).unwrap(); writer.queue_put::(&100u64, &Bytes::from_static(b"another value")).unwrap(); writer.raw_commit().unwrap(); @@ -1184,7 +1484,7 @@ mod tests { // Write address data { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_put::(&addr1, &balance1).unwrap(); writer.queue_put::(&addr2, &balance2).unwrap(); writer.raw_commit().unwrap(); @@ -1213,7 +1513,7 @@ mod tests { // Write batch { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); let entry_refs: Vec<_> = entries.iter().map(|(k, v)| (k, v)).collect(); writer.queue_put_many::(entry_refs).unwrap(); writer.raw_commit().unwrap(); @@ -1238,7 +1538,7 @@ mod tests { // Write some initial data { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"value1").unwrap(); writer.raw_commit().unwrap(); } @@ -1274,7 +1574,7 @@ mod tests { let store = MemKv::new(); { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"").unwrap(); writer.raw_commit().unwrap(); } @@ -1291,7 +1591,7 @@ mod tests { let store = MemKv::new(); { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); // Multiple operations on same key - last one should win writer.queue_raw_put("table1", &[1], b"first").unwrap(); @@ -1318,7 +1618,7 @@ mod tests { // Write initial value { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"original").unwrap(); writer.raw_commit().unwrap(); } @@ -1332,7 +1632,7 @@ mod tests { // Update the value in a separate transaction { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"updated").unwrap(); writer.raw_commit().unwrap(); } @@ -1351,7 +1651,7 @@ mod tests { let store = MemKv::new(); { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"should_not_persist").unwrap(); // Drop without committing } @@ -1369,7 +1669,7 @@ mod tests { let store = MemKv::new(); { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"value1").unwrap(); writer.queue_raw_put("table2", &[2], b"value2").unwrap(); writer.raw_commit().unwrap(); @@ -1391,7 +1691,7 @@ mod tests { { // Write some data // Start a read-write transaction - let mut rw_tx = store.writer().unwrap(); + let rw_tx = store.writer().unwrap(); rw_tx.queue_raw_put("table1", &[1, 2, 3], b"value1").unwrap(); 
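        // Both puts are queued on the transaction; they become visible to
        // readers only once the transaction commits.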
rw_tx.queue_raw_put("table1", &[4, 5, 6], b"value2").unwrap(); @@ -1407,7 +1707,7 @@ mod tests { { // Start another read-write transaction - let mut rw_tx = store.writer().unwrap(); + let rw_tx = store.writer().unwrap(); rw_tx.queue_raw_put("table2", &[7, 8, 9], b"value3").unwrap(); // Value should not be set @@ -1425,7 +1725,7 @@ mod tests { let store = MemKv::new(); { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); writer.queue_raw_put("table1", &[1], b"value1").unwrap(); writer.queue_raw_put("table1", &[2], b"value2").unwrap(); writer.raw_commit().unwrap(); @@ -1442,7 +1742,7 @@ mod tests { } { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); let value1 = writer.raw_get("table1", &[1]).unwrap(); let value2 = writer.raw_get("table1", &[2]).unwrap(); @@ -1490,7 +1790,7 @@ mod tests { // Insert data { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -1544,7 +1844,7 @@ mod tests { // Insert data { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -1610,7 +1910,7 @@ mod tests { // Insert initial data { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -1619,7 +1919,7 @@ mod tests { // Test mutable cursor operations { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); let mut cursor = writer.cursor_mut(TestTable::NAME).unwrap(); // Navigate to middle entry @@ -1673,7 +1973,7 @@ mod tests { // Insert data { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -1724,8 +2024,8 @@ mod tests { // Create an empty table first { - let mut writer = store.writer().unwrap(); - writer.queue_raw_create(TestTable::NAME, false, false).unwrap(); + let writer = store.writer().unwrap(); + writer.queue_raw_create(TestTable::NAME, None, None).unwrap(); writer.raw_commit().unwrap(); } @@ -1756,7 +2056,7 @@ mod tests { ]; { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); for (key, value) in &test_data { writer.queue_put::(key, value).unwrap(); } @@ -1821,7 +2121,7 @@ mod tests { ]; { - let mut writer = store.writer().unwrap(); + let writer = store.writer().unwrap(); for (key1, key2, value) in &dual_data { writer.queue_put_dual::(key1, key2, value).unwrap(); } @@ -1866,4 +2166,202 @@ mod tests { assert_eq!(value, Bytes::from_static(b"value3")); } } + + #[test] + fn mem_conformance() { + let hot_kv = MemKv::new(); + conformance(&hot_kv); + } + + #[test] + fn mem_unwind_conformance() { + let store_a = MemKv::new(); + let store_b = MemKv::new(); + test_unwind_conformance(&store_a, &store_b); + } + + #[test] + fn test_dual_key_last_of_k1() { + let store = MemKv::new(); + + // Setup test data: + // k1=1: k2=[10, 20, 30] + // k1=2: k2=[100, 200] + // k1=3: k2=[1000] + let dual_data = vec![ + (1u64, 10u32, Bytes::from_static(b"v1_10")), + (1u64, 20u32, Bytes::from_static(b"v1_20")), + (1u64, 30u32, Bytes::from_static(b"v1_30")), + (2u64, 100u32, Bytes::from_static(b"v2_100")), + (2u64, 200u32, Bytes::from_static(b"v2_200")), + (3u64, 1000u32, Bytes::from_static(b"v3_1000")), + ]; + + { + let writer = store.writer().unwrap(); + for (key1, key2, 
value) in &dual_data { + writer.queue_put_dual::(key1, key2, value).unwrap(); + } + writer.raw_commit().unwrap(); + } + + let reader = store.reader().unwrap(); + let mut cursor = reader.cursor(DualTestTable::NAME).unwrap(); + + // Test last_of_k1 for k1=1 should return k2=30 + let result = DualTableTraverse::::last_of_k1(&mut cursor, &1u64).unwrap(); + assert!(result.is_some()); + let (k1, k2, value) = result.unwrap(); + assert_eq!(k1, 1u64); + assert_eq!(k2, 30u32); + assert_eq!(value, Bytes::from_static(b"v1_30")); + + // Test last_of_k1 for k1=2 should return k2=200 + let result = DualTableTraverse::::last_of_k1(&mut cursor, &2u64).unwrap(); + assert!(result.is_some()); + let (k1, k2, value) = result.unwrap(); + assert_eq!(k1, 2u64); + assert_eq!(k2, 200u32); + assert_eq!(value, Bytes::from_static(b"v2_200")); + + // Test last_of_k1 for k1=3 should return k2=1000 (only entry) + let result = DualTableTraverse::::last_of_k1(&mut cursor, &3u64).unwrap(); + assert!(result.is_some()); + let (k1, k2, _) = result.unwrap(); + assert_eq!(k1, 3u64); + assert_eq!(k2, 1000u32); + + // Test last_of_k1 for non-existent k1=999 should return None + let result = + DualTableTraverse::::last_of_k1(&mut cursor, &999u64).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn test_dual_key_previous_k1() { + let store = MemKv::new(); + + let dual_data = vec![ + (1u64, 10u32, Bytes::from_static(b"v1_10")), + (1u64, 20u32, Bytes::from_static(b"v1_20")), + (1u64, 30u32, Bytes::from_static(b"v1_30")), + (2u64, 100u32, Bytes::from_static(b"v2_100")), + (2u64, 200u32, Bytes::from_static(b"v2_200")), + (3u64, 1000u32, Bytes::from_static(b"v3_1000")), + ]; + + { + let writer = store.writer().unwrap(); + for (key1, key2, value) in &dual_data { + writer.queue_put_dual::(key1, key2, value).unwrap(); + } + writer.raw_commit().unwrap(); + } + + let reader = store.reader().unwrap(); + let mut cursor = reader.cursor(DualTestTable::NAME).unwrap(); + + // Position at k1=3, k2=1000 + let _ = DualTableTraverse::::next_dual_above(&mut cursor, &3u64, &0u32) + .unwrap(); + + // previous_k1 should return last entry of k1=2 (k2=200) + let result = DualTableTraverse::::previous_k1(&mut cursor).unwrap(); + assert!(result.is_some()); + let (k1, k2, value) = result.unwrap(); + assert_eq!(k1, 2u64); + assert_eq!(k2, 200u32); + assert_eq!(value, Bytes::from_static(b"v2_200")); + + // previous_k1 again should return last entry of k1=1 (k2=30) + let result = DualTableTraverse::::previous_k1(&mut cursor).unwrap(); + assert!(result.is_some()); + let (k1, k2, value) = result.unwrap(); + assert_eq!(k1, 1u64); + assert_eq!(k2, 30u32); + assert_eq!(value, Bytes::from_static(b"v1_30")); + + // previous_k1 again should return None (no k1 before 1) + let result = DualTableTraverse::::previous_k1(&mut cursor).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn test_dual_key_previous_k2() { + let store = MemKv::new(); + + let dual_data = vec![ + (1u64, 10u32, Bytes::from_static(b"v1_10")), + (1u64, 20u32, Bytes::from_static(b"v1_20")), + (1u64, 30u32, Bytes::from_static(b"v1_30")), + (2u64, 100u32, Bytes::from_static(b"v2_100")), + ]; + + { + let writer = store.writer().unwrap(); + for (key1, key2, value) in &dual_data { + writer.queue_put_dual::(key1, key2, value).unwrap(); + } + writer.raw_commit().unwrap(); + } + + let reader = store.reader().unwrap(); + let mut cursor = reader.cursor(DualTestTable::NAME).unwrap(); + + // Position at last of k1=1 (k2=30) + let _ = DualTableTraverse::::last_of_k1(&mut cursor, &1u64).unwrap(); + + // 
previous_k2 should return k2=20 + let result = DualTableTraverse::::previous_k2(&mut cursor).unwrap(); + assert!(result.is_some()); + let (k1, k2, value) = result.unwrap(); + assert_eq!(k1, 1u64); + assert_eq!(k2, 20u32); + assert_eq!(value, Bytes::from_static(b"v1_20")); + + // previous_k2 should return k2=10 + let result = DualTableTraverse::::previous_k2(&mut cursor).unwrap(); + assert!(result.is_some()); + let (k1, k2, value) = result.unwrap(); + assert_eq!(k1, 1u64); + assert_eq!(k2, 10u32); + assert_eq!(value, Bytes::from_static(b"v1_10")); + + // previous_k2 should return None (no k2 before 10 for k1=1) + let result = DualTableTraverse::::previous_k2(&mut cursor).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn test_dual_key_backward_edge_cases() { + let store = MemKv::new(); + + let dual_data = vec![ + (1u64, 10u32, Bytes::from_static(b"v1_10")), + (2u64, 100u32, Bytes::from_static(b"v2_100")), + ]; + + { + let writer = store.writer().unwrap(); + for (key1, key2, value) in &dual_data { + writer.queue_put_dual::(key1, key2, value).unwrap(); + } + writer.raw_commit().unwrap(); + } + + let reader = store.reader().unwrap(); + let mut cursor = reader.cursor(DualTestTable::NAME).unwrap(); + + // Position at first entry (k1=1, k2=10) + let _ = DualTableTraverse::::next_dual_above(&mut cursor, &1u64, &10u32) + .unwrap(); + + // previous_k2 at first entry of k1 should return None + let result = DualTableTraverse::::previous_k2(&mut cursor).unwrap(); + assert!(result.is_none()); + + // previous_k1 at first k1 should return None + let result = DualTableTraverse::::previous_k1(&mut cursor).unwrap(); + assert!(result.is_none()); + } } diff --git a/crates/storage/src/hot/impls/mod.rs b/crates/storage/src/hot/impls/mod.rs index 78c38a4..255f73a 100644 --- a/crates/storage/src/hot/impls/mod.rs +++ b/crates/storage/src/hot/impls/mod.rs @@ -1,320 +1,5 @@ -/// An in-memory key-value store implementation. +#[cfg(any(test, feature = "in-mem"))] pub mod mem; -/// MDBX-backed key-value store implementation. 
+#[cfg(feature = "mdbx")] pub mod mdbx; - -#[cfg(test)] -mod test { - use crate::{ - hot::{ - mem, - model::{HotDbRead, HotDbWrite, HotHistoryRead, HotHistoryWrite, HotKv, HotKvWrite}, - }, - tables::hot, - }; - use alloy::primitives::{B256, Bytes, U256, address, b256}; - use reth::primitives::{Account, Bytecode, Header, SealedHeader}; - use reth_db::{ - BlockNumberList, ClientVersion, mdbx::DatabaseArguments, test_utils::tempdir_path, - }; - use reth_libmdbx::MaxReadTransactionDuration; - - #[test] - fn mem_conformance() { - let hot_kv = mem::MemKv::new(); - conformance(&hot_kv); - } - - #[test] - fn mdbx_conformance() { - let path = tempdir_path(); - let db = reth_db::create_db( - &path, - DatabaseArguments::new(ClientVersion::default()) - .with_max_read_transaction_duration(Some(MaxReadTransactionDuration::Unbounded)), - ) - .unwrap(); - - // Create tables from the `crate::tables::hot` module - let mut writer = db.writer().unwrap(); - - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - - writer.commit().expect("Failed to commit table creation"); - - conformance(&db); - } - - fn conformance(hot_kv: &T) { - test_header_roundtrip(hot_kv); - test_account_roundtrip(hot_kv); - test_storage_roundtrip(hot_kv); - test_bytecode_roundtrip(hot_kv); - test_account_history(hot_kv); - test_storage_history(hot_kv); - test_account_changes(hot_kv); - test_storage_changes(hot_kv); - test_missing_reads(hot_kv); - } - - /// Test writing and reading headers via HotDbWrite/HotDbRead - fn test_header_roundtrip(hot_kv: &T) { - let header = Header { number: 42, gas_limit: 1_000_000, ..Default::default() }; - let sealed = SealedHeader::seal_slow(header.clone()); - let hash = sealed.hash(); - - // Write header - { - let mut writer = hot_kv.writer().unwrap(); - writer.put_header(&sealed).unwrap(); - writer.commit().unwrap(); - } - - // Read header by number - { - let reader = hot_kv.reader().unwrap(); - let read_header = reader.get_header(42).unwrap(); - assert!(read_header.is_some()); - assert_eq!(read_header.unwrap().number, 42); - } - - // Read header number by hash - { - let reader = hot_kv.reader().unwrap(); - let read_number = reader.get_header_number(&hash).unwrap(); - assert!(read_number.is_some()); - assert_eq!(read_number.unwrap(), 42); - } - - // Read header by hash - { - let reader = hot_kv.reader().unwrap(); - let read_header = reader.header_by_hash(&hash).unwrap(); - assert!(read_header.is_some()); - assert_eq!(read_header.unwrap().number, 42); - } - } - - /// Test writing and reading accounts via HotDbWrite/HotDbRead - fn test_account_roundtrip(hot_kv: &T) { - let addr = address!("0x1234567890123456789012345678901234567890"); - let account = - Account { nonce: 5, balance: U256::from(1000), bytecode_hash: Some(B256::ZERO) }; - - // Write account - { - let mut writer = hot_kv.writer().unwrap(); - writer.put_account(&addr, &account).unwrap(); - writer.commit().unwrap(); - } - - // Read account - { - let reader = hot_kv.reader().unwrap(); - let read_account = reader.get_account(&addr).unwrap(); - assert!(read_account.is_some()); - let read_account = read_account.unwrap(); - assert_eq!(read_account.nonce, 5); - assert_eq!(read_account.balance, U256::from(1000)); - } - } - - /// Test writing and reading storage via HotDbWrite/HotDbRead - 
fn test_storage_roundtrip(hot_kv: &T) { - let addr = address!("0xabcdef0123456789abcdef0123456789abcdef01"); - let slot = b256!("0x0000000000000000000000000000000000000000000000000000000000000001"); - let value = U256::from(999); - - // Write storage - { - let mut writer = hot_kv.writer().unwrap(); - writer.put_storage(&addr, &slot, &value).unwrap(); - writer.commit().unwrap(); - } - - // Read storage - { - let reader = hot_kv.reader().unwrap(); - let read_value = reader.get_storage(&addr, &slot).unwrap(); - assert!(read_value.is_some()); - assert_eq!(read_value.unwrap(), U256::from(999)); - } - - // Read storage entry - { - let reader = hot_kv.reader().unwrap(); - let read_entry = reader.get_storage_entry(&addr, &slot).unwrap(); - assert!(read_entry.is_some()); - let entry = read_entry.unwrap(); - assert_eq!(entry.key, slot); - assert_eq!(entry.value, U256::from(999)); - } - } - - /// Test writing and reading bytecode via HotDbWrite/HotDbRead - fn test_bytecode_roundtrip(hot_kv: &T) { - let code = Bytes::from_static(&[0x60, 0x00, 0x60, 0x00, 0xf3]); // Simple EVM bytecode - let bytecode = Bytecode::new_raw(code); - let code_hash = bytecode.hash_slow(); - - // Write bytecode - { - let mut writer = hot_kv.writer().unwrap(); - writer.put_bytecode(&code_hash, &bytecode).unwrap(); - writer.commit().unwrap(); - } - - // Read bytecode - { - let reader = hot_kv.reader().unwrap(); - let read_bytecode = reader.get_bytecode(&code_hash).unwrap(); - assert!(read_bytecode.is_some()); - } - } - - /// Test account history via HotHistoryWrite/HotHistoryRead - fn test_account_history(hot_kv: &T) { - let addr = address!("0x1111111111111111111111111111111111111111"); - let touched_blocks = BlockNumberList::new([10, 20, 30]).unwrap(); - let latest_height = 100u64; - - // Write account history - { - let mut writer = hot_kv.writer().unwrap(); - writer.write_account_history(&addr, latest_height, &touched_blocks).unwrap(); - writer.commit().unwrap(); - } - - // Read account history - { - let reader = hot_kv.reader().unwrap(); - let read_history = reader.get_account_history(&addr, latest_height).unwrap(); - assert!(read_history.is_some()); - let history = read_history.unwrap(); - assert_eq!(history.iter().collect::>(), vec![10, 20, 30]); - } - } - - /// Test storage history via HotHistoryWrite/HotHistoryRead - fn test_storage_history(hot_kv: &T) { - let addr = address!("0x2222222222222222222222222222222222222222"); - let slot = b256!("0x0000000000000000000000000000000000000000000000000000000000000042"); - let touched_blocks = BlockNumberList::new([5, 15, 25]).unwrap(); - let highest_block = 50u64; - - // Write storage history - { - let mut writer = hot_kv.writer().unwrap(); - writer.write_storage_history(&addr, slot, highest_block, &touched_blocks).unwrap(); - writer.commit().unwrap(); - } - - // Read storage history - { - let reader = hot_kv.reader().unwrap(); - let read_history = reader.get_storage_history(&addr, slot, highest_block).unwrap(); - assert!(read_history.is_some()); - let history = read_history.unwrap(); - assert_eq!(history.iter().collect::>(), vec![5, 15, 25]); - } - } - - /// Test account change sets via HotHistoryWrite/HotHistoryRead - fn test_account_changes(hot_kv: &T) { - let addr = address!("0x3333333333333333333333333333333333333333"); - let pre_state = Account { nonce: 10, balance: U256::from(5000), bytecode_hash: None }; - let block_number = 100u64; - - // Write account change - { - let mut writer = hot_kv.writer().unwrap(); - writer.write_account_change(block_number, addr, 
&pre_state).unwrap(); - writer.commit().unwrap(); - } - - // Read account change - { - let reader = hot_kv.reader().unwrap(); - - let read_change = reader.get_account_change(block_number, &addr).unwrap(); - - assert!(read_change.is_some()); - let change = read_change.unwrap(); - assert_eq!(change.nonce, 10); - assert_eq!(change.balance, U256::from(5000)); - } - } - - /// Test storage change sets via HotHistoryWrite/HotHistoryRead - fn test_storage_changes(hot_kv: &T) { - let addr = address!("0x4444444444444444444444444444444444444444"); - let slot = b256!("0x0000000000000000000000000000000000000000000000000000000000000099"); - let pre_value = U256::from(12345); - let block_number = 200u64; - - // Write storage change - { - let mut writer = hot_kv.writer().unwrap(); - writer.write_storage_change(block_number, addr, &slot, &pre_value).unwrap(); - writer.commit().unwrap(); - } - - // Read storage change - { - let reader = hot_kv.reader().unwrap(); - let read_change = reader.get_storage_change(block_number, &addr, &slot).unwrap(); - assert!(read_change.is_some()); - assert_eq!(read_change.unwrap(), U256::from(12345)); - } - } - - /// Test that missing reads return None - fn test_missing_reads(hot_kv: &T) { - let missing_addr = address!("0x9999999999999999999999999999999999999999"); - let missing_hash = - b256!("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"); - let missing_slot = - b256!("0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"); - - let reader = hot_kv.reader().unwrap(); - - // Missing header - assert!(reader.get_header(999999).unwrap().is_none()); - - // Missing header number - assert!(reader.get_header_number(&missing_hash).unwrap().is_none()); - - // Missing account - assert!(reader.get_account(&missing_addr).unwrap().is_none()); - - // Missing storage - assert!(reader.get_storage(&missing_addr, &missing_slot).unwrap().is_none()); - - // Missing bytecode - assert!(reader.get_bytecode(&missing_hash).unwrap().is_none()); - - // Missing header by hash - assert!(reader.header_by_hash(&missing_hash).unwrap().is_none()); - - // Missing account history - assert!(reader.get_account_history(&missing_addr, 1000).unwrap().is_none()); - - // Missing storage history - assert!(reader.get_storage_history(&missing_addr, missing_slot, 1000).unwrap().is_none()); - - // Missing account change - assert!(reader.get_account_change(999999, &missing_addr).unwrap().is_none()); - - // Missing storage change - assert!(reader.get_storage_change(999999, &missing_addr, &missing_slot).unwrap().is_none()); - } -} diff --git a/crates/storage/src/hot/mdbx.rs b/crates/storage/src/hot/mdbx.rs deleted file mode 100644 index 1414e41..0000000 --- a/crates/storage/src/hot/mdbx.rs +++ /dev/null @@ -1,658 +0,0 @@ -use crate::{ - hot::{HotKv, HotKvError, HotKvRead, HotKvReadError, HotKvWrite}, - ser::{DeserError, KeySer, MAX_KEY_SIZE, ValSer}, - tables::{DualKeyed, MAX_FIXED_VAL_SIZE}, -}; -use bytes::{BufMut, BytesMut}; -use reth_db::{ - Database, DatabaseEnv, - mdbx::{RW, TransactionKind, WriteFlags, tx::Tx}, -}; -use reth_db_api::DatabaseError; -use reth_libmdbx::RO; -use std::borrow::Cow; - -/// Error type for reth-libmdbx based hot storage. -#[derive(Debug, thiserror::Error)] -pub enum MdbxError { - /// Inner error - #[error(transparent)] - Mdbx(#[from] reth_libmdbx::Error), - - /// Reth error. - #[error(transparent)] - Reth(#[from] DatabaseError), - - /// Deser. 
- #[error(transparent)] - Deser(#[from] DeserError), -} - -impl HotKvReadError for MdbxError { - fn into_hot_kv_error(self) -> HotKvError { - match self { - MdbxError::Mdbx(e) => HotKvError::from_err(e), - MdbxError::Deser(e) => HotKvError::Deser(e), - MdbxError::Reth(e) => HotKvError::from_err(e), - } - } -} - -impl From for DatabaseError { - fn from(value: DeserError) -> Self { - DatabaseError::Other(value.to_string()) - } -} - -impl HotKv for DatabaseEnv { - type RoTx = Tx; - type RwTx = Tx; - - fn reader(&self) -> Result { - self.tx().map_err(HotKvError::from_err) - } - - fn writer(&self) -> Result { - self.tx_mut().map_err(HotKvError::from_err) - } -} - -impl HotKvRead for Tx -where - K: TransactionKind, -{ - type Error = MdbxError; - - fn raw_get<'a>( - &'a self, - table: &str, - key: &[u8], - ) -> Result>, Self::Error> { - let dbi = self.inner.open_db(Some(table)).map(|db| db.dbi())?; - - self.inner.get(dbi, key.as_ref()).map_err(MdbxError::Mdbx) - } - - fn raw_get_dual<'a>( - &'a self, - _table: &str, - _key1: &[u8], - _key2: &[u8], - ) -> Result>, Self::Error> { - unimplemented!("Not implemented: raw_get_dual. Use get_dual instead."); - } - - fn get_dual( - &self, - key1: &T::Key, - key2: &T::Key2, - ) -> Result, Self::Error> { - let mut key1_buf = [0u8; MAX_KEY_SIZE]; - let key1_bytes = key1.encode_key(&mut key1_buf); - - // K2 slice must be EXACTLY the size of the fixed value size, if the - // table has one. This is a bit ugly, and results in an extra - // allocation for fixed-size values. This could be avoided using - // max value size. - let value_bytes = if T::IS_FIXED_VAL { - let buf = [0u8; MAX_KEY_SIZE + MAX_FIXED_VAL_SIZE]; - let _ = key2.encode_key(&mut buf[..MAX_KEY_SIZE].try_into().unwrap()); - - let kv_size = ::SIZE + T::FIXED_VAL_SIZE.unwrap(); - - let db = self.inner.open_db(Some(T::NAME))?; - let mut cursor = self.inner.cursor(&db).map_err(MdbxError::Mdbx)?; - cursor.get_both_range(key1_bytes, &buf[..kv_size]).map_err(MdbxError::Mdbx) - } else { - let mut buf = [0u8; MAX_KEY_SIZE]; - let encoded = key2.encode_key(&mut buf); - - let db = self.inner.open_db(Some(T::NAME))?; - let mut cursor = self.inner.cursor(&db).map_err(MdbxError::Mdbx)?; - cursor.get_both_range::>(key1_bytes, encoded).map_err(MdbxError::Mdbx) - }; - - let Some(value_bytes) = value_bytes? else { - return Ok(None); - }; - // we need to strip the key2 prefix from the value bytes before decoding - let value_bytes = &value_bytes[<::Key2 as KeySer>::SIZE..]; - - T::Value::decode_value(value_bytes).map(Some).map_err(Into::into) - } -} - -impl HotKvWrite for Tx { - fn queue_raw_put(&mut self, table: &str, key: &[u8], value: &[u8]) -> Result<(), Self::Error> { - let dbi = self.inner.open_db(Some(table)).map(|db| db.dbi())?; - - self.inner.put(dbi, key, value, WriteFlags::UPSERT).map(|_| ()).map_err(MdbxError::Mdbx) - } - - fn queue_raw_put_dual( - &mut self, - _table: &str, - _key1: &[u8], - _key2: &[u8], - _value: &[u8], - ) -> Result<(), Self::Error> { - unimplemented!("Not implemented: queue_raw_put_dual. Use queue_put_dual instead."); - } - - // Specialized put for dual-keyed tables. - fn queue_put_dual( - &mut self, - key1: &T::Key, - key2: &T::Key2, - value: &T::Value, - ) -> Result<(), Self::Error> { - let k2_size = ::SIZE; - let mut scratch = [0u8; MAX_KEY_SIZE]; - - // This will be the total length of key2 + value, reserved in mdbx - let encoded_len = k2_size + value.encoded_size(); - - // Prepend the value with k2. 
- let mut buf = BytesMut::with_capacity(encoded_len); - let encoded_k2 = key2.encode_key(&mut scratch); - buf.put_slice(encoded_k2); - value.encode_value_to(&mut buf); - - let encoded_k1 = key1.encode_key(&mut scratch); - // NB: DUPSORT and RESERVE are incompatible :( - let db = self.inner.open_db(Some(T::NAME))?; - self.inner.put(db.dbi(), encoded_k1, &buf, Default::default())?; - - Ok(()) - } - - fn queue_raw_delete(&mut self, table: &str, key: &[u8]) -> Result<(), Self::Error> { - let dbi = self.inner.open_db(Some(table)).map(|db| db.dbi())?; - - self.inner.del(dbi, key, None).map(|_| ()).map_err(MdbxError::Mdbx) - } - - fn queue_raw_clear(&mut self, table: &str) -> Result<(), Self::Error> { - // Future: port more of reth's db env with dbi caching to avoid - // repeated open_db calls - let dbi = self.inner.open_db(Some(table)).map(|db| db.dbi())?; - self.inner.clear_db(dbi).map(|_| ()).map_err(MdbxError::Mdbx) - } - - fn queue_raw_create( - &mut self, - table: &str, - dual_key: bool, - fixed_val: bool, - ) -> Result<(), Self::Error> { - let mut flags = Default::default(); - - if dual_key { - flags |= reth_libmdbx::DatabaseFlags::DUP_SORT; - if fixed_val { - flags |= reth_libmdbx::DatabaseFlags::DUP_FIXED; - } - } - - self.inner.create_db(Some(table), flags).map(|_| ()).map_err(MdbxError::Mdbx) - } - - fn raw_commit(self) -> Result<(), Self::Error> { - // when committing, mdbx returns true on failure - self.inner.commit().map(drop).map_err(MdbxError::Mdbx) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - hot::{HotDbWrite, HotKv, HotKvRead, HotKvWrite}, - tables::hot, - }; - use alloy::{ - consensus::Sealed, - primitives::{Address, B256, BlockNumber, U256}, - }; - use reth::primitives::{Account, Bytecode, Header}; - use reth_db::DatabaseEnv; - - /// A test database wrapper that automatically cleans up on drop - struct TestDb { - db: DatabaseEnv, - #[allow(dead_code)] - temp_dir: tempfile::TempDir, - } - - impl std::ops::Deref for TestDb { - type Target = DatabaseEnv; - - fn deref(&self) -> &Self::Target { - &self.db - } - } - - /// Create a temporary MDBX database for testing that will be automatically cleaned up - fn create_test_db() -> TestDb { - let temp_dir = tempfile::tempdir().expect("Failed to create temp directory"); - - // Create the database - let db = reth_db::create_db(&temp_dir, Default::default()).unwrap(); - - // Create tables from the `crate::tables::hot` module - let mut writer = db.writer().unwrap(); - - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_create::().unwrap(); - - writer.commit().expect("Failed to commit table creation"); - - TestDb { db, temp_dir } - } - - /// Create test data - fn create_test_account() -> (Address, Account) { - let address = Address::from_slice(&[0x1; 20]); - let account = Account { - nonce: 42, - balance: U256::from(1000u64), - bytecode_hash: Some(B256::from_slice(&[0x2; 32])), - }; - (address, account) - } - - fn create_test_bytecode() -> (B256, Bytecode) { - let hash = B256::from_slice(&[0x2; 32]); - let code = reth::primitives::Bytecode::new_raw(vec![0x60, 0x80, 0x60, 0x40].into()); - (hash, code) - } - - fn create_test_header() -> (BlockNumber, Header) { - let block_number = 12345; - let header = Header { - number: block_number, - gas_limit: 8000000, - gas_used: 
100000, - timestamp: 1640995200, - parent_hash: B256::from_slice(&[0x3; 32]), - state_root: B256::from_slice(&[0x4; 32]), - ..Default::default() - }; - (block_number, header) - } - - #[test] - fn test_hotkv_basic_operations() { - let db = create_test_db(); - let (address, account) = create_test_account(); - let (hash, bytecode) = create_test_bytecode(); - - // Test HotKv::writer() and basic write operations - { - let mut writer: Tx = db.writer().unwrap(); - - // Create tables first - writer.queue_create::().unwrap(); - - // Write account data - writer.queue_put::(&address, &account).unwrap(); - writer.queue_put::(&hash, &bytecode).unwrap(); - - // Commit the transaction - writer.raw_commit().unwrap(); - } - - // Test HotKv::reader() and basic read operations - { - let reader: Tx = db.reader().unwrap(); - - // Read account data - let read_account: Option = - reader.get::(&address).unwrap(); - assert_eq!(read_account, Some(account)); - - // Read bytecode - let read_bytecode: Option = reader.get::(&hash).unwrap(); - assert_eq!(read_bytecode, Some(bytecode)); - - // Test non-existent data - let nonexistent_addr = Address::from_slice(&[0xff; 20]); - let nonexistent_account: Option = - reader.get::(&nonexistent_addr).unwrap(); - assert_eq!(nonexistent_account, None); - } - } - - #[test] - fn test_raw_operations() { - let db = create_test_db(); - - let table_name = "test_table"; - let key = b"test_key"; - let value = b"test_value"; - - // Test raw write operations - { - let mut writer: Tx = db.writer().unwrap(); - - // Create table - writer.queue_raw_create(table_name, false, false).unwrap(); - - // Put raw data - writer.queue_raw_put(table_name, key, value).unwrap(); - - writer.raw_commit().unwrap(); - } - - // Test raw read operations - { - let reader: Tx = db.reader().unwrap(); - - let read_value = reader.raw_get(table_name, key).unwrap(); - assert_eq!(read_value.as_deref(), Some(value.as_slice())); - - // Test non-existent key - let nonexistent = reader.raw_get(table_name, b"nonexistent").unwrap(); - assert_eq!(nonexistent, None); - } - - // Test raw delete - { - let mut writer: Tx = db.writer().unwrap(); - - writer.queue_raw_delete(table_name, key).unwrap(); - writer.raw_commit().unwrap(); - } - - // Verify deletion - { - let reader: Tx = db.reader().unwrap(); - let deleted_value = reader.raw_get(table_name, key).unwrap(); - assert_eq!(deleted_value, None); - } - } - - #[test] - fn test_dual_keyed_operations() { - let db = create_test_db(); - - let address = Address::from_slice(&[0x1; 20]); - let storage_key = B256::from_slice(&[0x5; 32]); - let storage_value = U256::from(999u64); - - // Test dual-keyed table operations - { - let mut writer: Tx = db.writer().unwrap(); - - // Put storage data using dual keys - writer - .queue_put_dual::(&address, &storage_key, &storage_value) - .unwrap(); - - writer.raw_commit().unwrap(); - } - - // Test reading dual-keyed data - { - let reader: Tx = db.reader().unwrap(); - - // Read storage using dual key lookup - let read_value = - reader.get_dual::(&address, &storage_key).unwrap().unwrap(); - - assert_eq!(read_value, storage_value); - } - } - - #[test] - fn test_table_management() { - let db = create_test_db(); - - // Add some data - let (block_number, header) = create_test_header(); - { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_put::(&block_number, &header).unwrap(); - writer.raw_commit().unwrap(); - } - - // Verify data exists - { - let reader: Tx = db.reader().unwrap(); - let read_header: Option
= reader.get::(&block_number).unwrap(); - assert_eq!(read_header, Some(header.clone())); - } - - // Clear the table - { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_clear::().unwrap(); - writer.raw_commit().unwrap(); - } - - // Verify table is empty - { - let reader: Tx = db.reader().unwrap(); - let read_header: Option
= reader.get::(&block_number).unwrap(); - assert_eq!(read_header, None); - } - } - - #[test] - fn test_batch_operations() { - let db = create_test_db(); - - // Create test data - let accounts: Vec<(Address, Account)> = (0..10) - .map(|i| { - let mut addr_bytes = [0u8; 20]; - addr_bytes[19] = i; - let address = Address::from_slice(&addr_bytes); - let account = Account { - nonce: i.into(), - balance: U256::from((i as u64) * 100), - bytecode_hash: None, - }; - (address, account) - }) - .collect(); - - // Test batch writes - { - let mut writer: Tx = db.writer().unwrap(); - - // Write multiple accounts - for (address, account) in &accounts { - writer.queue_put::(address, account).unwrap(); - } - - writer.raw_commit().unwrap(); - } - - // Test batch reads - { - let reader: Tx = db.reader().unwrap(); - - for (address, expected_account) in &accounts { - let read_account: Option = - reader.get::(address).unwrap(); - assert_eq!(read_account.as_ref(), Some(expected_account)); - } - } - - // Test batch get_many - { - let reader: Tx = db.reader().unwrap(); - let addresses: Vec
= accounts.iter().map(|(addr, _)| *addr).collect(); - let read_accounts: Vec<(_, Option)> = - reader.get_many::(addresses.iter()).unwrap(); - - for (i, (_, expected_account)) in accounts.iter().enumerate() { - assert_eq!(read_accounts[i].1.as_ref(), Some(expected_account)); - } - } - } - - #[test] - fn test_transaction_isolation() { - let db = create_test_db(); - let (address, account) = create_test_account(); - - // Setup initial data - { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_put::(&address, &account).unwrap(); - writer.raw_commit().unwrap(); - } - - // Start a reader transaction - let reader: Tx = db.reader().unwrap(); - - // Modify data in a writer transaction - { - let mut writer: Tx = db.writer().unwrap(); - let modified_account = - Account { nonce: 999, balance: U256::from(9999u64), bytecode_hash: None }; - writer.queue_put::(&address, &modified_account).unwrap(); - writer.raw_commit().unwrap(); - } - - // Reader should still see original data (snapshot isolation) - { - let read_account: Option = - reader.get::(&address).unwrap(); - assert_eq!(read_account, Some(account)); - } - - // New reader should see modified data - { - let new_reader: Tx = db.reader().unwrap(); - let read_account: Option = - new_reader.get::(&address).unwrap(); - assert_eq!(read_account.unwrap().nonce, 999); - } - } - - #[test] - fn test_multiple_readers() { - let db = create_test_db(); - let (address, account) = create_test_account(); - - // Setup data - { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_put::(&address, &account).unwrap(); - writer.raw_commit().unwrap(); - } - - // Create multiple readers - let reader1: Tx = db.reader().unwrap(); - let reader2: Tx = db.reader().unwrap(); - let reader3: Tx = db.reader().unwrap(); - - // All readers should see the same data - let account1: Option = reader1.get::(&address).unwrap(); - let account2: Option = reader2.get::(&address).unwrap(); - let account3: Option = reader3.get::(&address).unwrap(); - - assert_eq!(account1, Some(account)); - assert_eq!(account2, Some(account)); - assert_eq!(account3, Some(account)); - } - - #[test] - fn test_error_handling() { - let db = create_test_db(); - - // Test reading from non-existent table - { - let reader: Tx = db.reader().unwrap(); - let result = reader.raw_get("nonexistent_table", b"key"); - - // Should handle gracefully (may return None or error depending on MDBX behavior) - match result { - Ok(None) => {} // This is fine - Err(_) => {} // This is also acceptable for non-existent table - Ok(Some(_)) => panic!("Should not return data for non-existent table"), - } - } - - // Test writing to a table without creating it first - { - let mut writer: Tx = db.writer().unwrap(); - let (address, account) = create_test_account(); - - // This should handle the case where table doesn't exist - let result = writer.queue_put::(&address, &account); - match result { - Ok(_) => { - // If it succeeds, commit should work - writer.raw_commit().unwrap(); - } - Err(_) => { - // If it fails, that's expected behavior - } - } - } - } - - #[test] - fn test_serialization_roundtrip() { - let db = create_test_db(); - - // Test various data types - let (block_number, header) = create_test_header(); - let header = Sealed::new(header); - - { - let mut writer: Tx = db.writer().unwrap(); - - // Write different types - writer.queue_put::(&block_number, &header).unwrap(); - writer.queue_put::(&header.hash(), &block_number).unwrap(); - - writer.raw_commit().unwrap(); - } - - { - let reader: Tx = db.reader().unwrap(); - - 
// Read and verify - let read_header: Option
= reader.get::(&block_number).unwrap(); - assert_eq!(read_header.as_ref(), Some(header.inner())); - - let read_hash: Option = reader.get::(&header.hash()).unwrap(); - assert_eq!(read_hash, Some(header.number)); - } - } - - #[test] - fn test_large_data() { - let db = create_test_db(); - - // Create a large bytecode - let hash = B256::from_slice(&[0x8; 32]); - let large_code_vec: Vec = (0..10000).map(|i| (i % 256) as u8).collect(); - let large_bytecode = Bytecode::new_raw(large_code_vec.clone().into()); - - { - let mut writer: Tx = db.writer().unwrap(); - writer.queue_create::().unwrap(); - writer.queue_put::(&hash, &large_bytecode).unwrap(); - writer.raw_commit().unwrap(); - } - - { - let reader: Tx = db.reader().unwrap(); - let read_bytecode: Option = reader.get::(&hash).unwrap(); - assert_eq!(read_bytecode, Some(large_bytecode)); - } - } -} diff --git a/crates/storage/src/hot/mod.rs b/crates/storage/src/hot/mod.rs index 4e73f9c..9001aab 100644 --- a/crates/storage/src/hot/mod.rs +++ b/crates/storage/src/hot/mod.rs @@ -1,5 +1,71 @@ -/// Hot storage models and traits. +//! Hot storage module. +//! +//! Hot storage is designed for fast read and write access to frequently used +//! data. It provides abstractions and implementations for key-value storage +//! backends. +//! +//! ## Serialization +//! +//! Hot storage is opinionated with respect to serialization. Each table defines +//! the key and value types it uses, and these types must implement the +//! appropriate serialization traits. See the [`KeySer`] and [`ValSer`] traits +//! for more information. +//! +//! # Trait Model +//! +//! The hot storage module defines a set of traits to abstract over different +//! hot storage backends. The primary traits are: +//! +//! - [`HotKvRead`]: for transactional read-only access to hot storage. +//! - [`HotKvWrite`]: for transactional read-write access to hot storage. +//! - [`HotKv`]: for creating read and write transactions. +//! +//! These traits provide methods for common operations such as getting, +//! setting, and deleting key-value pairs in hot storage tables. The raw +//! key-value operations use byte slices for maximum flexibility. The +//! [`HistoryRead`] and [`HistoryWrite`] traits provide higher-level +//! abstractions that work with the predefined tables and their associated key +//! and value types. +//! +//! See the [`model`] module documentation for more details on the traits and +//! their usage. +//! +//! ## Tables +//! +//! Hot storage tables are predefined in the [`tables`] module. Each table +//! defines the key and value types it uses, along with serialization logic. +//! The [`Table`] and [`DualKey`] traits define the interface for tables. +//! The [`SingleKey`] trait is a marker for tables with single keys. +//! +//! See the [`Table`] trait documentation for more information on defining and +//! using tables. +//! +//! [`HistoryRead`]: db::HistoryRead +//! [`HistoryWrite`]: db::HistoryWrite +//! [`HotKvRead`]: model::HotKvRead +//! [`HotKvWrite`]: model::HotKvWrite +//! [`HotKv`]: model::HotKv +//! [`DualKey`]: tables::DualKey +//! [`SingleKey`]: tables::SingleKey +//! [`Table`]: tables::Table + +/// Conformance tests for hot storage backends. +#[cfg(any(test, feature = "test-utils"))] +pub mod conformance; + +pub mod db; +pub use db::{HistoryError, HistoryRead, HistoryWrite}; + pub mod model; +pub use model::HotKv; + +/// Implementations of hot storage backends. +#[cfg(feature = "impls")] +pub mod impls; + +/// Serialization module. 
+pub mod ser; +pub use ser::{DeserError, KeySer, MAX_FIXED_VAL_SIZE, MAX_KEY_SIZE, ValSer}; -mod impls; -pub use impls::{mdbx, mem}; +/// Predefined tables module. +pub mod tables; diff --git a/crates/storage/src/hot/model/db_traits.rs b/crates/storage/src/hot/model/db_traits.rs deleted file mode 100644 index 9b08f49..0000000 --- a/crates/storage/src/hot/model/db_traits.rs +++ /dev/null @@ -1,235 +0,0 @@ -use crate::{ - hot::model::{HotKvRead, HotKvWrite}, - tables::hot::{self as tables}, -}; -use alloy::primitives::{Address, B256, U256}; -use reth::primitives::{Account, Bytecode, Header, SealedHeader, StorageEntry}; -use reth_db::{BlockNumberList, models::BlockNumberAddress}; -use reth_db_api::models::ShardedKey; - -/// Trait for database read operations. -pub trait HotDbRead: HotKvRead + sealed::Sealed { - /// Read a block header by its number. - fn get_header(&self, number: u64) -> Result, Self::Error> { - self.get::(&number) - } - - /// Read a block number by its hash. - fn get_header_number(&self, hash: &B256) -> Result, Self::Error> { - self.get::(hash) - } - - /// Read contract Bytecode by its hash. - fn get_bytecode(&self, code_hash: &B256) -> Result, Self::Error> { - self.get::(code_hash) - } - - /// Read an account by its address. - fn get_account(&self, address: &Address) -> Result, Self::Error> { - self.get::(address) - } - - /// Read a storage slot by its address and key. - fn get_storage(&self, address: &Address, key: &B256) -> Result, Self::Error> { - self.get_dual::(address, key) - } - - /// Read a [`StorageEntry`] by its address and key. - fn get_storage_entry( - &self, - address: &Address, - key: &B256, - ) -> Result, Self::Error> { - let opt = self.get_storage(address, key)?; - Ok(opt.map(|value| StorageEntry { key: *key, value })) - } - - /// Read a block header by its hash. - fn header_by_hash(&self, hash: &B256) -> Result, Self::Error> { - let Some(number) = self.get_header_number(hash)? else { - return Ok(None); - }; - self.get_header(number) - } -} - -impl HotDbRead for T where T: HotKvRead {} - -/// Trait for database write operations. This trait is low-level, and usage may -/// leave the database in an inconsistent state if not used carefully. Users -/// should prefer [`HotHistoryWrite`] or higher-level abstractions when -/// possible. -pub trait HotDbWrite: HotKvWrite + sealed::Sealed { - /// Write a block header. This will leave the DB in an inconsistent state - /// until the corresponding header number is also written. Users should - /// prefer [`Self::put_header`] instead. - fn put_header_inconsistent(&mut self, header: &Header) -> Result<(), Self::Error> { - self.queue_put::(&header.number, header) - } - - /// Write a block number by its hash. This will leave the DB in an - /// inconsistent state until the corresponding header is also written. - /// Users should prefer [`Self::put_header`] instead. - fn put_header_number_inconsistent( - &mut self, - hash: &B256, - number: u64, - ) -> Result<(), Self::Error> { - self.queue_put::(hash, &number) - } - - /// Write contract Bytecode by its hash. - fn put_bytecode(&mut self, code_hash: &B256, bytecode: &Bytecode) -> Result<(), Self::Error> { - self.queue_put::(code_hash, bytecode) - } - - /// Write an account by its address. - fn put_account(&mut self, address: &Address, account: &Account) -> Result<(), Self::Error> { - self.queue_put::(address, account) - } - - /// Write a storage entry by its address and key. 
- fn put_storage( - &mut self, - address: &Address, - key: &B256, - entry: &U256, - ) -> Result<(), Self::Error> { - self.queue_put_dual::(address, key, entry) - } - - /// Write a sealed block header (header + number). - fn put_header(&mut self, header: &SealedHeader) -> Result<(), Self::Error> { - self.put_header_inconsistent(header.header()) - .and_then(|_| self.put_header_number_inconsistent(&header.hash(), header.number)) - } - - /// Commit the write transaction. - fn commit(self) -> Result<(), Self::Error> - where - Self: Sized, - { - HotKvWrite::raw_commit(self) - } -} - -impl HotDbWrite for T where T: HotKvWrite {} - -/// Trait for history read operations. -pub trait HotHistoryRead: HotDbRead { - /// Get the list of block numbers where an account was touched. - /// Get the list of block numbers where an account was touched. - fn get_account_history( - &self, - address: &Address, - latest_height: u64, - ) -> Result, Self::Error> { - self.get_dual::(address, &latest_height) - } - /// Get the account change (pre-state) for an account at a specific block. - /// - /// If the return value is `None`, the account was not changed in that - /// block. - fn get_account_change( - &self, - block_number: u64, - address: &Address, - ) -> Result, Self::Error> { - self.get_dual::(&block_number, address) - } - - /// Get the storage history for an account and storage slot. The returned - /// list will contain block numbers where the storage slot was changed. - fn get_storage_history( - &self, - address: &Address, - slot: B256, - highest_block_number: u64, - ) -> Result, Self::Error> { - let sharded_key = ShardedKey::new(slot, highest_block_number); - self.get_dual::(address, &sharded_key) - } - - /// Get the storage change (before state) for a specific storage slot at a - /// specific block. - /// - /// If the return value is `None`, the storage slot was not changed in that - /// block. If the return value is `Some(value)`, the value is the pre-state - /// of the storage slot before the change in that block. If the value is - /// `U256::ZERO`, that indicates that the storage slot was not set before - /// the change. - fn get_storage_change( - &self, - block_number: u64, - address: &Address, - slot: &B256, - ) -> Result, Self::Error> { - let block_number_address = BlockNumberAddress((block_number, *address)); - self.get_dual::(&block_number_address, slot) - } -} - -impl HotHistoryRead for T where T: HotDbRead {} - -/// Trait for history write operations. -pub trait HotHistoryWrite: HotDbWrite { - /// Maintain a list of block numbers where an account was touched. - /// - /// Accounts are keyed - fn write_account_history( - &mut self, - address: &Address, - latest_height: u64, - touched: &BlockNumberList, - ) -> Result<(), Self::Error> { - self.queue_put_dual::(address, &latest_height, touched) - } - - /// Write an account change (pre-state) for an account at a specific - /// block. - fn write_account_change( - &mut self, - block_number: u64, - address: Address, - pre_state: &Account, - ) -> Result<(), Self::Error> { - self.queue_put_dual::(&block_number, &address, pre_state) - } - - /// Write storage history, by highest block number and touched block - /// numbers. 
- fn write_storage_history( - &mut self, - address: &Address, - slot: B256, - highest_block_number: u64, - touched: &BlockNumberList, - ) -> Result<(), Self::Error> { - let sharded_key = ShardedKey::new(slot, highest_block_number); - self.queue_put_dual::(address, &sharded_key, touched) - } - - /// Write a storage change (before state) for an account at a specific - /// block. - fn write_storage_change( - &mut self, - block_number: u64, - address: Address, - slot: &B256, - value: &U256, - ) -> Result<(), Self::Error> { - let block_number_address = BlockNumberAddress((block_number, address)); - self.queue_put_dual::(&block_number_address, slot, value) - } -} - -impl HotHistoryWrite for T where T: HotDbWrite + HotKvWrite {} - -mod sealed { - use crate::hot::model::HotKvRead; - - /// Sealed trait to prevent external implementations of HotDbReader and HotDbWriter. - #[allow(dead_code, unreachable_pub)] - pub trait Sealed {} - impl Sealed for T where T: HotKvRead {} -} diff --git a/crates/storage/src/hot/model/error.rs b/crates/storage/src/hot/model/error.rs index 8dd0924..380a197 100644 --- a/crates/storage/src/hot/model/error.rs +++ b/crates/storage/src/hot/model/error.rs @@ -1,4 +1,4 @@ -use crate::ser::DeserError; +use crate::hot::ser::DeserError; /// Trait for hot storage read/write errors. #[derive(thiserror::Error, Debug)] @@ -9,7 +9,7 @@ pub enum HotKvError { /// Deserialization error. Indicates an issue deserializing a key or value. #[error("Deserialization error: {0}")] - Deser(#[from] crate::ser::DeserError), + Deser(#[from] DeserError), /// Indicates that a write transaction is already in progress. #[error("A write transaction is already in progress")] diff --git a/crates/storage/src/hot/model/mod.rs b/crates/storage/src/hot/model/mod.rs index f499db7..17f09f2 100644 --- a/crates/storage/src/hot/model/mod.rs +++ b/crates/storage/src/hot/model/mod.rs @@ -1,5 +1,38 @@ -mod db_traits; -pub use db_traits::{HotDbRead, HotDbWrite, HotHistoryRead, HotHistoryWrite}; +//! Hot storage models and traits. +//! +//! The core trait for the hot storage module is [`HotKv`], which provides +//! a transaction factory for creating read and write transactions. The +//! [`HotKvRead`] and [`HotKvWrite`] traits provide transactional read-only and +//! read-write access to hot storage, respectively. +//! +//! ## Dual-Keyed Tables +//! +//! The hot storage module supports dual-keyed tables, which allow for +//! storing values associated with a combination of two keys. The [`DualKey`] +//! trait defines the interface for dual keys. Dual-keying is a common +//! optimization in KV stores like MDBX and RocksDB, allowing for efficient +//! storage and retrieval of values based on composite keys. +//! +//! [`HotKvRead`] and [`HotKvWrite`] provide methods for working with dual-keyed +//! tables, including getting, setting, and deleting values based on dual keys. +//! +//! ## Traversal +//! +//! The hot storage module provides traversal abstractions for iterating +//! over key-value pairs in tables. The [`KvTraverse`] and [`KvTraverseMut`] +//! traits provide methods for traversing single-keyed tables, while the +//! [`DualTableTraverse`] and [`DualKeyTraverse`] traits provide methods for +//! traversing dual-keyed tables. +//! +//! These traversal traits allow for efficient iteration over key-value pairs, +//! supporting operations like seeking to specific keys, moving to the next or +//! previous entries, and retrieving the current key-value pair. These are then +//! 
extended with the [`TableTraverse`], [`TableTraverseMut`], +//! and [`DualTableTraverse`] to provide automatic (de)serialization of keys +//! and values. +//! +//! The library wraps these into the [`TableCursor`] and [`DualTableCursor`] +//! structs for ease of use and consistency across different backends. mod error; pub use error::{HotKvError, HotKvReadError, HotKvResult}; @@ -12,11 +45,11 @@ pub use traits::{HotKv, HotKvRead, HotKvWrite}; mod traverse; pub use traverse::{ - DualKeyedTraverse, DualTableCursor, DualTableTraverse, KvTraverse, KvTraverseMut, TableCursor, + DualKeyTraverse, DualTableCursor, DualTableTraverse, KvTraverse, KvTraverseMut, TableCursor, TableTraverse, TableTraverseMut, }; -use crate::tables::{DualKeyed, Table}; +use crate::hot::tables::{DualKey, Table}; use std::borrow::Cow; /// A key-value pair from a table. @@ -35,4 +68,4 @@ pub type RawValue<'a> = Cow<'a, [u8]>; pub type RawDualKeyValue<'a> = (Cow<'a, [u8]>, RawValue<'a>, RawValue<'a>); /// A dual key-value tuple from a table. -pub type DualKeyValue = (::Key, ::Key2, ::Value); +pub type DualKeyValue = (::Key, ::Key2, ::Value); diff --git a/crates/storage/src/hot/model/revm.rs b/crates/storage/src/hot/model/revm.rs index 598da12..7125eb5 100644 --- a/crates/storage/src/hot/model/revm.rs +++ b/crates/storage/src/hot/model/revm.rs @@ -1,9 +1,6 @@ -use crate::{ - hot::model::{GetManyItem, HotKvError, HotKvRead, HotKvWrite}, - tables::{ - DualKeyed, SingleKey, Table, - hot::{self, Bytecodes, PlainAccountState}, - }, +use crate::hot::{ + model::{GetManyItem, HotKvError, HotKvRead, HotKvWrite}, + tables::{self, Bytecodes, DualKey, PlainAccountState, SingleKey, Table}, }; use alloy::primitives::{Address, B256, KECCAK256_EMPTY}; use core::fmt; @@ -45,13 +42,13 @@ impl HotKvRead for RevmRead { where U: 'a; - fn raw_traverse<'a>(&'a self, table: &str) -> Result, Self::Error> { + fn raw_traverse<'a>(&'a self, table: &'static str) -> Result, Self::Error> { self.reader.raw_traverse(table) } fn raw_get<'a>( &'a self, - table: &str, + table: &'static str, key: &[u8], ) -> Result>, Self::Error> { self.reader.raw_get(table, key) @@ -59,7 +56,7 @@ impl HotKvRead for RevmRead { fn raw_get_dual<'a>( &'a self, - table: &str, + table: &'static str, key1: &[u8], key2: &[u8], ) -> Result>, Self::Error> { @@ -70,7 +67,7 @@ impl HotKvRead for RevmRead { self.reader.get::(key) } - fn get_dual( + fn get_dual( &self, key1: &T::Key, key2: &T::Key2, @@ -123,13 +120,13 @@ impl HotKvRead for RevmWrite { where U: 'a; - fn raw_traverse<'a>(&'a self, table: &str) -> Result, Self::Error> { + fn raw_traverse<'a>(&'a self, table: &'static str) -> Result, Self::Error> { self.writer.raw_traverse(table) } fn raw_get<'a>( &'a self, - table: &str, + table: &'static str, key: &[u8], ) -> Result>, Self::Error> { self.writer.raw_get(table, key) @@ -137,7 +134,7 @@ impl HotKvRead for RevmWrite { fn raw_get_dual<'a>( &'a self, - table: &str, + table: &'static str, key1: &[u8], key2: &[u8], ) -> Result>, Self::Error> { @@ -148,7 +145,7 @@ impl HotKvRead for RevmWrite { self.writer.get::(key) } - fn get_dual( + fn get_dual( &self, key1: &T::Key, key2: &T::Key2, @@ -173,19 +170,24 @@ impl HotKvWrite for RevmWrite { U: 'a; fn raw_traverse_mut<'a>( - &'a mut self, - table: &str, + &'a self, + table: &'static str, ) -> Result, Self::Error> { self.writer.raw_traverse_mut(table) } - fn queue_raw_put(&mut self, table: &str, key: &[u8], value: &[u8]) -> Result<(), Self::Error> { + fn queue_raw_put( + &self, + table: &'static str, + key: &[u8], + value: &[u8], + ) -> 
Result<(), Self::Error> { self.writer.queue_raw_put(table, key, value) } fn queue_raw_put_dual( - &mut self, - table: &str, + &self, + table: &'static str, key1: &[u8], key2: &[u8], value: &[u8], @@ -193,19 +195,28 @@ impl HotKvWrite for RevmWrite { self.writer.queue_raw_put_dual(table, key1, key2, value) } - fn queue_raw_delete(&mut self, table: &str, key: &[u8]) -> Result<(), Self::Error> { + fn queue_raw_delete(&self, table: &'static str, key: &[u8]) -> Result<(), Self::Error> { self.writer.queue_raw_delete(table, key) } - fn queue_raw_clear(&mut self, table: &str) -> Result<(), Self::Error> { + fn queue_raw_delete_dual( + &self, + table: &'static str, + key1: &[u8], + key2: &[u8], + ) -> Result<(), Self::Error> { + self.writer.queue_raw_delete_dual(table, key1, key2) + } + + fn queue_raw_clear(&self, table: &'static str) -> Result<(), Self::Error> { self.writer.queue_raw_clear(table) } fn queue_raw_create( - &mut self, - table: &str, - dual_key: bool, - dual_fixed: bool, + &self, + table: &'static str, + dual_key: Option, + dual_fixed: Option, ) -> Result<(), Self::Error> { self.writer.queue_raw_create(table, dual_key, dual_fixed) } @@ -214,16 +225,12 @@ impl HotKvWrite for RevmWrite { self.writer.raw_commit() } - fn queue_put( - &mut self, - key: &T::Key, - value: &T::Value, - ) -> Result<(), Self::Error> { + fn queue_put(&self, key: &T::Key, value: &T::Value) -> Result<(), Self::Error> { self.writer.queue_put::(key, value) } - fn queue_put_dual( - &mut self, + fn queue_put_dual( + &self, key1: &T::Key, key2: &T::Key2, value: &T::Value, @@ -231,11 +238,11 @@ impl HotKvWrite for RevmWrite { self.writer.queue_put_dual::(key1, key2, value) } - fn queue_delete(&mut self, key: &T::Key) -> Result<(), Self::Error> { + fn queue_delete(&self, key: &T::Key) -> Result<(), Self::Error> { self.writer.queue_delete::(key) } - fn queue_put_many<'a, 'b, T, I>(&mut self, entries: I) -> Result<(), Self::Error> + fn queue_put_many<'a, 'b, T, I>(&self, entries: I) -> Result<(), Self::Error> where T: SingleKey, T::Key: 'a, @@ -245,14 +252,14 @@ impl HotKvWrite for RevmWrite { self.writer.queue_put_many::(entries) } - fn queue_create(&mut self) -> Result<(), Self::Error> + fn queue_create(&self) -> Result<(), Self::Error> where T: Table, { self.writer.queue_create::() } - fn queue_clear(&mut self) -> Result<(), Self::Error> + fn queue_clear(&self) -> Result<(), Self::Error> where T: Table, { @@ -276,7 +283,7 @@ where let code_hash = account.bytecode_hash.unwrap_or(KECCAK256_EMPTY); let code = if code_hash != KECCAK256_EMPTY { - self.reader.get::(&code_hash)?.map(|b| b.0) + self.reader.get::(&code_hash)? } else { None }; @@ -285,7 +292,7 @@ where } fn code_by_hash_ref(&self, code_hash: B256) -> Result { - Ok(self.reader.get::(&code_hash)?.map(|bytecode| bytecode.0).unwrap_or_default()) + Ok(self.reader.get::(&code_hash)?.unwrap_or_default()) } fn storage_ref( @@ -293,9 +300,7 @@ where address: Address, index: StorageKey, ) -> Result { - let key = B256::from_slice(&index.to_be_bytes::<32>()); - - Ok(self.reader.get_dual::(&address, &key)?.unwrap_or_default()) + Ok(self.reader.get_dual::(&address, &index)?.unwrap_or_default()) } fn block_hash_ref(&self, _number: u64) -> Result { @@ -349,7 +354,7 @@ where let code_hash = account.bytecode_hash.unwrap_or(KECCAK256_EMPTY); let code = if code_hash != KECCAK256_EMPTY { - self.writer.get::(&code_hash)?.map(|b| b.0) + self.writer.get::(&code_hash)? 
} else { None }; @@ -358,7 +363,7 @@ where } fn code_by_hash_ref(&self, code_hash: B256) -> Result { - Ok(self.writer.get::(&code_hash)?.map(|bytecode| bytecode.0).unwrap_or_default()) + Ok(self.writer.get::(&code_hash)?.unwrap_or_default()) } fn storage_ref( @@ -366,8 +371,7 @@ where address: Address, index: StorageKey, ) -> Result { - let key = B256::from_slice(&index.to_be_bytes::<32>()); - Ok(self.writer.get_dual::(&address, &key)?.unwrap_or_default()) + Ok(self.writer.get_dual::(&address, &index)?.unwrap_or_default()) } fn block_hash_ref(&self, _number: u64) -> Result { @@ -425,8 +429,7 @@ where // Handle storage changes for (key, value) in account.storage { - let key = B256::from_slice(&key.to_be_bytes::<32>()); - self.writer.queue_put_dual::( + self.writer.queue_put_dual::( &address, &key, &value.present_value(), @@ -438,15 +441,13 @@ where } } -#[cfg(test)] +#[cfg(all(test, feature = "in-mem"))] mod tests { use super::*; - use crate::{ - hot::{ - mem::MemKv, - model::{HotKv, HotKvRead, HotKvWrite}, - }, - tables::hot::{Bytecodes, PlainAccountState}, + use crate::hot::{ + impls::mem::MemKv, + model::{HotKv, HotKvRead, HotKvWrite}, + tables::{Bytecodes, PlainAccountState}, }; use alloy::primitives::{Address, B256, U256}; use reth::primitives::{Account, Bytecode}; @@ -484,7 +485,7 @@ mod tests { { // Setup data using HotKv - let mut writer = mem_kv.revm_writer()?; + let writer = mem_kv.revm_writer()?; writer.queue_put::(&address, &account)?; writer.queue_put::(&hash, &bytecode)?; writer.persist()?; @@ -527,7 +528,7 @@ mod tests { { // Setup data using HotKv - let mut writer = mem_kv.revm_writer()?; + let writer = mem_kv.revm_writer()?; writer.queue_put::(&address, &account)?; writer.queue_put::(&hash, &bytecode)?; writer.persist()?; @@ -569,7 +570,7 @@ mod tests { { // Setup initial data - let mut writer = mem_kv.revm_writer()?; + let writer = mem_kv.revm_writer()?; writer.queue_put::(&address, &account)?; writer.queue_put::(&hash, &bytecode)?; writer.persist()?; @@ -647,9 +648,9 @@ mod tests { assert_eq!(acc.balance, U256::from(2000u64)); assert_eq!(acc.bytecode_hash, None); - let key = B256::with_last_byte(100); + let key = U256::from(100); let storage_val: Option = - reader.get_dual::(&address, &key)?; + reader.get_dual::(&address, &key)?; assert_eq!(storage_val, Some(U256::from(200u64))); } @@ -665,7 +666,7 @@ mod tests { // Write some data using HotKv { - let mut writer = mem_kv.revm_writer()?; + let writer = mem_kv.revm_writer()?; let account = Account { nonce: 10, balance: U256::from(500u64), bytecode_hash: None }; writer.queue_put::(&address1, &account)?; writer.persist()?; @@ -741,7 +742,7 @@ mod tests { // Setup data { - let mut writer = mem_kv.revm_writer()?; + let writer = mem_kv.revm_writer()?; writer.queue_put::(&address, &account)?; writer.persist()?; } diff --git a/crates/storage/src/hot/model/traits.rs b/crates/storage/src/hot/model/traits.rs index 07942b8..7437b2f 100644 --- a/crates/storage/src/hot/model/traits.rs +++ b/crates/storage/src/hot/model/traits.rs @@ -1,15 +1,28 @@ -use crate::{ - hot::model::{ - DualTableCursor, GetManyItem, HotKvError, HotKvReadError, KvTraverse, KvTraverseMut, - TableCursor, +use crate::hot::{ + model::{ + DualKeyTraverse, DualKeyValue, DualTableCursor, GetManyItem, HotKvError, HotKvReadError, + KeyValue, KvTraverse, KvTraverseMut, TableCursor, revm::{RevmRead, RevmWrite}, }, ser::{KeySer, MAX_KEY_SIZE, ValSer}, - tables::{DualKeyed, SingleKey, Table}, + tables::{DualKey, SingleKey, Table}, }; -use std::borrow::Cow; +use 
std::{borrow::Cow, ops::RangeInclusive};

 /// Trait for hot storage. This is a KV store with read/write transactions.
+///
+/// This is the top-level trait for hot storage backends, providing
+/// transactional access through read-only and read-write transactions.
+///
+/// We recommend using [`HistoryRead`] and [`HistoryWrite`] for most use cases,
+/// as they provide higher-level abstractions over predefined tables.
+///
+/// When implementing this trait, consult the [`model`] module documentation
+/// for details on the associated types and their requirements.
+///
+/// [`HistoryRead`]: crate::hot::db::HistoryRead
+/// [`HistoryWrite`]: crate::hot::db::HistoryWrite
+/// [`model`]: crate::hot::model
 #[auto_impl::auto_impl(&, Arc, Box)]
 pub trait HotKv {
     /// The read-only transaction type.
@@ -24,6 +37,7 @@ pub trait HotKv {
     /// revm [`DatabaseRef`] trait. The resulting reader can be used directly
     /// with [`trevm`] and [`revm`].
     ///
+    /// [`revm`]: trevm::revm
     /// [`DatabaseRef`]: trevm::revm::database::DatabaseRef
     fn revm_reader(&self) -> Result<RevmRead<Self::RoTx>, HotKvError> {
         self.reader().map(RevmRead::new)
@@ -59,18 +73,22 @@ pub trait HotKv {
 }

 /// Trait for hot storage read transactions.
+///
+/// This trait provides read-only access to hot storage tables. It should only
+/// be imported when accessing custom tables or when implementing new hot
+/// storage backends.
 #[auto_impl::auto_impl(&, Arc, Box)]
 pub trait HotKvRead {
     /// Error type for read operations.
     type Error: HotKvReadError;

     /// The cursor type for traversing key-value pairs.
-    type Traverse<'a>: KvTraverse<Self::Error>
+    type Traverse<'a>: KvTraverse<Self::Error> + DualKeyTraverse<Self::Error>
     where
         Self: 'a;

     /// Get a raw cursor to traverse the database.
-    fn raw_traverse<'a>(&'a self, table: &str) -> Result<Self::Traverse<'a>, Self::Error>;
+    fn raw_traverse<'a>(&'a self, table: &'static str) -> Result<Self::Traverse<'a>, Self::Error>;

     /// Get a raw value from a specific table.
     ///
@@ -78,8 +96,11 @@ pub trait HotKvRead {
     /// allowed to panic if this is not the case.
     ///
     /// If the table is dual-keyed, the output MAY be implementation-defined.
-    fn raw_get<'a>(&'a self, table: &str, key: &[u8])
-        -> Result<Option<Cow<'a, [u8]>>, Self::Error>;
+    fn raw_get<'a>(
+        &'a self,
+        table: &'static str,
+        key: &[u8],
+    ) -> Result<Option<Cow<'a, [u8]>>, Self::Error>;

     /// Get a raw value from a specific table with dual keys.
     ///
@@ -92,7 +113,7 @@ pub trait HotKvRead {
     /// implementation-defined.
     fn raw_get_dual<'a>(
         &'a self,
-        table: &str,
+        table: &'static str,
         key1: &[u8],
         key2: &[u8],
     ) -> Result<Option<Cow<'a, [u8]>>, Self::Error>;
@@ -107,7 +128,7 @@ pub trait HotKvRead {

     /// Traverse a specific dual-keyed table. Returns a typed dual-keyed
     /// cursor wrapper.
-    fn traverse_dual<'a, T: DualKeyed>(
+    fn traverse_dual<'a, T: DualKey>(
         &'a self,
     ) -> Result<DualTableCursor<Self::Traverse<'a>, T, Self::Error>, Self::Error> {
         let cursor = self.raw_traverse(T::NAME)?;
@@ -138,7 +159,7 @@ pub trait HotKvRead {
     ///
     /// If the table is not dual-keyed, the output MAY be
     /// implementation-defined.
-    fn get_dual<T: DualKeyed>(
+    fn get_dual<T: DualKey>(
         &self,
         key1: &T::Key,
         key2: &T::Key2,
@@ -194,31 +215,38 @@ pub trait HotKvRead {
 }

 /// Trait for hot storage write transactions.
+///
+/// This extends the [`HotKvRead`] trait with write capabilities.
 pub trait HotKvWrite: HotKvRead {
     /// The mutable cursor type for traversing key-value pairs.
-    type TraverseMut<'a>: KvTraverseMut<Self::Error>
+    type TraverseMut<'a>: KvTraverseMut<Self::Error> + DualKeyTraverse<Self::Error>
     where
         Self: 'a;

     /// Get a raw mutable cursor to traverse the database.
     fn raw_traverse_mut<'a>(
-        &'a mut self,
-        table: &str,
+        &'a self,
+        table: &'static str,
     ) -> Result<Self::TraverseMut<'a>, Self::Error>;

     /// Queue a raw put operation.
     ///
     /// The `key` buf must be <= [`MAX_KEY_SIZE`] bytes. Implementations are
     /// allowed to panic if this is not the case.
-    fn queue_raw_put(&mut self, table: &str, key: &[u8], value: &[u8]) -> Result<(), Self::Error>;
+    fn queue_raw_put(
+        &self,
+        table: &'static str,
+        key: &[u8],
+        value: &[u8],
+    ) -> Result<(), Self::Error>;

     /// Queue a raw put operation for a dual-keyed table.
     ///
     /// The `key1` and `key2` bufs must be <= [`MAX_KEY_SIZE`] bytes.
     /// Implementations are allowed to panic if this is not the case.
     fn queue_raw_put_dual(
-        &mut self,
-        table: &str,
+        &self,
+        table: &'static str,
         key1: &[u8],
         key2: &[u8],
         value: &[u8],
@@ -228,31 +256,45 @@ pub trait HotKvWrite: HotKvRead {
     ///
     /// The `key` buf must be <= [`MAX_KEY_SIZE`] bytes. Implementations are
     /// allowed to panic if this is not the case.
-    fn queue_raw_delete(&mut self, table: &str, key: &[u8]) -> Result<(), Self::Error>;
+    fn queue_raw_delete(&self, table: &'static str, key: &[u8]) -> Result<(), Self::Error>;
+
+    /// Queue a raw delete operation for a dual-keyed table.
+    ///
+    /// The `key1` and `key2` bufs must be <= [`MAX_KEY_SIZE`] bytes.
+    /// Implementations are allowed to panic if this is not the case.
+    fn queue_raw_delete_dual(
+        &self,
+        table: &'static str,
+        key1: &[u8],
+        key2: &[u8],
+    ) -> Result<(), Self::Error>;

     /// Queue a raw clear operation for a specific table.
-    fn queue_raw_clear(&mut self, table: &str) -> Result<(), Self::Error>;
+    fn queue_raw_clear(&self, table: &'static str) -> Result<(), Self::Error>;

     /// Queue a raw create operation for a specific table.
     ///
     /// This abstraction supports two table specializations:
-    /// 1. `dual_key`: whether the table uses dual keys (interior maps, called
-    ///    `DUPSORT` in LMDB/MDBX).
-    /// 2. `fixed_val`: whether the table has fixed-size values.
+    /// 1. `dual_key_size`: whether the table is dual-keyed (i.e., `DUPSORT`
+    ///    in LMDB/MDBX). If so, the argument MUST be the encoded size of the
+    ///    second key. If not, it MUST be `None`.
+    /// 2. `fixed_val_size`: whether the table has fixed-size values. If so,
+    ///    the argument MUST be the size of the fixed value. If not, it MUST
+    ///    be `None`.
     ///
     /// Database implementations can use this information for optimizations.
     fn queue_raw_create(
-        &mut self,
-        table: &str,
-        dual_key: bool,
-        fixed_val: bool,
+        &self,
+        table: &'static str,
+        dual_key_size: Option<usize>,
+        fixed_val_size: Option<usize>,
     ) -> Result<(), Self::Error>;

     /// Traverse a specific table. Returns a mutable typed cursor wrapper.
     /// If invoked for a dual-keyed table, it will traverse the primary keys
     /// only, and the return value may be implementation-defined.
     fn traverse_mut<'a, T: SingleKey>(
-        &'a mut self,
+        &'a self,
     ) -> Result<TableCursor<Self::TraverseMut<'a>, T, Self::Error>, Self::Error> {
         let cursor = self.raw_traverse_mut(T::NAME)?;
         Ok(TableCursor::new(cursor))
     }

     /// Traverse a specific dual-keyed table. Returns a mutable typed
     /// dual-keyed cursor wrapper.
-    fn traverse_dual_mut<'a, T: DualKeyed>(
-        &'a mut self,
+    fn traverse_dual_mut<'a, T: DualKey>(
+        &'a self,
     ) -> Result<DualTableCursor<Self::TraverseMut<'a>, T, Self::Error>, Self::Error> {
         let cursor = self.raw_traverse_mut(T::NAME)?;
         Ok(DualTableCursor::new(cursor))
     }

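    // [Editor's sketch, not part of the diff] How the typed cursors above
    // compose in practice, assuming a hypothetical single-keyed `Headers`
    // table (`Key = u64`, `Value = Header`) and a write transaction `writer`:
    //
    //     let mut cursor = writer.traverse_mut::<Headers>()?;
    //     // Seek to the first header at or above block 100, then walk forward.
    //     let mut entry = cursor.lower_bound(&100u64)?;
    //     while let Some((number, header)) = entry {
    //         // ... inspect the entry, or call cursor.delete_current()? ...
    //         entry = cursor.read_next()?;
    //     }
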
-    fn queue_put<T: SingleKey>(
-        &mut self,
-        key: &T::Key,
-        value: &T::Value,
-    ) -> Result<(), Self::Error> {
+    fn queue_put<T: SingleKey>(&self, key: &T::Key, value: &T::Value) -> Result<(), Self::Error> {
         let mut key_buf = [0u8; MAX_KEY_SIZE];
         let key_bytes = key.encode_key(&mut key_buf);
         let value_bytes = value.encoded();
@@ -281,8 +319,8 @@ pub trait HotKvWrite: HotKvRead {
     }

     /// Queue a put operation for a specific dual-keyed table.
-    fn queue_put_dual<T: DualKeyed>(
-        &mut self,
+    fn queue_put_dual<T: DualKey>(
+        &self,
         key1: &T::Key,
         key2: &T::Key2,
         value: &T::Value,
@@ -297,15 +335,29 @@ pub trait HotKvWrite: HotKvRead {
     }

     /// Queue a delete operation for a specific table.
-    fn queue_delete<T: SingleKey>(&mut self, key: &T::Key) -> Result<(), Self::Error> {
+    fn queue_delete<T: SingleKey>(&self, key: &T::Key) -> Result<(), Self::Error> {
         let mut key_buf = [0u8; MAX_KEY_SIZE];
         let key_bytes = key.encode_key(&mut key_buf);
         self.queue_raw_delete(T::NAME, key_bytes)
     }

+    /// Queue a delete operation for a specific dual-keyed table.
+    fn queue_delete_dual<T: DualKey>(
+        &self,
+        key1: &T::Key,
+        key2: &T::Key2,
+    ) -> Result<(), Self::Error> {
+        let mut key1_buf = [0u8; MAX_KEY_SIZE];
+        let mut key2_buf = [0u8; MAX_KEY_SIZE];
+        let key1_bytes = key1.encode_key(&mut key1_buf);
+        let key2_bytes = key2.encode_key(&mut key2_buf);
+
+        self.queue_raw_delete_dual(T::NAME, key1_bytes, key2_bytes)
+    }
+
     /// Queue many put operations for a specific table.
-    fn queue_put_many<'a, 'b, T, I>(&mut self, entries: I) -> Result<(), Self::Error>
+    fn queue_put_many<'a, 'b, T, I>(&self, entries: I) -> Result<(), Self::Error>
     where
         T: SingleKey,
         T::Key: 'a,
@@ -325,21 +377,144 @@ pub trait HotKvWrite: HotKvRead {
     }

     /// Queue creation of a specific table.
-    fn queue_create<T>(&mut self) -> Result<(), Self::Error>
+    fn queue_create<T>(&self) -> Result<(), Self::Error>
     where
         T: Table,
     {
-        self.queue_raw_create(T::NAME, T::DUAL_KEY, T::IS_FIXED_VAL)
+        self.queue_raw_create(T::NAME, T::DUAL_KEY_SIZE, T::FIXED_VAL_SIZE)
     }

     /// Queue clearing all entries in a specific table.
-    fn queue_clear<T>(&mut self) -> Result<(), Self::Error>
+    fn queue_clear<T>(&self) -> Result<(), Self::Error>
     where
         T: Table,
     {
         self.queue_raw_clear(T::NAME)
     }

+    /// Remove all data in the given range, invoking `op` on each removed
+    /// key-value pair.
+    fn clear_with_op<T: SingleKey>(
+        &self,
+        range: RangeInclusive<T::Key>,
+        mut op: impl FnMut(T::Key, T::Value),
+    ) -> Result<(), Self::Error> {
+        let mut cursor = self.traverse_mut::<T>()?;
+
+        // Position cursor at first entry at or above range start
+        let Some((key, value)) = cursor.lower_bound(range.start())? else {
+            // No entries at or above range start
+            return Ok(());
+        };
+
+        if !range.contains(&key) {
+            // First entry is outside range
+            return Ok(());
+        }
+
+        op(key, value);
+        cursor.delete_current()?;
+
+        // Iterate through remaining entries
+        while let Some((key, value)) = cursor.read_next()? {
+            if !range.contains(&key) {
+                break;
+            }
+            op(key, value);
+            cursor.delete_current()?;
+        }
+
+        Ok(())
+    }
+
+    /// Remove all data in the given range from the database.
+    fn clear_range<T: SingleKey>(&self, range: RangeInclusive<T::Key>) -> Result<(), Self::Error> {
+        self.clear_with_op::<T>(range, |_, _| {})
+    }
+
+    /// Remove all data in the given range and return the removed key-value
+    /// pairs.
+    fn take_range<T: SingleKey>(
+        &self,
+        range: RangeInclusive<T::Key>,
+    ) -> Result<Vec<KeyValue<T>>, Self::Error> {
+        let mut vec = Vec::new();
+        self.clear_with_op::<T>(range, |key, value| vec.push((key, value)))?;
+        Ok(vec)
+    }
+
+    /// Remove all dual-keyed data in the given range, invoking `op` on each
+    /// removed entry.
+    fn clear_range_dual_with_op<T: DualKey>(
+        &self,
+        range: RangeInclusive<(T::Key, T::Key2)>,
+        mut op: impl FnMut(T::Key, T::Key2, T::Value),
+    ) -> Result<(), Self::Error> {
+        let mut cursor = self.traverse_dual_mut::<T>()?;
+
+        let (start_k1, start_k2) = range.start();
+
+        // Position at first entry at or above (start_k1, start_k2)
+        let Some((k1, k2, value)) = cursor.next_dual_above(start_k1, start_k2)? else {
+            // No entries at or above range start
+            return Ok(());
+        };
+
+        // inline range contains to avoid moving k1,k2
+        let (range_1, range_2) = range.start();
+        if range_1 > &k1 || (range_1 == &k1 && range_2 > &k2) {
+            // First entry is outside range
+            return Ok(());
+        }
+        let (range_1, range_2) = range.end();
+        if range_1 < &k1 || (range_1 == &k1 && range_2 < &k2) {
+            // First entry is outside range
+            return Ok(());
+        }
+        // end of inline range contains
+
+        op(k1, k2, value);
+        cursor.delete_current()?;
+
+        // Iterate through all entries (both k1 and k2 changes)
+        // Use read_next() instead of next_k2() to navigate across different k1 values
+        while let Some((k1, k2, value)) = cursor.read_next()? {
+            // inline range contains to avoid moving k1,k2
+            let (range_1, range_2) = range.start();
+            if range_1 > &k1 || (range_1 == &k1 && range_2 > &k2) {
+                break;
+            }
+            let (range_1, range_2) = range.end();
+            if range_1 < &k1 || (range_1 == &k1 && range_2 < &k2) {
+                break;
+            }
+            // end of inline range contains
+            op(k1, k2, value);
+            cursor.delete_current()?;
+        }
+
+        Ok(())
+    }
+
+    /// Remove all dual-keyed data in the given k1,k2 range from the database.
+    fn clear_range_dual<T: DualKey>(
+        &self,
+        range: RangeInclusive<(T::Key, T::Key2)>,
+    ) -> Result<(), Self::Error> {
+        self.clear_range_dual_with_op::<T>(range, |_, _, _| {})
+    }
+
+    /// Remove all dual-keyed data in the given k1,k2 range and return the
+    /// removed key-key-value tuples.
+    fn take_range_dual<T: DualKey>(
+        &self,
+        range: RangeInclusive<(T::Key, T::Key2)>,
+    ) -> Result<Vec<DualKeyValue<T>>, Self::Error> {
+        let mut vec = Vec::new();
+        self.clear_range_dual_with_op::<T>(range, |k1, k2, value| {
+            vec.push((k1, k2, value));
+        })?;
+        Ok(vec)
+    }
+
     /// Commit the queued operations.
    fn raw_commit(self) -> Result<(), Self::Error>;
 }
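As a usage sketch, the typed write helpers compose into ordinary maintenance logic. This is illustrative only: the import paths are assumptions about the module layout, and `Headers` is the table defined in `definitions.rs` later in this patch.

```rust
use signet_storage::hot::model::HotKvWrite; // assumed path to the trait
use signet_storage::hot::tables::Headers; // table defined later in this patch

/// Remove all headers with numbers in `[0, before)`, returning the count.
/// The caller opens the write transaction and decides when to `raw_commit`.
fn prune_headers<W: HotKvWrite>(txn: &W, before: u64) -> Result<usize, W::Error> {
    if before == 0 {
        return Ok(0);
    }
    // take_range deletes the inclusive key range and returns the removed
    // (key, value) pairs; clear_range would drop them without collecting.
    let removed = txn.take_range::<Headers>(0..=before - 1)?;
    Ok(removed.len())
}
```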
diff --git a/crates/storage/src/hot/model/traverse.rs b/crates/storage/src/hot/model/traverse.rs
index c8bec3f..08acec4 100644
--- a/crates/storage/src/hot/model/traverse.rs
+++ b/crates/storage/src/hot/model/traverse.rs
@@ -1,9 +1,9 @@
 //! Cursor traversal traits and typed wrappers for database navigation.

-use crate::{
-    hot::model::{DualKeyValue, HotKvReadError, KeyValue, RawDualKeyValue, RawKeyValue, RawValue},
+use crate::hot::{
+    model::{DualKeyValue, HotKvReadError, KeyValue, RawDualKeyValue, RawKeyValue, RawValue},
     ser::{KeySer, MAX_KEY_SIZE},
-    tables::{DualKeyed, Table},
+    tables::{DualKey, SingleKey},
 };
 use std::ops::Range;
@@ -58,7 +58,27 @@ pub trait KvTraverseMut<E>: KvTraverse<E> {
 }

 /// Trait for traversing dual-keyed key-value pairs in the database.
-pub trait DualKeyedTraverse<E>: KvTraverse<E> {
+pub trait DualKeyTraverse<E> {
+    /// Set position to the first key-value pair in the database, and return
+    /// the KV pair with both keys.
+    fn first<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, E>;
+
+    /// Set position to the last key-value pair in the database, and return the
+    /// KV pair with both keys.
+    fn last<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, E>;
+
+    /// Get the next key-value pair in the database, and advance the cursor.
+    ///
+    /// Returning `Ok(None)` indicates the cursor is past the end of the
+    /// database.
+    fn read_next<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, E>;
+
+    /// Get the previous key-value pair in the database, and move the cursor.
+    ///
+    /// Returning `Ok(None)` indicates the cursor is before the start of the
+    /// database.
+    fn read_prev<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, E>;
+
     /// Set the cursor to specific dual key in the database, and return the
     /// EXACT KV pair if it exists.
     ///
@@ -85,6 +105,26 @@
     /// Move the cursor to the next distinct key2 for the current key1, and
     /// return the first key-value pair with that key2.
     fn next_k2<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, E>;
+
+    /// Seek to the LAST key2 entry for the specified key1.
+    ///
+    /// This positions the cursor at the last duplicate value for the given key1.
+    /// Returning `Ok(None)` indicates the key1 does not exist.
+    fn last_of_k1<'a>(&'a mut self, key1: &[u8]) -> Result<Option<RawDualKeyValue<'a>>, E>;
+
+    /// Move the cursor to the LAST key2 entry of the PREVIOUS key1.
+    ///
+    /// This is the reverse of `next_k1` - it moves backward to the previous distinct
+    /// key1 and positions at its last key2 entry.
+    /// Returning `Ok(None)` indicates there is no previous key1.
+    fn previous_k1<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, E>;
+
+    /// Move the cursor to the PREVIOUS key2 entry for the CURRENT key1.
+    ///
+    /// This is the reverse of `next_k2` - it moves backward within the current key1's
+    /// duplicate values.
+    /// Returning `Ok(None)` indicates there is no previous key2 for this key1.
+    fn previous_k2<'a>(&'a mut self) -> Result<Option<RawDualKeyValue<'a>>, E>;
 }

 // ============================================================================
@@ -95,7 +135,7 @@
 ///
 /// This trait provides type-safe access to table entries by encoding keys
 /// and decoding values according to the table's schema.
-pub trait TableTraverse<T: Table, E: HotKvReadError>: KvTraverse<E> {
+pub trait TableTraverse<T: SingleKey, E: HotKvReadError>: KvTraverse<E> {
     /// Get the first key-value pair in the table.
     fn first(&mut self) -> Result<Option<KeyValue<T>>, E> {
         KvTraverse::first(self)?.map(T::decode_kv_tuple).transpose().map_err(Into::into)
@@ -134,19 +174,66 @@
     fn read_prev(&mut self) -> Result<Option<KeyValue<T>>, E> {
         KvTraverse::read_prev(self)?.map(T::decode_kv_tuple).transpose().map_err(Into::into)
     }
+
+    /// Iterate entries starting from a key while a predicate holds.
+    ///
+    /// Positions the cursor at `start_key` and calls `f` for each entry
+    /// while `predicate` returns true.
+    ///
+    /// Returns `Ok(())` on successful completion, or the first error encountered.
+    fn for_each_while<P, F>(&mut self, start_key: &T::Key, predicate: P, mut f: F) -> Result<(), E>
+    where
+        P: Fn(&T::Key, &T::Value) -> bool,
+        F: FnMut(T::Key, T::Value) -> Result<(), E>,
+    {
+        let Some((k, v)) = TableTraverse::lower_bound(self, start_key)? else {
+            return Ok(());
+        };
+
+        if !predicate(&k, &v) {
+            return Ok(());
+        }
+
+        f(k, v)?;
+
+        while let Some((k, v)) = TableTraverse::read_next(self)? {
+            if !predicate(&k, &v) {
+                break;
+            }
+            f(k, v)?;
+        }
+
+        Ok(())
+    }
+
+    /// Collect entries from start_key while predicate holds.
+    ///
+    /// This is useful when you need to process entries after iteration completes
+    /// or when the closure would need to borrow mutably from multiple sources.
+    fn collect_while<P>(&mut self, start_key: &T::Key, predicate: P) -> Result<Vec<KeyValue<T>>, E>
+    where
+        P: Fn(&T::Key, &T::Value) -> bool,
+    {
+        let mut result = Vec::new();
+        self.for_each_while(start_key, predicate, |k, v| {
+            result.push((k, v));
+            Ok(())
+        })?;
+        Ok(result)
+    }
 }

 /// Blanket implementation of `TableTraverse` for any cursor that implements `KvTraverse`.
 impl<C, T, E> TableTraverse<T, E> for C
 where
     C: KvTraverse<E>,
-    T: Table,
+    T: SingleKey,
     E: HotKvReadError,
 {
 }

 /// Extension trait for typed table traversal with mutation capabilities.
-pub trait TableTraverseMut<T: Table, E: HotKvReadError>: KvTraverseMut<E> {
+pub trait TableTraverseMut<T: SingleKey, E: HotKvReadError>: KvTraverseMut<E> {
     /// Delete the current key-value pair.
     fn delete_current(&mut self) -> Result<(), E> {
         KvTraverseMut::delete_current(self)
@@ -163,11 +250,11 @@
 }

-/// Blanket implementation of `TableTraverseMut` for any cursor that implements `KvTraverseMut`.
+/// Blanket implementation of [`TableTraverseMut`] for any cursor that implements [`KvTraverseMut`].
 impl<C, T, E> TableTraverseMut<T, E> for C
 where
     C: KvTraverseMut<E>,
-    T: Table,
+    T: SingleKey,
     E: HotKvReadError,
 {
 }
@@ -177,10 +264,30 @@
 /// This is an extension trait rather than a wrapper struct because MDBX
 /// requires specialized implementations for DUPSORT tables that need access
 /// to the table type `T` to handle fixed-size values correctly.
-pub trait DualTableTraverse<T: DualKeyed, E: HotKvReadError> {
+pub trait DualTableTraverse<T: DualKey, E: HotKvReadError>: DualKeyTraverse<E> {
+    /// Get the first key-value pair in the table.
+    fn first(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualKeyTraverse::first(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into)
+    }
+
+    /// Get the last key-value pair in the table.
+    fn last(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualKeyTraverse::last(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into)
+    }
+
+    /// Get the next key-value pair and advance the cursor.
+    fn read_next(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualKeyTraverse::read_next(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into)
+    }
+
+    /// Get the previous key-value pair and move the cursor backward.
+    fn read_prev(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualKeyTraverse::read_prev(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into)
+    }
+
     /// Return the EXACT value for the specified dual key if it exists.
     fn exact_dual(&mut self, key1: &T::Key, key2: &T::Key2) -> Result<Option<T::Value>, E> {
-        let Some((k1, k2, v)) = self.next_dual_above(key1, key2)? else {
+        let Some((k1, k2, v)) = DualTableTraverse::next_dual_above(self, key1, key2)? else {
             return Ok(None);
         };
@@ -199,6 +306,152 @@
     /// Seek to the next distinct key2 for the current key1.
     fn next_k2(&mut self) -> Result<Option<DualKeyValue<T>>, E>;
+
+    /// Seek to the LAST key2 entry for the specified key1.
+    fn last_of_k1(&mut self, key1: &T::Key) -> Result<Option<DualKeyValue<T>>, E>;
+
+    /// Move to the LAST key2 entry of the PREVIOUS key1.
+    fn previous_k1(&mut self) -> Result<Option<DualKeyValue<T>>, E>;
+
+    /// Move to the PREVIOUS key2 entry for the CURRENT key1.
+    fn previous_k2(&mut self) -> Result<Option<DualKeyValue<T>>, E>;
+
+    /// Iterate entries (crossing k1 boundaries) while a predicate holds.
+    ///
+    /// Positions the cursor at `(key1, start_k2)` and calls `f` for each entry
+    /// while `predicate` returns true. Uses `read_next()` to cross k1 boundaries.
+    ///
+    /// Returns `Ok(())` on successful completion, or the first error encountered.
+    fn for_each_while<P, F>(
+        &mut self,
+        key1: &T::Key,
+        start_k2: &T::Key2,
+        predicate: P,
+        mut f: F,
+    ) -> Result<(), E>
+    where
+        P: Fn(&T::Key, &T::Key2, &T::Value) -> bool,
+        F: FnMut(T::Key, T::Key2, T::Value) -> Result<(), E>,
+    {
+        let Some((k1, k2, v)) = DualTableTraverse::next_dual_above(self, key1, start_k2)? else {
+            return Ok(());
+        };
+
+        if !predicate(&k1, &k2, &v) {
+            return Ok(());
+        }
+
+        f(k1, k2, v)?;
+
+        while let Some((k1, k2, v)) = DualTableTraverse::read_next(self)? {
+            if !predicate(&k1, &k2, &v) {
+                break;
+            }
+            f(k1, k2, v)?;
+        }
+
+        Ok(())
+    }
+
+    /// Iterate entries within the same k1 while a predicate holds.
+    ///
+    /// Positions the cursor at `(key1, start_k2)` and calls `f` for each entry
+    /// while `predicate` returns true. Uses `next_k2()` which stays within
+    /// the same k1 value.
+    ///
+    /// Returns `Ok(())` on successful completion, or the first error encountered.
+    fn for_each_while_k2<P, F>(
+        &mut self,
+        key1: &T::Key,
+        start_k2: &T::Key2,
+        predicate: P,
+        f: F,
+    ) -> Result<(), E>
+    where
+        P: Fn(&T::Key, &T::Key2, &T::Value) -> bool,
+        F: FnMut(T::Key, T::Key2, T::Value) -> Result<(), E>,
+    {
+        self.for_each_while(key1, start_k2, |k, k2, v| key1 == k && predicate(k, k2, v), f)
+    }
+
+    /// Iterate all k2 entries for a given k1, starting from `start_k2`.
+    ///
+    /// Calls `f` for each (k1, k2, v) tuple where k1 matches the provided key1
+    /// and k2 >= start_k2. Stops when k1 changes or the table is exhausted.
+    ///
+    /// Returns `Ok(())` on successful completion, or the first error encountered.
+    fn for_each_k2<F>(&mut self, key1: &T::Key, start_k2: &T::Key2, f: F) -> Result<(), E>
+    where
+        T::Key: PartialEq,
+        F: FnMut(T::Key, T::Key2, T::Value) -> Result<(), E>,
+    {
+        self.for_each_while_k2(key1, start_k2, |_, _, _| true, f)
+    }
+
+    /// Collect all k2 entries for a given k1 into a Vec.
+    ///
+    /// This is useful when you need to process entries after iteration completes
+    /// or when the closure would need to borrow mutably from multiple sources.
+    fn collect_k2(&mut self, key1: &T::Key, start_k2: &T::Key2) -> Result<Vec<DualKeyValue<T>>, E>
+    where
+        T::Key: PartialEq,
+    {
+        let mut result = Vec::new();
+        self.for_each_k2(key1, start_k2, |k1, k2, v| {
+            result.push((k1, k2, v));
+            Ok(())
+        })?;
+        Ok(result)
+    }
+}
+
+impl<C, T, E> DualTableTraverse<T, E> for C
+where
+    C: DualKeyTraverse<E>,
+    T: DualKey,
+    E: HotKvReadError,
+{
+    fn next_dual_above(
+        &mut self,
+        key1: &T::Key,
+        key2: &T::Key2,
+    ) -> Result<Option<DualKeyValue<T>>, E> {
+        let mut key1_buf = [0u8; MAX_KEY_SIZE];
+        let mut key2_buf = [0u8; MAX_KEY_SIZE];
+        let key1_bytes = key1.encode_key(&mut key1_buf);
+        let key2_bytes = key2.encode_key(&mut key2_buf);
+
+        DualKeyTraverse::next_dual_above(self, key1_bytes, key2_bytes)?
+            .map(T::decode_kkv_tuple)
+            .transpose()
+            .map_err(Into::into)
+    }
+
+    fn next_k1(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualKeyTraverse::next_k1(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into)
+    }
+
+    fn next_k2(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualKeyTraverse::next_k2(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into)
+    }
+
+    fn last_of_k1(&mut self, key1: &T::Key) -> Result<Option<DualKeyValue<T>>, E> {
+        let mut key1_buf = [0u8; MAX_KEY_SIZE];
+        let key1_bytes = key1.encode_key(&mut key1_buf);
+
+        DualKeyTraverse::last_of_k1(self, key1_bytes)?
+            .map(T::decode_kkv_tuple)
+            .transpose()
+            .map_err(Into::into)
+    }
+
+    fn previous_k1(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualKeyTraverse::previous_k1(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into)
+    }
+
+    fn previous_k2(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualKeyTraverse::previous_k2(self)?.map(T::decode_kkv_tuple).transpose().map_err(Into::into)
+    }
 }

 // ============================================================================
@@ -243,7 +496,7 @@ impl<C, T, E> TableCursor<C, T, E> {
 impl<C, T, E> TableCursor<C, T, E>
 where
     C: KvTraverse<E>,
-    T: Table,
+    T: SingleKey,
     E: HotKvReadError,
 {
     /// Get the first key-value pair in the table.
@@ -275,12 +528,36 @@
     pub fn read_prev(&mut self) -> Result<Option<KeyValue<T>>, E> {
         TableTraverse::<T, E>::read_prev(&mut self.inner)
     }
+
+    /// Iterate entries starting from a key while a predicate holds.
+    ///
+    /// Positions the cursor at `start_key` and calls `f` for each entry
+    /// while `predicate` returns true.
+    pub fn for_each_while<P, F>(&mut self, start_key: &T::Key, predicate: P, f: F) -> Result<(), E>
+    where
+        P: Fn(&T::Key, &T::Value) -> bool,
+        F: FnMut(T::Key, T::Value) -> Result<(), E>,
+    {
+        TableTraverse::<T, E>::for_each_while(&mut self.inner, start_key, predicate, f)
+    }
+
+    /// Collect entries from start_key while predicate holds.
+    pub fn collect_while<P>(
+        &mut self,
+        start_key: &T::Key,
+        predicate: P,
+    ) -> Result<Vec<KeyValue<T>>, E>
+    where
+        P: Fn(&T::Key, &T::Value) -> bool,
+    {
+        TableTraverse::<T, E>::collect_while(&mut self.inner, start_key, predicate)
+    }
 }

 impl<C, T, E> TableCursor<C, T, E>
 where
     C: KvTraverseMut<E>,
-    T: Table,
+    T: SingleKey,
     E: HotKvReadError,
 {
     /// Delete the current key-value pair.
@@ -330,7 +607,7 @@ impl<C, T, E> DualTableCursor<C, T, E> {
 impl<C, T, E> DualTableCursor<C, T, E>
 where
     C: DualTableTraverse<T, E>,
-    T: DualKeyed,
+    T: DualKey,
     E: HotKvReadError,
 {
     /// Return the EXACT value for the specified dual key if it exists.
@@ -356,44 +633,120 @@
     pub fn next_k2(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
         DualTableTraverse::<T, E>::next_k2(&mut self.inner)
     }
+
+    /// Seek to the LAST key2 entry for the specified key1.
+    pub fn last_of_k1(&mut self, key1: &T::Key) -> Result<Option<DualKeyValue<T>>, E> {
+        DualTableTraverse::<T, E>::last_of_k1(&mut self.inner, key1)
+    }
+
+    /// Move to the LAST key2 entry of the PREVIOUS key1.
+    pub fn previous_k1(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualTableTraverse::<T, E>::previous_k1(&mut self.inner)
+    }
+
+    /// Move to the PREVIOUS key2 entry for the CURRENT key1.
+    pub fn previous_k2(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualTableTraverse::<T, E>::previous_k2(&mut self.inner)
+    }
 }

-// Also provide access to single-key traversal methods for dual-keyed cursors
+// Also provide access to first/last/read_next/read_prev methods for dual-keyed cursors
 impl<C, T, E> DualTableCursor<C, T, E>
 where
-    C: KvTraverse<E>,
-    T: DualKeyed,
+    C: DualTableTraverse<T, E>,
+    T: DualKey,
     E: HotKvReadError,
 {
-    /// Get the first key-value pair in the table (raw traversal).
-    pub fn first(&mut self) -> Result<Option<KeyValue<T>>, E> {
-        TableTraverse::<T, E>::first(&mut self.inner)
+    /// Get the first key-value pair in the table.
+    pub fn first(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualTableTraverse::<T, E>::first(&mut self.inner)
     }

-    /// Get the last key-value pair in the table (raw traversal).
-    pub fn last(&mut self) -> Result<Option<KeyValue<T>>, E> {
-        TableTraverse::<T, E>::last(&mut self.inner)
+    /// Get the last key-value pair in the table.
+    pub fn last(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualTableTraverse::<T, E>::last(&mut self.inner)
     }

     /// Get the next key-value pair and advance the cursor.
-    pub fn read_next(&mut self) -> Result<Option<KeyValue<T>>, E> {
-        TableTraverse::<T, E>::read_next(&mut self.inner)
+    pub fn read_next(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualTableTraverse::<T, E>::read_next(&mut self.inner)
     }

     /// Get the previous key-value pair and move the cursor backward.
-    pub fn read_prev(&mut self) -> Result<Option<KeyValue<T>>, E> {
-        TableTraverse::<T, E>::read_prev(&mut self.inner)
+    pub fn read_prev(&mut self) -> Result<Option<DualKeyValue<T>>, E> {
+        DualTableTraverse::<T, E>::read_prev(&mut self.inner)
+    }
+
+    /// Iterate all k2 entries for a given k1, starting from `start_k2`.
+    ///
+    /// Calls `f` for each (k1, k2, v) tuple where k1 matches the provided key1
+    /// and k2 >= start_k2. Stops when k1 changes or the table is exhausted.
+    pub fn for_each_k2<F>(&mut self, key1: &T::Key, start_k2: &T::Key2, f: F) -> Result<(), E>
+    where
+        T::Key: PartialEq,
+        F: FnMut(T::Key, T::Key2, T::Value) -> Result<(), E>,
+    {
+        DualTableTraverse::<T, E>::for_each_k2(&mut self.inner, key1, start_k2, f)
+    }
+
+    /// Iterate entries within the same k1 while a predicate holds.
+    ///
+    /// Positions the cursor at `(key1, start_k2)` and calls `f` for each entry
+    /// while `predicate` returns true. Uses `next_k2()` which stays within
+    /// the same k1 value.
+    pub fn for_each_while_k2<P, F>(
+        &mut self,
+        key1: &T::Key,
+        start_k2: &T::Key2,
+        predicate: P,
+        f: F,
+    ) -> Result<(), E>
+    where
+        P: Fn(&T::Key, &T::Key2, &T::Value) -> bool,
+        F: FnMut(T::Key, T::Key2, T::Value) -> Result<(), E>,
+    {
+        DualTableTraverse::<T, E>::for_each_while_k2(&mut self.inner, key1, start_k2, predicate, f)
+    }
+
+    /// Iterate entries (crossing k1 boundaries) while a predicate holds.
+    ///
+    /// Positions the cursor at `(key1, start_k2)` and calls `f` for each entry
+    /// while `predicate` returns true. Uses `read_next()` to cross k1 boundaries.
+    pub fn for_each_while<P, F>(
+        &mut self,
+        key1: &T::Key,
+        start_k2: &T::Key2,
+        predicate: P,
+        f: F,
+    ) -> Result<(), E>
+    where
+        P: Fn(&T::Key, &T::Key2, &T::Value) -> bool,
+        F: FnMut(T::Key, T::Key2, T::Value) -> Result<(), E>,
+    {
+        DualTableTraverse::<T, E>::for_each_while(&mut self.inner, key1, start_k2, predicate, f)
+    }
+
+    /// Collect all k2 entries for a given k1 into a Vec.
+    pub fn collect_k2(
+        &mut self,
+        key1: &T::Key,
+        start_k2: &T::Key2,
+    ) -> Result<Vec<DualKeyValue<T>>, E>
+    where
+        T::Key: PartialEq,
+    {
+        DualTableTraverse::<T, E>::collect_k2(&mut self.inner, key1, start_k2)
    }
 }

 impl<C, T, E> DualTableCursor<C, T, E>
 where
     C: KvTraverseMut<E>,
-    T: DualKeyed,
+    T: DualKey,
     E: HotKvReadError,
 {
     /// Delete the current key-value pair.
     pub fn delete_current(&mut self) -> Result<(), E> {
-        TableTraverseMut::<T, E>::delete_current(&mut self.inner)
+        KvTraverseMut::delete_current(&mut self.inner)
     }
 }
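To illustrate the typed cursor API, here is a sketch of a range read built on `collect_while`. The predicate only sees borrowed keys and values, so the accumulator is owned by `collect_while` itself, which is exactly the double-borrow situation the method exists to avoid. Import paths are assumptions, `traverse` is assumed to be the read-side analogue of `traverse_mut`, and `KeyValue<T>` is taken to be the `(key, value)` tuple, as the `take_range` implementation above suggests.

```rust
use signet_storage::hot::model::HotKvRead; // assumed path
use signet_storage::hot::tables::Headers; // table defined later in this patch
use reth::primitives::Header;

/// Collect the headers with numbers in `[start, end)`, in ascending order.
fn headers_in_range<R: HotKvRead>(
    txn: &R,
    start: u64,
    end: u64,
) -> Result<Vec<(u64, Header)>, R::Error> {
    let mut cursor = txn.traverse::<Headers>()?;
    // Positions at `start` and gathers entries while the key predicate holds.
    cursor.collect_while(&start, |number, _| *number < end)
}
```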
diff --git a/crates/storage/src/ser/error.rs b/crates/storage/src/hot/ser/error.rs
similarity index 100%
rename from crates/storage/src/ser/error.rs
rename to crates/storage/src/hot/ser/error.rs
diff --git a/crates/storage/src/ser/impls.rs b/crates/storage/src/hot/ser/impls.rs
similarity index 98%
rename from crates/storage/src/ser/impls.rs
rename to crates/storage/src/hot/ser/impls.rs
index 6c4f3d1..f9ee422 100644
--- a/crates/storage/src/ser/impls.rs
+++ b/crates/storage/src/hot/ser/impls.rs
@@ -1,4 +1,4 @@
-use crate::ser::{DeserError, KeySer, MAX_KEY_SIZE, ValSer};
+use crate::hot::ser::{DeserError, KeySer, MAX_KEY_SIZE, ValSer};
 use alloy::primitives::{Address, B256, Bloom};
 use bytes::BufMut;
 use reth::primitives::StorageEntry;
@@ -7,6 +7,8 @@ use reth_db::models::BlockNumberAddress;

 macro_rules! delegate_val_to_key {
     ($ty:ty) => {
         impl ValSer for $ty {
+            const FIXED_SIZE: Option<usize> = Some(<$ty as KeySer>::SIZE);
+
             fn encoded_size(&self) -> usize {
                 <$ty as KeySer>::SIZE
             }
@@ -140,6 +142,8 @@ impl KeySer for Address {
 }

 impl ValSer for Bloom {
+    const FIXED_SIZE: Option<usize> = Some(256);
+
     fn encoded_size(&self) -> usize {
         self.as_slice().len()
     }
@@ -317,6 +321,8 @@ impl KeySer for BlockNumberAddress {
 }

 impl ValSer for StorageEntry {
+    const FIXED_SIZE: Option<usize> = Some(64);
+
     fn encoded_size(&self) -> usize {
         self.key.encoded_size() + self.value.encoded_size()
     }
diff --git a/crates/storage/src/ser/mod.rs b/crates/storage/src/hot/ser/mod.rs
similarity index 54%
rename from crates/storage/src/ser/mod.rs
rename to crates/storage/src/hot/ser/mod.rs
index 2f34e88..11319f1 100644
--- a/crates/storage/src/ser/mod.rs
+++ b/crates/storage/src/hot/ser/mod.rs
@@ -1,9 +1,9 @@
 mod error;
 pub use error::DeserError;

-mod traits;
-pub use traits::{KeySer, MAX_KEY_SIZE, ValSer};
-
 mod impls;
 mod reth_impls;
+
+mod traits;
+pub use traits::{KeySer, MAX_FIXED_VAL_SIZE, MAX_KEY_SIZE, ValSer};
diff --git a/crates/storage/src/ser/reth_impls.rs b/crates/storage/src/hot/ser/reth_impls.rs
similarity index 94%
rename from crates/storage/src/ser/reth_impls.rs
rename to crates/storage/src/hot/ser/reth_impls.rs
index 24d5dd6..73914c5 100644
--- a/crates/storage/src/ser/reth_impls.rs
+++ b/crates/storage/src/hot/ser/reth_impls.rs
@@ -1,4 +1,4 @@
-use crate::ser::{DeserError, KeySer, MAX_KEY_SIZE, ValSer};
+use crate::hot::ser::{DeserError, KeySer, MAX_KEY_SIZE, ValSer};
 use alloy::{
     consensus::{EthereumTxEnvelope, Signed, TxEip1559, TxEip2930, TxEip4844, TxEip7702, TxLegacy},
     eips::{
@@ -7,14 +7,14 @@ use alloy::{
     },
     primitives::{Address, B256, FixedBytes, KECCAK256_EMPTY, Signature, TxKind, U256},
 };
-use reth::{
-    primitives::{Account, Bytecode, Header, Log, LogData, TransactionSigned, TxType},
-    revm::bytecode::{JumpTable, LegacyAnalyzedBytecode, eip7702::Eip7702Bytecode},
-};
+use reth::primitives::{Account, Header, Log, LogData, TransactionSigned, TxType};
 use reth_db_api::{
     BlockNumberList,
     models::{AccountBeforeTx, ShardedKey, StoredBlockBodyIndices},
 };
+use trevm::revm::bytecode::{
+    Bytecode, JumpTable, LegacyAnalyzedBytecode, eip7702::Eip7702Bytecode,
+};

 // Specialized impls for the sharded key types. This was implemented
 // generically, but there are only 2 types, and we can skip pushing a scratch
@@ -31,9 +31,7 @@ macro_rules! sharded_key {
         ) -> &'c [u8] {
             const SIZE: usize = <$ty as KeySer>::SIZE;

-            let prefix = self.key.as_slice();
-
-            buf[0..SIZE].copy_from_slice(prefix);
+            buf[0..SIZE].copy_from_slice(&self.key[..SIZE]);
             buf[SIZE..Self::SIZE].copy_from_slice(&self.highest_block_number.to_be_bytes());

             &buf[0..Self::SIZE]
@@ -49,7 +47,7 @@ macro_rules! sharded_key {
             }

             let key = <$ty as KeySer>::decode_key(&data[0..SIZE])?;
-            let highest_block_number = u64::decode_key(&data[SIZE..SIZE + 8])?;
+            let highest_block_number = u64::decode_key(&data[SIZE..Self::SIZE])?;
             Ok(Self { key, highest_block_number })
         }
     }
@@ -59,6 +57,26 @@ macro_rules! sharded_key {

 sharded_key!(B256);
 sharded_key!(Address);

+impl KeySer for ShardedKey<U256> {
+    const SIZE: usize = U256::SIZE + u64::SIZE;
+
+    fn encode_key<'a: 'c, 'b: 'c, 'c>(&'a self, buf: &'b mut [u8; MAX_KEY_SIZE]) -> &'c [u8] {
+        self.key.encode_key(buf);
+        buf[U256::SIZE..Self::SIZE].copy_from_slice(&self.highest_block_number.to_be_bytes());
+        &buf[0..Self::SIZE]
+    }
+
+    fn decode_key(data: &[u8]) -> Result<Self, DeserError> {
+        if data.len() < Self::SIZE {
+            return Err(DeserError::InsufficientData { needed: Self::SIZE, available: data.len() });
+        }
+
+        let key = U256::decode_key(&data[0..U256::SIZE])?;
+        let highest_block_number = u64::decode_key(&data[U256::SIZE..Self::SIZE])?;
+        Ok(Self { key, highest_block_number })
+    }
+}
+
 macro_rules! by_props {
     (@size $($prop:ident),* $(,)?) => {
         {
@@ -285,12 +303,10 @@ impl ValSer for Header {
 }

 impl ValSer for Account {
-    fn encoded_size(&self) -> usize {
-        // NB: Destructure to ensure changes are compile errors and mistakes
-        // are unused var warnings.
-        let Account { nonce, balance, bytecode_hash: _ } = self;
+    const FIXED_SIZE: Option<usize> = Some(8 + 32 + 32);

-        nonce.encoded_size() + balance.encoded_size() + 32
+    fn encoded_size(&self) -> usize {
+        Self::FIXED_SIZE.unwrap()
     }

     fn encode_value_to<B>(&self, buf: &mut B)
@@ -419,8 +435,10 @@ impl ValSer for Log {
 }

 impl ValSer for TxType {
+    const FIXED_SIZE: Option<usize> = Some(1);
+
     fn encoded_size(&self) -> usize {
-        1
+        Self::FIXED_SIZE.unwrap()
     }

     fn encode_value_to<B>(&self, buf: &mut B)
@@ -602,9 +620,9 @@ impl ValSer for LegacyAnalyzedBytecode {

 impl ValSer for Bytecode {
     fn encoded_size(&self) -> usize {
-        1 + match &self.0 {
-            reth::revm::state::Bytecode::Eip7702(code) => code.encoded_size(),
-            reth::revm::state::Bytecode::LegacyAnalyzed(code) => code.encoded_size(),
+        1 + match &self {
+            Bytecode::Eip7702(code) => code.encoded_size(),
+            Bytecode::LegacyAnalyzed(code) => code.encoded_size(),
         }
     }
@@ -612,12 +630,12 @@
     fn encode_value_to<B>(&self, buf: &mut B)
     where
         B: bytes::BufMut + AsMut<[u8]>,
     {
-        match &self.0 {
-            reth::revm::state::Bytecode::Eip7702(code) => {
+        match &self {
+            Bytecode::Eip7702(code) => {
                 buf.put_u8(1);
                 code.encode_value_to(buf);
             }
-            reth::revm::state::Bytecode::LegacyAnalyzed(code) => {
+            Bytecode::LegacyAnalyzed(code) => {
                 buf.put_u8(0);
                 code.encode_value_to(buf);
             }
@@ -633,11 +651,11 @@
         match ty {
             0 => {
                 let analyzed = LegacyAnalyzedBytecode::decode_value(data)?;
-                Ok(Bytecode(reth::revm::state::Bytecode::LegacyAnalyzed(analyzed)))
+                Ok(Bytecode::LegacyAnalyzed(analyzed))
             }
             1 => {
                 let eip7702 = Eip7702Bytecode::decode_value(data)?;
-                Ok(Bytecode(reth::revm::state::Bytecode::Eip7702(eip7702)))
+                Ok(Bytecode::Eip7702(eip7702))
             }
             _ => Err(DeserError::String(format!("Invalid Bytecode type value: {}. Max is 1.", ty))),
        }
     }
@@ -683,8 +701,8 @@ impl ValSer for AccountBeforeTx {
 }

 impl ValSer for Signature {
+    const FIXED_SIZE: Option<usize> = Some(65);
+
     fn encoded_size(&self) -> usize {
-        65
+        Self::FIXED_SIZE.unwrap()
     }

     fn encode_value_to<B>(&self, buf: &mut B)
@@ -804,14 +824,10 @@ impl ValSer for AccessList {
 }

 impl ValSer for Authorization {
+    const FIXED_SIZE: Option<usize> = Some(32 + 20 + 8);
+
     fn encoded_size(&self) -> usize {
-        let Authorization { chain_id, address, nonce } = self;
-        by_props!(
-            @size
-            chain_id,
-            address,
-            nonce,
-        )
+        Self::FIXED_SIZE.unwrap()
     }

     fn encode_value_to<B>(&self, buf: &mut B)
@@ -846,6 +862,15 @@ impl ValSer for Authorization {
 }

 impl ValSer for SignedAuthorization {
+    const FIXED_SIZE: Option<usize> = {
+        Some(
+            <Authorization as ValSer>::FIXED_SIZE.unwrap()
+                + 1 // y_parity
+                + 32 // r
+                + 32, // s
+        )
+    };
+
     fn encoded_size(&self) -> usize {
         let auth = self.inner();
         let y_parity = self.y_parity();
@@ -1886,7 +1911,8 @@ mod tests {
         let key1 = ShardedKey { key: Address::ZERO, highest_block_number: 0 };
         test_key_roundtrip(&key1);

-        let key2 = ShardedKey { key: Address::repeat_byte(0xFF), highest_block_number: u64::MAX };
+        let key2: ShardedKey<Address> =
+            ShardedKey { key: Address::repeat_byte(0xFF), highest_block_number: u64::MAX };
         test_key_roundtrip(&key2);

         let key3 = ShardedKey {
@@ -1899,6 +1925,24 @@
         test_key_roundtrip(&key3);
     }

+    #[test]
+    fn test_sharded_key_u256() {
+        let keys = vec![
+            ShardedKey { key: U256::ZERO, highest_block_number: 0 },
+            ShardedKey { key: U256::ZERO, highest_block_number: 1 },
+            ShardedKey { key: U256::ZERO, highest_block_number: u64::MAX },
+            ShardedKey { key: U256::from(1u64), highest_block_number: 0 },
+            ShardedKey { key: U256::from(1u64), highest_block_number: 1 },
+            ShardedKey { key: U256::MAX, highest_block_number: 0 },
+            ShardedKey { key: U256::MAX, highest_block_number: u64::MAX },
+        ];
+        test_key_ordering(&keys);
+
+        for key in &keys {
+            test_key_roundtrip(key);
+        }
+    }
+
     #[test]
     fn test_sharded_key_b256_ordering() {
         let keys = vec![
diff --git a/crates/storage/src/ser/traits.rs b/crates/storage/src/hot/ser/traits.rs
similarity index 85%
rename from crates/storage/src/ser/traits.rs
rename to crates/storage/src/hot/ser/traits.rs
index 7d3fcb4..98fadc5 100644
--- a/crates/storage/src/ser/traits.rs
+++ b/crates/storage/src/hot/ser/traits.rs
@@ -1,9 +1,12 @@
-use crate::ser::error::DeserError;
+use crate::hot::ser::error::DeserError;
 use alloy::primitives::Bytes;

 /// Maximum allowed key size in bytes.
 pub const MAX_KEY_SIZE: usize = 64;

+/// The maximum size of a fixed-size value (in bytes).
+pub const MAX_FIXED_VAL_SIZE: usize = 64;
+
 /// Trait for key serialization with fixed-size keys of size no greater than 32
 /// bytes.
 ///
@@ -21,12 +24,13 @@ pub trait KeySer: PartialOrd + Ord + Sized + Clone + core::fmt::Debug {

     /// Compile-time assertion to ensure SIZE is within limits.
     #[doc(hidden)]
-    const ASSERT: () = {
+    const ASSERT: sealed::Seal = {
         assert!(
             Self::SIZE <= MAX_KEY_SIZE,
             "KeySer implementations must have SIZE <= MAX_KEY_SIZE"
         );
         assert!(Self::SIZE > 0, "KeySer implementations must have SIZE > 0");
+        sealed::Seal
     };

     /// Encode the key, optionally using the provided buffer.
@@ -51,10 +55,7 @@
     /// Useful in DB decoding, where the absence of a key is represented by
     /// `None`.
     fn maybe_decode_key(data: Option<&[u8]>) -> Result<Option<Self>, DeserError> {
-        match data {
-            Some(d) => Ok(Some(Self::decode_key(d)?)),
-            None => Ok(None),
-        }
+        data.map(Self::decode_key).transpose()
     }
 }
@@ -68,6 +69,9 @@
 /// E.g. a correct implementation for an array serializes the length of the
 /// array first, so that the deserializer knows how many items to expect.
 pub trait ValSer {
+    /// The fixed size of the value, if applicable.
+    const FIXED_SIZE: Option<usize> = None;
+
     /// The encoded size of the value in bytes. This MUST be accurate, as it is
     /// used to allocate buffers for serialization. Inaccurate sizes may result
     /// in panics or incorrect behavior.
@@ -98,10 +102,7 @@ pub trait ValSer {
     where
         Self: Sized,
     {
-        match data {
-            Some(d) => Ok(Some(Self::decode_value(d)?)),
-            None => Ok(None),
-        }
+        data.map(Self::decode_value).transpose()
     }

     /// Deserialize the value from bytes, ensuring all bytes are consumed.
@@ -115,3 +116,14 @@
             .ok_or(DeserError::InexactDeser { extra_bytes: data.len() })
     }
 }
+
+mod sealed {
+    /// Sealed struct to prevent overriding the `KeySer::ASSERT` constant.
+    #[allow(
+        dead_code,
+        unreachable_pub,
+        missing_copy_implementations,
+        missing_debug_implementations
+    )]
+    pub struct Seal;
+}
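Implementing `KeySer` for a new type follows the same pattern as the `ShardedKey` impls above: a positive, fixed `SIZE`, big-endian encoding so that lexicographic byte order matches the logical `Ord`, and a length check on decode. A sketch with a hypothetical `EpochSlot` key (import path assumed):

```rust
use signet_storage::hot::ser::{DeserError, KeySer, MAX_KEY_SIZE}; // assumed path

/// A hypothetical composite key, for illustration only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct EpochSlot {
    epoch: u64,
    slot: u64,
}

impl KeySer for EpochSlot {
    // Must be > 0 and <= MAX_KEY_SIZE; `ASSERT` checks this at compile time.
    const SIZE: usize = 16;

    fn encode_key<'a: 'c, 'b: 'c, 'c>(&'a self, buf: &'b mut [u8; MAX_KEY_SIZE]) -> &'c [u8] {
        // Big-endian so that byte order agrees with the derived `Ord`.
        buf[0..8].copy_from_slice(&self.epoch.to_be_bytes());
        buf[8..16].copy_from_slice(&self.slot.to_be_bytes());
        &buf[0..Self::SIZE]
    }

    fn decode_key(data: &[u8]) -> Result<Self, DeserError> {
        if data.len() < Self::SIZE {
            return Err(DeserError::InsufficientData { needed: Self::SIZE, available: data.len() });
        }
        let epoch = u64::decode_key(&data[0..8])?;
        let slot = u64::decode_key(&data[8..16])?;
        Ok(Self { epoch, slot })
    }
}
```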
diff --git a/crates/storage/src/tables/hot.rs b/crates/storage/src/hot/tables/definitions.rs
similarity index 81%
rename from crates/storage/src/tables/hot.rs
rename to crates/storage/src/hot/tables/definitions.rs
index 04f90cc..f4ad594 100644
--- a/crates/storage/src/tables/hot.rs
+++ b/crates/storage/src/hot/tables/definitions.rs
@@ -1,7 +1,8 @@
 use alloy::primitives::{Address, B256, BlockNumber, U256};
-use reth::primitives::{Account, Bytecode, Header};
+use reth::primitives::{Account, Header};
 use reth_db::models::BlockNumberAddress;
 use reth_db_api::{BlockNumberList, models::ShardedKey};
+use trevm::revm::bytecode::Bytecode;

 table! {
     /// Records recent block Headers, by their number.
@@ -25,7 +26,7 @@
 table! {
     /// Records plain storage states, keyed by address and storage key.
-    PlainStorageState<Address => B256 => U256> is 32
+    PlainStorageState<Address => U256 => U256> is 32
 }

 table! {
@@ -40,10 +41,10 @@
 table! {
     /// Records storage state change history, keyed by address and storage key.
-    StorageHistory<Address => ShardedKey<B256> => BlockNumberList>
+    StorageHistory<Address => ShardedKey<U256> => BlockNumberList>
 }

 table! {
     /// Records account states before transactions, keyed by (address, block number).
-    StorageChangeSets<BlockNumberAddress => B256 => U256> is 32
+    StorageChangeSets<BlockNumberAddress => U256 => U256> is 32
 }
diff --git a/crates/storage/src/tables/macros.rs b/crates/storage/src/hot/tables/macros.rs
similarity index 75%
rename from crates/storage/src/tables/macros.rs
rename to crates/storage/src/hot/tables/macros.rs
index 77cf8ce..7cabed3 100644
--- a/crates/storage/src/tables/macros.rs
+++ b/crates/storage/src/hot/tables/macros.rs
@@ -8,11 +8,10 @@ macro_rules! table {
         #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
         pub struct $name;

-        impl crate::tables::Table for $name {
+        impl crate::hot::tables::Table for $name {
             const NAME: &'static str = stringify!($name);
-            const DUAL_KEY: bool = $dual;
             const FIXED_VAL_SIZE: Option<usize> = $fixed;
-
+            const DUAL_KEY_SIZE: Option<usize> = $dual;
             type Key = $key;
             type Value = $value;
         }
@@ -28,11 +27,11 @@ macro_rules! table {
             $name,
             $key,
             $value,
-            false,
+            None,
             None
         );

-        impl crate::tables::SingleKey for $name {}
+        impl crate::hot::tables::SingleKey for $name {}
     };
@@ -45,11 +44,11 @@ macro_rules! table {
             $name,
             $key,
             $value,
-            true,
+            Some(<$subkey as crate::hot::ser::KeySer>::SIZE),
             None
         );

-        impl crate::tables::DualKeyed for $name {
+        impl crate::hot::tables::DualKey for $name {
             type Key2 = $subkey;
         }
     };
@@ -63,11 +62,11 @@ macro_rules! table {
             $name,
             $key,
             $value,
-            true,
+            Some(<$subkey as crate::hot::ser::KeySer>::SIZE),
             Some($fixed)
         );

-        impl crate::tables::DualKeyed for $name {
+        impl crate::hot::tables::DualKey for $name {
             type Key2 = $subkey;
         }
     };
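The three macro arms correspond to the three table shapes. A sketch of each invocation, with hypothetical table names (the key and value types are ones this patch already provides impls for):

```rust
use alloy::primitives::{Address, B256, BlockNumber, U256};
use reth::primitives::Header;

table! {
    /// Single-keyed: `$name<$key => $value>`.
    ArchivedHeaders<BlockNumber => Header>
}

table! {
    /// Dual-keyed (DUPSORT-style): `$name<$key => $subkey => $value>`.
    Allowances<Address => Address => U256>
}

table! {
    /// Dual-keyed with fixed-size values: the trailing `is N` becomes
    /// `FIXED_VAL_SIZE = Some(N)`.
    SlotValues<Address => B256 => U256> is 32
}
```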
diff --git a/crates/storage/src/tables/mod.rs b/crates/storage/src/hot/tables/mod.rs
similarity index 60%
rename from crates/storage/src/tables/mod.rs
rename to crates/storage/src/hot/tables/mod.rs
index f93dad5..9a28446 100644
--- a/crates/storage/src/tables/mod.rs
+++ b/crates/storage/src/hot/tables/mod.rs
@@ -1,42 +1,72 @@
 #[macro_use]
 mod macros;

-/// Tables that are not hot.
-pub mod cold;
-
 /// Tables that are hot, or conditionally hot.
-pub mod hot;
+mod definitions;
+pub use definitions::*;

-use crate::{
-    hot::model::{DualKeyValue, KeyValue},
-    ser::{DeserError, KeySer, ValSer},
+use crate::hot::{
+    DeserError, KeySer, MAX_FIXED_VAL_SIZE, ValSer,
+    model::{DualKeyValue, KeyValue},
 };

-/// The maximum size of a dual key (in bytes).
-pub const MAX_FIXED_VAL_SIZE: usize = 64;
-
 /// Trait for table definitions.
-pub trait Table {
+///
+/// Tables are compile-time definitions of key-value pairs stored in hot
+/// storage. Each table defines the key and value types it uses, along with
+/// a name, and information that backends can use for optimizations (e.g.,
+/// whether the key or value is fixed-size).
+///
+/// Tables can be extended to support dual keys by implementing the [`DualKey`]
+/// trait. This indicates that the table uses a composite key made up of two
+/// distinct parts. Backends can then optimize storage and retrieval of values
+/// based on the dual keys.
+///
+/// Tables that do not implement [`DualKey`] are considered single-keyed tables.
+/// Such tables MUST implement the [`SingleKey`] marker trait to indicate that
+/// they use a single key. The [`SingleKey`] and [`DualKey`] traits are
+/// incompatible, and a table MUST implement exactly one of them.
+pub trait Table: Sized + Send + Sync + 'static {
     /// A short, human-readable name for the table.
     const NAME: &'static str;

     /// Indicates that this table uses dual keys.
-    const DUAL_KEY: bool = false;
+    const DUAL_KEY: bool = Self::DUAL_KEY_SIZE.is_some();
+
+    /// The size of the table's values, if they are fixed-size and no larger
+    /// than [`MAX_FIXED_VAL_SIZE`]; `None` otherwise.
+    const FIXED_VAL_SIZE: Option<usize> = {
+        match <Self::Value as ValSer>::FIXED_SIZE {
+            Some(size) if size <= MAX_FIXED_VAL_SIZE => Some(size),
+            _ => None,
+        }
+    };

-    /// True if the table is guaranteed to have fixed-size values, false
-    /// otherwise.
-    const FIXED_VAL_SIZE: Option<usize> = None;
+    /// If the table uses dual keys, this is the size of the second key.
+    /// Otherwise, it is `None`.
+    const DUAL_KEY_SIZE: Option<usize> = None;

     /// Indicates that this table has fixed-size values.
     const IS_FIXED_VAL: bool = Self::FIXED_VAL_SIZE.is_some();

     /// Compile-time assertions for the table.
     #[doc(hidden)]
-    const ASSERT: () = {
+    const ASSERT: sealed::Seal = {
         // Ensure that fixed-size values do not exceed the maximum allowed size.
         if let Some(size) = Self::FIXED_VAL_SIZE {
             assert!(size <= MAX_FIXED_VAL_SIZE, "Fixed value size exceeds maximum allowed size");
         }
+
+        if let Some(dual_key_size) = Self::DUAL_KEY_SIZE {
+            assert!(Self::DUAL_KEY, "DUAL_KEY_SIZE is set but DUAL_KEY is false");
+            assert!(dual_key_size > 0, "DUAL_KEY_SIZE must be greater than zero");
+        } else {
+            assert!(!Self::DUAL_KEY, "DUAL_KEY is true but DUAL_KEY_SIZE is None");
+        }
+
+        assert!(std::mem::size_of::<Self>() == 0, "Table types must be zero-sized types (ZSTs).");
+
+        sealed::Seal
     };

     /// The key type.
@@ -76,8 +106,9 @@ pub trait Table {
 pub trait SingleKey: Table {
     /// Compile-time assertions for the single-keyed table.
     #[doc(hidden)]
-    const ASSERT: () = {
+    const ASSERT: sealed::Seal = {
         assert!(!Self::DUAL_KEY, "SingleKey tables must have DUAL_KEY = false");
+        sealed::Seal
     };
 }
@@ -86,14 +117,15 @@
 /// This trait aims to capture tables that use a composite key made up of two
 /// distinct parts. This is useful for representing (e.g.) dupsort or other
 /// nested map optimizations.
-pub trait DualKeyed: Table {
+pub trait DualKey: Table {
     /// The second key type.
     type Key2: KeySer;

     /// Compile-time assertions for the dual-keyed table.
     #[doc(hidden)]
-    const ASSERT: () = {
+    const ASSERT: sealed::Seal = {
         assert!(Self::DUAL_KEY, "DualKeyed tables must have DUAL_KEY = true");
+        sealed::Seal
     };

     /// Shortcut to decode the second key.
@@ -131,3 +163,14 @@
         Self::decode_kkv(data.0, data.1, data.2)
     }
 }
+
+mod sealed {
+    /// Sealed struct to prevent overriding the `Table::ASSERT` constants.
+    #[allow(
+        dead_code,
+        unreachable_pub,
+        missing_copy_implementations,
+        missing_debug_implementations
+    )]
+    pub struct Seal;
+}
diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs
index 381014c..dbf21ff 100644
--- a/crates/storage/src/lib.rs
+++ b/crates/storage/src/lib.rs
@@ -1,4 +1,58 @@
-#![doc = include_str!("../README.md")]
+//! Signet Storage Components
+//!
+//! High-level abstractions and implementations for storage backends used in
+//! Signet.
+//!
+//! ## Design Overview
+//!
+//! We divide storage into two main categories: cold storage and hot storage.
+//! The distinction is not access patterns, but whether the data is used in the
+//! critical consensus path (hot) or not (cold). Cold storage is used for
+//! serving blocks and transactions over RPC, while hot storage is used for
+//! fast access to frequently used data during block processing and consensus.
+//!
+//! The crate has two modules:
+//! - [`cold`]: Cold storage abstractions and implementations.
+//! - [`hot`]: Hot storage abstractions and implementations.
+//!
+//! ## Hot Storage
+//!
+//! Hot storage is modeled as a key-value store with predefined tables. The core
+//! trait is [`HotKv`], which provides a factory for creating read and write
+//! transactions.
+//!
+//! The primary traits for accessing hot storage are:
+//! - [`HistoryRead`]: for read-only transactions.
+//! - [`HistoryWrite`]: for read-write transactions.
+//!
+//! Other traits should generally only be used when implementing new backends.
+//!
+//! [`HotKv::revm_reader`] and [`HotKv::revm_writer`] create transaction
+//! wrappers that implement the [`revm`] crate's storage traits, allowing
+//! seamless integration with the EVM execution engine.
+//!
+//! When the "mdbx" feature is enabled, we provide an MDBX-based implementation
+//! of the hot storage traits. See the `hot_impls::mdbx` module for more
+//! details.
+//!
+//! ## Cold Storage
+//!
+//! Cold storage provides abstractions for storing and retrieving blocks,
+//! transactions, and related data.
+//!
+//! Unlike hot storage, cold storage is intended to be accessed asynchronously.
+//! The core trait is [`ColdStorage`], which defines methods for appending and
+//! reading from the store.
+//!
+//! A [`ColdStorage`] implementation is typically run in a separate task using
+//! the [`ColdStorageTask`]. The task processes requests sent via a channel,
+//! allowing non-blocking access to cold storage operations. The
+//! [`ColdStorageHandle`] provides an ergonomic API for sending requests to the
+//! task.
+//!
+//! Like [`hot`], the majority of users will not need to interact with cold
+//! storage directly. Instead, they will use the task and handle abstractions.
+//!
+//! [`revm`]: trevm::revm
 #![warn(
     missing_copy_implementations,
     missing_debug_implementations,
@@ -13,12 +67,13 @@

 /// Cold storage module.
 pub mod cold;
+pub use cold::{ColdStorage, ColdStorageError, ColdStorageHandle, ColdStorageTask};

-/// Hot storage module.
-pub mod hot;
+#[cfg(feature = "impls")]
+pub use cold::impls as cold_impls;

-/// Serialization module.
-pub mod ser;
+pub mod hot;
+pub use hot::{HistoryError, HistoryRead, HistoryWrite, HotKv};

-/// Predefined tables module.
-pub mod tables;
+#[cfg(feature = "impls")]
+pub use hot::impls as hot_impls;
diff --git a/crates/storage/src/tables/cold.rs b/crates/storage/src/tables/cold.rs
deleted file mode 100644
index 8b13789..0000000
--- a/crates/storage/src/tables/cold.rs
+++ /dev/null
@@ -1 +0,0 @@
-
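To make the revm integration concrete, a minimal sketch. Any `HotKv` backend works here; `basic_ref` is the account lookup on revm's `DatabaseRef` trait, which the reader returned by `revm_reader` implements according to the module docs above.

```rust
use alloy::primitives::Address;
use signet_storage::HotKv;
use trevm::revm::database::DatabaseRef;

/// Print an account's balance through the revm adapter.
fn print_balance<Db: HotKv>(db: &Db, address: Address) {
    // revm_reader wraps a fresh read transaction in a type implementing
    // DatabaseRef, so it can be handed to trevm/revm directly.
    let reader = db.revm_reader().expect("failed to open read transaction");
    if let Some(info) = reader.basic_ref(address).expect("db read failed") {
        println!("balance of {address}: {}", info.balance);
    }
}
```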