diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0cfd5c716c..adb751c2e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
### 2025-11-12
+- Separate account and storage column families in RocksDB [#5055](https://github.com/lambdaclass/ethrex/pull/5055)
- Avoid copying while reading account code [#5289](https://github.com/lambdaclass/ethrex/pull/5289)
- Cache `BLOBBASEFEE` opcode value [#5288](https://github.com/lambdaclass/ethrex/pull/5288)
diff --git a/crates/storage/store.rs b/crates/storage/store.rs
index 529fcdcc8b..c974331547 100644
--- a/crates/storage/store.rs
+++ b/crates/storage/store.rs
@@ -1,8 +1,8 @@
+use crate::api::StoreEngine;
use crate::error::StoreError;
use crate::store_db::in_memory::Store as InMemoryStore;
#[cfg(feature = "rocksdb")]
use crate::store_db::rocksdb::Store as RocksDBStore;
-use crate::{api::StoreEngine, apply_prefix};
use ethereum_types::{Address, H256, U256};
use ethrex_common::{
@@ -533,50 +533,43 @@ impl Store {
&self,
genesis_accounts: BTreeMap
,
) -> Result {
- let mut nodes = HashMap::new();
- let mut genesis_state_trie = self.engine.open_direct_state_trie(*EMPTY_TRIE_HASH)?;
+ let mut account_trie = self.engine.open_direct_state_trie(*EMPTY_TRIE_HASH)?;
+
for (address, account) in genesis_accounts {
let hashed_address = hash_address(&address);
+
// Store account code (as this won't be stored in the trie)
let code = Code::from_bytecode(account.code);
let code_hash = code.hash;
self.add_account_code(code).await?;
+
// Store the account's storage in a clean storage trie and compute its root
let mut storage_trie = self
.engine
.open_direct_storage_trie(H256::from_slice(&hashed_address), *EMPTY_TRIE_HASH)?;
+
for (storage_key, storage_value) in account.storage {
if !storage_value.is_zero() {
let hashed_key = hash_key(&H256(storage_key.to_big_endian()));
storage_trie.insert(hashed_key, storage_value.encode_to_vec())?;
}
}
- let (storage_root, new_nodes) = storage_trie.collect_changes_since_last_hash();
- nodes.insert(H256::from_slice(&hashed_address), new_nodes);
+
+ // TODO(#5195): committing each storage trie individually is inefficient.
+ // We would benefit from a mass storage node insertion method.
+
// Add account to trie
let account_state = AccountState {
nonce: account.nonce,
balance: account.balance,
- storage_root,
+ storage_root: storage_trie.hash()?,
code_hash,
};
- genesis_state_trie.insert(hashed_address, account_state.encode_to_vec())?;
+
+ account_trie.insert(hashed_address, account_state.encode_to_vec())?;
}
- let (state_root, state_nodes) = genesis_state_trie.collect_changes_since_last_hash();
-
- // TODO: replace this with a Store method
- genesis_state_trie.db().put_batch(
- nodes
- .into_iter()
- .flat_map(|(account_hash, nodes)| {
- nodes
- .into_iter()
- .map(move |(path, node)| (apply_prefix(Some(account_hash), path), node))
- })
- .chain(state_nodes)
- .collect(),
- )?;
- Ok(state_root)
+
+ Ok(account_trie.hash()?)
}
pub async fn add_receipt(
diff --git a/crates/storage/store_db/rocksdb.rs b/crates/storage/store_db/rocksdb.rs
index a24298b456..eaae27e899 100644
--- a/crates/storage/store_db/rocksdb.rs
+++ b/crates/storage/store_db/rocksdb.rs
@@ -95,10 +95,15 @@ const CF_CHAIN_DATA: &str = "chain_data";
/// - [`Vec`] = `BlockHashRLP::from(block_hash).bytes().clone()`
const CF_SNAP_STATE: &str = "snap_state";
-/// State trie nodes column family: [`Nibbles`] => [`Vec`]
+/// Account State trie nodes column family: [`Nibbles`] => [`Vec`]
/// - [`Nibbles`] = `node_hash.as_ref()`
/// - [`Vec`] = `node_data`
-const CF_TRIE_NODES: &str = "trie_nodes";
+const CF_ACCOUNT_TRIE_NODES: &str = "account_trie_nodes";
+
+/// Storage trie nodes column family: [`Nibbles`] => [`Vec`]
+/// - [`Nibbles`] = `node_hash.as_ref()`
+/// - [`Vec`] = `node_data`
+const CF_STORAGE_TRIE_NODES: &str = "storage_trie_nodes";
/// Pending blocks column family: [`Vec`] => [`Vec`]
/// - [`Vec`] = `BlockHashRLP::from(block.hash()).bytes().clone()`
@@ -115,7 +120,15 @@ const CF_INVALID_ANCESTORS: &str = "invalid_ancestors";
/// - [`Vec`] = `BlockHeaderRLP::from(block.header.clone()).bytes().clone()`
const CF_FULLSYNC_HEADERS: &str = "fullsync_headers";
-pub const CF_FLATKEYVALUE: &str = "flatkeyvalue";
+/// Account state flat key-value store: [`Nibbles`] => [`Vec`]
+/// - [`Nibbles`] = path of the account leaf in the account state trie
+/// - [`Vec`] = value of that leaf (the encoded `AccountState`)
+pub const CF_ACCOUNT_FLATKEYVALUE: &str = "account_flatkeyvalue";
+
+/// Storage slots key-value store: [`Nibbles`] => [`Vec`]
+/// - [`Nibbles`] = path of the storage leaf, prefixed with the account hash (see `apply_prefix`)
+/// - [`Vec`] = value of that storage leaf
+pub const CF_STORAGE_FLATKEYVALUE: &str = "storage_flatkeyvalue";
pub const CF_MISC_VALUES: &str = "misc_values";
@@ -197,11 +210,13 @@ impl Store {
CF_TRANSACTION_LOCATIONS,
CF_CHAIN_DATA,
CF_SNAP_STATE,
- CF_TRIE_NODES,
+ CF_ACCOUNT_TRIE_NODES,
+ CF_STORAGE_TRIE_NODES,
CF_PENDING_BLOCKS,
CF_INVALID_ANCESTORS,
CF_FULLSYNC_HEADERS,
- CF_FLATKEYVALUE,
+ CF_ACCOUNT_FLATKEYVALUE,
+ CF_STORAGE_FLATKEYVALUE,
CF_MISC_VALUES,
];
@@ -263,7 +278,7 @@ impl Store {
block_opts.set_bloom_filter(10.0, false);
cf_opts.set_block_based_table_factory(&block_opts);
}
- CF_TRIE_NODES => {
+ CF_ACCOUNT_TRIE_NODES | CF_STORAGE_TRIE_NODES => {
cf_opts.set_write_buffer_size(512 * 1024 * 1024); // 512MB
cf_opts.set_max_write_buffer_number(6);
cf_opts.set_min_write_buffer_number_to_merge(2);
@@ -275,7 +290,7 @@ impl Store {
block_opts.set_bloom_filter(10.0, false); // 10 bits per key
cf_opts.set_block_based_table_factory(&block_opts);
}
- CF_FLATKEYVALUE => {
+ CF_ACCOUNT_FLATKEYVALUE | CF_STORAGE_FLATKEYVALUE => {
cf_opts.set_write_buffer_size(512 * 1024 * 1024); // 512MB
cf_opts.set_max_write_buffer_number(6);
cf_opts.set_min_write_buffer_number_to_merge(2);
@@ -579,7 +594,8 @@ impl Store {
control_rx: &mut std::sync::mpsc::Receiver,
) -> Result<(), StoreError> {
let cf_misc = self.cf_handle(CF_MISC_VALUES)?;
- let cf_flatkeyvalue = self.cf_handle(CF_FLATKEYVALUE)?;
+ let cf_accounts_fkv = self.cf_handle(CF_ACCOUNT_FLATKEYVALUE)?;
+ let cf_storage_fkv = self.cf_handle(CF_STORAGE_FLATKEYVALUE)?;
let last_written = self
.db
@@ -590,11 +606,13 @@ impl Store {
}
self.db
- .delete_range_cf(&cf_flatkeyvalue, last_written, vec![0xff])?;
+ .delete_range_cf(&cf_accounts_fkv, &last_written, vec![0xff].as_ref())?;
+ self.db
+ .delete_range_cf(&cf_storage_fkv, &last_written, vec![0xff].as_ref())?;
loop {
let root = self
- .read_sync(CF_TRIE_NODES, [])?
+ .read_sync(CF_ACCOUNT_TRIE_NODES, [])?
.ok_or(StoreError::MissingLatestBlockNumber)?;
let root: Node = ethrex_trie::Node::decode(&root)?;
let state_root = root.compute_hash().finalize();
@@ -616,18 +634,18 @@ impl Store {
let mut ctr = 0;
let mut batch = WriteBatch::default();
- let mut iter = self.open_direct_state_trie(state_root)?.into_iter();
+ let mut account_iter = self.open_direct_state_trie(state_root)?.into_iter();
if last_written_account > Nibbles::default() {
- iter.advance(last_written_account.to_bytes())?;
+ account_iter.advance(last_written_account.to_bytes())?;
}
- let res = iter.try_for_each(|(path, node)| -> Result<(), StoreError> {
- let Node::Leaf(node) = node else {
+ let res = account_iter.try_for_each(|(path, account_node)| -> Result<(), StoreError> {
+ let Node::Leaf(node) = account_node else {
return Ok(());
};
let account_state = AccountState::decode(&node.value)?;
let account_hash = H256::from_slice(&path.to_bytes());
batch.put_cf(&cf_misc, "last_written", path.as_ref());
- batch.put_cf(&cf_flatkeyvalue, path.as_ref(), node.value);
+ batch.put_cf(&cf_accounts_fkv, path.as_ref(), node.value);
ctr += 1;
if ctr > 10_000 {
self.db.write(std::mem::take(&mut batch))?;
@@ -638,20 +656,20 @@ impl Store {
ctr = 0;
}
- let mut iter_inner = self
+ let mut storage_iter = self
.open_direct_storage_trie(account_hash, account_state.storage_root)?
.into_iter();
if last_written_storage > Nibbles::default() {
- iter_inner.advance(last_written_storage.to_bytes())?;
+ storage_iter.advance(last_written_storage.to_bytes())?;
last_written_storage = Nibbles::default();
}
- iter_inner.try_for_each(|(path, node)| -> Result<(), StoreError> {
- let Node::Leaf(node) = node else {
+ storage_iter.try_for_each(|(path, storage_node)| -> Result<(), StoreError> {
+ let Node::Leaf(node) = storage_node else {
return Ok(());
};
let key = apply_prefix(Some(account_hash), path);
batch.put_cf(&cf_misc, "last_written", key.as_ref());
- batch.put_cf(&cf_flatkeyvalue, key.as_ref(), node.value);
+ batch.put_cf(&cf_storage_fkv, key.as_ref(), node.value);
ctr += 1;
if ctr > 10_000 {
self.db.write(std::mem::take(&mut batch))?;
@@ -754,22 +772,47 @@ impl Store {
// RCU to remove the bottom layer: update step needs to happen after disk layer is updated.
let mut trie_mut = (*trie).clone();
let mut batch = WriteBatch::default();
- let [cf_trie_nodes, cf_flatkeyvalue, cf_misc] =
- open_cfs(db, [CF_TRIE_NODES, CF_FLATKEYVALUE, CF_MISC_VALUES])?;
+ let [
+ cf_accounts_trie_nodes,
+ cf_accounts_flatkeyvalue,
+ cf_storage_trie_nodes,
+ cf_storage_flatkeyvalue,
+ cf_misc,
+ ] = open_cfs(
+ db,
+ [
+ CF_ACCOUNT_TRIE_NODES,
+ CF_ACCOUNT_FLATKEYVALUE,
+ CF_STORAGE_TRIE_NODES,
+ CF_STORAGE_FLATKEYVALUE,
+ CF_MISC_VALUES,
+ ],
+ )?;
let last_written = db.get_cf(&cf_misc, "last_written")?.unwrap_or_default();
+
+ // Before encoding, accounts have only the account address as their path, while storage keys have
+ // the account address (32 bytes) + storage path (up to 32 bytes).
+
// Commit removes the bottom layer and returns it, this is the mutation step.
let nodes = trie_mut.commit(root).unwrap_or_default();
for (key, value) in nodes {
let is_leaf = key.len() == 65 || key.len() == 131;
+ let is_account = key.len() <= 65;
if is_leaf && key > last_written {
continue;
}
let cf = if is_leaf {
- &cf_flatkeyvalue
+ if is_account {
+ &cf_accounts_flatkeyvalue
+ } else {
+ &cf_storage_flatkeyvalue
+ }
+ } else if is_account {
+ &cf_accounts_trie_nodes
} else {
- &cf_trie_nodes
+ &cf_storage_trie_nodes
};
if value.is_empty() {
batch.delete_cf(cf, key);
@@ -836,6 +879,7 @@ impl StoreEngine for Store {
CF_BODIES,
],
)?;
+
let mut batch = WriteBatch::default();
let UpdateBatch {
@@ -1482,7 +1526,8 @@ impl StoreEngine for Store {
// FIXME: use a DB snapshot here
let db = Box::new(RocksDBTrieDB::new(
self.db.clone(),
- CF_TRIE_NODES,
+ CF_STORAGE_TRIE_NODES,
+ CF_STORAGE_FLATKEYVALUE,
None,
self.last_written()?,
)?);
@@ -1503,7 +1548,8 @@ impl StoreEngine for Store {
// FIXME: use a DB snapshot here
let db = Box::new(RocksDBTrieDB::new(
self.db.clone(),
- CF_TRIE_NODES,
+ CF_ACCOUNT_TRIE_NODES,
+ CF_ACCOUNT_FLATKEYVALUE,
None,
self.last_written()?,
)?);
@@ -1527,7 +1573,8 @@ impl StoreEngine for Store {
) -> Result {
let db = Box::new(RocksDBTrieDB::new(
self.db.clone(),
- CF_TRIE_NODES,
+ CF_STORAGE_TRIE_NODES,
+ CF_STORAGE_FLATKEYVALUE,
Some(hashed_address),
self.last_written()?,
)?);
@@ -1537,7 +1584,8 @@ impl StoreEngine for Store {
fn open_direct_state_trie(&self, state_root: H256) -> Result {
let db = Box::new(RocksDBTrieDB::new(
self.db.clone(),
- CF_TRIE_NODES,
+ CF_ACCOUNT_TRIE_NODES,
+ CF_ACCOUNT_FLATKEYVALUE,
None,
self.last_written()?,
)?);
@@ -1547,7 +1595,8 @@ impl StoreEngine for Store {
fn open_locked_state_trie(&self, state_root: H256) -> Result {
let db = Box::new(RocksDBLockedTrieDB::new(
self.db.clone(),
- CF_TRIE_NODES,
+ CF_ACCOUNT_TRIE_NODES,
+ CF_ACCOUNT_FLATKEYVALUE,
None,
self.last_written()?,
)?);
@@ -1572,7 +1621,8 @@ impl StoreEngine for Store {
) -> Result {
let db = Box::new(RocksDBLockedTrieDB::new(
self.db.clone(),
- CF_TRIE_NODES,
+ CF_STORAGE_TRIE_NODES,
+ CF_STORAGE_FLATKEYVALUE,
None,
self.last_written()?,
)?);
@@ -1848,8 +1898,8 @@ impl StoreEngine for Store {
let db = self.db.clone();
tokio::task::spawn_blocking(move || {
let mut batch = WriteBatch::default();
- let cf = db.cf_handle(CF_TRIE_NODES).ok_or_else(|| {
- StoreError::Custom("Column family not found: CF_TRIE_NODES".to_string())
+ let cf = db.cf_handle(CF_STORAGE_TRIE_NODES).ok_or_else(|| {
+ StoreError::Custom("Column family not found: CF_STORAGE_TRIE_NODES".to_string())
})?;
for (address_hash, nodes) in storage_trie_nodes {
diff --git a/crates/storage/trie_db/rocksdb.rs b/crates/storage/trie_db/rocksdb.rs
index 629f804466..3547393dd7 100644
--- a/crates/storage/trie_db/rocksdb.rs
+++ b/crates/storage/trie_db/rocksdb.rs
@@ -4,14 +4,16 @@ use ethrex_trie::{Nibbles, Node, TrieDB, error::TrieError};
use rocksdb::{DBWithThreadMode, MultiThreaded};
use std::sync::Arc;
-use crate::{store_db::rocksdb::CF_FLATKEYVALUE, trie_db::layering::apply_prefix};
+use crate::trie_db::layering::apply_prefix;
/// RocksDB implementation for the TrieDB trait, with get and put operations.
pub struct RocksDBTrieDB {
/// RocksDB database
db: Arc>,
- /// Column family name
- cf_name: String,
+ /// Column family name for the trie nodes
+ trie_cf_name: String,
+ /// Column family name for the flatkeyvalue nodes
+ flatkeyvalue_cf_name: String,
/// Storage trie address prefix
address_prefix: Option,
/// Last flatkeyvalue path already generated
@@ -21,39 +23,52 @@ pub struct RocksDBTrieDB {
impl RocksDBTrieDB {
pub fn new(
db: Arc>,
- cf_name: &str,
+ trie_cf_name: &str,
+ flatkeyvalue_cf_name: &str,
address_prefix: Option,
last_written: Vec,
) -> Result {
// Verify column family exists
- if db.cf_handle(cf_name).is_none() {
+ if db.cf_handle(trie_cf_name).is_none() {
return Err(TrieError::DbError(anyhow::anyhow!(
- "Column family not found: {}",
- cf_name
+ "Column family for the trie not found: {}",
+ trie_cf_name
+ )));
+ }
+
+ if db.cf_handle(flatkeyvalue_cf_name).is_none() {
+ return Err(TrieError::DbError(anyhow::anyhow!(
+ "Column family for the flatkeyvalue not found: {}",
+ flatkeyvalue_cf_name
)));
}
let last_computed_flatkeyvalue = Nibbles::from_hex(last_written);
Ok(Self {
db,
- cf_name: cf_name.to_string(),
+ trie_cf_name: trie_cf_name.to_string(),
+ flatkeyvalue_cf_name: flatkeyvalue_cf_name.to_string(),
address_prefix,
last_computed_flatkeyvalue,
})
}
fn cf_handle(&self) -> Result>, TrieError> {
- self.db
- .cf_handle(&self.cf_name)
- .ok_or_else(|| TrieError::DbError(anyhow::anyhow!("Column family not found")))
+ self.db.cf_handle(&self.trie_cf_name).ok_or_else(|| {
+ TrieError::DbError(anyhow::anyhow!("Column family for the trie not found"))
+ })
}
fn cf_handle_flatkeyvalue(
&self,
) -> Result>, TrieError> {
self.db
- .cf_handle(CF_FLATKEYVALUE)
- .ok_or_else(|| TrieError::DbError(anyhow::anyhow!("Column family not found")))
+ .cf_handle(&self.flatkeyvalue_cf_name)
+ .ok_or_else(|| {
+ TrieError::DbError(anyhow::anyhow!(
+ "Column family for the flat key value store not found"
+ ))
+ })
}
fn make_key(&self, node_hash: Nibbles) -> Vec {
@@ -61,39 +76,48 @@ impl RocksDBTrieDB {
.as_ref()
.to_vec()
}
+
+ // Gets the correct column family handle based on whether the key is a leaf or not.
+ fn cf_handle_for_key(
+ &self,
+ key: &Nibbles,
+ ) -> Result>, TrieError> {
+ if key.is_leaf() {
+ self.cf_handle_flatkeyvalue()
+ } else {
+ self.cf_handle()
+ }
+ }
}
impl TrieDB for RocksDBTrieDB {
fn flatkeyvalue_computed(&self, key: Nibbles) -> bool {
self.last_computed_flatkeyvalue >= key
}
+
fn get(&self, key: Nibbles) -> Result