Skip to content

Commit a97d6f0

Browse files
iovoidMegaRedHandfedackinggianbelinchejrchatruc
authored
feat(l1): make the database path-based (#4806)
**Motivation** We want a path-based database: * To reduce database size, because updated nodes replace old ones instead of coexisting * As preparation for pruning, which reduces database size even further **Description** Support for "archive mode" (keeping old state indefinitely) is dropped. To handle reorgs we use in-memory layers that exist on top of the db. --------- Co-authored-by: Tomás Grüner <[email protected]> Co-authored-by: Francisco Xavier Gauna <[email protected]> Co-authored-by: Gianbelinche <[email protected]> Co-authored-by: Javier Rodríguez Chatruc <[email protected]> Co-authored-by: Mario Rugiero <[email protected]> Co-authored-by: Pablo Deymonnaz <[email protected]> Co-authored-by: fmoletta <[email protected]> Co-authored-by: cdiielsi <[email protected]> Co-authored-by: Edgar <[email protected]> Co-authored-by: Manuel Iñaki Bilbao <[email protected]> Co-authored-by: Ivan Litteri <[email protected]> Co-authored-by: ilitteri <[email protected]> Co-authored-by: Javier Chatruc <[email protected]>
1 parent 246ed67 commit a97d6f0

File tree

40 files changed

+1220
-616
lines changed

40 files changed

+1220
-616
lines changed

cmd/ethrex/initializers.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,8 @@ pub async fn init_l1(
418418
},
419419
);
420420

421+
regenerate_head_state(&store, &blockchain).await?;
422+
421423
let signer = get_signer(datadir);
422424

423425
let local_p2p_node = get_local_p2p_node(&opts, &signer);
@@ -484,3 +486,43 @@ pub async fn init_l1(
484486
local_node_record,
485487
))
486488
}
489+
490+
async fn regenerate_head_state(store: &Store, blockchain: &Arc<Blockchain>) -> eyre::Result<()> {
491+
let head_block_number = store.get_latest_block_number().await?;
492+
let Some(last_header) = store.get_block_header(head_block_number)? else {
493+
unreachable!("Database is empty, genesis block should be present");
494+
};
495+
496+
let mut current_last_header = last_header;
497+
498+
while !store.has_state_root(current_last_header.state_root)? {
499+
let parent_number = current_last_header.number - 1;
500+
debug!("Need to regenerate state for block {parent_number}");
501+
let Some(parent_header) = store.get_block_header(parent_number)? else {
502+
return Err(eyre::eyre!(
503+
"Parent header for block {parent_number} not found"
504+
));
505+
};
506+
current_last_header = parent_header;
507+
}
508+
509+
let last_state_number = current_last_header.number;
510+
511+
if last_state_number == head_block_number {
512+
debug!("State is already up to date");
513+
return Ok(());
514+
}
515+
info!("Regenerating state from block {last_state_number} to {head_block_number}");
516+
517+
for i in (last_state_number + 1)..=head_block_number {
518+
debug!("Re-applying block {i} to regenerate state");
519+
520+
let block = store
521+
.get_block_by_number(i)
522+
.await?
523+
.ok_or_else(|| eyre::eyre!("Block {i} not found"))?;
524+
blockchain.add_block(block).await?;
525+
}
526+
info!("Finished regenerating state");
527+
Ok(())
528+
}

cmd/ethrex/l2/command.rs

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use ethrex_l2_sdk::call_contract;
2020
use ethrex_rpc::{
2121
EthClient, clients::beacon::BeaconClient, types::block_identifier::BlockIdentifier,
2222
};
23-
use ethrex_storage::{EngineType, Store, UpdateBatch};
23+
use ethrex_storage::{EngineType, Store};
2424
use ethrex_storage_rollup::StoreRollup;
2525
use eyre::OptionExt;
2626
use itertools::Itertools;
@@ -405,11 +405,8 @@ impl Command {
405405

406406
// Get genesis
407407
let genesis_header = store.get_block_header(0)?.expect("Genesis block not found");
408-
let genesis_block_hash = genesis_header.hash();
409408

410-
let mut new_trie = store
411-
.state_trie(genesis_block_hash)?
412-
.expect("Genesis block not found");
409+
let mut current_state_root = genesis_header.state_root;
413410

414411
let mut last_block_number = 0;
415412
let mut new_canonical_blocks = vec![];
@@ -433,37 +430,30 @@ impl Command {
433430
let state_diff = StateDiff::decode(&blob)?;
434431

435432
// Apply all account updates to trie
436-
let account_updates = state_diff.to_account_updates(&new_trie)?;
433+
let trie = store.open_direct_state_trie(current_state_root)?;
434+
435+
let account_updates = state_diff.to_account_updates(&trie)?;
436+
437437
let account_updates_list = store
438-
.apply_account_updates_from_trie_batch(new_trie, account_updates.values())
438+
.apply_account_updates_from_trie_batch(trie, account_updates.values())
439439
.await
440440
.map_err(|e| format!("Error applying account updates: {e}"))
441441
.unwrap();
442442

443-
let (new_state_root, state_updates, accounts_updates) = (
444-
account_updates_list.state_trie_hash,
445-
account_updates_list.state_updates,
446-
account_updates_list.storage_updates,
447-
);
443+
store
444+
.open_direct_state_trie(current_state_root)?
445+
.db()
446+
.put_batch(account_updates_list.state_updates)?;
448447

449-
let pseudo_update_batch = UpdateBatch {
450-
account_updates: state_updates,
451-
storage_updates: accounts_updates,
452-
blocks: vec![],
453-
receipts: vec![],
454-
code_updates: vec![],
455-
};
448+
current_state_root = account_updates_list.state_trie_hash;
456449

457450
store
458-
.store_block_updates(pseudo_update_batch)
459-
.await
460-
.map_err(|e| format!("Error storing trie updates: {e}"))
461-
.unwrap();
451+
.write_storage_trie_nodes_batch(account_updates_list.storage_updates)
452+
.await?;
462453

463-
new_trie = store
464-
.open_state_trie(new_state_root)
465-
.map_err(|e| format!("Error opening new state trie: {e}"))
466-
.unwrap();
454+
store
455+
.write_account_code_batch(account_updates_list.code_updates)
456+
.await?;
467457

468458
// Get withdrawal hashes
469459
let message_hashes = state_diff
@@ -479,10 +469,7 @@ impl Command {
479469
// Note that its state_root is the state trie hash obtained after applying this batch's account updates.
480470
let new_block = BlockHeader {
481471
coinbase,
482-
state_root: new_trie
483-
.hash()
484-
.map_err(|e| format!("Error committing state: {e}"))
485-
.unwrap(),
472+
state_root: account_updates_list.state_trie_hash,
486473
..state_diff.last_header
487474
};
488475

crates/blockchain/blockchain.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,6 @@ impl Blockchain {
416416
};
417417

418418
self.storage
419-
.clone()
420419
.store_block_updates(update_batch)
421420
.await
422421
.map_err(|e| e.into())

crates/blockchain/smoke_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ mod blockchain_integration_test {
310310
};
311311

312312
// Create blockchain
313-
let blockchain = Blockchain::default_with_store(store.clone().clone());
313+
let blockchain = Blockchain::default_with_store(store.clone());
314314

315315
let block = create_payload(&args, store, Bytes::new()).unwrap();
316316
let result = blockchain.build_payload(block).await.unwrap();

crates/blockchain/tracing.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ async fn get_missing_state_parents(
135135
let Some(parent_block) = store.get_block_by_hash(parent_hash).await? else {
136136
return Err(ChainError::Custom("Parent Block not Found".to_string()));
137137
};
138-
if store.contains_state_node(parent_block.header.state_root)? {
138+
if store.has_state_root(parent_block.header.state_root)? {
139139
break;
140140
}
141141
parent_hash = parent_block.header.parent_hash;

crates/common/trie/db.rs

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,58 @@
11
use ethereum_types::H256;
22
use ethrex_rlp::encode::RLPEncode;
33

4-
use crate::{Node, NodeHash, NodeRLP, Trie, error::TrieError};
4+
use crate::{Nibbles, Node, NodeRLP, Trie, error::TrieError};
55
use std::{
66
collections::BTreeMap,
77
sync::{Arc, Mutex},
88
};
99

10+
// Nibbles -> encoded node
11+
pub type NodeMap = Arc<Mutex<BTreeMap<Vec<u8>, Vec<u8>>>>;
12+
1013
pub trait TrieDB: Send + Sync {
11-
fn get(&self, key: NodeHash) -> Result<Option<Vec<u8>>, TrieError>;
12-
fn put_batch(&self, key_values: Vec<(NodeHash, Vec<u8>)>) -> Result<(), TrieError>;
14+
fn get(&self, key: Nibbles) -> Result<Option<Vec<u8>>, TrieError>;
15+
fn put_batch(&self, key_values: Vec<(Nibbles, Vec<u8>)>) -> Result<(), TrieError>;
1316
// TODO: replace `put_batch` with this function.
14-
fn put_batch_no_alloc(&self, key_values: &[(NodeHash, Node)]) -> Result<(), TrieError> {
17+
fn put_batch_no_alloc(&self, key_values: &[(Nibbles, Node)]) -> Result<(), TrieError> {
1518
self.put_batch(
1619
key_values
1720
.iter()
18-
.map(|node| (node.0, node.1.encode_to_vec()))
21+
.map(|node| (node.0.clone(), node.1.encode_to_vec()))
1922
.collect(),
2023
)
2124
}
22-
fn put(&self, key: NodeHash, value: Vec<u8>) -> Result<(), TrieError> {
25+
fn put(&self, key: Nibbles, value: Vec<u8>) -> Result<(), TrieError> {
2326
self.put_batch(vec![(key, value)])
2427
}
2528
}
2629

2730
/// InMemory implementation for the TrieDB trait, with get and put operations.
2831
#[derive(Default)]
2932
pub struct InMemoryTrieDB {
30-
pub inner: Arc<Mutex<BTreeMap<NodeHash, Vec<u8>>>>,
33+
inner: NodeMap,
34+
prefix: Option<Nibbles>,
3135
}
3236

3337
impl InMemoryTrieDB {
34-
pub const fn new(map: Arc<Mutex<BTreeMap<NodeHash, Vec<u8>>>>) -> Self {
35-
Self { inner: map }
38+
pub const fn new(map: NodeMap) -> Self {
39+
Self {
40+
inner: map,
41+
prefix: None,
42+
}
43+
}
44+
45+
pub const fn new_with_prefix(map: NodeMap, prefix: Nibbles) -> Self {
46+
Self {
47+
inner: map,
48+
prefix: Some(prefix),
49+
}
3650
}
51+
3752
pub fn new_empty() -> Self {
3853
Self {
3954
inner: Default::default(),
55+
prefix: None,
4056
}
4157
}
4258

@@ -45,33 +61,56 @@ impl InMemoryTrieDB {
4561
state_nodes: &BTreeMap<H256, NodeRLP>,
4662
) -> Result<Self, TrieError> {
4763
let mut embedded_root = Trie::get_embedded_root(state_nodes, root_hash)?;
48-
let mut hashed_nodes: Vec<(NodeHash, Vec<u8>)> = vec![];
49-
embedded_root.commit(&mut hashed_nodes);
64+
let mut hashed_nodes = vec![];
65+
embedded_root.commit(Nibbles::default(), &mut hashed_nodes);
5066

51-
let hashed_nodes = hashed_nodes.into_iter().collect();
67+
let hashed_nodes = hashed_nodes
68+
.into_iter()
69+
.map(|(k, v)| (k.into_vec(), v))
70+
.collect();
5271

5372
let in_memory_trie = Arc::new(Mutex::new(hashed_nodes));
5473
Ok(Self::new(in_memory_trie))
5574
}
75+
76+
fn apply_prefix(&self, path: Nibbles) -> Nibbles {
77+
match &self.prefix {
78+
Some(prefix) => prefix.concat(&path),
79+
None => path,
80+
}
81+
}
5682
}
5783

5884
impl TrieDB for InMemoryTrieDB {
59-
fn get(&self, key: NodeHash) -> Result<Option<Vec<u8>>, TrieError> {
85+
fn get(&self, key: Nibbles) -> Result<Option<Vec<u8>>, TrieError> {
6086
Ok(self
6187
.inner
6288
.lock()
6389
.map_err(|_| TrieError::LockError)?
64-
.get(&key)
90+
.get(self.apply_prefix(key).as_ref())
6591
.cloned())
6692
}
6793

68-
fn put_batch(&self, key_values: Vec<(NodeHash, Vec<u8>)>) -> Result<(), TrieError> {
94+
fn put_batch(&self, key_values: Vec<(Nibbles, Vec<u8>)>) -> Result<(), TrieError> {
6995
let mut db = self.inner.lock().map_err(|_| TrieError::LockError)?;
7096

7197
for (key, value) in key_values {
72-
db.insert(key, value);
98+
let prefixed_key = self.apply_prefix(key);
99+
db.insert(prefixed_key.into_vec(), value);
73100
}
74101

75102
Ok(())
76103
}
77104
}
105+
106+
pub fn nibbles_to_fixed_size(nibbles: Nibbles) -> [u8; 33] {
107+
let node_hash_ref = nibbles.to_bytes();
108+
let original_len = node_hash_ref.len();
109+
110+
let mut buffer = [0u8; 33];
111+
112+
// Encode the node as [node_path..., original_len]
113+
buffer[32] = nibbles.len() as u8;
114+
buffer[..original_len].copy_from_slice(&node_hash_ref);
115+
buffer
116+
}

crates/common/trie/logger.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::{
55

66
use ethrex_rlp::decode::RLPDecode;
77

8-
use crate::{Node, NodeHash, Trie, TrieDB, TrieError};
8+
use crate::{Nibbles, Node, Trie, TrieDB, TrieError};
99

1010
pub type TrieWitness = Arc<Mutex<HashSet<Vec<u8>>>>;
1111

@@ -33,7 +33,7 @@ impl TrieLogger {
3333
}
3434

3535
impl TrieDB for TrieLogger {
36-
fn get(&self, key: NodeHash) -> Result<Option<Vec<u8>>, TrieError> {
36+
fn get(&self, key: Nibbles) -> Result<Option<Vec<u8>>, TrieError> {
3737
let result = self.inner_db.get(key)?;
3838
if let Some(result) = result.as_ref()
3939
&& let Ok(decoded) = Node::decode(result)
@@ -44,11 +44,11 @@ impl TrieDB for TrieLogger {
4444
Ok(result)
4545
}
4646

47-
fn put(&self, key: NodeHash, value: Vec<u8>) -> Result<(), TrieError> {
47+
fn put(&self, key: Nibbles, value: Vec<u8>) -> Result<(), TrieError> {
4848
self.inner_db.put(key, value)
4949
}
5050

51-
fn put_batch(&self, key_values: Vec<(NodeHash, Vec<u8>)>) -> Result<(), TrieError> {
51+
fn put_batch(&self, key_values: Vec<(Nibbles, Vec<u8>)>) -> Result<(), TrieError> {
5252
self.inner_db.put_batch(key_values)
5353
}
5454
}

0 commit comments

Comments
 (0)