
Commit 610df53

fix(l1): reduce spammy trace-level logs during snap-sync (#5492)
**Motivation**

Running ethrex with trace-level logs produces a lot of spam like "We didn't get a peer from the table", caused by busy loops we enter when we don't have many peers. This is especially noticeable during the first few stages of snap-sync, since we start with a small number of peers.

**Description**

This PR reduces the spam by adding a 10ms sleep, plus a counter so the warning is printed only once every 1000 iterations (effectively once every 10s). It applies the same fix to the other stages of snap-sync, and adds logs to other busy loops so we can detect if/when we land in those cases instead of silently ignoring them.
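The change is the same throttling pattern at every call site: a countdown that lets the trace fire only when it reaches 0, plus a short sleep so the loop stops spinning. Below is a minimal sketch of that pattern; `try_get_peer` and `request_loop` are illustrative stand-ins rather than ethrex code, and it assumes the `tokio` and `tracing` crates are available:

```rust
use std::time::Duration;
use tracing::trace;

// Stand-in for the real lookup (`peer_table.get_best_peer(...)`),
// which returns None while few peers are connected.
async fn try_get_peer() -> Option<u64> {
    None
}

async fn request_loop() {
    let mut logged_no_free_peers_count: u32 = 0;
    loop {
        let Some(_peer) = try_get_peer().await else {
            // The trace fires only when the counter hits 0; it is then
            // re-armed to 1000, so with a 10ms sleep per iteration the
            // message repeats roughly every 10 seconds.
            if logged_no_free_peers_count == 0 {
                trace!("We are missing peers");
                logged_no_free_peers_count = 1000;
            }
            logged_no_free_peers_count -= 1;
            // Sleep a bit to avoid busy polling.
            tokio::time::sleep(Duration::from_millis(10)).await;
            continue;
        };
        // ... dispatch the request to the peer and leave the retry loop ...
        break;
    }
}
```

The 10ms sleep does double duty: it sets the ~10s log cadence (1000 × 10ms) and it keeps the no-peers retry loop from burning a core.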
Parent: 3797ce0

5 files changed, +68 -21 lines

crates/common/trie/trie_sorted.rs

Lines changed: 0 additions & 11 deletions

```diff
@@ -6,7 +6,6 @@ use crossbeam::channel::{Receiver, Sender, bounded};
 use ethereum_types::H256;
 use ethrex_threadpool::ThreadPool;
 use std::{sync::Arc, thread::scope};
-use tracing::debug;

 /// The elements of the stack represent the branch node that is the parent of the current
 /// parent element. When the current parent is no longer valid (is not the parent of
@@ -139,16 +138,6 @@ fn add_current_to_parent_and_write_queue(
     };
     parent_element.element.choices[index as usize] =
         node.compute_hash_no_alloc(&mut nodehash_buffer).into();
-    debug!(
-        "branch {:x?}",
-        parent_element
-            .element
-            .choices
-            .iter()
-            .enumerate()
-            .filter_map(|(index, child)| child.is_valid().then_some(index))
-            .collect::<Vec<_>>()
-    );
     nodes_to_write.push((target_path, node));
     Ok(())
 }
```

crates/networking/p2p/peer_handler.rs

Lines changed: 40 additions & 2 deletions

```diff
@@ -281,6 +281,8 @@ impl PeerHandler {

         *METRICS.headers_download_start_time.lock().await = Some(SystemTime::now());

+        let mut logged_no_free_peers_count = 0;
+
         loop {
             if let Ok((headers, peer_id, _connection, startblock, previous_chunk_limit)) =
                 task_receiver.try_recv()
@@ -337,7 +339,14 @@ impl PeerHandler {
                 .get_best_peer(&SUPPORTED_ETH_CAPABILITIES)
                 .await?
             else {
-                trace!("We didn't get a peer from the table");
+                // Log ~ once every 10 seconds
+                if logged_no_free_peers_count == 0 {
+                    trace!("We are missing peers in request_block_headers");
+                    logged_no_free_peers_count = 1000;
+                }
+                logged_no_free_peers_count -= 1;
+                // Sleep a bit to avoid busy polling
+                tokio::time::sleep(Duration::from_millis(10)).await;
                 continue;
             };

@@ -656,6 +665,8 @@ impl PeerHandler {
         let mut last_update: SystemTime = SystemTime::now();
         let mut write_set = tokio::task::JoinSet::new();

+        let mut logged_no_free_peers_count = 0;
+
         loop {
             if all_accounts_state.len() * size_of::<AccountState>() >= RANGE_FILE_CHUNK_SIZE {
                 let current_account_hashes = std::mem::take(&mut all_account_hashes);
@@ -736,7 +747,14 @@ impl PeerHandler {
                 .inspect_err(|err| error!(err= ?err, "Error requesting a peer for account range"))
                 .unwrap_or(None)
             else {
-                trace!("We are missing peers in request_account_range_request");
+                // Log ~ once every 10 seconds
+                if logged_no_free_peers_count == 0 {
+                    trace!("We are missing peers in request_account_range");
+                    logged_no_free_peers_count = 1000;
+                }
+                logged_no_free_peers_count -= 1;
+                // Sleep a bit to avoid busy polling
+                tokio::time::sleep(Duration::from_millis(10)).await;
                 continue;
             };

@@ -974,6 +992,8 @@ impl PeerHandler {

         let mut completed_tasks = 0;

+        let mut logged_no_free_peers_count = 0;
+
         loop {
             if let Ok(result) = task_receiver.try_recv() {
                 let TaskResult {
@@ -1012,6 +1032,14 @@ impl PeerHandler {
                 .get_best_peer(&SUPPORTED_ETH_CAPABILITIES)
                 .await?
             else {
+                // Log ~ once every 10 seconds
+                if logged_no_free_peers_count == 0 {
+                    trace!("We are missing peers in request_bytecodes");
+                    logged_no_free_peers_count = 1000;
+                }
+                logged_no_free_peers_count -= 1;
+                // Sleep a bit to avoid busy polling
+                tokio::time::sleep(Duration::from_millis(10)).await;
                 continue;
             };

@@ -1181,6 +1209,8 @@ impl PeerHandler {
         // vector of hashed storage keys and storage values.
         let mut current_account_storages: BTreeMap<H256, AccountsWithStorage> = BTreeMap::new();

+        let mut logged_no_free_peers_count = 0;
+
         debug!("Starting request_storage_ranges loop");
         loop {
             if current_account_storages
@@ -1526,6 +1556,14 @@ impl PeerHandler {
                 .get_best_peer(&SUPPORTED_ETH_CAPABILITIES)
                 .await?
             else {
+                // Log ~ once every 10 seconds
+                if logged_no_free_peers_count == 0 {
+                    trace!("We are missing peers in request_storage_ranges");
+                    logged_no_free_peers_count = 1000;
+                }
+                logged_no_free_peers_count -= 1;
+                // Sleep a bit to avoid busy polling
+                tokio::time::sleep(Duration::from_millis(10)).await;
                 continue;
             };
```

crates/networking/p2p/rlpx/connection/server.rs

Lines changed: 1 addition & 1 deletion

```diff
@@ -325,7 +325,7 @@ impl GenServer for PeerConnectionServer {
                 trace!(
                     peer=%established_state.node,
                     %message,
-                    "Received incomming message",
+                    "Received incoming message",
                 );
                 handle_incoming_message(established_state, message).await
             }
```

crates/networking/p2p/sync/state_healing.rs

Lines changed: 14 additions & 1 deletion

```diff
@@ -19,7 +19,7 @@ use ethrex_common::{H256, constants::EMPTY_KECCACK_HASH, types::AccountState};
 use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode};
 use ethrex_storage::Store;
 use ethrex_trie::{EMPTY_TRIE_HASH, Nibbles, Node, TrieDB, TrieError};
-use tracing::debug;
+use tracing::{debug, trace};

 use crate::{
     metrics::{CurrentStepValue, METRICS},
@@ -119,6 +119,9 @@ async fn heal_state_trie(
     )>(1000);
     // Contains both nodes and their corresponding paths to heal
     let mut nodes_to_heal = Vec::new();
+
+    let mut logged_no_free_peers_count = 0;
+
     loop {
         if last_update.elapsed() >= SHOW_PROGRESS_INTERVAL_DURATION {
             let num_peers = peers
@@ -232,6 +235,16 @@ async fn heal_state_trie(
         else {
             // If there are no peers available, re-add the batch to the paths vector, and continue
             paths.extend(batch);
+
+            // Log ~ once every 10 seconds
+            if logged_no_free_peers_count == 0 {
+                trace!("We are missing peers in heal_state_trie");
+                logged_no_free_peers_count = 1000;
+            }
+            logged_no_free_peers_count -= 1;
+
+            // Sleep a bit to avoid busy polling
+            tokio::time::sleep(Duration::from_millis(10)).await;
             continue;
         };
```

crates/networking/p2p/sync/storage_healing.rs

Lines changed: 13 additions & 6 deletions

```diff
@@ -164,6 +164,8 @@ pub async fn heal_storage_trie(
     let (task_sender, mut task_receiver) =
         tokio::sync::mpsc::channel::<Result<TrieNodes, RequestStorageTrieNodes>>(1000);

+    let mut logged_no_free_peers_count = 0;
+
     loop {
         yield_now().await;
         if state.last_update.elapsed() >= SHOW_PROGRESS_INTERVAL_DURATION {
@@ -247,6 +249,7 @@ pub async fn heal_storage_trie(
             peers,
             state.state_root,
             &task_sender,
+            &mut logged_no_free_peers_count,
         )
         .await;

@@ -317,20 +320,24 @@ async fn ask_peers_for_nodes(
     peers: &mut PeerHandler,
     state_root: H256,
     task_sender: &Sender<Result<TrieNodes, RequestStorageTrieNodes>>,
+    logged_no_free_peers_count: &mut u32,
 ) {
     if (requests.len() as u32) < MAX_IN_FLIGHT_REQUESTS && !download_queue.is_empty() {
         let Some((peer_id, connection)) = peers
             .peer_table
             .get_best_peer(&SUPPORTED_SNAP_CAPABILITIES)
             .await
-            .inspect_err(
-                |err| debug!(err= ?err, "Error requesting a peer to perform storage healing"),
-            )
+            .inspect_err(|err| debug!(?err, "Error requesting a peer to perform storage healing"))
             .unwrap_or(None)
         else {
-            // warn!("We have no free peers for storage healing!"); way too spammy, moving to trace
-            // If we have no peers we shrug our shoulders and wait until next free peer
-            trace!("We have no free peers for storage healing!");
+            // Log ~ once every 10 seconds
+            if *logged_no_free_peers_count == 0 {
+                trace!("We are missing peers in heal_storage_trie");
+                *logged_no_free_peers_count = 1000;
+            }
+            *logged_no_free_peers_count -= 1;
+            // Sleep for a bit to avoid busy polling
+            tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
             return;
         };
         let at = download_queue.len().saturating_sub(STORAGE_BATCH_SIZE);
```