diff --git a/zebra-network/src/config.rs b/zebra-network/src/config.rs index e49bfe6049b..1a6e0cb1ed0 100644 --- a/zebra-network/src/config.rs +++ b/zebra-network/src/config.rs @@ -1050,6 +1050,11 @@ impl<'de> Deserialize<'de> for Config { non_zero_config_field.filter(|config_value| config_value > &0).unwrap_or(default_config_value) }); + // Clamp the in-flight byte budget up to the checkpoint-range floor (with a + // warning) rather than rejecting too-small configs, so older configs keep + // starting while checkpoint sync stays deadlock-free. + let mut zakura = zakura; + zakura.block_sync.clamp_inflight_block_bytes_to_floor(); zakura.block_sync.validate().map_err(|error| { de::Error::custom(format!("invalid zakura.block_sync config: {error}")) })?; diff --git a/zebra-network/src/zakura/block_sync/config.rs b/zebra-network/src/zakura/block_sync/config.rs index cc2c8c24a52..a295a465523 100644 --- a/zebra-network/src/zakura/block_sync/config.rs +++ b/zebra-network/src/zakura/block_sync/config.rs @@ -293,14 +293,33 @@ impl ZakuraBlockSyncConfig { if self.max_inflight_block_bytes <= self.floor_request_byte_reservation() { return Err("max_inflight_block_bytes must exceed one floor request"); } - if self.max_inflight_block_bytes < BS_CHECKPOINT_RANGE_BYTE_FLOOR { - return Err( - "max_inflight_block_bytes must hold one full checkpoint range \ - (MIN_BS_CHECKPOINT_SUBMITTED_BLOCK_APPLIES * BS_PER_BLOCK_WORST_CASE_BYTES) \ - or checkpoint sync can deadlock", + Ok(()) + } + + /// Raise `max_inflight_block_bytes` up to the checkpoint-range floor when it + /// is configured below it, warning once. + /// + /// A positive budget below [`BS_CHECKPOINT_RANGE_BYTE_FLOOR`] cannot hold one + /// full worst-case checkpoint range. The checkpoint verifier only commits a + /// range once the whole range is submitted, and every submitted body stays + /// reserved against the budget until it is durable, so a budget below the + /// floor would deadlock: the verifier never commits, nothing becomes durable, + /// and no bytes are ever released. Rather than refuse to start -- which would + /// break older configs that set a smaller budget -- clamp the budget up to the + /// floor and warn. Zero is left untouched so [`validate`](Self::validate) + /// still rejects it as an explicit misconfiguration. + pub fn clamp_inflight_block_bytes_to_floor(&mut self) { + if self.max_inflight_block_bytes > 0 + && self.max_inflight_block_bytes < BS_CHECKPOINT_RANGE_BYTE_FLOOR + { + tracing::warn!( + configured_max_inflight_block_bytes = self.max_inflight_block_bytes, + checkpoint_range_byte_floor = BS_CHECKPOINT_RANGE_BYTE_FLOOR, + "zakura.block_sync.max_inflight_block_bytes is below the checkpoint-range \ + floor; clamping it up so checkpoint sync cannot deadlock", ); + self.max_inflight_block_bytes = BS_CHECKPOINT_RANGE_BYTE_FLOOR; } - Ok(()) } /// Build the inert local status used before the block-sync reactor is wired. diff --git a/zebra-network/src/zakura/block_sync/tests.rs b/zebra-network/src/zakura/block_sync/tests.rs index ab1072688d2..13fb6fbeefa 100644 --- a/zebra-network/src/zakura/block_sync/tests.rs +++ b/zebra-network/src/zakura/block_sync/tests.rs @@ -779,11 +779,15 @@ fn config_validate_rejects_degenerate_values() { }; assert!(config.validate().is_err()); + // A positive budget below the checkpoint-range floor is no longer rejected by + // `validate`: it is clamped up to the floor (with a warning) at load instead, + // so older configs keep starting. See + // `config_clamps_below_floor_inflight_block_bytes`. config = ZakuraBlockSyncConfig { max_inflight_block_bytes: BS_CHECKPOINT_RANGE_BYTE_FLOOR - 1, ..ZakuraBlockSyncConfig::default() }; - assert!(config.validate().is_err()); + assert!(config.validate().is_ok()); config = ZakuraBlockSyncConfig { max_inflight_block_bytes: BS_CHECKPOINT_RANGE_BYTE_FLOOR, @@ -793,6 +797,62 @@ fn config_validate_rejects_degenerate_values() { assert!(config.validate().is_ok()); } +#[test] +fn config_clamps_below_floor_inflight_block_bytes() { + // A positive budget below the checkpoint-range floor is clamped up to the + // floor so checkpoint sync cannot deadlock (instead of refusing to start). + let mut below = ZakuraBlockSyncConfig { + // 256 MiB, the historical `v4.5.0-zakura-blocksync.toml` value, which is + // below the ~802 MB checkpoint-range floor. + max_inflight_block_bytes: 256 * 1024 * 1024, + ..ZakuraBlockSyncConfig::default() + }; + assert!(below.max_inflight_block_bytes < BS_CHECKPOINT_RANGE_BYTE_FLOOR); + below.clamp_inflight_block_bytes_to_floor(); + assert_eq!( + below.max_inflight_block_bytes, + BS_CHECKPOINT_RANGE_BYTE_FLOOR + ); + + // A budget at or above the floor is left untouched. + let mut at_floor = ZakuraBlockSyncConfig { + max_inflight_block_bytes: BS_CHECKPOINT_RANGE_BYTE_FLOOR + 1, + ..ZakuraBlockSyncConfig::default() + }; + at_floor.clamp_inflight_block_bytes_to_floor(); + assert_eq!( + at_floor.max_inflight_block_bytes, + BS_CHECKPOINT_RANGE_BYTE_FLOOR + 1 + ); + + // Zero is left untouched so `validate` still rejects it as a misconfiguration. + let mut zero = ZakuraBlockSyncConfig { + max_inflight_block_bytes: 0, + ..ZakuraBlockSyncConfig::default() + }; + zero.clamp_inflight_block_bytes_to_floor(); + assert_eq!(zero.max_inflight_block_bytes, 0); + assert!(zero.validate().is_err()); +} + +#[test] +fn config_deserialize_clamps_below_floor_inflight_block_bytes() { + // Regression: an older config with a too-small `max_inflight_block_bytes` + // (e.g. the stored `v4.5.0-zakura-blocksync.toml`) must still load -- clamped + // up to the checkpoint-range floor -- rather than being rejected at startup. + let config: crate::Config = toml::from_str( + r#" + [zakura.block_sync] + max_inflight_block_bytes = 268435456 + "#, + ) + .expect("a below-floor max_inflight_block_bytes config still loads"); + assert_eq!( + config.zakura.block_sync.max_inflight_block_bytes, + BS_CHECKPOINT_RANGE_BYTE_FLOOR, + ); +} + #[test] fn codec_round_trips_every_message_variant() { round_trip(BlockSyncMessage::Status(status()));