From 2d1e710d4efd63ceef23176ff6d555ccdb38b33e Mon Sep 17 00:00:00 2001 From: Adam Tucker Date: Tue, 23 Jun 2026 14:12:50 -0600 Subject: [PATCH 1/2] fix(state): repair pruned pre-Ironwood history tree in place instead of forcing a re-sync The Ironwood history-tree upgrade rebuilds the tip ZIP-221 tree from finalized blocks. A node pruned before the Ironwood bump no longer has those blocks, so the upgrade fails with a fatal "delete the cache directory and re-sync" error. The on-disk failure is purely a buffer-width change: adding V3 (Ironwood) node data grew the history-tree Entry from 253 to 326 bytes. A pre-Ironwood tip's V1/V2 node data is consensus-fixed and still present in the stored entry. This adds an in-place re-encode fallback: when the from-blocks rebuild reports MissingData, read the stored old-format tip entry, widen each peak's buffer to the current size (padding only), and write it back -- no blocks required. The inner reader consumes only the meaningful node-data prefix and ignores the trailing zero padding, and version dispatch keys on the tip height, so a pre-NU6.3 tip's peaks are parsed as V1/V2 exactly as written. The re-encoded tree is therefore byte-for-byte equal (same peaks, same size, same MMR root) to what the from-blocks rebuild and a fresh sync produce. The archive from-blocks path is unchanged; the re-encode runs only on the previously-fatal pruned path, and a genuinely unreadable entry still fails loudly. 
--- zebra-chain/src/primitives/zcash_history.rs | 27 ++++++++ zebra-state/Cargo.toml | 1 + .../finalized_state/disk_format/chain.rs | 69 +++++++++++++++++++ .../upgrade/rebuild_history_tree.rs | 56 ++++++++++++++- 4 files changed, 151 insertions(+), 2 deletions(-) diff --git a/zebra-chain/src/primitives/zcash_history.rs b/zebra-chain/src/primitives/zcash_history.rs index 6d2155d3524..2e690452639 100644 --- a/zebra-chain/src/primitives/zcash_history.rs +++ b/zebra-chain/src/primitives/zcash_history.rs @@ -67,6 +67,33 @@ pub struct Entry { inner: [u8; zcash_history::MAX_ENTRY_SIZE], } +impl Entry { + /// Builds an [`Entry`] from the meaningful prefix of a history-tree entry that was serialized by + /// an *older* Zebra version whose `zcash_history::MAX_ENTRY_SIZE` was smaller. + /// + /// An [`Entry`] is a fixed-size, zero-padded buffer: a leaf/node `kind` byte, optional child + /// links, then the version's node data, followed by zero padding out to `MAX_ENTRY_SIZE`. The + /// inner `zcash_history::Entry` reader (`Entry::read`/`from_bytes`) consumes exactly the bytes of + /// that meaningful prefix and ignores the trailing padding, so copying an older, shorter buffer + /// into the front of a current-width, zero-initialized buffer yields an entry the current reader + /// parses identically. (The consensus branch id is supplied by the reader, not stored in the + /// buffer, so it is unaffected.) + /// + /// `old_bytes` must be no longer than the current [`zcash_history::MAX_ENTRY_SIZE`]; that holds + /// for any pre-Ironwood entry, whose buffer was strictly smaller than the current one. Returns + /// `None` otherwise rather than truncating, so a future width *shrink* (which must never happen + /// for a consensus-fixed format, but is checked defensively) cannot silently corrupt an entry. 
+ pub fn from_smaller_format_bytes(old_bytes: &[u8]) -> Option<Self> { + if old_bytes.len() > zcash_history::MAX_ENTRY_SIZE { + return None; + } + + let mut inner = [0u8; zcash_history::MAX_ENTRY_SIZE]; + inner[..old_bytes.len()].copy_from_slice(old_bytes); + Some(Entry { inner }) + } +} + impl Entry { /// Create a leaf Entry for the given block, its network, and the root of its /// note commitment trees. diff --git a/zebra-state/Cargo.toml b/zebra-state/Cargo.toml index c3a53363102..89b75611844 100644 --- a/zebra-state/Cargo.toml +++ b/zebra-state/Cargo.toml @@ -51,6 +51,7 @@ bincode = { workspace = true } chrono = { workspace = true, features = ["clock", "std"] } dirs = { workspace = true } futures = { workspace = true } +serde-big-array = { workspace = true } hex = { workspace = true } hex-literal = { workspace = true } humantime-serde = { workspace = true } diff --git a/zebra-state/src/service/finalized_state/disk_format/chain.rs b/zebra-state/src/service/finalized_state/disk_format/chain.rs index d5a8b657a23..c28f6b85245 100644 --- a/zebra-state/src/service/finalized_state/disk_format/chain.rs +++ b/zebra-state/src/service/finalized_state/disk_format/chain.rs @@ -8,6 +8,7 @@ use std::collections::BTreeMap; use bincode::Options; +use serde_big_array::BigArray; use zebra_chain::{ amount::NonNegative, @@ -50,6 +51,74 @@ pub struct HistoryTreeParts { current_height: Height, } +/// Width in bytes of a history-tree `Entry` as serialized by pre-Ironwood Zebra versions, i.e. +/// `zcash_history::MAX_ENTRY_SIZE` before the Ironwood (`V3`) node-data fields were added. +/// +/// The current width is `326` (V3 node data `MAX_NODE_DATA_SIZE = 317`, plus the 9-byte entry +/// header). V3 added two 32-byte Ironwood roots and a 9-byte compact tx count over V2, i.e. `73` +/// bytes, so the pre-Ironwood width was `326 - 73 = 253`. 
This is the *buffer* width, which is the +/// same for any pre-Ironwood tip regardless of whether its entries hold V1 (Heartwood/Canopy) or V2 +/// (Nu5..Nu6.2) node data, because the buffer is always sized to the maximum node data of the code +/// that wrote it. +const OLD_MAX_ENTRY_SIZE: usize = 253; + +/// A mirror of a single pre-Ironwood history-tree `Entry`: a fixed `OLD_MAX_ENTRY_SIZE`-byte buffer, +/// serialized by `bincode` exactly as the real `zcash_history::Entry` was at the old width (a raw, +/// length-prefix-free fixed array, via `serde_big_array::BigArray`). +#[derive(serde::Deserialize)] +struct OldEntry { + #[serde(with = "BigArray")] + inner: [u8; OLD_MAX_ENTRY_SIZE], +} + +/// A mirror of [`HistoryTreeParts`] with the *old* (pre-Ironwood) entry width, used only to read an +/// entry written by an older Zebra version. +/// +/// Every field has the same type and order as [`HistoryTreeParts`] except `peaks`, whose values are +/// [`OldEntry`] (253-byte buffers) instead of `zcash_history::Entry` (326-byte buffers). Because +/// `bincode::DefaultOptions` is not self-describing and encodes a fixed `[u8; N]` as exactly `N` +/// raw bytes, the *only* on-disk difference between an old and a current `HistoryTreeParts` blob is +/// this per-entry width; deserializing with the old width therefore reads a pre-Ironwood blob with +/// no trailing bytes (`DefaultOptions` disallows trailing bytes, so any mismatch is rejected). +#[derive(serde::Deserialize)] +struct OldHistoryTreeParts { + network_kind: NetworkKind, + size: u32, + peaks: BTreeMap, + current_height: Height, +} + +/// Re-encodes a pre-Ironwood tip history-tree blob into the current `Entry` format, returning the +/// new `bincode` bytes (what [`HistoryTreeParts::as_bytes`] would produce), or `None` if `raw` is +/// not a readable old-format blob. 
+/// +/// This is the in-place repair used when the historical blocks a from-blocks rebuild needs are +/// missing (a database pruned before the Ironwood bump). For a pre-Ironwood chain every peak is V1 +/// or V2 node data, which is consensus-fixed; widening each entry's buffer copies that data verbatim +/// and only changes the trailing zero padding, so the re-encoded `peaks` (and hence the MMR root) +/// are identical to what a from-blocks rebuild or a fresh sync produces. +pub(crate) fn reencode_old_format_history_tree_parts(raw: &[u8]) -> Option<Vec<u8>> { + let old: OldHistoryTreeParts = bincode::DefaultOptions::new().deserialize(raw).ok()?; + + let mut peaks = BTreeMap::new(); + for (idx, old_entry) in old.peaks { + // Copy the consensus-fixed prefix into a current-width, zero-padded `Entry`. The current + // reader parses the meaningful prefix and ignores the extra padding, so this is byte-for-byte + // equivalent in consensus terms to the original entry. + let entry = zcash_history::Entry::from_smaller_format_bytes(&old_entry.inner)?; + peaks.insert(idx, entry); + } + + let parts = HistoryTreeParts { + network_kind: old.network_kind, + size: old.size, + peaks, + current_height: old.current_height, + }; + + Some(parts.as_bytes()) +} + impl HistoryTreeParts { /// Converts [`HistoryTreeParts`] to a [`NonEmptyHistoryTree`]. 
pub(crate) fn with_network( diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade/rebuild_history_tree.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade/rebuild_history_tree.rs index b2cfc0ec422..85fb027f5f9 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade/rebuild_history_tree.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade/rebuild_history_tree.rs @@ -50,7 +50,11 @@ use zebra_chain::{ }; use crate::service::finalized_state::{ - disk_format::chain::HistoryTreeParts, DiskWriteBatch, ZebraDb, + disk_format::{ + chain::{reencode_old_format_history_tree_parts, HistoryTreeParts}, + RawBytes, + }, + DiskWriteBatch, ZebraDb, }; use super::{CancelFormatChange, DiskFormatUpgrade}; @@ -173,7 +177,21 @@ pub(crate) fn rebuild_tip_history_tree_if_needed( let network = db.network(); - let Some(history_tree) = rebuild_tip_history_tree(db, &network, tip_height)? else { + let history_tree = match rebuild_tip_history_tree(db, &network, tip_height) { + Ok(history_tree) => history_tree, + Err(RebuildError::MissingData { height }) => { + // The from-blocks rebuild needs historical blocks and note commitment trees that a + // database pruned before the Ironwood bump no longer has. But a pre-Ironwood tip tree is + // made of V1/V2 entries whose node data is consensus-fixed, so we can repair the entry + // *in place* by re-encoding the still-present old-format blob in the current `Entry` + // width, without reading any block. This is provably equivalent to the from-blocks + // rebuild: same peaks bytes, same `size`, same `current_height`, hence the same MMR root. + return reencode_tip_history_tree_in_place(db) + .ok_or(RebuildError::MissingData { height }); + } + }; + + let Some(history_tree) = history_tree else { // Pre-Heartwood tips have no history tree, so there is nothing to rebuild. (Any stale entry // would be deleted rather than rewritten, but pre-Heartwood databases never wrote one.) 
return Ok(()); @@ -189,6 +207,40 @@ pub(crate) fn rebuild_tip_history_tree_if_needed( Ok(()) } +/// Repairs the tip history-tree entry *in place* by re-encoding the stored pre-Ironwood blob in the +/// current `Entry` format, without reading any block. +/// +/// This is the fallback used when the from-blocks rebuild reports [`RebuildError::MissingData`] (a +/// database pruned before the Ironwood bump). It reads the raw old-format entry, re-encodes it (only +/// the per-entry zero padding changes; the consensus-fixed node data is copied verbatim), writes it +/// back under the `()` key, and confirms it now reads in the current format. +/// +/// Returns `Some(())` on success, or `None` if the entry is absent or cannot be re-encoded (e.g. it +/// is corrupt rather than merely old-format). The caller turns `None` back into the original +/// `MissingData` error, so a genuinely unrecoverable database still fails loudly. +#[allow(clippy::unwrap_in_result)] +fn reencode_tip_history_tree_in_place(db: &ZebraDb) -> Option<()> { + let raw_entry = db.raw_history_tree_value_cf().zs_get(&())?; + let reencoded = reencode_old_format_history_tree_parts(raw_entry.raw_bytes())?; + + let mut batch = DiskWriteBatch::new(); + let _ = db + .raw_history_tree_value_cf() + .with_batch_for_writing(&mut batch) + .zs_insert(&(), &RawBytes::new_raw_bytes(reencoded)); + db.write_batch(batch) + .expect("rewriting the re-encoded tip history tree should always succeed"); + + // The re-encoded entry must now be readable in the current format; if not, treat the repair as + // failed so the caller surfaces the fatal pruned-database error rather than marking the database + // upgraded with an unreadable entry. + if needs_rebuild(db) { + return None; + } + + Some(()) +} + /// Returns `true` if the tip history tree entry exists but cannot be deserialized in the current /// format, and therefore needs to be rebuilt. 
/// From e040e98677ed0d39ca8728c8f21aa85276fd1e4e Mon Sep 17 00:00:00 2001 From: Adam Tucker Date: Tue, 23 Jun 2026 15:28:51 -0600 Subject: [PATCH 2/2] test(state): add round-trip + pruned-fallback tests for the in-place re-encode Adds the regression tests requested before this leaves draft: - reencode_round_trip_restores_current_format_bytes: synthesizes a genuine pre-Ironwood (253-byte-entry) blob from a real synced tip tree, confirms it is unreadable in the current format (reproducing the Ironwood UnexpectedEof), then asserts the re-encode reproduces the original current-format bytes exactly and yields the same MMR root. - pruned_old_format_database_repairs_in_place_via_reencode_fallback: a pruned old-format database (source blocks gone) is repaired in place via the fallback, leaving the history root unchanged. Also pins OLD_MAX_ENTRY_SIZE at compile time (= MAX_ENTRY_SIZE - V3 node-data delta) and adds a test-only old-width encoder (cfg(test)) used to synthesize the genuine old blob. cargo test -p zebra-state -- rebuild_history_tree: 6 passed, 0 failed. --- .../finalized_state/disk_format/chain.rs | 64 ++++ .../tests/rebuild_history_tree.rs | 283 +++++++++++++++++- 2 files changed, 346 insertions(+), 1 deletion(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/chain.rs b/zebra-state/src/service/finalized_state/disk_format/chain.rs index c28f6b85245..945347e2ebf 100644 --- a/zebra-state/src/service/finalized_state/disk_format/chain.rs +++ b/zebra-state/src/service/finalized_state/disk_format/chain.rs @@ -62,10 +62,27 @@ pub struct HistoryTreeParts { /// that wrote it. const OLD_MAX_ENTRY_SIZE: usize = 253; +/// The Ironwood (`V3`) node data added two 32-byte tree roots and a 9-byte compact tx count over the +/// pre-Ironwood (`V2`) layout, i.e. `73` bytes, which is exactly how much the entry buffer grew. 
If +/// the current width or this delta ever changes, [`OLD_MAX_ENTRY_SIZE`] must be updated in lockstep, +/// so this is pinned at compile time. (The 9-byte entry header is common to both widths.) +const _: () = { + const V3_NODE_DATA_DELTA: usize = 32 + 32 + 9; + assert!( + zcash_history::MAX_ENTRY_SIZE - OLD_MAX_ENTRY_SIZE == V3_NODE_DATA_DELTA, + "OLD_MAX_ENTRY_SIZE must be the current MAX_ENTRY_SIZE minus the V3 node-data delta", + ); +}; + /// A mirror of a single pre-Ironwood history-tree `Entry`: a fixed `OLD_MAX_ENTRY_SIZE`-byte buffer, /// serialized by `bincode` exactly as the real `zcash_history::Entry` was at the old width (a raw, /// length-prefix-free fixed array, via `serde_big_array::BigArray`). +/// +/// Also derives `Serialize` under `cfg(test)` so tests can synthesize a genuine old-format blob +/// (the exact bytes a pre-Ironwood Zebra wrote) by re-emitting current-format peaks at this +/// narrower width. #[derive(serde::Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] struct OldEntry { #[serde(with = "BigArray")] inner: [u8; OLD_MAX_ENTRY_SIZE], @@ -81,6 +98,7 @@ struct OldEntry { /// this per-entry width; deserializing with the old width therefore reads a pre-Ironwood blob with /// no trailing bytes (`DefaultOptions` disallows trailing bytes, so any mismatch is rejected). #[derive(serde::Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] struct OldHistoryTreeParts { network_kind: NetworkKind, size: u32, @@ -119,6 +137,52 @@ pub(crate) fn reencode_old_format_history_tree_parts(raw: &[u8]) -> Option Option> { + let mut peaks = BTreeMap::new(); + for (idx, entry) in &parts.peaks { + // Serialize the current entry to its raw fixed-width bytes (BigArray => exactly + // `MAX_ENTRY_SIZE` bytes, no length prefix), then keep only the pre-Ironwood prefix. 
+ let wide = bincode::DefaultOptions::new() + .serialize(entry) + .expect("serializing a history tree entry to a vec does not fail"); + + // Refuse to narrow an entry that actually uses the extra V3 bytes: those would be lost. + if wide[OLD_MAX_ENTRY_SIZE..].iter().any(|&b| b != 0) { + return None; + } + + let mut inner = [0u8; OLD_MAX_ENTRY_SIZE]; + inner.copy_from_slice(&wide[..OLD_MAX_ENTRY_SIZE]); + peaks.insert(*idx, OldEntry { inner }); + } + + let old = OldHistoryTreeParts { + network_kind: parts.network_kind, + size: parts.size, + peaks, + current_height: parts.current_height, + }; + + Some( + bincode::DefaultOptions::new() + .serialize(&old) + .expect("serializing the old-format history tree parts to a vec does not fail"), + ) +} + impl HistoryTreeParts { /// Converts [`HistoryTreeParts`] to a [`NonEmptyHistoryTree`]. pub(crate) fn with_network( diff --git a/zebra-state/src/service/finalized_state/tests/rebuild_history_tree.rs b/zebra-state/src/service/finalized_state/tests/rebuild_history_tree.rs index 625eb61ee8f..d01cc50ce58 100644 --- a/zebra-state/src/service/finalized_state/tests/rebuild_history_tree.rs +++ b/zebra-state/src/service/finalized_state/tests/rebuild_history_tree.rs @@ -21,12 +21,15 @@ use std::env; +use bincode::Options as _; + use zebra_chain::{ block::Height, parameters::{ testnet::{ConfiguredActivationHeights, Parameters as TestnetParameters}, Network, NetworkUpgrade, }, + primitives::zcash_history::MAX_ENTRY_SIZE, LedgerState, }; use zebra_test::prelude::*; @@ -36,7 +39,14 @@ use crate::{ service::{ arbitrary::PreparedChain, finalized_state::{ - disk_format::{upgrade::rebuild_history_tree, RawBytes}, + disk_format::{ + chain::{ + encode_history_tree_parts_at_old_width, + reencode_old_format_history_tree_parts, HistoryTreeParts, + }, + upgrade::rebuild_history_tree, + FromDisk, IntoDisk, RawBytes, + }, CheckpointVerifiedBlock, DiskWriteBatch, FinalizedState, }, }, @@ -308,3 +318,274 @@ fn 
rebuild_fails_clearly_on_pruned_old_format_database() -> Result<()> { Ok(()) } + + +/// A configured testnet whose history tree stays in the *pre-Ironwood* (`V1`/`V2`) entry format for +/// any reachable tip: Heartwood (and the Orchard-era upgrades) activate early, but Nu6.3/Nu7 — the +/// first Ironwood (`V3`) upgrades — are pushed far out of reach of a short generated chain. A tip in +/// this network therefore stores `peaks` whose meaningful bytes fit in the old 253-byte width, which +/// is exactly the on-disk shape a pre-Ironwood Zebra produced and the shape the in-place re-encode +/// repairs. +fn pre_ironwood_test_network() -> Network { + TestnetParameters::build() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(2), + sapling: Some(3), + blossom: Some(4), + heartwood: Some(5), + canopy: Some(6), + nu5: Some(7), + // Keep every Ironwood-capable upgrade out of reach so the tip tree holds only V1/V2 + // entries, which are the entries the pruned-database in-place re-encode is for. + nu6: Some(10_000), + nu6_1: Some(10_001), + nu6_2: Some(10_002), + nu6_3: Some(10_003), + nu7: Some(10_004), + }) + .expect("configured activation heights are valid") + .extend_funding_streams() + .to_network() + .expect("configured network is valid") +} + +/// Regression test for the in-place re-encode (PR #242): re-encoding a genuine pre-Ironwood +/// history-tree blob into the current `Entry` width is the *exact inverse* of the width change. +/// +/// This is the round-trip the reviewers asked for before un-drafting. It builds a real +/// current-format [`HistoryTreeParts`] from a synced regtest tip, re-emits its peaks at the old +/// 253-byte width to synthesize a *genuine* old-format blob (not a truncated current one — see +/// [`encode_history_tree_parts_at_old_width`], which refuses to drop any non-zero byte), confirms +/// that blob is unreadable in the current format (i.e. 
it reproduces the Ironwood `UnexpectedEof` +/// failure), then runs [`reencode_old_format_history_tree_parts`] and asserts the output is +/// byte-for-byte identical to the original current-format bytes — and hence yields the same MMR root. +#[test] +fn reencode_round_trip_restores_current_format_bytes() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = pre_ironwood_test_network(); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), NetworkUpgrade::Nu5, Some(2), true); + + proptest!( + ProptestConfig::with_cases(proptest_cases()), + |((chain, _count, network, _history_tree) in PreparedChain::default() + .with_ledger_strategy(ledger_strategy) + .with_valid_commitments() + .no_shrink())| { + let synced: Vec<_> = chain.iter().cloned().collect(); + // A few blocks past Heartwood (height 5), so the tip stores a non-empty history tree + // with more than one peak, while staying well below the Ironwood activation heights. + prop_assume!(synced.len() > 8); + + let state = sync_to(&network, &synced); + let db = &state.db; + + let stored_root = db.history_tree().hash(); + prop_assert!( + stored_root.is_some(), + "a Heartwood-onward chain should store a non-empty history tree", + ); + + // The genuine current-format on-disk bytes for the tip tree: this is the ground truth + // the re-encode must reproduce exactly. + let current_bytes = db + .raw_history_tree_value_cf() + .zs_get(&()) + .expect("a synced post-Heartwood database has a stored tip history tree entry") + .raw_bytes() + .clone(); + + // Reconstruct the typed parts from those bytes. `as_bytes()` round-trips them unchanged, + // confirming `current_bytes` is the canonical current-format serialization. + let parts = HistoryTreeParts::from_bytes(&current_bytes); + prop_assert_eq!( + &parts.as_bytes(), + &current_bytes, + "the stored bytes must be the canonical current-format serialization", + ); + + // Synthesize a genuine pre-Ironwood blob by re-emitting the same peaks at the old width. 
+ // `None` here would mean the tip tree holds V3 data that does not fit the old width, + // which `pre_ironwood_test_network` is constructed to avoid. + let old_blob = encode_history_tree_parts_at_old_width(&parts) + .expect("a pre-Ironwood tip tree narrows to the old width without losing any data"); + + // The synthetic blob must really be the smaller, old layout: shorter than the current + // bytes (each of N peaks is 73 bytes narrower) and — crucially — unreadable in the + // current format, which is exactly the `Io(UnexpectedEof)` failure the Ironwood + // `MAX_ENTRY_SIZE` bump introduced for old databases. + prop_assert!( + old_blob.len() < current_bytes.len(), + "the old-format blob must be smaller than the current-format blob", + ); + prop_assert!( + bincode::DefaultOptions::new() + .deserialize::<HistoryTreeParts>(&old_blob) + .is_err(), + "the synthesized old-format blob must be unreadable in the current format, \ + reproducing the failure the re-encode repairs", + ); + + // The fix under test: re-encode the old blob into the current width. + let reencoded = reencode_old_format_history_tree_parts(&old_blob) + .expect("a genuine old-format blob must re-encode into the current format"); + + // (a) It deserializes as a current `HistoryTreeParts` ... + prop_assert!( + bincode::DefaultOptions::new() + .deserialize::<HistoryTreeParts>(&reencoded) + .is_ok(), + "the re-encoded blob must be readable in the current format", + ); + // (b) ... and equals the original current-format bytes exactly. This proves the + // re-encode is the precise inverse of the width change: same peaks, same `size`, same + // `current_height`, only the per-entry zero padding restored. + prop_assert_eq!( + &reencoded, + &current_bytes, + "re-encoding the old blob must reproduce the original current-format bytes exactly", + ); + + // And, as the consensus-level corollary, the MMR root is unchanged. 
+ let reencoded_root = HistoryTreeParts::from_bytes(&reencoded) + .with_network(&network) + .expect("the re-encoded parts must rebuild a valid history tree") + .hash(); + prop_assert_eq!( + Some(reencoded_root), + stored_root, + "the re-encoded history tree must have the same MMR root as the stored one", + ); + } + ); + + Ok(()) +} + +/// End-to-end regression test for the pruned-database fallback (PR #242): a database with a genuine +/// old-format tip entry whose rebuild source blocks have been pruned is repaired *in place* by the +/// re-encode fallback, leaving the history root unchanged. +/// +/// This is the success counterpart to [`rebuild_fails_clearly_on_pruned_old_format_database`]. That +/// test predates the fallback and asserts the from-blocks rebuild fails with `MissingData` when the +/// source blocks are gone; here the same pruned-and-old-format database instead *succeeds*, because +/// the `RebuildError::MissingData` arm now re-encodes the still-present old-format blob without +/// reading any block. 
+#[test] +fn pruned_old_format_database_repairs_in_place_via_reencode_fallback() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = pre_ironwood_test_network(); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), NetworkUpgrade::Nu5, Some(2), true); + + proptest!( + ProptestConfig::with_cases(proptest_cases()), + |((chain, _count, network, _history_tree) in PreparedChain::default() + .with_ledger_strategy(ledger_strategy) + .with_valid_commitments() + .no_shrink())| { + let synced: Vec = chain.iter().cloned().collect(); + prop_assume!(synced.len() > 8); + + let state = sync_to(&network, &synced); + let db = &state.db; + + let tip_height = db + .finalized_tip_height() + .expect("synced database has a finalized tip"); + let stored_root = db.history_tree().hash(); + prop_assert!( + stored_root.is_some(), + "a Heartwood-onward chain should store a non-empty history tree", + ); + + // Overwrite the tip entry with a genuine old-format blob (the exact bytes a pre-Ironwood + // Zebra wrote), so the entry needs a rebuild *and* is re-encodable in place. 
+ let parts = HistoryTreeParts::from_bytes( + db.raw_history_tree_value_cf() + .zs_get(&()) + .expect("a synced post-Heartwood database has a stored tip history tree entry") + .raw_bytes(), + ); + let old_blob = encode_history_tree_parts_at_old_width(&parts) + .expect("a pre-Ironwood tip tree narrows to the old width without losing any data"); + + let mut batch = DiskWriteBatch::new(); + let _ = db + .raw_history_tree_value_cf() + .with_batch_for_writing(&mut batch) + .zs_insert(&(), &RawBytes::new_raw_bytes(old_blob)); + db.write_batch(batch) + .expect("writing a genuine old-format history tree entry succeeds"); + + prop_assert!( + rebuild_history_tree::needs_rebuild(db), + "the old-format entry must be detected as needing a rebuild", + ); + + // Prune a block the from-blocks rebuild reads, so `rebuild_tip_history_tree` reports + // `MissingData` and the in-place re-encode fallback is exercised. (The history tree + // window starts at the current upgrade's activation height, so the block just below the + // tip is always within the rebuild range — the same block the failure test deletes.) + let missing_height = Height(tip_height.0 - 1); + let mut batch = DiskWriteBatch::new(); + batch.delete_block_header(db, missing_height); + db.write_batch(batch) + .expect("deleting a block header to simulate a pruned database succeeds"); + + // The repair path must now SUCCEED via the fallback, even though the from-blocks rebuild + // cannot run. 
+ rebuild_history_tree::rebuild_tip_history_tree_if_needed(db, tip_height).expect( + "a pruned old-format database must be repaired in place by the re-encode fallback", + ); + + prop_assert!( + !rebuild_history_tree::needs_rebuild(db), + "the entry must be readable in the current format after the in-place re-encode", + ); + prop_assert!( + rebuild_history_tree::quick_check(db).is_ok(), + "the history tree must pass its validity check after the in-place re-encode", + ); + prop_assert_eq!( + db.history_tree().hash(), + stored_root, + "the in-place re-encoded history tree root must match the originally stored root", + ); + } + ); + + Ok(()) +} + +/// Pins the pre-Ironwood entry width (`OLD_MAX_ENTRY_SIZE = 253` in +/// [`crate::service::finalized_state::disk_format::chain`]) to the current width minus the Ironwood +/// (`V3`) node-data delta. +/// +/// `chain.rs` also enforces this at compile time with a `const` assertion that references the private +/// `OLD_MAX_ENTRY_SIZE`; this runtime test documents the same relationship against the public +/// `zcash_history::MAX_ENTRY_SIZE`, so a future width change trips a named, greppable regression test +/// here in addition to the compile-time guard. +#[test] +fn old_max_entry_size_tracks_current_width_minus_v3_delta() { + // The literal pre-Ironwood width. It is `pub(crate)`-private to `chain.rs`, so it is duplicated + // here; the compile-time `const _` assertion in `chain.rs` is what actually ties the two + // together, and this test guards the arithmetic against the public current width. + const OLD_MAX_ENTRY_SIZE: usize = 253; + // V3 added two 32-byte Ironwood tree roots and a 9-byte compact tx count over the V2 layout. 
+ const V3_NODE_DATA_DELTA: usize = 32 + 32 + 9; + + assert_eq!( + MAX_ENTRY_SIZE, 326, + "the current Ironwood-capable entry width is expected to be 326 bytes", + ); + assert_eq!(V3_NODE_DATA_DELTA, 73, "the V3 node-data delta is 73 bytes"); + assert_eq!( + MAX_ENTRY_SIZE - OLD_MAX_ENTRY_SIZE, + V3_NODE_DATA_DELTA, + "OLD_MAX_ENTRY_SIZE must be the current MAX_ENTRY_SIZE minus the V3 node-data delta", + ); +}