valargroup · czarcas7ic · Jun 23, 2026 · Jun 23, 2026
diff --git a/zebra-chain/src/primitives/zcash_history.rs b/zebra-chain/src/primitives/zcash_history.rs
@@ -67,6 +67,33 @@ pub struct Entry {
     inner: [u8; zcash_history::MAX_ENTRY_SIZE],
 }
 
+impl Entry {
+    /// Widens an entry buffer written at a smaller `MAX_ENTRY_SIZE` to the current width.
+    ///
+    /// The bytes of `old_bytes` are copied to the front of a zero-initialized buffer of
+    /// [`zcash_history::MAX_ENTRY_SIZE`] bytes, so the result is `old_bytes` followed by zero
+    /// padding. This relies on the entry layout being a meaningful prefix (kind byte, optional
+    /// child links, node data) followed by zero padding that the inner `zcash_history::Entry`
+    /// reader ignores, and on the consensus branch id being supplied by the reader rather than
+    /// stored in the buffer.
+    ///
+    /// # Returns
+    ///
+    /// - `Some(entry)` when `old_bytes` fits in the current width, which is expected for every
+    ///   pre-Ironwood entry.
+    /// - `None` when `old_bytes` is longer than the current width: the input is never truncated,
+    ///   so a (consensus-forbidden) width shrink cannot silently corrupt an entry.
+    pub fn from_smaller_format_bytes(old_bytes: &[u8]) -> Option<Self> {
+        let mut inner = [0u8; zcash_history::MAX_ENTRY_SIZE];
+
+        // `get_mut` yields `None` for an over-long input, which `?` returns to the caller.
+        inner
+            .get_mut(..old_bytes.len())?
+            .copy_from_slice(old_bytes);
+        Some(Self { inner })
+    }
+}
+
 impl Entry {
     /// Create a leaf Entry for the given block, its network, and the root of its
     /// note commitment trees.

diff --git a/zebra-state/Cargo.toml b/zebra-state/Cargo.toml
@@ -51,6 +51,7 @@ bincode = { workspace = true }
 chrono = { workspace = true, features = ["clock", "std"] }
 dirs = { workspace = true }
 futures = { workspace = true }
+serde-big-array = { workspace = true }
 hex = { workspace = true }
 hex-literal = { workspace = true }
 humantime-serde = { workspace = true }

diff --git a/zebra-state/src/service/finalized_state/disk_format/chain.rs b/zebra-state/src/service/finalized_state/disk_format/chain.rs
@@ -8,6 +8,7 @@
 use std::collections::BTreeMap;
 
 use bincode::Options;
+use serde_big_array::BigArray;
 
 use zebra_chain::{
     amount::NonNegative,
@@ -50,6 +51,138 @@ pub struct HistoryTreeParts {
     current_height: Height,
 }
 
+/// Width in bytes of a history-tree `Entry` as serialized by pre-Ironwood Zebra versions, i.e.
+/// `zcash_history::MAX_ENTRY_SIZE` before the Ironwood (`V3`) node-data fields were added.
+///
+/// Derivation: the current width is `326` (V3 `MAX_NODE_DATA_SIZE = 317` plus the 9-byte entry
+/// header). V3 added two 32-byte Ironwood roots and a 9-byte compact tx count over V2 (`73`
+/// bytes), so the old width is `326 - 73 = 253`, i.e. `244` bytes of node data plus the header.
+/// This is the *buffer* width, not the size of the data inside it: it is the same whether a
+/// pre-Ironwood entry holds V1 (Heartwood/Canopy) or V2 (Nu5..Nu6.2) node data, because the
+/// buffer is always sized to the maximum node data of the code that wrote it.
+const OLD_MAX_ENTRY_SIZE: usize = 253;
+
+/// Compile-time pin tying [`OLD_MAX_ENTRY_SIZE`] to the current entry width. V3 (Ironwood) node
+/// data added two 32-byte tree roots and a 9-byte compact tx count over V2 (`73` bytes); the
+/// 9-byte entry header is common to both widths. Written as an addition so that a current width
+/// *below* the old one also fails with the message below rather than a subtraction overflow.
+const _: () = {
+    const V3_NODE_DATA_DELTA: usize = 32 + 32 + 9;
+    assert!(
+        OLD_MAX_ENTRY_SIZE + V3_NODE_DATA_DELTA == zcash_history::MAX_ENTRY_SIZE,
+        "OLD_MAX_ENTRY_SIZE must be the current MAX_ENTRY_SIZE minus the V3 node-data delta",
+    );
+};
+
+/// Deserialization mirror of one pre-Ironwood history-tree `Entry`: a fixed
+/// `OLD_MAX_ENTRY_SIZE`-byte buffer with no other fields.
+///
+/// `serde_big_array::BigArray` provides the serde impls for the array field, which is wider than
+/// the sizes serde implements natively; the intent is that `bincode` reads it as a raw,
+/// length-prefix-free run of bytes, exactly as the real entry was written at the old width.
+/// `Serialize` is derived only under `cfg(test)`, for tests that synthesize old-format blobs.
+#[derive(serde::Deserialize)]
+#[cfg_attr(test, derive(serde::Serialize))]
+struct OldEntry {
+    #[serde(with = "BigArray")]
+    inner: [u8; OLD_MAX_ENTRY_SIZE],
+}
+
+/// Mirror of [`HistoryTreeParts`] at the pre-Ironwood entry width, used only to read a blob
+/// written by an older Zebra version. Field order and types match [`HistoryTreeParts`] except for
+/// the `peaks` values. `bincode::DefaultOptions` is not self-describing and writes `[u8; N]` as
+/// `N` raw bytes, so the per-entry width is the only on-disk difference; it also disallows
+/// trailing bytes, so a blob that is not exactly old-width is expected to be rejected.
+#[derive(serde::Deserialize)]
+#[cfg_attr(test, derive(serde::Serialize))]
+struct OldHistoryTreeParts {
+    // Network kind of the stored tree; carried over as-is when re-encoding.
+    network_kind: NetworkKind,
+    // Size field of the stored tree; carried over as-is when re-encoding.
+    size: u32,
+    // Peak entries keyed by a `u32` index, each an old-width (253-byte) buffer.
+    peaks: BTreeMap<u32, OldEntry>,
+    // Height field of the stored tree; carried over as-is when re-encoding.
+    current_height: Height,
+}
+
+/// Converts a pre-Ironwood tip history-tree blob to the current `Entry` width.
+///
+/// `raw` is deserialized as an [`OldHistoryTreeParts`]; each peak is widened with
+/// `from_smaller_format_bytes` (old bytes first, zero padding after), and the result is serialized
+/// with [`HistoryTreeParts::as_bytes`]. `network_kind`, `size` and `current_height` are carried
+/// over unchanged. Returns `None` if `raw` does not deserialize at the old width or a peak cannot
+/// be widened.
+///
+/// This is the in-place repair for databases that no longer hold the blocks a from-blocks rebuild
+/// needs: pre-Ironwood peaks hold consensus-fixed V1/V2 node data, so only the padding changes.
+pub(crate) fn reencode_old_format_history_tree_parts(raw: &[u8]) -> Option<Vec<u8>> {
+    let old: OldHistoryTreeParts = bincode::DefaultOptions::new().deserialize(raw).ok()?;
+
+    // Widen every peak; the first peak that does not fit aborts the whole conversion.
+    let peaks = old
+        .peaks
+        .into_iter()
+        .map(|(idx, old_entry)| {
+            zcash_history::Entry::from_smaller_format_bytes(&old_entry.inner)
+                .map(|entry| (idx, entry))
+        })
+        .collect::<Option<BTreeMap<_, _>>>()?;
+
+    let parts = HistoryTreeParts {
+        network_kind: old.network_kind,
+        size: old.size,
+        peaks,
+        current_height: old.current_height,
+    };
+    Some(parts.as_bytes())
+}
+
+/// Test-only inverse of [`reencode_old_format_history_tree_parts`]: serializes `parts` at the
+/// pre-Ironwood entry width (`OLD_MAX_ENTRY_SIZE`) through the `Old*` mirror types.
+///
+/// Every peak is serialized at the current width, split at `OLD_MAX_ENTRY_SIZE`, and only the
+/// leading part is kept; the other fields are copied unchanged. The intent is to reproduce the
+/// bytes an older Zebra version would have written for the same tip tree.
+///
+/// Returns `None` if any peak has a non-zero byte at or beyond `OLD_MAX_ENTRY_SIZE` (it holds data
+/// that does not fit the old width), so the output is never a lossy truncation. Panics if
+/// `bincode` serialization fails.
+#[cfg(test)]
+pub(crate) fn encode_history_tree_parts_at_old_width(parts: &HistoryTreeParts) -> Option<Vec<u8>> {
+    let peaks = parts
+        .peaks
+        .iter()
+        .map(|(idx, entry)| {
+            // BigArray serialization emits the raw fixed-width buffer with no length prefix.
+            let wide = bincode::DefaultOptions::new()
+                .serialize(entry)
+                .expect("serializing a history tree entry to a vec does not fail");
+            let (prefix, extra) = wide.split_at(OLD_MAX_ENTRY_SIZE);
+
+            // Any non-zero byte past the old width is V3 data that narrowing would lose.
+            if extra.iter().any(|&byte| byte != 0) {
+                return None;
+            }
+
+            let mut inner = [0u8; OLD_MAX_ENTRY_SIZE];
+            inner.copy_from_slice(prefix);
+            Some((*idx, OldEntry { inner }))
+        })
+        .collect::<Option<BTreeMap<_, _>>>()?;
+
+    let old = OldHistoryTreeParts {
+        network_kind: parts.network_kind,
+        size: parts.size,
+        peaks,
+        current_height: parts.current_height,
+    };
+
+    let bytes = bincode::DefaultOptions::new()
+        .serialize(&old)
+        .expect("serializing the old-format history tree parts to a vec does not fail");
+    Some(bytes)
+}
+
 impl HistoryTreeParts {
     /// Converts [`HistoryTreeParts`] to a [`NonEmptyHistoryTree`].
     pub(crate) fn with_network(

diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade/rebuild_history_tree.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade/rebuild_history_tree.rs
@@ -50,7 +50,11 @@ use zebra_chain::{
 };
 
 use crate::service::finalized_state::{
-    disk_format::chain::HistoryTreeParts, DiskWriteBatch, ZebraDb,
+    disk_format::{
+        chain::{reencode_old_format_history_tree_parts, HistoryTreeParts},
+        RawBytes,
+    },
+    DiskWriteBatch, ZebraDb,
 };
 
 use super::{CancelFormatChange, DiskFormatUpgrade};
@@ -173,7 +177,21 @@ pub(crate) fn rebuild_tip_history_tree_if_needed(
 
     let network = db.network();
 
-    let Some(history_tree) = rebuild_tip_history_tree(db, &network, tip_height)? else {
+    let history_tree = match rebuild_tip_history_tree(db, &network, tip_height) {
+        Ok(history_tree) => history_tree,
+        Err(RebuildError::MissingData { height }) => {
+            // The from-blocks rebuild needs historical blocks and note commitment trees that a
+            // database pruned before the Ironwood bump no longer has. But a pre-Ironwood tip tree is
+            // made of V1/V2 entries whose node data is consensus-fixed, so we can repair the entry
+            // *in place* by re-encoding the still-present old-format blob in the current `Entry`
+            // width, without reading any block. This is provably equivalent to the from-blocks
+            // rebuild: same peaks bytes, same `size`, same `current_height`, hence the same MMR root.
+            return reencode_tip_history_tree_in_place(db)
+                .ok_or(RebuildError::MissingData { height });
+        }
+    };
+
+    let Some(history_tree) = history_tree else {
         // Pre-Heartwood tips have no history tree, so there is nothing to rebuild. (Any stale entry
         // would be deleted rather than rewritten, but pre-Heartwood databases never wrote one.)
         return Ok(());
@@ -189,6 +207,40 @@ pub(crate) fn rebuild_tip_history_tree_if_needed(
     Ok(())
 }
 
+/// Repairs the tip history-tree entry *in place*, without reading any block.
+///
+/// Fallback for when the from-blocks rebuild reports [`RebuildError::MissingData`]. The raw entry
+/// stored under the `()` key is re-encoded by [`reencode_old_format_history_tree_parts`], written
+/// back under the same key, and then re-checked with [`needs_rebuild`].
+///
+/// Returns `Some(())` when the rewritten entry passes that check. Returns `None` when the entry is
+/// absent, cannot be re-encoded, or still needs a rebuild after the rewrite; in that last case the
+/// re-encoded bytes have already been written. The caller maps `None` back to `MissingData`.
+///
+/// # Panics
+///
+/// If writing the batch to the database fails.
+#[allow(clippy::unwrap_in_result)]
+fn reencode_tip_history_tree_in_place(db: &ZebraDb) -> Option<()> {
+    // Both steps short-circuit with `None`: a missing entry, or one that cannot be re-encoded.
+    let stored = db.raw_history_tree_value_cf().zs_get(&())?;
+    let widened = reencode_old_format_history_tree_parts(stored.raw_bytes())?;
+    let widened = RawBytes::new_raw_bytes(widened);
+
+    let mut batch = DiskWriteBatch::new();
+    let _ = db
+        .raw_history_tree_value_cf()
+        .with_batch_for_writing(&mut batch)
+        .zs_insert(&(), &widened);
+    db.write_batch(batch)
+        .expect("rewriting the re-encoded tip history tree should always succeed");
+
+    // Only report success if the rewritten entry no longer needs a rebuild, so the caller
+    // surfaces the fatal pruned-database error instead of marking the database upgraded with an
+    // entry it cannot read.
+    (!needs_rebuild(db)).then_some(())
+}
+
 /// Returns `true` if the tip history tree entry exists but cannot be deserialized in the current
 /// format, and therefore needs to be rebuilt.
 ///