Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions zebra-chain/src/primitives/zcash_history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,33 @@ pub struct Entry {
inner: [u8; zcash_history::MAX_ENTRY_SIZE],
}

impl Entry {
    /// Widens a history-tree entry written at a smaller `zcash_history::MAX_ENTRY_SIZE` into a
    /// current-width [`Entry`].
    ///
    /// An [`Entry`] is a fixed-size buffer: a leaf/node `kind` byte, optional child links, and the
    /// version's node data, then zero padding up to `MAX_ENTRY_SIZE`. The inner
    /// `zcash_history::Entry` reader consumes only that meaningful prefix and ignores the padding,
    /// so placing the older, shorter buffer at the front of a zeroed current-width buffer produces
    /// an entry that the current reader parses the same way. The consensus branch id is supplied
    /// by the reader rather than stored in the buffer, so it is not affected by the widening.
    ///
    /// Returns `None` when `old_bytes` is longer than the current `MAX_ENTRY_SIZE`. Every
    /// pre-Ironwood entry is strictly shorter, so that only happens if the format ever *shrinks*;
    /// refusing (instead of truncating) keeps such a change from silently corrupting an entry.
    pub fn from_smaller_format_bytes(old_bytes: &[u8]) -> Option<Self> {
        let mut widened = [0u8; zcash_history::MAX_ENTRY_SIZE];

        // `get_mut` is `None` exactly when `old_bytes` does not fit in the buffer, which is the
        // "refuse rather than truncate" case. Bytes past the copied prefix stay zero.
        widened
            .get_mut(..old_bytes.len())?
            .copy_from_slice(old_bytes);

        Some(Self { inner: widened })
    }
}

impl Entry {
/// Create a leaf Entry for the given block, its network, and the root of its
/// note commitment trees.
Expand Down
1 change: 1 addition & 0 deletions zebra-state/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ bincode = { workspace = true }
chrono = { workspace = true, features = ["clock", "std"] }
dirs = { workspace = true }
futures = { workspace = true }
serde-big-array = { workspace = true }
hex = { workspace = true }
hex-literal = { workspace = true }
humantime-serde = { workspace = true }
Expand Down
133 changes: 133 additions & 0 deletions zebra-state/src/service/finalized_state/disk_format/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use std::collections::BTreeMap;

use bincode::Options;
use serde_big_array::BigArray;

use zebra_chain::{
amount::NonNegative,
Expand Down Expand Up @@ -50,6 +51,138 @@ pub struct HistoryTreeParts {
current_height: Height,
}

/// Byte width of one history-tree `Entry` buffer as written by pre-Ironwood Zebra versions, i.e.
/// the value of `zcash_history::MAX_ENTRY_SIZE` before the Ironwood (`V3`) node-data fields
/// existed.
///
/// The value is spelled as its derivation:
///
/// - `326` is the current width: V3 node data (`MAX_NODE_DATA_SIZE = 317`) plus the 9-byte entry
///   header.
/// - `32 + 32 + 9 = 73` is what V3 added over V2: two 32-byte Ironwood roots and a 9-byte compact
///   tx count.
///
/// So the old width is `326 - 73 = 253`. This is the width of the *buffer*, not of the data in it:
/// it is the same for every pre-Ironwood tip, whether its entries carry V1 (Heartwood/Canopy) or
/// V2 (Nu5..Nu6.2) node data, because the writer always sized the buffer to the largest node data
/// it knew about.
const OLD_MAX_ENTRY_SIZE: usize = 326 - (32 + 32 + 9);

/// Compile-time pin tying [`OLD_MAX_ENTRY_SIZE`] to the current entry width.
///
/// The Ironwood (`V3`) node data added two 32-byte tree roots and a 9-byte compact tx count over
/// the pre-Ironwood (`V2`) layout, i.e. `73` bytes, which is exactly how much the entry buffer
/// grew. If the current width or this delta ever changes, [`OLD_MAX_ENTRY_SIZE`] must be updated
/// in lockstep. (The 9-byte entry header is common to both widths.)
///
/// The check compares against the *sum* rather than subtracting the two widths: a subtraction
/// would underflow during const evaluation if the current width ever became smaller than the old
/// one, and the build would then fail with a generic arithmetic-overflow error instead of the
/// message below.
const _: () = {
    const V3_NODE_DATA_DELTA: usize = 32 + 32 + 9;
    assert!(
        zcash_history::MAX_ENTRY_SIZE == OLD_MAX_ENTRY_SIZE + V3_NODE_DATA_DELTA,
        "OLD_MAX_ENTRY_SIZE must be the current MAX_ENTRY_SIZE minus the V3 node-data delta",
    );
};

/// A mirror of a single pre-Ironwood history-tree `Entry`: a fixed `OLD_MAX_ENTRY_SIZE`-byte buffer,
/// serialized by `bincode` exactly as the real `zcash_history::Entry` was at the old width (a raw,
/// length-prefix-free fixed array, via `serde_big_array::BigArray`).
///
/// Also derives `Serialize` under `cfg(test)` so tests can synthesize a genuine old-format blob
/// (the exact bytes a pre-Ironwood Zebra wrote) by re-emitting current-format peaks at this
/// narrower width.
///
/// This type exists only to describe a byte layout; production code reads it and immediately
/// widens it into a current-format entry.
#[derive(serde::Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
struct OldEntry {
    /// The entry's raw bytes at the old width, zero padding included.
    ///
    /// `BigArray` supplies the serde impls here because serde's built-in array support stops at
    /// 32 elements.
    #[serde(with = "BigArray")]
    inner: [u8; OLD_MAX_ENTRY_SIZE],
}

/// A mirror of [`HistoryTreeParts`] with the *old* (pre-Ironwood) entry width, used only to read an
/// entry written by an older Zebra version.
///
/// Every field has the same type and order as [`HistoryTreeParts`] except `peaks`, whose values are
/// [`OldEntry`] (253-byte buffers) instead of `zcash_history::Entry` (326-byte buffers). Because
/// `bincode::DefaultOptions` is not self-describing and encodes a fixed `[u8; N]` as exactly `N`
/// raw bytes, the *only* on-disk difference between an old and a current `HistoryTreeParts` blob is
/// this per-entry width; deserializing with the old width therefore reads a pre-Ironwood blob with
/// no trailing bytes (`DefaultOptions` disallows trailing bytes, so any mismatch is rejected).
///
/// Do not reorder, retype, add or remove fields: the declaration order *is* the wire format, and it
/// must keep matching what the older versions wrote.
#[derive(serde::Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
struct OldHistoryTreeParts {
    /// Carried over unchanged into the re-encoded [`HistoryTreeParts`].
    network_kind: NetworkKind,
    /// Carried over unchanged into the re-encoded [`HistoryTreeParts`].
    size: u32,
    /// The tree's peak entries, keyed as in [`HistoryTreeParts`], each at the old buffer width.
    /// This is the only field whose encoding differs from the current format.
    peaks: BTreeMap<u32, OldEntry>,
    /// Carried over unchanged into the re-encoded [`HistoryTreeParts`].
    current_height: Height,
}

/// Converts a pre-Ironwood tip history-tree blob to the current `Entry` width.
///
/// Returns the new `bincode` bytes (the same bytes [`HistoryTreeParts::as_bytes`] produces for the
/// widened tree), or `None` when `raw` cannot be read as an old-format blob or an entry cannot be
/// widened.
///
/// This is the in-place repair for databases that no longer hold the historical blocks a
/// from-blocks rebuild needs (pruned before the Ironwood bump). Every peak of a pre-Ironwood chain
/// holds V1 or V2 node data, which is consensus-fixed. Widening an entry copies that data verbatim
/// and only lengthens the trailing zero padding, so the resulting `peaks` (and therefore the MMR
/// root) match what a from-blocks rebuild or a fresh sync would produce.
pub(crate) fn reencode_old_format_history_tree_parts(raw: &[u8]) -> Option<Vec<u8>> {
    let old: OldHistoryTreeParts = bincode::DefaultOptions::new().deserialize(raw).ok()?;

    // Widen each peak by copying its consensus-fixed prefix into a zero-padded, current-width
    // `Entry`. The current reader parses the prefix and ignores the extra padding, so each widened
    // entry is equivalent to the original in consensus terms. Collecting into `Option` stops at
    // the first entry that cannot be widened.
    let peaks = old
        .peaks
        .into_iter()
        .map(|(idx, narrow)| {
            zcash_history::Entry::from_smaller_format_bytes(&narrow.inner)
                .map(|widened| (idx, widened))
        })
        .collect::<Option<BTreeMap<_, _>>>()?;

    let widened_parts = HistoryTreeParts {
        network_kind: old.network_kind,
        size: old.size,
        peaks,
        current_height: old.current_height,
    };

    Some(widened_parts.as_bytes())
}

/// Test-only inverse of [`reencode_old_format_history_tree_parts`].
///
/// Re-emits a current-format [`HistoryTreeParts`] as the *narrower* pre-Ironwood
/// (`OLD_MAX_ENTRY_SIZE`) blob, i.e. the exact bytes an older Zebra version would have written for
/// the same tip tree.
///
/// A current `Entry` is a 326-byte zero-padded buffer. When it holds pre-Ironwood (V1/V2) node
/// data, only the first `OLD_MAX_ENTRY_SIZE` bytes can be non-zero. Each peak is cut down to that
/// prefix and serialized through the `Old*` mirror types, so the output is byte-for-byte what the
/// old code wrote rather than a truncated current blob.
///
/// Returns `None` if any peak has a non-zero byte at or beyond `OLD_MAX_ENTRY_SIZE`, meaning it
/// holds V3 (Ironwood) node data that does not fit the old width. The synthesis is therefore
/// self-checking: it yields a *faithful* old blob or nothing, never a lossy one.
#[cfg(test)]
pub(crate) fn encode_history_tree_parts_at_old_width(parts: &HistoryTreeParts) -> Option<Vec<u8>> {
    let narrowed_peaks = parts
        .peaks
        .iter()
        .map(|(idx, entry)| {
            // A current entry serializes to its raw fixed-width bytes (BigArray => exactly
            // `MAX_ENTRY_SIZE` bytes, no length prefix).
            let wide = bincode::DefaultOptions::new()
                .serialize(entry)
                .expect("serializing a history tree entry to a vec does not fail");

            // Everything past the old width must be padding; otherwise narrowing would drop
            // real V3 data, so give up on the whole blob.
            let (old_prefix, v3_tail) = wide.split_at(OLD_MAX_ENTRY_SIZE);
            if !v3_tail.iter().all(|&byte| byte == 0) {
                return None;
            }

            let mut inner = [0u8; OLD_MAX_ENTRY_SIZE];
            inner.copy_from_slice(old_prefix);
            Some((*idx, OldEntry { inner }))
        })
        .collect::<Option<BTreeMap<_, _>>>()?;

    let old = OldHistoryTreeParts {
        network_kind: parts.network_kind,
        size: parts.size,
        peaks: narrowed_peaks,
        current_height: parts.current_height,
    };

    let blob = bincode::DefaultOptions::new()
        .serialize(&old)
        .expect("serializing the old-format history tree parts to a vec does not fail");

    Some(blob)
}

impl HistoryTreeParts {
/// Converts [`HistoryTreeParts`] to a [`NonEmptyHistoryTree`].
pub(crate) fn with_network(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ use zebra_chain::{
};

use crate::service::finalized_state::{
disk_format::chain::HistoryTreeParts, DiskWriteBatch, ZebraDb,
disk_format::{
chain::{reencode_old_format_history_tree_parts, HistoryTreeParts},
RawBytes,
},
DiskWriteBatch, ZebraDb,
};

use super::{CancelFormatChange, DiskFormatUpgrade};
Expand Down Expand Up @@ -173,7 +177,21 @@ pub(crate) fn rebuild_tip_history_tree_if_needed(

let network = db.network();

let Some(history_tree) = rebuild_tip_history_tree(db, &network, tip_height)? else {
let history_tree = match rebuild_tip_history_tree(db, &network, tip_height) {
Ok(history_tree) => history_tree,
Err(RebuildError::MissingData { height }) => {
// The from-blocks rebuild needs historical blocks and note commitment trees that a
// database pruned before the Ironwood bump no longer has. But a pre-Ironwood tip tree is
// made of V1/V2 entries whose node data is consensus-fixed, so we can repair the entry
// *in place* by re-encoding the still-present old-format blob in the current `Entry`
// width, without reading any block. This is provably equivalent to the from-blocks
// rebuild: same peaks bytes, same `size`, same `current_height`, hence the same MMR root.
return reencode_tip_history_tree_in_place(db)
.ok_or(RebuildError::MissingData { height });
}
};

let Some(history_tree) = history_tree else {
// Pre-Heartwood tips have no history tree, so there is nothing to rebuild. (Any stale entry
// would be deleted rather than rewritten, but pre-Heartwood databases never wrote one.)
return Ok(());
Expand All @@ -189,6 +207,40 @@ pub(crate) fn rebuild_tip_history_tree_if_needed(
Ok(())
}

/// Rewrites the stored tip history-tree entry in the current `Entry` format without reading any
/// block.
///
/// This is the fallback for a from-blocks rebuild that reported [`RebuildError::MissingData`] (a
/// database pruned before the Ironwood bump). The steps are:
///
/// 1. read the raw old-format value stored under the `()` key;
/// 2. re-encode it (the consensus-fixed node data is copied verbatim; only the per-entry zero
///    padding grows);
/// 3. write the result back under the same key;
/// 4. check that the entry is now readable in the current format.
///
/// Returns `Some(())` when all four steps succeed. Returns `None` when the entry is absent, cannot
/// be re-encoded (e.g. it is corrupt rather than merely old-format), or is still unreadable after
/// the rewrite. The caller maps `None` back to the original `MissingData` error, so a database
/// that genuinely cannot be recovered still fails loudly.
///
/// # Panics
///
/// If writing the batch to the database fails.
// The `expect` on the batch write is deliberate: a failed write is treated as fatal, not as a
// "could not repair" outcome the caller should map to `MissingData`.
#[allow(clippy::unwrap_in_result)]
fn reencode_tip_history_tree_in_place(db: &ZebraDb) -> Option<()> {
    let stored = db.raw_history_tree_value_cf().zs_get(&())?;
    let widened =
        RawBytes::new_raw_bytes(reencode_old_format_history_tree_parts(stored.raw_bytes())?);

    let mut batch = DiskWriteBatch::new();
    // The insert is chained on a temporary so its borrow of `batch` ends with this statement,
    // leaving `batch` free to be moved into `write_batch` below.
    let _ = db
        .raw_history_tree_value_cf()
        .with_batch_for_writing(&mut batch)
        .zs_insert(&(), &widened);
    db.write_batch(batch)
        .expect("rewriting the re-encoded tip history tree should always succeed");

    // Only report success if the rewritten entry now reads in the current format. Otherwise the
    // caller surfaces the fatal pruned-database error rather than marking the database upgraded
    // with an unreadable entry.
    (!needs_rebuild(db)).then_some(())
}

/// Returns `true` if the tip history tree entry exists but cannot be deserialized in the current
/// format, and therefore needs to be rebuilt.
///
Expand Down
Loading