diff --git a/.github/workflows/checkpoint-update.yml b/.github/workflows/checkpoint-update.yml index 61489e26ed3..28e33c8360f 100644 --- a/.github/workflows/checkpoint-update.yml +++ b/.github/workflows/checkpoint-update.yml @@ -35,6 +35,7 @@ jobs: env: MAINNET_CHECKPOINTS: zebra-chain/src/parameters/checkpoint/main-checkpoints.txt TESTNET_CHECKPOINTS: zebra-chain/src/parameters/checkpoint/test-checkpoints.txt + MAINNET_FRONTIER: zebra-state/src/service/finalized_state/vct/mainnet-frontier.bin EOS_FILE: zebrad/src/components/sync/end_of_support.rs steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd #v6.0.2 @@ -87,6 +88,15 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} continue-on-error: true + - name: Download mainnet frontier artifact + id: mainnet-frontier-artifact + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c #v8.0.1 + with: + name: generate-checkpoints-mainnet-frontier + run-id: ${{ steps.resolve-run.outputs.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true + - name: Download testnet checkpoint artifact id: testnet-artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c #v8.0.1 @@ -112,6 +122,11 @@ jobs: HAS_MAINNET="true" fi + if [ -f "mainnet-frontier.bin" ]; then + BYTES=$(wc -c < mainnet-frontier.bin | tr -d ' ') + echo "Mainnet frontier artifact: ${BYTES} bytes" + fi + if [ -f "test-checkpoints.txt" ]; then LINES=$(wc -l < test-checkpoints.txt | tr -d ' ') echo "Testnet artifact: ${LINES} checkpoint lines" @@ -130,6 +145,7 @@ jobs: # Append new mainnet checkpoints (entries with heights higher than current last) - name: Append new mainnet checkpoints + id: append-mainnet if: steps.check-artifacts.outputs.has_mainnet == 'true' run: | CURRENT_LAST=$(tail -1 "${MAINNET_CHECKPOINTS}" | awk '{print $1}') @@ -138,13 +154,48 @@ jobs: # Extract only new entries (height > current last) NEW_COUNT=$(awk -v last="$CURRENT_LAST" '$1 > last' main-checkpoints.txt 
| wc -l | tr -d ' ') echo "New mainnet checkpoints to append: ${NEW_COUNT}" + echo "new_count=${NEW_COUNT}" >> "$GITHUB_OUTPUT" if [ "$NEW_COUNT" -gt 0 ]; then awk -v last="$CURRENT_LAST" '$1 > last' main-checkpoints.txt >> "${MAINNET_CHECKPOINTS}" NEW_LAST=$(tail -1 "${MAINNET_CHECKPOINTS}" | awk '{print $1}') echo "Updated last mainnet checkpoint: ${NEW_LAST}" + echo "new_last=${NEW_LAST}" >> "$GITHUB_OUTPUT" + else + echo "new_last=${CURRENT_LAST}" >> "$GITHUB_OUTPUT" + fi + + - name: Update Mainnet VCT frontier + if: >- + steps.check-artifacts.outputs.has_mainnet == 'true' && + steps.append-mainnet.outputs.new_count != '0' + env: + EXPECTED_HEIGHT: ${{ steps.append-mainnet.outputs.new_last }} + run: | + if [ ! -s "mainnet-frontier.bin" ]; then + echo "Mainnet checkpoints advanced, but mainnet-frontier.bin is missing or empty" + exit 1 + fi + + FRONTIER_HEIGHT=$(python3 - <<'PY' + import struct + + with open("mainnet-frontier.bin", "rb") as frontier: + height_bytes = frontier.read(4) + if len(height_bytes) != 4: + raise SystemExit("frontier artifact is shorter than its height prefix") + print(struct.unpack("1000) | reg_miss | all_missing | route_hedge win | steady blk/s | +|---|---|---|---|---|---|---| +| OFF-1 | 10,899 | 18/84 | 97,676 | 380,894 | — | 27.9 | +| OFF-2 | 10,539 | 21/83 | 93,517 | 364,439 | — | 25.4 | +| OFF-3 | 22,438 | 9/84 | 50,060 | 195,060 | — | 68.9 | +| **ON-1** | 18,316 | 7/81 | 43,328 | 62,469 | 17,990 | 45.4 | +| **ON-2** | 19,434 | 12/84 | 44,729 | 57,630 | 18,295 | 50.8 | +| **ON-3** | 28,213 | 3/84 | 0 | 7 | 0 (inert) | 64.7 | + +## Medians (OFF → ON) + +| metric | OFF | ON | Δ | +|---|---|---|---| +| stall intervals | 18 | 7 | **−61%** | +| reg_miss | 93,517 | 43,328 | **−54%** | +| **all_missing** (stale-marker fails) | 364,439 | 57,630 | **−84%** | +| Δblocks per 7.5-min window | 10,899 | 19,434 | **+78%** | +| steady-state blk/s | 27.9 | 50.8 | +82% | + +## Verdict — the hedge works, and is well-behaved + +**It does exactly what 
it was designed to do, confirmed across N=3:** + +1. **Active when peers thrash.** On the two bad draws (ON-1, ON-2), the baseline equivalent would have accumulated ~360k `all_missing` synthetic failures; the hedge fired (`dispatch` ~140k per-peer, **~18k wins**), bypassing the stale "missing" inventory markers and delivering the head block from a real ready peer. Result: `all_missing` −84%, `reg_miss` −54%, stalls cut, ~+78% more blocks committed in the window. + +2. **Inert when peers are clean.** ON-3 drew a healthy peer set with **0 registry-misses** — the hedge stayed at 0 dispatches and matched the best baseline draw (OFF-3: 68.9 vs ON-3: 64.7 blk/s). No overhead, no regression when there's nothing to fix. + +**This contradicts the handoff's "honest risk"** that #105 might already absorb the stall: on bad draws the baseline still thrashed hard (364k `all_missing`, 18–21 stall intervals), and the hedge sharply reduced it. #105 (let markers age out during the 2s backoff) and the hedge (bypass the markers entirely on retry) are complementary — the hedge attacks the residual cases #105 doesn't resolve within budget. + +## Mechanism evidence (`route_hedge` counters, bad-draw arms) + +- `dispatch` ~136k–147k per-peer requests, `win` ~18k, `exhausted` ~117k–127k. So ~12–13% of per-peer hedge requests delivered the block; the rest exhausted and fell back to the unchanged #105 backoff. Even at that win rate, `all_missing` collapsed −84% and throughput rose — because each win resolves a head-of-line block that would otherwise have stalled the strictly-ordered commit for a full 2s backoff cycle. + +## Honest caveats + +- **Throughput is peer-draw-dependent.** The +78% Δblocks / +82% steady-state are real within these runs but confounded by which peers each window drew (the ON arm happened to also escape cold-start faster on average). 
The robust, mechanism-level claims are the **`all_missing` −84%** and the **18k hedge wins** — these directly measure the stale-marker bypass and are not throughput-noise. +- N=3 per arm. More runs would tighten the medians, but the direction is consistent across every pair (each ON arm has far lower `all_missing` than every OFF arm except the clean ON-3, which had none to begin with). + +## DoS posture (unchanged from the design) + +Scoped to the single head-of-line hash in `registry_miss_retry`; small fanout (4) clamped to ready peers; `select_random_ready_peers` (random, load-ignoring, broadcast stance); losers cancelled on first win; no new retry budget; counts as one request against `download_concurrency_limit`. + +## Recommendation + +Ship-worthy as a prototype. The lever is validated: it converts stale-marker `all_missing` failures into deliveries and reduces head-of-line stalls, with zero overhead on clean draws. Next tuning (per handoff §7): cut the 2s backoff for hedged retries (the fanout already addresses the root cause, so the wait is mostly wasted), and/or latency-aware peer selection to raise the floor. diff --git a/docs/design/verified-commitment-trees.md b/docs/design/verified-commitment-trees.md new file mode 100644 index 00000000000..69b7f994a07 --- /dev/null +++ b/docs/design/verified-commitment-trees.md @@ -0,0 +1,748 @@ +# Verified commitment trees — fast checkpoint sync + +## Overview (start here) + +**What it is.** Below the last checkpoint, Zebra normally rebuilds the Sapling and Orchard +note-commitment trees for every block just to learn each block's treestate root — the single +biggest CPU cost of checkpoint sync. Verified commitment trees (VCT) instead **fetch the +per-block roots from peers**, **verify each one against the headers the node already trusts**, +fold them straight into the anchor set and history tree, and **skip the rebuild**. 
At the +last checkpoint height an **embedded final frontier** (verified against that block's proven root) is +written so normal per-block verification resumes above the checkpoint. Result: same consensus +state as the legacy committer, far less work — and no new cryptography. + +**The one invariant that makes it safe:** _no root influences consensus state until it has been +authenticated against a header commitment._ Everything else (the transport, the cache, the peer +policy) is plumbing around that invariant. A root that cannot be obtained or verified is refused, +never guessed — inside the post-fold "frozen" window the committer **fails closed** rather than +recomputing against a now-stale frontier (§8). + +**Data flow (fetch + commit path):** + +```text +header sync (runs ahead of bodies) + │ GetHeaders { want_tree_aux_roots } ─▶ peer ─▶ Headers { headers, body_sizes, tree_aux_roots } + │ (roots carried in-band, all-or-nothing, finalized ranges only; §4.2) + ▼ +header-sync reactor (zebra-network): validate root count + per-height alignment; reject + │ unrequested or non-finalized roots as MalformedMessage (§8.1) + ▼ +CommitHeaderRange (zebra-state): persist provisional roots into + │ zakura_header_commitment_roots_by_height, ahead of body commit (§4.2) + ▼ +PeerSource (DB-backed reader) ── vct_root(height) ──▶ finalized committer + │ + ▼ +finalized committer: verify-before-commit (§6) ──fold roots, skip recompute──▶ DB + │ at the last checkpoint height: verify + write the embedded final frontier ──▶ resume legacy recompute +``` + +**Serving path (how a node answers other nodes' fetches):** + +```text +peer GetHeaders { want_tree_aux_roots } ─▶ header-sync reactor ─▶ header-sync driver (zebrad) + ─▶ ReadRequest::BlockRoots ─▶ committed commitment_roots_by_height index, then provisional + zakura_header_commitment_roots_by_height for header-ahead heights (all-or-nothing; §9) +``` + +**Lifecycle of one fast sync.** + +(1) Node starts under 
`consensus.checkpoint_sync = true` on +Mainnet → the committer is built in peer mode. +(2) Header sync requests the per-height roots in-band with the finalized header ranges it already fetches (`want_tree_aux_roots`) and persists the received roots provisionally into the database ahead of the committer (§4.2). (3) Each checkpoint block: look up its root; verify it (own header now, successor header next block, plus +the direct below-Heartwood/below-NU5 checks); fold it in; freeze the frontier (§6, §7). +(4) At the last checkpoint height, verify and write the embedded frontier and unfreeze. +(5) Above the last checkpoint height, ordinary semantic verification resumes from the real frontier. A bad/missing root anywhere in the frozen window parks the block and retries in place as header sync re-delivers the root; it never writes wrong state. + +**Glossary.** + +| Term | Meaning | +| --- | --- | +| **Checkpoint sync** | `consensus.checkpoint_sync = true`: trust the embedded checkpoint list for headers/PoW up to the max checkpoint. Precondition for VCT. | +| **last checkpoint height** | The network's max checkpoint height; the boundary where the fast path ends and the embedded final frontier is written. | +| **Fast root** | A peer-supplied `(sapling_root, orchard_root)` for one height, folded in after verification instead of being recomputed. | +| **Final frontier** | The real Sapling/Orchard/Sprout note-commitment trees at the last checkpoint height, embedded in the binary (§5.2) and written as the tip treestate at last checkpoint height. | +| **Frozen frontier** | The window `tip < last_checkpoint_height` during a fast sync where the on-disk frontier is intentionally stale (roots folded, trees not advanced). Legacy recompute here would corrupt state, so the committer fails closed (§8). 
| +| **Verify-before-commit** | Authenticating each root against the node's header commitments (ZIP-221 MMR one-block-lag + direct sub-Heartwood/sub-NU5 checks) before it affects state (§6). | +| **Fail closed** | In the frozen window, refuse the commit (retryable) rather than recompute or guess (§8). | +| **Provisional roots** | Peer-supplied roots carried in the header-sync `Headers` message and persisted to `zakura_header_commitment_roots_by_height` ahead of body commit. Advisory until verify-before-commit authenticates them (§4.2, §6). | +| **All-or-nothing** | A `Headers` message carries roots for _every_ header in the range or none; a partial root set is rejected on the wire and never served (§5.4). | +| **Kill switch** | `consensus.disable_vct_fast_sync = true`: keep checkpoint sync but force the legacy committer (§4.4). | + +For where each piece lives in the tree, see the file map (§15). + +## 1. Goal + +Let a node sync the chain up to the last checkpoint **without recomputing the Sapling and +Orchard note-commitment frontiers per block** — the dominant CPU cost of checkpoint sync +(the per-block `update_trees_parallel` recompute, ~70% of per-block commit time). + +Instead of rebuilding the trees, the committer consumes: + +1. **per-block commitment roots** (the Sapling and Orchard treestate roots as of the end of + each block), each **verified against the node's own checkpoint-committed block headers** + before it is allowed to influence consensus state; and +2. a **final note-commitment frontier** at the last checkpoint height, so post-checkpoint + semantic verification resumes from a correct frontier. + +This is **one fast verified path with its data source factored out behind a seam**, not a +new consensus mode. Every supplied root is verified before commit; a node that cannot obtain +or verify a root falls back to the legacy recompute, bit-identical to today — except inside the +frozen window, where the committer fails closed instead of recomputing against a stale frontier (§8). + +## 2. 
Scope and non-goals + +- **In scope:** the consensus-critical commit path (verify-before-commit, the frozen-frontier + failure policy, the transition at the last checkpoint height), the header-sync transport that carries + roots in-band, the provisional-root persistence and serving read path, and the persistent + fast-synced database format. +- **Not a consensus change.** There are exactly two enduring code paths: the standard local + tree rebuild (legacy) and the fast verified path. Which one runs is config-driven by + `consensus.checkpoint_sync` plus the rollout force-disable knob + (`consensus.disable_vct_fast_sync`; §4.4); the `state.storage_mode` axis (Archive vs. Pruned) + is orthogonal — it controls raw-tx/index pruning, not the tree path, so both storage modes + use the fast path under checkpoint sync unless force-disabled. The network `PeerSource` and + crate-local test fixtures are _sources_ behind one seam (§5.3) — not modes. +- **No new cryptography.** Verification reuses the existing consensus checks + (`block_commitment_is_valid_for_chain_history`, `HistoryTree::push`); see §6. +- **Out of scope for the fast lane:** historical tree/subtree RPCs (`z_gettreestate`, + `GetSubtreeRoots`) below the last checkpoint height. A fast-synced node deliberately never built the + per-height trees those need; they return a typed archive-mode error below the last checkpoint height and + are restored only by the archive follower (§12, increments 7–8). + +## 3. Background: the cost being eliminated + +On checkpoint sync, header and PoW validity are already attested by the checkpoint list, so +the committer's remaining per-block work is dominated by advancing the Sapling and Orchard +note-commitment trees (`update_trees_parallel`) to recompute each block's treestate root. 
+The roots themselves are small and, from Heartwood onward, are **already committed to by the +block headers** via the ZIP-221 ChainHistory MMR: a block's header commitment binds the +history tree as of its parent, and each history-tree leaf is built from the block body plus +that block's Sapling/Orchard roots. + +That is the lever: if a node is _handed_ the per-block roots, it can fold them straight into +the anchor set and history MMR and **confirm them against the headers it already trusts**, +skipping the frontier recompute entirely — without weakening any consensus check. + +## 4. Design decisions + +### 4.1 Roots travel on the wire; the frontier is embedded + +The fast path needs two things, and they are sourced differently: + +- **Per-block roots travel over the network**, carried in-band on the header-sync `Headers` + message (§4.2, §5.4). `BlockCommitmentRoots { height, sapling_root, orchard_root }` (§5.1) is + the wire payload. +- **The final frontier is embedded in the binary** (§5.2), refreshed per release like a + checkpoint, _not_ sent on the wire. There is no `GetFinalFrontiers`/`FinalFrontiers` message + and no frontier-serving path to attack or keep available. + +### 4.2 Roots ride the header-sync message + +Commitment roots are header-adjacent verified metadata, not body data: tiny, verified against +the header chain, servable only by a node holding the validated headers, and needed _buffered +ahead of_ the committer. So they are **carried in-band on the header-sync `Headers` message** +rather than over a separate stream. `GetHeaders` gains a `want_tree_aux_roots` flag, and a +`Headers` response carries an **all-or-nothing** `tree_aux_roots` vector parallel to `headers` +(§5.4). The header-sync stream version is bumped (2 → 4) for the new field. 
+ +Roots are requested and accepted **only for finalized (checkpoint-verified) header ranges** — the +reactor rejects roots on a non-finalized range, and rejects roots a request opted out of, as +`MalformedMessage` (§8.1). When a finalized header range commits via `CommitHeaderRange`, its +roots are **persisted into the `zakura_header_commitment_roots_by_height` column family ahead of +body commit** (§5.3). The committer then reads them per height through the `PeerSource` seam. +Headers and their roots arrive together, so a range's root coverage is known before any of its +roots can trigger the fast path. + +The one coupling to bodies: verifying a root via the ZIP-221 MMR leaf needs the block's +tx-counts (from the body), so roots are **consumed** at commit time with bodies even though they +are **delivered** early with headers. + +### 4.3 Roots follow the header-sync window + +Because roots ride the header-sync `Headers` message, they are fetched exactly where header sync +already is — for the finalized ranges between the verified tip and the last checkpoint height — +with no separate fetch cursor, fetch-ahead cap, or eviction watermark to manage. The committer +only ever looks up a root for a block it is about to commit, and persisted provisional roots are +naturally bounded above by the header tip and cleaned up below it: each provisional root is +**deleted from `zakura_header_commitment_roots_by_height` when its block body commits** (so the +column family does not grow without bound), and header-store rollback also trims provisional +roots above the rollback target (§5.3). + +### 4.4 Mode selection: fast under checkpoint sync + +The fast-vs-legacy choice is driven by user-facing config, not by env vars. The axes are +`consensus.checkpoint_sync` (full checkpoint trust), `consensus.disable_vct_fast_sync` (initial +rollout force-disable for VCT fast sync), and `state.storage_mode` (Archive vs. Pruned, an +orthogonal pruning axis). 
The resulting modes: + +| Mode | Config | Tree behavior | +| --- | --- | --- | +| **Archive** (default) | `consensus.checkpoint_sync = true`, `consensus.disable_vct_fast_sync = false`, `storage_mode = archive` | Fast — verified roots folded in, recompute skipped. Unpruned (raw tx + indexes kept). No per-height tree history below the last checkpoint height _for now_ (§7, §10). | +| **Pruning** | `consensus.checkpoint_sync = true`, `consensus.disable_vct_fast_sync = false`, `storage_mode.pruned` | Fast — same as Archive, **plus** raw-tx/index pruning outside the retention window. | +| **Force-disabled VCT** | `consensus.checkpoint_sync = true`, `consensus.disable_vct_fast_sync = true` (any storage mode) | Legacy — keeps checkpoint sync enabled but fully reconstructs the Sapling/Orchard trees per block. | +| **Checkpoint sync disabled** | `consensus.checkpoint_sync = false` (any storage mode) | Legacy — fully reconstructs the Sapling/Orchard trees per block, using only mandatory checkpoints. | + +Gating fast on `checkpoint_sync` is also a correctness precondition: the embedded last checkpoint height +frontier is pinned to the network's **full** max checkpoint height (§5.2), which only applies +when `checkpoint_sync = true` (with it `false`, the effective max checkpoint drops to the +Canopy mandatory checkpoint, so there is no valid last checkpoint height to resume from). zebrad mirrors +`consensus.checkpoint_sync` into the state config at startup +(`state_config.checkpoint_sync`), so the state makes the decision without depending on +`zebra-consensus`. + +Precedence is resolved by a pure, unit-tested `select_source_mode` (no process env, no embedded +files in the decision — `consensus.checkpoint_sync`, `consensus.disable_vct_fast_sync`, and the +embedded-frontier presence are passed in as plain inputs): + +1. 
`consensus.checkpoint_sync = false`, `consensus.disable_vct_fast_sync = true`, or a network + with **no embedded frontier** → **legacy** (no VCT state, zero overhead); +2. else → **peer** (the default under checkpoint sync where embedded frontiers exist). + +The earlier file-backed checkpoint/fixture root source (`VCT_FAST`/`VCT_FIXTURE`) and capture +mode (`VCT_CAPTURE`) were transient integration scaffolding before peer delivery existed and +have been removed. `VCT_REGTEST_FRONTIER` remains as a Regtest final-frontier test hook. +`consensus.disable_vct_fast_sync = true` is the supported user-facing way to force the legacy +committer without disabling checkpoint sync (the deliberate opt-out for the default-on path; see +the status note at the top of this document). + +## 5. Payload, wire, and the source seam + +### 5.1 Per-block commitment roots (the wire payload) + +`zebra_chain::parallel::commitment_aux::BlockCommitmentRoots` holds `{ height, sapling_root, +orchard_root }` with `ZcashSerialize`/`ZcashDeserialize`. It lives in `zebra-chain` so +`zebra-network` and `zebra-state` share one type without a dependency cycle. `orchard_root` is +the empty/default root below NU5. The deserializer treats `height` as an unvalidated `u32`: a +wrong or out-of-range height simply fails to match any local header during verification (§6), +so it is harmless; malformed root bytes are rejected by the root parsers. + +The payload carries **no trust**: a recipient re-verifies every root against its own +checkpoint-committed headers (§6) before folding it in, so a forwarding/serving node is +exactly as trustworthy as an originating one. + +### 5.2 The final frontier at the last checkpoint height (embedded) + +Fast mode never advances the running Sapling/Orchard frontiers below the checkpoint, so the +real frontiers at the checkpoint must be supplied for the resume. 
`FinalFrontiers { height, +sapling, orchard, sprout }` is embedded in the binary +(`zebra-state/src/service/finalized_state/vct/mainnet-frontier.bin`, via `include_bytes!`), +tied to the network's max checkpoint height (validated on load: +`embedded VCT final frontier height must match the network's max checkpoint height`). When the +Mainnet checkpoint list advances, this file is regenerated alongside the checkpoint artifacts +by the maintenance tool described in §16. + +- **Sprout** is frozen far below any modern checkpoint, so the tip Sprout tree is its frontier. +- **Subtree tips are not carried**: the resuming chain recomputes them from the frontier + position. +- **Regtest** has no fixed checkpoint (its list is derived at runtime), so there is no constant + to embed; for deterministic e2e testing the frontier is loaded from the file named by + `VCT_REGTEST_FRONTIER` and validated against the Regtest checkpoint height. This is scoped to + Regtest only — Mainnet always uses the embedded constant and never reads the env. + +### 5.3 The `CommitmentRootSource` seam + +`CommitmentRootSource` (`zebra-state/.../finalized_state/commitment_aux.rs`) abstracts _where_ +the fast path's roots and its final frontier at the last checkpoint height come from. The committer (`VctState.source`) reads +through this one seam regardless of source: + +```rust +fn vct_root(&self, height) -> Option<(sapling::Root, orchard::Root)>; +fn vct_last_checkpoint_height(&self) -> Option<Height>; +fn final_frontiers(&self) -> Option<&FinalFrontiers>; +fn invalidate(&self, height); // drop a rejected root so a replacement can be re-delivered +fn evict_committed_through(&self, height); // drop roots for already-committed heights +``` + +Implementations: + +- `PeerSource` — the production default, a **DB-backed reader** (`PeerSource::new_with_db`). Each + `vct_root(height)` reads the provisional root for that height from the + `zakura_header_commitment_roots_by_height` column family that header sync persisted (§4.2). 
The + last checkpoint height frontier is held immutably from the embedded constant, so only roots come + from the network. `invalidate` **deletes** a rejected root from that column family so the next + read misses and header sync can re-deliver a verifiable replacement from another peer (the key + to not letting one malicious peer wedge a bad root in place — §8, §11). An in-memory cache + variant (`PeerSource::new`, paired with a `PeerSourceWriter`) remains as **test-only** + scaffolding for proptests that fill roots without a database. +- `FixtureSource` — a crate-local `#[cfg(test)]` source over the same height→roots map, used only + to isolate committer behavior and DB-produced payload round trips without networking. + +The **producer** half (`produce_block_roots(db, range)` / `produce_final_frontiers(db, +height)`) derives the same payload from a database's per-height trees — the serving read path +(§9), minus the network. The producer→`PeerSource`→committer round-trip proving producer and +consumer agree is `vct_db_produced_payload_round_trips`. + +Because the production `PeerSource` reads straight from the database, peer mode no longer +exports a root-writer handle. Header sync writes provisional roots through `CommitHeaderRange` +on the normal state write path, and the committer reads them back through the same database. The +old per-state `TreeAuxRootsWriter` / `PeerSourceHandle` / targeted-refetch signal are removed. +The persisted roots store no peer identity; peer accountability for bad roots is the header-sync +reactor's misbehavior reporting (§8.1), preserving the `zebra-state` / `zebra-network` crate +boundary. + +### 5.4 Roots on the header-sync message + +There is no separate roots stream. The header-sync `HeaderSyncMessage` carries roots in two +places (`zebra-network/src/zakura/header_sync/wire.rs`): + +- `GetHeaders { start_height, count, want_tree_aux_roots }` — the requester sets + `want_tree_aux_roots` only for finalized ranges. 
+- `Headers { headers, body_sizes, tree_aux_roots }` — `tree_aux_roots` is **all-or-nothing**: + either empty, or exactly one `BlockCommitmentRoots` per header, in ascending height order + aligned to `start_height`. A one-byte `has_roots` marker precedes the roots on the wire. + +Wire and DoS bounds: + +- The byte budget that bounds a `Headers` message accounts for the per-header root + (`HEADER_SYNC_BLOCK_COMMITMENT_ROOTS_BYTES = 4 + 32 + 32`), and the static + range-fits-budget assertion includes it, so requesting roots reduces the per-message header + count accordingly (`inbound_get_headers_count_limit(.., want_tree_aux_roots)`). +- Decoding validates: the `has_roots` marker must be 0 or 1 (`InvalidBoolMarker`); roots are + present only when the request wanted them (`UnrequestedTreeAuxRoots`); the root count equals + the header count (`TreeAuxRootCountMismatch`); and the root vector is preallocated only with + the already-bounded header count, never an independent untrusted length. +- The reactor additionally checks each root's height is `start_height + offset` + (`TreeAuxRootHeightMismatch` / `validate_tree_aux_root_heights`) and rejects any roots on a + non-finalized range, before the roots reach state. State re-checks both invariants in + `CommitHeaderRange` (`prepare_header_range_batch_with_roots`) as defense in depth. + +`BlockCommitmentRoots` still carries no trust: a recipient re-verifies every root against its +own checkpoint-committed headers (§6) before folding it in, so a forwarding/serving node is +exactly as trustworthy as an originating one. + +## 6. Verification — verify-before-commit + +Before a supplied root influences consensus state, the committer confirms it against the +node's own checkpoint-committed headers. The logic lives in +`finalized_state/commitment_aux_verify.rs` and reuses the existing consensus check +`block_commitment_is_valid_for_chain_history` plus `HistoryTree::push` — **no new crypto**. 
+ +A block's header commitment binds the history tree _as of its parent_, so the root supplied +for height `H` is folded into a candidate history tree and confirmed when `H+1`'s commitment +is checked against that candidate. A wrong root makes that check fail and the block is +**rejected, not recomputed** (§8). The standalone `verify_commitment_roots` returns the first +offending height; over `[start..=end]` it confirms `[start..=end-1]`, and `end+1` confirms +`end`. + +### 6.1 Direct header checks below Heartwood and NU5 + +The ZIP-221 MMR does not authenticate everything, so two gaps are closed by direct comparison +(no one-block lag — a wrong root is rejected at the block's own commit): + +- **Sapling below Heartwood** (`verify_supplied_sapling_root_below_heartwood`): there is no MMR + yet, so the header's `FinalSaplingRoot` is compared directly; pre-Sapling the root must be + the empty-tree root. At/above Heartwood the MMR path authenticates it. +- **Orchard below NU5** (`verify_supplied_orchard_root_below_nu5`): the V1 history leaf + (Heartwood..Canopy) _ignores_ the Orchard root and there is no MMR below Heartwood, so no + header commits to an Orchard root below NU5 — yet the fast path folds the supplied Orchard + root into the anchor set for every block. The Orchard tree is provably empty there (no + Orchard actions are allowed), so the supplied root is pinned to the empty-tree root. Without + this, an untrusted source could inject an Orchard anchor the legacy recompute never produces, + breaking the §11 trust boundary and consensus equivalence. This was a real hole, masked only + while the source was a trusted fixture; the in-flight peer source would have armed it + (fix in commit #190). + +### 6.2 The one-block lag and the dedup + +A block's own commitment check `C(X, T_{X-1})` is the _identical_ computation the previous +fast block already ran as its look-ahead one commit earlier. 
The committer caches the +look-ahead result as `(next_height, next_hash)` and skips a block's own check when the prior +look-ahead validated exactly it. The guard is hash identity and heights are monotonic, so a +stale or cloned cache entry can never cause a false skip. Steady state drops from two +commitment checks per block to one (legacy parity) while still attesting every root before it +is persisted. A fast block below the last checkpoint height with no buffered successor is deferred by the write +worker until the successor arrives; the block at the last checkpoint height is the only no-successor fast commit +because the embedded final frontier independently authenticates that height's roots. The cache +is cleared at the last checkpoint height and on legacy blocks. The dedup is observable +(`state.vct.prevalidated.block.count`) so it cannot silently regress. + +### 6.3 The auth-data-root cache lock + +The NU5+ commitment check trusts a precomputed `AuthDataRoot` carried on +`CheckpointVerifiedBlock` (so the single-threaded committer does not recompute it). Every +cached value is computed from the block by the constructors, so it is correct _by +construction_ — but the public API previously let it be desynced after construction +(`pub auth_data_root`, `DerefMut`, both re-exported). A holder could swap the block while +keeping a stale root, and a header matching the stale root would finalize a block without +proving the header binds the block's actual authorizing data. The (block, auth-data-root) pair +is now locked together: `auth_data_root` is `pub(crate)`, `CheckpointVerifiedBlock` drops +`DerefMut`, the one legitimately-post-set field goes through +`set_deferred_pool_balance_change`, and the semantic verifier builds blocks through +`from_semantic_data` (auth-data root left unset). Compile-time enforced (fix in commit #192). + +## 7. 
The fast commit path and the last checkpoint height + +The commit-path hook lives in `finalized_state.rs`; everything about _where data comes from_ +lives in the `vct` and `commitment_aux` submodules, so the commit path holds only the last-checkpoint-height +logic. For a checkpoint-verified block at `height`: + +1. **Fast-root lookup.** `vct.vct_root(height)` returns the supplied roots, or `None`. +2. **If supplied (fast path):** + - run the own-commitment check unless the dedup (§6.2) already validated it; + - apply the direct below-Heartwood/below-NU5 checks (§6.1); + - build a candidate history tree with the roots folded in (`HistoryTree::push`); + - **verify-before-commit:** either check the buffered successor's commitment against the + candidate (the one-block-lag confirmation) and cache `(height+1, next_hash)` as + pre-validated, or, at the last checkpoint height only, verify the embedded final frontiers + against this height's roots; a failure means _this_ height's root is bad → reject and + evict (§8); + - fold the roots into the anchor set, skip the frontier recompute, and **freeze** the + note-commitment frontier (`vct_frontier_frozen = true`) for fast blocks below the last checkpoint height. +3. **Last checkpoint height** (when `height` is the last checkpoint height): verify the embedded frontier + against this block's verified root (`frontier.root() == verified root`; collision resistance + makes the root a binding commitment to the frontier), write it as the real tip treestate via + the normal write path, and **unfreeze** — heights at/above the last checkpoint height resume legacy + recompute from a correct frontier. +4. **If not supplied:** §8. 
+ +The write worker enforces the successor side of this contract before calling the committer: if +a queued checkpoint block would take the fast path, is not the last checkpoint height, and has no +buffered successor yet, it is parked locally and retried when another checkpoint block arrives. +It is not reported through the invalid-block reset path, because no verification failure has +occurred — the needed `H+1` witness is merely not buffered yet. + +**Persistent fast-synced databases.** A persistent fast sync marks the database with a +`fast_sync_metadata` column family recording the last checkpoint height (DB format minor bump to +**27.3.0**, consolidated with the roots serving index and history-tree repair). This is a sibling +to `pruning_metadata`, not a reuse — pruning drops tx bytes and keeps trees, fast-sync drops the +per-height trees; a DB can be both. Because fast sync deletes nothing, a **completed** fast-synced +DB (tip at/above the last checkpoint height) **reopens in any storage mode** — a reopen loses no servable data, +and `consensus.disable_vct_fast_sync = true` or `consensus.checkpoint_sync = false` simply resumes +the legacy recompute from the real tip frontier. + +The one reopen that _is_ refused is an **interrupted** fast sync (frozen frontier, tip below the +last checkpoint height) reopened with the fast path disabled (legacy mode — +`consensus.disable_vct_fast_sync = true`, `consensus.checkpoint_sync = false`, or no embedded +frontier). The on-disk frontier is stale and no source can supply the verified roots, so the +fail-closed policy (§8) would refuse every below-last checkpoint height block forever. The open guard refuses +with a clear recovery path (finish the fast sync under `consensus.checkpoint_sync = true` and +`consensus.disable_vct_fast_sync = false`, or re-sync from genesis) instead of stalling silently. 
+Guards: per-height tree reads return `None` below the last checkpoint height (before the backward search, so no +stale tree and no panic); `z_gettreestate` returns a typed archive-mode error below the last checkpoint height; +genesis-root and subtree format-validity checks skip fast-synced DBs. + +## 8. Failure policy — fail closed on a frozen frontier + +While the frontier is frozen (a fast sync has folded roots but the last-checkpoint-height commit has not yet written +the real frontier), the on-disk frontier is **stale**. A legacy recompute in that window would +extend the stale frontier and fold a _wrong_ root into the MMR — corrupting consensus state. +So the committer **fails closed** rather than falling back to recompute (commit #211): + +- A supplied root that fails _any_ verification step is **evicted** from its source (so a + re-fetch from another peer can replace it) and the commit is **refused** with the typed, + **retryable** `VctSuppliedRootUnavailable { height }` error — not retried against the same + rejected root forever, and not recomputed locally. +- A frozen-frontier height with **no** valid supplied root (never fetched, or just evicted) + refuses with the same retryable error and leaves the database untouched. The block commits + once a verifiable root is fetched. +- A fast block below the last checkpoint height with a valid supplied root but **no buffered successor** is not a + root failure: the write worker defers it locally until `H+1` is available to authenticate + the candidate history tree. If a direct committer caller bypasses that deferral, the + committer still fails closed before writing. +- The frozen flag is **seeded from the durable fast-sync marker on open**, not just tracked + in-session: a fast sync interrupted by a restart (frozen frontier persisted, tip below the + last checkpoint height) still refuses on the first post-restart height with a missing root. 
The frozen + region is exactly `tip < last_checkpoint_height` (the last checkpoint height itself carries the real frontier). + +Outside the frozen window (legacy), a missing root is +simply the ordinary legacy recompute — bit-identical to today. Inside the frozen window, a +missing root parks the current checkpoint block and retries the same commit **in place** once +header sync re-delivers the root for that finalized range — **without resetting the block +queue**. A peer-supplied root that has no buffered successor to confirm it against the header +chain (the one-block lag) is likewise **deferred, not committed on faith**: an untrusted tip +root is rejected before it is persisted, rather than one block too late (when it would be +irreversibly on disk and could wedge the sync). Test-only trusted local sources are exempt and +commit a tip root on the in-arrears check. This is the safety contract: **a bad, slow, or +withholding peer cannot publish a root that influences state without authentication; after +freeze, a later bad or missing re-delivery never writes wrong state and does not reset the block +queue for root availability.** A height that stays stuck on a retryable stall past a threshold escalates +to an error-level log and the `state.vct.root.stalled.height` gauge, so a genuinely unservable +root surfaces loudly instead of a silent stall. Because roots are delivered in-band with the +finalized header range and persisted before commit (§4.2), the common case is that the frozen +window is never entered without its roots in hand. Counters: +`state.vct.root.rejected.count` (evicted after failing verification), +`state.vct.root.unavailable.count` (frozen-frontier hole refused), +`state.vct.root.await_successor.count` (deferred for a missing successor), +`state.vct.root.retry.count` (park-and-retry attempts), and the +`state.vct.root.stalled.height` gauge (raised once a height is stuck past the warn threshold). 
+ +### 8.1 Adversarial peer handling + +With roots carried in-band on header sync, there is no separate `tree_aux` driver and no bespoke +provenance/cooldown/demotion/hedging policy. Bad roots are handled in two layers: + +- **At the wire/reactor boundary**, a peer that sends a malformed root set — wrong count, + misaligned height, roots on a non-finalized range, roots that were not requested, or an + invalid marker byte — is reported through header sync's existing misbehavior path + (`report_misbehavior(.., MalformedMessage)`), and the range is retried. None of those roots + reach state. +- **At verify-before-commit**, a well-formed but _wrong_ root fails authentication against the + header commitment (§6). The committer evicts it (`PeerSource::invalidate` **deletes** it from + `zakura_header_commitment_roots_by_height`) and refuses the commit with the retryable + `VctSuppliedRootUnavailable` error (§8). Header sync then re-requests that finalized range and + delivers a replacement root from whichever peer answers; the block commits in place once a + verifiable root arrives, without resetting the block queue. + +This keeps the honest-peer-available liveness loop: a lying peer causes at most one retryable +refusal per height, its persisted root is dropped, and the height is re-fetched over header sync +from another peer. Peer accountability rides header sync's general misbehavior scoring rather +than a roots-specific cooldown table, so the committer still attributes nothing to peers itself +and `zebra-state` keeps no dependency on `zebra-network` peer types. + +This still cannot guarantee liveness under a true eclipse where every selectable peer withholds +or lies. In that case the node remains **fail-closed**: no wrong state is written, the root stays +retryable, and the §8 stall metrics/logs surface the unservable height. + +## 9. 
The serving read path (`BlockRoots`) + +A node serves roots from local state via `ReadRequest::BlockRoots { start_height, count }` → +`ReadResponse::BlockRoots(Vec)`. The read handler: + +- clamps the range to the best **header** tip (which may run ahead of committed bodies); +- serves **committed** verified roots first, from the compact `commitment_roots_by_height` index + (so a fast-synced node lacking historical per-height trees can still serve), falling back to + `produce_block_roots` over per-height trees only on a pre-index archive database; +- then appends **provisional** header-ahead roots from `zakura_header_commitment_roots_by_height` + for the contiguous heights that have headers but no committed body yet — committed roots win on + any overlap because they are already verified; +- returns an empty vec for out-of-range/empty requests. + +When this read backs a header-sync serve, the header-sync driver attaches roots only when it has +a **complete aligned set** for the served header range +(`tree_aux_roots_for_served_header_range`). A partial set is served as rootless headers, never as +a partial root vector — which the all-or-nothing wire format (§5.4) would reject anyway. The +driver maps read errors and wrong responses to a rootless serve, never wrong data. + +## 10. Serving availability (open design concern) + +Fast-synced nodes serve roots from `commitment_roots_by_height`, while older archive-produced +nodes can still derive roots from per-height trees. This keeps the root-serving fleet available +as more nodes fast-sync. A client that finds no serving peer degrades to legacy speed before +freeze or retries the finalized range over header sync in the frozen window; it does not corrupt +state. Two mechanisms address it, in order of cost: + +- **Roots-index CF (lightweight, preferred).** A fast node already verified every root it + folded in. 
Persisting them into a compact column family (~68 bytes/block, ~200 MB for all of + Mainnet) lets it serve them without per-height trees, at near-zero extra cost. A background + task can backfill missing lower ranges by fetching _roots_ (not bodies), so even a + snapshot-started node becomes a full-range roots server cheaply. This is the targeted fix for + the §10 serving-availability gap. +- **Indexing-follower resync (heavyweight, opt-in).** Rebuild the per-height trees off the + consensus critical path (re-downloading bodies if pruned), turning a fast node into a full + archive node. This pays back the cost fast-sync avoided, so it is the archive/RPC path + (increments 7–8), not a default. + +Protocol hygiene that reduces the failure surface meanwhile: header sync already fans requests +across peers and retries a finalized range from another peer on failure, so a peer that cannot +serve roots for a range simply yields rootless headers and the requester re-asks elsewhere. +Serving provisional header-ahead roots in addition to committed ones (§9) widens the servable +range to the header tip without per-height trees. + +## 11. Trust boundary and security + +The trust boundary is sharp: **every peer-provided root must be authenticated against a header +commitment before it influences the anchor set or the history MMR.** Consequences: + +- The wire payload (§5.1) and the source seam (§5.3) carry no trust; a serving/forwarding node + is exactly as trustworthy as an originating one. +- The below-NU5 Orchard pin and below-Heartwood Sapling check (§6.1) close the only ranges the + MMR cannot vouch for. Skipping either would let an untrusted source inject an anchor the + legacy recompute never produces — a consensus-equivalence break, not just a slowdown. +- The frozen-frontier fail-closed policy (§8) means a hostile root never corrupts state: it is + deleted and refused. 
A malformed root set is rejected at the header-sync reactor before it + reaches state and is scored through header sync's misbehavior path; a well-formed wrong root is + evicted on verify-before-commit and re-fetched over header sync from another peer (§8.1). This + prevents one lying-but-well-formed peer from grinding the sync height by height when honest + peers are available. +- DoS bounds on the header-sync roots fields (§5.4) — the all-or-nothing count check, the + per-height alignment check, the bounded preallocation, and the message byte budget — protect + the serving and client paths from unbounded memory growth. +- The auth-data-root cache lock (§6.3) closes a cross-crate API hole that could otherwise + finalize a block without binding its authorizing data. + +## 12. Increment roadmap + +- **Increments 0–5 (done):** the fast path proven end-to-end from a local test source — the + source seam, verify-before-commit against headers, the frontier-recompute skip, and the + verified checkpoint last checkpoint height with persistent fast-synced databases. +- **Increment 6a — peer source: fetch + serve (happy-path POC).** The first peer transport for + roots: originally a standalone roots-only `tree_aux` stream with its own serving side, driver, + and in-memory `PeerSource` cache — the first point at which real nodes obtained roots over the + network. +- **Increment 6b — adversarial peer policy.** A `zebrad` driver recorded height→peer provenance + and ran a roots-specific cooldown/demotion/disconnect policy over the `tree_aux` stream. +- **Increment 6c — fold roots into header sync (current).** The standalone `tree_aux` stream, + its driver, in-memory cache writer, and bespoke peer policy are **removed**. Roots now ride the + header-sync `Headers` message as all-or-nothing finalized-range metadata (§4.2, §5.4), are + persisted provisionally to `zakura_header_commitment_roots_by_height` ahead of body commit, and + are read back by a DB-backed `PeerSource`. 
Recovery from a bad/missing root is an in-place + commit retry fed by header sync re-delivery; peer accountability rides header sync's existing + misbehavior scoring (§8.1). +- **Increment 7 — indexing follower lane (archive only).** Relocate `tx_by_loc` + address + indexes and the per-height trees + subtree CFs onto an async follower, so archive mode regains + historical RPC without re-adding the frontier recompute to the consensus path. +- **Increment 8 — archive mode via the follower.** Run the full per-block recompute off the + critical path to restore `z_gettreestate` / `GetSubtreeRoots`, while the consensus lane uses + verified roots. +- **Increment 9 — spec / ZIP.** Publish the cross-client payload schema and verification + algorithm so other clients (zcashd, zaino, …) can serve and verify identically. + +### Supporting fix: Zakura header-store rollback + +Independent of the fast path but on the same branch, `rollback_finalized_state` now also rolls +back the Zakura header store (`delete_zakura_headers_above`). The header store races ahead of +the body chain and is keyed independently; leaving it untouched on a rollback kept a +`BestHeaderTip` above the new body tip, which stalled body sync (the contiguous floor body was +never requestable) until the 5-minute timeout fell back to legacy ChainSync. +(Commits #198 and #202.) + +## 13. 
Observability + +Live commit-path counters distinguish the fast and legacy paths and the failure modes: + +| Metric | Meaning | +| --- | --- | +| `state.vct.fast.block.count` | block folded supplied roots, skipped the recompute | +| `state.vct.legacy.block.count` | block recomputed the frontier (`consensus.disable_vct_fast_sync = true`, `consensus.checkpoint_sync = false`, or fell back outside the frozen window) | +| `state.vct.prevalidated.block.count` | dedup sub-case: the previous fast block's look-ahead already validated this header | +| `state.vct.root.rejected.count` | supplied root failed verification and was deleted for re-delivery | +| `state.vct.root.unavailable.count` | frozen-frontier height with no valid root; commit refused (retryable) | +| `state.vct.root.retry.count` | park-and-retry attempts on a retryable VCT root stall | +| `state.vct.fast_path.hit` | a finalized commit consumed header-carried roots to skip the recompute | +| `state.vct.fast_path.miss` | a finalized commit did not take the fast path | +| `state.vct.root.stalled.height` (gauge) | a height stuck on a retryable stall past the warn threshold | + +The header-sync `headers_received` / `headers_served` / commit-state trace rows also carry +`want_tree_aux_roots` and `tree_aux_roots_len`, so root delivery is visible per range. The +fast-vs-legacy ratio (`state.vct.fast_path.hit` vs `miss`) is the signal an integration test +asserts to prove roots actually came over the wire rather than a silent legacy sync. + +## 14. 
Testing strategy + +- **Unit:** the `BlockCommitmentRoots` wire round-trip; the header-sync `Headers`/`GetHeaders` + round-trip carrying roots, plus the all-or-nothing / count-mismatch / height-misalignment / + invalid-marker / unrequested-roots rejections + (`decode_rejects_tree_aux_roots_when_not_requested`, + `non_finalized_response_carrying_tree_aux_roots_is_malformed`) and the byte-budget clamp with + roots requested; `select_source_mode` precedence (`consensus.disable_vct_fast_sync = true` or + `consensus.checkpoint_sync = false` ⇒ legacy regardless of storage mode or embedded frontier; + checkpoint sync + enabled VCT + embedded frontier ⇒ peer); a completed fast-synced DB reopens + in archive mode (`reopening_fast_synced_database_in_archive_mode_succeeds`) while an interrupted + one reopened with the fast path off is refused + (`reopening_interrupted_fast_sync_without_a_root_source_panics`); the below-NU5 Orchard pin and + below-Heartwood Sapling check; the `verify_commitment_roots` lag (wrong root rejected at H+1); + the dedup (second consecutive fast block skips its check; a stale cache entry does not cause a + false skip); the all-or-nothing serving helper + (`served_header_tree_aux_roots_require_complete_coverage`); provisional-root persistence and + cleanup on body commit (`write_block_deletes_matching_provisional_zakura_roots`); + `PeerSource::invalidate` eviction; and the in-process producer → `PeerSource` → committer + byte-identical equivalence. +- **Frozen-frontier proptests:** a frozen-frontier hole returns the retryable + `VctSuppliedRootUnavailable` and leaves the DB untouched; a reopened committer (frozen marker + persisted) still refuses on the first post-restart missing root. 
+- **Header-sync transport:** the header-sync driver tests (`zakura_header_sync_driver_tests`) + exercise serving and committing finalized ranges with roots end-to-end, including the + all-or-nothing serving helper (roots attached only on complete coverage, otherwise rootless + headers) and routing received roots into `CommitHeaderRange`. +- **State persistence:** `CommitHeaderRange` persists provisional roots into + `zakura_header_commitment_roots_by_height`, rejects count/height mismatches, deletes a + provisional root when its body commits, and trims provisional roots above a header-store + rollback target. +- **Real-data manual runs (`#[ignore]`, env-gated):** `verifies_real_nu5_range_over_synced_forks` + verifies the real NU5/V2 range against synced archive forks (corrupted root rejected at H+1). +- **Headline end-to-end (manual, follow-up):** a fresh node fast-syncing + `verified_tip + 1` → checkpoint from a peer and reaching byte-identical consensus state, with + `state.vct.fast.block.count > 0`. The full two-process Regtest docker e2e is unblocked by the + `VCT_REGTEST_FRONTIER` override but crosses crate boundaries that cannot be wired into CI + without a dependency cycle, so it stays manual. + +## 15. 
File map + +| Area | File | +| --- | --- | +| Wire payload (`BlockCommitmentRoots`) | `zebra-chain/src/parallel/commitment_aux.rs` | +| Source seam, `PeerSource`, producers, bulk root invalidation | `zebra-state/src/service/finalized_state/commitment_aux.rs` | +| Verify-before-commit logic | `zebra-state/src/service/finalized_state/commitment_aux_verify.rs` | +| Embedded frontier plumbing, `select_source_mode`, counters | `zebra-state/src/service/finalized_state/vct.rs` | +| `checkpoint_sync` mirror field (mode input) | `zebra-state/src/config.rs`; set in `zebrad/src/commands/start.rs` | +| Embedded Mainnet frontier | `zebra-state/src/service/finalized_state/vct/mainnet-frontier.bin` | +| Commit-path hook, last checkpoint height, frozen-frontier policy | `zebra-state/src/service/finalized_state.rs` | +| `BlockRoots` serving read (committed + provisional) | `zebra-state/src/service.rs` | +| Provisional roots CF (`zakura_header_commitment_roots_by_height`), persistence, body-commit/rollback cleanup | `zebra-state/src/service/finalized_state/zebra_db/block.rs`, `.../rollback.rs` | +| `CommitHeaderRange` with roots, fast-path hit/miss metrics | `zebra-state/src/service/write.rs` | +| Header-sync wire (`GetHeaders`/`Headers` roots, markers, byte budget) | `zebra-network/src/zakura/header_sync/wire.rs` | +| Header-sync root validation (count, height alignment, markers) | `zebra-network/src/zakura/header_sync/validation.rs`, `.../error.rs` | +| Header-sync reactor (request/serve/receive roots, misbehavior) | `zebra-network/src/zakura/header_sync/reactor.rs` | +| Header-sync driver: serve `BlockRoots`, all-or-nothing helper, route received roots | `zebrad/src/commands/start/zakura/header_sync_driver.rs` | + +## 16. Frontier regeneration tool + +The embedded Mainnet frontier is a release artifact coupled to the last Mainnet checkpoint. 
+Whenever the checkpoint list's max height changes, the matching +`zebra-state/src/service/finalized_state/vct/mainnet-frontier.bin` must be regenerated from a +synced Zebra state at that same height. + +This belongs in the checkpoint-maintenance flow rather than in node runtime configuration. The +`zebra-checkpoints` utility runs against a synced node and produces the `HEIGHT HASH` +checkpoint artifact consumed by `.github/workflows/checkpoint-update.yml`. It also has an +explicit Mainnet frontier-artifact output: + +```text +zebra-checkpoints \ + --addr 127.0.0.1:8232 \ + --last-checkpoint <last-checkpoint-height> \ + --mainnet-frontier-output /tmp/mainnet-frontier.bin \ + --state-cache-dir <zebra-state-cache-dir> \ + --frontier-height auto +``` + +The checkpoint stdout format stays unchanged. The frontier is written only when +`--mainnet-frontier-output` is supplied, and status details go to stderr so the existing +checkpoint log scraper remains stable. `--frontier-height auto` means "use the final Mainnet +checkpoint height generated by this run"; an explicit height is useful for local validation and +debugging. `--state-cache-dir` is required whenever `--mainnet-frontier-output` is supplied. +With `--frontier-height auto`, the utility fails if the run did not emit any checkpoint above +genesis, because there is no updated last checkpoint height to pair with the frontier artifact. + +The frontier generator must read Zebra's finalized state, not reconstruct trees from RPC block +data. Checkpoint generation only needs block hashes and sizes, but frontier generation needs the +exact Sapling, Orchard, and Sprout note-commitment trees. 
The utility therefore opens Zebra +state read-only and calls `zebra-state` helpers that: + +- open the finalized DB read-only from the supplied state cache directory; +- read the Sapling and Orchard trees at the requested height; +- read the tip Sprout tree (Sprout is frozen far below modern checkpoints); +- serialize `FinalFrontiers { height, sapling, orchard, sprout }` using the same byte format + parsed by node startup: `height` as `u32` little-endian, followed by length-prefixed + `IntoDisk` blobs for Sapling, Orchard, and Sprout; +- immediately validate the generated bytes by parsing them through the same height-checking + path used for the embedded frontier (`produce_final_frontiers_bytes` followed by + `validate_final_frontiers_bytes`). + +The GCP checkpoint-generation workflow copies `/tmp/mainnet-frontier.bin` out of the Mainnet +checkpoint-generation container and uploads it as a separate artifact named +`generate-checkpoints-mainnet-frontier`. `checkpoint-update.yml` replaces the embedded frontier +only when it appends new Mainnet checkpoints, and fails closed if Mainnet checkpoints advance +but the frontier artifact is missing, empty, or has an embedded height that does not match the +updated checkpoint max height. + +Local testing proves byte compatibility with the node loader: + +- build a small legacy `FinalizedState` over a generated valid chain; +- produce frontier bytes from that DB at a chosen height; +- write the bytes to a temporary file; +- load the file through the same loader/parser path used by `VCT_REGTEST_FRONTIER` and the + embedded Mainnet frontier; +- assert the parsed height matches, the parsed Sapling/Orchard/Sprout roots match the DB, and + parsing with a different expected height fails. + +That test is the compatibility contract: if the local tool writes bytes that pass this path, the +node will parse the artifact in the same way at startup. 
+ +The focused local checks are: + +```text +cargo test -p zebra-state final_frontier +cargo test -p zebra-utils --features zebra-checkpoints +cargo test -p zebrad --features zebra-checkpoints checkpoints +``` diff --git a/docs/plans/headersync_roots_review.md b/docs/plans/headersync_roots_review.md new file mode 100644 index 00000000000..dd5e62366bd --- /dev/null +++ b/docs/plans/headersync_roots_review.md @@ -0,0 +1,88 @@ +# PR #282 review — `feat!: enforce ranged header requests have roots` + +Branch `review/headersync-roots` @ `8c2f7d379` onto `perf-note-commit-tree` (`e73e09d71`, includes #254). + +Scope: ranged Zakura header-sync responses/commits must now carry exactly one +`tree_aux_root` per header (previously optional / all-or-nothing). Threaded through +wire decode → reactor serving/inbound → state commit → root-covered best-header-tip +capping (state service + header-sync driver, startup + steady-state). + +## Verdict + +Design and implementation are consistent end-to-end. One real compile bug found and +fixed; remaining red tests are all pre-existing on the base branch (documented as flups +below). The roots invariant holds transitively: a header only enters a peer's store via +`CommitHeaderRange` (now mandates roots → persists provisional roots) or a full-block +commit (roots derivable from state), so any header a peer can serve, it can also serve a +root for. Tip propagation still flows over full-block `NewBlock` gossip, so the +mandatory-roots rule on _ranged_ requests does not starve the tip. + +## Bug fixed in this review + +- **zebrad lib tests did not compile.** `start.rs`'s `zakura_header_sync_driver_tests` + imports `block_roots_cover_range` and `root_covered_query_best_header_tip` via + `super::zakura::`, but `zebrad/src/commands/start/zakura/mod.rs` never re-exported them + (both are `pub(crate)` in `header_sync_driver.rs` and used in-module by production code). 
+ The PR author missed this because their local `librocksdb-sys` build failed before + reaching zebrad, so the zebrad tests never compiled. Fix: added both to the + `#[cfg(test)]` re-export block in `mod.rs`. The reported `E0282` was a cascade from the + unresolved import. + +## Flups — pre-existing test failures (NOT caused by #282; reproduce on base `e73e09d71`) + +1. **`zebra-state` proptest `service::finalized_state::tests::prop::vct_frozen_frontier_survives_reopen`.** + Panics at `finalized_state.rs:551`: "database was previously synced in verified + commitment tree mode ... fast path ... is disabled. Set `consensus.checkpoint_sync = true` + and `consensus.disable_vct_fast_sync = false`...". This is #254's VCT fast-sync resume + gate; the proptest reopen config doesn't satisfy the resume preconditions. Verified to + fail identically on the base branch. Relies on later VCT-resume wiring → flup. + +2. **`zebrad` legacy block-sync vectors (run via nextest):** + `components::sync::tests::vectors::request_genesis_accepts_duplicate_finalized_genesis`, + `...::sync_block_too_high_obtain_tips`, `...::sync_block_too_high_extend_tips`. + Legacy (non-Zakura) sync component, untouched by this PR. Verified to fail identically + on the base branch → flup. + +3. **`zebra-network` testkit network tests (env-flaky):** + `zakura::testkit::cluster::tests::connected_peers_import_each_others_signed_records` and + `...::native_stream5_status_exchange_uses_handler_wire_path`. Real iroh peer + registration with 5s timeouts; fail only under parallel-build CPU load, **pass in + isolation**. Harness flakiness, not a sync defect → flup. + +## Harness notes (not failures) + +- `cargo test -p zebrad --lib` (single process) cascades ~76 failures from one root panic: + `zebra_test::init()` → color-eyre `install().unwrap()` → "a hook has already been + installed", poisoning the init `Once`. CI uses **nextest** (process-per-test), which + sidesteps this. 
Always validate zebrad with `cargo nextest run`, not `cargo test --lib`. +- `cargo clippy --workspace -- -D warnings` fails on **pre-existing** zebra-chain lints + (`unexpected_cfgs: tx_v6` at `transaction.rs:1099`; 4× `ValueCommitment` Copy-clone), + not on anything in #282. PR-touched files are clippy-clean. +- Build requires `CXXFLAGS="-include cstdint"` on GCC 15 (the `librocksdb-sys` C++ / + `<cstdint>` failure the PR author hit). Not a code issue. + +## Non-blocking review observations (candidate follow-ups for the author) + +- **Redundant double root-cover.** `ReadRequest::BestHeaderTip` already returns the + root-covered tip (`root_covered_best_header_tip` in the state service), yet + `drive_zakura_header_sync_actions` re-applies `root_covered_query_best_header_tip` to + that result on every `query_best_header_tip` tick — two extra state reads + (`Tip` + `BlockRoots`) plus a duplicated root scan. Correct (idempotent/monotonic) but + wasteful; consider keeping the cap in one layer. +- **Per-height serving cost.** `block_roots_by_height_range` does point lookups per height + (`finalized_tip_height()` + `serve_block_roots(h..=h)` + provisional read each iteration), + up to `MAX_HEADER_SYNC_HEIGHT_RANGE` = 4000, on a hot serving path that previously used a + single range scan. Consider batching the finalized/provisional reads. +- **Stream version.** `ZAKURA_HEADER_SYNC_STREAM_VERSION` stays `4` while v4 semantics flip + from "optional all-or-nothing roots" to "mandatory one-per-header". An old-v4 peer + answering a non-finalized range would now be rejected (`TreeAuxRootCountMismatch` → + `MalformedMessage`). Fine for a pre-GA fleet upgraded together, but a deliberate + bump-to-5 would make the incompatibility explicit. 
+- **No backfill migration for pre-existing rootless header rows.** A DB written under the + old optional-roots regime has header rows without provisional roots; after upgrade those + ranges serve empty and the advertised tip is capped to the verified tip until re-synced + with roots. Self-heals (no wedge), but there is no explicit migration. Confirm this is the + intended degradation path (cross-ref the earlier "header-carried roots" plan that leaned + toward keeping roots optional). +- **CHANGELOG.** `feat!` with no CHANGELOG entry; intentional for experimental Zakura + internals, but worth a deliberate note. diff --git a/zebra-chain/Cargo.toml b/zebra-chain/Cargo.toml index 2d0bc55361d..96c78903b6e 100644 --- a/zebra-chain/Cargo.toml +++ b/zebra-chain/Cargo.toml @@ -173,5 +173,10 @@ harness = false name = "note_commitment_hash" harness = false +[[bench]] +name = "precompute_threshold" +harness = false +required-features = ["bench"] + [lints] workspace = true diff --git a/zebra-chain/benches/block.rs b/zebra-chain/benches/block.rs index 916890d8f52..8a2558bdb96 100644 --- a/zebra-chain/benches/block.rs +++ b/zebra-chain/benches/block.rs @@ -3,18 +3,24 @@ use std::io::Cursor; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; use zebra_chain::{ block::{ + merkle::{AuthDataRoot, AUTH_DIGEST_PLACEHOLDER}, tests::generate::{ large_multi_transaction_block, large_single_transaction_block_many_inputs, }, Block, }, serialization::{ZcashDeserialize, ZcashSerialize}, + transparent, }; -use zebra_test::vectors::BLOCK_TESTNET_141042_BYTES; +use zebra_test::vectors::{ + BLOCK_MAINNET_1687107_BYTES, BLOCK_MAINNET_1687121_BYTES, BLOCK_TESTNET_141042_BYTES, +}; + +const MIN_PARALLEL_CHECKPOINT_PREPARE_TRANSACTIONS: usize = 16; fn block_serialization(c: &mut Criterion) { // Biggest block from `zebra-test`. 
@@ -49,9 +55,101 @@ fn block_serialization(c: &mut Criterion) { } } +fn checkpoint_prepare_substages(c: &mut Criterion) { + let blocks = vec![ + ( + "BLOCK_TESTNET_141042", + Block::zcash_deserialize(Cursor::new(BLOCK_TESTNET_141042_BYTES.as_slice())).unwrap(), + ), + ( + "BLOCK_MAINNET_1687107", + Block::zcash_deserialize(Cursor::new(BLOCK_MAINNET_1687107_BYTES.as_slice())).unwrap(), + ), + ( + "BLOCK_MAINNET_1687121", + Block::zcash_deserialize(Cursor::new(BLOCK_MAINNET_1687121_BYTES.as_slice())).unwrap(), + ), + ( + "large_multi_transaction_block", + large_multi_transaction_block(), + ), + ]; + + let mut group = c.benchmark_group("Checkpoint Prepare Substages"); + + for (name, block) in blocks { + let (transaction_hashes, auth_digests): (Vec<_>, Vec<_>) = { + if block.transactions.len() < MIN_PARALLEL_CHECKPOINT_PREPARE_TRANSACTIONS { + block + .transactions + .iter() + .map(|tx| tx.txid_and_auth_digest()) + .unzip() + } else { + use rayon::prelude::*; + block + .transactions + .par_iter() + .map(|tx| tx.txid_and_auth_digest()) + .unzip() + } + }; + group.bench_with_input( + BenchmarkId::new("txid_auth_digest", name), + &block, + |b, block| { + b.iter(|| { + let digests: (Vec<_>, Vec<_>) = if block.transactions.len() + < MIN_PARALLEL_CHECKPOINT_PREPARE_TRANSACTIONS + { + block + .transactions + .iter() + .map(|tx| tx.txid_and_auth_digest()) + .unzip() + } else { + use rayon::prelude::*; + block + .transactions + .par_iter() + .map(|tx| tx.txid_and_auth_digest()) + .unzip() + }; + digests + }) + }, + ); + + group.bench_with_input( + BenchmarkId::new("auth_data_root", name), + &auth_digests, + |b, auth_digests| { + b.iter_batched( + || auth_digests.clone(), + |auth_digests| { + auth_digests + .into_iter() + .map(|auth_digest| auth_digest.unwrap_or(AUTH_DIGEST_PLACEHOLDER)) + .collect::() + }, + BatchSize::SmallInput, + ) + }, + ); + + if block.coinbase_height().is_some() { + group.bench_function(BenchmarkId::new("new_ordered_outputs", name), |b| { + b.iter(|| 
transparent::new_ordered_outputs(&block, &transaction_hashes)) + }); + } + } + + group.finish(); +} + criterion_group!( name = benches; config = Criterion::default().noise_threshold(0.05).sample_size(50); - targets = block_serialization + targets = block_serialization, checkpoint_prepare_substages ); criterion_main!(benches); diff --git a/zebra-chain/benches/precompute_threshold.rs b/zebra-chain/benches/precompute_threshold.rs new file mode 100644 index 00000000000..fd051f84539 --- /dev/null +++ b/zebra-chain/benches/precompute_threshold.rs @@ -0,0 +1,96 @@ +//! Benchmarks to find where the precompute's rayon parallelism stops paying off. +//! +//! For a range of per-block note counts, this compares: +//! - `serial`: appending the notes one at a time to a fresh tree (no rayon), the +//! cost the committer pays inline today; and +//! - `parallel`: `NoteCommitmentTree::precompute_append` (rayon `into_par_iter` + +//! `rayon::join`), the off-committer precompute. +//! +//! The crossover — the smallest count where `parallel` beats `serial` — is the +//! point below which gating off rayon (hashing serially) avoids paying overhead +//! that does not buy anything. Orchard's Sinsemilla `combine` dominates, so it is +//! the meaningful pool to measure; Sapling is shown as a control. + +// Disabled due to warnings in criterion macros +#![allow(missing_docs)] + +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use halo2::pasta::pallas; + +use zebra_chain::orchard::tree::NoteCommitmentTree as OrchardTree; +use zebra_chain::sapling::tree::NoteCommitmentTree as SaplingTree; + +/// Note counts spanning the small-batch region where rayon overhead is expected +/// to dominate, up to sizes where parallelism clearly wins. +const NOTE_COUNTS: &[usize] = &[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]; + +fn orchard_notes(count: usize) -> Vec { + // Small integers are canonical Pallas field elements. 
+ (0..count as u64).map(pallas::Base::from).collect() +} + +fn sapling_notes(count: usize) -> Vec { + (0..count as u64) + .map(|value| { + let mut bytes = [0u8; 32]; + bytes[..8].copy_from_slice(&value.to_le_bytes()); + Option::from(sapling_crypto::note::ExtractedNoteCommitment::from_bytes( + &bytes, + )) + .expect("small little-endian integer is a canonical Jubjub base") + }) + .collect() +} + +fn bench_orchard(c: &mut Criterion) { + let mut group = c.benchmark_group("orchard_precompute_threshold"); + + for &count in NOTE_COUNTS { + let notes = orchard_notes(count); + group.throughput(Throughput::Elements(count as u64)); + + group.bench_with_input(BenchmarkId::new("serial", count), ¬es, |b, notes| { + b.iter(|| { + let mut tree = OrchardTree::default(); + for note in notes { + tree.append(*black_box(note)).expect("tree is not full"); + } + black_box(tree.root()); + }) + }); + + group.bench_with_input(BenchmarkId::new("parallel", count), ¬es, |b, notes| { + b.iter(|| black_box(OrchardTree::precompute_then_apply_root(black_box(notes)))) + }); + } + + group.finish(); +} + +fn bench_sapling(c: &mut Criterion) { + let mut group = c.benchmark_group("sapling_precompute_threshold"); + + for &count in NOTE_COUNTS { + let notes = sapling_notes(count); + group.throughput(Throughput::Elements(count as u64)); + + group.bench_with_input(BenchmarkId::new("serial", count), ¬es, |b, notes| { + b.iter(|| { + let mut tree = SaplingTree::default(); + for note in notes { + tree.append(*black_box(note)).expect("tree is not full"); + } + black_box(tree.root()); + }) + }); + + group.bench_with_input(BenchmarkId::new("parallel", count), ¬es, |b, notes| { + b.iter(|| black_box(SaplingTree::precompute_then_apply_root(black_box(notes)))) + }); + } + + group.finish(); +} + +criterion_group!(benches, bench_orchard, bench_sapling); +criterion_main!(benches); diff --git a/zebra-chain/benches/transaction.rs b/zebra-chain/benches/transaction.rs index 1267546fd32..548d65104f9 100644 --- 
a/zebra-chain/benches/transaction.rs +++ b/zebra-chain/benches/transaction.rs @@ -114,9 +114,48 @@ fn bench_transaction_deserialize(c: &mut Criterion) { group.finish(); } +fn bench_transaction_digest(c: &mut Criterion) { + let mut group = c.benchmark_group("Transaction Digest"); + + let block = Block::zcash_deserialize(Cursor::new( + zebra_test::vectors::BLOCK_MAINNET_1687107_BYTES.as_slice(), + )) + .expect("valid block"); + let v5_orchard = block + .transactions + .iter() + .find(|tx| tx.version() == 5) + .expect("block has a v5 transaction"); + + let block = Block::zcash_deserialize(Cursor::new( + zebra_test::vectors::BLOCK_MAINNET_1687121_BYTES.as_slice(), + )) + .expect("valid block"); + let v5_later_nu5 = block + .transactions + .iter() + .find(|tx| tx.version() == 5) + .expect("block has a v5 transaction"); + + let tx_samples = vec![ + ("V5 orchard 1687107", v5_orchard), + ("V5 orchard 1687121", v5_later_nu5), + ]; + + for (label, tx) in tx_samples { + group.bench_with_input( + BenchmarkId::new("txid_and_auth_digest", label), + tx, + |b, tx| b.iter(|| tx.txid_and_auth_digest()), + ); + } + + group.finish(); +} + criterion_group! 
{ name = benches; config = Criterion::default().noise_threshold(0.1).sample_size(50); - targets = bench_transaction_deserialize + targets = bench_transaction_deserialize, bench_transaction_digest } criterion_main!(benches); diff --git a/zebra-chain/src/block/commitment.rs b/zebra-chain/src/block/commitment.rs index 7b1f4e6b23a..e0c3a8141ca 100644 --- a/zebra-chain/src/block/commitment.rs +++ b/zebra-chain/src/block/commitment.rs @@ -396,6 +396,16 @@ pub enum CommitmentError { actual: [u8; 32], }, + #[error( + "invalid pre-NU5 orchard root: expected the empty-tree root {:?}, actual: {:?}", + hex::encode(expected), + hex::encode(actual) + )] + InvalidPreNu5OrchardRoot { + expected: [u8; 32], + actual: [u8; 32], + }, + #[error("missing required block height: block commitments can't be parsed without a block height, block hash: {block_hash:?}")] MissingBlockHeight { block_hash: block::Hash }, diff --git a/zebra-chain/src/orchard/tree.rs b/zebra-chain/src/orchard/tree.rs index 099b8f21905..8da1f13e1ca 100644 --- a/zebra-chain/src/orchard/tree.rs +++ b/zebra-chain/src/orchard/tree.rs @@ -28,12 +28,30 @@ use zcash_primitives::merkle_tree::HashSer; use sinsemilla::HashDomain; use crate::{ + parallel::batch_frontier::{ + apply_append_batch_with_subtree, precompute_append_batch_with_subtree, BatchFrontierError, + PrecomputedSubtreeAppend, + }, serialization::{ serde_helpers, ReadZcashExt, SerializationError, ZcashDeserialize, ZcashSerialize, }, subtree::{NoteCommitmentSubtreeIndex, TRACKED_SUBTREE_HEIGHT}, }; +/// The precomputed parallel-append work for one block's Orchard note commitments, +/// produced off the committer by [`NoteCommitmentTree::precompute_append`] and +/// applied with [`NoteCommitmentTree::apply_precomputed_append`]. +#[derive(Clone, Debug)] +pub(crate) struct PrecomputedAppendBatch(PrecomputedSubtreeAppend); + +impl PrecomputedAppendBatch { + /// The tree size (leaf [`count`](NoteCommitmentTree::count)) this precompute + /// must be applied to. 
+ pub(crate) fn start_size(&self) -> u64 { + self.0.start_size() + } +} + pub mod legacy; use legacy::LegacyNoteCommitmentTree; @@ -344,6 +362,25 @@ impl<'de> serde::Deserialize<'de> for Node { pub enum NoteCommitmentTreeError { #[error("The note commitment tree is full")] FullTree, + + #[error("Invalid precompute: empty batch, stale start size, or multi-subtree batch")] + InvalidPrecompute, +} + +impl From for NoteCommitmentTreeError { + fn from(error: BatchFrontierError) -> Self { + match error { + // A capacity overflow is the tree being full. + BatchFrontierError::Frontier(_) => NoteCommitmentTreeError::FullTree, + // The remaining variants are caller-supplied precompute misuse, which + // is reported as a recoverable error rather than panicking. + BatchFrontierError::BatchSpansMultipleSubtrees + | BatchFrontierError::EmptyBatch + | BatchFrontierError::PrecomputeStartMismatch { .. } => { + NoteCommitmentTreeError::InvalidPrecompute + } + } + } } /// Orchard Incremental Note Commitment Tree @@ -458,6 +495,67 @@ impl NoteCommitmentTree { })) } + /// Precomputes the parallel-append work for `note_commitments` against a tree + /// of size `start_size`, off the committer. See the Sapling equivalent. Returns + /// [`NoteCommitmentTreeError::InvalidPrecompute`] for an empty `note_commitments`, + /// rather than panicking. + pub(crate) fn precompute_append( + start_size: u64, + note_commitments: &[NoteCommitmentUpdate], + ) -> Result { + let nodes: Vec = note_commitments + .iter() + .map(|commitment_x| (*commitment_x).into()) + .collect(); + + let inner = precompute_append_batch_with_subtree::<_, MERKLE_DEPTH>(start_size, &nodes)?; + + Ok(PrecomputedAppendBatch(inner)) + } + + /// Applies a [`PrecomputedAppendBatch`] from [`Self::precompute_append`], + /// returning any completed [`TRACKED_SUBTREE_HEIGHT`] subtree, exactly like + /// [`Self::append_batch`]. 
`precomputed.start_size()` must equal this tree's + /// [`count`](Self::count); a stale precompute returns + /// [`NoteCommitmentTreeError::InvalidPrecompute`] (rather than panicking) so + /// callers can fall back to [`Self::append_batch`]. + #[allow(clippy::unwrap_in_result)] + pub(crate) fn apply_precomputed_append( + &mut self, + precomputed: PrecomputedAppendBatch, + ) -> Result, NoteCommitmentTreeError> { + let (frontier, completed) = + apply_append_batch_with_subtree(self.inner.clone(), precomputed.0)?; + + self.inner = frontier; + *self + .cached_root + .get_mut() + .expect("a thread that previously held exclusive lock access panicked") = None; + + Ok(completed.map(|(index_value, root)| { + let index = NoteCommitmentSubtreeIndex( + index_value.try_into().expect("subtree index fits in u16"), + ); + (index, root) + })) + } + + /// Benchmark-only: precompute the parallel append for `note_commitments` + /// (rayon hashing), apply the precomputed subtree roots onto a fresh tree, and return the resulting root. + /// Mirrors the committer's precompute path end-to-end so the + /// `precompute_threshold` benchmark can compare it against a serial append. + #[cfg(feature = "bench")] + #[doc(hidden)] + pub fn precompute_then_apply_root(note_commitments: &[NoteCommitmentUpdate]) -> [u8; 32] { + let mut tree = NoteCommitmentTree::default(); + let precomputed = + Self::precompute_append(0, note_commitments).expect("non-empty batch in benchmark"); + tree.apply_precomputed_append(precomputed) + .expect("fresh tree matches start size 0"); + tree.root().into() + } + /// Returns frontier of non-empty tree, or `None` if the tree is empty. fn frontier(&self) -> Option<&NonEmptyFrontier> { self.inner.value() diff --git a/zebra-chain/src/parallel.rs b/zebra-chain/src/parallel.rs index 08505a05adf..663fe5d616d 100644 --- a/zebra-chain/src/parallel.rs +++ b/zebra-chain/src/parallel.rs @@ -1,4 +1,5 @@ //! Parallel chain update methods. 
pub mod batch_frontier; +pub mod commitment_aux; pub mod tree; diff --git a/zebra-chain/src/parallel/batch_frontier.rs b/zebra-chain/src/parallel/batch_frontier.rs index 12ae93fe7f7..621ca8526f7 100644 --- a/zebra-chain/src/parallel/batch_frontier.rs +++ b/zebra-chain/src/parallel/batch_frontier.rs @@ -46,6 +46,18 @@ pub enum BatchFrontierError { /// The batch would complete more than one tracked subtree. BatchSpansMultipleSubtrees, + + /// A precompute was requested for, or applied to, an empty batch of leaves. + EmptyBatch, + + /// A precompute was applied to a frontier whose size does not match the size + /// the precompute was computed against (a stale look-ahead). + PrecomputeStartMismatch { + /// The tree size the precompute was computed against. + expected: u64, + /// The actual size of the frontier the precompute was applied to. + found: u64, + }, } impl fmt::Display for BatchFrontierError { @@ -57,6 +69,15 @@ impl fmt::Display for BatchFrontierError { BatchFrontierError::BatchSpansMultipleSubtrees => { write!(f, "batch spans more than one tracked subtree boundary") } + BatchFrontierError::EmptyBatch => { + write!(f, "precompute requested for an empty batch of leaves") + } + BatchFrontierError::PrecomputeStartMismatch { expected, found } => { + write!( + f, + "precompute computed for tree size {expected} applied to a frontier of size {found}" + ) + } } } } @@ -104,9 +125,24 @@ fn merge_complete_subtree( } } +/// Below this many leaves in a batch, the per-leaf Merkle hashing is done entirely +/// serially (no rayon at all). Benchmarks (`precompute_threshold`) show that for +/// small batches the rayon `join`/`par_iter` overhead matches or exceeds the +/// hashing it parallelizes — the crossover is ~16 note commitments for both +/// Sapling Pedersen and Orchard Sinsemilla — so gating below it avoids paying for +/// parallelism that does not buy anything on the common small/empty blocks. +/// +/// This gates the *whole-batch* decision only. 
Above it, the per-chunk reduction +/// still splits all the way down (see [`perfect_subtree_root`]): the largest chunk +/// of a medium batch benefits from internal parallelism, so capping the split +/// granularity here would instead *serialize* that chunk and regress medium +/// batches. +pub(crate) const PARALLEL_HASH_THRESHOLD: usize = 16; + /// Computes the root of a perfect subtree of exactly `2^k` `leaves`, using a -/// parallel divide-and-conquer reduction. The combine hashes within and across -/// the two halves are independent, so this scales across the rayon pool. +/// parallel divide-and-conquer reduction across the rayon pool. The combine hashes +/// within and across the two halves are independent, so this scales across cores. +/// Used for large batches; small batches use [`perfect_subtree_root_serial`]. fn perfect_subtree_root(leaves: &[H]) -> H { debug_assert!(leaves.len().is_power_of_two()); if leaves.len() == 1 { @@ -123,6 +159,23 @@ fn perfect_subtree_root(leaves: &[H]) -> H { H::combine(child_level, &l, &r) } +/// Serial reduction of a perfect subtree of exactly `2^k` `leaves`, with no rayon +/// overhead. Used for small batches (see [`PARALLEL_HASH_THRESHOLD`]). +fn perfect_subtree_root_serial(leaves: &[H]) -> H { + debug_assert!(leaves.len().is_power_of_two()); + if leaves.len() == 1 { + return leaves[0].clone(); + } + let half = leaves.len() / 2; + let child_level = Level::from(half.trailing_zeros() as u8); + let (left, right) = leaves.split_at(half); + H::combine( + child_level, + &perfect_subtree_root_serial(left), + &perfect_subtree_root_serial(right), + ) +} + /// Returns true if the leaves before the frontier tip include a complete /// `2^level` subtree. 
/// @@ -366,6 +419,239 @@ where } } +// --- Off-committer precompute / apply_precompute split --------------------------------- +// +// [`parallel_append`] does two things: it hashes the new leaves into complete +// subtree roots (the dominant cost on heavy shielded blocks), and it merges +// those roots onto the existing frontier. The hashing depends only on the +// starting leaf *position*, not on the frontier's hashes, so it can run ahead of +// the committer, concurrently across many blocks. [`precompute_subtree_roots`] +// does that hashing; [`apply_precompute`] does the cheap merge on the committer. Their +// composition is byte-identical to [`parallel_append`] (differential proptests). + +/// The position-independent result of appending a run of `num_leaves` leaves +/// starting at tree size [`start_position`](Self::start_position): the +/// parallel-hashed complete subtree roots, plus the last (raw tip) leaf. +#[derive(Clone, Debug)] +pub(crate) struct PrecomputedAppend { + /// Tree size (next leaf position) this was hashed against. [`apply_precompute`] must be + /// applied to a frontier of exactly this size. + start_position: u64, + /// Number of leaves in the run (>= 1). + num_leaves: usize, + /// `(level, root)` for each complete subtree chunk of the first + /// `num_leaves - 1` leaves, in ascending position order. + chunk_roots: Vec<(usize, H)>, + /// The last leaf, which becomes the applied frontier's raw tip. + tip_leaf: H, +} + +/// Hashes the complete subtree roots for appending `new_leaves` to a tree of size +/// `start_position`, in parallel. The expensive, position-independent half of +/// [`parallel_append`]; pair with [`apply_precompute`]. +/// +/// Returns [`BatchFrontierError::EmptyBatch`] if `new_leaves` is empty: the +/// precompute represents a non-empty append (its tip is the last leaf), so an +/// empty batch is reported as a recoverable error rather than panicking. 
+pub(crate) fn precompute_subtree_roots( + start_position: u64, + new_leaves: &[H], +) -> Result, BatchFrontierError> +where + H: Hashable + Clone + Send + Sync, +{ + let num_leaves = new_leaves.len(); + let (tip_leaf, leaves_to_merge) = new_leaves + .split_last() + .ok_or(BatchFrontierError::EmptyBatch)?; + let tip_leaf = tip_leaf.clone(); + + let chunks = complete_subtree_chunks(start_position, leaves_to_merge); + // Small batches hash entirely serially (no rayon); larger batches fan the chunks + // out across the pool and split each chunk down to the leaves. See + // [`PARALLEL_HASH_THRESHOLD`]. + let chunk_roots: Vec<(usize, H)> = if leaves_to_merge.len() <= PARALLEL_HASH_THRESHOLD { + chunks + .into_iter() + .map(|(level, leaves)| (level, perfect_subtree_root_serial(leaves))) + .collect() + } else { + chunks + .into_par_iter() + .map(|(level, leaves)| (level, perfect_subtree_root(leaves))) + .collect() + }; + + Ok(PrecomputedAppend { + start_position, + num_leaves, + chunk_roots, + tip_leaf, + }) +} + +/// Merges a [`PrecomputedAppend`] onto `frontier`, returning the updated frontier. +/// The cheap, committer-side half of [`parallel_append`] (O(log N) merges). +/// +/// The frontier's size MUST equal the precompute's `start_position`. Callers +/// compare and recompute via [`parallel_append`] on mismatch, so a mismatch here +/// is reported as a recoverable [`BatchFrontierError::PrecomputeStartMismatch`] +/// (a stale precompute must not panic the process). 
+pub(crate) fn apply_precompute( + frontier: Frontier, + precomputed: PrecomputedAppend, +) -> Result, BatchFrontierError> +where + H: Hashable + Clone + Send + Sync, +{ + let (mut complete_subtree_roots, next_leaf_position) = + frontier_complete_subtree_roots(&frontier); + + if next_leaf_position != precomputed.start_position { + return Err(BatchFrontierError::PrecomputeStartMismatch { + expected: precomputed.start_position, + found: next_leaf_position, + }); + } + + for (level, root) in precomputed.chunk_roots { + merge_complete_subtree(&mut complete_subtree_roots, level, root); + } + + let new_tip_position = next_leaf_position + (precomputed.num_leaves as u64 - 1); + let complete_subtree_roots = complete_subtree_roots.into_iter().flatten().collect(); + + Ok(Frontier::from_parts( + Position::from(new_tip_position), + precomputed.tip_leaf, + complete_subtree_roots, + )?) +} + +/// The precomputed form of [`append_batch_with_subtree`]: the parallel hashing for +/// one block's nodes, split at the tracked-subtree boundary if it crosses one. +/// Produced by [`precompute_append_batch_with_subtree`] off the committer and +/// applied with [`apply_append_batch_with_subtree`]. +#[derive(Clone, Debug)] +pub(crate) struct PrecomputedSubtreeAppend { + /// Tree size this was hashed against; the frontier it is applied to must match. + start_size: u64, + inner: PrecomputedSubtreeKind, +} + +#[derive(Clone, Debug)] +enum PrecomputedSubtreeKind { + /// The batch fits within one tracked-subtree window. + Single(PrecomputedAppend), + /// The batch crosses one tracked-subtree boundary, completing the subtree at + /// `index_value`. `head` ends the subtree; `tail` continues after it (`None` + /// if the batch ends exactly on the boundary). + Boundary { + head: PrecomputedAppend, + tail: Option>, + index_value: u64, + }, +} + +impl PrecomputedSubtreeAppend { + /// The tree size this precompute assumes — the frontier `tree_size` it must + /// be applied to. 
+ pub(crate) fn start_size(&self) -> u64 { + self.start_size + } +} + +/// Precomputes the parallel hashing for appending `nodes` to a tree of size +/// `start_size`, off the committer. Mirrors [`append_batch_with_subtree`]'s +/// boundary handling. `nodes` must be non-empty. +pub(crate) fn precompute_append_batch_with_subtree( + start_size: u64, + nodes: &[H], +) -> Result, BatchFrontierError> +where + H: Hashable + Clone + Send + Sync, +{ + use crate::subtree::TRACKED_SUBTREE_HEIGHT; + + if nodes.is_empty() { + return Err(BatchFrontierError::EmptyBatch); + } + + let new_size = start_size + .checked_add(nodes.len() as u64) + .filter(|&new_size| new_size <= TreeCapacity::::MAX_LEAVES) + .ok_or(BatchFrontierError::Frontier( + FrontierError::MaxDepthExceeded { + depth: DEPTH.saturating_add(1), + }, + ))?; + + let subtree_size = 1u64 << TRACKED_SUBTREE_HEIGHT; + let boundary = (start_size / subtree_size) + .checked_add(1) + .and_then(|n| n.checked_mul(subtree_size)); + if boundary + .and_then(|b| b.checked_add(subtree_size)) + .is_some_and(|second_boundary| second_boundary <= new_size) + { + return Err(BatchFrontierError::BatchSpansMultipleSubtrees); + } + + let inner = if boundary.is_some_and(|b| b <= new_size) { + let boundary = boundary.expect("checked above"); + let head_len = (boundary - start_size) as usize; + let (head, tail) = nodes.split_at(head_len); + let index_value = (boundary >> TRACKED_SUBTREE_HEIGHT) - 1; + PrecomputedSubtreeKind::Boundary { + head: precompute_subtree_roots(start_size, head)?, + tail: (!tail.is_empty()) + .then(|| precompute_subtree_roots(boundary, tail)) + .transpose()?, + index_value, + } + } else { + PrecomputedSubtreeKind::Single(precompute_subtree_roots(start_size, nodes)?) + }; + + Ok(PrecomputedSubtreeAppend { start_size, inner }) +} + +/// Applies a [`PrecomputedSubtreeAppend`] onto `frontier`, returning the completed +/// tracked subtree's `(index_value, root)` if the batch crossed a boundary. 
The +/// counterpart to [`precompute_append_batch_with_subtree`]; byte-identical to +/// [`append_batch_with_subtree`]. +pub(crate) fn apply_append_batch_with_subtree( + frontier: Frontier, + precomputed: PrecomputedSubtreeAppend, +) -> Result<(Frontier, Option<(u64, H)>), BatchFrontierError> +where + H: Hashable + Clone + Send + Sync, +{ + use crate::subtree::TRACKED_SUBTREE_HEIGHT; + + match precomputed.inner { + PrecomputedSubtreeKind::Single(pre) => Ok((apply_precompute(frontier, pre)?, None)), + PrecomputedSubtreeKind::Boundary { + head, + tail, + index_value, + } => { + let f1 = apply_precompute(frontier, head)?; + // The boundary subtree root needs the applied head, so it is computed + // here on the committer (rare: once per 2^16 leaves). + let root = f1 + .value() + .expect("just appended at least one leaf") + .root(Some(Level::from(TRACKED_SUBTREE_HEIGHT))); + let f2 = match tail { + Some(tail) => apply_precompute(f1, tail)?, + None => f1, + }; + Ok((f2, Some((index_value, root)))) + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -559,6 +845,66 @@ mod tests { "frontier parts mismatch" ); } + + /// The off-committer split: precompute the subtree roots keyed only on the + /// starting leaf *count* (no frontier hashes), then apply the precomputed subtree roots onto the real + /// frontier. Must be byte-identical to the sequential append, proving the + /// precompute can run ahead of the committer using just the note position. + #[test] + fn precompute_then_apply_precompute_matches_sequential( + prefix_len in 0usize..300, + batch in proptest::collection::vec(any::().prop_map(TestNode), 1..300), + ) { + let prefix: Vec = (0..prefix_len as u64).map(TestNode).collect(); + let start = build_frontier::(&prefix); + + // Precompute is given only the count (prefix_len), not `start`. 
+ let precomputed = precompute_subtree_roots(prefix_len as u64, &batch) + .expect("non-empty batch in tests"); + prop_assert_eq!(precomputed.start_position, prefix_len as u64); + + let seq = sequential_append::(start.clone(), &batch); + let applied = apply_precompute(start, precomputed).expect("no overflow in tests"); + + prop_assert_eq!(seq.root(), applied.root(), "root mismatch"); + prop_assert_eq!( + seq.value().map(|f| f.clone().into_parts()), + applied.value().map(|f| f.clone().into_parts()), + "frontier parts mismatch" + ); + } + + /// The precomputed batch-with-subtree path (off-committer precompute + apply_precompute) + /// must produce the same frontier AND the same completed-subtree result as + /// the inline `append_batch_with_subtree`, across the tracked-subtree boundary. + #[test] + fn precompute_subtree_matches_append_batch_with_subtree( + prefix_len in 0u64..300, + batch_len in 1usize..300, + ) { + // Exercise the boundary by starting just below it, so some batches cross it. 
+ use crate::subtree::TRACKED_SUBTREE_HEIGHT; + let boundary = 1u64 << TRACKED_SUBTREE_HEIGHT; + let start_size = boundary - 1 - prefix_len.min(boundary - 1); + let prefix: Vec = (0..start_size).map(TestNode).collect(); + let start = build_frontier::(&prefix); + let batch: Vec = (1000..1000 + batch_len as u64).map(TestNode).collect(); + + let (inline_frontier, inline_completed) = + append_batch_with_subtree::<_, DEPTH>(start.clone(), batch.clone()) + .expect("no overflow in tests"); + + let precomputed = + precompute_append_batch_with_subtree::<_, DEPTH>(start_size, &batch) + .expect("no overflow in tests"); + prop_assert_eq!(precomputed.start_size(), start_size); + let (pre_frontier, pre_completed) = + apply_append_batch_with_subtree(start, precomputed) + .expect("no overflow in tests"); + + prop_assert_eq!(inline_frontier.root(), pre_frontier.root(), "root mismatch"); + prop_assert_eq!(inline_completed, pre_completed, "completed subtree mismatch"); + } } /// Spot-check small exhaustive sizes for off-by-one boundary bugs. @@ -626,6 +972,80 @@ mod tests { ); } + /// A caller-supplied `start_size` near `u64::MAX` must report a clean capacity + /// error rather than wrapping past the `MAX_LEAVES` check (which would build an + /// inconsistent precompute and panic in `apply_precompute`, or panic on overflow in debug + /// builds). + #[test] + fn precompute_start_size_overflow_is_reported() { + let batch = [TestNode(1), TestNode(2)]; + + let is_capacity_error = |result| { + matches!( + result, + Err(BatchFrontierError::Frontier( + FrontierError::MaxDepthExceeded { .. } + )) + ) + }; + + // `start_size + nodes.len()` overflows u64. + assert!( + is_capacity_error(precompute_append_batch_with_subtree::<_, DEPTH>( + u64::MAX - 1, + &batch + )), + "overflowing start_size must report a capacity error" + ); + + // `start_size` past the tree's capacity without overflowing u64. 
+ assert!( + is_capacity_error(precompute_append_batch_with_subtree::<_, DEPTH>( + TreeCapacity::::MAX_LEAVES, + &batch + )), + "start_size at capacity must report a capacity error" + ); + } + + /// Empty input is a recoverable error, not a panic: the precompute represents a + /// non-empty append (its tip is the last leaf). + #[test] + fn precompute_empty_batch_is_reported() { + let empty: [TestNode; 0] = []; + + assert_eq!( + precompute_subtree_roots(0, &empty).err(), + Some(BatchFrontierError::EmptyBatch), + "precompute_subtree_roots rejects an empty slice" + ); + assert_eq!( + precompute_append_batch_with_subtree::<_, DEPTH>(0, &empty).err(), + Some(BatchFrontierError::EmptyBatch), + "precompute_append_batch_with_subtree rejects an empty slice" + ); + } + + /// Applying a precompute onto a frontier of the wrong size is a recoverable + /// error, not a panic, so a stale look-ahead can never crash the process. + #[test] + fn apply_precompute_size_mismatch_is_reported() { + let batch = [TestNode(1), TestNode(2), TestNode(3)]; + // Precompute is keyed on tree size 5. + let precomputed = precompute_subtree_roots(5, &batch).expect("non-empty batch"); + + // Apply it to a frontier of size 2 (a different starting size). + let frontier = build_frontier::(&[TestNode(10), TestNode(11)]); + assert_eq!( + apply_precompute(frontier, precomputed).err(), + Some(BatchFrontierError::PrecomputeStartMismatch { + expected: 5, + found: 2, + }), + "apply_precompute reports a size mismatch instead of panicking" + ); + } + /// Batches that would complete more than one tracked subtree are rejected, /// because the return type can only report one completed subtree. #[test] diff --git a/zebra-chain/src/parallel/commitment_aux.rs b/zebra-chain/src/parallel/commitment_aux.rs new file mode 100644 index 00000000000..a32a82d4aaa --- /dev/null +++ b/zebra-chain/src/parallel/commitment_aux.rs @@ -0,0 +1,88 @@ +//! 
Cross-client commitment-auxiliary payload types for the verified-commitment-trees +//! fast path (`docs/design/verified-commitment-trees.md` §5). +//! +//! These travel over the Zakura `tree_aux` stream (increment 6) and are also produced +//! and consumed locally by `zebra-state`. They live here in `zebra-chain` so both +//! `zebra-network` and `zebra-state` can use them without a dependency cycle. +//! +//! The final-frontier handoff payload (§5.2) is *not* here: it is embedded in the +//! binary, not carried on the wire, so `tree_aux` is a roots-only stream. + +use std::io; + +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; + +use crate::{ + block, orchard, sapling, + serialization::{SerializationError, ZcashDeserialize, ZcashSerialize}, +}; + +/// Per-block verified commitment roots — the essential fast-path payload (design §5.1). +/// +/// One entry per height; each root is the note-commitment treestate root as of +/// end-of-block-`height`. `orchard_root` is the empty/default root below NU5. +/// +/// This payload carries no trust: a recipient re-verifies every root against its own +/// checkpoint-committed block headers (design §6) before the fast path folds it in, so +/// a forwarding/serving node is exactly as trustworthy as an originating one. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct BlockCommitmentRoots { + /// The block height these roots are for. + pub height: block::Height, + /// The Sapling note-commitment tree root as of the end of this block. + pub sapling_root: sapling::tree::Root, + /// The Orchard note-commitment tree root as of the end of this block (empty below NU5). 
+ pub orchard_root: orchard::tree::Root, +} + +impl ZcashSerialize for BlockCommitmentRoots { + fn zcash_serialize(&self, mut writer: W) -> Result<(), io::Error> { + writer.write_u32::(self.height.0)?; + self.sapling_root.zcash_serialize(&mut writer)?; + self.orchard_root.zcash_serialize(&mut writer)?; + Ok(()) + } +} + +impl ZcashDeserialize for BlockCommitmentRoots { + fn zcash_deserialize(mut reader: R) -> Result { + // The height is an unvalidated `u32` here; an out-of-range or wrong height simply + // fails to match any local header during verification (design §6), so it is + // harmless. The Sapling/Orchard root parsers reject malformed root bytes. + let height = block::Height(reader.read_u32::()?); + let sapling_root = sapling::tree::Root::zcash_deserialize(&mut reader)?; + let orchard_root = orchard::tree::Root::zcash_deserialize(&mut reader)?; + Ok(BlockCommitmentRoots { + height, + sapling_root, + orchard_root, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::serialization::ZcashDeserializeInto; + + #[test] + fn block_commitment_roots_round_trip() { + let roots = BlockCommitmentRoots { + height: block::Height(1_687_200), + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + }; + + let bytes = roots + .zcash_serialize_to_vec() + .expect("serialization to a vec does not fail"); + let parsed: BlockCommitmentRoots = bytes + .zcash_deserialize_into() + .expect("round-trips back to the original"); + + assert_eq!( + parsed, roots, + "BlockCommitmentRoots round-trips on the wire" + ); + } +} diff --git a/zebra-chain/src/parallel/tree.rs b/zebra-chain/src/parallel/tree.rs index da4981d7cb8..9f4efea18c6 100644 --- a/zebra-chain/src/parallel/tree.rs +++ b/zebra-chain/src/parallel/tree.rs @@ -1,12 +1,17 @@ //! Parallel note commitment tree update methods. 
-use std::sync::Arc; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; use thiserror::Error; use crate::{ - block::Block, - orchard, sapling, sprout, + block::{self, Block}, + orchard, + parallel::batch_frontier::PARALLEL_HASH_THRESHOLD, + sapling, sprout, subtree::{NoteCommitmentSubtree, NoteCommitmentSubtreeIndex}, }; @@ -57,6 +62,24 @@ impl NoteCommitmentTrees { pub fn update_trees_parallel( &mut self, block: &Arc, + ) -> Result<(), NoteCommitmentTreeError> { + self.update_trees_parallel_with(block, None) + } + + /// Like [`update_trees_parallel`](Self::update_trees_parallel), but applies a + /// [`BlockNotePrecompute`] computed ahead of time off the committer when one is + /// supplied and still matches the current tree sizes. + /// + /// The Sapling/Orchard per-leaf Merkle hashing is the dominant cost of + /// committing a shielded block; precomputing it concurrently (keyed only on the + /// note position) lets the committer do just the cheap apply of the precomputed subtree roots. A `None`, + /// wrong-block, or size-mismatched precompute transparently falls back to hashing inline, so the + /// result is always identical to the plain update. + #[allow(clippy::unwrap_in_result)] + pub fn update_trees_parallel_with( + &mut self, + block: &Arc, + precompute: Option, ) -> Result<(), NoteCommitmentTreeError> { let block = block.clone(); let height = block @@ -75,6 +98,17 @@ impl NoteCommitmentTrees { let sapling_note_commitments: Vec<_> = block.sapling_note_commitments().cloned().collect(); let orchard_note_commitments: Vec<_> = block.orchard_note_commitments().cloned().collect(); + // Only use the precompute if it was computed for this exact block. A + // precompute is otherwise keyed only by starting tree size, so without this + // check one accidentally paired with a different block of the same starting + // size would apply the wrong leaves and silently produce a wrong root. 
A + // mismatch (or `None`) falls back to inline hashing, which is correct, just + // slower — so this can only cost speed, never correctness. + let (sapling_precompute, orchard_precompute) = match precompute { + Some(p) if p.block_hash == block.hash() => (p.sapling, p.orchard), + _ => (None, None), + }; + let mut sprout_result = None; let mut sapling_result = None; let mut orchard_result = None; @@ -91,18 +125,20 @@ impl NoteCommitmentTrees { if !sapling_note_commitments.is_empty() { scope.spawn_fifo(|_scope| { - sapling_result = Some(Self::update_sapling_note_commitment_tree( + sapling_result = Some(Self::update_sapling_note_commitment_tree_with( sapling, sapling_note_commitments, + sapling_precompute, )); }); } if !orchard_note_commitments.is_empty() { scope.spawn_fifo(|_scope| { - orchard_result = Some(Self::update_orchard_note_commitment_tree( + orchard_result = Some(Self::update_orchard_note_commitment_tree_with( orchard, orchard_note_commitments, + orchard_precompute, )); }); } @@ -212,4 +248,265 @@ impl NoteCommitmentTrees { Ok((orchard, subtree_root)) } + + /// Like [`update_sapling_note_commitment_tree`](Self::update_sapling_note_commitment_tree), + /// but applies `precompute` (off-committer parallel hashing) when present and its + /// `start_size` still matches the tree; otherwise hashes inline. Identical result. + #[allow(clippy::unwrap_in_result)] + pub(crate) fn update_sapling_note_commitment_tree_with( + mut sapling: Arc, + sapling_note_commitments: Vec, + precompute: Option, + ) -> Result< + ( + Arc, + Option<(NoteCommitmentSubtreeIndex, sapling_crypto::Node)>, + ), + NoteCommitmentTreeError, + > { + let sapling_nct = Arc::make_mut(&mut sapling); + + let subtree_root = match precompute { + Some(pre) if pre.start_size() == sapling_nct.count() => { + sapling_nct.apply_precomputed_append(pre)? + } + _ => sapling_nct.append_batch(&sapling_note_commitments)?, + }; + + // Re-calculate and cache the tree root. 
+ let _ = sapling_nct.root(); + + Ok((sapling, subtree_root)) + } + + /// Like [`update_orchard_note_commitment_tree`](Self::update_orchard_note_commitment_tree), + /// but applies `precompute` when present and size-matched; otherwise inline. Identical result. + #[allow(clippy::unwrap_in_result)] + pub(crate) fn update_orchard_note_commitment_tree_with( + mut orchard: Arc, + orchard_note_commitments: Vec, + precompute: Option, + ) -> Result< + ( + Arc, + Option<(NoteCommitmentSubtreeIndex, orchard::tree::Node)>, + ), + NoteCommitmentTreeError, + > { + let orchard_nct = Arc::make_mut(&mut orchard); + + let subtree_root = match precompute { + Some(pre) if pre.start_size() == orchard_nct.count() => { + orchard_nct.apply_precomputed_append(pre)? + } + _ => orchard_nct.append_batch(&orchard_note_commitments)?, + }; + + // Re-calculate and cache the tree root. + let _ = orchard_nct.root(); + + Ok((orchard, subtree_root)) + } +} + +/// The off-committer precomputed parallel-append work for one block's Sapling and +/// Orchard note commitments, produced by [`BlockNotePrecompute::compute`] and applied +/// via [`NoteCommitmentTrees::update_trees_parallel_with`]. +#[derive(Clone, Debug)] +pub struct BlockNotePrecompute { + /// The hash of the block this precompute was computed for. The committer + /// applies the precompute only to this exact block, so a precompute that was + /// accidentally paired with a different block (even one with the same starting + /// tree size) is rejected instead of applying the wrong leaves. See + /// [`NoteCommitmentTrees::update_trees_parallel_with`]. + pub(crate) block_hash: block::Hash, + /// Precomputed Sapling append, if the block has Sapling outputs. + pub(crate) sapling: Option, + /// Precomputed Orchard append, if the block has Orchard actions. 
+ pub(crate) orchard: Option, +} + +impl BlockNotePrecompute { + /// Precomputes the Sapling and Orchard per-leaf Merkle hashing for `block`, + /// given the tree sizes (cumulative note counts) the block will commit at. + /// + /// Runs off the committer, concurrently across blocks. The committer then only + /// applies the precomputed subtree roots. `sapling_start` / `orchard_start` are the respective tree `count`s + /// immediately before this block; the committer re-checks them and falls back to + /// inline hashing on any mismatch. Pools with no notes (or a precompute error) + /// are left `None`, also falling back to inline. + /// + /// The Sapling and Orchard precomputes run concurrently via [`rayon::join`], + /// mirroring the per-pool parallelism of [`NoteCommitmentTrees::update_trees_parallel`]: + /// each pool's hashing is already internally parallel, and the join lets the two + /// pools overlap. For small blocks (both pools below [`PARALLEL_HASH_THRESHOLD`]) + /// they are computed sequentially, since there is too little hashing to repay the + /// cross-pool join. + /// + /// # Cancellation + /// + /// This is started speculatively for the *next* block while the *current* block + /// is still committing, so a failed or invalid current block leaves the work + /// unwanted (the committer drops the receiver). `cancel` lets the writer abort it: + /// the flag is checked once up front and again at the start of each pool's hashing, + /// so a cancel that lands before a pool starts skips that pool's work. (Once a + /// pool's hashing is under way it runs to completion — the bound is best-effort, + /// not interrupt-in-the-middle.) A cancelled call returns an empty precompute, + /// which the committer treats like any other miss and hashes inline. 
+ pub fn compute( + sapling_start: u64, + orchard_start: u64, + block: &Block, + cancel: &AtomicBool, + ) -> Self { + let block_hash = block.hash(); + + if cancel.load(Ordering::Relaxed) { + return Self { + block_hash, + sapling: None, + orchard: None, + }; + } + + let sapling_notes: Vec<_> = block.sapling_note_commitments().cloned().collect(); + let orchard_notes: Vec<_> = block.orchard_note_commitments().cloned().collect(); + + let sapling_fn = || { + if cancel.load(Ordering::Relaxed) || sapling_notes.is_empty() { + return None; + } + sapling::tree::NoteCommitmentTree::precompute_append(sapling_start, &sapling_notes).ok() + }; + let orchard_fn = || { + if cancel.load(Ordering::Relaxed) || orchard_notes.is_empty() { + return None; + } + orchard::tree::NoteCommitmentTree::precompute_append(orchard_start, &orchard_notes).ok() + }; + + let overlap_pools = sapling_notes.len() >= PARALLEL_HASH_THRESHOLD + || orchard_notes.len() >= PARALLEL_HASH_THRESHOLD; + let (sapling, orchard) = if overlap_pools { + rayon::join(sapling_fn, orchard_fn) + } else { + (sapling_fn(), orchard_fn()) + }; + + Self { + block_hash, + sapling, + orchard, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::serialization::ZcashDeserialize; + + /// A precompute started speculatively for the next block is cancellable: when + /// the writer trips the flag (because the current block's commit failed and the + /// child will be discarded), `compute` returns an empty precompute instead of + /// hashing the block. Uses a real NU5 block with Sapling notes; the flag check + /// is identical for the Orchard pool. + #[test] + fn block_note_precompute_respects_cancellation() { + let _init_guard = zebra_test::init(); + + let block = + Block::zcash_deserialize(zebra_test::vectors::BLOCK_MAINNET_1687106_BYTES.as_slice()) + .expect("hard-coded NU5 block vector deserializes"); + + // Precondition: the block exercises the Sapling pool. 
+ assert!( + block.sapling_note_commitments().next().is_some(), + "test block must have Sapling notes" + ); + + // Not cancelled: the Sapling pool is precomputed. + let live = BlockNotePrecompute::compute(0, 0, &block, &AtomicBool::new(false)); + assert!( + live.sapling.is_some(), + "a live precompute hashes the populated pool" + ); + + // Cancelled before it runs: no hashing, an empty precompute the committer + // treats as a miss (hashing inline instead). + let cancelled = BlockNotePrecompute::compute(0, 0, &block, &AtomicBool::new(true)); + assert!( + cancelled.sapling.is_none() && cancelled.orchard.is_none(), + "a cancelled precompute does no work" + ); + } + + /// A precompute is bound to the block it was computed for: applying one built for + /// a *different* block — even with the same starting tree size, which the + /// size-only guard would have accepted — must be rejected and fall back to inline + /// hashing, so it can never silently graft the wrong block's leaves. + #[test] + fn precompute_is_bound_to_its_block() { + let _init_guard = zebra_test::init(); + + // Candidate blocks; the test uses the first two that add Sapling notes. 
+ let candidates: [&[u8]; 6] = [ + zebra_test::vectors::BLOCK_MAINNET_1687106_BYTES.as_slice(), + zebra_test::vectors::BLOCK_MAINNET_1687107_BYTES.as_slice(), + zebra_test::vectors::BLOCK_MAINNET_1687108_BYTES.as_slice(), + zebra_test::vectors::BLOCK_MAINNET_1687113_BYTES.as_slice(), + zebra_test::vectors::BLOCK_MAINNET_1687118_BYTES.as_slice(), + zebra_test::vectors::BLOCK_MAINNET_1687121_BYTES.as_slice(), + ]; + let sapling_blocks: Vec = candidates + .iter() + .map(|bytes| Block::zcash_deserialize(*bytes).expect("block vector deserializes")) + .filter(|block| block.sapling_note_commitments().next().is_some()) + .collect(); + assert!( + sapling_blocks.len() >= 2, + "need two distinct Sapling blocks for this test" + ); + + let block_a = Arc::new(sapling_blocks[0].clone()); + let block_b = sapling_blocks[1].clone(); + assert_ne!(block_a.hash(), block_b.hash(), "blocks must differ"); + + // The correct trees for committing block A onto the genesis trees. + let mut correct = NoteCommitmentTrees::default(); + correct + .update_trees_parallel(&block_a) + .expect("appending block A's notes succeeds"); + + // A precompute built for block B at the same starting tree size (0) as A: its + // `start_size` matches A's tree, so the size-only guard would have applied B's + // leaves. The block-hash binding must reject it instead. + let pre_b = BlockNotePrecompute::compute(0, 0, &block_b, &AtomicBool::new(false)); + assert!( + pre_b.sapling.is_some(), + "block B exercises the Sapling pool" + ); + + let mut mismatched = NoteCommitmentTrees::default(); + mismatched + .update_trees_parallel_with(&block_a, Some(pre_b)) + .expect("update succeeds"); + assert_eq!( + mismatched.sapling.root(), + correct.sapling.root(), + "a precompute for a different block must be rejected, not grafted" + ); + + // The correctly-bound precompute for A is still applied and matches. 
+ let pre_a = BlockNotePrecompute::compute(0, 0, &block_a, &AtomicBool::new(false)); + let mut matched = NoteCommitmentTrees::default(); + matched + .update_trees_parallel_with(&block_a, Some(pre_a)) + .expect("update succeeds"); + assert_eq!( + matched.sapling.root(), + correct.sapling.root(), + "a precompute bound to this block is applied" + ); + } } diff --git a/zebra-chain/src/primitives/zcash_primitives.rs b/zebra-chain/src/primitives/zcash_primitives.rs index 524a4c34eb3..d2ee89b849f 100644 --- a/zebra-chain/src/primitives/zcash_primitives.rs +++ b/zebra-chain/src/primitives/zcash_primitives.rs @@ -513,6 +513,13 @@ fn sighash_inner( /// /// [ZIP-244]: https://zips.z.cash/zip-0244 pub(crate) fn auth_digest(tx: &Transaction) -> AuthDigest { + // Compute the v5 ZIP-244 authorizing-data digest natively, avoiding the + // `librustzcash` reparse (see `crate::transaction::zip244`). Other versions + // (e.g. v6) fall back to `librustzcash`. + if let Some(auth_digest) = crate::transaction::zip244::auth_digest(tx) { + return auth_digest; + } + let nu = tx.network_upgrade().expect("V5 tx has a network upgrade"); AuthDigest( @@ -532,6 +539,37 @@ pub(crate) fn auth_digest(tx: &Transaction) -> AuthDigest { /// /// If passed a pre-v5 transaction. pub(crate) fn txid_and_auth_digest(tx: &Transaction) -> (Hash, AuthDigest) { + // Compute the v5 ZIP-244 txid and authorizing-data digest natively, avoiding + // the `librustzcash` reparse (see `crate::transaction::zip244`). Other + // versions (e.g. v6) fall back to `librustzcash`. 
+ if let Some(result) = crate::transaction::zip244::txid_and_auth_digest(tx) { + return result; + } + + let nu = tx.network_upgrade().expect("V5 tx has a network upgrade"); + + let tx = tx + .to_librustzcash(nu) + .expect("V5 tx is convertible to its `zcash_params` equivalent"); + + let txid = Hash(*tx.txid().as_ref()); + let auth_digest = AuthDigest( + tx.auth_commitment() + .as_ref() + .try_into() + .expect("digest has the correct size"), + ); + + (txid, auth_digest) +} + +/// Computes the txid and ZIP-244 authorizing-data digest of a v5+ transaction +/// strictly via the `librustzcash` conversion, bypassing the native ZIP-244 +/// path. Used only as the differential oracle for the native implementation in +/// `crate::transaction::zip244` (see the `native_zip244_matches_librustzcash` +/// property test). +#[cfg(test)] +pub(crate) fn txid_and_auth_digest_via_librustzcash(tx: &Transaction) -> (Hash, AuthDigest) { let nu = tx.network_upgrade().expect("V5 tx has a network upgrade"); let tx = tx diff --git a/zebra-chain/src/sapling/arbitrary.rs b/zebra-chain/src/sapling/arbitrary.rs index 7323307e8d5..14477e1b85f 100644 --- a/zebra-chain/src/sapling/arbitrary.rs +++ b/zebra-chain/src/sapling/arbitrary.rs @@ -85,7 +85,9 @@ impl Arbitrary for Output { cv: ExtendedPoint::generator().into(), cm_u: sapling_crypto::note::ExtractedNoteCommitment::from_bytes(&[0u8; 32]) .unwrap(), - ephemeral_key: keys::EphemeralPublicKey(ExtendedPoint::generator().into()), + ephemeral_key: keys::EphemeralPublicKey( + jubjub::AffinePoint::from(ExtendedPoint::generator()).to_bytes(), + ), enc_ciphertext, out_ciphertext, zkproof, diff --git a/zebra-chain/src/sapling/commitment.rs b/zebra-chain/src/sapling/commitment.rs index edf1fddc8b9..c747b170025 100644 --- a/zebra-chain/src/sapling/commitment.rs +++ b/zebra-chain/src/sapling/commitment.rs @@ -4,7 +4,7 @@ use std::io; use hex::{FromHex, FromHexError, ToHex}; -use crate::serialization::{serde_helpers, SerializationError, ZcashDeserialize, 
ZcashSerialize}; +use crate::serialization::{SerializationError, ZcashDeserialize, ZcashSerialize}; #[cfg(test)] mod test_vectors; @@ -16,28 +16,82 @@ mod test_vectors; #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct CommitmentRandomness(jubjub::Fr); -/// A wrapper for the `sapling_crypto::value::ValueCommitment` type. +/// A Sapling value commitment, stored as its canonical 32-byte compressed +/// encoding. /// -/// We need the wrapper to derive Serialize, Deserialize and Equality. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct ValueCommitment( - #[serde(with = "serde_helpers::ValueCommitment")] pub sapling_crypto::value::ValueCommitment, -); - -impl PartialEq for ValueCommitment { - fn eq(&self, other: &Self) -> bool { - self.0.as_inner() == other.0.as_inner() - } -} -impl Eq for ValueCommitment {} +/// The commitment is a Jubjub curve point. Recovering the point from its +/// encoding requires a field square root (point decompression), which is +/// expensive, and the note-commitment tree uses the note commitment `cm_u`, not +/// `cv`, so the point is decompressed lazily via [`ValueCommitment::commitment`] +/// rather than eagerly at deserialization. This keeps the dominant per-block CPU +/// cost of checkpoint sync (Jubjub point decompression) off the hot path. +/// +/// # Consensus +/// +/// The not-small-order check that this type used to perform at deserialization +/// is deferred, but still enforced for every untrusted transaction. The +/// checkpoint verifier trusts block hashes and does not need it. The semantic +/// verifier and the mempool convert every transaction via `to_librustzcash` +/// (`CachedFfiTransaction::new`), and librustzcash enforces the rule at *read*: +/// `zcash_primitives`'s `read_value_commitment` uses +/// `ValueCommitment::from_bytes_not_small_order`, so a small-order `cv` makes the +/// conversion fail and the transaction is rejected. 
Validated by +/// `sapling_small_order_cv_epk_deferred_but_caught_by_librustzcash` in +/// `transaction/tests/vectors.rs`. +#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq)] +pub struct ValueCommitment(pub(crate) [u8; 32]); impl ValueCommitment { + /// Decompresses and returns the underlying `sapling_crypto` value + /// commitment, or `None` if the stored bytes are not a canonical, + /// non-small-order Jubjub point. + /// + /// This performs the point decompression that deserialization defers, so it + /// is fallible by design: the encoding is validated only where the point is + /// used, and callers must handle an invalid commitment rather than assume it + /// is valid. Consensus validation of the encoding happens on the semantic + /// path via [`crate::transaction::Transaction::sapling_point_encodings_are_valid`] + /// and `to_librustzcash`; the checkpoint verifier trusts block hashes and + /// never calls this. + pub fn commitment(&self) -> Option { + sapling_crypto::value::ValueCommitment::from_bytes_not_small_order(&self.0).into_option() + } + + /// Return the canonical 32-byte (little-endian) compressed encoding. + pub fn to_bytes(&self) -> [u8; 32] { + self.0 + } + + /// Returns true if the stored encoding is a canonical, non-small-order + /// Jubjub point, i.e. a valid value commitment per the consensus rules. + /// + /// This performs the point decompression that deserialization defers; it is + /// called by the semantic verifier (not the checkpoint verifier) to enforce + /// the not-small-order rule on untrusted transactions. + /// + /// # Consensus equivalence + /// + /// This MUST accept exactly the encodings that librustzcash accepts for a + /// `cv` on the verification path. If it diverged, Zebra and the rest of the + /// network would disagree on transaction validity — a chain split, not a + /// local bug. 
`zcash_primitives`'s `read_value_commitment` rejects a `cv` + /// unless `sapling_crypto::value::ValueCommitment::from_bytes_not_small_order` + /// returns a point, so this calls that exact function. Do not reimplement it + /// in terms of a different decoder. The equivalence is pinned by + /// `sapling_point_checks_match_librustzcash_predicates` in + /// `transaction/tests/vectors.rs`. + pub fn is_valid_not_small_order(&self) -> bool { + bool::from( + sapling_crypto::value::ValueCommitment::from_bytes_not_small_order(&self.0).is_some(), + ) + } + /// Return the hash bytes in big-endian byte-order suitable for printing out byte by byte. /// /// Zebra displays commitment value in big-endian byte-order, /// following the convention set by zcashd. pub fn bytes_in_display_order(&self) -> [u8; 32] { - let mut reversed_bytes = self.0.to_bytes(); + let mut reversed_bytes = self.0; reversed_bytes.reverse(); reversed_bytes } @@ -75,14 +129,7 @@ impl From for ValueCommitment { /// /// Panics if the given point does not correspond to a valid ValueCommitment. fn from(extended_point: jubjub::ExtendedPoint) -> Self { - let bytes = jubjub::AffinePoint::from(extended_point).to_bytes(); - - let value_commitment = - sapling_crypto::value::ValueCommitment::from_bytes_not_small_order(&bytes) - .into_option() - .expect("invalid ValueCommitment bytes"); - - ValueCommitment(value_commitment) + ValueCommitment(jubjub::AffinePoint::from(extended_point).to_bytes()) } } @@ -99,15 +146,19 @@ impl ZcashDeserialize for sapling_crypto::value::ValueCommitment { } impl ZcashDeserialize for ValueCommitment { - fn zcash_deserialize(reader: R) -> Result { - let value_commitment = sapling_crypto::value::ValueCommitment::zcash_deserialize(reader)?; - Ok(Self(value_commitment)) + fn zcash_deserialize(mut reader: R) -> Result { + // Store the canonical encoding without decompressing the Jubjub point. 
+ // The point (and its non-small-order check) is recovered lazily in + // `ValueCommitment::commitment`, only where the point is actually needed. + let mut bytes = [0u8; 32]; + reader.read_exact(&mut bytes)?; + Ok(Self(bytes)) } } impl ZcashSerialize for ValueCommitment { fn zcash_serialize(&self, mut writer: W) -> Result<(), io::Error> { - writer.write_all(&self.0.to_bytes())?; + writer.write_all(&self.0)?; Ok(()) } } diff --git a/zebra-chain/src/sapling/keys.rs b/zebra-chain/src/sapling/keys.rs index 58d9208def9..da661c5a1e2 100644 --- a/zebra-chain/src/sapling/keys.rs +++ b/zebra-chain/src/sapling/keys.rs @@ -17,9 +17,7 @@ use rand_core::{CryptoRng, RngCore}; use crate::{ error::{AddressError, RandError}, primitives::redjubjub::SpendAuth, - serialization::{ - serde_helpers, ReadZcashExt, SerializationError, ZcashDeserialize, ZcashSerialize, - }, + serialization::{ReadZcashExt, SerializationError, ZcashDeserialize, ZcashSerialize}, }; #[cfg(test)] @@ -248,64 +246,90 @@ impl PartialEq<[u8; 32]> for TransmissionKey { /// /// [1]: https://zips.z.cash/protocol/protocol.pdf#outputdesc /// [2]: https://zips.z.cash/protocol/protocol.pdf#concretesaplingkeyagreement -#[derive(Copy, Clone, Deserialize, PartialEq, Serialize)] -pub struct EphemeralPublicKey( - #[serde(with = "serde_helpers::AffinePoint")] pub(crate) jubjub::AffinePoint, -); +/// A Sapling ephemeral public key, stored as its canonical 32-byte encoding. +/// +/// The key is a Jubjub curve point, but the validator only ever needs its bytes +/// (for the txid digest and serialization); the point itself is needed only for +/// wallet trial-decryption. So the point is not decompressed at deserialization, +/// keeping the Jubjub point decompression (a field square root) off the +/// checkpoint-sync hot path, where every Sapling output carries one. 
+/// +/// # Consensus +/// +/// The not-small-order check that this type used to perform at deserialization +/// is deferred, but still enforced for every untrusted transaction. The +/// checkpoint verifier trusts block hashes and does not need it. The semantic +/// verifier and the mempool convert every transaction via `to_librustzcash` +/// (`CachedFfiTransaction::new`) and verify the Sapling bundle, and +/// librustzcash enforces the rule in `SaplingVerificationContext::check_output` +/// (sapling-crypto `verifier.rs`, `epk.is_small_order()`). Validated by +/// `sapling_small_order_cv_epk_deferred_but_caught_by_librustzcash` in +/// `transaction/tests/vectors.rs`. +#[derive(Copy, Clone, Deserialize, PartialEq, Eq, Serialize)] +pub struct EphemeralPublicKey(pub(crate) [u8; 32]); + +impl EphemeralPublicKey { + /// Returns true if the stored encoding is a canonical, non-small-order + /// Jubjub point, i.e. a valid ephemeral public key per the consensus rules. + /// + /// This performs the point decompression that deserialization defers; it is + /// called by the semantic verifier (not the checkpoint verifier) to enforce + /// the not-small-order rule on untrusted transactions. + /// + /// # Consensus equivalence + /// + /// This MUST accept exactly the encodings that librustzcash accepts for an + /// `epk` on the verification path. If it diverged, Zebra and the rest of the + /// network would disagree on transaction validity — a chain split, not a + /// local bug. librustzcash decodes `epk` with `jubjub::ExtendedPoint::from_bytes` + /// (sapling-crypto `verifier/batch.rs`) and rejects it in + /// `SaplingVerificationContext::check_output` when `epk.is_small_order()` + /// (sapling-crypto `verifier.rs`). 
Decoding as an `AffinePoint` here is + /// equivalent — both reject the same non-canonical/off-curve encodings and + /// agree on `is_small_order` — and that equivalence is pinned by + /// `sapling_point_checks_match_librustzcash_predicates` in + /// `transaction/tests/vectors.rs`. + pub fn is_valid_not_small_order(&self) -> bool { + match jubjub::AffinePoint::from_bytes(self.0).into_option() { + Some(point) => !bool::from(point.is_small_order()), + None => false, + } + } +} impl fmt::Debug for EphemeralPublicKey { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("EphemeralPublicKey") - .field("u", &hex::encode(self.0.get_u().to_bytes())) - .field("v", &hex::encode(self.0.get_v().to_bytes())) + .field("epk", &hex::encode(self.0)) .finish() } } -impl Eq for EphemeralPublicKey {} - impl From for [u8; 32] { fn from(nk: EphemeralPublicKey) -> [u8; 32] { - nk.0.to_bytes() + nk.0 } } impl From<&EphemeralPublicKey> for [u8; 32] { fn from(nk: &EphemeralPublicKey) -> [u8; 32] { - nk.0.to_bytes() + nk.0 } } impl PartialEq<[u8; 32]> for EphemeralPublicKey { fn eq(&self, other: &[u8; 32]) -> bool { - &self.0.to_bytes() == other + &self.0 == other } } impl TryFrom<[u8; 32]> for EphemeralPublicKey { type Error = &'static str; - /// Read an EphemeralPublicKey from a byte array. - /// - /// Returns an error if the key is non-canonical, or [it is of small order][1]. - /// - /// # Consensus - /// - /// > Check that a Output description's cv and epk are not of small order, - /// > i.e. \[h_J\]cv MUST NOT be 𝒪_J and \[h_J\]epk MUST NOT be 𝒪_J. - /// - /// [1]: https://zips.z.cash/protocol/protocol.pdf#outputdesc + /// Wrap a byte array as an EphemeralPublicKey without validating it (this never fails); point + /// decompression and the not-small-order check are deferred (see the type docs). 
fn try_from(bytes: [u8; 32]) -> Result { - let possible_point = jubjub::AffinePoint::from_bytes(bytes); - - if possible_point.is_none().into() { - return Err("Invalid jubjub::AffinePoint value for Sapling EphemeralPublicKey"); - } - if possible_point.unwrap().is_small_order().into() { - Err("jubjub::AffinePoint value for Sapling EphemeralPublicKey point is of small order") - } else { - Ok(Self(possible_point.unwrap())) - } + Ok(Self(bytes)) } } diff --git a/zebra-chain/src/sapling/output.rs b/zebra-chain/src/sapling/output.rs index a654386bbeb..e8344ccfffd 100644 --- a/zebra-chain/src/sapling/output.rs +++ b/zebra-chain/src/sapling/output.rs @@ -124,7 +124,7 @@ impl OutputInTransactionV4 { impl ZcashSerialize for OutputInTransactionV4 { fn zcash_serialize(&self, mut writer: W) -> Result<(), io::Error> { let output = self.0.clone(); - writer.write_all(&output.cv.0.to_bytes())?; + writer.write_all(&output.cv.0)?; writer.write_all(&output.cm_u.to_bytes())?; output.ephemeral_key.zcash_serialize(&mut writer)?; output.enc_ciphertext.zcash_serialize(&mut writer)?; @@ -151,9 +151,7 @@ impl ZcashDeserialize for OutputInTransactionV4 { // Type is `ValueCommit^{Sapling}.Output`, i.e. J // https://zips.z.cash/protocol/protocol.pdf#abstractcommit // See [`sapling_crypto::value::ValueCommitment::zcash_deserialize`]. - cv: commitment::ValueCommitment( - sapling_crypto::value::ValueCommitment::zcash_deserialize(&mut reader)?, - ), + cv: commitment::ValueCommitment::zcash_deserialize(&mut reader)?, // Type is `B^{[ℓ_{Sapling}_{Merkle}]}`, i.e. 32 bytes. // However, the consensus rule above restricts it even more. // See [`sapling_crypto::note::ExtractedNoteCommitment::zcash_deserialize`]. 
@@ -190,7 +188,7 @@ impl ZcashDeserialize for OutputInTransactionV4 { impl ZcashSerialize for OutputPrefixInTransactionV5 { fn zcash_serialize(&self, mut writer: W) -> Result<(), io::Error> { - writer.write_all(&self.cv.0.to_bytes())?; + writer.write_all(&self.cv.0)?; writer.write_all(&self.cm_u.to_bytes())?; self.ephemeral_key.zcash_serialize(&mut writer)?; self.enc_ciphertext.zcash_serialize(&mut writer)?; @@ -216,9 +214,7 @@ impl ZcashDeserialize for OutputPrefixInTransactionV5 { // Type is `ValueCommit^{Sapling}.Output`, i.e. J // https://zips.z.cash/protocol/protocol.pdf#abstractcommit // See [`sapling_crypto::value::ValueCommitment::zcash_deserialize`]. - cv: commitment::ValueCommitment( - sapling_crypto::value::ValueCommitment::zcash_deserialize(&mut reader)?, - ), + cv: commitment::ValueCommitment::zcash_deserialize(&mut reader)?, // Type is `B^{[ℓ_{Sapling}_{Merkle}]}`, i.e. 32 bytes. // However, the consensus rule above restricts it even more. // See [`sapling_crypto::note::ExtractedNoteCommitment::zcash_deserialize`]. diff --git a/zebra-chain/src/sapling/shielded_data.rs b/zebra-chain/src/sapling/shielded_data.rs index 7a08001fe53..d4817f66b2a 100644 --- a/zebra-chain/src/sapling/shielded_data.rs +++ b/zebra-chain/src/sapling/shielded_data.rs @@ -234,6 +234,24 @@ where self.transfers.outputs() } + /// Returns true if every value commitment and ephemeral public key in this + /// bundle is a canonical, non-small-order Jubjub point. + /// + /// Deserialization stores these points as raw bytes and defers the + /// not-small-order check to keep point decompression off the checkpoint-sync + /// hot path. This method performs that deferred check; the semantic verifier + /// calls it for untrusted transactions, while the checkpoint verifier (which + /// trusts block hashes) does not. Spend `rk` is validated separately at + /// deserialization. 
+ pub fn point_encodings_are_valid(&self) -> bool { + self.spends() + .all(|spend| spend.cv.is_valid_not_small_order()) + && self.outputs().all(|output| { + output.cv.is_valid_not_small_order() + && output.ephemeral_key.is_valid_not_small_order() + }) + } + /// Provide the shared anchor for this transaction, if present. /// /// The shared anchor is only present if: @@ -279,15 +297,29 @@ where /// descriptions of the transaction, and the balancing value. /// /// - pub fn binding_verification_key(&self) -> redjubjub::VerificationKeyBytes { - let cv_old: sapling_crypto::value::CommitmentSum = - self.spends().map(|spend| spend.cv.0.clone()).sum(); - let cv_new: sapling_crypto::value::CommitmentSum = - self.outputs().map(|output| output.cv.0.clone()).sum(); - - (cv_old - cv_new) - .into_bvk(self.value_balance.zatoshis()) - .into() + /// Returns `None` if any value commitment is not a canonical, non-small-order + /// point. The encodings are validated on the semantic verification path + /// (`Transaction::sapling_point_encodings_are_valid`), so a `None` here means + /// the caller is working with an unvalidated transaction. + pub fn binding_verification_key(&self) -> Option> { + let cv_old: sapling_crypto::value::CommitmentSum = self + .spends() + .map(|spend| spend.cv.commitment()) + .collect::>>()? + .into_iter() + .sum(); + let cv_new: sapling_crypto::value::CommitmentSum = self + .outputs() + .map(|output| output.cv.commitment()) + .collect::>>()? 
+ .into_iter() + .sum(); + + Some( + (cv_old - cv_new) + .into_bvk(self.value_balance.zatoshis()) + .into(), + ) } } diff --git a/zebra-chain/src/sapling/spend.rs b/zebra-chain/src/sapling/spend.rs index 068df147076..c6afd9a11e1 100644 --- a/zebra-chain/src/sapling/spend.rs +++ b/zebra-chain/src/sapling/spend.rs @@ -159,7 +159,7 @@ impl Spend { impl ZcashSerialize for Spend { fn zcash_serialize(&self, mut writer: W) -> Result<(), io::Error> { - writer.write_all(&self.cv.0.to_bytes())?; + writer.write_all(&self.cv.0)?; self.per_spend_anchor.zcash_serialize(&mut writer)?; writer.write_32_bytes(&self.nullifier.into())?; writer.write_all(&<[u8; 32]>::from(self.rk.clone())[..])?; @@ -203,9 +203,7 @@ impl ZcashDeserialize for Spend { // Type is `ValueCommit^{Sapling}.Output`, i.e. J // https://zips.z.cash/protocol/protocol.pdf#abstractcommit // See [`sapling_crypto::value::ValueCommitment::::zcash_deserialize`]. - cv: commitment::ValueCommitment( - sapling_crypto::value::ValueCommitment::zcash_deserialize(&mut reader)?, - ), + cv: commitment::ValueCommitment::zcash_deserialize(&mut reader)?, // Type is `B^{[ℓ_{Sapling}_{Merkle}]}`, i.e. 32 bytes. // But as mentioned above, we validate it further as an integer. 
per_spend_anchor: (&mut reader).zcash_deserialize_into()?, @@ -240,7 +238,7 @@ impl ZcashDeserialize for Spend { impl ZcashSerialize for SpendPrefixInTransactionV5 { fn zcash_serialize(&self, mut writer: W) -> Result<(), io::Error> { - writer.write_all(&self.cv.0.to_bytes())?; + writer.write_all(&self.cv.0)?; writer.write_32_bytes(&self.nullifier.into())?; writer.write_all(&<[u8; 32]>::from(self.rk.clone())[..])?; Ok(()) diff --git a/zebra-chain/src/sapling/tree.rs b/zebra-chain/src/sapling/tree.rs index 7316fdd108e..edbbbacfb08 100644 --- a/zebra-chain/src/sapling/tree.rs +++ b/zebra-chain/src/sapling/tree.rs @@ -23,12 +23,30 @@ use incrementalmerkletree::frontier::{Frontier, NonEmptyFrontier}; use thiserror::Error; use crate::{ + parallel::batch_frontier::{ + apply_append_batch_with_subtree, precompute_append_batch_with_subtree, BatchFrontierError, + PrecomputedSubtreeAppend, + }, serialization::{ serde_helpers, ReadZcashExt, SerializationError, ZcashDeserialize, ZcashSerialize, }, subtree::{NoteCommitmentSubtreeIndex, TRACKED_SUBTREE_HEIGHT}, }; +/// The precomputed parallel-append work for one block's Sapling note commitments, +/// produced off the committer by [`NoteCommitmentTree::precompute_append`] and +/// applied with [`NoteCommitmentTree::apply_precomputed_append`]. +#[derive(Clone, Debug)] +pub(crate) struct PrecomputedAppendBatch(PrecomputedSubtreeAppend); + +impl PrecomputedAppendBatch { + /// The tree size (leaf [`count`](NoteCommitmentTree::count)) this precompute + /// must be applied to. 
+ pub(crate) fn start_size(&self) -> u64 { + self.0.start_size() + } +} + pub mod legacy; use legacy::LegacyNoteCommitmentTree; @@ -145,6 +163,25 @@ impl ZcashDeserialize for Root { pub enum NoteCommitmentTreeError { #[error("The note commitment tree is full")] FullTree, + + #[error("Invalid precompute: empty batch, stale start size, or multi-subtree batch")] + InvalidPrecompute, +} + +impl From for NoteCommitmentTreeError { + fn from(error: BatchFrontierError) -> Self { + match error { + // A capacity overflow is the tree being full. + BatchFrontierError::Frontier(_) => NoteCommitmentTreeError::FullTree, + // The remaining variants are caller-supplied precompute misuse, which + // is reported as a recoverable error rather than panicking. + BatchFrontierError::BatchSpansMultipleSubtrees + | BatchFrontierError::EmptyBatch + | BatchFrontierError::PrecomputeStartMismatch { .. } => { + NoteCommitmentTreeError::InvalidPrecompute + } + } + } } /// Sapling Incremental Note Commitment Tree. @@ -261,6 +298,73 @@ impl NoteCommitmentTree { })) } + /// Precomputes the parallel-append work for `note_commitments` against a tree + /// of size `start_size`, off the committer's critical path. + /// + /// This does the per-leaf Merkle hashing (the dominant cost of committing a + /// shielded block) using only the starting leaf *count*, so it can run + /// concurrently ahead of the committer. Apply with + /// [`Self::apply_precomputed_append`] on a tree whose [`count`](Self::count) + /// equals `start_size`. Returns [`NoteCommitmentTreeError::InvalidPrecompute`] + /// for an empty `note_commitments`, rather than panicking. 
+ pub(crate) fn precompute_append( + start_size: u64, + note_commitments: &[NoteCommitmentUpdate], + ) -> Result { + let nodes: Vec = note_commitments + .iter() + .map(sapling_crypto::Node::from_cmu) + .collect(); + + let inner = precompute_append_batch_with_subtree::<_, MERKLE_DEPTH>(start_size, &nodes)?; + + Ok(PrecomputedAppendBatch(inner)) + } + + /// Applies a [`PrecomputedAppendBatch`] from [`Self::precompute_append`], + /// returning any completed [`TRACKED_SUBTREE_HEIGHT`] subtree, exactly like + /// [`Self::append_batch`]. `precomputed.start_size()` must equal this tree's + /// [`count`](Self::count); a stale precompute returns + /// [`NoteCommitmentTreeError::InvalidPrecompute`] (rather than panicking) so + /// callers can fall back to [`Self::append_batch`]. + #[allow(clippy::unwrap_in_result)] + pub(crate) fn apply_precomputed_append( + &mut self, + precomputed: PrecomputedAppendBatch, + ) -> Result, NoteCommitmentTreeError> + { + let (frontier, completed) = + apply_append_batch_with_subtree(self.inner.clone(), precomputed.0)?; + + self.inner = frontier; + *self + .cached_root + .get_mut() + .expect("a thread that previously held exclusive lock access panicked") = None; + + Ok(completed.map(|(index_value, root)| { + let index = NoteCommitmentSubtreeIndex( + index_value.try_into().expect("subtree index fits in u16"), + ); + (index, root) + })) + } + + /// Benchmark-only: precompute the parallel append for `note_commitments` + /// (rayon hashing), apply it onto a fresh tree, and return the resulting root. + /// Mirrors the committer's precompute path end-to-end so the + /// `precompute_threshold` benchmark can compare it against a serial append. 
+ #[cfg(feature = "bench")] + #[doc(hidden)] + pub fn precompute_then_apply_root(note_commitments: &[NoteCommitmentUpdate]) -> [u8; 32] { + let mut tree = NoteCommitmentTree::default(); + let precomputed = + Self::precompute_append(0, note_commitments).expect("non-empty batch in benchmark"); + tree.apply_precomputed_append(precomputed) + .expect("fresh tree matches start size 0"); + tree.root().into() + } + /// Returns frontier of non-empty tree, or None. fn frontier(&self) -> Option<&NonEmptyFrontier> { self.inner.value() @@ -805,4 +909,164 @@ mod tests { tree.assert_frontier_eq(&original); assert_eq!(tree.root(), original.root()); } + + /// The off-committer precompute (`precompute_append` + `apply_precomputed_append`) + /// must produce the same frontier, root, and completed-subtree result as the + /// inline `append_batch` across a range of tree/batch sizes. + #[test] + fn precompute_append_matches_append_batch() { + let cases = [ + ("empty tree, one leaf", 0u64, 1usize), + ("empty tree, small batch", 0, 5), + ("odd tree, small batch", 3, 4), + ("power-of-two tree, small batch", 8, 7), + ("after power-of-two tree, small batch", 9, 6), + ]; + + for (name, prefix_len, batch_len) in cases { + let start = build_tree(prefix_len); + let note_commitments: Vec<_> = (0..batch_len as u64) + .map(|value| note_commitment(1_000 + prefix_len + value)) + .collect(); + + let mut inline_tree = start.clone(); + let _ = inline_tree.root(); + let inline_result = inline_tree + .append_batch(¬e_commitments) + .expect("inline append succeeds"); + + let mut precompute_tree = start; + let _ = precompute_tree.root(); + let precomputed = NoteCommitmentTree::precompute_append(prefix_len, ¬e_commitments) + .expect("precompute succeeds"); + assert_eq!(precomputed.start_size(), prefix_len, "{name}: start size"); + let precompute_result = precompute_tree + .apply_precomputed_append(precomputed) + .expect("apply precompute succeeds"); + + assert_eq!( + precompute_result, inline_result, + 
"{name}: subtree result mismatch" + ); + precompute_tree.assert_frontier_eq(&inline_tree); + assert_eq!( + precompute_tree.root(), + inline_tree.root(), + "{name}: root mismatch" + ); + } + } + + /// The precompute path matches inline `append_batch` when the batch crosses the + /// first tracked-subtree boundary, including the returned subtree index and root. + #[test] + fn precompute_append_crosses_subtree_boundary() { + let start = pre_subtree_boundary_tree(); + let note_commitments = [note_commitment(100), note_commitment(200)]; + + let mut inline_tree = start.clone(); + let _ = inline_tree.root(); + let inline_result = inline_tree + .append_batch(¬e_commitments) + .expect("inline append succeeds"); + assert!(inline_result.is_some(), "batch crosses a subtree boundary"); + + let mut precompute_tree = start; + let _ = precompute_tree.root(); + let start_size = precompute_tree.count(); + let precomputed = NoteCommitmentTree::precompute_append(start_size, ¬e_commitments) + .expect("precompute succeeds"); + let precompute_result = precompute_tree + .apply_precomputed_append(precomputed) + .expect("apply precompute succeeds"); + + assert_eq!(precompute_result, inline_result, "subtree result mismatch"); + precompute_tree.assert_frontier_eq(&inline_tree); + assert_eq!(precompute_tree.root(), inline_tree.root(), "root mismatch"); + } + + /// The committer's size-match guard in `update_sapling_note_commitment_tree_with`: + /// a precompute keyed on the wrong tree size must be rejected and fall back to + /// inline hashing, so a stale look-ahead can never corrupt the tree — it can only + /// lose the speedup. A correctly-keyed precompute and `None` must match inline too. + #[test] + fn update_with_falls_back_on_size_mismatch() { + use crate::parallel::tree::NoteCommitmentTrees; + use std::sync::Arc; + + let start = build_tree(9); + let note_commitments: Vec<_> = (0..6).map(|value| note_commitment(2_000 + value)).collect(); + + // Inline reference. 
+ let mut inline_tree = start.clone(); + let _ = inline_tree.root(); + let expected_subtree = inline_tree + .append_batch(¬e_commitments) + .expect("inline append succeeds"); + let expected_root = inline_tree.root(); + + let run = |precompute: Option| { + let base = start.clone(); + let _ = base.root(); + let (tree, subtree) = NoteCommitmentTrees::update_sapling_note_commitment_tree_with( + Arc::new(base), + note_commitments.clone(), + precompute, + ) + .expect("update succeeds"); + (tree.root(), subtree) + }; + + // No precompute: inline path. + assert_eq!( + run(None), + (expected_root, expected_subtree), + "None fallback" + ); + + // Correctly-keyed precompute (start_size == tree count 9): applies the precomputed subtree roots, same result. + let matched = NoteCommitmentTree::precompute_append(9, ¬e_commitments) + .expect("precompute succeeds"); + assert_eq!( + run(Some(matched)), + (expected_root, expected_subtree), + "matched precompute" + ); + + // Wrong-keyed precompute (start_size 7 != tree count 9): the guard rejects it + // and falls back to inline, still producing the correct tree. + let stale = NoteCommitmentTree::precompute_append(7, ¬e_commitments) + .expect("precompute succeeds"); + assert_eq!( + run(Some(stale)), + (expected_root, expected_subtree), + "stale precompute falls back" + ); + } + + /// The public precompute wrappers report invalid input as a recoverable + /// `NoteCommitmentTreeError`, never a panic: an empty batch, and a stale + /// precompute applied directly to a mismatched tree. + #[test] + fn precompute_wrappers_report_invalid_input() { + // Empty batch. + assert_eq!( + NoteCommitmentTree::precompute_append(0, &[]).err(), + Some(NoteCommitmentTreeError::InvalidPrecompute), + "empty precompute_append is a recoverable error" + ); + + // Stale precompute applied to a tree of the wrong size. 
+ let note_commitments: Vec<_> = (0..4).map(|value| note_commitment(3_000 + value)).collect(); + let stale = NoteCommitmentTree::precompute_append(5, ¬e_commitments) + .expect("precompute succeeds"); + + let mut tree = build_tree(2); + let _ = tree.root(); + assert_eq!( + tree.apply_precomputed_append(stale), + Err(NoteCommitmentTreeError::InvalidPrecompute), + "applying a stale precompute is a recoverable error" + ); + } } diff --git a/zebra-chain/src/transaction.rs b/zebra-chain/src/transaction.rs index f68ff93157e..5f1f21ce2a2 100644 --- a/zebra-chain/src/transaction.rs +++ b/zebra-chain/src/transaction.rs @@ -13,6 +13,7 @@ mod serialize; mod sighash; mod txid; mod unmined; +pub(crate) mod zip244; #[cfg(any(test, feature = "proptest-impl"))] #[allow(clippy::unwrap_in_result)] @@ -1076,6 +1077,34 @@ impl Transaction { } } + /// Returns true if every Sapling value commitment and ephemeral public key in + /// this transaction is a canonical, non-small-order Jubjub point (or the + /// transaction has no Sapling data). + /// + /// Those points are stored as raw bytes and their not-small-order check is + /// deferred from deserialization to keep point decompression off the + /// checkpoint-sync hot path. The semantic verifier calls this to enforce the + /// consensus rule on untrusted transactions; the checkpoint verifier does not + /// need it because it trusts block hashes. + pub fn sapling_point_encodings_are_valid(&self) -> bool { + match self { + Transaction::V4 { + sapling_shielded_data: Some(sapling_shielded_data), + .. + } => sapling_shielded_data.point_encodings_are_valid(), + Transaction::V5 { + sapling_shielded_data: Some(sapling_shielded_data), + .. + } => sapling_shielded_data.point_encodings_are_valid(), + #[cfg(all(zcash_unstable = "nu7", feature = "tx_v6"))] + Transaction::V6 { + sapling_shielded_data: Some(sapling_shielded_data), + .. 
+ } => sapling_shielded_data.point_encodings_are_valid(), + _ => true, + } + } + // orchard /// Access the [`orchard::ShieldedData`] in this transaction, diff --git a/zebra-chain/src/transaction/serialize.rs b/zebra-chain/src/transaction/serialize.rs index d59748ffc38..9cd162f73e9 100644 --- a/zebra-chain/src/transaction/serialize.rs +++ b/zebra-chain/src/transaction/serialize.rs @@ -1033,7 +1033,17 @@ impl ZcashDeserialize for Transaction { // `proofsOrchard`, `vSpendAuthSigsOrchard`, and `bindingSigOrchard`. let orchard_shielded_data = (&mut limited_reader).zcash_deserialize_into()?; - let tx = Transaction::V5 { + // Convertibility to the librustzcash transaction type is + // intentionally not re-checked here. That check re-runs the full + // conversion, which decompresses every Jubjub/Pallas curve point, + // on every block, and it is the dominant CPU cost of checkpoint + // sync. It is also redundant: untrusted transactions that are not + // convertible are still rejected by the semantic verifier, which + // converts every transaction via `CachedFfiTransaction::new` + // before accepting it, while blocks below the checkpoints are + // trusted by their hash (and validated against the header merkle + // root built from the transaction IDs). + Ok(Transaction::V5 { network_upgrade, lock_time, expiry_height, @@ -1041,11 +1051,7 @@ impl ZcashDeserialize for Transaction { outputs, sapling_shielded_data, orchard_shielded_data, - }; - - tx.to_librustzcash(network_upgrade)?; - - Ok(tx) + }) } #[cfg(any(zcash_unstable = "nu6.3", zcash_unstable = "nu7"))] (6, true) => { diff --git a/zebra-chain/src/transaction/tests/prop.rs b/zebra-chain/src/transaction/tests/prop.rs index c80cbbe8db6..1cc94dab155 100644 --- a/zebra-chain/src/transaction/tests/prop.rs +++ b/zebra-chain/src/transaction/tests/prop.rs @@ -46,6 +46,9 @@ proptest! 
{ } } + /// `txid_and_auth_digest` shares one librustzcash conversion to produce both + /// the txid and the ZIP-244 auth digest; this asserts the result is identical + /// to computing them separately via `hash()` and `auth_digest()`. #[test] fn txid_and_auth_digest_matches_separate(tx in any::()) { let _init_guard = zebra_test::init(); @@ -56,6 +59,32 @@ proptest! { prop_assert_eq![auth_digest, tx.auth_digest()]; } + /// The native ZIP-244 txid + authorizing-data digest implementation + /// (`transaction::zip244`) must be byte-for-byte identical to the + /// `librustzcash` conversion it replaces. This is the consensus-critical + /// correctness proof for the native path, exercised across thousands of + /// random v5 transaction shapes (coinbase, spends-only, outputs-only, empty + /// shielded bundles, multi-action orchard, both NU5 and NU6 branch ids). + #[test] + fn native_zip244_matches_librustzcash(tx in Transaction::v5_strategy(LedgerState::default())) { + let _init_guard = zebra_test::init(); + + let (native_txid, native_auth) = crate::transaction::zip244::txid_and_auth_digest(&tx) + .expect("v5 transaction has a native ZIP-244 digest"); + let (ref_txid, ref_auth) = + crate::primitives::zcash_primitives::txid_and_auth_digest_via_librustzcash(&tx); + + prop_assert_eq!(native_txid, ref_txid, "native txid must match librustzcash"); + prop_assert_eq!(native_auth, ref_auth, "native auth digest must match librustzcash"); + + // The separate native entry points must agree with the combined one. 
+ prop_assert_eq!(crate::transaction::zip244::txid(&tx).expect("v5"), native_txid); + prop_assert_eq!( + crate::transaction::zip244::auth_digest(&tx).expect("v5"), + native_auth + ); + } + #[test] fn transaction_hash_struct_display_roundtrip(hash in any::()) { let _init_guard = zebra_test::init(); diff --git a/zebra-chain/src/transaction/tests/vectors.rs b/zebra-chain/src/transaction/tests/vectors.rs index b8e60fc3c36..44460a80b91 100644 --- a/zebra-chain/src/transaction/tests/vectors.rs +++ b/zebra-chain/src/transaction/tests/vectors.rs @@ -970,7 +970,9 @@ fn binding_signatures() { .expect("network upgrade is valid for tx"); let bvk = redjubjub::VerificationKey::try_from( - sapling_shielded_data.binding_verification_key(), + sapling_shielded_data + .binding_verification_key() + .expect("test transaction has valid value commitments"), ) .expect("a valid redjubjub::VerificationKey"); @@ -1001,7 +1003,9 @@ fn binding_signatures() { .expect("network upgrade is valid for tx"); let bvk = redjubjub::VerificationKey::try_from( - sapling_shielded_data.binding_verification_key(), + sapling_shielded_data + .binding_verification_key() + .expect("test transaction has valid value commitments"), ) .expect("a valid redjubjub::VerificationKey"); @@ -1033,7 +1037,9 @@ fn binding_signatures() { .expect("network upgrade is valid for tx"); let bvk = redjubjub::VerificationKey::try_from( - sapling_shielded_data.binding_verification_key(), + sapling_shielded_data + .binding_verification_key() + .expect("test transaction has valid value commitments"), ) .expect("a valid redjubjub::VerificationKey"); @@ -1159,6 +1165,481 @@ fn orchard_rk_identity_point() { Transaction::zcash_deserialize(&tx_bytes[..]).expect_err("rk = identity should fail"); } +/// Validates that lazy Sapling `cv` / `ephemeral_key` deserialization stays +/// consensus-safe. 
+/// +/// To keep the Jubjub point decompression (a field square root) off the +/// checkpoint-sync hot path, `cv` and `ephemeral_key` are now stored as raw +/// bytes and the not-small-order consensus check is deferred. This is safe +/// because every *untrusted* transaction (semantic block verification, the +/// mempool, and `sendrawtransaction`) is converted via `to_librustzcash` +/// (`CachedFfiTransaction::new`) before it is accepted, and librustzcash +/// independently enforces the same rules: +/// +/// - `cv`: rejected at *read* — `zcash_primitives`'s `read_value_commitment` +/// uses `ValueCommitment::from_bytes_not_small_order`, so `to_librustzcash` +/// fails on a small-order `cv`. +/// - `ephemeral_key`: rejected at *verify* — `SaplingVerificationContext:: +/// check_output` (sapling-crypto `verifier.rs`) checks `epk.is_small_order()`. +/// +/// The checkpoint verifier does not need these checks: it trusts block hashes, +/// and a malicious block with a small-order point either fails its checkpoint +/// hash or the header merkle root. +/// +/// This test asserts the deferral (Zebra now *accepts* a small-order `cv`/`epk` +/// at deserialization) and the safety net (`to_librustzcash` *rejects* the +/// small-order `cv`, and the small-order `epk` is detectably small-order, which +/// is what the Sapling verifier checks). 
+#[test] +fn sapling_small_order_cv_epk_deferred_but_caught_by_librustzcash() { + use group::Group; + + use crate::{ + amount::Amount, + at_least_one, + block::Height, + parameters::NetworkUpgrade, + primitives::{ + redjubjub::{Binding, Signature}, + Groth16Proof, + }, + sapling::{ + self, + keys::EphemeralPublicKey, + shielded_data::{ShieldedData, TransferData}, + EncryptedNote, Output, ValueCommitment, WrappedNoteKey, + }, + serialization::{ZcashDeserializeInto, ZcashSerialize}, + transaction::{LockTime, Transaction}, + }; + + let _init_guard = zebra_test::init(); + + // The Jubjub identity point is a valid encoding, but it is small order + // (order 1), so the not-small-order consensus check must reject it. + let small_order_bytes = jubjub::AffinePoint::from(jubjub::ExtendedPoint::identity()).to_bytes(); + + // These are the exact library functions the semantic/mempool path uses, so + // they must detect the small-order point. + assert!( + bool::from( + sapling_crypto::value::ValueCommitment::from_bytes_not_small_order(&small_order_bytes) + .is_none() + ), + "from_bytes_not_small_order (used by librustzcash read_value_commitment) must reject \ + the small-order cv", + ); + assert!( + bool::from( + jubjub::AffinePoint::from_bytes(small_order_bytes) + .unwrap() + .is_small_order() + ), + "is_small_order (used by the Sapling verifier check_output) must flag the small-order epk", + ); + + // A valid, non-small-order point (the Jubjub generator), used to isolate the + // `epk` case from the `cv` case below. 
+ let valid_cv_bytes = jubjub::AffinePoint::from(jubjub::ExtendedPoint::generator()).to_bytes(); + assert!( + bool::from( + sapling_crypto::value::ValueCommitment::from_bytes_not_small_order(&valid_cv_bytes) + .is_some() + ), + "the Jubjub generator is a valid non-small-order cv", + ); + + // Build a minimal V5 transaction with one Sapling output with the given cv + // and ephemeral_key bytes, round-trip it through Zebra's (now lazy) + // deserializer, and return whether `to_librustzcash` accepts it. + let build_and_convert = |cv_bytes: [u8; 32], epk_bytes: [u8; 32]| -> bool { + let output = Output { + cv: ValueCommitment(cv_bytes), + cm_u: sapling_crypto::note::ExtractedNoteCommitment::from_bytes(&[0u8; 32]).unwrap(), + ephemeral_key: EphemeralPublicKey(epk_bytes), + enc_ciphertext: EncryptedNote([0u8; 580]), + out_ciphertext: WrappedNoteKey([0u8; 80]), + zkproof: Groth16Proof([0u8; 192]), + }; + + let shielded_data: ShieldedData = ShieldedData { + value_balance: Amount::try_from(0).expect("zero is a valid amount"), + transfers: TransferData::JustOutputs { + outputs: at_least_one![output], + }, + binding_sig: Signature::::from([0u8; 64]), + }; + + let tx = Transaction::V5 { + network_upgrade: NetworkUpgrade::Nu5, + lock_time: LockTime::unlocked(), + expiry_height: Height(0), + inputs: vec![], + outputs: vec![], + sapling_shielded_data: Some(shielded_data), + orchard_shielded_data: None, + }; + + let bytes = tx + .zcash_serialize_to_vec() + .expect("crafted transaction must serialize"); + + // Deferral: Zebra now accepts a small-order cv/epk at deserialization + // (the not-small-order check no longer runs here). 
+ let tx: Transaction = bytes + .zcash_deserialize_into() + .expect("lazy deserialization accepts a small-order cv/epk; validation is deferred"); + + tx.to_librustzcash(NetworkUpgrade::Nu5).is_ok() + }; + + // cv is enforced at *read*: `read_value_commitment` uses + // `from_bytes_not_small_order`, so `to_librustzcash` (run for every untrusted + // transaction via `CachedFfiTransaction::new`) rejects a small-order cv. + assert!( + !build_and_convert(small_order_bytes, valid_cv_bytes), + "to_librustzcash must reject a small-order Sapling cv at read", + ); + + // epk is enforced at *verify*, not at read: a small-order epk (with a valid + // cv) passes `to_librustzcash`, then the Sapling verifier's `check_output` + // rejects it via `epk.is_small_order()` (asserted above). This locates the + // enforcement at the verifier, which `verify_sapling_bundle` invokes for + // every untrusted transaction. + // + // A fully isolated end-to-end verifier test is intentionally omitted: mutating + // epk also changes the SigHash (breaking the binding signature) and the + // output proof cannot be forged without proving keys, so any consensus-level + // rejection would be confounded. The `is_small_order` assertion above checks + // the exact, unchanged librustzcash code path that performs the rejection. + assert!( + build_and_convert(valid_cv_bytes, small_order_bytes), + "to_librustzcash must accept a small-order epk (it is enforced at verify, not read)", + ); +} + +/// Edge cases for the lazy Sapling `cv` / `ephemeral_key` deserialization. 
+/// +/// Beyond the small-order case, this validates: +/// - an off-curve / non-canonical `cv` is also rejected by `to_librustzcash`, so +/// the safety net covers every invalid encoding, not just small-order points; +/// - an off-curve / non-canonical `ephemeral_key` is detectably invalid (the +/// Sapling verifier decompresses `epk`, which fails for an off-curve point); +/// - the lazy types preserve the encoding byte-for-byte through a +/// serialize/deserialize round-trip — the txid and block merkle root hash these +/// bytes, so any change would be consensus-breaking; +/// - `cv.commitment()` decompresses a valid encoding back to the same point; +/// - Sapling `rk` (`ValidatingKey`) is still validated at deserialization — it +/// was not made lazy, so a small-order `rk` is still rejected at read. +#[test] +fn sapling_lazy_cv_epk_edge_cases() { + use group::Group; + + use crate::{ + amount::Amount, + at_least_one, + block::Height, + parameters::NetworkUpgrade, + primitives::{ + redjubjub::{Binding, Signature}, + Groth16Proof, + }, + sapling::{ + self, + keys::{EphemeralPublicKey, ValidatingKey}, + shielded_data::{ShieldedData, TransferData}, + EncryptedNote, Output, ValueCommitment, WrappedNoteKey, + }, + serialization::{ZcashDeserializeInto, ZcashSerialize}, + transaction::{LockTime, Transaction}, + }; + + let _init_guard = zebra_test::init(); + + // A non-canonical / off-curve 32-byte value: not a valid Jubjub point. 
+ let off_curve = [0xffu8; 32]; + assert!( + bool::from(jubjub::AffinePoint::from_bytes(off_curve).is_none()), + "0xff..ff must not be a valid Jubjub point encoding", + ); + let valid_cv = jubjub::AffinePoint::from(jubjub::ExtendedPoint::generator()).to_bytes(); + let small_order = jubjub::AffinePoint::from(jubjub::ExtendedPoint::identity()).to_bytes(); + + let make_v5 = |cv: [u8; 32], epk: [u8; 32]| -> Transaction { + let output = Output { + cv: ValueCommitment(cv), + cm_u: sapling_crypto::note::ExtractedNoteCommitment::from_bytes(&[0u8; 32]).unwrap(), + ephemeral_key: EphemeralPublicKey(epk), + enc_ciphertext: EncryptedNote([0u8; 580]), + out_ciphertext: WrappedNoteKey([0u8; 80]), + zkproof: Groth16Proof([0u8; 192]), + }; + Transaction::V5 { + network_upgrade: NetworkUpgrade::Nu5, + lock_time: LockTime::unlocked(), + expiry_height: Height(0), + inputs: vec![], + outputs: vec![], + sapling_shielded_data: Some(ShieldedData:: { + value_balance: Amount::try_from(0).expect("zero is a valid amount"), + transfers: TransferData::JustOutputs { + outputs: at_least_one![output], + }, + binding_sig: Signature::::from([0u8; 64]), + }), + orchard_shielded_data: None, + } + }; + + // An off-curve cv is rejected by to_librustzcash, covering invalid encodings + // that are not small-order. + let tx_off_curve_cv: Transaction = make_v5(off_curve, valid_cv) + .zcash_serialize_to_vec() + .expect("serializes") + .zcash_deserialize_into() + .expect("lazy deserialization accepts an off-curve cv"); + assert!( + tx_off_curve_cv + .to_librustzcash(NetworkUpgrade::Nu5) + .is_err(), + "to_librustzcash must reject an off-curve cv", + ); + + // Byte-identity: arbitrary (here non-canonical) cv/epk bytes survive a + // serialize -> deserialize -> serialize round-trip unchanged, so the txid and + // merkle root computed from them are unaffected by the lazy representation. 
+ let bytes_in = make_v5(off_curve, off_curve) + .zcash_serialize_to_vec() + .expect("serializes"); + let tx_round: Transaction = bytes_in + .clone() + .zcash_deserialize_into() + .expect("round-trips"); + let bytes_out = tx_round.zcash_serialize_to_vec().expect("re-serializes"); + assert_eq!( + bytes_in, bytes_out, + "lazy cv/epk must round-trip byte-for-byte", + ); + match &tx_round { + Transaction::V5 { + sapling_shielded_data: Some(sd), + .. + } => { + let out = sd.outputs().next().expect("one output"); + assert_eq!(out.cv.0, off_curve, "cv bytes preserved exactly"); + assert_eq!( + out.ephemeral_key.0, off_curve, + "epk bytes preserved exactly" + ); + } + _ => panic!("expected a V5 transaction with Sapling data"), + } + + // `commitment()` decompresses a valid encoding to the same point. + assert_eq!( + ValueCommitment(valid_cv) + .commitment() + .expect("the generator is a valid value commitment") + .to_bytes(), + valid_cv, + "commitment() must round-trip a valid value commitment", + ); + + // `rk` was not made lazy: a small-order rk is still rejected at deserialization + // (`SpendPrefixInTransactionV5` reads it via `ValidatingKey::try_from`). + assert!( + ValidatingKey::try_from(small_order).is_err(), + "Sapling rk must still reject a small-order point at deserialization", + ); +} + +/// The explicit Sapling cv/epk not-small-order check used by the semantic +/// verifier rejects bad points. +/// +/// `Transaction::sapling_point_encodings_are_valid` is the deferred check, +/// relocated from deserialization to the semantic verification path (it is what +/// `Verifier::verify_v4_transaction` / `verify_v5_transaction` call, returning +/// `TransactionError::SmallOrder` on failure). Unlike the proof/binding-signature +/// verification, this check is isolated, so it can be exercised directly: it +/// rejects a small-order or off-curve `cv` *and* a small-order or off-curve +/// `epk`, and accepts valid points. The checkpoint verifier never calls it. 
+#[test] +fn sapling_point_encodings_check_rejects_bad_points() { + use group::Group; + + use crate::{ + amount::Amount, + at_least_one, + block::Height, + parameters::NetworkUpgrade, + primitives::{ + redjubjub::{Binding, Signature}, + Groth16Proof, + }, + sapling::{ + self, + keys::EphemeralPublicKey, + shielded_data::{ShieldedData, TransferData}, + EncryptedNote, Output, ValueCommitment, WrappedNoteKey, + }, + transaction::{LockTime, Transaction}, + }; + + let _init_guard = zebra_test::init(); + + let valid = jubjub::AffinePoint::from(jubjub::ExtendedPoint::generator()).to_bytes(); + let small_order = jubjub::AffinePoint::from(jubjub::ExtendedPoint::identity()).to_bytes(); + let off_curve = [0xffu8; 32]; + + let make = |cv: [u8; 32], epk: [u8; 32]| -> Transaction { + let output = Output { + cv: ValueCommitment(cv), + cm_u: sapling_crypto::note::ExtractedNoteCommitment::from_bytes(&[0u8; 32]).unwrap(), + ephemeral_key: EphemeralPublicKey(epk), + enc_ciphertext: EncryptedNote([0u8; 580]), + out_ciphertext: WrappedNoteKey([0u8; 80]), + zkproof: Groth16Proof([0u8; 192]), + }; + Transaction::V5 { + network_upgrade: NetworkUpgrade::Nu5, + lock_time: LockTime::unlocked(), + expiry_height: Height(0), + inputs: vec![], + outputs: vec![], + sapling_shielded_data: Some(ShieldedData:: { + value_balance: Amount::try_from(0).expect("zero is a valid amount"), + transfers: TransferData::JustOutputs { + outputs: at_least_one![output], + }, + binding_sig: Signature::::from([0u8; 64]), + }), + orchard_shielded_data: None, + } + }; + + // Valid points pass (a dummy proof/binding sig does not affect this check). + assert!( + make(valid, valid).sapling_point_encodings_are_valid(), + "valid cv/epk must pass the encoding check", + ); + + // A small-order cv is rejected. + assert!( + !make(small_order, valid).sapling_point_encodings_are_valid(), + "small-order cv must be rejected", + ); + + // A small-order epk is rejected. 
This is the isolated, executable proof of + // the epk rejection: the check runs independently of proof verification. + assert!( + !make(valid, small_order).sapling_point_encodings_are_valid(), + "small-order epk must be rejected", + ); + + // Off-curve / non-canonical encodings are rejected for both fields. + assert!( + !make(off_curve, valid).sapling_point_encodings_are_valid(), + "off-curve cv must be rejected", + ); + assert!( + !make(valid, off_curve).sapling_point_encodings_are_valid(), + "off-curve epk must be rejected", + ); +} + +/// The relocated Sapling `cv` / `epk` not-small-order checks accept exactly the +/// same encodings as the librustzcash functions they mirror. +/// +/// The consensus check was moved off the deserialization path into +/// `ValueCommitment::is_valid_not_small_order` and +/// `EphemeralPublicKey::is_valid_not_small_order`. If either ever diverged from +/// what librustzcash enforces at the FFI boundary, Zebra would accept or reject a +/// transaction that the rest of the network does not — a chain split, not a local +/// bug. This pins each Zebra predicate against the exact library predicate, over a +/// corpus that covers both verdicts: +/// +/// - `cv`: `zcash_primitives`'s `read_value_commitment` accepts a `cv` iff +/// `sapling_crypto::value::ValueCommitment::from_bytes_not_small_order` returns +/// a point. +/// - `epk`: sapling-crypto decodes `epk` via `jubjub::ExtendedPoint::from_bytes` +/// (`verifier/batch.rs`) and `check_output` rejects it when +/// `epk.is_small_order()` (`verifier.rs`). Zebra decodes as an `AffinePoint`, so +/// this also guards that the two decoders agree across the input space. +#[test] +fn sapling_point_checks_match_librustzcash_predicates() { + use group::{Group, GroupEncoding}; + + use crate::sapling::{keys::EphemeralPublicKey, ValueCommitment}; + + let _init_guard = zebra_test::init(); + + // The exact predicate librustzcash applies to a `cv` at read. 
+ let librustzcash_cv_valid = |bytes: [u8; 32]| -> bool { + bool::from( + sapling_crypto::value::ValueCommitment::from_bytes_not_small_order(&bytes).is_some(), + ) + }; + + // The exact predicate librustzcash applies to an `epk`: decode as an + // `ExtendedPoint` (as sapling-crypto's batch verifier does), then reject a + // small-order point (as `check_output` does). + let librustzcash_epk_valid = |bytes: [u8; 32]| -> bool { + match jubjub::ExtendedPoint::from_bytes(&bytes).into_option() { + Some(point) => !bool::from(point.is_small_order()), + None => false, + } + }; + + // A representative spread of encodings: the three consensus-relevant classes + // (valid non-small-order, valid small-order, off-curve/non-canonical), a + // deterministic byte-pattern sweep that mixes decodable and undecodable + // encodings, and many prime-order points `[k]·G` to exercise the accepting + // branch heavily. + let mut inputs: Vec<[u8; 32]> = vec![ + jubjub::AffinePoint::from(jubjub::ExtendedPoint::generator()).to_bytes(), + jubjub::AffinePoint::from(jubjub::ExtendedPoint::identity()).to_bytes(), + [0xffu8; 32], + [0x00u8; 32], + ]; + for b in 0u8..=255 { + inputs.push([b; 32]); + } + let mut acc = jubjub::ExtendedPoint::generator(); + for _ in 0..64 { + inputs.push(jubjub::AffinePoint::from(acc).to_bytes()); + acc += jubjub::ExtendedPoint::generator(); + } + + // Guard against a vacuous comparison: the corpus must contain both accepted + // and rejected encodings for each predicate, otherwise an all-accept or + // all-reject bug could pass the equivalence assertion below. 
+ assert!( + inputs.iter().any(|&b| librustzcash_cv_valid(b)) + && inputs.iter().any(|&b| !librustzcash_cv_valid(b)), + "cv corpus must contain both accepted and rejected encodings", + ); + assert!( + inputs.iter().any(|&b| librustzcash_epk_valid(b)) + && inputs.iter().any(|&b| !librustzcash_epk_valid(b)), + "epk corpus must contain both accepted and rejected encodings", + ); + + for bytes in inputs { + assert_eq!( + ValueCommitment(bytes).is_valid_not_small_order(), + librustzcash_cv_valid(bytes), + "ValueCommitment::is_valid_not_small_order must match librustzcash \ + read_value_commitment for {bytes:02x?}", + ); + assert_eq!( + EphemeralPublicKey(bytes).is_valid_not_small_order(), + librustzcash_epk_valid(bytes), + "EphemeralPublicKey::is_valid_not_small_order must match librustzcash \ + check_output for {bytes:02x?}", + ); + } +} + /// Reproduction for GHSA-rgwx-8r98-p34c: /// Coinbase Sapling spend vectors allocate before zero-spend consensus rule. /// diff --git a/zebra-chain/src/transaction/txid.rs b/zebra-chain/src/transaction/txid.rs index 40d26720438..08f98441fd1 100644 --- a/zebra-chain/src/transaction/txid.rs +++ b/zebra-chain/src/transaction/txid.rs @@ -45,9 +45,15 @@ impl<'a> TxIdBuilder<'a> { /// In this case it's the hash of a tree of hashes of specific parts of the /// transaction, as specified in ZIP-244 and ZIP-225. fn txid_v5(self) -> Option { + // Compute the v5 ZIP-244 txid natively, directly from the parsed + // transaction, avoiding the `librustzcash` reparse (see `super::zip244`). + // Non-v5 transactions (e.g. v6) fall back to `librustzcash` below. + if let Some(txid) = super::zip244::txid(self.trans) { + return Some(txid); + } + let nu = self.trans.network_upgrade()?; - // We compute v5 txid (from ZIP-244) using librustzcash. 
Some(Hash(*self.trans.to_librustzcash(nu).ok()?.txid().as_ref())) } diff --git a/zebra-chain/src/transaction/zip244.rs b/zebra-chain/src/transaction/zip244.rs new file mode 100644 index 00000000000..313a123fecc --- /dev/null +++ b/zebra-chain/src/transaction/zip244.rs @@ -0,0 +1,446 @@ +//! Native ZIP-244 transaction identifier (txid) and authorizing-data commitment. +//! +//! Computes the v5 txid digest tree and the ZIP-244 authorizing-data digest +//! directly from Zebra's parsed [`Transaction`], without converting to the +//! `librustzcash` transaction type via [`Transaction::to_librustzcash`]. +//! +//! That conversion re-serializes the whole transaction and re-parses it, +//! decompressing every Jubjub/Pallas curve point (`cv`, `rk`, ephemeral keys, +//! …) into typed group elements — purely so `librustzcash` can re-serialize +//! those same bytes back into the BLAKE2b digest tree. In the checkpoint range +//! the points are never otherwise needed (no proof/signature verification), so +//! the decompression is pure overhead; profiling the heavy shielded region +//! attributes ~44% of all CPU to these reparses. This module feeds Zebra's +//! canonical field bytes straight into the same BLAKE2b tree. +//! +//! The output is **byte-for-byte identical** to the `librustzcash` computation; +//! this is consensus-critical and is proven by the differential property test +//! `native_matches_librustzcash` (and `txid_and_auth_digest_matches_separate`) +//! in `transaction/tests/prop.rs`, plus the existing ZIP-244 known-answer +//! vectors and a clean differential mainnet sync. +//! +//! Specified in [ZIP-244] and [ZIP-225]. The personalizations and field +//! orderings mirror `zcash_primitives::transaction::txid` and +//! `orchard::bundle::commitments`. +//! +//! Only v5 transactions are handled here; v6 (the unstable `tx_v6` feature, +//! which can carry a ZIP-233 header field) still routes through `librustzcash`. +//! +//! [ZIP-244]: https://zips.z.cash/zip-0244 +//! 
[ZIP-225]: https://zips.z.cash/zip-0225 + +use std::io; + +use blake2b_simd::{Hash as Blake2bHash, Params, State}; + +use crate::{ + orchard, + parameters::TX_V5_VERSION_GROUP_ID, + sapling, + serialization::ZcashSerialize, + transaction::{AuthDigest, Hash, Transaction}, + transparent, +}; + +// txid tree root personalization (`ZcashTxHash_` ‖ consensus_branch_id LE32) +const ZCASH_TX_PERSONALIZATION_PREFIX: &[u8; 12] = b"ZcashTxHash_"; + +// txid level-1 node personalizations +const ZCASH_HEADERS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdHeadersHash"; +const ZCASH_TRANSPARENT_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdTranspaHash"; +const ZCASH_SAPLING_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSaplingHash"; +const ZCASH_ORCHARD_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdOrchardHash"; + +// txid transparent level-2 node personalizations +const ZCASH_PREVOUTS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdPrevoutHash"; +const ZCASH_SEQUENCE_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSequencHash"; +const ZCASH_OUTPUTS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdOutputsHash"; + +// txid sapling level-2 node personalizations +const ZCASH_SAPLING_SPENDS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSSpendsHash"; +const ZCASH_SAPLING_SPENDS_COMPACT_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSSpendCHash"; +const ZCASH_SAPLING_SPENDS_NONCOMPACT_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSSpendNHash"; +const ZCASH_SAPLING_OUTPUTS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSOutputHash"; +const ZCASH_SAPLING_OUTPUTS_COMPACT_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSOutC__Hash"; +const ZCASH_SAPLING_OUTPUTS_MEMOS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSOutM__Hash"; +const ZCASH_SAPLING_OUTPUTS_NONCOMPACT_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdSOutN__Hash"; + +// txid orchard level-2 node personalizations +const ZCASH_ORCHARD_ACTIONS_COMPACT_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdOrcActCHash"; +const ZCASH_ORCHARD_ACTIONS_MEMOS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdOrcActMHash"; +const 
ZCASH_ORCHARD_ACTIONS_NONCOMPACT_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxIdOrcActNHash"; + +// auth-digest tree root personalization (`ZTxAuthHash_` ‖ consensus_branch_id LE32) +const ZCASH_AUTH_PERSONALIZATION_PREFIX: &[u8; 12] = b"ZTxAuthHash_"; +const ZCASH_TRANSPARENT_SCRIPTS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxAuthTransHash"; +const ZCASH_SAPLING_SIGS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxAuthSapliHash"; +const ZCASH_ORCHARD_SIGS_HASH_PERSONALIZATION: &[u8; 16] = b"ZTxAuthOrchaHash"; + +/// A new BLAKE2b-256 state with the given 16-byte personalization. +fn hasher(personal: &[u8; 16]) -> State { + Params::new().hash_length(32).personal(personal).to_state() +} + +/// `io::Write` adapter that feeds bytes into a BLAKE2b [`State`], so Zebra's +/// existing [`ZcashSerialize`] implementations can write a field's canonical +/// bytes straight into a hash with no intermediate allocation. +struct HashWriter<'a>(&'a mut State); + +impl io::Write for HashWriter<'_> { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.0.update(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +/// Write a value's canonical [`ZcashSerialize`] bytes into a BLAKE2b state. +fn update_serialized(state: &mut State, value: &T) { + value + .zcash_serialize(HashWriter(state)) + .expect("writing to a BLAKE2b state is infallible"); +} + +/// The fields of a v5 transaction needed to compute its digests. +/// +/// Returns `None` for non-v5 transactions (the caller falls back to +/// `librustzcash`). 
+struct V5Parts<'a> {
+    /// Network upgrade of the transaction; the source of its consensus branch id.
+    network_upgrade: crate::parameters::NetworkUpgrade,
+    /// Lock time, hashed into the header digest.
+    lock_time: &'a crate::transaction::LockTime,
+    /// Expiry height, hashed into the header digest.
+    expiry_height: crate::block::Height,
+    /// Transparent inputs.
+    inputs: &'a [transparent::Input],
+    /// Transparent outputs.
+    outputs: &'a [transparent::Output],
+    /// Sapling shielded data, if present.
+    sapling: Option<&'a sapling::ShieldedData<sapling::SharedAnchor>>,
+    /// Orchard shielded data, if present.
+    orchard: Option<&'a orchard::ShieldedData>,
+}
+
+/// Borrows the fields of a v5 transaction that the ZIP-244 digests commit to.
+///
+/// Returns `None` for any other transaction version, so the caller can fall back
+/// to the `librustzcash` path.
+fn v5_parts(tx: &Transaction) -> Option<V5Parts<'_>> {
+    match tx {
+        Transaction::V5 {
+            network_upgrade,
+            lock_time,
+            expiry_height,
+            inputs,
+            outputs,
+            sapling_shielded_data,
+            orchard_shielded_data,
+        } => Some(V5Parts {
+            network_upgrade: *network_upgrade,
+            lock_time,
+            expiry_height: *expiry_height,
+            inputs,
+            outputs,
+            sapling: sapling_shielded_data.as_ref(),
+            orchard: orchard_shielded_data.as_ref(),
+        }),
+        _ => None,
+    }
+}
+
+/// The consensus branch id of a v5 transaction, as the LE `u32` committed to by
+/// the header digest and both tree-root personalizations.
+///
+/// # Panics
+///
+/// Panics if the transaction's network upgrade has no consensus branch id.
+fn consensus_branch_id(parts: &V5Parts) -> u32 {
+    u32::from(
+        parts
+            .network_upgrade
+            .branch_id()
+            .expect("v5 network upgrade has a consensus branch id"),
+    )
+}
+
+// --- txid digest (ZIP-244 §T) -------------------------------------------------
+
+/// ZIP-244 §T.1 header digest.
+fn hash_header(parts: &V5Parts) -> Blake2bHash {
+    let mut h = hasher(ZCASH_HEADERS_HASH_PERSONALIZATION);
+    // header: fOverwintered (set for v5) in the high bit, version 5 in the low bits.
+    h.update(&(0x8000_0005_u32).to_le_bytes());
+    h.update(&TX_V5_VERSION_GROUP_ID.to_le_bytes());
+    h.update(&consensus_branch_id(parts).to_le_bytes());
+    // lock_time and expiry_height are each a single LE u32; `LockTime` serializes
+    // as exactly that u32.
+    update_serialized(&mut h, parts.lock_time);
+    h.update(&parts.expiry_height.0.to_le_bytes());
+    h.finalize()
+}
+
+/// ZIP-244 §T.2a prevouts digest.
+fn hash_prevouts(inputs: &[transparent::Input]) -> Blake2bHash {
+    let mut digest = hasher(ZCASH_PREVOUTS_HASH_PERSONALIZATION);
+    for input in inputs {
+        match input {
+            // A coinbase input commits to the null prevout (all-zero hash,
+            // index `0xffff_ffff`), exactly as Zebra's `Input` serialization
+            // writes it.
+            transparent::Input::Coinbase { .. } => {
+                let null_hash = [0u8; 32];
+                let null_index = 0xffff_ffff_u32.to_le_bytes();
+                digest.update(&null_hash);
+                digest.update(&null_index);
+            }
+            transparent::Input::PrevOut { outpoint, .. } => {
+                update_serialized(&mut digest, outpoint);
+            }
+        }
+    }
+    digest.finalize()
+}
+
+/// ZIP-244 §T.2b sequence digest: every input's sequence number as a LE `u32`,
+/// in input order.
+fn hash_sequence(inputs: &[transparent::Input]) -> Blake2bHash {
+    let mut digest = hasher(ZCASH_SEQUENCE_HASH_PERSONALIZATION);
+    inputs
+        .iter()
+        .map(|input| input.sequence().to_le_bytes())
+        .for_each(|sequence| {
+            digest.update(&sequence);
+        });
+    digest.finalize()
+}
+
+/// ZIP-244 §T.2c outputs digest: every transparent output's canonical
+/// serialization, in output order.
+fn hash_outputs(outputs: &[transparent::Output]) -> Blake2bHash {
+    let mut digest = hasher(ZCASH_OUTPUTS_HASH_PERSONALIZATION);
+    outputs
+        .iter()
+        .for_each(|output| update_serialized(&mut digest, output));
+    digest.finalize()
+}
+
+/// ZIP-244 §T.2 transparent digest, combining the prevouts, sequence and outputs
+/// digests.
+fn hash_transparent_txid(
+    inputs: &[transparent::Input],
+    outputs: &[transparent::Output],
+) -> Blake2bHash {
+    let mut digest = hasher(ZCASH_TRANSPARENT_HASH_PERSONALIZATION);
+
+    // With neither inputs nor outputs the transparent bundle is absent, and the
+    // digest is the bare personalization hash.
+    let bundle_is_absent = inputs.is_empty() && outputs.is_empty();
+    if bundle_is_absent {
+        return digest.finalize();
+    }
+
+    let prevouts = hash_prevouts(inputs);
+    let sequences = hash_sequence(inputs);
+    let outputs_digest = hash_outputs(outputs);
+    digest.update(prevouts.as_bytes());
+    digest.update(sequences.as_bytes());
+    digest.update(outputs_digest.as_bytes());
+    digest.finalize()
+}
+
+/// ZIP-244 §T.3a sapling spends digest.
+fn hash_sapling_spends(sapling: &sapling::ShieldedData<sapling::SharedAnchor>) -> Blake2bHash {
+    let mut h = hasher(ZCASH_SAPLING_SPENDS_HASH_PERSONALIZATION);
+    // With no spends, nothing is written and the digest is the bare
+    // personalization hash.
+    if sapling.spends().next().is_some() {
+        // `ch` collects the compact part (nullifiers); `nh` collects the
+        // non-compact part (cv, anchor, rk).
+        let mut ch = hasher(ZCASH_SAPLING_SPENDS_COMPACT_HASH_PERSONALIZATION);
+        let mut nh = hasher(ZCASH_SAPLING_SPENDS_NONCOMPACT_HASH_PERSONALIZATION);
+        // In a v5 transaction every spend shares the one anchor.
+        let anchor = <[u8; 32]>::from(
+            sapling
+                .shared_anchor()
+                .expect("v5 sapling spends share an anchor when present"),
+        );
+        for spend in sapling.spends() {
+            ch.update(&<[u8; 32]>::from(spend.nullifier));
+
+            update_serialized(&mut nh, &spend.cv);
+            nh.update(&anchor);
+            nh.update(&<[u8; 32]>::from(spend.rk.clone()));
+        }
+        h.update(ch.finalize().as_bytes());
+        h.update(nh.finalize().as_bytes());
+    }
+    h.finalize()
+}
+
+/// ZIP-244 §T.3b sapling outputs digest.
+fn hash_sapling_outputs(sapling: &sapling::ShieldedData<sapling::SharedAnchor>) -> Blake2bHash {
+    let mut h = hasher(ZCASH_SAPLING_OUTPUTS_HASH_PERSONALIZATION);
+    // With no outputs, nothing is written and the digest is the bare
+    // personalization hash.
+    if sapling.outputs().next().is_some() {
+        // The encrypted note is split across three sub-digests: bytes `..52` go
+        // to the compact digest, `52..564` to the memos digest, and `564..` to
+        // the non-compact digest.
+        let mut ch = hasher(ZCASH_SAPLING_OUTPUTS_COMPACT_HASH_PERSONALIZATION);
+        let mut mh = hasher(ZCASH_SAPLING_OUTPUTS_MEMOS_HASH_PERSONALIZATION);
+        let mut nh = hasher(ZCASH_SAPLING_OUTPUTS_NONCOMPACT_HASH_PERSONALIZATION);
+        for output in sapling.outputs() {
+            ch.update(&output.cm_u.to_bytes());
+            ch.update(&<[u8; 32]>::from(&output.ephemeral_key));
+            ch.update(&output.enc_ciphertext.0[..52]);
+
+            mh.update(&output.enc_ciphertext.0[52..564]);
+
+            update_serialized(&mut nh, &output.cv);
+            nh.update(&output.enc_ciphertext.0[564..]);
+            nh.update(&output.out_ciphertext.0[..]);
+        }
+        h.update(ch.finalize().as_bytes());
+        h.update(mh.finalize().as_bytes());
+        h.update(nh.finalize().as_bytes());
+    }
+    h.finalize()
+}
+
+/// ZIP-244 §T.3 sapling digest.
+fn hash_sapling_txid(
+    sapling: Option<&sapling::ShieldedData<sapling::SharedAnchor>>,
+) -> Blake2bHash {
+    let mut h = hasher(ZCASH_SAPLING_HASH_PERSONALIZATION);
+    if let Some(sapling) = sapling {
+        // `ShieldedData` only exists with at least one spend or output, so this
+        // matches librustzcash's "non-empty bundle" branch.
+        if sapling.spends().next().is_some() || sapling.outputs().next().is_some() {
+            h.update(hash_sapling_spends(sapling).as_bytes());
+            h.update(hash_sapling_outputs(sapling).as_bytes());
+            h.update(&sapling.value_balance.zatoshis().to_le_bytes());
+        }
+    }
+    h.finalize()
+}
+
+/// ZIP-244 §T.4 orchard digest (mirrors `orchard::bundle::commitments::hash_bundle_txid_data`).
+fn hash_orchard_txid(orchard: Option<&orchard::ShieldedData>) -> Blake2bHash {
+    let mut h = hasher(ZCASH_ORCHARD_HASH_PERSONALIZATION);
+    // Without Orchard data, nothing is written and the digest is the bare
+    // personalization hash.
+    if let Some(orchard) = orchard {
+        // Per-action data is split across the compact (`ch`), memos (`mh`) and
+        // non-compact (`nh`) sub-digests.
+        let mut ch = hasher(ZCASH_ORCHARD_ACTIONS_COMPACT_HASH_PERSONALIZATION);
+        let mut mh = hasher(ZCASH_ORCHARD_ACTIONS_MEMOS_HASH_PERSONALIZATION);
+        let mut nh = hasher(ZCASH_ORCHARD_ACTIONS_NONCOMPACT_HASH_PERSONALIZATION);
+        for action in orchard.actions() {
+            ch.update(&<[u8; 32]>::from(action.nullifier));
+            ch.update(&<[u8; 32]>::from(action.cm_x));
+            update_serialized(&mut ch, &action.ephemeral_key);
+            ch.update(&action.enc_ciphertext.0[..52]);
+
+            mh.update(&action.enc_ciphertext.0[52..564]);
+
+            update_serialized(&mut nh, &action.cv);
+            nh.update(&<[u8; 32]>::from(action.rk));
+            nh.update(&action.enc_ciphertext.0[564..]);
+            nh.update(&action.out_ciphertext.0[..]);
+        }
+        h.update(ch.finalize().as_bytes());
+        h.update(mh.finalize().as_bytes());
+        h.update(nh.finalize().as_bytes());
+        // Bundle-level fields follow the three action digests.
+        h.update(&[orchard.flags.bits()]);
+        h.update(&orchard.value_balance.zatoshis().to_le_bytes());
+        h.update(&<[u8; 32]>::from(orchard.shared_anchor));
+    }
+    h.finalize()
+}
+
+/// Combine the four level-1 digests into the txid (ZIP-244 txid digest).
+fn txid_inner(parts: &V5Parts) -> Hash {
+    // Root personalization: the 12-byte prefix followed by the LE branch id.
+    let branch_id = consensus_branch_id(parts).to_le_bytes();
+    let mut root_personal = [0u8; 16];
+    let (prefix, suffix) = root_personal.split_at_mut(12);
+    prefix.copy_from_slice(ZCASH_TX_PERSONALIZATION_PREFIX);
+    suffix.copy_from_slice(&branch_id);
+
+    // The root commits to the level-1 digests in this fixed order:
+    // header, transparent, sapling, orchard.
+    let mut root = hasher(&root_personal);
+    for node in [
+        hash_header(parts),
+        hash_transparent_txid(parts.inputs, parts.outputs),
+        hash_sapling_txid(parts.sapling),
+        hash_orchard_txid(parts.orchard),
+    ] {
+        root.update(node.as_bytes());
+    }
+
+    let digest = root.finalize();
+    Hash(
+        digest
+            .as_bytes()
+            .try_into()
+            .expect("BLAKE2b-256 digest is 32 bytes"),
+    )
+}
+
+// --- auth digest (ZIP-244 authorizing-data commitment) ------------------------
+
+/// ZIP-244 transparent script-sig digest: the serialized script of every
+/// transparent input, in input order.
+fn hash_transparent_auth(
+    inputs: &[transparent::Input],
+    outputs: &[transparent::Output],
+) -> Blake2bHash {
+    let mut digest = hasher(ZCASH_TRANSPARENT_SCRIPTS_HASH_PERSONALIZATION);
+
+    // Scripts are committed only when the transparent bundle is present, that
+    // is, when there is at least one input or output.
+    let bundle_is_absent = inputs.is_empty() && outputs.is_empty();
+    if bundle_is_absent {
+        return digest.finalize();
+    }
+
+    for input in inputs {
+        match input {
+            transparent::Input::Coinbase { .. } => {
+                let coinbase_script = input
+                    .coinbase_script()
+                    .expect("v5 coinbase input has a valid script sig");
+                update_serialized(&mut digest, &coinbase_script);
+            }
+            transparent::Input::PrevOut { unlock_script, .. } => {
+                update_serialized(&mut digest, unlock_script);
+            }
+        }
+    }
+    digest.finalize()
+}
+
+/// ZIP-244 sapling auth digest.
+fn hash_sapling_auth(
+    sapling: Option<&sapling::ShieldedData<sapling::SharedAnchor>>,
+) -> Blake2bHash {
+    let mut h = hasher(ZCASH_SAPLING_SIGS_HASH_PERSONALIZATION);
+    if let Some(sapling) = sapling {
+        // Order matters: all spend proofs, then all spend authorization
+        // signatures, then all output proofs, then the binding signature.
+        for spend in sapling.spends() {
+            h.update(&spend.zkproof.0[..]);
+        }
+        for spend in sapling.spends() {
+            h.update(&<[u8; 64]>::from(spend.spend_auth_sig)[..]);
+        }
+        for output in sapling.outputs() {
+            h.update(&output.zkproof.0[..]);
+        }
+        h.update(&<[u8; 64]>::from(sapling.binding_sig)[..]);
+    }
+    h.finalize()
+}
+
+/// ZIP-244 orchard auth digest (mirrors `orchard::bundle::commitments::hash_bundle_auth_data`).
+fn hash_orchard_auth(orchard: Option<&orchard::ShieldedData>) -> Blake2bHash {
+    let mut h = hasher(ZCASH_ORCHARD_SIGS_HASH_PERSONALIZATION);
+    if let Some(orchard) = orchard {
+        // Order matters: the proof, then each action's spend authorization
+        // signature, then the binding signature.
+        h.update(&orchard.proof.0[..]);
+        for action in orchard.actions.iter() {
+            update_serialized(&mut h, &action.spend_auth_sig);
+        }
+        update_serialized(&mut h, &orchard.binding_sig);
+    }
+    h.finalize()
+}
+
+/// Combine the three authorizing-data digests into the ZIP-244 auth commitment.
+fn auth_digest_inner(parts: &V5Parts) -> AuthDigest {
+    let transparent = hash_transparent_auth(parts.inputs, parts.outputs);
+    let sapling = hash_sapling_auth(parts.sapling);
+    let orchard = hash_orchard_auth(parts.orchard);
+
+    // Root personalization: the 12-byte prefix followed by the LE branch id.
+    let mut personal = [0u8; 16];
+    personal[..12].copy_from_slice(ZCASH_AUTH_PERSONALIZATION_PREFIX);
+    personal[12..].copy_from_slice(&consensus_branch_id(parts).to_le_bytes());
+
+    let mut h = hasher(&personal);
+    h.update(transparent.as_bytes());
+    h.update(sapling.as_bytes());
+    h.update(orchard.as_bytes());
+
+    AuthDigest(
+        h.finalize()
+            .as_bytes()
+            .try_into()
+            .expect("BLAKE2b-256 digest is 32 bytes"),
+    )
+}
+
+// --- public entry points ------------------------------------------------------
+
+/// Computes the txid of a v5 transaction natively, or returns `None` for other
+/// versions (the caller falls back to the `librustzcash` path).
+pub(crate) fn txid(tx: &Transaction) -> Option<Hash> {
+    // `v5_parts` yields `None` for non-v5 transactions, which `?` propagates.
+    Some(txid_inner(&v5_parts(tx)?))
+}
+
+/// Computes the ZIP-244 authorizing-data digest of a v5 transaction natively, or
+/// returns `None` for other versions.
+pub(crate) fn auth_digest(tx: &Transaction) -> Option<AuthDigest> {
+    Some(auth_digest_inner(&v5_parts(tx)?))
+}
+
+/// Computes both the txid and the ZIP-244 authorizing-data digest of a v5
+/// transaction natively, or returns `None` for other versions.
+pub(crate) fn txid_and_auth_digest(tx: &Transaction) -> Option<(Hash, AuthDigest)> {
+    // Extract the v5 fields once and share them between both digests.
+    let parts = v5_parts(tx)?;
+    Some((txid_inner(&parts), auth_digest_inner(&parts)))
+}
diff --git a/zebra-consensus/src/block.rs b/zebra-consensus/src/block.rs index 1a1217d44c9..4b0134ae114 100644 --- a/zebra-consensus/src/block.rs +++ b/zebra-consensus/src/block.rs @@ -353,14 +353,17 @@ where let new_outputs = Arc::into_inner(known_utxos) .expect("all verification tasks using known_utxos are complete"); - let prepared_block = zs::SemanticallyVerifiedBlock { + // The semantic verifier checks the auth-data commitment during + // contextual validation, so the auth-data root isn't precomputed here + // (`from_semantic_data` leaves it unset). + let prepared_block = zs::SemanticallyVerifiedBlock::from_semantic_data( block, hash, height, new_outputs, transaction_hashes, - deferred_pool_balance_change: Some(deferred_pool_balance_change), - }; + Some(deferred_pool_balance_change), + ); // Return early for proposal requests.
if request.is_proposal() { diff --git a/zebra-consensus/src/block/request.rs b/zebra-consensus/src/block/request.rs index 534f6c599b8..d03abc2de26 100644 --- a/zebra-consensus/src/block/request.rs +++ b/zebra-consensus/src/block/request.rs @@ -2,13 +2,30 @@ use std::sync::Arc; -use zebra_chain::block::Block; +use zebra_chain::{ + block::{self, Block}, + parameters::Network, +}; +use zebra_state::CheckpointVerifiedBlock; + +use crate::checkpoint::VerifyCheckpointError; #[derive(Debug, Clone, PartialEq, Eq)] /// A request to the chain or block verifier pub enum Request { /// Performs semantic validation, then asks the state to perform contextual validation and commit the block Commit(Arc), + + /// Like [`Request::Commit`], but the (CPU-heavy) checkpoint-verifier + /// precomputation — the per-transaction txids and the auth data root — has + /// already been done by the caller, off the single-threaded checkpoint + /// verifier. + /// + /// Only valid below the checkpoint height; the verifier still performs all + /// validity checks (proof of work, Merkle root, height). Used by the syncer, + /// which can build these blocks concurrently across many download tasks. + CommitCheckpointPrecomputed(CheckpointVerifiedBlock), + /// Performs semantic validation but skips checking proof of work, /// then asks the state to perform contextual validation. /// Does not commit the block to the state. @@ -16,18 +33,62 @@ pub enum Request { } impl Request { + /// Creates a commit request for the downloaded block. + /// + /// For checkpoint-height blocks, precompute the checkpoint-verified block + /// off the verifier's single-threaded buffer worker. Callers should do this + /// before reserving verifier readiness, so the CPU-heavy work does not hold a + /// verifier slot. 
+ pub async fn create_commit_request( + block: Arc, + block_height: block::Height, + max_checkpoint_height: block::Height, + network: Network, + ) -> Result { + if block_height <= max_checkpoint_height { + let hash = block.hash(); + + // Keep checkpoint sync's cheap proof-of-work gate before the + // per-transaction precomputation, matching the verifier path. + // Security: This prevents attackers from flooding the verifier with invalid blocks + // only to reject afterwards. + if network.disable_pow() { + super::check::difficulty_threshold_is_valid( + &block.header, + &network, + &block_height, + &hash, + )?; + } else { + super::check::difficulty_is_valid(&block.header, &network, &block_height, &hash)?; + super::check::equihash_solution_is_valid(&block.header)?; + } + + let checkpoint_block = tokio::task::spawn_blocking(move || { + CheckpointVerifiedBlock::with_hash(block, hash) + }) + .await + .expect("checkpoint block precomputation should not panic"); + + Ok(Request::CommitCheckpointPrecomputed(checkpoint_block)) + } else { + Ok(Request::Commit(block)) + } + } + /// Returns inner block pub fn block(&self) -> Arc { - Arc::clone(match self { - Request::Commit(block) => block, - Request::CheckProposal(block) => block, - }) + match self { + Request::Commit(block) => Arc::clone(block), + Request::CommitCheckpointPrecomputed(block) => Arc::clone(&block.block), + Request::CheckProposal(block) => Arc::clone(block), + } } /// Returns `true` if the request is a proposal pub fn is_proposal(&self) -> bool { match self { - Request::Commit(_) => false, + Request::Commit(_) | Request::CommitCheckpointPrecomputed(_) => false, Request::CheckProposal(_) => true, } } diff --git a/zebra-consensus/src/block/tests.rs b/zebra-consensus/src/block/tests.rs index d316bb6d69b..9db7b6eb61f 100644 --- a/zebra-consensus/src/block/tests.rs +++ b/zebra-consensus/src/block/tests.rs @@ -14,7 +14,7 @@ use zebra_chain::{ }, Block, Height, }, - parameters::{subsidy::block_subsidy, NetworkUpgrade}, 
+ parameters::{subsidy::block_subsidy, Network, NetworkUpgrade}, serialization::{ZcashDeserialize, ZcashDeserializeInto}, transaction::{arbitrary::transaction_to_fake_v5, LockTime, Transaction}, work::difficulty::{ParameterDifficulty as _, INVALID_COMPACT_DIFFICULTY}, @@ -158,6 +158,69 @@ async fn check_transcripts() -> Result<(), Report> { Ok(()) } +#[tokio::test] +async fn create_commit_request_selects_checkpoint_precomputation() -> Result<(), Report> { + let _init_guard = zebra_test::init(); + + let block: Arc = + Block::zcash_deserialize(&zebra_test::vectors::BLOCK_MAINNET_GENESIS_BYTES[..])?.into(); + let max_checkpoint_height = Height(1); + + let request = Request::create_commit_request( + block.clone(), + Height(0), + max_checkpoint_height, + Network::Mainnet, + ) + .await?; + assert!(matches!(request, Request::CommitCheckpointPrecomputed(_))); + assert_eq!(request.block(), block); + + let request = Request::create_commit_request( + block.clone(), + max_checkpoint_height, + max_checkpoint_height, + Network::Mainnet, + ) + .await?; + assert!(matches!(request, Request::CommitCheckpointPrecomputed(_))); + assert_eq!(request.block(), block); + + let request = Request::create_commit_request( + block.clone(), + Height(2), + max_checkpoint_height, + Network::Mainnet, + ) + .await?; + assert_eq!(request, Request::Commit(block)); + + Ok(()) +} + +#[tokio::test] +async fn create_commit_request_rejects_invalid_checkpoint_pow() -> Result<(), Report> { + let _init_guard = zebra_test::init(); + + let block = + Arc::::zcash_deserialize(&zebra_test::vectors::BLOCK_MAINNET_GENESIS_BYTES[..])?; + let mut block = Arc::try_unwrap(block).expect("genesis block should have no other references"); + let block_height = block.coinbase_height().expect("genesis block has height"); + + Arc::make_mut(&mut block.header).difficulty_threshold = INVALID_COMPACT_DIFFICULTY; + + let request = + Request::create_commit_request(block.into(), block_height, block_height, Network::Mainnet) + .await; 
+ + assert!( + request.is_err(), + "invalid checkpoint proof of work must be rejected before precomputation" + ); + + Ok(()) +} + #[test] fn coinbase_is_first_for_historical_blocks() -> Result<(), Report> { let _init_guard = zebra_test::init(); diff --git a/zebra-consensus/src/checkpoint.rs b/zebra-consensus/src/checkpoint.rs index 23ccc26a385..31de33bfbf3 100644 --- a/zebra-consensus/src/checkpoint.rs +++ b/zebra-consensus/src/checkpoint.rs @@ -601,30 +601,75 @@ where .ok_or(VerifyCheckpointError::CoinbaseHeight { hash })?; self.check_height(height)?; + // Cheap proof-of-work checks run *before* the expensive precomputation, + // so a flood of invalid-PoW blocks can't make us do per-transaction work. + self.check_proof_of_work(&block.header, height, hash)?; + + // Precompute the per-transaction hashes and auth data root, which scale + // with block weight. (The precomputed path does this concurrently in the + // caller and skips it here.) + let block = CheckpointVerifiedBlock::with_hash(block, hash); + + self.finish_validation(block) + } + + /// Check a [`CheckpointVerifiedBlock`] whose precomputation (txids, auth data + /// root) was already done by the caller, off the single-threaded verifier. + /// + /// Runs the same validity checks as [`Self::check_block`] (height, proof of + /// work, Merkle root) against the precomputed block. + fn validate_precomputed_block( + &self, + block: CheckpointVerifiedBlock, + ) -> Result { + let hash = block.hash; + let height = block.height; + self.check_height(height)?; + self.check_proof_of_work(&block.block.header, height, hash)?; + self.finish_validation(block) + } + + /// Check the block's proof of work (difficulty, and equihash unless disabled). 
+ fn check_proof_of_work( + &self, + header: &block::Header, + height: block::Height, + hash: block::Hash, + ) -> Result<(), VerifyCheckpointError> { if self.network.disable_pow() { crate::block::check::difficulty_threshold_is_valid( - &block.header, + header, &self.network, &height, &hash, )?; } else { - crate::block::check::difficulty_is_valid(&block.header, &self.network, &height, &hash)?; - crate::block::check::equihash_solution_is_valid(&block.header)?; + crate::block::check::difficulty_is_valid(header, &self.network, &height, &hash)?; + crate::block::check::equihash_solution_is_valid(header)?; } + Ok(()) + } + + /// Finish validating a (precomputed) checkpoint block: set its deferred pool + /// balance change and check its Merkle root. + fn finish_validation( + &self, + mut block: CheckpointVerifiedBlock, + ) -> Result { + let height = block.height; + // See [ZIP-1015](https://zips.z.cash/zip-1015). let expected_deferred_amount = funding_stream_values(height, &self.network, block_subsidy(height, &self.network)?)? .remove(&FundingStreamReceiver::Deferred); - let deferred_pool_balance_change = expected_deferred_amount - .unwrap_or_default() - .checked_sub(self.network.lockbox_disbursement_total_amount(height)) - .map(DeferredPoolBalanceChange::new); - - // don't do precalculation until the block passes basic difficulty checks - let block = CheckpointVerifiedBlock::new(block, Some(hash), deferred_pool_balance_change); + block.set_deferred_pool_balance_change( + expected_deferred_amount + .unwrap_or_default() + .checked_sub(self.network.lockbox_disbursement_total_amount(height)) + .map(DeferredPoolBalanceChange::new), + ); crate::block::check::merkle_root_validity( &self.network, @@ -647,11 +692,31 @@ where /// returns an error immediately. 
#[allow(clippy::unwrap_in_result)] fn queue_block(&mut self, block: Arc) -> Result { + let block = self.check_block(block)?; + self.enqueue(block) + } + + /// Like [`Self::queue_block`], but for a block whose precomputation was + /// already done by the caller (off the single-threaded verifier). + #[allow(clippy::unwrap_in_result)] + fn queue_precomputed_block( + &mut self, + block: CheckpointVerifiedBlock, + ) -> Result { + let block = self.validate_precomputed_block(block)?; + self.enqueue(block) + } + + /// Add an already-validated checkpoint block to the queue of blocks waiting + /// to be verified against a checkpoint. + #[allow(clippy::unwrap_in_result)] + fn enqueue( + &mut self, + block: CheckpointVerifiedBlock, + ) -> Result { // Set up a oneshot channel to send results let (tx, rx) = oneshot::channel(); - // Check that the height and Merkle roots are valid. - let block = self.check_block(block)?; let height = block.height; let hash = block.hash; @@ -707,6 +772,134 @@ where Ok(req_block) } + /// Verify a checkpoint block whose precomputation (per-transaction txids and + /// auth data root) was already done concurrently by the caller, off this + /// single-threaded verifier. The verifier still performs all validity checks. + /// + /// This is the fast path used by the syncer: only the cheap checks and the + /// queue/commit bookkeeping run here, while the expensive precomputation has + /// already happened across many concurrent download tasks. + pub(crate) fn call_precomputed( + &mut self, + block: CheckpointVerifiedBlock, + ) -> Pin> + Send + 'static>> + { + // Reset the verifier back to the state tip if requested + // (e.g. due to an error when committing a block to the state) + if let Ok(tip) = self.reset_receiver.try_recv() { + self.reset_progress(tip); + } + + // Immediately reject all incoming blocks that arrive after we've finished. 
+ if let FinalCheckpoint = self.previous_checkpoint_height() { + return async { Err(VerifyCheckpointError::Finished) }.boxed(); + } + + let req_block = match self.queue_precomputed_block(block) { + Ok(req_block) => req_block, + Err(e) => return async { Err(e) }.boxed(), + }; + + self.verify_and_commit(req_block) + } + + /// Process a queued checkpoint block: advance checkpoint-range verification + /// and spawn the task that commits the block to the state once its range is + /// verified. Shared by the [`Service`] and precomputed entry points. + fn verify_and_commit( + &mut self, + req_block: RequestBlock, + ) -> Pin> + Send + 'static>> + { + self.process_checkpoint_range(); + + metrics::gauge!("checkpoint.queued_slots").set(self.queued.len() as f64); + + // Because the checkpoint verifier duplicates state from the state + // service (it tracks which checkpoints have been verified), we must + // commit blocks transactionally on a per-checkpoint basis. Otherwise, + // the checkpoint verifier's state could desync from the underlying + // state service. Among other problems, this could cause the checkpoint + // verifier to reject blocks not already in the state as + // already-verified. + // + // # Dropped Receivers + // + // To commit blocks transactionally on a per-checkpoint basis, we must + // commit all verified blocks in a checkpoint range, regardless of + // whether or not the response futures for each block were dropped. + // + // We accomplish this by spawning a new task containing the + // commit-if-verified logic. This task will always execute, except if + // the program is interrupted, in which case there is no longer a + // checkpoint verifier to keep in sync with the state. + // + // # State Commit Failures + // + // If the state commit fails due to corrupt block data, + // we don't reject the entire checkpoint. + // Instead, we reset the verifier to the successfully committed state tip. 
+ let state_service = self.state_service.clone(); + let commit_checkpoint_verified = tokio::spawn(async move { + let hash = req_block + .rx + .await + .map_err(Into::into) + .map_err(VerifyCheckpointError::CommitCheckpointVerified) + .expect("CheckpointVerifier does not leave dangling receivers")?; + + // We use a `ServiceExt::oneshot`, so that every state service + // `poll_ready` has a corresponding `call`. See #1593. + match state_service + .oneshot(zs::Request::CommitCheckpointVerifiedBlock(req_block.block)) + .map_err(VerifyCheckpointError::CommitCheckpointVerified) + .await? + { + zs::Response::Committed(committed_hash) => { + assert_eq!(committed_hash, hash, "state must commit correct hash"); + Ok(hash) + } + _ => unreachable!("wrong response for CommitCheckpointVerifiedBlock"), + } + }); + + let state_service = self.state_service.clone(); + let reset_sender = self.reset_sender.clone(); + async move { + let result = commit_checkpoint_verified.await; + // Avoid a panic on shutdown + // + // When `zebrad` is terminated using Ctrl-C, the `commit_checkpoint_verified` task + // can return a `JoinError::Cancelled`. We expect task cancellation on shutdown, + // so we don't need to panic here. The persistent state is correct even when the + // task is cancelled, because block data is committed inside transactions, in + // height order. + let result = if zebra_chain::shutdown::is_shutting_down() { + Err(VerifyCheckpointError::ShuttingDown) + } else { + result.expect("commit_checkpoint_verified should not panic") + }; + if result.is_err() { + // If there was an error committing the block, then this verifier + // will be out of sync with the state. In that case, reset + // its progress back to the state tip. + let tip = match state_service + .oneshot(zs::Request::Tip) + .await + .map_err(VerifyCheckpointError::Tip)? 
+ { + zs::Response::Tip(tip) => tip, + _ => unreachable!("wrong response for Tip"), + }; + // Ignore errors since send() can fail only when the verifier + // is being dropped, and then it doesn't matter anymore. + let _ = reset_sender.send(tip); + } + result + } + .boxed() + } + /// During checkpoint range processing, process all the blocks at `height`. /// /// Returns the first valid block. If there is no valid block, returns None. @@ -1105,92 +1298,6 @@ where Err(e) => return async { Err(e) }.boxed(), }; - self.process_checkpoint_range(); - - metrics::gauge!("checkpoint.queued_slots").set(self.queued.len() as f64); - - // Because the checkpoint verifier duplicates state from the state - // service (it tracks which checkpoints have been verified), we must - // commit blocks transactionally on a per-checkpoint basis. Otherwise, - // the checkpoint verifier's state could desync from the underlying - // state service. Among other problems, this could cause the checkpoint - // verifier to reject blocks not already in the state as - // already-verified. - // - // # Dropped Receivers - // - // To commit blocks transactionally on a per-checkpoint basis, we must - // commit all verified blocks in a checkpoint range, regardless of - // whether or not the response futures for each block were dropped. - // - // We accomplish this by spawning a new task containing the - // commit-if-verified logic. This task will always execute, except if - // the program is interrupted, in which case there is no longer a - // checkpoint verifier to keep in sync with the state. - // - // # State Commit Failures - // - // If the state commit fails due to corrupt block data, - // we don't reject the entire checkpoint. - // Instead, we reset the verifier to the successfully committed state tip. 
- let state_service = self.state_service.clone(); - let commit_checkpoint_verified = tokio::spawn(async move { - let hash = req_block - .rx - .await - .map_err(Into::into) - .map_err(VerifyCheckpointError::CommitCheckpointVerified) - .expect("CheckpointVerifier does not leave dangling receivers")?; - - // We use a `ServiceExt::oneshot`, so that every state service - // `poll_ready` has a corresponding `call`. See #1593. - match state_service - .oneshot(zs::Request::CommitCheckpointVerifiedBlock(req_block.block)) - .map_err(VerifyCheckpointError::CommitCheckpointVerified) - .await? - { - zs::Response::Committed(committed_hash) => { - assert_eq!(committed_hash, hash, "state must commit correct hash"); - Ok(hash) - } - _ => unreachable!("wrong response for CommitCheckpointVerifiedBlock"), - } - }); - - let state_service = self.state_service.clone(); - let reset_sender = self.reset_sender.clone(); - async move { - let result = commit_checkpoint_verified.await; - // Avoid a panic on shutdown - // - // When `zebrad` is terminated using Ctrl-C, the `commit_checkpoint_verified` task - // can return a `JoinError::Cancelled`. We expect task cancellation on shutdown, - // so we don't need to panic here. The persistent state is correct even when the - // task is cancelled, because block data is committed inside transactions, in - // height order. - let result = if zebra_chain::shutdown::is_shutting_down() { - Err(VerifyCheckpointError::ShuttingDown) - } else { - result.expect("commit_checkpoint_verified should not panic") - }; - if result.is_err() { - // If there was an error committing the block, then this verifier - // will be out of sync with the state. In that case, reset - // its progress back to the state tip. - let tip = match state_service - .oneshot(zs::Request::Tip) - .await - .map_err(VerifyCheckpointError::Tip)? 
- { - zs::Response::Tip(tip) => tip, - _ => unreachable!("wrong response for Tip"), - }; - // Ignore errors since send() can fail only when the verifier - // is being dropped, and then it doesn't matter anymore. - let _ = reset_sender.send(tip); - } - result - } - .boxed() + self.verify_and_commit(req_block) } } diff --git a/zebra-consensus/src/config.rs b/zebra-consensus/src/config.rs index 709c73ddff2..2a56842bbd2 100644 --- a/zebra-consensus/src/config.rs +++ b/zebra-consensus/src/config.rs @@ -26,6 +26,8 @@ pub struct Config { /// /// Disabling this option makes Zebra start full validation earlier. /// It is slower and less secure. + /// To keep checkpoint sync enabled but force-disable the initial VCT fast-sync rollout, use + /// [`disable_vct_fast_sync`](Self::disable_vct_fast_sync) instead. /// /// Zebra requires some checkpoints to simplify validation of legacy network upgrades. /// Required checkpoints are always active, even when this option is `false`. @@ -35,22 +37,41 @@ pub struct Config { /// For security reasons, this option might be deprecated or ignored in a future Zebra /// release. pub checkpoint_sync: bool, + + /// Force-disable the verified-commitment-trees fast sync path during its initial rollout. + /// + /// This keeps [`checkpoint_sync`](Self::checkpoint_sync) enabled while forcing the legacy + /// per-block Sapling/Orchard tree recompute in both Archive and Pruned storage modes. Set to + /// `false` by default: checkpoint sync uses VCT fast sync on networks with embedded handoff + /// frontiers. + pub disable_vct_fast_sync: bool, } impl From for Config { fn from( InnerConfig { - checkpoint_sync, .. + checkpoint_sync, + disable_vct_fast_sync, + .. 
}: InnerConfig, ) -> Self { - Self { checkpoint_sync } + Self { + checkpoint_sync, + disable_vct_fast_sync, + } } } impl From for InnerConfig { - fn from(Config { checkpoint_sync }: Config) -> Self { + fn from( + Config { + checkpoint_sync, + disable_vct_fast_sync, + }: Config, + ) -> Self { Self { checkpoint_sync, + disable_vct_fast_sync, _debug_skip_parameter_preload: false, } } @@ -66,6 +87,9 @@ pub struct InnerConfig { /// See [`Config`] for more details. pub checkpoint_sync: bool, + /// See [`Config`] for more details. + pub disable_vct_fast_sync: bool, + #[serde(skip_serializing, rename = "debug_skip_parameter_preload")] /// Unused config field for backwards compatibility. pub _debug_skip_parameter_preload: bool, @@ -78,6 +102,7 @@ impl Default for Config { fn default() -> Self { Self { checkpoint_sync: true, + disable_vct_fast_sync: false, } } } @@ -86,7 +111,30 @@ impl Default for InnerConfig { fn default() -> Self { Self { checkpoint_sync: Config::default().checkpoint_sync, + disable_vct_fast_sync: Config::default().disable_vct_fast_sync, _debug_skip_parameter_preload: false, } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn disable_vct_fast_sync_defaults_false_and_converts_through_inner_config() { + assert!(!Config::default().disable_vct_fast_sync); + + let force_disabled = Config::from(InnerConfig { + checkpoint_sync: true, + disable_vct_fast_sync: true, + _debug_skip_parameter_preload: false, + }); + + assert!(force_disabled.checkpoint_sync); + assert!(force_disabled.disable_vct_fast_sync); + + let inner = InnerConfig::from(force_disabled); + assert!(inner.disable_vct_fast_sync); + } +} diff --git a/zebra-consensus/src/router.rs b/zebra-consensus/src/router.rs index 91bfa46ee12..905702de21e 100644 --- a/zebra-consensus/src/router.rs +++ b/zebra-consensus/src/router.rs @@ -200,6 +200,17 @@ where } fn call(&mut self, request: Request) -> Self::Future { + // A precomputed checkpoint block is, by construction, below the + // checkpoint 
height; route it straight to the checkpoint verifier's + // fast path (which skips the now-already-done precomputation). + if let Request::CommitCheckpointPrecomputed(block) = request { + return self + .checkpoint + .call_precomputed(block) + .map_err(Into::into) + .boxed(); + } + let block = request.block(); match block.coinbase_height() { diff --git a/zebra-consensus/src/router/tests.rs b/zebra-consensus/src/router/tests.rs index 719840e49c5..e0117c2676a 100644 --- a/zebra-consensus/src/router/tests.rs +++ b/zebra-consensus/src/router/tests.rs @@ -143,10 +143,12 @@ static STATE_VERIFY_TRANSCRIPT_GENESIS: Lazy< async fn verify_checkpoint_test() -> Result<(), Report> { verify_checkpoint(Config { checkpoint_sync: true, + ..Config::default() }) .await?; verify_checkpoint(Config { checkpoint_sync: false, + ..Config::default() }) .await?; diff --git a/zebra-consensus/src/transaction.rs b/zebra-consensus/src/transaction.rs index d7f043e1c0a..939a87a88f9 100644 --- a/zebra-consensus/src/transaction.rs +++ b/zebra-consensus/src/transaction.rs @@ -406,6 +406,27 @@ where check::has_enough_orchard_flags(&tx)?; check::consensus_branch_id(&tx, req.height(), &network)?; + // # Consensus + // + // > Check that an Output description's cv and epk are not of small + // > order, [and] that a Spend description's cv and rk are not of + // > small order. + // + // https://zips.z.cash/protocol/protocol.pdf#outputdesc + // https://zips.z.cash/protocol/protocol.pdf#spenddesc + // + // The not-small-order check for Sapling cv and epk is deferred from + // deserialization, which stores them as raw bytes to keep point + // decompression off the checkpoint-sync hot path (the checkpoint + // verifier does not need it, because it trusts block hashes). Enforce + // it here on the semantic verification path and the mempool, which + // process untrusted transactions, before any state lookup or the + // librustzcash conversion so an invalid point fails fast. 
(Spend rk + // is still validated at deserialization.) + if !tx.sapling_point_encodings_are_valid() { + return Err(TransactionError::SmallOrder); + } + // Soft fork: temporarily require transactions to not contain Orchard actions. // // This soft fork was added while NU 6.1 was the active epoch on the Zcash diff --git a/zebra-consensus/src/transaction/tests.rs b/zebra-consensus/src/transaction/tests.rs index d575045df72..a64e34ae748 100644 --- a/zebra-consensus/src/transaction/tests.rs +++ b/zebra-consensus/src/transaction/tests.rs @@ -2716,6 +2716,100 @@ fn v4_with_sapling_outputs_and_no_spends() { }) } +/// A transaction whose Sapling output has an invalid (off-curve) ephemeral key +/// is rejected by the verifier with `SmallOrder`. +/// +/// The not-small-order consensus check for Sapling `cv`/`epk` is deferred from +/// deserialization (to keep point decompression off the checkpoint-sync hot +/// path) and re-enforced by `Verifier::call` via +/// `Transaction::sapling_point_encodings_are_valid`, in the early quick checks. +/// This drives the full verifier end-to-end and confirms the rejection: the +/// state service is `unreachable!` because the check fires before any state +/// lookup. It mirrors `v4_with_sapling_outputs_and_no_spends` (which accepts the +/// same transaction shape) with only the ephemeral key corrupted. 
+#[test] +fn sapling_output_with_invalid_ephemeral_key_is_rejected() { + let _init_guard = zebra_test::init(); + zebra_test::MULTI_THREADED_RUNTIME.block_on(async { + let network = Network::Mainnet; + + let (height, mut transaction) = test_transactions(&network) + .rev() + .filter(|(_, transaction)| { + !transaction.is_coinbase() && transaction.inputs().is_empty() + }) + .find(|(_, transaction)| { + transaction.sapling_spends_per_anchor().next().is_none() + && transaction.sapling_outputs().next().is_some() + }) + .expect("a transaction with Sapling outputs and no Sapling spends"); + + // Corrupt the first Sapling output's ephemeral key to an off-curve point. + corrupt_first_sapling_output_ephemeral_key( + Arc::get_mut(&mut transaction).expect("transaction only has one active reference"), + ); + + // The state service must not be reached: the check fires before any + // state lookup. + let state_service = + service_fn(|_| async { unreachable!("State service should not be called") }); + let verifier = Verifier::new_for_tests(&network, state_service); + + let result = verifier + .oneshot(Request::Block { + transaction_hash: transaction.hash(), + transaction, + known_utxos: Arc::new(HashMap::new()), + known_outpoint_hashes: Arc::new(HashSet::new()), + height, + time: DateTime::::MAX_UTC, + }) + .await; + + assert_eq!( + result, + Err(TransactionError::SmallOrder), + "a Sapling output with an off-curve ephemeral key must be rejected with SmallOrder", + ); + }); +} + +/// Replaces the first Sapling output's ephemeral key with an off-curve point, +/// for `sapling_output_with_invalid_ephemeral_key_is_rejected`. +fn corrupt_first_sapling_output_ephemeral_key(transaction: &mut Transaction) { + let bad_epk = sapling::keys::EphemeralPublicKey::try_from([0xffu8; 32]) + .expect("deserialization defers point validation, so try_from stores the bytes"); + + match transaction { + Transaction::V4 { + sapling_shielded_data: Some(shielded_data), + .. 
+ } => set_first_sapling_output_ephemeral_key(&mut shielded_data.transfers, bad_epk), + Transaction::V5 { + sapling_shielded_data: Some(shielded_data), + .. + } => set_first_sapling_output_ephemeral_key(&mut shielded_data.transfers, bad_epk), + _ => panic!("expected a V4 or V5 transaction with Sapling data"), + } +} + +fn set_first_sapling_output_ephemeral_key( + transfers: &mut sapling::TransferData, + ephemeral_key: sapling::keys::EphemeralPublicKey, +) { + match transfers { + sapling::TransferData::JustOutputs { outputs } => { + let mut outputs_vec = outputs.as_slice().to_vec(); + outputs_vec[0].ephemeral_key = ephemeral_key; + *outputs = AtLeastOne::from_vec(outputs_vec) + .expect("replacing a field keeps at least one output"); + } + sapling::TransferData::SpendsAndMaybeOutputs { maybe_outputs, .. } => { + maybe_outputs[0].ephemeral_key = ephemeral_key; + } + } +} + /// Test if a V5 transaction with Sapling spends is accepted by the verifier. #[tokio::test] async fn v5_with_sapling_spends() { diff --git a/zebra-network/src/peer/connection.rs b/zebra-network/src/peer/connection.rs index 07c1b10e3b7..f874a1ecde8 100644 --- a/zebra-network/src/peer/connection.rs +++ b/zebra-network/src/peer/connection.rs @@ -1057,7 +1057,15 @@ where .map(|()| Handler::Ping { nonce, ping_sent_at }) } - (AwaitingRequest, BlocksByHash(hashes) | BlocksByHashFrom { hashes, .. }) => { + // `HedgedBlocksByHash` is rewritten to `BlocksByHash` by the peer set + // and should not reach an individual connection, but handle it + // identically as a defensive fallback. + ( + AwaitingRequest, + BlocksByHash(hashes) + | BlocksByHashFrom { hashes, .. } + | HedgedBlocksByHash { hashes, .. 
}, + ) => { self .peer_tx .send(Message::GetData( diff --git a/zebra-network/src/peer_set/set.rs b/zebra-network/src/peer_set/set.rs index d172cc679eb..28582698fe2 100644 --- a/zebra-network/src/peer_set/set.rs +++ b/zebra-network/src/peer_set/set.rs @@ -125,7 +125,7 @@ use tower::{ Service, }; -use zebra_chain::{chain_tip::ChainTip, parameters::Network}; +use zebra_chain::{block, chain_tip::ChainTip, parameters::Network}; use crate::{ address_book::AddressMetrics, @@ -1135,6 +1135,88 @@ where .boxed() } + /// Routes a single-block download to up to `fanout` random ready peers, + /// ignoring inventory markers, and resolves with the first peer that + /// delivers the block. + /// + /// # Security + /// + /// Peers are chosen randomly and load is ignored, matching the broadcast + /// path: this prevents a peer from biasing selection by manipulating its own + /// load. Inventory markers are deliberately ignored — the caller uses this + /// only for the head-of-line block after a registry-miss, where the markers + /// are stale and `route_inv` would otherwise fail the request even though + /// ready peers actually have the block. The fanout is small and scoped to a + /// single hash, bounding the extra load this places on the peer set. + /// + /// If no ready peers have the block (or there are none), returns the same + /// synthetic [`NotFoundRegistry`](PeerError::NotFoundRegistry) error as + /// [`route_inv`](Self::route_inv), so the caller's head-of-line retry and + /// backoff handling applies unchanged. + fn route_hedge( + &mut self, + hashes: HashSet, + fanout: usize, + ) -> >::Future { + let inv_hash = InventoryHash::from( + *hashes + .iter() + .next() + .expect("hedged block requests contain exactly one hash"), + ); + + if self.ready_services.is_empty() { + metrics::counter!("pool.route_hedge.no_ready.count").increment(1); + return async move { + // Let other tasks run, so a retry might find different ready peers. 
+ tokio::task::yield_now().await; + Err::( + SharedPeerError::from(PeerError::NotFoundRegistry(vec![inv_hash])).into(), + ) + } + .boxed(); + } + + let fanout = fanout.clamp(1, self.ready_services.len()); + let selected_peers = self.select_random_ready_peers(fanout); + metrics::counter!("pool.route_hedge.dispatch.count").increment(1); + + // Send a plain `BlocksByHash` to each selected peer; peers and + // connections never see the hedged variant. + let inner = Request::BlocksByHash(hashes); + let mut futs = FuturesUnordered::new(); + for key in selected_peers { + let mut svc = self + .take_ready_service(&key) + .expect("selected peers are ready"); + futs.push(svc.call(inner.clone())); + self.push_unready(key, svc); + } + + async move { + // Take the first peer that actually delivers an available block. + // Peers that are genuinely missing it (or error) are skipped; the + // remaining in-flight calls are cancelled when `futs` drops on return. + while let Some(result) = futs.next().await { + if let Ok(Response::Blocks(blocks)) = result { + if blocks.iter().any(|block| block.available().is_some()) { + metrics::counter!("pool.route_hedge.win.count").increment(1); + return Ok(Response::Blocks(blocks)); + } + } + } + + // Every hedged peer was missing the block (or errored). Surface the + // same synthetic registry-miss `route_inv` would, so the sync layer's + // head-of-line retry/backoff handling applies. + metrics::counter!("pool.route_hedge.exhausted.count").increment(1); + Err::( + SharedPeerError::from(PeerError::NotFoundRegistry(vec![inv_hash])).into(), + ) + } + .boxed() + } + /// Broadcasts the same request to lots of ready peers, ignoring return values. fn route_broadcast(&mut self, req: Request) -> >::Future { // Broadcasts ignore the response @@ -1433,6 +1515,12 @@ where self.route_inv(req, hash) } + // Head-of-line hedge: fan a single-block download out to a few random + // ready peers, ignoring inventory markers, and take the first delivery. 
+ Request::HedgedBlocksByHash { ref hashes, fanout } if hashes.len() == 1 => { + self.route_hedge(hashes.clone(), fanout) + } + // Broadcast advertisements to lots of peers Request::AdvertiseTransactionIds(_, _) => self.route_broadcast(req), Request::AdvertiseBlock(_, _) => self.route_broadcast(req), diff --git a/zebra-network/src/peer_set/set/tests/vectors.rs b/zebra-network/src/peer_set/set/tests/vectors.rs index 72fc7c6e756..0304c92c506 100644 --- a/zebra-network/src/peer_set/set/tests/vectors.rs +++ b/zebra-network/src/peer_set/set/tests/vectors.rs @@ -695,3 +695,81 @@ fn peer_set_route_inv_all_missing_fail() { ); }); } + +/// Check that a hedged block request still reaches ready peers that are all marked +/// missing the inventory, where a plain `BlocksByHash` would fail with a synthetic +/// `NotFoundRegistry`. This is the stale-inventory-marker bypass the head-of-line +/// hedge relies on. +#[test] +fn peer_set_route_hedge_bypasses_missing_markers() { + let test_hash = block::Hash([0; 32]); + let test_inv = InventoryHash::Block(test_hash); + + // Hard-coded fixed test addresses created by mock_peer_discovery. + let peer_addrs: [PeerSocketAddr; 2] = [ + "127.0.0.1:1".parse().expect("valid peer address"), + "127.0.0.1:2".parse().expect("valid peer address"), + ]; + + // Use two peers with the same version. + let peer_version = Version::min_specified_for_upgrade(&Network::Mainnet, NetworkUpgrade::Nu6); + let peer_versions = PeerVersions { + peer_versions: vec![peer_version, peer_version], + }; + + let (runtime, _init_guard) = zebra_test::init_async(); + let _guard = runtime.enter(); + + // CORRECTNESS: This test does not depend on external resources that could really timeout. 
+ tokio::time::pause(); + + let (discovered_peers, mut handles) = peer_versions.mock_peer_discovery(); + let (minimum_peer_version, _best_tip_height) = + MinimumPeerVersion::with_mock_chain_tip(&Network::Mainnet); + + assert_eq!(handles.len(), 2); + + runtime.block_on(async move { + let (mut peer_set, mut peer_set_guard) = PeerSetBuilder::new() + .with_discover(discovered_peers) + .with_minimum_peer_version(minimum_peer_version.clone()) + .max_conns_per_ip(max(2, DEFAULT_MAX_CONNS_PER_IP)) + .build(); + + // Mark the inventory as missing for both peers, so plain inventory routing + // would fail the request. + for addr in peer_addrs { + peer_set_guard + .inventory_sender() + .as_mut() + .expect("unexpected missing inv sender") + .send(InventoryStatus::new_missing(test_inv, addr)) + .expect("unexpected dropped receiver"); + } + + let peer_ready = peer_set + .ready() + .await + .expect("peer set service is always ready"); + + assert_eq!(peer_ready.ready_services.len(), 2); + + // Hedge the request to both ready peers, ignoring the missing markers. + let hedged_request = Request::HedgedBlocksByHash { + hashes: iter::once(test_hash).collect(), + fanout: 2, + }; + let _fut = peer_ready.call(hedged_request); + + // Both missing-marked peers must receive the rewritten plain `BlocksByHash` + // request, proving the hedge bypassed the stale markers (`route_inv` would + // have dispatched to neither — see `peer_set_route_inv_all_missing_fail`). + let expected = Request::BlocksByHash(iter::once(test_hash).collect()); + for handle in handles.iter_mut() { + match handle.try_to_receive_outbound_client_request().request() { + Some(ClientRequest { request, .. 
}) => assert_eq!(request, expected), + None => panic!("hedged request was not routed to a ready (missing-marked) peer"), + } + } + }); +} diff --git a/zebra-network/src/protocol/internal/request.rs b/zebra-network/src/protocol/internal/request.rs index 96ff09ba283..ba0c012ddcc 100644 --- a/zebra-network/src/protocol/internal/request.rs +++ b/zebra-network/src/protocol/internal/request.rs @@ -103,6 +103,30 @@ pub enum Request { source: PeerSource, }, + /// Hedged single-block download. + /// + /// Fans the request out to up to `fanout` random ready peers, *ignoring + /// inventory markers*, and returns the first peer that delivers the block. + /// + /// This is used only for the head-of-line block after a registry-miss, to + /// bypass stale "missing" inventory markers: the peers usually do have the + /// block, only the local marker is stale. The peer set rewrites this to a + /// per-peer [`Request::BlocksByHash`], so peers and connections see a normal + /// request and no wire/connection changes are needed. + /// + /// The set must contain exactly one hash. A small `fanout` keeps this + /// DoS-bounded; it is clamped to the number of ready peers. + /// + /// # Returns + /// + /// Returns [`Response::Blocks`](super::Response::Blocks). + HedgedBlocksByHash { + /// Requested block hashes (exactly one). + hashes: HashSet, + /// Maximum number of ready peers to fan the request out to. + fanout: usize, + }, + /// Request transactions by their unmined transaction ID. /// /// v4 transactions use a legacy transaction ID, and @@ -270,6 +294,9 @@ impl fmt::Display for Request { Request::BlocksByHashFrom { hashes, .. } => { format!("BlocksByHashFrom({})", hashes.len()) } + Request::HedgedBlocksByHash { hashes, fanout } => { + format!("HedgedBlocksByHash({}, fanout: {fanout})", hashes.len()) + } Request::TransactionsById(ids) => format!("TransactionsById({})", ids.len()), Request::TransactionsByIdFrom { ids, .. 
} => { format!("TransactionsByIdFrom({})", ids.len()) @@ -305,7 +332,9 @@ impl Request { Request::Peers => "Peers", Request::Ping(_) => "Ping", - Request::BlocksByHash(_) | Request::BlocksByHashFrom { .. } => "BlocksByHash", + Request::BlocksByHash(_) + | Request::BlocksByHashFrom { .. } + | Request::HedgedBlocksByHash { .. } => "BlocksByHash", Request::TransactionsById(_) | Request::TransactionsByIdFrom { .. } => { "TransactionsById" } @@ -327,6 +356,7 @@ impl Request { self, Request::BlocksByHash(_) | Request::BlocksByHashFrom { .. } + | Request::HedgedBlocksByHash { .. } | Request::TransactionsById(_) | Request::TransactionsByIdFrom { .. } ) @@ -339,6 +369,10 @@ impl Request { | Request::BlocksByHashFrom { hashes: block_hashes, .. + } + | Request::HedgedBlocksByHash { + hashes: block_hashes, + .. } => block_hashes.clone(), _ => HashSet::new(), } diff --git a/zebra-network/src/zakura/discovery/service.rs b/zebra-network/src/zakura/discovery/service.rs index 99acdbf65f3..d0bc830e9e6 100644 --- a/zebra-network/src/zakura/discovery/service.rs +++ b/zebra-network/src/zakura/discovery/service.rs @@ -1047,6 +1047,7 @@ mod tests { msg: HeaderSyncMessage::Headers { headers: Vec::new(), body_sizes: Vec::new(), + tree_aux_roots: Vec::new(), }, }) .await?; diff --git a/zebra-network/src/zakura/handler.rs b/zebra-network/src/zakura/handler.rs index c2de9e5d0f1..5e4b000cb4c 100644 --- a/zebra-network/src/zakura/handler.rs +++ b/zebra-network/src/zakura/handler.rs @@ -175,7 +175,7 @@ const _: () = assert!(LEGACY_REQUEST_STREAM_KIND == super::legacy_gossip::ZAKURA_STREAM_LEGACY_REQUESTS); const _: () = assert!(DISCOVERY_STREAM_KIND == super::discovery::ZAKURA_STREAM_DISCOVERY); const _: () = assert!(HEADER_SYNC_STREAM_KIND == super::header_sync::ZAKURA_STREAM_HEADER_SYNC); -const _: () = assert!(ZAKURA_STREAM_VERSION_2 == ZAKURA_HEADER_SYNC_STREAM_VERSION); +const _: () = assert!(ZAKURA_STREAM_VERSION_4 == ZAKURA_HEADER_SYNC_STREAM_VERSION); const _: () = 
assert!(LEGACY_REQUEST_BLOCKS_BY_HASH == super::legacy_gossip::MSG_REQUEST_BLOCKS_BY_HASH); const _: () = assert!( @@ -3376,7 +3376,12 @@ async fn write_outbound_request_frame_inner( flags: u16, payload: Vec, ) -> Result, OutboundRequestError> { - let budget = LegacyResponseBudget::from_request(message_type, &payload, limits)?; + // The legacy request stream validates responses with a legacy-message-specific budget. + let mut legacy_state = LegacyResponseReadState::new(LegacyResponseBudget::from_request( + message_type, + &payload, + limits, + )?); let (mut send, mut recv) = timeout(OUTBOUND_STREAM_WRITE_TIMEOUT, connection.open_bi()) .await .map_err(|_| -> BoxError { "Zakura outbound request stream open timed out".into() }) @@ -3413,11 +3418,10 @@ async fn write_outbound_request_frame_inner( let _ = send.finish(); let mut frames = Vec::new(); - let mut state = LegacyResponseReadState::new(budget); loop { match read_frame( &mut recv, - app_frame_cap_for_stream_kind(&limits, stream_kind), + inbound_frame_cap_for_stream_kind(&limits, stream_kind), limits.idle_timeout, // This is the requester side of a one-shot legacy request/response: // the responder streams its frames promptly, so a silent gap before @@ -3427,11 +3431,11 @@ async fn write_outbound_request_frame_inner( .await { Ok(frame) => { - state.validate_frame(request_id, &frame)?; + legacy_state.validate_frame(request_id, &frame)?; frames.push(frame); } Err(ZakuraHandlerError::Closed) => { - state.finish()?; + legacy_state.finish()?; return Ok(frames); } Err(ZakuraHandlerError::Timeout(_)) => { @@ -4033,18 +4037,15 @@ fn app_frame_cap_for_stream_kind(limits: &ZakuraConnectionLimits, stream_kind: u .max(1) } -/// Frame cap for reading on an admitted inbound stream, never larger than the +/// Frame cap for reading frames received from a peer, never larger than the /// message cap allows. 
/// -/// On an admitted ordered/request stream a frame payload *is* the message, so -/// `admit_inbound_message` rejects any payload over `max_message_bytes`. A peer -/// can negotiate `max_frame_bytes > max_message_bytes` (the two caps are clamped -/// independently in `ZakuraLocalLimits::clamp`), so the cap handed to -/// `read_frame` must also be limited to the message size. Otherwise a frame whose -/// `payload_len` falls between the two limits is allocated and read in full by -/// `read_frame` before `admit_inbound_message` rejects it as oversize, letting a -/// peer force per-frame allocation/I/O up to the larger frame cap across many -/// streams. +/// On ordered/request streams and requester-side responses, a frame payload *is* +/// the message. A peer can negotiate `max_frame_bytes > max_message_bytes` (the +/// two caps are clamped independently in `ZakuraLocalLimits::clamp`), so the cap +/// handed to `read_frame` must also be limited to the message size. Otherwise a +/// frame whose `payload_len` falls between the two limits is allocated and read +/// in full before the later message-level validation rejects or decodes it. fn inbound_frame_cap_for_stream_kind(limits: &ZakuraConnectionLimits, stream_kind: u16) -> u32 { let frame_header_bytes = u32::try_from(FRAME_HEADER_BYTES).expect("frame header byte count fits in u32"); @@ -4074,7 +4075,7 @@ fn should_run_freshness_reaper( /// The only stream-kind version this v1 handler serves. Every known kind is /// at version 1; a peer naming any other version of a known kind is rejected. const ZAKURA_STREAM_VERSION_1: u16 = 1; -const ZAKURA_STREAM_VERSION_2: u16 = 2; +const ZAKURA_STREAM_VERSION_4: u16 = 4; /// Returns whether the handler can serve a stream with this kind and version. 
/// @@ -4873,6 +4874,7 @@ mod tests { let get_headers_frame = HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 1, + want_tree_aux_roots: false, } .encode_frame()?; @@ -5100,6 +5102,7 @@ mod tests { msg: HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 1, + want_tree_aux_roots: false, }, }) .await?; @@ -6431,7 +6434,7 @@ mod tests { }, Stream { kind: HEADER_SYNC_STREAM_KIND, - version: ZAKURA_STREAM_VERSION_2, + version: ZAKURA_STREAM_VERSION_4, frame_cap: 1024, capability: ZAKURA_CAP_HEADER_SYNC, mode: StreamMode::Ordered, @@ -6451,7 +6454,7 @@ mod tests { (LEGACY_GOSSIP_STREAM_KIND, ZAKURA_STREAM_VERSION_1), (LEGACY_REQUEST_STREAM_KIND, ZAKURA_STREAM_VERSION_1), (DISCOVERY_STREAM_KIND, ZAKURA_STREAM_VERSION_1), - (HEADER_SYNC_STREAM_KIND, ZAKURA_STREAM_VERSION_2), + (HEADER_SYNC_STREAM_KIND, ZAKURA_STREAM_VERSION_4), (ZAKURA_STREAM_BLOCK_SYNC, ZAKURA_STREAM_VERSION_1), ] { assert!( diff --git a/zebra-network/src/zakura/header_sync/config.rs b/zebra-network/src/zakura/header_sync/config.rs index 0dc545584f2..dd42577587c 100644 --- a/zebra-network/src/zakura/header_sync/config.rs +++ b/zebra-network/src/zakura/header_sync/config.rs @@ -131,16 +131,26 @@ pub fn header_sync_header_bytes_for_network(network: &Network) -> usize { } /// Maximum `Headers` count that fits both the stream-5 payload cap and the app frame cap. 
-pub fn header_sync_count_by_byte_budget(network: &Network, max_frame_bytes: u32) -> u32 { +pub fn header_sync_count_by_byte_budget( + network: &Network, + max_frame_bytes: u32, + want_tree_aux_roots: bool, +) -> u32 { let frame_payload_cap = usize::try_from(max_frame_bytes) .unwrap_or(usize::MAX) .saturating_sub(FRAME_HEADER_BYTES); let payload_cap = MAX_HS_MESSAGE_BYTES.min(frame_payload_cap); - let header_bytes = - header_sync_header_bytes_for_network(network).saturating_add(HEADER_SYNC_BODY_SIZE_BYTES); - let count = payload_cap - .saturating_sub(HEADER_SYNC_MESSAGE_TYPE_BYTES + HEADER_SYNC_COUNT_BYTES) - / header_bytes; + let root_bytes = if want_tree_aux_roots { + HEADER_SYNC_BLOCK_COMMITMENT_ROOTS_BYTES + } else { + 0 + }; + let header_bytes = header_sync_header_bytes_for_network(network) + .saturating_add(HEADER_SYNC_BODY_SIZE_BYTES) + .saturating_add(root_bytes); + let count = payload_cap.saturating_sub( + HEADER_SYNC_MESSAGE_TYPE_BYTES + HEADER_SYNC_COUNT_BYTES + HEADER_SYNC_HAS_ROOTS_BYTES, + ) / header_bytes; u32::try_from(count) .unwrap_or(u32::MAX) @@ -153,11 +163,16 @@ pub fn clamp_header_sync_request_count( peer_max_headers_per_response: u32, network: &Network, max_frame_bytes: u32, + want_tree_aux_roots: bool, ) -> u32 { desired_count .min(clamp_advertised_range(peer_max_headers_per_response)) .min(MAX_HS_RANGE) - .min(header_sync_count_by_byte_budget(network, max_frame_bytes)) + .min(header_sync_count_by_byte_budget( + network, + max_frame_bytes, + want_tree_aux_roots, + )) .max(1) } @@ -166,23 +181,43 @@ pub fn inbound_get_headers_count_limit( config: &ZakuraHeaderSyncConfig, network: &Network, max_frame_bytes: u32, + want_tree_aux_roots: bool, ) -> u32 { clamp_header_sync_request_count( u32::MAX, config.advertised_max_headers_per_response(), network, max_frame_bytes, + want_tree_aux_roots, ) } /// Truncate a served header run so the encoded `Headers` response fits the byte budgets. 
+/// +/// All three parallel vectors (`headers`, `body_sizes`, `tree_aux_roots`) are truncated +/// to the same length so the [`HeaderSyncMessage::Headers`] invariant is preserved. pub fn truncate_headers_to_byte_budget( mut headers: Vec>, + mut body_sizes: Vec, + mut tree_aux_roots: Vec, network: &Network, max_frame_bytes: u32, -) -> Vec> { - let max_count = usize::try_from(header_sync_count_by_byte_budget(network, max_frame_bytes)) - .expect("header-sync byte-budget count fits in usize"); +) -> (Vec>, Vec, Vec) { + if headers.len() != tree_aux_roots.len() { + headers.clear(); + body_sizes.clear(); + tree_aux_roots.clear(); + return (headers, body_sizes, tree_aux_roots); + } + + let max_count = usize::try_from(header_sync_count_by_byte_budget( + network, + max_frame_bytes, + true, + )) + .expect("header-sync byte-budget count fits in usize"); headers.truncate(max_count); - headers + body_sizes.truncate(max_count); + tree_aux_roots.truncate(max_count); + (headers, body_sizes, tree_aux_roots) } diff --git a/zebra-network/src/zakura/header_sync/error.rs b/zebra-network/src/zakura/header_sync/error.rs index f8bbd545d36..dbaabf5461e 100644 --- a/zebra-network/src/zakura/header_sync/error.rs +++ b/zebra-network/src/zakura/header_sync/error.rs @@ -45,6 +45,37 @@ pub enum HeaderSyncWireError { body_sizes: usize, }, + /// A locally constructed or inbound `Headers` message did not carry exactly one root per header. + #[error("Zakura header-sync Headers tree-aux root count {roots} does not match header count {headers}")] + TreeAuxRootCountMismatch { + /// Header count. + headers: usize, + /// Tree-aux root count. + roots: usize, + }, + + /// An inbound `Headers` response carried a root for the wrong height. + #[error("Zakura header-sync Headers tree-aux root height {root_height:?} does not match expected height {expected_height:?}")] + TreeAuxRootHeightMismatch { + /// Expected root height. + expected_height: block::Height, + /// Actual root height. 
+ root_height: block::Height, + }, + + /// A boolean marker field used a value other than 0 or 1. + #[error("Zakura header-sync {field} marker has invalid value {value}")] + InvalidBoolMarker { + /// Marker field name. + field: &'static str, + /// Invalid marker value. + value: u8, + }, + + /// A peer returned tree-aux roots for a request that opted out. + #[error("Zakura header-sync Headers included tree-aux roots for an opt-out request")] + UnrequestedTreeAuxRoots, + /// An inbound `Headers` response did not match an in-flight request. #[error("unsolicited Zakura header-sync Headers response")] UnsolicitedHeaders, diff --git a/zebra-network/src/zakura/header_sync/events.rs b/zebra-network/src/zakura/header_sync/events.rs index 501cc06b0a0..f084512d62d 100644 --- a/zebra-network/src/zakura/header_sync/events.rs +++ b/zebra-network/src/zakura/header_sync/events.rs @@ -256,10 +256,14 @@ pub enum HeaderSyncEvent { start_height: block::Height, /// Requested header count. requested_count: u32, + /// Whether the original request wanted all-or-nothing tree-aux roots. + want_tree_aux_roots: bool, /// Bounded headers returned by state. headers: Vec>, /// Advisory serialized body sizes, parallel to `headers`. body_sizes: Vec, + /// Per-height commitment roots, parallel to `headers`. + tree_aux_roots: Vec, }, } @@ -286,6 +290,8 @@ pub enum HeaderSyncAction { headers: Vec>, /// Advisory serialized body sizes, parallel to `headers`. body_sizes: Vec, + /// Per-height commitment roots, parallel to `headers`. + tree_aux_roots: Vec, /// Whether the range is expected to be finalized by checkpoint policy. finalized: bool, }, @@ -299,6 +305,8 @@ pub enum HeaderSyncAction { start: block::Height, /// Maximum count. count: u32, + /// Whether the requester wants all-or-nothing tree-aux roots. + want_tree_aux_roots: bool, }, /// Ask state for missing block-body gaps. 
QueryMissingBlockBodies { @@ -407,15 +415,22 @@ pub struct ExpectedHeadersResponse { pub start_height: block::Height, /// Requested header count. pub count: u32, + /// Whether this request asked the peer to include all-or-nothing roots. + pub want_tree_aux_roots: bool, } impl ExpectedHeadersResponse { /// Create a bounded expected response. - pub fn new(start_height: block::Height, count: u32) -> Result { + pub fn new( + start_height: block::Height, + count: u32, + want_tree_aux_roots: bool, + ) -> Result { validate_get_headers_count(count)?; Ok(Self { start_height, count, + want_tree_aux_roots, }) } } diff --git a/zebra-network/src/zakura/header_sync/mod.rs b/zebra-network/src/zakura/header_sync/mod.rs index 200844b8ee2..556b1496f3d 100644 --- a/zebra-network/src/zakura/header_sync/mod.rs +++ b/zebra-network/src/zakura/header_sync/mod.rs @@ -21,6 +21,7 @@ use tokio::{ use tokio_util::sync::CancellationToken; use zebra_chain::{ block::{self, BlockTimeError}, + parallel::commitment_aux::BlockCommitmentRoots, parameters::Network, serialization::{SerializationError, ZcashDeserialize, ZcashSerialize}, work::{difficulty::CompactDifficulty, difficulty::ExpandedDifficulty, equihash}, diff --git a/zebra-network/src/zakura/header_sync/pipe.rs b/zebra-network/src/zakura/header_sync/pipe.rs index 10d2df32ad5..e4777d2a18d 100644 --- a/zebra-network/src/zakura/header_sync/pipe.rs +++ b/zebra-network/src/zakura/header_sync/pipe.rs @@ -466,7 +466,8 @@ mod tests { #[test] fn deliver_correlated_headers_decodes_against_expectation() { let (handle, mut events) = test_handle(); - let expected = ExpectedHeadersResponse::new(block::Height(1), 1).expect("count is valid"); + let expected = + ExpectedHeadersResponse::new(block::Height(1), 1, true).expect("count is valid"); let flow = deliver(&handle, Some(expected), peer(), headers_frame(Vec::new())); @@ -488,8 +489,10 @@ mod tests { let (commands_tx, commands_rx) = mpsc::unbounded_channel(); let mut local = HsLocal::new(commands_rx, 
DEFAULT_HS_INBOUND_NEW_BLOCK_MIN_INTERVAL); - let first = ExpectedHeadersResponse::new(block::Height(1), 1).expect("count is valid"); - let second = ExpectedHeadersResponse::new(block::Height(2), 2).expect("count is valid"); + let first = + ExpectedHeadersResponse::new(block::Height(1), 1, false).expect("count is valid"); + let second = + ExpectedHeadersResponse::new(block::Height(2), 2, false).expect("count is valid"); commands_tx .send(HeaderSyncPeerCommand::RecordExpectedHeaders(first)) .expect("pipe is alive"); @@ -576,7 +579,7 @@ mod tests { /// timeout and desynchronizing the peer-local FIFO from the outstanding range. #[test] fn saturated_events_queue_restores_solicited_expectation() { - use zebra_chain::serialization::ZcashDeserializeInto; + use zebra_chain::{orchard, sapling, serialization::ZcashDeserializeInto}; use zebra_test::vectors::BLOCK_MAINNET_1_BYTES; // Keep `_events_rx` alive so the saturated queue rejects with `Full` @@ -584,7 +587,8 @@ mod tests { let (handle, _events_rx) = saturated_events_handle(); let (commands_tx, commands_rx) = mpsc::unbounded_channel(); - let expected = ExpectedHeadersResponse::new(block::Height(1), 1).expect("count is valid"); + let expected = + ExpectedHeadersResponse::new(block::Height(1), 1, true).expect("count is valid"); commands_tx .send(HeaderSyncPeerCommand::RecordExpectedHeaders(expected)) .expect("pipe is alive"); @@ -600,6 +604,11 @@ mod tests { let solicited_headers = HeaderSyncMessage::Headers { headers: vec![block_one.header.clone()], body_sizes: vec![0], + tree_aux_roots: vec![BlockCommitmentRoots { + height: block::Height(1), + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + }], } .encode_frame() .expect("headers frame encodes"); @@ -615,10 +624,27 @@ mod tests { // Drain the recorded expectation into `HsLocal`, mirroring `run_peer`'s // pre-frame command drain so the `Headers` frame is correlated. 
pipe.local_mut().drain_ready_commands(); + assert_eq!( + pipe.local_mut().pop_expected_headers_response(), + Some(expected), + "the solicited response expectation should be available after draining commands" + ); + pipe.local_mut().restore_expected_headers(expected); + HeaderSyncMessage::decode_frame( + solicited_headers.clone(), + HeaderSyncDecodeContext::for_headers_response(expected, expected.count), + ) + .expect("test Headers frame decodes against its expectation"); // The decoded response cannot be delivered (events queue is full); the // pipe logs and continues, exactly as production does. - assert!(matches!(pipe.run_one(solicited_headers), Flow::Done)); + let flow = pipe.run_one(solicited_headers); + match flow { + Flow::Done => {} + Flow::Continue(()) => panic!("unexpected successful forward"), + Flow::Reject(SinkReject::Protocol(_)) => panic!("unexpected protocol reject"), + Flow::Reject(SinkReject::Local(_)) => panic!("unexpected local reject"), + } // The popped expectation must be restored so the still-outstanding range // stays correlated. Without the fix the expectation is gone (returns None). 
diff --git a/zebra-network/src/zakura/header_sync/reactor.rs b/zebra-network/src/zakura/header_sync/reactor.rs index 54653a9206b..7b1baab2a47 100644 --- a/zebra-network/src/zakura/header_sync/reactor.rs +++ b/zebra-network/src/zakura/header_sync/reactor.rs @@ -193,14 +193,18 @@ impl HeaderSyncReactor { peer, start_height, requested_count, + want_tree_aux_roots, headers, body_sizes, + tree_aux_roots, } => self.handle_header_range_response_ready( peer, start_height, requested_count, + want_tree_aux_roots, headers, body_sizes, + tree_aux_roots, ), } } @@ -623,19 +627,29 @@ impl HeaderSyncReactor { requested_count: u32, returned_count: u32, ) { - self.trace_headers_served(&peer, start_height, requested_count, returned_count); + self.trace_headers_served( + &peer, + start_height, + requested_count, + returned_count, + false, + 0, + ); if let Some(peer_state) = self.state.peers.get_mut(&peer) { peer_state.finish_serving_headers(); } } + #[allow(clippy::too_many_arguments)] fn handle_header_range_response_ready( &mut self, peer: ZakuraPeerId, start_height: block::Height, requested_count: u32, - headers: Vec>, - body_sizes: Vec, + want_tree_aux_roots: bool, + mut headers: Vec>, + mut body_sizes: Vec, + mut tree_aux_roots: Vec, ) { let Some(peer_state) = self.state.peers.get_mut(&peer) else { return; @@ -644,16 +658,33 @@ impl HeaderSyncReactor { peer_state.finish_serving_headers(); return; } + + let roots_complete = validate_tree_aux_roots_len(headers.len(), tree_aux_roots.len()) + .and_then(|()| validate_tree_aux_root_heights(start_height, &tree_aux_roots)) + .is_ok(); + if !headers.is_empty() && (!want_tree_aux_roots || !roots_complete) { + headers.clear(); + body_sizes.clear(); + tree_aux_roots.clear(); + }; let returned_count = u32::try_from(headers.len()).unwrap_or(u32::MAX); - let send_result = peer_state - .session - .try_send_headers_with_sizes(headers, body_sizes); + let served_tree_aux_roots_len = u32::try_from(tree_aux_roots.len()).unwrap_or(u32::MAX); + let 
send_result = peer_state.session.try_send_headers_with_sizes_and_roots( + headers, + body_sizes, + tree_aux_roots, + ); peer_state.finish_serving_headers(); match send_result { - Ok(()) => { - self.trace_headers_served(&peer, start_height, requested_count, returned_count) - } + Ok(()) => self.trace_headers_served( + &peer, + start_height, + requested_count, + returned_count, + want_tree_aux_roots, + served_tree_aux_roots_len, + ), Err(error) => { tracing::debug!( ?peer, @@ -707,14 +738,18 @@ impl HeaderSyncReactor { HeaderSyncMessage::Headers { headers, body_sizes, + tree_aux_roots, } => { - self.handle_headers(peer, headers, body_sizes).await; + self.handle_headers(peer, headers, body_sizes, tree_aux_roots) + .await; } HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots, } => { - self.handle_get_headers(peer, start_height, count).await; + self.handle_get_headers(peer, start_height, count, want_tree_aux_roots) + .await; } HeaderSyncMessage::NewBlock(block) => { self.handle_new_block(peer, block).await; @@ -743,6 +778,7 @@ impl HeaderSyncReactor { peer: ZakuraPeerId, start_height: block::Height, count: u32, + want_tree_aux_roots: bool, ) { let local_inflight_cap = self.startup.config.advertised_max_inflight_requests(); let Some(peer_state) = self.state.peers.get_mut(&peer) else { @@ -761,6 +797,7 @@ impl HeaderSyncReactor { &self.startup.config, &self.startup.network, self.startup.max_frame_bytes, + want_tree_aux_roots, ); if count == 0 || count > allowed_count { self.report_misbehavior(peer, HeaderSyncMisbehavior::GetHeadersTooLong) @@ -778,6 +815,7 @@ impl HeaderSyncReactor { peer: peer.clone(), start: start_height, count, + want_tree_aux_roots, }) { if let Some(peer_state) = self.state.peers.get_mut(&peer) { peer_state.finish_serving_headers(); @@ -884,6 +922,7 @@ impl HeaderSyncReactor { peer: ZakuraPeerId, headers: Vec>, body_sizes: Vec, + tree_aux_roots: Vec, ) { metrics::counter!("sync.header.response.received").increment(1); let 
Some(peer_state) = self.state.peers.get_mut(&peer) else { @@ -906,6 +945,7 @@ impl HeaderSyncReactor { peer, headers, body_sizes, + tree_aux_roots, outstanding, peer_max_headers_per_response, in_flight_count, @@ -913,16 +953,27 @@ impl HeaderSyncReactor { .await; } + #[allow(clippy::too_many_arguments)] async fn handle_headers_for_outstanding( &mut self, peer: ZakuraPeerId, headers: Vec>, body_sizes: Vec, + tree_aux_roots: Vec, outstanding: OutstandingRange, peer_max_headers_per_response: u32, in_flight_count: usize, ) { - if validate_body_sizes_len(headers.len(), body_sizes.len()).is_err() { + if validate_body_sizes_len(headers.len(), body_sizes.len()).is_err() + || validate_tree_aux_roots_len(headers.len(), tree_aux_roots.len()).is_err() + { + self.report_misbehavior(peer, HeaderSyncMisbehavior::MalformedMessage) + .await; + self.state.schedule.retry(outstanding.range); + self.schedule().await; + return; + } + if !outstanding.range.want_tree_aux_roots && !tree_aux_roots.is_empty() { self.report_misbehavior(peer, HeaderSyncMisbehavior::MalformedMessage) .await; self.state.schedule.retry(outstanding.range); @@ -940,6 +991,8 @@ impl HeaderSyncReactor { outstanding.expected_max_count, peer_max_headers_per_response, in_flight_count, + outstanding.range.want_tree_aux_roots, + u32::try_from(tree_aux_roots.len()).unwrap_or(u32::MAX), ); if let Some(peer_state) = self.state.peers.get_mut(&peer) { peer_state.outstanding.push(OutstandingRange { @@ -960,6 +1013,8 @@ impl HeaderSyncReactor { outstanding.expected_max_count, peer_max_headers_per_response, in_flight_count, + outstanding.range.want_tree_aux_roots, + u32::try_from(tree_aux_roots.len()).unwrap_or(u32::MAX), ); if header_count > outstanding.expected_max_count || header_count > outstanding.range.count { self.report_misbehavior(peer.clone(), HeaderSyncMisbehavior::ResponseTooLong) @@ -977,6 +1032,7 @@ impl HeaderSyncReactor { ExpectedHeadersResponse::new( outstanding.range.start_height, outstanding.expected_max_count, 
+ outstanding.range.want_tree_aux_roots, ) .expect("outstanding range uses a non-zero bounded count"), outstanding.expected_max_count, @@ -1016,6 +1072,14 @@ impl HeaderSyncReactor { self.schedule().await; return; } + if validate_tree_aux_root_heights(outstanding.range.start_height, &tree_aux_roots).is_err() + { + self.report_misbehavior(peer, HeaderSyncMisbehavior::MalformedMessage) + .await; + self.state.schedule.retry(outstanding.range); + self.schedule().await; + return; + } if let Err(error) = validate_headers_stateless(headers.clone(), validation_context).await { debug!( ?peer, @@ -1078,6 +1142,7 @@ impl HeaderSyncReactor { start_height: outstanding.range.start_height, headers, body_sizes, + tree_aux_roots, finalized: outstanding.range.finalized, }); } @@ -1180,6 +1245,7 @@ impl HeaderSyncReactor { peer.max_headers_per_response, &self.startup.network, self.startup.max_frame_bytes, + range.want_tree_aux_roots, ); if range.finalized && count < range.count { self.state.schedule.retry(range); @@ -1194,7 +1260,11 @@ impl HeaderSyncReactor { let Some(peer) = self.state.peers.get(&peer_id) else { continue; }; - if let Err(error) = peer.session.try_send_get_headers(range.start_height, count) { + if let Err(error) = peer.session.try_send_get_headers( + range.start_height, + count, + range.want_tree_aux_roots, + ) { tracing::debug!( peer = ?peer_id, start_height = ?range.start_height, @@ -1219,7 +1289,7 @@ impl HeaderSyncReactor { self.state.schedule.mark_assigned(peer_id.clone(), range); let destination = peer_id.clone(); metrics::counter!("sync.header.request.sent").increment(1); - self.trace_get_headers_sent(&destination, range.start_height, count, peer_cap); + self.trace_get_headers_sent(&destination, range, count, peer_cap); #[cfg(test)] let _ = self .actions @@ -1228,6 +1298,7 @@ impl HeaderSyncReactor { msg: HeaderSyncMessage::GetHeaders { start_height: range.start_height, count, + want_tree_aux_roots: range.want_tree_aux_roots, }, }) .await; @@ -1549,7 +1620,9 
@@ impl HeaderSyncReactor { insert_height(row, hs_trace::HEIGHT, *height); insert_hash(row, hs_trace::HASH, *hash); } - HeaderSyncAction::QueryHeadersByHeightRange { peer, start, count } => { + HeaderSyncAction::QueryHeadersByHeightRange { + peer, start, count, .. + } => { insert_optional_str(row, hs_trace::KIND, Some("query_headers_by_height_range")); insert_peer(row, hs_trace::PEER, peer); insert_height(row, hs_trace::RANGE_START, *start); @@ -1638,18 +1711,33 @@ impl HeaderSyncReactor { fn trace_get_headers_sent( &self, peer: &ZakuraPeerId, - start_height: block::Height, + range: RangeRequest, count: u32, advertised_cap: u32, ) { self.emit_trace(hs_trace::HEADER_GET_HEADERS_SENT, |row| { insert_peer(row, hs_trace::PEER, peer); - insert_height(row, hs_trace::RANGE_START, start_height); + insert_height(row, hs_trace::RANGE_START, range.start_height); insert_u64(row, hs_trace::RANGE_COUNT, u64::from(count)); insert_u64(row, hs_trace::ADVERTISED_CAP, u64::from(advertised_cap)); + insert_bool(row, hs_trace::FINALIZED, range.finalized); + insert_bool( + row, + hs_trace::WANT_TREE_AUX_ROOTS, + range.want_tree_aux_roots, + ); + insert_optional_str(row, hs_trace::RANGE_PRIORITY, Some(range.priority.label())); + insert_height( + row, + hs_trace::VERIFIED_BLOCK_TIP, + self.state.verified_block_tip, + ); + insert_height(row, hs_trace::FINALIZED_HEIGHT, self.state.finalized_height); + insert_height(row, hs_trace::BEST_HEADER_TIP, self.state.best_header_tip); }); } + #[allow(clippy::too_many_arguments)] fn trace_headers_received( &self, peer: &ZakuraPeerId, @@ -1658,6 +1746,8 @@ impl HeaderSyncReactor { expected_max_count: u32, advertised_cap: u32, in_flight_count: usize, + want_tree_aux_roots: bool, + tree_aux_roots_len: u32, ) { self.emit_trace(hs_trace::HEADER_HEADERS_RECEIVED, |row| { insert_peer(row, hs_trace::PEER, peer); @@ -1666,6 +1756,12 @@ impl HeaderSyncReactor { insert_u64(row, hs_trace::ADVERTISED_CAP, u64::from(advertised_cap)); insert_u64(row, 
hs_trace::EXPECTED_COUNT, u64::from(expected_max_count)); insert_u64(row, hs_trace::IN_FLIGHT_COUNT, in_flight_count as u64); + insert_bool(row, hs_trace::WANT_TREE_AUX_ROOTS, want_tree_aux_roots); + insert_u64( + row, + hs_trace::TREE_AUX_ROOTS_LEN, + u64::from(tree_aux_roots_len), + ); }); } @@ -1675,12 +1771,20 @@ impl HeaderSyncReactor { start_height: block::Height, requested_count: u32, returned_count: u32, + want_tree_aux_roots: bool, + tree_aux_roots_len: u32, ) { self.emit_trace(hs_trace::HEADER_HEADERS_SERVED, |row| { insert_peer(row, hs_trace::PEER, peer); insert_height(row, hs_trace::RANGE_START, start_height); insert_u64(row, hs_trace::RANGE_COUNT, u64::from(returned_count)); insert_u64(row, hs_trace::EXPECTED_COUNT, u64::from(requested_count)); + insert_bool(row, hs_trace::WANT_TREE_AUX_ROOTS, want_tree_aux_roots); + insert_u64( + row, + hs_trace::TREE_AUX_ROOTS_LEN, + u64::from(tree_aux_roots_len), + ); }); } @@ -1885,6 +1989,10 @@ fn header_sync_wire_error_kind(error: &HeaderSyncWireError) -> &'static str { HeaderSyncWireError::OversizedPayload { .. } => "oversized_payload", HeaderSyncWireError::HeaderCountLimit { .. } => "header_count_limit", HeaderSyncWireError::BodySizeCountMismatch { .. } => "body_size_count_mismatch", + HeaderSyncWireError::TreeAuxRootCountMismatch { .. } => "tree_aux_root_count_mismatch", + HeaderSyncWireError::TreeAuxRootHeightMismatch { .. } => "tree_aux_root_height_mismatch", + HeaderSyncWireError::InvalidBoolMarker { .. } => "invalid_bool_marker", + HeaderSyncWireError::UnrequestedTreeAuxRoots => "unrequested_tree_aux_roots", HeaderSyncWireError::UnsolicitedHeaders => "unsolicited_headers", HeaderSyncWireError::ZeroHeaderRequestCount => "zero_header_request_count", HeaderSyncWireError::HeightOutOfRange(_) => "height_out_of_range", @@ -1951,6 +2059,7 @@ fn trace_header_sync_message_fields( HeaderSyncMessage::GetHeaders { start_height, count, + .. 
} => { insert_height(row, hs_trace::RANGE_START, *start_height); insert_u64(row, hs_trace::RANGE_COUNT, u64::from(*count)); diff --git a/zebra-network/src/zakura/header_sync/service.rs b/zebra-network/src/zakura/header_sync/service.rs index 7e7126583ea..5db8886baeb 100644 --- a/zebra-network/src/zakura/header_sync/service.rs +++ b/zebra-network/src/zakura/header_sync/service.rs @@ -140,13 +140,15 @@ impl HeaderSyncPeerSession { &self, start_height: block::Height, count: u32, + want_tree_aux_roots: bool, ) -> Result<(), OrderedSendError> { - let expected = ExpectedHeadersResponse::new(start_height, count) + let expected = ExpectedHeadersResponse::new(start_height, count, want_tree_aux_roots) .map_err(|error| OrderedSendError::Encode(Box::new(error)))?; if let Some(commands) = &self.inner.commands { self.try_send_message(HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots, })?; return commands .send(HeaderSyncPeerCommand::RecordExpectedHeaders(expected)) @@ -156,6 +158,7 @@ impl HeaderSyncPeerSession { self.try_send_message(HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots, }) } @@ -165,18 +168,22 @@ impl HeaderSyncPeerSession { headers: Vec>, ) -> Result<(), OrderedSendError> { let body_sizes = vec![0; headers.len()]; - self.try_send_headers_with_sizes(headers, body_sizes) + let tree_aux_roots = Vec::new(); + self.try_send_headers_with_sizes_and_roots(headers, body_sizes, tree_aux_roots) } - /// Send a typed header range response with one advisory body-size hint per header. - pub fn try_send_headers_with_sizes( + /// Send a typed header range response with one advisory body-size hint and + /// tree-aux root payload per header. 
+ pub fn try_send_headers_with_sizes_and_roots( &self, headers: Vec>, body_sizes: Vec, + tree_aux_roots: Vec, ) -> Result<(), OrderedSendError> { self.try_send_message(HeaderSyncMessage::Headers { headers, body_sizes, + tree_aux_roots, }) } @@ -238,7 +245,9 @@ pub(crate) async fn drive_header_sync_actions( "Zakura header-sync NewBlock body arrived before block-acceptance hook is wired" ); } - HeaderSyncAction::QueryHeadersByHeightRange { peer, start, count } => { + HeaderSyncAction::QueryHeadersByHeightRange { + peer, start, count, .. + } => { let _ = handle .send(HeaderSyncEvent::HeaderRangeResponseFinished { peer, diff --git a/zebra-network/src/zakura/header_sync/state.rs b/zebra-network/src/zakura/header_sync/state.rs index 7328cf312b3..d1b6b55ee84 100644 --- a/zebra-network/src/zakura/header_sync/state.rs +++ b/zebra-network/src/zakura/header_sync/state.rs @@ -87,6 +87,7 @@ impl HeaderSyncCore { count, anchor_hash: self.best_header_hash, finalized, + want_tree_aux_roots: true, priority: RangePriority::Forward, }); } @@ -116,6 +117,7 @@ impl HeaderSyncCore { count, anchor_hash: previous_hash, finalized: true, + want_tree_aux_roots: true, priority: RangePriority::Backward, }); } @@ -335,6 +337,7 @@ pub(super) struct RangeRequest { pub(super) count: u32, pub(super) anchor_hash: block::Hash, pub(super) finalized: bool, + pub(super) want_tree_aux_roots: bool, pub(super) priority: RangePriority, } @@ -355,3 +358,12 @@ pub(super) enum RangePriority { Forward, Backward, } + +impl RangePriority { + pub(super) fn label(self) -> &'static str { + match self { + RangePriority::Forward => "forward", + RangePriority::Backward => "backward", + } + } +} diff --git a/zebra-network/src/zakura/header_sync/tests.rs b/zebra-network/src/zakura/header_sync/tests.rs index e681a223518..51356500825 100644 --- a/zebra-network/src/zakura/header_sync/tests.rs +++ b/zebra-network/src/zakura/header_sync/tests.rs @@ -14,12 +14,15 @@ use std::{ sync::{Mutex, OnceLock}, }; use zebra_chain::{ + 
orchard, + parallel::commitment_aux::BlockCommitmentRoots, parameters::{ testnet::{ ConfiguredActivationHeights, ConfiguredCheckpoints, Parameters, RegtestParameters, }, Network, }, + sapling, serialization::{ZcashDeserializeInto, ZcashSerialize}, work::{difficulty::CompactDifficulty, equihash::Solution}, }; @@ -116,10 +119,23 @@ fn mainnet_header(bytes: &[u8]) -> Arc { } fn headers_message(headers: Vec>) -> HeaderSyncMessage { + let start_height = headers + .first() + .map(|header| test_header_height(header.as_ref())) + .unwrap_or(block::Height(1)); + headers_message_from(start_height, headers) +} + +fn headers_message_from( + start_height: block::Height, + headers: Vec>, +) -> HeaderSyncMessage { let body_sizes = vec![0; headers.len()]; + let tree_aux_roots = roots_from_height(start_height, headers.len()); HeaderSyncMessage::Headers { headers, body_sizes, + tree_aux_roots, } } @@ -127,12 +143,101 @@ fn headers_message_with_sizes( headers: Vec>, body_sizes: Vec, ) -> HeaderSyncMessage { + let start_height = headers + .first() + .map(|header| test_header_height(header.as_ref())) + .unwrap_or(block::Height(1)); + let tree_aux_roots = roots_from_height(start_height, headers.len()); + HeaderSyncMessage::Headers { + headers, + body_sizes, + tree_aux_roots, + } +} + +fn rootless_headers_message_from( + start_height: block::Height, + headers: Vec>, +) -> HeaderSyncMessage { + let _ = start_height; + let body_sizes = vec![0; headers.len()]; + HeaderSyncMessage::Headers { + headers, + body_sizes, + tree_aux_roots: Vec::new(), + } +} + +fn finalized_headers_message(headers: Vec>) -> HeaderSyncMessage { + let start_height = headers + .first() + .map(|header| test_header_height(header.as_ref())) + .unwrap_or(block::Height(1)); + finalized_headers_message_from(start_height, headers) +} + +fn finalized_headers_message_from( + start_height: block::Height, + headers: Vec>, +) -> HeaderSyncMessage { + let body_sizes = vec![0; headers.len()]; + let tree_aux_roots = 
roots_from_height(start_height, headers.len()); + HeaderSyncMessage::Headers { + headers, + body_sizes, + tree_aux_roots, + } +} + +fn finalized_headers_message_with_sizes( + headers: Vec>, + body_sizes: Vec, +) -> HeaderSyncMessage { + let start_height = headers + .first() + .map(|header| test_header_height(header.as_ref())) + .unwrap_or(block::Height(1)); + let tree_aux_roots = roots_from_height(start_height, headers.len()); HeaderSyncMessage::Headers { headers, body_sizes, + tree_aux_roots, + } +} + +fn root_at(height: block::Height) -> BlockCommitmentRoots { + BlockCommitmentRoots { + height, + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), } } +fn test_header_height(header: &block::Header) -> block::Height { + let hash = block::Hash::from(header); + [ + (block::Height(0), &BLOCK_MAINNET_GENESIS_BYTES[..]), + (block::Height(1), &BLOCK_MAINNET_1_BYTES[..]), + (block::Height(2), &BLOCK_MAINNET_2_BYTES[..]), + (block::Height(3), &BLOCK_MAINNET_3_BYTES[..]), + (block::Height(4), &BLOCK_MAINNET_4_BYTES[..]), + ] + .into_iter() + .find_map(|(height, bytes)| { + (hash == block::Hash::from(mainnet_header(bytes).as_ref())).then_some(height) + }) + .unwrap_or(block::Height(1)) +} + +fn roots_from_height(start_height: block::Height, count: usize) -> Vec { + (0..count) + .map(|offset| { + let offset = u32::try_from(offset).expect("test root count fits in u32"); + root_at(block::Height(start_height.0 + offset)) + }) + .collect() +} + async fn validate_headers_stateless_after_equihash_acceptance( headers: Vec>, context: HeaderSyncValidationContext<'_>, @@ -153,7 +258,14 @@ async fn validate_headers_stateless_after_equihash_acceptance( fn headers_context(count: u32, peer_cap: u32) -> HeaderSyncDecodeContext { HeaderSyncDecodeContext::for_headers_response( - ExpectedHeadersResponse::new(block::Height(1), count).unwrap(), + ExpectedHeadersResponse::new(block::Height(1), count, 
false).unwrap(), + peer_cap, + ) +} + +fn finalized_headers_context(count: u32, peer_cap: u32) -> HeaderSyncDecodeContext { + HeaderSyncDecodeContext::for_headers_response( + ExpectedHeadersResponse::new(block::Height(1), count, true).unwrap(), peer_cap, ) } @@ -446,6 +558,7 @@ async fn advisory_summary_status_mismatch_uses_status_without_misbehavior_and_ba HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, } if peer == peer_id => { assert_eq!(start_height, block::Height(1)); @@ -686,6 +799,7 @@ async fn next_outbound_get_headers( HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, } => return (peer, start_height, count), HeaderSyncAction::Misbehavior { peer, reason } => { @@ -803,6 +917,7 @@ fn codec_round_trips_get_headers() { let message = HeaderSyncMessage::GetHeaders { start_height: block::Height(42), count: DEFAULT_HS_RANGE, + want_tree_aux_roots: false, }; let encoded = message.encode().unwrap(); @@ -814,10 +929,10 @@ fn codec_round_trips_get_headers() { #[test] fn codec_round_trips_headers_with_bounded_vector() { let headers = vec![mainnet_header(&BLOCK_MAINNET_1_BYTES)]; - let message = headers_message_with_sizes(headers, vec![123_456]); + let message = finalized_headers_message_with_sizes(headers, vec![123_456]); let encoded = message.encode().unwrap(); - let decoded = HeaderSyncMessage::decode(&encoded, headers_context(1, 1)).unwrap(); + let decoded = HeaderSyncMessage::decode(&encoded, finalized_headers_context(1, 1)).unwrap(); assert_eq!(decoded, message); } @@ -825,14 +940,28 @@ fn codec_round_trips_headers_with_bounded_vector() { #[test] fn codec_round_trips_headers_with_unknown_body_size_sentinel() { let headers = vec![mainnet_header(&BLOCK_MAINNET_1_BYTES)]; - let message = headers_message_with_sizes(headers, vec![0]); + let message = finalized_headers_message_with_sizes(headers, vec![0]); let encoded = message.encode().unwrap(); - let decoded = HeaderSyncMessage::decode(&encoded, 
headers_context(1, 1)).unwrap(); + let decoded = HeaderSyncMessage::decode(&encoded, finalized_headers_context(1, 1)).unwrap(); assert_eq!(decoded, message); } +#[test] +fn decode_rejects_tree_aux_roots_when_not_requested() { + let headers = vec![mainnet_header(&BLOCK_MAINNET_1_BYTES)]; + let message = finalized_headers_message_with_sizes(headers, vec![0]); + let encoded = message.encode().unwrap(); + + // A response carrying tree-aux roots against a request that did not ask for + // them (a non-finalized range) is rejected at decode before allocation. + assert!(matches!( + HeaderSyncMessage::decode(&encoded, headers_context(1, 1)), + Err(HeaderSyncWireError::UnrequestedTreeAuxRoots) + )); +} + #[test] fn codec_round_trips_new_block() { let message = HeaderSyncMessage::NewBlock(mainnet_block(&BLOCK_MAINNET_1_BYTES)); @@ -853,6 +982,7 @@ fn codec_rejects_unknown_message_types_and_trailing_bytes() { let mut encoded = HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 1, + want_tree_aux_roots: false, } .encode() .unwrap(); @@ -877,9 +1007,32 @@ fn headers_codec_rejects_body_size_mismatch_truncation_and_trailing_bytes() { }) )); + assert!(matches!( + HeaderSyncMessage::Headers { + headers: headers.clone(), + body_sizes: vec![100], + tree_aux_roots: Vec::new(), + } + .encode(), + Err(HeaderSyncWireError::TreeAuxRootCountMismatch { + headers: 1, + roots: 0, + }) + )); + + assert!(matches!( + validate_tree_aux_root_heights(block::Height(1), &[root_at(block::Height(2))]), + Err(HeaderSyncWireError::TreeAuxRootHeightMismatch { + expected_height: block::Height(1), + root_height: block::Height(2), + }) + )); + let mut truncated_mid_size = message.encode().unwrap(); truncated_mid_size.pop(); - assert!(HeaderSyncMessage::decode(&truncated_mid_size, headers_context(1, 1)).is_err()); + assert!( + HeaderSyncMessage::decode(&truncated_mid_size, finalized_headers_context(1, 1)).is_err() + ); let mut truncated_mid_header = vec![MSG_HS_HEADERS]; 
truncated_mid_header.write_u32::(1).unwrap(); @@ -889,11 +1042,27 @@ fn headers_codec_rejects_body_size_mismatch_truncation_and_trailing_bytes() { let mut with_trailing = message.encode().unwrap(); with_trailing.push(0); assert!(matches!( - HeaderSyncMessage::decode(&with_trailing, headers_context(1, 1)), + HeaderSyncMessage::decode(&with_trailing, finalized_headers_context(1, 1)), Err(HeaderSyncWireError::TrailingBytes) )); } +#[test] +fn decode_rejects_non_empty_headers_without_tree_aux_roots() { + let headers = vec![mainnet_header(&BLOCK_MAINNET_1_BYTES)]; + let mut encoded = headers_message(headers).encode().unwrap(); + encoded[HEADER_SYNC_MESSAGE_TYPE_BYTES + HEADER_SYNC_COUNT_BYTES] = 0; + encoded.truncate(encoded.len() - HEADER_SYNC_BLOCK_COMMITMENT_ROOTS_BYTES); + + assert!(matches!( + HeaderSyncMessage::decode(&encoded, finalized_headers_context(1, 1)), + Err(HeaderSyncWireError::TreeAuxRootCountMismatch { + headers: 1, + roots: 0, + }) + )); +} + #[test] fn frame_decode_rejects_oversized_payload_length_before_allocating() { let mut bytes = Vec::new(); @@ -912,6 +1081,7 @@ fn frame_decode_rejects_oversized_payload_length_before_allocating() { fn decode_rejects_header_counts_over_contract_caps() { let mut encoded = vec![MSG_HS_HEADERS]; encoded.write_u32::(MAX_HS_RANGE + 1).unwrap(); + encoded.write_u8(0).unwrap(); assert!(matches!( HeaderSyncMessage::decode(&encoded, headers_context(MAX_HS_RANGE, MAX_HS_RANGE)), Err(HeaderSyncWireError::HeaderCountLimit { .. 
}) @@ -919,6 +1089,7 @@ fn decode_rejects_header_counts_over_contract_caps() { let mut encoded = vec![MSG_HS_HEADERS]; encoded.write_u32::(2).unwrap(); + encoded.write_u8(0).unwrap(); assert!(matches!( HeaderSyncMessage::decode(&encoded, headers_context(1, MAX_HS_RANGE)), Err(HeaderSyncWireError::HeaderCountLimit { actual: 2, max: 1 }) @@ -926,6 +1097,7 @@ fn decode_rejects_header_counts_over_contract_caps() { let mut encoded = vec![MSG_HS_HEADERS]; encoded.write_u32::(2).unwrap(); + encoded.write_u8(0).unwrap(); assert!(matches!( HeaderSyncMessage::decode(&encoded, headers_context(MAX_HS_RANGE, 1)), Err(HeaderSyncWireError::HeaderCountLimit { actual: 2, max: 1 }) @@ -936,18 +1108,20 @@ fn decode_rejects_header_counts_over_contract_caps() { fn headers_codec_does_not_use_legacy_160_header_cap() { let header = mainnet_header(&BLOCK_MAINNET_1_BYTES); let headers = vec![header; 161]; - let message = headers_message(headers); + let message = finalized_headers_message(headers); let encoded = message.encode().unwrap(); - let decoded = HeaderSyncMessage::decode(&encoded, headers_context(161, 161)).unwrap(); + let decoded = HeaderSyncMessage::decode(&encoded, finalized_headers_context(161, 161)).unwrap(); match decoded { HeaderSyncMessage::Headers { headers, body_sizes, + tree_aux_roots, } => { assert_eq!(headers.len(), 161); assert_eq!(body_sizes, vec![0; 161]); + assert_eq!(tree_aux_roots, roots_from_height(block::Height(1), 161)); } _ => panic!("decoded message must be Headers"), } @@ -958,6 +1132,7 @@ fn get_headers_rejects_invalid_counts() { assert!(HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 0, + want_tree_aux_roots: false, } .encode() .is_err()); @@ -965,6 +1140,7 @@ fn get_headers_rejects_invalid_counts() { assert!(HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: MAX_HS_RANGE + 1, + want_tree_aux_roots: false, } .encode() .is_err()); @@ -1019,7 +1195,10 @@ fn 
header_serialized_sizes_are_exact_and_message_cap_has_headroom() { let default_response_bytes = HEADER_SYNC_MESSAGE_TYPE_BYTES + HEADER_SYNC_COUNT_BYTES - + (COMMON_HEADER_BYTES + HEADER_SYNC_BODY_SIZE_BYTES) * DEFAULT_HS_RANGE as usize; + + (COMMON_HEADER_BYTES + + HEADER_SYNC_BODY_SIZE_BYTES + + HEADER_SYNC_BLOCK_COMMITMENT_ROOTS_BYTES) + * DEFAULT_HS_RANGE as usize; assert!(default_response_bytes < MAX_HS_MESSAGE_BYTES); assert!(MAX_HS_MESSAGE_BYTES < LOCAL_MAX_MESSAGE_BYTES as usize); } @@ -1031,13 +1210,43 @@ fn request_and_serving_counts_are_clamped_by_byte_budget() { MAX_HS_RANGE, &Network::Mainnet, LOCAL_MAX_MESSAGE_BYTES, + false, ); assert!(count < MAX_HS_RANGE); + let count_with_roots = clamp_header_sync_request_count( + MAX_HS_RANGE, + MAX_HS_RANGE, + &Network::Mainnet, + LOCAL_MAX_MESSAGE_BYTES, + true, + ); + assert!(count_with_roots < count); + + let config = ZakuraHeaderSyncConfig { + max_headers_per_response: MAX_HS_RANGE, + ..ZakuraHeaderSyncConfig::default() + }; + assert_eq!( + inbound_get_headers_count_limit(&config, &Network::Mainnet, LOCAL_MAX_MESSAGE_BYTES, false), + count + ); + assert_eq!( + inbound_get_headers_count_limit(&config, &Network::Mainnet, LOCAL_MAX_MESSAGE_BYTES, true), + count_with_roots + ); + let headers = vec![mainnet_header(&BLOCK_MAINNET_1_BYTES); usize::try_from(count).unwrap() + 100]; - let headers = - truncate_headers_to_byte_budget(headers, &Network::Mainnet, LOCAL_MAX_MESSAGE_BYTES); + let body_sizes = vec![0u32; headers.len()]; + let tree_aux_roots = roots_from_height(block::Height(1), headers.len()); + let (headers, _body_sizes, _tree_aux_roots) = truncate_headers_to_byte_budget( + headers, + body_sizes, + tree_aux_roots, + &Network::Mainnet, + LOCAL_MAX_MESSAGE_BYTES, + ); let encoded = headers_message(headers).encode().unwrap(); assert!(encoded.len() <= MAX_HS_MESSAGE_BYTES); @@ -1094,6 +1303,7 @@ async fn restart_rebuilds_schedule_from_durable_best_tip_and_peer_status() { HeaderSyncMessage::GetHeaders { 
start_height, count, + want_tree_aux_roots: true, }, .. } = next_non_query_action(&mut fixture.actions).await @@ -1155,6 +1365,7 @@ async fn status_updates_peer_caps_and_scheduler_respects_them() { HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, } = next_non_query_action(&mut fixture.actions).await { @@ -1233,6 +1444,7 @@ async fn scheduler_fans_out_same_forward_range_to_three_peers() { HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, } = next_non_query_action(&mut fixture.actions).await { @@ -1265,6 +1477,7 @@ async fn scheduler_narrows_large_ranges_before_tracking_fanout() { MAX_HS_RANGE, &network, LOCAL_MAX_MESSAGE_BYTES, + true, ); let mut fixture = spawn_test_reactor(startup_for( network.clone(), @@ -1299,6 +1512,7 @@ async fn scheduler_narrows_large_ranges_before_tracking_fanout() { HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, } = action { @@ -1331,6 +1545,7 @@ async fn scheduler_narrows_large_ranges_before_tracking_fanout() { HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, .. } = next_non_query_action(&mut fixture.actions).await @@ -1372,6 +1587,7 @@ async fn scheduler_creates_checkpoint_forward_before_backward_ranges() { HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, .. } = next_non_query_action(&mut fixture.actions).await @@ -1410,6 +1626,7 @@ async fn scheduler_creates_backward_checkpoint_terminating_ranges() { HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, .. 
} = next_non_query_action(&mut fixture.actions).await @@ -1421,6 +1638,96 @@ async fn scheduler_creates_backward_checkpoint_terminating_ranges() { } } +#[tokio::test(flavor = "current_thread")] +async fn forward_ranges_below_checkpoint_handoff_request_tree_aux_roots() { + let network = Parameters::build() + .with_network_name("HeadersyncRootWindowTest") + .expect("custom network name is valid") + .with_genesis_hash(Network::Mainnet.genesis_hash()) + .expect("mainnet genesis hash is valid") + .with_activation_heights(ConfiguredActivationHeights { + overwinter: Some(1), + sapling: Some(2), + blossom: Some(3), + heartwood: Some(4), + canopy: Some(4), + ..Default::default() + }) + .expect("custom activation heights are in order") + .clear_funding_streams() + .with_checkpoints(ConfiguredCheckpoints::HeightsAndHashes(vec![ + (block::Height(0), Network::Mainnet.genesis_hash()), + (block::Height(400), block::Hash([4; 32])), + (block::Height(1_200), block::Hash([12; 32])), + ])) + .expect("custom checkpoints are valid") + .to_network() + .expect("custom testnet parameters are valid"); + let first_checkpoint = block::Height(400); + let first_checkpoint_hash = block::Hash([4; 32]); + let mut capture = + TraceCapture::for_test("forward_ranges_below_checkpoint_handoff_request_tree_aux_roots") + .unwrap(); + let mut startup = startup_for( + network, + (block::Height(0), Network::Mainnet.genesis_hash()), + Some((first_checkpoint, first_checkpoint_hash)), + ); + startup.trace = ZakuraTrace::new(capture.tracer(), "01"); + let mut fixture = spawn_test_reactor(startup); + let peer_id = peer(77); + + connect_peer(&fixture, peer_id).await; + advertise_tip( + &fixture, + peer(77), + block::Height(0), + block::Height(1_000), + DEFAULT_HS_RANGE, + 10, + ) + .await; + + loop { + if let HeaderSyncAction::SendMessage { + msg: + HeaderSyncMessage::GetHeaders { + start_height, + count, + want_tree_aux_roots, + }, + .. 
+ } = next_non_query_action(&mut fixture.actions).await + { + assert_eq!(start_height, block::Height(401)); + assert_eq!(count, 600); + assert!( + want_tree_aux_roots, + "header ranges below the checkpoint handoff should carry roots" + ); + break; + } + } + + capture.flush().await; + let reader = capture.reader().unwrap(); + reader.table(HEADER_SYNC_TABLE.table()).assert_row( + hs_trace::HEADER_GET_HEADERS_SENT, + &[ + (hs_trace::RANGE_START, TraceValue::U64(401)), + (hs_trace::RANGE_COUNT, TraceValue::U64(600)), + (hs_trace::FINALIZED, TraceValue::Bool(false)), + (hs_trace::WANT_TREE_AUX_ROOTS, TraceValue::Bool(true)), + (hs_trace::RANGE_PRIORITY, TraceValue::Str("forward")), + (hs_trace::VERIFIED_BLOCK_TIP, TraceValue::U64(0)), + (hs_trace::FINALIZED_HEIGHT, TraceValue::U64(0)), + (hs_trace::BEST_HEADER_TIP, TraceValue::U64(400)), + ], + ); + + let _ = capture.finish().await.unwrap(); +} + #[tokio::test(flavor = "current_thread")] async fn incoming_headers_match_outstanding_before_commit() { let checkpoint_hash = block::Hash::from(mainnet_header(&BLOCK_MAINNET_3_BYTES).as_ref()); @@ -1472,6 +1779,60 @@ async fn incoming_headers_match_outstanding_before_commit() { } } +#[tokio::test(flavor = "current_thread")] +async fn rootless_non_empty_response_is_malformed() { + let checkpoint_hash = block::Hash::from(mainnet_header(&BLOCK_MAINNET_3_BYTES).as_ref()); + let (network, _) = checkpoint_testnet_with_hash(block::Height(3), checkpoint_hash); + let first_checkpoint = block::Height(3); + let start = block::Height(4); + let mut fixture = spawn_test_reactor(startup_for( + network.clone(), + (block::Height(0), network.genesis_hash()), + Some((first_checkpoint, checkpoint_hash)), + )); + let peer_id = peer(8); + + connect_peer(&fixture, peer_id.clone()).await; + advertise_tip(&fixture, peer_id.clone(), block::Height(0), start, 1, 1).await; + loop { + if matches!( + next_non_query_action(&mut fixture.actions).await, + HeaderSyncAction::SendMessage { + msg: 
HeaderSyncMessage::GetHeaders { + want_tree_aux_roots: true, + .. + }, + .. + } + ) { + break; + } + } + + fixture + .handle + .send(HeaderSyncEvent::WireMessage { + peer: peer_id.clone(), + msg: rootless_headers_message_from(start, vec![mainnet_header(&BLOCK_MAINNET_4_BYTES)]), + }) + .await + .unwrap(); + + loop { + match next_non_query_action(&mut fixture.actions).await { + HeaderSyncAction::Misbehavior { peer, reason } => { + assert_eq!(peer, peer_id); + assert_eq!(reason, HeaderSyncMisbehavior::MalformedMessage); + break; + } + HeaderSyncAction::CommitHeaderRange { .. } => { + panic!("a rootless non-empty response must not commit") + } + _ => {} + } + } +} + #[tokio::test(flavor = "current_thread")] async fn headers_over_outstanding_contract_reports_response_too_long_without_flooding() { let network = Network::Mainnet; @@ -1514,10 +1875,13 @@ async fn headers_over_outstanding_contract_reports_response_too_long_without_flo .handle .send(HeaderSyncEvent::WireMessage { peer: peer_id.clone(), - msg: headers_message(vec![ - mainnet_header(&BLOCK_MAINNET_1_BYTES), - mainnet_header(&BLOCK_MAINNET_2_BYTES), - ]), + msg: headers_message_from( + start, + vec![ + mainnet_header(&BLOCK_MAINNET_1_BYTES), + mainnet_header(&BLOCK_MAINNET_2_BYTES), + ], + ), }) .await .unwrap(); @@ -1586,10 +1950,10 @@ async fn matching_headers_are_statelessly_validated_before_commit() { .handle .send(HeaderSyncEvent::WireMessage { peer: peer_id.clone(), - msg: headers_message(vec![ - mainnet_header(&BLOCK_MAINNET_1_BYTES), - Arc::new(bad_second), - ]), + msg: headers_message_from( + next_height(two_before_checkpoint).expect("has successor"), + vec![mainnet_header(&BLOCK_MAINNET_1_BYTES), Arc::new(bad_second)], + ), }) .await .unwrap(); @@ -1832,6 +2196,7 @@ async fn late_covered_response_does_not_reanchor_newer_outstanding_range() { HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 1, + want_tree_aux_roots: true, }, } if peer == peer_id => break, _ => {} @@ -1855,6 
+2220,7 @@ async fn late_covered_response_does_not_reanchor_newer_outstanding_range() { HeaderSyncMessage::GetHeaders { start_height: block::Height(2), count: 1, + want_tree_aux_roots: true, }, } if peer == peer_id => break, _ => {} @@ -1969,6 +2335,7 @@ async fn local_commit_failure_retries_without_peer_misbehavior() { HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, } if peer == first_peer || peer == second_peer => { assert_eq!(start_height, start); @@ -2156,6 +2523,7 @@ async fn reconnect_clears_session_bound_outstanding_ranges() { msg: HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 1, + want_tree_aux_roots: true, }, } if peer == peer_id )); @@ -2184,6 +2552,7 @@ async fn reconnect_clears_session_bound_outstanding_ranges() { msg: HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 1, + want_tree_aux_roots: true, }, } if peer == peer_id )); @@ -2990,6 +3359,7 @@ async fn inbound_get_headers_requires_status_and_respects_serving_cap() { msg: HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 1, + want_tree_aux_roots: false, }, }) .await @@ -3023,6 +3393,7 @@ async fn inbound_get_headers_requires_status_and_respects_serving_cap() { msg: HeaderSyncMessage::GetHeaders { start_height: start, count: 3, + want_tree_aux_roots: false, }, }) .await @@ -3032,6 +3403,7 @@ async fn inbound_get_headers_requires_status_and_respects_serving_cap() { peer, start: action_start, count, + .. 
} => { assert_eq!(peer, requester); assert_eq!(action_start, start); @@ -3048,6 +3420,7 @@ async fn inbound_get_headers_requires_status_and_respects_serving_cap() { msg: HeaderSyncMessage::GetHeaders { start_height: block::Height(7), count: 1, + want_tree_aux_roots: false, }, }) .await @@ -3079,12 +3452,15 @@ async fn inbound_get_headers_requires_status_and_respects_serving_cap() { msg: HeaderSyncMessage::GetHeaders { start_height: block::Height(8), count: 1, + want_tree_aux_roots: false, }, }) .await .unwrap(); match next_query_headers_action(&mut fixture.actions).await { - HeaderSyncAction::QueryHeadersByHeightRange { peer, start, count } => { + HeaderSyncAction::QueryHeadersByHeightRange { + peer, start, count, .. + } => { assert_eq!(peer, requester); assert_eq!(start, block::Height(8)); assert_eq!(count, 1); @@ -3123,6 +3499,7 @@ async fn inbound_get_headers_over_cap_disconnects_without_state_read() { msg: HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 4, + want_tree_aux_roots: false, }, }) .await @@ -3174,7 +3551,10 @@ async fn rejected_non_linking_range_traces_link_stage_and_error_kind() { .handle .send(HeaderSyncEvent::WireMessage { peer: peer_id.clone(), - msg: headers_message(vec![mainnet_header(&BLOCK_MAINNET_2_BYTES)]), + msg: headers_message_from( + block::Height(1), + vec![mainnet_header(&BLOCK_MAINNET_2_BYTES)], + ), }) .await .unwrap(); @@ -3545,7 +3925,10 @@ async fn forward_link_wedge_reanchors_to_verified_tip_without_banning() { .handle .send(HeaderSyncEvent::WireMessage { peer: served_peer, - msg: headers_message(vec![mainnet_header(&BLOCK_MAINNET_1_BYTES)]), + msg: headers_message_from( + start_height, + vec![mainnet_header(&BLOCK_MAINNET_1_BYTES)], + ), }) .await .unwrap(); @@ -3569,6 +3952,7 @@ async fn forward_link_wedge_reanchors_to_verified_tip_without_banning() { HeaderSyncMessage::GetHeaders { start_height, count: _, + want_tree_aux_roots: true, }, .. 
} if saw_reanchor_action && start_height == expected_start => { @@ -3626,7 +4010,10 @@ async fn single_peer_forward_link_failures_do_not_reanchor_globally() { .handle .send(HeaderSyncEvent::WireMessage { peer: served_peer, - msg: headers_message(vec![mainnet_header(&BLOCK_MAINNET_1_BYTES)]), + msg: headers_message_from( + start_height, + vec![mainnet_header(&BLOCK_MAINNET_1_BYTES)], + ), }) .await .unwrap(); @@ -3674,6 +4061,7 @@ async fn forward_genesis_backfill_reaches_checkpoint_before_finalized_commit() { HeaderSyncMessage::GetHeaders { start_height, count, + want_tree_aux_roots: true, }, .. } = next_non_query_action(&mut fixture.actions).await @@ -3688,7 +4076,7 @@ async fn forward_genesis_backfill_reaches_checkpoint_before_finalized_commit() { .handle .send(HeaderSyncEvent::WireMessage { peer: peer_id.clone(), - msg: headers_message(headers.to_vec()), + msg: finalized_headers_message(headers.to_vec()), }) .await .unwrap(); @@ -3867,11 +4255,14 @@ async fn checkpoint_backfill_rejects_non_contiguous_run_before_commit() { .handle .send(HeaderSyncEvent::WireMessage { peer: peer_id.clone(), - msg: headers_message(vec![ - mainnet_header(&BLOCK_MAINNET_GENESIS_BYTES), - mainnet_header(&BLOCK_MAINNET_GENESIS_BYTES), - mainnet_header(&BLOCK_MAINNET_GENESIS_BYTES), - ]), + msg: headers_message_from( + block::Height(1), + vec![ + mainnet_header(&BLOCK_MAINNET_GENESIS_BYTES), + mainnet_header(&BLOCK_MAINNET_GENESIS_BYTES), + mainnet_header(&BLOCK_MAINNET_GENESIS_BYTES), + ], + ), }) .await .unwrap(); @@ -3919,7 +4310,10 @@ async fn header_response_that_does_not_link_to_anchor_is_misbehavior_before_comm .handle .send(HeaderSyncEvent::WireMessage { peer: peer_id.clone(), - msg: headers_message(vec![mainnet_header(&BLOCK_MAINNET_2_BYTES)]), + msg: headers_message_from( + block::Height(1), + vec![mainnet_header(&BLOCK_MAINNET_2_BYTES)], + ), }) .await .unwrap(); @@ -4242,6 +4636,7 @@ async fn pow_validation_does_not_monopolize_the_runtime_thread() { fn 
hostile_vectors_are_rejected_for_allocation_and_unsolicited_headers() { let mut encoded = vec![MSG_HS_HEADERS]; encoded.write_u32::(u32::MAX).unwrap(); + encoded.write_u8(0).unwrap(); assert!(matches!( HeaderSyncMessage::decode(&encoded, headers_context(MAX_HS_RANGE, MAX_HS_RANGE)), Err(HeaderSyncWireError::HeaderCountLimit { .. }) @@ -4249,6 +4644,7 @@ fn hostile_vectors_are_rejected_for_allocation_and_unsolicited_headers() { let mut encoded = vec![MSG_HS_HEADERS]; encoded.write_u32::(1).unwrap(); + encoded.write_u8(0).unwrap(); assert!(matches!( HeaderSyncMessage::decode(&encoded, HeaderSyncDecodeContext::control()), Err(HeaderSyncWireError::UnsolicitedHeaders) diff --git a/zebra-network/src/zakura/header_sync/validation.rs b/zebra-network/src/zakura/header_sync/validation.rs index a1d2bb9e985..6e16519e27a 100644 --- a/zebra-network/src/zakura/header_sync/validation.rs +++ b/zebra-network/src/zakura/header_sync/validation.rs @@ -62,6 +62,14 @@ pub(super) fn insert_u64(row: &mut serde_json::Map, key: &'static row.insert(key.to_string(), Value::Number(Number::from(value))); } +pub(super) fn insert_bool( + row: &mut serde_json::Map, + key: &'static str, + value: bool, +) { + row.insert(key.to_string(), Value::Bool(value)); +} + pub(super) fn insert_optional_str( row: &mut serde_json::Map, key: &'static str, @@ -126,6 +134,11 @@ impl HeaderSyncDecodeContext { } } + pub(super) fn wants_tree_aux_roots(self) -> bool { + self.requested + .is_some_and(|requested| requested.want_tree_aux_roots) + } + pub(super) fn headers_response_limit(self) -> Result, HeaderSyncWireError> { let Some(requested) = self.requested else { return Ok(None); @@ -326,6 +339,39 @@ pub(super) fn validate_body_sizes_len( Ok(()) } +pub(super) fn validate_tree_aux_roots_len( + headers: usize, + roots: usize, +) -> Result<(), HeaderSyncWireError> { + if headers != roots { + return Err(HeaderSyncWireError::TreeAuxRootCountMismatch { headers, roots }); + } + Ok(()) +} + +pub(super) fn 
validate_tree_aux_root_heights( + start_height: block::Height, + roots: &[BlockCommitmentRoots], +) -> Result<(), HeaderSyncWireError> { + for (offset, root) in roots.iter().enumerate() { + let offset = u32::try_from(offset) + .map_err(|_| HeaderSyncWireError::NumericOverflow("tree-aux root height offset"))?; + let expected_height = block::Height( + start_height + .0 + .checked_add(offset) + .ok_or(HeaderSyncWireError::NumericOverflow("tree-aux root height"))?, + ); + if root.height != expected_height { + return Err(HeaderSyncWireError::TreeAuxRootHeightMismatch { + expected_height, + root_height: root.height, + }); + } + } + Ok(()) +} + pub(super) fn clamp_advertised_range(value: u32) -> u32 { value.clamp(1, MAX_HS_RANGE) } @@ -346,6 +392,17 @@ pub(super) fn read_height(reader: &mut R) -> Result( + reader: &mut R, + field: &'static str, +) -> Result { + match reader.read_u8()? { + 0 => Ok(false), + 1 => Ok(true), + value => Err(HeaderSyncWireError::InvalidBoolMarker { field, value }), + } +} + pub(super) fn reject_trailing( bytes: &[u8], reader: &Cursor<&[u8]>, diff --git a/zebra-network/src/zakura/header_sync/wire.rs b/zebra-network/src/zakura/header_sync/wire.rs index ce25b2e2119..eff54299071 100644 --- a/zebra-network/src/zakura/header_sync/wire.rs +++ b/zebra-network/src/zakura/header_sync/wire.rs @@ -4,9 +4,8 @@ use super::{config::*, error::*, validation::*, *}; pub const ZAKURA_STREAM_HEADER_SYNC: u16 = 5; /// Version of the native header-sync stream. /// -/// Version 2 intentionally breaks stream-5 compatibility before header sync is -/// deployed: `Headers` now carries one advisory body-size hint per header. -pub const ZAKURA_HEADER_SYNC_STREAM_VERSION: u16 = 2; +/// Version 4 carries one tree-aux root for each non-empty range header. +pub const ZAKURA_HEADER_SYNC_STREAM_VERSION: u16 = 4; /// Peer status advertisement. 
pub const MSG_HS_STATUS: u8 = 1; @@ -28,7 +27,10 @@ pub const DEFAULT_HS_MAX_INFLIGHT: u16 = 10; pub(super) const HEADER_SYNC_MESSAGE_TYPE_BYTES: usize = 1; pub(super) const HEADER_SYNC_COUNT_BYTES: usize = 4; +pub(super) const HEADER_SYNC_HAS_ROOTS_BYTES: usize = 1; pub(super) const HEADER_SYNC_BODY_SIZE_BYTES: usize = 4; +/// Encoded [`BlockCommitmentRoots`]: height + Sapling root + Orchard root. +pub(super) const HEADER_SYNC_BLOCK_COMMITMENT_ROOTS_BYTES: usize = 4 + 32 + 32; pub(super) const COMMON_HEADER_BYTES: usize = 1_487; pub(super) const REGTEST_HEADER_BYTES: usize = 177; pub(super) const HEADER_SYNC_FANOUT: usize = 3; @@ -48,7 +50,10 @@ const _: () = assert!(MAX_HS_MESSAGE_BYTES < LOCAL_MAX_MESSAGE_BYTES as usize); const _: () = assert!( HEADER_SYNC_MESSAGE_TYPE_BYTES + HEADER_SYNC_COUNT_BYTES - + (COMMON_HEADER_BYTES + HEADER_SYNC_BODY_SIZE_BYTES) * (DEFAULT_HS_RANGE as usize) + + (COMMON_HEADER_BYTES + + HEADER_SYNC_BODY_SIZE_BYTES + + HEADER_SYNC_BLOCK_COMMITMENT_ROOTS_BYTES) + * (DEFAULT_HS_RANGE as usize) < MAX_HS_MESSAGE_BYTES ); @@ -63,15 +68,23 @@ pub enum HeaderSyncMessage { start_height: block::Height, /// Requested header count. count: u32, + /// Whether the requester wants all-or-nothing tree-aux roots. + /// A sender who is syncing in vct mode will always request these. + /// A sender who is syncing in non-checkpoint mode does not need these but still requests them. + /// A sender who is syncing above the last checkpoint height does not request these. + want_tree_aux_roots: bool, }, /// A bounded contiguous header run with one advisory body-size hint per header. /// - /// A `0` size means "unknown"; the hint is not consensus data. + /// A `0` size means "unknown"; the hint is not consensus data. Tree-aux roots + /// are peer-sourced execution hints and are verified by state before use. Headers { /// Headers in ascending height order. headers: Vec>, /// Advisory serialized body sizes, parallel to `headers`. 
body_sizes: Vec, + /// Per-height commitment roots, parallel to `headers`. + tree_aux_roots: Vec, }, /// Full block tip-flood payload. NewBlock(Arc), @@ -97,22 +110,30 @@ impl HeaderSyncMessage { Self::GetHeaders { start_height, count, + want_tree_aux_roots, } => { validate_get_headers_count(*count)?; write_height(&mut bytes, *start_height)?; bytes.write_u32::(*count)?; + bytes.write_u8(u8::from(*want_tree_aux_roots))?; } Self::Headers { headers, body_sizes, + tree_aux_roots, } => { validate_headers_len(headers.len(), usize_from_u32(MAX_HS_RANGE, "headers cap")?)?; validate_body_sizes_len(headers.len(), body_sizes.len())?; + validate_tree_aux_roots_len(headers.len(), tree_aux_roots.len())?; bytes.write_u32::(u32_from_usize(headers.len(), "headers count")?)?; + bytes.write_u8(u8::from(!tree_aux_roots.is_empty()))?; for (header, body_size) in headers.iter().zip(body_sizes) { header.zcash_serialize(&mut bytes)?; bytes.write_u32::(*body_size)?; } + for roots in tree_aux_roots { + roots.zcash_serialize(&mut bytes)?; + } } Self::NewBlock(block) => { block.zcash_serialize(&mut bytes)?; @@ -145,27 +166,48 @@ impl HeaderSyncMessage { MSG_HS_GET_HEADERS => { let start_height = read_height(&mut reader)?; let count = reader.read_u32::()?; + let want_tree_aux_roots = read_bool_marker(&mut reader, "want_tree_aux_roots")?; validate_get_headers_count(count)?; Self::GetHeaders { start_height, count, + want_tree_aux_roots, } } MSG_HS_HEADERS => { let count = usize_from_u32(reader.read_u32::()?, "headers count")?; + let has_roots = read_bool_marker(&mut reader, "has_roots")?; let Some(max_headers) = context.headers_response_limit()? 
else { return Err(HeaderSyncWireError::UnsolicitedHeaders); }; + if has_roots && !context.wants_tree_aux_roots() { + return Err(HeaderSyncWireError::UnrequestedTreeAuxRoots); + } validate_headers_len(count, max_headers)?; let mut headers = Vec::with_capacity(count); let mut body_sizes = Vec::with_capacity(count); + let mut tree_aux_roots = if has_roots { + Vec::with_capacity(count) + } else { + Vec::new() + }; for _ in 0..count { headers.push(Arc::new(block::Header::zcash_deserialize(&mut reader)?)); body_sizes.push(reader.read_u32::()?); } + if has_roots { + for _ in 0..count { + tree_aux_roots.push(BlockCommitmentRoots::zcash_deserialize(&mut reader)?); + } + } + validate_tree_aux_roots_len(count, tree_aux_roots.len())?; + if let Some(requested) = context.requested { + validate_tree_aux_root_heights(requested.start_height, &tree_aux_roots)?; + } Self::Headers { headers, body_sizes, + tree_aux_roots, } } MSG_HS_NEW_BLOCK => { diff --git a/zebra-network/src/zakura/testkit/cluster.rs b/zebra-network/src/zakura/testkit/cluster.rs index 9ddcf1ac362..952a23d17be 100644 --- a/zebra-network/src/zakura/testkit/cluster.rs +++ b/zebra-network/src/zakura/testkit/cluster.rs @@ -172,13 +172,15 @@ mod tests { }; use tokio_util::sync::CancellationToken; use zebra_chain::{ - block, + block, orchard, + parallel::commitment_aux::BlockCommitmentRoots, parameters::{ testnet::{ ConfiguredActivationHeights, ConfiguredCheckpoints, Parameters as TestnetParameters, }, Network, }, + sapling, serialization::{ZcashDeserializeInto, ZcashSerialize}, }; use zebra_test::vectors::{ @@ -188,12 +190,48 @@ mod tests { fn headers_message(headers: Vec>) -> HeaderSyncMessage { let body_sizes = vec![0; headers.len()]; + let start_height = headers + .first() + .map(|header| test_header_height(header.as_ref())) + .unwrap_or(block::Height(1)); + let tree_aux_roots = roots_from_height(start_height, headers.len()); HeaderSyncMessage::Headers { headers, body_sizes, + tree_aux_roots, } } + fn root_at(height: 
block::Height) -> BlockCommitmentRoots { + BlockCommitmentRoots { + height, + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + } + } + + fn roots_from_height(start_height: block::Height, count: usize) -> Vec { + (0..count) + .map(|offset| { + let offset = u32::try_from(offset).expect("test root count fits in u32"); + root_at(block::Height(start_height.0 + offset)) + }) + .collect() + } + + fn test_header_height(header: &block::Header) -> block::Height { + let hash = block::Hash::from(header); + if hash == mainnet_genesis_hash() { + return block::Height(0); + } + + (1..=5) + .find_map(|height| { + (hash == mainnet_block(block_bytes(height)).hash()).then_some(block::Height(height)) + }) + .unwrap_or(block::Height(1)) + } + #[derive(Debug, Default)] struct OrderedSourceProbeService { senders: Arc>>, @@ -779,6 +817,7 @@ mod tests { HeaderSyncMessage::GetHeaders { start_height: actual_start, count: actual_count, + .. 
} if *actual_start == start_height && *actual_count == count ) }) @@ -843,7 +882,12 @@ mod tests { .push((peer, HeaderSyncMessage::NewBlock(block))); } } - HeaderSyncAction::QueryHeadersByHeightRange { peer, start, count } => { + HeaderSyncAction::QueryHeadersByHeightRange { + peer, + start, + count, + want_tree_aux_roots: _, + } => { let headers = local .store .lock() @@ -851,11 +895,12 @@ mod tests { .headers_by_range(start, count); let returned_count = u32::try_from(headers.len()).unwrap_or(u32::MAX); if let Some(target) = peer_to_index.get(&peer) { + let msg = headers_message(headers); let _ = nodes[*target] .handle .send(HeaderSyncEvent::WireMessage { peer: local.peer_id.clone(), - msg: headers_message(headers), + msg, }) .await; let _ = local @@ -1236,7 +1281,9 @@ mod tests { .expect("misbehavior list mutex is not poisoned") .push(reason); } - HeaderSyncAction::QueryHeadersByHeightRange { peer, start, count } => { + HeaderSyncAction::QueryHeadersByHeightRange { + peer, start, count, .. + } => { let Some(handle) = endpoint.header_sync() else { continue; }; @@ -2731,8 +2778,13 @@ mod tests { cluster.start_drivers(); cluster.connect_all().await; cluster.wait_for_tip(checkpointed, block::Height(4)).await?; + // `with_checkpoint_anchor(3)` pre-sets `finalized_height = 3`, so waiting + // on the finalized height is a no-op that returns before the backward + // checkpoint range (1..=3) has actually been backfilled. Wait instead for + // the backfilled headers to land in the store, so the `(1, 3)` commit + // trace below is asserted only after the backward range has committed. 
await_until( - "checkpoint backfill finalized", + "checkpoint backfill committed", Duration::from_secs(5), || { cluster @@ -2743,8 +2795,8 @@ mod tests { .store .lock() .expect("test store mutex is not poisoned") - .finalized_height - >= block::Height(3) + .headers + .contains_key(&block::Height(1)) }, ) .await?; @@ -2974,6 +3026,7 @@ mod tests { HeaderSyncMessage::GetHeaders { start_height: block::Height(start), count: 1, + want_tree_aux_roots: false, }, ) .await; @@ -3166,6 +3219,7 @@ mod tests { HeaderSyncMessage::GetHeaders { start_height: block::Height(1), count: 4_001, + want_tree_aux_roots: false, }, ) .await; diff --git a/zebra-network/src/zakura/testkit/trace_reader.rs b/zebra-network/src/zakura/testkit/trace_reader.rs index df9ee0c6e53..91b78c32251 100644 --- a/zebra-network/src/zakura/testkit/trace_reader.rs +++ b/zebra-network/src/zakura/testkit/trace_reader.rs @@ -38,6 +38,8 @@ pub enum TraceValue<'a> { Str(&'a str), /// An unsigned integer field. U64(u64), + /// A boolean field. + Bool(bool), /// A null field. Null, } @@ -294,6 +296,7 @@ fn trace_value_matches(actual: Option<&Value>, expected: TraceValue<'_>) -> bool match expected { TraceValue::Str(expected) => actual.and_then(Value::as_str) == Some(expected), TraceValue::U64(expected) => actual.and_then(Value::as_u64) == Some(expected), + TraceValue::Bool(expected) => actual.and_then(Value::as_bool) == Some(expected), TraceValue::Null => actual.is_some_and(Value::is_null), } } diff --git a/zebra-network/src/zakura/trace.rs b/zebra-network/src/zakura/trace.rs index 286b8d10784..53da3a15f47 100644 --- a/zebra-network/src/zakura/trace.rs +++ b/zebra-network/src/zakura/trace.rs @@ -301,6 +301,20 @@ pub mod header_sync_trace { pub const EXPECTED_COUNT: &str = "expected_count"; /// In-flight request count field. pub const IN_FLIGHT_COUNT: &str = "in_flight_count"; + /// Whether header-carried tree-aux roots were requested for this range. 
+ pub const WANT_TREE_AUX_ROOTS: &str = "want_tree_aux_roots"; + /// Whether the range is expected to terminate at a checkpoint. + pub const FINALIZED: &str = "finalized"; + /// Header scheduler priority label (`forward` or `backward`). + pub const RANGE_PRIORITY: &str = "range_priority"; + /// Highest verified full-block/body height observed by the header scheduler. + pub const VERIFIED_BLOCK_TIP: &str = "verified_block_tip"; + /// Durable finalized height observed by the header scheduler. + pub const FINALIZED_HEIGHT: &str = "finalized_height"; + /// Durable best header tip observed by the header scheduler. + pub const BEST_HEADER_TIP: &str = "best_header_tip"; + /// Number of header-carried tree-aux roots present on this send/receive. + pub const TREE_AUX_ROOTS_LEN: &str = "tree_aux_roots_len"; /// Destination peer count field. pub const DESTINATION_PEER_COUNT: &str = "destination_peer_count"; /// Bounded reason field. @@ -356,6 +370,8 @@ pub mod commit_state_trace { pub const RANGE_START: &str = "range_start"; /// Range count field. pub const RANGE_COUNT: &str = "range_count"; + /// Number of header-carried tree-aux roots supplied to this commit. + pub const TREE_AUX_ROOTS_LEN: &str = "tree_aux_roots_len"; /// Result label field. pub const RESULT: &str = "result"; /// Bounded reason field. diff --git a/zebra-state/CHANGELOG.md b/zebra-state/CHANGELOG.md index 4eb899ab0b3..3ecf99dde55 100644 --- a/zebra-state/CHANGELOG.md +++ b/zebra-state/CHANGELOG.md @@ -9,8 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Extended value-pool disk serialization with an Ironwood slot after the - deferred pool, and bumped the state database format version to `27.3.0`. +- Extended value-pool disk serialization with an Ironwood slot after the deferred pool, and + consolidated the current verified-commitment-trees state database format changes under + version `27.3.0`. 
+- Added the `vct_upgrade_metadata` column family, recording the upgrade height `U` (the lowest + height this binary committed). `tree_aux` root serving now stitches the per-height trees below + `U` with the serving index at and above `U`, so a node that upgraded mid-chain serves a range + crossing `U` as one gap-free batch instead of a short prefix that stalled the fetch client. + Historical note-commitment tree RPCs are unavailable only within the band `[U, H)` (where `H` + is the checkpoint handoff), and available below `U` and at or above `H`. ## [8.0.0] - 2026-06-02 diff --git a/zebra-state/src/arbitrary.rs b/zebra-state/src/arbitrary.rs index 1dc9b7ce33d..73ff08de28d 100644 --- a/zebra-state/src/arbitrary.rs +++ b/zebra-state/src/arbitrary.rs @@ -98,6 +98,7 @@ impl ContextuallyVerifiedBlock { new_outputs, transaction_hashes, deferred_pool_balance_change: _, + auth_data_root: _, } = block.into(); Self { diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index d3b33a9d5af..3f73efd70df 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -110,6 +110,32 @@ pub struct Config { #[serde(skip)] pub enable_zakura_header_seed_from_committed_blocks: bool, + /// Mirror of `consensus.checkpoint_sync`, set by zebrad at startup. + /// + /// When `true` (the default), a node syncing under checkpoint trust uses the fast + /// verified-commitment-trees path below the last checkpoint: per-block Sapling/Orchard + /// roots are verified against the committed headers and folded into the anchor set and + /// history tree, skipping the per-block frontier recompute. The + /// `consensus.disable_vct_fast_sync` setting is mirrored into state to keep checkpoint sync + /// enabled while forcing the legacy per-block recompute. + /// + /// Skipped in serde because it is not an independent state setting — it tracks the + /// consensus option, so the generic Zebra state config does not expose a duplicate. 
+ #[serde(skip)] + pub checkpoint_sync: bool, + + /// Mirror of `consensus.disable_vct_fast_sync`, set by zebrad at startup. + /// + /// This keeps `consensus.checkpoint_sync` enabled while forcing the legacy per-block + /// Sapling/Orchard tree recompute in both Archive and Pruned storage modes. Set to `false` + /// by default: checkpoint sync uses VCT fast sync on networks with embedded handoff + /// frontiers. + /// + /// Skipped in serde because users configure this alongside `consensus.checkpoint_sync`, not + /// as an independent state setting. + #[serde(skip)] + pub disable_vct_fast_sync: bool, + /// Whether to delete the old database directories when present. /// /// Set to `true` by default. If this is set to `false`, @@ -402,6 +428,8 @@ impl Default for Config { ephemeral: false, should_backup_non_finalized_state: true, enable_zakura_header_seed_from_committed_blocks: false, + checkpoint_sync: true, + disable_vct_fast_sync: false, delete_old_database: true, storage_mode: StorageMode::default(), debug_stop_at_height: None, @@ -423,6 +451,11 @@ mod tests { #[test] fn storage_mode_deserializes_from_documented_toml() { + assert!( + !Config::default().disable_vct_fast_sync, + "VCT fast sync is enabled by default when checkpoint sync and embedded frontiers are available" + ); + let archive: Config = toml::from_str(r#"storage_mode = "archive""#) .expect("archive storage mode deserializes from a string"); assert_eq!(archive.storage_mode, StorageMode::Archive); @@ -445,6 +478,12 @@ mod tests { pruned_with_retention.storage_mode, StorageMode::Pruned(PruningConfig { tx_retention: 6000 }) ); + + let serialized = toml::to_string(&Config::default()).expect("state config serializes"); + assert!( + !serialized.contains("disable_vct_fast_sync"), + "disable_vct_fast_sync is configured under [consensus], not [state]" + ); } } diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index 9d769b631f2..e9af1a35b9f 100644 --- a/zebra-state/src/constants.rs +++ 
b/zebra-state/src/constants.rs @@ -91,6 +91,17 @@ const DATABASE_FORMAT_VERSION: u64 = 27; /// - adding new column families, /// - changing the format of a column family in a compatible way, or /// - breaking changes with compatibility code in all supported Zebra versions. +/// +/// Version 3 adds the verified-commitment-trees state format: +/// - the `fast_sync_metadata` column family, which records fast-sync handoff state, +/// - the `commitment_roots_by_height` serving index (design §4), a compact per-height +/// `(sapling_root, orchard_root)` map every node writes so a fast-synced node can serve +/// `tree_aux` roots without per-height trees, and +/// - the on-open repair for incompatible stored history-tree bytes before background format +/// checks read the tip tree. +/// +/// New databases populate the serving index going forward; existing ones open with it empty and +/// serve from per-height trees as before. const DATABASE_FORMAT_MINOR_VERSION: u64 = 3; /// The database format patch version, incremented each time the on-disk database format has a diff --git a/zebra-state/src/error.rs b/zebra-state/src/error.rs index 98ff1c18b80..ea0876b3d9b 100644 --- a/zebra-state/src/error.rs +++ b/zebra-state/src/error.rs @@ -86,6 +86,26 @@ impl CommitBlockError { } } + /// Returns the missing VCT supplied-root height for retryable root-fetch stalls. + pub fn vct_supplied_root_unavailable_height(&self) -> Option { + match self { + CommitBlockError::ValidateContextError(error) => { + error.vct_supplied_root_unavailable_height() + } + _ => None, + } + } + + /// Returns the height for any retryable VCT root stall (absent/evicted root, or one + /// not yet verifiable for lack of a buffered successor). See + /// [`ValidateContextError::vct_retryable_height`]. 
+ pub fn vct_retryable_height(&self) -> Option { + match self { + CommitBlockError::ValidateContextError(error) => error.vct_retryable_height(), + _ => None, + } + } + /// Returns a suggested misbehaviour score increment for a certain error. pub fn misbehavior_score(&self) -> u32 { 0 @@ -149,6 +169,18 @@ impl CommitCheckpointVerifiedError { pub fn duplicate_location(&self) -> Option<&KnownBlock> { self.0.duplicate_location() } + + /// Returns the missing VCT supplied-root height for retryable root-fetch stalls. + pub fn vct_supplied_root_unavailable_height(&self) -> Option { + self.0.vct_supplied_root_unavailable_height() + } + + /// Returns the height for any retryable VCT root stall (absent/evicted root, or one + /// not yet verifiable for lack of a buffered successor). See + /// [`ValidateContextError::vct_retryable_height`]. + pub fn vct_retryable_height(&self) -> Option { + self.0.vct_retryable_height() + } } impl From for CommitCheckpointVerifiedError { @@ -189,6 +221,24 @@ pub enum CommitHeaderRangeError { body_sizes: usize, }, + /// The request supplied a different number of roots than headers. + #[error("header range tree-aux root count {roots} does not match header count {headers}")] + TreeAuxRootCountMismatch { + /// Header count. + headers: usize, + /// Tree-aux root count. + roots: usize, + }, + + /// A supplied tree-aux root did not match the inferred header height. + #[error("header range tree-aux root height {root_height:?} does not match expected height {expected_height:?}")] + TreeAuxRootHeightMismatch { + /// Expected root height. + expected_height: block::Height, + /// Actual root height. + root_height: block::Height, + }, + /// The supplied anchor is not known to state. 
#[error("header range anchor {anchor} is not known")] UnknownAnchor { @@ -349,6 +399,23 @@ pub enum ValidateContextError { #[non_exhaustive] NotReadyToBeCommitted, + #[error( + "verified-commitment-trees fast path has no valid supplied root for height \ + {height:?}: the note-commitment frontier is frozen, so this block cannot be \ + committed until a verifiable root is fetched from a peer (retryable)" + )] + #[non_exhaustive] + VctSuppliedRootUnavailable { height: block::Height }, + + #[error( + "verified-commitment-trees fast path cannot yet verify the supplied root for height \ + {height:?}: no successor block is buffered to confirm it against the header chain, and \ + committing it unverified would persist a root that is only checked one block later \ + (irreversibly, once on disk). Commit is deferred until the successor arrives (retryable)" + )] + #[non_exhaustive] + VctSuppliedRootAwaitingSuccessor { height: block::Height }, + #[error("block height {candidate_height:?} is lower than the current finalized height {finalized_tip_height:?}")] #[non_exhaustive] OrphanedBlock { @@ -560,6 +627,33 @@ pub enum ValidateContextError { }, } +impl ValidateContextError { + /// Returns the missing VCT supplied-root height for retryable root-fetch stalls. + /// + /// This is the subset of [`Self::vct_retryable_height`] that warrants a peer *refetch*: + /// the supplied root is absent or was evicted after failing verification, so a different + /// peer must supply a replacement. An await-successor stall ([`Self::vct_retryable_height`] + /// but not this) already has its root and only waits for the next block to be downloaded. 
+ pub fn vct_supplied_root_unavailable_height(&self) -> Option { + match self { + ValidateContextError::VctSuppliedRootUnavailable { height } => Some(*height), + _ => None, + } + } + + /// Returns the height for any retryable VCT root stall: either an absent/evicted supplied + /// root ([`Self::VctSuppliedRootUnavailable`]) or one not yet verifiable because no successor + /// is buffered to confirm it ([`Self::VctSuppliedRootAwaitingSuccessor`]). The write loop + /// parks and retries the same block for both; only the former additionally requests a refetch. + pub fn vct_retryable_height(&self) -> Option { + match self { + ValidateContextError::VctSuppliedRootUnavailable { height } + | ValidateContextError::VctSuppliedRootAwaitingSuccessor { height } => Some(*height), + _ => None, + } + } +} + impl From for ValidateContextError { fn from(value: sprout::tree::NoteCommitmentTreeError) -> Self { ValidateContextError::NoteCommitmentTreeError(value.into()) @@ -620,4 +714,65 @@ mod tests { }; assert_eq!(dup_err.misbehavior_score(), 0); } + + #[test] + fn checkpoint_error_exposes_retryable_vct_root_height() { + let height = Height(42); + let retryable: CommitCheckpointVerifiedError = + ValidateContextError::VctSuppliedRootUnavailable { height }.into(); + assert_eq!( + retryable.vct_supplied_root_unavailable_height(), + Some(height), + "checkpoint commit errors expose retryable VCT root misses" + ); + + let non_retryable: CommitCheckpointVerifiedError = + ValidateContextError::NonSequentialBlock { + candidate_height: Height(5), + parent_height: Height(3), + } + .into(); + assert_eq!( + non_retryable.vct_supplied_root_unavailable_height(), + None, + "unrelated validation errors are not treated as VCT root misses" + ); + assert_eq!( + non_retryable.vct_retryable_height(), + None, + "unrelated validation errors are not retryable VCT stalls" + ); + } + + /// An await-successor stall is retryable (the write loop parks and re-commits) but is + /// *not* a refetch case: the root 
is present, only its successor is missing. So it must + /// surface through `vct_retryable_height` while `vct_supplied_root_unavailable_height` + /// (which gates the peer refetch) stays `None` — otherwise the committer would spam + /// pointless refetches for a root it already holds. + #[test] + fn await_successor_is_retryable_but_not_a_refetch() { + let height = Height(7); + let awaiting: CommitCheckpointVerifiedError = + ValidateContextError::VctSuppliedRootAwaitingSuccessor { height }.into(); + + assert_eq!( + awaiting.vct_retryable_height(), + Some(height), + "an await-successor stall is retryable", + ); + assert_eq!( + awaiting.vct_supplied_root_unavailable_height(), + None, + "an await-successor stall must not trigger a peer refetch (the root is present)", + ); + + // The unavailable case is both retryable and a refetch trigger. + let unavailable: CommitCheckpointVerifiedError = + ValidateContextError::VctSuppliedRootUnavailable { height }.into(); + assert_eq!(unavailable.vct_retryable_height(), Some(height)); + assert_eq!( + unavailable.vct_supplied_root_unavailable_height(), + Some(height) + ); + } } diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 59bc7fbd460..015f190c428 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -80,6 +80,10 @@ pub use service::finalized_state::{ preview_rollback_finalized_state, rollback_finalized_state, RollbackBackupSummary, RollbackFinalizedStateError, RollbackFinalizedStateOptions, RollbackFinalizedStateSummary, }; +pub use service::finalized_state::{ + produce_final_frontiers_bytes, validate_final_frontiers_bytes, FinalFrontiersGenerationError, + FinalFrontiersValidationError, +}; pub use service::{ finalized_state::{DiskWriteBatch, FromDisk, IntoDisk, WriteDisk, ZebraDb}, ReadStateService, diff --git a/zebra-state/src/request.rs b/zebra-state/src/request.rs index 68550ef9337..264842f4eff 100644 --- a/zebra-state/src/request.rs +++ b/zebra-state/src/request.rs @@ -2,7 +2,7 @@ use std::{ 
collections::{HashMap, HashSet}, - ops::{Add, Deref, DerefMut, RangeInclusive}, + ops::{Add, Deref, RangeInclusive}, pin::Pin, sync::Arc, }; @@ -10,7 +10,11 @@ use std::{ use tower::{BoxError, Service, ServiceExt}; use zebra_chain::{ amount::{DeferredPoolBalanceChange, NegativeAllowed}, - block::{self, Block, HeightDiff}, + block::{ + self, + merkle::{AuthDataRoot, AUTH_DIGEST_PLACEHOLDER}, + Block, HeightDiff, + }, diagnostic::{task::WaitForPanics, CodeTimer}, history_tree::HistoryTree, orchard, @@ -36,6 +40,27 @@ use crate::{ CommitSemanticallyVerifiedError, }; +/// Times `$body` and records its duration to the named histogram when the +/// `commit-metrics` feature is enabled; otherwise just evaluates `$body` with +/// zero overhead. Used to profile checkpoint prepare phases. +macro_rules! timed_prepare_phase { + ($name:expr, $body:expr) => {{ + #[cfg(feature = "commit-metrics")] + let _start = std::time::Instant::now(); + let result = $body; + #[cfg(feature = "commit-metrics")] + metrics::histogram!($name).record(_start.elapsed().as_secs_f64()); + result + }}; +} + +/// Minimum transaction count before checkpoint prepare uses Rayon for +/// per-transaction digest fanout. +/// +/// Small blocks are faster serially because Rayon scheduling costs dominate the +/// native ZIP-244 digest work. +const MIN_PARALLEL_CHECKPOINT_PREPARE_TRANSACTIONS: usize = 16; + /// Identify a spend by a transparent outpoint or revealed nullifier. /// /// This enum implements `From` for [`transparent::OutPoint`], [`sprout::Nullifier`], @@ -260,6 +285,26 @@ pub struct SemanticallyVerifiedBlock { pub transaction_hashes: Arc<[transaction::Hash]>, /// This block's deferred pool value balance change. pub deferred_pool_balance_change: Option, + /// The precomputed ZIP-244 authorizing-data commitment root for this block, + /// if it was computed during verification. 
+ /// + /// The checkpoint verifier sets this (it runs with high concurrency, ahead + /// of the single-threaded finalized committer) so the committer does not + /// have to recompute the per-transaction auth digests on its critical path. + /// `None` means "not precomputed"; the committer falls back to computing it. + /// + /// # Security + /// + /// The finalized checkpoint committer **trusts** a `Some` value as the + /// authorizing data for the ZIP-244 `hashBlockCommitments` header check + /// (`check::block_commitment_is_valid_for_chain_history`), so it must always + /// equal `block.auth_data_root()`. To keep that invariant unforgeable this + /// field is crate-private: it is only ever set by the constructors in this + /// module, which derive it from `block`. A caller outside the crate cannot + /// set it, and [`CheckpointVerifiedBlock`] (the only type whose cache the + /// committer trusts) cannot be mutated after construction, so the cache can + /// never be desynced from the block it commits. 
+ pub(crate) auth_data_root: Option, } /// A block ready to be committed directly to the finalized state with @@ -491,6 +536,7 @@ impl ContextuallyVerifiedBlock { new_outputs, transaction_hashes, deferred_pool_balance_change, + auth_data_root: _, } = semantically_verified; // This is redundant for the non-finalized state, @@ -523,7 +569,7 @@ impl CheckpointVerifiedBlock { deferred_pool_balance_change: Option, ) -> Self { let mut block = Self::with_hash(block.clone(), hash.unwrap_or(block.hash())); - block.deferred_pool_balance_change = deferred_pool_balance_change; + block.set_deferred_pool_balance_change(deferred_pool_balance_change); block } /// Creates a block that's ready to be committed to the finalized state, @@ -536,14 +582,77 @@ impl CheckpointVerifiedBlock { } } +fn prepare_block_data( + block: &Block, +) -> ( + Arc<[transaction::Hash]>, + AuthDataRoot, + HashMap, +) { + #[cfg(feature = "commit-metrics")] + { + let transaction_count = block.transactions.len(); + let output_count: usize = block + .transactions + .iter() + .map(|transaction| transaction.outputs().len()) + .sum(); + let v5_transaction_count = block + .transactions + .iter() + .filter(|transaction| transaction.version() == 5) + .count(); + + if let Some(height) = block.coinbase_height() { + metrics::gauge!("zebra.state.prepare.block.height").set(height.0 as f64); + } + metrics::histogram!("zebra.state.prepare.block_tx_count").record(transaction_count as f64); + metrics::histogram!("zebra.state.prepare.block_output_count").record(output_count as f64); + metrics::histogram!("zebra.state.prepare.block_v5_tx_count") + .record(v5_transaction_count as f64); + } + + // Compute each transaction's txid and ZIP-244 auth digest together, for efficiency. 
+ let (transaction_hashes, auth_digests): (Vec<_>, Vec<_>) = + timed_prepare_phase!("zebra.state.prepare.txid_auth_digest.duration_seconds", { + if block.transactions.len() < MIN_PARALLEL_CHECKPOINT_PREPARE_TRANSACTIONS { + block + .transactions + .iter() + .map(|tx| tx.txid_and_auth_digest()) + .unzip() + } else { + use rayon::prelude::*; + block + .transactions + .par_iter() + .map(|tx| tx.txid_and_auth_digest()) + .unzip() + } + }); + let transaction_hashes: Arc<[_]> = transaction_hashes.into(); + let auth_data_root = timed_prepare_phase!( + "zebra.state.prepare.auth_data_root.duration_seconds", + auth_digests + .into_iter() + .map(|auth_digest| auth_digest.unwrap_or(AUTH_DIGEST_PLACEHOLDER)) + .collect::() + ); + let new_outputs = timed_prepare_phase!( + "zebra.state.prepare.new_ordered_outputs.duration_seconds", + transparent::new_ordered_outputs(block, &transaction_hashes) + ); + + (transaction_hashes, auth_data_root, new_outputs) +} + impl SemanticallyVerifiedBlock { /// Creates [`SemanticallyVerifiedBlock`] from [`Block`] and [`block::Hash`]. pub fn with_hash(block: Arc, hash: block::Hash) -> Self { let height = block .coinbase_height() .expect("semantically verified block should have a coinbase height"); - let transaction_hashes: Arc<[_]> = block.transactions.iter().map(|tx| tx.hash()).collect(); - let new_outputs = transparent::new_ordered_outputs(&block, &transaction_hashes); + let (transaction_hashes, auth_data_root, new_outputs) = prepare_block_data(&block); Self { block, @@ -552,6 +661,35 @@ impl SemanticallyVerifiedBlock { new_outputs, transaction_hashes, deferred_pool_balance_change: None, + auth_data_root: Some(auth_data_root), + } + } + + /// Creates a [`SemanticallyVerifiedBlock`] from data the semantic verifier + /// has already prepared, leaving the authorizing-data root unset. 
+ /// + /// The semantic verifier binds the ZIP-244 auth-data commitment during + /// contextual validation and the committer recomputes it on that path, so it + /// is not precomputed here. This constructor exists so callers outside the + /// crate build the block through a checked entry point rather than a struct + /// literal, keeping the crate-private [`auth_data_root`](Self::auth_data_root) + /// cache out of their reach (see its security note). + pub fn from_semantic_data( + block: Arc, + hash: block::Hash, + height: block::Height, + new_outputs: HashMap, + transaction_hashes: Arc<[transaction::Hash]>, + deferred_pool_balance_change: Option, + ) -> Self { + Self { + block, + hash, + height, + new_outputs, + transaction_hashes, + deferred_pool_balance_change, + auth_data_root: None, } } @@ -577,8 +715,7 @@ impl From> for SemanticallyVerifiedBlock { let height = block .coinbase_height() .expect("semantically verified block should have a coinbase height"); - let transaction_hashes: Arc<[_]> = block.transactions.iter().map(|tx| tx.hash()).collect(); - let new_outputs = transparent::new_ordered_outputs(&block, &transaction_hashes); + let (transaction_hashes, auth_data_root, new_outputs) = prepare_block_data(&block); Self { block, @@ -587,6 +724,7 @@ impl From> for SemanticallyVerifiedBlock { new_outputs, transaction_hashes, deferred_pool_balance_change: None, + auth_data_root: Some(auth_data_root), } } } @@ -602,6 +740,7 @@ impl From for SemanticallyVerifiedBlock { deferred_pool_balance_change: Some(DeferredPoolBalanceChange::new( valid.chain_value_pool_change.deferred_amount(), )), + auth_data_root: None, } } } @@ -615,6 +754,7 @@ impl From for SemanticallyVerifiedBlock { new_outputs: finalized.new_outputs, transaction_hashes: finalized.transaction_hashes, deferred_pool_balance_change: finalized.deferred_pool_balance_change, + auth_data_root: None, } } } @@ -632,9 +772,28 @@ impl Deref for CheckpointVerifiedBlock { &self.0 } } -impl DerefMut for 
CheckpointVerifiedBlock { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 + +// `DerefMut` is intentionally **not** implemented for `CheckpointVerifiedBlock`. +// The committer trusts its precomputed `auth_data_root` for the ZIP-244 header +// commitment check, so the block bytes and the cache must stay locked together +// after construction. Mutable deref would let a holder swap `block` (or the +// cache) and feed the committer a root that doesn't match the block. The only +// field the checkpoint verifier sets after construction is the deferred pool +// balance, exposed through the narrow setter below. + +impl CheckpointVerifiedBlock { + /// Sets the deferred pool balance change computed by the checkpoint verifier + /// after construction. + /// + /// This is the only post-construction mutation a caller may perform; it does + /// not touch the block or the precomputed authorizing-data root, so the + /// committer's trusted cache stays bound to the block (see + /// [`SemanticallyVerifiedBlock::auth_data_root`]). + pub fn set_deferred_pool_balance_change( + &mut self, + deferred_pool_balance_change: Option, + ) { + self.0.deferred_pool_balance_change = deferred_pool_balance_change; } } @@ -850,6 +1009,11 @@ pub enum Request { /// /// A `0` value means unknown. These hints are not consensus data. body_sizes: Vec, + /// Tree-aux roots, parallel to `headers`. + /// + /// Every non-empty Zakura header range must provide one root per header. + /// Roots are advisory until verified during block commit. + tree_aux_roots: Vec, }, /// Computes the depth in the current best chain of the block identified by the given hash. @@ -1112,6 +1276,18 @@ pub enum ReadRequest { /// with whether the database has pruned historical data. 
IsPruned, + /// Returns [`ReadResponse::BlockRoots(Vec)`](ReadResponse::BlockRoots) + /// with the per-block Sapling/Orchard commitment roots for the heights + /// `[start_height, start_height + count)` that this node holds, in ascending height + /// order (the verified-commitment-trees `tree_aux` serving read). May return fewer + /// than `count` roots if the node does not hold the whole range. + BlockRoots { + /// First requested height. + start_height: block::Height, + /// Number of consecutive heights requested. + count: u32, + }, + /// Returns [`ReadResponse::Tip(Option<(Height, block::Hash)>)`](ReadResponse::Tip) /// with the current best chain tip. Tip, @@ -1486,6 +1662,7 @@ impl ReadRequest { match self { ReadRequest::UsageInfo => "usage_info", ReadRequest::IsPruned => "is_pruned", + ReadRequest::BlockRoots { .. } => "block_roots", ReadRequest::Tip => "tip", ReadRequest::FinalizedTip => "finalized_tip", ReadRequest::TipPoolValues => "tip_pool_values", @@ -1636,3 +1813,73 @@ impl TimedSpan { .wait_for_panics() } } + +#[cfg(test)] +mod tests { + use zebra_chain::serialization::ZcashDeserializeInto; + + use super::*; + + /// Loads the NU5 mainnet block 1,687,106 (its v5 transactions exercise the + /// ZIP-244 authorizing-data digests). + fn nu5_block() -> Arc { + Arc::new( + zebra_test::vectors::BLOCK_MAINNET_1687106_BYTES + .zcash_deserialize_into::() + .expect("NU5 test vector block deserializes"), + ) + } + + /// Every [`CheckpointVerifiedBlock`] constructor must precompute the + /// authorizing-data root *from its own block*, so the value the finalized + /// committer trusts for the ZIP-244 header commitment check always matches + /// the block it commits. This is the invariant that makes the crate-private, + /// non-`DerefMut` cache safe; a constructor that set a stale or foreign root + /// would reopen the trust hole this guards. 
+ #[test] + fn checkpoint_verified_block_caches_its_own_auth_data_root() { + let block = nu5_block(); + let expected = Some(block.auth_data_root()); + + assert_eq!( + CheckpointVerifiedBlock::from(block.clone()).auth_data_root, + expected, + "From> must cache the block's own auth data root", + ); + assert_eq!( + CheckpointVerifiedBlock::with_hash(block.clone(), block.hash()).auth_data_root, + expected, + "with_hash must cache the block's own auth data root", + ); + assert_eq!( + CheckpointVerifiedBlock::new(block.clone(), None, None).auth_data_root, + expected, + "new must cache the block's own auth data root", + ); + } + + /// The semantic-verifier constructor leaves the cache empty: that path binds + /// the auth-data commitment during contextual validation, and the committer + /// recomputes it, so there is no precomputed value to trust. + #[test] + fn semantic_constructor_leaves_auth_data_root_unset() { + let block = nu5_block(); + let hash = block.hash(); + let height = block.coinbase_height().expect("test block has a height"); + let (transaction_hashes, _auth_data_root, new_outputs) = prepare_block_data(&block); + + let semantic = SemanticallyVerifiedBlock::from_semantic_data( + block, + hash, + height, + new_outputs, + transaction_hashes, + None, + ); + + assert_eq!( + semantic.auth_data_root, None, + "the semantic path must not precompute an auth data root the committer would trust", + ); + } +} diff --git a/zebra-state/src/response.rs b/zebra-state/src/response.rs index e60135eb92a..5b8316c445a 100644 --- a/zebra-state/src/response.rs +++ b/zebra-state/src/response.rs @@ -311,6 +311,10 @@ pub enum ReadResponse { /// pruned historical data. IsPruned(bool), + /// Response to [`ReadRequest::BlockRoots`] with the per-block commitment roots + /// this node holds for the requested range, in ascending height order. + BlockRoots(Vec), + /// Response to [`ReadRequest::Tip`] with the current best chain tip. 
Tip(Option<(block::Height, block::Hash)>), @@ -561,6 +565,7 @@ impl TryFrom for Response { ReadResponse::UsageInfo(_) | ReadResponse::IsPruned(_) + | ReadResponse::BlockRoots(_) | ReadResponse::TipPoolValues { .. } | ReadResponse::BlockInfo(_) | ReadResponse::TransactionIdsForBlock(_) diff --git a/zebra-state/src/service.rs b/zebra-state/src/service.rs index 9edb075cb87..751e35e7f87 100644 --- a/zebra-state/src/service.rs +++ b/zebra-state/src/service.rs @@ -60,6 +60,7 @@ use crate::{ BoxError, CheckpointVerifiedBlock, CommitHeaderRangeError, CommitSemanticallyVerifiedError, Config, KnownBlock, ReadRequest, ReadResponse, Request, Response, SemanticallyVerifiedBlock, }; +use zebra_chain::parallel::commitment_aux::BlockCommitmentRoots; pub mod block_iter; pub mod chain_tip; @@ -88,6 +89,13 @@ use self::queued_blocks::{QueuedCheckpointVerified, QueuedSemanticallyVerified, pub use self::traits::{ReadState, State}; +/// Error returned for historical note-commitment tree/subtree read requests on a +/// verified-commitment-trees fast-synced database, where the per-height trees +/// below the checkpoint handoff height were never written. +const FAST_SYNCED_TREE_UNAVAILABLE_ERROR: &str = + "note commitment treestate is unavailable below the checkpoint on a fast-synced node; \ + historical treestate queries require an archive node"; + /// A read-write service for Zebra's cached blockchain state. 
/// /// This service modifies and provides access to: @@ -986,6 +994,7 @@ impl StateService { anchor: block::Hash, headers: Vec>, body_sizes: Vec, + tree_aux_roots: Vec, ) -> oneshot::Receiver> { let (rsp_tx, rsp_rx) = oneshot::channel(); @@ -999,6 +1008,7 @@ impl StateService { anchor, headers, body_sizes, + tree_aux_roots, rsp_tx, }) { @@ -1237,9 +1247,12 @@ impl Service for StateService { anchor, headers, body_sizes, + tree_aux_roots, } => { let rsp_rx = tokio::task::block_in_place(move || { - span.in_scope(|| self.send_header_range(anchor, headers, body_sizes)) + span.in_scope(|| { + self.send_header_range(anchor, headers, body_sizes, tree_aux_roots) + }) }); let span = Span::current(); @@ -1474,6 +1487,138 @@ where headers } +// Returns the block commitment roots for the given height range +fn block_roots_by_height_range( + chain: Option, + db: &ZebraDb, + start: block::Height, + count: u32, +) -> Vec +where + C: AsRef, +{ + // Cap the count to the maximum header sync height range + let mut roots = Vec::with_capacity( + usize::try_from(count.min(MAX_HEADER_SYNC_HEIGHT_RANGE)) + .expect("capped root count fits in usize"), + ); + + // Iterate over the height range + for offset in 0..count.min(MAX_HEADER_SYNC_HEIGHT_RANGE) { + let Some(height) = start + i64::from(offset) else { + break; + }; + + // If the height is at or below the finalized tip height, serve the roots from the finalized state + let root = if db + .finalized_tip_height() + .is_some_and(|finalized_tip| height <= finalized_tip) + { + finalized_state::serve_block_roots(db, height..=height) + .into_iter() + .next() + // If the height is in the chain, serve the roots from the chain + } else if let Some(chain) = chain + .as_ref() + .map(|chain| chain.as_ref()) + .filter(|chain| chain.contains_block_height(height)) + { + match ( + chain.sapling_tree(height.into()), + chain.orchard_tree(height.into()), + ) { + (Some(sapling), Some(orchard)) => Some(BlockCommitmentRoots { + height, + sapling_root: 
sapling.root(), + orchard_root: orchard.root(), + }), + _ => None, + } + // If the height is not in the chain, serve the roots from the zakura header commitment roots by height range + } else { + db.zakura_header_commitment_roots_by_height_range(height..=height) + .into_iter() + .next() + }; + + let Some(root) = root else { + break; + }; + + if root.height != height { + break; + } + + roots.push(root); + } + + roots +} + +// Returns true if the given roots cover the given height range +fn block_roots_cover_range( + start_height: block::Height, + count: u32, + roots: &[BlockCommitmentRoots], +) -> bool { + if roots.len() != usize::try_from(count).unwrap_or(usize::MAX) { + return false; + } + + roots.iter().enumerate().all(|(offset, roots)| { + let Ok(offset) = u32::try_from(offset) else { + return false; + }; + start_height + .0 + .checked_add(offset) + .is_some_and(|height| roots.height == block::Height(height)) + }) +} + +// Return the highest known tip, but cap it to the verified block tip +// if the header-only extension is not root-covered. +fn root_covered_best_header_tip( + chain: Option, + db: &ZebraDb, + best_disk_header_tip: Option<(block::Height, block::Hash)>, + verified_block_tip: Option<(block::Height, block::Hash)>, +) -> Option<(block::Height, block::Hash)> +where + C: AsRef, +{ + // Choose the best candidate between the best disk header tip and the verified block tip + let best_header_tip = match (best_disk_header_tip, verified_block_tip) { + (Some(header_tip), Some(block_tip)) if block_tip.0 > header_tip.0 => Some(block_tip), + (Some(header_tip), _) => Some(header_tip), + (None, block_tip) => block_tip, + }?; + + // Is the chosen candidate already at or below the verified block tip? + // If yes, there is no header-only gap. 
+ let Some(verified_block_tip) = verified_block_tip else { + return Some(best_header_tip); + }; + + if best_header_tip.0 <= verified_block_tip.0 { + return Some(best_header_tip); + } + + let Ok(start_height) = verified_block_tip.0.next() else { + return Some(verified_block_tip); + }; + let best_header_height = best_header_tip.0; + let verified_block_height = verified_block_tip.0; + let count = best_header_height.0.checked_sub(verified_block_height.0)?; + let roots = block_roots_by_height_range(chain, db, start_height, count); + + if block_roots_cover_range(start_height, count, &roots) { + Some(best_header_tip) + } else { + Some(verified_block_tip) + } +} + impl Service for ReadStateService { type Response = ReadResponse; type Error = BoxError; @@ -1536,6 +1681,24 @@ impl Service for ReadStateService { // Used by the `getblockchaininfo` RPC. ReadRequest::IsPruned => Ok(ReadResponse::IsPruned(state.db.is_pruned())), + // The verified-commitment-trees `tree_aux` serving read (design §9). + ReadRequest::BlockRoots { + start_height, + count, + } => { + let roots = if count == 0 { + Vec::new() + } else { + block_roots_by_height_range( + state.latest_best_chain(), + &state.db, + start_height, + count, + ) + }; + Ok(ReadResponse::BlockRoots(roots)) + } + // Used by the StateService. 
ReadRequest::Tip => Ok(ReadResponse::Tip(read::tip( state.latest_best_chain(), @@ -1709,17 +1872,15 @@ impl Service for ReadStateService { ReadRequest::BestHeaderTip => { let best_disk_header_tip = state.db.best_header_tip(); - let verified_block_tip = read::tip(state.latest_best_chain(), &state.db); - - Ok(ReadResponse::BestHeaderTip( - match (best_disk_header_tip, verified_block_tip) { - (Some(header_tip), Some(block_tip)) if block_tip.0 > header_tip.0 => { - Some(block_tip) - } - (Some(header_tip), _) => Some(header_tip), - (None, block_tip) => block_tip, - }, - )) + let best_chain = state.latest_best_chain(); + let verified_block_tip = read::tip(best_chain.clone(), &state.db); + + Ok(ReadResponse::BestHeaderTip(root_covered_best_header_tip( + best_chain, + &state.db, + best_disk_header_tip, + verified_block_tip, + ))) } ReadRequest::MissingBlockBodies { from, limit } => { @@ -1759,15 +1920,34 @@ impl Service for ReadStateService { Ok(ReadResponse::Blocks(blocks)) } - ReadRequest::SaplingTree(hash_or_height) => Ok(ReadResponse::SaplingTree( - read::sapling_tree(state.latest_best_chain(), &state.db, hash_or_height), - )), + ReadRequest::SaplingTree(hash_or_height) => { + if state.db.vct_historical_tree_unavailable(hash_or_height) { + return Err(FAST_SYNCED_TREE_UNAVAILABLE_ERROR.into()); + } + Ok(ReadResponse::SaplingTree(read::sapling_tree( + state.latest_best_chain(), + &state.db, + hash_or_height, + ))) + } - ReadRequest::OrchardTree(hash_or_height) => Ok(ReadResponse::OrchardTree( - read::orchard_tree(state.latest_best_chain(), &state.db, hash_or_height), - )), + ReadRequest::OrchardTree(hash_or_height) => { + if state.db.vct_historical_tree_unavailable(hash_or_height) { + return Err(FAST_SYNCED_TREE_UNAVAILABLE_ERROR.into()); + } + Ok(ReadResponse::OrchardTree(read::orchard_tree( + state.latest_best_chain(), + &state.db, + hash_or_height, + ))) + } ReadRequest::SaplingSubtrees { start_index, limit } => { + // On a fast-synced database, subtrees below the 
checkpoint handoff + // height were never written, so a below-checkpoint range returns an + // empty list (the existing "no subtree at the start index" contract) + // rather than panicking. A typed archive-mode error for subtrees + // unifies with the indexing watermark in a later increment. let end_index = limit .and_then(|limit| start_index.0.checked_add(limit.0)) .map(NoteCommitmentSubtreeIndex); @@ -1991,7 +2171,7 @@ impl Service for ReadStateService { /// Initialize a state service from the provided [`Config`]. /// Returns a boxed state service, a read-only state service, -/// and receivers for state chain tip updates. +/// receivers for state chain tip updates, and a `tree_aux` roots writer if peer mode is active. /// /// Each `network` has its own separate on-disk database. /// @@ -2089,12 +2269,9 @@ pub fn spawn_init_read_only( pub async fn init_test( network: &Network, ) -> Buffer, Request> { - // TODO: pass max_checkpoint_height and checkpoint_verify_concurrency limit - // if we ever need to test final checkpoint sent UTXO queries - let (state_service, _, _, _) = - StateService::new(Config::ephemeral(), network, block::Height::MAX, 0).await; + let (state_service, _, _, _) = init_test_services_inner(network).await; - Buffer::new(BoxService::new(state_service), 1) + state_service } /// Initializes a state service with an ephemeral [`Config`] and a buffer with a single slot, @@ -2109,6 +2286,18 @@ pub async fn init_test_services( ReadStateService, LatestChainTip, ChainTipChange, +) { + init_test_services_inner(network).await +} + +#[cfg(any(test, feature = "proptest-impl"))] +async fn init_test_services_inner( + network: &Network, +) -> ( + Buffer, Request>, + ReadStateService, + LatestChainTip, + ChainTipChange, ) { // TODO: pass max_checkpoint_height and checkpoint_verify_concurrency limit // if we ever need to test final checkpoint sent UTXO queries diff --git a/zebra-state/src/service/chain_tip.rs b/zebra-state/src/service/chain_tip.rs index 
3e3b7c3a481..6cbdf6ae937 100644 --- a/zebra-state/src/service/chain_tip.rs +++ b/zebra-state/src/service/chain_tip.rs @@ -116,6 +116,7 @@ impl From for ChainTipBlock { new_outputs: _, transaction_hashes, deferred_pool_balance_change: _, + auth_data_root: _, } = prepared; Self { diff --git a/zebra-state/src/service/check.rs b/zebra-state/src/service/check.rs index 34c87d4ff72..ab25e7619e9 100644 --- a/zebra-state/src/service/check.rs +++ b/zebra-state/src/service/check.rs @@ -5,7 +5,9 @@ use std::{borrow::Borrow, sync::Arc}; use chrono::Duration; use zebra_chain::{ - block::{self, Block, ChainHistoryBlockTxAuthCommitmentHash, CommitmentError}, + block::{ + self, merkle::AuthDataRoot, Block, ChainHistoryBlockTxAuthCommitmentHash, CommitmentError, + }, history_tree::HistoryTree, parameters::{Network, NetworkUpgrade}, work::difficulty::CompactDifficulty, @@ -170,6 +172,7 @@ pub(crate) fn block_commitment_is_valid_for_chain_history( block: Arc, network: &Network, history_tree: &HistoryTree, + precomputed_auth_data_root: Option, ) -> Result<(), ValidateContextError> { match block.commitment(network)? { block::Commitment::PreSaplingReserved(_) @@ -232,7 +235,12 @@ pub(crate) fn block_commitment_is_valid_for_chain_history( "the history tree of the previous block must exist \ since the current block has a ChainHistoryBlockTxAuthCommitment", ); - let auth_data_root = block.auth_data_root(); + // Use the auth data root precomputed by the verifier when available + // (it is byte-identical to recomputing it here), so the committer + // does not repeat the per-transaction auth-digest work on its + // single-threaded critical path. 
+ let auth_data_root = + precomputed_auth_data_root.unwrap_or_else(|| block.auth_data_root()); let hash_block_commitments = ChainHistoryBlockTxAuthCommitmentHash::from_commitments( &history_tree_root, diff --git a/zebra-state/src/service/check/tests/nullifier.rs b/zebra-state/src/service/check/tests/nullifier.rs index f42858afda6..90c25471937 100644 --- a/zebra-state/src/service/check/tests/nullifier.rs +++ b/zebra-state/src/service/check/tests/nullifier.rs @@ -85,7 +85,7 @@ proptest! { // randomly choose to commit the block to the finalized or non-finalized state if use_finalized_state { let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, None, None, "test"); // the block was committed prop_assert_eq!(Some((Height(1), block1.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); @@ -349,7 +349,7 @@ proptest! { // randomly choose to commit the next block to the finalized or non-finalized state if duplicate_in_finalized_state { let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, None, None, "test"); prop_assert_eq!(Some((Height(1), block1.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); prop_assert!(commit_result.is_ok()); @@ -448,7 +448,7 @@ proptest! 
{ // randomly choose to commit the block to the finalized or non-finalized state if use_finalized_state { let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(),None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, None, None, "test"); prop_assert_eq!(Some((Height(1), block1.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); prop_assert!(commit_result.is_ok()); @@ -628,7 +628,7 @@ proptest! { // randomly choose to commit the next block to the finalized or non-finalized state if duplicate_in_finalized_state { let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(),None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, None, None, "test"); prop_assert_eq!(Some((Height(1), block1.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); prop_assert!(commit_result.is_ok()); @@ -725,7 +725,7 @@ proptest! { // randomly choose to commit the block to the finalized or non-finalized state if use_finalized_state { let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, None, None, "test"); prop_assert_eq!(Some((Height(1), block1.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); prop_assert!(commit_result.is_ok()); @@ -914,7 +914,7 @@ proptest! 
{ // randomly choose to commit the next block to the finalized or non-finalized state if duplicate_in_finalized_state { let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, None, None, "test"); prop_assert_eq!(Some((Height(1), block1.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); prop_assert!(commit_result.is_ok()); @@ -1004,7 +1004,7 @@ proptest! { finalized_state.populate_with_anchors(&block2); let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.into(), None, None, None, "test"); prop_assert!(commit_result.is_ok()); let block2 = Arc::new(block2).prepare(); @@ -1058,7 +1058,7 @@ proptest! { finalized_state.populate_with_anchors(&block2); let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.into(), None, None, None, "test"); prop_assert!(commit_result.is_ok()); let block2 = Arc::new(block2).prepare(); @@ -1112,7 +1112,7 @@ proptest! 
{ finalized_state.populate_with_anchors(&block2); let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.into(), None, None, None, "test"); prop_assert!(commit_result.is_ok()); let block2 = Arc::new(block2).prepare(); diff --git a/zebra-state/src/service/check/tests/utxo.rs b/zebra-state/src/service/check/tests/utxo.rs index dd9017bea20..7c807963d13 100644 --- a/zebra-state/src/service/check/tests/utxo.rs +++ b/zebra-state/src/service/check/tests/utxo.rs @@ -185,7 +185,7 @@ proptest! { // randomly choose to commit the block to the finalized or non-finalized state if use_finalized_state { let block1 = CheckpointVerifiedBlock::from(Arc::new(block1)); - let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block1.clone().into(), None, None, None, "test"); // the block was committed prop_assert_eq!(Some((Height(1), block1.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); @@ -273,7 +273,7 @@ proptest! { if use_finalized_state_spend { let block2 = CheckpointVerifiedBlock::from(Arc::new(block2)); - let commit_result = finalized_state.commit_finalized_direct(block2.clone().into(),None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block2.clone().into(), None, None, None, "test"); // the block was committed prop_assert_eq!(Some((Height(2), block2.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); @@ -609,7 +609,7 @@ proptest! 
{ if use_finalized_state_spend { let block2 = CheckpointVerifiedBlock::from(block2.clone()); - let commit_result = finalized_state.commit_finalized_direct(block2.clone().into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct(block2.clone().into(), None, None, None, "test"); // the block was committed prop_assert_eq!(Some((Height(2), block2.hash)), read::best_tip(&non_finalized_state, &finalized_state.db)); @@ -877,8 +877,13 @@ fn new_state_with_mainnet_transparent_data( if use_finalized_state { let block1 = CheckpointVerifiedBlock::from(block1.clone()); - let commit_result = - finalized_state.commit_finalized_direct(block1.clone().into(), None, "test"); + let commit_result = finalized_state.commit_finalized_direct( + block1.clone().into(), + None, + None, + None, + "test", + ); // the block was committed assert_eq!( diff --git a/zebra-state/src/service/finalized_state.rs b/zebra-state/src/service/finalized_state.rs index b9edd39fd7b..ceffa099081 100644 --- a/zebra-state/src/service/finalized_state.rs +++ b/zebra-state/src/service/finalized_state.rs @@ -22,7 +22,13 @@ use std::{ }, }; -use zebra_chain::{block, parallel::tree::NoteCommitmentTrees, parameters::Network}; +use zebra_chain::{ + block::{self, merkle::AuthDataRoot, Block}, + orchard, + parallel::tree::{BlockNotePrecompute, NoteCommitmentTrees}, + parameters::Network, + sapling, +}; use zebra_db::{ block::{RetentionPlan, ZAKURA_HEADER_BODY_SIZE_BY_HEIGHT}, chain::BLOCK_INFO, @@ -70,12 +76,64 @@ static COMMIT_COMPUTE_POOL: LazyLock = LazyLock::new(|| { .expect("rayon thread pool configuration is valid") }); +/// Spawns the note-commitment tree per-leaf hashing for `block` onto the +/// commit-compute pool, returning a receiver for the result and a cancellation +/// flag. 
+/// +/// The off-committer half of the tree-update pipeline: the finalized write loop +/// starts this for the *next* block — using the running tree sizes `sapling_start` +/// / `orchard_start` (the tree `count`s the block will commit at) — so the heavy +/// hashing overlaps the *current* block's commit on otherwise idle cores. The +/// committer then only applies the precomputed subtree roots. If the precompute is stale (its `start_size` no +/// longer matches the tree), the committer falls back to inline hashing, so this +/// is purely a scheduling optimization. +/// +/// Because it is started speculatively before the current block has committed, the +/// caller must keep the returned flag and set it if it discards the precompute — +/// e.g. when the current block's commit fails. The spawned task checks the flag +/// before each pool's hashing (and skips the send if cancelled), so a discarded +/// child that has not started a pool yet avoids that pool's work. +pub(crate) fn spawn_note_precompute( + sapling_start: u64, + orchard_start: u64, + block: Arc, +) -> ( + crossbeam_channel::Receiver, + Arc, +) { + let (tx, rx) = crossbeam_channel::bounded(1); + let cancel = Arc::new(AtomicBool::new(false)); + let task_cancel = cancel.clone(); + COMMIT_COMPUTE_POOL.spawn(move || { + let result = + BlockNotePrecompute::compute(sapling_start, orchard_start, &block, &task_cancel); + // If the precompute was cancelled, the receiver has been (or is being) + // dropped and the result is unwanted; skip the send. + if !task_cancel.load(Ordering::Relaxed) { + let _ = tx.send(result); + } + }); + (rx, cancel) +} + pub mod column_family; +mod commitment_aux; +mod commitment_aux_verify; mod disk_db; mod disk_format; +mod vct; mod zebra_db; +use vct::VctState; + +/// The verified-commitment-trees `tree_aux` serving read path (design §9): the per-block +/// commitment roots for a height range, derived from the per-height trees. 
+pub(crate) use commitment_aux::serve_block_roots; + +pub use commitment_aux::{produce_final_frontiers_bytes, FinalFrontiersGenerationError}; +pub use vct::{validate_final_frontiers_bytes, FinalFrontiersValidationError}; + #[cfg(any(test, feature = "proptest-impl"))] mod arbitrary; @@ -119,6 +177,7 @@ pub const STATE_COLUMN_FAMILIES_IN_CODE: &[&str] = &[ "zakura_header_height_by_hash", "zakura_header_by_height", ZAKURA_HEADER_BODY_SIZE_BY_HEIGHT, + ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT, // Transactions "tx_by_loc", "hash_by_tx_loc", @@ -147,8 +206,12 @@ pub const STATE_COLUMN_FAMILIES_IN_CODE: &[&str] = &[ "history_tree", "tip_chain_value_pool", BLOCK_INFO, + // Verified-commitment-trees serving index + COMMITMENT_ROOTS_BY_HEIGHT, // Storage policy PRUNING_METADATA, + VCT_SYNC_METADATA, + VCT_UPGRADE_METADATA, ]; /// The name of the column family that records pruning progress. @@ -159,6 +222,65 @@ pub const STATE_COLUMN_FAMILIES_IN_CODE: &[&str] = &[ /// a pruned database cannot be reopened in archive mode. pub const PRUNING_METADATA: &str = "pruning_metadata"; +/// The name of the column family that marks a verified-commitment-trees +/// (vct) synced database. +/// +/// A vct-synced database is built by folding verified commitment roots into the +/// anchor set and history tree below the last checkpoint, skipping the per-height +/// note-commitment trees entirely. This column family holds a single entry, keyed +/// by the unit value `()`, mapping to the checkpoint handoff height: the lowest +/// height at which a per-height note-commitment tree is present. Per-height trees +/// are absent for every non-genesis height strictly below it. +/// +/// The presence of this entry marks the database as vct-synced: the historical +/// per-height trees were never written, so the database cannot answer historical +/// tree/subtree RPCs below the handoff height (the RPC handlers return a typed +/// archive-mode error there, §9). 
Vct sync is the default under checkpoint sync +/// for both Archive and Pruned storage modes, so a vct-synced database reopens in +/// either; the missing-history limitation is enforced at the RPC boundary, not at +/// reopen. This is orthogonal to pruning (which drops raw transactions but keeps +/// the trees); a database can be both. +pub const VCT_SYNC_METADATA: &str = "vct_sync_metadata"; + +/// The name of the column family that records the verified-commitment-trees upgrade height. +/// +/// This holds a single entry, keyed by the unit value `()`, mapping to `U`: the lowest height +/// this (vct-aware) binary committed, which is also the lowest height present in the +/// [`COMMITMENT_ROOTS_BY_HEIGHT`] serving index. It is written once — on the first committed +/// block — and never moved, so it is a stable boundary as the chain grows. +/// +/// `U` is what lets the two root sources be stitched without a gap: heights below `U` predate +/// this binary, so they carry per-height trees but no index entry and are served from the trees; +/// heights at or above `U` carry an index entry and are served from it. Combined with the +/// checkpoint handoff `H` in [`VCT_SYNC_METADATA`], it also bounds the band `[U, H)` in which a +/// vct-synced node holds no per-height tree, so historical tree/subtree RPCs are unavailable +/// there but available below `U` (pre-upgrade trees) and at/above `H` (semantic-sync trees). +pub const VCT_UPGRADE_METADATA: &str = "vct_upgrade_metadata"; + +/// The name of the column family holding the per-height Sapling/Orchard note-commitment +/// roots, keyed by [`block::Height`]. +/// +/// This is the verified-commitment-trees serving index (design §4): a compact +/// `height -> (sapling_root, orchard_root)` map (64 bytes/height) that **every** node +/// persists for each committed block, on both the vct and legacy commit paths. 
Its purpose +/// is to let a vct-synced node — which folds verified roots in but writes no per-height +/// note-commitment trees — still answer the `tree_aux` `BlockRoots` read, so the +/// root-serving fleet does not collapse as nodes adopt vct sync. The roots are the same +/// values a legacy node derives from its per-height trees via `produce_block_roots`; serving +/// reads this index first and falls back to the trees only for databases written before the +/// index existed. +pub const COMMITMENT_ROOTS_BY_HEIGHT: &str = "commitment_roots_by_height"; + +/// Provisional peer-supplied per-height Sapling/Orchard roots attached to Zakura +/// header-sync responses. +/// +/// These roots are advisory metadata for header-ahead blocks. They are persisted +/// with `zakura_header_*` so VCT fast sync can read them before full block bodies +/// arrive, but they remain untrusted until block commit verifies them against the +/// header commitments. +pub const ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT: &str = + "zakura_header_commitment_roots_by_height"; + /// The finalized part of the chain state, stored in the db. /// /// `rocksdb` allows concurrent writes through a shared reference, @@ -210,6 +332,39 @@ pub struct FinalizedState { #[cfg(feature = "elasticsearch")] /// A collection of blocks to be sent to elasticsearch as a bulk. pub elastic_blocks: Vec, + + /// Verified-commitment-trees state (peer/fixture/capture mode), or `None` + /// when legacy recompute is selected. Shared across clones. + vct: Option>, + + /// Verify-before-commit dedup. Holds the `(height, hash)` of the next + /// block whose commitment was already validated by the previous fast + /// commit's look-ahead (`C(next, candidate)`). When the next block to commit + /// matches, its own commitment check is the identical computation, so it is + /// skipped — making each header commitment check run once instead of twice. 
+ /// Guarded by hash identity (and height monotonicity), so a stale or cloned + /// value can never cause an incorrect skip. + vct_prevalidated_next: Option<(block::Height, block::Hash)>, + + /// `true` while a verified-commitment-trees (vct) fast sync has frozen the + /// note-commitment frontier — i.e. a verified commitment tree block has committed but the + /// checkpoint handoff (which replaces the frontier with the real one) has not. + /// + /// While frozen, the running frontier is no longer the real frontier for the + /// heights being committed, so a legacy recompute would fold a wrong root into + /// the history MMR and corrupt consensus state. The committer therefore refuses + /// to recompute for a height with no valid supplied root in this window, + /// returning a retryable error instead (see `commit_finalized_direct`). Reset to + /// `false` at the handoff, after which legacy recompute resumes from the real + /// frontier. + /// + /// Seeded from durable state on open (not just within a session): a vct sync + /// interrupted by a restart leaves the frozen frontier persisted but the tip + /// below the handoff, so [`FinalizedState::new`] re-derives this flag from the + /// vct-sync marker. Without that, the first post-restart height with no supplied + /// root would legacy-recompute against the stale on-disk frontier and corrupt the + /// MMR — the exact hazard this flag exists to prevent. + vct_frontier_frozen: bool, } impl FinalizedState { @@ -330,6 +485,26 @@ impl FinalizedState { read_only, ); + let vct = VctState::from_config( + config.checkpoint_sync, + config.disable_vct_fast_sync, + network, + db.clone(), + ); + + // Re-derive the frozen-frontier flag from durable state: a fast sync + // interrupted before the checkpoint handoff leaves the stale frozen frontier + // on disk (fast commits never write per-height trees) with the tip still below + // the handoff. 
Reopening in that window must keep the committer frozen so a + // height with no supplied root refuses instead of legacy-recomputing against + // the stale frontier. The handoff height itself carries the real frontier, so + // `tip < handoff` (exclusive) is exactly the frozen region. Read from the + // fast-sync marker, not `vct`, so it holds even if VCT is disabled this run. + let vct_frontier_frozen = db + .vct_synced_below() + .zip(db.finalized_tip_height()) + .is_some_and(|(handoff, tip)| tip < handoff); + #[cfg(feature = "elasticsearch")] let new_state = Self { debug_stop_at_height: config.debug_stop_at_height.map(block::Height), @@ -338,6 +513,9 @@ impl FinalizedState { db, elastic_db, elastic_blocks: vec![], + vct, + vct_prevalidated_next: None, + vct_frontier_frozen, }; #[cfg(not(feature = "elasticsearch"))] @@ -346,6 +524,9 @@ impl FinalizedState { checkpoint_raw_tx_retention_start: None, checkpoint_raw_tx_archive_backlog: Arc::new(AtomicBool::new(false)), db, + vct, + vct_prevalidated_next: None, + vct_frontier_frozen, }; // Pruning is a one-way storage mode. Refuse to open a database that has @@ -359,6 +540,23 @@ impl FinalizedState { ); } + // An *interrupted* fast sync — frozen frontier, tip still below the handoff — can + // only be safely resumed by the fast path (which supplies the verified roots). The + // on-disk frontier is stale, so the committer fails closed on every below-handoff + // height with no supplied root (§8). Reopening without a VCT root source selects the + // legacy committer, which can never supply those roots, so the node would refuse every + // block forever. Refuse to open instead, with a clear recovery path, rather than + // stalling silently. + if new_state.vct_frontier_frozen && new_state.vct.is_none() { + panic!( + "this database was previously synced in verified commitment tree mode that was \ + interrupted below the checkpoint handoff height. 
the fast path that supplies \ + the verified roots needed to resume it is disabled. Set \ + `consensus.checkpoint_sync = true` and `consensus.disable_vct_fast_sync = false` to \ + finish the fast sync, or delete the cache directory and re-sync from genesis" + ); + } + // TODO: move debug_stop_at_height into a task in the start command (#3442) if let Some(tip_height) = new_state.db.finalized_tip_height() { if new_state.is_at_stop_height(tip_height) { @@ -520,11 +718,18 @@ impl FinalizedState { &mut self, ordered_block: QueuedCheckpointVerified, prev_note_commitment_trees: Option, - ) -> Result<(CheckpointVerifiedBlock, NoteCommitmentTrees), CommitCheckpointVerifiedError> { + note_precompute: Option, + next_checkpoint: Option<(Arc, Option)>, + ) -> Result< + (CheckpointVerifiedBlock, NoteCommitmentTrees), + (QueuedCheckpointVerified, CommitCheckpointVerifiedError), + > { let (checkpoint_verified, rsp_tx) = ordered_block; let result = self.commit_finalized_direct( checkpoint_verified.clone().into(), prev_note_commitment_trees, + note_precompute, + next_checkpoint, "commit checkpoint-verified request", ); @@ -545,9 +750,13 @@ impl FinalizedState { .set(checkpoint_verified.height.0 as f64); }; - let _ = rsp_tx.send(result.clone().map(|(hash, _)| hash)); - - result.map(|(_hash, note_commitment_trees)| (checkpoint_verified, note_commitment_trees)) + match result { + Ok((hash, note_commitment_trees)) => { + let _ = rsp_tx.send(Ok(hash)); + Ok((checkpoint_verified, note_commitment_trees)) + } + Err(error) => Err(((checkpoint_verified, rsp_tx), error)), + } } /// Immediately commit a `finalized` block to the finalized state. @@ -568,42 +777,240 @@ impl FinalizedState { &mut self, finalizable_block: FinalizableBlock, prev_note_commitment_trees: Option, + note_precompute: Option, + // The next checkpoint block (and its precomputed + // auth data root), used to verify this block's fixture roots before the fast + // path trusts them. 
`None` is only valid for fast blocks at the checkpoint + // handoff, where the embedded final frontiers independently authenticate + // this height's roots, or outside the checkpoint commit path. + next_checkpoint: Option<(Arc, Option)>, source: &str, ) -> Result<(block::Hash, NoteCommitmentTrees), CommitCheckpointVerifiedError> { - let (height, hash, finalized, prev_note_commitment_trees, retention) = - match finalizable_block { - FinalizableBlock::Checkpoint { - checkpoint_verified, - } => { - // Checkpoint-verified blocks don't have an associated treestate, so we retrieve the - // treestate of the finalized tip from the database and update it for the block - // being committed, assuming the retrieved treestate is the parent block's - // treestate. Later on, this function proves this assumption by asserting that the - // finalized tip is the parent block of the block being committed. - - let block = checkpoint_verified.block.clone(); - let mut history_tree = self.db.history_tree(); - let prev_note_commitment_trees = prev_note_commitment_trees - .unwrap_or_else(|| self.db.note_commitment_trees_for_tip()); - - let mut note_commitment_trees = prev_note_commitment_trees.clone(); - let network = self.network(); + let ( + height, + hash, + finalized, + prev_note_commitment_trees, + retention, + fast_anchor_roots, + fast_sync_below, + ) = match finalizable_block { + FinalizableBlock::Checkpoint { + checkpoint_verified, + } => { + // Checkpoint-verified blocks don't have an associated treestate, so we retrieve the + // treestate of the finalized tip from the database and update it for the block + // being committed, assuming the retrieved treestate is the parent block's + // treestate. Later on, this function proves this assumption by asserting that the + // finalized tip is the parent block of the block being committed. 
+ + let block = checkpoint_verified.block.clone(); + // Auth data root precomputed by the checkpoint verifier (if any), + // so the commitment check below doesn't recompute it here on the + // single-threaded committer. `AuthDataRoot` is `Copy`. + let precomputed_auth_data_root = checkpoint_verified.auth_data_root; + let mut history_tree = self.db.history_tree(); + let prev_note_commitment_trees = prev_note_commitment_trees + .unwrap_or_else(|| self.db.note_commitment_trees_for_tip()); + + let mut note_commitment_trees = prev_note_commitment_trees.clone(); + let network = self.network(); + let height = checkpoint_verified.height; + + // The last checkpoint height (boundary below which the vct + // path skips per-height trees), when final frontiers are loaded. + let vct_last_checkpoint_height = self + .vct + .as_ref() + .and_then(|v| v.vct_sync_last_checkpoint_height()); + + // In vct mode, if the source has this height's roots at or below the + // last checkpoint height, skip the per-block note-commitment frontier recompute + // (`update_trees_parallel`) entirely and fold the supplied roots into the + // anchor set and history leaf instead. The frontier stays the (frozen) + // parent frontier; nothing below the checkpoint reads it for consensus. + // See docs/design/verified-commitment-trees.md. + let vct_roots = self.vct.as_ref().and_then(|v| { + if vct_last_checkpoint_height + .is_some_and(|last_checkpoint_height| height > last_checkpoint_height) + { + None + } else { + v.vct_roots_at_height(height) + } + }); - // Run two independent CPU-intensive crypto operations concurrently - // on the rayon pool (Part 1 of the checkpoint-commit parallelization): - // - // - updating the note commitment trees, and - // - checking this block's commitment against the *parent* history tree. 
- // - // These are independent: the commitment check reads only the parent - // history tree (not this block's note commitment trees), and the - // history tree push below depends on both, so it runs after the join. + let mut vct_anchor_roots = None; + // `Some(C)` for fast blocks of a persistent fast sync; written + // to the fast-sync marker in the commit batch. + let mut vct_sync_below = None; + + if let Some((sapling_root, orchard_root)) = vct_roots { + // The handoff frontiers are the only non-successor authority that + // can authenticate this block's own supplied roots before they are + // persisted. + let last_checkpoint_frontiers = self + .vct + .as_ref() + .and_then(|v| v.final_frontiers_for_last_checkpoint(height)); + + // This block's own commitment check is identical to the + // previous vct block's look-ahead. When that look-ahead + // already validated this exact header, skip the duplicate. + let block_hash = block.hash(); + let is_prevalidated = self.vct_prevalidated_next == Some((height, block_hash)); + if is_prevalidated { + if let Some(v) = &self.vct { + v.record_prevalidated(); + } + // Observability: the previous fast block's look-ahead already + // validated this header, so its commitment check was skipped (the + // dedup). A subset of `state.vct.fast.block.count`. + metrics::counter!("state.vct.prevalidated.block.count").increment(1); + } + + let mut verification_items = vec![ + commitment_aux_verify::CommitmentRootVerification::with_roots( + block.clone(), + sapling_root, + orchard_root, + precomputed_auth_data_root, + is_prevalidated, + ), + ]; + if let Some((next_block, next_auth)) = &next_checkpoint { + verification_items.push( + commitment_aux_verify::CommitmentRootVerification::header_only( + next_block.clone(), + *next_auth, + ), + ); + } + + // Verifies this block's own header, folds its supplied roots into + // the candidate tree, and when buffered checks the successor header + // against that candidate (the one-block lag). 
+ let candidate = COMMIT_COMPUTE_POOL + .install(|| { + commitment_aux_verify::verify_commitment_roots( + &network, + (*history_tree).clone(), + verification_items, + ) + }) + .map_err(|(_fail_height, error)| { + self.vct_prevalidated_next = None; + self.vct_reject_supplied_root(height, error) + })?; + + if let Some((next_block, _next_auth)) = &next_checkpoint { + self.vct_prevalidated_next = Some(( + (height + 1).expect("checkpoint block heights are valid"), + next_block.hash(), + )); + } else if self + .vct + .as_ref() + .is_some_and(|v| v.vct_root_needs_successor(height, &network)) + { + // Untrusted root at/above Heartwood, no successor to confirm it, + // not the last checkpoint: defer rather than persist it unverified. Leaves + // the database untouched; the block re-commits once the successor + // is buffered. + metrics::counter!("state.vct.root.await_successor.count").increment(1); + return Err(ValidateContextError::VctSuppliedRootAwaitingSuccessor { + height, + } + .into()); + } else { + self.vct_prevalidated_next = None; + } + + history_tree = Arc::new(candidate); + if let Some(v) = &self.vct { + v.record_fast_block(); + } + // Observability: this block folded supplied roots and skipped the + // note-commitment frontier recompute (the verified-commitment-trees + // fast path). Paired with `state.vct.legacy.block.count` below, this + // gives a live fast-vs-legacy ratio. + metrics::counter!("state.vct.fast.block.count").increment(1); + + // When final frontiers are loaded, this is a persistent fast + // sync: mark the database fast-synced (per-height trees absent + // below the handoff height). 
+ vct_sync_below = vct_last_checkpoint_height; + + if let Some((sapling_frontier, orchard_frontier, sprout_frontier)) = + last_checkpoint_frontiers + { + // Checkpoint handoff: verify the supplied frontiers against + // this block's verified roots (collision resistance makes the + // root a binding commitment to the frontier), then write them + // as the real tip treestate via the legacy write path + // (`fast_anchor_roots` left `None`), so post-checkpoint + // semantic verification resumes from a correct frontier. + self.vct_verify_handoff_frontier_roots( + height, + &sapling_frontier, + &orchard_frontier, + &sapling_root, + &orchard_root, + )?; + + // Subtree tips are left `None`: the resuming chain recomputes + // them from the frontier position. + note_commitment_trees = NoteCommitmentTrees { + sprout: sprout_frontier, + sapling: sapling_frontier, + sapling_subtree: None, + orchard: orchard_frontier, + orchard_subtree: None, + }; + + // The handoff writes the real final frontier as the tip + // treestate, so the frontier is no longer frozen: heights at and + // above the handoff resume legacy recompute from a correct frontier. + self.vct_frontier_frozen = false; + } else { + vct_anchor_roots = Some((sapling_root, orchard_root)); + + // A non-handoff fast block leaves the note-commitment frontier + // frozen (it folds roots instead of advancing the trees), so a + // later height with no valid supplied root must not legacy-recompute + // against this stale frontier (see the `else` branch below). + self.vct_frontier_frozen = true; + } + } else if self.vct_frontier_frozen { + // Frozen-frontier safety: a fast sync has already frozen the + // note-commitment frontier, but this height has no valid supplied root + // (never fetched, or evicted after failing verification). 
Recomputing + // here would fold a wrong root into the history MMR and corrupt state, + // so refuse with a retryable error and leave the database untouched — + // the block is committed once a verifiable root is fetched from a peer. + metrics::counter!("state.vct.root.unavailable.count").increment(1); + tracing::warn!( + ?height, + "VCT: no verifiable supplied root for a frozen-frontier height; \ + refusing to recompute (retryable)" + ); + return Err(ValidateContextError::VctSuppliedRootUnavailable { height }.into()); + } else { + // Not a fast block: any cached pre-validation does not apply to + // the next fast block (its parent frontier differs), so clear it. + self.vct_prevalidated_next = None; + + // Observability: this block recomputed the note-commitment frontier + // (the legacy path) — either VCT is off, or the fast path's roots were + // unavailable for this height and it safely fell back. + metrics::counter!("state.vct.legacy.block.count").increment(1); + + // Legacy / capture path: recompute the note-commitment frontier. // - // The commitment check is done here (and not during semantic - // validation) because it needs the history tree root, and the - // checkpoint verifier doesn't run contextual validation. For - // Nu5-onward the block hash commits only to non-authorizing data - // (ZIP-244), so this verifies the authorizing-data commitment. + // Run two independent CPU-intensive crypto operations concurrently + // on the rayon pool: updating the note commitment trees, and + // checking this block's commitment against the *parent* history + // tree. They are independent; the history push below joins them. 
#[cfg(feature = "commit-metrics")] metrics::histogram!("zebra.state.write.block_tx_count") .record(block.transactions.len() as f64); @@ -622,13 +1029,18 @@ impl FinalizedState { block.clone(), &network, &history_tree, + precomputed_auth_data_root, ) )); }); + // `note_precompute`, if present and still size-matched, + // lets the committer apply the precomputed subtree roots + // instead of re-hashing the notes here; else hashes inline. timed_commit_phase!( "zebra.state.write.update_trees.duration_seconds", - note_commitment_trees.update_trees_parallel(&block) + note_commitment_trees + .update_trees_parallel_with(&block, note_precompute) ) }) }); @@ -636,10 +1048,7 @@ impl FinalizedState { // Surface the tree-update error first, preserving the error // precedence of the previous sequential code. tree_result.map_err(ValidateContextError::from)?; - // `rayon::in_place_scope_fifo` guarantees all spawned tasks - // complete before the scope returns, so `commitment_result` is - // always `Some` here: the spawned closure wrote to it before - // the scope exited. + // `in_place_scope_fifo` joins all spawned tasks, so this is `Some`. commitment_result.expect("scope has already finished")?; // Update the history tree (depends on both operations above). @@ -651,47 +1060,45 @@ impl FinalizedState { .map_err(Arc::new) .map_err(ValidateContextError::from)?; - // Total serial wall time of the checkpoint compute phase (note tree - // update + commitment check, then history push). Compared against the - // summed phase times, this shows the overlap win. 
#[cfg(feature = "commit-metrics")] metrics::histogram!("zebra.state.write.checkpoint_compute.duration_seconds") .record(_ckpt_compute.elapsed().as_secs_f64()); - - let treestate = Treestate { - note_commitment_trees, - history_tree, - }; - - let height = checkpoint_verified.height; - let hash = checkpoint_verified.hash; - - ( - height, - hash, - FinalizedBlock::from_checkpoint_verified(checkpoint_verified, treestate), - Some(prev_note_commitment_trees), - self.retention_plan(height, true), - ) } - FinalizableBlock::Contextual { - contextually_verified, - treestate, - } => { - let height = contextually_verified.height; - - ( - height, - contextually_verified.hash, - FinalizedBlock::from_contextually_verified( - contextually_verified, - *treestate, - ), - prev_note_commitment_trees, - self.retention_plan(height, false), - ) - } - }; + + let treestate = Treestate { + note_commitment_trees, + history_tree, + }; + + let hash = checkpoint_verified.hash; + + ( + height, + hash, + FinalizedBlock::from_checkpoint_verified(checkpoint_verified, treestate), + Some(prev_note_commitment_trees), + self.retention_plan(height, true), + vct_anchor_roots, + vct_sync_below, + ) + } + FinalizableBlock::Contextual { + contextually_verified, + treestate, + } => { + let height = contextually_verified.height; + + ( + height, + contextually_verified.hash, + FinalizedBlock::from_contextually_verified(contextually_verified, *treestate), + prev_note_commitment_trees, + self.retention_plan(height, false), + None, + None, + ) + } + }; let committed_tip_hash = self.db.finalized_tip_hash(); let committed_tip_height = self.db.finalized_tip_height(); @@ -724,23 +1131,33 @@ impl FinalizedState { let finalized_inner_block = finalized.block.clone(); let note_commitment_trees = finalized.treestate.note_commitment_trees.clone(); - // Build and write the block's RocksDB batch inside the dedicated - // commit-compute pool. 
The par-iter calls inside write_block end up scheduled - // on a separate pool from global (which is used by download/verify pipeline). - // This leads to less contention and more throughput, as benchmarked over the - // sand-blasting region. + // Run `write_block` directly on the committer thread rather than entering the + // dedicated commit-compute pool via `install()`. + // + // The committer is not a member of `COMMIT_COMPUTE_POOL`, so `install()` is a + // synchronous cross-thread handoff: the committer parks until a pool worker + // picks up the job, runs it, and signals back. The look-ahead note-commitment + // precompute (`spawn_note_precompute`) keeps those workers busy, so the handoff + // waits on a contended pool, and that wait dominates the isolation it was meant + // to provide for `write_block`'s internal rayon (`join`/`par_iter`). Running + // `write_block` here removes the per-block round-trip; its internal rayon uses + // the global pool instead. Measured net win on the sandblast region (see PR). let network = self.network(); - let result = COMMIT_COMPUTE_POOL.install(|| { - self.db.write_block( - finalized, - prev_note_commitment_trees, - &network, - source, - retention, - ) - }); + let result = self.db.write_block( + finalized, + prev_note_commitment_trees, + &network, + source, + retention, + fast_anchor_roots, + fast_sync_below, + ); if result.is_ok() { + if let Some(vct) = &self.vct { + vct.evict_committed_roots_through(height); + } + if retention.clears_archive_backlog() { self.checkpoint_raw_tx_archive_backlog .store(false, Ordering::Relaxed); @@ -759,6 +1176,9 @@ impl FinalizedState { "stopping at configured height, flushing database to disk" ); + // POC: emit the equivalence digest + fast-path summary before exit. 
+ self.vct_log_equivalence_digest(); + // We're just about to do a forced exit, so it's ok to do a forced db shutdown self.db.shutdown(true); @@ -774,6 +1194,158 @@ impl FinalizedState { result.map(|hash| (hash, note_commitment_trees)) } + /// POC: `true` when the verified-commitment-trees fast (skip-recompute) path will + /// apply to `height` — i.e. fast mode is active *and* the source already holds this + /// height's roots, so the committer will fold them in and skip the frontier recompute. + /// The write loop uses this to skip the off-thread note precompute only when its result + /// would be discarded; a legacy-fallback block (root not supplied) still precomputes. + pub(crate) fn vct_fast_will_apply(&self, height: block::Height) -> bool { + self.vct + .as_ref() + .is_some_and(|v| v.is_fast() && v.vct_roots_at_height(height).is_some()) + } + + /// Clears any cached successor prevalidation. + /// + /// The finalized write loop calls this when it discards checkpoint queue state, so a + /// look-ahead header that no longer corresponds to the next committed block cannot + /// authorize a later fast-path skip. + pub(crate) fn clear_vct_prevalidated_next(&mut self) { + self.vct_prevalidated_next = None; + } + + /// `true` when committing `height` on the fast path needs a buffered successor before + /// it can safely persist this block's supplied roots. + /// + /// Only untrusted peer-supplied roots at or above Heartwood require this. The + /// checkpoint handoff is exempt because its embedded final frontiers are verified + /// against this block's roots before the real tip treestate is written; trusted + /// local fixtures can commit their tip root on the in-arrears check. + pub(crate) fn vct_fast_needs_successor(&self, height: block::Height) -> bool { + self.vct + .as_ref() + .is_some_and(|v| v.vct_root_needs_successor(height, &self.network())) + } + + /// Verify checkpoint handoff frontiers against this block's supplied roots. 
+ fn vct_verify_handoff_frontier_roots( + &mut self, + height: block::Height, + sapling_frontier: &sapling::tree::NoteCommitmentTree, + orchard_frontier: &orchard::tree::NoteCommitmentTree, + sapling_root: &sapling::tree::Root, + orchard_root: &orchard::tree::Root, + ) -> Result<(), CommitCheckpointVerifiedError> { + if sapling_frontier.root() != *sapling_root || orchard_frontier.root() != *orchard_root { + self.vct_prevalidated_next = None; + return Err(self.vct_reject_supplied_root( + height, + ValidateContextError::VctSuppliedRootUnavailable { height }, + )); + } + + Ok(()) + } + + /// Reject a supplied fast-path root that failed verification for `height`. + /// + /// Evicts the bad root from the source so a re-fetch can replace it with a verifiable + /// one from a different peer, and returns a typed, retryable error. In fast mode the + /// note-commitment frontier is frozen, so the committer cannot recompute the root + /// locally (that would fold a wrong root into the history MMR); it must refuse and + /// leave the database untouched rather than persist or corrupt state. This is what + /// keeps a single malicious peer from halting the sync: the bad root is dropped, not + /// retried forever, and any honest peer's root verifies. + fn vct_reject_supplied_root( + &self, + height: block::Height, + error: ValidateContextError, + ) -> CommitCheckpointVerifiedError { + if let Some(v) = &self.vct { + v.invalidate_fast_root(height); + } + metrics::counter!("state.vct.root.rejected.count").increment(1); + tracing::warn!( + ?height, + ?error, + "VCT: supplied commitment root failed verification; evicted for re-fetch" + ); + ValidateContextError::VctSuppliedRootUnavailable { height }.into() + } + + /// Test-only: enable fast mode reading roots/frontiers from an arbitrary + /// [`commitment_aux::CommitmentRootSource`] (e.g. 
a payload produced from a + /// database via [`commitment_aux::produce_block_roots`]), so the producer→consumer + /// round-trip can be exercised in-process. `requires_verified_successor` marks + /// whether the installed source is untrusted and must defer tip roots until their + /// successor is buffered. + #[cfg(test)] + pub(in crate::service::finalized_state) fn enable_vct_fast_source( + &mut self, + source: Box, + requires_verified_successor: bool, + ) { + self.vct = Some(VctState::test_with_source( + source, + requires_verified_successor, + )); + } + + /// Test-only: the fast-sync handoff height recorded in the database marker, if any. + #[cfg(test)] + pub(crate) fn vct_fast_synced_below(&self) -> Option { + self.db.vct_synced_below() + } + + /// Test-only: number of blocks that took the fast (skip-recompute) path so far. + #[cfg(test)] + pub(crate) fn vct_fast_count(&self) -> u64 { + self.vct.as_ref().map(|v| v.fast_count()).unwrap_or(0) + } + + /// Test-only: number of fast blocks whose own commitment check was skipped by + /// the dedup (the previous block's look-ahead already validated them). + #[cfg(test)] + pub(crate) fn vct_prevalidated_count(&self) -> u64 { + self.vct + .as_ref() + .map(|v| v.prevalidated_count()) + .unwrap_or(0) + } + + /// POC: log the consensus-equivalence digest (anchor sets + history root) and + /// the fast-path block count at the stop height, so a legacy run and a fast run + /// can be compared. Gated by `VCT_DIGEST` so normal runs pay nothing. 
+ fn vct_log_equivalence_digest(&self) { + if std::env::var_os("VCT_DIGEST").is_none() { + return; + } + + let fast_count = if let Some(v) = &self.vct { + v.fast_count() + } else { + 0 + }; + + let ( + sapling_anchor_count, + sapling_anchor_digest, + orchard_anchor_count, + orchard_anchor_digest, + ) = self.db.vct_anchor_digest(); + let history_root = self.db.history_tree().hash(); + + tracing::info!( + sapling_anchor_count, + sapling_anchor_digest, + orchard_anchor_count, + orchard_anchor_digest, + ?history_root, + vct_fast_blocks = fast_count, + "VCT-DIGEST" + ); + } + #[cfg(feature = "elasticsearch")] /// Store finalized blocks into an elasticsearch database. /// diff --git a/zebra-state/src/service/finalized_state/commitment_aux.rs b/zebra-state/src/service/finalized_state/commitment_aux.rs new file mode 100644 index 00000000000..007367a8a5b --- /dev/null +++ b/zebra-state/src/service/finalized_state/commitment_aux.rs @@ -0,0 +1,715 @@ +//! Commitment-root source seam and payload types for the verified-commitment-trees +//! fast path (`docs/design/verified-commitment-trees.md` §5, increment 3). +//! +//! The fast path consumes per-block Sapling/Orchard roots and a final frontier at the +//! checkpoint handoff. *Where* that data comes from is abstracted behind +//! [`CommitmentRootSource`], so the committer reads through one seam regardless of +//! source. The production source is the transport-backed [`PeerSource`] over `tree_aux`; +//! tests use a crate-local fixture source over the same `RootMap` shape. +//! +//! It also provides the **producer** half ([`produce_block_roots`] / +//! [`produce_final_frontiers`]): deriving the same payload from an existing database's +//! per-height trees. That is the read path a serving node runs, and tests can feed the +//! DB-produced payload back through the fast path in-process to prove producer and +//! consumer agreement without networking. 
+ +use std::{ + collections::HashMap, + fmt, + sync::{Arc, RwLock}, +}; + +use thiserror::Error; +use zebra_chain::{block, orchard, sapling, sprout}; + +use super::{FromDisk, IntoDisk, ZebraDb}; + +/// Per-block verified commitment roots — the essential fast-path payload (design §5.1), +/// the wire payload carried over `tree_aux` (increment 6a). Defined in `zebra-chain` so +/// `zebra-network` and `zebra-state` share it without a dependency cycle. +pub(super) use zebra_chain::parallel::commitment_aux::BlockCommitmentRoots; + +/// The verified final note-commitment frontiers at the checkpoint handoff height +/// (design §5.2). +/// +/// Fast mode skips the per-block frontier recompute below the checkpoint, so the +/// running Sapling/Orchard frontiers are never advanced. To let post-checkpoint +/// semantic verification resume, the real frontiers at the checkpoint are supplied +/// here, verified (`frontier.root() == the verified root at the checkpoint`), and +/// written as the tip treestate at the handoff. Subtree tips are not carried: the +/// resuming chain recomputes them from the frontier position. +#[derive(Clone, Debug)] +pub(super) struct FinalFrontiers { + pub(super) height: block::Height, + pub(super) sapling: Arc, + pub(super) orchard: Arc, + pub(super) sprout: Arc, +} + +/// Errors producing [`FinalFrontiers`] from a finalized database. +#[derive(Clone, Debug, Eq, Error, PartialEq)] +pub enum FinalFrontiersGenerationError { + /// The database has no Sapling tree at the requested height. + #[error("missing Sapling final frontier tree at height {height:?}")] + MissingSaplingTree { + /// The requested final frontier height. + height: block::Height, + }, + + /// The database has no Orchard tree at the requested height. + #[error("missing Orchard final frontier tree at height {height:?}")] + MissingOrchardTree { + /// The requested final frontier height. 
+ height: block::Height, + }, +} + +/// Errors parsing [`FinalFrontiers`] from the embedded/frontier-file byte format. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(super) enum FinalFrontiersParseError { + /// The input ended before the 4-byte height field. + MissingHeight { + /// The total number of bytes in the input. + actual_len: usize, + }, + /// The input ended before a tree blob's 4-byte length prefix. + MissingLength { + /// The tree whose length prefix was being read. + tree: &'static str, + /// Byte offset where the length prefix starts. + offset: usize, + /// Bytes remaining from `offset`. + remaining: usize, + }, + /// A tree blob's length prefix points past the end of the input. + TruncatedBlob { + /// The tree whose blob was being read. + tree: &'static str, + /// Byte offset where the blob starts. + offset: usize, + /// Blob length from the prefix. + expected_len: usize, + /// Bytes remaining from `offset`. + remaining: usize, + }, + /// A tree blob's length prefix overflows `usize` arithmetic. + LengthOverflow { + /// The tree whose blob was being read. + tree: &'static str, + /// Byte offset where the blob starts. + offset: usize, + /// Blob length from the prefix. + len: usize, + }, + /// The parser consumed all expected fields, but extra bytes remained. + TrailingBytes { + /// Byte offset where the trailing data starts. + offset: usize, + /// Number of trailing bytes. 
+ trailing_len: usize, + }, +} + +impl fmt::Display for FinalFrontiersParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FinalFrontiersParseError::MissingHeight { actual_len } => write!( + f, + "missing final frontier height: expected 4 bytes, got {actual_len}" + ), + FinalFrontiersParseError::MissingLength { + tree, + offset, + remaining, + } => write!( + f, + "missing {tree} frontier length prefix at byte {offset}: expected 4 bytes, got {remaining}" + ), + FinalFrontiersParseError::TruncatedBlob { + tree, + offset, + expected_len, + remaining, + } => write!( + f, + "truncated {tree} frontier blob at byte {offset}: length prefix says {expected_len} bytes, but only {remaining} remain" + ), + FinalFrontiersParseError::LengthOverflow { tree, offset, len } => write!( + f, + "{tree} frontier blob length overflows at byte {offset}: {len} bytes" + ), + FinalFrontiersParseError::TrailingBytes { + offset, + trailing_len, + } => write!( + f, + "unexpected trailing final frontier bytes at byte {offset}: {trailing_len} bytes" + ), + } + } +} + +impl std::error::Error for FinalFrontiersParseError {} + +impl FinalFrontiers { + /// Serialize to the embedded byte format: height (u32 LE), then sapling, orchard, + /// and sprout trees, each as `u32`-LE-length-prefixed `IntoDisk` bytes. Used to + /// create embedded or test final-frontier fixtures. + pub(super) fn to_bytes(&self) -> Vec { + let mut out = Vec::new(); + out.extend_from_slice(&self.height.0.to_le_bytes()); + let blobs: [Vec; 3] = [ + IntoDisk::as_bytes(&*self.sapling), + IntoDisk::as_bytes(&*self.orchard), + IntoDisk::as_bytes(&*self.sprout), + ]; + for blob in blobs { + let len = u32::try_from(blob.len()).expect("note commitment tree fits in u32 bytes"); + out.extend_from_slice(&len.to_le_bytes()); + out.extend_from_slice(&blob); + } + out + } + + /// Parse the embedded byte format written by [`Self::to_bytes`]. 
+ pub(super) fn from_bytes(bytes: &[u8]) -> Result { + let height_bytes = bytes + .get(0..4) + .ok_or(FinalFrontiersParseError::MissingHeight { + actual_len: bytes.len(), + })?; + let height_bytes: [u8; 4] = + height_bytes + .try_into() + .map_err(|_| FinalFrontiersParseError::MissingHeight { + actual_len: bytes.len(), + })?; + let height = block::Height(u32::from_le_bytes(height_bytes)); + + // Read three `u32`-length-prefixed blobs starting after the height. + let mut cursor: usize = 4; + let mut next_blob = |tree: &'static str| -> Result, FinalFrontiersParseError> { + let len_end = + cursor + .checked_add(4) + .ok_or(FinalFrontiersParseError::LengthOverflow { + tree, + offset: cursor, + len: 4, + })?; + let len_bytes = + bytes + .get(cursor..len_end) + .ok_or(FinalFrontiersParseError::MissingLength { + tree, + offset: cursor, + remaining: bytes.len().saturating_sub(cursor), + })?; + let len_bytes: [u8; 4] = + len_bytes + .try_into() + .map_err(|_| FinalFrontiersParseError::MissingLength { + tree, + offset: cursor, + remaining: bytes.len().saturating_sub(cursor), + })?; + // Zebra's supported platforms have at least 32-bit `usize`, so every + // u32 length prefix fits in memory indexes. 
+ let len = u32::from_le_bytes(len_bytes) as usize; + cursor = len_end; + let blob_end = + cursor + .checked_add(len) + .ok_or(FinalFrontiersParseError::LengthOverflow { + tree, + offset: cursor, + len, + })?; + let blob = + bytes + .get(cursor..blob_end) + .ok_or(FinalFrontiersParseError::TruncatedBlob { + tree, + offset: cursor, + expected_len: len, + remaining: bytes.len().saturating_sub(cursor), + })?; + cursor = blob_end; + Ok(blob.to_vec()) + }; + let sapling = next_blob("sapling")?; + let orchard = next_blob("orchard")?; + let sprout = next_blob("sprout")?; + + if cursor != bytes.len() { + return Err(FinalFrontiersParseError::TrailingBytes { + offset: cursor, + trailing_len: bytes.len() - cursor, + }); + } + + Ok(FinalFrontiers { + height, + sapling: Arc::new(::from_bytes( + sapling, + )), + orchard: Arc::new(::from_bytes( + orchard, + )), + sprout: Arc::new(::from_bytes( + sprout, + )), + }) + } +} + +/// Where the fast path's verified per-block roots and handoff frontiers come from. +/// +/// One enduring seam, two enduring data paths: the standard/legacy path rebuilds +/// trees locally and never consults a source; the fast verified path reads roots +/// from *some* source and verifies them against the headers. The production source is +/// [`PeerSource`]; tests may install a trusted local source to isolate committer +/// behavior. The trait carries no trust policy by itself: the owning VCT state decides +/// whether supplied roots must be confirmed by a buffered successor before commit. +pub(super) trait CommitmentRootSource: std::fmt::Debug + Send + Sync { + /// The supplied roots for `height`, if this source has them. + fn vct_root(&self, height: block::Height) + -> Option<(sapling::tree::Root, orchard::tree::Root)>; + + /// The checkpoint handoff height (below which the vct path skips per-height + /// trees), if this source supplies a final frontier. 
+ fn vct_last_checkpoint_height(&self) -> Option; + + /// The verified final frontiers at the handoff height, if supplied. + fn final_frontiers(&self) -> Option<&FinalFrontiers>; + + /// Discard the supplied root for `height` so a later [`vct_root`](Self::vct_root) + /// returns `None` for it. + /// + /// Called by the committer when a supplied root fails verification: dropping the bad + /// root un-poisons the cache so a re-fetch from a different peer can replace it, rather + /// than the committer re-reading the same rejected root forever. The default is a no-op + /// for test-only local sources; the peer source overrides it. + fn invalidate(&self, _height: block::Height) {} + + /// Discard roots for heights that have already been committed. + /// + /// Called after the database write succeeds, so retry paths still keep roots needed + /// for an uncommitted block. The default is a no-op for test-only local sources; the + /// peer source uses this to keep its live fetch-ahead cache bounded during sync. + fn evict_committed_through(&self, _height: block::Height) {} +} + +/// The shared in-memory representation behind the concrete sources: a height→roots +/// map plus the optional handoff frontiers. +#[cfg(test)] +#[derive(Debug, Default)] +struct RootMap { + roots: HashMap, + frontiers: Option, +} + +#[cfg(test)] +impl RootMap { + fn fast_root( + &self, + height: block::Height, + ) -> Option<(sapling::tree::Root, orchard::tree::Root)> { + self.roots.get(&height.0).copied() + } + + fn handoff_height(&self) -> Option { + self.frontiers.as_ref().map(|f| f.height) + } + + fn final_frontiers(&self) -> Option<&FinalFrontiers> { + self.frontiers.as_ref() + } +} + +/// Test-only local source over a height-keyed roots map. 
+#[cfg(test)] +#[derive(Debug)] +pub(super) struct FixtureSource(RootMap); + +#[cfg(test)] +impl FixtureSource { + pub(super) fn new( + roots: HashMap, + frontiers: Option, + ) -> Self { + FixtureSource(RootMap { roots, frontiers }) + } +} + +#[cfg(test)] +impl CommitmentRootSource for FixtureSource { + fn vct_root( + &self, + height: block::Height, + ) -> Option<(sapling::tree::Root, orchard::tree::Root)> { + self.0.fast_root(height) + } + fn vct_last_checkpoint_height(&self) -> Option { + self.0.handoff_height() + } + fn final_frontiers(&self) -> Option<&FinalFrontiers> { + self.0.final_frontiers() + } +} + +/// A [`CommitmentRootSource`] backed by provisional header-ahead roots in `db`. +/// +/// Header sync persists peer-supplied roots into `db` ahead of body commit; the committer +/// reads them per height through the [`CommitmentRootSource`] seam. The handoff frontier is +/// embedded in the binary (design §5.2), held immutably here and never fetched over the +/// network. The in-memory `cache` is test-only scaffolding for the non-`db` source. +#[derive(Debug)] +pub(super) struct PeerSource { + /// Database holding the header-ahead roots; `None` for the in-memory source built by `new`. + db: Option, + /// In-memory roots shared with a `PeerSourceWriter`; root lookups read it only when `db` + /// is `None`. + cache: Arc>, + /// The handoff frontiers, if any; their `height` is reported as the last checkpoint height. + frontiers: Option, +} + +/// Shared peer-source cache state. +#[derive(Debug, Default)] +struct PeerRootsCache { + /// Cached roots, keyed by the raw height value (`height.0`). + roots: HashMap, + /// The highest height evicted through so far; the writer ignores roots at or below it. + committed_through: Option, +} + +impl PeerSource { + /// Create an empty in-memory peer source and a writer sharing its cache. `frontiers` + /// is the embedded handoff frontier (`None` for the bare benchmark, with no checkpoint + /// handoff). The writer lets a test fill roots before and after the source is moved + /// into the committer. 
+ #[cfg(any(test, feature = "proptest-impl"))] + #[allow(dead_code)] + pub(super) fn new(frontiers: Option) -> (Self, PeerSourceWriter) { + let cache = Arc::new(RwLock::new(PeerRootsCache::default())); + let writer = PeerSourceWriter { + cache: Arc::clone(&cache), + }; + ( + PeerSource { + db: None, + cache, + frontiers, + }, + writer, + ) + } + + /// Create a source backed by provisional header-ahead roots in `db`. + pub(super) fn new_with_db(db: ZebraDb, frontiers: Option) -> Self { + PeerSource { + db: Some(db), + cache: Arc::new(RwLock::new(PeerRootsCache::default())), + frontiers, + } + } +} + +/// Test-only writer sharing a [`PeerSource`]'s in-memory cache, so a proptest can fill +/// roots before and after the source is moved into the committer. +#[cfg(any(test, feature = "proptest-impl"))] +#[derive(Clone, Debug)] +pub(super) struct PeerSourceWriter { + cache: Arc>, +} + +#[cfg(any(test, feature = "proptest-impl"))] +impl PeerSourceWriter { + /// Insert roots into the shared in-memory cache. Last write wins per uncommitted + /// height; roots at already-committed heights are ignored. 
+ #[allow(dead_code)] + pub(super) fn insert_roots(&self, roots: impl IntoIterator) { + let mut cache = self.cache.write().expect("peer source roots lock poisoned"); + for r in roots { + if cache + .committed_through + .is_some_and(|height| r.height.0 <= height) + { + continue; + } + + cache + .roots + .insert(r.height.0, (r.sapling_root, r.orchard_root)); + } + } +} + +impl CommitmentRootSource for PeerSource { + fn vct_root( + &self, + height: block::Height, + ) -> Option<(sapling::tree::Root, orchard::tree::Root)> { + if let Some(db) = &self.db { + return db + .zakura_header_commitment_roots_by_height_range(height..=height) + .into_iter() + .next() + .map(|roots| (roots.sapling_root, roots.orchard_root)); + } + + self.cache + .read() + .expect("peer source roots lock poisoned") + .roots + .get(&height.0) + .copied() + } + fn vct_last_checkpoint_height(&self) -> Option { + self.frontiers.as_ref().map(|f| f.height) + } + fn final_frontiers(&self) -> Option<&FinalFrontiers> { + self.frontiers.as_ref() + } + fn invalidate(&self, height: block::Height) { + // Drop the rejected root so the next read misses; header sync can then deliver a + // verifiable replacement for this height from another peer. 
+ if let Some(db) = &self.db { /* db-backed source: root lookups never read the in-memory cache, so only `db` is updated */ + if let Err(error) = db.delete_zakura_header_commitment_roots([height]) { /* best-effort: a failed delete is only logged at debug level; NOTE(review): the rejected root presumably stays readable in `db` in that case, confirm the caller tolerates it */ + tracing::debug!(?error, ?height, "failed to delete rejected VCT root"); + } + return; + } + + self.cache + .write() + .expect("peer source roots lock poisoned") + .roots + .remove(&height.0); + } + + fn evict_committed_through(&self, height: block::Height) { /* trims only the in-memory cache; NOTE(review): nothing is deleted from `db` here, confirm db-backed roots are pruned elsewhere */ + let mut cache = self.cache.write().expect("peer source roots lock poisoned"); + let start = cache /* first height not yet evicted: 0 on the first call, otherwise one above the previous watermark */ + .committed_through + .map_or(0, |height| height.saturating_add(1)); + + if start <= height.0 { /* otherwise `height` is already covered by the watermark and nothing changes */ + for cached_height in start..=height.0 { + cache.roots.remove(&cached_height); + } + cache.committed_through = Some(height.0); + } + } +} + +/// Produce the per-block roots payload for `range` from `db`'s per-height trees. +/// +/// This is the serving read path (the future `TreeAuxStatePort::read_block_roots`), +/// minus the network: it derives each root from the stored per-height tree, exactly +/// the value the fast path folds into the anchor set. It requires per-height trees, so +/// the caller restricts it to a non-fast-synced (archive/pre-index) database within the +/// tip, where the trees are present. As defense-in-depth on this peer-triggered read, a +/// height whose tree is unexpectedly absent stops the scan and serves the contiguous +/// prefix collected so far rather than panicking; the wire client validates contiguity +/// and treats a short batch as partial progress. +// The `ReadRequest::BlockRoots` serving read path; also exercised by the round-trip test. 
+pub(crate) fn produce_block_roots( + db: &ZebraDb, + range: std::ops::RangeInclusive, +) -> Vec { + let (start, end) = (range.start().0, range.end().0); + let mut roots = Vec::new(); + for h in start..=end { + let height = block::Height(h); + let (Some(sapling), Some(orchard)) = ( + db.sapling_tree_by_height(&height), + db.orchard_tree_by_height(&height), + ) else { + break; + }; + roots.push(BlockCommitmentRoots { + height, + sapling_root: sapling.root(), + orchard_root: orchard.root(), + }); + } + roots +} + +/// Serve the per-block roots for `range`, stitching the two sources at the upgrade height `U`. +/// +/// The `commitment_roots_by_height` serving index only covers heights at and above `U` (the lowest +/// height this binary committed). Heights below `U` predate the index, so they are derived from the +/// per-height trees instead, and the two runs are concatenated. This is what lets a node that +/// upgraded mid-chain serve a request that straddles `U` as one gap-free batch, rather than the +/// short index-only prefix that would stall the client's minimum-progress check. +/// +/// Both sources stop at the first absent height, so the result is always a contiguous run from +/// `range.start()`; a tree gap below `U` is served as the prefix collected so far without reaching +/// into the index. A database that never recorded `U` — a pre-index archive node — derives the +/// whole range from the trees, the original archive fallback. +pub(crate) fn serve_block_roots( + db: &ZebraDb, + range: std::ops::RangeInclusive, +) -> Vec { + let Some(upgrade) = db.vct_upgrade_height() else { + return produce_block_roots(db, range); + }; + + let (start, end) = (*range.start(), *range.end()); + + // Wholly at/above `U`: the index covers it. (`U == 0` for a node that fast-synced from + // genesis takes this path for every request, never touching the absent per-height trees.) 
+ if start >= upgrade { + return db.commitment_roots_by_height_range(range); + } + + // Below `U`: derive the per-height-tree run up to `U - 1` (`start < upgrade` so `upgrade >= 1`). + let trees_end = block::Height(end.0.min(upgrade.0 - 1)); + let mut roots = produce_block_roots(db, start..=trees_end); + + // Continue into the index only if the tree run is contiguous up to `U - 1`; a short run means a + // gap below `U`, so serve it alone and let the client retry the remainder. + if roots.last().map(|root| root.height) == Some(trees_end) && end >= upgrade { + roots.extend(db.commitment_roots_by_height_range(upgrade..=end)); + } + + roots +} + +/// Produce the final frontiers at `height` from `db`'s per-height trees. +/// +/// Sprout is frozen far below any modern checkpoint, so the tip Sprout tree is the frontier at +/// `height`. +pub(super) fn produce_final_frontiers( + db: &ZebraDb, + height: block::Height, +) -> Result { + let sapling = db + .sapling_tree_by_height(&height) + .ok_or(FinalFrontiersGenerationError::MissingSaplingTree { height })?; + let orchard = db + .orchard_tree_by_height(&height) + .ok_or(FinalFrontiersGenerationError::MissingOrchardTree { height })?; + + Ok(FinalFrontiers { + height, + sapling, + orchard, + sprout: db.sprout_tree_for_tip(), + }) +} + +/// Produce serialized final-frontier bytes for the checkpoint handoff at `height`. +/// +/// These bytes use the same format as the embedded `mainnet-frontier.bin` file consumed by +/// [`super::vct`]. +pub fn produce_final_frontiers_bytes( + db: &ZebraDb, + height: block::Height, +) -> Result, FinalFrontiersGenerationError> { + Ok(produce_final_frontiers(db, height)?.to_bytes()) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// The final-frontier serialization round-trips: parsed frontiers carry the same + /// height and tree roots as the originals. 
+ #[test] + fn final_frontiers_bytes_round_trips() { + let frontiers = FinalFrontiers { + height: block::Height(1_687_200), + sapling: Arc::new(Default::default()), + orchard: Arc::new(Default::default()), + sprout: Arc::new(Default::default()), + }; + + let parsed = + FinalFrontiers::from_bytes(&frontiers.to_bytes()).expect("frontiers should parse"); + + assert_eq!(parsed.height, frontiers.height, "height round-trips"); + assert_eq!( + parsed.sapling.root(), + frontiers.sapling.root(), + "sapling frontier round-trips" + ); + assert_eq!( + parsed.orchard.root(), + frontiers.orchard.root(), + "orchard frontier round-trips" + ); + assert_eq!( + parsed.sprout.root(), + frontiers.sprout.root(), + "sprout frontier round-trips" + ); + } + + /// The test fixture source looks up produced roots by height and exposes + /// the handoff frontier — the consumer view of producer output. + #[test] + fn fixture_source_round_trips_payload() { + let roots = vec![ + BlockCommitmentRoots { + height: block::Height(10), + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + }, + BlockCommitmentRoots { + height: block::Height(11), + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + }, + ]; + let roots = roots + .into_iter() + .map(|root| (root.height.0, (root.sapling_root, root.orchard_root))) + .collect(); + let frontiers = FinalFrontiers { + height: block::Height(11), + sapling: Arc::new(Default::default()), + orchard: Arc::new(Default::default()), + sprout: Arc::new(Default::default()), + }; + + let source = FixtureSource::new(roots, Some(frontiers)); + + assert!( + source.vct_root(block::Height(10)).is_some(), + "produced root is looked up by height" + ); + assert!( + source.vct_root(block::Height(99)).is_none(), + "absent height has no root" + ); + assert_eq!( + source.vct_last_checkpoint_height(), + 
Some(block::Height(11)), + "handoff height comes from the supplied frontiers" + ); + } + + /// `invalidate` drops a peer-supplied root so a later read misses it, letting the + /// driver re-fetch a verifiable replacement from another peer. This un-poisons the + /// cache after a bad root is rejected by the committer, so one malicious peer cannot + /// wedge the same rejected root in place forever. + #[test] + fn peer_source_invalidate_evicts_a_root() { + let (source, writer) = PeerSource::new(None); + writer.insert_roots([BlockCommitmentRoots { + height: block::Height(42), + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + }]); + + assert!( + source.vct_root(block::Height(42)).is_some(), + "the inserted root is present before eviction" + ); + + source.invalidate(block::Height(42)); + + assert!( + source.vct_root(block::Height(42)).is_none(), + "an evicted root is gone, so the next read misses and a re-fetch can replace it" + ); + } +} diff --git a/zebra-state/src/service/finalized_state/commitment_aux_verify.rs b/zebra-state/src/service/finalized_state/commitment_aux_verify.rs new file mode 100644 index 00000000000..dd645c3c0bd --- /dev/null +++ b/zebra-state/src/service/finalized_state/commitment_aux_verify.rs @@ -0,0 +1,513 @@ +//! Read-only verification of supplied per-block note-commitment roots against the +//! checkpoint-committed block headers, via the ZIP-221 ChainHistory MMR. +//! +//! This is the "verify" half of the verified-commitment-trees design +//! (`docs/design/verified-commitment-trees.md` §6): given a sequence of per-block +//! Sapling/Orchard roots (from a fixture today, an untrusted peer later), confirm +//! they reconstruct a history tree consistent with the header commitments. The +//! commit path uses this module before persisting supplied roots. +//! +//! It reuses the existing consensus check +//! 
([`block_commitment_is_valid_for_chain_history`](crate::service::check::block_commitment_is_valid_for_chain_history)) +//! and [`HistoryTree::push`], which build the V1/V2 leaf from the block body and the +//! supplied roots — so there is no new crypto here. + +use std::sync::Arc; + +use zebra_chain::{ + block::{merkle::AuthDataRoot, Block, Height}, + history_tree::HistoryTree, + orchard, + parameters::{Network, NetworkUpgrade}, + sapling, +}; + +use zebra_chain::block::{Commitment, CommitmentError}; + +use crate::{service::check, ValidateContextError}; + +/// One block-sized step in supplied commitment-root verification. +#[derive(Clone, Debug)] +pub(crate) struct CommitmentRootVerification { + /// The block whose header commitment is checked and whose roots are folded in. + pub(crate) block: Arc, + /// The supplied `(sapling, orchard)` roots to fold into the history tree, or `None` to + /// only check the header commitment without folding anything in. + pub(crate) roots: Option<(sapling::tree::Root, orchard::tree::Root)>, + /// Forwarded unchanged to the header-commitment check. + pub(crate) precomputed_auth_data_root: Option, + /// When `true`, the header-commitment check against the running tree is skipped for + /// this block. + pub(crate) skip_parent_check: bool, +} + +impl CommitmentRootVerification { + /// A step that carries supplied roots; the caller chooses whether the header-commitment + /// check is skipped. + pub(crate) fn with_roots( + block: Arc, + sapling_root: sapling::tree::Root, + orchard_root: orchard::tree::Root, + precomputed_auth_data_root: Option, + skip_parent_check: bool, + ) -> Self { + CommitmentRootVerification { + block, + roots: Some((sapling_root, orchard_root)), + precomputed_auth_data_root, + skip_parent_check, + } + } + + /// A step with no supplied roots: the header-commitment check is never skipped and + /// nothing is folded into the tree. + pub(crate) fn header_only( + block: Arc, + precomputed_auth_data_root: Option, + ) -> Self { + CommitmentRootVerification { + block, + roots: None, + precomputed_auth_data_root, + skip_parent_check: false, + } + } +} + +/// Verifies a supplied Sapling root for a *pre-Heartwood* block directly against the +/// block header (design §6.1). +/// +/// The ZIP-221 history MMR does not exist below Heartwood, so +/// [`block_commitment_is_valid_for_chain_history`](check::block_commitment_is_valid_for_chain_history) +/// is a no-op there and cannot authenticate the supplied roots. 
This fills that gap: +/// +/// - Sapling..Heartwood: the header's `FinalSaplingRoot` commits the Sapling root +/// directly, so the supplied root must equal it. +/// - Pre-Sapling: the Sapling tree is empty, so the supplied root must be the +/// empty-tree root. +/// +/// Heartwood and later (`ChainHistoryRoot` / `ChainHistoryBlockTxAuthCommitment` / +/// the activation-reserved block) are authenticated by the MMR path and accepted +/// here. The Orchard root below NU5 is pinned separately by +/// [`verify_supplied_orchard_root_below_nu5`]. +pub(crate) fn verify_supplied_sapling_root_below_heartwood( + network: &Network, + block: &Block, + sapling_root: &sapling::tree::Root, +) -> Result<(), ValidateContextError> { + let expected = match block.commitment(network)? { + Commitment::FinalSaplingRoot(header_root) => header_root, + Commitment::PreSaplingReserved(_) => sapling::tree::NoteCommitmentTree::default().root(), + // Heartwood activation and later are authenticated by the MMR path. + _ => return Ok(()), + }; + + if sapling_root != &expected { + return Err(ValidateContextError::InvalidBlockCommitment( + CommitmentError::InvalidFinalSaplingRoot { + expected: <[u8; 32]>::from(expected), + actual: <[u8; 32]>::from(*sapling_root), + }, + )); + } + + Ok(()) +} + +/// Verifies a supplied Orchard root for a *pre-NU5* block (design §6.1). +/// +/// The Orchard tree does not activate until NU5, and no header below NU5 commits to an +/// Orchard root: the ZIP-221 V1 history leaf (Heartwood..Canopy) *ignores* the Orchard +/// root entirely (`zcash_history.rs`, `V1::block_to_history_node`), and below Heartwood +/// there is no MMR at all. So the MMR path that authenticates Orchard roots from NU5 +/// onward cannot vouch for any root below NU5 — yet the fast path folds the supplied +/// Orchard root into the anchor set for every block. 
Without this check an untrusted +/// source could inject an arbitrary Orchard anchor below NU5 that the legacy recompute +/// path never produces, breaking the §11 trust boundary and consensus equivalence. +/// +/// Below NU5 the Orchard tree is always the empty default, so the supplied root must +/// equal the empty-tree root. At and above NU5 activation the MMR path authenticates +/// the root, so this accepts. +pub(crate) fn verify_supplied_orchard_root_below_nu5( + network: &Network, + height: Height, + orchard_root: &orchard::tree::Root, +) -> Result<(), ValidateContextError> { + // At/above NU5 the ZIP-221 V2 MMR commits to the Orchard root, so it is + // authenticated there, not here. + if let Some(nu5_height) = NetworkUpgrade::Nu5.activation_height(network) { + if height >= nu5_height { + return Ok(()); + } + } + + let expected = orchard::tree::NoteCommitmentTree::default().root(); + if orchard_root != &expected { + return Err(ValidateContextError::InvalidBlockCommitment( + CommitmentError::InvalidPreNu5OrchardRoot { + expected: <[u8; 32]>::from(expected), + actual: <[u8; 32]>::from(*orchard_root), + }, + )); + } + + Ok(()) +} + +/// Verifies that `items` (blocks in ascending height order, with supplied +/// Sapling/Orchard roots when they should be folded in) reconstruct a ZIP-221 +/// history MMR consistent with the block header commitments, starting from `tree` +/// (the parent block's history tree). +/// +/// Returns the final history tree on success, or `(height, error)` for the first +/// block whose header commitment rejects the roots folded in so far. +/// +/// # Lag +/// +/// A block's commitment commits to the history tree as of its *parent*, so the root +/// supplied for height `H` is only confirmed when height `H + 1` is processed. Over a +/// contiguous range `[start..=end]` this therefore confirms the roots at +/// `[start..=end - 1]`; pass the block at `end + 1` to confirm the root at `end`. 
+pub(crate) fn verify_commitment_roots( + network: &Network, + mut tree: HistoryTree, + items: I, +) -> Result +where + I: IntoIterator, +{ + for item in items { + let CommitmentRootVerification { + block, + roots, + precomputed_auth_data_root, + skip_parent_check, + } = item; + + let height = block + .coinbase_height() + .expect("checkpoint-verified blocks have a coinbase height"); + + // Validate this block's header commitment against the current (parent) tree, + // i.e. against every root already folded in. + if !skip_parent_check { + check::block_commitment_is_valid_for_chain_history( + block.clone(), + network, + &tree, + precomputed_auth_data_root, + ) + .map_err(|error| (height, error))?; + } + + let Some((sapling_root, orchard_root)) = roots else { + continue; + }; + + verify_supplied_sapling_root_below_heartwood(network, &block, &sapling_root) + .map_err(|error| (height, error))?; + verify_supplied_orchard_root_below_nu5(network, height, &orchard_root) + .map_err(|error| (height, error))?; + + // Fold this block's supplied roots into the running MMR (builds the leaf + // from the block body tx-counts + the roots). + tree.push(network, block, &sapling_root, &orchard_root) + .map_err(Arc::new) + .map_err(ValidateContextError::from) + .map_err(|error| (height, error))?; + } + + Ok(tree) +} + +#[cfg(test)] +mod tests { + use super::*; + + use zebra_chain::{ + block::Block, + parameters::{ + testnet::{ConfiguredActivationHeights, RegtestParameters}, + Network::Mainnet, + NetworkUpgrade, + }, + serialization::ZcashDeserializeInto, + }; + + /// Build an empty [`HistoryTree`] (the genesis block is pre-Heartwood). 
+ fn empty_history_tree() -> HistoryTree { + let genesis = Arc::new( + zebra_test::vectors::BLOCK_MAINNET_GENESIS_BYTES + .zcash_deserialize_into::() + .expect("genesis deserializes"), + ); + HistoryTree::from_block(&Mainnet, genesis, &Default::default(), &Default::default()) + .expect("empty history tree for a pre-Heartwood block") + } + + /// A distinct, valid Orchard root that is *not* the empty-tree root, for the + /// negative cases. Zero is a valid Pallas base field element, and the empty + /// Orchard tree root is an uncommitted-leaf hash, so the two differ. + fn non_empty_orchard_root() -> orchard::tree::Root { + let empty = orchard::tree::NoteCommitmentTree::default().root(); + let wrong = orchard::tree::Root::try_from([0u8; 32]) + .expect("zero is a valid pallas base field element"); + assert_ne!( + wrong, empty, + "the negative cases need a root distinct from the empty-tree root" + ); + wrong + } + + fn verification_item( + block: Arc, + sapling_root: sapling::tree::Root, + orchard_root: orchard::tree::Root, + ) -> CommitmentRootVerification { + CommitmentRootVerification::with_roots(block, sapling_root, orchard_root, None, false) + } + + /// Below NU5 the supplied Orchard root must equal the empty-tree root (no header + /// commits to it there), and any other root is rejected. At/above NU5 the MMR + /// authenticates it, so this check accepts unconditionally. + #[test] + fn pins_orchard_root_to_empty_below_nu5_and_defers_above() { + let nu5 = NetworkUpgrade::Nu5 + .activation_height(&Mainnet) + .expect("mainnet has NU5"); + let empty = orchard::tree::NoteCommitmentTree::default().root(); + let wrong = non_empty_orchard_root(); + + // Below NU5: the empty root is accepted, a non-empty root is rejected. 
+ let pre_nu5 = Height(nu5.0 - 1); + verify_supplied_orchard_root_below_nu5(&Mainnet, pre_nu5, &empty) + .expect("the empty-tree root is accepted below NU5"); + let error = verify_supplied_orchard_root_below_nu5(&Mainnet, pre_nu5, &wrong) + .expect_err("a non-empty orchard root must be rejected below NU5"); + assert!( + matches!( + error, + ValidateContextError::InvalidBlockCommitment( + CommitmentError::InvalidPreNu5OrchardRoot { .. } + ) + ), + "rejection uses the dedicated pre-NU5 orchard error, got: {error:?}" + ); + + // Pre-Sapling/Heartwood (well below NU5) is also pinned to empty. + verify_supplied_orchard_root_below_nu5(&Mainnet, Height(1), &empty) + .expect("the empty-tree root is accepted at low heights"); + verify_supplied_orchard_root_below_nu5(&Mainnet, Height(1), &wrong) + .expect_err("a non-empty orchard root must be rejected at low heights"); + + // At and above NU5 the MMR path authenticates the root, so even a non-empty + // root is accepted here (it is checked elsewhere). 
+ verify_supplied_orchard_root_below_nu5(&Mainnet, nu5, &wrong) + .expect("at NU5 the root is authenticated by the MMR, not pinned here"); + verify_supplied_orchard_root_below_nu5(&Mainnet, Height(nu5.0 + 1), &wrong) + .expect("above NU5 the root is authenticated by the MMR, not pinned here"); + } + + #[test] + fn pins_orchard_root_to_empty_when_nu5_is_unconfigured() { + let network = zebra_chain::parameters::Network::new_regtest(RegtestParameters { + activation_heights: ConfiguredActivationHeights { + nu5: None, + ..Default::default() + }, + ..Default::default() + }); + let empty = orchard::tree::NoteCommitmentTree::default().root(); + let wrong = non_empty_orchard_root(); + + verify_supplied_orchard_root_below_nu5(&network, Height(1), &empty) + .expect("the empty-tree root is accepted when NU5 is unconfigured"); + let error = verify_supplied_orchard_root_below_nu5(&network, Height(1), &wrong) + .expect_err("a non-empty orchard root must be rejected when NU5 is unconfigured"); + assert!( + matches!( + error, + ValidateContextError::InvalidBlockCommitment( + CommitmentError::InvalidPreNu5OrchardRoot { .. } + ) + ), + "rejection uses the dedicated pre-NU5 orchard error, got: {error:?}" + ); + } + + /// The verifier confirms real Sapling roots over the Heartwood activation and its + /// next block (the V1 `ChainHistoryRoot` path), and rejects a wrong root at the + /// *next* block (the one-block lag). 
+ #[test] + fn verifies_real_roots_and_rejects_a_wrong_root_at_next_height() { + let (blocks, sapling_roots) = Mainnet.block_sapling_roots_map(); + let activation = NetworkUpgrade::Heartwood + .activation_height(&Mainnet) + .expect("mainnet has Heartwood") + .0; + + let block_at = |height: u32| -> Arc { + Arc::new( + blocks + .get(&height) + .expect("test vector block exists") + .zcash_deserialize_into::() + .expect("block deserializes"), + ) + }; + let root_at = |height: u32| -> sapling::tree::Root { + sapling::tree::Root::try_from(**sapling_roots.get(&height).expect("root vector exists")) + .expect("valid root") + }; + + let act_block = block_at(activation); + let next_block = block_at(activation + 1); + let act_root = root_at(activation); + let next_root = root_at(activation + 1); + let empty_orchard_root = orchard::tree::NoteCommitmentTree::default().root(); + + // Positive: the real roots reconstruct a tree the next block's header commits to. + let ok_items = vec![ + verification_item(act_block.clone(), act_root, empty_orchard_root), + verification_item(next_block.clone(), next_root, empty_orchard_root), + ]; + verify_commitment_roots(&Mainnet, empty_history_tree(), ok_items) + .expect("real roots verify against the headers"); + + // Negative + lag: a wrong root at the activation height (here, the next + // block's root, which is a valid but different root) is only caught when the + // following block's commitment is checked. 
+ assert_ne!(act_root, next_root, "test needs two distinct roots"); + let bad_items = vec![ + verification_item(act_block, next_root, empty_orchard_root), + verification_item(next_block, next_root, empty_orchard_root), + ]; + let (fail_height, _error) = + verify_commitment_roots(&Mainnet, empty_history_tree(), bad_items) + .expect_err("a wrong root must be rejected"); + assert_eq!( + fail_height.0, + activation + 1, + "a wrong root at H is detected at H+1 (the lag)" + ); + } + + /// Real NU5/V2-range verification over the POC range (1,707,211..=1,717,210), + /// exercising the actual [`verify_commitment_roots`] on production data. + /// + /// Gated by env vars so it stays out of normal CI. Requires two read-only forks + /// of the RUNBOOK 1.707M master snapshot: + /// - `VCT_SEED_DB`: an *unsynced* `cp -al` fork (its tip history tree at height + /// 1,707,210 is the seed — mid-NU5-epoch, so no activation boundary to handle). + /// - `VCT_ARCHIVE_DB`: an archive fork synced to >= 1,717,211 (provides the blocks + /// and per-height roots). 
+ /// + /// Run: + /// ```text + /// VCT_SEED_DB= VCT_ARCHIVE_DB= \ + /// cargo test -p zebra-state --lib commitment_aux_verify -- --ignored --nocapture + /// ``` + #[ignore] + #[test] + #[allow(clippy::print_stderr)] // intentional progress output for a manual run + fn verifies_real_nu5_range_over_synced_forks() { + use std::path::PathBuf; + + use crate::{ + constants::{state_database_format_version_in_code, STATE_DATABASE_KIND}, + service::finalized_state::{ZebraDb, STATE_COLUMN_FAMILIES_IN_CODE}, + Config, + }; + + let (Some(seed_dir), Some(archive_dir)) = ( + std::env::var_os("VCT_SEED_DB"), + std::env::var_os("VCT_ARCHIVE_DB"), + ) else { + eprintln!("skipping: set VCT_SEED_DB (unsynced fork) and VCT_ARCHIVE_DB (synced fork)"); + return; + }; + + let open = |dir: PathBuf| -> ZebraDb { + let config = Config { + cache_dir: dir, + ephemeral: false, + ..Default::default() + }; + ZebraDb::new( + &config, + STATE_DATABASE_KIND, + &state_database_format_version_in_code(), + &Mainnet, + true, // skip format upgrades + STATE_COLUMN_FAMILIES_IN_CODE + .iter() + .map(ToString::to_string), + true, // read-only + ) + }; + + let seed_db = open(PathBuf::from(seed_dir)); + let archive_db = open(PathBuf::from(archive_dir)); + + let start = 1_707_211u32; + let end = 1_717_210u32; + + // Seed: the history tree at 1,707,210 (the unsynced fork's tip). + let seed = (*seed_db.history_tree()).clone(); + assert_eq!( + seed_db.finalized_tip_height().map(|h| h.0), + Some(start - 1), + "VCT_SEED_DB must be the unsynced 1,707,210 master fork" + ); + assert!( + archive_db.finalized_tip_height().map(|h| h.0).unwrap_or(0) > end, + "VCT_ARCHIVE_DB must be synced to at least {}", + end + 1 + ); + + // Build (block, sapling_root, orchard_root) for [start..=end+1]; the +1 block + // confirms the in-range root at `end` via the one-block lag. 
+ let item_at = |h: u32| -> CommitmentRootVerification { + let block = archive_db + .block(Height(h).into()) + .expect("archive fork has the block"); + let sapling_root = archive_db + .sapling_tree_by_height(&Height(h)) + .expect("archive fork has the per-height Sapling tree") + .root(); + let orchard_root = archive_db + .orchard_tree_by_height(&Height(h)) + .expect("archive fork has the per-height Orchard tree") + .root(); + verification_item(block, sapling_root, orchard_root) + }; + let items: Vec<_> = (start..=end + 1).map(item_at).collect(); + + // Positive: every supplied root in the range is confirmed by the V2 headers. + verify_commitment_roots(&Mainnet, seed.clone(), items.clone()) + .expect("real NU5 roots verify against the headers"); + eprintln!("VCT NU5 positive: {} blocks verified", items.len()); + + // Negative + lag: corrupt one root mid-range with a distinct valid root (the + // range's first root, certainly different after thousands of sandblast blocks); + // expect rejection at H+1. 
+ let bad_offset = 5_000usize; + let bad_height = start + bad_offset as u32; + let wrong_root = items[0].roots.expect("test verification item has roots").0; + let mut bad_items = items; + assert_ne!( + bad_items[bad_offset] + .roots + .expect("test verification item has roots") + .0, + wrong_root, + "need a distinct wrong root" + ); + bad_items[bad_offset] + .roots + .as_mut() + .expect("test verification item has roots") + .0 = wrong_root; + let (fail_height, _error) = verify_commitment_roots(&Mainnet, seed, bad_items) + .expect_err("a wrong NU5 root must be rejected"); + assert_eq!( + fail_height.0, + bad_height + 1, + "a wrong root at H is detected at H+1 (the lag)" + ); + eprintln!( + "VCT NU5 negative: wrong root at {bad_height} rejected at {}", + fail_height.0 + ); + } +} diff --git a/zebra-state/src/service/finalized_state/disk_format/chain.rs b/zebra-state/src/service/finalized_state/disk_format/chain.rs index 129df714203..67c69a50d69 100644 --- a/zebra-state/src/service/finalized_state/disk_format/chain.rs +++ b/zebra-state/src/service/finalized_state/disk_format/chain.rs @@ -51,6 +51,11 @@ pub struct HistoryTreeParts { } impl HistoryTreeParts { + /// Deserializes history tree parts from raw database bytes. + pub(crate) fn from_bytes_result(bytes: impl AsRef<[u8]>) -> Result { + bincode::DefaultOptions::new().deserialize(bytes.as_ref()) + } + /// Converts [`HistoryTreeParts`] to a [`NonEmptyHistoryTree`]. 
pub(crate) fn with_network( self, @@ -89,8 +94,7 @@ impl IntoDisk for HistoryTreeParts { impl FromDisk for HistoryTreeParts { fn from_bytes(bytes: impl AsRef<[u8]>) -> Self { - bincode::DefaultOptions::new() - .deserialize(bytes.as_ref()) + Self::from_bytes_result(bytes) .expect("deserialization format should match the serialization format used by IntoDisk") } } diff --git a/zebra-state/src/service/finalized_state/disk_format/shielded.rs b/zebra-state/src/service/finalized_state/disk_format/shielded.rs index a845cda2c30..2cdad46dc74 100644 --- a/zebra-state/src/service/finalized_state/disk_format/shielded.rs +++ b/zebra-state/src/service/finalized_state/disk_format/shielded.rs @@ -87,6 +87,42 @@ impl FromDisk for orchard::tree::Root { } } +/// The per-height Sapling and Orchard note-commitment roots, as stored in the +/// `commitment_roots_by_height` index (keyed by [`Height`]). +/// +/// Every node persists this 64-byte value for each committed block — including a +/// verified-commitment-trees fast-synced node, which folds these roots in but writes no +/// per-height note-commitment trees. It lets such a node still serve the `tree_aux` +/// `BlockRoots` read from a compact index rather than from the (absent) trees. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct CommitmentRootsByHeight { + /// The Sapling note-commitment tree root at this height. + pub sapling: sapling::tree::Root, + /// The Orchard note-commitment tree root at this height. 
+ pub orchard: orchard::tree::Root, +} + +impl IntoDisk for CommitmentRootsByHeight { + type Bytes = [u8; 64]; + + fn as_bytes(&self) -> Self::Bytes { + let mut out = [0u8; 64]; + out[..32].copy_from_slice(&IntoDisk::as_bytes(&self.sapling)); + out[32..].copy_from_slice(&IntoDisk::as_bytes(&self.orchard)); + out + } +} + +impl FromDisk for CommitmentRootsByHeight { + fn from_bytes(bytes: impl AsRef<[u8]>) -> Self { + let bytes = bytes.as_ref(); + CommitmentRootsByHeight { + sapling: sapling::tree::Root::from_bytes(&bytes[..32]), + orchard: orchard::tree::Root::from_bytes(&bytes[32..]), + } + } +} + impl IntoDisk for NoteCommitmentSubtreeIndex { type Bytes = [u8; 2]; diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs b/zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs index 76a1f0cba99..780cb34a136 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs @@ -96,7 +96,7 @@ fn test_raw_rocksdb_column_families_with_network(network: Network) { .expect("test data deserializes"); state - .commit_finalized_direct(block.into(), None, "snapshot tests") + .commit_finalized_direct(block.into(), None, None, None, "snapshot tests") .expect("test block is valid"); let mut settings = insta::Settings::clone_current(); diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/column_family_names.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/column_family_names.snap index d548705b0b8..b9ed66111eb 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/column_family_names.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/column_family_names.snap @@ -6,6 +6,7 @@ expression: cf_names "balance_by_transparent_addr", "block_header_by_height", "block_info", + "commitment_roots_by_height", "default", "hash_by_height", "hash_by_tx_loc", 
@@ -30,8 +31,11 @@ expression: cf_names "tx_loc_by_transparent_addr_loc", "utxo_by_out_loc", "utxo_loc_by_transparent_addr_loc", + "vct_sync_metadata", + "vct_upgrade_metadata", "zakura_header_body_size_by_height", "zakura_header_by_height", + "zakura_header_commitment_roots_by_height", "zakura_header_hash_by_height", "zakura_header_height_by_hash", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_0.snap new file mode 100644 index 00000000000..c96257d2aff --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "000000", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_1.snap new file mode 100644 index 00000000000..50911e30cd8 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_1.snap @@ -0,0 +1,14 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "000000", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), + KV( + k: "000001", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), +] 
diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_2.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_2.snap new file mode 100644 index 00000000000..5f670090392 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@mainnet_2.snap @@ -0,0 +1,18 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "000000", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), + KV( + k: "000001", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), + KV( + k: "000002", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_0.snap new file mode 100644 index 00000000000..c96257d2aff --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "000000", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_1.snap 
b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_1.snap new file mode 100644 index 00000000000..50911e30cd8 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_1.snap @@ -0,0 +1,14 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "000000", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), + KV( + k: "000001", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_2.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_2.snap new file mode 100644 index 00000000000..5f670090392 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/commitment_roots_by_height_raw_data@testnet_2.snap @@ -0,0 +1,18 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "000000", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), + KV( + k: "000001", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), + KV( + k: "000002", + v: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493eae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap 
b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap index 6d858dc42ea..89436201814 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap @@ -15,8 +15,10 @@ expression: empty_column_families "tx_loc_by_transparent_addr_loc: no entries", "utxo_by_out_loc: no entries", "utxo_loc_by_transparent_addr_loc: no entries", + "vct_sync_metadata: no entries", "zakura_header_body_size_by_height: no entries", "zakura_header_by_height: no entries", + "zakura_header_commitment_roots_by_height: no entries", "zakura_header_hash_by_height: no entries", "zakura_header_height_by_hash: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_1.snap index 4fb77151719..dc2b789202e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_1.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_1.snap @@ -11,8 +11,10 @@ expression: empty_column_families "sapling_nullifiers: no entries", "sprout_nullifiers: no entries", "tx_loc_by_spent_out_loc: no entries", + "vct_sync_metadata: no entries", "zakura_header_body_size_by_height: no entries", "zakura_header_by_height: no entries", + "zakura_header_commitment_roots_by_height: no entries", "zakura_header_hash_by_height: no entries", "zakura_header_height_by_hash: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_2.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_2.snap index 4fb77151719..dc2b789202e 100644 --- 
a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_2.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_2.snap @@ -11,8 +11,10 @@ expression: empty_column_families "sapling_nullifiers: no entries", "sprout_nullifiers: no entries", "tx_loc_by_spent_out_loc: no entries", + "vct_sync_metadata: no entries", "zakura_header_body_size_by_height: no entries", "zakura_header_by_height: no entries", + "zakura_header_commitment_roots_by_height: no entries", "zakura_header_hash_by_height: no entries", "zakura_header_height_by_hash: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@no_blocks.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@no_blocks.snap index 618b220b649..fa60db8d11a 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@no_blocks.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@no_blocks.snap @@ -6,6 +6,7 @@ expression: empty_column_families "balance_by_transparent_addr: no entries", "block_header_by_height: no entries", "block_info: no entries", + "commitment_roots_by_height: no entries", "hash_by_height: no entries", "hash_by_tx_loc: no entries", "height_by_hash: no entries", @@ -29,8 +30,11 @@ expression: empty_column_families "tx_loc_by_transparent_addr_loc: no entries", "utxo_by_out_loc: no entries", "utxo_loc_by_transparent_addr_loc: no entries", + "vct_sync_metadata: no entries", + "vct_upgrade_metadata: no entries", "zakura_header_body_size_by_height: no entries", "zakura_header_by_height: no entries", + "zakura_header_commitment_roots_by_height: no entries", "zakura_header_hash_by_height: no entries", "zakura_header_height_by_hash: no entries", ] diff --git 
a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap index 6d858dc42ea..89436201814 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap @@ -15,8 +15,10 @@ expression: empty_column_families "tx_loc_by_transparent_addr_loc: no entries", "utxo_by_out_loc: no entries", "utxo_loc_by_transparent_addr_loc: no entries", + "vct_sync_metadata: no entries", "zakura_header_body_size_by_height: no entries", "zakura_header_by_height: no entries", + "zakura_header_commitment_roots_by_height: no entries", "zakura_header_hash_by_height: no entries", "zakura_header_height_by_hash: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_1.snap index 4fb77151719..dc2b789202e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_1.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_1.snap @@ -11,8 +11,10 @@ expression: empty_column_families "sapling_nullifiers: no entries", "sprout_nullifiers: no entries", "tx_loc_by_spent_out_loc: no entries", + "vct_sync_metadata: no entries", "zakura_header_body_size_by_height: no entries", "zakura_header_by_height: no entries", + "zakura_header_commitment_roots_by_height: no entries", "zakura_header_hash_by_height: no entries", "zakura_header_height_by_hash: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_2.snap 
b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_2.snap index 4fb77151719..dc2b789202e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_2.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_2.snap @@ -11,8 +11,10 @@ expression: empty_column_families "sapling_nullifiers: no entries", "sprout_nullifiers: no entries", "tx_loc_by_spent_out_loc: no entries", + "vct_sync_metadata: no entries", "zakura_header_body_size_by_height: no entries", "zakura_header_by_height: no entries", + "zakura_header_commitment_roots_by_height: no entries", "zakura_header_hash_by_height: no entries", "zakura_header_height_by_hash: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_0.snap new file mode 100644 index 00000000000..f0d28a38b02 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "", + v: "000000", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_1.snap new file mode 100644 index 00000000000..f0d28a38b02 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_1.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "", + v: "000000", + ), +] diff --git 
a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_2.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_2.snap new file mode 100644 index 00000000000..f0d28a38b02 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@mainnet_2.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "", + v: "000000", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_0.snap new file mode 100644 index 00000000000..f0d28a38b02 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "", + v: "000000", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_1.snap new file mode 100644 index 00000000000..f0d28a38b02 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_1.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "", + v: "000000", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_2.snap 
b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_2.snap new file mode 100644 index 00000000000..f0d28a38b02 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/vct_upgrade_metadata_raw_data@testnet_2.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "", + v: "000000", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index d4f5872006c..3e240f022d4 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -110,7 +110,11 @@ fn format_upgrades( "add Zakura header body size hints", Version::new(27, 2, 0), )), - ] as [Box; 7]) + Box::new(no_migration::NoMigration::new( + "add verified-commitment-trees metadata, serving index, and history tree repair", + Version::new(27, 3, 0), + )), + ] as [Box; 8]) .into_iter() .filter(move |upgrade| upgrade.version() > min_version()) } @@ -877,7 +881,22 @@ fn format_upgrades_are_in_version_order() { fn zakura_header_body_size_cf_upgrade_is_no_migration() { let upgrades: Vec<_> = format_upgrades(Some(Version::new(27, 1, 0))).collect(); - assert_eq!(upgrades.len(), 1); assert_eq!(upgrades[0].version(), Version::new(27, 2, 0)); assert!(!upgrades[0].needs_migration()); } + +#[test] +fn fast_sync_metadata_cf_upgrade_is_no_migration() { + let upgrades: Vec<_> = format_upgrades(Some(Version::new(27, 2, 0))).collect(); + + assert_eq!(upgrades.len(), 1); + assert_eq!(upgrades[0].version(), Version::new(27, 3, 0)); + assert!(!upgrades[0].needs_migration()); +} + +#[test] +fn vct_format_changes_are_consolidated_under_27_3_0() { + let upgrades: Vec<_> = format_upgrades(Some(Version::new(27, 3, 0))).collect(); + + assert!(upgrades.is_empty()); +} diff --git 
a/zebra-state/src/service/finalized_state/disk_format/upgrade/add_subtrees.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade/add_subtrees.rs index 41585c37dd4..3c56437861e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade/add_subtrees.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade/add_subtrees.rs @@ -138,6 +138,13 @@ impl DiskFormatUpgrade for AddSubtrees { db: &ZebraDb, cancel_receiver: &Receiver, ) -> Result, CancelFormatChange> { + // Fast-synced databases deliberately have no per-height note-commitment + // trees or subtrees below the checkpoint handoff height, so the subtree + // scans below do not apply to them. + if db.is_vct_synced() { + return Ok(Ok(())); + } + // This is redundant in some code paths, but not in others. But it's quick anyway. let quick_result = subtree_format_calculation_pre_checks(db); @@ -210,6 +217,12 @@ pub fn subtree_format_calculation_pre_checks(db: &ZebraDb) -> Result<(), String> return Ok(()); } + // Fast-synced databases deliberately have no per-height note-commitment trees + // or subtrees below the checkpoint handoff height, so these checks don't apply. + if db.is_vct_synced() { + return Ok(()); + } + // Check the entire format before returning any errors. 
let sapling_result = quick_check_sapling_subtrees(db); let orchard_result = quick_check_orchard_subtrees(db); diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade/cache_genesis_roots.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade/cache_genesis_roots.rs index 186cfe5f51c..37e24717012 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade/cache_genesis_roots.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade/cache_genesis_roots.rs @@ -74,6 +74,13 @@ pub fn quick_check(db: &ZebraDb) -> Result<(), String> { return Ok(()); } + // A fast-synced database deliberately has no per-height note-commitment trees + // below the checkpoint handoff height, including the genesis trees this check + // reads. The genesis-root-caching invariant does not apply to it. + if db.is_vct_synced() { + return Ok(()); + } + let sprout_genesis_tree = sprout::tree::NoteCommitmentTree::default(); let sprout_genesis_tree = db .sprout_tree_by_anchor(&sprout_genesis_tree.root()) @@ -127,6 +134,13 @@ pub fn detailed_check( db: &ZebraDb, cancel_receiver: &Receiver, ) -> Result, CancelFormatChange> { + // A fast-synced database deliberately has no per-height note-commitment trees + // below the checkpoint handoff height, so the per-height tree scans below do + // not apply to it. + if db.is_vct_synced() { + return Ok(Ok(())); + } + // This is redundant in some code paths, but not in others. But it's quick anyway. // Check the entire format before returning any errors. let mut result = quick_check(db); diff --git a/zebra-state/src/service/finalized_state/tests/prop.rs b/zebra-state/src/service/finalized_state/tests/prop.rs index 16140ef5e36..f21ca3304f6 100644 --- a/zebra-state/src/service/finalized_state/tests/prop.rs +++ b/zebra-state/src/service/finalized_state/tests/prop.rs @@ -1,6 +1,9 @@ //! Randomised property tests for the finalized state. 
-use std::env; +use std::{collections::HashMap, env, fs, sync::Arc}; + +use tempfile::TempDir; +use tokio::sync::oneshot; use zebra_chain::{ block::Height, @@ -13,16 +16,137 @@ use zebra_chain::{ use zebra_test::prelude::*; use crate::{ - config::Config, - service::{ - arbitrary::PreparedChain, - finalized_state::{CheckpointVerifiedBlock, FinalizedState}, - }, - tests::FakeChainHelper, + config::Config, service::arbitrary::PreparedChain, tests::FakeChainHelper, HashOrHeight, +}; + +use super::super::{ + commitment_aux, serve_block_roots, vct::validate_final_frontiers_bytes, + CheckpointVerifiedBlock, DiskWriteBatch, FinalizedState, }; const DEFAULT_PARTIAL_CHAIN_PROPTEST_CASES: u32 = 1; +type TestRootMap = HashMap< + u32, + ( + zebra_chain::sapling::tree::Root, + zebra_chain::orchard::tree::Root, + ), +>; +type SaplingTree = Arc; +type OrchardTree = Arc; +type SproutTree = Arc; + +fn enable_vct_test_fixture_source(state: &mut FinalizedState, roots: TestRootMap) { + state.enable_vct_fast_source( + Box::new(commitment_aux::FixtureSource::new(roots, None)), + false, + ); +} + +fn enable_vct_test_fixture_source_with_handoff( + state: &mut FinalizedState, + roots: TestRootMap, + handoff_height: Height, + sapling: SaplingTree, + orchard: OrchardTree, + sprout: SproutTree, +) { + state.enable_vct_fast_source( + Box::new(commitment_aux::FixtureSource::new( + roots, + Some(commitment_aux::FinalFrontiers { + height: handoff_height, + sapling, + orchard, + sprout, + }), + )), + false, + ); +} + +#[test] +fn vct_generated_final_frontier_bytes_are_node_loader_compatible() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = ParametersBuilder::default() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(10), + sapling: Some(15), + blossom: Some(20), + heartwood: Some(25), + canopy: Some(30), + nu5: Some(35), + nu6: Some(40), + nu6_1: Some(45), + nu6_2: Some(47), + nu6_3: Some(48), + nu7: Some(50), + }) + 
.expect("failed to set activation heights") + .extend_funding_streams() + .to_network() + .expect("failed to build configured network"); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), None::, None, false); + + proptest!(ProptestConfig::with_cases(1), + |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| { + let blocks: Vec<_> = chain.iter().collect(); + let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0; + let last = (nu5 + 3) as usize; + prop_assert!(blocks.len() > last, "generated chain unexpectedly short"); + let height = Height(last as u32); + + let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + for block in blocks.iter().take(last + 1) { + let cv = CheckpointVerifiedBlock::from(block.block.clone()); + legacy + .commit_finalized_direct(cv.into(), None, None, None, "vct frontier bytes legacy") + .unwrap(); + } + + let bytes = commitment_aux::produce_final_frontiers_bytes(&legacy.db, height) + .expect("legacy DB has final frontiers at the requested height"); + let temp_dir = TempDir::new().expect("temp dir"); + let path = temp_dir.path().join("frontier.bin"); + fs::write(&path, &bytes).expect("frontier bytes write to temp file"); + + let bytes_from_file = fs::read(&path).expect("frontier bytes read from temp file"); + validate_final_frontiers_bytes(&bytes_from_file, height) + .expect("generated frontier bytes pass node loader validation"); + + let parsed = commitment_aux::FinalFrontiers::from_bytes(&bytes_from_file) + .expect("validated bytes parse as final frontiers"); + prop_assert_eq!(parsed.height, height, "frontier height round-trips"); + prop_assert_eq!( + parsed.sapling.root(), + legacy.db.sapling_tree_by_height(&height).unwrap().root(), + "parsed Sapling frontier matches the DB tree at the requested height" + ); + prop_assert_eq!( + 
parsed.orchard.root(), + legacy.db.orchard_tree_by_height(&height).unwrap().root(), + "parsed Orchard frontier matches the DB tree at the requested height" + ); + prop_assert_eq!( + parsed.sprout.root(), + legacy.db.sprout_tree_for_tip().root(), + "parsed Sprout frontier matches the DB tip tree" + ); + + let wrong_height = Height(height.0.checked_add(1).expect("test height is in range")); + prop_assert!( + validate_final_frontiers_bytes(&bytes_from_file, wrong_height).is_err(), + "node loader validation rejects a frontier whose height does not match the checkpoint" + ); + }); + + Ok(()) +} + #[test] fn blocks_with_v5_transactions() -> Result<()> { let _init_guard = zebra_test::init(); @@ -39,6 +163,8 @@ fn blocks_with_v5_transactions() -> Result<()> { let (hash, _) = state.commit_finalized_direct( checkpoint_verified.into(), None, + None, + None, "blocks_with_v5_transactions test" ).unwrap(); prop_assert_eq!(Some(height), state.finalized_tip_height()); @@ -114,6 +240,8 @@ fn all_upgrades_and_wrong_commitments_with_fake_activation_heights() -> Result<( state.commit_finalized_direct( checkpoint_verified.into(), None, + None, + None, "all_upgrades test" ).expect_err("Must fail commitment check"); failure_count += 1; @@ -124,6 +252,8 @@ fn all_upgrades_and_wrong_commitments_with_fake_activation_heights() -> Result<( let (hash, _) = state.commit_finalized_direct( checkpoint_verified.into(), None, + None, + None, "all_upgrades test" ).unwrap(); prop_assert_eq!(Some(height), state.finalized_tip_height()); @@ -136,3 +266,1616 @@ fn all_upgrades_and_wrong_commitments_with_fake_activation_heights() -> Result<( Ok(()) } + +/// Verified-commitment-trees fast path (`commit_finalized_direct` Checkpoint arm): +/// committing with correct fixture roots produces the same consensus state (anchor +/// sets + history root) as the legacy recompute path across all upgrade boundaries, +/// and a wrong fixture root is rejected (verify-before-commit) rather than persisted. 
+/// Exercises: a below-Heartwood seed, history-tree creation at Heartwood, the NU5 +/// V1->V2 transition, verify-ahead against the buffered successor, trusted fixture tip +/// commits without a successor, and rejection of a corrupted root. +#[test] +#[allow(clippy::needless_range_loop)] // the loops index blocks[i+1] and the fixture by height +fn vct_fast_path_matches_legacy_and_rejects_wrong_roots() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = ParametersBuilder::default() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(10), + sapling: Some(15), + blossom: Some(20), + heartwood: Some(25), + canopy: Some(30), + nu5: Some(35), + nu6: Some(40), + nu6_1: Some(45), + nu6_2: Some(47), + nu6_3: Some(48), + nu7: Some(50), + }) + .expect("failed to set activation heights") + .extend_funding_streams() + .to_network() + .expect("failed to build configured network"); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), None::, None, false); + + proptest!(ProptestConfig::with_cases(env::var("PROPTEST_CASES") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(DEFAULT_PARTIAL_CHAIN_PROPTEST_CASES)), + |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| { + + let blocks: Vec<_> = chain.iter().collect(); + let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0; + let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0; + + // Process a bounded prefix [0, last] spanning the Heartwood (history-tree + // creation) and NU5 (V1->V2) boundaries plus a couple of V2 blocks; `last` is + // the tip we compare at. Chains are far longer than this + // (MAX_PARTIAL_CHAIN_BLOCKS), so this is a plain assertion, not a discard. 
+ let last = (nu5 + 3) as usize; + prop_assert!(blocks.len() > last + 1, "generated chain unexpectedly short"); + + // The fast path runs below the checkpoint, seeded from an already-committed + // tip. Seed just before Heartwood so the fast range creates the history tree + // (Heartwood) and crosses NU5 (V1->V2). + let seed = (heartwood - 1) as usize; + + // Legacy pass over [0, last]: record per-block roots for the fast range as + // the fixture, and the golden consensus state at the tip. + let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + let mut fixture = std::collections::HashMap::new(); + for i in 0..=last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let (_h, trees) = legacy + .commit_finalized_direct(cv.into(), None, None, None, "vct legacy") + .unwrap(); + if i > seed { + fixture.insert(i as u32, (trees.sapling.root(), trees.orchard.root())); + } + } + let golden_anchors = legacy.db.vct_anchor_digest(); + let golden_history = legacy.db.history_tree().hash(); + + // Fast pass over [0, last] with the correct fixture: genesis..=seed recompute + // (no fixture entry); seed+1..=last verify-ahead against their buffered + // successor. Every fast-eligible block takes the fast path, and the result + // equals legacy. 
+ let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + enable_vct_test_fixture_source(&mut fast, fixture.clone()); + for i in 0..=last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = Some((blocks[i + 1].block.clone(), None)); + fast.commit_finalized_direct(cv.into(), None, None, next, "vct fast") + .expect("verified fast commit succeeds"); + } + prop_assert_eq!(fast.db.vct_anchor_digest(), golden_anchors, "fast anchors must match legacy"); + prop_assert_eq!(fast.db.history_tree().hash(), golden_history, "fast history must match legacy"); + prop_assert_eq!(fast.vct_fast_count(), (last - seed) as u64, "every fast-eligible block took the fast path"); + // The dedup: each header commitment is checked once, not twice. Only the + // first fast block runs its own commitment check; every later fast block + // was already validated by its predecessor's look-ahead, so it is skipped. + prop_assert_eq!(fast.vct_prevalidated_count(), (last - seed - 1) as u64, "every fast block after the first skips its redundant own commitment check"); + + // A trusted local fixture may commit its tip root without a successor: it is + // not adversarial and the root is checked in arrears when a successor arrives. 
+ let mut no_successor = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + enable_vct_test_fixture_source(&mut no_successor, fixture.clone()); + for i in 0..last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = Some((blocks[i + 1].block.clone(), None)); + no_successor + .commit_finalized_direct(cv.into(), None, None, next, "vct no-successor seed") + .expect("verified fast commit succeeds with successor"); + } + prop_assert!(!no_successor.vct_fast_needs_successor(Height(last as u32)), "a trusted fixture tip can commit without a successor"); + let cv = CheckpointVerifiedBlock::from(blocks[last].block.clone()); + no_successor + .commit_finalized_direct(cv.into(), None, None, None, "vct trusted fixture no successor") + .expect("trusted fixture tip commits without a successor"); + prop_assert_eq!( + no_successor.db.finalized_tip_height(), + Some(Height(last as u32)), + "the trusted fixture tip committed" + ); + + // Negative: corrupt the fixture Sapling root at a V2 (post-NU5) height with a + // distinct value (the empty root; a V2 block has a non-empty Sapling tree). + // Fast mode cannot recompute a bad root away (the frontier is frozen), so the + // wrong root must be *rejected* by the next block's commitment (verify-before- + // commit) — the commit at that height fails rather than persisting it. 
+ let bad_height = (nu5 + 1) as usize; + let mut bad_fixture = fixture.clone(); + let bad_entry = bad_fixture.get_mut(&(bad_height as u32)).unwrap(); + prop_assert_ne!(bad_entry.0, Default::default(), "a V2 block must have a non-empty Sapling root"); + bad_entry.0 = Default::default(); + + let mut bad = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + enable_vct_test_fixture_source(&mut bad, bad_fixture); + let mut error_height = None; + for i in 0..=last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = Some((blocks[i + 1].block.clone(), None)); + if bad.commit_finalized_direct(cv.into(), None, None, next, "vct bad").is_err() { + error_height = Some(i); + break; + } + } + prop_assert_eq!(error_height, Some(bad_height), "a wrong fixture root is rejected at its own commit"); + + // Negative (Orchard, below NU5): no header commits to an Orchard root below + // NU5 (V1 history leaves ignore it; no MMR below Heartwood), so the fast path + // pins it to the empty-tree root. Corrupt a below-NU5 fixture Orchard root to + // a non-empty value. Unlike the Sapling MMR path (one-block lag), this is a + // direct check, so it is rejected at the block's *own* commit — closing the + // hole where an untrusted source injects a spurious Orchard anchor. 
+            let bad_orchard_height = (nu5 - 1) as usize;
+            prop_assert!(bad_orchard_height > seed, "the corrupted height must be in the fast range");
+            let empty_orchard = zebra_chain::orchard::tree::NoteCommitmentTree::default().root();
+            let wrong_orchard = zebra_chain::orchard::tree::Root::try_from([0u8; 32])
+                .expect("zero is a valid pallas base field element");
+            prop_assert_ne!(wrong_orchard, empty_orchard, "the wrong root must differ from the empty-tree root");
+
+            let mut bad_orchard_fixture = fixture.clone();
+            let bad_orchard_entry = bad_orchard_fixture.get_mut(&(bad_orchard_height as u32)).unwrap();
+            prop_assert_eq!(bad_orchard_entry.1, empty_orchard, "a below-NU5 block has the empty Orchard root");
+            bad_orchard_entry.1 = wrong_orchard;
+
+            let mut bad_orchard = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            enable_vct_test_fixture_source(&mut bad_orchard, bad_orchard_fixture);
+            let mut orchard_error_height = None;
+            for i in 0..=last {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let next = Some((blocks[i + 1].block.clone(), None));
+                if bad_orchard.commit_finalized_direct(cv.into(), None, None, next, "vct bad orchard").is_err() {
+                    orchard_error_height = Some(i);
+                    break;
+                }
+            }
+            prop_assert_eq!(orchard_error_height, Some(bad_orchard_height), "a wrong below-NU5 orchard root is rejected at its own commit");
+    });
+
+    Ok(())
+}
+
+/// A verified-commitment-trees fast sync must never legacy-recompute a height whose
+/// supplied root is missing once the note-commitment frontier is frozen: the running
+/// frontier is no longer the real one, so recomputing would fold a wrong root into the
+/// history MMR and silently corrupt consensus state (a peer that omits a height — see the
+/// driver's gap handling — could trigger this). Instead the committer must refuse with the
+/// retryable `VctSuppliedRootUnavailable` error and leave the database untouched, so the
+/// block can be committed later from a fetched root. This guards the liveness/no-corruption
+/// half of the peer-source fast path (the bad-root rejection half is covered by
+/// `vct_fast_path_matches_legacy_and_rejects_wrong_roots`).
+#[test]
+#[allow(clippy::needless_range_loop)] // the loop indexes blocks[i+1] and the fixture by height
+fn vct_frozen_frontier_hole_refuses_instead_of_recomputing() -> Result<()> {
+    let _init_guard = zebra_test::init();
+
+    // Compressed activation schedule: every upgrade through NU7 activates by height 50,
+    // so the heights this test uses (up to NU5 + 3) are reached by a short chain.
+    let network = ParametersBuilder::default()
+        .with_activation_heights(ConfiguredActivationHeights {
+            before_overwinter: Some(1),
+            overwinter: Some(10),
+            sapling: Some(15),
+            blossom: Some(20),
+            heartwood: Some(25),
+            canopy: Some(30),
+            nu5: Some(35),
+            nu6: Some(40),
+            nu6_1: Some(45),
+            nu6_2: Some(47),
+            nu6_3: Some(48),
+            nu7: Some(50),
+        })
+        .expect("failed to set activation heights")
+        .extend_funding_streams()
+        .to_network()
+        .expect("failed to build configured network");
+    // NOTE(review): the turbofish type was lost in extraction (`None::,`); it is presumed
+    // to be `NetworkUpgrade` — confirm against `LedgerState::genesis_strategy`.
+    let ledger_strategy =
+        LedgerState::genesis_strategy(Some(network), None::<NetworkUpgrade>, None, false);
+
+    // Runs a single proptest case.
+    proptest!(ProptestConfig::with_cases(1),
+        |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| {
+
+            let blocks: Vec<_> = chain.iter().collect();
+            let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0;
+            let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0;
+            let last = (nu5 + 3) as usize;
+            prop_assert!(blocks.len() > last + 1, "generated chain unexpectedly short");
+            let seed = (heartwood - 1) as usize;
+
+            // Record the per-block roots for the fast range as the fixture.
+            let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            let mut fixture = std::collections::HashMap::new();
+            for i in 0..=last {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let (_h, trees) = legacy
+                    .commit_finalized_direct(cv.into(), None, None, None, "vct hole legacy")
+                    .unwrap();
+                if i > seed {
+                    fixture.insert(i as u32, (trees.sapling.root(), trees.orchard.root()));
+                }
+            }
+
+            // Punch a hole: drop a post-NU5 height's root from the fixture, simulating a
+            // peer that omitted it (or a root evicted after failing verification). Earlier
+            // fast blocks freeze the frontier, so this height has no real frontier to
+            // recompute against.
+            let hole = (nu5 + 1) as usize;
+            prop_assert!(hole > seed && hole < last, "the hole must be inside the fast range");
+            fixture.remove(&(hole as u32));
+
+            let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            enable_vct_test_fixture_source(&mut fast, fixture);
+
+            let mut error_height = None;
+            for i in 0..=last {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let next = (i < last).then(|| (blocks[i + 1].block.clone(), None));
+                if let Err(error) = fast.commit_finalized_direct(cv.into(), None, None, next, "vct hole fast") {
+                    // The refusal is the typed, retryable error — not a generic
+                    // invalid-block error and not silent corruption. It is checked
+                    // through the typed accessor rather than by matching `Debug` text.
+                    prop_assert!(
+                        error.vct_supplied_root_unavailable_height().is_some(),
+                        "a frozen-frontier hole returns the retryable VctSuppliedRootUnavailable error, got: {error:?}"
+                    );
+                    error_height = Some(i);
+                    break;
+                }
+            }
+
+            prop_assert_eq!(error_height, Some(hole), "the commit refuses at the hole height, not before or after");
+            // Nothing at or past the hole was persisted: the tip is the last block before
+            // the hole, so no corrupt MMR leaf was written.
+            prop_assert_eq!(
+                fast.db.finalized_tip_height(),
+                Some(Height((hole - 1) as u32)),
+                "the database tip stays just below the hole — the refused block left state untouched"
+            );
+    });
+
+    Ok(())
+}
+
+/// Retryable VCT root misses must stay internal to the finalized write loop: the
+/// public checkpoint commit wrapper returns the queued block and error to the caller
+/// that can retry, rather than completing the block's response channel with a
+/// transient error.
+#[test]
+#[allow(clippy::needless_range_loop)] // the loop indexes blocks[i+1] and the fixture by height
+fn vct_retryable_root_miss_keeps_checkpoint_response_pending() -> Result<()> {
+    let _init_guard = zebra_test::init();
+
+    // Compressed activation schedule: every upgrade through NU7 activates by height 50.
+    let network = ParametersBuilder::default()
+        .with_activation_heights(ConfiguredActivationHeights {
+            before_overwinter: Some(1),
+            overwinter: Some(10),
+            sapling: Some(15),
+            blossom: Some(20),
+            heartwood: Some(25),
+            canopy: Some(30),
+            nu5: Some(35),
+            nu6: Some(40),
+            nu6_1: Some(45),
+            nu6_2: Some(47),
+            nu6_3: Some(48),
+            nu7: Some(50),
+        })
+        .expect("failed to set activation heights")
+        .extend_funding_streams()
+        .to_network()
+        .expect("failed to build configured network");
+    // NOTE(review): the turbofish type was lost in extraction (`None::,`); it is presumed
+    // to be `NetworkUpgrade` — confirm against `LedgerState::genesis_strategy`.
+    let ledger_strategy =
+        LedgerState::genesis_strategy(Some(network), None::<NetworkUpgrade>, None, false);
+
+    // Runs a single proptest case.
+    proptest!(ProptestConfig::with_cases(1),
+        |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| {
+
+            let blocks: Vec<_> = chain.iter().collect();
+            let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0;
+            let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0;
+            let last = (nu5 + 3) as usize;
+            prop_assert!(blocks.len() > last, "generated chain unexpectedly short");
+            let seed = (heartwood - 1) as usize;
+
+            // Legacy pass: record the per-block roots above the seed as the fixture.
+            let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            let mut fixture = std::collections::HashMap::new();
+            for i in 0..=last {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let (_h, trees) = legacy
+                    .commit_finalized_direct(cv.into(), None, None, None, "vct response legacy")
+                    .unwrap();
+                if i > seed {
+                    fixture.insert(i as u32, (trees.sapling.root(), trees.orchard.root()));
+                }
+            }
+
+            // Remove one height's root so the commit at that height has no supplied root.
+            let hole = (nu5 + 1) as usize;
+            fixture.remove(&(hole as u32));
+
+            let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            enable_vct_test_fixture_source(&mut fast, fixture);
+
+            for i in 0..hole {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let next = Some((blocks[i + 1].block.clone(), None));
+                fast.commit_finalized_direct(cv.into(), None, None, next, "vct response fast")
+                    .expect("pre-hole fast commits succeed");
+            }
+
+            // Commit the hole through the public wrapper, which owns the response channel.
+            let cv = CheckpointVerifiedBlock::from(blocks[hole].block.clone());
+            let (rsp_tx, mut rsp_rx) = oneshot::channel();
+            let next = Some((blocks[hole + 1].block.clone(), None));
+            let result = fast.commit_finalized((cv, rsp_tx), None, None, next);
+            let Err((returned_block, error)) = result else {
+                panic!("missing frozen-frontier root should return the queued block for retry");
+            };
+
+            prop_assert_eq!(returned_block.0.height, Height(hole as u32));
+            prop_assert!(
+                error.vct_supplied_root_unavailable_height().is_some(),
+                "the returned error is the typed retryable VCT root miss"
+            );
+            prop_assert!(
+                matches!(rsp_rx.try_recv(), Err(oneshot::error::TryRecvError::Empty)),
+                "the checkpoint response stays pending so the write loop can retry internally"
+            );
+    });
+
+    Ok(())
+}
+
+/// An *untrusted* (peer) source must never commit a fast block whose own supplied root has
+/// no buffered successor to confirm it against the header chain. A block's roots are only
+/// committed by the next block's header (the one-block lag), so committing at the sync tip
+/// would persist a root checked only one block later — irreversibly, once on disk. A wrong
+/// tip root would then wedge the sync with no recovery (the failure surfaces at the next
+/// block and is mis-attributed to *its* root). So the committer defers: it refuses the tip
+/// block with the retryable `VctSuppliedRootAwaitingSuccessor`, leaves the database
+/// untouched, and commits the same height once a successor is buffered. A trusted local
+/// fixture is exempt (covered by `vct_fast_path_matches_legacy_and_rejects_wrong_roots`,
+/// whose tip commits on the in-arrears check); this guards the peer path specifically.
+#[test]
+#[allow(clippy::needless_range_loop)] // the loop indexes blocks[i+1] and inserts roots by height
+fn vct_peer_source_defers_unverifiable_tip_root_until_successor() -> Result<()> {
+    use crate::service::finalized_state::commitment_aux::PeerSource;
+    use zebra_chain::parallel::commitment_aux::BlockCommitmentRoots;
+
+    let _init_guard = zebra_test::init();
+
+    // Compressed activation schedule: every upgrade through NU7 activates by height 50.
+    let network = ParametersBuilder::default()
+        .with_activation_heights(ConfiguredActivationHeights {
+            before_overwinter: Some(1),
+            overwinter: Some(10),
+            sapling: Some(15),
+            blossom: Some(20),
+            heartwood: Some(25),
+            canopy: Some(30),
+            nu5: Some(35),
+            nu6: Some(40),
+            nu6_1: Some(45),
+            nu6_2: Some(47),
+            nu6_3: Some(48),
+            nu7: Some(50),
+        })
+        .expect("failed to set activation heights")
+        .extend_funding_streams()
+        .to_network()
+        .expect("failed to build configured network");
+    // NOTE(review): the turbofish type was lost in extraction (`None::,`); it is presumed
+    // to be `NetworkUpgrade` — confirm against `LedgerState::genesis_strategy`.
+    let ledger_strategy =
+        LedgerState::genesis_strategy(Some(network), None::<NetworkUpgrade>, None, false);
+
+    // Runs a single proptest case.
+    proptest!(ProptestConfig::with_cases(1),
+        |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| {
+
+            let blocks: Vec<_> = chain.iter().collect();
+            let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0;
+            let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0;
+            // The deferral target: a post-NU5 (real MMR root) height, so it sits above
+            // Heartwood where the root needs a successor to be confirmed.
+            let tip_target = (nu5 + 1) as usize;
+            prop_assert!(blocks.len() > tip_target + 1, "generated chain unexpectedly short");
+            let seed = (heartwood - 1) as usize;
+
+            // Legacy golden pass to source the correct per-block roots for the fast range.
+            let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            let mut peer_roots = Vec::new();
+            for i in 0..=tip_target {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let (_h, trees) = legacy
+                    .commit_finalized_direct(cv.into(), None, None, None, "vct defer legacy")
+                    .unwrap();
+                if i > seed {
+                    peer_roots.push(BlockCommitmentRoots {
+                        height: Height(i as u32),
+                        sapling_root: trees.sapling.root(),
+                        orchard_root: trees.orchard.root(),
+                    });
+                }
+            }
+
+            // An untrusted peer source pre-filled with the *correct* roots: the deferral is
+            // about the missing successor, not a bad root.
+            let (source, writer) = PeerSource::new(None);
+            writer.insert_roots(peer_roots);
+
+            let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            fast.enable_vct_fast_source(Box::new(source), true);
+
+            // Commit up to (but not including) the tip target, each with its successor.
+            for i in 0..tip_target {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let next = Some((blocks[i + 1].block.clone(), None));
+                fast.commit_finalized_direct(cv.into(), None, None, next, "vct defer pre-tip")
+                    .expect("pre-tip fast commits succeed");
+            }
+            prop_assert_eq!(fast.db.finalized_tip_height(), Some(Height((tip_target - 1) as u32)));
+
+            // The tip target with no buffered successor must defer, not commit: its own
+            // (correct) root is not yet confirmed, and the peer source is untrusted.
+            prop_assert!(
+                fast.vct_fast_needs_successor(Height(tip_target as u32)),
+                "an untrusted peer tip root needs successor verification"
+            );
+            let pre_deferral_prevalidated = fast.vct_prevalidated_count();
+            let cv = CheckpointVerifiedBlock::from(blocks[tip_target].block.clone());
+            let error = fast
+                .commit_finalized_direct(cv.into(), None, None, None, "vct defer tip no successor")
+                .expect_err("an untrusted tip root with no successor must defer, not commit");
+            prop_assert!(
+                error.vct_supplied_root_unavailable_height().is_none(),
+                "deferral is not a refetch case (the root is present): {error:?}"
+            );
+            prop_assert!(
+                format!("{error:?}").contains("VctSuppliedRootAwaitingSuccessor"),
+                "the tip defers with the await-successor error, got: {error:?}"
+            );
+            prop_assert_eq!(
+                fast.db.finalized_tip_height(),
+                Some(Height((tip_target - 1) as u32)),
+                "the deferred block left the database untouched"
+            );
+            let after_deferral_prevalidated = fast.vct_prevalidated_count();
+            prop_assert_eq!(
+                after_deferral_prevalidated,
+                pre_deferral_prevalidated + 1,
+                "the deferred attempt uses the predecessor look-ahead"
+            );
+
+            // Once a successor is buffered, the very same height commits and the tip advances:
+            // the deferral was a wait, not a permanent stall.
+            let cv = CheckpointVerifiedBlock::from(blocks[tip_target].block.clone());
+            let next = Some((blocks[tip_target + 1].block.clone(), None));
+            fast.commit_finalized_direct(cv.into(), None, None, next, "vct defer tip with successor")
+                .expect("the deferred height commits once its successor is buffered");
+            prop_assert_eq!(
+                fast.vct_prevalidated_count(),
+                after_deferral_prevalidated + 1,
+                "the retry reuses the preserved predecessor look-ahead"
+            );
+            prop_assert_eq!(
+                fast.db.finalized_tip_height(),
+                Some(Height(tip_target as u32)),
+                "the tip advances once the successor confirms the root"
+            );
+    });
+
+    Ok(())
+}
+
+/// A wrong peer-supplied root must be recoverable at the same height: the committer rejects and
+/// evicts the bad cached value, leaves the database parked below the height, then commits the
+/// same block once the `tree_aux` driver refills that height with a verifiable root.
+#[test]
+#[allow(clippy::needless_range_loop)] // the loop indexes blocks[i+1] and inserts roots by height
+fn vct_peer_source_bad_root_refill_commits_same_height() -> Result<()> {
+    use crate::service::finalized_state::commitment_aux::PeerSource;
+    use zebra_chain::parallel::commitment_aux::BlockCommitmentRoots;
+
+    let _init_guard = zebra_test::init();
+
+    // Compressed activation schedule: every upgrade through NU7 activates by height 50.
+    let network = ParametersBuilder::default()
+        .with_activation_heights(ConfiguredActivationHeights {
+            before_overwinter: Some(1),
+            overwinter: Some(10),
+            sapling: Some(15),
+            blossom: Some(20),
+            heartwood: Some(25),
+            canopy: Some(30),
+            nu5: Some(35),
+            nu6: Some(40),
+            nu6_1: Some(45),
+            nu6_2: Some(47),
+            nu6_3: Some(48),
+            nu7: Some(50),
+        })
+        .expect("failed to set activation heights")
+        .extend_funding_streams()
+        .to_network()
+        .expect("failed to build configured network");
+    // NOTE(review): the turbofish type was lost in extraction (`None::,`); it is presumed
+    // to be `NetworkUpgrade` — confirm against `LedgerState::genesis_strategy`.
+    let ledger_strategy =
+        LedgerState::genesis_strategy(Some(network), None::<NetworkUpgrade>, None, false);
+
+    // Runs a single proptest case.
+    proptest!(ProptestConfig::with_cases(1),
+        |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| {
+
+            let blocks: Vec<_> = chain.iter().collect();
+            let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0;
+            let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0;
+            let target = (nu5 + 1) as usize;
+            prop_assert!(blocks.len() > target + 1, "generated chain unexpectedly short");
+            let seed = (heartwood - 1) as usize;
+
+            // Source the true roots from a legacy pass, then poison the target height exactly
+            // as a malicious peer would. Earlier roots are correct so the frontier freezes
+            // before the bad root is encountered.
+            let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            let mut peer_roots = Vec::new();
+            let mut correct_target_root = None;
+            for i in 0..=target {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let (_h, trees) = legacy
+                    .commit_finalized_direct(cv.into(), None, None, None, "vct refill legacy")
+                    .unwrap();
+                if i > seed {
+                    let root = BlockCommitmentRoots {
+                        height: Height(i as u32),
+                        sapling_root: trees.sapling.root(),
+                        orchard_root: trees.orchard.root(),
+                    };
+                    if i == target {
+                        // Keep the true root for the later refill; hand the peer cache a
+                        // copy whose Sapling root is replaced by the default value.
+                        correct_target_root = Some(root.clone());
+                        let mut poisoned = root;
+                        prop_assert_ne!(
+                            poisoned.sapling_root,
+                            Default::default(),
+                            "a V2 target block must have a non-empty Sapling root"
+                        );
+                        poisoned.sapling_root = Default::default();
+                        peer_roots.push(poisoned);
+                    } else {
+                        peer_roots.push(root);
+                    }
+                }
+            }
+            let correct_target_root = correct_target_root.expect("target root was produced");
+
+            let (source, writer) = PeerSource::new(None);
+            writer.insert_roots(peer_roots);
+
+            let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            fast.enable_vct_fast_source(Box::new(source), true);
+
+            for i in 0..target {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let next = Some((blocks[i + 1].block.clone(), None));
+                fast.commit_finalized_direct(cv.into(), None, None, next, "vct refill pre-target")
+                    .expect("pre-target fast commits succeed");
+            }
+            prop_assert_eq!(fast.db.finalized_tip_height(), Some(Height((target - 1) as u32)));
+
+            let cv = CheckpointVerifiedBlock::from(blocks[target].block.clone());
+            let next = Some((blocks[target + 1].block.clone(), None));
+            let error = fast
+                .commit_finalized_direct(cv.into(), None, None, next.clone(), "vct poisoned target")
+                .expect_err("the poisoned peer root must be rejected before commit");
+            prop_assert_eq!(
+                error.vct_supplied_root_unavailable_height(),
+                Some(Height(target as u32)),
+                "the bad root is exposed as a retryable refetch for its own height"
+            );
+            prop_assert_eq!(
+                fast.db.finalized_tip_height(),
+                Some(Height((target - 1) as u32)),
+                "the rejected root left the database parked below the target"
+            );
+
+            // Simulate the `tree_aux` driver refilling the evicted height from another peer.
+            writer.insert_roots([correct_target_root]);
+
+            let cv = CheckpointVerifiedBlock::from(blocks[target].block.clone());
+            fast.commit_finalized_direct(cv.into(), None, None, next, "vct refilled target")
+                .expect("the same height commits once the peer cache is refilled");
+            prop_assert_eq!(
+                fast.db.finalized_tip_height(),
+                Some(Height(target as u32)),
+                "the refilled root unblocks the parked height"
+            );
+    });
+
+    Ok(())
+}
+
+/// The frozen-frontier guard must survive a restart. A fast sync interrupted before the
+/// checkpoint handoff leaves the stale frozen frontier persisted (fast commits never write
+/// per-height trees) with the tip still below the handoff, but the in-memory `frozen` flag
+/// is rebuilt from scratch on open. If it came back `false`, the first post-restart height
+/// with no supplied root would legacy-recompute against the stale on-disk frontier and
+/// corrupt the history MMR — the exact hazard the in-session guard prevents
+/// (`vct_frozen_frontier_hole_refuses_instead_of_recomputing`). So `FinalizedState::new`
+/// re-derives the flag from the durable fast-sync marker. This reopens the database between
+/// freezing and the hole, and asserts that the very first commit of the new session (no
+/// prior fast block to re-arm the flag in-session) still refuses with the retryable
+/// `VctSuppliedRootUnavailable`, leaves state untouched, and commits once the root arrives.
+#[test]
+#[allow(clippy::needless_range_loop)] // the loop indexes blocks[i+1] and the fixture by height
+fn vct_frozen_frontier_survives_reopen() -> Result<()> {
+    let _init_guard = zebra_test::init();
+
+    // Compressed activation schedule: every upgrade through NU7 activates by height 50.
+    let network = ParametersBuilder::default()
+        .with_activation_heights(ConfiguredActivationHeights {
+            before_overwinter: Some(1),
+            overwinter: Some(10),
+            sapling: Some(15),
+            blossom: Some(20),
+            heartwood: Some(25),
+            canopy: Some(30),
+            nu5: Some(35),
+            nu6: Some(40),
+            nu6_1: Some(45),
+            nu6_2: Some(47),
+            nu6_3: Some(48),
+            nu7: Some(50),
+        })
+        .expect("failed to set activation heights")
+        .extend_funding_streams()
+        .to_network()
+        .expect("failed to build configured network");
+    // NOTE(review): the turbofish type was lost in extraction (`None::,`); it is presumed
+    // to be `NetworkUpgrade` — confirm against `LedgerState::genesis_strategy`.
+    let ledger_strategy =
+        LedgerState::genesis_strategy(Some(network), None::<NetworkUpgrade>, None, false);
+
+    // Runs a single proptest case.
+    proptest!(ProptestConfig::with_cases(1),
+        |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| {
+
+            let blocks: Vec<_> = chain.iter().collect();
+            let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0;
+            let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0;
+            let handoff_height = nu5 + 3;
+            let last = handoff_height as usize;
+            prop_assert!(blocks.len() > last, "generated chain unexpectedly short");
+            let seed = (heartwood - 1) as usize;
+
+            // Stop the fast sync two blocks below the handoff, so the tip is inside the
+            // frozen region and there is room for the hole at `stop + 1` (still below the
+            // handoff, where the real frontier would have been written).
+            let stop = (handoff_height - 2) as usize;
+            let hole = stop + 1;
+            prop_assert!(seed < stop && hole < last, "the hole must sit inside the frozen fast range");
+
+            // Legacy golden pass over [0, last]: the per-block fixture for the fast range
+            // and the real final frontiers at the handoff (needed to configure fast mode).
+            let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false);
+            let mut fixture = std::collections::HashMap::new();
+            let mut handoff_trees = None;
+            for i in 0..=last {
+                let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                let (_h, trees) = legacy
+                    .commit_finalized_direct(cv.into(), None, None, None, "vct reopen legacy")
+                    .unwrap();
+                if i > seed {
+                    fixture.insert(i as u32, (trees.sapling.root(), trees.orchard.root()));
+                }
+                if i == last {
+                    handoff_trees = Some(trees);
+                }
+            }
+            let handoff_trees = handoff_trees.expect("committed the handoff block");
+
+            // A persistent database so the syncing handle can be dropped and reopened by
+            // path, modelling a node restart. Archive storage mode (the default): fast sync
+            // is the default under checkpoint sync, and a fast-synced database reopens fine
+            // in archive mode, exactly as in production.
+            let dir = TempDir::new().expect("temp dir");
+            let config = Config {
+                cache_dir: dir.path().to_path_buf(),
+                ephemeral: false,
+                ..Config::default()
+            };
+
+            // Session 1: a genesis-start fast sync interrupted at `stop`, two blocks below
+            // the handoff. The fast commits write the fast-sync marker but no per-height
+            // trees, so the on-disk frontier is frozen and the tip is below the handoff.
+            {
+                let mut fast = FinalizedState::new(&config, &network, #[cfg(feature = "elasticsearch")] false);
+                enable_vct_test_fixture_source_with_handoff(
+                    &mut fast,
+                    fixture.clone(),
+                    Height(handoff_height),
+                    handoff_trees.sapling.clone(),
+                    handoff_trees.orchard.clone(),
+                    handoff_trees.sprout.clone(),
+                );
+                for i in 0..=stop {
+                    let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone());
+                    let next = Some((blocks[i + 1].block.clone(), None));
+                    fast.commit_finalized_direct(cv.into(), None, None, next, "vct reopen fast")
+                        .expect("verified fast commit succeeds");
+                }
+                prop_assert_eq!(fast.vct_fast_synced_below(), Some(Height(handoff_height)), "the interrupted sync left the fast-sync marker");
+                prop_assert_eq!(fast.db.finalized_tip_height(), Some(Height(stop as u32)), "the tip is parked below the handoff");
+                // Drop releases the database lock for the reopen below.
+            }
+
+            // Session 2 (restart): reopen the same database, then punch a hole at the next
+            // height (a peer that omitted it, or a root evicted after failing verification).
+            let mut reopened = FinalizedState::new(&config, &network, #[cfg(feature = "elasticsearch")] false);
+            prop_assert_eq!(reopened.vct_fast_synced_below(), Some(Height(handoff_height)), "the marker is still durable after reopen");
+
+            let mut holed = fixture.clone();
+            holed.remove(&(hole as u32));
+            enable_vct_test_fixture_source_with_handoff(
+                &mut reopened,
+                holed,
+                Height(handoff_height),
+                handoff_trees.sapling.clone(),
+                handoff_trees.orchard.clone(),
+                handoff_trees.sprout.clone(),
+            );
+
+            // The very first commit of the new session is the hole. No fast block has run
+            // since the reopen, so the only thing that can arm the guard is the flag seeded
+            // from the durable marker. Before the fix it came back `false` and this would
+            // legacy-recompute against the stale frontier; now it refuses.
+            let cv = CheckpointVerifiedBlock::from(blocks[hole].block.clone());
+            let next = Some((blocks[hole + 1].block.clone(), None));
+            let error = reopened
+                .commit_finalized_direct(cv.into(), None, None, next, "vct reopen hole")
+                .expect_err("a frozen-frontier hole must refuse after reopen, not recompute");
+            // Checked through the typed accessor rather than by matching `Debug` text.
+            prop_assert!(
+                error.vct_supplied_root_unavailable_height().is_some(),
+                "the reopened committer returns the retryable VctSuppliedRootUnavailable, got: {error:?}"
+            );
+            prop_assert_eq!(reopened.db.finalized_tip_height(), Some(Height(stop as u32)), "the refused block left the reopened state untouched");
+
+            // Retryable: once a verifiable root for the hole is supplied, the same height
+            // commits and the tip advances — the refusal was a stall, not a permanent wedge.
+            enable_vct_test_fixture_source_with_handoff(
+                &mut reopened,
+                fixture.clone(),
+                Height(handoff_height),
+                handoff_trees.sapling.clone(),
+                handoff_trees.orchard.clone(),
+                handoff_trees.sprout.clone(),
+            );
+            let cv = CheckpointVerifiedBlock::from(blocks[hole].block.clone());
+            let next = Some((blocks[hole + 1].block.clone(), None));
+            reopened
+                .commit_finalized_direct(cv.into(), None, None, next, "vct reopen refill")
+                .expect("the height commits once its root is fetched");
+            prop_assert_eq!(reopened.db.finalized_tip_height(), Some(Height(hole as u32)), "the tip advances past the former hole once the root arrives");
+    });
+
+    Ok(())
+}
+
+/// Verified-commitment-trees checkpoint handoff (merged increments 4+5): a
+/// genesis-start fast sync writes the verified final frontier at the handoff
+/// height, marks the database fast-synced, guards historical per-height tree reads
+/// below the handoff, and leaves the tip treestate (which post-checkpoint semantic
+/// verification resumes from) byte-identical to the legacy recompute.
+#[test] +#[allow(clippy::needless_range_loop)] // the loops index blocks[i+1] and the fixture by height +fn vct_fast_sync_handoff_marks_database_and_resumes() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = ParametersBuilder::default() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(10), + sapling: Some(15), + blossom: Some(20), + heartwood: Some(25), + canopy: Some(30), + nu5: Some(35), + nu6: Some(40), + nu6_1: Some(45), + nu6_2: Some(47), + nu6_3: Some(48), + nu7: Some(50), + }) + .expect("failed to set activation heights") + .extend_funding_streams() + .to_network() + .expect("failed to build configured network"); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), None::, None, false); + + proptest!(ProptestConfig::with_cases(env::var("PROPTEST_CASES") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(DEFAULT_PARTIAL_CHAIN_PROPTEST_CASES)), + |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| { + + let blocks: Vec<_> = chain.iter().collect(); + let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0; + let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0; + let last = (nu5 + 3) as usize; + prop_assert!(blocks.len() > last, "generated chain unexpectedly short"); + let handoff = Height(last as u32); + + // The fast range is seeded just below Heartwood, so it is authenticated by + // the ZIP-221 MMR (the synthetic chain's pre-Heartwood `FinalSaplingRoot` + // headers are not consistent with the computed trees, so the Sapling-era + // direct-header path can't be exercised here — that rides with the real + // synced node). The handoff is at the tip. 
+ let seed = (heartwood - 1) as usize; + + // Legacy pass over [0, last]: the per-block fixture for the fast range, the + // golden consensus state, and the real final frontiers at the handoff. + let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + let mut fixture = std::collections::HashMap::new(); + let mut handoff_trees = None; + for i in 0..=last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let (_h, trees) = legacy + .commit_finalized_direct(cv.into(), None, None, None, "vct legacy") + .unwrap(); + if i > seed { + fixture.insert(i as u32, (trees.sapling.root(), trees.orchard.root())); + } + if i == last { + handoff_trees = Some(trees); + } + } + let golden_anchors = legacy.db.vct_anchor_digest(); + let golden_history = legacy.db.history_tree().hash(); + let golden_tip = legacy.db.note_commitment_trees_for_tip(); + let handoff_trees = handoff_trees.expect("committed the handoff block"); + + // Fast genesis-start pass over [0, last], supplying the verified frontiers + // for the handoff at `last`. 
+ let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + enable_vct_test_fixture_source_with_handoff( + &mut fast, + fixture.clone(), + handoff, + handoff_trees.sapling.clone(), + handoff_trees.orchard.clone(), + handoff_trees.sprout.clone(), + ); + prop_assert!(!fast.vct_fast_needs_successor(handoff), "the trusted handoff frontier authenticates the handoff root without a successor"); + for i in 0..=last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = (i < last).then(|| (blocks[i + 1].block.clone(), None)); + fast.commit_finalized_direct(cv.into(), None, None, next, "vct fast handoff") + .expect("verified fast commit succeeds"); + } + + // The database is marked fast-synced at the handoff height, and the upgrade height is + // genesis: a node that fast-syncs from genesis records `U = 0`, so its whole `[0, H)` + // range is the absent band and every request is served from the index. + prop_assert_eq!(fast.vct_fast_synced_below(), Some(handoff), "fast-sync marker is set to the handoff height"); + prop_assert_eq!(fast.db.vct_upgrade_height(), Some(Height(0)), "genesis fast sync records the upgrade height at genesis"); + + // Consensus state (anchor sets + history root) matches the legacy recompute. + prop_assert_eq!(fast.db.vct_anchor_digest(), golden_anchors, "fast anchors must match legacy"); + prop_assert_eq!(fast.db.history_tree().hash(), golden_history, "fast history must match legacy"); + + // The handoff wrote the real frontier at the checkpoint, so the tip + // treestate that semantic verification resumes from matches legacy. 
+ let fast_tip = fast.db.note_commitment_trees_for_tip(); + prop_assert_eq!(fast_tip.sapling.root(), golden_tip.sapling.root(), "tip sapling frontier must match legacy"); + prop_assert_eq!(fast_tip.orchard.root(), golden_tip.orchard.root(), "tip orchard frontier must match legacy"); + prop_assert_eq!(fast_tip.sprout.root(), golden_tip.sprout.root(), "tip sprout frontier must match legacy"); + + // Historical per-height tree reads below the handoff are unavailable + // (guarded, no panic), while the handoff height itself is present. + prop_assert!(fast.db.sapling_tree_by_height(&Height(last as u32 - 1)).is_none(), "below-handoff sapling tree read is guarded"); + prop_assert!(fast.db.orchard_tree_by_height(&Height(last as u32 - 1)).is_none(), "below-handoff orchard tree read is guarded"); + prop_assert!(fast.db.sapling_tree_by_height(&handoff).is_some(), "handoff sapling tree is present"); + prop_assert!(fast.db.orchard_tree_by_height(&handoff).is_some(), "handoff orchard tree is present"); + + // Root-serving index (design §4): the fast-synced node holds no per-height trees + // below the handoff (asserted just above), yet it must still serve `tree_aux` + // roots for that range so the root-serving fleet does not collapse as nodes + // fast-sync. Those roots come from the compact `commitment_roots_by_height` index + // the fast path persists per block, and they match exactly the roots the + // legacy/archive node derives from its per-height trees. 
+ let below_handoff = Height((seed + 1) as u32)..=Height(last as u32 - 1); + let served = fast.db.commitment_roots_by_height_range(below_handoff.clone()); + let expected = commitment_aux::produce_block_roots(&legacy.db, below_handoff.clone()); + prop_assert!(!served.is_empty(), "a fast-synced node serves below-handoff roots from the index"); + prop_assert_eq!(served, expected.clone(), "index-served roots match the legacy per-height-tree roots"); + + // The same range goes through `serve_block_roots`: with `U = 0` the request starts at + // or above the upgrade height, so it is served entirely from the index — no per-height + // trees (which the fast-synced node lacks below the handoff) are consulted. + prop_assert_eq!(serve_block_roots(&fast.db, below_handoff), expected, "serve_block_roots serves the fast-synced range from the index"); + + // The `z_gettreestate` RPC gate predicate matches the read guard: a + // below-handoff height is unavailable (typed archive-mode error), while the + // handoff height itself is available. + prop_assert!(fast.db.vct_historical_tree_unavailable(HashOrHeight::Height(Height(last as u32 - 1))), "RPC gate: below-handoff treestate is unavailable"); + prop_assert!(!fast.db.vct_historical_tree_unavailable(HashOrHeight::Height(handoff)), "RPC gate: handoff treestate is available"); + + // Negative: a peer can supply a wrong root exactly at the handoff height, + // where there is no buffered checkpoint successor to authenticate it. The + // final embedded frontier still binds the expected root, so the committer + // must reject and retry instead of panicking or writing a bad handoff. 
+ let mut bad_handoff_fixture = fixture.clone(); + let bad_handoff_entry = bad_handoff_fixture + .get_mut(&(last as u32)) + .expect("fixture contains the handoff root"); + prop_assert_ne!(bad_handoff_entry.0, Default::default(), "a post-NU5 handoff block must have a non-empty Sapling root"); + bad_handoff_entry.0 = Default::default(); + + let mut bad_handoff = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + enable_vct_test_fixture_source_with_handoff( + &mut bad_handoff, + bad_handoff_fixture, + handoff, + handoff_trees.sapling.clone(), + handoff_trees.orchard.clone(), + handoff_trees.sprout.clone(), + ); + + let mut error_height = None; + let mut handoff_error = None; + for i in 0..=last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = (i < last).then(|| (blocks[i + 1].block.clone(), None)); + match bad_handoff.commit_finalized_direct(cv.into(), None, None, next, "vct bad handoff") { + Ok(_) => {} + Err(error) => { + error_height = Some(i); + handoff_error = Some(error); + break; + } + } + } + prop_assert_eq!(error_height, Some(last), "the bad handoff root is rejected at the handoff height"); + let handoff_error = handoff_error.expect("the bad handoff root failed"); + prop_assert!( + format!("{handoff_error:?}").contains("VctSuppliedRootUnavailable"), + "a bad handoff root returns the retryable VctSuppliedRootUnavailable error, got: {handoff_error:?}" + ); + prop_assert_eq!( + bad_handoff.db.finalized_tip_height(), + Some(Height(last as u32 - 1)), + "the refused handoff block left state untouched" + ); + }); + + Ok(()) +} + +/// Switching between the rollout fast path and the manual recompute path is safe at the +/// committed-state boundaries: after the handoff writes the real frontier, legacy recompute can +/// resume from that frontier; before any fast commit has frozen the frontier, a later fast sync +/// can consume verified roots for future heights. 
+#[test] +#[allow(clippy::needless_range_loop)] // the loops index blocks[i+1] and the fixture by height +fn vct_mode_switches_continue_from_safe_boundaries() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = ParametersBuilder::default() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(10), + sapling: Some(15), + blossom: Some(20), + heartwood: Some(25), + canopy: Some(30), + nu5: Some(35), + nu6: Some(40), + nu6_1: Some(45), + nu6_2: Some(47), + nu6_3: Some(48), + nu7: Some(50), + }) + .expect("failed to set activation heights") + .extend_funding_streams() + .to_network() + .expect("failed to build configured network"); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), None::, None, false); + + proptest!(ProptestConfig::with_cases(1), + |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| { + let blocks: Vec<_> = chain.iter().collect(); + let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0; + let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0; + let handoff_index = (nu5 + 3) as usize; + let post_handoff_tip = handoff_index + 2; + prop_assert!(blocks.len() > post_handoff_tip, "generated chain unexpectedly short"); + let handoff = Height(handoff_index as u32); + let seed = (heartwood - 1) as usize; + + // Legacy golden pass over the full range: source fast roots and final frontiers, then + // compare both switching scenarios against this byte-identical manual recompute. 
+ let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + let mut fixture = std::collections::HashMap::new(); + let mut handoff_trees = None; + let mut post_handoff_roots = None; + for i in 0..=post_handoff_tip { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let (_h, trees) = legacy + .commit_finalized_direct(cv.into(), None, None, None, "vct switch legacy") + .unwrap(); + if i > seed && i <= handoff_index { + fixture.insert(i as u32, (trees.sapling.root(), trees.orchard.root())); + } + if i == handoff_index { + handoff_trees = Some(trees); + } else if i == handoff_index + 1 { + post_handoff_roots = Some((trees.sapling.root(), trees.orchard.root())); + } + } + let golden_anchors = legacy.db.vct_anchor_digest(); + let golden_history = legacy.db.history_tree().hash(); + let golden_tip = legacy.db.note_commitment_trees_for_tip(); + let handoff_trees = handoff_trees.expect("committed the handoff block"); + let post_handoff_roots = post_handoff_roots.expect("committed a post-handoff block"); + + // Fast -> manual: complete the fast handoff, reopen with the force-disable knob, and + // keep checkpoint sync enabled while post-handoff blocks recompute from the real + // frontier written at the handoff. 
+ let fast_to_manual_dir = TempDir::new().expect("temp dir"); + let fast_config = Config { + cache_dir: fast_to_manual_dir.path().to_path_buf(), + ephemeral: false, + ..Config::default() + }; + { + let mut fast = FinalizedState::new(&fast_config, &network, #[cfg(feature = "elasticsearch")] false); + enable_vct_test_fixture_source_with_handoff( + &mut fast, + fixture.clone(), + handoff, + handoff_trees.sapling.clone(), + handoff_trees.orchard.clone(), + handoff_trees.sprout.clone(), + ); + for i in 0..=handoff_index { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = (i < handoff_index).then(|| (blocks[i + 1].block.clone(), None)); + fast.commit_finalized_direct(cv.into(), None, None, next, "vct switch fast prefix") + .expect("verified fast prefix commits"); + } + prop_assert_eq!(fast.vct_fast_synced_below(), Some(handoff), "fast sync reached the handoff before the switch"); + } + + let manual_config = Config { + disable_vct_fast_sync: true, + ..fast_config + }; + let mut manual = FinalizedState::new(&manual_config, &network, #[cfg(feature = "elasticsearch")] false); + for i in (handoff_index + 1)..=post_handoff_tip { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + manual + .commit_finalized_direct(cv.into(), None, None, None, "vct switch manual suffix") + .expect("manual suffix commits after fast handoff"); + } + let manual_tip = manual.db.note_commitment_trees_for_tip(); + prop_assert_eq!(manual.db.vct_anchor_digest(), golden_anchors, "fast-to-manual anchors match legacy"); + prop_assert_eq!(manual.db.history_tree().hash(), golden_history, "fast-to-manual history matches legacy"); + prop_assert_eq!(manual_tip.sapling.root(), golden_tip.sapling.root(), "fast-to-manual sapling tip matches legacy"); + prop_assert_eq!(manual_tip.orchard.root(), golden_tip.orchard.root(), "fast-to-manual orchard tip matches legacy"); + prop_assert_eq!(manual_tip.sprout.root(), golden_tip.sprout.root(), "fast-to-manual sprout tip 
matches legacy"); + + // Manual -> fast: commit a prefix with the force-disable knob before any fast block + // can freeze the frontier, then reopen and consume verified roots through the handoff. + let manual_to_fast_dir = TempDir::new().expect("temp dir"); + let manual_prefix_config = Config { + cache_dir: manual_to_fast_dir.path().to_path_buf(), + ephemeral: false, + disable_vct_fast_sync: true, + ..Config::default() + }; + { + let mut manual_prefix = FinalizedState::new(&manual_prefix_config, &network, #[cfg(feature = "elasticsearch")] false); + for i in 0..=seed { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + manual_prefix + .commit_finalized_direct(cv.into(), None, None, None, "vct switch manual prefix") + .expect("manual prefix commits"); + } + } + + let fast_suffix_config = Config { + disable_vct_fast_sync: false, + ..manual_prefix_config + }; + let mut fast_suffix = FinalizedState::new(&fast_suffix_config, &network, #[cfg(feature = "elasticsearch")] false); + let mut guarded_fixture = fixture; + // A stale or over-eager peer cache entry above the handoff must be ignored so + // the committer resumes legacy recompute from the real handoff frontier. 
+ prop_assert_ne!( + post_handoff_roots.0, + Default::default(), + "a post-NU5 post-handoff block must have a non-empty Sapling root", + ); + guarded_fixture.insert( + (handoff_index + 1) as u32, + (Default::default(), post_handoff_roots.1), + ); + enable_vct_test_fixture_source_with_handoff( + &mut fast_suffix, + guarded_fixture, + handoff, + handoff_trees.sapling.clone(), + handoff_trees.orchard.clone(), + handoff_trees.sprout.clone(), + ); + for i in (seed + 1)..=post_handoff_tip { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = (i < post_handoff_tip).then(|| (blocks[i + 1].block.clone(), None)); + fast_suffix + .commit_finalized_direct(cv.into(), None, None, next, "vct switch fast suffix") + .expect("fast suffix commits after manual prefix"); + } + prop_assert_eq!( + fast_suffix.vct_fast_count(), + (handoff_index - seed) as u64, + "an above-handoff cached root must not keep the committer on the fast path", + ); + let fast_suffix_tip = fast_suffix.db.note_commitment_trees_for_tip(); + prop_assert_eq!(fast_suffix.db.vct_anchor_digest(), golden_anchors, "manual-to-fast anchors match legacy"); + prop_assert_eq!(fast_suffix.db.history_tree().hash(), golden_history, "manual-to-fast history matches legacy"); + prop_assert_eq!(fast_suffix_tip.sapling.root(), golden_tip.sapling.root(), "manual-to-fast sapling tip matches legacy"); + prop_assert_eq!(fast_suffix_tip.orchard.root(), golden_tip.orchard.root(), "manual-to-fast orchard tip matches legacy"); + prop_assert_eq!(fast_suffix_tip.sprout.root(), golden_tip.sprout.root(), "manual-to-fast sprout tip matches legacy"); + }); + + Ok(()) +} + +/// Standalone test isolating the verify-before-commit **dedup**: each header +/// commitment is checked once, not twice. +/// +/// - **Skip:** the first fast block runs its own commitment check; the next one +/// is skipped, because the first block's look-ahead already validated it. 
+/// - **Stale-cache guard:** a cache entry with the right height but the *wrong* +/// hash must not trigger a skip — the guard forces the own check to run, so a +/// stale or mismatched entry can never let an unverified block through. +/// - **Wrapper-hash guard:** a public `CheckpointVerifiedBlock::with_hash` caller +/// cannot replay a stale cached successor hash onto a different block. +#[test] +fn vct_dedup_skips_redundant_check_and_guards_stale_cache() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = ParametersBuilder::default() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(10), + sapling: Some(15), + blossom: Some(20), + heartwood: Some(25), + canopy: Some(30), + nu5: Some(35), + nu6: Some(40), + nu6_1: Some(45), + nu6_2: Some(47), + nu6_3: Some(48), + nu7: Some(50), + }) + .expect("failed to set activation heights") + .extend_funding_streams() + .to_network() + .expect("failed to build configured network"); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), None::, None, false); + + proptest!(ProptestConfig::with_cases(1), + |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| { + + let blocks: Vec<_> = chain.iter().collect(); + let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0 as usize; + + // Seed just before NU5, then operate on four consecutive fast blocks so + // the forged-wrapper regression exercises `hashBlockCommitments`. + let seed = nu5 - 2; + let last = seed + 4; + prop_assert!(blocks.len() > last + 1, "generated chain unexpectedly short"); + + // Legacy pass to record the correct per-block roots as the fixture. 
+ let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + let mut fixture = std::collections::HashMap::new(); + for (i, prepared) in blocks.iter().take(last + 1).enumerate() { + let cv = CheckpointVerifiedBlock::from(prepared.block.clone()); + let (_h, trees) = legacy + .commit_finalized_direct(cv.into(), None, None, None, "vct dedup legacy") + .unwrap(); + if i > seed { + fixture.insert(i as u32, (trees.sapling.root(), trees.orchard.root())); + } + } + + let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + enable_vct_test_fixture_source(&mut fast, fixture); + + // Commit block `i` with its real successor as the one-block look-ahead. + let commit = |fast: &mut FinalizedState, i: usize| { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = Some((blocks[i + 1].block.clone(), None)); + fast.commit_finalized_direct(cv.into(), None, None, next, "vct dedup fast") + .expect("verified fast commit succeeds"); + }; + + // genesis..=seed take the recompute path (no fixture entries), so the dedup + // never engages here. + for i in 0..=seed { + commit(&mut fast, i); + } + prop_assert_eq!(fast.vct_prevalidated_count(), 0, "no fast blocks committed yet"); + + // First fast block: no cached predecessor, so it runs its own check. + commit(&mut fast, seed + 1); + prop_assert_eq!(fast.vct_prevalidated_count(), 0, "the first fast block runs its own commitment check"); + + // Second fast block: its predecessor's look-ahead already validated it, + // so the own check is skipped — the dedup engages. + commit(&mut fast, seed + 2); + prop_assert_eq!(fast.vct_prevalidated_count(), 1, "the second fast block skips its redundant own commitment check"); + + // Stale-cache guard: overwrite the cache with the correct height but the + // hash of a *different* block. The next commit must NOT skip. 
+ let stale_hash = blocks[seed + 1].hash; + prop_assert_ne!(stale_hash, blocks[seed + 3].hash, "stale hash must differ from the real block"); + fast.vct_prevalidated_next = Some((Height((seed + 3) as u32), stale_hash)); + commit(&mut fast, seed + 3); + prop_assert_eq!(fast.vct_prevalidated_count(), 1, "a stale cache entry (wrong hash) must not cause a false skip"); + + // Public wrapper-hash guard: the stale cache records a real look-ahead + // hash, but a caller-controlled checkpoint wrapper tries to replay that + // hash onto a different block whose own NU5 header commitment is invalid. + // The skip must compare the cache against the wrapped block's real hash, + // not the wrapper hash, so the bad commitment is checked and rejected. + let forged_wrapper_hash = blocks[seed + 2].hash; + let bad_block = blocks[seed + 4].block.clone().set_block_commitment([0x42; 32]); + let bad_block_hash = bad_block.hash(); + prop_assert_ne!( + forged_wrapper_hash, + bad_block_hash, + "the forged wrapper hash must differ from the bad block's real hash", + ); + fast.vct_prevalidated_next = + Some((Height((seed + 4) as u32), forged_wrapper_hash)); + let forged = CheckpointVerifiedBlock::with_hash(bad_block, forged_wrapper_hash); + let error = fast + .commit_finalized_direct(forged.into(), None, None, None, "vct forged wrapper hash") + .expect_err("a forged wrapper hash must not skip the bad block's own commitment check"); + prop_assert!( + format!("{error:?}").contains("VctSuppliedRootUnavailable"), + "the forged wrapper hash path must reject the bad commitment, got: {error:?}", + ); + prop_assert_eq!( + fast.vct_prevalidated_count(), + 1, + "the forged wrapper hash must not increment the prevalidated count", + ); + prop_assert_eq!( + fast.db.finalized_tip_height(), + Some(Height((seed + 3) as u32)), + "the rejected forged block must leave finalized state untouched", + ); + }); + + Ok(()) +} + +/// Clearing a cached VCT successor prevalidation must disarm exactly one possible +/// 
skip without disabling the normal dedup optimization for future contiguous fast +/// blocks. This covers the write-loop reset/drop behavior indirectly: those paths +/// call `clear_vct_prevalidated_next()` when buffered checkpoint state is discarded. +#[test] +fn vct_clear_prevalidation_cache_disarms_skip_then_dedup_resumes() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = ParametersBuilder::default() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(10), + sapling: Some(15), + blossom: Some(20), + heartwood: Some(25), + canopy: Some(30), + nu5: Some(35), + nu6: Some(40), + nu6_1: Some(45), + nu6_2: Some(47), + nu6_3: Some(48), + nu7: Some(50), + }) + .expect("failed to set activation heights") + .extend_funding_streams() + .to_network() + .expect("failed to build configured network"); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), None::, None, false); + + proptest!(ProptestConfig::with_cases(1), + |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| { + + let blocks: Vec<_> = chain.iter().collect(); + let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0 as usize; + let seed = nu5 - 2; + let last = seed + 5; + prop_assert!(blocks.len() > last + 1, "generated chain unexpectedly short"); + + let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + let mut fixture = std::collections::HashMap::new(); + for (i, prepared) in blocks.iter().take(last + 1).enumerate() { + let cv = CheckpointVerifiedBlock::from(prepared.block.clone()); + let (_h, trees) = legacy + .commit_finalized_direct(cv.into(), None, None, None, "vct clear legacy") + .unwrap(); + if i > seed { + fixture.insert(i as u32, (trees.sapling.root(), trees.orchard.root())); + } + } + + let mut fast = 
FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + enable_vct_test_fixture_source(&mut fast, fixture); + + let commit = |fast: &mut FinalizedState, i: usize| { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = Some((blocks[i + 1].block.clone(), None)); + fast.commit_finalized_direct(cv.into(), None, None, next, "vct clear fast") + .expect("verified fast commit succeeds"); + }; + + for i in 0..=seed { + commit(&mut fast, i); + } + commit(&mut fast, seed + 1); + prop_assert_eq!(fast.vct_prevalidated_count(), 0, "first fast block runs its own check"); + + commit(&mut fast, seed + 2); + prop_assert_eq!(fast.vct_prevalidated_count(), 1, "second fast block uses predecessor look-ahead"); + + fast.clear_vct_prevalidated_next(); + commit(&mut fast, seed + 3); + prop_assert_eq!( + fast.vct_prevalidated_count(), + 1, + "clearing the cache forces the next fast block to run its own check", + ); + + commit(&mut fast, seed + 4); + prop_assert_eq!( + fast.vct_prevalidated_count(), + 2, + "normal successor dedup resumes after the cleared block commits", + ); + }); + + Ok(()) +} + +/// Increment-3 contract proof: a roots/frontier payload **produced from a database** +/// (the serving read path) can replace the fixture and drives the fast path to +/// byte-identical consensus state. +/// +/// Builds an archive/legacy state over a generated valid-commitment chain (crossing +/// Heartwood and NU5), produces the per-block roots and final frontier from that DB +/// via [`commitment_aux::produce_block_roots`] / [`commitment_aux::produce_final_frontiers`], +/// then drives a fresh fast-sync state that consumes the produced payload through the +/// test-only [`commitment_aux::FixtureSource`]. Asserts the fast anchors + history-tree hash are +/// byte-identical to the legacy build, and that the produced final frontier agrees with +/// the legacy tip frontier and the produced root at the handoff height. 
+/// +/// This is coverage the existing equivalence test lacks: there the roots are captured +/// from the committer's inline-returned trees, here they come from the **DB read path** +/// a serving node runs. No networking and no DB-format change. +#[test] +#[allow(clippy::needless_range_loop)] // the loops index blocks[i+1] (the look-ahead) and by height +fn vct_db_produced_payload_round_trips_to_byte_identical_state() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = ParametersBuilder::default() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(10), + sapling: Some(15), + blossom: Some(20), + heartwood: Some(25), + canopy: Some(30), + nu5: Some(35), + nu6: Some(40), + nu6_1: Some(45), + nu6_2: Some(47), + nu6_3: Some(48), + nu7: Some(50), + }) + .expect("failed to set activation heights") + .extend_funding_streams() + .to_network() + .expect("failed to build configured network"); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), None::, None, false); + + proptest!(ProptestConfig::with_cases(1), + |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| { + + let blocks: Vec<_> = chain.iter().collect(); + let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0; + let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0; + let last = (nu5 + 3) as usize; + prop_assert!(blocks.len() > last + 1, "generated chain unexpectedly short"); + // Seed below Heartwood so the fast range creates the history tree and + // crosses the NU5 V1->V2 boundary, matching the equivalence test. + let seed = (heartwood - 1) as usize; + + // Legacy/archive pass: a real DB with per-height trees, plus the golden state. 
+ let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + for block in blocks.iter().take(last + 1) { + let cv = CheckpointVerifiedBlock::from(block.block.clone()); + legacy + .commit_finalized_direct(cv.into(), None, None, None, "vct round-trip legacy") + .unwrap(); + } + let golden_anchors = legacy.db.vct_anchor_digest(); + let golden_history = legacy.db.history_tree().hash(); + + // Produce the payload from the legacy DB's per-height trees (the serving read path). + let last_height = Height(last as u32); + let produced_roots = commitment_aux::produce_block_roots( + &legacy.db, + Height((seed + 1) as u32)..=last_height, + ); + let produced_frontiers = commitment_aux::produce_final_frontiers(&legacy.db, last_height) + .expect("legacy DB has the tip frontier"); + + // The produced final frontier agrees with the legacy tip frontier and with the + // produced root at the handoff height (the two producer outputs are consistent). + let handoff = produced_roots.last().expect("produced a non-empty range"); + prop_assert_eq!(produced_frontiers.sapling.root(), handoff.sapling_root, "produced sapling frontier matches the produced root at handoff"); + prop_assert_eq!(produced_frontiers.orchard.root(), handoff.orchard_root, "produced orchard frontier matches the produced root at handoff"); + prop_assert_eq!(produced_frontiers.sapling.root(), legacy.db.sapling_tree_by_height(&last_height).unwrap().root(), "produced sapling frontier matches legacy tip"); + prop_assert_eq!(produced_frontiers.sprout.root(), legacy.db.sprout_tree_for_tip().root(), "produced sprout frontier matches legacy tip"); + + // Consume the DB-produced roots in a fresh fast-sync state. 
+ let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + let produced_roots = produced_roots + .into_iter() + .map(|root| (root.height.0, (root.sapling_root, root.orchard_root))) + .collect(); + fast.enable_vct_fast_source( + Box::new(commitment_aux::FixtureSource::new(produced_roots, None)), + false, + ); + for i in 0..=last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = Some((blocks[i + 1].block.clone(), None)); + fast.commit_finalized_direct(cv.into(), None, None, next, "vct round-trip fast") + .expect("verified fast commit from DB-produced roots succeeds"); + } + + prop_assert_eq!(fast.db.vct_anchor_digest(), golden_anchors, "fast anchors from DB-produced roots match legacy"); + prop_assert_eq!(fast.db.history_tree().hash(), golden_history, "fast history from DB-produced roots match legacy"); + + // Serving stitch across the upgrade height `U`. Simulate a node that upgraded + // mid-chain: it keeps the full per-height trees (written before the upgrade) but only + // has the serving index from `U` upward. `serve_block_roots` must still return the + // whole requested range as one contiguous run — trees fill `[start, U)`, the index + // fills `[U, end]` — matching the all-trees reference, with no short batch at the + // boundary that would stall the client's minimum-progress check. 
+ let serve_range = Height((seed + 1) as u32)..=last_height; + let all_trees_reference = + commitment_aux::produce_block_roots(&legacy.db, serve_range.clone()); + let upgrade = Height(((seed + 1 + last) / 2) as u32); + prop_assert!( + serve_range.start() < &upgrade && upgrade <= last_height, + "the chosen upgrade height splits the served range" + ); + let mut batch = DiskWriteBatch::new(); + batch.delete_range_commitment_roots_by_height(&legacy.db, &Height(0), &upgrade); + batch.update_vct_upgrade_marker(&legacy.db, upgrade); + legacy + .db + .write_batch(batch) + .expect("simulating a mid-chain upgrade succeeds"); + prop_assert!( + legacy + .db + .commitment_roots_by_height_range(Height(0)..=Height(upgrade.0 - 1)) + .is_empty(), + "the serving index is dropped below the upgrade height" + ); + let stitched = serve_block_roots(&legacy.db, serve_range); + prop_assert_eq!( + stitched, + all_trees_reference, + "serve_block_roots stitches the trees below U with the index at/above U into one gap-free run" + ); + }); + + Ok(()) +} + +/// Verified-commitment-trees consumer half of the `tree_aux` peer source (increment 6a): +/// a [`commitment_aux::PeerSource`] **filled incrementally** by its writer handle (as the +/// driver fills it when root ranges arrive from peers) drives the fast path to +/// byte-identical consensus state. Same harness as the DB-produced round-trip, but the +/// produced roots are inserted into the shared cache in two chunks via +/// [`commitment_aux::PeerSourceWriter`] — proving the fillable, driver-facing source is a +/// drop-in for the fixture. (The network transport that fills it is the rest of 6a.) 
+#[test] +#[allow(clippy::needless_range_loop)] // the loops index blocks[i+1] (the look-ahead) and by height +fn vct_peer_source_filled_incrementally_drives_byte_identical_state() -> Result<()> { + let _init_guard = zebra_test::init(); + + let network = ParametersBuilder::default() + .with_activation_heights(ConfiguredActivationHeights { + before_overwinter: Some(1), + overwinter: Some(10), + sapling: Some(15), + blossom: Some(20), + heartwood: Some(25), + canopy: Some(30), + nu5: Some(35), + nu6: Some(40), + nu6_1: Some(45), + nu6_2: Some(47), + nu6_3: Some(48), + nu7: Some(50), + }) + .expect("failed to set activation heights") + .extend_funding_streams() + .to_network() + .expect("failed to build configured network"); + let ledger_strategy = + LedgerState::genesis_strategy(Some(network), None::, None, false); + + proptest!(ProptestConfig::with_cases(1), + |((chain, _count, network, _history_tree) in PreparedChain::default().with_ledger_strategy(ledger_strategy.clone()).with_valid_commitments().no_shrink())| { + + let blocks: Vec<_> = chain.iter().collect(); + let nu5 = NetworkUpgrade::Nu5.activation_height(&network).unwrap().0; + let heartwood = NetworkUpgrade::Heartwood.activation_height(&network).unwrap().0; + // The untrusted peer source defers any fast block whose own root has no buffered + // successor, so every committed fast block needs `blocks[i + 1]`. Keep `last` one + // below the chain tip so the deepest commit still has a successor witness. + let last = ((nu5 + 3) as usize).min(blocks.len().saturating_sub(2)); + prop_assert!(last > (nu5 as usize), "generated chain unexpectedly short"); + let seed = (heartwood - 1) as usize; + + // Legacy/archive pass: a real DB with per-height trees, plus the golden state. 
+ let mut legacy = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + for block in blocks.iter().take(last + 1) { + let cv = CheckpointVerifiedBlock::from(block.block.clone()); + legacy + .commit_finalized_direct(cv.into(), None, None, None, "vct peer-source legacy") + .unwrap(); + } + let golden_anchors = legacy.db.vct_anchor_digest(); + let golden_history = legacy.db.history_tree().hash(); + + // Produce the payload from the legacy DB (the serving read path). + let produced_roots = commitment_aux::produce_block_roots( + &legacy.db, + Height((seed + 1) as u32)..=Height(last as u32), + ); + + // Fill the peer source incrementally via its writer, in two chunks, as the + // driver would when successive root ranges arrive from a peer. + let (peer_source, writer) = commitment_aux::PeerSource::new(None); + let split = produced_roots.len() / 2; + writer.insert_roots(produced_roots[..split].iter().cloned()); + writer.insert_roots(produced_roots[split..].iter().cloned()); + + // Consume the peer-source-supplied roots in a fresh fast-sync state. Each fast + // block is committed with its successor buffered, as the write loop does — the + // untrusted source defers a tip commit with no successor (covered by + // `vct_peer_source_defers_unverifiable_tip_root_until_successor`). 
+ let mut fast = FinalizedState::new(&Config::ephemeral(), &network, #[cfg(feature = "elasticsearch")] false); + fast.enable_vct_fast_source(Box::new(peer_source), true); + for i in 0..=last { + let cv = CheckpointVerifiedBlock::from(blocks[i].block.clone()); + let next = Some((blocks[i + 1].block.clone(), None)); + fast.commit_finalized_direct(cv.into(), None, None, next, "vct peer-source fast") + .expect("verified fast commit from peer-source roots succeeds"); + } + + prop_assert_eq!(fast.db.vct_anchor_digest(), golden_anchors, "fast anchors from peer-source roots match legacy"); + prop_assert_eq!(fast.db.history_tree().hash(), golden_history, "fast history from peer-source roots match legacy"); + }); + + Ok(()) +} diff --git a/zebra-state/src/service/finalized_state/tests/rollback.rs b/zebra-state/src/service/finalized_state/tests/rollback.rs index 775e6b2b0ee..9488818978a 100644 --- a/zebra-state/src/service/finalized_state/tests/rollback.rs +++ b/zebra-state/src/service/finalized_state/tests/rollback.rs @@ -79,7 +79,13 @@ fn sync_to(config: &Config, network: &Network, blocks: &[SemanticallyVerifiedBlo for block in blocks { let checkpoint_verified = CheckpointVerifiedBlock::from(block.block.clone()); state - .commit_finalized_direct(checkpoint_verified.into(), None, "rollback test") + .commit_finalized_direct( + checkpoint_verified.into(), + None, + None, + None, + "rollback test", + ) .expect("committing a generated block to a fresh state succeeds"); } } diff --git a/zebra-state/src/service/finalized_state/tests/transparent.rs b/zebra-state/src/service/finalized_state/tests/transparent.rs index d6ca53b36f5..c10689d1868 100644 --- a/zebra-state/src/service/finalized_state/tests/transparent.rs +++ b/zebra-state/src/service/finalized_state/tests/transparent.rs @@ -128,6 +128,7 @@ fn intra_block_self_spend_chain_in_finalized_state() { new_outputs, transaction_hashes, deferred_pool_balance_change: None, + auth_data_root: None, }; let finalized = 
FinalizedBlock::from_checkpoint_verified( CheckpointVerifiedBlock(semantically_verified), diff --git a/zebra-state/src/service/finalized_state/vct.rs b/zebra-state/src/service/finalized_state/vct.rs new file mode 100644 index 00000000000..1c5d57fdd4d --- /dev/null +++ b/zebra-state/src/service/finalized_state/vct.rs @@ -0,0 +1,676 @@ +//! Verified-commitment-trees fast-sync experiment state (POC harness). +//! +//! This module holds the embedded-frontier plumbing and run counters for the +//! verified-commitment-trees fast path. On networks with an embedded handoff frontier, +//! the default source is the peer `tree_aux` source. `checkpoint_sync = false` or +//! `consensus.disable_vct_fast_sync = true` selects legacy recompute. +//! +//! [`super`] (`finalized_state.rs`) holds only the commit-path hook (the checkpoint +//! handoff write and the fast-sync marker); everything about *where the data comes +//! from* lives here, behind a small method API so the commit path never touches the +//! experiment's internals. + +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; + +use thiserror::Error; +#[cfg(test)] +use zebra_chain::parallel::tree::NoteCommitmentTrees; +use zebra_chain::{ + block, orchard, + parameters::{Network, NetworkUpgrade}, + sapling, sprout, +}; + +use super::{ + commitment_aux::{CommitmentRootSource, FinalFrontiers, PeerSource}, + ZebraDb, +}; + +/// Embedded verified final note-commitment frontiers for Mainnet. +const MAINNET_FINAL_FRONTIERS: &[u8] = include_bytes!("vct/mainnet-frontier.bin"); + +/// Errors validating serialized VCT final-frontier bytes. +#[derive(Clone, Debug, Eq, Error, PartialEq)] +pub enum FinalFrontiersValidationError { + /// The bytes could not be parsed as [`FinalFrontiers`]. + #[error("invalid VCT final frontier bytes: {error}")] + InvalidBytes { + /// The parser error message. + error: String, + }, + + /// The serialized frontier height does not match the expected checkpoint handoff height. 
+ #[error("embedded VCT final frontier height must match the network's max checkpoint height")] + HeightMismatch { + /// Height encoded in the serialized frontier. + actual: block::Height, + /// Expected checkpoint handoff height. + expected: block::Height, + }, +} + +/// POC state for the verified-commitment-trees experiment +/// (`docs/design/verified-commitment-trees.md`). Shared across +/// [`super::FinalizedState`] clones via `Arc` so the counters are shared. +/// +/// A checkpoint-trusting sync (`checkpoint_sync = true`) uses the peer `tree_aux` source by +/// default on networks with embedded final frontiers; `checkpoint_sync = false` or +/// `disable_vct_fast_sync = true` opts out to the legacy per-block recompute (no VCT state). +#[derive(Debug)] +pub(crate) struct VctState { + /// Fast mode: skip the per-block frontier recompute and fold the source's roots + /// into the anchor set + history tree. + fast: bool, + /// Where the verified per-block roots and handoff frontiers come from. The + /// committer reads roots/handoff/frontiers through this seam only. + source: Box, + /// Whether roots from this VCT state must be confirmed against a buffered successor + /// before they are committed. + requires_verified_successor: bool, + /// Count of blocks that took the fast (skip-recompute) path, for the run summary. + fast_count: AtomicU64, + /// Count of fast blocks whose own commitment check was skipped because the + /// previous block's look-ahead already validated it (the dedup). Lets tests + /// assert the dedup actually engages, so it can't be silently regressed. + prevalidated_count: AtomicU64, +} + +/// Which commitment-root source the committer uses, resolved from the (already read) +/// configuration signals. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum SourceMode { + /// Legacy recompute committer (no VCT state). + Legacy, + /// Fetch per-block roots from peers — the default where embedded frontiers exist. 
+ Peer, +} + +/// Resolve the source mode as a pure function, so the peer-source default is +/// unit-testable without touching embedded-frontier files. The fast verified path +/// (peer source) is the default whenever the node syncs under checkpoint trust and +/// the network has an embedded handoff frontier. `checkpoint_sync = false` or +/// `disable_vct_fast_sync = true` selects the legacy recompute; a network with no embedded +/// frontier also falls back to legacy. Storage mode (Archive vs. Pruned) is orthogonal and not +/// an input here. +fn select_source_mode( + checkpoint_sync: bool, + disable_vct_fast_sync: bool, + has_embedded_frontiers: bool, +) -> SourceMode { + if !checkpoint_sync || disable_vct_fast_sync || !has_embedded_frontiers { + SourceMode::Legacy + } else { + SourceMode::Peer + } +} + +impl VctState { + /// Build the committer state from `checkpoint_sync` (the mirror of + /// `consensus.checkpoint_sync`) and the `disable_vct_fast_sync` force-disable knob. + /// On networks with an embedded handoff frontier (Mainnet) a checkpoint-trusting sync + /// defaults to the peer (`tree_aux`) fast source; disabling checkpoint sync, setting the + /// force-disable knob, or using a network without an embedded frontier returns `None` for a + /// zero-overhead legacy committer that recomputes the trees per block. + pub(super) fn from_config( + checkpoint_sync: bool, + disable_vct_fast_sync: bool, + network: &Network, + db: ZebraDb, + ) -> Option> { + // Parse the embedded handoff frontier once (None on networks without one, e.g. + // Testnet). The decision below only needs its presence; the peer arm reuses the + // parsed value. + let embedded = embedded_final_frontiers(network); + + match select_source_mode(checkpoint_sync, disable_vct_fast_sync, embedded.is_some()) { + // Default: the peer (`tree_aux`) source on any network with embedded final + // frontiers (Mainnet). 
Per-block roots arrive from peers into a shared cache + // filled by the driver; the committer reads them per height and folds them in, + // skipping the recompute. A height the peer cannot supply — or any node with no + // serving peers — stays in legacy mode, bit-identical to a legacy committer by + // construction (the precompute overlap is preserved for those blocks; see + // `vct_fast_will_apply`). + SourceMode::Peer => { + let parsed = embedded?; + tracing::info!( + handoff_height = parsed.height.0, + "VCT: peer (tree_aux) source enabled by default — roots fetched from peers" + ); + let source = PeerSource::new_with_db(db, Some(parsed)); + Some(Arc::new(VctState { + fast: true, + source: Box::new(source), + requires_verified_successor: true, + fast_count: AtomicU64::new(0), + prevalidated_count: AtomicU64::new(0), + })) + } + + // Legacy committer: full per-block recompute when checkpoint sync is disabled, the + // force-disable knob is set, or the network has no embedded frontiers. No VCT state, + // zero overhead. + SourceMode::Legacy => None, + } + } + + /// `true` when the fast (skip-recompute) path is active. + pub(super) fn is_fast(&self) -> bool { + self.fast + } + + /// The supplied roots for `height`, when vct mode has a source entry for it + /// (the signal that this block takes the fast path). + pub(super) fn vct_roots_at_height( + &self, + height: block::Height, + ) -> Option<(sapling::tree::Root, orchard::tree::Root)> { + if !self.fast { + return None; + } + + if self + .source + .vct_last_checkpoint_height() + .is_some_and(|handoff| height > handoff) + { + return None; + } + + self.source.vct_root(height) + } + + /// `true` when committing `height` on the vct path needs a buffered successor before + /// it can safely persist this block's supplied roots. + /// + /// Only untrusted peer-supplied roots at or above Heartwood require this. 
The + /// checkpoint handoff is exempt because its embedded final frontiers are verified + /// against this block's roots before the real tip treestate is written; trusted + /// local fixtures can commit their tip root on the in-arrears check. + pub(super) fn vct_root_needs_successor( + &self, + height: block::Height, + network: &Network, + ) -> bool { + self.fast + && self.vct_roots_at_height(height).is_some() + && self.requires_verified_successor + && self + .source + .final_frontiers() + .is_none_or(|frontiers| frontiers.height != height) + && Some(height) >= NetworkUpgrade::Heartwood.activation_height(network) + } + + /// Discard the supplied root for `height` after it failed verification, so a re-fetch + /// can replace it. See + /// [`CommitmentRootSource::invalidate`](super::commitment_aux::CommitmentRootSource::invalidate). + pub(super) fn invalidate_fast_root(&self, height: block::Height) { + self.source.invalidate(height); + } + + /// Discard peer-supplied roots that are no longer needed after `height` has committed. + pub(super) fn evict_committed_roots_through(&self, height: block::Height) { + self.source.evict_committed_through(height); + } + + /// The checkpoint handoff height: the boundary below which the fast path skips + /// per-height note-commitment trees. `None` unless final frontiers are loaded. + pub(super) fn vct_sync_last_checkpoint_height(&self) -> Option { + self.source.vct_last_checkpoint_height() + } + + /// The verified `(sapling, orchard, sprout)` frontiers to write as the tip + /// treestate, when `height` is the checkpoint handoff height. + #[allow(clippy::type_complexity)] + pub(super) fn final_frontiers_for_last_checkpoint( + &self, + height: block::Height, + ) -> Option<( + Arc, + Arc, + Arc, + )> { + self.source + .final_frontiers() + .filter(|f| f.height == height) + .map(|f| (f.sapling.clone(), f.orchard.clone(), f.sprout.clone())) + } + + /// Record that a block took the fast (skip-recompute) path. 
+ pub(super) fn record_fast_block(&self) { + self.fast_count.fetch_add(1, Ordering::Relaxed); + } + + /// Record a fast block whose own commitment check was skipped by the dedup. + pub(super) fn record_prevalidated(&self) { + self.prevalidated_count.fetch_add(1, Ordering::Relaxed); + } + + /// Number of blocks that took the fast path so far. + pub(super) fn fast_count(&self) -> u64 { + self.fast_count.load(Ordering::Relaxed) + } + + /// Number of fast blocks whose own commitment check the dedup skipped. + #[cfg(test)] + pub(super) fn prevalidated_count(&self) -> u64 { + self.prevalidated_count.load(Ordering::Relaxed) + } + + /// Test-only: build fast-mode state from an arbitrary commitment-root source + /// (e.g. a payload produced from a database), so the producer→consumer round-trip + /// can be exercised without networking. + #[cfg(test)] + pub(super) fn test_with_source( + source: Box, + requires_verified_successor: bool, + ) -> Arc { + Arc::new(VctState { + fast: true, + source, + requires_verified_successor, + fast_count: AtomicU64::new(0), + prevalidated_count: AtomicU64::new(0), + }) + } +} + +/// The verified final frontiers embedded for `network`, if supported. +/// +/// Mainnet uses the constant embedded in the binary. Regtest has no fixed checkpoint — +/// its checkpoint list is derived at runtime from the mined chain — so there is no +/// committed frontier to embed; for deterministic e2e/integration testing of the fast +/// path on Regtest, the frontier is instead loaded from the file named by the +/// `VCT_REGTEST_FRONTIER` env var. This is scoped to **Regtest only** and validated +/// against the configured Regtest checkpoint height, so Mainnet always uses the +/// embedded constant and never reads the env. Other testnets have no frontier. 
+fn embedded_final_frontiers(network: &Network) -> Option { + match network { + Network::Mainnet => Some(parse_embedded_final_frontiers( + MAINNET_FINAL_FRONTIERS, + network.checkpoint_list().max_height(), + )), + Network::Testnet(params) if params.is_regtest() => { + let path = std::env::var_os("VCT_REGTEST_FRONTIER")?; + Some(load_frontier_file( + path.as_ref(), + network.checkpoint_list().max_height(), + )) + } + Network::Testnet(_) => None, + } +} + +/// Load and validate a final-frontier fixture file (the Regtest path; see +/// [`embedded_final_frontiers`]). Separated from the env read so it is unit-testable +/// without mutating process environment variables. +fn load_frontier_file(path: &std::ffi::OsStr, expected_height: block::Height) -> FinalFrontiers { + let bytes = + std::fs::read(path).expect("VCT_REGTEST_FRONTIER must name a readable final-frontier file"); + parse_embedded_final_frontiers(&bytes, expected_height) +} + +/// Parse embedded final frontiers and verify they match the checkpoint list. +fn parse_embedded_final_frontiers(bytes: &[u8], expected_height: block::Height) -> FinalFrontiers { + parse_final_frontiers_bytes(bytes, expected_height).unwrap_or_else(|error| panic!("{error}")) +} + +fn parse_final_frontiers_bytes( + bytes: &[u8], + expected_height: block::Height, +) -> Result { + let parsed = FinalFrontiers::from_bytes(bytes).map_err(|error| { + FinalFrontiersValidationError::InvalidBytes { + error: error.to_string(), + } + })?; + + if parsed.height != expected_height { + return Err(FinalFrontiersValidationError::HeightMismatch { + actual: parsed.height, + expected: expected_height, + }); + } + + Ok(parsed) +} + +/// Validate serialized VCT final-frontier bytes against an expected checkpoint handoff height. 
+pub fn validate_final_frontiers_bytes( + bytes: &[u8], + expected_height: block::Height, +) -> Result<(), FinalFrontiersValidationError> { + parse_final_frontiers_bytes(bytes, expected_height).map(|_| ()) +} + +/// Test/developer helper for producing embedded final-frontier bytes from a +/// legacy-computed tip treestate. +#[cfg(test)] +fn final_frontiers_bytes(height: block::Height, trees: &NoteCommitmentTrees) -> Vec { + FinalFrontiers { + height, + sapling: trees.sapling.clone(), + orchard: trees.orchard.clone(), + sprout: trees.sprout.clone(), + } + .to_bytes() +} + +#[cfg(test)] +mod tests { + use super::*; + + const EXPECTED_MAINNET_FINAL_SAPLING_ROOT: [u8; 32] = [ + 5, 88, 219, 64, 134, 21, 57, 124, 234, 59, 83, 8, 7, 143, 19, 29, 247, 58, 105, 80, 119, + 139, 242, 243, 206, 137, 211, 94, 151, 126, 154, 13, + ]; + const EXPECTED_MAINNET_FINAL_ORCHARD_ROOT: [u8; 32] = [ + 177, 173, 139, 203, 63, 186, 47, 172, 148, 107, 150, 204, 211, 212, 33, 155, 172, 108, 132, + 148, 70, 210, 120, 97, 219, 160, 58, 242, 198, 124, 44, 3, + ]; + const EXPECTED_MAINNET_FINAL_SPROUT_ROOT: [u8; 32] = [ + 77, 239, 224, 205, 90, 67, 51, 216, 15, 139, 120, 78, 55, 17, 177, 22, 246, 34, 206, 184, + 49, 7, 97, 172, 28, 178, 69, 208, 13, 101, 55, 169, + ]; + + #[test] + fn source_mode_precedence() { + use SourceMode::*; + // Args are (checkpoint_sync, disable_vct_fast_sync, has_embedded_frontiers). + + // The default: a checkpoint-trusting sync uses the peer source wherever embedded + // frontiers exist (Mainnet). Storage mode (Archive/Pruned) is not an input, so this + // covers both Archive and Pruned. + assert_eq!(select_source_mode(true, false, true), Peer); + // `disable_vct_fast_sync = true` keeps checkpoint sync on but forces the legacy + // recompute, regardless of embedded frontiers. 
+ assert_eq!(select_source_mode(true, true, true), Legacy); + assert_eq!(select_source_mode(true, true, false), Legacy); + // `checkpoint_sync = false` also fully recomputes the trees: legacy, never peer, + // regardless of the force-disable knob or embedded frontiers. + assert_eq!(select_source_mode(false, false, true), Legacy); + assert_eq!(select_source_mode(false, false, false), Legacy); + assert_eq!(select_source_mode(false, true, true), Legacy); + assert_eq!(select_source_mode(false, true, false), Legacy); + // No embedded frontiers (e.g. Testnet): legacy, never peer, even under checkpoint sync. + assert_eq!(select_source_mode(true, false, false), Legacy); + } + + #[test] + fn successor_policy_is_vct_state_data() { + let network = Network::Mainnet; + let height = NetworkUpgrade::Heartwood + .activation_height(&network) + .expect("mainnet has a Heartwood activation height"); + let root_map = + || std::iter::once((height.0, (Default::default(), Default::default()))).collect(); + + let trusted = VctState::test_with_source( + Box::new(super::super::commitment_aux::FixtureSource::new( + root_map(), + None, + )), + false, + ); + assert!( + !trusted.vct_root_needs_successor(height, &network), + "trusted fixture roots can commit without a buffered successor" + ); + + let untrusted = VctState::test_with_source( + Box::new(super::super::commitment_aux::FixtureSource::new( + root_map(), + None, + )), + true, + ); + assert!( + untrusted.vct_root_needs_successor(height, &network), + "untrusted roots defer until a buffered successor verifies them" + ); + } + + #[test] + fn vct_root_is_bounded_by_handoff_height() { + let handoff = block::Height(10); + let after_handoff = (handoff + 1).expect("test height is valid"); + let roots = std::collections::HashMap::from([ + (handoff.0, (Default::default(), Default::default())), + (after_handoff.0, (Default::default(), Default::default())), + ]); + let frontiers = FinalFrontiers { + height: handoff, + sapling: 
Arc::new(sapling::tree::NoteCommitmentTree::default()), + orchard: Arc::new(orchard::tree::NoteCommitmentTree::default()), + sprout: Arc::new(sprout::tree::NoteCommitmentTree::default()), + }; + + let bounded = VctState::test_with_source( + Box::new(super::super::commitment_aux::FixtureSource::new( + roots.clone(), + Some(frontiers), + )), + false, + ); + assert!( + bounded.vct_roots_at_height(handoff).is_some(), + "the handoff root remains fast-path eligible" + ); + assert!( + bounded.vct_roots_at_height(after_handoff).is_none(), + "roots above the handoff are ignored" + ); + + let unbounded = VctState::test_with_source( + Box::new(super::super::commitment_aux::FixtureSource::new( + roots, None, + )), + false, + ); + assert!( + unbounded.vct_roots_at_height(after_handoff).is_some(), + "sources without a handoff keep the existing fixture behavior" + ); + } + + #[test] + fn embedded_mainnet_final_frontiers_parse() { + let frontiers = embedded_final_frontiers(&Network::Mainnet) + .expect("mainnet has embedded final frontiers"); + + assert_eq!( + frontiers.height, + Network::Mainnet.checkpoint_list().max_height(), + "embedded frontier is tied to the last mainnet checkpoint" + ); + assert_eq!( + <[u8; 32]>::from(frontiers.sapling.root()), + EXPECTED_MAINNET_FINAL_SAPLING_ROOT, + "embedded mainnet final Sapling frontier root is pinned" + ); + assert_eq!( + <[u8; 32]>::from(frontiers.orchard.root()), + EXPECTED_MAINNET_FINAL_ORCHARD_ROOT, + "embedded mainnet final Orchard frontier root is pinned" + ); + assert_eq!( + <[u8; 32]>::from(frontiers.sprout.root()), + EXPECTED_MAINNET_FINAL_SPROUT_ROOT, + "embedded mainnet final Sprout frontier root is pinned" + ); + } + + #[test] + fn final_frontiers_capture_helper_serializes_tip_trees() { + let height = block::Height(3_358_006); + let trees = NoteCommitmentTrees::default(); + + let parsed = FinalFrontiers::from_bytes(&final_frontiers_bytes(height, &trees)) + .expect("captured final frontiers should parse"); + + 
assert_eq!(parsed.height, height, "captured height round-trips"); + assert_eq!( + parsed.sapling.root(), + trees.sapling.root(), + "captured sapling frontier round-trips" + ); + assert_eq!( + parsed.orchard.root(), + trees.orchard.root(), + "captured orchard frontier round-trips" + ); + assert_eq!( + parsed.sprout.root(), + trees.sprout.root(), + "captured sprout frontier round-trips" + ); + } + + #[test] + #[should_panic(expected = "embedded VCT final frontier height must match")] + fn embedded_final_frontiers_reject_checkpoint_height_mismatch() { + let frontiers = FinalFrontiers { + height: block::Height(1), + sapling: Arc::new(Default::default()), + orchard: Arc::new(Default::default()), + sprout: Arc::new(Default::default()), + }; + + let _ = parse_embedded_final_frontiers(&frontiers.to_bytes(), block::Height(2)); + } + + #[test] + fn final_frontiers_parser_rejects_short_height() { + let error = + FinalFrontiers::from_bytes(&[0, 1, 2]).expect_err("short height should be rejected"); + + assert_eq!( + error.to_string(), + "missing final frontier height: expected 4 bytes, got 3" + ); + } + + #[test] + fn final_frontiers_parser_rejects_missing_tree_length() { + let bytes = block::Height(1).0.to_le_bytes(); + + let error = + FinalFrontiers::from_bytes(&bytes).expect_err("missing length should be rejected"); + + assert_eq!( + error.to_string(), + "missing sapling frontier length prefix at byte 4: expected 4 bytes, got 0" + ); + } + + #[test] + fn final_frontiers_parser_rejects_truncated_tree_blob() { + let mut bytes = block::Height(1).0.to_le_bytes().to_vec(); + bytes.extend_from_slice(&3u32.to_le_bytes()); + bytes.extend_from_slice(&[0, 1]); + + let error = + FinalFrontiers::from_bytes(&bytes).expect_err("truncated blob should be rejected"); + + assert_eq!( + error.to_string(), + "truncated sapling frontier blob at byte 8: length prefix says 3 bytes, but only 2 remain" + ); + } + + #[test] + fn final_frontiers_parser_rejects_trailing_bytes() { + let bytes = 
FinalFrontiers { + height: block::Height(1), + sapling: Arc::new(Default::default()), + orchard: Arc::new(Default::default()), + sprout: Arc::new(Default::default()), + } + .to_bytes() + .into_iter() + .chain([0]) + .collect::>(); + + let error = + FinalFrontiers::from_bytes(&bytes).expect_err("trailing bytes should be rejected"); + + assert_eq!( + error.to_string(), + format!( + "unexpected trailing final frontier bytes at byte {}: 1 bytes", + bytes.len() - 1 + ) + ); + } + + #[test] + #[should_panic(expected = "invalid VCT final frontier bytes: truncated sapling frontier blob")] + fn embedded_final_frontiers_reject_malformed_bytes_with_context() { + let mut bytes = block::Height(1).0.to_le_bytes().to_vec(); + bytes.extend_from_slice(&3u32.to_le_bytes()); + bytes.extend_from_slice(&[0, 1]); + + let _ = parse_embedded_final_frontiers(&bytes, block::Height(1)); + } + + #[test] + fn embedded_final_frontiers_are_network_specific() { + assert!( + embedded_final_frontiers(&Network::new_default_testnet()).is_none(), + "testnet has no embedded final frontier until VCT fast sync supports it" + ); + } + + /// The Regtest frontier-file loader (the `VCT_REGTEST_FRONTIER` path) round-trips a + /// captured frontier and ties it to the expected checkpoint height — exercising the + /// producer (`to_bytes`) → loader (`load_frontier_file`) seam without env vars. 
+ #[test] + fn load_frontier_file_round_trips_a_captured_frontier() { + let height = block::Height(123); + let bytes = FinalFrontiers { + height, + sapling: Arc::new(Default::default()), + orchard: Arc::new(Default::default()), + sprout: Arc::new(Default::default()), + } + .to_bytes(); + + let path = + std::env::temp_dir().join(format!("vct-frontier-load-test-{}.bin", std::process::id())); + std::fs::write(&path, &bytes).expect("write temp frontier file"); + + let loaded = load_frontier_file(path.as_os_str(), height); + assert_eq!(loaded.height, height, "loaded frontier height matches"); + assert_eq!( + loaded.sapling.root(), + sapling::tree::NoteCommitmentTree::default().root(), + "loaded sapling frontier round-trips" + ); + + let _ = std::fs::remove_file(&path); + } + + /// A frontier whose height does not match the checkpoint height is rejected, so a + /// stale/wrong Regtest fixture cannot silently mis-seed the handoff. + #[test] + #[should_panic(expected = "embedded VCT final frontier height must match")] + fn load_frontier_file_rejects_height_mismatch() { + let bytes = FinalFrontiers { + height: block::Height(5), + sapling: Arc::new(Default::default()), + orchard: Arc::new(Default::default()), + sprout: Arc::new(Default::default()), + } + .to_bytes(); + let path = std::env::temp_dir().join(format!( + "vct-frontier-mismatch-test-{}.bin", + std::process::id() + )); + std::fs::write(&path, &bytes).expect("write temp frontier file"); + + let _ = load_frontier_file(path.as_os_str(), block::Height(6)); + } +} diff --git a/zebra-state/src/service/finalized_state/vct/mainnet-frontier.bin b/zebra-state/src/service/finalized_state/vct/mainnet-frontier.bin new file mode 100644 index 00000000000..eec96324fe9 Binary files /dev/null and b/zebra-state/src/service/finalized_state/vct/mainnet-frontier.bin differ diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index c4d86f3ecd0..be72207dfca 100644 --- 
a/zebra-state/src/service/finalized_state/zebra_db.rs +++ b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -39,9 +39,10 @@ pub mod metrics; /// preparation work (raw-transaction serialization and block-size summation) is /// run on the rayon pool instead of sequentially. /// -/// Below this, the rayon multi-threading overhead (waking workers, distributing the items, -/// and joining) outweighs the work itself. -/// The value was chosen by benchmarking over the sand-blasting region. +/// Below this, the rayon fork-join cost (waking workers, distributing the items, +/// and joining) outweighs the work itself. The parallel path is a clear win for +/// the large blocks in the heavy shielded region; for the small blocks of the +/// early chain it is pure overhead, so those run sequentially. pub(crate) const PARALLEL_BLOCK_TX_THRESHOLD: usize = 16; /// Minimum number of per-input/per-address database reads a block triggers before @@ -174,11 +175,25 @@ impl ZebraDb { ) } + db.run_blocking_format_repairs(network); db.spawn_format_change(format_change); db } + /// Run synchronous compatibility repairs before background format checks can read the DB. + pub fn run_blocking_format_repairs(&self, network: &Network) { + if self.debug_skip_format_upgrades { + return; + } + + // Repair incompatible stored history-tree bytes before the background + // format-validity check can read and panic on them. Healthy databases are + // a no-op, and read-only/offline-tool opens keep their existing + // skip-upgrade behavior. + rollback::repair_tip_history_tree_if_incompatible(self, network); + } + /// Launch any required format changes or format checks, and store their thread handle. 
pub fn spawn_format_change(&mut self, format_change: DbFormatChange) { if self.debug_skip_format_upgrades { diff --git a/zebra-state/src/service/finalized_state/zebra_db/block.rs b/zebra-state/src/service/finalized_state/zebra_db/block.rs index 44eeb7aa3a8..d8a9fd7a270 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block.rs @@ -22,7 +22,7 @@ use zebra_chain::{ amount::NonNegative, block::{self, Block, Height}, orchard, - parallel::tree::NoteCommitmentTrees, + parallel::{commitment_aux::BlockCommitmentRoots, tree::NoteCommitmentTrees}, parameters::{Network, GENESIS_PREVIOUS_BLOCK_HASH}, sapling, serialization::{CompactSizeMessage, TrustedPreallocate, ZcashSerialize as _}, @@ -43,10 +43,12 @@ use crate::{ disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk}, disk_format::{ block::TransactionLocation, + shielded::CommitmentRootsByHeight, transparent::{AddressBalanceLocationUpdates, OutputLocation}, }, zebra_db::{metrics::block_precommit_metrics, ZebraDb}, - FromDisk, IntoDisk, RawBytes, PRUNING_METADATA, + FromDisk, IntoDisk, RawBytes, PRUNING_METADATA, VCT_SYNC_METADATA, VCT_UPGRADE_METADATA, + ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT, }, HashOrHeight, }; @@ -512,6 +514,71 @@ impl ZebraDb { self.db.zs_get(&header_by_height, &height) } + /// Returns provisional Zakura header-ahead roots for the contiguous prefix of `range`. 
+ pub fn zakura_header_commitment_roots_by_height_range( + &self, + range: std::ops::RangeInclusive, + ) -> Vec { + let cf = self + .db + .cf_handle(ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT) + .unwrap(); + let mut roots = Vec::new(); + for height in (range.start().0..=range.end().0).map(Height) { + let Some(value) = self + .db + .zs_get::<_, _, CommitmentRootsByHeight>(&cf, &height) + else { + break; + }; + roots.push(BlockCommitmentRoots { + height, + sapling_root: value.sapling, + orchard_root: value.orchard, + }); + } + roots + } + + /// Persist provisional header-ahead roots supplied by Zakura header sync. + pub fn insert_zakura_header_commitment_roots( + &self, + roots: impl IntoIterator, + ) -> Result<(), rocksdb::Error> { + let cf = self + .db + .cf_handle(ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT) + .unwrap(); + let mut batch = DiskWriteBatch::new(); + for roots in roots { + batch.zs_insert( + &cf, + roots.height, + CommitmentRootsByHeight { + sapling: roots.sapling_root, + orchard: roots.orchard_root, + }, + ); + } + self.write_batch(batch) + } + + /// Delete provisional header-ahead roots by height. + pub fn delete_zakura_header_commitment_roots( + &self, + heights: impl IntoIterator, + ) -> Result<(), rocksdb::Error> { + let cf = self + .db + .cf_handle(ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT) + .unwrap(); + let mut batch = DiskWriteBatch::new(); + for height in heights { + batch.zs_delete(&cf, height); + } + self.write_batch(batch) + } + // The header readers below resolve from the consensus header column families // (`hash_by_height` / `height_by_hash` / `block_header_by_height`) *ungated* // by body availability, then fall back to the provisional Zakura frontier. 
@@ -726,6 +793,72 @@ impl ZebraDb { self.lowest_retained_height().is_some() } + // Verified-commitment-trees fast-sync methods + + /// Returns the checkpoint handoff height `H` of a verified-commitment-trees fast-synced + /// database: the upper (exclusive) bound of the band `[U, H)` in which per-height + /// note-commitment trees are absent. `U` is [`vct_upgrade_height`](Self::vct_upgrade_height). + /// + /// The fast path skips per-height trees only below the handoff; at and above `H`, semantic sync + /// writes them again. (Trees below the upgrade height `U` are also present — written before this + /// binary ran.) Returns `None` if the database was synced normally (per-height trees for every + /// height below the tip). Use [`vct_tree_absent`](Self::vct_tree_absent) to test a single + /// height rather than comparing against this bound directly. + pub fn vct_synced_below(&self) -> Option { + let vct_sync_metadata = self.db.cf_handle(VCT_SYNC_METADATA)?; + self.db.zs_get(&vct_sync_metadata, &()) + } + + /// Returns `true` if the database was built by the verified-commitment-trees + /// path, and therefore lacks per-height note-commitment trees below the + /// handoff height. The missing history is surfaced at the RPC boundary (§9); + /// it does not prevent reopening in any storage mode. + pub fn is_vct_synced(&self) -> bool { + self.vct_synced_below().is_some() + } + + /// Returns the verified-commitment-trees upgrade height `U`: the lowest height this binary + /// committed, equal to the lowest height in the `commitment_roots_by_height` serving index. + /// + /// Written once on the first committed block and never moved (see + /// [`VCT_UPGRADE_METADATA`](crate::service::finalized_state::VCT_UPGRADE_METADATA)). Heights + /// below `U` predate this binary, so they hold per-height trees but no index entry; heights at + /// or above `U` hold an index entry. 
Returns `None` for a database written before this marker + /// existed (a pre-index archive database), where every height is served from the trees. + pub fn vct_upgrade_height(&self) -> Option { + let vct_upgrade_metadata = self.db.cf_handle(VCT_UPGRADE_METADATA)?; + self.db.zs_get(&vct_upgrade_metadata, &()) + } + + /// Returns `true` if the per-height note-commitment tree at `height` was never written because + /// this is a vct-synced database, i.e. `height` falls in the absent band `[U, H)`. + /// + /// `U` is the upgrade height ([`vct_upgrade_height`](Self::vct_upgrade_height)) and `H` is the + /// checkpoint handoff ([`vct_synced_below`](Self::vct_synced_below)). The fast path skips + /// per-height trees only at and after the upgrade and only below the checkpoint: heights below + /// `U` keep their pre-upgrade trees, and heights at or above `H` get trees again from semantic + /// sync. Returns `false` for a normally-synced database (`H` is `None`). When `H` is set, `U` + /// is too (both are written by the commit path), but `U` defaults to genesis if ever absent, + /// which preserves the original "absent below `H`" behaviour. + pub fn vct_tree_absent(&self, height: Height) -> bool { + let Some(handoff) = self.vct_synced_below() else { + return false; + }; + let upgrade = self.vct_upgrade_height().unwrap_or(Height(0)); + upgrade <= height && height < handoff + } + + /// Returns `true` if `hash_or_height` resolves to a non-tip historical height + /// whose per-height note-commitment tree is unavailable because this is a + /// vct-synced database (the tree within the `[U, H)` absent band was never + /// written). Read-request handlers use this to return an archive-mode error + /// instead of a misleading "not found". 
+ pub fn vct_historical_tree_unavailable(&self, hash_or_height: HashOrHeight) -> bool { + hash_or_height + .height_or_else(|hash| self.height(hash)) + .is_some_and(|height| self.vct_tree_absent(height)) + } + /// Returns the half-open range of block heights `[from, until)` whose raw /// transaction data should be pruned when committing a block at `new_tip`, /// given the configured `retention` window. Returns `None` if there is @@ -799,6 +932,7 @@ impl ZebraDb { /// - Propagates any errors from computing the block's chain value balance change or /// from applying the change to the chain value balance #[allow(clippy::unwrap_in_result)] + #[allow(clippy::too_many_arguments)] pub(in super::super) fn write_block( &mut self, finalized: FinalizedBlock, @@ -806,6 +940,11 @@ impl ZebraDb { network: &Network, source: &str, retention: RetentionPlan, + // When `Some`, skip per-height tree writes and fold these roots into + // the anchor set. + vct_anchor_roots: Option<(sapling::tree::Root, orchard::tree::Root)>, + // When `Some(height)`, mark the database as vct-synced. + vct_sync_below: Option, ) -> Result { let tx_hash_indexes: HashMap = finalized .transaction_hashes @@ -839,35 +978,63 @@ impl ZebraDb { .flat_map(|input| input.outpoint()) .collect(); - let spent_utxos: Vec<(transparent::OutPoint, OutputLocation, transparent::Utxo)> = - if outpoints.len() >= super::PARALLEL_BLOCK_READ_THRESHOLD { - use rayon::prelude::*; - outpoints - .into_par_iter() - .map(|outpoint| { - read_spent_utxo( - self, - finalized.height, - outpoint, - &tx_hash_indexes, - &finalized.new_outputs, - ) - }) - .collect() - } else { - outpoints - .into_iter() - .map(|outpoint| { - read_spent_utxo( - self, - finalized.height, - outpoint, - &tx_hash_indexes, - &finalized.new_outputs, - ) - }) - .collect() - }; + // Serialize the raw transaction bytes for `tx_by_loc` concurrently with the + // spent-UTXO reads. 
Serialization is CPU-bound while the reads wait on disk, + // so overlapping them keeps the raw-tx serialization off the committer's + // serial critical path. The bytes are handed to `prepare_block_batch`; if + // `None` it serializes inline (e.g. the semantic path). + let store_raw_txs = retention.stores_raw_transactions(); + let db: &ZebraDb = self; + let (spent_utxos, precomputed_raw_txs): ( + Vec<(transparent::OutPoint, OutputLocation, transparent::Utxo)>, + Option>, + ) = rayon::join( + || { + if outpoints.len() >= super::PARALLEL_BLOCK_READ_THRESHOLD { + use rayon::prelude::*; + outpoints + .into_par_iter() + .map(|outpoint| { + read_spent_utxo( + db, + finalized.height, + outpoint, + &tx_hash_indexes, + &finalized.new_outputs, + ) + }) + .collect() + } else { + outpoints + .into_iter() + .map(|outpoint| { + read_spent_utxo( + db, + finalized.height, + outpoint, + &tx_hash_indexes, + &finalized.new_outputs, + ) + }) + .collect() + } + }, + || { + if store_raw_txs { + use rayon::prelude::*; + Some( + finalized + .block + .transactions + .par_iter() + .map(|transaction| RawBytes::new_raw_bytes(transaction.as_bytes())) + .collect(), + ) + } else { + None + } + }, + ); let spent_utxos_by_outpoint: HashMap = spent_utxos @@ -959,7 +1126,10 @@ impl ZebraDb { address_balances, self.finalized_value_pool(), prev_note_commitment_trees, - retention.stores_raw_transactions(), + store_raw_txs, + precomputed_raw_txs, + vct_anchor_roots, + vct_sync_below, )?; // In pruned storage mode, delete raw transaction history that has fallen @@ -1252,6 +1422,32 @@ impl RetentionPlan { } } +#[cfg(test)] +fn inferred_header_range_roots( + zebra_db: &ZebraDb, + anchor: block::Hash, + count: usize, +) -> Result, CommitHeaderRangeError> { + let anchor_height = zebra_db + .header_height(anchor) + .or_else(|| (anchor == zebra_db.network().genesis_hash()).then_some(block::Height(0))) + .unwrap_or(block::Height(0)); + + (0..count) + .map(|index| { + let offset = + u32::try_from(index + 
1).map_err(|_| CommitHeaderRangeError::HeightOverflow)?; + let height = (anchor_height + i64::from(offset)) + .ok_or(CommitHeaderRangeError::HeightOverflow)?; + Ok(BlockCommitmentRoots { + height, + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + }) + }) + .collect() +} + impl DiskWriteBatch { // Write block methods @@ -1282,13 +1478,22 @@ impl DiskWriteBatch { value_pool: ValueBalance, prev_note_commitment_trees: Option, store_raw_transactions: bool, + precomputed_raw_txs: Option>, + vct_anchor_roots: Option<(sapling::tree::Root, orchard::tree::Root)>, + vct_sync_below: Option, ) -> Result<(), CommitCheckpointVerifiedError> { // Commit block, transaction, and note commitment tree data. self.prepare_block_header_and_transaction_data_batch( zebra_db, finalized, store_raw_transactions, + precomputed_raw_txs, )?; + let zakura_header_commitment_roots_by_height = zebra_db + .db + .cf_handle(ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT) + .unwrap(); + self.zs_delete(&zakura_header_commitment_roots_by_height, finalized.height); // The consensus rules are silent on shielded transactions in the genesis block, // because there aren't any in the mainnet or testnet genesis blocks. @@ -1297,7 +1502,13 @@ impl DiskWriteBatch { // // In Zebra we include the nullifiers and note commitments in the genesis block because it simplifies our code. 
self.prepare_shielded_transaction_batch(zebra_db, finalized); - self.prepare_trees_batch(zebra_db, finalized, prev_note_commitment_trees); + self.prepare_trees_batch( + zebra_db, + finalized, + prev_note_commitment_trees, + vct_anchor_roots, + vct_sync_below, + ); // # Consensus // @@ -1406,6 +1617,7 @@ impl DiskWriteBatch { zebra_db: &ZebraDb, finalized: &FinalizedBlock, store_raw_transactions: bool, + precomputed_raw_txs: Option>, ) -> Result<(), CommitCheckpointVerifiedError> { let db = &zebra_db.db; @@ -1454,13 +1666,19 @@ impl DiskWriteBatch { // Serialize the raw transaction bytes up front: on heavy shielded blocks // this serialization dominates the per-block write cost, and each - // transaction serializes independently. + // transaction serializes independently. The result is byte-identical to + // inserting the transactions directly, because `RawBytes` is stored + // verbatim. The serialized bytes are inserted in height/index order below. // // Only fan out to rayon once the block has enough transactions to amortize - // the multithreading overhead. Small blocks serialize sequentially (see + // the fork-join cost; small blocks serialize sequentially (see // PARALLEL_BLOCK_TX_THRESHOLD). let raw_transactions: Vec = if !store_raw_transactions { Vec::new() + } else if let Some(precomputed) = precomputed_raw_txs { + // Serialized off the committer's critical path (overlapped with the + // spent-UTXO reads in `write_block`); use those bytes directly. + precomputed } else if block.transactions.len() >= super::PARALLEL_BLOCK_TX_THRESHOLD { use rayon::prelude::*; block @@ -1480,7 +1698,8 @@ impl DiskWriteBatch { let transaction_location = TransactionLocation::from_usize(*height, transaction_index); // Commit each transaction's raw bytes only when the storage policy - // keeps historical transaction data for this height. + // keeps historical transaction data for this height (then + // `raw_transactions` holds the pre-serialized bytes in order). 
if let Some(raw_transaction) = raw_transactions.get(transaction_index) { self.zs_insert(&tx_by_loc, transaction_location, raw_transaction); } @@ -1645,12 +1864,27 @@ impl DiskWriteBatch { } /// Prepare a database batch containing a contextually validated header range. + #[cfg(test)] pub fn prepare_header_range_batch( &mut self, zebra_db: &ZebraDb, anchor: block::Hash, headers: &[Arc], body_sizes: &[u32], + ) -> Result { + let roots = inferred_header_range_roots(zebra_db, anchor, headers.len())?; + self.prepare_header_range_batch_with_roots(zebra_db, anchor, headers, body_sizes, &roots) + } + + /// Prepare a database batch containing a contextually validated header range + /// and one provisional tree-aux root per header. + pub fn prepare_header_range_batch_with_roots( + &mut self, + zebra_db: &ZebraDb, + anchor: block::Hash, + headers: &[Arc], + body_sizes: &[u32], + tree_aux_roots: &[BlockCommitmentRoots], ) -> Result { if headers.is_empty() { return Err(CommitHeaderRangeError::EmptyRange); @@ -1663,6 +1897,13 @@ impl DiskWriteBatch { }); } + if headers.len() != tree_aux_roots.len() { + return Err(CommitHeaderRangeError::TreeAuxRootCountMismatch { + headers: headers.len(), + roots: tree_aux_roots.len(), + }); + } + if headers.len() > MAX_HEADER_SYNC_HEIGHT_RANGE as usize { return Err(CommitHeaderRangeError::RangeTooLong { actual: headers.len(), @@ -1676,6 +1917,10 @@ impl DiskWriteBatch { .db .cf_handle(ZAKURA_HEADER_BODY_SIZE_BY_HEIGHT) .unwrap(); + let roots_by_height = zebra_db + .db + .cf_handle(ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT) + .unwrap(); let anchor_height = zebra_db .header_height(anchor) @@ -1710,6 +1955,14 @@ impl DiskWriteBatch { .ok_or(CommitHeaderRangeError::HeightOverflow)?; let hash = block::Hash::from(&**header); let body_size = body_sizes[index]; + if let Some(roots) = tree_aux_roots.get(index) { + if roots.height != height { + return Err(CommitHeaderRangeError::TreeAuxRootHeightMismatch { + expected_height: height, + root_height: 
roots.height, + }); + } + } if let Some(expected) = checkpoints.hash(height) { if expected != hash { @@ -1820,10 +2073,12 @@ impl DiskWriteBatch { self.zs_delete(&hash_by_height, height); self.zs_delete(&header_by_height, height); self.zs_delete(&body_size_by_height, height); + self.zs_delete(&roots_by_height, height); } } - for (height, hash, header, body_size) in validated_headers { + for (index, (height, hash, header, body_size)) in validated_headers.into_iter().enumerate() + { self.zs_insert(&header_by_height, height, header); self.zs_insert(&hash_by_height, height, hash); self.zs_insert(&height_by_hash, hash, height); @@ -1832,6 +2087,17 @@ impl DiskWriteBatch { } else { self.zs_delete(&body_size_by_height, height); } + + if let Some(roots) = tree_aux_roots.get(index) { + self.zs_insert( + &roots_by_height, + height, + CommitmentRootsByHeight { + sapling: roots.sapling_root, + orchard: roots.orchard_root, + }, + ); + } } Ok(block::Hash::from( diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/prune.rs b/zebra-state/src/service/finalized_state/zebra_db/block/tests/prune.rs index 9bf0a03f4a0..0e63bc5cde5 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/prune.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/prune.rs @@ -49,7 +49,7 @@ fn new_state_with_blocks(config: &Config, network: &Network) -> FinalizedState { .expect("test data deserializes"); state - .commit_finalized_direct(block.into(), None, "prune tests") + .commit_finalized_direct(block.into(), None, None, None, "prune tests") .expect("test block is valid"); } @@ -80,7 +80,7 @@ fn new_state_with_checkpoint_retention( .expect("test data deserializes"); state - .commit_finalized_direct(block.into(), None, "checkpoint retention tests") + .commit_finalized_direct(block.into(), None, None, None, "checkpoint retention tests") .expect("test block is valid"); } @@ -351,7 +351,7 @@ fn checkpoint_retention_hands_off_to_online_pruning_at_start() 
{ .expect("test data deserializes"); state - .commit_finalized_direct(block.into(), None, "checkpoint handoff tests") + .commit_finalized_direct(block.into(), None, None, None, "checkpoint handoff tests") .expect("test block is valid"); } @@ -386,7 +386,7 @@ fn checkpoint_retention_hands_off_to_online_pruning_at_start() { .expect("test data deserializes"); state - .commit_finalized_direct(block.into(), None, "checkpoint handoff tests") + .commit_finalized_direct(block.into(), None, None, None, "checkpoint handoff tests") .expect("handoff block is valid"); let online_prune_until = @@ -630,7 +630,7 @@ fn archive_to_pruned_checkpoint_sync_drains_archive_raw_transactions_before_skip .expect("test data deserializes"); archive_state - .commit_finalized_direct(block.into(), None, "archive phase") + .commit_finalized_direct(block.into(), None, None, None, "archive phase") .expect("archive block is valid"); } @@ -670,7 +670,13 @@ fn archive_to_pruned_checkpoint_sync_drains_archive_raw_transactions_before_skip .expect("test data deserializes"); pruned_state - .commit_finalized_direct(block.into(), None, "archive to pruned checkpoint") + .commit_finalized_direct( + block.into(), + None, + None, + None, + "archive to pruned checkpoint", + ) .expect("checkpoint block is valid"); assert_eq!( @@ -728,7 +734,7 @@ fn archive_backlog_flag_is_recomputed_when_reopening_a_pruned_database() { .expect("test data deserializes"); archive_state - .commit_finalized_direct(block.into(), None, "archive phase") + .commit_finalized_direct(block.into(), None, None, None, "archive phase") .expect("archive block is valid"); } std::mem::drop(archive_state); @@ -761,7 +767,13 @@ fn archive_backlog_flag_is_recomputed_when_reopening_a_pruned_database() { .zcash_deserialize_into() .expect("test data deserializes"); pruned_state - .commit_finalized_direct(block.into(), None, "archive to pruned checkpoint") + .commit_finalized_direct( + block.into(), + None, + None, + None, + "archive to pruned 
checkpoint", + ) .expect("checkpoint block is valid"); assert_eq!( pruned_state.db.lowest_retained_height(), @@ -842,7 +854,13 @@ fn contextual_commits_keep_raw_transactions_before_checkpoint_retention_start() .zcash_deserialize_into() .expect("genesis test data deserializes"); state - .commit_finalized_direct(genesis.into(), None, "contextual retention tests") + .commit_finalized_direct( + genesis.into(), + None, + None, + None, + "contextual retention tests", + ) .expect("genesis block is valid"); let block: Arc = blocks @@ -858,7 +876,7 @@ fn contextual_commits_keep_raw_transactions_before_checkpoint_retention_start() let finalizable = FinalizableBlock::new(contextually_verified, Treestate::default()); state - .commit_finalized_direct(finalizable, None, "contextual retention tests") + .commit_finalized_direct(finalizable, None, None, None, "contextual retention tests") .expect("contextual block is valid"); assert!( @@ -1110,6 +1128,162 @@ fn reopening_pruned_database_in_archive_mode_panics() { ); } +#[test] +fn reopening_fast_synced_database_in_archive_mode_succeeds() { + let _init_guard = zebra_test::init(); + let network = Mainnet; + + let dir = tempfile::tempdir().expect("temp dir is created"); + let config = Config { + cache_dir: dir.path().to_path_buf(), + ephemeral: false, + ..Config::default() + }; + + // Commit blocks, write the verified-commitment-trees fast-sync marker, then drop + // the handle to release the database lock. + { + let state = new_state_with_blocks(&config, &network); + let mut batch = DiskWriteBatch::new(); + batch.update_vct_sync_marker(&state.db, Height(2)); + state.db.write_batch(batch).expect("marker batch writes"); + } + + // A completed fast-synced database can reopen in archive mode even when the initial-rollout + // force-disable knob selects manual recomputation. Fast sync deletes nothing; the missing + // historical trees are surfaced at the RPC boundary, not by refusing to reopen. 
+ let config = Config { + disable_vct_fast_sync: true, + ..config + }; + let reopened = FinalizedState::new( + &config, + &network, + #[cfg(feature = "elasticsearch")] + false, + ); + + assert_eq!( + reopened.db.vct_synced_below(), + Some(Height(2)), + "the fast-sync marker is preserved across the archive-mode reopen" + ); +} + +#[test] +fn reopening_fast_synced_database_in_pruned_mode_with_vct_disabled_succeeds() { + let _init_guard = zebra_test::init(); + let network = Mainnet; + + let dir = tempfile::tempdir().expect("temp dir is created"); + let config = Config { + cache_dir: dir.path().to_path_buf(), + ephemeral: false, + storage_mode: StorageMode::Pruned(PruningConfig { + tx_retention: MIN_PRUNING_RETENTION, + }), + ..Config::default() + }; + + // Commit blocks, write a completed fast-sync marker below the tip, then drop the handle to + // release the database lock. + { + let state = new_state_with_blocks(&config, &network); + let mut batch = DiskWriteBatch::new(); + batch.update_vct_sync_marker(&state.db, Height(2)); + state.db.write_batch(batch).expect("marker batch writes"); + } + + // Pruning only removes historical raw transaction bytes; it does not make a completed + // fast-sync marker unsafe to reopen with VCT force-disabled. 
+ let config = Config { + disable_vct_fast_sync: true, + ..config + }; + let reopened = FinalizedState::new( + &config, + &network, + #[cfg(feature = "elasticsearch")] + false, + ); + + assert_eq!( + reopened.db.vct_synced_below(), + Some(Height(2)), + "the fast-sync marker is preserved across the pruned-mode reopen" + ); +} + +#[test] +#[should_panic(expected = "interrupted below the checkpoint handoff")] +fn reopening_interrupted_fast_sync_without_a_root_source_panics() { + let _init_guard = zebra_test::init(); + let network = Mainnet; + + let dir = tempfile::tempdir().expect("temp dir is created"); + // `checkpoint_sync = false` selects the legacy committer (no VCT state), so nothing can + // supply the verified roots an interrupted fast sync needs to resume. + let config = Config { + cache_dir: dir.path().to_path_buf(), + ephemeral: false, + checkpoint_sync: false, + ..Config::default() + }; + + // Commit blocks (tip = TEST_BLOCKS), then write a fast-sync marker ABOVE the tip so the + // database looks like an interrupted fast sync (frozen frontier, tip below the handoff). + { + let state = new_state_with_blocks(&config, &network); + let mut batch = DiskWriteBatch::new(); + batch.update_vct_sync_marker(&state.db, Height(100)); + state.db.write_batch(batch).expect("marker batch writes"); + } + + // Reopening with the fast path disabled must refuse: the on-disk frontier is stale and no + // root source exists, so the committer would otherwise stall on every below-handoff block. + let _state = FinalizedState::new( + &config, + &network, + #[cfg(feature = "elasticsearch")] + false, + ); +} + +#[test] +#[should_panic(expected = "interrupted below the checkpoint handoff")] +fn reopening_interrupted_fast_sync_with_vct_disabled_panics() { + let _init_guard = zebra_test::init(); + let network = Mainnet; + + let dir = tempfile::tempdir().expect("temp dir is created"); + // Keep checkpoint sync enabled, but force-disable the VCT source. 
This should be just as + // unsafe as disabling checkpoint sync when the database is below a durable fast-sync marker. + let config = Config { + cache_dir: dir.path().to_path_buf(), + ephemeral: false, + disable_vct_fast_sync: true, + ..Config::default() + }; + + // Commit blocks (tip = TEST_BLOCKS), then write a fast-sync marker ABOVE the tip so the + // database looks like an interrupted fast sync (frozen frontier, tip below the handoff). + { + let state = new_state_with_blocks(&config, &network); + let mut batch = DiskWriteBatch::new(); + batch.update_vct_sync_marker(&state.db, Height(100)); + state.db.write_batch(batch).expect("marker batch writes"); + } + + // Reopening with the VCT force-disable knob must refuse: the on-disk frontier is stale and + // no root source exists, so the committer would otherwise stall on every below-handoff block. + let _state = FinalizedState::new( + &config, + &network, + #[cfg(feature = "elasticsearch")] + false, + ); +} + #[test] fn validate_storage_mode_enforces_retention_floor() { let pruned = |tx_retention| Config { diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs index 82a214099c6..7efcde8250e 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs @@ -195,7 +195,7 @@ fn test_block_and_transaction_data_with_network(network: Network) { .expect("test data deserializes"); state - .commit_finalized_direct(block.into(), None, "snapshot tests") + .commit_finalized_direct(block.into(), None, None, None, "snapshot tests") .expect("test block is valid"); let mut settings = insta::Settings::clone_current(); diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs b/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs index 770988a9404..289c76d837c 100644 --- 
a/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs @@ -12,6 +12,7 @@ use std::{iter, path::Path, sync::Arc}; +use super::super::RetentionPlan; use zebra_chain::{ block::{ self, @@ -22,11 +23,14 @@ use zebra_chain::{ Block, Height, }, block_info::BlockInfo, + orchard, + parallel::commitment_aux::BlockCommitmentRoots, parameters::{ testnet, Network::{self, *}, NetworkUpgrade, }, + sapling, serialization::{ZcashDeserializeInto, ZcashSerialize}, transparent::new_ordered_outputs_with_height, work::difficulty::ParameterDifficulty, @@ -428,6 +432,48 @@ fn committed_body_releases_only_its_height_and_keeps_the_frontier() { ); } +#[test] +fn write_block_deletes_matching_provisional_zakura_roots() { + let _init_guard = zebra_test::init(); + let genesis = zebra_test::vectors::BLOCK_MAINNET_GENESIS_BYTES + .zcash_deserialize_into::>() + .expect("genesis block deserializes"); + let block1 = zebra_test::vectors::BLOCK_MAINNET_1_BYTES + .zcash_deserialize_into::>() + .expect("block 1 deserializes"); + let mut state = ZebraDb::new( + &Config::ephemeral(), + STATE_DATABASE_KIND, + &state_database_format_version_in_code(), + &Mainnet, + true, + STATE_COLUMN_FAMILIES_IN_CODE + .iter() + .map(ToString::to_string), + false, + ); + let roots = [root_at(Height(1)), root_at(Height(2))]; + + write_full_block(&mut state, genesis); + state + .insert_zakura_header_commitment_roots(roots.clone()) + .expect("provisional roots write"); + assert_eq!( + state.zakura_header_commitment_roots_by_height_range(Height(1)..=Height(2)), + roots.to_vec() + ); + + write_full_block(&mut state, block1); + + assert!(state + .zakura_header_commitment_roots_by_height_range(Height(1)..=Height(1)) + .is_empty()); + assert_eq!( + state.zakura_header_commitment_roots_by_height_range(Height(2)..=Height(2)), + vec![root_at(Height(2))] + ); +} + /// Pruning-readiness guard: a committed height whose body is removed (as 
online /// pruning deletes `tx_by_loc` rows) keeps its header readable from the retained /// consensus `block_header_by_height`, because the header readers are not gated @@ -1129,6 +1175,32 @@ fn alternate_header( Arc::new(header) } +fn root_at(height: Height) -> BlockCommitmentRoots { + BlockCommitmentRoots { + height, + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + } +} + +fn write_full_block(state: &mut ZebraDb, block: Arc) { + let checkpoint_verified = CheckpointVerifiedBlock::from(block); + let finalized = + FinalizedBlock::from_checkpoint_verified(checkpoint_verified, Treestate::default()); + + state + .write_block( + finalized, + None, + &Mainnet, + "test", + RetentionPlan::Store, + None, + None, + ) + .expect("block commit succeeds"); +} + fn commit_header_range( state: &ZebraDb, anchor: block::Hash, @@ -1152,7 +1224,7 @@ fn write_full_block_header_and_transactions(state: &ZebraDb, block: Arc) let mut batch = DiskWriteBatch::new(); batch - .prepare_block_header_and_transaction_data_batch(state, &finalized, true) + .prepare_block_header_and_transaction_data_batch(state, &finalized, true, None) .expect("full block header and transaction batch is valid"); state.db.write(batch).expect("full block batch writes"); } @@ -1220,6 +1292,7 @@ fn test_block_db_round_trip_with( new_outputs, transaction_hashes, deferred_pool_balance_change: None, + auth_data_root: None, }) }; @@ -1230,7 +1303,7 @@ fn test_block_db_round_trip_with( // Skip validation by writing the block directly to the database let mut batch = DiskWriteBatch::new(); batch - .prepare_block_header_and_transaction_data_batch(&state, &finalized, true) + .prepare_block_header_and_transaction_data_batch(&state, &finalized, true, None) .expect("test block header and transaction batch is valid"); state.db.write(batch).expect("block is valid for writing"); @@ -1282,3 +1355,90 @@ fn 
missing_pruning_metadata_cf_is_archive_database() { assert!(state.lowest_retained_height().is_none()); assert!(!state.is_pruned()); } + +/// POC (verified-commitment-trees): the anchor-only fast write produces the same +/// `sapling_anchors` / `orchard_anchors` contents as the legacy full write, while +/// skipping the per-height note-commitment tree CFs, and is idempotent. +/// See `docs/design/verified-commitment-trees.md`. +#[test] +fn vct_anchor_only_write_matches_legacy_and_skips_per_height_trees() { + use zebra_chain::{orchard, sapling}; + + fn ephemeral_db() -> ZebraDb { + ZebraDb::new( + &Config::ephemeral(), + STATE_DATABASE_KIND, + &state_database_format_version_in_code(), + &Mainnet, + true, + STATE_COLUMN_FAMILIES_IN_CODE + .iter() + .map(ToString::to_string), + false, + ) + } + + let sapling_tree = sapling::tree::NoteCommitmentTree::default(); + let orchard_tree = orchard::tree::NoteCommitmentTree::default(); + let sapling_root = sapling_tree.root(); + let orchard_root = orchard_tree.root(); + + // Legacy path: the full write inserts the anchor *and* a per-height tree at each + // of two heights (the anchor set collapses to one key; two tree entries). + let legacy = ephemeral_db(); + { + let mut batch = DiskWriteBatch::new(); + batch.create_sapling_tree(&legacy, &Height(10), &sapling_tree); + batch.create_sapling_tree(&legacy, &Height(11), &sapling_tree); + batch.create_orchard_tree(&legacy, &Height(10), &orchard_tree); + legacy.db.write(batch).expect("legacy batch writes"); + } + + // Fast path: anchor-only writes for the same roots, no per-height trees. + let fast = ephemeral_db(); + { + let mut batch = DiskWriteBatch::new(); + batch.insert_sapling_anchor(&fast, &sapling_root); + batch.insert_orchard_anchor(&fast, &orchard_root); + fast.db.write(batch).expect("fast batch writes"); + } + + // The anchor sets are byte-identical (same count, same digest): the fast + // anchor-only write reproduces exactly the legacy anchor index. 
+ assert_eq!( + legacy.vct_anchor_digest(), + fast.vct_anchor_digest(), + "fast anchor-only write must match legacy anchor set" + ); + + // The fast DB skipped the per-height Sapling tree CF; the legacy DB did not. + let count_sapling_trees = |db: &ZebraDb| -> usize { + let cf = db.db.cf_handle("sapling_note_commitment_tree").unwrap(); + db.db + .zs_forward_range_iter::<_, Height, sapling::tree::NoteCommitmentTree, _>(&cf, ..) + .count() + }; + assert_eq!( + count_sapling_trees(&legacy), + 2, + "legacy path writes a per-height tree at each height" + ); + assert_eq!( + count_sapling_trees(&fast), + 0, + "fast path skips per-height trees entirely" + ); + + // Re-inserting an unchanged root is idempotent (anchor CF is a set). + let before = fast.vct_anchor_digest(); + { + let mut batch = DiskWriteBatch::new(); + batch.insert_sapling_anchor(&fast, &sapling_root); + fast.db.write(batch).expect("idempotent write"); + } + assert_eq!( + fast.vct_anchor_digest(), + before, + "anchor insert is idempotent" + ); +} diff --git a/zebra-state/src/service/finalized_state/zebra_db/chain.rs b/zebra-state/src/service/finalized_state/zebra_db/chain.rs index 91db10bfa16..e3532ad69a1 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/chain.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/chain.rs @@ -29,7 +29,7 @@ use zebra_chain::{ use crate::{ request::FinalizedBlock, service::finalized_state::{ - disk_db::DiskWriteBatch, + disk_db::{DiskWriteBatch, ReadDisk}, disk_format::{chain::HistoryTreeParts, RawBytes}, zebra_db::{metrics::value_pool_metrics, ZebraDb}, TypedColumnFamily, @@ -157,6 +157,40 @@ impl ZebraDb { Arc::new(HistoryTree::from(history_tree)) } + /// Returns `Ok(())` if the stored tip history tree decodes with the current + /// `HistoryTreeParts` format. + /// + /// This is a non-panicking compatibility probe used during DB open before + /// background format checks can call [`Self::history_tree`]. 
It reads raw + /// bytes and performs the same current-key then legacy-key fallback as + /// [`Self::history_tree`]. + pub(crate) fn check_tip_history_tree_decodes(&self) -> Result<(), String> { + let history_tree_cf = self + .db + .cf_handle(HISTORY_TREE) + .expect("column family was created when database was created"); + + let raw_parts: Option = self.db.zs_get(&history_tree_cf, &()); + let raw_parts = raw_parts.or_else(|| { + self.db + .zs_last_key_value::<_, RawBytes, RawBytes>(&history_tree_cf) + .map(|(_height_key, tree_value)| tree_value) + }); + + let Some(raw_parts) = raw_parts else { + return Ok(()); + }; + + let parts = HistoryTreeParts::from_bytes_result(raw_parts.raw_bytes()) + .map_err(|error| format!("stored history tree does not deserialize: {error}"))?; + + parts + .with_network(&self.db.network()) + .map_err(|error| format!("stored history tree is invalid for this network: {error}"))?; + + Ok(()) + } + /// Returns all the history tip trees. /// We only store the history tree for the tip, so this method is only used in tests and /// upgrades. @@ -297,9 +331,12 @@ impl DiskWriteBatch { // `Block::zcash_serialized_size` walks the entire block's serialization // on a single thread, which is a significant per-block cost on heavy // shielded blocks (it re-traverses every transaction). - // Sum the independent per-transaction sizes across the rayon pool. + // Sum the independent per-transaction sizes. This is byte-count-identical + // to serializing the block: + // size = header + CompactSize(tx_count) + sum(transaction sizes). // Only fan out to rayon once the block has enough transactions to - // amortize the multi-threading overhead. + // amortize the fork-join cost; small blocks sum sequentially (see + // PARALLEL_BLOCK_TX_THRESHOLD). 
let block_size = { let transactions = &finalized.block.transactions; let transactions_size: usize = diff --git a/zebra-state/src/service/finalized_state/zebra_db/prune.rs b/zebra-state/src/service/finalized_state/zebra_db/prune.rs index 42813e35378..6f1695002d7 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/prune.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/prune.rs @@ -355,7 +355,7 @@ mod tests { .expect("test data deserializes"); state - .commit_finalized_direct(block.into(), None, "offline prune tests") + .commit_finalized_direct(block.into(), None, None, None, "offline prune tests") .expect("test block is valid"); } diff --git a/zebra-state/src/service/finalized_state/zebra_db/rollback.rs b/zebra-state/src/service/finalized_state/zebra_db/rollback.rs index aa4940c53ad..86ad051b96e 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/rollback.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/rollback.rs @@ -11,11 +11,13 @@ use zebra_chain::{ amount::{self, Amount, DeferredPoolBalanceChange, NonNegative}, block::{self, Block, Height}, history_tree::{HistoryTree, HistoryTreeError}, + orchard, parallel::tree::{NoteCommitmentTreeError, NoteCommitmentTrees}, parameters::{ subsidy::{block_subsidy, funding_stream_values, FundingStreamReceiver, SubsidyError}, Network, NetworkUpgrade, }, + sapling, subtree::NoteCommitmentSubtreeIndex, transaction, transparent::{self, Input}, @@ -29,6 +31,7 @@ use crate::{ finalized_state::{ disk_db::{DiskWriteBatch, ReadDisk, WriteDisk}, disk_format::{ + shielded::CommitmentRootsByHeight, transparent::{ AddressBalanceLocation, AddressTransaction, AddressUnspentOutput, OutputLocation, @@ -40,7 +43,8 @@ use crate::{ transparent::{BALANCE_BY_TRANSPARENT_ADDR, TX_LOC_BY_SPENT_OUT_LOC}, ZebraDb, }, - STATE_COLUMN_FAMILIES_IN_CODE, + COMMITMENT_ROOTS_BY_HEIGHT, STATE_COLUMN_FAMILIES_IN_CODE, + ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT, }, non_finalized_state::write_semantically_verified_backup_block, 
}, @@ -542,6 +546,61 @@ fn block_has_sprout_commitments(block: &Block) -> bool { block.sprout_note_commitments().next().is_some() } +/// Blocking DB-open repair for an incompatible stored `history_tree`. +/// +/// If the stored tip history tree cannot be decoded by the current code, rebuild it from the +/// finalized blocks plus Sapling/Orchard roots using the same algorithm rollback uses, then write +/// it back before the background format-validity check can read the old value. +/// +/// The roots come from the compact per-height root index when present, so post-index VCT +/// fast-synced databases can be repaired without historical tree rows. Pre-index archive +/// databases fall back to deriving roots from per-height trees. Databases missing both sources +/// fail closed with remediation instead of attempting a partial rebuild. +pub(crate) fn repair_tip_history_tree_if_incompatible(db: &ZebraDb, network: &Network) { + let Some(tip_height) = db.finalized_tip_height() else { + return; + }; + // Pre-Heartwood (no history tree) needs no repair. + if NetworkUpgrade::current(network, tip_height) < NetworkUpgrade::Heartwood { + return; + } + // Healthy DBs (the common case) decode fine: no-op, no rebuild. 
+ if let Err(error) = db.check_tip_history_tree_decodes() { + tracing::warn!( + ?tip_height, + ?error, + "stored history tree is incompatible with this binary; rebuilding it from finalized \ + blocks and commitment roots before startup" + ); + } else { + return; + } + + match rebuild_history_tree_from_upgrade_activation(db, network, tip_height) { + Ok(rebuilt) => { + let mut batch = DiskWriteBatch::new(); + batch.update_history_tree(db, &rebuilt); + db.write_batch(batch) + .expect("history-tree repair batch write should succeed"); + tracing::info!( + ?tip_height, + history_root = ?rebuilt.hash(), + "history-tree repair complete; rebuilt tip tree written in the current format" + ); + } + Err(error) => { + panic!( + "cannot repair the incompatible history tree at tip {tip_height:?}: {error}. \ + The repair requires finalized block bodies plus Sapling/Orchard roots from the \ + current network-upgrade activation height through the tip. Roots can come from \ + `commitment_roots_by_height` or from per-height trees. If this database predates \ + the root index and is VCT fast-synced or pruned, re-sync from genesis or repair \ + from an archive-capable database." + ); + } + } +} + fn rebuild_history_tree_from_upgrade_activation( db: &ZebraDb, network: &Network, @@ -583,6 +642,15 @@ fn history_rebuild_inputs_at_height( let block = db .block(height.into()) .ok_or(RollbackFinalizedStateError::MissingBlock { height })?; + + if let Some(roots) = db + .commitment_roots_by_height_range(height..=height) + .into_iter() + .next() + { + return Ok((block, roots.sapling_root, roots.orchard_root)); + } + let sapling_root = db .sapling_tree_by_height(&height) .ok_or(RollbackFinalizedStateError::MissingSaplingTree { height })? 
@@ -868,6 +936,10 @@ fn delete_zakura_headers_above(db: &ZebraDb, batch: &mut DiskWriteBatch, target_ .db .cf_handle("zakura_header_body_size_by_height") .unwrap(); + let roots_by_height = db + .db + .cf_handle(ZAKURA_HEADER_COMMITMENT_ROOTS_BY_HEIGHT) + .unwrap(); let Some((tip_height, _tip_hash)) = db .db @@ -883,6 +955,7 @@ fn delete_zakura_headers_above(db: &ZebraDb, batch: &mut DiskWriteBatch, target_ batch.zs_delete(&hash_by_height, height); batch.zs_delete(&header_by_height, height); batch.zs_delete(&body_size_by_height, height); + batch.zs_delete(&roots_by_height, height); } } @@ -946,6 +1019,8 @@ fn prune_tree_indexes( target_height: Height, retained_sprout_roots: &Option>, ) { + let retained_shielded_roots = retained_shielded_roots(db, target_height); + let sapling_trees: BTreeMap<_, _> = db .sapling_tree_by_height_range(( std::ops::Bound::Excluded(target_height), @@ -953,8 +1028,11 @@ fn prune_tree_indexes( )) .collect(); for (height, tree) in sapling_trees { + let root = tree.root(); batch.delete_sapling_tree(db, &height); - batch.delete_sapling_anchor(db, &tree.root()); + if !retained_shielded_roots.sapling.contains(&root) { + batch.delete_sapling_anchor(db, &root); + } } let orchard_trees: BTreeMap<_, _> = db @@ -964,10 +1042,22 @@ fn prune_tree_indexes( )) .collect(); for (height, tree) in orchard_trees { + let root = tree.root(); batch.delete_orchard_tree(db, &height); - batch.delete_orchard_anchor(db, &tree.root()); + if !retained_shielded_roots.orchard.contains(&root) { + batch.delete_orchard_anchor(db, &root); + } } + // Fast-sync writes anchors and this root index without writing per-height trees. Use the + // index to remove anchors introduced only by rolled-back fast-path heights before truncating + // it, but retain any repeated root that is still valid at or below the target. 
+ prune_fast_commitment_anchors_from_index(db, batch, target_height, &retained_shielded_roots); + + // Truncate the per-height commitment-roots serving index above the target, so a rolled-back + // database does not serve roots for heights it no longer holds. + batch.delete_range_commitment_roots_by_height(db, &Height(target_height.0 + 1), &Height::MAX); + // Delete every sapling/orchard subtree whose notes extend past the target height. Subtree // indexes are read back from the database and number far fewer than `u16::MAX`, so `index.0 + 1` // (the exclusive end of the single-index delete range) cannot overflow. @@ -1003,6 +1093,66 @@ fn prune_tree_indexes( batch.delete_range_sprout_tree(db, &next_height, &Height::MAX); } +#[derive(Default)] +struct RetainedShieldedRoots { + sapling: HashSet, + orchard: HashSet, +} + +fn retained_shielded_roots(db: &ZebraDb, target_height: Height) -> RetainedShieldedRoots { + let mut retained = RetainedShieldedRoots::default(); + + let commitment_roots_by_height = db.db.cf_handle(COMMITMENT_ROOTS_BY_HEIGHT).unwrap(); + for (_height, roots) in db + .db + .zs_forward_range_iter::<_, Height, CommitmentRootsByHeight, _>( + &commitment_roots_by_height, + ..=target_height, + ) + { + retained.sapling.insert(roots.sapling); + retained.orchard.insert(roots.orchard); + } + + for (_height, tree) in db.sapling_tree_by_height_range(..=target_height) { + retained.sapling.insert(tree.root()); + } + + for (_height, tree) in db.orchard_tree_by_height_range(..=target_height) { + retained.orchard.insert(tree.root()); + } + + retained +} + +fn prune_fast_commitment_anchors_from_index( + db: &ZebraDb, + batch: &mut DiskWriteBatch, + target_height: Height, + retained_roots: &RetainedShieldedRoots, +) { + let commitment_roots_by_height = db.db.cf_handle(COMMITMENT_ROOTS_BY_HEIGHT).unwrap(); + let rolled_back_roots: BTreeMap<_, CommitmentRootsByHeight> = db + .db + .zs_forward_range_iter( + &commitment_roots_by_height, + ( + 
std::ops::Bound::Excluded(target_height), + std::ops::Bound::Unbounded, + ), + ) + .collect(); + + for (_height, roots) in rolled_back_roots { + if !retained_roots.sapling.contains(&roots.sapling) { + batch.delete_sapling_anchor(db, &roots.sapling); + } + if !retained_roots.orchard.contains(&roots.orchard) { + batch.delete_orchard_anchor(db, &roots.orchard); + } + } +} + fn clear_backup_dir(path: &PathBuf) -> Result<(), std::io::Error> { match std::fs::remove_dir_all(path) { Ok(()) => {} @@ -1021,16 +1171,9 @@ mod tests { use super::*; - /// `delete_zakura_headers_above` must truncate every Zakura header CF above the target, - /// including the hash→height index, while leaving rows at or below the target intact. This - /// is the consistency guarantee that lets a rolled-back snapshot re-sync bodies from its tip - /// instead of stalling on an un-requestable floor (see the function doc). - #[test] - fn delete_zakura_headers_above_truncates_the_header_store() { - let _init_guard = zebra_test::init(); - + fn ephemeral_mainnet_db() -> ZebraDb { let network = Network::Mainnet; - let db = ZebraDb::new( + ZebraDb::new( &Config::ephemeral(), STATE_DATABASE_KIND, &state_database_format_version_in_code(), @@ -1040,7 +1183,206 @@ mod tests { .iter() .map(ToString::to_string), false, + ) + } + + fn sapling_note_commitment(value: u64) -> sapling::tree::NoteCommitmentUpdate { + let mut bytes = [0; 32]; + bytes[..8].copy_from_slice(&value.to_le_bytes()); + + Option::::from( + sapling::tree::NoteCommitmentUpdate::from_bytes(&bytes), + ) + .expect("small little-endian integers are canonical Jubjub field elements") + } + + fn sapling_root(value: u64) -> sapling::tree::Root { + let mut tree = sapling::tree::NoteCommitmentTree::default(); + tree.append(sapling_note_commitment(value)) + .expect("single-note Sapling tree is not full"); + tree.root() + } + + fn orchard_root(value: u64) -> orchard::tree::Root { + let mut tree = orchard::tree::NoteCommitmentTree::default(); + 
tree.append(halo2::pasta::pallas::Base::from(value)) + .expect("single-note Orchard tree is not full"); + tree.root() + } + + /// Fast-path VCT commits write Sapling/Orchard anchors and the compact + /// `commitment_roots_by_height` index, but skip per-height tree rows. Rollback must + /// therefore prune stale anchors from the index before truncating it; otherwise anchors + /// from rolled-back fast commits stay valid for contextual verification. + #[test] + fn prune_tree_indexes_drops_fast_index_anchors_above_target() { + let _init_guard = zebra_test::init(); + let db = ephemeral_mainnet_db(); + + let retained_sapling = sapling_root(1); + let removed_sapling = sapling_root(2); + let retained_orchard = orchard_root(1); + let removed_orchard = orchard_root(2); + + let mut batch = DiskWriteBatch::new(); + batch.insert_sapling_anchor(&db, &retained_sapling); + batch.insert_sapling_anchor(&db, &removed_sapling); + batch.insert_orchard_anchor(&db, &retained_orchard); + batch.insert_orchard_anchor(&db, &removed_orchard); + + // Heights 1 and 2 are retained. Height 3 is rolled back and has a unique stale + // anchor. Height 4 is also rolled back, but repeats the retained root, so its anchor + // must remain valid after the index row is truncated. 
+ batch.insert_commitment_roots_by_height( + &db, + Height(1), + &retained_sapling, + &retained_orchard, + ); + batch.insert_commitment_roots_by_height( + &db, + Height(2), + &retained_sapling, + &retained_orchard, + ); + batch.insert_commitment_roots_by_height(&db, Height(3), &removed_sapling, &removed_orchard); + batch.insert_commitment_roots_by_height( + &db, + Height(4), + &retained_sapling, + &retained_orchard, + ); + db.write_batch(batch) + .expect("seeding fast-path roots succeeds"); + + let mut batch = DiskWriteBatch::new(); + prune_tree_indexes(&db, &mut batch, Height(2), &None); + db.write_batch(batch) + .expect("pruning fast-path roots succeeds"); + + assert!( + db.contains_sapling_anchor(&retained_sapling), + "rollback retains Sapling anchors still valid at or below the target" + ); + assert!( + db.contains_orchard_anchor(&retained_orchard), + "rollback retains Orchard anchors still valid at or below the target" + ); + assert!( + !db.contains_sapling_anchor(&removed_sapling), + "rollback removes Sapling anchors introduced only by rolled-back fast commits" + ); + assert!( + !db.contains_orchard_anchor(&removed_orchard), + "rollback removes Orchard anchors introduced only by rolled-back fast commits" + ); + assert_eq!( + db.commitment_roots_by_height_range(Height(1)..=Height(4)) + .into_iter() + .map(|roots| roots.height) + .collect::>(), + vec![Height(1), Height(2)], + "rollback truncates the serving index above the target" + ); + } + + /// `vct_tree_absent` marks exactly the half-open band `[U, H)`: heights below the upgrade + /// height `U` keep their pre-upgrade trees, and heights at or above the handoff `H` get trees + /// again from semantic sync. With no handoff marker the database is a normal archive and no + /// height is ever absent. 
+ #[test] + fn vct_tree_absent_marks_only_the_upgrade_to_handoff_band() { + let _init_guard = zebra_test::init(); + let db = ephemeral_mainnet_db(); + + // No markers: a normally-synced archive database, never absent. + assert!(!db.vct_tree_absent(Height(0))); + assert!(!db.vct_tree_absent(Height(100))); + + // Upgrade U = 4, handoff H = 10: per-height trees absent exactly in [4, 10). + let mut batch = DiskWriteBatch::new(); + batch.update_vct_upgrade_marker(&db, Height(4)); + batch.update_vct_sync_marker(&db, Height(10)); + db.write_batch(batch).expect("seeding vct markers succeeds"); + + assert!( + !db.vct_tree_absent(Height(3)), + "below U: the pre-upgrade tree is present" + ); + assert!(db.vct_tree_absent(Height(4)), "at U: the tree is absent"); + assert!(db.vct_tree_absent(Height(9)), "below H: the tree is absent"); + assert!( + !db.vct_tree_absent(Height(10)), + "at H: the handoff tree is present" + ); + assert!( + !db.vct_tree_absent(Height(11)), + "above H: the semantic-sync tree is present" + ); + } + + /// When the upgrade height is at or above the handoff — a node upgraded after the last + /// checkpoint, where semantic sync keeps writing trees — the band `[U, H)` is empty, so every + /// height is servable regardless of the upgrade height. + #[test] + fn vct_tree_absent_empty_band_when_upgraded_above_handoff() { + let _init_guard = zebra_test::init(); + let db = ephemeral_mainnet_db(); + + let mut batch = DiskWriteBatch::new(); + batch.update_vct_upgrade_marker(&db, Height(15)); + batch.update_vct_sync_marker(&db, Height(10)); + db.write_batch(batch).expect("seeding vct markers succeeds"); + + for height in [0, 9, 10, 12, 15, 20] { + assert!( + !db.vct_tree_absent(Height(height)), + "U >= H leaves an empty band, so height {height} is servable" + ); + } + } + + /// `serve_block_roots` reads a request that starts at or above the upgrade height `U` straight + /// from the serving index, without touching the per-height trees. 
+ #[test] + fn serve_block_roots_serves_at_or_above_upgrade_from_index() { + let _init_guard = zebra_test::init(); + let db = ephemeral_mainnet_db(); + + // Index covers [4, 6]; the upgrade height is U = 4. + let mut batch = DiskWriteBatch::new(); + batch.update_vct_upgrade_marker(&db, Height(4)); + for height in 4u32..=6 { + batch.insert_commitment_roots_by_height( + &db, + Height(height), + &sapling_root(height.into()), + &orchard_root(height.into()), + ); + } + db.write_batch(batch) + .expect("seeding the serving index succeeds"); + + let served = crate::service::finalized_state::serve_block_roots(&db, Height(4)..=Height(6)); + assert_eq!( + served + .into_iter() + .map(|root| root.height) + .collect::>(), + vec![Height(4), Height(5), Height(6)], + "a request at or above U is served from the index" ); + } + + /// `delete_zakura_headers_above` must truncate every Zakura header CF above the target, + /// including the hash→height index, while leaving rows at or below the target intact. This + /// is the consistency guarantee that lets a rolled-back snapshot re-sync bodies from its tip + /// instead of stalling on an un-requestable floor (see the function doc). + #[test] + fn delete_zakura_headers_above_truncates_the_header_store() { + let _init_guard = zebra_test::init(); + + let db = ephemeral_mainnet_db(); // A real header value for `zakura_header_by_height`; the height math is what matters, so // every seeded height can reuse the same header. 
@@ -1145,18 +1487,7 @@ mod tests { fn delete_zakura_headers_above_is_a_noop_on_an_empty_store() { let _init_guard = zebra_test::init(); - let network = Network::Mainnet; - let db = ZebraDb::new( - &Config::ephemeral(), - STATE_DATABASE_KIND, - &state_database_format_version_in_code(), - &network, - true, - STATE_COLUMN_FAMILIES_IN_CODE - .iter() - .map(ToString::to_string), - false, - ); + let db = ephemeral_mainnet_db(); let mut batch = DiskWriteBatch::new(); delete_zakura_headers_above(&db, &mut batch, Height(3)); diff --git a/zebra-state/src/service/finalized_state/zebra_db/shielded.rs b/zebra-state/src/service/finalized_state/zebra_db/shielded.rs index e07b482fd9b..bdcfc6dbf58 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/shielded.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/shielded.rs @@ -17,10 +17,12 @@ use std::{ sync::Arc, }; +use std::ops::RangeInclusive; + use zebra_chain::{ block::Height, orchard, - parallel::tree::NoteCommitmentTrees, + parallel::{commitment_aux::BlockCommitmentRoots, tree::NoteCommitmentTrees}, sapling, sprout, subtree::{NoteCommitmentSubtreeData, NoteCommitmentSubtreeIndex}, transaction::Transaction, @@ -30,8 +32,9 @@ use crate::{ request::{FinalizedBlock, Treestate}, service::finalized_state::{ disk_db::{DiskWriteBatch, ReadDisk, WriteDisk}, - disk_format::RawBytes, + disk_format::{shielded::CommitmentRootsByHeight, RawBytes}, zebra_db::ZebraDb, + COMMITMENT_ROOTS_BY_HEIGHT, }, TransactionLocation, }; @@ -116,6 +119,78 @@ impl ZebraDb { self.db.zs_contains(&orchard_anchors, &orchard_anchor) } + /// Returns the per-block Sapling/Orchard commitment roots stored in the + /// `commitment_roots_by_height` serving index for the **contiguous** prefix of `range` + /// that is present, in ascending height order (design §4). 
+ /// + /// Reads stop at the first absent height, so the result is always a gap-free run from + /// `range.start()` — exactly what the `tree_aux` `BlockRoots` serve and `fetch_roots` + /// client expect. A node populates this index for every block it commits (fast or + /// legacy), so a fast-synced node — which holds no per-height trees — can still serve + /// roots here. Returns an empty vec for a database written before the index existed + /// (e.g. a pre-index archive node), where the caller falls back to `produce_block_roots`. + pub fn commitment_roots_by_height_range( + &self, + range: RangeInclusive, + ) -> Vec { + let cf = self.db.cf_handle(COMMITMENT_ROOTS_BY_HEIGHT).unwrap(); + let mut roots = Vec::new(); + for height in (range.start().0..=range.end().0).map(Height) { + let Some(value) = self + .db + .zs_get::<_, _, CommitmentRootsByHeight>(&cf, &height) + else { + break; + }; + roots.push(BlockCommitmentRoots { + height, + sapling_root: value.sapling, + orchard_root: value.orchard, + }); + } + roots + } + + /// POC: returns `(sapling_count, sapling_digest, orchard_count, orchard_digest)`, + /// a deterministic, order-independent digest of the Sapling and Orchard anchor + /// sets. Two syncs that produce the same anchor sets produce the same digest, + /// even if one took the fast (skip-recompute) path. See + /// `docs/design/verified-commitment-trees.md`. + pub fn vct_anchor_digest(&self) -> (u64, u64, u64, u64) { + use crate::service::finalized_state::IntoDisk; + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let sapling_anchors = self.db.cf_handle("sapling_anchors").unwrap(); + let mut sapling_hasher = DefaultHasher::new(); + let mut sapling_count = 0u64; + for (root, ()) in self + .db + .zs_forward_range_iter::<_, sapling::tree::Root, (), _>(&sapling_anchors, ..) 
+ { + IntoDisk::as_bytes(&root).hash(&mut sapling_hasher); + sapling_count += 1; + } + + let orchard_anchors = self.db.cf_handle("orchard_anchors").unwrap(); + let mut orchard_hasher = DefaultHasher::new(); + let mut orchard_count = 0u64; + for (root, ()) in self + .db + .zs_forward_range_iter::<_, orchard::tree::Root, (), _>(&orchard_anchors, ..) + { + IntoDisk::as_bytes(&root).hash(&mut orchard_hasher); + orchard_count += 1; + } + + ( + sapling_count, + sapling_hasher.finish(), + orchard_count, + orchard_hasher.finish(), + ) + } + // # Sprout trees /// Returns the Sprout note commitment tree of the finalized tip @@ -154,7 +229,17 @@ impl ZebraDb { .map(|(_key, tree_value): (Height, _)| tree_value); } - sprout_tree.expect("Sprout note commitment tree must exist if there is a finalized tip") + sprout_tree.unwrap_or_else(|| { + // While a fast sync is in progress (tip below the handoff height), the + // sprout tip tree is only written at the handoff; the committer does not + // read it before then. + assert!( + self.finalized_tip_height() + .is_some_and(|tip| self.vct_tree_absent(tip)), + "Sprout note commitment tree must exist if there is a finalized tip" + ); + Arc::::default() + }) } /// Returns the Sprout note commitment tree matching the given anchor. @@ -204,8 +289,17 @@ impl ZebraDb { None => return Default::default(), }; - self.sapling_tree_by_height(&height) - .expect("Sapling note commitment tree must exist if there is a finalized tip") + self.sapling_tree_by_height(&height).unwrap_or_else(|| { + // While a fast sync is in progress the tip is in the absent band and its + // frontier is not stored; the committer does not read it (it folds + // verified roots). Every other caller reaches here only below the upgrade + // height or at/above the handoff, where the tree is present. 
+ assert!( + self.vct_tree_absent(height), + "Sapling note commitment tree must exist if there is a finalized tip" + ); + Default::default() + }) } /// Returns the Sapling note commitment tree matching the given block height, or `None` if the @@ -223,6 +317,14 @@ impl ZebraDb { return None; } + // On a verified-commitment-trees fast-synced database, the per-height trees within the + // `[U, H)` absent band were never written. Return `None` rather than letting the backward + // search return a stale tree from an earlier height; trees below the upgrade height `U` + // (pre-upgrade) and at/above the handoff `H` (semantic sync) are present. + if self.vct_tree_absent(*height) { + return None; + } + let sapling_trees = self.db.cf_handle("sapling_note_commitment_tree").unwrap(); // If we know there must be a tree, search backwards for it. @@ -330,8 +432,15 @@ impl ZebraDb { None => return Default::default(), }; - self.orchard_tree_by_height(&height) - .expect("Orchard note commitment tree must exist if there is a finalized tip") + self.orchard_tree_by_height(&height).unwrap_or_else(|| { + // See `sapling_tree_for_tip`: the fast-sync tip frontier in the absent + // band is not stored and not read by the committer. + assert!( + self.vct_tree_absent(height), + "Orchard note commitment tree must exist if there is a finalized tip" + ); + Default::default() + }) } /// Returns the Orchard note commitment tree matching the given block height, @@ -349,6 +458,14 @@ impl ZebraDb { return None; } + // On a verified-commitment-trees fast-synced database, the per-height trees within the + // `[U, H)` absent band were never written. Return `None` rather than letting the backward + // search return a stale tree from an earlier height; trees below the upgrade height `U` + // (pre-upgrade) and at/above the handoff `H` (semantic sync) are present. 
+ if self.vct_tree_absent(*height) { + return None; + } + let orchard_trees = self.db.cf_handle("orchard_note_commitment_tree").unwrap(); // If we know there must be a tree, search backwards for it. @@ -534,6 +651,8 @@ impl DiskWriteBatch { zebra_db: &ZebraDb, finalized: &FinalizedBlock, prev_note_commitment_trees: Option, + vct_anchor_roots: Option<(sapling::tree::Root, orchard::tree::Root)>, + vct_sync_below: Option, ) { let FinalizedBlock { height, @@ -545,6 +664,42 @@ impl DiskWriteBatch { .. } = finalized; + // Record the upgrade height `U` once, on the first block this binary commits: the lowest + // height in the serving index, and the boundary below which roots are served from the + // pre-upgrade per-height trees instead. Written on both commit paths so it is set even for + // a node that upgrades above the last checkpoint (legacy path only). Set-once: the marker + // is never moved, so the boundary stays stable as the chain grows. Commits are sequential, + // so the absent check sees the previous block's committed marker, not a half-written batch. + if zebra_db.vct_upgrade_height().is_none() { + self.update_vct_upgrade_marker(zebra_db, *height); + } + + // Mark the database as vct-synced (per-height note-commitment trees absent + // below the checkpoint handoff height). Written in the same atomic batch as + // every vct commit, so a vct-synced database always carries the marker and + // the read/validity guards never see absent trees without it. + if let Some(handoff) = vct_sync_below { + self.update_vct_sync_marker(zebra_db, handoff); + } + + // POC (verified-commitment-trees) vct path: the committer skipped the + // per-block frontier recompute, so `note_commitment_trees` is the frozen + // parent frontier. Write only the supplied roots into the anchor set and + // the (already-extended) history tree; skip the per-height Sapling/Orchard + // tree CFs and subtrees entirely. 
The Sprout tree is unchanged below any + // modern checkpoint, so it is correctly left untouched here. + // See docs/design/verified-commitment-trees.md. + if let Some((sapling_root, orchard_root)) = vct_anchor_roots { + self.insert_sapling_anchor(zebra_db, &sapling_root); + self.insert_orchard_anchor(zebra_db, &orchard_root); + // Persist the per-height roots into the serving index even though no per-height + // tree is written, so this fast-synced node can still serve `tree_aux` roots + // (design §4); otherwise the root-serving fleet collapses as nodes fast-sync. + self.insert_commitment_roots_by_height(zebra_db, *height, &sapling_root, &orchard_root); + self.update_history_tree(zebra_db, history_tree); + return; + } + let prev_sprout_tree = prev_note_commitment_trees.as_ref().map_or_else( || zebra_db.sprout_tree_for_tip(), |prev_trees| prev_trees.sprout.clone(), @@ -581,6 +736,17 @@ impl DiskWriteBatch { } } + // Persist the per-height roots into the serving index for *every* committed height + // (not just when a tree changed — the index must be gap-free for contiguous serving), + // so a legacy/archive node serves `tree_aux` roots from the compact index too, and a + // node that later fast-syncs above this height already has the lower range covered. + self.insert_commitment_roots_by_height( + zebra_db, + *height, + ¬e_commitment_trees.sapling.root(), + ¬e_commitment_trees.orchard.root(), + ); + self.update_history_tree(zebra_db, history_tree); } @@ -646,6 +812,78 @@ impl DiskWriteBatch { self.zs_insert(&sapling_tree_cf, height, tree); } + /// POC: inserts only the Sapling anchor `root` (value `()`), without writing a + /// per-height tree. Used by the verified-commitment-trees fast path, which + /// supplies the root directly instead of recomputing the frontier. The anchor + /// CF is a set, so re-inserting an unchanged root is idempotent. 
+ pub fn insert_sapling_anchor(&mut self, zebra_db: &ZebraDb, root: &sapling::tree::Root) { + let sapling_anchors = zebra_db.db.cf_handle("sapling_anchors").unwrap(); + self.zs_insert(&sapling_anchors, root, ()); + } + + /// Inserts the per-height Sapling/Orchard commitment roots into the + /// `commitment_roots_by_height` serving index (design §4). + /// + /// Written on every committed block, fast or legacy, so any node — including a + /// fast-synced node that holds no per-height trees — can serve the `tree_aux` + /// `BlockRoots` read from this compact 64-byte-per-height index. Idempotent + /// (re-inserting the same height overwrites with the identical value). + pub fn insert_commitment_roots_by_height( + &mut self, + zebra_db: &ZebraDb, + height: Height, + sapling_root: &sapling::tree::Root, + orchard_root: &orchard::tree::Root, + ) { + let cf = zebra_db.db.cf_handle(COMMITMENT_ROOTS_BY_HEIGHT).unwrap(); + self.zs_insert( + &cf, + height, + CommitmentRootsByHeight { + sapling: *sapling_root, + orchard: *orchard_root, + }, + ); + } + + /// Deletes the commitment-roots serving-index entries in `[from, to)`. + /// + /// Used by the finalized rollback to truncate the index above the rollback target, the + /// same way the per-height trees and anchors above the target are removed, so a + /// rolled-back database does not retain root entries for heights it no longer holds. + pub fn delete_range_commitment_roots_by_height( + &mut self, + zebra_db: &ZebraDb, + from: &Height, + to: &Height, + ) { + let cf = zebra_db.db.cf_handle(COMMITMENT_ROOTS_BY_HEIGHT).unwrap(); + self.zs_delete_range(&cf, from, to); + } + + /// Records the verified-commitment-trees fast-sync marker: per-height + /// note-commitment trees are absent below `handoff`. Idempotent (written in the + /// same batch as each fast commit). 
+ pub fn update_vct_sync_marker(&mut self, zebra_db: &ZebraDb, handoff: Height) { + let vct_sync_metadata = zebra_db + .db + .cf_handle(crate::service::finalized_state::VCT_SYNC_METADATA) + .unwrap(); + self.zs_insert(&vct_sync_metadata, (), handoff); + } + + /// Records the verified-commitment-trees upgrade height `U` = `height`, the lowest height this + /// binary commits and the lowest height in the serving index. Set once and never moved, so the + /// caller must only invoke this when [`vct_upgrade_height`](ZebraDb::vct_upgrade_height) is + /// still absent. + pub fn update_vct_upgrade_marker(&mut self, zebra_db: &ZebraDb, height: Height) { + let vct_upgrade_metadata = zebra_db + .db + .cf_handle(crate::service::finalized_state::VCT_UPGRADE_METADATA) + .unwrap(); + self.zs_insert(&vct_upgrade_metadata, (), height); + } + /// Inserts the Sapling note commitment subtree into the batch. pub fn insert_sapling_subtree( &mut self, @@ -725,6 +963,13 @@ impl DiskWriteBatch { self.zs_insert(&orchard_tree_cf, height, tree); } + /// POC: inserts only the Orchard anchor `root` (value `()`), without writing a + /// per-height tree. The Orchard twin of [`Self::insert_sapling_anchor`]. + pub fn insert_orchard_anchor(&mut self, zebra_db: &ZebraDb, root: &orchard::tree::Root) { + let orchard_anchors = zebra_db.db.cf_handle("orchard_anchors").unwrap(); + self.zs_insert(&orchard_anchors, root, ()); + } + /// Inserts the Orchard note commitment subtree into the batch. pub fn insert_orchard_subtree( &mut self, diff --git a/zebra-state/src/service/non_finalized_state.rs b/zebra-state/src/service/non_finalized_state.rs index 813484b46f4..a2b676a9896 100644 --- a/zebra-state/src/service/non_finalized_state.rs +++ b/zebra-state/src/service/non_finalized_state.rs @@ -634,6 +634,8 @@ impl NonFinalizedState { block, &network, &history_tree, + // The non-finalized path doesn't precompute the auth data root. 
+ None, )); }); diff --git a/zebra-state/src/service/tests.rs b/zebra-state/src/service/tests.rs index 06cc4e36237..81ad0d0c48c 100644 --- a/zebra-state/src/service/tests.rs +++ b/zebra-state/src/service/tests.rs @@ -13,7 +13,10 @@ use zebra_chain::{ block::{self, Block, CountedHeader, Height}, chain_tip::ChainTip, fmt::SummaryDebug, + orchard, + parallel::commitment_aux::BlockCommitmentRoots, parameters::{Network, NetworkUpgrade}, + sapling, serialization::{ZcashDeserialize, ZcashDeserializeInto, ZcashSerialize}, transaction, transparent, value_balance::ValueBalance, @@ -25,8 +28,9 @@ use crate::{ arbitrary::Prepare, init_test, service::{ - arbitrary::populated_state, chain_tip::TipAction, headers_by_height_range, - non_finalized_state::Chain, StateService, + arbitrary::populated_state, block_roots_by_height_range, chain_tip::TipAction, + headers_by_height_range, non_finalized_state::Chain, root_covered_best_header_tip, + StateService, }, tests::{ setup::{partial_nu5_chain_strategy, transaction_v4_from_coinbase}, @@ -38,6 +42,23 @@ use crate::{ const LAST_BLOCK_HEIGHT: u32 = 10; +fn root_at(height: Height) -> BlockCommitmentRoots { + BlockCommitmentRoots { + height, + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + } +} + +fn roots_from_height(start_height: Height, count: usize) -> Vec { + (0..count) + .map(|offset| { + let offset = u32::try_from(offset).expect("test root count fits in u32"); + root_at(Height(start_height.0 + offset)) + }) + .collect() +} + async fn test_populated_state_responds_correctly( mut state: Buffer, Request>, ) -> Result<()> { @@ -517,6 +538,7 @@ async fn header_only_service_requests_preserve_body_boundary() -> std::result::R anchor: genesis.hash(), headers: vec![block1.header.clone(), block2.header.clone()], body_sizes: vec![999_999, 0], + tree_aux_roots: roots_from_height(Height(1), 2), }) .await?, Response::Committed(block2_hash), @@ -644,6 
+666,49 @@ async fn header_only_service_requests_preserve_body_boundary() -> std::result::R Ok(()) } +#[tokio::test(flavor = "multi_thread")] +async fn commit_header_range_rejects_missing_tree_aux_roots() -> std::result::Result<(), BoxError> { + let _init_guard = zebra_test::init(); + let network = Network::Mainnet; + let (state_service, _read_state, _, _) = + StateService::new(Config::ephemeral(), &network, Height::MAX, 0).await; + let genesis = + zebra_test::vectors::BLOCK_MAINNET_GENESIS_BYTES.zcash_deserialize_into::>()?; + let block1 = + zebra_test::vectors::BLOCK_MAINNET_1_BYTES.zcash_deserialize_into::>()?; + + let state = Buffer::new(BoxService::new(state_service), 1); + assert_eq!( + state + .clone() + .oneshot(Request::CommitCheckpointVerifiedBlock( + CheckpointVerifiedBlock::from(genesis.clone()), + )) + .await?, + Response::Committed(genesis.hash()), + ); + + let error = state + .oneshot(Request::CommitHeaderRange { + anchor: genesis.hash(), + headers: vec![block1.header.clone()], + body_sizes: vec![0], + tree_aux_roots: Vec::new(), + }) + .await + .expect_err("missing roots must reject a non-empty header range"); + + assert!(matches!( + error.downcast_ref::(), + Some(crate::CommitHeaderRangeError::TreeAuxRootCountMismatch { + headers: 1, + roots: 0, + }) + )); + + Ok(()) +} + /// A node still in the finalized (checkpoint) write phase must be able to commit /// a Zakura header range. 
/// @@ -703,6 +768,7 @@ async fn commit_header_range_completes_while_in_finalized_write_phase( anchor: genesis.hash(), headers: vec![block1.header.clone(), block2.header.clone()], body_sizes: vec![999_999, 0], + tree_aux_roots: roots_from_height(Height(1), 2), }), ) .await @@ -755,9 +821,11 @@ async fn header_range_reads_include_non_finalized_best_chain_blocks() -> Result< chain = chain.push(block1.clone().prepare().test_with_zero_spent_utxos())?; chain = chain.push(block2.clone().prepare().test_with_zero_spent_utxos())?; + let chain = Arc::new(chain); + assert_eq!( headers_by_height_range( - Some(Arc::new(chain)), + Some(chain.clone()), &state_service.read_service.db, start, 2, @@ -767,6 +835,44 @@ async fn header_range_reads_include_non_finalized_best_chain_blocks() -> Result< (start.next().unwrap(), block2_hash, block2.header.clone()), ], ); + let roots = block_roots_by_height_range(Some(chain), &state_service.read_service.db, start, 2); + assert_eq!(roots.len(), 2); + assert_eq!(roots[0].height, start); + assert_eq!(roots[1].height, start.next().unwrap()); + let verified_tip = ((start - 1).unwrap(), block::Hash([0; 32])); + let best_header_tip = (start.next().unwrap(), block2_hash); + assert_eq!( + root_covered_best_header_tip( + None::>, + &state_service.read_service.db, + Some(best_header_tip), + Some(verified_tip), + ), + Some(verified_tip), + "rootless durable header tips are capped to the verified block tip" + ); + assert_eq!( + root_covered_best_header_tip( + Some(Arc::new( + Chain::new( + &network, + (start - 1).unwrap(), + Default::default(), + Default::default(), + Default::default(), + Default::default(), + ValueBalance::fake_populated_pool(), + ) + .push(block1.prepare().test_with_zero_spent_utxos())? 
+ .push(block2.prepare().test_with_zero_spent_utxos())?, + )), + &state_service.read_service.db, + Some(best_header_tip), + Some(verified_tip), + ), + Some(best_header_tip), + "verified non-finalized roots allow the header tip to stay ahead" + ); assert_eq!( headers_by_height_range(None::>, &state_service.read_service.db, start, 2), Vec::new(), diff --git a/zebra-state/src/service/write.rs b/zebra-state/src/service/write.rs index 96070107433..c20c6a78913 100644 --- a/zebra-state/src/service/write.rs +++ b/zebra-state/src/service/write.rs @@ -3,8 +3,11 @@ use std::{ collections::VecDeque, path::{Path, PathBuf}, - sync::Arc, - time::Duration, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::{Duration, Instant}, }; use indexmap::IndexMap; @@ -16,12 +19,17 @@ use tokio::sync::{ use tracing::Span; use zebra_chain::block::{self, Height}; +use zebra_chain::parallel::{ + commitment_aux::BlockCommitmentRoots, + tree::{BlockNotePrecompute, NoteCommitmentTrees}, +}; + use crate::{ constants::MAX_BLOCK_REORG_HEIGHT, error::CommitHeaderRangeError, service::{ check, - finalized_state::{FinalizedState, ZebraDb}, + finalized_state::{spawn_note_precompute, FinalizedState, ZebraDb}, non_finalized_state::NonFinalizedState, queued_blocks::{QueuedCheckpointVerified, QueuedSemanticallyVerified}, ChainTipBlock, ChainTipSender, InvalidateError, ReconsiderError, @@ -36,6 +44,41 @@ use crate::service::{ non_finalized_state::Chain, }; +/// A speculatively-started note-commitment precompute for an upcoming finalized +/// block: the block hash it was started for, the channel to receive the result on, +/// and a flag to cancel it if the block is no longer going to be committed. +type PendingPrecompute = ( + block::Hash, + crossbeam_channel::Receiver, + Arc, +); + +/// Delay between retryable VCT root-miss commit attempts while the peer cache refills. 
+const VCT_ROOT_RETRY_WAIT: Duration = Duration::from_millis(500); + +/// Delay between retryable VCT await-successor commit attempts. Shorter than +/// [`VCT_ROOT_RETRY_WAIT`]: the root is already cached and only the next block needs to be +/// downloaded into the look-ahead, so a tighter poll keeps the one-block commit lag small. +const VCT_AWAIT_SUCCESSOR_WAIT: Duration = Duration::from_millis(20); + +/// How long a single checkpoint height may stay stuck on a retryable VCT root stall before +/// the committer escalates to an error-level log and a `state.vct.root.stalled.height` gauge. +/// Transient waits (a successor still downloading, a root still in flight) clear well within +/// this; staying stuck past it means no peer can serve a root the frozen frontier requires, +/// and — by design — the committer will not recompute against the stale frontier, so the node +/// cannot advance until a peer supplies it. Surfacing that loudly is the operator's only signal. +const VCT_ROOT_STALL_WARN_AFTER: Duration = Duration::from_secs(30); + +/// Cancels and drops a pending look-ahead precompute, if any. +/// +/// Tripping the flag tells the spawned task (started before the current block +/// committed) to stop instead of hashing a block that will not be committed. +fn cancel_pending_precompute(pending: &mut Option) { + if let Some((_hash, _rx, cancel)) = pending.take() { + cancel.store(true, Ordering::Relaxed); + } +} + /// The maximum size of the parent error map. /// /// We allow enough space for multiple concurrent chain forks with errors. 
@@ -126,11 +169,18 @@ fn commit_header_range( anchor: block::Hash, headers: Vec>, body_sizes: Vec, + tree_aux_roots: Vec, rsp_tx: oneshot::Sender>, ) { let mut batch = crate::service::finalized_state::DiskWriteBatch::new(); let result = batch - .prepare_header_range_batch(&finalized_state.db, anchor, &headers, &body_sizes) + .prepare_header_range_batch_with_roots( + &finalized_state.db, + anchor, + &headers, + &body_sizes, + &tree_aux_roots, + ) .and_then(|hash| { finalized_state .db @@ -183,6 +233,7 @@ pub enum NonFinalizedWriteMessage { anchor: block::Hash, headers: Vec>, body_sizes: Vec, + tree_aux_roots: Vec, rsp_tx: oneshot::Sender>, }, /// The hash of a block that should be invalidated and removed from @@ -317,9 +368,33 @@ impl WriteBlockWorkerTask { backup_dir_path, } = &mut self; - let mut prev_finalized_note_commitment_trees = None; + let mut prev_finalized_note_commitment_trees: Option = None; let mut deferred_non_finalized_messages = VecDeque::new(); + // One-block look-ahead so the next block's note-commitment tree hashing can + // be precomputed off the committer (on idle cores) while the current block + // commits. `pending_precompute` holds the receiver and cancellation flag for + // the block started last iteration; `finalized_lookahead` buffers the peeked + // next block. The precompute is keyed on the running tree sizes and only + // applied if those still match at commit time, so this never affects + // correctness, only speed. + // + // Because the next block's precompute is started before the current block + // commits, a current block that fails to commit (e.g. an invalid block from + // a peer) leaves that speculative work unwanted. Whenever this loop discards + // a pending precompute it trips the cancellation flag via + // [`cancel_pending_precompute`], so the spawned task stops instead of hashing + // a block that will never be committed. 
+ let mut pending_precompute: Option = None; + let mut finalized_lookahead: VecDeque = VecDeque::new(); + let mut retry_finalized_block: Option = None; + + // Tracks how long the committer has been stuck retrying a single VCT root stall, so a + // genuine stall (no peer can serve a frozen-frontier height) escalates to a loud, + // observable signal while a transient wait stays quiet. `(height, first-seen)`. + let mut vct_root_stall: Option<(Height, Instant)> = None; + let mut vct_root_stall_logged = false; + // Write all the finalized blocks sent by the state, // until the state closes the finalized block channel's sender. loop { @@ -328,9 +403,17 @@ impl WriteBlockWorkerTask { anchor, headers, body_sizes, + tree_aux_roots, rsp_tx, }) => { - commit_header_range(finalized_state, anchor, headers, body_sizes, rsp_tx); + commit_header_range( + finalized_state, + anchor, + headers, + body_sizes, + tree_aux_roots, + rsp_tx, + ); continue; } Ok(msg) => deferred_non_finalized_messages.push_back(msg), @@ -338,13 +421,19 @@ impl WriteBlockWorkerTask { Err(TryRecvError::Disconnected) => {} } - let ordered_block = match finalized_block_write_receiver.try_recv() { - Ok(block) => block, - Err(TryRecvError::Empty) => { - std::thread::park_timeout(Duration::from_millis(10)); - continue; - } - Err(TryRecvError::Disconnected) => break, + let ordered_block = match retry_finalized_block + .take() + .or_else(|| finalized_lookahead.pop_front()) + { + Some(block) => block, + None => match finalized_block_write_receiver.try_recv() { + Ok(block) => block, + Err(TryRecvError::Empty) => { + std::thread::park_timeout(Duration::from_millis(10)); + continue; + } + Err(TryRecvError::Disconnected) => break, + }, }; // TODO: split these checks into separate functions @@ -375,22 +464,196 @@ impl WriteBlockWorkerTask { Assuming a parent block failed, and dropping this block", ); + // The pipeline is broken; cancel and drop any look-ahead so the next + // precompute re-seeds from the real tip (a 
stale precompute would + // only fall back anyway, but cancelling stops the wasted hashing). + cancel_pending_precompute(&mut pending_precompute); + finalized_lookahead.clear(); + finalized_state.clear_vct_prevalidated_next(); + // We don't want to send a reset here, because it could overwrite a valid sent hash std::mem::drop(ordered_block); continue; } - // Try committing the block - match finalized_state - .commit_finalized(ordered_block, prev_finalized_note_commitment_trees.take()) + // Peek the next block and start its precompute, so the heavy hashing + // overlaps this block's commit. Its start sizes are the current tree + // sizes plus this block's note counts (the sizes after this block). + if finalized_lookahead.is_empty() { + if let Ok(next) = finalized_block_write_receiver.try_recv() { + finalized_lookahead.push_back(next); + } + } + + // A non-handoff VCT fast block's supplied roots are authenticated by + // its successor's header. If the successor is not buffered yet, keep + // this block local and wait instead of surfacing a checkpoint commit + // error through the invalid-block reset path. + if finalized_lookahead.is_empty() + && finalized_state.vct_fast_needs_successor(ordered_block.0.height) { + tracing::trace!( + height = ?ordered_block.0.height, + hash = ?ordered_block.0.hash, + "VCT: deferring fast checkpoint commit until successor is buffered" + ); + retry_finalized_block = Some(ordered_block); + std::thread::park_timeout(Duration::from_millis(10)); + continue; + } + + // Use the precompute for this block if we started it last iteration and + // it is for this exact block; otherwise cancel it (so the spawned task + // stops) and let the committer hash inline. 
+ let note_precompute = match pending_precompute.take() { + Some((hash, rx, _cancel)) if hash == ordered_block.0.hash => rx.recv().ok(), + Some((_hash, _rx, cancel)) => { + cancel.store(true, Ordering::Relaxed); + None + } + None => None, + }; + + // In verified-commitment-trees mode, the committer skips the + // note-commitment frontier entirely, so the off-thread precompute would + // just be discarded. Skip it only when the *next* block will actually + // take the vct path (its roots are already supplied). A legacy-fallback block + // (no peer roots yet, or never) still gets the precompute overlap. + let next_block_takes_vct_path = finalized_lookahead + .front() + .is_some_and(|next| finalized_state.vct_fast_will_apply(next.0.height)); + if !next_block_takes_vct_path { + if let (Some(trees), Some(next)) = ( + prev_finalized_note_commitment_trees.as_ref(), + finalized_lookahead.front(), + ) { + let block = &ordered_block.0.block; + let sapling_start = + trees.sapling.count() + block.sapling_note_commitments().count() as u64; + let orchard_start = + trees.orchard.count() + block.orchard_note_commitments().count() as u64; + let (rx, cancel) = + spawn_note_precompute(sapling_start, orchard_start, next.0.block.clone()); + pending_precompute = Some((next.0.hash, rx, cancel)); + } + } + + // The buffered successor (if any) lets the committer verify this block's + // verified-commitment-trees fixture roots before trusting them: a block's + // roots are only committed by the next block's header. Its auth data root + // is already precomputed by the checkpoint verifier. 
+ let next_checkpoint = finalized_lookahead + .front() + .map(|next| (next.0.block.clone(), next.0.auth_data_root)); + let prev_note_commitment_trees = prev_finalized_note_commitment_trees.take(); + let prev_note_commitment_trees_for_retry = prev_note_commitment_trees.clone(); + + let next_block_took_vct_path = + finalized_state.vct_fast_will_apply(ordered_block.0.height); + + // Try committing the block + match finalized_state.commit_finalized( + ordered_block, + prev_note_commitment_trees, + note_precompute, + next_checkpoint, + ) { Ok((finalized, note_commitment_trees)) => { + // Whether this successful commit consumed header-carried + // tree-aux roots to skip the note-commitment frontier rebuild. + if next_block_took_vct_path { + metrics::counter!("state.vct.fast_path.hit").increment(1); + } else { + metrics::counter!("state.vct.fast_path.miss").increment(1); + } + + // A successful commit clears any VCT root stall: log recovery and reset + // the stalled-height gauge if it had been raised. + if vct_root_stall.is_some() { + if vct_root_stall_logged { + info!( + stalled_height = ?vct_root_stall.map(|(h, _)| h), + "VCT: checkpoint commit recovered; the stalled height now has a verifiable supplied root" + ); + metrics::gauge!("state.vct.root.stalled.height").set(0.0); + } + vct_root_stall = None; + vct_root_stall_logged = false; + } + let tip_block = ChainTipBlock::from(finalized); prev_finalized_note_commitment_trees = Some(note_commitment_trees); chain_tip_sender.set_finalized_tip(tip_block); } - Err(error) => { + Err((ordered_block, error)) => { + // Retryable VCT root stalls (an absent/evicted root, or one not yet + // verifiable for lack of a buffered successor) park-and-retry the same + // block in place rather than resetting the queue. An absent root waits + // for header sync to deliver it; an await-successor stall just waits for + // the next block to be downloaded into the look-ahead, so it polls faster. 
+ if let Some(height) = error.vct_retryable_height() { + metrics::counter!("state.vct.root.retry.count").increment(1); + let needs_refetch = error.vct_supplied_root_unavailable_height(); + + // Escalate a stall that persists on the same height past the warn + // threshold: a transient wait resolves in a few polls and stays + // quiet, but a height stuck longer means no peer can serve a root the + // frozen frontier requires — the node will not advance (it will not, + // by design, recompute against the stale frontier). Surface it loudly. + match vct_root_stall { + Some((stuck, _)) if stuck == height => {} + _ => { + vct_root_stall = Some((height, Instant::now())); + vct_root_stall_logged = false; + } + } + if !vct_root_stall_logged + && vct_root_stall.is_some_and(|(_, since)| { + since.elapsed() >= VCT_ROOT_STALL_WARN_AFTER + }) + { + tracing::error!( + ?height, + awaiting_refetch = needs_refetch.is_some(), + stalled_for = ?VCT_ROOT_STALL_WARN_AFTER, + "VCT: checkpoint commit stalled with no verifiable supplied root; \ + the node cannot advance until a peer serves this height (it will \ + not recompute against the frozen frontier)" + ); + metrics::gauge!("state.vct.root.stalled.height") + .set(f64::from(height.0)); + vct_root_stall_logged = true; + } else { + tracing::warn!( + ?height, + block_height = ?ordered_block.0.height, + block_hash = ?ordered_block.0.hash, + awaiting_refetch = needs_refetch.is_some(), + "VCT: supplied root not yet verifiable; retrying checkpoint commit in place" + ); + } + + prev_finalized_note_commitment_trees = prev_note_commitment_trees_for_retry; + retry_finalized_block = Some(ordered_block); + cancel_pending_precompute(&mut pending_precompute); + std::thread::park_timeout(if needs_refetch.is_some() { + VCT_ROOT_RETRY_WAIT + } else { + VCT_AWAIT_SUCCESSOR_WAIT + }); + continue; + } + let finalized_tip = finalized_state.db.tip(); + let _ = ordered_block.1.send(Err(error.clone())); + + // The commit failed and the queue is being reset, so 
any + // look-ahead precompute is for a block that will not be + // committed: cancel it so the spawned task stops instead of + // hashing the discarded child, and clear the look-ahead. + cancel_pending_precompute(&mut pending_precompute); + finalized_lookahead.clear(); + finalized_state.clear_vct_prevalidated_next(); // The last block in the queue failed, so we can't commit the next block. // Instead, we need to reset the state queue, @@ -435,9 +698,17 @@ impl WriteBlockWorkerTask { anchor, headers, body_sizes, + tree_aux_roots, rsp_tx, } => { - commit_header_range(finalized_state, anchor, headers, body_sizes, rsp_tx); + commit_header_range( + finalized_state, + anchor, + headers, + body_sizes, + tree_aux_roots, + rsp_tx, + ); continue; } NonFinalizedWriteMessage::Invalidate { hash, rsp_tx } => { @@ -554,7 +825,7 @@ impl WriteBlockWorkerTask { tracing::trace!("finalizing block past the reorg limit"); let contextually_verified_with_trees = non_finalized_state.finalize(); prev_finalized_note_commitment_trees = finalized_state - .commit_finalized_direct(contextually_verified_with_trees, prev_finalized_note_commitment_trees.take(), "commit contextually-verified request") + .commit_finalized_direct(contextually_verified_with_trees, prev_finalized_note_commitment_trees.take(), None, None, "commit contextually-verified request") .expect( "unexpected finalized block commit error: note commitment and history trees were already checked by the non-finalized state", ).1.into(); diff --git a/zebra-state/src/tests/setup.rs b/zebra-state/src/tests/setup.rs index 7c4c4a0bd6c..90a43963547 100644 --- a/zebra-state/src/tests/setup.rs +++ b/zebra-state/src/tests/setup.rs @@ -113,7 +113,7 @@ pub(crate) fn new_state_with_mainnet_genesis( let genesis = CheckpointVerifiedBlock::from(genesis); finalized_state - .commit_finalized_direct(genesis.clone().into(), None, "test") + .commit_finalized_direct(genesis.clone().into(), None, None, None, "test") .expect("unexpected invalid genesis 
block test vector"); assert_eq!( diff --git a/zebra-utils/Cargo.toml b/zebra-utils/Cargo.toml index 5d6891c08ff..1f7147edd73 100644 --- a/zebra-utils/Cargo.toml +++ b/zebra-utils/Cargo.toml @@ -41,7 +41,8 @@ zebra-checkpoints = [ "itertools", "tokio", "zebra-chain/json-conversion", - "zebra-node-services/rpc-client" + "zebra-node-services/rpc-client", + "zebra-state", ] search-issue-refs = [ @@ -62,6 +63,7 @@ thiserror = { workspace = true } zebra-node-services = { path = "../zebra-node-services", version = "7.0.0" } zebra-chain = { path = "../zebra-chain", version = "9.0.0" } +zebra-state = { path = "../zebra-state", version = "8.0.0", optional = true } # These crates are needed for the block-template-to-proposal binary zebra-rpc = { path = "../zebra-rpc", version = "9.0.0" } diff --git a/zebra-utils/src/bin/zebra-checkpoints/args.rs b/zebra-utils/src/bin/zebra-checkpoints/args.rs index dfc0ee29401..b685786e05d 100644 --- a/zebra-utils/src/bin/zebra-checkpoints/args.rs +++ b/zebra-utils/src/bin/zebra-checkpoints/args.rs @@ -2,7 +2,7 @@ //! //! For usage please refer to the program help: `zebra-checkpoints --help` -use std::{net::SocketAddr, str::FromStr}; +use std::{net::SocketAddr, path::PathBuf, str::FromStr}; use structopt::StructOpt; use thiserror::Error; @@ -91,6 +91,37 @@ impl FromStr for Transport { #[error("Invalid transport: {0}")] pub struct InvalidTransportError(String); +/// The checkpoint height whose final frontier should be emitted. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum FrontierHeight { + /// Use the highest checkpoint height selected by this run. + Auto, + /// Use an explicit checkpoint height. 
+ Explicit(Height), +} + +impl FromStr for FrontierHeight { + type Err = InvalidFrontierHeightError; + + fn from_str(string: &str) -> Result { + if string.eq_ignore_ascii_case("auto") { + return Ok(FrontierHeight::Auto); + } + + let height = string + .parse::() + .map(Height) + .map_err(|_| InvalidFrontierHeightError(string.to_owned()))?; + + Ok(FrontierHeight::Explicit(height)) + } +} + +/// An error indicating that the supplied string is not a valid frontier height. +#[derive(Clone, Debug, Error, PartialEq, Eq)] +#[error("Invalid frontier height: {0}")] +pub struct InvalidFrontierHeightError(String); + /// zebra-checkpoints arguments #[derive(Clone, Debug, Eq, PartialEq, StructOpt)] pub struct Args { @@ -117,8 +148,41 @@ pub struct Args { #[structopt(short, long)] pub last_checkpoint: Option, + /// Write Mainnet VCT final-frontier bytes to this path. + /// + /// This is an explicit side artifact for checkpoint maintenance; checkpoint lines are still + /// printed unchanged on stdout. + #[structopt(long)] + pub mainnet_frontier_output: Option, + + /// Zebra state cache directory used to read the final-frontier trees. + /// + /// Required when `--mainnet-frontier-output` is supplied. + #[structopt(long)] + pub state_cache_dir: Option, + + /// Frontier height to write, or `auto` to use the highest checkpoint emitted by this run. + #[structopt(long, default_value = "auto")] + pub frontier_height: FrontierHeight, + /// Passthrough args for `zcash-cli`. /// Only used if the transport is [`Cli`](Transport::Cli). 
#[structopt(last = true)] pub zcli_args: Vec, } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn frontier_height_parses_auto_or_explicit_height() { + assert_eq!("auto".parse(), Ok(FrontierHeight::Auto)); + assert_eq!("AUTO".parse(), Ok(FrontierHeight::Auto)); + assert_eq!( + "12345".parse(), + Ok(FrontierHeight::Explicit(Height(12_345))) + ); + assert!("not-a-height".parse::().is_err()); + } +} diff --git a/zebra-utils/src/bin/zebra-checkpoints/main.rs b/zebra-utils/src/bin/zebra-checkpoints/main.rs index e4cd5a58f2d..dd694f78627 100644 --- a/zebra-utils/src/bin/zebra-checkpoints/main.rs +++ b/zebra-utils/src/bin/zebra-checkpoints/main.rs @@ -8,7 +8,7 @@ //! zebra-consensus accepts an ordered list of checkpoints, starting with the //! genesis block. Checkpoint heights can be chosen arbitrarily. -use std::{ffi::OsString, process::Stdio}; +use std::{ffi::OsString, path::Path, process::Stdio}; #[cfg(unix)] use std::os::unix::process::ExitStatusExt; @@ -23,6 +23,7 @@ use structopt::StructOpt; use zebra_chain::{ block::{self, Block, Height, HeightDiff, TryIntoHeight}, + parameters::Network, serialization::ZcashDeserializeInto, transparent::MIN_TRANSPARENT_COINBASE_MATURITY, }; @@ -34,7 +35,7 @@ use zebra_utils::init_tracing; pub mod args; -use args::{Args, Backend, Transport}; +use args::{Args, Backend, FrontierHeight, Transport}; /// Make an RPC call based on `our_args` and `rpc_command`, and return the response as a [`Value`]. async fn rpc_output(our_args: &Args, method: M, params: I) -> Result @@ -135,6 +136,47 @@ where Ok(response) } +/// Write the Mainnet VCT final-frontier artifact for `frontier_height`. 
+#[allow(clippy::print_stderr)] +fn write_mainnet_frontier( + frontier_output: &Path, + state_cache_dir: &Path, + frontier_height: Height, +) -> Result<()> { + let config = zebra_state::Config { + cache_dir: state_cache_dir.to_path_buf(), + ephemeral: false, + ..zebra_state::Config::default() + }; + + let (_read_state, db, _non_finalized_state_sender) = + zebra_state::init_read_only(config, &Network::Mainnet); + let bytes = zebra_state::produce_final_frontiers_bytes(&db, frontier_height)?; + zebra_state::validate_final_frontiers_bytes(&bytes, frontier_height)?; + std::fs::write(frontier_output, &bytes)?; + + eprintln!( + "Wrote Mainnet VCT final frontier for height {:?} to {}", + frontier_height, + frontier_output.display() + ); + + Ok(()) +} + +fn resolve_frontier_height( + selection: &FrontierHeight, + last_checkpoint_height: Height, +) -> Result { + match selection { + FrontierHeight::Auto if last_checkpoint_height == Height::MIN => Err(eyre!( + "--frontier-height auto requires at least one generated checkpoint above genesis" + )), + FrontierHeight::Auto => Ok(last_checkpoint_height), + FrontierHeight::Explicit(height) => Ok(*height), + } +} + /// Process entry point for `zebra-checkpoints` #[tokio::main] #[allow(clippy::print_stdout, clippy::print_stderr, clippy::unwrap_in_result)] @@ -281,5 +323,34 @@ async fn main() -> Result<()> { } } + if let Some(frontier_output) = &args.mainnet_frontier_output { + let state_cache_dir = args.state_cache_dir.as_deref().ok_or_else(|| { + eyre!("--state-cache-dir is required when --mainnet-frontier-output is supplied") + })?; + let frontier_height = + resolve_frontier_height(&args.frontier_height, last_checkpoint_height)?; + + write_mainnet_frontier(frontier_output, state_cache_dir, frontier_height) + .with_suggestion(|| "Hint: run this against a synced Mainnet Zebra state")?; + } + Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn frontier_height_auto_requires_a_generated_checkpoint() { + 
assert!(resolve_frontier_height(&FrontierHeight::Auto, Height::MIN).is_err()); + assert_eq!( + resolve_frontier_height(&FrontierHeight::Auto, Height(100)).unwrap(), + Height(100) + ); + assert_eq!( + resolve_frontier_height(&FrontierHeight::Explicit(Height(42)), Height::MIN).unwrap(), + Height(42) + ); + } +} diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index 375507a855a..9abacb0cb02 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -543,6 +543,10 @@ impl StartCmd { let mut state_config = config.state.clone(); state_config.enable_zakura_header_seed_from_committed_blocks = config.network.v2_p2p; + // State owns the VCT commit path, but users configure its checkpoint-sync controls + // together under `[consensus]`. + state_config.checkpoint_sync = config.consensus.checkpoint_sync; + state_config.disable_vct_fast_sync = config.consensus.disable_vct_fast_sync; let (state_service, read_only_state_service, latest_chain_tip, chain_tip_change) = zebra_state::init( @@ -2049,6 +2053,7 @@ mod zakura_header_sync_driver_tests { use tower::{service_fn, util::BoxService, ServiceExt}; use zebra_chain::block; use zebra_chain::serialization::ZcashDeserializeInto; + use zebra_chain::{orchard, parallel::commitment_aux::BlockCommitmentRoots, sapling}; use zebra_network::zakura::testkit::{TraceCapture, TraceValue}; use zebra_network::zakura::{ commit_state_trace as cs_trace, BlockApplyResult, BlockSizeEstimate, BlockSyncAction, @@ -2060,22 +2065,32 @@ mod zakura_header_sync_driver_tests { use zebra_test::vectors::{BLOCK_MAINNET_1_BYTES, BLOCK_MAINNET_2_BYTES}; use super::zakura::{ - apply_block_sync_body, block_apply_class, block_sync_chain_tip_event, - block_sync_missing_body_window, block_sync_needed_blocks_from_state, - block_verify_error_is_duplicate, body_sizes_for_served_header_range, - chain_tip_mirror_frontier_change, coalesce_ready_needed_block_queries, - coalesce_stale_needed_block_queries, commit_block_sync_body, 
drive_block_sync_actions, - drive_zakura_header_sync_actions, header_range_commit_failure_kind, - notify_block_sync_header_tip, query_block_sync_frontiers, query_block_sync_needed_blocks, - verified_block_tip_from_state, BlockApplyClass, BlocksyncThroughputProbe, - ZakuraHeaderSyncDriverHandles, ZAKURA_BLOCK_SYNC_CHECKPOINT_FRONTIER_REFRESH_INTERVAL, - ZAKURA_BLOCK_SYNC_DRIVER_TIMEOUT, ZAKURA_BLOCK_SYNC_MISSING_BODY_WINDOW, + apply_block_sync_body, block_apply_class, block_roots_cover_range, + block_sync_chain_tip_event, block_sync_missing_body_window, + block_sync_needed_blocks_from_state, block_verify_error_is_duplicate, + body_sizes_for_served_header_range, chain_tip_mirror_frontier_change, + coalesce_ready_needed_block_queries, coalesce_stale_needed_block_queries, + commit_block_sync_body, drive_block_sync_actions, drive_zakura_header_sync_actions, + header_range_commit_failure_kind, notify_block_sync_header_tip, query_block_sync_frontiers, + query_block_sync_needed_blocks, root_covered_query_best_header_tip, + tree_aux_roots_for_served_header_range, verified_block_tip_from_state, BlockApplyClass, + BlocksyncThroughputProbe, ZakuraHeaderSyncDriverHandles, + ZAKURA_BLOCK_SYNC_CHECKPOINT_FRONTIER_REFRESH_INTERVAL, ZAKURA_BLOCK_SYNC_DRIVER_TIMEOUT, + ZAKURA_BLOCK_SYNC_MISSING_BODY_WINDOW, }; fn mainnet_block(bytes: &[u8]) -> Arc { Arc::new(bytes.zcash_deserialize_into().expect("block vector parses")) } + fn root_at(height: block::Height) -> BlockCommitmentRoots { + BlockCommitmentRoots { + height, + sapling_root: sapling::tree::NoteCommitmentTree::default().root(), + orchard_root: orchard::tree::NoteCommitmentTree::default().root(), + } + } + #[derive(Debug)] struct NoopZakuraService; @@ -2156,6 +2171,103 @@ mod zakura_header_sync_driver_tests { body_sizes_for_served_header_range(start, header_heights, &[]), vec![0, 0, 0, 0], ); + + assert_eq!( + body_sizes_for_served_header_range( + start, + [block::Height(9), block::Height(10)], + &body_size_hints, + ), + 
vec![0, 100], + ); + } + + #[test] + fn served_header_tree_aux_roots_require_complete_coverage() { + let start = block::Height(10); + let header_heights = [ + block::Height(10), + block::Height(11), + block::Height(12), + block::Height(13), + ]; + let roots = [root_at(block::Height(10)), root_at(block::Height(11))]; + + assert!( + tree_aux_roots_for_served_header_range(start, header_heights, &roots).is_err(), + "partial root coverage is reported before serving rootless headers" + ); + + let roots_with_gap = [ + root_at(block::Height(10)), + root_at(block::Height(12)), + root_at(block::Height(13)), + ]; + assert!( + tree_aux_roots_for_served_header_range(start, header_heights, &roots_with_gap).is_err(), + "root gaps are reported before serving rootless headers" + ); + + let complete_roots = [ + root_at(block::Height(10)), + root_at(block::Height(11)), + root_at(block::Height(12)), + root_at(block::Height(13)), + ]; + assert_eq!( + tree_aux_roots_for_served_header_range(start, header_heights, &complete_roots) + .expect("complete roots match the served header range"), + complete_roots.to_vec(), + "complete root coverage is attached to the served header range" + ); + } + + #[test] + fn startup_root_backfill_gate_requires_complete_root_coverage() { + let start = block::Height(10); + let complete_roots = [ + root_at(block::Height(10)), + root_at(block::Height(11)), + root_at(block::Height(12)), + ]; + assert!(block_roots_cover_range(start, 3, &complete_roots)); + assert!(!block_roots_cover_range(start, 3, &complete_roots[..2])); + + let roots_with_gap = [ + root_at(block::Height(10)), + root_at(block::Height(12)), + root_at(block::Height(13)), + ]; + assert!(!block_roots_cover_range(start, 3, &roots_with_gap)); + } + + #[tokio::test] + async fn query_best_header_tip_is_capped_when_roots_are_missing() { + let verified_tip = (block::Height(0), block::Hash([0; 32])); + let durable_header_tip = (block::Height(2), block::Hash([2; 32])); + let read_state = service_fn(move 
|request: zebra_state::ReadRequest| async move { + match request { + zebra_state::ReadRequest::Tip => Ok::<_, zebra_state::BoxError>( + zebra_state::ReadResponse::Tip(Some(verified_tip)), + ), + zebra_state::ReadRequest::BlockRoots { + start_height, + count, + } => { + assert_eq!(start_height, block::Height(1)); + assert_eq!(count, 2); + Ok(zebra_state::ReadResponse::BlockRoots(Vec::new())) + } + request => panic!("unexpected read request: {request:?}"), + } + }); + + assert_eq!( + root_covered_query_best_header_tip(read_state, durable_header_tip) + .await + .expect("capped query succeeds"), + verified_tip + ); } #[test] diff --git a/zebrad/src/commands/start/zakura/header_sync_driver.rs b/zebrad/src/commands/start/zakura/header_sync_driver.rs index 434b99ee15f..a1d9ca0bec7 100644 --- a/zebrad/src/commands/start/zakura/header_sync_driver.rs +++ b/zebrad/src/commands/start/zakura/header_sync_driver.rs @@ -8,6 +8,7 @@ use tracing::{debug, warn}; use zebra_chain::{ block::{self}, chain_tip::ChainTip, + parallel::commitment_aux::BlockCommitmentRoots, }; use zebra_network::zakura::{ commit_state_trace as cs_trace, BlockSyncFrontiers, Frontier, FrontierChange, HeaderSyncAction, @@ -48,6 +49,7 @@ pub(crate) async fn zakura_header_sync_driver_startup( }; let verified_block_tip = match read_state + .clone() .oneshot(zebra_state::ReadRequest::Tip) .await .map_err(|error| eyre!("{error}"))? 
@@ -60,17 +62,118 @@ pub(crate) async fn zakura_header_sync_driver_startup( let finalized_height = finalized_tip.map_or(block::Height(0), |(height, _)| height); let verified_block_tip = verified_block_tip_from_state(finalized_tip, verified_block_tip, empty_state_tip); + let best_header_tip = root_covered_best_header_tip_or_verified( + read_state, + best_header_tip.unwrap_or(empty_state_tip), + verified_block_tip, + ) + .await?; + Ok(ZakuraHeaderSyncDriverStartup { frontiers: HeaderSyncFrontiers { finalized_height, verified_block_tip: verified_block_tip.0, verified_block_hash: verified_block_tip.1, }, - best_header_tip: Some(best_header_tip.unwrap_or(empty_state_tip)), + best_header_tip: Some(best_header_tip), verified_block_tip_hash: verified_block_tip.1, }) } +async fn root_covered_best_header_tip_or_verified( + read_state: ReadState, + best_header_tip: (block::Height, block::Hash), + verified_block_tip: (block::Height, block::Hash), +) -> Result<(block::Height, block::Hash), Report> +where + ReadState: Service< + zebra_state::ReadRequest, + Response = zebra_state::ReadResponse, + Error = zebra_state::BoxError, + > + Send + + 'static, + ReadState::Future: Send + 'static, +{ + if best_header_tip.0 <= verified_block_tip.0 { + return Ok(best_header_tip); + } + + let Ok(start_height) = verified_block_tip.0.next() else { + return Ok(verified_block_tip); + }; + let best_header_height = best_header_tip.0; + let verified_block_height = verified_block_tip.0; + let count = best_header_height + .0 + .checked_sub(verified_block_height.0) + .ok_or_else(|| eyre!("best header tip is unexpectedly below verified block tip"))?; + let roots = match read_state + .oneshot(zebra_state::ReadRequest::BlockRoots { + start_height, + count, + }) + .await + .map_err(|error| eyre!("{error}"))? 
+ { + zebra_state::ReadResponse::BlockRoots(roots) => roots, + response => Err(eyre!("unexpected BlockRoots response: {response:?}"))?, + }; + + if block_roots_cover_range(start_height, count, &roots) { + Ok(best_header_tip) + } else { + Ok(verified_block_tip) + } +} + +pub(crate) async fn root_covered_query_best_header_tip( + read_state: ReadState, + best_header_tip: (block::Height, block::Hash), +) -> Result<(block::Height, block::Hash), Report> +where + ReadState: Service< + zebra_state::ReadRequest, + Response = zebra_state::ReadResponse, + Error = zebra_state::BoxError, + > + Clone + + Send + + 'static, + ReadState::Future: Send + 'static, +{ + let verified_block_tip = match read_state + .clone() + .oneshot(zebra_state::ReadRequest::Tip) + .await + .map_err(|error| eyre!("{error}"))? + { + zebra_state::ReadResponse::Tip(Some(tip)) => tip, + zebra_state::ReadResponse::Tip(None) => return Ok(best_header_tip), + response => Err(eyre!("unexpected Tip response: {response:?}"))?, + }; + + root_covered_best_header_tip_or_verified(read_state, best_header_tip, verified_block_tip).await +} + +pub(crate) fn block_roots_cover_range( + start_height: block::Height, + count: u32, + roots: &[BlockCommitmentRoots], +) -> bool { + if roots.len() != usize::try_from(count).unwrap_or(usize::MAX) { + return false; + } + + roots.iter().enumerate().all(|(offset, roots)| { + let Ok(offset) = u32::try_from(offset) else { + return false; + }; + start_height + .0 + .checked_add(offset) + .is_some_and(|height| roots.height == block::Height(height)) + }) +} + #[derive(Clone)] pub(crate) struct ZakuraHeaderSyncDriverHandles { pub(crate) endpoint: ZakuraEndpoint, @@ -268,7 +371,12 @@ pub(crate) async fn drive_zakura_header_sync_actions { + HeaderSyncAction::QueryHeadersByHeightRange { + peer, + start, + count, + want_tree_aux_roots, + } => { trace_state_read_start( &trace, "query_headers_by_height_range", @@ -346,9 +454,82 @@ pub(crate) async fn drive_zakura_header_sync_actions roots, + 
Ok(response) => { + trace_state_read_error( + &trace, + "block_roots", + Some(&peer), + start, + count, + "unexpected_response", + started, + ); + warn!(?peer, ?response, "unexpected BlockRoots response"); + Vec::new() + } + Err(error) => { + trace_state_read_error( + &trace, + "block_roots", + Some(&peer), + start, + count, + &format!("{error}"), + started, + ); + warn!( + ?peer, + ?error, + "failed to read Zakura BlockRoots response from state" + ); + Vec::new() + } + } + } else { + Vec::new() + }; + let header_heights: Vec<_> = + headers.iter().map(|(height, _, _)| *height).collect(); + let tree_aux_roots = if want_tree_aux_roots { + tree_aux_roots_for_served_header_range( + start, + header_heights.iter().copied(), + &block_roots, + ) + .unwrap_or_else(|error| { + debug!( + ?peer, + ?start, + requested_count = count, + ?error, + "serving header range without tree aux roots" + ); + + Vec::new() + }) + } else { + Vec::new() + }; let body_sizes = body_sizes_for_served_header_range( start, - headers.iter().map(|(height, _, _)| *height), + header_heights.iter().copied(), &body_size_hints, ); let headers = headers @@ -369,8 +550,10 @@ pub(crate) async fn drive_zakura_header_sync_actions { let count = u32::try_from(headers.len()).unwrap_or(u32::MAX); + let tree_aux_roots_len = u32::try_from(tree_aux_roots.len()).unwrap_or(u32::MAX); emit_commit_state( &trace, cs_trace::COMMIT_START, @@ -442,6 +627,11 @@ pub(crate) async fn drive_zakura_header_sync_actions { + Ok(zebra_state::ReadResponse::BestHeaderTip(Some(best_header_tip))) => { + let (tip_height, tip_hash) = match root_covered_query_best_header_tip( + read_state.clone(), + best_header_tip, + ) + .await + { + Ok(tip) => tip, + Err(error) => { + trace_state_read_error( + &trace, + "query_best_header_tip_roots", + None, + best_header_tip.0, + 1, + &format!("{error}"), + started, + ); + warn!( + ?error, + "failed to apply Zakura root coverage to best header tip" + ); + continue; + } + }; emit_commit_state( &trace, 
cs_trace::STATE_READ_SUCCESS, @@ -731,6 +952,10 @@ pub(crate) fn body_sizes_for_served_header_range( header_heights .into_iter() .map(|height| { + if height < start { + return 0; + } + let Some(offset) = usize::try_from(height - start).ok() else { return 0; }; @@ -745,6 +970,61 @@ pub(crate) fn body_sizes_for_served_header_range( .collect() } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum TreeAuxRootsForServedHeaderRangeError { + HeaderBeforeStart { + start: block::Height, + height: block::Height, + }, + OffsetOutOfRange { + start: block::Height, + height: block::Height, + }, + MissingRoot { + height: block::Height, + offset: usize, + }, + RootHeightMismatch { + expected_height: block::Height, + actual_height: block::Height, + offset: usize, + }, +} + +pub(crate) fn tree_aux_roots_for_served_header_range( + start: block::Height, + header_heights: impl IntoIterator, + block_roots: &[BlockCommitmentRoots], +) -> Result, TreeAuxRootsForServedHeaderRangeError> { + let mut roots = Vec::new(); + + for height in header_heights { + if height < start { + return Err(TreeAuxRootsForServedHeaderRangeError::HeaderBeforeStart { start, height }); + } + + let Some(offset) = usize::try_from(height - start).ok() else { + return Err(TreeAuxRootsForServedHeaderRangeError::OffsetOutOfRange { start, height }); + }; + + let Some(root) = block_roots.get(offset) else { + return Err(TreeAuxRootsForServedHeaderRangeError::MissingRoot { height, offset }); + }; + + if root.height != height { + return Err(TreeAuxRootsForServedHeaderRangeError::RootHeightMismatch { + expected_height: height, + actual_height: root.height, + offset, + }); + } + + roots.push(root.clone()); + } + + Ok(roots) +} + async fn log_missing_block_bodies( read_state: ReadState, from: block::Height, @@ -837,6 +1117,9 @@ pub(crate) fn header_range_commit_failure_kind( } zebra_state::CommitHeaderRangeError::EmptyRange | zebra_state::CommitHeaderRangeError::RangeTooLong { .. 
} + | zebra_state::CommitHeaderRangeError::BodySizeCountMismatch { .. } + | zebra_state::CommitHeaderRangeError::TreeAuxRootCountMismatch { .. } + | zebra_state::CommitHeaderRangeError::TreeAuxRootHeightMismatch { .. } | zebra_state::CommitHeaderRangeError::UnknownAnchor { .. } | zebra_state::CommitHeaderRangeError::HeightOverflow | zebra_state::CommitHeaderRangeError::ImmutableConflict { .. } @@ -1102,7 +1385,9 @@ fn trace_header_driver_action(trace: &ZakuraTrace, action: &HeaderSyncAction) { HeaderSyncAction::QueryBestHeaderTip => { insert_cs_str(row, cs_trace::ACTION, "query_best_header_tip"); } - HeaderSyncAction::QueryHeadersByHeightRange { peer, start, count } => { + HeaderSyncAction::QueryHeadersByHeightRange { + peer, start, count, .. + } => { insert_cs_str(row, cs_trace::ACTION, "query_headers_by_height_range"); insert_cs_peer(row, cs_trace::PEER, peer); insert_cs_height(row, cs_trace::RANGE_START, *start); diff --git a/zebrad/src/commands/start/zakura/mod.rs b/zebrad/src/commands/start/zakura/mod.rs index aba11f4ca51..bff8f791fd8 100644 --- a/zebrad/src/commands/start/zakura/mod.rs +++ b/zebrad/src/commands/start/zakura/mod.rs @@ -24,9 +24,10 @@ pub(crate) use block_sync_driver::{ pub(crate) use frontier::{query_block_sync_frontiers, verified_block_tip_from_state}; #[cfg(test)] pub(crate) use header_sync_driver::{ - block_sync_chain_tip_event, body_sizes_for_served_header_range, + block_roots_cover_range, block_sync_chain_tip_event, body_sizes_for_served_header_range, chain_tip_mirror_frontier_change, header_range_commit_failure_kind, - notify_block_sync_header_tip, + notify_block_sync_header_tip, root_covered_query_best_header_tip, + tree_aux_roots_for_served_header_range, }; pub(crate) use header_sync_driver::{ drive_zakura_header_sync_actions, mirror_zakura_full_block_commits, diff --git a/zebrad/src/components/inbound.rs b/zebrad/src/components/inbound.rs index 742cee12218..553dda61962 100644 --- a/zebrad/src/components/inbound.rs +++ 
b/zebrad/src/components/inbound.rs @@ -422,7 +422,12 @@ impl Service for Inbound { Ok(response) }.boxed() } - zn::Request::BlocksByHash(hashes) | zn::Request::BlocksByHashFrom { hashes, .. } => { + // `HedgedBlocksByHash` is an outbound-only routing directive (the peer set + // rewrites it before it reaches a peer), so peers never send it to us. Handle + // it identically as a defensive fallback. + zn::Request::BlocksByHash(hashes) + | zn::Request::BlocksByHashFrom { hashes, .. } + | zn::Request::HedgedBlocksByHash { hashes, .. } => { // We return an available or missing response to each inventory request, // unless the request is empty, or it reaches a response limit. if hashes.is_empty() { diff --git a/zebrad/src/components/sync.rs b/zebrad/src/components/sync.rs index 4ff476d17b6..10a2199e7dc 100644 --- a/zebrad/src/components/sync.rs +++ b/zebrad/src/components/sync.rs @@ -727,6 +727,13 @@ where /// backing off isn't dropped: every registry-missed required block stays scheduled. registry_miss_retry: HashMap, + /// Fanout for the head-of-line hedge: when a required block registry-misses, its + /// backoff retry is re-dispatched to this many random ready peers (ignoring inventory + /// markers) instead of a single peer, bypassing stale "missing" markers. `0` disables + /// hedging (plain single-peer retry). Read once at construction from the + /// `SYNC_HOL_HEDGE_FANOUT` env var; prototype-only A/B gate. + hol_hedge_fanout: usize, + /// Receiver that is `true` when the downloader is past the lookahead limit. /// This is based on the downloaded block height and the state tip height. 
past_lookahead_limit_receiver: zs::WatchReceiver, @@ -848,6 +855,7 @@ where verifier, latest_chain_tip.clone(), past_lookahead_limit_sender, + config.network.network.clone(), max( checkpoint_verify_concurrency_limit, full_verify_concurrency_limit, @@ -870,6 +878,10 @@ where missing_block_retry_counts: HashMap::new(), registry_miss_retry_counts: HashMap::new(), registry_miss_retry: HashMap::new(), + hol_hedge_fanout: std::env::var("SYNC_HOL_HEDGE_FANOUT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(0), past_lookahead_limit_receiver, misbehavior_sender, }; @@ -1259,7 +1271,19 @@ where for hash in due { self.registry_miss_retry.remove(&hash); - match self.downloads.download_and_verify(hash).await { + // Re-dispatch the head-of-line block. When hedging is enabled, fan the + // retry out to several random ready peers (ignoring stale inventory + // markers) and take the first delivery; otherwise fall back to the + // single-peer download. + let dispatch = if self.hol_hedge_fanout > 0 { + self.downloads + .download_and_verify_hedged(hash, self.hol_hedge_fanout) + .await + } else { + self.downloads.download_and_verify(hash).await + }; + + match dispatch { Ok(()) | Err(BlockDownloadVerifyError::DuplicateBlockQueuedForDownload { .. diff --git a/zebrad/src/components/sync/downloads.rs b/zebrad/src/components/sync/downloads.rs index 27135e3d510..d873c9ce46a 100644 --- a/zebrad/src/components/sync/downloads.rs +++ b/zebrad/src/components/sync/downloads.rs @@ -26,6 +26,7 @@ use tracing_futures::Instrument; use zebra_chain::{ block::{self, Height, HeightDiff}, chain_tip::ChainTip, + parameters::Network, }; use zebra_network::{self as zn, PeerSocketAddr}; use zebra_state as zs; @@ -254,6 +255,9 @@ where // Configuration // + /// The configured Zcash network. + chain_network: Network, + /// The configured lookahead limit, after applying the minimum limit. 
lookahead_limit: usize, @@ -356,6 +360,7 @@ where verifier: ZV, latest_chain_tip: ZSTip, past_lookahead_limit_sender: watch::Sender, + chain_network: Network, lookahead_limit: usize, max_checkpoint_height: Height, ) -> Self { @@ -366,6 +371,7 @@ where network, verifier, latest_chain_tip, + chain_network, lookahead_limit, max_checkpoint_height, past_lookahead_limit_sender: Arc::new(std::sync::Mutex::new( @@ -377,15 +383,48 @@ where } } - /// Queue a block for download and verification. + /// Queue a block for download and verification, routed to a single peer via the + /// normal inventory-aware path. /// /// This method waits for the network to become ready, and returns an error /// only if the network service fails. It returns immediately after queuing /// the request. - #[instrument(level = "debug", skip(self), fields(%hash))] pub async fn download_and_verify( &mut self, hash: block::Hash, + ) -> Result<(), BlockDownloadVerifyError> { + let request = zn::Request::BlocksByHash(std::iter::once(hash).collect()); + self.queue_download(hash, request).await + } + + /// Queue the head-of-line block for a *hedged* download: the peer set fans the + /// request out to up to `fanout` random ready peers (ignoring inventory markers) + /// and resolves with the first peer that delivers the block. + /// + /// Used only for the registry-miss retry, to bypass stale "missing" markers that + /// stall ordered commit. Otherwise identical to [`Self::download_and_verify`]. + pub async fn download_and_verify_hedged( + &mut self, + hash: block::Hash, + fanout: usize, + ) -> Result<(), BlockDownloadVerifyError> { + let request = zn::Request::HedgedBlocksByHash { + hashes: std::iter::once(hash).collect(), + fanout, + }; + self.queue_download(hash, request).await + } + + /// Queue a block for download and verification using the given network `request`. + /// + /// The `request` must resolve to a [`zn::Response::Blocks`] for `hash`. 
It returns + /// an error only if the network service fails, and returns immediately after + /// queuing the request. + #[instrument(level = "debug", skip(self, request), fields(%hash))] + async fn queue_download( + &mut self, + hash: block::Hash, + request: zn::Request, ) -> Result<(), BlockDownloadVerifyError> { if self.cancel_handles.contains_key(&hash) { metrics::counter!("sync.already.queued.dropped.block.hash.count").increment(1); @@ -404,7 +443,7 @@ where .ready() .await .map_err(|error| BlockDownloadVerifyError::NetworkServiceError { error })? - .call(zn::Request::BlocksByHash(std::iter::once(hash).collect())); + .call(request); // This oneshot is used to signal cancellation to the download task. let (cancel_tx, mut cancel_rx) = oneshot::channel::<()>(); @@ -414,6 +453,7 @@ where let lookahead_limit = self.lookahead_limit; let max_checkpoint_height = self.max_checkpoint_height; + let chain_network = self.chain_network.clone(); let past_lookahead_limit_sender = self.past_lookahead_limit_sender.clone(); let past_lookahead_limit_receiver = self.past_lookahead_limit_receiver.clone(); @@ -598,6 +638,20 @@ where Err(BlockDownloadVerifyError::BehindTipHeightLimit { height: block_height, hash })?; } + let request = zebra_consensus::Request::create_commit_request( + block, + block_height, + max_checkpoint_height, + chain_network, + ) + .await + .map_err(|error| BlockDownloadVerifyError::Invalid { + error: error.into(), + height: block_height, + hash, + advertiser_addr, + })?; + // Wait for the verifier service to be ready. let readiness = verifier.ready(); // Prefer the cancel handle if both are ready. @@ -615,7 +669,7 @@ where let verify_start = std::time::Instant::now(); let mut rsp = verifier .map_err(|error| BlockDownloadVerifyError::VerifierServiceError { error })? 
- .call(zebra_consensus::Request::Commit(block)).boxed(); + .call(request).boxed(); // Add a shorter timeout to workaround a known bug (#5125) let short_timeout_max = (max_checkpoint_height + FINAL_CHECKPOINT_BLOCK_VERIFY_TIMEOUT_LIMIT).expect("checkpoint block height is in valid range"); diff --git a/zebrad/src/components/sync/tests/vectors.rs b/zebrad/src/components/sync/tests/vectors.rs index 62b9d0804cd..e69c8396c1f 100644 --- a/zebrad/src/components/sync/tests/vectors.rs +++ b/zebrad/src/components/sync/tests/vectors.rs @@ -19,6 +19,7 @@ use tower::timeout::Timeout; use zebra_chain::{ block::{self, Block, Height}, chain_tip::mock::{MockChainTip, MockChainTipSender}, + parameters::Network, serialization::ZcashDeserializeInto, }; use zebra_consensus::{ @@ -1951,6 +1952,7 @@ async fn empty_block_response_is_retryable_download_failure() { verifier, chain_tip, past_lookahead_limit_sender, + Network::Mainnet, sync::MIN_CONCURRENCY_LIMIT, Height(0), ); diff --git a/zebrad/tests/common/cached_state.rs b/zebrad/tests/common/cached_state.rs index 572aa8caa51..7c3b3278474 100644 --- a/zebrad/tests/common/cached_state.rs +++ b/zebrad/tests/common/cached_state.rs @@ -146,7 +146,9 @@ pub async fn start_state_service_with_cache_dir( }; // These tests don't need UTXOs to be verified efficiently, because they use cached states. - Ok(zebra_state::init(config, network, Height::MAX, 0).await) + let (state, read_state, latest_chain_tip, chain_tip_change) = + zebra_state::init(config, network, Height::MAX, 0).await; + Ok((state, read_state, latest_chain_tip, chain_tip_change)) } /// Loads the finalized tip height from the state stored in a specified directory. 
diff --git a/zebrad/tests/common/checkpoints.rs b/zebrad/tests/common/checkpoints.rs index f21603901cd..b0a9821c90e 100644 --- a/zebrad/tests/common/checkpoints.rs +++ b/zebrad/tests/common/checkpoints.rs @@ -160,6 +160,7 @@ pub async fn run(network: Network) -> Result<()> { test_type, zebra_rpc_address, last_checkpoint, + &zebrad_state_path, )?; let show_zebrad_logs = env::var(LOG_ZEBRAD_CHECKPOINTS).is_ok(); @@ -208,13 +209,19 @@ pub fn spawn_zebra_checkpoints_direct( test_type: TestType, zebrad_rpc_address: SocketAddr, last_checkpoint: &str, + zebrad_state_path: &Path, ) -> Result> { let zebrad_rpc_address = zebrad_rpc_address.to_string(); - let arguments = args![ + let mut arguments = args![ "--addr": zebrad_rpc_address, "--last-checkpoint": last_checkpoint, ]; + if network == Network::Mainnet { + arguments.set_parameter("--mainnet-frontier-output", "/tmp/mainnet-frontier.bin"); + arguments.set_parameter("--state-cache-dir", zebrad_state_path.display().to_string()); + arguments.set_parameter("--frontier-height", "auto"); + } // TODO: add logs for different kinds of zebra_checkpoints failures let zebra_checkpoints_failure_messages = PROCESS_FAILURE_MESSAGES diff --git a/zebrad/tests/common/configs/v5.0.0-rc.3.toml b/zebrad/tests/common/configs/v5.0.0-rc.3.toml index e30e93374c7..64cd60a24f1 100644 --- a/zebrad/tests/common/configs/v5.0.0-rc.3.toml +++ b/zebrad/tests/common/configs/v5.0.0-rc.3.toml @@ -42,6 +42,7 @@ [consensus] checkpoint_sync = true +disable_vct_fast_sync = false [health] enforce_on_test_networks = false