diff --git a/crates/pecos-hugr-qis/src/lib.rs b/crates/pecos-hugr-qis/src/lib.rs index b75edf4fa..c051153b4 100644 --- a/crates/pecos-hugr-qis/src/lib.rs +++ b/crates/pecos-hugr-qis/src/lib.rs @@ -62,6 +62,7 @@ The compiler supports standard LLVM optimization levels: pub mod array; pub mod compiler; pub mod prelude; +pub mod result_tags; mod utils; // Re-export main types and functions @@ -74,6 +75,8 @@ pub use compiler::{ // Re-export read_hugr_envelope from utils pub use utils::read_hugr_envelope; +pub use result_tags::{extract_result_tag_measurements, measurement_op_count}; + // Re-export inkwell's OptimizationLevel for convenience pub use tket::hugr::llvm::inkwell::OptimizationLevel; diff --git a/crates/pecos-hugr-qis/src/result_tags.rs b/crates/pecos-hugr-qis/src/result_tags.rs new file mode 100644 index 000000000..e0c7466e5 --- /dev/null +++ b/crates/pecos-hugr-qis/src/result_tags.rs @@ -0,0 +1,220 @@ +//! Extract the Guppy `result(tag, ...)` -> measurement binding from a HUGR. +//! +//! This is the *sound* source of the tag<->measurement association: in the +//! compiled HUGR, a `tket.result` op's dataflow input is wired (transitively) +//! from the measurement op(s) that produced its value. That wiring is fixed at +//! compile time and is immune to any later QIS/Selene measurement reordering, +//! unlike a runtime op-stream heuristic. +//! +//! Measurement identity here is the *ordinal* of the measurement op in HUGR +//! traversal order. This module only recovers the structural binding; whether +//! that HUGR ordinal coincides with the QIS-trace `result_id`/`MeasId` order +//! is a separate property of the Guppy -> HUGR / Guppy -> trace pipelines +//! agreeing on measurement ordering. Within the narrow scope this module +//! supports (straight-line `result_bool <- tket.bool:read <- +//! Measure/MeasureFree`), that correspondence is **committed-test verified** +//! end-to-end by +//! 
`tests/qec/test_from_guppy_result_tags.py::test_result_tags_match_positional_records` +//! (a scrambled-`result()`-order Guppy program: `result_tags` DEM +//! byte-identical to the positional-records DEM). Outside that scope +//! (computed / constant / array-valued `result()`, runtime loops) the +//! correspondence is undefined and the extractor / runtime-loop guard reject +//! the case rather than relying on it. +//! +//! Note: a *runtime* loop (e.g. `for _ in range(comptime(n))`, as the surface +//! code uses for rounds) is NOT unrolled in the HUGR -- it has one static +//! measure/result op executed n times. Static extraction therefore yields +//! `tag -> static-measure-op`; expanding that to per-iteration runtime `MeasIds` +//! requires a separate static-op -> runtime-measurement correspondence. + +use std::collections::{BTreeMap, HashMap}; + +use tket::hugr::ops::OpType; +use tket::hugr::types::Term; +use tket::hugr::{HugrView, IncomingPort, Node}; + +fn extension_ids(op: &OpType) -> Option<(&str, String)> { + let ext = op.as_extension_op()?; + Some(( + ext.extension_id().as_ref(), + ext.unqualified_id().to_string(), + )) +} + +fn is_measurement(op: &OpType) -> bool { + matches!( + extension_ids(op), + Some((ext, ref name)) + if ext == "tket.quantum" && (name == "Measure" || name == "MeasureFree") + ) +} + +/// Number of *static* measurement ops in the HUGR. +/// +/// For a straight-line program this equals the runtime measurement count; for +/// a program with a runtime loop it is strictly smaller (the loop body's +/// measure op is counted once). Callers use the mismatch to detect that +/// per-occurrence tag binding is not statically available. +#[must_use] +pub fn measurement_op_count>(hugr: &H) -> usize { + hugr.nodes() + .filter(|&n| is_measurement(hugr.get_optype(n))) + .count() +} + +/// Map each `result(tag, )` to the measurement ordinal it records. 
+/// +/// **Sound by construction, narrow by design.** Only the canonical pattern +/// `result(tag, )` is recognized: a +/// `tket.result:result_bool` op whose value input is *exactly* +/// `tket.bool:read` of a measurement op. The compiled chain is verified to be +/// precisely `result_bool <- tket.bool:read <- Measure/MeasureFree`. +/// +/// Any other shape is **deliberately excluded** (the tag is omitted from the +/// returned map) rather than guessed at -- e.g. computed values +/// (`result("x", m0 == m1)` lowers through `tket.bool:eq`), constants +/// (`result("x", True)` lowers through a `Const`), and array-valued +/// `result(...)` (`result_array_bool` lowers through `collections.borrow_arr` +/// machinery that does not cleanly expose per-element measurement provenance). +/// Resolving those structurally would silently misbind (equality is not +/// parity; an empty record set is not a detector), so they are not returned. +/// +/// A tag repeated across the program accumulates its ordinals in traversal +/// order; callers handle occurrence disambiguation / loop guarding. +#[must_use] +pub fn extract_result_tag_measurements>( + hugr: &H, +) -> BTreeMap> { + // Pass 1: ordinal for every measurement op, in traversal order. + let mut meas_ordinal: HashMap = HashMap::new(); + for node in hugr.nodes() { + if is_measurement(hugr.get_optype(node)) { + let next = meas_ordinal.len(); + meas_ordinal.insert(node, next); + } + } + + // single_linked_output source op, if any. + let src_op = |node: Node, port: usize| -> Option { + hugr.single_linked_output(node, IncomingPort::from(port)) + .map(|(s, _)| s) + }; + + // Pass 2: accept only result_bool <- tket.bool:read <- measurement. 
+ let mut out: BTreeMap> = BTreeMap::new(); + for node in hugr.nodes() { + let op = hugr.get_optype(node); + let Some((ext, name)) = extension_ids(op) else { + continue; + }; + if ext != "tket.result" || name != "result_bool" { + continue; // arrays / non-bool result ops: not soundly resolvable + } + let Some(ext_op) = op.as_extension_op() else { + continue; + }; + let Some(tag) = ext_op.args().iter().find_map(|a| match a { + Term::String(s) => Some(s.clone()), + _ => None, + }) else { + continue; + }; + + // result_bool value input (port 0) must be exactly `tket.bool:read`. + let Some(read) = src_op(node, 0) else { + continue; + }; + match extension_ids(hugr.get_optype(read)) { + Some((e, ref n)) if e == "tket.bool" && n == "read" => {} + _ => continue, // e.g. tket.bool:eq (computed) -> exclude + } + // ... whose input (port 0) must be a measurement op. + let Some(meas) = src_op(read, 0) else { + continue; + }; + let Some(&ord) = meas_ordinal.get(&meas) else { + continue; // e.g. a Const -> exclude + }; + out.entry(tag).or_default().push(ord); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::read_hugr_envelope; + + // Fixtures compiled from Guppy (committed so the regression does not + // depend on a Python toolchain at test time): + // scrambled: result() declared c,a,b over measures a,b,c (raw scalars) + // looped: for _ in range(comptime(3)): result("synx", measure(q)) + // computed: result("eq", m0==m1) ; result("const", True) + // arr: result("pair", measure_array(qs)) (array-valued) + const SCRAMBLED: &[u8] = include_bytes!("../tests/fixtures/scrambled.hugr"); + const LOOPED: &[u8] = include_bytes!("../tests/fixtures/looped.hugr"); + const COMPUTED: &[u8] = include_bytes!("../tests/fixtures/computed.hugr"); + const ARR: &[u8] = include_bytes!("../tests/fixtures/arr.hugr"); + + /// Foundation: `result()` declared in scrambled order (c, a, b) over + /// measurements made in order (a, b, c) must still bind each tag to ITS + /// OWN 
measurement. This is the exact case the prior runtime read/store + /// heuristic got wrong (it produced `{tag_c: [0,1,2]}`); the HUGR + /// structural binding is immune to declaration/measurement-order skew. + #[test] + fn scrambled_binds_each_tag_to_its_measurement() { + let hugr = read_hugr_envelope(SCRAMBLED).unwrap(); + let map = extract_result_tag_measurements(&hugr); + assert_eq!( + map, + BTreeMap::from([ + ("tag_a".to_string(), vec![0]), + ("tag_b".to_string(), vec![1]), + ("tag_c".to_string(), vec![2]), + ]), + "tag must bind to its own measurement regardless of result() order", + ); + } + + /// Documents the known limitation: a runtime `for _ in range(comptime(n))` + /// loop is NOT unrolled in the HUGR, so a tag emitted once per iteration + /// has a single static measure op. Per-iteration expansion needs a + /// separate static-op -> runtime-measurement correspondence. + #[test] + fn looped_tag_is_single_static_measure_op() { + let hugr = read_hugr_envelope(LOOPED).unwrap(); + let map = extract_result_tag_measurements(&hugr); + assert_eq!( + map.get("synx").map(Vec::as_slice), + Some([0].as_slice()), + "runtime loop is not unrolled in HUGR: one static measure op", + ); + } + + /// Soundness: a computed `result("eq", m0 == m1)` (lowers through + /// `tket.bool:eq`) and a constant `result("const", True)` (lowers through + /// a `Const`) must NOT be returned -- resolving them would silently + /// misbind (equality is not parity; no measurement at all). + #[test] + fn computed_and_constant_tags_are_excluded() { + let hugr = read_hugr_envelope(COMPUTED).unwrap(); + let map = extract_result_tag_measurements(&hugr); + assert!( + !map.contains_key("eq") && !map.contains_key("const"), + "computed/constant tags must be excluded, got {map:?}", + ); + } + + /// Soundness: an array-valued `result("pair", measure_array(qs))` lowers + /// through `collections.borrow_arr` machinery with no clean per-element + /// measurement provenance, so it must NOT be returned. 
+ #[test] + fn array_valued_tag_is_excluded() { + let hugr = read_hugr_envelope(ARR).unwrap(); + let map = extract_result_tag_measurements(&hugr); + assert!( + !map.contains_key("pair"), + "array-valued result tag must be excluded, got {map:?}", + ); + } +} diff --git a/crates/pecos-hugr-qis/tests/fixtures/arr.hugr b/crates/pecos-hugr-qis/tests/fixtures/arr.hugr new file mode 100644 index 000000000..5f9252f7f Binary files /dev/null and b/crates/pecos-hugr-qis/tests/fixtures/arr.hugr differ diff --git a/crates/pecos-hugr-qis/tests/fixtures/computed.hugr b/crates/pecos-hugr-qis/tests/fixtures/computed.hugr new file mode 100644 index 000000000..2f62840b3 Binary files /dev/null and b/crates/pecos-hugr-qis/tests/fixtures/computed.hugr differ diff --git a/crates/pecos-hugr-qis/tests/fixtures/looped.hugr b/crates/pecos-hugr-qis/tests/fixtures/looped.hugr new file mode 100644 index 000000000..4d8d74e28 Binary files /dev/null and b/crates/pecos-hugr-qis/tests/fixtures/looped.hugr differ diff --git a/crates/pecos-hugr-qis/tests/fixtures/scrambled.hugr b/crates/pecos-hugr-qis/tests/fixtures/scrambled.hugr new file mode 100644 index 000000000..7e3ffc149 Binary files /dev/null and b/crates/pecos-hugr-qis/tests/fixtures/scrambled.hugr differ diff --git a/crates/pecos-qec/src/fault_tolerance/dem_builder.rs b/crates/pecos-qec/src/fault_tolerance/dem_builder.rs index 59d79fd0a..a6af42108 100644 --- a/crates/pecos-qec/src/fault_tolerance/dem_builder.rs +++ b/crates/pecos-qec/src/fault_tolerance/dem_builder.rs @@ -83,7 +83,7 @@ mod mem_builder; pub(crate) mod sampler; mod types; -pub use builder::{DemBuilder, DemBuilderError}; +pub use builder::{DemBuilder, DemBuilderError, resolve_result_tags}; pub use dem_sampler::{SamplingEngine, SamplingStatistics}; pub use equivalence::{ ComparisonDetails, ComparisonMethod, DemParseError, EffectKey, EquivalenceResult, diff --git a/crates/pecos-qec/src/fault_tolerance/dem_builder/builder.rs 
b/crates/pecos-qec/src/fault_tolerance/dem_builder/builder.rs index ae792f1b1..b6ce524d5 100644 --- a/crates/pecos-qec/src/fault_tolerance/dem_builder/builder.rs +++ b/crates/pecos-qec/src/fault_tolerance/dem_builder/builder.rs @@ -35,6 +35,7 @@ struct ParsedDetector { id: u32, coords: Option<[f64; 3]>, records: Vec, + meas_ids: Vec, } /// Parsed observable from JSON metadata. @@ -42,6 +43,7 @@ struct ParsedDetector { struct ParsedObservable { id: u32, records: Vec, + meas_ids: Vec, } // ============================================================================ @@ -129,6 +131,11 @@ impl<'a> DemBuilder<'a> { /// /// One-liner for the common case. Reads detector/DEM output definitions /// from circuit metadata. + /// + /// # Panics + /// + /// Panics if the circuit's detector/observable metadata is malformed (use + /// [`Self::try_from_circuit`] to handle that as an error instead). #[must_use] pub fn from_circuit( circuit: &pecos_quantum::DagCircuit, @@ -137,12 +144,36 @@ impl<'a> DemBuilder<'a> { p_meas: f64, p_prep: f64, ) -> DetectorErrorModel { + Self::try_from_circuit(circuit, p1, p2, p_meas, p_prep) + .unwrap_or_else(|err| panic!("invalid DEM metadata on circuit: {err}")) + } + + /// Try to build a `DetectorErrorModel` directly from a `DagCircuit` and noise. + /// + /// Reads detector/DEM output definitions from circuit metadata and returns + /// parser errors instead of dropping malformed metadata. + /// + /// # Errors + /// + /// Returns an error if detector or observable metadata is malformed. + pub fn try_from_circuit( + circuit: &pecos_quantum::DagCircuit, + p1: f64, + p2: f64, + p_meas: f64, + p_prep: f64, + ) -> Result { build_dem_from_circuit(circuit, p1, p2, p_meas, p_prep) } /// Build a `DetectorErrorModel` from a `TickCircuit` and noise. /// /// Converts to `DagCircuit` internally. 
+ /// + /// # Panics + /// + /// Panics if the circuit's detector/observable metadata is malformed (use + /// [`Self::try_from_tick_circuit`] to handle that as an error instead). #[must_use] pub fn from_tick_circuit( circuit: &pecos_quantum::TickCircuit, @@ -151,6 +182,25 @@ impl<'a> DemBuilder<'a> { p_meas: f64, p_prep: f64, ) -> DetectorErrorModel { + Self::try_from_tick_circuit(circuit, p1, p2, p_meas, p_prep) + .unwrap_or_else(|err| panic!("invalid DEM metadata on circuit: {err}")) + } + + /// Try to build a `DetectorErrorModel` from a `TickCircuit` and noise. + /// + /// Converts to `DagCircuit` internally and returns parser errors instead + /// of dropping malformed metadata. + /// + /// # Errors + /// + /// Returns an error if detector or observable metadata is malformed. + pub fn try_from_tick_circuit( + circuit: &pecos_quantum::TickCircuit, + p1: f64, + p2: f64, + p_meas: f64, + p_prep: f64, + ) -> Result { let dag = pecos_quantum::DagCircuit::from(circuit); build_dem_from_circuit(&dag, p1, p2, p_meas, p_prep) } @@ -379,17 +429,186 @@ impl<'a> DemBuilder<'a> { #[allow(clippy::cast_possible_truncation)] // observable count fits in u32 id: id as u32, records, + meas_ids: Vec::new(), }) .collect(); self } + /// Resolves a JSON `meas_id` to a circuit measurement-record index. + /// + /// When the circuit carries stable `MeasId`s (the traced + /// `from_guppy`/`from_circuit` path), `meas_id` is interpreted as that + /// **stable stamped id** and looked up in `influence_map.meas_ids` -- so a + /// non-sequential traced id (e.g. the QIS result slot) resolves correctly + /// regardless of compilation reordering. When no stable ids are present + /// (the decoupled/raw builder with an empty influence map), `meas_id` is a + /// positional measurement index (the legacy escape hatch). Returns the + /// `0..num_measurements` record index, or `None` if the id is absent. 
+ fn resolve_meas_id_to_tc_index(&self, meas_id: usize) -> Option { + if self.influence_map.meas_ids.is_empty() { + return (meas_id < self.num_measurements).then_some(meas_id); + } + self.influence_map + .meas_ids + .iter() + .position(|mid| mid.0 == meas_id) + } + + fn meas_id_to_record_offset(&self, meas_id: usize) -> Option { + let index = self.resolve_meas_id_to_tc_index(meas_id)?; + let measurement = i64::try_from(index).ok()?; + let total = i64::try_from(self.num_measurements).ok()?; + i32::try_from(measurement - total).ok() + } + + /// Fail loud if any detector/observable references a measurement that does + /// not exist, instead of silently dropping it and weakening the DEM. + /// + /// `records` and `meas_ids` are alternative ways to name the *same* + /// measurements (the parser rejects an entry with both empty). Each used + /// reference must resolve in range. When an entry carries **both**, they + /// must be redundant -- `meas_ids` must resolve to exactly the `records` + /// set -- otherwise the DEM the builder produces (which consumes + /// `records`) would silently differ from what `meas_ids` asked for. The + /// surface `logical_circuit` path emits both redundantly; a non-redundant + /// pair is a caller error and fails loud here. + /// + /// # Errors + /// Returns [`DemBuilderError::ParseError`] if a used record offset is out + /// of range, a used `meas_id` is absent, or a both-present entry's + /// `records` and `meas_ids` disagree. 
+ fn validate_metadata_refs(&self) -> Result<(), DemBuilderError> { + let check = |kind: &str, id: u32, records: &[i32], meas_ids: &[usize]| { + for &rec in records { + if record_offset_to_absolute_index(self.num_measurements, rec).is_none() { + return Err(DemBuilderError::ParseError(format!( + "{kind} {id} references record offset {rec}, which \ + is out of range for a circuit with {} \ + measurement(s)", + self.num_measurements + ))); + } + } + let mut resolved_offsets = Vec::with_capacity(meas_ids.len()); + for &mid in meas_ids { + let offset = self.meas_id_to_record_offset(mid).ok_or_else(|| { + DemBuilderError::ParseError(format!( + "{kind} {id} references meas_id {mid}, which is not \ + present in the circuit's {} measurement(s)", + self.num_measurements + )) + })?; + resolved_offsets.push(offset); + } + if !records.is_empty() && !meas_ids.is_empty() { + let mut a = records.to_vec(); + let mut b = resolved_offsets; + a.sort_unstable(); + b.sort_unstable(); + if a != b { + return Err(DemBuilderError::ParseError(format!( + "{kind} {id} has both 'records' and 'meas_ids' but \ + they reference different measurements (records map \ + to offsets {a:?}, meas_ids resolve to {b:?}); they \ + are alternatives, not additive -- the builder would \ + consume only 'records' and silently drop the rest" + ))); + } + } + Ok(()) + }; + for d in &self.detectors { + check("Detector", d.id, &d.records, &d.meas_ids)?; + } + for o in &self.observables { + check("Observable", o.id, &o.records, &o.meas_ids)?; + } + Ok(()) + } + + fn effective_record_offsets(&self, records: &[i32], meas_ids: &[usize]) -> Vec { + if !records.is_empty() { + return records.to_vec(); + } + meas_ids + .iter() + .filter_map(|&meas_id| self.meas_id_to_record_offset(meas_id)) + .collect() + } + + /// Rejects a `num_measurements` that disagrees with a non-empty influence + /// map, and duplicate stable `MeasId`s. + /// 
+ /// When the builder is fed a real circuit (the influence map has + /// measurements), record offsets and `meas_id`s are defined against that + /// circuit's actual measurement record. A caller-supplied + /// `with_num_measurements` that differs would let out-of-range refs pass + /// [`Self::validate_metadata_refs`] and silently misbind, so it is an + /// error. An empty influence map keeps the escape hatch: the count is then + /// purely declarative and record offsets are opaque pass-through DEM + /// coordinates. + fn validate_measurement_count(&self) -> Result<(), DemBuilderError> { + let actual = self.influence_map.measurements.len(); + if actual != 0 && self.num_measurements != actual { + return Err(DemBuilderError::ParseError(format!( + "num_measurements={} disagrees with the {actual} measurement(s) \ + the circuit performs; the declared count must match so \ + detector/observable record offsets resolve correctly", + self.num_measurements + ))); + } + // Internal-consistency guard: stable MeasIds must be unique. A + // duplicate would make stamped-id resolution bind to the wrong + // measurement; it indicates a trace/replay bug, not bad caller input. 
+ let mut seen = std::collections::HashSet::with_capacity(self.influence_map.meas_ids.len()); + for mid in &self.influence_map.meas_ids { + if !seen.insert(mid.0) { + return Err(DemBuilderError::ParseError(format!( + "duplicate stable MeasId {} in the traced circuit; each \ + measurement must have a unique stamped id", + mid.0 + ))); + } + } + Ok(()) + } + + /// Validates metadata refs, then builds the Detector Error Model. + /// + /// This is the fail-loud entry point. Every path that ingests + /// detector/observable metadata derived from a circuit (the + /// `from_circuit` family, [`DemSampler::from_circuit`], and the public + /// Python `DemBuilder.build`) must go through here so an out-of-range + /// record offset or `meas_id` is rejected rather than silently dropped. + /// + /// [`Self::build`] is the infallible counterpart, kept for the raw, + /// decoupled construction case (e.g. an empty influence map where record + /// offsets are opaque DEM coordinates) and so existing callers do not + /// change behavior. + /// + /// # Errors + /// + /// Returns [`DemBuilderError::ParseError`] if `num_measurements` disagrees + /// with a non-empty influence map, a stable `MeasId` is duplicated, a used + /// record offset is out of range, a used `meas_id` is absent from the + /// circuit (resolved against the stable stamped ids when available, else + /// positionally), or an entry's co-present `records`/`meas_ids` disagree. + pub fn try_build(&self) -> Result { + self.validate_measurement_count()?; + self.validate_metadata_refs()?; + Ok(self.build()) + } + /// Builds the Detector Error Model with source tracking. /// /// This performs fault propagation analysis and tracks error sources (X/Z vs Y) /// through the pipeline, enabling accurate direct/decomposed form splitting. /// /// Use `dem.to_string()` or `dem.to_string_decomposed()` for output. + /// + /// This does **not** validate metadata refs; callers ingesting + /// circuit-derived metadata must use [`Self::try_build`] instead. #[must_use] pub fn build(&self) -> DetectorErrorModel { let num_influence_dem_outputs = self @@ -404,7 +623,8 @@ impl<'a> DemBuilder<'a> { if let Some(coords) = det.coords { def = def.with_coords(coords); } - def = def.with_records(det.records.iter().copied()); + let records = self.effective_record_offsets(&det.records, &det.meas_ids); + def = def.with_records(records.iter().copied()); dem.add_detector(def); } @@ -439,7 +659,8 @@ impl<'a> DemBuilder<'a> { // Add observable definitions in the standard `L` namespace. // Observable IDs are not shifted by tracked Paulis. 
for obs in &self.observables { - let def = DemOutput::new(obs.id).with_records(obs.records.iter().copied()); + let records = self.effective_record_offsets(&obs.records, &obs.meas_ids); + let def = DemOutput::new(obs.id).with_records(records.iter().copied()); dem.add_observable(def); } @@ -888,15 +1109,28 @@ impl<'a> DemBuilder<'a> { }; for det in &self.detectors { - for &rec in &det.records { - if let Some(tc_meas_idx) = - record_offset_to_absolute_index(self.num_measurements, rec) - && let Some(&influence_idx) = tc_to_influence.get(&tc_meas_idx) - { - meas_to_detectors - .entry(influence_idx) - .or_default() - .push(det.id); + if det.records.is_empty() { + for &meas_id in &det.meas_ids { + if let Some(tc_idx) = self.resolve_meas_id_to_tc_index(meas_id) + && let Some(&influence_idx) = tc_to_influence.get(&tc_idx) + { + meas_to_detectors + .entry(influence_idx) + .or_default() + .push(det.id); + } + } + } else { + for &rec in &det.records { + if let Some(tc_meas_idx) = + record_offset_to_absolute_index(self.num_measurements, rec) + && let Some(&influence_idx) = tc_to_influence.get(&tc_meas_idx) + { + meas_to_detectors + .entry(influence_idx) + .or_default() + .push(det.id); + } } } } @@ -905,15 +1139,28 @@ impl<'a> DemBuilder<'a> { if influence_observable_ids.contains(&obs.id) { continue; } - for &rec in &obs.records { - if let Some(tc_meas_idx) = - record_offset_to_absolute_index(self.num_measurements, rec) - && let Some(&influence_idx) = tc_to_influence.get(&tc_meas_idx) - { - meas_to_observables - .entry(influence_idx) - .or_default() - .push(obs.id); + if obs.records.is_empty() { + for &meas_id in &obs.meas_ids { + if let Some(tc_idx) = self.resolve_meas_id_to_tc_index(meas_id) + && let Some(&influence_idx) = tc_to_influence.get(&tc_idx) + { + meas_to_observables + .entry(influence_idx) + .or_default() + .push(obs.id); + } + } + } else { + for &rec in &obs.records { + if let Some(tc_meas_idx) = + record_offset_to_absolute_index(self.num_measurements, rec) + && 
let Some(&influence_idx) = tc_to_influence.get(&tc_meas_idx) + { + meas_to_observables + .entry(influence_idx) + .or_default() + .push(obs.id); + } } } } @@ -1097,63 +1344,42 @@ fn get_y_decomposition(p1: u8, p2: u8) -> Option<(u8, u8, u8, u8)> { /// Parses detector definitions from JSON. fn parse_detectors_json(json: &str) -> Result, DemBuilderError> { - // Simple JSON parsing without serde dependency - // Expected format: [{"id": 0, "coords": [0.0, 0.0, 0.0], "records": [-1, -5]}, ...] - let json = json.trim(); if json.is_empty() || json == "[]" { return Ok(Vec::new()); } - let mut detectors = Vec::new(); - - // Find all objects in the array - let mut depth = 0; - let mut obj_start = None; - - for (i, c) in json.char_indices() { - match c { - '[' if depth == 0 => depth = 1, - '{' if depth == 1 => { - depth = 2; - obj_start = Some(i); - } - '{' => depth += 1, - '}' => { - depth -= 1; - if depth == 1 { - if let Some(start) = obj_start { - // i is the byte index of '}', we want to include it - let obj_str = &json[start..i + c.len_utf8()]; - let det = parse_single_detector(obj_str)?; - detectors.push(det); - } - obj_start = None; - } - } - _ => {} - } - } - - Ok(detectors) + let parsed: serde_json::Value = serde_json::from_str(json).map_err(|err| { + DemBuilderError::ParseError(format!("detectors JSON is malformed: {err}")) + })?; + let array = parsed + .as_array() + .ok_or_else(|| DemBuilderError::ParseError("detectors_json must be a JSON list".into()))?; + array.iter().map(parse_single_detector).collect() } /// Parses a single detector object. 
-fn parse_single_detector(json: &str) -> Result { +fn parse_single_detector(value: &serde_json::Value) -> Result { + let object = value + .as_object() + .ok_or_else(|| DemBuilderError::ParseError("detector entry must be an object".into()))?; + reject_tracked_pauli(object, "detector")?; let id = extract_u32( - json, - &["\"id\"", "\"detector_id\""], + object, + &["id", "detector_id"], + 'D', "missing detector id", "detector id out of range", )?; - let coords = extract_coords(json); - let records = extract_records(json); + let coords = extract_coords(object)?; + let (records, meas_ids) = extract_measurement_refs(object, "detector")?; Ok(ParsedDetector { id, coords, records, + meas_ids, }) } @@ -1164,111 +1390,362 @@ fn parse_observables_json(json: &str) -> Result, DemBuilde return Ok(Vec::new()); } - let mut observables = Vec::new(); - - let mut depth = 0; - let mut obj_start = None; - - for (i, c) in json.char_indices() { - match c { - '[' if depth == 0 => depth = 1, - '{' if depth == 1 => { - depth = 2; - obj_start = Some(i); - } - '{' => depth += 1, - '}' => { - depth -= 1; - if depth == 1 { - if let Some(start) = obj_start { - // i is the byte index of '}', we want to include it - let obj_str = &json[start..i + c.len_utf8()]; - let obs = parse_single_observable(obj_str)?; - observables.push(obs); - } - obj_start = None; - } - } - _ => {} - } - } - - Ok(observables) + let parsed: serde_json::Value = serde_json::from_str(json).map_err(|err| { + DemBuilderError::ParseError(format!("observables JSON is malformed: {err}")) + })?; + let array = parsed.as_array().ok_or_else(|| { + DemBuilderError::ParseError("observables_json must be a JSON list".into()) + })?; + array.iter().map(parse_single_observable).collect() } /// Parses a single observable object. 
-fn parse_single_observable(json: &str) -> Result { +fn parse_single_observable(value: &serde_json::Value) -> Result { + let object = value + .as_object() + .ok_or_else(|| DemBuilderError::ParseError("observable entry must be an object".into()))?; + reject_tracked_pauli(object, "observable")?; let id = extract_u32( - json, - &["\"id\"", "\"observable_id\""], + object, + &["id", "observable_id"], + 'L', "missing observable id", "observable id out of range", )?; - let records = extract_records(json); + let (records, meas_ids) = extract_measurement_refs(object, "observable")?; - Ok(ParsedObservable { id, records }) + Ok(ParsedObservable { + id, + records, + meas_ids, + }) } -/// Extracts a number after a key. -fn extract_number(json: &str, key: &str) -> Option { - let pos = json.find(key)?; - let rest = &json[pos + key.len()..]; - let rest = rest.trim_start_matches(|c: char| c == ':' || c.is_whitespace()); +/// Parse detector JSON into per-detector measurement-reference vectors for the +/// sampler builders, enforcing the **same** validation and resolution as +/// `DemBuilder`. +/// +/// Schema/type validation (rejects malformed JSON, a non-list top level, a +/// non-object entry, non-integer values, `tracked_pauli` entries, and entries +/// referencing neither `records` nor `meas_ids`) comes from the shared serde +/// parser. On top of that, this resolves every reference against the +/// `influence_map` exactly as `DemBuilder::validate_metadata_refs` / +/// `resolve_meas_id_to_tc_index` do, and rejects fail-loud: +/// - a `records` offset that is out of range, +/// - a `meas_ids` value that does not resolve (a stamped `MeasId` absent from +/// the circuit, or -- when the circuit carries no stable ids -- an +/// out-of-range positional index), and +/// - co-present `records` + `meas_ids` that reference different measurements. 
+/// +/// `meas_ids` are stamped stable ids when `influence_map.meas_ids` is populated +/// (the traced `from_guppy`/`from_circuit` path), and positional indices only +/// when it is empty -- matching `DemBuilder`. The returned vector uses the +/// sampler's storage convention: negative `records` offsets are kept as-is +/// (preferred when present, like `DemBuilder`), while a `meas_ids`-only entry is +/// emitted as the resolved absolute indices (positive ints). +/// +/// An empty influence map (no measurements) keeps the escape hatch: refs are +/// opaque pass-through coordinates and resolution is skipped. +pub(crate) fn parse_detector_record_vectors( + json: &str, + influence_map: &DagFaultInfluenceMap, +) -> Result>, DemBuilderError> { + reject_duplicate_stamped_meas_ids(influence_map)?; + parse_detectors_json(json)? + .iter() + .map(|d| { + resolve_sampler_record_vector("Detector", d.id, &d.records, &d.meas_ids, influence_map) + }) + .collect() +} + +/// Observable counterpart of [`parse_detector_record_vectors`]. +pub(crate) fn parse_observable_record_vectors( + json: &str, + influence_map: &DagFaultInfluenceMap, +) -> Result>, DemBuilderError> { + reject_duplicate_stamped_meas_ids(influence_map)?; + parse_observables_json(json)? + .iter() + .map(|o| { + resolve_sampler_record_vector( + "Observable", + o.id, + &o.records, + &o.meas_ids, + influence_map, + ) + }) + .collect() +} + +/// Reject a circuit whose stable `MeasId`s are not unique, before resolving any +/// `meas_ids`. A duplicate would make stamped-id resolution bind to the first +/// occurrence (an ambiguous, silently-wrong bind); it indicates a trace/replay +/// bug, not bad caller input. Mirrors the guard in +/// `DemBuilder::validate_measurement_count` so the sampler JSON path rejects +/// exactly what `DemBuilder` does. 
+fn reject_duplicate_stamped_meas_ids( + influence_map: &DagFaultInfluenceMap, +) -> Result<(), DemBuilderError> { + let mut seen = std::collections::HashSet::with_capacity(influence_map.meas_ids.len()); + for mid in &influence_map.meas_ids { + if !seen.insert(mid.0) { + return Err(DemBuilderError::ParseError(format!( + "duplicate stable MeasId {} in the traced circuit; each \ + measurement must have a unique stamped id", + mid.0 + ))); + } + } + Ok(()) +} + +/// Resolve a stamped/positional `meas_id` against the influence map, mirroring +/// `DemBuilder::resolve_meas_id_to_tc_index`: a stamped stable id when the +/// circuit carries them, a positional index only when it does not. +fn resolve_sampler_meas_id(influence_map: &DagFaultInfluenceMap, meas_id: usize) -> Option { + if influence_map.meas_ids.is_empty() { + (meas_id < influence_map.measurements.len()).then_some(meas_id) + } else { + influence_map + .meas_ids + .iter() + .position(|mid| mid.0 == meas_id) + } +} + +/// Resolve a parsed `records`/`meas_ids` pair to the sampler's single-`Vec` +/// convention, with `DemBuilder`-equivalent validation. See +/// [`parse_detector_record_vectors`] for the contract. +fn resolve_sampler_record_vector( + kind: &str, + id: u32, + records: &[i32], + meas_ids: &[usize], + influence_map: &DagFaultInfluenceMap, +) -> Result, DemBuilderError> { + let num_measurements = influence_map.measurements.len(); + + // Escape hatch: an empty influence map makes refs opaque pass-through + // coordinates with no circuit to resolve against. Prefer records; emit + // meas_ids verbatim as positional indices (there are no stable ids). 
+ if num_measurements == 0 { + if !records.is_empty() { + return Ok(records.to_vec()); + } + return meas_ids + .iter() + .map(|&m| { + i32::try_from(m).map_err(|_| { + DemBuilderError::ParseError(format!( + "{kind} {id} meas_id {m} is out of range for an i32 record vector" + )) + }) + }) + .collect(); + } + + // Resolve each form to absolute measurement indices, fail-loud. + let records_abs = records + .iter() + .map(|&offset| { + record_offset_to_absolute_index(num_measurements, offset).ok_or_else(|| { + DemBuilderError::ParseError(format!( + "{kind} {id} references record offset {offset}, which is out of \ + range for a circuit with {num_measurements} measurement(s)" + )) + }) + }) + .collect::, _>>()?; + let meas_ids_abs = meas_ids + .iter() + .map(|&meas_id| { + resolve_sampler_meas_id(influence_map, meas_id).ok_or_else(|| { + DemBuilderError::ParseError(format!( + "{kind} {id} references meas_id {meas_id}, which is not present in \ + the circuit's {num_measurements} measurement(s)" + )) + }) + }) + .collect::, _>>()?; + + // Co-present records and meas_ids must reference the same measurements + // (mirrors `validate_metadata_refs`); they are alternatives, not additive. + if !records.is_empty() && !meas_ids.is_empty() { + let mut a = records_abs.clone(); + let mut b = meas_ids_abs.clone(); + a.sort_unstable(); + b.sort_unstable(); + if a != b { + return Err(DemBuilderError::ParseError(format!( + "{kind} {id} has both 'records' and 'meas_ids' but they reference \ + different measurements (records -> {a:?}, meas_ids -> {b:?}); they \ + are alternatives, not additive" + ))); + } + } + + // Prefer records (kept as Stim offsets, like `DemBuilder`); otherwise emit + // the resolved absolute indices, which the sampler reads as positive + // (absolute-index) record values. 
+ if !records.is_empty() { + return Ok(records.to_vec()); + } + meas_ids_abs + .iter() + .map(|&idx| { + i32::try_from(idx).map_err(|_| { + DemBuilderError::ParseError(format!( + "{kind} {id} resolved measurement index {idx} exceeds i32 range" + )) + }) + }) + .collect() +} - let end = rest.find(|c: char| !c.is_ascii_digit() && c != '-' && c != '.')?; - let num_str = &rest[..end]; - num_str.parse().ok() +/// Rejects a JSON entry that declares `kind: "tracked_pauli"`. +/// +/// Tracked Paulis reference qubits via `pauli`, not measurements, and are +/// only produced from circuit annotations -- never from `detectors_json` / +/// `observables_json`. The JSON parser reads only `id`/`records`, so a +/// tracked-Pauli entry here would be silently parsed as the wrong thing. +fn reject_tracked_pauli( + object: &serde_json::Map, + kind: &str, +) -> Result<(), DemBuilderError> { + if object.get("kind").and_then(serde_json::Value::as_str) == Some("tracked_pauli") { + return Err(DemBuilderError::ParseError(format!( + "{kind} entry uses kind=\"tracked_pauli\", which is not supported \ + in detectors_json/observables_json (tracked Paulis come only \ + from circuit annotations)" + ))); + } + Ok(()) } +/// Reads an entry id as either an unsigned integer or the DEM-label string +/// form (`prefix` is `'D'` for detectors, `'L'` for observables, e.g. +/// `"D0"`/`"L0"`); both normalize to the same integer. A string id with the +/// wrong prefix or a non-numeric body is a hard error -- silently +/// reinterpreting it would risk a mislabeled DEM. 
fn extract_u32( - json: &str, + object: &serde_json::Map, keys: &[&str], + prefix: char, missing_message: &str, range_message: &str, ) -> Result { let value = keys .iter() - .find_map(|key| extract_number(json, key)) + .find_map(|key| object.get(*key)) .ok_or_else(|| DemBuilderError::ParseError(missing_message.into()))?; - u32::try_from(value).map_err(|_| DemBuilderError::ParseError(range_message.into())) + if let Some(raw) = value.as_u64() { + return u32::try_from(raw).map_err(|_| DemBuilderError::ParseError(range_message.into())); + } + if let Some(s) = value.as_str() { + let body = s.strip_prefix(prefix); + if let Some(digits) = body + && !digits.is_empty() + && digits.bytes().all(|b| b.is_ascii_digit()) + { + return digits + .parse::() + .map_err(|_| DemBuilderError::ParseError(range_message.into())); + } + return Err(DemBuilderError::ParseError(format!( + "id {s:?} is not a valid identifier; expected an integer or the \ + {prefix:?}-prefixed form like {prefix}0" + ))); + } + Err(DemBuilderError::ParseError(format!( + "{missing_message}: expected an integer or {prefix:?}-prefixed string id" + ))) } /// Extracts coordinates array [x, y, t]. 
-fn extract_coords(json: &str) -> Option<[f64; 3]> { - let pos = json.find("\"coords\"")?; - let rest = &json[pos..]; - let bracket_start = rest.find('[')?; - let bracket_end = rest.find(']')?; - let array_str = &rest[bracket_start + 1..bracket_end]; - - let nums: Vec = array_str - .split(',') - .filter_map(|s| s.trim().parse().ok()) - .collect(); - - if nums.len() == 3 { - Some([nums[0], nums[1], nums[2]]) - } else { - None +fn extract_coords( + object: &serde_json::Map, +) -> Result, DemBuilderError> { + let Some(coords) = object.get("coords") else { + return Ok(None); + }; + let array = coords + .as_array() + .ok_or_else(|| DemBuilderError::ParseError("detector coords must be an array".into()))?; + if array.len() != 3 { + return Err(DemBuilderError::ParseError( + "detector coords must contain exactly three numbers".into(), + )); } + let mut values = [0.0; 3]; + for (idx, coord) in array.iter().enumerate() { + values[idx] = coord + .as_f64() + .ok_or_else(|| DemBuilderError::ParseError("detector coords must be numeric".into()))?; + } + Ok(Some(values)) } -/// Extracts records array. -fn extract_records(json: &str) -> Vec { - if let Some(pos) = json.find("\"records\"") { - let rest = &json[pos..]; - if let Some(bracket_start) = rest.find('[') - && let Some(bracket_end) = rest.find(']') - { - let array_str = &rest[bracket_start + 1..bracket_end]; - return array_str - .split(',') - .filter_map(|s| s.trim().parse().ok()) - .collect(); - } +/// Extracts `records`/`meas_ids` arrays. 
+fn extract_measurement_refs( + object: &serde_json::Map, + kind: &str, +) -> Result<(Vec, Vec), DemBuilderError> { + let records = if let Some(records) = object.get("records") { + let array = records.as_array().ok_or_else(|| { + DemBuilderError::ParseError(format!("{kind} records must be an array")) + })?; + array + .iter() + .map(|record| { + let raw = record.as_i64().ok_or_else(|| { + DemBuilderError::ParseError(format!("{kind} record offsets must be integers")) + })?; + i32::try_from(raw).map_err(|_| { + DemBuilderError::ParseError(format!("{kind} record offset out of range")) + }) + }) + .collect::, _>>()? + } else { + Vec::new() + }; + + let meas_ids = if let Some(meas_ids) = object.get("meas_ids") { + let array = meas_ids.as_array().ok_or_else(|| { + DemBuilderError::ParseError(format!("{kind} meas_ids must be an array")) + })?; + array + .iter() + .map(|meas_id| { + let raw = meas_id.as_i64().ok_or_else(|| { + DemBuilderError::ParseError(format!("{kind} meas_ids must be integers")) + })?; + usize::try_from(raw).map_err(|_| { + DemBuilderError::ParseError(format!("{kind} meas_id out of range")) + }) + }) + .collect::, _>>()? + } else { + Vec::new() + }; + + if records.is_empty() && meas_ids.is_empty() { + return Err(DemBuilderError::ParseError(format!( + "{kind} entry has neither 'records' nor 'meas_ids'; it would \ + contribute nothing and silently weaken the DEM" + ))); } - Vec::new() + + // `records` and `meas_ids` are alternative ways to reference the *same* + // measurements, not additive. Co-presence is allowed but must be + // redundant; that equality is enforced fail-loud in + // `validate_metadata_refs` (which has the circuit context needed to + // resolve `meas_ids`), not here at the pure-parse stage. The surface + // `logical_circuit` path legitimately emits both (records = legacy Stim + // offsets, meas_ids = the same measurements as stable ids). 
+ Ok((records, meas_ids)) } // ============================================================================ @@ -1284,7 +1761,7 @@ fn build_dem_from_circuit( p2: f64, p_meas: f64, p_prep: f64, -) -> DetectorErrorModel { +) -> Result { use crate::fault_tolerance::influence_builder::InfluenceBuilder; use crate::fault_tolerance::propagator::DagFaultAnalyzer; use pecos_num::graph::Attribute; @@ -1322,30 +1799,28 @@ fn build_dem_from_circuit( let builder = DemBuilder::new(&influence_map).with_noise(p1, p2, p_meas, p_prep); let builder = if let Some(ref dj) = det_json { - builder - .with_detectors_json(dj) - .unwrap_or_else(|_| DemBuilder::new(&influence_map).with_noise(p1, p2, p_meas, p_prep)) + builder.with_detectors_json(dj)? } else { builder }; let builder = if let Some(ref oj) = obs_json { - builder - .with_observables_json(oj) - .unwrap_or_else(|_| DemBuilder::new(&influence_map).with_noise(p1, p2, p_meas, p_prep)) + builder.with_observables_json(oj)? } else if !annotated_observable_records.is_empty() { builder.with_observable_records(annotated_observable_records) } else { builder }; + // `try_build` enforces num_measurements == influence-map count, so a + // metadata override that disagrees with the circuit is rejected there. let builder = if let Some(n) = num_meas { builder.with_num_measurements(n) } else { builder }; - builder.build() + builder.try_build() } fn observable_records_from_annotations( @@ -1385,6 +1860,151 @@ fn observable_records_from_annotations( .collect() } +// ============================================================================ +// Tag-referenced detector resolution +// ============================================================================ + +/// Resolve `result_tags` on detector/observable JSON into record offsets. +/// +/// `tag_to_ords` is the **sound** Guppy `result(tag, ...)` -> measurement +/// ordinal binding recovered structurally from the compiled HUGR +/// (reorder-immune; see `pecos_hugr_qis::result_tags`). 
Each referenced tag's +/// ordinals are converted to record offsets (`ordinal - traced_meas_count`). +/// `result_tags` is an *alternative* to `records` (not additive): if the +/// entry has no `records`, the resolved offsets become its `records`; if it +/// has both, they must be redundant (sorted-set equality) and `records` is +/// left unchanged. `result_tags` is then removed so the downstream parser is +/// unchanged. +/// +/// Fail-loud (returns `Err`), never silently misbinds: +/// - **Loop guard**: if `static_meas_count != traced_meas_count` the program +/// has un-unrolled runtime loops (the HUGR has one static measure op per +/// loop body), so per-occurrence tag binding is not statically available. +/// - An unknown tag, malformed `result_tags`, or invalid JSON is an error. +/// +/// # Errors +/// Returns [`DemBuilderError::ParseError`] on the loop guard, an unknown tag, +/// malformed `result_tags`, or invalid JSON. +pub fn resolve_result_tags( + detectors_json: &str, + observables_json: &str, + tag_to_ords: &std::collections::BTreeMap>, + static_meas_count: usize, + traced_meas_count: usize, +) -> Result<(String, String), DemBuilderError> { + if static_meas_count != traced_meas_count { + return Err(DemBuilderError::ParseError(format!( + "result_tags (tag-referenced detectors) is not supported for Guppy \ + programs with runtime loops: the HUGR has {static_meas_count} \ + static measurement op(s) but the traced program emits \ + {traced_meas_count} measurement(s). Per-occurrence tag binding is \ + not statically available; use positional records." 
+ ))); + } + let traced = i64::try_from(traced_meas_count).map_err(|_| { + DemBuilderError::ParseError("traced measurement count too large".to_string()) + })?; + + let rewrite = |json: &str, kind: &str| -> Result { + if json.trim().is_empty() { + return Ok(json.to_string()); + } + let mut value: serde_json::Value = serde_json::from_str(json).map_err(|e| { + DemBuilderError::ParseError(format!("invalid detector/observable JSON: {e}")) + })?; + let Some(entries) = value.as_array_mut() else { + return Ok(json.to_string()); + }; + for entry in entries.iter_mut() { + let Some(obj) = entry.as_object_mut() else { + continue; + }; + let Some(tags) = obj.remove("result_tags") else { + continue; + }; + + // Resolve `result_tags` strictly into a list of record offsets. + let tag_list = tags.as_array().ok_or_else(|| { + DemBuilderError::ParseError( + "result_tags must be a JSON array of strings".to_string(), + ) + })?; + let mut tag_offsets: Vec = Vec::new(); + for tag in tag_list { + let tag = tag.as_str().ok_or_else(|| { + DemBuilderError::ParseError("result_tags entries must be strings".to_string()) + })?; + let ords = tag_to_ords.get(tag).ok_or_else(|| { + DemBuilderError::ParseError(format!( + "{kind} references result_tag {tag:?}, which the Guppy \ + program never records via result(...)" + )) + })?; + for &ord in ords { + tag_offsets.push(i64::try_from(ord).unwrap_or(i64::MAX) - traced); + } + } + + // `result_tags` is an *alternative* to `records` (and `meas_ids`), + // following the same redundancy discipline as records-vs-meas_ids: + // co-presence is allowed only when the two forms reference the + // *same* measurements (sorted-set equality). Additive merging + // would either silently weaken the DEM (when callers expected + // alternatives) or corrupt parity by double-referencing (when + // they were actually redundant). 
+ match obj.get("records") { + None => { + obj.insert( + "records".to_string(), + serde_json::Value::Array( + tag_offsets + .into_iter() + .map(serde_json::Value::from) + .collect(), + ), + ); + } + Some(records_value) => { + let records_array = records_value.as_array().ok_or_else(|| { + DemBuilderError::ParseError(format!( + "{kind} records must be a JSON array of integers" + )) + })?; + let mut existing: Vec = Vec::with_capacity(records_array.len()); + for rec in records_array { + let r = rec.as_i64().ok_or_else(|| { + DemBuilderError::ParseError(format!( + "{kind} records entries must be integers" + )) + })?; + existing.push(r); + } + let mut a = existing; + let mut b = tag_offsets; + a.sort_unstable(); + b.sort_unstable(); + if a != b { + return Err(DemBuilderError::ParseError(format!( + "{kind} entry has both 'records' and 'result_tags' but \ + they reference different measurements (records {a:?}, \ + result_tags resolve to {b:?}); they are alternatives, \ + not additive -- provide one, or make them redundant" + ))); + } + // Records left unchanged; tag offsets are redundant. 
+ } + } + } + serde_json::to_string(&value) + .map_err(|e| DemBuilderError::ParseError(format!("failed to re-serialize JSON: {e}"))) + }; + + Ok(( + rewrite(detectors_json, "Detector")?, + rewrite(observables_json, "Observable")?, + )) +} + // ============================================================================ // Error Type // ============================================================================ @@ -1914,6 +2534,7 @@ mod tests { assert_eq!(detectors[0].id, 0); assert_eq!(detectors[0].coords, Some([0.0, 0.0, 0.0])); assert_eq!(detectors[0].records, vec![-1, -5]); + assert!(detectors[0].meas_ids.is_empty()); assert_eq!(detectors[1].id, 1); assert_eq!(detectors[1].records, vec![-2]); } @@ -1927,6 +2548,19 @@ mod tests { assert_eq!(observables.len(), 1); assert_eq!(observables[0].id, 0); assert_eq!(observables[0].records, vec![-1, -3, -5]); + assert!(observables[0].meas_ids.is_empty()); + } + + #[test] + fn test_parse_json_accepts_meas_ids() { + let detectors = parse_detectors_json(r#"[{"id": 0, "meas_ids": [0, 2]}]"#).unwrap(); + assert_eq!(detectors[0].records, Vec::::new()); + assert_eq!(detectors[0].meas_ids, vec![0, 2]); + + let observables = + parse_observables_json(r#"[{"observable_id": 1, "meas_ids": [3]}]"#).unwrap(); + assert_eq!(observables[0].records, Vec::::new()); + assert_eq!(observables[0].meas_ids, vec![3]); } #[test] @@ -1943,6 +2577,144 @@ mod tests { assert_eq!(dem.dem_outputs()[0].records.as_slice(), &[-1, -3]); } + #[test] + fn test_dem_builder_resolves_meas_ids_when_records_are_absent() { + let influence_map = DagFaultInfluenceMap::with_capacity(0); + let dem = DemBuilder::new(&influence_map) + .with_detectors_json(r#"[{"id": 0, "meas_ids": [0, 2]}]"#) + .unwrap() + .with_observables_json(r#"[{"id": 0, "meas_ids": [1]}]"#) + .unwrap() + .with_num_measurements(3) + .build(); + + assert_eq!(dem.detectors[0].records.as_slice(), &[-3, -1]); + assert_eq!(dem.dem_outputs()[0].records.as_slice(), &[-2]); + } + + #[test] + fn 
test_try_build_rejects_out_of_range_record_and_meas_id() { + let influence_map = DagFaultInfluenceMap::with_capacity(0); + + let bad_record = DemBuilder::new(&influence_map) + .with_detectors_json(r#"[{"id": 0, "records": [-2]}]"#) + .unwrap() + .with_num_measurements(1) + .try_build(); + assert!( + bad_record.is_err(), + "out-of-range record must fail try_build" + ); + + let bad_meas_id = DemBuilder::new(&influence_map) + .with_detectors_json(r#"[{"id": 0, "meas_ids": [999]}]"#) + .unwrap() + .with_num_measurements(1) + .try_build(); + assert!( + bad_meas_id.is_err(), + "out-of-range meas_id must fail try_build" + ); + + // The infallible `build` stays lax for the decoupled/raw case so + // existing pass-through callers are unaffected. + let _ = DemBuilder::new(&influence_map) + .with_observables_json(r#"[{"id": 0, "records": [-1, -3]}]"#) + .unwrap() + .build(); + + // Empty influence map keeps the escape hatch: a declared count with + // no real measurements is allowed (opaque pass-through coordinates). + assert!( + DemBuilder::new(&influence_map) + .with_detectors_json(r#"[{"id": 0, "meas_ids": [0, 2]}]"#) + .unwrap() + .with_num_measurements(3) + .try_build() + .is_ok(), + "empty influence map must keep the declarative-count escape hatch" + ); + } + + #[test] + fn test_parse_accepts_dem_label_id_form() { + let det = parse_detectors_json(r#"[{"id": "D0", "records": [-1]}]"#).unwrap(); + assert_eq!(det[0].id, 0); + let obs = parse_observables_json(r#"[{"id": "L7", "records": [-1]}]"#).unwrap(); + assert_eq!(obs[0].id, 7); + // Wrong prefix / non-numeric body is a hard error, not a guess. 
+ assert!(parse_detectors_json(r#"[{"id": "L0", "records": [-1]}]"#).is_err()); + assert!(parse_detectors_json(r#"[{"id": "X0", "records": [-1]}]"#).is_err()); + assert!(parse_observables_json(r#"[{"id": "Lx", "records": [-1]}]"#).is_err()); + } + + #[test] + fn test_parse_rejects_tracked_pauli_and_refless_entries() { + assert!( + parse_observables_json(r#"[{"kind": "tracked_pauli", "pauli": "X0"}]"#).is_err(), + "tracked_pauli must be rejected in observables_json", + ); + assert!( + parse_detectors_json(r#"[{"id": 0, "kind": "tracked_pauli"}]"#).is_err(), + "tracked_pauli must be rejected in detectors_json too", + ); + assert!( + parse_detectors_json(r#"[{"id": 0}]"#).is_err(), + "an entry with neither records nor meas_ids must be rejected", + ); + // Both-present is allowed at parse time (surface logical_circuit + // legitimately emits redundant records+meas_ids); the + // redundancy/fail-loud decision is made later in try_build. + assert!( + parse_detectors_json(r#"[{"id": 0, "records": [-1], "meas_ids": [0]}]"#).is_ok(), + "both records and meas_ids must parse; redundancy is checked in try_build", + ); + } + + #[test] + fn test_try_build_mixed_records_meas_ids_must_be_redundant() { + // Empty influence map => positional meas_id resolution (deterministic): + // num_measurements=3, meas_id k resolves to record offset k-3. + let influence_map = DagFaultInfluenceMap::with_capacity(0); + + // Redundant: records [-3] and meas_ids [0] both name measurement 0. + let redundant = DemBuilder::new(&influence_map) + .with_detectors_json(r#"[{"id": 0, "records": [-3], "meas_ids": [0]}]"#) + .unwrap() + .with_num_measurements(3) + .try_build(); + assert!( + redundant.is_ok(), + "redundant records+meas_ids must be accepted: {redundant:?}", + ); + + // Non-redundant: records [-3] (measurement 0) vs meas_ids [1] + // (measurement 1) -> fail loud, not silently records-only. 
+ let conflicting = DemBuilder::new(&influence_map) + .with_detectors_json(r#"[{"id": 0, "records": [-3], "meas_ids": [1]}]"#) + .unwrap() + .with_num_measurements(3) + .try_build(); + assert!( + conflicting.is_err(), + "non-redundant records+meas_ids must fail loud, not collapse to records", + ); + } + + #[test] + fn test_validate_measurement_count_rejects_duplicate_stamped_meas_id() { + let mut influence_map = DagFaultInfluenceMap::with_capacity(0); + influence_map.meas_ids = vec![pecos_core::MeasId(5), pecos_core::MeasId(5)]; + let result = DemBuilder::new(&influence_map) + .with_detectors_json(r#"[{"id": 0, "meas_ids": [5]}]"#) + .unwrap() + .try_build(); + assert!( + result.is_err(), + "a duplicate stable MeasId must fail loud, not bind to the first", + ); + } + #[test] fn test_parse_empty_json() { assert!(parse_detectors_json("").unwrap().is_empty()); @@ -1950,6 +2722,42 @@ mod tests { assert!(parse_observables_json("").unwrap().is_empty()); } + #[test] + fn test_parse_detector_json_rejects_malformed_shapes() { + for json in [ + "{}", + r#"[{"id":0,"records":["-1"]}]"#, + r#"[{"id":0,"records":[-1.2]}]"#, + r#"[{"id":0,"meas_ids":["0"]}]"#, + r#"[{"id":0,"meas_ids":[-1]}]"#, + r#"[{"id":0,"meas_ids":[1.2]}]"#, + r#"[{"id":true,"records":[-1]}]"#, + ] { + assert!( + parse_detectors_json(json).is_err(), + "detectors JSON should fail loud: {json}" + ); + } + } + + #[test] + fn test_parse_observable_json_rejects_malformed_shapes() { + for json in [ + "{}", + r#"[{"id":0,"records":["-1"]}]"#, + r#"[{"id":0,"records":[-1.2]}]"#, + r#"[{"id":0,"meas_ids":["0"]}]"#, + r#"[{"id":0,"meas_ids":[-1]}]"#, + r#"[{"id":0,"meas_ids":[1.2]}]"#, + r#"[{"observable_id":false,"records":[-1]}]"#, + ] { + assert!( + parse_observables_json(json).is_err(), + "observables JSON should fail loud: {json}" + ); + } + } + #[test] fn test_xor_toggle() { let mut vec: SmallVec<[u32; 4]> = SmallVec::new(); diff --git a/crates/pecos-qec/src/fault_tolerance/dem_builder/dem_sampler.rs 
b/crates/pecos-qec/src/fault_tolerance/dem_builder/dem_sampler.rs index 7ba0febe2..72c102c43 100644 --- a/crates/pecos-qec/src/fault_tolerance/dem_builder/dem_sampler.rs +++ b/crates/pecos-qec/src/fault_tolerance/dem_builder/dem_sampler.rs @@ -1916,7 +1916,9 @@ impl<'a> SamplingEngineBuilder<'a> { /// # Errors /// Returns an error if the JSON is malformed or missing required fields. pub fn with_detectors_json(mut self, json: &str) -> Result { - self.detector_records = parse_records_json(json, "detector")?; + self.detector_records = + super::builder::parse_detector_record_vectors(json, self.influence_map) + .map_err(|err| err.to_string())?; Ok(self) } @@ -1925,9 +1927,12 @@ impl<'a> SamplingEngineBuilder<'a> { /// Format: `[{"id": 0, "records": [-1, -3, -5]}, ...]` /// /// # Errors - /// Returns an error if the JSON is malformed or missing required fields. + /// Returns an error if the JSON is malformed, fails schema validation, or + /// references measurements out of range for the circuit. pub fn with_observables_json(mut self, json: &str) -> Result { - self.observable_records = parse_records_json(json, "observable")?; + self.observable_records = + super::builder::parse_observable_record_vectors(json, self.influence_map) + .map_err(|err| err.to_string())?; Ok(self) } @@ -2579,68 +2584,6 @@ where result } -/// Parse detector or observable definitions from JSON. -/// -/// Uses a simple custom parser to avoid `serde_json` dependency. 
-/// Expected format: `[{"id": 0, "records": [-1, -5]}, ...]` -#[allow(clippy::unnecessary_wraps)] -fn parse_records_json(json: &str, _kind: &str) -> Result>, String> { - let json = json.trim(); - if json.is_empty() || json == "[]" { - return Ok(Vec::new()); - } - - let mut results = Vec::new(); - - // Simple state machine to find each object - let mut depth = 0; - let mut start = None; - - for (i, c) in json.char_indices() { - match c { - '{' => { - if depth == 1 { - start = Some(i); - } - depth += 1; - } - '}' => { - depth -= 1; - if depth == 1 { - if let Some(s) = start { - let obj_str = &json[s..i + c.len_utf8()]; - let records = extract_records_from_object(obj_str); - results.push(records); - } - start = None; - } - } - '[' if depth == 0 => depth = 1, - ']' if depth == 1 => depth = 0, - _ => {} - } - } - - Ok(results) -} - -/// Extract the "records" array from a JSON object string. -fn extract_records_from_object(json: &str) -> Vec { - if let Some(pos) = json.find("\"records\"") { - let rest = &json[pos..]; - if let (Some(arr_start), Some(arr_end)) = (rest.find('['), rest.find(']')) - && arr_start < arr_end - { - let arr_str = &rest[arr_start + 1..arr_end]; - return arr_str - .split(',') - .filter_map(|s| s.trim().parse::().ok()) - .collect(); - } - } - Vec::new() -} - #[cfg(test)] mod tests { use super::*; @@ -2718,23 +2661,84 @@ mod tests { assert!(result.is_empty()); } - #[test] - fn test_parse_records_json_empty() { - let result = parse_records_json("[]", "test").unwrap(); - assert!(result.is_empty()); + /// Build an influence map for a circuit with `n` independent measurements + /// (no stable `MeasId`s, so `meas_ids` resolve positionally). 
+ fn im_with_n_measurements( + n: usize, + ) -> crate::fault_tolerance::propagator::DagFaultInfluenceMap { + use crate::fault_tolerance::propagator::DagFaultAnalyzer; + use pecos_quantum::DagCircuit; + let mut dag = DagCircuit::new(); + for q in 0..n { + dag.pz(&[q]); + dag.mz(&[q]); + } + DagFaultAnalyzer::new(&dag).build_influence_map() + } - let result = parse_records_json("", "test").unwrap(); - assert!(result.is_empty()); + #[test] + fn test_record_vectors_empty() { + use super::super::builder::parse_detector_record_vectors; + let im = im_with_n_measurements(8); + assert!(parse_detector_record_vectors("[]", &im).unwrap().is_empty()); + assert!(parse_detector_record_vectors("", &im).unwrap().is_empty()); } #[test] - fn test_parse_records_json_valid() { + fn test_record_vectors_valid() { + use super::super::builder::parse_detector_record_vectors; + let im = im_with_n_measurements(8); let json = r#"[{"id": 0, "records": [-1, -5]}, {"id": 1, "records": [-2, -3, -4]}]"#; - let result = parse_records_json(json, "detector").unwrap(); + let result = parse_detector_record_vectors(json, &im).unwrap(); + assert_eq!(result, vec![vec![-1, -5], vec![-2, -3, -4]]); + } - assert_eq!(result.len(), 2); - assert_eq!(result[0], vec![-1, -5]); - assert_eq!(result[1], vec![-2, -3, -4]); + #[test] + fn test_record_vectors_reject_malformed_metadata() { + // The consolidated parser fails loud where the old hand-rolled scanner + // silently produced empty / partial records. + use super::super::builder::parse_detector_record_vectors; + let im = im_with_n_measurements(8); + // Non-list top level (previously -> empty, accepted). + assert!(parse_detector_record_vectors("{}", &im).is_err()); + // Non-integer record value (previously dropped via filter_map(parse.ok)). + assert!(parse_detector_record_vectors(r#"[{"id":0,"records":[-1,"bad"]}]"#, &im).is_err()); + // Entry referencing neither records nor meas_ids (previously -> empty vec). 
+ assert!(parse_detector_record_vectors(r#"[{"id":0}]"#, &im).is_err()); + } + + #[test] + fn test_record_vectors_reject_out_of_range_and_nonredundant() { + // Context-aware validation: out-of-range refs and non-redundant + // records+meas_ids fail loud instead of being silently dropped + // downstream (the M-E sampler-validation gap). meas_ids resolve + // positionally here because these circuits carry no stable ids; the + // stamped-MeasId semantic is exercised from Python (mz_with_ids). + use super::super::builder::{ + parse_detector_record_vectors, parse_observable_record_vectors, + }; + let im1 = im_with_n_measurements(1); + let im3 = im_with_n_measurements(3); + let im0 = im_with_n_measurements(0); + // Out-of-range negative offset on a 1-measurement circuit. + assert!(parse_detector_record_vectors(r#"[{"id":0,"records":[-1,-2]}]"#, &im1).is_err()); + // Out-of-range observable offset, too. + assert!(parse_observable_record_vectors(r#"[{"id":0,"records":[-1,-2]}]"#, &im1).is_err()); + // Out-of-range (positional) meas_id. + assert!(parse_detector_record_vectors(r#"[{"id":0,"meas_ids":[0,999]}]"#, &im1).is_err()); + // Non-redundant co-present records + meas_ids (3-measurement circuit: + // records[-1] -> index 2, meas_ids[0] -> index 0). + assert!( + parse_detector_record_vectors(r#"[{"id":0,"records":[-1],"meas_ids":[0]}]"#, &im3) + .is_err() + ); + // Redundant co-presence is accepted (both -> index 0). + assert!( + parse_detector_record_vectors(r#"[{"id":0,"records":[-1],"meas_ids":[0]}]"#, &im1) + .is_ok() + ); + // Empty influence map keeps the opaque escape hatch (no range check). 
+ assert!(parse_detector_record_vectors(r#"[{"id":0,"records":[-1,-99]}]"#, &im0).is_ok()); } #[test] diff --git a/crates/pecos-qec/src/fault_tolerance/dem_builder/sampler.rs b/crates/pecos-qec/src/fault_tolerance/dem_builder/sampler.rs index d564fbf49..53d29c1c5 100644 --- a/crates/pecos-qec/src/fault_tolerance/dem_builder/sampler.rs +++ b/crates/pecos-qec/src/fault_tolerance/dem_builder/sampler.rs @@ -57,6 +57,8 @@ pub enum DetectorValidationError { /// Raw measurement mode requires all gates to be in the supported Clifford /// subset (`H`, `X`, `Y`, `Z`, `SZ`, `SZdg`, `CX`, `CZ`, `SWAP`, `MZ`, `PZ`, `I`). UnsupportedGateForDeterminismAnalysis { gate_type: String }, + /// Circuit detector/observable metadata is malformed. + InvalidMetadata { message: String }, } impl std::fmt::Display for DetectorValidationError { @@ -91,6 +93,9 @@ impl std::fmt::Display for DetectorValidationError { H, X, Y, Z, SZ, SZdg, CX, CZ, SWAP, MZ, PZ/QAlloc, I/Idle." ) } + Self::InvalidMetadata { message } => { + write!(f, "Invalid detector/observable metadata: {message}") + } } } } @@ -459,28 +464,38 @@ impl DemSampler { let builder = DemBuilder::new(&influence_map).with_noise_config(noise.clone()); let builder = if let Some(ref dj) = det_json { - builder.with_detectors_json(dj).unwrap_or_else(|_| { - DemBuilder::new(&influence_map).with_noise_config(noise.clone()) - }) + builder.with_detectors_json(dj).map_err(|err| { + DetectorValidationError::InvalidMetadata { + message: err.to_string(), + } + })? } else { builder }; let builder = if let Some(ref oj) = observables_json { - builder.with_observables_json(oj).unwrap_or_else(|_| { - DemBuilder::new(&influence_map).with_noise_config(noise.clone()) - }) + builder.with_observables_json(oj).map_err(|err| { + DetectorValidationError::InvalidMetadata { + message: err.to_string(), + } + })? 
} else { builder }; + // `try_build` enforces num_measurements == influence-map count, so a + // metadata override that disagrees with the circuit is rejected there. let builder = if let Some(n) = num_meas { builder.with_num_measurements(n) } else { builder }; - let dem = builder.build(); + let dem = builder + .try_build() + .map_err(|err| DetectorValidationError::InvalidMetadata { + message: err.to_string(), + })?; Ok(Self::from_detector_error_model(&dem)) } @@ -1038,7 +1053,8 @@ impl<'a> DemSamplerBuilder<'a> { /// # Errors /// Returns an error if the JSON is malformed. pub fn with_detectors_json(self, json: &str) -> Result { - let records = parse_records_json(json); + let records = super::builder::parse_detector_record_vectors(json, self.influence_map) + .map_err(|err| err.to_string())?; Ok(self.with_detector_records(records)) } @@ -1047,9 +1063,11 @@ impl<'a> DemSamplerBuilder<'a> { /// Format: `[{"id": 0, "records": [-1, -3, -5]}, ...]` /// /// # Errors - /// Returns an error if the JSON is malformed. + /// Returns an error if the JSON is malformed, fails schema validation, or + /// references measurements out of range for the circuit. pub fn with_observables_json(self, json: &str) -> Result { - let records = parse_records_json(json); + let records = super::builder::parse_observable_record_vectors(json, self.influence_map) + .map_err(|err| err.to_string())?; Ok(self.with_observable_records(records)) } @@ -1198,6 +1216,23 @@ impl<'a> DemSamplerBuilder<'a> { /// Returns an error if detector definitions reference non-deterministic /// measurements or are not linearly independent over `Z_2`. pub fn build(self) -> Result { + // A supplied measurement order must cover every measurement, otherwise + // detector/observable record offsets validated against the circuit's + // measurement count would resolve in a different (shorter/longer) frame + // at sample time and silently misbind. (See sampler-JSON validation.) 
+ if let Some(ref order) = self.measurement_order { + let expected = self.influence_map.measurements.len(); + if order.len() != expected { + return Err(DetectorValidationError::InvalidMetadata { + message: format!( + "measurement_order has {} entries but the circuit performs \ + {expected} measurement(s); a measurement order must cover \ + every measurement so record offsets resolve in the same frame", + order.len() + ), + }); + } + } match self.output_mode { OutputMode::RawMeasurements => Ok(self.build_raw()), OutputMode::DetectorEvents => self.build_detector(), @@ -1444,106 +1479,6 @@ pub(crate) fn gate_location_prob_from_locations( 0.0 } -/// Parse detector or observable definitions from JSON. -/// -/// Run noiseless symbolic simulation on a `TickCircuit` to identify non-deterministic measurements. -/// -/// Returns a Vec where true = non-deterministic (needs coin flip). -/// Uses `SymbolicSparseStab` which tracks measurement determinism symbolically. -/// Run noiseless symbolic simulation to identify non-deterministic measurements -/// and their dependency structure. -/// -/// Returns: -/// - `Vec`: non-det mask (true = needs coin flip) -/// - `Vec, bool)>>`: per-measurement dependencies -/// (Some((deps, flip)) for deterministic measurements, None for non-det) -/// -/// Only supports the Clifford gate subset. Returns error for unsupported gates. 
-fn parse_records_json(json: &str) -> Vec> { - let json = json.trim(); - if json.is_empty() || json == "[]" { - return Vec::new(); - } - - let mut results = Vec::new(); - let mut depth = 0; - let mut start = None; - - for (i, c) in json.char_indices() { - match c { - '{' => { - if depth == 1 { - start = Some(i); - } - depth += 1; - } - '}' => { - depth -= 1; - if depth == 1 { - if let Some(s) = start { - let obj_str = &json[s..i + c.len_utf8()]; - results.push(extract_records_array(obj_str)); - } - start = None; - } - } - '[' if depth == 0 => depth = 1, - ']' if depth == 1 => depth = 0, - _ => {} - } - } - - results -} - -/// Extract measurement record indices from a JSON object string. -/// -/// Prefers `"meas_ids"` (absolute `MeasId` IDs) when available. -/// Also accepts `"records"` for DEM-style negative offsets. -fn extract_records_array(json: &str) -> Vec { - // Prefer meas_ids (absolute, stable IDs from MeasId) - if let Some(pos) = json.find("\"meas_ids\"") { - let rest = &json[pos..]; - if let (Some(arr_start), Some(arr_end)) = (rest.find('['), rest.find(']')) - && arr_start < arr_end - { - let arr_str = &rest[arr_start + 1..arr_end]; - let ids: Vec = arr_str - .split(',') - .filter_map(|s| s.trim().parse::().ok()) - .collect(); - if !ids.is_empty() { - // Convert absolute MeasId IDs to negative offsets: - // not needed — the DemBuilder resolves negative offsets against - // num_measurements. With absolute IDs, we store them as positive - // values and handle them in the DemBuilder's build_measurement_mappings. - // - // For now, keep the negative-offset convention internally but - // convert: absolute ID i becomes offset -(num_measurements - i). - // We don't know num_measurements here, so return the absolute IDs - // as positive i32. The DemBuilder recognizes positive values as - // absolute MeasId indices. 
- return ids; - } - } - } - - // Fallback: "records" with negative offsets - if let Some(pos) = json.find("\"records\"") { - let rest = &json[pos..]; - if let (Some(arr_start), Some(arr_end)) = (rest.find('['), rest.find(']')) - && arr_start < arr_end - { - let arr_str = &rest[arr_start + 1..arr_end]; - return arr_str - .split(',') - .filter_map(|s| s.trim().parse::().ok()) - .collect(); - } - } - Vec::new() -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/pecos-qec/tests/stim_dem_export_tests.rs b/crates/pecos-qec/tests/stim_dem_export_tests.rs index 6c6663e6c..14c1d151c 100644 --- a/crates/pecos-qec/tests/stim_dem_export_tests.rs +++ b/crates/pecos-qec/tests/stim_dem_export_tests.rs @@ -11,8 +11,7 @@ // the License. //! Integration tests for Stim-format DEM export from `DemStabSim` with -//! per-gate noise. Closes the -//! `~/Repos/pecos-docs/ideas/stim-compat-dem-export.md` gap. +//! per-gate noise. use pecos_core::QubitId; use pecos_qec::dem_stab::DemStabSim; diff --git a/docs/proposals/README.md b/docs/proposals/README.md deleted file mode 100644 index b9e1c077c..000000000 --- a/docs/proposals/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# PECOS Proposals - -This directory contains architectural proposals and design explorations for PECOS. These documents capture ideas that may influence future development directions. - -## Status Labels - -- **Draft** - Initial exploration, gathering feedback -- **Under Discussion** - Being actively considered -- **Accepted** - Approved for implementation -- **Implemented** - Completed and merged -- **Deferred** - Good idea, but not now -- **Rejected** - Decided against - -## Proposals - -| Folder/File | Status | Summary | -|-------------|--------|---------| -| *None currently* | | | - -## Contributing - -When adding a new proposal: - -1. For a single document: Create `NNN-short-title.md` -2. For a multi-document exploration: Create a folder with `README.md` and related docs -3. Add an entry to this README -4. 
Open for discussion diff --git a/exp/pecos-stab-tn/docs/approach.md b/exp/pecos-stab-tn/docs/approach.md deleted file mode 100644 index e82b71940..000000000 --- a/exp/pecos-stab-tn/docs/approach.md +++ /dev/null @@ -1,3 +0,0 @@ -# Moved to pecos-docs vault - -This document has been moved to `~/Repos/pecos-docs/design/stab-tn/approach.md`. diff --git a/exp/pecos-stab-tn/docs/future_work.md b/exp/pecos-stab-tn/docs/future_work.md deleted file mode 100644 index f1c47453b..000000000 --- a/exp/pecos-stab-tn/docs/future_work.md +++ /dev/null @@ -1,3 +0,0 @@ -# Moved to pecos-docs vault - -This document has been moved to `~/Repos/pecos-docs/design/stab-tn/future-work.md`. diff --git a/exp/pecos-stab-tn/docs/landscape.md b/exp/pecos-stab-tn/docs/landscape.md deleted file mode 100644 index 8361359f2..000000000 --- a/exp/pecos-stab-tn/docs/landscape.md +++ /dev/null @@ -1,3 +0,0 @@ -# Moved to pecos-docs vault - -This document has been moved to `~/Repos/pecos-docs/design/stab-tn/landscape.md`. diff --git a/exp/pecos-stab-tn/docs/literature_status.md b/exp/pecos-stab-tn/docs/literature_status.md deleted file mode 100644 index 8968ce8fb..000000000 --- a/exp/pecos-stab-tn/docs/literature_status.md +++ /dev/null @@ -1,3 +0,0 @@ -# Moved to pecos-docs vault - -This document has been moved to `~/Repos/pecos-docs/design/stab-tn/literature-status.md`. diff --git a/exp/pecos-stab-tn/docs/ofd_plan.md b/exp/pecos-stab-tn/docs/ofd_plan.md deleted file mode 100644 index ff9606fdd..000000000 --- a/exp/pecos-stab-tn/docs/ofd_plan.md +++ /dev/null @@ -1,3 +0,0 @@ -# Moved to pecos-docs vault - -This document has been moved to `~/Repos/pecos-docs/design/stab-tn/ofd-plan.md`. 
diff --git a/exp/pecos-stab-tn/docs/priorities.md b/exp/pecos-stab-tn/docs/priorities.md deleted file mode 100644 index 6c0b00b78..000000000 --- a/exp/pecos-stab-tn/docs/priorities.md +++ /dev/null @@ -1,3 +0,0 @@ -# Moved to pecos-docs vault - -This document has been moved to `~/Repos/pecos-docs/design/stab-tn/priorities.md`. diff --git a/exp/pecos-stab-tn/docs/references.md b/exp/pecos-stab-tn/docs/references.md deleted file mode 100644 index a16b0eb15..000000000 --- a/exp/pecos-stab-tn/docs/references.md +++ /dev/null @@ -1,3 +0,0 @@ -# Moved to pecos-docs vault - -This document has been moved to `~/Repos/pecos-docs/design/stab-tn/references.md`. diff --git a/exp/pecos-stab-tn/src/stab_mps/measure.rs b/exp/pecos-stab-tn/src/stab_mps/measure.rs index fcd5d7d92..735a0e16a 100644 --- a/exp/pecos-stab-tn/src/stab_mps/measure.rs +++ b/exp/pecos-stab-tn/src/stab_mps/measure.rs @@ -184,7 +184,7 @@ pub fn pre_reduce_for_measurement_pub(tableau: &mut SparseStabY, mps: &mut Mps, /// Proper long-term fix: lazy virtual-frame tracking — accumulate a /// deferred Clifford V such that effective MPS = V·stored MPS, conjugate /// Pauli strings by V before applying to stored MPS, flush only when MPS -/// must be read directly. See `docs/future_work.md`. +/// must be read directly. fn pre_reduce_for_measurement( tableau: &mut SparseStabY, mps: &mut Mps, diff --git a/python/pecos-rslib/src/dag_circuit_bindings.rs b/python/pecos-rslib/src/dag_circuit_bindings.rs index 31bd12985..f033a6b22 100644 --- a/python/pecos-rslib/src/dag_circuit_bindings.rs +++ b/python/pecos-rslib/src/dag_circuit_bindings.rs @@ -1800,6 +1800,51 @@ fn py_hugr_to_dag_circuit(hugr_bytes: &Bound<'_, PyBytes>) -> PyResult measurement binding recovered +/// from the compiled HUGR. +/// +/// All logic (HUGR extraction, the runtime-loop guard, tag->record resolution, +/// unknown-tag validation) is performed in Rust; this is a thin entry point. 
+/// Returns the rewritten `(detectors_json, observables_json)` with +/// `result_tags` replaced by record offsets. +/// +/// Args: +/// `detectors_json` / `observables_json`: detector/observable JSON. +/// `hugr_bytes`: HUGR envelope bytes (e.g. `guppy_to_hugr(program)`). +/// `traced_meas_count`: number of measurements in the traced circuit. +/// +/// Raises: +/// `ValueError`: on the runtime-loop guard, an unknown tag, malformed +/// `result_tags`, or invalid JSON. +#[pyfunction] +#[pyo3(name = "resolve_result_tags_for_guppy")] +fn py_resolve_result_tags_for_guppy( + detectors_json: &str, + observables_json: &str, + hugr_bytes: &Bound<'_, PyBytes>, + traced_meas_count: usize, +) -> PyResult<(String, String)> { + use pecos_hugr_qis::{ + extract_result_tag_measurements, measurement_op_count, read_hugr_envelope, + }; + use pecos_qec::fault_tolerance::dem_builder::resolve_result_tags; + + let hugr = read_hugr_envelope(hugr_bytes.as_bytes()) + .map_err(|e| PyErr::new::(format!("Failed to parse HUGR: {e}")))?; + let tag_to_ords = extract_result_tag_measurements(&hugr); + let static_meas_count = measurement_op_count(&hugr); + + resolve_result_tags( + detectors_json, + observables_json, + &tag_to_ords, + static_meas_count, + traced_meas_count, + ) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) +} + /// Map a HUGR operation name to a `GateType`. 
/// /// Args: @@ -3773,6 +3818,10 @@ pub fn register_quantum_circuit_types(parent_module: &Bound<'_, PyModule>) -> Py parent_module.add_function(wrap_pyfunction!(py_hugr_op_to_gate_type, parent_module)?)?; parent_module.add_function(wrap_pyfunction!(py_gate_type_to_hugr_op, parent_module)?)?; parent_module.add_function(wrap_pyfunction!(py_is_quantum_operation, parent_module)?)?; + parent_module.add_function(wrap_pyfunction!( + py_resolve_result_tags_for_guppy, + parent_module + )?)?; Ok(()) } diff --git a/python/pecos-rslib/src/fault_tolerance_bindings.rs b/python/pecos-rslib/src/fault_tolerance_bindings.rs index a637902e3..c443778b2 100644 --- a/python/pecos-rslib/src/fault_tolerance_bindings.rs +++ b/python/pecos-rslib/src/fault_tolerance_bindings.rs @@ -726,7 +726,7 @@ impl PyInfluenceBuilder { /// # Output in DEM format /// print(dem.to_string()) /// ``` -#[pyclass(name = "DetectorErrorModel", module = "pecos_rslib.qec")] +#[pyclass(subclass, name = "DetectorErrorModel", module = "pecos_rslib.qec")] pub struct PyDetectorErrorModel { inner: RustDetectorErrorModel, } @@ -965,15 +965,15 @@ impl PyDetectorErrorModel { if let Ok(dag) = circuit.extract::>() { - Ok(Self { - inner: DemBuilder::from_circuit(&dag.inner, p1, p2, p_meas, p_prep), - }) + let inner = DemBuilder::try_from_circuit(&dag.inner, p1, p2, p_meas, p_prep) + .map_err(|err| pyo3::exceptions::PyValueError::new_err(err.to_string()))?; + Ok(Self { inner }) } else if let Ok(tc) = circuit.extract::>() { - Ok(Self { - inner: DemBuilder::from_tick_circuit(&tc.inner, p1, p2, p_meas, p_prep), - }) + let inner = DemBuilder::try_from_tick_circuit(&tc.inner, p1, p2, p_meas, p_prep) + .map_err(|err| pyo3::exceptions::PyValueError::new_err(err.to_string()))?; + Ok(Self { inner }) } else { Err(pyo3::exceptions::PyTypeError::new_err( "from_circuit() expects a DagCircuit or TickCircuit", @@ -1357,7 +1357,9 @@ impl PyDemBuilder { /// A `DetectorErrorModel` that can be converted to string format. 
/// /// Raises: - /// `ValueError`: If the detector or observable JSON is malformed. + /// `ValueError`: If the detector or observable JSON is malformed, or + /// a used record offset / `meas_id` is out of range for the + /// configured measurement count. fn build(&self) -> PyResult { let mut builder = RustDemBuilder::new(&self.influence_map).with_noise_config(self.noise.clone()); @@ -1382,7 +1384,9 @@ impl PyDemBuilder { .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; } - let inner = builder.build(); + let inner = builder + .try_build() + .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(PyDetectorErrorModel { inner }) } diff --git a/python/quantum-pecos/src/pecos/guppy/surface.py b/python/quantum-pecos/src/pecos/guppy/surface.py index 66883176e..3307f294c 100644 --- a/python/quantum-pecos/src/pecos/guppy/surface.py +++ b/python/quantum-pecos/src/pecos/guppy/surface.py @@ -29,7 +29,10 @@ class _ModuleState: temp_dir: ClassVar[Path | None] = None module_cache: ClassVar[dict[str, object]] = {} - distance_module_cache: ClassVar[dict[int, dict]] = {} + # Keyed by full patch identity + effective budget (dx, dz, orientation, + # rotated, effective_budget) so distinct patch geometries -- e.g. rotated + # vs non-rotated at the same dx/dz -- never collide on a cached module. + distance_module_cache: ClassVar[dict[tuple[int, int, str, bool, int], dict]] = {} _state = _ModuleState() @@ -42,22 +45,51 @@ def _get_temp_dir() -> Path: return _state.temp_dir -def generate_guppy_source(patch: "SurfacePatch") -> str: +def generate_guppy_source( + patch: "SurfacePatch", + *, + ancilla_budget: int | None = None, +) -> str: """Generate Guppy source code for a surface code patch. - Uses a 4-round parallel CNOT schedule with dedicated per-stabilizer - ancillas for syndrome extraction. + Uses a 4-round parallel CNOT schedule for syndrome extraction. 
+ + ``ancilla_budget=None`` (default) emits the unconstrained shape: + one ancilla per stabilizer, all measured in parallel at the end of + one round. This matches the abstract circuit's unconstrained-path + measurement order (X stabilizers first by index, then Z). + + A finite ``ancilla_budget`` emits a stabilizer-batched syndrome- + extraction routine that mirrors the abstract circuit's + ``_batched_stabilizers`` schedule (shared helper at + ``pecos.qec.surface._ancilla_batching``): per batch, allocate + ``min(ancilla_budget, total_ancilla)`` fresh ancillas, run the + 4-round CX schedule restricted to that batch's stabilizers, + measure, then move to the next batch (which allocates fresh + qubits whose physical slots are reused by Selene's lowering). + The same per-stabilizer ``result("...:meas:N", …)`` calls fire + in the abstract's batched measurement order, keeping + detector record offsets transferable between abstract and traced + paths. Args: - patch: SurfacePatch with geometry configuration + patch: SurfacePatch with geometry configuration. + ancilla_budget: Optional cap on simultaneously live ancillas. + ``None`` or a value ``>= total_ancilla`` emits the + unconstrained shape; ``< total_ancilla`` emits batched. Returns: - Python/Guppy source code as a string + Python/Guppy source code as a string. """ + from pecos.qec.surface._ancilla_batching import batched_stabilizers, normalize_ancilla_budget + geom = patch.geometry num_data = geom.num_data num_x_stab = len(geom.x_stabilizers) num_z_stab = len(geom.z_stabilizers) + total_ancilla = num_x_stab + num_z_stab + effective_budget = normalize_ancilla_budget(total_ancilla, ancilla_budget) + constrained = effective_budget < total_ancilla dx, dz = geom.dx, geom.dz lines = [ @@ -123,7 +155,7 @@ def generate_guppy_source(patch: "SurfacePatch") -> str: ], ) - # Generate syndrome extraction with parallel CNOT schedule + # Generate syndrome extraction with parallel CNOT schedule. 
rounds = compute_cnot_schedule(patch) lines.extend( @@ -132,52 +164,115 @@ def generate_guppy_source(patch: "SurfacePatch") -> str: "", "@guppy", f"def syndrome_extraction(surf: SurfaceCode_{dx}x{dz}) -> Syndrome_{dx}x{dz}:", - ' """Extract full syndrome using 4-round parallel CNOT schedule."""', - " # Allocate ancilla qubits (one per stabilizer)", ], ) - lines.extend(f" ax{stab.index} = qubit()" for stab in geom.x_stabilizers) - lines.extend(f" az{stab.index} = qubit()" for stab in geom.z_stabilizers) + if not constrained: + # Unconstrained: one ancilla per stabilizer, X-stabs first then + # Z-stabs, measured in parallel at the end. Matches the + # abstract circuit's unconstrained-path measurement order. + lines.extend( + [ + ' """Extract full syndrome using 4-round parallel CNOT schedule."""', + " # Allocate ancilla qubits (one per stabilizer)", + ], + ) + + lines.extend(f" ax{stab.index} = qubit()" for stab in geom.x_stabilizers) + lines.extend(f" az{stab.index} = qubit()" for stab in geom.z_stabilizers) - lines.append("") - lines.append(" # Hadamard on X ancillas") - lines.extend(f" h(ax{stab.index})" for stab in geom.x_stabilizers) + lines.append("") + lines.append(" # Hadamard on X ancillas") + lines.extend(f" h(ax{stab.index})" for stab in geom.x_stabilizers) + + for rnd_idx, rnd_gates in enumerate(rounds): + lines.append("") + lines.append(f" # Round {rnd_idx + 1}") + for stab_type, stab_idx, data_q in rnd_gates: + if stab_type == "X": + lines.append(f" cx(ax{stab_idx}, surf.data[{data_q}])") + else: + lines.append(f" cx(surf.data[{data_q}], az{stab_idx})") + + lines.append("") + lines.append(" # Hadamard on X ancillas") + lines.extend(f" h(ax{stab.index})" for stab in geom.x_stabilizers) - # Emit 4 rounds of CX gates - for rnd_idx, rnd_gates in enumerate(rounds): lines.append("") - lines.append(f" # Round {rnd_idx + 1}") - for stab_type, stab_idx, data_q in rnd_gates: - if stab_type == "X": - lines.append(f" cx(ax{stab_idx}, surf.data[{data_q}])") - else: 
- lines.append(f" cx(surf.data[{data_q}], az{stab_idx})") - - lines.append("") - lines.append(" # Hadamard on X ancillas") - lines.extend(f" h(ax{stab.index})" for stab in geom.x_stabilizers) - - # Measure ancillas (destructive) - # Each measurement gets a per-measurement result() call that ties the - # physical measurement to a MeasId. The result() names encode the - # stabilizer type and index. The AllocateResult IDs generated by - # these calls flow through the trace and become MeasIds on the TickCircuit. - lines.append("") - # Measure ancillas with per-measurement result() identity. - # Tag format: "label:idx" where label is the stabilizer name and idx is the - # round-local measurement index. The global MeasId is assigned by the runtime - # via AllocateResult and flows through the trace automatically. - lines.append(" # Measure ancillas") - idx = 0 - for stab in geom.x_stabilizers: - lines.append(f" sx{stab.index} = measure(ax{stab.index})") - lines.append(f' result("sx{stab.index}:meas:{idx}", sx{stab.index})') - idx += 1 - for stab in geom.z_stabilizers: - lines.append(f" sz{stab.index} = measure(az{stab.index})") - lines.append(f' result("sz{stab.index}:meas:{idx}", sz{stab.index})') - idx += 1 + lines.append(" # Measure ancillas") + idx = 0 + for stab in geom.x_stabilizers: + lines.append(f" sx{stab.index} = measure(ax{stab.index})") + lines.append(f' result("sx{stab.index}:meas:{idx}", sx{stab.index})') + idx += 1 + for stab in geom.z_stabilizers: + lines.append(f" sz{stab.index} = measure(az{stab.index})") + lines.append(f' result("sz{stab.index}:meas:{idx}", sz{stab.index})') + idx += 1 + else: + # Constrained: stabilizer-batched. The batch sequence is the + # shared `batched_stabilizers(patch, effective_budget)` so the + # abstract circuit's measurement order matches by construction. 
+ batches = batched_stabilizers(patch, effective_budget) + lines.append( + f' """Extract full syndrome in {len(batches)} ancilla-reuse batches (budget={effective_budget})."""', + ) + idx = 0 + for batch_idx, batch in enumerate(batches): + lines.append("") + lines.append(f" # Batch {batch_idx + 1}/{len(batches)} of stabilizers") + + # Per-batch ancilla variable names: _a_b{batch}_p{pos}. Each + # `qubit()` call here allocates a fresh logical qubit that + # Selene's lowering reuses the physical slot freed by the + # previous batch's `measure()` calls (empirically verified + # in the spike). + batch_anc_var: dict[tuple[str, int], str] = {} + for pos, (stab_type, stab_idx) in enumerate(batch): + var = f"_a_b{batch_idx}_p{pos}" + batch_anc_var[(stab_type, stab_idx)] = var + lines.append(f" {var} = qubit()") + + x_in_batch = [(t, i) for (t, i) in batch if t == "X"] + if x_in_batch: + lines.append(" # Hadamard on X ancillas in this batch") + for stab_type, stab_idx in x_in_batch: + lines.append(f" h({batch_anc_var[(stab_type, stab_idx)]})") + + # Filter the full CX schedule to just this batch's stabilizers. 
+ batch_keys = set(batch_anc_var.keys()) + for rnd_idx, rnd_gates in enumerate(rounds): + rnd_in_batch = [ + (stab_type, stab_idx, data_q) + for stab_type, stab_idx, data_q in rnd_gates + if (stab_type, stab_idx) in batch_keys + ] + if not rnd_in_batch: + continue + lines.append("") + lines.append(f" # Batch {batch_idx + 1} round {rnd_idx + 1}") + for stab_type, stab_idx, data_q in rnd_in_batch: + anc = batch_anc_var[(stab_type, stab_idx)] + if stab_type == "X": + lines.append(f" cx({anc}, surf.data[{data_q}])") + else: + lines.append(f" cx(surf.data[{data_q}], {anc})") + + if x_in_batch: + lines.append("") + lines.append(" # Hadamard on X ancillas in this batch") + for stab_type, stab_idx in x_in_batch: + lines.append(f" h({batch_anc_var[(stab_type, stab_idx)]})") + + lines.append("") + lines.append(f" # Measure batch {batch_idx + 1} ancillas") + for stab_type, stab_idx in batch: + anc = batch_anc_var[(stab_type, stab_idx)] + syn_var = f"sx{stab_idx}" if stab_type == "X" else f"sz{stab_idx}" + tag_prefix = syn_var + lines.append(f" {syn_var} = measure({anc})") + lines.append(f' result("{tag_prefix}:meas:{idx}", {syn_var})') + idx += 1 x_calls = ", ".join(f"sx{s.index}" for s in geom.x_stabilizers) z_calls = ", ".join(f"sz{s.index}" for s in geom.z_stabilizers) @@ -301,24 +396,68 @@ def generate_guppy_source(patch: "SurfacePatch") -> str: return "\n".join(lines) -def _load_guppy_module(patch: "SurfacePatch") -> dict: +def _validate_surface_memory_distance(d: int) -> None: + """Enforce the surface-memory Guppy entry-point distance contract. + + The distance-based public entry points (:func:`get_num_qubits`, + :func:`get_surface_code_module`, :func:`make_surface_code`, + :func:`generate_surface_code_module`) document and require an odd code + distance ``>= 3``. Validate it in one place so they fail loud + consistently rather than silently building an out-of-contract program + (the patch-based entry points validate via ``SurfacePatch`` instead). 
+ """ + if d < 3 or d % 2 == 0: + msg = f"Distance must be odd >= 3, got {d}" + raise ValueError(msg) + + +def _guppy_module_cache_key(patch: "SurfacePatch", effective_budget: int) -> str: + """Filesystem-safe cache key spanning full patch identity + budget. + + Mirrors the topology identity used by the native cache + (``decode._surface_patch_cache_key``): dx, dz, orientation, and the + rotated flag. Keying on distance/dx-dz alone would collide a rotated and + a non-rotated patch of the same shape onto one generated module. + """ + geom = patch.geometry + rotated = "rot" if geom.rotated else "unrot" + return f"{patch.dx}x{patch.dz}_{geom.orientation.name}_{rotated}_b{effective_budget}" + + +def _load_guppy_module( + patch: "SurfacePatch", + *, + ancilla_budget: int | None = None, +) -> dict: """Load a Guppy module for a patch, using caching. + The cache key spans the full patch identity (dx, dz, orientation, + rotated) and the **effective** budget (after clamping via + ``normalize_ancilla_budget``), so ``ancilla_budget=None`` and + ``ancilla_budget >= total_ancilla`` resolve to the same cache entry + while distinct patch geometries never collide. + Args: patch: SurfacePatch with geometry + ancilla_budget: Optional cap on simultaneously live ancillas Returns: Module dictionary with generated functions """ - cache_key = f"{patch.dx}x{patch.dz}" + from pecos.qec.surface._ancilla_batching import normalize_ancilla_budget + + geom = patch.geometry + total_ancilla = len(geom.x_stabilizers) + len(geom.z_stabilizers) + effective_budget = normalize_ancilla_budget(total_ancilla, ancilla_budget) + cache_key = _guppy_module_cache_key(patch, effective_budget) if cache_key in _state.module_cache: return _state.module_cache[cache_key] - # Generate source - source = generate_guppy_source(patch) + # Generate source for this (patch, effective_budget) combination. 
+ source = generate_guppy_source(patch, ancilla_budget=ancilla_budget) - # Write to temp file (required for Guppy introspection) + # Write to temp file (required for Guppy introspection). temp_dir = _get_temp_dir() temp_file = temp_dir / f"patch_{cache_key}.py" temp_file.write_text(source) @@ -342,6 +481,8 @@ def generate_memory_experiment( patch: "SurfacePatch", num_rounds: int, basis: str, + *, + ancilla_budget: int | None = None, ) -> object: """Generate a memory experiment for a patch. @@ -349,11 +490,12 @@ def generate_memory_experiment( patch: SurfacePatch configuration num_rounds: Number of syndrome rounds basis: 'Z' or 'X' + ancilla_budget: Optional cap on simultaneously live ancillas Returns: Guppy function for the experiment """ - module = _load_guppy_module(patch) + module = _load_guppy_module(patch, ancilla_budget=ancilla_budget) if basis.upper() == "Z": factory = module["make_memory_z"] @@ -366,72 +508,135 @@ def generate_memory_experiment( return factory(num_rounds) -def get_num_qubits(d: int) -> int: - """Get total number of qubits for a distance-d surface code. +def get_num_qubits( + d: int | None = None, + *, + patch: "SurfacePatch | None" = None, + ancilla_budget: int | None = None, +) -> int: + """Get the peak simultaneously-live qubit count for a surface-code program. - Peak qubit count: d^2 data qubits + (d^2 - 1) ancilla qubits. + Provide exactly one of ``d`` or ``patch``: - Args: - d: Code distance + - ``d`` (odd >= 3): the default symmetric rotated patch, with + ``d^2`` data and ``d^2 - 1`` ancilla qubits. + - ``patch``: any geometry (asymmetric / non-rotated included); counts + are derived from ``patch.geometry`` so the result is faithful to the + patch actually being traced -- not a scalar-distance approximation. + + Unconstrained (``ancilla_budget=None``): peak count is + ``num_data + total_ancilla``. 
Constrained: the program reuses ancilla + slots across stabilizer-measurement batches, so only + ``num_data + min(ancilla_budget, total_ancilla)`` slots are live at once. + Clamping matches ``normalize_ancilla_budget``, so the + unconstrained-via-``None`` and unconstrained-via-large-int cases collapse. Returns: - Total qubits (2 * d^2 - 1) + Total qubits the traced program will simultaneously use. """ - return 2 * d * d - 1 + from pecos.qec.surface._ancilla_batching import normalize_ancilla_budget + if (d is None) == (patch is None): + msg = "get_num_qubits requires exactly one of d=... or patch=..." + raise ValueError(msg) -def generate_surface_code_module(d: int) -> str: + if patch is not None: + geom = patch.geometry + num_data = geom.num_data + total_ancilla = len(geom.x_stabilizers) + len(geom.z_stabilizers) + else: + _validate_surface_memory_distance(d) + num_data = d * d + total_ancilla = d * d - 1 + + return num_data + normalize_ancilla_budget(total_ancilla, ancilla_budget) + + +def generate_surface_code_module(d: int, *, ancilla_budget: int | None = None) -> str: """Generate source code for a distance-d surface code module. Args: d: Code distance (must be odd >= 3) + ancilla_budget: Optional cap on simultaneously live ancillas; + forwarded to ``generate_guppy_source``. Returns: Python/Guppy source code as a string """ - if d < 3 or d % 2 == 0: - msg = f"Distance must be odd >= 3, got {d}" - raise ValueError(msg) + _validate_surface_memory_distance(d) from pecos.qec.surface import SurfacePatch patch = SurfacePatch.create(distance=d) - return generate_guppy_source(patch) + return generate_guppy_source(patch, ancilla_budget=ancilla_budget) + + +def _surface_code_module_for_patch(patch: "SurfacePatch", *, ancilla_budget: int | None = None) -> dict: + """Load + cache a surface-code module for an arbitrary patch. 
+ + Cache key spans full patch identity (dx, dz, orientation, rotated) plus + the effective budget, so distinct geometries never collide and the + unconstrained-via-``None`` / unconstrained-via-large-int cases share one + entry. Module metadata is derived from the patch geometry (faithful for + asymmetric / non-rotated patches), not from a scalar distance. + """ + from pecos.qec.surface._ancilla_batching import normalize_ancilla_budget + + geom = patch.geometry + total_ancilla = len(geom.x_stabilizers) + len(geom.z_stabilizers) + effective_budget = normalize_ancilla_budget(total_ancilla, ancilla_budget) + cache_key = (patch.dx, patch.dz, geom.orientation.name, geom.rotated, effective_budget) + if cache_key in _state.distance_module_cache: + return _state.distance_module_cache[cache_key] -def get_surface_code_module(d: int) -> dict: + module = _load_guppy_module(patch, ancilla_budget=ancilla_budget) + + # Metadata derived from the actual patch geometry. + module["distance"] = patch.distance + module["num_data"] = geom.num_data + module["num_stab"] = total_ancilla + module["ancilla_budget"] = effective_budget + + _state.distance_module_cache[cache_key] = module + return module + + +def get_surface_code_module(d: int, *, ancilla_budget: int | None = None) -> dict: """Get a loaded surface code module for distance d. 
Args: - d: Code distance + d: Code distance (must be odd >= 3) + ancilla_budget: Optional cap on simultaneously live ancillas Returns: Dictionary with module contents and metadata """ - if d in _state.distance_module_cache: - return _state.distance_module_cache[d] - from pecos.qec.surface import SurfacePatch + _validate_surface_memory_distance(d) patch = SurfacePatch.create(distance=d) - module = _load_guppy_module(patch) + return _surface_code_module_for_patch(patch, ancilla_budget=ancilla_budget) - # Add metadata - module["distance"] = d - module["num_data"] = d * d - module["num_stab"] = (d * d - 1) // 2 - _state.distance_module_cache[d] = module - return module - - -def make_surface_code(distance: int, num_rounds: int, basis: str) -> object: +def make_surface_code( + distance: int, + num_rounds: int, + basis: str, + *, + ancilla_budget: int | None = None, +) -> object: """Create a surface code memory experiment. Args: distance: Code distance (must be odd >= 3) num_rounds: Number of syndrome extraction rounds basis: 'Z' or 'X' + ancilla_budget: Optional cap on simultaneously live ancillas. + ``None`` (default) emits the unconstrained Guppy program; + a finite budget emits a stabilizer-batched program that + matches the abstract circuit's + ``batched_stabilizers(patch, effective_budget)`` schedule. 
Returns: Compiled Guppy program @@ -440,7 +645,7 @@ def make_surface_code(distance: int, num_rounds: int, basis: str) -> object: msg = f"basis must be 'Z' or 'X', got {basis!r}" raise ValueError(msg) - module = get_surface_code_module(distance) + module = get_surface_code_module(distance, ancilla_budget=ancilla_budget) factory = module["make_memory_z"] if basis.upper() == "Z" else module["make_memory_x"] diff --git a/python/quantum-pecos/src/pecos/qec/__init__.py b/python/quantum-pecos/src/pecos/qec/__init__.py index 362efa148..7aac871c1 100644 --- a/python/quantum-pecos/src/pecos/qec/__init__.py +++ b/python/quantum-pecos/src/pecos/qec/__init__.py @@ -34,7 +34,6 @@ DemBuilder, DemSampler, DemSamplerBuilder, - DetectorErrorModel, EquivalenceResult, FaultLocation, InfluenceBuilder, @@ -72,6 +71,12 @@ ColorCodeStabilizer, generate_488_layout, ) + +# DetectorErrorModel is re-exported from pecos.qec.dem: a thin Python subclass +# of the Rust class that adds the from_guppy convenience constructor (the +# Guppy/Selene trace pipeline is Python-only, so it cannot live in the Rust +# extension without a dependency cycle). +from pecos.qec.dem import DetectorErrorModel from pecos.qec.generic import ( CheckSchedule, PauliOperator, diff --git a/python/quantum-pecos/src/pecos/qec/dem.py b/python/quantum-pecos/src/pecos/qec/dem.py new file mode 100644 index 000000000..1adf274c2 --- /dev/null +++ b/python/quantum-pecos/src/pecos/qec/dem.py @@ -0,0 +1,253 @@ +"""Python-level ``DetectorErrorModel`` with a Guppy convenience constructor. + +The core ``DetectorErrorModel`` is implemented in Rust +(``pecos_rslib.qec.DetectorErrorModel``). The Guppy -> Selene -> QIS-trace +pipeline, however, lives entirely in Python (``pecos.sim``, ``pecos.guppy``, +``pecos.qec.surface.decode``). 
To keep the convenient +``DetectorErrorModel.from_guppy(...)`` call site without making the low-level +Rust extension import the high-level Python package (a dependency cycle), this +module defines a thin Python subclass that adds :meth:`from_guppy` and is +re-exported as the public ``pecos.qec.DetectorErrorModel``. + +This wrapper is intentionally thin: it traces the Guppy program into a +``TickCircuit``, optionally compiles the program to a HUGR (only when +``result_tags`` is requested -- to recover the sound tag -> measurement +binding via ``pecos_hugr_qis::extract_result_tag_measurements``), and hands +the caller's detector/observable JSON to the Rust DEM builder. The metadata +validation that applies to **every** ingest path (``from_guppy``, +``from_circuit``, ``DemSampler.from_circuit``, public ``DemBuilder``) lives +solely in the Rust DEM builder +(``pecos_qec::fault_tolerance::dem_builder``): JSON shape, ``D0``/``L0`` id +forms, tracked-Pauli rejection, ``num_measurements`` consistency, +out-of-range records, ``meas_id`` resolution against the circuit's stable +stamped ``MeasId``s, and the ``records``-vs-``meas_ids`` redundancy rule. + +The ``result_tags`` -> record-offset resolution (loop guard included) is +applied **only** through ``from_guppy``: the rewriter +(``pecos_qec::resolve_result_tags``, invoked via the pyo3 +``resolve_result_tags_for_guppy`` binding) runs from this wrapper before +``from_circuit`` is called, so the downstream DEM builder only ever sees +already-resolved ``records``. ``result_tags`` in circuit metadata fed +directly to ``from_circuit`` / ``DemSampler.from_circuit`` / +``DemBuilder.build`` is **not** resolved -- those paths build from +``records``/``meas_ids`` as usual. 
+""" + +from __future__ import annotations + +from typing import Any + +from pecos_rslib.qec import DetectorErrorModel as _RustDetectorErrorModel + + +class DetectorErrorModel(_RustDetectorErrorModel): + """Detector error model with a Guppy/QIS-trace convenience constructor. + + Identical to :class:`pecos_rslib.qec.DetectorErrorModel` except for the + added :meth:`from_guppy` classmethod. + + Identity caveat: the inherited Rust factory classmethods + (``from_circuit``, ``from_pecos_metadata_json``, and ``from_guppy``, which + delegates to ``from_circuit``) construct and return the *Rust base* class + ``pecos_rslib.qec.DetectorErrorModel`` -- they do not return instances of + this Python subclass. Consequently ``isinstance(obj, DetectorErrorModel)`` + is ``False`` for objects produced by those constructors even though every + method works identically. Do not use ``isinstance`` against this public + subclass to recognize DEMs; check the Rust base type instead. (No PECOS + code relies on such an ``isinstance``; this is a public-API caveat only.) + """ + + __slots__ = () + + @classmethod + def from_guppy( + cls, + guppy: Any, + *, + num_qubits: int, + detectors_json: str, + observables_json: str = "[]", + num_measurements: int | None = None, + p1: float = 0.001, + p2: float = 0.01, + p_meas: float = 0.001, + p_prep: float = 0.001, + seed: int = 0, + ) -> _RustDetectorErrorModel: + """Build a circuit-level DEM from a Guppy program by tracing it. + + Runs ``guppy`` under the Selene QIS engine with operation tracing, + replays the captured gate stream into a ``TickCircuit``, attaches the + caller-supplied detector/observable definitions, and builds the DEM via + native PECOS fault propagation. All metadata validation happens in the + Rust DEM builder (single source of truth). + + Args: + guppy: Anything ``pecos.sim`` accepts -- a ``@guppy``-decorated + function, a compiled Guppy program (e.g. 
the object returned by + ``pecos.guppy.make_surface_code``), or a program wrapper. There + is no Guppy *source-string* form in PECOS; pass a program/ + function, not source text. + num_qubits: Number of qubits to allocate for the trace. QIS/HUGR + programs require an explicit qubit count. + detectors_json: Detector definitions as a JSON list, e.g. + ``[{"id": 0, "records": [-1, -5]}, ...]``. ``id`` may be a bare + integer or, for convenience, the DEM-label form ``"D0"`` + (observables likewise accept ``"L0"``); both normalize to the + same integer. + + Each entry references measurements in one of three ways + (provide exactly one form; co-presence is allowed only if the + forms reference the same measurements): + + - ``records``: negative measurement offsets (Stim convention), + positional in the traced measurement record. + - ``meas_ids``: stable stamped ``MeasId``s -- resolved in Rust + against the circuit's actual ids, so robust to any + measurement reordering Guppy/Selene compilation may + introduce. + - ``result_tags``: Guppy ``result(tag, ...)`` tag strings + (e.g. ``[{"id": 0, "result_tags": ["syn_a"]}]``). The + reorder-immune ``tag -> measurement`` binding is recovered + from the compiled HUGR by + ``pecos_hugr_qis::extract_result_tag_measurements`` and + resolved to record offsets in Rust. Supported only for + **straight-line, canonical** programs: + ``result(tag, measure(q))`` of a raw scalar measurement. + Computed (``result(tag, m0 == m1)``), constant + (``result(tag, True)``), and array-valued + (``result(tag, measure_array(qs))``) forms are not + resolvable and an unknown tag is a hard error. Runtime + ``for _ in range(comptime(n))`` loops (e.g. the surface + code's round structure) have one static measure op per + loop body in the HUGR, not per occurrence -- ``result_tags`` + is rejected fail-loud for such programs. 
``result_tags`` + also requires ``guppy`` to be a ``@guppy``-decorated + function / ``GuppyFunctionDefinition`` (not an arbitrary + ``pecos.sim``-acceptable wrapper); use ``records`` for the + surface-code path. + observables_json: Observable definitions as a JSON list, e.g. + ``[{"id": 0, "records": [-1]}]`` (same id/records rules as + detectors). + + Tracked Paulis: **hand-authored JSON tracked Paulis are NOT + supported** by this path. Tracked Paulis are only produced from + circuit *annotations* (e.g. the surface builder), not from + ``observables_json``; a ``{"kind": "tracked_pauli", ...}`` + entry here is rejected by the builder. + num_measurements: Total measurement count, used to resolve negative + ``records`` offsets. If omitted, it is inferred from the traced + circuit; if given, it must match the traced count. + p1: Single-qubit gate depolarizing rate. + p2: Two-qubit gate depolarizing rate. + p_meas: Measurement flip rate. + p_prep: Preparation (reset) error rate. + seed: Seed for the ideal trace run. + + Returns: + A ``DetectorErrorModel`` built from the traced circuit. + + Raises: + ValueError: If ``num_measurements`` disagrees with the traced + measurement count, if a detector/observable is malformed or + references an out-of-range ``record`` or an absent + ``meas_id``, or if the traced operation stream cannot be + replayed. + + Note: + **Measurement-dependent (dynamic) control flow is unsupported.** + ``from_guppy`` traces one ideal execution; a Guppy program whose + quantum operations depend on a measurement *outcome* (e.g. + ``if measure(q): x(other)``) would yield a DEM built from a single + sampled branch, silently wrong and seed-dependent. No reliable + runtime-trace heuristic distinguishes that from the + statically-scheduled post-measurement gates a normal QEC circuit + has (the surface code has these every round), so no guard is + attempted -- pass straight-line programs only. 
Sound detection + would require HUGR conditional-on-measurement analysis (deferred). + + Every measurement is anchored to a stable MeasId automatically: + ``measure()`` itself allocates the result slot in the trace (a + ``result(...)`` call is not required for MeasId assignment). + + Source-anchored tag-referenced detectors are exposed via the + ``result_tags`` field on detectors/observables (see the + ``detectors_json`` argument). The supported scope is canonical + scalar ``result(tag, measure(q))`` in straight-line programs; the + runtime-loop case (per-occurrence binding) remains deferred. + """ + from pecos.qec.surface.decode import trace_guppy_into_tick_circuit + + # Tag-referenced detectors require the compiled HUGR (to recover the + # sound, reorder-immune Guppy `result(tag, ...)` -> measurement + # binding). `guppy_to_hugr` accepts @guppy-decorated functions and + # `GuppyFunctionDefinition`s (e.g. `make_surface_code(...)`), but + # not arbitrary callables / non-Guppy `pecos.sim`-acceptable inputs. + # Compile upfront so a wrong input fails loud here, before tracing, + # with a clear @guppy-mentioning message instead of crashing later + # inside the HUGR step. + needs_tags = _result_tags_present(detectors_json, observables_json) + hugr_bytes: bytes | None = None + if needs_tags: + from pecos._compilation import guppy_to_hugr + + try: + hugr_bytes = guppy_to_hugr(guppy) + except ValueError as exc: + msg = ( + "result_tags requires a @guppy-decorated function (or a " + "GuppyFunctionDefinition, e.g. the object " + "make_surface_code(...) returns) so the program can be " + "compiled to a HUGR. Pass such an input directly, or use " + "positional 'records' / 'meas_ids' instead." 
+ ) + raise ValueError(msg) from exc + + tc = trace_guppy_into_tick_circuit(guppy, num_qubits, seed=seed) + + # Compilation passes required for traced QIS circuits before fault + # analysis: normalize parameterized Clifford rotations to named gates + # and stamp stable MeasIds onto measurement gates. After this every + # MZ carries the stable id the Rust builder resolves meas_ids against. + tc.lower_clifford_rotations() + tc.assign_missing_meas_ids() + + # Resolve `result_tags` -> record offsets via Rust (sound HUGR + # extraction + runtime-loop guard via static-vs-traced measurement + # count). After this, `detectors_json` / `observables_json` no longer + # contain `result_tags`; the downstream Rust DEM builder is unchanged. + if needs_tags: + from pecos_rslib import resolve_result_tags_for_guppy + + detectors_json, observables_json = resolve_result_tags_for_guppy( + detectors_json, + observables_json, + hugr_bytes, + tc.num_measurements(), + ) + + # Hand the caller's metadata to the Rust builder verbatim; it owns all + # schema/ref validation (including D0/L0 id forms, tracked-Pauli + # rejection, num_measurements consistency, and stamped-MeasId + # resolution). + tc.set_meta("detectors", detectors_json) + tc.set_meta("observables", observables_json) + if num_measurements is not None: + tc.set_meta("num_measurements", str(num_measurements)) + + return _RustDetectorErrorModel.from_circuit( + tc, + p1=p1, + p2=p2, + p_meas=p_meas, + p_prep=p_prep, + ) + + +def _result_tags_present(detectors_json: str, observables_json: str) -> bool: + """Cheap gate: does any entry use ``result_tags``? (substring check). + + Only decides whether to compile the Guppy program to HUGR; the actual + extraction, loop-guard, resolution, and validation are all done in Rust. 
+ """ + return '"result_tags"' in (detectors_json or "") or '"result_tags"' in (observables_json or "") diff --git a/python/quantum-pecos/src/pecos/qec/surface/__init__.py b/python/quantum-pecos/src/pecos/qec/surface/__init__.py index fb702c3f1..5aee25b2e 100644 --- a/python/quantum-pecos/src/pecos/qec/surface/__init__.py +++ b/python/quantum-pecos/src/pecos/qec/surface/__init__.py @@ -40,10 +40,6 @@ get_detector_descriptors_from_tick_circuit, get_measurement_order_from_tick_circuit, get_observable_descriptors_from_tick_circuit, - get_stabilizer_region, - get_stabilizer_schedule_entries, - get_stabilizer_schedule_metadata, - get_stabilizer_touch_label, tick_circuit_to_stim, ) from pecos.qec.surface.circuit_builder import ( @@ -99,6 +95,10 @@ SurfacePatch, SurfacePatchBuilder, SurfacePatchDescriptor, + get_stabilizer_region, + get_stabilizer_schedule_entries, + get_stabilizer_schedule_metadata, + get_stabilizer_touch_label, ) from pecos.qec.surface.plot import plot_patch, plot_surface_code from pecos.qec.surface.schedule import ( diff --git a/python/quantum-pecos/src/pecos/qec/surface/_ancilla_batching.py b/python/quantum-pecos/src/pecos/qec/surface/_ancilla_batching.py new file mode 100644 index 000000000..256b8a565 --- /dev/null +++ b/python/quantum-pecos/src/pecos/qec/surface/_ancilla_batching.py @@ -0,0 +1,94 @@ +"""Shared helpers for ancilla-budget reasoning across surface paths. + +Both the abstract surface-circuit builder +(``pecos.qec.surface.circuit_builder``) and the Guppy emitter +(``pecos.guppy.surface``) need to agree, byte-for-byte, on how +stabilizers are partitioned into ancilla-reuse batches. Otherwise the +abstract reference TickCircuit and the traced Guppy program produce +different measurement orders, the detector record offsets the caller +passes reference the wrong measurements, and the DEM is silently +wrong. + +Keeping the partitioning logic in this single helper -- imported by +both consumers -- is the only source of truth. 
A unit test pins +concrete expected batch sequences for small ``(distance, budget)`` +combinations (see +``tests/qec/surface/test_ancilla_batching.py``) so a regression in +the partitioning policy itself fails fast, independent of any DEM- +level oracle. + +The two functions are intentionally pure (no circuit object created) +so neither consumer pulls in the other's dependencies. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pecos.qec.surface.geometry import SurfacePatch + + +def normalize_ancilla_budget(total_ancilla: int, ancilla_budget: int | None) -> int: + """Clamp an ancilla budget to the valid range for a patch. + + ``None`` collapses to the unconstrained ``total_ancilla``. A budget + ``>= total_ancilla`` clamps to ``total_ancilla`` so callers + requesting "no constraint" via either ``None`` or a large integer + resolve to the same effective budget. ``< 1`` is rejected fail-loud. + + Non-``int`` (including ``bool``, ``float``) is rejected fail-loud + so the public ``ancilla_budget`` kwarg has a strict integer + contract -- avoiding silently-wrong cache keys or qubit counts. + """ + if ancilla_budget is None: + return total_ancilla + + # Reject bool first (bool is a subclass of int in Python). + if isinstance(ancilla_budget, bool) or not isinstance(ancilla_budget, int): + msg = f"ancilla_budget must be int or None, got {type(ancilla_budget).__name__}" + raise TypeError(msg) + + if ancilla_budget < 1: + msg = f"ancilla_budget must be >= 1, got {ancilla_budget}" + raise ValueError(msg) + + return min(ancilla_budget, total_ancilla) + + +def batched_stabilizers( + patch: SurfacePatch, + ancilla_budget: int, +) -> list[list[tuple[str, int]]]: + """Partition stabilizers into ancilla-reuse batches. + + Returns a list of batches, each a list of ``(stab_type, stab_idx)`` + pairs where ``stab_type`` is ``"X"`` or ``"Z"`` and ``stab_idx`` is + the patch-internal stabilizer index. 
Batches are at most + ``ancilla_budget`` stabilizers each; within each batch every + stabilizer is measured concurrently using one ancilla qubit. + + The stabilizer order is **load-bearing** production semantics shared by + the abstract circuit and the Guppy emitter: ascending stabilizer index, + X before Z on ties. Note the traced-vs-traced Selene parity tests cannot + catch a regression here -- both sides import this one helper, so a policy + change moves them together. The concrete batch-order and source-level + CX-emission pins (``tests/qec/surface/test_ancilla_batching.py``) are what + actually guard this order; preserve it. + + ``ancilla_budget`` is validated through + :func:`normalize_ancilla_budget` (``None`` means unconstrained; rejects ``bool``, + ``float``, ``str``, ``< 1``; clamps ``>= total_ancilla``) so direct + callers of this helper get the same fail-loud guarantees as the + public ``ancilla_budget`` API surface, not an opaque ``range()`` or + silent-empty failure. + """ + geom = patch.geometry + total_ancilla = len(geom.x_stabilizers) + len(geom.z_stabilizers) + effective_budget = normalize_ancilla_budget(total_ancilla, ancilla_budget) + + stabilizers = [("X", stab.index) for stab in geom.x_stabilizers] + stabilizers.extend(("Z", stab.index) for stab in geom.z_stabilizers) + stabilizers.sort(key=lambda stab: (stab[1], 0 if stab[0] == "X" else 1)) + + return [stabilizers[start : start + effective_budget] for start in range(0, len(stabilizers), effective_budget)] diff --git a/python/quantum-pecos/src/pecos/qec/surface/circuit_builder.py b/python/quantum-pecos/src/pecos/qec/surface/circuit_builder.py index 14ce63767..f88604c10 100644 --- a/python/quantum-pecos/src/pecos/qec/surface/circuit_builder.py +++ b/python/quantum-pecos/src/pecos/qec/surface/circuit_builder.py @@ -21,10 +21,29 @@ from enum import Enum, auto from typing import TYPE_CHECKING, TypedDict +# `_batched_stabilizers` and `_normalize_ancilla_budget` are imported from +# the shared `_ancilla_batching`
helper so this builder and the Guppy +# emitter (`pecos.guppy.surface`) compute identical batches by +# construction. The local aliases preserve existing call sites; do not +# fork the partitioning logic. +from pecos.qec.surface._ancilla_batching import ( + batched_stabilizers as _batched_stabilizers, +) +from pecos.qec.surface._ancilla_batching import ( + normalize_ancilla_budget as _normalize_ancilla_budget, +) + +# Stabilizer geometry helpers live in the low-level patch module (single +# source of truth). Only the two used by the circuit renderer are imported +# here; the full set is exported publicly from the package __init__. +from pecos.qec.surface.patch import ( + get_stabilizer_region, + get_stabilizer_touch_label, +) + if TYPE_CHECKING: from pecos.qec.surface.patch import ( LogicalDescriptor, - Stabilizer, StabilizerDescriptor, SurfacePatch, SurfacePatchDescriptor, @@ -129,39 +148,6 @@ def total(self) -> int: return len(set(self.data_qubits) | set(self.x_ancilla_qubits) | set(self.z_ancilla_qubits)) -def _normalize_ancilla_budget(total_ancilla: int, ancilla_budget: int | None) -> int: - """Clamp ancilla budget to the valid range for a patch.""" - if ancilla_budget is None: - return total_ancilla - - if ancilla_budget < 1: - msg = f"ancilla_budget must be >= 1, got {ancilla_budget}" - raise ValueError(msg) - - return min(ancilla_budget, total_ancilla) - - -def _batched_stabilizers( - patch: SurfacePatch, - ancilla_budget: int, -) -> list[list[tuple[str, int]]]: - """Partition stabilizers into ancilla-reuse batches. - - This mirrors the public Guppy batching order so the abstract circuit and - its native DEMs match the actual low-ancilla circuit family. - """ - geom = patch.geometry - stabilizers = [("X", stab.index) for stab in geom.x_stabilizers] - stabilizers.extend(("Z", stab.index) for stab in geom.z_stabilizers) - # Sort key is load-bearing: it mirrors Guppy's stabilizer ordering (ascending - # index, X before Z on ties). 
Batched DEMs are compared against Guppy output - # shot-for-shot in the Selene parity tests, so any change here will diverge - # from the low-ancilla reference family. - stabilizers.sort(key=lambda stab: (stab[1], 0 if stab[0] == "X" else 1)) - - return [stabilizers[start : start + ancilla_budget] for start in range(0, len(stabilizers), ancilla_budget)] - - def build_surface_code_circuit( patch: SurfacePatch, num_rounds: int, @@ -396,70 +382,6 @@ def classify_stabilizer_boundary(stab_type: str, data_qubits: tuple[int, ...], d return _classify_boundary(stab_type, data_qubits, d, dz) -def get_stabilizer_region(stab: Stabilizer, patch: SurfacePatch) -> str: - """Return a coarse region label like ``top+left`` for a stabilizer.""" - geom = patch.geometry - positions = [geom.id_to_pos[q] for q in stab.data_qubits] - avg_row = sum(row for row, _ in positions) / len(positions) - avg_col = sum(col for _, col in positions) / len(positions) - row_label = "top" if avg_row < (geom.dx - 1) / 2 else "bottom" - col_label = "left" if avg_col < (geom.dz - 1) / 2 else "right" - return f"{row_label}+{col_label}" - - -def get_stabilizer_touch_label(stab: Stabilizer, patch: SurfacePatch, data_qubit: int) -> str: - """Label how a data qubit sits relative to a stabilizer support.""" - geom = patch.geometry - if data_qubit not in stab.data_qubits: - msg = f"Qubit {data_qubit} is not in stabilizer {stab.stab_type}{stab.index}" - raise ValueError(msg) - - positions = [geom.id_to_pos[q] for q in stab.data_qubits] - data_row, data_col = geom.id_to_pos[data_qubit] - rows = [row for row, _ in positions] - cols = [col for _, col in positions] - - if len(set(rows)) == 1: - return "left" if data_col == min(cols) else "right" - if len(set(cols)) == 1: - return "top" if data_row == min(rows) else "bottom" - - vertical = "T" if data_row == min(rows) else "B" - horizontal = "L" if data_col == min(cols) else "R" - return vertical + horizontal - - -def get_stabilizer_schedule_entries(stab: Stabilizer, 
patch: SurfacePatch) -> list[dict[str, int | str]]: - """Return the per-round touch schedule for one stabilizer.""" - from pecos.qec.surface.schedule import get_stab_schedule - - schedule = get_stab_schedule(stab.stab_type, stab.data_qubits, stab.is_boundary, patch.dx, patch.dz) - return [ - { - "round_0based": round_0based, - "data_qubit": data_qubit, - "touch_label": get_stabilizer_touch_label(stab, patch, data_qubit), - } - for round_0based, data_qubit in schedule - ] - - -def get_stabilizer_schedule_metadata(stab: Stabilizer, patch: SurfacePatch) -> dict[str, object]: - """Return metadata describing one stabilizer's schedule and geometry.""" - entries = get_stabilizer_schedule_entries(stab, patch) - rounds = [int(entry["round_0based"]) for entry in entries] - return { - "stabilizer_kind": stab.stab_type, - "stabilizer_index": stab.index, - "stabilizer_is_boundary": stab.is_boundary, - "stabilizer_region": get_stabilizer_region(stab, patch), - "schedule_rounds": rounds, - "schedule_start_round": rounds[0] if rounds else None, - "schedule_end_round": rounds[-1] if rounds else None, - "schedule_entries": entries, - } - - def _build_detector_descriptors( detectors: list[dict[str, object]], patch: SurfacePatch, diff --git a/python/quantum-pecos/src/pecos/qec/surface/decode.py b/python/quantum-pecos/src/pecos/qec/surface/decode.py index 6ffb604b9..e65994390 100644 --- a/python/quantum-pecos/src/pecos/qec/surface/decode.py +++ b/python/quantum-pecos/src/pecos/qec/surface/decode.py @@ -399,6 +399,7 @@ def _copy_surface_tick_circuit_metadata(source_tc: Any, target_tc: Any) -> None: "num_detectors", "detector_descriptors", "observable_descriptors", + "ancilla_budget", ): value = source_tc.get_meta(key) if value is not None: @@ -540,8 +541,14 @@ def tuple_args(payload: Any, op_name: str, arity: int) -> tuple[Any, ...]: [(mapped_slot(int(qubit_a), op_name), mapped_slot(int(qubit_b), op_name))], ) elif op_name == "Measure": - program_id, _result_id = tuple_args(payload, 
op_name, 2) - tick.mz([mapped_slot(int(program_id), op_name)]) + program_id, result_id = tuple_args(payload, op_name, 2) + # Stamp the QIS-provided result_id as the MeasId rather than + # discarding it and letting assign_missing_meas_ids() invent + # sequential ids (which would be wrong for non-sequential ids). + tick.mz_with_ids( + [mapped_slot(int(program_id), op_name)], + [int(result_id)], + ) elif op_name == "Reset": tick.pz([mapped_slot(scalar_arg(payload, op_name), op_name)]) else: @@ -577,32 +584,32 @@ def _replay_lowered_qis_trace_into_tick_circuit(chunks: list[dict[str, Any]]) -> tick, then compact (ASAP schedule) so that gates on disjoint qubits share a tick --- matching the parallel structure of the abstract circuit. - MeasIds flow from Guppy result() objects: AllocateResult IDs from the - operations stream are stamped on MZ gates via mz_with_ids(). + MeasIds flow from the QIS measurement result slot: Quantum.Measure carries + ``[qubit, result_id]``, and those IDs are stamped on MZ gates via + mz_with_ids(). """ from pecos_rslib.quantum import TickCircuit tick_circuit = TickCircuit() + # Pass 1: the ordered MeasIds, read directly from each Measure op. A + # ``Quantum.Measure`` op carries ``[qubit, result_id]`` where ``result_id`` + # is the QIS result slot the runtime allocated for it (== the MeasId we + # stamp). Using it directly needs no AllocateResult/Measure pairing + # heuristic and no interleave assumption -- batched + # allocate-allocate-measure-measure (a valid QIS pattern) works the same + # as interleaved. (The order of Measure ops here matches the order of MZ + # gates in ``lowered_quantum_ops``, consumed in pass 2.) + meas_ids_in_order: list[int] = [] for chunk in chunks: - # Extract AllocateResult ID → MZ qubit mapping from the operations stream. - # Each AllocateResult(id=N) is followed by Quantum.Measure([qubit, slot]). - # This gives us the MeasId to stamp on each MZ gate. 
- meas_id_queue: list[tuple[int, int]] = [] # (qubit, meas_id) pairs - last_alloc_id: int | None = None for op in chunk.get("operations") or []: - op_dict = dict(op) - if "AllocateResult" in op_dict: - last_alloc_id = int(op_dict["AllocateResult"]["id"]) - elif "Quantum" in op_dict: - q_op = op_dict["Quantum"] - if "Measure" in q_op and last_alloc_id is not None: - qubit = int(q_op["Measure"][0]) - meas_id_queue.append((qubit, last_alloc_id)) - last_alloc_id = None - - meas_id_idx = 0 # next MeasId to assign + quantum = dict(op).get("Quantum") + if isinstance(quantum, dict) and "Measure" in quantum: + meas_ids_in_order.append(int(quantum["Measure"][1])) + # Pass 2: replay gates, stamping MeasIds on MZ gates in global trace order. + meas_cursor = 0 + for chunk in chunks: for gate in chunk.get("lowered_quantum_ops") or []: gate_type = str(gate["gate_type"]) qubits = [int(q) for q in gate.get("qubits", [])] @@ -628,26 +635,16 @@ def _replay_lowered_qis_trace_into_tick_circuit(chunks: list[dict[str, Any]]) -> elif gate_type == "PZ": tick.pz(qubits) elif gate_type == "MZ": - # Stamp MeasIds from the AllocateResult stream - meas_ids = [] - for q in qubits: - if meas_id_idx < len(meas_id_queue): - expected_q, mid = meas_id_queue[meas_id_idx] - if expected_q == q: - meas_ids.append(mid) - meas_id_idx += 1 - else: - # Qubit mismatch — fall back to auto-assign - meas_ids = [] - break - else: - meas_ids = [] - break - - if meas_ids: - tick.mz_with_ids(qubits, meas_ids) - else: - tick.mz(qubits) + end = meas_cursor + len(qubits) + if end > len(meas_ids_in_order): + msg = ( + "More measured qubits than result(...)-anchored " + "MeasIds in the traced program; a measurement is " + "missing its result(...) call." 
+ ) + raise ValueError(msg) + tick.mz_with_ids(qubits, meas_ids_in_order[meas_cursor:end]) + meas_cursor = end elif gate_type == "RX": tick.rx(angles[0], qubits) elif gate_type == "RY": @@ -678,40 +675,154 @@ def _replay_lowered_qis_trace_into_tick_circuit(chunks: list[dict[str, Any]]) -> msg = f"Unsupported lowered traced gate {gate_type!r}" raise ValueError(msg) + if meas_cursor != len(meas_ids_in_order): + msg = ( + f"Traced program has {len(meas_ids_in_order)} result(...)-anchored " + f"measurements but only {meas_cursor} measured qubit(s) in the " + "lowered gate stream; result()/measurement mismatch." + ) + raise ValueError(msg) + # Compact: ASAP-schedule gates into minimal ticks tick_circuit.compact_ticks() return tick_circuit -def _generate_traced_surface_tick_circuit( - patch: SurfacePatch, - num_rounds: int, - basis: str, -) -> Any: - """Trace the lowered ideal Selene/QIS op stream and replay it into a TickCircuit.""" +def _chunk_has_lowerable_op(chunk: dict[str, Any]) -> bool: + """True if a chunk carries an operation that lowers to a TickCircuit gate. + + A raw ``Quantum`` op (gate / measure / reset) lowers to a gate, and an + ``AllocateQubit`` lowers to a prep (``PZ``) -- both appear in + ``lowered_quantum_ops`` after Selene lowering, and both are emitted as + gates by the raw replay (see :func:`_replay_qis_trace_into_tick_circuit`). + ``AllocateResult``, ``RecordOutput``, ``Barrier``, and ``ReleaseQubit`` + emit no gate and are pass-through bookkeeping, so a chunk containing only + those legitimately has no lowered ops. + """ + return any( + isinstance(op, dict) and ("Quantum" in op or "AllocateQubit" in op) for op in (chunk.get("operations") or []) + ) + + +def _reject_partially_lowered_trace(chunks: list[dict[str, Any]]) -> None: + """Fail loud on a mixed/partially-lowered trace. + + The lowered replay consumes a chunk's gates from ``lowered_quantum_ops`` + only (it reads ``operations`` solely for measurement result ids). 
So once + *any* chunk is lowered, a chunk that carries a lowerable operation (a raw + ``Quantum`` gate/measure/reset, or an ``AllocateQubit`` prep) but an empty + ``lowered_quantum_ops`` would have those gates silently dropped -- the + resulting TickCircuit would be missing operations with no error. A dropped + *measurement* is already caught downstream by the meas-count guard in + :func:`_replay_lowered_qis_trace_into_tick_circuit`, but a dropped prep or + non-measurement gate (H, CX, ...) would pass silently. Reject the + incomplete trace here instead of building from a partial gate stream. + + This is the explicit trace-format contract for live + ``capture_operation_trace()`` output: lowered and raw forms must not be + mixed across chunks. (Per-chunk completeness of lowering is assumed and is + exercised end-to-end by the byte-identical surface DEM regressions.) + """ + for idx, chunk in enumerate(chunks): + if _chunk_has_lowerable_op(chunk) and not chunk.get("lowered_quantum_ops"): + msg = ( + f"Traced chunk {idx} carries lowerable operations (a quantum " + "gate/measure/reset or an AllocateQubit prep) but no " + "lowered_quantum_ops while other chunks are lowered. This " + "mixed/partially-lowered trace would silently drop the chunk's " + "gates in the lowered replay; refusing to build from an " + "incomplete gate stream." + ) + raise ValueError(msg) + + +def trace_guppy_into_tick_circuit(program: Any, num_qubits: int, *, seed: int = 0) -> Any: + """Trace a Guppy/QIS program's lowered Selene op stream into a ``TickCircuit``. + + Runs ``program`` under the Selene QIS engine with operation tracing enabled + and replays the captured (lowered) gate stream into a PECOS ``TickCircuit``. + This is the generic core shared by the surface traced-QIS path and the + general ``DetectorErrorModel.from_guppy`` entry point. + + Note: this traces ONE ideal execution. 
Measurement-dependent (dynamic) + control flow is therefore *unsupported / undefined* for DEM construction -- + a single sampled branch is not a static circuit. No reliable runtime-trace + heuristic distinguishes that from statically-scheduled post-measurement + gates (the surface code legitimately has those), so no guard is attempted; + callers must pass straight-line programs. + + Args: + program: Anything ``pecos.sim`` accepts -- a ``@guppy`` function, a + compiled Guppy program, or a program wrapper. + num_qubits: Number of qubits to allocate. QIS/HUGR programs require an + explicit qubit count for trace capture. + seed: Seed for the (ideal) trace run. + + Returns: + A ``TickCircuit`` with no detector/observable metadata attached; the + caller supplies that. + """ import pecos - from pecos.guppy import get_num_qubits, make_surface_code - program = make_surface_code(distance=patch.distance, num_rounds=num_rounds, basis=basis) sim_builder = ( - pecos.sim(program) - .classical(pecos.selene_engine()) - .quantum(pecos.stabilizer()) - .qubits(get_num_qubits(patch.distance)) - .seed(0) + pecos.sim(program).classical(pecos.selene_engine()).quantum(pecos.stabilizer()).qubits(num_qubits).seed(seed) ) chunks = list(sim_builder.capture_operation_trace()) + # Selene lowers QIS gates into per-chunk `lowered_quantum_ops` (the gate + # shape actually executed; e.g. cx -> RZZ + rotations). When any chunk is + # lowered we replay from those, but first reject a mixed/partially-lowered + # trace that would silently drop a chunk's raw gates (see + # `_reject_partially_lowered_trace`). if any(chunk.get("lowered_quantum_ops") for chunk in chunks): + _reject_partially_lowered_trace(chunks) return _replay_lowered_qis_trace_into_tick_circuit(chunks) + # No chunk was lowered: replay the uniformly-raw QIS operation stream. 
operations: list[dict[str, Any]] = [] for chunk in chunks: operations.extend(list(chunk.get("operations", []))) return _replay_qis_trace_into_tick_circuit(operations) +def _generate_traced_surface_tick_circuit( + patch: SurfacePatch, + num_rounds: int, + basis: str, + *, + ancilla_budget: int | None = None, +) -> Any: + """Trace the lowered ideal Selene/QIS op stream and replay it into a TickCircuit. + + With ``ancilla_budget=None``, emits the unconstrained Guppy program + (one ancilla per stabilizer, all measured at the end of one round). + With a finite budget, emits the stabilizer-batched program; Selene's + lowering reuses ancilla slots across batches so the traced TickCircuit + uses only ``num_data + min(budget, total_ancilla)`` physical qubits + simultaneously. + + The program and qubit count are derived from the **actual patch**, not + its scalar distance, so a non-default patch (non-rotated, asymmetric) is + traced faithfully rather than silently substituting the default rotated + patch of the same distance. 
+ """ + from pecos.guppy import get_num_qubits + from pecos.guppy.surface import generate_memory_experiment + + program = generate_memory_experiment( + patch, + num_rounds, + basis, + ancilla_budget=ancilla_budget, + ) + return trace_guppy_into_tick_circuit( + program, + get_num_qubits(patch=patch, ancilla_budget=ancilla_budget), + seed=0, + ) + + def _build_surface_tick_circuit_for_native_model( patch: SurfacePatch, num_rounds: int, @@ -740,20 +851,35 @@ def _build_surface_tick_circuit_for_native_model( msg = f"Unknown circuit_source {circuit_source!r}" raise ValueError(msg) - if ancilla_budget is not None: - msg = ( - "circuit_source='traced_qis' does not currently support ancilla_budget because " - "pecos.guppy.surface.make_surface_code does not yet expose ancilla budgeting" - ) - raise ValueError(msg) - - traced_tc = _generate_traced_surface_tick_circuit(patch, num_rounds, basis) + traced_tc = _generate_traced_surface_tick_circuit( + patch, + num_rounds, + basis, + ancilla_budget=ancilla_budget, + ) + # Coarse sanity check: the traced and abstract circuits must agree on the + # sequence of *measured qubit indices*. This catches gross drift (a dropped + # or added measurement, a wrong-qubit measurement, a different schedule + # shape). It is NOT an identity-level check: `_extract_measurement_order` + # returns physical qubit indices, and under ancilla reuse the same physical + # qubit appears in many measurements -- so two different stabilizer + # orderings can produce an identical qubit-index sequence and pass here. + # There is no independent stabilizer-identity oracle in the stack today: + # the detector/observable record offsets are the production binding (not a + # validator), and the byte-identical traced-vs-traced DEM regression shares + # the same shared batching policy on both sides (so it cannot catch a + # policy bug). 
The current safeguards against identity drift are the shared + # `batched_stabilizers` source-of-truth and the source-level CX-emission + # pins; a true identity check here would need stabilizer provenance the + # replayed TickCircuit does not currently carry (future work). traced_measurement_order = _extract_measurement_order(traced_tc) abstract_measurement_order = _extract_measurement_order(abstract_tc) if traced_measurement_order != abstract_measurement_order: msg = ( - "Lowered traced circuit measurement order does not match the abstract surface " - "metadata; refusing to build a mismatched native DEM/sampler" + "Traced and abstract surface circuits disagree on the measured-qubit " + "sequence (a dropped/added/wrong-qubit measurement or a different " + "schedule shape); refusing to build a native DEM/sampler from a " + "circuit that does not match the abstract detector/observable metadata" ) raise ValueError(msg) @@ -819,12 +945,31 @@ def build_memory_circuit( ) -def _can_use_cached_surface_topology( - *, - ancilla_budget: int | None, -) -> bool: - """Return True when we can safely use the shared native topology cache.""" - return ancilla_budget is None +def _canonical_ancilla_budget(patch: SurfacePatch, ancilla_budget: int | None) -> int | None: + """Canonicalize an ancilla budget for the shared native topology cache. + + Collapses every "unconstrained" spelling -- ``None``, a budget equal to + ``total_ancilla``, or any larger value -- to ``None`` so they share one + cache entry and use the unconstrained codegen path; a genuine constraint + (``< total_ancilla``) passes through unchanged. Routing through + :func:`normalize_ancilla_budget` also validates type/range fail-loud at the + cache boundary. + + All cache parameters (``ancilla_budget``, ``circuit_source``, idle-gate + insertion) are independent keys on the cached functions, so constrained + budgets cache correctly -- there is no correctness reason to bypass the + cache for them. 
``None``/``== total``/``>> total`` were verified to produce + byte-identical DEMs for both circuit sources, so canonicalizing them + together is behavior-preserving. + """ + if ancilla_budget is None: + return None + from pecos.qec.surface._ancilla_batching import normalize_ancilla_budget + + geom = patch.geometry + total_ancilla = len(geom.x_stabilizers) + len(geom.z_stabilizers) + effective = normalize_ancilla_budget(total_ancilla, ancilla_budget) + return None if effective >= total_ancilla else effective def _uses_dedicated_idle_noise( @@ -1006,88 +1151,6 @@ def _build_native_sampler_from_cached_surface_topology( ) -def _build_native_sampler_from_tick_circuit( - tc: Any, - noise: NoiseModel, - *, - sampling_model: Literal[ - "dem", - "influence_dem", - "mnm", - ] = "dem", # "mnm" accepted for compat, mapped to "influence_dem", -) -> NativeSampler: - """Construct a native sampler directly from a TickCircuit.""" - import json - - from pecos.qec import DagFaultAnalyzer, DemSampler, ParsedDem - from pecos.qec.surface.circuit_builder import generate_dem_from_tick_circuit - - if _noise_uses_dedicated_idle_noise(noise): - tc.fill_idle_gates() - - dag = tc.to_dag_circuit() - analyzer = DagFaultAnalyzer(dag) - influence_map = analyzer.build_influence_map() - - detectors_json = tc.get_meta("detectors") or "[]" - observables_json = tc.get_meta("observables") or "[]" - num_detectors = len(json.loads(detectors_json)) if detectors_json else 0 - num_observables = len(json.loads(observables_json)) if observables_json else 0 - - if sampling_model == "dem": - dem_str = generate_dem_from_tick_circuit( - tc, - p1=noise.p1, - p2=noise.p2, - p_meas=noise.p_meas, - p_prep=noise.p_prep, - p_idle=noise.p_idle, - t1=noise.t1, - t2=noise.t2, - decompose_errors=True, - ) - sampler = ParsedDem.from_string(dem_str).to_dem_sampler() - elif sampling_model in ("influence_dem", "mnm"): - det_records = [d["records"] for d in json.loads(detectors_json)] - obs_records = [o["records"] for o in 
json.loads(observables_json)] if observables_json else [] - sampler = DemSampler.with_detectors( - influence_map, - det_records, - obs_records, - noise.p1, - noise.p2, - noise.p_meas, - noise.p_prep, - p_idle=noise.p_idle, - t1=noise.t1, - t2=noise.t2, - ) - sampling_model = "influence_dem" - elif sampling_model == "from_circuit": - # Direct from_circuit path: uses DagCircuit annotations and any - # explicit idle locations inserted above for dedicated idle noise. - sampler = DemSampler.from_circuit( - dag, - p1=noise.p1, - p2=noise.p2, - p_meas=noise.p_meas, - p_prep=noise.p_prep, - p_idle=noise.p_idle, - ) - else: - msg = f"Unknown native sampling_model {sampling_model!r}" - raise ValueError(msg) - - return NativeSampler( - sampler=sampler, - detectors_json=detectors_json, - observables_json=observables_json, - num_detectors=num_detectors, - num_observables=num_observables, - sampling_model=sampling_model, - ) - - def generate_circuit_level_dem_from_builder( patch: SurfacePatch, num_rounds: int, @@ -1136,45 +1199,22 @@ def generate_circuit_level_dem_from_builder( >>> noise = NoiseModel(p1=0.001, p2=0.01, p_meas=0.01) >>> dem = generate_circuit_level_dem_from_builder(patch, num_rounds=3, noise=noise) """ - from pecos.qec.surface.circuit_builder import generate_dem_from_tick_circuit - - if _can_use_cached_surface_topology(ancilla_budget=ancilla_budget): - patch_key = _surface_patch_cache_key(patch) - return _cached_surface_native_dem_string( - patch_key, - num_rounds, - basis.upper(), - ancilla_budget, - circuit_source, - noise.p1, - noise.p2, - noise.p_meas, - noise.p_prep, - decompose_errors=decompose_errors, - p_idle=noise.p_idle, - t1=noise.t1, - t2=noise.t2, - ) - - tc = _build_surface_tick_circuit_for_native_model( - patch, + ancilla_budget = _canonical_ancilla_budget(patch, ancilla_budget) + patch_key = _surface_patch_cache_key(patch) + return _cached_surface_native_dem_string( + patch_key, num_rounds, - basis, - ancilla_budget=ancilla_budget, - 
circuit_source=circuit_source, - ) - if _noise_uses_dedicated_idle_noise(noise): - tc.fill_idle_gates() - return generate_dem_from_tick_circuit( - tc, - p1=noise.p1, - p2=noise.p2, - p_meas=noise.p_meas, - p_prep=noise.p_prep, + basis.upper(), + ancilla_budget, + circuit_source, + noise.p1, + noise.p2, + noise.p_meas, + noise.p_prep, + decompose_errors=decompose_errors, p_idle=noise.p_idle, t1=noise.t1, t2=noise.t2, - decompose_errors=decompose_errors, ) @@ -2815,57 +2855,44 @@ def build_native_sampler( >>> sampler = build_native_sampler(patch, num_rounds=5, noise=noise) >>> detection_events, observable_flips = sampler.sample(num_shots=10000) """ - if _can_use_cached_surface_topology(ancilla_budget=ancilla_budget): - basis = basis.upper() - patch_key = _surface_patch_cache_key(patch) - topology = _cached_surface_native_topology( + ancilla_budget = _canonical_ancilla_budget(patch, ancilla_budget) + basis = basis.upper() + patch_key = _surface_patch_cache_key(patch) + topology = _cached_surface_native_topology( + patch_key, + num_rounds, + basis, + ancilla_budget, + circuit_source, + _noise_uses_dedicated_idle_noise(noise), + ) + if sampling_model == "dem": + dem_str = _cached_surface_native_dem_string( patch_key, num_rounds, basis, ancilla_budget, circuit_source, - _noise_uses_dedicated_idle_noise(noise), + noise.p1, + noise.p2, + noise.p_meas, + noise.p_prep, + decompose_errors=True, + p_idle=noise.p_idle, + t1=noise.t1, + t2=noise.t2, ) - if sampling_model == "dem": - dem_str = _cached_surface_native_dem_string( - patch_key, - num_rounds, - basis, - ancilla_budget, - circuit_source, - noise.p1, - noise.p2, - noise.p_meas, - noise.p_prep, - decompose_errors=True, - p_idle=noise.p_idle, - t1=noise.t1, - t2=noise.t2, - ) - sampler = _cached_parsed_dem(dem_str).to_dem_sampler() - return NativeSampler( - sampler=sampler, - detectors_json=topology.detectors_json, - observables_json=topology.observables_json, - num_detectors=topology.num_detectors, - 
num_observables=topology.num_observables, - sampling_model=sampling_model, - ) - return _build_native_sampler_from_cached_surface_topology( - topology, - noise, + sampler = _cached_parsed_dem(dem_str).to_dem_sampler() + return NativeSampler( + sampler=sampler, + detectors_json=topology.detectors_json, + observables_json=topology.observables_json, + num_detectors=topology.num_detectors, + num_observables=topology.num_observables, sampling_model=sampling_model, ) - - tc = _build_surface_tick_circuit_for_native_model( - patch, - num_rounds, - basis, - ancilla_budget=ancilla_budget, - circuit_source=circuit_source, - ) - return _build_native_sampler_from_tick_circuit( - tc, + return _build_native_sampler_from_cached_surface_topology( + topology, noise, sampling_model=sampling_model, ) diff --git a/python/quantum-pecos/src/pecos/qec/surface/patch.py b/python/quantum-pecos/src/pecos/qec/surface/patch.py index ef48dd28b..82bb28f62 100644 --- a/python/quantum-pecos/src/pecos/qec/surface/patch.py +++ b/python/quantum-pecos/src/pecos/qec/surface/patch.py @@ -107,7 +107,14 @@ class LogicalDescriptor(TypedDict): support_axis: str -def _get_stabilizer_region(stab: Stabilizer, patch: SurfacePatch) -> str: +# --- Stabilizer geometry/schedule metadata (single source of truth) --------- +# These live here (the low-level geometry module) and are re-exposed by +# ``circuit_builder`` as the public API; both the abstract circuit's detector +# descriptors and ``SurfacePatch.get_stabilizer_descriptor`` consume them, so a +# single implementation prevents the two sides from silently diverging. 
+ + +def get_stabilizer_region(stab: Stabilizer, patch: SurfacePatch) -> str: """Return a coarse region label like ``top+left`` for a stabilizer.""" geom = patch.geometry positions = [geom.id_to_pos[q] for q in stab.data_qubits] @@ -118,7 +125,7 @@ def _get_stabilizer_region(stab: Stabilizer, patch: SurfacePatch) -> str: return f"{row_label}+{col_label}" -def _get_stabilizer_touch_label(stab: Stabilizer, patch: SurfacePatch, data_qubit: int) -> str: +def get_stabilizer_touch_label(stab: Stabilizer, patch: SurfacePatch, data_qubit: int) -> str: """Label how a data qubit sits relative to a stabilizer support.""" geom = patch.geometry if data_qubit not in stab.data_qubits: @@ -140,13 +147,13 @@ def _get_stabilizer_touch_label(stab: Stabilizer, patch: SurfacePatch, data_qubi return vertical + horizontal -def _get_stabilizer_schedule_metadata(stab: Stabilizer, patch: SurfacePatch) -> dict[str, object]: - """Return metadata describing one stabilizer's schedule and geometry.""" - entries: list[StabilizerScheduleEntry] = [ +def get_stabilizer_schedule_entries(stab: Stabilizer, patch: SurfacePatch) -> list[StabilizerScheduleEntry]: + """Return the per-round touch schedule for one stabilizer.""" + return [ { "round_0based": round_0based, "data_qubit": data_qubit, - "touch_label": _get_stabilizer_touch_label(stab, patch, data_qubit), + "touch_label": get_stabilizer_touch_label(stab, patch, data_qubit), } for round_0based, data_qubit in get_stab_schedule( stab.stab_type, @@ -156,12 +163,17 @@ def _get_stabilizer_schedule_metadata(stab: Stabilizer, patch: SurfacePatch) -> patch.dz, ) ] + + +def get_stabilizer_schedule_metadata(stab: Stabilizer, patch: SurfacePatch) -> dict[str, object]: + """Return metadata describing one stabilizer's schedule and geometry.""" + entries = get_stabilizer_schedule_entries(stab, patch) rounds = [int(entry["round_0based"]) for entry in entries] return { "stabilizer_kind": stab.stab_type, "stabilizer_index": stab.index, "stabilizer_is_boundary": 
stab.is_boundary, - "stabilizer_region": _get_stabilizer_region(stab, patch), + "stabilizer_region": get_stabilizer_region(stab, patch), "schedule_rounds": rounds, "schedule_start_round": rounds[0] if rounds else None, "schedule_end_round": rounds[-1] if rounds else None, @@ -401,7 +413,7 @@ def get_stabilizer_descriptor( """Return one public stabilizer descriptor.""" stabs = self.x_stabilizers if stab_type.upper() == "X" else self.z_stabilizers stab = stabs[index] - metadata = _get_stabilizer_schedule_metadata(stab, self) + metadata = get_stabilizer_schedule_metadata(stab, self) positions = [list(self.geometry.id_to_pos[q]) for q in stab.data_qubits] return { **metadata, diff --git a/python/quantum-pecos/src/pecos/slr/qalloc.py b/python/quantum-pecos/src/pecos/slr/qalloc.py index 397b2332a..cabdf12a1 100644 --- a/python/quantum-pecos/src/pecos/slr/qalloc.py +++ b/python/quantum-pecos/src/pecos/slr/qalloc.py @@ -14,8 +14,6 @@ Inspired by Zig's allocator pattern and NASA's Power of 10 rules. Provides hierarchical qubit slot management with explicit lifecycle states. - -See docs/proposals/slr-qubit-allocators.md for full design documentation. """ from __future__ import annotations diff --git a/python/quantum-pecos/tests/qec/surface/test_ancilla_batching.py b/python/quantum-pecos/tests/qec/surface/test_ancilla_batching.py new file mode 100644 index 000000000..f6bdc67e2 --- /dev/null +++ b/python/quantum-pecos/tests/qec/surface/test_ancilla_batching.py @@ -0,0 +1,256 @@ +# Copyright 2026 The PECOS Developers +# Licensed under the Apache License, Version 2.0 + +"""Tests for the shared ancilla-batching helper. + +This is the single source of truth for stabilizer-batch ordering used +by both the abstract surface-circuit builder +(``pecos.qec.surface.circuit_builder``) and the Guppy emitter +(``pecos.guppy.surface``). 
The byte-identical traced-vs-traced surface +DEM oracle in ``tests/qec/test_from_guppy_dem.py`` exercises this +helper indirectly, but a regression in the partitioning *policy* +itself (e.g. someone changes the sort key) could pass that oracle +spuriously because both sides share the same shared helper. Concrete +expected-output pins below catch that case directly. +""" + +from __future__ import annotations + +import pytest +from pecos.qec.surface import SurfacePatch +from pecos.qec.surface._ancilla_batching import ( + batched_stabilizers, + normalize_ancilla_budget, +) + +# --- normalize_ancilla_budget ----------------------------------------------- + + +@pytest.mark.parametrize( + ("total", "budget", "expected"), + [ + (8, None, 8), # None means "no constraint" + (8, 8, 8), # exact match + (8, 9, 8), # >= total collapses to total + (8, 999, 8), # large budget collapses to total + (8, 1, 1), # minimum valid + (8, 4, 4), # interior + ], +) +def test_normalize_ancilla_budget_clamps(total: int, budget: int | None, expected: int) -> None: + assert normalize_ancilla_budget(total, budget) == expected + + +def test_normalize_ancilla_budget_rejects_zero_and_negative() -> None: + with pytest.raises(ValueError, match=r"must be >= 1"): + normalize_ancilla_budget(8, 0) + with pytest.raises(ValueError, match=r"must be >= 1"): + normalize_ancilla_budget(8, -1) + + +def test_normalize_ancilla_budget_rejects_non_int() -> None: + """Public ``ancilla_budget`` kwarg has a strict ``int | None`` contract. 
+ + bool is a Python subclass of int but a separate semantic type; rejecting + it explicitly avoids ``True``-as-``1`` silently working, which would mask + caller-side bugs.""" + with pytest.raises(TypeError, match=r"must be int or None, got bool"): + normalize_ancilla_budget(8, True) + with pytest.raises(TypeError, match=r"must be int or None, got float"): + normalize_ancilla_budget(8, 1.5) + with pytest.raises(TypeError, match=r"must be int or None, got str"): + normalize_ancilla_budget(8, "1") + + +# --- batched_stabilizers (concrete sequences) ------------------------------- + + +def test_batched_stabilizers_d3_budget1_one_stabilizer_per_batch() -> None: + """Budget=1 produces one stabilizer per batch, alternating X/Z by + ascending index per the shared sort key. Pinning this concrete order + catches "shared batching policy regressed" independent of any DEM- + level oracle.""" + patch = SurfacePatch.create(distance=3) + batches = batched_stabilizers(patch, 1) + assert batches == [ + [("X", 0)], + [("Z", 0)], + [("X", 1)], + [("Z", 1)], + [("X", 2)], + [("Z", 2)], + [("X", 3)], + [("Z", 3)], + ] + + +def test_batched_stabilizers_d3_budget2_pairs_xz_by_index() -> None: + """Budget=2 pairs (X_k, Z_k) per batch for ascending k.""" + patch = SurfacePatch.create(distance=3) + batches = batched_stabilizers(patch, 2) + assert batches == [ + [("X", 0), ("Z", 0)], + [("X", 1), ("Z", 1)], + [("X", 2), ("Z", 2)], + [("X", 3), ("Z", 3)], + ] + + +def test_batched_stabilizers_full_budget_one_batch() -> None: + """Budget == total_ancilla collapses to a single batch containing + every stabilizer in the canonical sort order.""" + patch = SurfacePatch.create(distance=3) + total = len(patch.geometry.x_stabilizers) + len(patch.geometry.z_stabilizers) + batches = batched_stabilizers(patch, total) + assert len(batches) == 1 + assert batches[0] == [ + ("X", 0), + ("Z", 0), + ("X", 1), + ("Z", 1), + ("X", 2), + ("Z", 2), + ("X", 3), + ("Z", 3), + ] + + +def 
test_batched_stabilizers_distance_5_budget_3_covers_all_stabilizers() -> None: + """For a slightly bigger patch, every stabilizer appears exactly once + across the returned batches, with batch sizes ``<= budget``.""" + patch = SurfacePatch.create(distance=5) + total = len(patch.geometry.x_stabilizers) + len(patch.geometry.z_stabilizers) + batches = batched_stabilizers(patch, 3) + + assert all(len(batch) <= 3 for batch in batches) + + flat = [pair for batch in batches for pair in batch] + assert len(flat) == total + assert len(set(flat)) == total # no duplicates + + +# --- batched_stabilizers input validation --------------------------------- + + +def test_batched_stabilizers_rejects_invalid_budget_directly() -> None: + """``batched_stabilizers`` validates its own ``ancilla_budget`` (routes + through ``normalize_ancilla_budget``) rather than producing an opaque + ``range()`` error or a silent-empty failure on ``0`` / non-int input. + Closes the self-review's A2 finding.""" + patch = SurfacePatch.create(distance=3) + with pytest.raises(ValueError, match=r"must be >= 1"): + batched_stabilizers(patch, 0) + with pytest.raises(ValueError, match=r"must be >= 1"): + batched_stabilizers(patch, -2) + with pytest.raises(TypeError, match=r"must be int or None"): + batched_stabilizers(patch, True) + with pytest.raises(TypeError, match=r"must be int or None"): + batched_stabilizers(patch, 1.5) + + +def test_batched_stabilizers_clamps_oversized_budget() -> None: + """A budget larger than ``total_ancilla`` clamps to one big batch, + matching ``normalize_ancilla_budget`` behavior. 
Direct callers get the + same clamping the public API surface gets.""" + patch = SurfacePatch.create(distance=3) + total = len(patch.geometry.x_stabilizers) + len(patch.geometry.z_stabilizers) + huge = batched_stabilizers(patch, 10**6) + assert len(huge) == 1 + assert len(huge[0]) == total + + +# --- D1: pin emitted CX sequences for the constrained Guppy codegen -------- +# The byte-identical traced-vs-traced DEM oracle and the lowered-qubit-stream +# invariant catch many constrained-codegen errors, but not a wrong-CX-order / +# wrong-CX-control / dropped-CX bug inside the emitter (the lowered Selene +# trace uses RZZ + surrounding rotations, not raw CX, so the trace doesn't +# expose the emitted CX shape directly). These tests pin the literal CX +# emission at the **source** level so a regression in +# ``generate_guppy_source``'s per-batch CX restriction fails fast, +# independent of any DEM-level oracle. + + +def _emitted_cx_lines(distance: int, ancilla_budget: int | None) -> list[str]: + """Return the ``cx(...)`` lines emitted in the syndrome_extraction + function for a given (distance, budget).""" + import re + + from pecos.guppy.surface import generate_surface_code_module + + src = generate_surface_code_module(distance, ancilla_budget=ancilla_budget) + in_se = False + cx_lines: list[str] = [] + for line in src.split("\n"): + if line.startswith("def syndrome_extraction"): + in_se = True + continue + # Stop at the next top-level def or @ decorator (next function). 
+ if in_se and line and not line.startswith(" ") and not line.startswith("#"): + break + if in_se: + m = re.match(r"^\s*(cx\([^)]+\))", line) + if m: + cx_lines.append(m.group(1)) + return cx_lines + + +def test_constrained_d3_budget1_emits_expected_cx_sequence() -> None: + """Catches wrong-CX-order / wrong-control / dropped-CX bugs in the + constrained emitter that the DEM-level and trace-level oracles miss.""" + assert _emitted_cx_lines(3, 1) == [ + "cx(_a_b0_p0, surf.data[1])", + "cx(_a_b0_p0, surf.data[0])", + "cx(surf.data[3], _a_b1_p0)", + "cx(surf.data[6], _a_b1_p0)", + "cx(_a_b2_p0, surf.data[2])", + "cx(_a_b2_p0, surf.data[1])", + "cx(_a_b2_p0, surf.data[5])", + "cx(_a_b2_p0, surf.data[4])", + "cx(surf.data[1], _a_b3_p0)", + "cx(surf.data[4], _a_b3_p0)", + "cx(surf.data[0], _a_b3_p0)", + "cx(surf.data[3], _a_b3_p0)", + "cx(_a_b4_p0, surf.data[4])", + "cx(_a_b4_p0, surf.data[3])", + "cx(_a_b4_p0, surf.data[7])", + "cx(_a_b4_p0, surf.data[6])", + "cx(surf.data[5], _a_b5_p0)", + "cx(surf.data[8], _a_b5_p0)", + "cx(surf.data[4], _a_b5_p0)", + "cx(surf.data[7], _a_b5_p0)", + "cx(_a_b6_p0, surf.data[8])", + "cx(_a_b6_p0, surf.data[7])", + "cx(surf.data[2], _a_b7_p0)", + "cx(surf.data[5], _a_b7_p0)", + ] + + +def test_constrained_d3_budget2_emits_expected_cx_sequence() -> None: + """Pins the budget=2 batched CX schedule (pairs X_k with Z_k each batch, + CXs filtered to that batch's stabilizers across 4 schedule rounds).""" + assert _emitted_cx_lines(3, 2) == [ + "cx(surf.data[3], _a_b0_p1)", + "cx(surf.data[6], _a_b0_p1)", + "cx(_a_b0_p0, surf.data[1])", + "cx(_a_b0_p0, surf.data[0])", + "cx(_a_b1_p0, surf.data[2])", + "cx(surf.data[1], _a_b1_p1)", + "cx(_a_b1_p0, surf.data[1])", + "cx(surf.data[4], _a_b1_p1)", + "cx(_a_b1_p0, surf.data[5])", + "cx(surf.data[0], _a_b1_p1)", + "cx(_a_b1_p0, surf.data[4])", + "cx(surf.data[3], _a_b1_p1)", + "cx(_a_b2_p0, surf.data[4])", + "cx(surf.data[5], _a_b2_p1)", + "cx(_a_b2_p0, surf.data[3])", + "cx(surf.data[8], _a_b2_p1)", 
+ "cx(_a_b2_p0, surf.data[7])", + "cx(surf.data[4], _a_b2_p1)", + "cx(_a_b2_p0, surf.data[6])", + "cx(surf.data[7], _a_b2_p1)", + "cx(_a_b3_p0, surf.data[8])", + "cx(_a_b3_p0, surf.data[7])", + "cx(surf.data[2], _a_b3_p1)", + "cx(surf.data[5], _a_b3_p1)", + ] diff --git a/python/quantum-pecos/tests/qec/surface/test_surface_decoder.py b/python/quantum-pecos/tests/qec/surface/test_surface_decoder.py index c89112f6b..0cdcd1486 100644 --- a/python/quantum-pecos/tests/qec/surface/test_surface_decoder.py +++ b/python/quantum-pecos/tests/qec/surface/test_surface_decoder.py @@ -363,6 +363,174 @@ def test_native_circuit_level_dem_threads_ancilla_budget(self) -> None: ) assert decoder.get_dem("X", circuit_level=True) == batched_dem + def test_constrained_budget_uses_cache_and_matches_fresh_build(self) -> None: + """A constrained ancilla budget now flows through the shared topology + cache (previously bypassed). The cached constrained DEM must equal a + DEM built fresh from the corresponding TickCircuit, for both the + ``abstract`` and ``traced_qis`` sources -- pinning that caching is + sound for constrained budgets, not just unconstrained ones.""" + from pecos.qec.surface.circuit_builder import generate_dem_from_tick_circuit, generate_tick_circuit_from_patch + from pecos.qec.surface.decode import ( + _build_surface_tick_circuit_for_native_model, + generate_circuit_level_dem_from_builder, + ) + + patch = SurfacePatch.create(distance=3) + noise = NoiseModel(p1=0.001, p2=0.01, p_meas=0.01, p_prep=0.001) + params = {"p1": noise.p1, "p2": noise.p2, "p_meas": noise.p_meas, "p_prep": noise.p_prep} + + # abstract source + abstract_tc = generate_tick_circuit_from_patch(patch, num_rounds=2, basis="Z", ancilla_budget=2) + cached_abstract = generate_circuit_level_dem_from_builder( + patch, + num_rounds=2, + noise=noise, + basis="Z", + ancilla_budget=2, + ) + assert cached_abstract == generate_dem_from_tick_circuit(abstract_tc, **params, decompose_errors=False) + + # traced_qis source + 
_require_selene_runtime() + traced_tc = _build_surface_tick_circuit_for_native_model( + patch, + 2, + "Z", + ancilla_budget=2, + circuit_source="traced_qis", + ) + cached_traced = generate_circuit_level_dem_from_builder( + patch, + num_rounds=2, + noise=noise, + basis="Z", + ancilla_budget=2, + circuit_source="traced_qis", + ) + assert cached_traced == generate_dem_from_tick_circuit(traced_tc, **params, decompose_errors=False) + + def test_unconstrained_budget_spellings_collapse_to_one_dem(self) -> None: + """``ancilla_budget`` of ``None``, ``== total_ancilla``, and a value + ``>> total_ancilla`` are all "unconstrained" and must produce the same + DEM. ``_canonical_ancilla_budget`` collapses them so they also share a + single cache entry rather than fragmenting it.""" + from pecos.qec.surface.decode import _canonical_ancilla_budget, generate_circuit_level_dem_from_builder + + patch = SurfacePatch.create(distance=3) + total = len(patch.geometry.x_stabilizers) + len(patch.geometry.z_stabilizers) + noise = NoiseModel(p1=0.001, p2=0.01, p_meas=0.01, p_prep=0.001) + + # Canonicalization: every unconstrained spelling -> None; a real + # constraint passes through unchanged. 
+ assert _canonical_ancilla_budget(patch, None) is None + assert _canonical_ancilla_budget(patch, total) is None + assert _canonical_ancilla_budget(patch, 10**6) is None + assert _canonical_ancilla_budget(patch, 2) == 2 + + dem_none = generate_circuit_level_dem_from_builder(patch, num_rounds=2, noise=noise, basis="Z") + dem_total = generate_circuit_level_dem_from_builder( + patch, + num_rounds=2, + noise=noise, + basis="Z", + ancilla_budget=total, + ) + dem_huge = generate_circuit_level_dem_from_builder( + patch, + num_rounds=2, + noise=noise, + basis="Z", + ancilla_budget=10**6, + ) + assert dem_none == dem_total == dem_huge + + def test_constrained_budget_sampler_builds_for_all_models(self) -> None: + """The native sampler path also caches constrained budgets and builds + for every supported sampling model, with a detector count matching the + constrained circuit's surface metadata.""" + from pecos.qec.surface import build_native_sampler + from pecos.qec.surface.decode import _build_surface_tick_circuit_for_native_model + + patch = SurfacePatch.create(distance=3) + noise = NoiseModel(p1=0.001, p2=0.01, p_meas=0.01, p_prep=0.001) + abstract_tc = _build_surface_tick_circuit_for_native_model( + patch, + 2, + "Z", + ancilla_budget=2, + circuit_source="abstract", + ) + expected_detectors = int(abstract_tc.get_meta("num_detectors")) + + for model in ("dem", "influence_dem", "mnm"): + sampler = build_native_sampler( + patch, + num_rounds=2, + noise=noise, + basis="Z", + ancilla_budget=2, + sampling_model=model, + ) + assert sampler.num_detectors == expected_detectors + + def test_traced_qis_traces_the_given_patch_not_its_distance(self) -> None: + """A non-rotated patch must be traced from its OWN Guppy program, not + the default rotated patch of the same distance. 
Before the patch- + identity fix, the traced path rebuilt make_surface_code(distance=d) and + the module cache keyed on dx/dz only, so rotated and non-rotated d=3 + collapsed to one cached (rotated) module and produced identical DEMs. + They must now differ.""" + from pecos.qec.surface.decode import _build_surface_tick_circuit_for_native_model + + _require_selene_runtime() + params = {"p1": 0.005, "p2": 0.005, "p_meas": 0.005, "p_prep": 0.005} + + def traced_dem(*, rotated: bool) -> str: + patch = SurfacePatch.create(distance=3, rotated=rotated) + tc = _build_surface_tick_circuit_for_native_model(patch, 2, "Z", circuit_source="traced_qis") + return generate_dem_from_tick_circuit(tc, **params, decompose_errors=False) + + assert traced_dem(rotated=True) != traced_dem(rotated=False) + + def test_guppy_module_cache_keys_on_full_patch_identity(self) -> None: + """Rotated and non-rotated patches of the same dx/dz/budget must NOT + share a cached Guppy module (they generate different circuits).""" + from pecos.guppy.surface import _load_guppy_module + + rotated = _load_guppy_module(SurfacePatch.create(distance=3, rotated=True), ancilla_budget=2) + non_rotated = _load_guppy_module(SurfacePatch.create(distance=3, rotated=False), ancilla_budget=2) + assert rotated is not non_rotated + + def test_surface_memory_distance_validation_is_consistent(self) -> None: + """All distance-based Guppy entry points enforce the documented + 'odd >= 3' contract (previously make_surface_code/get_surface_code_module + accepted even/<3 and get_num_qubits(0) returned -1).""" + from pecos.guppy.surface import ( + generate_surface_code_module, + get_num_qubits, + get_surface_code_module, + make_surface_code, + ) + + for bad in (0, 1, 2, 4): + with pytest.raises(ValueError, match=r"odd >= 3"): + get_num_qubits(bad) + with pytest.raises(ValueError, match=r"odd >= 3"): + get_surface_code_module(bad) + with pytest.raises(ValueError, match=r"odd >= 3"): + make_surface_code(distance=bad, num_rounds=2, 
basis="Z") + with pytest.raises(ValueError, match=r"odd >= 3"): + generate_surface_code_module(bad) + assert get_num_qubits(3) == 2 * 9 - 1 # valid distance still works + + def test_get_num_qubits_requires_exactly_one_of_d_or_patch(self) -> None: + from pecos.guppy.surface import get_num_qubits + + with pytest.raises(ValueError, match=r"exactly one of"): + get_num_qubits() + with pytest.raises(ValueError, match=r"exactly one of"): + get_num_qubits(3, patch=SurfacePatch.create(distance=3)) + def test_native_circuit_level_dem_cache_respects_patch_geometry(self) -> None: """Shared native DEM caching should preserve asymmetric patch geometry.""" from pecos.qec.surface.circuit_builder import generate_dem_from_tick_circuit, generate_tick_circuit_from_patch diff --git a/python/quantum-pecos/tests/qec/test_dem_metadata_fail_loud.py b/python/quantum-pecos/tests/qec/test_dem_metadata_fail_loud.py new file mode 100644 index 000000000..97da0d66a --- /dev/null +++ b/python/quantum-pecos/tests/qec/test_dem_metadata_fail_loud.py @@ -0,0 +1,290 @@ +# Copyright 2026 The PECOS Developers +# Licensed under the Apache License, Version 2.0 + +"""Fail-loud regression tests for circuit-ingested DEM metadata. + +Out-of-range record offsets / meas_ids, and a declared ``num_measurements`` +that disagrees with the circuit, must be rejected on every circuit-ingest +path -- ``DetectorErrorModel.from_circuit``, ``DemSampler.from_circuit``, +and the public ``DemBuilder.build`` -- not silently dropped. 
+""" + +import pytest +from pecos_rslib import DagCircuit +from pecos_rslib.qec import ( + DagFaultAnalyzer, + DemBuilder, + DemSampler, + DetectorErrorModel, +) + + +def _one_measurement_dag(*, num_measurements: str = "1") -> DagCircuit: + """A circuit performing exactly one Z measurement.""" + dag = DagCircuit() + dag.pz([0]) + dag.mz([0]) + dag.set_attr("num_measurements", num_measurements) + return dag + + +_NOISE = {"p1": 0.0, "p2": 0.0, "p_meas": 0.1, "p_prep": 0.0} + + +# --- positive controls: valid metadata still builds on every path ---------- + + +def test_valid_metadata_builds_on_all_paths() -> None: + dag = _one_measurement_dag() + dag.set_attr("detectors", '[{"id": 0, "records": [-1]}]') + + assert DetectorErrorModel.from_circuit(dag, **_NOISE).num_detectors == 1 + assert DemSampler.from_circuit(dag, **_NOISE).num_detectors == 1 + + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + builder = DemBuilder(im) + builder.with_noise(**_NOISE) + builder.with_num_measurements(1) + builder.with_detectors_json('[{"id": 0, "records": [-1]}]') + assert builder.build().num_detectors == 1 + + +# --- out-of-range record offsets ------------------------------------------- + + +def test_from_circuit_out_of_range_record_fails_loud() -> None: + dag = _one_measurement_dag() + dag.set_attr("detectors", '[{"id": 0, "records": [-2]}]') + with pytest.raises(ValueError, match=r"out of range|record offset"): + DetectorErrorModel.from_circuit(dag, **_NOISE) + + +def test_dem_sampler_out_of_range_record_fails_loud() -> None: + dag = _one_measurement_dag() + dag.set_attr("detectors", '[{"id": 0, "records": [-2]}]') + with pytest.raises(ValueError, match=r"out of range|record offset"): + DemSampler.from_circuit(dag, **_NOISE) + + +def test_public_dem_builder_out_of_range_record_fails_loud() -> None: + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + builder = DemBuilder(im) + builder.with_noise(**_NOISE) + builder.with_num_measurements(1) + 
builder.with_detectors_json('[{"id": 0, "records": [-2]}]') + with pytest.raises(ValueError, match=r"out of range|record offset"): + builder.build() + + +# --- out-of-range meas_ids ------------------------------------------------- + + +def test_from_circuit_out_of_range_meas_id_fails_loud() -> None: + dag = _one_measurement_dag() + dag.set_attr("detectors", '[{"id": 0, "meas_ids": [999]}]') + with pytest.raises(ValueError, match="meas_id"): + DetectorErrorModel.from_circuit(dag, **_NOISE) + + +def test_dem_sampler_out_of_range_meas_id_fails_loud() -> None: + dag = _one_measurement_dag() + dag.set_attr("detectors", '[{"id": 0, "meas_ids": [999]}]') + with pytest.raises(ValueError, match="meas_id"): + DemSampler.from_circuit(dag, **_NOISE) + + +def test_public_dem_builder_out_of_range_meas_id_fails_loud() -> None: + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + builder = DemBuilder(im) + builder.with_noise(**_NOISE) + builder.with_num_measurements(1) + builder.with_detectors_json('[{"id": 0, "meas_ids": [999]}]') + with pytest.raises(ValueError, match="meas_id"): + builder.build() + + +# --- bogus declared num_measurements --------------------------------------- + + +def test_from_circuit_inconsistent_num_measurements_fails_loud() -> None: + """Declaring 2 measurements on a 1-measurement circuit must be rejected; + otherwise a record offset of -2 would falsely validate and misbind.""" + dag = _one_measurement_dag(num_measurements="2") + dag.set_attr("detectors", '[{"id": 0, "records": [-2]}]') + with pytest.raises(ValueError, match="num_measurements"): + DetectorErrorModel.from_circuit(dag, **_NOISE) + + +def test_dem_sampler_inconsistent_num_measurements_fails_loud() -> None: + dag = _one_measurement_dag(num_measurements="2") + dag.set_attr("detectors", '[{"id": 0, "records": [-2]}]') + with pytest.raises(ValueError, match="num_measurements"): + DemSampler.from_circuit(dag, **_NOISE) + + +def 
test_public_dem_builder_inconsistent_num_measurements_fails_loud() -> None: + """Public builder with a real (non-empty) influence map must reject a + with_num_measurements() that disagrees with the circuit; otherwise an + out-of-range record (e.g. -2 against 1 measurement) silently misbinds.""" + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + builder = DemBuilder(im) + builder.with_noise(**_NOISE) + builder.with_num_measurements(2) # circuit performs only 1 measurement + builder.with_detectors_json('[{"id": 0, "records": [-2]}]') + with pytest.raises(ValueError, match="num_measurements"): + builder.build() + + +def test_public_dem_builder_consistent_num_measurements_still_builds() -> None: + """The matching-count case (and the empty-influence-map escape hatch) + must keep working -- the count check only fires on a genuine mismatch.""" + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + builder = DemBuilder(im) + builder.with_noise(**_NOISE) + builder.with_num_measurements(1) + builder.with_detectors_json('[{"id": 0, "records": [-1]}]') + assert builder.build().num_detectors == 1 + + +# --- DemSamplerBuilder JSON path (M-E): context-aware fail-loud ------------- +# The public sampler builder previously parsed detector/observable JSON with a +# hand-rolled string scanner that silently dropped out-of-range refs. It now +# resolves refs against the circuit's measurement count, like DemBuilder. 
+ + +def test_dem_sampler_builder_out_of_range_record_fails_loud() -> None: + from pecos_rslib.qec import DemSamplerBuilder + + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + builder = ( + DemSamplerBuilder(im) + .with_noise(**_NOISE) + .with_detectors_json( + '[{"id": 0, "records": [-1, -2]}]', # -2 out of range for 1 measurement + ) + ) + with pytest.raises(ValueError, match=r"out of range"): + builder.build() + + +def test_dem_sampler_builder_out_of_range_observable_fails_loud() -> None: + from pecos_rslib.qec import DemSamplerBuilder + + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + builder = ( + DemSamplerBuilder(im) + .with_noise(**_NOISE) + .with_observables_json( + '[{"id": 0, "records": [-1, -2]}]', + ) + ) + with pytest.raises(ValueError, match=r"out of range"): + builder.build() + + +def test_dem_sampler_builder_out_of_range_meas_id_fails_loud() -> None: + from pecos_rslib.qec import DemSamplerBuilder + + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + builder = ( + DemSamplerBuilder(im) + .with_noise(**_NOISE) + .with_detectors_json( + '[{"id": 0, "meas_ids": [0, 999]}]', # 999 absent / out of range + ) + ) + with pytest.raises(ValueError, match=r"not present|out of range"): + builder.build() + + +def test_dem_sampler_builder_valid_metadata_still_builds() -> None: + """Positive control: an in-range record still builds.""" + from pecos_rslib.qec import DemSamplerBuilder + + im = DagFaultAnalyzer(_one_measurement_dag()).build_influence_map() + sampler = ( + DemSamplerBuilder(im) + .with_noise(**_NOISE) + .with_detectors_json( + '[{"id": 0, "records": [-1]}]', + ) + .build() + ) + assert sampler is not None + + +def test_dem_sampler_builder_resolves_stamped_meas_ids() -> None: + """meas_ids are stamped MeasIds resolved via the influence map (matching + DemBuilder), not positional indices. A stamped id present in the circuit + resolves; a value absent from the stamped set fails loud. 
Previously the + sampler treated meas_ids positionally, so a stamped id raised 'out of range' + and an absent id silently misbound.""" + from pecos_rslib.qec import DemSamplerBuilder + from pecos_rslib.quantum import TickCircuit + + tc = TickCircuit() + tc.tick().pz([0, 1]) + tc.tick().mz_with_ids([0, 1], [10, 5]) # non-positional stamped ids + im = DagFaultAnalyzer(tc.to_dag_circuit()).build_influence_map() + + # Stamped id 10 is present -> resolves and builds. + DemSamplerBuilder(im).with_noise(**_NOISE).with_detectors_json( + '[{"id": 0, "meas_ids": [10]}]', + ).build() + + # Stamped id 0 is absent -> fail loud (positional would have accepted index 0). + builder = ( + DemSamplerBuilder(im) + .with_noise(**_NOISE) + .with_detectors_json( + '[{"id": 0, "meas_ids": [0]}]', + ) + ) + with pytest.raises(ValueError, match=r"not present|out of range"): + builder.build() + + +def test_dem_sampler_builder_rejects_inconsistent_measurement_order() -> None: + """A measurement_order must cover every measurement; a shorter order would + let validated record offsets resolve in a different frame and silently + misbind (the count-frame hole).""" + from pecos_rslib.qec import DemSamplerBuilder + + dag = DagCircuit() + for q in range(3): + dag.pz([q]) + dag.mz([q]) + dag.set_attr("num_measurements", "3") + im = DagFaultAnalyzer(dag).build_influence_map() + + builder = ( + DemSamplerBuilder(im) + .with_noise(**_NOISE) + .with_detectors_json('[{"id": 0, "records": [-3]}]') + .with_measurement_order([0, 1]) # only 2 of 3 measurements + ) + with pytest.raises(ValueError, match=r"measurement_order|cover every measurement"): + builder.build() + + +def test_dem_sampler_builder_rejects_duplicate_stamped_meas_ids() -> None: + """Duplicate stable MeasIds make stamped-id resolution ambiguous (bind to + the first occurrence). 
DemBuilder rejects them; the sampler JSON path must + too, rather than silently binding.""" + from pecos_rslib.qec import DemSamplerBuilder + from pecos_rslib.quantum import TickCircuit + + tc = TickCircuit() + tc.tick().pz([0, 1]) + tc.tick().mz_with_ids([0, 1], [7, 7]) # duplicate stamped id 7 + im = DagFaultAnalyzer(tc.to_dag_circuit()).build_influence_map() + + builder = ( + DemSamplerBuilder(im) + .with_noise(**_NOISE) + .with_detectors_json( + '[{"id": 0, "meas_ids": [7]}]', + ) + ) + with pytest.raises(ValueError, match=r"duplicate stable MeasId"): + builder.build() diff --git a/python/quantum-pecos/tests/qec/test_from_guppy_dem.py b/python/quantum-pecos/tests/qec/test_from_guppy_dem.py new file mode 100644 index 000000000..5d493b5fc --- /dev/null +++ b/python/quantum-pecos/tests/qec/test_from_guppy_dem.py @@ -0,0 +1,607 @@ +# Copyright 2026 The PECOS Developers +# Licensed under the Apache License, Version 2.0 + +"""Regression tests for the Guppy-to-DEM convenience path.""" + +import pytest +from guppylang import guppy +from guppylang.std.builtins import result +from guppylang.std.quantum import h, measure, qubit, x +from pecos.guppy import get_num_qubits, make_surface_code +from pecos.qec import DetectorErrorModel +from pecos.qec.surface import SurfacePatch +from pecos.qec.surface.decode import ( + _build_surface_tick_circuit_for_native_model, + _reject_partially_lowered_trace, + _replay_lowered_qis_trace_into_tick_circuit, + _replay_qis_trace_into_tick_circuit, +) + + +@guppy +def _single_measurement() -> None: + q = qubit() + b = measure(q) + result("m", b) + + +@guppy +def _measurement_feedback() -> None: + q0 = qubit() + q1 = qubit() + h(q0) + b0 = measure(q0) + if b0: + x(q1) + b1 = measure(q1) + result("b0", b0) + result("b1", b1) + + +def _dem_text(*, detectors_json: str = "[]", observables_json: str = "[]") -> str: + dem = DetectorErrorModel.from_guppy( + _single_measurement, + num_qubits=1, + detectors_json=detectors_json, + 
observables_json=observables_json, + p1=0.0, + p2=0.0, + p_meas=0.1, + p_prep=0.0, + seed=0, + ) + return dem.to_string() + + +def _flat_mz_ids(tc) -> list[int]: + dag = tc.to_dag_circuit() + ids: list[int] = [] + for node_id in dag.nodes(): + gate = dag.gate(node_id) + if gate is not None and gate.gate_type.name == "MZ": + ids.extend(int(mid) for mid in gate.meas_ids) + return ids + + +def test_from_guppy_meas_ids_are_normalized_to_records() -> None: + assert _dem_text(detectors_json='[{"id":0,"meas_ids":[0]}]') == _dem_text( + detectors_json='[{"id":0,"records":[-1]}]', + ) + + assert _dem_text(observables_json='[{"id":0,"meas_ids":[0]}]') == _dem_text( + observables_json='[{"id":0,"records":[-1]}]', + ) + + +@pytest.mark.parametrize( + "detectors_json", + [ + "{}", + '[{"id":0,"records":["-1"]}]', + '[{"id":0,"records":[-1.2]}]', + '[{"id":0,"meas_ids":["0"]}]', + ], +) +def test_from_guppy_rejects_malformed_detector_metadata(detectors_json: str) -> None: + with pytest.raises(ValueError, match=r"JSON list|integer|record offset|meas_id"): + _dem_text(detectors_json=detectors_json) + + +def test_from_guppy_rejects_json_tracked_pauli_observables() -> None: + with pytest.raises(ValueError, match="tracked_pauli"): + _dem_text(observables_json='[{"kind":"tracked_pauli","label":"x","pauli":"X0"}]') + + +def test_from_guppy_dynamic_control_is_unsupported_and_unguarded() -> None: + """Measurement-dependent control flow is unsupported/undefined. + + A prior runtime-trace guard false-positived on the standard surface code + (statically-scheduled post-measurement gates look the same in the trace), + so it was reverted. This test pins that NO guard rejects programs here -- + from_guppy must not raise on either a dynamic program or, by extension, + the surface code. The DEM for a dynamic program is undefined/seed-dependent + and callers must not rely on it (see from_guppy docstring / proposal 001). 
+ """ + for s in (0, 2, 5): + dem = DetectorErrorModel.from_guppy( + _measurement_feedback, + num_qubits=2, + detectors_json='[{"id":0,"records":[-2,-1]}]', + p1=0.0, + p2=0.0, + p_meas=0.1, + p_prep=0.0, + seed=s, + ) + assert dem.num_detectors == 1 # builds (undefined content; do not rely) + + +def test_lowered_replay_uses_measure_result_ids_directly() -> None: + chunks = [ + { + "operations": [ + {"AllocateResult": {"id": 42}}, + {"AllocateResult": {"id": 99}}, + {"Quantum": {"Measure": [0, 99]}}, + {"Quantum": {"Measure": [1, 42]}}, + ], + "lowered_quantum_ops": [ + {"gate_type": "MZ", "qubits": [0], "angles": []}, + {"gate_type": "MZ", "qubits": [1], "angles": []}, + ], + }, + ] + + tc = _replay_lowered_qis_trace_into_tick_circuit(chunks) + + assert _flat_mz_ids(tc) == [99, 42] + + +def test_lowered_replay_fails_on_measurement_count_mismatch() -> None: + chunks = [ + { + "operations": [{"Quantum": {"Measure": [0, 7]}}], + "lowered_quantum_ops": [{"gate_type": "MZ", "qubits": [0, 1], "angles": []}], + }, + ] + + with pytest.raises(ValueError, match="More measured qubits"): + _replay_lowered_qis_trace_into_tick_circuit(chunks) + + +def test_reject_partially_lowered_trace_passes_on_uniformly_lowered() -> None: + """A trace where every quantum-carrying chunk is also lowered is accepted + (this is the real Selene shape; the byte-identical regressions exercise it + end-to-end). 
A chunk with only non-quantum ops and no lowered form is fine + -- there are no gates to drop.""" + chunks = [ + { + "operations": [{"Quantum": {"Measure": [0, 7]}}], + "lowered_quantum_ops": [{"gate_type": "MZ", "qubits": [0], "angles": []}], + }, + { # allocation/output bookkeeping only; legitimately has no lowered ops + "operations": [{"AllocateResult": {"id": 7}}, {"RecordOutput": {"id": 7}}], + "lowered_quantum_ops": [], + }, + ] + _reject_partially_lowered_trace(chunks) # must not raise + + +def test_reject_partially_lowered_trace_fails_on_mixed_format() -> None: + """A chunk carrying raw quantum gates but no lowered form, alongside a + lowered chunk, is rejected fail-loud: the lowered replay would silently + drop that chunk's (non-measurement) gates, and the meas-count guard would + not catch it.""" + chunks = [ + { + "operations": [{"Quantum": {"H": 0}}], + "lowered_quantum_ops": [{"gate_type": "H", "qubits": [0], "angles": []}], + }, + { # raw quantum gate present, but not lowered -> would be dropped + "operations": [{"Quantum": {"CX": [0, 1]}}], + "lowered_quantum_ops": [], + }, + ] + with pytest.raises(ValueError, match=r"mixed/partially-lowered|incomplete gate stream"): + _reject_partially_lowered_trace(chunks) + + +def test_reject_partially_lowered_trace_fails_on_unlowered_allocation() -> None: + """``AllocateQubit`` lowers to a prep (PZ), so an unlowered chunk that + carries only an allocation alongside a lowered chunk would silently drop + that prep -- it must fail loud too, not just chunks with raw gate ops.""" + chunks = [ + { + "operations": [{"Quantum": {"H": 0}}], + "lowered_quantum_ops": [{"gate_type": "H", "qubits": [0], "angles": []}], + }, + { # allocation present (lowers to PZ) but not lowered -> would be dropped + "operations": [{"AllocateQubit": {"id": 1}}], + "lowered_quantum_ops": [], + }, + ] + with pytest.raises(ValueError, match=r"mixed/partially-lowered|incomplete gate stream"): + _reject_partially_lowered_trace(chunks) + + +def 
test_non_lowered_replay_preserves_non_sequential_result_ids() -> None: + operations = [ + {"AllocateQubit": {"id": 10}}, + {"AllocateQubit": {"id": 20}}, + {"Quantum": {"Measure": [10, 77]}}, + {"Quantum": {"Measure": [20, 3]}}, + ] + + tc = _replay_qis_trace_into_tick_circuit(operations) + + assert _flat_mz_ids(tc) == [77, 3] + + +def test_from_guppy_surface_code_is_byte_identical_to_reference() -> None: + """Regression: from_guppy(make_surface_code(...)) must work and match the + traced_qis reference DEM. A reverted dynamic-control guard had broken this + exact path (it false-positived on surface's post-measurement gates).""" + p = {"p1": 0.005, "p2": 0.005, "p_meas": 0.005, "p_prep": 0.005} + for basis in ("Z", "X"): + patch = SurfacePatch.create(distance=3) + ref = _build_surface_tick_circuit_for_native_model( + patch, + 3, + basis, + circuit_source="traced_qis", + ) + ref.lower_clifford_rotations() + ref.assign_missing_meas_ids() + ref_dem = DetectorErrorModel.from_circuit(ref, **p).to_string() + got = DetectorErrorModel.from_guppy( + make_surface_code(distance=3, num_rounds=3, basis=basis), + num_qubits=get_num_qubits(3), + detectors_json=ref.get_meta("detectors"), + observables_json=ref.get_meta("observables"), + num_measurements=int(ref.get_meta("num_measurements")), + **p, + ).to_string() + assert got == ref_dem, f"surface from_guppy not byte-identical ({basis})" + + +def test_from_guppy_out_of_range_record_fails_loud() -> None: + with pytest.raises(ValueError, match=r"out of range|record offset"): + _dem_text(detectors_json='[{"id":0,"records":[-2]}]') # only 1 measurement + + +def test_from_guppy_out_of_range_meas_id_fails_loud() -> None: + with pytest.raises(ValueError, match=r"meas_id|not present"): + _dem_text(detectors_json='[{"id":0,"meas_ids":[999]}]') + + +def test_from_guppy_accepts_dem_label_id_forms() -> None: + """The "D0"/"L0" id convenience form is now normalized in the Rust + builder (single source of truth), equivalent to the bare 
integer.""" + assert _dem_text(detectors_json='[{"id":"D0","records":[-1]}]') == _dem_text( + detectors_json='[{"id":0,"records":[-1]}]', + ) + assert _dem_text(observables_json='[{"id":"L0","records":[-1]}]') == _dem_text( + observables_json='[{"id":0,"records":[-1]}]', + ) + + +def test_from_guppy_rejects_bad_string_id() -> None: + with pytest.raises(ValueError, match=r"not a valid identifier"): + _dem_text(detectors_json='[{"id":"X0","records":[-1]}]') + + +def test_from_guppy_rejects_detector_tracked_pauli() -> None: + with pytest.raises(ValueError, match="tracked_pauli"): + _dem_text(detectors_json='[{"kind":"tracked_pauli","label":"x","pauli":"X0"}]') + + +def test_from_guppy_rejects_entry_without_records_or_meas_ids() -> None: + with pytest.raises(ValueError, match=r"records|meas_ids|neither"): + _dem_text(detectors_json='[{"id":0}]') + + +def test_from_guppy_redundant_records_and_meas_ids_are_accepted() -> None: + """Co-present records + meas_ids that name the SAME measurement are + tolerated (the surface logical_circuit path emits both redundantly) and + produce the same DEM as either form alone. 
(Non-redundant co-presence is + rejected fail-loud; that precise semantics is pinned by the deterministic + Rust unit test ``test_try_build_mixed_records_meas_ids_must_be_redundant``, + since stamped MeasId values are not predictable from Python here.)""" + both = _dem_text(detectors_json='[{"id":0,"records":[-1],"meas_ids":[0]}]') + assert both == _dem_text(detectors_json='[{"id":0,"records":[-1]}]') + + +# --------------------------------------------------------------------------- +# Constrained-ancilla surface support +# --------------------------------------------------------------------------- + + +def _constrained_surface_via_guppy(*, d, basis, rounds, budget, noise): + """Build the constrained-surface DEM through `from_guppy`.""" + patch = SurfacePatch.create(distance=d) + ref = _build_surface_tick_circuit_for_native_model( + patch, + num_rounds=rounds, + basis=basis, + ancilla_budget=budget, + circuit_source="traced_qis", + ) + ref.lower_clifford_rotations() + ref.assign_missing_meas_ids() + ref_dem = DetectorErrorModel.from_circuit(ref, **noise).to_string() + + got = DetectorErrorModel.from_guppy( + make_surface_code(distance=d, num_rounds=rounds, basis=basis, ancilla_budget=budget), + num_qubits=get_num_qubits(d, ancilla_budget=budget), + detectors_json=ref.get_meta("detectors"), + observables_json=ref.get_meta("observables"), + num_measurements=int(ref.get_meta("num_measurements")), + **noise, + ).to_string() + return ref_dem, got, ref + + +@pytest.mark.parametrize( + ("d", "basis", "rounds", "budget"), + [ + (3, "Z", 2, 1), # small-and-fast, minimum budget (one stabilizer/batch) + (3, "X", 2, 2), # asymmetric basis, X/Z paired per batch + (9, "Z", 3, 17), # canonical high-distance stress + ], +) +def test_from_guppy_constrained_surface_dem_byte_identical( + d: int, + basis: str, + rounds: int, + budget: int, +) -> None: + """`from_guppy(make_surface_code(..., ancilla_budget=b))` must produce a + DEM byte-identical to the reference DEM built through the 
+ `_build_surface_tick_circuit_for_native_model(circuit_source="traced_qis", + ancilla_budget=b)` path. Parametrized so a regression isolates to the + specific (distance, budget, basis) case rather than failing the whole set.""" + noise = {"p1": 0.005, "p2": 0.005, "p_meas": 0.005, "p_prep": 0.005} + ref_dem, got, _ = _constrained_surface_via_guppy( + d=d, + basis=basis, + rounds=rounds, + budget=budget, + noise=noise, + ) + assert got == ref_dem, ( + f"constrained surface from_guppy not byte-identical for " + f"d={d}, budget={budget}, basis={basis}, rounds={rounds}" + ) + + +def test_constrained_surface_traced_metadata_matches_abstract() -> None: + """The traced TickCircuit's surface metadata is copied verbatim from the + abstract reference. Specifically pins that + ``_copy_surface_tick_circuit_metadata`` propagates ``ancilla_budget`` + (the new key added when the constrained codegen landed) alongside the + existing detectors/observables/counts.""" + patch = SurfacePatch.create(distance=3) + abstract_tc = _build_surface_tick_circuit_for_native_model( + patch, + num_rounds=2, + basis="Z", + ancilla_budget=2, + circuit_source="abstract", + ) + traced_tc = _build_surface_tick_circuit_for_native_model( + patch, + num_rounds=2, + basis="Z", + ancilla_budget=2, + circuit_source="traced_qis", + ) + for key in ( + "basis", + "detectors", + "observables", + "num_measurements", + "num_detectors", + "ancilla_budget", + ): + a = abstract_tc.get_meta(key) + b = traced_tc.get_meta(key) + assert a == b, f"metadata mismatch on key {key!r}: abstract={a!r}, traced={b!r}" + # ancilla_budget specifically must be the requested budget (stored as a string by set_meta). 
+ assert traced_tc.get_meta("ancilla_budget") == "2" + + +@pytest.mark.parametrize(("d", "budget"), [(3, 1), (3, 2), (5, 3)]) +def test_constrained_surface_lowered_qubit_stream_within_budget(d: int, budget: int) -> None: + """The lowered-trace physical qubit IDs must stay within the budgeted + pool, and ancilla slots must be empirically reused (more measurements + than physical ancilla qubits). Pins the load-bearing assumption the + spike validated, across several (distance, budget) combinations so the + reuse invariant isn't only checked at one point.""" + import pecos + + program = make_surface_code(distance=d, num_rounds=2, basis="Z", ancilla_budget=budget) + n_q = get_num_qubits(d, ancilla_budget=budget) + chunks = list( + pecos.sim(program) + .classical(pecos.selene_engine()) + .quantum(pecos.stabilizer()) + .qubits(n_q) + .seed(0) + .capture_operation_trace(), + ) + + all_qubits: set[int] = set() + mz_qubits: list[int] = [] + for chunk in chunks: + for gate in chunk.get("lowered_quantum_ops") or []: + qs = [int(q) for q in gate.get("qubits", [])] + all_qubits.update(qs) + if str(gate.get("gate_type")) == "MZ": + mz_qubits.extend(qs) + + max_q = max(all_qubits) if all_qubits else -1 + # Budget enforcement: total physical qubits used must fit in d^2 + budget. + over_budget_msg = f"max physical qubit id {max_q} exceeds budgeted pool size {n_q}" + assert max_q < n_q, over_budget_msg + # Reuse demonstrated: some physical qubit appears in multiple MZ ops. 
+ reuse = any(mz_qubits.count(q) > 1 for q in set(mz_qubits)) + assert reuse, "no physical qubit appears in more than one MZ op" + + +def test_constrained_from_guppy_dem_is_consumable_by_pecos_native_decoder() -> None: + """PECOS-native decoder smoke for the constrained-ancilla DEM: the DEM + returned by ``from_guppy(...)`` must be consumable by both the PECOS + sampler (``dem.to_sampler()``) and the PECOS Rust-backed + ``PyMatchingDecoder.from_dem(...)`` -- the actual downstream surfaces + callers use, not an external ``pymatching`` install. + + Also asserts ``stim.DetectorErrorModel(dem.to_string_decomposed())`` + parses as a lightweight syntax-compatibility smoke (optional reference, + not the correctness oracle). + """ + from pecos_rslib.decoders import PyMatchingDecoder + + p = {"p1": 0.005, "p2": 0.005, "p_meas": 0.005, "p_prep": 0.005} + patch = SurfacePatch.create(distance=3) + abstract_tc = _build_surface_tick_circuit_for_native_model( + patch, + num_rounds=2, + basis="Z", + ancilla_budget=2, + circuit_source="abstract", + ) + dem = DetectorErrorModel.from_guppy( + make_surface_code(distance=3, num_rounds=2, basis="Z", ancilla_budget=2), + num_qubits=get_num_qubits(3, ancilla_budget=2), + detectors_json=abstract_tc.get_meta("detectors"), + observables_json=abstract_tc.get_meta("observables"), + num_measurements=int(abstract_tc.get_meta("num_measurements")), + **p, + ) + + # PECOS-native sampler path: the sampler must agree with the DEM it was + # built from (substantive, not merely ``>= 0``) and actually produce + # well-shaped samples. + sampler = dem.to_sampler() + assert sampler.num_detectors == dem.num_detectors + assert sampler.num_observables == dem.num_observables + assert dem.num_observables == 1 # one logical observable for a single patch + + batch = sampler.generate_samples(16, 0) + assert batch.num_shots == 16 + # Each shot's syndrome covers exactly the DEM's detectors. 
+ assert len(batch.get_syndrome(0)) == dem.num_detectors + # The observable mask fits within ``num_observables`` bits (no stray bits). + assert batch.get_observable_mask(0) >> dem.num_observables == 0 + + # PECOS-native Rust-backed matching decoder: DEM is consumable by + # the actual downstream decoder surface. + decomp = dem.to_string_decomposed() + decoder = PyMatchingDecoder.from_dem(decomp) + assert decoder is not None + + # Lightweight format-compatibility smoke (optional reference coverage, + # not the correctness oracle). Stim should parse the decomposed DEM. + import stim + + parsed = stim.DetectorErrorModel(decomp) + assert parsed.num_detectors >= 0 + + +def test_constrained_from_guppy_fails_loud_on_mismatched_num_measurements() -> None: + """The constrained-ancilla surface program must flow through the same + Rust metadata-validation fail-loud path as any other Guppy program. + No surface-specific bypass: passing a ``num_measurements`` that disagrees + with the count the traced program actually performs (here, one greater + than the true count) is rejected by the generic builder, not by anything + surface-aware in ``from_guppy``. 
The regex pins the builder's specific + 'declared count disagrees' diagnostic, not just the bare key name, so a + different ``num_measurements``-mentioning error wouldn't pass spuriously.""" + p = {"p1": 0.005, "p2": 0.005, "p_meas": 0.005, "p_prep": 0.005} + patch = SurfacePatch.create(distance=3) + abstract_tc = _build_surface_tick_circuit_for_native_model( + patch, + num_rounds=2, + basis="Z", + ancilla_budget=2, + circuit_source="abstract", + ) + actual = int(abstract_tc.get_meta("num_measurements")) + wrong = actual + 1 + + with pytest.raises( + ValueError, + match=r"num_measurements=\d+ disagrees with the \d+ measurement", + ): + DetectorErrorModel.from_guppy( + make_surface_code(distance=3, num_rounds=2, basis="Z", ancilla_budget=2), + num_qubits=get_num_qubits(3, ancilla_budget=2), + detectors_json=abstract_tc.get_meta("detectors"), + observables_json=abstract_tc.get_meta("observables"), + num_measurements=wrong, + **p, + ) + + +@pytest.mark.parametrize("entry", ["get_num_qubits", "make_surface_code"]) +def test_constrained_public_api_rejects_invalid_ancilla_budget(entry: str) -> None: + """Both public entry points that accept ``ancilla_budget`` -- ``get_num_qubits`` + and ``make_surface_code`` -- validate it fail-loud at the boundary (routing + through ``normalize_ancilla_budget``), so a bad budget never reaches codegen or + the qubit-count math. 
``bool``/``float``/``str`` raise ``TypeError``; ``< 1``
+    raises ``ValueError``."""
+
+    def call(budget: object):
+        if entry == "get_num_qubits":
+            return get_num_qubits(3, ancilla_budget=budget)
+        return make_surface_code(distance=3, num_rounds=2, basis="Z", ancilla_budget=budget)
+
+    for bad in (True, 1.5, "2"):
+        with pytest.raises(TypeError, match=r"must be int or None"):
+            call(bad)
+    for bad in (0, -1):
+        with pytest.raises(ValueError, match=r"must be >= 1"):
+            call(bad)
+
+
+def test_copy_surface_metadata_propagates_descriptors() -> None:
+    """``_copy_surface_tick_circuit_metadata`` must propagate the structured
+    detector/observable *descriptor* metadata, not just the raw
+    detectors/observables JSON. The constrained build path doesn't populate
+    descriptors lazily, so the byte-identical and metadata-match tests above
+    never exercise the descriptor branch of the copy helper -- this seeds them
+    explicitly on the source and pins that the copy carries them across."""
+    from pecos.qec.surface import (
+        get_detector_descriptors_from_tick_circuit,
+        get_observable_descriptors_from_tick_circuit,
+    )
+    from pecos.qec.surface.decode import _copy_surface_tick_circuit_metadata
+    from pecos_rslib.quantum import TickCircuit
+
+    patch = SurfacePatch.create(distance=3)
+    source = _build_surface_tick_circuit_for_native_model(
+        patch,
+        num_rounds=2,
+        basis="Z",
+        ancilla_budget=2,
+        circuit_source="abstract",
+    )
+    # Seed the lazily-built descriptor metadata on the source.
+    det_desc = get_detector_descriptors_from_tick_circuit(source, patch)
+    obs_desc = get_observable_descriptors_from_tick_circuit(source, patch)
+    assert source.get_meta("detector_descriptors") is not None
+    assert source.get_meta("observable_descriptors") is not None
+
+    target = TickCircuit()  # fresh target; only the copy below writes to it
+    _copy_surface_tick_circuit_metadata(source, target)
+
+    assert target.get_meta("detector_descriptors") == source.get_meta("detector_descriptors")
+    assert target.get_meta("observable_descriptors") == source.get_meta("observable_descriptors")
+    # Sanity: the seeded descriptors are non-trivial (real content was copied).
+    assert len(det_desc) > 0
+    assert len(obs_desc) > 0
+
+
+def test_surface_module_cache_collapses_unconstrained_budget_forms() -> None:
+    """``get_surface_code_module`` keys its cache on the *effective* budget
+    (``normalize_ancilla_budget(d*d-1, budget)``), so ``ancilla_budget=None``
+    and any ``budget >= total_ancilla`` resolve to the SAME cached module --
+    no redundant codegen for the two ways of saying "unconstrained". A finite
+    constrained budget is a distinct entry."""
+    from pecos.guppy.surface import get_surface_code_module
+
+    d = 3
+    total_ancilla = d * d - 1  # all stabilizer ancillas live simultaneously
+
+    unconstrained_none = get_surface_code_module(d, ancilla_budget=None)
+    unconstrained_exact = get_surface_code_module(d, ancilla_budget=total_ancilla)
+    unconstrained_large = get_surface_code_module(d, ancilla_budget=10**6)
+    # All three "unconstrained" spellings are the identical cached object.
+    assert unconstrained_none is unconstrained_exact
+    assert unconstrained_none is unconstrained_large
+    assert unconstrained_none["ancilla_budget"] == total_ancilla
+
+    constrained = get_surface_code_module(d, ancilla_budget=2)
+    # A genuinely-constrained budget is a separate cache entry.
+    assert constrained is not unconstrained_none
+    assert constrained["ancilla_budget"] == 2
diff --git a/python/quantum-pecos/tests/qec/test_from_guppy_result_tags.py b/python/quantum-pecos/tests/qec/test_from_guppy_result_tags.py
new file mode 100644
index 000000000..160bff950
--- /dev/null
+++ b/python/quantum-pecos/tests/qec/test_from_guppy_result_tags.py
@@ -0,0 +1,233 @@
+# Copyright 2026 The PECOS Developers
+# Licensed under the Apache License, Version 2.0
+
+"""Tests for tag-referenced detectors in ``DetectorErrorModel.from_guppy``.
+
+Covers:
+
+1. **Correspondence cross-check (load-bearing):** for a scrambled straight-line
+   Guppy program where ``result()`` calls are declared in non-source order, a
+   DEM built via ``result_tags`` (which goes through
+   ``pecos_hugr_qis::extract_result_tag_measurements`` to recover the
+   reorder-immune tag -> measurement binding from the compiled HUGR) is
+   **byte-identical** to the DEM built via the equivalent positional
+   ``records``. This is the committed verification of the
+   HUGR-traversal-ordinal == traced-``MeasId``-order property the prior
+   review (proposal 001 item #7) flagged as unproven.
+2. **Runtime-loop guard:** a Guppy program with a runtime loop (the surface
+   code) using ``result_tags`` fails loud with the documented "static N vs
+   traced M" message, instead of silently misbinding.
+3. **Non-Guppy callable rejection:** ``result_tags`` requires a HUGR-compilable
+   input (a ``@guppy``-decorated function or ``GuppyFunctionDefinition`` such
+   as ``make_surface_code`` returns). A plain Python callable cannot be
+   compiled to a HUGR and is rejected fail-loud upfront with a clear
+   ``@guppy`` message, not crashed later inside the HUGR compile step.
+4. **Unknown-tag rejection:** referencing a tag the program never records is
+   an error.
+5. **``result_tags`` combined with ``records``:** the two are alternatives,
+   not additive -- a redundant ``records`` list builds the same DEM, while a
+   non-redundant or malformed ``records`` list is an error.
+"""
+
+import pytest
+from guppylang import guppy
+from guppylang.std.builtins import result
+from guppylang.std.quantum import measure, qubit, x
+from pecos.guppy import get_num_qubits, make_surface_code
+from pecos.qec import DetectorErrorModel
+
+
+# A scrambled straight-line program: three measurements in source order
+# qa, qb, qc; ``result()`` is called for them in reverse-then-mixed order
+# (c, a, b). The HUGR-side extractor binds tag_a -> [0], tag_b -> [1],
+# tag_c -> [2] (ordinals of the measurements the tags actually record, not
+# the order of the result() calls).
+#
+# Each qubit gets a *different* number of single-qubit gates before measure
+# (qa: 0, qb: 1, qc: 2). With p1 > 0 those gates contribute distinct error
+# mechanisms touching only that qubit's measurement, so the DEMs for
+# detectors anchored to records [-3], [-2], [-1] differ in their (number of)
+# mechanisms / probabilities. A test asserting result_tags equals positional
+# records is then load-bearing: a wrong ordinal mapping would produce a
+# different DEM string, not coincidentally equal as it does for symmetric
+# programs. The test asserts up front that the three positional DEMs differ
+# (so a future-symmetric refactor self-fails rather than silently passing).
+@guppy
+def _scrambled_three_measurements() -> None:
+    qa = qubit()
+    qb = qubit()
+    qc = qubit()
+    x(qb)  # qb: one X before its measurement
+    x(qc)  # qc: two X gates before its measurement (qa gets none)
+    x(qc)
+    a = measure(qa)
+    b = measure(qb)
+    c = measure(qc)
+    result("tag_c", c)  # result() order (c, a, b) differs from measure order on purpose
+    result("tag_a", a)
+    result("tag_b", b)
+
+
+_NOISE = {"p1": 0.01, "p2": 0.0, "p_meas": 0.1, "p_prep": 0.005}  # p1 > 0: gates add errors
+
+
+def _from_guppy(detectors_json: str, *, observables_json: str = "[]") -> str:
+    """Return the scrambled program's DEM string for the given JSON (seed=0, ``_NOISE``)."""
+    dem = DetectorErrorModel.from_guppy(
+        _scrambled_three_measurements,
+        num_qubits=3,
+        detectors_json=detectors_json,
+        observables_json=observables_json,
+        seed=0,
+        **_NOISE,
+    )
+    return dem.to_string()
+
+
+# ---------------------------------------------------------------------------
+# 1. Correspondence: result_tags DEM == positional-records DEM (byte-identical)
+# ---------------------------------------------------------------------------
+
+# Three measurements (a, b, c) in trace order; record offsets are
+# (a -> -3, b -> -2, c -> -1) under the Stim convention. If
+# HUGR-traversal-ordinal == traced-MeasId-order, then result_tags=["tag_X"]
+# resolves to the same record offset as the positional form for tag X.
+
+
+def test_result_tags_match_positional_records() -> None:
+    """Each tag resolves to the same DEM as the corresponding positional
+    record AND a wrong mapping would produce a different DEM.
+
+    This is the load-bearing cross-check for the HUGR-ordinal == traced-
+    MeasId-order claim (proposal 001 item #7). The asymmetric pre-history
+    on qb (1 X gate) and qc (2 X gates) makes the three measurements
+    DEM-distinguishable, so swapping which tag points to which measurement
+    would yield a different DEM byte-string.
+    """
+    via_records = {
+        -3: _from_guppy('[{"id":0,"records":[-3]}]'),
+        -2: _from_guppy('[{"id":0,"records":[-2]}]'),
+        -1: _from_guppy('[{"id":0,"records":[-1]}]'),
+    }
+    # Sanity: the three reference DEMs must be pairwise distinct (the chained
+    # ``!=`` wraps back to -3) -- else a wrong ordinal mapping passes spuriously.
+    assert via_records[-3] != via_records[-2] != via_records[-1] != via_records[-3], (
+        "scrambled program is DEM-symmetric across the three measurements; the "
+        "correspondence test is no longer load-bearing. Restore asymmetric gates."
+    )
+
+    # Now the substantive claim: each tag's DEM matches the positional one.
+    assert _from_guppy('[{"id":0,"result_tags":["tag_a"]}]') == via_records[-3]
+    assert _from_guppy('[{"id":0,"result_tags":["tag_b"]}]') == via_records[-2]
+    assert _from_guppy('[{"id":0,"result_tags":["tag_c"]}]') == via_records[-1]
+
+
+def test_result_tags_multi_tag_detector_matches_positional() -> None:
+    """A detector referencing multiple tags resolves to the same DEM as
+    the positional equivalent (asserts the property for combined refs too)."""
+    via_tags = _from_guppy('[{"id":0,"result_tags":["tag_a","tag_c"]}]')
+    via_records = _from_guppy('[{"id":0,"records":[-3,-1]}]')
+    assert via_tags == via_records
+
+
+def test_result_tags_observables_path_matches_positional() -> None:
+    """The observables_json path resolves result_tags identically."""
+    via_tags = _from_guppy(
+        "[]",
+        observables_json='[{"id":0,"result_tags":["tag_b"]}]',
+    )
+    via_records = _from_guppy(
+        "[]",
+        observables_json='[{"id":0,"records":[-2]}]',
+    )
+    assert via_tags == via_records
+
+
+# ---------------------------------------------------------------------------
+# 2.
Runtime-loop guard: surface code fails loud, not silent
+# ---------------------------------------------------------------------------
+
+
+def test_result_tags_with_runtime_loop_program_fails_loud() -> None:
+    """The surface code uses ``for _ in range(comptime(n))`` rounds; the HUGR
+    has one static measure op per loop body, not per occurrence. The Rust
+    static-vs-traced count guard rejects this case rather than silently
+    misbinding (per-occurrence tag binding requires CFG-interpreter-class
+    machinery)."""
+    with pytest.raises(ValueError, match=r"runtime loops|not supported"):
+        DetectorErrorModel.from_guppy(
+            make_surface_code(distance=3, num_rounds=3, basis="Z"),
+            num_qubits=get_num_qubits(3),
+            detectors_json='[{"id":0,"result_tags":["any_tag"]}]',  # placeholder tag
+            **_NOISE,
+        )
+
+
+# ---------------------------------------------------------------------------
+# 3. Non-Guppy callable rejection: result_tags requires a HUGR-compilable input
+# ---------------------------------------------------------------------------
+
+
+def test_result_tags_with_non_guppy_callable_fails_loud_upfront() -> None:
+    """``result_tags`` requires a ``@guppy``-decorated function (or a
+    ``GuppyFunctionDefinition`` such as ``make_surface_code`` returns).
+    A plain Python callable cannot be compiled to a HUGR; ``from_guppy``
+    must reject it upfront with the clear ``@guppy`` message instead of
+    crashing later inside the HUGR compile step (the upfront guard the
+    review flagged as needed)."""
+
+    def not_a_guppy_function() -> None:  # deliberately NOT @guppy-decorated
+        pass
+
+    with pytest.raises(ValueError, match=r"@guppy-decorated function"):
+        DetectorErrorModel.from_guppy(
+            not_a_guppy_function,
+            num_qubits=1,
+            detectors_json='[{"id":0,"result_tags":["any_tag"]}]',  # placeholder tag
+            **_NOISE,
+        )
+
+
+# ---------------------------------------------------------------------------
+# 4.
Unknown-tag rejection
+# ---------------------------------------------------------------------------
+
+
+def test_result_tags_unknown_tag_fails_loud() -> None:
+    with pytest.raises(ValueError, match=r"never records|result_tag"):
+        _from_guppy('[{"id":0,"result_tags":["nonexistent_tag"]}]')  # no result() records it
+
+
+# ---------------------------------------------------------------------------
+# 5. result_tags + records: strict + redundancy-checked, not additive
+# ---------------------------------------------------------------------------
+
+
+def test_result_tags_with_redundant_records_builds_unchanged() -> None:
+    """When ``records`` exactly matches the resolved ``result_tags`` offsets
+    (sorted-set equality), the entry is accepted and the DEM equals the
+    records-only equivalent (no double-reference / parity corruption)."""
+    # tag_a resolves to record -3 in the asymmetric three-measurement program.
+    with_both = _from_guppy('[{"id":0,"records":[-3],"result_tags":["tag_a"]}]')
+    records_only = _from_guppy('[{"id":0,"records":[-3]}]')
+    assert with_both == records_only
+
+
+def test_result_tags_non_redundant_with_records_fails_loud() -> None:
+    """When ``records`` and ``result_tags`` reference *different* measurements
+    they are not redundant; the rewriter must fail loud rather than silently
+    combine them additively (which would either weaken the DEM or, on
+    accidental duplicate, XOR-cancel the detector's mechanisms)."""
+    # tag_c resolves to -1; records=-3 is a different measurement.
+    with pytest.raises(ValueError, match=r"alternatives|not additive|different measurements"):
+        _from_guppy('[{"id":0,"records":[-3],"result_tags":["tag_c"]}]')
+
+
+def test_result_tags_with_malformed_records_fails_loud() -> None:
+    """The rewriter must strict-parse existing ``records`` -- not silently
+    drop malformed entries via filter_map (a regression a previous review
+    caught). Both a non-integer entry and a non-array shape must fail loud."""
+    with pytest.raises(ValueError, match=r"records entries must be integers"):
+        _from_guppy('[{"id":0,"records":["bad"],"result_tags":["tag_a"]}]')
+    with pytest.raises(ValueError, match=r"records must be a JSON array"):
+        _from_guppy('[{"id":0,"records":-3,"result_tags":["tag_a"]}]')
diff --git a/python/quantum-pecos/tests/qec/test_qec_ux_entrypoints.py b/python/quantum-pecos/tests/qec/test_qec_ux_entrypoints.py
index aceeb3050..84e03a7a5 100644
--- a/python/quantum-pecos/tests/qec/test_qec_ux_entrypoints.py
+++ b/python/quantum-pecos/tests/qec/test_qec_ux_entrypoints.py
@@ -87,6 +87,26 @@ def test_tick_circuit_metadata_helpers_build_detector_and_observable_json() -> N
     assert int(tc.get_meta("num_observables")) == 3
 
 
+def test_malformed_dem_metadata_fails_loud_from_circuit_entrypoints() -> None:
+    """Metadata whose detector record offset is the string ``"-1"`` instead of
+    an integer must raise ``ValueError`` from both
+    ``DetectorErrorModel.from_circuit`` and ``DemSampler.from_circuit``."""
+    from pecos.qec import DemSampler, DetectorErrorModel
+    from pecos.quantum import TickCircuit
+
+    tc = TickCircuit()
+    tc.tick().mz([0])
+    tc.set_meta("num_measurements", "1")
+    tc.set_meta("detectors", '[{"id":0,"records":["-1"]}]')  # offset is a string, not an int
+    tc.set_meta("observables", "[]")
+
+    with pytest.raises(ValueError, match="record offsets must be integers"):
+        DetectorErrorModel.from_circuit(tc, p1=0.0, p2=0.0, p_meas=0.1, p_prep=0.0)
+
+    with pytest.raises(ValueError, match="Invalid detector/observable metadata"):
+        DemSampler.from_circuit(tc, p1=0.0, p2=0.0, p_meas=0.1, p_prep=0.0)
+
+
 def test_tracked_pauli_public_api_uses_current_names_only() -> None:
     from pecos.quantum import DagCircuit, GateRegistry, GateType, TickCircuit, X
 
diff --git a/ruff.toml b/ruff.toml
index e012768bd..721f369aa 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -168,6 +168,10 @@ ignore = [
     "SLF001", # Private member access - accessing internal decoder APIs
     "ANN401", # Any types - required for optional dependency types (pymatching, stim)
 ]
+"python/quantum-pecos/src/pecos/qec/dem.py" = [
+    "PLC0415", # Import inside function - lazy import breaks the
qec/__init__ <-> decode cycle + "ANN401", # Any types - duck-typed Guppy program (whatever pecos.sim accepts) and Rust TickCircuit +] "python/quantum-pecos/src/pecos/qec/surface/plot.py" = [ "PLC0415", # Import inside function - lazy loading of qec.surface ] diff --git a/uv.lock b/uv.lock index 721e11ca6..f0e9303b7 100644 --- a/uv.lock +++ b/uv.lock @@ -3442,15 +3442,15 @@ wheels = [ [[package]] name = "pymdown-extensions" -version = "10.21.2" +version = "10.21.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/df/08/f1c908c581fd11913da4711ea7ba32c0eee40b0190000996bb863b0c9349/pymdown_extensions-10.21.2.tar.gz", hash = "sha256:c3f55a5b8a1d0edf6699e35dcbea71d978d34ff3fa79f3d807b8a5b3fa90fbdc", size = 853922, upload-time = "2026-03-29T15:01:55.233Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/26/d1015444da4d952a1ca487a236b522eb979766f0295a0bd0c5fc089989a9/pymdown_extensions-10.21.3.tar.gz", hash = "sha256:72cfcf55f07aea0d4af2c4f11dd4e52466ddfb1bb819673146398e0bd3a77354", size = 854140, upload-time = "2026-05-13T12:57:32.267Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/27/a2fc51a4a122dfd1015e921ae9d22fee3d20b0b8080d9a704578bf9deece/pymdown_extensions-10.21.2-py3-none-any.whl", hash = "sha256:5c0fd2a2bea14eb39af8ff284f1066d898ab2187d81b889b75d46d4348c01638", size = 268901, upload-time = "2026-03-29T15:01:53.244Z" }, + { url = "https://files.pythonhosted.org/packages/7e/85/545a951eecc270fcd688288c600017e2050a1aacb56c711d208586d3e470/pymdown_extensions-10.21.3-py3-none-any.whl", hash = "sha256:d7a5d08014fc571e80ca21dd6f854e31f94c489800350564d55d15b3c41e76b6", size = 269002, upload-time = "2026-05-13T12:57:30.296Z" }, ] [[package]]