diff --git a/.stats.json b/.stats.json
new file mode 100644
index 0000000..81c65d5
--- /dev/null
+++ b/.stats.json
@@ -0,0 +1,5 @@
+{
+  "lean_modules": 61,
+  "lean_lines": 32558,
+  "_note": "Generated by scripts/repo-stats.sh. Do not edit by hand."
+}
diff --git a/FormalSLT.lean b/FormalSLT.lean
index 6e71de2..36bb137 100644
--- a/FormalSLT.lean
+++ b/FormalSLT.lean
@@ -67,3 +67,4 @@ import FormalSLT.PACBayesFiniteProductMGF
 import FormalSLT.PACBayesBoundedLoss
 import FormalSLT.PACBayesMcAllester
 import FormalSLT.PACBayesBernstein
+import FormalSLT.PACBayesMargin
diff --git a/FormalSLT/PACBayesBernstein.lean b/FormalSLT/PACBayesBernstein.lean
index 2f9ffec..d074f28 100644
--- a/FormalSLT/PACBayesBernstein.lean
+++ b/FormalSLT/PACBayesBernstein.lean
@@ -4,6 +4,7 @@ Released under MIT license as described in the file LICENSE.
 Authors: Robby Sneiderman
 -/
 import FormalSLT.PACBayesKL
+import FormalSLT.Probability.FiniteUnionBound
 import Mathlib.Algebra.BigOperators.Field
 import Mathlib.Algebra.Order.BigOperators.Group.Finset
 import Mathlib.Analysis.SpecialFunctions.Sqrt
@@ -37,6 +38,8 @@ classifier margin loss.
   Markov/confidence layer for a fixed Bernstein parameter.
 * `finitePACBayesBernsteinMargin_badEventMass_le_delta` — posterior-dependent
   margin-style wrapper under explicit complexity and penalty certificates.
+* `finitePACBayesBernsteinGridOptimized_badEventMass_le_delta` — finite-grid
+  optimization wrapper over supplied Bernstein parameter buckets.
 
 ## Current boundaries
 
@@ -94,6 +97,114 @@ def expectedPriorBernsteinExpMoment [Fintype Ω] [Fintype ι]
   ∑ ω, ν ω *
     priorBernsteinExpMoment π lambda scale riskFn empiricalRiskFn varianceProxy ω
 
+/--
+Expected normalized PAC-Bayes Bernstein prior moment from per-hypothesis MGF
+budgets.
+
+This is the algebraic bridge used before plugging in a concrete iid
+concentration lemma: each hypothesis supplies the Bernstein exponential budget,
+then the prior-weighted normalized moment has expectation at most one.
+-/
+theorem expectedPriorBernsteinExpMoment_le_one_of_mgf_bound
+    [Fintype Ω] [Fintype ι]
+    {π : ι → ℝ} (hπ : IsPMF π)
+    (ν : Ω → ℝ) (lambda scale : ℝ)
+    (riskFn : ι → ℝ) (empiricalRiskFn : Ω → ι → ℝ)
+    (varianceProxy : ι → ℝ)
+    (hmgf :
+      ∀ i : ι,
+        (∑ ω : Ω, ν ω *
+          Real.exp (lambda * (riskFn i - empiricalRiskFn ω i))) ≤
+        Real.exp
+          (lambda ^ 2 * varianceProxy i /
+            (2 * (1 - scale * lambda)))) :
+    expectedPriorBernsteinExpMoment ν π lambda scale riskFn empiricalRiskFn
+      varianceProxy ≤ 1 := by
+  classical
+  let budget : ι → ℝ :=
+    fun i =>
+      lambda ^ 2 * varianceProxy i /
+        (2 * (1 - scale * lambda))
+  have hexp_split : ∀ a b : ℝ, Real.exp (a - b) = Real.exp a * Real.exp (-b) := by
+    intro a b
+    rw [← Real.exp_add]
+    ring_nf
+  have hswap :
+      expectedPriorBernsteinExpMoment ν π lambda scale riskFn empiricalRiskFn
+          varianceProxy =
+        ∑ i : ι,
+          π i * Real.exp (-(budget i)) *
+            (∑ ω : Ω, ν ω *
+              Real.exp (lambda * (riskFn i - empiricalRiskFn ω i))) := by
+    unfold expectedPriorBernsteinExpMoment priorBernsteinExpMoment
+    calc
+      (∑ ω : Ω,
+          ν ω *
+            ∑ i : ι,
+              π i *
+                Real.exp
+                  (lambda * (riskFn i - empiricalRiskFn ω i) -
+                    lambda ^ 2 * varianceProxy i /
+                      (2 * (1 - scale * lambda))))
+          =
+        ∑ ω : Ω, ∑ i : ι,
+          ν ω *
+            (π i *
+              Real.exp
+                (lambda * (riskFn i - empiricalRiskFn ω i) -
+                  budget i)) := by
+            apply Finset.sum_congr rfl
+            intro ω _hω
+            rw [Finset.mul_sum]
+      _ =
+        ∑ i : ι, ∑ ω : Ω,
+          ν ω *
+            (π i *
+              Real.exp
+                (lambda * (riskFn i - empiricalRiskFn ω i) -
+                  budget i)) := by
+            rw [Finset.sum_comm]
+      _ =
+        ∑ i : ι,
+          π i * Real.exp (-(budget i)) *
+            (∑ ω : Ω, ν ω *
+              Real.exp (lambda * (riskFn i - empiricalRiskFn ω i))) := by
+            apply Finset.sum_congr rfl
+            intro i _hi
+            rw [Finset.mul_sum]
+            apply Finset.sum_congr rfl
+            intro ω _hω
+            rw [hexp_split]
+            ring
+  rw [hswap]
+  calc
+    (∑ i : ι,
+        π i * Real.exp (-(budget i)) *
+          (∑ ω : Ω, ν ω *
+            Real.exp (lambda * (riskFn i - empiricalRiskFn ω i))))
+        ≤
+      ∑ i : ι,
+        π i * Real.exp (-(budget i)) *
+          Real.exp (budget i) := by
+        apply Finset.sum_le_sum
+        intro i _hi
+        exact mul_le_mul_of_nonneg_left
+          (hmgf i)
+          (mul_nonneg (hπ.nonneg i) (le_of_lt (Real.exp_pos _)))
+    _ = ∑ i : ι, π i := by
+        apply Finset.sum_congr rfl
+        intro i _hi
+        calc
+          π i * Real.exp (-(budget i)) * Real.exp (budget i)
+              = π i * (Real.exp (-(budget i)) * Real.exp (budget i)) := by ring
+          _ = π i * 1 := by
+              congr 1
+              rw [← Real.exp_add]
+              ring_nf
+              simp
+          _ = π i := by ring
+    _ = 1 := hπ.sum_one
+
 /-- Finite sample mass of outcomes whose Bernstein prior moment exceeds a threshold. -/
 def priorBernsteinExpMomentTailMass [Fintype Ω] [Fintype ι]
     (ν : Ω → ℝ) (π : ι → ℝ)
@@ -556,6 +667,238 @@ theorem finitePACBayesBernsteinMargin_badEventMass_le_delta
         scale * complexityOf ρ)
     hcomplexity hpenalty hExpected
 
+/-! ### Finite-grid peeling over Bernstein parameters -/
+
+/--
+Samples where some posterior violates at least one finite Bernstein parameter
+bucket.
+
+Each grid index supplies a parameter `lambdaOf g`, a scale `scaleOf g`, and a
+confidence allocation `confidenceOf g`. This is a finite-grid statement, not an
+optimization over all real parameters.
+-/
+def finitePACBayesBernsteinGridBadSamples
+    [Fintype Ω] [Fintype ι] [Fintype γ]
+    (π : ι → ℝ) (lambdaOf scaleOf confidenceOf : γ → ℝ)
+    (riskFn : ι → ℝ) (empiricalRiskFn : Ω → ι → ℝ)
+    (varianceProxy : ι → ℝ) : Finset Ω :=
+  Finset.univ.filter fun ω =>
+    ∃ g : γ, ∃ ρ : ι → ℝ,
+      IsPMF ρ ∧
+        posteriorGeneralizationGap ρ riskFn (empiricalRiskFn ω) >
+          (klDiv ρ π + Real.log (1 / confidenceOf g)) / lambdaOf g +
+            lambdaOf g * posteriorMarginVarianceProxy ρ varianceProxy /
+              (2 * (1 - scaleOf g * lambdaOf g))
+
+/--
+Finite-grid Bernstein bad-event bound.
+
+For each finite grid bucket `g`, the fixed-`lambda` Bernstein theorem is applied
+with its own confidence allocation. A finite weighted union bound controls the
+mass of samples on which any bucket fails.
+-/
+theorem finitePACBayesBernsteinGrid_badEventMass_le_sum_delta
+    [Fintype Ω] [DecidableEq Ω] [Fintype ι] [Nonempty ι] [Fintype γ]
+    {ν : Ω → ℝ} (hν : IsPMF ν)
+    {π : ι → ℝ} (hπ : IsFullSupportPMF π)
+    (lambdaOf scaleOf confidenceOf : γ → ℝ)
+    (hlambda : ∀ g : γ, 0 < lambdaOf g)
+    (hscale : ∀ g : γ, scaleOf g * lambdaOf g < 1)
+    (hconfidenceOf : ∀ g : γ, 0 < confidenceOf g)
+    (riskFn : ι → ℝ) (empiricalRiskFn : Ω → ι → ℝ)
+    (varianceProxy : ι → ℝ)
+    (hExpected :
+      ∀ g : γ,
+        expectedPriorBernsteinExpMoment ν π (lambdaOf g) (scaleOf g)
+          riskFn empiricalRiskFn varianceProxy ≤ 1) :
+    (∑ ω ∈
+        finitePACBayesBernsteinGridBadSamples π lambdaOf scaleOf confidenceOf
+          riskFn empiricalRiskFn varianceProxy,
+        ν ω) ≤
+      ∑ g : γ, confidenceOf g := by
+  classical
+  let bucketEvent : γ → Finset Ω := fun g =>
+    finitePACBayesBernsteinFixedLambdaBadSamples π (lambdaOf g) (scaleOf g)
+      (confidenceOf g) riskFn empiricalRiskFn varianceProxy
+  let gridUnionEvent : Finset Ω :=
+    Finset.univ.filter fun ω : Ω => ∃ g : γ, ω ∈ bucketEvent g
+  have hsubset :
+      finitePACBayesBernsteinGridBadSamples π lambdaOf scaleOf confidenceOf
+          riskFn empiricalRiskFn varianceProxy
+        ⊆ gridUnionEvent := by
+    intro ω hω
+    rw [finitePACBayesBernsteinGridBadSamples, Finset.mem_filter] at hω
+    rcases hω.2 with ⟨g, ρ, hρ, hbad⟩
+    change ω ∈ (Finset.univ.filter fun ω : Ω => ∃ g : γ, ω ∈ bucketEvent g)
+    rw [Finset.mem_filter]
+    refine ⟨Finset.mem_univ ω, g, ?_⟩
+    change ω ∈ finitePACBayesBernsteinFixedLambdaBadSamples π (lambdaOf g) (scaleOf g)
+      (confidenceOf g) riskFn empiricalRiskFn varianceProxy
+    rw [finitePACBayesBernsteinFixedLambdaBadSamples, Finset.mem_filter]
+    exact ⟨Finset.mem_univ ω, ρ, hρ, hbad⟩
+  have hmass_subset :
+      (∑ ω ∈
+          finitePACBayesBernsteinGridBadSamples π lambdaOf scaleOf confidenceOf
+            riskFn empiricalRiskFn varianceProxy,
+          ν ω)
+        ≤ ∑ ω ∈ gridUnionEvent, ν ω := by
+    exact Finset.sum_le_sum_of_subset_of_nonneg hsubset (by
+      intro ω _hω _hnot
+      exact hν.nonneg ω)
+  have hunion :=
+    FormalSLT.Probability.FiniteUnionBound.finiteProbabilityUnionBound_proof
+      (support := (Finset.univ : Finset Ω))
+      (w := ν)
+      (events := bucketEvent)
+      (s := (Finset.univ : Finset γ))
+      (fun ω : Ω => hν.nonneg ω)
+  have hunion_mass :
+      (∑ ω ∈ gridUnionEvent, ν ω) ≤
+        ∑ g : γ, ∑ ω ∈ bucketEvent g, ν ω := by
+    change
+      (∑ ω ∈ (Finset.univ.filter fun ω : Ω => ∃ g : γ, ω ∈ bucketEvent g), ν ω) ≤
+        ∑ g : γ, ∑ ω ∈ bucketEvent g, ν ω
+    unfold FormalSLT.Probability.FiniteUnionBound.finiteUnionEventMass at hunion
+    unfold FormalSLT.Probability.FiniteUnionBound.finiteEventMassSum at hunion
+    unfold FormalSLT.Probability.FiniteUnionBound.finiteEventMass at hunion
+    simp_rw [← Finset.sum_filter] at hunion
+    simpa using hunion
+  have hbucket :
+      ∀ g : γ, (∑ ω ∈ bucketEvent g, ν ω) ≤ confidenceOf g := by
+    intro g
+    simpa [bucketEvent] using
+      finitePACBayesBernstein_fixedLambda_badEventMass_le_delta
+        (ν := ν) hν hπ (lambdaOf g) (scaleOf g) (confidenceOf g)
+        (hlambda g) (hscale g) (hconfidenceOf g) riskFn empiricalRiskFn
+        varianceProxy (hExpected g)
+  have hbucket_sum :
+      (∑ g : γ, ∑ ω ∈ bucketEvent g, ν ω) ≤ ∑ g : γ, confidenceOf g := by
+    exact Finset.sum_le_sum (fun g _hg => hbucket g)
+  exact hmass_subset.trans (hunion_mass.trans hbucket_sum)
+
+/-- Finite-grid Bernstein bad-event bound with an explicit confidence budget. -/
+theorem finitePACBayesBernsteinGrid_badEventMass_le_delta
+    [Fintype Ω] [DecidableEq Ω] [Fintype ι] [Nonempty ι] [Fintype γ]
+    {ν : Ω → ℝ} (hν : IsPMF ν)
+    {π : ι → ℝ} (hπ : IsFullSupportPMF π)
+    (lambdaOf scaleOf confidenceOf : γ → ℝ)
+    (hlambda : ∀ g : γ, 0 < lambdaOf g)
+    (hscale : ∀ g : γ, scaleOf g * lambdaOf g < 1)
+    (hconfidenceOf : ∀ g : γ, 0 < confidenceOf g)
+    (riskFn : ι → ℝ) (empiricalRiskFn : Ω → ι → ℝ)
+    (varianceProxy : ι → ℝ)
+    (hExpected :
+      ∀ g : γ,
+        expectedPriorBernsteinExpMoment ν π (lambdaOf g) (scaleOf g)
+          riskFn empiricalRiskFn varianceProxy ≤ 1)
+    {delta : ℝ} (hgridConfidence : (∑ g : γ, confidenceOf g) ≤ delta) :
+    (∑ ω ∈
+        finitePACBayesBernsteinGridBadSamples π lambdaOf scaleOf confidenceOf
+          riskFn empiricalRiskFn varianceProxy,
+        ν ω) ≤ delta :=
+  (finitePACBayesBernsteinGrid_badEventMass_le_sum_delta
+    (Ω := Ω) (ι := ι) (γ := γ) hν hπ lambdaOf scaleOf confidenceOf
+    hlambda hscale hconfidenceOf riskFn empiricalRiskFn varianceProxy hExpected).trans
+      hgridConfidence
+
+/--
+Finite-grid optimized Bernstein bad-event bound.
+
+The user-supplied `posteriorPenalty` may depend on the posterior. The theorem
+requires a finite grid certificate: each posterior is assigned to a bucket whose
+fixed-parameter Bernstein penalty is no larger than the supplied penalty.
+-/
+theorem finitePACBayesBernsteinGridOptimized_badEventMass_le_sum_delta
+    [Fintype Ω] [DecidableEq Ω] [Fintype ι] [Nonempty ι] [Fintype γ]
+    {ν : Ω → ℝ} (hν : IsPMF ν)
+    {π : ι → ℝ} (hπ : IsFullSupportPMF π)
+    (lambdaOf scaleOf confidenceOf : γ → ℝ)
+    (posteriorPenalty : (ι → ℝ) → ℝ)
+    (hlambda : ∀ g : γ, 0 < lambdaOf g)
+    (hscale : ∀ g : γ, scaleOf g * lambdaOf g < 1)
+    (hconfidenceOf : ∀ g : γ, 0 < confidenceOf g)
+    (riskFn : ι → ℝ) (empiricalRiskFn : Ω → ι → ℝ)
+    (varianceProxy : ι → ℝ)
+    (hgridCovers :
+      ∀ ρ : ι → ℝ, IsPMF ρ →
+        ∃ g : γ,
+          (klDiv ρ π + Real.log (1 / confidenceOf g)) / lambdaOf g +
+              lambdaOf g * posteriorMarginVarianceProxy ρ varianceProxy /
+                (2 * (1 - scaleOf g * lambdaOf g))
+            ≤ posteriorPenalty ρ)
+    (hExpected :
+      ∀ g : γ,
+        expectedPriorBernsteinExpMoment ν π (lambdaOf g) (scaleOf g)
+          riskFn empiricalRiskFn varianceProxy ≤ 1) :
+    (∑ ω ∈
+        finitePACBayesBernsteinPenaltyBadSamples riskFn empiricalRiskFn posteriorPenalty,
+        ν ω) ≤
+      ∑ g : γ, confidenceOf g := by
+  classical
+  have hsubset :
+      finitePACBayesBernsteinPenaltyBadSamples riskFn empiricalRiskFn posteriorPenalty
+        ⊆
+      finitePACBayesBernsteinGridBadSamples π lambdaOf scaleOf confidenceOf
+        riskFn empiricalRiskFn varianceProxy := by
+    intro ω hω
+    rw [finitePACBayesBernsteinPenaltyBadSamples, Finset.mem_filter] at hω
+    rcases hω.2 with ⟨ρ, hρ, hbad⟩
+    rcases hgridCovers ρ hρ with ⟨g, hpenalty⟩
+    rw [finitePACBayesBernsteinGridBadSamples, Finset.mem_filter]
+    refine ⟨Finset.mem_univ ω, g, ρ, hρ, ?_⟩
+    linarith
+  have hmass_subset :
+      (∑ ω ∈
+          finitePACBayesBernsteinPenaltyBadSamples riskFn empiricalRiskFn posteriorPenalty,
+          ν ω)
+        ≤
+      ∑ ω ∈
+        finitePACBayesBernsteinGridBadSamples π lambdaOf scaleOf confidenceOf
+          riskFn empiricalRiskFn varianceProxy,
+        ν ω := by
+    exact Finset.sum_le_sum_of_subset_of_nonneg hsubset (by
+      intro ω _hω _hnot
+      exact hν.nonneg ω)
+  exact hmass_subset.trans
+    (finitePACBayesBernsteinGrid_badEventMass_le_sum_delta
+      (Ω := Ω) (ι := ι) (γ := γ) hν hπ lambdaOf scaleOf confidenceOf
+      hlambda hscale hconfidenceOf riskFn empiricalRiskFn varianceProxy hExpected)
+
+/--
+Finite-grid optimized Bernstein bad-event bound with an explicit total
+confidence budget.
+-/
+theorem finitePACBayesBernsteinGridOptimized_badEventMass_le_delta
+    [Fintype Ω] [DecidableEq Ω] [Fintype ι] [Nonempty ι] [Fintype γ]
+    {ν : Ω → ℝ} (hν : IsPMF ν)
+    {π : ι → ℝ} (hπ : IsFullSupportPMF π)
+    (lambdaOf scaleOf confidenceOf : γ → ℝ)
+    (posteriorPenalty : (ι → ℝ) → ℝ)
+    (hlambda : ∀ g : γ, 0 < lambdaOf g)
+    (hscale : ∀ g : γ, scaleOf g * lambdaOf g < 1)
+    (hconfidenceOf : ∀ g : γ, 0 < confidenceOf g)
+    (riskFn : ι → ℝ) (empiricalRiskFn : Ω → ι → ℝ)
+    (varianceProxy : ι → ℝ)
+    (hgridCovers :
+      ∀ ρ : ι → ℝ, IsPMF ρ →
+        ∃ g : γ,
+          (klDiv ρ π + Real.log (1 / confidenceOf g)) / lambdaOf g +
+              lambdaOf g * posteriorMarginVarianceProxy ρ varianceProxy /
+                (2 * (1 - scaleOf g * lambdaOf g))
+            ≤ posteriorPenalty ρ)
+    (hExpected :
+      ∀ g : γ,
+        expectedPriorBernsteinExpMoment ν π (lambdaOf g) (scaleOf g)
+          riskFn empiricalRiskFn varianceProxy ≤ 1)
+    {delta : ℝ} (hgridConfidence : (∑ g : γ, confidenceOf g) ≤ delta) :
+    (∑ ω ∈
+        finitePACBayesBernsteinPenaltyBadSamples riskFn empiricalRiskFn posteriorPenalty,
+        ν ω) ≤ delta :=
+  (finitePACBayesBernsteinGridOptimized_badEventMass_le_sum_delta
+    (Ω := Ω) (ι := ι) (γ := γ) hν hπ lambdaOf scaleOf confidenceOf
+    posteriorPenalty hlambda hscale hconfidenceOf riskFn empiricalRiskFn
+    varianceProxy hgridCovers hExpected).trans hgridConfidence
+
 end
 
 end FormalSLT.PACBayesBernstein
diff --git a/FormalSLT/PACBayesFiniteProductMGF.lean b/FormalSLT/PACBayesFiniteProductMGF.lean
index 9dfb491..6947403 100644
--- a/FormalSLT/PACBayesFiniteProductMGF.lean
+++ b/FormalSLT/PACBayesFiniteProductMGF.lean
@@ -163,6 +163,43 @@ theorem finiteProduct_mgf_empiricalRiskDeviation_le_of_single
     field_simp [hn_ne]
   exact h_pow.trans_eq h_exp_pow
 
+/--
+Finite product MGF bound from an arbitrary one-coordinate exponential budget.
+
+This is the same product-factorization step as
+`finiteProduct_mgf_empiricalRiskDeviation_le_of_single`, but keeps the
+one-coordinate budget explicit. It is useful for Bernstein/sub-Gamma bounds,
+whose denominator depends on the one-coordinate scale.
+-/
+theorem finiteProduct_mgf_empiricalRiskDeviation_le_exp_of_single
+    {n : ℕ} [Fintype Z] (hn : 0 < n)
+    (p : Z → ℝ) (hp : IsPMF p)
+    (ℓ : ι → Z → ℝ) (i : ι) (lam singleBudget : ℝ)
+    (hsingle :
+      oneCoordinateDeviationMGF (n := n) p ℓ i lam ≤
+        Real.exp singleBudget) :
+    (∑ S : Fin n → Z,
+        finiteProductSampleWeight p S *
+          Real.exp (lam * (finitePopulationRisk p ℓ i - finiteEmpiricalRisk ℓ i S))) ≤
+      Real.exp ((n : ℝ) * singleBudget) := by
+  classical
+  have h_eq :=
+    finiteProduct_mgf_empiricalRiskDeviation_eq_pow
+      (ι := ι) (Z := Z) hn p ℓ i lam
+  rw [h_eq]
+  have hsingle_nonneg : 0 ≤ oneCoordinateDeviationMGF (n := n) p ℓ i lam := by
+    unfold oneCoordinateDeviationMGF
+    exact Finset.sum_nonneg
+      (fun z _hz => mul_nonneg (hp.nonneg z) (le_of_lt (Real.exp_pos _)))
+  have h_pow :
+      (oneCoordinateDeviationMGF (n := n) p ℓ i lam) ^ n ≤
+        (Real.exp singleBudget) ^ n :=
+    pow_le_pow_left₀ hsingle_nonneg hsingle n
+  have h_exp_pow :
+      (Real.exp singleBudget) ^ n = Real.exp ((n : ℝ) * singleBudget) := by
+    rw [← Real.exp_nat_mul]
+  exact h_pow.trans_eq h_exp_pow
+
 /--
 Prior-averaged finite product MGF bound.
 
diff --git a/FormalSLT/PACBayesMargin.lean b/FormalSLT/PACBayesMargin.lean
new file mode 100644
index 0000000..27ab9af
--- /dev/null
+++ b/FormalSLT/PACBayesMargin.lean
@@ -0,0 +1,642 @@
+/-
+Copyright (c) 2026 Robby Sneiderman. All rights reserved.
+Released under MIT license as described in the file LICENSE.
+Authors: Robby Sneiderman
+-/
+import FormalSLT.PACBayesBernstein
+import FormalSLT.PACBayesFiniteProductMGF
+import FormalSLT.Probability.BernsteinMGF
+
+/-!
+# Finite PAC-Bayes classifier-margin adapter
+
+This module specializes the finite PAC-Bayes Bernstein shell to classifier
+margin losses on a finite data domain.
+
+The supplied-certificate adapter remains available for arbitrary normalized
+Bernstein prior-moment certificates. For iid finite classifier-margin losses,
+this file also derives that certificate internally from the finite product law
+and the Bennett/sub-Gamma MGF layer.
+
+## Main declarations
+
+* `classifierMarginLoss` — thresholded margin loss in `{0,1}`.
+* `classifierMarginPopulationRisk` and `classifierMarginEmpiricalRisk` —
+  concrete finite risk functions for margin loss.
+* `classifierMarginVarianceProxy` — the margin-risk proxy used by the
+  Bernstein shell.
+* `expectedPriorBernsteinExpMoment_classifierMargin_iid_le_one` — iid finite
+  classifier-margin losses discharge the normalized Bernstein prior-moment
+  certificate.
+* `finitePACBayesClassifierMarginBernstein_iid_badEventMass_le_delta` — the
+  end-to-end finite iid PAC-Bayes Bernstein theorem for classifier-margin
+  losses and a fixed admissible `lambda`.
+* `finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_delta` —
+  the finite-grid version, with a caller-supplied grid certificate over
+  admissible Bernstein parameters.
+-/
+
+namespace FormalSLT.PACBayesMargin
+
+open Finset Real BigOperators
+open FormalSLT.PACBayesKL
+open FormalSLT.PACBayesFiniteProductMGF
+open FormalSLT.PACBayesBernstein
+open FormalSLT.Probability.BernsteinMGF
+
+noncomputable section
+
+variable {ι Z : Type*}
+
+local instance (p : Prop) : Decidable p := Classical.propDecidable p
+
+/-! ### Classifier margin losses -/
+
+/-- Encode a Boolean label as `+1` for `true` and `-1` for `false`. -/
+def signedBool (y : Bool) : ℝ :=
+  if y then 1 else -1
+
+/-- Signed margin of hypothesis `i` on example `z`. -/
+def classifierMargin
+    (score : ι → Z → ℝ) (label : Z → Bool) (i : ι) (z : Z) : ℝ :=
+  signedBool (label z) * score i z
+
+/--
+Thresholded classifier margin loss.
+
+The loss is `1` when the signed margin is at most `threshold`, and `0`
+otherwise. For `threshold = 0`, this is the usual error-or-no-positive-margin
+indicator for real-valued scores with Boolean labels.
+-/
+def classifierMarginLoss
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (i : ι) (z : Z) : ℝ :=
+  if classifierMargin score label i z ≤ threshold then 1 else 0
+
+/-- Classifier margin loss is always in `[0,1]`. -/
+lemma classifierMarginLoss_mem_Icc_zero_one
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (i : ι) (z : Z) :
+    classifierMarginLoss threshold score label i z ∈ Set.Icc (0 : ℝ) 1 := by
+  unfold classifierMarginLoss
+  by_cases h : classifierMargin score label i z ≤ threshold
+  · simp [h]
+  · simp [h]
+
+/-- Finite population risk of a thresholded classifier margin loss. -/
+def classifierMarginPopulationRisk [Fintype Z]
+    (p : Z → ℝ) (threshold : ℝ)
+    (score : ι → Z → ℝ) (label : Z → Bool) (i : ι) : ℝ :=
+  finitePopulationRisk p (classifierMarginLoss threshold score label) i
+
+/-- Finite empirical risk of a thresholded classifier margin loss. -/
+def classifierMarginEmpiricalRisk {n : ℕ}
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (i : ι) (S : Fin n → Z) : ℝ :=
+  finiteEmpiricalRisk (classifierMarginLoss threshold score label) i S
+
+/-- Sample-indexed empirical-risk function for the PAC-Bayes Bernstein shell. -/
+def classifierMarginEmpiricalRiskFn {n : ℕ}
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (S : Fin n → Z) (i : ι) : ℝ :=
+  classifierMarginEmpiricalRisk threshold score label i S
+
+/--
+Concrete margin-risk variance proxy.
+
+For an indicator margin loss, the second moment equals the population risk, so
+this is the natural Bernstein variance proxy exposed to the PAC-Bayes layer.
+The equality is kept as a definition here; deriving the normalized prior MGF
+from this proxy is a separate concentration step.
+-/
+def classifierMarginVarianceProxy [Fintype Z]
+    (p : Z → ℝ) (threshold : ℝ)
+    (score : ι → Z → ℝ) (label : Z → Bool) (i : ι) : ℝ :=
+  classifierMarginPopulationRisk p threshold score label i
+
+/-- The margin variance proxy is definitionally the margin population risk. -/
+lemma classifierMarginVarianceProxy_eq_populationRisk [Fintype Z]
+    (p : Z → ℝ) (threshold : ℝ)
+    (score : ι → Z → ℝ) (label : Z → Bool) (i : ι) :
+    classifierMarginVarianceProxy p threshold score label i =
+      classifierMarginPopulationRisk p threshold score label i := rfl
+
+/-- Sample-average variance proxy for iid classifier-margin empirical risks. -/
+def classifierMarginSampleVarianceProxy {n : ℕ} [Fintype Z]
+    (p : Z → ℝ) (threshold : ℝ)
+    (score : ι → Z → ℝ) (label : Z → Bool) (i : ι) : ℝ :=
+  classifierMarginVarianceProxy p threshold score label i / (n : ℝ)
+
+/-- Posterior-averaged classifier-margin variance proxy. -/
+def posteriorClassifierMarginVarianceProxy [Fintype Z] [Fintype ι]
+    (ρ : ι → ℝ) (p : Z → ℝ) (threshold : ℝ)
+    (score : ι → Z → ℝ) (label : Z → Bool) : ℝ :=
+  posteriorMarginVarianceProxy ρ
+    (classifierMarginVarianceProxy p threshold score label)
+
+/-- Posterior-averaged iid sample classifier-margin variance proxy. -/
+def posteriorClassifierMarginSampleVarianceProxy {n : ℕ} [Fintype Z] [Fintype ι]
+    (ρ : ι → ℝ) (p : Z → ℝ) (threshold : ℝ)
+    (score : ι → Z → ℝ) (label : Z → Bool) : ℝ :=
+  posteriorMarginVarianceProxy ρ
+    (classifierMarginSampleVarianceProxy (n := n) p threshold score label)
+
+lemma classifierMarginPopulationRisk_mem_Icc_zero_one [Fintype Z]
+    (p : Z → ℝ) (hp : IsPMF p)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool) (i : ι) :
+    classifierMarginPopulationRisk p threshold score label i ∈ Set.Icc (0 : ℝ) 1 := by
+  constructor
+  · unfold classifierMarginPopulationRisk finitePopulationRisk
+    exact Finset.sum_nonneg
+      (fun z _hz =>
+        mul_nonneg (hp.nonneg z)
+          (classifierMarginLoss_mem_Icc_zero_one threshold score label i z).1)
+  · unfold classifierMarginPopulationRisk finitePopulationRisk
+    calc
+      (∑ z : Z, p z * classifierMarginLoss threshold score label i z)
+          ≤ ∑ z : Z, p z * 1 := by
+            apply Finset.sum_le_sum
+            intro z _hz
+            exact mul_le_mul_of_nonneg_left
+              (classifierMarginLoss_mem_Icc_zero_one threshold score label i z).2
+              (hp.nonneg z)
+      _ = 1 := by simp [hp.sum_one]
+
+lemma classifierMarginLoss_sq
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (i : ι) (z : Z) :
+    classifierMarginLoss threshold score label i z ^ 2 =
+      classifierMarginLoss threshold score label i z := by
+  unfold classifierMarginLoss
+  by_cases h : classifierMargin score label i z ≤ threshold
+  · simp [h]
+  · simp [h]
+
+lemma classifierMarginVariance_le_risk [Fintype Z]
+    (p : Z → ℝ) (hp : IsPMF p)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool) (i : ι) :
+    (∑ z : Z,
+        p z *
+          (classifierMarginPopulationRisk p threshold score label i -
+            classifierMarginLoss threshold score label i z) ^ 2)
+      ≤ classifierMarginVarianceProxy p threshold score label i := by
+  classical
+  let R := classifierMarginPopulationRisk p threshold score label i
+  let loss : Z → ℝ := classifierMarginLoss threshold score label i
+  have hsum_loss : (∑ z : Z, p z * loss z) = R := by
+    simp [R, loss, classifierMarginPopulationRisk, finitePopulationRisk]
+  have hsum_loss_sq : (∑ z : Z, p z * loss z ^ 2) = R := by
+    calc
+      (∑ z : Z, p z * loss z ^ 2)
+          = ∑ z : Z, p z * loss z := by
+              apply Finset.sum_congr rfl
+              intro z _hz
+              rw [classifierMarginLoss_sq]
+      _ = R := hsum_loss
+  have hvariance_eq :
+      (∑ z : Z, p z * (R - loss z) ^ 2) = R - R ^ 2 := by
+    calc
+      (∑ z : Z, p z * (R - loss z) ^ 2)
+          =
+        ∑ z : Z, (p z * R ^ 2 - 2 * R * (p z * loss z) +
+          p z * loss z ^ 2) := by
+            apply Finset.sum_congr rfl
+            intro z _hz
+            ring
+      _ =
+        (∑ z : Z, p z * R ^ 2) -
+          (∑ z : Z, 2 * R * (p z * loss z)) +
+          ∑ z : Z, p z * loss z ^ 2 := by
+            rw [Finset.sum_add_distrib, Finset.sum_sub_distrib]
+      _ =
+        (∑ z : Z, p z * R ^ 2) -
+          2 * R * (∑ z : Z, p z * loss z) +
+          ∑ z : Z, p z * loss z ^ 2 := by
+            congr 2
+            rw [Finset.mul_sum]
+      _ =
+        R ^ 2 * (∑ z : Z, p z) -
+          2 * R * (∑ z : Z, p z * loss z) +
+          ∑ z : Z, p z * loss z ^ 2 := by
+            congr 2
+            calc
+              (∑ z : Z, p z * R ^ 2)
+                  = ∑ z : Z, R ^ 2 * p z := by
+                      apply Finset.sum_congr rfl
+                      intro z _hz
+                      ring
+              _ = R ^ 2 * (∑ z : Z, p z) := by
+                      rw [Finset.mul_sum]
+      _ = R - R ^ 2 := by
+            rw [hp.sum_one, hsum_loss, hsum_loss_sq]
+            ring
+  have hR_nonneg : 0 ≤ R :=
+    (classifierMarginPopulationRisk_mem_Icc_zero_one
+      p hp threshold score label i).1
+  calc
+    (∑ z : Z,
+        p z *
+          (classifierMarginPopulationRisk p threshold score label i -
+            classifierMarginLoss threshold score label i z) ^ 2)
+        = ∑ z : Z, p z * (R - loss z) ^ 2 := by
+            simp [R, loss]
+    _ = R - R ^ 2 := hvariance_eq
+    _ ≤ R := by nlinarith [sq_nonneg R]
+    _ = classifierMarginVarianceProxy p threshold score label i := by
+            simp [classifierMarginVarianceProxy, R]
+
+theorem oneCoordinate_classifierMarginLoss_mgf_subgamma
+    {n : ℕ} [Fintype Z] (hn : 0 < n)
+    (p : Z → ℝ) (hp : IsPMF p)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (i : ι) (lambda : ℝ)
+    (hlambda : 0 ≤ lambda) (hlambda_bound : lambda * (n : ℝ)⁻¹ < 3) :
+    oneCoordinateDeviationMGF (n := n) p
+        (classifierMarginLoss threshold score label) i lambda
+      ≤
+    Real.exp
+      ((lambda * (n : ℝ)⁻¹) ^ 2 *
+          classifierMarginVarianceProxy p threshold score label i /
+        (2 * (1 - (lambda * (n : ℝ)⁻¹) / 3))) := by
+  classical
+  let R := classifierMarginPopulationRisk p threshold score label i
+  let loss : Z → ℝ := classifierMarginLoss threshold score label i
+  have hcenter : (∑ z : Z, p z * (R - loss z)) = 0 := by
+    have hexp :
+        (∑ z : Z, p z * (R - loss z))
+          = R * (∑ z : Z, p z) - ∑ z : Z, p z * loss z := by
+      rw [Finset.mul_sum, ← Finset.sum_sub_distrib]
+      exact Finset.sum_congr rfl (fun z _hz => by ring)
+    rw [hexp, hp.sum_one]
+    simp [R, loss, classifierMarginPopulationRisk, finitePopulationRisk]
+  have hbound : ∀ z : Z, R - loss z ≤ 1 := by
+    intro z
+    have hR_le :
+        R ≤ 1 :=
+      (classifierMarginPopulationRisk_mem_Icc_zero_one
+        p hp threshold score label i).2
+    have hloss_nonneg :
+        0 ≤ loss z :=
+      (classifierMarginLoss_mem_Icc_zero_one threshold score label i z).1
+    linarith
+  have hvar :
+      (∑ z : Z, p z * (R - loss z) ^ 2)
+        ≤ classifierMarginVarianceProxy p threshold score label i := by
+    simpa [R, loss] using
+      (classifierMarginVariance_le_risk p hp threshold score label i)
+  have h :=
+    bennett_mgf_subgamma p (fun z : Z => R - loss z)
+      (b := 1)
+      (v := classifierMarginVarianceProxy p threshold score label i)
+      (lam := lambda * (n : ℝ)⁻¹)
+      (by norm_num) (by positivity) (by simpa using hlambda_bound)
+      hp.nonneg hp.sum_one hcenter hbound hvar
+  simpa [oneCoordinateDeviationMGF, R, loss, classifierMarginPopulationRisk] using h
+
+theorem finiteProduct_classifierMarginLoss_mgf_subgamma
+    {n : ℕ} [Fintype Z] (hn : 0 < n)
+    (p : Z → ℝ) (hp : IsPMF p)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (i : ι) (lambda : ℝ)
+    (hlambda : 0 ≤ lambda) (hlambda_bound : lambda * (n : ℝ)⁻¹ < 3) :
+    (∑ S : Fin n → Z,
+        finiteProductSampleWeight p S *
+          Real.exp
+            (lambda *
+              (classifierMarginPopulationRisk p threshold score label i -
+                classifierMarginEmpiricalRisk threshold score label i S)))
+      ≤
+    Real.exp
+      (lambda ^ 2 *
+          classifierMarginSampleVarianceProxy (n := n) p threshold score label i /
+        (2 * (1 - (3 * (n : ℝ))⁻¹ * lambda))) := by
+  classical
+  let singleBudget : ℝ :=
+    (lambda * (n : ℝ)⁻¹) ^ 2 *
+        classifierMarginVarianceProxy p threshold score label i /
+      (2 * (1 - (lambda * (n : ℝ)⁻¹) / 3))
+  have hsingle :
+      oneCoordinateDeviationMGF (n := n) p
+          (classifierMarginLoss threshold score label) i lambda ≤
+        Real.exp singleBudget := by
+    simpa [singleBudget] using
+      (oneCoordinate_classifierMarginLoss_mgf_subgamma
+        (n := n) hn p hp threshold score label i lambda hlambda hlambda_bound)
+  have hprod :=
+    finiteProduct_mgf_empiricalRiskDeviation_le_exp_of_single
+      (ι := ι) (Z := Z) hn p hp
+      (classifierMarginLoss threshold score label) i lambda singleBudget hsingle
+  refine hprod.trans_eq ?_
+  congr 1
+  have hn_ne : (n : ℝ) ≠ 0 := by exact_mod_cast (Nat.ne_of_gt hn)
+  have hden₁ : 2 * (1 - (lambda * (n : ℝ)⁻¹) / 3) ≠ 0 := by
+    have hpos : 0 < 1 - (lambda * (n : ℝ)⁻¹) / 3 := by linarith
+    nlinarith
+  have hden₂ : 2 * (1 - (3 * (n : ℝ))⁻¹ * lambda) ≠ 0 := by
+    have hn_pos : 0 < (n : ℝ) := by exact_mod_cast hn
+    have hlt : lambda < 3 * (n : ℝ) := by
+      rw [← div_lt_iff₀ hn_pos]
+      simpa [div_eq_mul_inv] using hlambda_bound
+    have hpos : 0 < 1 - (3 * (n : ℝ))⁻¹ * lambda := by
+      rw [sub_pos]
+      rw [inv_mul_lt_iff₀ (by positivity : 0 < 3 * (n : ℝ))]
+      nlinarith
+    nlinarith
+  simp [singleBudget, classifierMarginSampleVarianceProxy]
+  field_simp [hn_ne, hden₁, hden₂]
+
+theorem expectedPriorBernsteinExpMoment_classifierMargin_iid_le_one
+    {n : ℕ} [Fintype Z] [Fintype ι]
+    (hn : 0 < n)
+    (p : Z → ℝ) (hp : IsPMF p)
+    {π : ι → ℝ} (hπ : IsPMF π)
+    (lambda : ℝ)
+    (hlambda : 0 ≤ lambda) (hlambda_bound : lambda * (n : ℝ)⁻¹ < 3)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool) :
+    expectedPriorBernsteinExpMoment
+        (finiteProductSampleWeight (n := n) p) π lambda ((3 * (n : ℝ))⁻¹)
+        (classifierMarginPopulationRisk p threshold score label)
+        (classifierMarginEmpiricalRiskFn threshold score label)
+        (classifierMarginSampleVarianceProxy (n := n) p threshold score label)
+      ≤ 1 := by
+  classical
+  apply expectedPriorBernsteinExpMoment_le_one_of_mgf_bound hπ
+  intro i
+  simpa [classifierMarginEmpiricalRiskFn] using
+    (finiteProduct_classifierMarginLoss_mgf_subgamma
+      (n := n) hn p hp threshold score label i lambda hlambda hlambda_bound)
+
+/-! ### Product sample law -/
+
+/-- Finite iid product sample weights form a PMF when the base mass is a PMF. -/
+theorem finiteProductSampleWeight_isPMF
+    {n : ℕ} [Fintype Z] {p : Z → ℝ} (hp : IsPMF p) :
+    IsPMF (finiteProductSampleWeight (n := n) p) where
+  nonneg := by
+    intro S
+    unfold finiteProductSampleWeight
+    exact Finset.prod_nonneg (fun k _hk => hp.nonneg (S k))
+  sum_one := by
+    unfold finiteProductSampleWeight
+    calc
+      (∑ S : Fin n → Z, ∏ k : Fin n, p (S k))
+          = ∏ _k : Fin n, ∑ z : Z, p z := by
+            exact (Fintype.prod_sum (f := fun _k : Fin n => p)).symm
+      _ = 1 := by simp [hp.sum_one]
+
+/-! ### PAC-Bayes Bernstein specialization -/
+
+/-- Bad samples for the concrete classifier-margin PAC-Bayes Bernstein event. -/
+def finitePACBayesClassifierMarginBernsteinBadSamples
+    {n : ℕ} [Fintype Z] [Fintype ι]
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (p : Z → ℝ) (posteriorPenalty : (ι → ℝ) → ℝ) :
+    Finset (Fin n → Z) :=
+  finitePACBayesBernsteinPenaltyBadSamples
+    (classifierMarginPopulationRisk p threshold score label)
+    (classifierMarginEmpiricalRiskFn threshold score label)
+    posteriorPenalty
+
+/--
+Finite PAC-Bayes Bernstein theorem specialized to classifier margin losses.
+
+The theorem states that, under the same explicit finite-parameter certificates
+as `finitePACBayesBernsteinMargin_badEventMass_le_delta`, the iid sample mass
+of classifier-margin bad samples is at most `delta`.
+-/
+theorem finitePACBayesClassifierMarginBernstein_badEventMass_le_delta
+    {n : ℕ} [Fintype Z] [Fintype ι] [Nonempty ι]
+    (p : Z → ℝ) (hp : IsPMF p)
+    {π : ι → ℝ} (hπ : IsFullSupportPMF π)
+    (lambda scale delta : ℝ)
+    (hlambda : 0 < lambda) (hscale : scale * lambda < 1)
+    (hdelta : 0 < delta)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (complexityOf : (ι → ℝ) → ℝ)
+    (hcomplexity :
+      ∀ ρ : ι → ℝ, IsPMF ρ →
+        klDiv ρ π + Real.log (1 / delta) ≤ complexityOf ρ)
+    (hpenalty :
+      ∀ ρ : ι → ℝ, IsPMF ρ →
+        complexityOf ρ / lambda +
+            lambda *
+                posteriorClassifierMarginVarianceProxy ρ p threshold score label /
+              (2 * (1 - scale * lambda))
+          ≤
+        Real.sqrt
+            (2 * posteriorClassifierMarginVarianceProxy ρ p threshold score label *
+              complexityOf ρ) +
+          scale * complexityOf ρ)
+    (hExpected :
+      expectedPriorBernsteinExpMoment
+        (finiteProductSampleWeight (n := n) p) π lambda scale
+        (classifierMarginPopulationRisk p threshold score label)
+        (classifierMarginEmpiricalRiskFn threshold score label)
+        (classifierMarginVarianceProxy p threshold score label) ≤ 1) :
+    (∑ S ∈
+        finitePACBayesClassifierMarginBernsteinBadSamples
+          (n := n) threshold score label p
+          (fun ρ =>
+            Real.sqrt
+                (2 * posteriorClassifierMarginVarianceProxy ρ p threshold score label *
+                  complexityOf ρ) +
+              scale * complexityOf ρ),
+        finiteProductSampleWeight p S) ≤ delta := by
+  classical
+  simpa [finitePACBayesClassifierMarginBernsteinBadSamples,
+    posteriorClassifierMarginVarianceProxy]
+    using
+      (finitePACBayesBernsteinMargin_badEventMass_le_delta
+        (Ω := Fin n → Z) (ι := ι)
+        (ν := finiteProductSampleWeight (n := n) p)
+        (hν := finiteProductSampleWeight_isPMF (n := n) hp)
+        (π := π) hπ lambda scale delta hlambda hscale hdelta
+        (classifierMarginPopulationRisk p threshold score label)
+        (classifierMarginEmpiricalRiskFn threshold score label)
+        (classifierMarginVarianceProxy p threshold score label)
+        complexityOf hcomplexity hpenalty hExpected)
+
+/--
+Finite iid PAC-Bayes Bernstein theorem for classifier-margin losses.
+
+This version derives the normalized Bernstein prior-moment certificate from the
+iid product sample law and the finite Bennett/sub-Gamma bound for thresholded
+margin losses. The variance proxy is scaled by the sample size.
+-/
+theorem finitePACBayesClassifierMarginBernstein_iid_badEventMass_le_delta
+    {n : ℕ} [Fintype Z] [Fintype ι] [Nonempty ι]
+    (hn : 0 < n)
+    (p : Z → ℝ) (hp : IsPMF p)
+    {π : ι → ℝ} (hπ : IsFullSupportPMF π)
+    (lambda delta : ℝ)
+    (hlambda : 0 < lambda)
+    (hlambda_bound : lambda * (n : ℝ)⁻¹ < 3)
+    (hdelta : 0 < delta)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (complexityOf : (ι → ℝ) → ℝ)
+    (hcomplexity :
+      ∀ ρ : ι → ℝ, IsPMF ρ →
+        klDiv ρ π + Real.log (1 / delta) ≤ complexityOf ρ)
+    (hpenalty :
+      ∀ ρ : ι → ℝ, IsPMF ρ →
+        complexityOf ρ / lambda +
+            lambda *
+                posteriorClassifierMarginSampleVarianceProxy
+                  (n := n) ρ p threshold score label /
+              (2 * (1 - (3 * (n : ℝ))⁻¹ * lambda))
+          ≤
+        Real.sqrt
+            (2 *
+              posteriorClassifierMarginSampleVarianceProxy
+                (n := n) ρ p threshold score label *
+              complexityOf ρ) +
+          (3 * (n : ℝ))⁻¹ * complexityOf ρ) :
+    (∑ S ∈
+        finitePACBayesClassifierMarginBernsteinBadSamples
+          (n := n) threshold score label p
+          (fun ρ =>
+            Real.sqrt
+                (2 *
+                  posteriorClassifierMarginSampleVarianceProxy
+                    (n := n) ρ p threshold score label *
+                  complexityOf ρ) +
+              (3 * (n : ℝ))⁻¹ * complexityOf ρ),
+        finiteProductSampleWeight p S) ≤ delta := by
+  classical
+  have hExpected :
+      expectedPriorBernsteinExpMoment
+        (finiteProductSampleWeight (n := n) p) π lambda ((3 * (n : ℝ))⁻¹)
+        (classifierMarginPopulationRisk p threshold score label)
+        (classifierMarginEmpiricalRiskFn threshold score label)
+        (classifierMarginSampleVarianceProxy (n := n) p threshold score label) ≤ 1 := by
+    exact
+      expectedPriorBernsteinExpMoment_classifierMargin_iid_le_one
+        (n := n) hn p hp hπ.toIsPMF lambda hlambda.le hlambda_bound
+        threshold score label
+  have hscale : (3 * (n : ℝ))⁻¹ * lambda < 1 := by
+    have hn_pos : 0 < (n : ℝ) := by exact_mod_cast hn
+    have hlt : lambda < 3 * (n : ℝ) := by
+      rw [← div_lt_iff₀ hn_pos]
+      simpa [div_eq_mul_inv] using hlambda_bound
+    rw [inv_mul_lt_iff₀ (by positivity : 0 < 3 * (n : ℝ))]
+    nlinarith
+  simpa [finitePACBayesClassifierMarginBernsteinBadSamples,
+    posteriorClassifierMarginSampleVarianceProxy]
+    using
+      (finitePACBayesBernsteinMargin_badEventMass_le_delta
+        (Ω := Fin n → Z) (ι := ι)
+        (ν := finiteProductSampleWeight (n := n) p)
+        (hν := finiteProductSampleWeight_isPMF (n := n) hp)
+        (π := π) hπ lambda ((3 * (n : ℝ))⁻¹) delta hlambda
+        hscale hdelta
+        (classifierMarginPopulationRisk p threshold score label)
+        (classifierMarginEmpiricalRiskFn threshold score label)
+        (classifierMarginSampleVarianceProxy (n := n) p threshold score label)
+        complexityOf hcomplexity hpenalty hExpected)
+
+/--
+Finite-grid iid PAC-Bayes Bernstein theorem for classifier-margin losses.
+
+Each grid index supplies an admissible Bernstein parameter and confidence
+allocation. A caller supplies the finite grid certificate assigning each
+posterior to a bucket whose fixed-parameter Bernstein penalty is no larger than
+`posteriorPenalty`.
+-/
+theorem finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_sum_delta
+    {n : ℕ} [Fintype Z] [Fintype ι] [Nonempty ι] [Fintype γ]
+    (hn : 0 < n)
+    (p : Z → ℝ) (hp : IsPMF p)
+    {π : ι → ℝ} (hπ : IsFullSupportPMF π)
+    (lambdaOf confidenceOf : γ → ℝ)
+    (hlambda : ∀ g : γ, 0 < lambdaOf g)
+    (hlambda_bound : ∀ g : γ, lambdaOf g * (n : ℝ)⁻¹ < 3)
+    (hconfidenceOf : ∀ g : γ, 0 < confidenceOf g)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (posteriorPenalty : (ι → ℝ) → ℝ)
+    (hgridCovers :
+      ∀ ρ : ι → ℝ, IsPMF ρ →
+        ∃ g : γ,
+          (klDiv ρ π + Real.log (1 / confidenceOf g)) / lambdaOf g +
+              lambdaOf g *
+                  posteriorClassifierMarginSampleVarianceProxy
+                    (n := n) ρ p threshold score label /
+                (2 * (1 - (3 * (n : ℝ))⁻¹ * lambdaOf g))
+            ≤ posteriorPenalty ρ) :
+    (∑ S ∈
+        finitePACBayesClassifierMarginBernsteinBadSamples
+          (n := n) threshold score label p posteriorPenalty,
+        finiteProductSampleWeight p S) ≤
+      ∑ g : γ, confidenceOf g := by
+  classical
+  have hscale : ∀ g : γ, (3 * (n : ℝ))⁻¹ * lambdaOf g < 1 := by
+    intro g
+    have hn_pos : 0 < (n : ℝ) := by exact_mod_cast hn
+    have hlt : lambdaOf g < 3 * (n : ℝ) := by
+      rw [← div_lt_iff₀ hn_pos]
+      simpa [div_eq_mul_inv] using hlambda_bound g
+    rw [inv_mul_lt_iff₀ (by positivity : 0 < 3 * (n : ℝ))]
+    nlinarith
+  have hExpected :
+      ∀ g : γ,
+        expectedPriorBernsteinExpMoment
+          (finiteProductSampleWeight (n := n) p) π (lambdaOf g)
+          ((3 * (n : ℝ))⁻¹)
+          (classifierMarginPopulationRisk p threshold score label)
+          (classifierMarginEmpiricalRiskFn threshold score label)
+          (classifierMarginSampleVarianceProxy (n := n) p threshold score label) ≤ 1 := by
+    intro g
+    exact
+      expectedPriorBernsteinExpMoment_classifierMargin_iid_le_one
+        (n := n) hn p hp hπ.toIsPMF (lambdaOf g) (hlambda g).le
+        (hlambda_bound g) threshold score label
+  simpa [finitePACBayesClassifierMarginBernsteinBadSamples,
+    posteriorClassifierMarginSampleVarianceProxy]
+    using
+      (finitePACBayesBernsteinGridOptimized_badEventMass_le_sum_delta
+        (Ω := Fin n → Z) (ι := ι) (γ := γ)
+        (ν := finiteProductSampleWeight (n := n) p)
+        (hν := finiteProductSampleWeight_isPMF (n := n) hp)
+        (π := π) hπ lambdaOf (fun _g : γ => (3 * (n : ℝ))⁻¹) confidenceOf
+        posteriorPenalty hlambda hscale hconfidenceOf
+        (classifierMarginPopulationRisk p threshold score label)
+        (classifierMarginEmpiricalRiskFn threshold score label)
+        (classifierMarginSampleVarianceProxy (n := n) p threshold score label)
+        hgridCovers hExpected)
+
+/--
+Finite-grid iid PAC-Bayes Bernstein theorem for classifier-margin losses with
+an explicit total confidence budget.
+-/
+theorem finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_delta
+    {n : ℕ} [Fintype Z] [Fintype ι] [Nonempty ι] [Fintype γ]
+    (hn : 0 < n)
+    (p : Z → ℝ) (hp : IsPMF p)
+    {π : ι → ℝ} (hπ : IsFullSupportPMF π)
+    (lambdaOf confidenceOf : γ → ℝ)
+    (hlambda : ∀ g : γ, 0 < lambdaOf g)
+    (hlambda_bound : ∀ g : γ, lambdaOf g * (n : ℝ)⁻¹ < 3)
+    (hconfidenceOf : ∀ g : γ, 0 < confidenceOf g)
+    (threshold : ℝ) (score : ι → Z → ℝ) (label : Z → Bool)
+    (posteriorPenalty : (ι → ℝ) → ℝ)
+    (hgridCovers :
+      ∀ ρ : ι → ℝ, IsPMF ρ →
+        ∃ g : γ,
+          (klDiv ρ π + Real.log (1 / confidenceOf g)) / lambdaOf g +
+              lambdaOf g *
+                  posteriorClassifierMarginSampleVarianceProxy
+                    (n := n) ρ p threshold score label /
+                (2 * (1 - (3 * (n : ℝ))⁻¹ * lambdaOf g))
+            ≤ posteriorPenalty ρ)
+    {delta : ℝ} (hgridConfidence : (∑ g : γ, confidenceOf g) ≤ delta) :
+    (∑ S ∈
+        finitePACBayesClassifierMarginBernsteinBadSamples
+          (n := n) threshold score label p posteriorPenalty,
+        finiteProductSampleWeight p S) ≤ delta :=
+  (finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_sum_delta
+    (n := n) (Z := Z) (ι := ι) (γ := γ)
+    hn p hp hπ lambdaOf confidenceOf hlambda hlambda_bound hconfidenceOf
+    threshold score label posteriorPenalty hgridCovers).trans hgridConfidence
+
+end
+
+end FormalSLT.PACBayesMargin
diff --git a/FormalSLT/Test/PACBayesBernsteinTest.lean b/FormalSLT/Test/PACBayesBernsteinTest.lean
index 5609928..347d8a9 100644
--- a/FormalSLT/Test/PACBayesBernsteinTest.lean
+++ b/FormalSLT/Test/PACBayesBernsteinTest.lean
@@ -19,6 +19,7 @@ namespace FormalSLT.PACBayesBernstein.Test
 #check FormalSLT.PACBayesBernstein.posteriorMarginVarianceProxy
 #check FormalSLT.PACBayesBernstein.priorBernsteinExpMoment
 #check FormalSLT.PACBayesBernstein.expectedPriorBernsteinExpMoment
+#check FormalSLT.PACBayesBernstein.expectedPriorBernsteinExpMoment_le_one_of_mgf_bound
 #check FormalSLT.PACBayesBernstein.priorBernsteinExpMomentTailMass
 #check FormalSLT.PACBayesBernstein.priorBernsteinExpMoment_nonneg
 #check FormalSLT.PACBayesBernstein.priorBernsteinExpMoment_tailMass_le_expected_div
@@ -30,4 +31,6 @@ namespace FormalSLT.PACBayesBernstein.Test
 #check FormalSLT.PACBayesBernstein.finitePACBayesBernsteinPenalty_badEventMass_le_delta
 #check FormalSLT.PACBayesBernstein.finitePACBayesBernsteinMargin_badEventMass_le_delta
 
+#print axioms FormalSLT.PACBayesBernstein.expectedPriorBernsteinExpMoment_le_one_of_mgf_bound
+
 end FormalSLT.PACBayesBernstein.Test
diff --git a/README.md b/README.md
index 8f9ef74..272ccfc 100644
--- a/README.md
+++ b/README.md
@@ -7,154 +7,91 @@
 [![Axioms](https://img.shields.io/badge/axioms-propext%2C%20Classical.choice%2C%20Quot.sound-brightgreen.svg)](#audit-commands)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE)
 
-FormalSLT is a Lean 4 library for checking finite-sample statistical learning
-theory proof chains. The current v0.1 surface is organized around two checked
-endpoints:
-
-1. a finite-class, countable-time Hoeffding confidence sequence for `[0,1]`
-   losses; and
-2. a finite-net Dudley bridge for a concrete process indexed by the non-finite
-   unit interval `[0,1]`.
-
-The repository also packages reusable dyadic Dudley API instances for a
-two-point metric space and for finite discrete spaces `Fin n`. Those examples
-are API checks: they show that the finite-net wrapper is reusable outside the
-unit-interval file.
-
-The concentration layer now carries the sharp McDiarmid bounded-differences
-inequality over a homogeneous product measure. For a function whose value
-changes by at most `c k` when coordinate `k` is altered, the library proves the
-one-sided tail `exp(-2ε²/∑ₖcₖ²)` (`mcdiarmid_of_hasBoundedDifferences_sharp`),
-its lower-tail form (`mcdiarmid_of_hasBoundedDifferences_sharp_lower`), and the
-two-sided bound `2·exp(-2ε²/∑ₖcₖ²)`
-(`mcdiarmid_twoSided_of_hasBoundedDifferences_sharp`). The sharp `2B²` exponent
-is propagated into the high-probability Rademacher, finite-class, VC, and
-algorithmic-stability wrappers listed below, replacing the looser Azuma `8B²`
-constant on those paths. The localized-Rademacher wrapper still uses the older
-Azuma constant. The bounded-differences theorem is stated for the same product
-law in each coordinate, not for arbitrary non-iid product spaces.
-
-The PAC-Bayes layer now includes a finite Bernstein margin-proxy shell. It
-adds a supplied per-hypothesis variance proxy, a normalized Bernstein
-prior-moment certificate, fixed-`λ` finite bad-event bounds, and a
-posterior-dependent square-root-plus-linear wrapper. This is a supplied-proxy
-finite shell: it is not yet a concrete classifier-margin extractor, an
-all-real-`λ` optimization theorem, or a continuous hypothesis-space theorem.
-
-**60 `FormalSLT/` Lean files. 85 checked Lean files under `FormalSLT/` and
-`examples/`. 33,427 lines. Zero `sorry`. Zero `admit`. Zero custom axioms.**
-
-Counts are generated with `find FormalSLT -name '*.lean'`, `find FormalSLT
-examples -name '*.lean'`, and `find FormalSLT examples -name '*.lean' -print0 |
-xargs -0 wc -l`.
-
-The printed axiom profile for the v0.1 headline surface stays inside:
+FormalSLT is a compact Lean 4 library for the finite-sample statistical
+learning theory route from empirical risk minimization to VC-style
+generalization bounds, with recent extensions for contraction, linear
+predictors, finite sub-Gaussian chaining, algorithmic stability, and
+finite PAC-Bayes confidence bounds, a conditional sub-gamma MGF extractor,
+sharp bounded-differences/McDiarmid tails, and a first total-bounded finite-net
+bridge for the Dudley lane. The PAC-Bayes layer now also includes a finite iid
+Bernstein classifier-margin theorem for thresholded margin losses.
+
+> **Repository names.** The public home of this project is
+> [`Robby955/FormalSLT`](https://github.com/Robby955/FormalSLT); the same code
+> also lives in the development repo `Robby955/lean-statistical-learning`. Clone
+> instructions and badges below use the public `FormalSLT` name.
+
+<!-- STATS:BEGIN (auto-generated by scripts/repo-stats.sh — run it to refresh, do not edit by hand) -->
+**61 Lean modules. 32,558 lines of Lean. Zero `sorry`. Zero `admit`. Zero custom axioms.**
+<!-- STATS:END -->
+
+Axioms used by the public theorem spine:
 `[propext, Classical.choice, Quot.sound]`.
 
 ![FormalSLT theorem chain](./docs/theorem-chain.svg)
 
-## What v0.1 Proves
-
-| Surface | Main checked endpoint | Checker |
-|---|---|---|
-| Finite-class Hoeffding confidence sequence | `FiniteClassConfidenceSequence.failure_probability_le` | `examples/CheckUniformConvergence.lean` |
-| Unit-interval finite-net Dudley bridge | `unitIntervalRademacherLinearSup_roundedDyadicGrid_dudley_m_bound_prefixFree` | `examples/CheckUnitIntervalDudley.lean` |
-| Packaged finite dyadic Dudley API | `FiniteDyadicDudleyInstance.suppliedSup_dudley_bound` | `examples/CheckV01Usability.lean` |
-| Two-point dyadic Dudley instance | `twoPointDudleyInstance` | `examples/CheckTwoPointDudley.lean` |
-| `Fin n` discrete dyadic Dudley instance | `finDiscreteDudleyInstance` | `examples/CheckFiniteDiscreteDudley.lean` |
-| Two-sided sharp McDiarmid over a homogeneous product measure | `mcdiarmid_twoSided_of_hasBoundedDifferences_sharp` | `FormalSLT/Test/SharpMcDiarmidTest.lean` |
-| PAC-Bayes Bernstein supplied margin-proxy shell | `finitePACBayesBernsteinMargin_badEventMass_le_delta` | `examples/CheckPACBayesBernstein.lean` |
-
-The confidence-sequence chain makes the finite-class and countable-time union
-bound explicit. The Dudley chain connects finite sub-Gaussian chaining to a
-non-finite metric index space by constructing rounded dyadic finite nets and
-checking the supplied supremum for the concrete process
-`X(b,t) = signOfBool b * t`.
-
-## Fast Verification
-
-From the repository root:
-
-```bash
-lake exe cache get
-lake build FormalSLT
-lake env lean examples/CheckV01Usability.lean
-lake env lean examples/CheckUniformConvergence.lean
-lake env lean examples/CheckUnitIntervalDudley.lean
-lake env lean examples/CheckTwoPointDudley.lean
-lake env lean examples/CheckFiniteDiscreteDudley.lean
-lake env lean examples/CheckPACBayesBernstein.lean
-python3 scripts/generate_proof_frontier_manifest.py --check
-```
-
-If `lake` is not on the shell path, use `~/.elan/bin/lake`.
-
-## What v0.1 Does Not Prove
-
-- No full continuous Dudley entropy-integral theorem.
-- No arbitrary measurable-supremum construction over non-finite classes.
-- No general separability theorem.
-- No infinite-class confidence sequence.
-- No martingale or e-process stopping theorem in the v0.1 confidence-sequence
-  surface.
-- No neural-network generalization theorem.
+The core path runs from ERM through Rademacher symmetrization,
+high-probability Rademacher bounds, Massart, Sauer-Shelah, the binary VC
+bridge, finite contraction, linear predictors, finite sub-Gaussian chaining,
+finite Dudley entropy-budget wrappers, finite algorithmic stability, finite
+localized-Rademacher scaffolding, sharp McDiarmid product-measure concentration,
+finite PAC-Bayes KL/DV/MGF, bounded-loss confidence bounds, finite iid
+Bernstein classifier-margin adapters, and total-bounded finite-net adapters for
+the next Dudley steps.
 
 ## Where to start
 
-- **For the v0.1 proof surface:** start with
-  [FormalSLT v0.1 quickstart](./docs/formalslt-v0.1-quickstart.md).
-- **For the v0.1 technical note:** read
-  [FormalSLT v0.1 technical note](./docs/formalslt-v0.1-technical-note.md).
-- **For TheoremPath packaging:** read
-  [TheoremPath FormalSLT v0.1 draft](./docs/theorempath-formalslt-v0.1-page-draft.mdx).
-- **For the live walkthrough:** see
-  [theorempath.com/theorems/formalslt-v0-1](https://theorempath.com/theorems/formalslt-v0-1).
 - **For ML readers:** start with [How to read the proofs](./docs/how-to-read-the-proofs.md),
   then [Intuition](./docs/intuition.md).
 - **For proof structure:** see [Diagrams](./docs/diagrams.md).
 - **For exact theorem names:** use [Theorem map](./docs/theorem-map.md).
-- **For the conditional sub-Gamma extractor:** see
-  [Conditional Sub-Gamma Extractor](./docs/subgamma-extractor.md).
-- **For the non-finite unit-interval Dudley example:** see
-  [Unit-Interval Dudley Example](./docs/unit-interval-dudley.md).
-- **For reusable dyadic Dudley API packaging:** see
-  [FormalSLT v0.1 quickstart](./docs/formalslt-v0.1-quickstart.md).
-- **For the finite discrete dyadic-net family:** see
-  [FormalSLT v0.1 quickstart](./docs/formalslt-v0.1-quickstart.md).
-- **For a generated proof-surface index:** see
-  [Proof frontier manifest](./docs/proof-frontier.md).
 - **For scope and assumptions:** read
-  [Scope and assumptions](./docs/assumptions-and-nonclaims.md).
+  [Assumptions and current boundaries](./docs/assumptions-and-nonclaims.md).
 - **For contributors:** read [Contributing](./CONTRIBUTING.md), then
   [Good first issues](./docs/good-first-issues.md).
 - **For related Lean projects:** see [Related work](./docs/related-work.md).
-  FormalSLT is scoped as a finite-class theorem spine and is complementary to
-  existing empirical-process and Rademacher-generalization formalizations.
-
-## Library Map
-
-FormalSLT records finite-sample statistical learning bounds as Lean theorems
-with explicit hypotheses and constants. Beyond the v0.1 headline chains, the
-library contains checked finite routes for:
-
-- finite-class ERM, Rademacher symmetrization, Massart, Sauer-Shelah, and
-  VC-style sample-complexity wrappers;
-- high-probability Rademacher and VC sample-complexity bounds carrying the
-  sharp McDiarmid `2B²` exponent;
-- the sharp bounded-differences/McDiarmid concentration module
-  (`Concentration.SharpMcDiarmid`): one-sided, lower-tail, and two-sided
-  homogeneous product-measure tails, plus the additive independent special case;
-- finite contraction and linear-predictor Rademacher bounds;
-- finite Bennett/Bernstein and localized-Rademacher scaffolding;
-- conditional sub-Gamma MGF extraction for bounded, conditionally centered
-  increments with an explicit conditional second-moment proxy;
-- finite covering, finite sub-Gaussian chaining, and finite Dudley
-  entropy-budget wrappers;
-- finite algorithmic stability expected-gap and high-probability wrappers;
-- finite PAC-Bayes KL/DV/MGF, bounded-loss confidence bounds, and finite-grid
-  peeling wrappers;
-- finite PAC-Bayes Bernstein margin-proxy bounds with a supplied
-  per-hypothesis variance proxy.
+  FormalSLT is complementary to
+  [YuanheZ/lean-stat-learning-theory](https://github.com/YuanheZ/lean-stat-learning-theory),
+  which builds deeper empirical-process infrastructure; this repo focuses on a
+  readable finite-class theorem spine.
+
+## Why this matters
+
+Finite-sample proofs in statistical learning theory carry assumptions that are
+easy to blur in prose. FormalSLT turns a readable finite-sample route into
+machine-checked Lean statements, so each bound carries its hypotheses,
+constants, and finite-class scope in the theorem signature. The motivations:
+
+- **Reproducibility.** Every constant — the sharp McDiarmid exponent, the
+  `2 * E[Rad]` factor, the `(en/d)^d` Sauer-Shelah closed form — is a Lean
+  term that the kernel re-checks on every build. There is no implicit "without
+  loss of generality" or "up to constants".
+- **Curriculum.** A graduate student reading the README can click through to
+  the exact Lean statement for any bound they have seen on a chalkboard, with
+  the assumptions made explicit in the type signature.
+- **Frontier ML.** As learning theory increasingly feeds back into modern
+  deep-learning analyses (PAC-Bayes generalization, stability, chaining),
+  having a checked finite scaffold is the first step toward formal guarantees
+  on the methods themselves.
+
+## Theorem families
+
+| Family | Main modules | Representative result | Status |
+|---|---|---|---|
+| Finite-class ERM | `Risk`, `ERM`, `Rademacher.ERMGeneralization` | Excess risk controlled by uniform deviation | Verified |
+| Rademacher symmetrization | `GhostSample`, `Rademacher.Symmetrization` | `E[genGap] ≤ 2 * E[Rad]` | Verified |
+| High-probability Rademacher | `Azuma.*`, `Rademacher.HighProbability` | `P(genGap ≥ 2 * E[Rad] + ε) ≤ exp(-ε² n / (2B²))` | Verified |
+| Conditional sub-gamma concentration | `Concentration.SubGamma.Extractor`, `Concentration.SubGamma.BennettBound` | bounded + conditionally-centered + conditional second moment `≤ σ²` ⇒ conditional MGF `≤ exp(σ²λ²/(2(1−bλ/3)))` on `b·λ < 3` | Verified |
+| Massart finite-class bound | `Rademacher.Massart` | `Rad(H,S) ≤ B * sqrt(2 * log card(H) / n)` | Verified |
+| Binary VC route | `VC.SauerShelah`, `VC.BinaryVCBridge`, `VC.SampleComplexity` | VC-style ERM excess-risk tail and closed sample-complexity form via effective classes | Verified |
+| Finite contraction | `Rademacher.Contraction` | `Rad_S(φ ∘ F) ≤ L * Rad_S(F)` for scalar finite samples/classes | Verified |
+| Linear predictors | `Rademacher.LinearPredictor` | `Rad ≤ R * n⁻¹ * sqrt(∑ k, ‖z k‖²)` and `Rad ≤ R * B / sqrt n` | Verified |
+| Localized Rademacher scaffold | `Rademacher.Localized` | Bernstein excess-risk localization embeds into second-moment localized empirical Rademacher complexity | Verified finite scaffold |
+| Finite covering and two-scale chaining | `Covering.Rademacher`, `Covering.DudleyChaining` | ε-net peeling and two-scale finite chaining | Verified |
+| Finite sub-Gaussian chaining foundation | `Covering.FiniteSubGaussianChaining` | finite-max entropy bounds and finite Dudley-style entropy-budget sums | Verified finite infrastructure |
+| Total-bounded Dudley bridge | `Covering.TotalBoundedDudley` | totally bounded metric spaces yield dyadic finite-net schedules, projected finite-net wrappers, truncated interval-integral entropy comparisons, and supplied-supremum / finite-skeleton / pathwise-modulus / epsilonized boundary adapters | Verified bridge |
+| Algorithmic stability | `AlgorithmicStability`, `Stability.BousquetElisseeff` | bounded-differences constants, finite and product-measure expected-gap wrappers with bound `β`, bounded-loss measurability adapters, and bounded-loss Azuma-constant concentration wrappers | Verified finite scaffold |
+| PAC-Bayes finite confidence layer | `PACBayesKL`, `PACBayesMcAllester`, `PACBayesFiniteProductMGF`, `PACBayesBoundedLoss`, `PACBayesBernstein`, `PACBayesMargin` | finite KL/DV change-of-measure, bounded-loss Catoni-style bound, closed PAC-Bayes good-event payoff, fixed-budget McAllester corollary, finite-grid McAllester peeling wrapper, and finite iid Bernstein classifier-margin bad-event bounds | Verified finite layer |
 
 ## Scope and assumptions
 
@@ -165,45 +102,36 @@ The main generalization theorems are intentionally finite and explicit.
 | Hypothesis classes | Finite index types unless a theorem states a separate finite net/family |
 | Samples | Finite iid samples through product measures |
 | Losses/processes | Scalar real-valued, with boundedness or finite sub-Gaussian MGF assumptions |
-| Conditional MGF layer | Bounded, conditionally centered real increments with an explicit conditional second-moment proxy |
-| Constants | High-probability Rademacher and VC sample-complexity bounds use the sharp McDiarmid `2B²` exponent; the localized-Rademacher wrapper still cites the Azuma `8B²` constant |
-| PAC-Bayes Bernstein layer | Finite posterior/prior setting with supplied variance proxy and normalized prior-moment certificate |
-| Chaining | Finite nets/images, finite support/outcome spaces, finite entropy sums; the unit-interval example instantiates the bridge on a non-finite metric index space with explicit finite meshes |
+| Constants | Main Rademacher, VC, stability, and bounded-differences wrappers use the sharp McDiarmid route where stated; legacy Azuma lemmas remain as named infrastructure |
+| Chaining | Finite nets/images, finite support/outcome spaces, finite entropy sums |
 | Public axiom target | `[propext, Classical.choice, Quot.sound]` only |
 
-## Open work
+## Current boundaries
 
 Short version:
 
-- The main Rademacher and VC results are finite-class and finite-sample
-  theorems.
-- The sharp McDiarmid constant `exp(-2t²/∑cᵢ²)` is proved for the additive
-  independent case (`mcdiarmid_additive_independent`), for Doob martingale
-  increments with conditional sub-Gaussian MGF control
-  (`sharp_mcdiarmid_of_doob_increments`), and for a general bounded-differences
-  function over a homogeneous product measure, one-sided
-  (`mcdiarmid_of_hasBoundedDifferences_sharp`) and two-sided
-  (`mcdiarmid_twoSided_of_hasBoundedDifferences_sharp`). The high-probability
-  Rademacher and VC sample-complexity wrappers now use this sharp `2B²`
-  exponent; the localized-Rademacher wrapper still uses the older Azuma `8B²`
-  constant. The bounded-differences theorem is stated for the same product law
-  in each coordinate, not for arbitrary non-iid product spaces.
-- The chaining layer proves finite entropy-budget infrastructure and an initial
+- The main Rademacher and VC results are finite-class / finite-sample theorems.
+- The main Rademacher, VC, stability, and bounded-differences wrappers have
+  been rewired to the sharp McDiarmid route where the theorem name states that
+  sharp constant.
+- The chaining layer proves finite entropy-budget infrastructure and a first
   total-bounded finite-net extraction bridge, not the continuous Dudley
   integral.
 - PAC-Bayes includes a finite `[0,1]` bounded-loss Catoni-style confidence
   bound, a closed high-confidence good-event theorem, a fixed-budget
-  McAllester-style square-root corollary, a finite-grid peeling wrapper for
-  posterior-dependent penalties, and a finite Bernstein margin-proxy shell with
-  a supplied per-hypothesis variance proxy. Exact all-real-`λ`, concrete
-  classifier-margin extractors, infinite-hypothesis, and continuous-posterior
-  variants are not yet implemented.
+	  McAllester-style square-root corollary, a finite-grid peeling wrapper for
+	  posterior-dependent penalties, and a finite iid Bernstein classifier-margin
+	  theorem for thresholded margin losses with the prior-moment certificate
+	  derived from the iid finite product law. The Bernstein lane now also has a
+	  finite-grid `λ` wrapper for posterior-dependent penalties. Exact all-real
+	  `λ`, infinite-hypothesis, and continuous-posterior variants remain future
+	  work.
 - Algorithmic stability includes finite iid and measure-theoretic iid
   expected-gap wrappers, plus bounded-loss high-probability wrappers for
   finite measurable hypothesis interfaces.
 
 For the full scope statement, see
-[Scope and assumptions](./docs/assumptions-and-nonclaims.md).
+[Assumptions and current boundaries](./docs/assumptions-and-nonclaims.md).
 
 ## Installation
 
@@ -235,18 +163,7 @@ Run these before treating a branch as a release candidate:
 ```bash
 lake exe cache get
 lake build FormalSLT
-lake env lean examples/CheckV01Usability.lean
-lake env lean examples/CheckSubGammaExtractor.lean
-lake env lean examples/CheckUnitIntervalDudley.lean
-lake env lean examples/CheckTwoPointDudley.lean
-lake env lean examples/CheckFiniteDiscreteDudley.lean
-lake env lean examples/CheckPACBayesBernstein.lean
-python3 scripts/generate_proof_frontier_manifest.py --check
-python3 scripts/check_doc_anchors.py \
-  docs/formalslt-v0.1-technical-note.md \
-  docs/formalslt-v0.1-artifact-map-2026-06-01.md \
-  docs/formalslt-v0.1-release-review-2026-06-01.md \
-  docs/theorempath-formalslt-v0.1-page-draft.mdx
+lake env lean examples/CheckShowcaseTheorems.lean
 ```
 
 ## Audit commands
@@ -257,35 +174,16 @@ audits:
 ```bash
 rg -n --pcre2 '^\s*(?:by\s+)?(?:sorry|admit)\b|:=\s*(?:by\s+)?(?:sorry|admit)\b' FormalSLT examples
 rg -n --pcre2 '^\s*(?:axiom|constant)\s+[A-Za-z_]' FormalSLT examples
-python3 scripts/generate_proof_frontier_manifest.py --check
-python3 scripts/check_doc_anchors.py \
-  docs/formalslt-v0.1-technical-note.md \
-  docs/formalslt-v0.1-artifact-map-2026-06-01.md \
-  docs/formalslt-v0.1-release-review-2026-06-01.md \
-  docs/theorempath-formalslt-v0.1-page-draft.mdx
 git diff --check
 ```
 
 The expected result is:
 
 - `lake build FormalSLT` exits successfully;
-- `examples/CheckV01Usability.lean` resolves the v0.1 citation targets and
-  prints standard Lean/Mathlib axioms for the bundled confidence-sequence API,
-  the dyadic-net sequence API, and the concrete dyadic-net instances;
 - `examples/CheckShowcaseTheorems.lean` prints standard Lean/Mathlib axioms
   for selected public theorems;
-- `examples/CheckSubGammaExtractor.lean` prints standard Lean/Mathlib axioms
-  for the conditional sub-Gamma extractor and its helper lemmas;
-- `examples/CheckUnitIntervalDudley.lean` prints standard Lean/Mathlib axioms
-  for the concrete unit-interval Dudley example;
-- `examples/CheckTwoPointDudley.lean` prints standard Lean/Mathlib axioms
-  for the second dyadic-net example;
-- `examples/CheckFiniteDiscreteDudley.lean` prints standard Lean/Mathlib
-  axioms for the finite discrete embedded Rademacher dyadic-net family;
 - the `rg` commands find no executable `sorry`, no executable `admit`, and no
   custom axioms/constants in `FormalSLT` or `examples`;
-- the proof-frontier manifest is in sync with the theorem map and source counts;
-- documented `FormalSLT/...lean:line` anchors point at the named declarations;
 - `git diff --check` reports no whitespace errors.
 
 ## Module map
@@ -294,11 +192,12 @@ The expected result is:
 |---|---|
 | Core definitions | `Risk`, `ERM`, `UniformConvergence`, `GhostSample` |
 | Probability utilities | `Probability.Concentration`, `Probability.FiniteUnionBound`, `Probability.FiniteExpectation` |
-| Conditional sub-Gamma infrastructure | `Concentration.SubGamma.BennettBound`, `Concentration.SubGamma.BoundedExpIntegrable`, `Concentration.SubGamma.CondExpProduct`, `Concentration.SubGamma.CondJensen`, `Concentration.SubGamma.CondMarkov`, `Concentration.SubGamma.CondVarianceFromSquare`, `Concentration.SubGamma.Extractor` |
+| Conditional sub-gamma concentration | `Concentration.SubGamma.BennettBound`, `Concentration.SubGamma.BoundedExpIntegrable`, `Concentration.SubGamma.CondExpProduct`, `Concentration.SubGamma.CondJensen`, `Concentration.SubGamma.CondMarkov`, `Concentration.SubGamma.CondVarianceFromSquare`, `Concentration.SubGamma.Extractor` |
+| Sharp bounded-differences concentration | `Concentration.SharpMcDiarmid` |
 | Rademacher route | `Rademacher.FiniteSample`, `Rademacher.FiniteSampleSymmetrization`, `Rademacher.ProbabilityBridge`, `Rademacher.Decoupling`, `Rademacher.Symmetrization`, `Rademacher.Massart`, `Rademacher.HighProbability`, `Rademacher.FiniteClassHighProb`, `Rademacher.UniformDeviation`, `Rademacher.ERMGeneralization`, `Rademacher.Contraction`, `Rademacher.LinearPredictor`, `Rademacher.Localized` |
 | Azuma infrastructure | `Azuma.ExposureMartingale`, `Azuma.BoundedDifferences`, `Azuma.BoundedDiffMartingale`, `Azuma.BoundedDiffsAzumaInput`, `Azuma.BoundedIncrementBound`, `Azuma.HasBoundedDifferences`, `Azuma.ExposureIncrementHoeffding`, `Azuma.ExposureIncrementCondMGF`, `Azuma.GenGapTail` |
 | VC route | `VC.Dimension`, `VC.PACBridge`, `VC.SauerShelah`, `VC.Rademacher`, `VC.SampleComplexity`, `VC.BinaryVCBridge` |
-| Covering and chaining | `Covering.Rademacher`, `Covering.DudleyChaining`, `Covering.FiniteSubGaussianChaining`, `Covering.TotalBoundedDudley`, `Covering.UnitIntervalDudley`, `Covering.TwoPointDudley`, `Covering.FiniteDiscreteDudley` |
+| Covering and chaining | `Covering.Rademacher`, `Covering.DudleyChaining`, `Covering.FiniteSubGaussianChaining`, `Covering.TotalBoundedDudley` |
 | Stability and PAC-Bayes foundations | `AlgorithmicStability`, `Stability.BousquetElisseeff`, `PACBayesKL`, `PACBayesMcAllester`, `PACBayesFiniteProductMGF`, `PACBayesBoundedLoss`, `PACBayesBernstein` |
 
 ## Roadmap
@@ -308,6 +207,8 @@ The expected result is:
 - [x] Massart finite-class bound
 - [x] Azuma-Hoeffding genGap tail
 - [x] High-probability Rademacher bound
+- [x] Conditional sub-gamma MGF extractor (bounded + conditionally-centered +
+  conditional second moment ⇒ sub-gamma conditional MGF on `b·λ < 3`)
 - [x] Sauer-Shelah polynomial bound
 - [x] VC-style pointwise Rademacher
 - [x] VC uniform deviation and ERM excess-risk tail
@@ -316,8 +217,6 @@ The expected result is:
 - [x] Finite-sample scalar contraction
 - [x] Finite-dimensional linear predictor Rademacher bound
 - [x] Finite localized Rademacher/Bernstein variance-localization scaffold
-- [x] Conditional sub-Gamma MGF extractor from boundedness, conditional
-  centering, and a conditional second-moment proxy
 - [x] Covering number peeling and two-scale chaining
 - [x] Finite sub-Gaussian max and finite chaining entropy budgets
 - [x] Algorithmic stability bounded-differences scaffold
@@ -330,8 +229,10 @@ The expected result is:
 - [x] PAC-Bayes closed high-confidence generalization payoff theorem
 - [x] PAC-Bayes fixed-budget McAllester-style square-root corollary
 - [x] PAC-Bayes finite-grid McAllester peeling and optimized finite-grid wrapper
-- [x] PAC-Bayes finite Bernstein margin-proxy wrapper with supplied variance
-  proxy and normalized prior-moment certificate
+- [x] PAC-Bayes finite iid Bernstein classifier-margin confidence wrapper with
+  concrete margin losses and an iid-derived normalized prior-moment certificate
+- [x] PAC-Bayes finite-grid Bernstein `λ` wrapper for posterior-dependent
+  classifier-margin penalties
 - [x] Finite Dudley discrete entropy-bound refinements: annulus, integral-budget,
   and prefix-envelope wrappers
 - [x] Total-bounded finite-net extraction bridge for the continuous Dudley lane
@@ -360,8 +261,6 @@ The expected result is:
 - [x] Finite-cover/pathwise-modulus bridge into the separable-terminal Dudley
   boundary interface
 - [x] Finite-terminal total-bounded dyadic Dudley wrapper
-- [x] Concrete unit-interval Dudley example with explicit half/quarter meshes
-  and a supplied-supremum projected-mesh bound
 - [ ] Continuous Dudley entropy-integral theorem over total-bounded classes
 - [x] Measure-theoretic iid algorithmic stability expected bound, with explicit
   integrability assumptions
@@ -369,16 +268,13 @@ The expected result is:
   expected-gap theorem
 - [x] Bounded-loss high-probability stability wrappers for finite measurable
   hypothesis interfaces
-- [ ] PAC-Bayes concrete margin-loss Bernstein extractor, all-real-`λ`, or
-  continuous-posterior extensions
-- [x] Sharp McDiarmid constant for the additive independent case and for Doob
-  martingale increments with conditional sub-Gaussian MGF control
-- [x] Sharp McDiarmid for a general bounded-differences function over a
-  homogeneous product measure: one-sided, lower-tail, and two-sided
-  (`mcdiarmid_twoSided_of_hasBoundedDifferences_sharp`), with the sharp `2B²`
-  exponent propagated into the high-probability Rademacher and VC wrappers
-- [ ] Sharp McDiarmid over arbitrary non-iid product spaces (different law per
-  coordinate)
+- [x] PAC-Bayes iid margin-loss Bernstein prior-moment derivation
+- [x] PAC-Bayes Bernstein finite-grid `λ` extension
+- [ ] PAC-Bayes Bernstein all-real-`λ` or continuous-posterior extensions
+- [x] Sharp McDiarmid/product-kernel assembly. The sharp exposure-increment
+  conditional MGF, one-sided iid product bounded-differences tail, lower-tail
+  wrapper, and two-sided `mcdiarmid_twoSided_of_hasBoundedDifferences_sharp`
+  theorem are on the verified spine
 - [ ] Continuous Dudley-style entropy integral
 
 ## Dependencies
@@ -406,7 +302,7 @@ If you use FormalSLT in academic work, please cite:
 ```bibtex
 @software{formal_slt,
   title  = {FormalSLT: Formal Statistical Learning Theory in Lean 4},
-  author = {Sneiderman, Robert},
+  author = {Sneiderman, Robby},
   year   = {2026},
   url    = {https://github.com/Robby955/FormalSLT},
   note   = {Lean 4 formalization of finite-sample SLT bounds.}
diff --git a/docs/assumptions-and-nonclaims.md b/docs/assumptions-and-nonclaims.md
index d0f821e..17af217 100644
--- a/docs/assumptions-and-nonclaims.md
+++ b/docs/assumptions-and-nonclaims.md
@@ -1,6 +1,7 @@
-# Scope and Assumptions
+# Assumptions and Current Boundaries
 
-This page records the assumptions behind the current theorem spine.
+This page records the assumptions behind the current theorem spine and the
+current boundaries for public summaries.
 
 ## Scope
 
@@ -75,38 +76,10 @@ generic VC bridge is not yet part of the library.
 - finite multiscale chains;
 - finite entropy sums and entropy-budget wrappers.
 
-It is finite infrastructure for Dudley-style arguments. It is not the
-continuous Dudley entropy integral.
+It is a foundation for Dudley-style arguments. It is not the continuous
+Dudley entropy integral.
 
-### Unit-interval Dudley example
-
-`Covering.UnitIntervalDudley` instantiates the total-bounded finite-net bridge
-on the non-finite metric index space `[0,1]`. It constructs explicit half and
-quarter meshes, packages the Rademacher process
-`X(b,t) = signOfBool b * t`, and routes a nonzero supplied supremum through a
-projected finite-net Dudley bound with a concrete `sqrt (log 15)` entropy
-envelope.
-
-This example still uses the supplied-supremum interface. It does not construct
-arbitrary measurable suprema, prove a general separability theorem, or state
-the full continuous Dudley entropy integral.
-
-### Conditional sub-Gamma extraction
-
-`Concentration.SubGamma.Extractor` proves a conditional MGF bound for a
-single bounded real increment. The headline theorem
-`condSubGammaMGF_of_bounded_centered_condVariance` assumes:
-
-- an a.s. bound `|X| ≤ b`;
-- conditional centering `μ[X | m] = 0`;
-- a conditional second-moment proxy `μ[X² | m] ≤ σ²`;
-- the parameter regime `0 ≤ λ` and `b * λ < 3`.
-
-The theorem converts those assumptions into a conditional sub-Gamma MGF bound.
-It does not by itself construct a filtration, prove independence, or state a
-full sequential concentration inequality.
-
-## Open Work
+## Current Boundaries
 
 ### VC-to-PAC equivalence
 
@@ -121,11 +94,6 @@ general infinite classes requires covering-number APIs, measurable suprema,
 separability assumptions, and approximation arguments that are outside the
 current theorem spine.
 
-The unit-interval Dudley example is the current concrete bridge beyond a
-finite ambient index type. It verifies finite-net machinery on `[0,1]`, but it
-does not remove the remaining measurable-supremum and separability obligations
-for arbitrary non-finite classes.
-
 ### Downstream sharp-tail propagation
 
 The product-measure sharp McDiarmid theorem is checked, and the main
@@ -154,10 +122,13 @@ posterior-dependent penalties certified by that finite grid. The closed
 total iid product mass to state the finite high-confidence good event directly.
 `PACBayesBernstein` adds a finite Bernstein margin-proxy shell: the variance
 proxy is supplied per hypothesis, and the theorem consumes a normalized
-Bernstein prior-moment certificate. It does not yet derive that variance proxy
-from a concrete classifier-margin loss. Exact all-real `λ`, finite-grid
-Bernstein optimization, infinite-hypothesis, and continuous-posterior
-PAC-Bayes theorems are not yet implemented.
+Bernstein prior-moment certificate. `PACBayesMargin` specializes that shell to
+thresholded classifier-margin losses, proves the iid finite classifier-margin
+MGF budget, derives the normalized Bernstein prior-moment certificate, and
+packages the fixed-`lambda` iid bad-event theorem with the sample-size-scaled
+variance proxy. Exact all-real `λ`, finite-grid Bernstein optimization,
+infinite-hypothesis, and continuous-posterior PAC-Bayes theorems remain future
+work.
 
 ### Algorithmic stability expected bound
 
diff --git a/docs/diagrams.md b/docs/diagrams.md
index 264c04b..79b4013 100644
--- a/docs/diagrams.md
+++ b/docs/diagrams.md
@@ -186,8 +186,8 @@ flowchart TD
 The PAC-Bayes lane runs from the KL/Donsker-Varadhan change of measure through
 a finite Catoni-style bound to a finite-grid McAllester peeling wrapper for
 posterior-dependent penalties. The Bernstein lane adds a finite margin-proxy
-wrapper with a supplied per-hypothesis variance proxy and a normalized
-prior-moment certificate.
+wrapper, then discharges the normalized prior-moment certificate for iid finite
+classifier-margin losses, and then adds a finite-grid Bernstein `lambda` wrapper.
 
 ```mermaid
 flowchart TD
@@ -198,18 +198,20 @@ flowchart TD
     fixed["Fixed-budget McAllester square-root corollary"]
     peeling["<b>Finite-grid McAllester peeling</b><br/>posterior-dependent penalty"]
     optimized["Optimized finite-grid wrapper"]
-    bernstein["Finite Bernstein margin-proxy shell<br/>supplied variance proxy"]
-    future["Next: all-real-λ / continuous-posterior extensions<br/>(open)"]
-
-    kl --> mgf --> catoni --> payoff --> fixed --> peeling --> optimized
-    kl --> bernstein
-    bernstein -.-> future
-    optimized -.-> future
-
-    classDef verified fill:#f0fdf4,stroke:#16a34a,color:#14532d;
-    classDef future fill:#f8fafc,stroke:#94a3b8,color:#475569,stroke-dasharray: 5 4;
-    class kl,mgf,catoni,payoff,fixed,peeling,optimized,bernstein verified;
-    class future future;
+	    bernstein["Finite Bernstein margin-proxy shell<br/>supplied variance proxy"]
+	    margin["Iid classifier-margin MGF<br/>fixed-lambda bad-event theorem"]
+	    bernsteinGrid["Finite-grid Bernstein λ wrapper<br/>posterior-dependent penalty"]
+	    future["Next: all-real-λ / continuous-posterior extensions<br/>(open)"]
+
+	    kl --> mgf --> catoni --> payoff --> fixed --> peeling --> optimized
+	    kl --> bernstein --> margin --> bernsteinGrid
+	    bernsteinGrid -.-> future
+	    optimized -.-> future
+
+	    classDef verified fill:#f0fdf4,stroke:#16a34a,color:#14532d;
+	    classDef future fill:#f8fafc,stroke:#94a3b8,color:#475569,stroke-dasharray: 5 4;
+	    class kl,mgf,catoni,payoff,fixed,peeling,optimized,bernstein,margin,bernsteinGrid verified;
+	    class future future;
 ```
 
 ## Where each definition first appears
diff --git a/docs/proof-frontier-manifest.json b/docs/proof-frontier-manifest.json
index 0785093..a6e04e1 100644
--- a/docs/proof-frontier-manifest.json
+++ b/docs/proof-frontier-manifest.json
@@ -67,11 +67,11 @@
   },
   "schema": "FormalSLT.proof_frontier.v1",
   "source_counts": {
-    "example_lean_files": 25,
-    "library_and_example_lean_lines": 33427,
-    "library_lean_lines": 31533,
-    "library_modules": 60,
-    "theorem_map_entries": 412
+    "example_lean_files": 26,
+    "library_and_example_lean_lines": 34536,
+    "library_lean_lines": 32558,
+    "library_modules": 61,
+    "theorem_map_entries": 426
   },
   "theorem_families": [
     {
@@ -2437,6 +2437,12 @@
           "name": "finiteProduct_mgf_empiricalRiskDeviation_le_of_single",
           "summary": "Single-coordinate MGF budget lifts to the finite sample-average MGF"
         },
+        {
+          "kind": "theorem",
+          "module": "PACBayesFiniteProductMGF",
+          "name": "finiteProduct_mgf_empiricalRiskDeviation_le_exp_of_single",
+          "summary": "Single-coordinate exponential budget lifts to an explicit finite product MGF budget"
+        },
         {
           "kind": "theorem",
           "module": "PACBayesFiniteProductMGF",
@@ -2527,6 +2533,12 @@
           "name": "priorBernsteinExpMoment",
           "summary": "Normalized Bernstein prior exponential moment with variance and scale terms"
         },
+        {
+          "kind": "theorem",
+          "module": "PACBayesBernstein",
+          "name": "expectedPriorBernsteinExpMoment_le_one_of_mgf_bound",
+          "summary": "Expected normalized prior moment is at most one when each hypothesis has the corresponding Bernstein MGF budget"
+        },
         {
           "kind": "theorem",
           "module": "PACBayesBernstein",
@@ -2550,6 +2562,78 @@
           "module": "PACBayesBernstein",
           "name": "finitePACBayesBernsteinMargin_badEventMass_le_delta",
           "summary": "Finite supplied margin-proxy wrapper with sqrt(2 * V\u03c1 * C\u03c1) + scale * C\u03c1 penalty form"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "classifierMarginLoss",
+          "summary": "Thresholded finite classifier-margin loss in {0,1}"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "classifierMarginPopulationRisk",
+          "summary": "Concrete population risk for thresholded classifier-margin loss"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "classifierMarginEmpiricalRisk",
+          "summary": "Concrete empirical risk for thresholded classifier-margin loss"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "classifierMarginVarianceProxy",
+          "summary": "Concrete risk-as-variance proxy for indicator classifier-margin loss"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "classifierMarginSampleVarianceProxy",
+          "summary": "Sample-size-scaled variance proxy for iid classifier-margin empirical-risk deviations"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "classifierMarginVariance_le_risk",
+          "summary": "Indicator margin-loss centered second moment is bounded by its risk proxy"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "oneCoordinate_classifierMarginLoss_mgf_subgamma",
+          "summary": "One-coordinate finite Bennett/sub-Gamma MGF bound for thresholded classifier-margin loss"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "finiteProduct_classifierMarginLoss_mgf_subgamma",
+          "summary": "Iid product MGF bound for classifier-margin empirical-risk deviations"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "expectedPriorBernsteinExpMoment_classifierMargin_iid_le_one",
+          "summary": "Iid classifier-margin losses discharge the normalized Bernstein prior-moment certificate"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "finiteProductSampleWeight_isPMF",
+          "summary": "Iid finite product sample weights form a PMF when the base mass is a PMF"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "finitePACBayesClassifierMarginBernstein_badEventMass_le_delta",
+          "summary": "PAC-Bayes Bernstein bad-event theorem specialized to finite classifier-margin losses under the supplied prior-moment certificate"
+        },
+        {
+          "kind": "theorem",
+          "module": "PACBayesMargin",
+          "name": "finitePACBayesClassifierMarginBernstein_iid_badEventMass_le_delta",
+          "summary": "End-to-end fixed-lambda iid PAC-Bayes Bernstein bad-event theorem for finite classifier-margin losses"
         }
       ],
       "name": "Stability and PAC-Bayes foundations"
diff --git a/docs/roadmap.md b/docs/roadmap.md
index f1dc628..4a0616f 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -231,6 +231,9 @@
     proxy.~~ ✓ (`PACBayesBernstein.posteriorMarginVarianceProxy`)
   - ~~Step 2: define the normalized Bernstein prior exponential moment with
     variance and scale terms.~~ ✓ (`PACBayesBernstein.priorBernsteinExpMoment`)
+  - ~~Step 2.5: lift per-hypothesis Bernstein MGF budgets to the expected
+    normalized prior moment.~~ ✓
+    (`PACBayesBernstein.expectedPriorBernsteinExpMoment_le_one_of_mgf_bound`)
   - ~~Step 3: prove the deterministic fixed-sample PAC-Bayes Bernstein
     adapter from a prior-moment certificate.~~ ✓
     (`PACBayesBernstein.posteriorGeneralizationGap_le_bernstein_of_priorBernsteinExpMoment_le`)
@@ -238,10 +241,19 @@
     posterior-dependent margin-style penalties under explicit complexity and
     penalty certificates.~~ ✓
     (`PACBayesBernstein.finitePACBayesBernsteinMargin_badEventMass_le_delta`)
-  - Remaining PAC-Bayes Bernstein extensions: concrete classifier-margin
-    variance extractors, finite-grid `lambda` optimization, exact all-real
-    `lambda` optimization, continuous posteriors, and infinite hypothesis
-    classes.
+  - ~~Step 5: specialize the finite Bernstein shell to concrete thresholded
+    classifier-margin losses and the risk-as-variance proxy.~~ ✓
+    (`PACBayesMargin.finitePACBayesClassifierMarginBernstein_badEventMass_le_delta`)
+	  - ~~Step 6: derive the normalized Bernstein prior-moment certificate from iid
+	    finite classifier-margin losses and package the fixed-`lambda` iid
+	    bad-event theorem.~~ ✓
+	    (`PACBayesMargin.finitePACBayesClassifierMarginBernstein_iid_badEventMass_le_delta`)
+	  - ~~Step 7: add finite-grid `lambda` optimization wrappers for Bernstein
+	    posterior-dependent penalties.~~ ✓
+	    (`PACBayesBernstein.finitePACBayesBernsteinGridOptimized_badEventMass_le_delta`,
+	    `PACBayesMargin.finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_delta`)
+	  - Remaining PAC-Bayes Bernstein extensions: exact all-real `lambda`
+	    optimization, continuous posteriors, and infinite hypothesis classes.
 
 ### Long-term
 
diff --git a/docs/theorem-chain.svg b/docs/theorem-chain.svg
index 2c97431..97d4a03 100644
--- a/docs/theorem-chain.svg
+++ b/docs/theorem-chain.svg
@@ -1,6 +1,6 @@
-<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1100 900" role="img" aria-labelledby="title desc">
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1100 1080" role="img" aria-labelledby="title desc">
   <title id="title">FormalSLT theorem chain</title>
-  <desc id="desc">A dependency diagram for the finite statistical learning theory spine in Lean.</desc>
+  <desc id="desc">A dependency diagram for the finite statistical learning theory spine in Lean, with the recent conditional sub-gamma concentration toolkit, iid PAC-Bayes Bernstein classifier-margin theorem, and total-bounded Dudley boundary adapters.</desc>
   <defs>
     <marker id="arrow-blue" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
       <path d="M0,0 L10,4 L0,8 Z" fill="#2563eb"/>
@@ -8,6 +8,9 @@
     <marker id="arrow-green" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto">
       <path d="M0,0 L10,4 L0,8 Z" fill="#059669"/>
     </marker>
+    <marker id="arrow-soft" markerWidth="9" markerHeight="7" refX="8" refY="3.5" orient="auto">
+      <path d="M0,0 L9,3.5 L0,7 Z" fill="#94a3b8"/>
+    </marker>
     <style>
       text { font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; }
       .bg { fill: #f8fafc; }
@@ -15,6 +18,7 @@
       .node { fill: #eef6ff; stroke: #2563eb; stroke-width: 1.6; rx: 10; }
       .node-green { fill: #ecfdf5; stroke: #059669; stroke-width: 1.6; rx: 10; }
       .node-gold { fill: #fffbeb; stroke: #d97706; stroke-width: 1.6; rx: 10; }
+      .node-violet { fill: #f5f3ff; stroke: #7c3aed; stroke-width: 1.6; rx: 10; }
       .node-gray { fill: #f8fafc; stroke: #94a3b8; stroke-width: 1.4; rx: 10; stroke-dasharray: 5 4; }
       .title { fill: #0f172a; font-size: 24px; font-weight: 760; }
       .subtitle { fill: #475569; font-size: 13px; }
@@ -24,16 +28,16 @@
       .tiny { fill: #64748b; font-size: 10px; }
       .arrow-blue { stroke: #2563eb; stroke-width: 2; fill: none; marker-end: url(#arrow-blue); }
       .arrow-green { stroke: #059669; stroke-width: 2; fill: none; marker-end: url(#arrow-green); }
-      .arrow-soft { stroke: #94a3b8; stroke-width: 1.6; fill: none; stroke-dasharray: 5 4; }
+      .arrow-soft { stroke: #94a3b8; stroke-width: 1.6; fill: none; stroke-dasharray: 5 4; marker-end: url(#arrow-soft); }
     </style>
   </defs>
 
-  <rect width="1100" height="900" class="bg"/>
+  <rect width="1100" height="1080" class="bg"/>
 
   <text x="550" y="40" text-anchor="middle" class="title">FormalSLT theorem chain</text>
-  <text x="550" y="62" text-anchor="middle" class="subtitle">
-    60 FormalSLT Lean files · 85 checked Lean files · zero sorry · zero admit · axioms: propext, Classical.choice, Quot.sound
-  </text>
+  <text x="550" y="62" text-anchor="middle" class="subtitle"><!-- STATS:BEGIN (auto-generated by scripts/repo-stats.sh — run it to refresh, do not edit by hand) -->
+61 Lean modules · 32,558 lines · zero sorry · zero admit · axioms: propext, Classical.choice, Quot.sound
+<!-- STATS:END --></text>
 
   <rect x="38" y="92" width="675" height="630" class="panel"/>
   <text x="70" y="126" class="section">Core finite Rademacher and VC spine</text>
@@ -56,8 +60,8 @@
   <path d="M201 210 C201 232, 256 246, 258 270" class="arrow-blue"/>
 
   <rect x="86" y="340" width="230" height="58" class="node"/>
-  <text x="201" y="365" text-anchor="middle" class="label">Sharp McDiarmid genGap tail</text>
-  <text x="201" y="382" text-anchor="middle" class="small">exp(-ε²n / 2B²)</text>
+  <text x="201" y="365" text-anchor="middle" class="label">Azuma tail</text>
+  <text x="201" y="382" text-anchor="middle" class="small">exp(-ε²n / 8B²)</text>
 
   <rect x="430" y="340" width="230" height="58" class="node"/>
   <text x="545" y="365" text-anchor="middle" class="label">Massart finite-class</text>
@@ -117,8 +121,8 @@
   <text x="904" y="664" text-anchor="middle" class="small">dyadic finite entropy wrappers</text>
 
   <rect x="785" y="716" width="237" height="60" class="node-green"/>
-  <text x="904" y="741" text-anchor="middle" class="label">PAC-Bayes finite confidence</text>
-  <text x="904" y="758" text-anchor="middle" class="small">Catoni, McAllester, Bernstein</text>
+  <text x="904" y="741" text-anchor="middle" class="label">PAC-Bayes finite layers</text>
+  <text x="904" y="758" text-anchor="middle" class="small">Catoni, McAllester, iid Bernstein</text>
 
   <path d="M660 181 C700 181, 742 181, 785 181" class="arrow-green"/>
   <path d="M660 181 C708 205, 742 246, 785 270" class="arrow-green"/>
@@ -126,32 +130,40 @@
   <path d="M904 400 C904 412, 904 421, 904 434" class="arrow-green"/>
   <path d="M904 494 C904 506, 904 515, 904 528" class="arrow-green"/>
   <path d="M904 588 C904 600, 904 609, 904 622" class="arrow-green"/>
-  <path d="M904 682 C904 694, 904 703, 904 716" class="arrow-green"/>
 
   <rect x="775" y="802" width="258" height="1" fill="#dbe3ef"/>
   <text x="775" y="822" class="tiny">Finite chaining is a finite entropy-budget layer.</text>
   <text x="775" y="838" class="tiny">Continuous Dudley is a separate theorem target.</text>
 
-  <rect x="38" y="738" width="675" height="150" class="panel"/>
-  <text x="70" y="768" class="section">Sharp McDiarmid bounded-differences (homogeneous product measure)</text>
-
-  <rect x="58" y="788" width="200" height="64" class="node-green"/>
-  <text x="158" y="812" text-anchor="middle" class="label">One-sided sharp tail</text>
-  <text x="158" y="830" text-anchor="middle" class="small">exp(-2ε² / Σ cₖ²)</text>
-  <text x="158" y="845" text-anchor="middle" class="tiny">bounded differences, product μ</text>
-
-  <rect x="278" y="788" width="200" height="64" class="node-green"/>
-  <text x="378" y="812" text-anchor="middle" class="label">Lower-tail wrapper</text>
-  <text x="378" y="830" text-anchor="middle" class="small">apply to -f, same widths</text>
-  <text x="378" y="845" text-anchor="middle" class="tiny">exp(-2ε² / Σ cₖ²)</text>
-
-  <rect x="498" y="788" width="196" height="64" class="node-green"/>
-  <text x="596" y="812" text-anchor="middle" class="label">Two-sided wrapper</text>
-  <text x="596" y="830" text-anchor="middle" class="small">2 · exp(-2ε² / Σ cₖ²)</text>
-  <text x="596" y="845" text-anchor="middle" class="tiny">union of upper and lower</text>
-
-  <path d="M258 820 C268 820, 270 820, 278 820" class="arrow-green"/>
-  <path d="M478 820 C488 820, 490 820, 498 820" class="arrow-green"/>
-  <path d="M158 788 C150 600, 150 470, 195 398" class="arrow-soft"/>
-  <text x="58" y="874" class="tiny">PAC-Bayes now includes a finite Bernstein margin-proxy shell with supplied variance proxy.</text>
+  <!-- Recent additions band (since 2026-05-09) -->
+  <rect x="38" y="872" width="1024" height="176" class="panel"/>
+  <text x="70" y="902" class="section">Recent additions — conditional concentration toolkit and Dudley boundary lane</text>
+
+  <rect x="58" y="916" width="210" height="64" class="node-violet"/>
+  <text x="163" y="941" text-anchor="middle" class="label">Sub-gamma MGF extractor</text>
+  <text x="163" y="958" text-anchor="middle" class="small">conditional, from bounded +</text>
+  <text x="163" y="972" text-anchor="middle" class="small">cond. variance, on b·λ &lt; 3</text>
+
+  <rect x="316" y="916" width="210" height="64" class="node-green"/>
+  <text x="421" y="941" text-anchor="middle" class="label">Total-bounded finite-net</text>
+  <text x="421" y="958" text-anchor="middle" class="small">dyadic finite-net schedules,</text>
+  <text x="421" y="972" text-anchor="middle" class="small">projected-sup wrappers</text>
+
+  <rect x="574" y="916" width="210" height="64" class="node-green"/>
+  <text x="679" y="941" text-anchor="middle" class="label">PAC-Bayes finite confidence</text>
+  <text x="679" y="958" text-anchor="middle" class="small">finite-grid peeling plus</text>
+  <text x="679" y="972" text-anchor="middle" class="small">iid Bernstein margin theorem</text>
+
+  <rect x="832" y="916" width="210" height="64" class="node-green"/>
+  <text x="937" y="941" text-anchor="middle" class="label">Dudley boundary adapters</text>
+  <text x="937" y="958" text-anchor="middle" class="small">separable-terminal · supplied-sup</text>
+  <text x="937" y="972" text-anchor="middle" class="small">· pathwise-modulus · ε-elimination</text>
+
+  <path d="M201 398 C150 600, 150 770, 163 916" class="arrow-soft"/>
+  <path d="M904 682 C922 760, 932 840, 937 916" class="arrow-soft"/>
+  <path d="M904 776 C820 840, 740 884, 679 916" class="arrow-soft"/>
+
+  <rect x="58" y="1002" width="984" height="1" fill="#dbe3ef"/>
+  <text x="58" y="1022" class="tiny">Recent lane (since 2026-05-09): conditional sub-gamma toolkit, iid PAC-Bayes Bernstein classifier-margin theorem, and total-bounded Dudley boundary adapters, all verified and axiom-clean.</text>
+  <text x="58" y="1038" class="tiny">Sharp McDiarmid now includes the iid product-kernel route, lower tail, and two-sided bounded-differences theorem.</text>
 </svg>
diff --git a/docs/theorem-map.md b/docs/theorem-map.md
index 9129dfe..a7a6693 100644
--- a/docs/theorem-map.md
+++ b/docs/theorem-map.md
@@ -442,6 +442,7 @@ declarations; modules are relative to `FormalSLT`.
 | `finiteEmpiricalRisk` | `PACBayesFiniteProductMGF` | Finite empirical risk for a real-valued loss |
 | `finiteProduct_mgf_empiricalRiskDeviation_eq_pow` | `PACBayesFiniteProductMGF` | Exact iid product factorization of `E exp(lam * (R_i - Rhat_i))` |
 | `finiteProduct_mgf_empiricalRiskDeviation_le_of_single` | `PACBayesFiniteProductMGF` | Single-coordinate MGF budget lifts to the finite sample-average MGF |
+| `finiteProduct_mgf_empiricalRiskDeviation_le_exp_of_single` | `PACBayesFiniteProductMGF` | Single-coordinate exponential budget lifts to an explicit finite product MGF budget |
 | `finitePriorAveraged_mgf_empiricalRiskDeviation_le` | `PACBayesFiniteProductMGF` | Prior-averaged finite iid empirical-risk-deviation MGF bound |
 | `oneCoordinate_boundedLoss_mgf` | `PACBayesBoundedLoss` | `[0,1]` bounded-loss one-coordinate MGF instantiation |
 | `sampleAverage_boundedLoss_mgf` | `PACBayesBoundedLoss` | Finite sample-average bounded-loss MGF bound |
@@ -457,10 +458,23 @@ declarations; modules are relative to `FormalSLT`.
 | `pac_bayes_generalization` | `PACBayesBoundedLoss` | Closed PAC-Bayes good-event theorem: with product-sample mass at least `1 - delta`, every posterior satisfies the Catoni-form risk bound |
 | `posteriorMarginVarianceProxy` | `PACBayesBernstein` | Posterior average of a supplied per-hypothesis margin-variance proxy |
 | `priorBernsteinExpMoment` | `PACBayesBernstein` | Normalized Bernstein prior exponential moment with variance and scale terms |
+| `expectedPriorBernsteinExpMoment_le_one_of_mgf_bound` | `PACBayesBernstein` | Expected normalized prior moment is at most one when each hypothesis has the corresponding Bernstein MGF budget |
 | `posteriorGeneralizationGap_le_bernstein_of_priorBernsteinExpMoment_le` | `PACBayesBernstein` | Deterministic fixed-sample PAC-Bayes Bernstein adapter from a prior-moment certificate |
 | `finitePACBayesBernstein_fixedLambda_badEventMass_le_delta` | `PACBayesBernstein` | Finite fixed-`lambda` PAC-Bayes Bernstein bad-event bound |
 | `finitePACBayesBernsteinPenalty_badEventMass_le_delta` | `PACBayesBernstein` | Posterior-dependent finite Bernstein bad-event wrapper under complexity and penalty certificates |
 | `finitePACBayesBernsteinMargin_badEventMass_le_delta` | `PACBayesBernstein` | Finite supplied margin-proxy wrapper with `sqrt(2 * Vρ * Cρ) + scale * Cρ` penalty form |
+| `classifierMarginLoss` | `PACBayesMargin` | Thresholded finite classifier-margin loss in `{0,1}` |
+| `classifierMarginPopulationRisk` | `PACBayesMargin` | Concrete population risk for thresholded classifier-margin loss |
+| `classifierMarginEmpiricalRisk` | `PACBayesMargin` | Concrete empirical risk for thresholded classifier-margin loss |
+| `classifierMarginVarianceProxy` | `PACBayesMargin` | Concrete risk-as-variance proxy for indicator classifier-margin loss |
+| `classifierMarginSampleVarianceProxy` | `PACBayesMargin` | Sample-size-scaled variance proxy for iid classifier-margin empirical-risk deviations |
+| `classifierMarginVariance_le_risk` | `PACBayesMargin` | Indicator margin-loss centered second moment is bounded by its risk proxy |
+| `oneCoordinate_classifierMarginLoss_mgf_subgamma` | `PACBayesMargin` | One-coordinate finite Bennett/sub-Gamma MGF bound for thresholded classifier-margin loss |
+| `finiteProduct_classifierMarginLoss_mgf_subgamma` | `PACBayesMargin` | Iid product MGF bound for classifier-margin empirical-risk deviations |
+| `expectedPriorBernsteinExpMoment_classifierMargin_iid_le_one` | `PACBayesMargin` | Iid classifier-margin losses discharge the normalized Bernstein prior-moment certificate |
+| `finiteProductSampleWeight_isPMF` | `PACBayesMargin` | Iid finite product sample weights form a PMF when the base mass is a PMF |
+| `finitePACBayesClassifierMarginBernstein_badEventMass_le_delta` | `PACBayesMargin` | PAC-Bayes Bernstein bad-event theorem specialized to finite classifier-margin losses under the supplied prior-moment certificate |
+| `finitePACBayesClassifierMarginBernstein_iid_badEventMass_le_delta` | `PACBayesMargin` | End-to-end fixed-`lambda` iid PAC-Bayes Bernstein bad-event theorem for finite classifier-margin losses |
 
 ## Conditional sub-Gamma extractor
 
diff --git a/examples/CheckPACBayesBernstein.lean b/examples/CheckPACBayesBernstein.lean
index a2ab608..4bc8a8a 100644
--- a/examples/CheckPACBayesBernstein.lean
+++ b/examples/CheckPACBayesBernstein.lean
@@ -24,3 +24,13 @@ posterior-dependent margin-style wrapper.
 
 #check FormalSLT.PACBayesBernstein.finitePACBayesBernsteinMargin_badEventMass_le_delta
 #print axioms FormalSLT.PACBayesBernstein.finitePACBayesBernsteinMargin_badEventMass_le_delta
+
+#check FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGridBadSamples
+#check FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGrid_badEventMass_le_sum_delta
+#print axioms FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGrid_badEventMass_le_sum_delta
+#check FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGrid_badEventMass_le_delta
+#print axioms FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGrid_badEventMass_le_delta
+#check FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGridOptimized_badEventMass_le_sum_delta
+#print axioms FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGridOptimized_badEventMass_le_sum_delta
+#check FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGridOptimized_badEventMass_le_delta
+#print axioms FormalSLT.PACBayesBernstein.finitePACBayesBernsteinGridOptimized_badEventMass_le_delta
diff --git a/examples/CheckPACBayesMargin.lean b/examples/CheckPACBayesMargin.lean
new file mode 100644
index 0000000..7c35583
--- /dev/null
+++ b/examples/CheckPACBayesMargin.lean
@@ -0,0 +1,74 @@
+import FormalSLT.PACBayesMargin
+
+/-!
+# PAC-Bayes classifier-margin adapter audit
+
+Checks the concrete classifier-margin surface that specializes the finite
+PAC-Bayes Bernstein shell.
+-/
+
+namespace FormalSLT.PACBayesMargin.Check
+
+inductive ToyHyp
+  | left
+  | right
+  deriving DecidableEq, Fintype
+
+instance : Nonempty ToyHyp := ⟨ToyHyp.left⟩
+
+inductive ToyPoint
+  | neg
+  | pos
+  deriving DecidableEq, Fintype
+
+def toyLabel : ToyPoint → Bool
+  | ToyPoint.neg => false
+  | ToyPoint.pos => true
+
+def toyScore : ToyHyp → ToyPoint → ℝ
+  | ToyHyp.left, ToyPoint.neg => -1
+  | ToyHyp.left, ToyPoint.pos => 1
+  | ToyHyp.right, ToyPoint.neg => 1
+  | ToyHyp.right, ToyPoint.pos => -1
+
+#check FormalSLT.PACBayesMargin.signedBool
+#check FormalSLT.PACBayesMargin.classifierMargin
+#check FormalSLT.PACBayesMargin.classifierMarginLoss
+#check FormalSLT.PACBayesMargin.classifierMarginLoss_mem_Icc_zero_one
+#check FormalSLT.PACBayesMargin.classifierMarginPopulationRisk
+#check FormalSLT.PACBayesMargin.classifierMarginEmpiricalRisk
+#check FormalSLT.PACBayesMargin.classifierMarginEmpiricalRiskFn
+#check FormalSLT.PACBayesMargin.classifierMarginVarianceProxy
+#check FormalSLT.PACBayesMargin.classifierMarginSampleVarianceProxy
+#check FormalSLT.PACBayesMargin.posteriorClassifierMarginVarianceProxy
+#check FormalSLT.PACBayesMargin.posteriorClassifierMarginSampleVarianceProxy
+#check FormalSLT.PACBayesMargin.classifierMarginPopulationRisk_mem_Icc_zero_one
+#check FormalSLT.PACBayesMargin.classifierMarginLoss_sq
+#check FormalSLT.PACBayesMargin.classifierMarginVariance_le_risk
+#check FormalSLT.PACBayesMargin.oneCoordinate_classifierMarginLoss_mgf_subgamma
+#check FormalSLT.PACBayesMargin.finiteProduct_classifierMarginLoss_mgf_subgamma
+#check FormalSLT.PACBayesMargin.expectedPriorBernsteinExpMoment_classifierMargin_iid_le_one
+#check FormalSLT.PACBayesMargin.finiteProductSampleWeight_isPMF
+#check FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernsteinBadSamples
+
+#check (FormalSLT.PACBayesMargin.classifierMarginLoss
+  (ι := ToyHyp) (Z := ToyPoint) 0 toyScore toyLabel ToyHyp.left ToyPoint.pos)
+
+#check (FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernstein_badEventMass_le_delta
+  (n := 2) (Z := ToyPoint) (ι := ToyHyp))
+#print axioms FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernstein_badEventMass_le_delta
+
+#check (FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernstein_iid_badEventMass_le_delta
+  (n := 2) (Z := ToyPoint) (ι := ToyHyp))
+#print axioms FormalSLT.PACBayesMargin.expectedPriorBernsteinExpMoment_classifierMargin_iid_le_one
+#print axioms FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernstein_iid_badEventMass_le_delta
+
+#check (FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_sum_delta
+  (n := 2) (Z := ToyPoint) (ι := ToyHyp) (γ := Fin 2))
+#print axioms FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_sum_delta
+
+#check (FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_delta
+  (n := 2) (Z := ToyPoint) (ι := ToyHyp) (γ := Fin 2))
+#print axioms FormalSLT.PACBayesMargin.finitePACBayesClassifierMarginBernstein_iid_gridOptimized_badEventMass_le_delta
+
+end FormalSLT.PACBayesMargin.Check
diff --git a/scripts/repo-stats.sh b/scripts/repo-stats.sh
new file mode 100755
index 0000000..f777c1a
--- /dev/null
+++ b/scripts/repo-stats.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+#
+# repo-stats.sh — single source of truth for the repo size figures the README,
+# the theorem-chain SVG, and any reviewer quote against the GitHub page.
+#
+# It recomputes the Lean module count and total Lean line count from disk using
+# exactly the commands a skeptical reader would run, writes them to
+# `.stats.json`, and stamps them into the marker-delimited STATS regions of
+# `README.md` and `docs/theorem-chain.svg` so the numbers cannot silently drift.
+#
+# Usage:
+#   scripts/repo-stats.sh           # recompute, write .stats.json, stamp files
+#   scripts/repo-stats.sh --check   # recompute and FAIL (exit 1) if anything is
+#                                   # stale; writes nothing. Use this in CI / the
+#                                   # pre-PR checklist to catch drift.
+#
+# The canonical count commands (kept identical to the audit instructions):
+#   modules: find FormalSLT -name "*.lean" | wc -l
+#   lines:   find FormalSLT -name "*.lean" -exec cat {} \; | wc -l
+set -euo pipefail
+
+# Resolve repo root from this script's location so it works from any cwd.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+cd "${ROOT}"
+
+CHECK=0
+if [[ "${1:-}" == "--check" ]]; then
+  CHECK=1
+fi
+
+MODULES="$(find FormalSLT -name '*.lean' | wc -l | tr -d ' ')"
+LINES="$(find FormalSLT -name '*.lean' -exec cat {} \; | wc -l | tr -d ' ')"
+
+README="README.md"
+SVG="docs/theorem-chain.svg"
+STATS_JSON=".stats.json"
+
+MODULES="${MODULES}" LINES="${LINES}" \
+README="${README}" SVG="${SVG}" STATS_JSON="${STATS_JSON}" CHECK="${CHECK}" \
+python3 - <<'PY'
+import os, re, sys, json
+
+modules   = os.environ["MODULES"]
+lines     = os.environ["LINES"]
+check     = os.environ["CHECK"] == "1"
+readme    = os.environ["README"]
+svg       = os.environ["SVG"]
+stats_json = os.environ["STATS_JSON"]
+
+# Thousands-separated line count for prose (e.g. 20789 -> 20,789).
+lines_human = f"{int(lines):,}"
+
+# The exact strings stamped between the STATS markers in each file.
+README_STATS = f"**{modules} Lean modules. {lines_human} lines of Lean. Zero `sorry`. Zero `admit`. Zero custom axioms.**"
+SVG_STATS    = f"{modules} Lean modules · {lines_human} lines · zero sorry · zero admit · axioms: propext, Classical.choice, Quot.sound"
+
+drift = []
+
+def restamp(path, replacement):
+    """Replace the text between STATS:BEGIN and STATS:END markers."""
+    with open(path, encoding="utf-8") as fh:
+        text = fh.read()
+    pat = re.compile(r"(<!--\s*STATS:BEGIN[^>]*-->)(.*?)(<!--\s*STATS:END\s*-->)", re.DOTALL)
+    m = pat.search(text)
+    if not m:
+        sys.stderr.write(f"error: STATS markers not found in {path}\n")
+        sys.exit(2)
+    current = m.group(2).strip()
+    want = replacement.strip()
+    if current != want:
+        drift.append(f"{path}: stale STATS region\n    have: {current}\n    want: {want}")
+    new = text[:m.start(2)] + "\n" + replacement + "\n" + text[m.end(2):]
+    if not check and new != text:
+        with open(path, "w", encoding="utf-8") as fh:
+            fh.write(new)
+
+restamp(readme, README_STATS)
+restamp(svg, SVG_STATS)
+
+# .stats.json is fully regenerated; compare semantically for --check.
+payload = {
+    "lean_modules": int(modules),
+    "lean_lines": int(lines),
+    "_note": "Generated by scripts/repo-stats.sh. Do not edit by hand.",
+}
+existing = None
+if os.path.exists(stats_json):
+    try:
+        with open(stats_json, encoding="utf-8") as fh:
+            existing = json.load(fh)
+    except Exception:
+        existing = None
+if existing != payload:
+    if existing is None:
+        drift.append(f"{stats_json}: missing or unreadable")
+    else:
+        drift.append(f"{stats_json}: stale (have {existing.get('lean_modules')}/{existing.get('lean_lines')}, want {modules}/{lines})")
+    if not check:
+        with open(stats_json, "w", encoding="utf-8") as fh:
+            json.dump(payload, fh, indent=2)
+            fh.write("\n")
+
+if check:
+    if drift:
+        sys.stderr.write("repo-stats: DRIFT DETECTED — run scripts/repo-stats.sh to fix:\n")
+        for d in drift:
+            sys.stderr.write("  - " + d + "\n")
+        sys.exit(1)
+    print(f"repo-stats: in sync ({modules} modules, {lines} lines)")
+else:
+    print(f"repo-stats: stamped {modules} modules, {lines} lines into {readme}, {svg}, {stats_json}")
+PY