diff --git a/web_demo/app.py b/web_demo/app.py
index 8100b2cd..fc4abe62 100644
--- a/web_demo/app.py
+++ b/web_demo/app.py
@@ -432,6 +432,51 @@ def cat_mode_decode_video():
         return json.dumps({"error": "Video decode failed"}), 500, {"Content-Type": "application/json"}
 
 
+def _refine_blink_period(runs_list, start_idx, end_idx, initial_bp):
+    """Refine the blink-period estimate using the median single-blink run length.
+
+    With random 2-bit symbols about 75% of runs last exactly one blink, so the
+    median of those run lengths measures the period more precisely than the
+    16-blink preamble alone.
+
+    Compressed or camera-captured video (VP9/H.264 inter-frame blur, rolling
+    shutter) adds spurious sub-blink runs at ON/OFF transitions. Counted as
+    single blinks, they once dragged a correct 5.94-frame estimate down to 4.0
+    and inflated the decoded bit count ~1.5x. Two guards prevent that:
+      1. Only runs within 0.5x-1.5x of the current estimate are sampled.
+      2. A median outside 0.7x-1.4x of ``initial_bp`` is rejected and the last
+         accepted estimate is kept.
+
+    Runs ``start_idx`` (inclusive) to ``end_idx`` (exclusive) of ``runs_list``
+    are sampled; element ``[2]`` of each run is its length. At most three passes
+    are made; refinement stops early on convergence, on fewer than 10 in-band
+    runs, or on a rejected median. The result is always a float.
+
+    Module-level (not a closure) so web_demo/test_cat_mode_refine_bp.py can
+    exercise the real implementation.
+    """
+    # Drift window around the reliable preamble estimate; fixed for every pass.
+    drift_lo, drift_hi = 0.7 * initial_bp, 1.4 * initial_bp
+    estimate = initial_bp
+    for _ in range(3):
+        # Plausible single-blink window around the current estimate.
+        band_lo, band_hi = 0.5 * estimate, 1.5 * estimate
+        lengths = (runs_list[idx][2] for idx in range(start_idx, end_idx))
+        in_band = sorted(n for n in lengths if band_lo <= n <= band_hi)
+        if len(in_band) < 10:
+            break  # too few samples to trust a median
+        median = in_band[len(in_band) // 2]
+        # A median this far from the preamble estimate means artifacts dominate.
+        if not (drift_lo <= median <= drift_hi):
+            break
+        # Accept the median; stop once it no longer moves the estimate.
+        converged = abs(median - estimate) < 1e-6
+        estimate = float(median)
+        if converged:
+            break
+    return float(estimate)
+
+
 def _decode_cat_video(video_path):
     """
     Decode binary data from a Cat Mode video using NRZ run-length analysis.
@@ -642,46 +687,10 @@ def decode_nrz_adaptive(bp, runs_list, start_idx, end_idx, expected_bits=None):
 
         return "".join(bits * nb for _, _, nb, _, bits in adjusted)
 
-    def refine_bp(runs_list, start_idx, end_idx, initial_bp):
-        """Refine blink period estimate using median of single-blink run lengths.
-
-        ~75% of runs in random 2-bit data are single-blink transitions.
-        These give exact bp measurements. Using their median gives a much
-        more accurate bp than the 16-blink preamble alone.
-
-        Robustness: real (compressed / camera-captured) video produces spurious
-        short runs at ON/OFF transitions — VP9/H.264 inter-frame blur and rolling
-        shutter momentarily push a transitioning eye across the threshold. Those
-        sub-blink runs were polluting the single-blink set and dragging the median
-        well below the true period (observed: a correct 5.94-frame preamble
-        estimate collapsing to 4.0 on VP9 video, inflating the decoded bit count
-        ~1.5×). Two guards prevent that:
-          1. Only count runs inside a plausible single-blink band around the
-             current estimate, excluding the tiny transition artifacts.
-          2. Never let the refined value stray far from the preamble-derived
-             estimate, which is reliable (it spans exactly 16 known blinks).
-        """
-        bp = initial_bp
-        for _ in range(3):
-            lo, hi = 0.5 * bp, 1.5 * bp  # plausible single-blink window
-            single_blink_lengths = [
-                runs_list[i][2]
-                for i in range(start_idx, end_idx)
-                if lo <= runs_list[i][2] <= hi
-            ]
-            if len(single_blink_lengths) < 10:
-                break
-            single_blink_lengths.sort()
-            new_bp = single_blink_lengths[len(single_blink_lengths) // 2]
-            # Reject refinements that drift implausibly far from the reliable
-            # preamble estimate — that only happens when artifacts dominate.
-            if not (0.7 * initial_bp <= new_bp <= 1.4 * initial_bp):
-                break
-            if abs(new_bp - bp) < 1e-6:
-                bp = float(new_bp)
-                break
-            bp = float(new_bp)
-        return float(bp)
+    # refine_bp is implemented at module scope (_refine_blink_period) so it can
+    # be unit-tested directly; the artifact-collapse guard it carries is the
+    # cat-mode decode-reliability fix exercised by test_cat_mode_refine_bp.py.
+    refine_bp = _refine_blink_period
 
     def whiten(binary_str):
         """XOR binary string with deterministic PRNG to break up long same-state runs.
diff --git a/web_demo/test_cat_mode_refine_bp.py b/web_demo/test_cat_mode_refine_bp.py
new file mode 100644
index 00000000..b4d81313
--- /dev/null
+++ b/web_demo/test_cat_mode_refine_bp.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+"""Regression tests for cat-mode blink-period refinement on compressed video.
+
+Background
+----------
+Cat mode transmits 2 bits per blink by flashing two green eyes on screen; the
+decoder (`_decode_cat_video`) recovers the bitstream from run lengths. The blink
+period ``bp`` (in video frames) is first estimated from the 16-blink preamble,
+then sharpened by `_refine_blink_period` using the median of single-blink runs.
+
+The bug (fixed in commit 7101314, guarded here): on compressed / camera-captured
+video (VP9, H.264, rolling shutter) the ON/OFF transitions produce spurious
+1-2 frame runs. The old refinement counted those as "single blinks", dragging
+the median *below* the true period — a correct 5.94-frame estimate collapsing
+toward 4.0, which inflated the decoded bit count ~1.5x and broke every decode.
+
+`_refine_blink_period` now (1) only considers runs inside a plausible band
+around the current estimate and (2) rejects any refined value that strays too
+far from the reliable preamble-derived estimate. These tests lock in both the
+unit-level property and the full generate -> VP9-compress -> decode round trip.
+"""
+
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent))
+
+from app import _refine_blink_period  # noqa: E402
+
+
+def _runs(lengths):
+    """Wrap each length as a (state, start, length) run, the shape the decoder indexes."""
+    return [(("1", "0"), 0, int(n)) for n in lengths]
+
+
+# --------------------------------------------------------------------------
+# Unit tests: _refine_blink_period directly (fast, no cv2/ffmpeg needed)
+# --------------------------------------------------------------------------
+
+
+def test_clean_single_blinks_refine_to_true_period():
+    """Thirty clean 6-frame runs pull a slightly low 5.6 estimate up to the true 6."""
+    clean = _runs([6] * 30)  # every run is exactly one true blink period long
+    refined = _refine_blink_period(clean, 0, len(clean), initial_bp=5.6)
+    assert refined == pytest.approx(6.0)
+
+
+def test_transition_artifacts_do_not_collapse_bp():
+    """The core regression: spurious short runs must NOT drag bp below the truth.
+
+    Mirrors compressed video: 35 real single-blink runs (~6 frames) polluted by
+    40 transition artifacts (1-3 frames, roughly one per blink boundary). The old
+    unfiltered median collapsed to ~2.0 on this mix; the band-filtered + clamped
+    version must stay at the true period.
+    """
+    real = [6] * 35
+    artifacts = [1, 2, 3, 2] * 10  # 40 sub-blink runs; only the ten 3s fall in band (>= 0.5 * bp)
+    runs = _runs(real + artifacts)
+    preamble_estimate = 5.94  # spans 16 known blinks -> reliable
+
+    refined = _refine_blink_period(runs, 0, len(runs), initial_bp=preamble_estimate)
+
+    # Must stay essentially at the true period, never collapse toward the artifacts.
+    assert refined == pytest.approx(6.0, abs=0.5)
+    # Explicit guard against the observed failure (collapse to ~4.0).
+    assert refined > 5.0
+
+
+def test_refinement_clamped_to_preamble_estimate():
+    """An in-band median that drifts too far from the preamble estimate is rejected."""
+    # Pathological: every run is 4 frames, inside the 0.5x-1.5x band, so the median
+    # is 4.0 -- below 0.7 * 6.0 = 4.2, so it must be discarded and 6.0 returned.
+    runs = _runs([4] * 50)
+    refined = _refine_blink_period(runs, 0, len(runs), initial_bp=6.0)
+    assert refined == pytest.approx(6.0)  # estimate kept unchanged, not merely >= 0.7 * 6.0
+
+
+def test_too_few_runs_returns_initial_estimate():
+    """Fewer than 10 in-band runs: the preamble estimate comes back untouched."""
+    sparse = _runs([6] * 5)  # only 5 samples, below the 10-run minimum
+    assert pytest.approx(5.9) == _refine_blink_period(sparse, 0, len(sparse), initial_bp=5.9)
+
+
+# --------------------------------------------------------------------------
+# Integration test: full generate -> VP9 compress -> decode round trip.
+# Skipped automatically where cv2 / ffmpeg / the cat asset are unavailable.
+# --------------------------------------------------------------------------
+
+_HAS_CV2 = False
+try:
+    import cv2  # noqa: F401
+
+    _HAS_CV2 = True
+except Exception:  # pragma: no cover - environment dependent
+    pass
+
+_HAS_FFMPEG = shutil.which("ffmpeg") is not None
+
+
+@pytest.mark.skipif(not (_HAS_CV2 and _HAS_FFMPEG), reason="needs cv2 + ffmpeg")
+@pytest.mark.parametrize("speed_ms", [50, 100])
+def test_vp9_compressed_video_decodes_to_exact_payload(speed_ms):
+    """End-to-end: a VP9-compressed cat video decodes to the exact payload.
+
+    This is the scenario that was broken: compression-induced transition
+    artifacts collapsed bp and inflated the bitstream. The decode must recover
+    the original bits and keep bp at the true period.
+    """
+    import hashlib
+
+    import test_cat_e2e_speeds as gen
+    from app import _decode_cat_video
+
+    if not os.path.exists(gen.CAT_IMAGE_PATH):
+        pytest.skip("cat carrier image not available")
+
+    frames = gen.prepare_cat_frames()
+    raw_bits = gen.hex_to_binary(hashlib.sha256(b"").hexdigest())  # 256-bit payload
+    whitened = gen.whiten(raw_bits)
+
+    clean_path = gen.generate_video(whitened, speed_ms, frames)
+    vp9_fd, vp9_path = tempfile.mkstemp(suffix=".webm")
+    os.close(vp9_fd)  # only the path is needed; ffmpeg (-y) overwrites the file
+    try:
+        subprocess.run(
+            ["ffmpeg", "-y", "-i", clean_path, "-c:v", "libvpx-vp9", "-b:v", "800k",
+             "-deadline", "realtime", "-cpu-used", "5", vp9_path],
+            check=True, capture_output=True,
+        )
+        result = _decode_cat_video(vp9_path)
+    finally:
+        os.unlink(clean_path)
+        os.unlink(vp9_path)
+
+    # _decode_cat_video returns the de-whitened payload bits.
+    assert result["binary"] == raw_bits
+    assert result["bits"] == len(raw_bits)
+
+    # bp must hold at the true period (speed_ms in frames at gen.VIDEO_FPS), not collapse.
+    expected_bp = round(speed_ms / 1000.0 * gen.VIDEO_FPS)
+    assert result["diagnostics"]["bp_refined"] == pytest.approx(expected_bp, abs=0.5)
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-v"]))