Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 49 additions & 40 deletions web_demo/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,51 @@ def cat_mode_decode_video():
return json.dumps({"error": "Video decode failed"}), 500, {"Content-Type": "application/json"}


def _refine_blink_period(
    runs_list,
    start_idx,
    end_idx,
    initial_bp,
    *,
    band=(0.5, 1.5),
    drift_limits=(0.7, 1.4),
    min_samples=10,
    max_iterations=3,
):
    """Refine the blink-period estimate from the median single-blink run length.

    The original author notes that ~75% of runs in random 2-bit data are
    single-blink transitions, so their median measures the blink period more
    accurately than the 16-blink preamble alone.

    Robustness: compressed / camera-captured video produces spurious short
    runs at ON/OFF transitions (the original notes a correct 5.94-frame
    preamble estimate collapsing to 4.0 on VP9 video). Two guards prevent
    those artifacts from dragging the median down:

    1. Only runs whose length lies inside ``band`` (as multiples of the
       current estimate) are treated as single blinks.
    2. A candidate median outside ``drift_limits`` (as multiples of
       ``initial_bp``) is rejected and the last accepted value is kept.

    Module-level (not a closure) so web_demo/test_cat_mode_refine_bp.py can
    exercise the real implementation.

    Args:
        runs_list: Sequence of runs; element ``[2]`` of each run is its
            length in frames.
        start_idx: First run index to consider (inclusive). Values below 0
            are treated as 0.
        end_idx: Last run index to consider (exclusive). Values past the end
            of ``runs_list`` are treated as ``len(runs_list)``.
        initial_bp: Preamble-derived blink period, in frames.
        band: ``(low, high)`` multipliers of the current estimate that bound
            a plausible single-blink run.
        drift_limits: ``(low, high)`` multipliers of ``initial_bp`` that bound
            an acceptable refined value.
        min_samples: Minimum number of in-band runs required to refine.
        max_iterations: Maximum number of refinement passes.

    Returns:
        The refined blink period as a ``float``; ``float(initial_bp)`` when
        no refinement was accepted.
    """
    # Clamp the window so an out-of-range end index cannot raise IndexError
    # and a negative start index cannot silently wrap around to the tail.
    first = max(start_idx, 0)
    last = min(end_idx, len(runs_list))
    # Run lengths never change between passes; extract them once.
    lengths = [runs_list[i][2] for i in range(first, last)]

    band_lo, band_hi = band
    drift_lo = drift_limits[0] * initial_bp
    drift_hi = drift_limits[1] * initial_bp
    # An empty sample set has no median, so always require at least one.
    required = max(min_samples, 1)

    bp = float(initial_bp)
    for _ in range(max_iterations):
        lo, hi = band_lo * bp, band_hi * bp  # plausible single-blink window
        single_blink_lengths = sorted(
            length for length in lengths if lo <= length <= hi
        )
        if len(single_blink_lengths) < required:
            break
        # Upper median: the middle element of the sorted in-band lengths.
        new_bp = float(single_blink_lengths[len(single_blink_lengths) // 2])
        # Reject refinements that drift implausibly far from the reliable
        # preamble estimate; that only happens when artifacts dominate.
        if not (drift_lo <= new_bp <= drift_hi):
            break
        converged = abs(new_bp - bp) < 1e-6
        bp = new_bp
        if converged:
            break
    return bp


def _decode_cat_video(video_path):
"""
Decode binary data from a Cat Mode video using NRZ run-length analysis.
Expand Down Expand Up @@ -642,46 +687,10 @@ def decode_nrz_adaptive(bp, runs_list, start_idx, end_idx, expected_bits=None):

return "".join(bits * nb for _, _, nb, _, bits in adjusted)

# refine_bp is implemented at module scope (_refine_blink_period) so it can
# be unit-tested directly; the artifact-collapse guard it carries is the
# cat-mode decode-reliability fix exercised by test_cat_mode_refine_bp.py.
# The former nested definition was a verbatim copy of that function and was
# rebound by this assignment anyway, so only the alias is kept.
refine_bp = _refine_blink_period

def whiten(binary_str):
"""XOR binary string with deterministic PRNG to break up long same-state runs.
Expand Down
151 changes: 151 additions & 0 deletions web_demo/test_cat_mode_refine_bp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/usr/bin/env python3
"""Regression tests for cat-mode blink-period refinement on compressed video.

Background
----------
Cat mode transmits 2 bits/frame by blinking two green eyes on screen; the
decoder (`_decode_cat_video`) recovers the bitstream from run lengths. The blink
period ``bp`` (in video frames) is first estimated from the 16-blink preamble,
then sharpened by `_refine_blink_period` using the median of single-blink runs.

The bug (fixed in commit 7101314, guarded here): on compressed / camera-captured
video (VP9, H.264, rolling shutter) the ON/OFF transitions produce spurious
1-2 frame runs. The old refinement counted those as "single blinks", dragging
the median *below* the true period — a correct 5.94-frame estimate collapsing
toward 4.0, which inflated the decoded bit count ~1.5x and broke every decode.

`_refine_blink_period` now (1) only considers runs inside a plausible band
(0.5x-1.5x) around the current estimate and (2) rejects any refinement that
falls outside 0.7x-1.4x of the reliable preamble-derived estimate, keeping the
last accepted value instead. These tests lock in both the unit-level property
and the full generate -> VP9-compress -> decode round trip.
"""

import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).parent))

from app import _refine_blink_period # noqa: E402


def _runs(lengths):
    """Wrap each length in a ``(state, start, length)`` run tuple.

    Every run gets the fixed state ``("1", "0")`` and start ``0``; only the
    length varies, coerced to ``int``.
    """
    state = ("1", "0")
    run_list = []
    for n in lengths:
        run_list.append((state, 0, int(n)))
    return run_list


# --------------------------------------------------------------------------
# Unit tests: _refine_blink_period directly (fast, no cv2/ffmpeg needed)
# --------------------------------------------------------------------------


def test_clean_single_blinks_refine_to_true_period():
    """Uniform 6-frame runs pull a slightly-off 5.6 estimate onto 6.0."""
    # Thirty identical runs at the true period; the starting estimate is a
    # little low, so the median must sharpen it to exactly 6.
    clean_runs = _runs([6 for _ in range(30)])
    refined = _refine_blink_period(clean_runs, 0, len(clean_runs), initial_bp=5.6)
    assert refined == pytest.approx(6.0)


def test_transition_artifacts_do_not_collapse_bp():
    """Core regression: sub-blink artifact runs must not pull bp downward.

    The fixture imitates compressed video: 35 genuine single-blink runs of
    6 frames mixed with 40 transition artifacts of 1-3 frames. A median taken
    over all 75 runs without filtering would land on a 3-frame artifact; the
    band-filtered, drift-guarded refinement has to stay at the true period.
    """
    blink_runs = [6] * 35
    transition_runs = [1, 2, 3, 2] * 10  # 40 sub-blink artifact runs
    polluted = _runs(blink_runs + transition_runs)

    # 5.94 is the preamble-derived starting estimate used by this scenario.
    refined = _refine_blink_period(polluted, 0, len(polluted), initial_bp=5.94)

    # Stays essentially on the true period ...
    assert refined == pytest.approx(6.0, abs=0.5)
    # ... and, stated explicitly, never collapses toward ~4.0.
    assert refined > 5.0


def test_refinement_clamped_to_preamble_estimate():
    """A median outside the 0.7x-1.4x guard is rejected and the estimate kept."""
    # Pathological input: every run is 4 frames, so all 50 sit inside the
    # 0.5x-1.5x band around 6.0 and the median is 4.0. That is below
    # 0.7 * 6.0 = 4.2, so the refinement must be rejected outright.
    initial_bp = 6.0
    runs = _runs([4] * 50)
    refined = _refine_blink_period(runs, 0, len(runs), initial_bp=initial_bp)
    assert refined >= 0.7 * initial_bp
    # The lower bound alone would also accept a partially-collapsed value;
    # a rejected refinement must leave the preamble estimate untouched.
    assert refined == pytest.approx(initial_bp)


def test_too_few_runs_returns_initial_estimate():
    """With fewer than 10 in-band samples the estimate is returned unchanged."""
    estimate = 5.9
    sparse_runs = _runs([6, 6, 6, 6, 6])  # only five samples
    result = _refine_blink_period(
        sparse_runs, 0, len(sparse_runs), initial_bp=estimate
    )
    assert result == pytest.approx(estimate)


# --------------------------------------------------------------------------
# Integration test: full generate -> VP9 compress -> decode round trip.
# Skipped automatically where cv2 / ffmpeg / the cat asset are unavailable.
# --------------------------------------------------------------------------

# Capability probes used to skip the integration test where its external
# dependencies are missing.
try:
    import cv2  # noqa: F401
except Exception:  # pragma: no cover - environment dependent
    _HAS_CV2 = False
else:
    _HAS_CV2 = True

_HAS_FFMPEG = shutil.which("ffmpeg") is not None


@pytest.mark.skipif(not (_HAS_CV2 and _HAS_FFMPEG), reason="needs cv2 + ffmpeg")
@pytest.mark.parametrize("speed_ms", [50, 100])
def test_vp9_compressed_video_decodes_to_exact_payload(speed_ms):
    """End-to-end: a VP9-compressed cat video decodes to the exact payload.

    This is the scenario that was broken: compression-induced transition
    artifacts collapsed bp and inflated the bitstream. The decode must recover
    the original bits and keep bp at the true period.
    """
    import hashlib

    import test_cat_e2e_speeds as gen
    from app import _decode_cat_video

    if not os.path.exists(gen.CAT_IMAGE_PATH):
        pytest.skip("cat carrier image not available")

    frames = gen.prepare_cat_frames()
    raw_bits = gen.hex_to_binary(hashlib.sha256(b"").hexdigest())  # 256-bit payload
    whitened = gen.whiten(raw_bits)

    clean_path = gen.generate_video(whitened, speed_ms, frames)
    vp9_path = None
    try:
        # Created inside the try so a failure here still removes clean_path.
        vp9_fd, vp9_path = tempfile.mkstemp(suffix=".webm")
        os.close(vp9_fd)
        subprocess.run(
            ["ffmpeg", "-y", "-i", clean_path, "-c:v", "libvpx-vp9", "-b:v", "800k",
             "-deadline", "realtime", "-cpu-used", "5", vp9_path],
            check=True, capture_output=True,
        )
        result = _decode_cat_video(vp9_path)
    finally:
        # Guarded cleanup: a file that is already gone must not raise here
        # and mask the real failure from the try body.
        for temp_path in (clean_path, vp9_path):
            if temp_path is not None and os.path.exists(temp_path):
                os.unlink(temp_path)

    # _decode_cat_video returns the de-whitened payload bits.
    assert result["binary"] == raw_bits
    assert result["bits"] == len(raw_bits)

    # bp must hold at the true period (speed_ms converted to frames at
    # gen.VIDEO_FPS), not collapse.
    expected_bp = round(speed_ms / 1000.0 * gen.VIDEO_FPS)
    assert result["diagnostics"]["bp_refined"] == pytest.approx(expected_bp, abs=0.5)


if __name__ == "__main__":
    # Run this file's tests verbosely and propagate pytest's exit status.
    raise SystemExit(pytest.main([__file__, "-v"]))
Loading