zenprocess · zenprocess · Apr 23, 2026 · Apr 22, 2026
diff --git a/cacp-python/LICENSE b/cacp-python/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 zenprocess contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/cacp-python/README.md b/cacp-python/README.md
@@ -0,0 +1,62 @@
+# cacp — Reference Python parser for CACP
+
+Canonical Python reference implementation of [CACP (Compressed Agent
+Communication Protocol)](https://github.com/zenprocess/cacp). The spec
+lives in the parent [`README.md`](../README.md); this package implements
+the parser against it.
+
+## Install
+
+```
+pip install cacp
+```
+
+Zero runtime dependencies — pure stdlib.
+
+## Usage
+
+```python
+from cacp import parse
+
+text = open("agent_response.txt").read()
+response = parse(text)
+
+if response is None:
+    print("not a CACP response")
+elif response.status == "ok":
+    print(f"agent created {len(response.files_created)} files")
+    print(f"files: {response.files_created}")
+else:
+    print(f"agent reported {response.status}: {response.error}")
+```
+
+`parse()` returns `None` when no `STATUS:` field is found — treat that as
+"not a CACP response" rather than an error.
+
+## Conformance
+
+The parser passes the [conformance test vector](../README.md#conformance-test-vector)
+from the parent spec, plus round-trip tests against the literal response
+example block. Tolerance rules honored:
+
+- Whitespace between `:` and value: any run of spaces and/or tabs, including none (never a newline)
+- Field names: case-insensitive
+- STATUS / TESTS / BUILD values: normalized to lowercase
+
+## Design
+
+- Pure stdlib — no Pydantic, no external deps. A dataclass-based
+  `CACPResponse` keeps the dep tree empty; callers that want validation
+  can wrap it.
+- ~100 LOC in the parser. If it grows past 200 LOC it has probably
+  absorbed orchestrator-specific complexity that belongs upstream.
+
+## Status
+
+This is the canonical Python reference. Other language implementations
+(Rust, TypeScript, Go, ...) should pass an equivalent port of
+[`tests/test_conformance.py`](tests/test_conformance.py).
+
+## License
+
+MIT
diff --git a/cacp-python/pyproject.toml b/cacp-python/pyproject.toml
@@ -0,0 +1,37 @@
+[project]
+name = "cacp"
+version = "0.1.0"
+description = "Reference Python parser for CACP (Compressed Agent Communication Protocol)"
+readme = "README.md"
+license = { text = "MIT" }
+requires-python = ">=3.10"
+authors = [
+    { name = "zenprocess contributors" },
+]
+keywords = ["cacp", "llm", "agents", "protocol", "parser"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = []
+
+[project.optional-dependencies]
+dev = ["pytest>=7.0"]
+
+[project.urls]
+Homepage = "https://github.com/zenprocess/cacp"
+Specification = "https://github.com/zenprocess/cacp#readme"
+Issues = "https://github.com/zenprocess/cacp/issues"
+
+[build-system]
+requires = ["setuptools>=68.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+where = ["src"]
diff --git a/cacp-python/src/cacp/__init__.py b/cacp-python/src/cacp/__init__.py
@@ -0,0 +1,19 @@
+"""cacp — reference Python parser for the CACP protocol.
+
+See the canonical spec at https://github.com/zenprocess/cacp.
+"""
+
+from cacp.parser import parse
+from cacp.models import (
+    CACPResponse,
+    CANONICAL_STATUS_VALUES,
+    CANONICAL_TESTS_BUILD_VALUES,
+)
+
+__all__ = [  # public API, re-exported from cacp.parser and cacp.models
+    "parse",
+    "CACPResponse",
+    "CANONICAL_STATUS_VALUES",
+    "CANONICAL_TESTS_BUILD_VALUES",
+]
+__version__ = "0.1.0"  # matches [project].version in pyproject.toml
diff --git a/cacp-python/src/cacp/models.py b/cacp-python/src/cacp/models.py
@@ -0,0 +1,46 @@
+"""CACP response model and canonical vocabulary constants.
+
+The data model is a plain dataclass — zero runtime dependencies, cheap to
+construct, and trivial to serialize. Callers that want Pydantic-style
+validation can wrap it.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+CANONICAL_STATUS_VALUES: tuple[str, ...] = (  # lowercase; feeds the STATUS regex in cacp.parser
+    "ok",
+    "fail",
+    "partial",
+    "needs_decision",
+    "no_changes",
+    "decomposed",
+    "rejected",
+    "retry",
+    "fixture_gap",
+)
+"""The 9 canonical STATUS values per the CACP spec."""
+
+
+CANONICAL_TESTS_BUILD_VALUES: tuple[str, ...] = ("pass", "fail", "skip")
+"""The 3 canonical TESTS/BUILD values per the CACP spec."""
+
+
+@dataclass(slots=True)
+class CACPResponse:
+    """Parsed CACP response record; only ``status`` is required.
+
+    Unset list fields default to ``[]`` and unset scalar fields to ``None``.
+    ``tests`` keeps any ``:N`` count suffix attached (e.g. ``"pass:12"``) so
+    callers can choose whether to split it off.
+    """
+
+    status: str  # the only field without a default
+    files_created: list[str] = field(default_factory=list)  # new list per instance
+    files_modified: list[str] = field(default_factory=list)  # new list per instance
+    tests: str | None = None  # e.g. "pass" or "pass:12"
+    build: str | None = None
+    error: str | None = None
+    learned: str | None = None
diff --git a/cacp-python/src/cacp/parser.py b/cacp-python/src/cacp/parser.py
@@ -0,0 +1,145 @@
+"""Reference CACP parser.
+
+Implements the tolerance rules from the canonical spec:
+
+- Whitespace between the colon and the value: zero or more spaces, or a tab.
+  We deliberately use ``[ \\t]*`` rather than ``\\s*`` so newlines are not
+  consumed — each field stays on its own line.
+- Field names are case-insensitive (``STATUS:``, ``status:``, ``Status:``
+  all denote the same field).
+- Values for STATUS, TESTS, BUILD are normalized to lowercase.
+
+The parser is deliberately small (seven compiled regexes, one per field) and
+has zero runtime dependencies outside the stdlib.
+"""
+
+from __future__ import annotations
+
+import re
+
+from cacp.models import (
+    CACPResponse,
+    CANONICAL_STATUS_VALUES,
+    CANONICAL_TESTS_BUILD_VALUES,
+)
+
+
+# ---------------------------------------------------------------------------
+# Shared regex fragments
+# ---------------------------------------------------------------------------
+
+# Whitespace-between-colon-and-value: spaces or a tab, but NEVER a newline.
+# Using ``\s*`` here would let a blank line swallow the following field.
+_SEP = r"[ \t]*"  # reused after ^ as well, so field lines may be indented
+
+_STATUS_ALT = "|".join(re.escape(v) for v in CANONICAL_STATUS_VALUES)  # regex alternation body
+_TESTS_BUILD_ALT = "|".join(re.escape(v) for v in CANONICAL_TESTS_BUILD_VALUES)
+
+
+# ---------------------------------------------------------------------------
+# Compiled per-field regexes
+# ---------------------------------------------------------------------------
+
+def _field_re(name: str, value: str) -> re.Pattern[str]:
+    """Compile the matcher for one ``NAME: value`` field line.
+
+    The field name may be indented and is followed by ``:`` plus optional
+    spaces/tabs; matching is case-insensitive and ``^`` anchors at every
+    line start. ``value`` is the regex fragment holding capture group 1.
+    """
+    return re.compile(
+        rf"^{_SEP}{name}:{_SEP}{value}",
+        re.MULTILINE | re.IGNORECASE,
+    )
+
+
+# Free-text fields capture everything up to the end of their line.
+_REST_OF_LINE = r"([^\n]*)"
+
+# Enumerated fields: the value must be a canonical word ending at a word
+# boundary; TESTS may additionally carry a ``:N`` count suffix.
+_STATUS_RE = _field_re("STATUS", rf"({_STATUS_ALT})\b")
+
+_TESTS_RE = _field_re("TESTS", rf"((?:{_TESTS_BUILD_ALT})(?::\d+)?)\b")
+
+_BUILD_RE = _field_re("BUILD", rf"({_TESTS_BUILD_ALT})\b")
+
+# Comma-separated path lists; split later by ``_split_paths``.
+_FILES_CREATED_RE = _field_re("FILES_CREATED", _REST_OF_LINE)
+
+_FILES_MODIFIED_RE = _field_re("FILES_MODIFIED", _REST_OF_LINE)
+
+# Free-form single-line messages; blank values are turned into ``None`` by
+# ``_optional_capture``.
+_ERROR_RE = _field_re("ERROR", _REST_OF_LINE)
+
+_LEARNED_RE = _field_re("LEARNED", _REST_OF_LINE)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _split_paths(raw: str) -> list[str]:
+    """Return the trimmed, non-empty entries of a comma-separated FILES_* value."""
+    if raw:
+        return [entry for entry in map(str.strip, raw.split(",")) if entry]
+    return []
+
+
+def _optional_capture(pattern: re.Pattern[str], text: str) -> str | None:
+    """Return group 1 of the first match, stripped; ``None`` if absent or blank."""
+    found = pattern.search(text)
+    if found is None:
+        return None
+    return found.group(1).strip() or None
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def parse(text: str) -> CACPResponse | None:
+    """Extract the CACP fields from ``text`` and return them as a record.
+
+    ``STATUS`` is mandatory: when no line carries a canonical status value
+    the result is ``None``, which callers read as "not a CACP response".
+    Every other field is optional and falls back to ``None`` (scalar fields)
+    or ``[]`` (file lists).
+
+    Matching behaviour:
+
+    - Field names are case-insensitive and may be indented by spaces/tabs.
+    - Spaces or tabs (possibly none) may separate ``:`` from the value.
+    - STATUS / TESTS / BUILD values are returned lowercased.
+    - When a field appears several times, the first matching line wins.
+    """
+
+    def enum_value(pattern: re.Pattern[str]) -> str | None:
+        # Lowercased capture of an enumerated field, or None when absent.
+        hit = pattern.search(text)
+        return None if hit is None else hit.group(1).lower()
+
+    def path_list(pattern: re.Pattern[str]) -> list[str]:
+        # Comma-separated capture as a list; empty when the field is absent.
+        hit = pattern.search(text)
+        return [] if hit is None else _split_paths(hit.group(1))
+
+    # STATUS gates everything: without it the text is not treated as CACP.
+    status = enum_value(_STATUS_RE)
+    if status is None:
+        return None
+
+    # The remaining fields are looked up independently of one another, so
+    # their order within ``text`` does not matter.
+    return CACPResponse(
+        status=status,
+        files_created=path_list(_FILES_CREATED_RE),
+        files_modified=path_list(_FILES_MODIFIED_RE),
+        tests=enum_value(_TESTS_RE),
+        build=enum_value(_BUILD_RE),
+        error=_optional_capture(_ERROR_RE, text),
+        learned=_optional_capture(_LEARNED_RE, text),
+    )
diff --git a/cacp-python/tests/__init__.py b/cacp-python/tests/__init__.py