From 82d6be2e7397827fdbe50754b59995d498d8acc1 Mon Sep 17 00:00:00 2001 From: Val Vladescu Date: Wed, 22 Apr 2026 21:29:14 +0300 Subject: [PATCH] spec 110 US-A: cacp-python reference parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New cacp-python/ subdirectory implementing the canonical Python parser for CACP per the spec in ../README.md. Parser (~145 LOC incl. docstrings; core logic ~80 LOC) covers: - 9 STATUS values: ok|fail|partial|needs_decision|no_changes| decomposed|rejected|retry|fixture_gap - 3 TESTS/BUILD values: pass|fail|skip (+ optional :N count for TESTS) - Whitespace tolerance (space, multiple spaces, tab) via [ \t]* — not \s*, which would swallow newlines and cross field boundaries - Case-insensitive field names; STATUS/TESTS/BUILD values normalized to lowercase Zero runtime dependencies (pure stdlib + dataclass model). A dataclass keeps the dep tree empty; callers wanting Pydantic-style validation can wrap the returned record. Tests (42 passing) cover: 9-value parametrized STATUS, the 6-line tolerance vector from README.md verbatim, README response example round-trip, dispatch-example negative (no STATUS field -> parse returns None), case-insensitive field names across the full grid, and whitespace tolerance for TESTS/BUILD. Spec 110 US-A of the switchyard project — this package becomes the canonical reference impl that downstream (switchyard v5.4.x, zendev) consumes as `cacp>=0.1.0`. Other language implementations should port tests/test_conformance.py against the same vector. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cacp-python/LICENSE | 21 ++++ cacp-python/README.md | 62 +++++++++++ cacp-python/pyproject.toml | 37 +++++++ cacp-python/src/cacp/__init__.py | 19 ++++ cacp-python/src/cacp/models.py | 46 ++++++++ cacp-python/src/cacp/parser.py | 145 ++++++++++++++++++++++++++ cacp-python/tests/__init__.py | 0 cacp-python/tests/test_conformance.py | 141 +++++++++++++++++++++++++ cacp-python/tests/test_roundtrip.py | 103 ++++++++++++++++++ 9 files changed, 574 insertions(+) create mode 100644 cacp-python/LICENSE create mode 100644 cacp-python/README.md create mode 100644 cacp-python/pyproject.toml create mode 100644 cacp-python/src/cacp/__init__.py create mode 100644 cacp-python/src/cacp/models.py create mode 100644 cacp-python/src/cacp/parser.py create mode 100644 cacp-python/tests/__init__.py create mode 100644 cacp-python/tests/test_conformance.py create mode 100644 cacp-python/tests/test_roundtrip.py diff --git a/cacp-python/LICENSE b/cacp-python/LICENSE new file mode 100644 index 0000000..941645a --- /dev/null +++ b/cacp-python/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 zenprocess contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/cacp-python/README.md b/cacp-python/README.md new file mode 100644 index 0000000..4561a33 --- /dev/null +++ b/cacp-python/README.md @@ -0,0 +1,62 @@ +# cacp — Reference Python parser for CACP + +Canonical Python reference implementation of [CACP (Compressed Agent +Communication Protocol)](https://github.com/zenprocess/cacp). The spec +lives in the parent [`README.md`](../README.md); this package implements +the parser against it. + +## Install + +``` +pip install cacp +``` + +Zero runtime dependencies — pure stdlib. + +## Usage + +```python +from cacp import parse + +text = open("agent_response.txt").read() +response = parse(text) + +if response is None: + print("not a CACP response") +elif response.status == "ok": + print(f"agent created {len(response.files_created)} files") + print(f"files: {response.files_created}") +else: + print(f"agent reported {response.status}: {response.error}") +``` + +`parse()` returns `None` when no `STATUS:` field with a canonical value is found — treat that as +"not a CACP response" rather than an error. + +## Conformance + +The parser passes the [conformance test vector](../README.md#conformance-test-vector) +from the parent spec, plus round-trip tests against the literal response +example block. Tolerance rules honored: + +- Whitespace between `:` and value: zero or more spaces, or a tab +- Field names: case-insensitive +- STATUS / TESTS / BUILD values: normalized to lowercase + +## Design + +- Pure stdlib — no Pydantic, no external deps. A dataclass-based + `CACPResponse` keeps the dep tree empty; callers that want validation + can wrap it. +- ~145 LOC in the parser including docstrings (core logic ~80 LOC). If it grows past 200 LOC it has probably + absorbed orchestrator-specific complexity that belongs upstream. 
+ +## Status + +This is the canonical Python reference. Other language implementations +(Rust, TypeScript, Go, ...) should pass an equivalent port of +[`tests/test_conformance.py`](tests/test_conformance.py). + +## License + +MIT diff --git a/cacp-python/pyproject.toml b/cacp-python/pyproject.toml new file mode 100644 index 0000000..f9f6e85 --- /dev/null +++ b/cacp-python/pyproject.toml @@ -0,0 +1,37 @@ +[project] +name = "cacp" +version = "0.1.0" +description = "Reference Python parser for CACP (Compressed Agent Communication Protocol)" +readme = "README.md" +license = { text = "MIT" } +requires-python = ">=3.10" +authors = [ + { name = "zenprocess contributors" }, +] +keywords = ["cacp", "llm", "agents", "protocol", "parser"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [] + +[project.optional-dependencies] +dev = ["pytest>=7.0"] + +[project.urls] +Homepage = "https://github.com/zenprocess/cacp" +Specification = "https://github.com/zenprocess/cacp#readme" +Issues = "https://github.com/zenprocess/cacp/issues" + +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/cacp-python/src/cacp/__init__.py b/cacp-python/src/cacp/__init__.py new file mode 100644 index 0000000..ea098cf --- /dev/null +++ b/cacp-python/src/cacp/__init__.py @@ -0,0 +1,19 @@ +"""cacp — reference Python parser for the CACP protocol. + +See the canonical spec at https://github.com/zenprocess/cacp. 
+""" + +from cacp.parser import parse +from cacp.models import ( + CACPResponse, + CANONICAL_STATUS_VALUES, + CANONICAL_TESTS_BUILD_VALUES, +) + +__all__ = [ + "parse", + "CACPResponse", + "CANONICAL_STATUS_VALUES", + "CANONICAL_TESTS_BUILD_VALUES", +] +__version__ = "0.1.0" diff --git a/cacp-python/src/cacp/models.py b/cacp-python/src/cacp/models.py new file mode 100644 index 0000000..9323605 --- /dev/null +++ b/cacp-python/src/cacp/models.py @@ -0,0 +1,46 @@ +"""CACP response model and canonical vocabulary constants. + +The data model is a plain dataclass — zero runtime dependencies, cheap to +construct, and trivial to serialize. Callers that want Pydantic-style +validation can wrap it. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +CANONICAL_STATUS_VALUES: tuple[str, ...] = ( + "ok", + "fail", + "partial", + "needs_decision", + "no_changes", + "decomposed", + "rejected", + "retry", + "fixture_gap", +) +"""The 9 canonical STATUS values per the CACP spec.""" + + +CANONICAL_TESTS_BUILD_VALUES: tuple[str, ...] = ("pass", "fail", "skip") +"""The 3 canonical TESTS/BUILD values per the CACP spec.""" + + +@dataclass(slots=True) +class CACPResponse: + """Parsed CACP response record. + + All fields except ``status`` are optional — missing fields remain ``None`` + or an empty list. ``tests`` preserves the optional ``:N`` count suffix + (e.g. ``"pass:12"``) verbatim so callers can choose whether to split it. + """ + + status: str + files_created: list[str] = field(default_factory=list) + files_modified: list[str] = field(default_factory=list) + tests: str | None = None + build: str | None = None + error: str | None = None + learned: str | None = None diff --git a/cacp-python/src/cacp/parser.py b/cacp-python/src/cacp/parser.py new file mode 100644 index 0000000..aed1d4c --- /dev/null +++ b/cacp-python/src/cacp/parser.py @@ -0,0 +1,145 @@ +"""Reference CACP parser. 
+ +Implements the tolerance rules from the canonical spec: + +- Whitespace between the colon and the value: zero or more spaces, or a tab. + We deliberately use ``[ \\t]*`` rather than ``\\s*`` so newlines are not + consumed — each field stays on its own line. +- Field names are case-insensitive (``STATUS:``, ``status:``, ``Status:`` + all denote the same field). +- Values for STATUS, TESTS, BUILD are normalized to lowercase. + +The parser is deliberately small (7 compiled regexes, one per field) and +has zero runtime dependencies outside the stdlib. +""" + +from __future__ import annotations + +import re + +from cacp.models import ( + CACPResponse, + CANONICAL_STATUS_VALUES, + CANONICAL_TESTS_BUILD_VALUES, +) + + +# --------------------------------------------------------------------------- +# Shared regex fragments +# --------------------------------------------------------------------------- + +# Whitespace-between-colon-and-value: spaces or a tab, but NEVER a newline. +# Using ``\s*`` here would let an empty value swallow the following line. 
+_SEP = r"[ \t]*" + +_STATUS_ALT = "|".join(re.escape(v) for v in CANONICAL_STATUS_VALUES) +_TESTS_BUILD_ALT = "|".join(re.escape(v) for v in CANONICAL_TESTS_BUILD_VALUES) + + +# --------------------------------------------------------------------------- +# Compiled per-field regexes +# --------------------------------------------------------------------------- + +_STATUS_RE = re.compile( + rf"^{_SEP}STATUS:{_SEP}({_STATUS_ALT})\b", + re.IGNORECASE | re.MULTILINE, +) + +_TESTS_RE = re.compile( + rf"^{_SEP}TESTS:{_SEP}((?:{_TESTS_BUILD_ALT})(?::\d+)?)\b", + re.IGNORECASE | re.MULTILINE, +) + +_BUILD_RE = re.compile( + rf"^{_SEP}BUILD:{_SEP}({_TESTS_BUILD_ALT})\b", + re.IGNORECASE | re.MULTILINE, +) + +_FILES_CREATED_RE = re.compile( + rf"^{_SEP}FILES_CREATED:{_SEP}([^\n]*)", + re.IGNORECASE | re.MULTILINE, +) + +_FILES_MODIFIED_RE = re.compile( + rf"^{_SEP}FILES_MODIFIED:{_SEP}([^\n]*)", + re.IGNORECASE | re.MULTILINE, +) + +_ERROR_RE = re.compile( + rf"^{_SEP}ERROR:{_SEP}([^\n]*)", + re.IGNORECASE | re.MULTILINE, +) + +_LEARNED_RE = re.compile( + rf"^{_SEP}LEARNED:{_SEP}([^\n]*)", + re.IGNORECASE | re.MULTILINE, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _split_paths(raw: str) -> list[str]: + """Split a comma-separated FILES_* value into a clean list.""" + if not raw: + return [] + return [p.strip() for p in raw.split(",") if p.strip()] + + +def _optional_capture(pattern: re.Pattern[str], text: str) -> str | None: + m = pattern.search(text) + if not m: + return None + value = m.group(1).strip() + return value or None + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def parse(text: str) -> CACPResponse | None: + """Parse a CACP-formatted response into a typed record. 
+ + Returns ``None`` if ``STATUS`` cannot be extracted — callers interpret + that as "not a CACP response". All other fields are optional; missing + fields become ``None`` / ``[]`` as appropriate. + + Tolerance rules (per the canonical CACP spec): + + - Whitespace between ``:`` and the value: zero or more spaces or a tab. + - Field names are matched case-insensitively. + - STATUS / TESTS / BUILD values are normalized to lowercase. + """ + status_match = _STATUS_RE.search(text) + if not status_match: + return None + status = status_match.group(1).lower() + + tests_match = _TESTS_RE.search(text) + tests = tests_match.group(1).lower() if tests_match else None + + build_match = _BUILD_RE.search(text) + build = build_match.group(1).lower() if build_match else None + + fc_match = _FILES_CREATED_RE.search(text) + files_created = _split_paths(fc_match.group(1)) if fc_match else [] + + fm_match = _FILES_MODIFIED_RE.search(text) + files_modified = _split_paths(fm_match.group(1)) if fm_match else [] + + error = _optional_capture(_ERROR_RE, text) + learned = _optional_capture(_LEARNED_RE, text) + + return CACPResponse( + status=status, + files_created=files_created, + files_modified=files_modified, + tests=tests, + build=build, + error=error, + learned=learned, + ) diff --git a/cacp-python/tests/__init__.py b/cacp-python/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cacp-python/tests/test_conformance.py b/cacp-python/tests/test_conformance.py new file mode 100644 index 0000000..a0953a9 --- /dev/null +++ b/cacp-python/tests/test_conformance.py @@ -0,0 +1,141 @@ +"""Conformance tests for the canonical CACP parser. + +These tests implement the test vector published in the parent README.md +and the surrounding tolerance rules. A conformant implementation in any +language should pass an equivalent suite. 
+""" + +from __future__ import annotations + +import pytest + +from cacp import ( + CANONICAL_STATUS_VALUES, + CANONICAL_TESTS_BUILD_VALUES, + parse, +) + + +# --------------------------------------------------------------------------- +# 1. All 9 STATUS values parse +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("value", CANONICAL_STATUS_VALUES) +def test_all_canonical_status_values_parse(value: str) -> None: + response = parse(f"STATUS:{value}\n") + assert response is not None + assert response.status == value + + +# --------------------------------------------------------------------------- +# 2. Tolerance vector from the canonical spec (README.md §"Conformance +# test vector") +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "line, expected", + [ + ("STATUS:ok", "ok"), + ("STATUS: ok", "ok"), + ("STATUS:  ok", "ok"), + ("STATUS:\tok", "ok"), + ("status: ok", "ok"), + ("Status:OK", "ok"), + ], +) +def test_tolerance_vector(line: str, expected: str) -> None: + response = parse(line + "\n") + assert response is not None + assert response.status == expected + + +# --------------------------------------------------------------------------- +# 3. 
TESTS values (pass/fail/skip + optional :N count) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "line, expected", + [ + ("TESTS:pass", "pass"), + ("TESTS:fail", "fail"), + ("TESTS:skip", "skip"), + ("TESTS:pass:42", "pass:42"), + ], +) +def test_tests_values(line: str, expected: str) -> None: + response = parse(f"STATUS:ok\n{line}\n") + assert response is not None + assert response.tests == expected + + +@pytest.mark.parametrize("value", CANONICAL_TESTS_BUILD_VALUES) +def test_tests_tolerance_whitespace(value: str) -> None: + """TESTS field must tolerate the same whitespace as STATUS.""" + for sep in ("", " ", "  ", "\t"): + text = f"STATUS:ok\nTESTS:{sep}{value}\n" + response = parse(text) + assert response is not None + assert response.tests == value + + +# --------------------------------------------------------------------------- +# 4. BUILD values (pass/fail/skip, no count) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("value", CANONICAL_TESTS_BUILD_VALUES) +def test_build_values(value: str) -> None: + response = parse(f"STATUS:ok\nBUILD:{value}\n") + assert response is not None + assert response.build == value + + +@pytest.mark.parametrize("value", CANONICAL_TESTS_BUILD_VALUES) +def test_build_tolerance_whitespace(value: str) -> None: + for sep in ("", " ", "  ", "\t"): + text = f"STATUS:ok\nBUILD:{sep}{value}\n" + response = parse(text) + assert response is not None + assert response.build == value + + +# --------------------------------------------------------------------------- +# 5. 
Negative: no STATUS → not a CACP response → parse returns None +# --------------------------------------------------------------------------- + + +def test_no_status_returns_none() -> None: + assert parse("hello world") is None + + +def test_empty_string_returns_none() -> None: + assert parse("") is None + + +def test_dispatch_format_returns_none() -> None: + """The dispatch (input) format has no STATUS field — parse must reject.""" + dispatch = ( + "TASK: Implement JWT auth middleware\n" + "CONTEXT: Go backend, chi router\n" + "ACCEPTANCE: 1. Middleware validates Bearer tokens\n" + "SCOPE: src/middleware/\n" + ) + assert parse(dispatch) is None + + +# --------------------------------------------------------------------------- +# 6. Case-insensitivity across field names +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "field_name", + ["STATUS", "status", "Status", "STATus", "sTaTuS"], +) +def test_status_field_name_case_insensitive(field_name: str) -> None: + response = parse(f"{field_name}:ok\n") + assert response is not None + assert response.status == "ok" diff --git a/cacp-python/tests/test_roundtrip.py b/cacp-python/tests/test_roundtrip.py new file mode 100644 index 0000000..722527f --- /dev/null +++ b/cacp-python/tests/test_roundtrip.py @@ -0,0 +1,103 @@ +"""Round-trip tests against the literal example blocks in the parent spec. + +The canonical README.md carries an example response block. Per the spec's +"prompt round-trip test" recommendation, the parser MUST accept everything +the spec teaches an agent to emit. +""" + +from __future__ import annotations + +from cacp import parse + + +# Verbatim copy of the response example in the parent README.md (§"Response +# (agent → orchestrator)"). If the spec's example changes, this test fails +# and the parser (or the spec) must be updated in lockstep. 
+RESPONSE_EXAMPLE = """STATUS:ok +FILES_CREATED:src/middleware/jwt.go,src/middleware/jwt_test.go +FILES_MODIFIED:go.mod +TESTS:pass:12 +BUILD:pass +LEARNED:JWT tokens need 24h expiry for mobile clients +""" + + +# Verbatim copy of the dispatch example (§"Dispatch (orchestrator → agent)"). +# This is the INPUT format; none of its fields are CACP response fields, +# so the parser MUST reject it (no STATUS → returns None). +DISPATCH_EXAMPLE = """TASK: Implement JWT auth middleware +CONTEXT: Go backend, chi router +ACCEPTANCE: 1. Middleware validates Bearer tokens 2. Tests pass +SCOPE: src/middleware/ +VERIFY: go test ./... +DONE: return STATUS format +""" + + +def test_readme_response_example_roundtrips() -> None: + response = parse(RESPONSE_EXAMPLE) + assert response is not None + assert response.status == "ok" + assert response.files_created == [ + "src/middleware/jwt.go", + "src/middleware/jwt_test.go", + ] + assert response.files_modified == ["go.mod"] + assert response.tests == "pass:12" + assert response.build == "pass" + assert response.learned is not None + assert response.learned.startswith("JWT tokens") + # No error field in the example. 
+ assert response.error is None + + +def test_readme_dispatch_example_returns_none() -> None: + """Dispatch format has no STATUS → parser strictly rejects it.""" + assert parse(DISPATCH_EXAMPLE) is None + + +def test_fail_response_with_error_field() -> None: + """A failing response carries the ERROR field.""" + text = ( + "STATUS:fail\n" + "ERROR:compilation failed in jwt.go line 42\n" + ) + response = parse(text) + assert response is not None + assert response.status == "fail" + assert response.error == "compilation failed in jwt.go line 42" + + +def test_files_list_whitespace_stripped() -> None: + """Comma-split lists tolerate whitespace around entries.""" + text = "STATUS:ok\nFILES_CREATED: a.py , b.py , c.py\n" + response = parse(text) + assert response is not None + assert response.files_created == ["a.py", "b.py", "c.py"] + + +def test_empty_files_field_is_empty_list() -> None: + text = "STATUS:ok\nFILES_CREATED:\n" + response = parse(text) + assert response is not None + assert response.files_created == [] + + +def test_mixed_case_fields_all_parse() -> None: + """Every field name is case-insensitive, not just STATUS.""" + text = ( + "status:OK\n" + "Files_Created:a.py\n" + "FILES_modified:b.py\n" + "Tests:pass:3\n" + "build:PASS\n" + "Learned:pattern worked\n" + ) + response = parse(text) + assert response is not None + assert response.status == "ok" + assert response.files_created == ["a.py"] + assert response.files_modified == ["b.py"] + assert response.tests == "pass:3" + assert response.build == "pass" + assert response.learned == "pattern worked"