From 5bbdc0727d85b5fa178441a88f27aec20b360119 Mon Sep 17 00:00:00 2001
From: Dmitry Voropaev <workerv0ropaev@yandex.ru>
Date: Mon, 22 Jun 2026 00:40:18 +0300
Subject: [PATCH] feat: LLM-grounded semantic layer (kb describe) + semantic
 grounding gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First slice of the LLM-grounded layer (Roadmap). A separate, key-gated `kb
describe` pass has an LLM write a short NL summary + structured claims for each
api_route/entity in a snapshot; every claim is validated against the artifact's
own grounding spans by a DETERMINISTIC sub-property gate, unvalidated claims are
dropped, and a `description` artifact is stored only if something survives —
grounded on the same spans (extraction_method=llm_grounded, model_id +
prompt_version in the key). Never on the offline `kb index` path.

- kb/extract/semantic: grounding.validate_claims (deterministic, no LLM) +
  describe.describe_snapshot (orchestration, reuses kb.llm + write_grounded_artifact).
- queries.spans_for_artifact (span_id + fq_symbol_path + raw_text).
- CLI `kb describe` (lazy LLM import, key-gated via has_llm_key).
- MCP summarize + embed_text gain a `description` branch.
- HARD gate eval/semantic_grounding_test (stub LLM, no key): an adversarial
  fabricated claim is dropped, the grounded one stored, description served as
  llm_grounded. Headline gates eight -> nine.
- docs: README (Status, Quickstart, architecture, nine gates), DESIGN §11/§9
  (un-defer kb.extract.semantic, semantic floor implemented), CHANGELOG.

ruff + mypy --strict clean; 54 eval tests pass (+1 skipped). Stub end-to-end:
descriptions inherit cross-file grounding; an all-ungrounded route's description
is not stored at all.
---
 CHANGELOG.md                           |  13 ++-
 DESIGN.md                              |   8 +-
 README.md                              |  23 +++--
 src/kb/daemon/cli.py                   |  30 +++++++
 src/kb/embed/text.py                   |   3 +
 src/kb/eval/semantic_grounding_test.py |  91 +++++++++++++++++++
 src/kb/extract/semantic/__init__.py    |   7 ++
 src/kb/extract/semantic/describe.py    | 116 +++++++++++++++++++++++++
 src/kb/extract/semantic/grounding.py   |  53 +++++++++++
 src/kb/mcp/records.py                  |   3 +
 src/kb/store/queries.py                |  40 +++++++++
 11 files changed, 379 insertions(+), 8 deletions(-)
 create mode 100644 src/kb/eval/semantic_grounding_test.py
 create mode 100644 src/kb/extract/semantic/__init__.py
 create mode 100644 src/kb/extract/semantic/describe.py
 create mode 100644 src/kb/extract/semantic/grounding.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99f3001..6a7395e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- (nothing yet)
+- **LLM-grounded semantic layer — first slice** (`kb.extract.semantic`, `kb describe`): an optional,
+  key-gated pass (separate from `kb index`) has an LLM write a short NL summary + structured claims
+  for each `api_route` / `entity` artifact in a snapshot. Every claim is validated against the
+  artifact's own grounding spans by a **deterministic sub-property gate**
+  (`grounding.validate_claims`) — claims citing a symbol not in the code are dropped, and a
+  `description` artifact is stored only if something survives, grounded on the same spans
+  (`extraction_method = "llm_grounded"`, `model_id` + `prompt_version` in the artifact key). Surfaced
+  via MCP `get_knowledge` / `search_knowledge`. Uses `kb.llm` (Anthropic default, OpenAI optional).
+- **Semantic grounding HARD gate** (`kb.eval.semantic_grounding_test`): runs the describer on a
+  **stub** LLM (no API key) and asserts an adversarial fabricated claim is dropped while the grounded
+  claim is stored — the DESIGN §9 semantic floor, enforced deterministically in CI. Headline HARD
+  gates: eight → **nine**.
 
 ## [0.3.0] - 2026-06-21
 
diff --git a/DESIGN.md b/DESIGN.md
index 962edde..d8a10ef 100644
--- a/DESIGN.md
+++ b/DESIGN.md
@@ -285,6 +285,12 @@ rejected. **Verbalized LLM confidence is never used as the score.**
 > in a process summary is a real sink-registry match on the path; path endpoints are real
 > entrypoints/sinks. Confidence must honestly count *unknown-unknowns* (edges never discovered
 > by the ~70%-recall call-graph engine), not only "unresolved on the path it found".
+>
+> *Implemented (first slice):* the `kb describe` describer enforces this floor —
+> `kb.extract.semantic.grounding.validate_claims` drops any claim whose cited symbol is absent from
+> the artifact's grounding spans; a description with no surviving claim is not stored. The gate is
+> deterministic, so `semantic_grounding_test` enforces it in CI (stub LLM, no API key), including an
+> adversarial fabricated claim that must be dropped.
 
 ---
 
@@ -319,7 +325,7 @@ freshness(current|stale@sha)`, with a deterministic tie-break for reproducible e
 | `kb.eval` | Tiered eval; deterministic tiers gate CI. | pytest over SHA-pinned golden repos |
 | `kb.mcp` | Read-only MCP server; provenance-carrying records; budget-aware assembly. | FastMCP (pinned), Pydantic models |
 | `kb.daemon` | Orchestration + CLI: index a repo @ SHA, run extractors in order, write snapshot, host MCP. | typer |
-| `kb.extract.semantic` *(deferred)* | The one grounded business-process extractor: entrypoints → call-graph slice → sinks → LLM labeler → span-binding validator. | tree-sitter queries, `PathEngine` (call-graph), YAML sink registry, thin LLM adapter |
+| `kb.extract.semantic` | **First slice shipped:** `kb describe` — LLM-grounded NL descriptions of routes/entities, each claim validated against the artifact's spans by a deterministic sub-property gate (`grounding.validate_claims`); separate key-gated pass, never on `index`. *Deferred:* the grounded business-process extractor (entrypoints → call-graph slice → sinks → LLM labeler → span-binding validator). | thin LLM adapter (`kb.llm`); later: `PathEngine` (call-graph), YAML sink registry |
 
 ---
 
diff --git a/README.md b/README.md
index 923fb96..b728483 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,8 @@ flowchart LR
 - **Read-only MCP server** — `find_provenance`, `get_knowledge`, and `search_knowledge`, each returning provenance-carrying units (method + confidence + freshness).
 - **pgvector embeddings + semantic search** — a replaceable embedding provider (sentence-transformers by default, OpenAI optional) populated by a separate `kb embed` pass; torch stays out of the index path.
 - **A frozen RAG-over-source baseline** and the **Tier-3 knowledge-vs-RAG recall gate** — the honest A/B that backs the "knowledge > RAG" thesis.
-- **Eight HARD CI eval gates** (see [Development](#development)).
+- **LLM-grounded descriptions** — an optional, key-gated `kb describe` pass has an LLM write NL summaries for routes/entities; every claim is validated against the artifact's own spans by a deterministic sub-property gate, so ungrounded claims are *dropped* (the anti-hallucination invariant, with a model in the loop). Stored as `extraction_method = "llm_grounded"`, grounded on the same spans.
+- **Nine HARD CI eval gates** (see [Development](#development)).
 
 - **A nightly LLM-judged A/B** (optional, key-gated, **non-gating**) — an answerer LLM answers each question from knowbase's grounded context vs a RAG-over-source context, and a judge LLM scores **answer accuracy** (against hand-written gold) + **hallucination**. Tracked metrics on top of recall; it never blocks CI.
 
@@ -115,7 +116,7 @@ The base `--extra dev` install stays torch-free; the `embed` extra pulls sentenc
 ### Run the gates
 
 ```bash
-uv run pytest src/kb/eval -q   # the eight HARD gates (spins an ephemeral local Postgres)
+uv run pytest src/kb/eval -q   # the nine HARD gates (spins an ephemeral local Postgres)
 ```
 
 ### Index a commit
@@ -142,6 +143,14 @@ uv run kb embed --db-url <postgres-url>   # separate pass: populate artifact emb
 
 `kb embed` runs a replaceable embedding provider (sentence-transformers `all-MiniLM-L6-v2` by default, OpenAI optional via `KB_EMBED_PROVIDER=openai`) over the latest snapshot's artifacts and writes them into `artifact.embedding` (pgvector). It is idempotent and torch only loads when this command runs — never on the index path.
 
+### Generate semantic descriptions (LLM-grounded)
+
+```bash
+uv run kb describe --db-url <postgres-url>   # separate, key-gated pass (ANTHROPIC_API_KEY / OPENAI_API_KEY)
+```
+
+`kb describe` has an LLM (via `kb.llm`, `KB_LLM_PROVIDER` in {`anthropic`,`openai`}) write a short NL summary + structured claims for each route/entity in the latest snapshot. **Every claim is validated against that artifact's own grounding spans** — claims citing a symbol not in the code are dropped, and a `description` artifact is stored only if something survives, grounded on the same spans (`extraction_method = "llm_grounded"`). It needs an API key, never runs on `kb index`, and the deterministic grounding gate is exercised in CI without a key (stub LLM).
+
 ### Serve to an AI agent (MCP)
 
 ```bash
@@ -209,8 +218,9 @@ A Python package `kb` (uv, src-layout). Modules and their responsibilities:
 | `kb.mcp` | Read-only MCP server and its provenance-carrying records: `find_provenance`, `get_knowledge`, `search_knowledge`. |
 | `kb.embed` | Replaceable embedding adapters (sentence-transformers default, OpenAI optional) + snapshot population. Torch isolated behind the `embed` extra and a lazy import. |
 | `kb.rag` | The frozen pgvector RAG-over-source baseline — the "other arm" of the knowledge-vs-RAG A/B (no provenance, no grounding). |
-| `kb.daemon.cli` | The `kb` CLI: `index`, `migrate`, `embed`, `serve` (MCP), and `introspect` — all functional. |
-| `kb.eval` | Eight HARD CI gates (identity reproducibility, adversarial grounding, Tier-1 import oracle, Tier-1 API oracle, Tier-1 entities oracle, Tier-3 knowledge-vs-RAG recall, Tier-4 one-hop invalidation, invariants) plus the supporting MCP / embed / store suite. |
+| `kb.extract.semantic` | LLM-grounded extraction (`kb describe`): NL descriptions of routes/entities with a deterministic sub-property gate (`grounding.validate_claims`) that drops any claim not backed by the artifact's spans. Separate key-gated pass; never on `index`. |
+| `kb.daemon.cli` | The `kb` CLI: `index`, `migrate`, `embed`, `describe`, `serve` (MCP), and `introspect` — all functional. |
+| `kb.eval` | Nine HARD CI gates (identity reproducibility, adversarial grounding, Tier-1 import oracle, Tier-1 API oracle, Tier-1 entities oracle, Tier-3 knowledge-vs-RAG recall, Tier-4 one-hop invalidation, invariants, semantic grounding floor) plus the supporting MCP / embed / store suite. |
 
 Core tables: `commit_ref`, `branch_ref`, `code_span`, `span_occurrence`, `artifact` (now with `embedding vector(384)` + `embedding_model_id`), `artifact_derived_from`, `snapshot_entry`, and `rag_chunk` (the baseline arm).
 
@@ -220,10 +230,10 @@ Core tables: `commit_ref`, `branch_ref`, `code_span`, `span_occurrence`, `artifa
 uv sync --extra dev            # venv + install
 uv run ruff check src/kb       # lint
 uv run mypy                    # strict type-check
-uv run pytest src/kb/eval -q   # the eight HARD eval gates
+uv run pytest src/kb/eval -q   # the nine HARD eval gates
 ```
 
-CI (GitHub Actions, workflow **"CI"**, `.github/workflows/ci.yml`) runs ruff, `mypy --strict`, and the eval gates against a `pgvector/pgvector:pg17` service (with the embedding model cached). The **eight HARD gates** that block a merge:
+CI (GitHub Actions, workflow **"CI"**, `.github/workflows/ci.yml`) runs ruff, `mypy --strict`, and the eval gates against a `pgvector/pgvector:pg17` service (with the embedding model cached). The **nine HARD gates** that block a merge:
 
 1. **Identity reproducibility** — formatting / comment / docstring / location changes must NOT change `span_id`; a rename MUST. Pure identity core, no database.
 2. **Adversarial grounding** — an ungrounded artifact is rejected by *both* layers (the app's `GroundingError` and the DB's deferred `artifact_grounded_check` trigger); a genuinely grounded artifact commits cleanly.
@@ -233,6 +243,7 @@ CI (GitHub Actions, workflow **"CI"**, `.github/workflows/ci.yml`) runs ruff, `m
 6. **Tier-3 knowledge-vs-RAG recall** — knowbase cross-file recall@k == 1.0 for every cross-file question (API contracts **and** domain entities: in each case one artifact already spans both files, so the floor is *structural*, independent of embedding quality); the RAG arm is reported but **never asserted**, so a model bump can't redden CI.
 7. **Tier-4 one-hop invalidation** — a content diff invalidates *exactly* the artifacts whose grounding span changed (set-equality: no over-invalidation, no stale survivors); a version bump invalidates everything.
 8. **Invariants** — zero orphans (every snapshot artifact is grounded), and re-indexing the same SHA yields the identical set of artifact ids.
+9. **Semantic grounding floor** — the LLM-grounded describer's claims are validated against the artifact's own spans by a deterministic sub-property gate; an adversarial fabricated claim is *dropped*, never stored (run on a stub LLM, so it gates without an API key).
 
 The identity rules in `kb.ids` (and `kb.structural`) are **LOCKED**: changing one is a breaking change, gated behind a `NORMALIZATION_VERSION` / `extractor_version` bump so existing digests are invalidated rather than silently colliding.
 
diff --git a/src/kb/daemon/cli.py b/src/kb/daemon/cli.py
index 812e301..76e7359 100644
--- a/src/kb/daemon/cli.py
+++ b/src/kb/daemon/cli.py
@@ -77,6 +77,36 @@ def embed(
         engine.dispose()
 
 
+@app.command()
+def describe(
+    db_url: str | None = typer.Option(None, "--db-url", help="Postgres URL (else KB_DB_URL env)."),
+    sha: str | None = typer.Option(None, "--sha", help="Snapshot sha to describe (def: latest)."),
+) -> None:
+    """LLM-grounded NL descriptions for a snapshot (separate key-gated pass; never on `index`)."""
+    from kb.extract.semantic.describe import describe_snapshot  # lazy: keeps the LLM off other cmds
+    from kb.llm.providers import default_llm_provider, has_llm_key
+    from kb.store.queries import latest_ingested_sha
+
+    if not has_llm_key():  # checked before the engine is created
+        typer.echo("no LLM API key (set ANTHROPIC_API_KEY or OPENAI_API_KEY)")
+        raise typer.Exit(code=1)
+    engine = make_engine(db_url)
+    try:
+        with engine.connect() as conn:
+            target = sha or latest_ingested_sha(conn)  # explicit --sha wins over the latest one
+        if target is None:  # no --sha given and latest_ingested_sha found nothing
+            typer.echo("no snapshot to describe")
+            raise typer.Exit(code=1)
+        provider = default_llm_provider()
+        result = describe_snapshot(engine, target, provider)
+        typer.echo(
+            f"described {result.described} artifacts (dropped {result.dropped_claims} claims) "
+            f"@ {target[:12]} with {provider.model_id}"
+        )
+    finally:
+        engine.dispose()  # also runs when typer.Exit is raised inside the try
+
+
 @app.command()
 def serve(
     db_url: str | None = typer.Option(None, "--db-url", help="Postgres URL (else KB_DB_URL env)."),
diff --git a/src/kb/embed/text.py b/src/kb/embed/text.py
index d64670c..f238c5b 100644
--- a/src/kb/embed/text.py
+++ b/src/kb/embed/text.py
@@ -35,4 +35,7 @@ def embed_text(kind: str, payload: dict[str, Any]) -> str:
             + " ".join(str(r.get("name", "")) for r in payload.get("related_entities", [])),
         ]
         return " ".join(p for p in parts if p.strip())
+    if kind == "description":
+        claims = " ".join(str(c.get("text", "")) for c in payload.get("claims", []))
+        return f"{payload.get('summary', '')} {claims}".strip() or head
     return head
diff --git a/src/kb/eval/semantic_grounding_test.py b/src/kb/eval/semantic_grounding_test.py
new file mode 100644
index 0000000..2e09cea
--- /dev/null
+++ b/src/kb/eval/semantic_grounding_test.py
@@ -0,0 +1,91 @@
+"""HARD GATE — semantic floor (DESIGN.md §9): LLM-grounded claims are span-validated.
+
+Uses a STUB LLM provider (fixed output: one real symbol + one fabricated one), so the
+anti-hallucination invariant of the LLM layer is enforced **deterministically and without an API
+key** — it gates in normal CI. The describer must store only the grounded claim and drop the
+fabricated one; the description is grounded (role `describes`) on its target's spans and served as
+`llm_grounded`.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from sqlalchemy import Engine, select
+
+from kb.daemon.pipeline import index_commit
+from kb.eval._fixtures import make_git_repo
+from kb.eval.tier1_api_test import FILES
+from kb.extract.deterministic.fastapi_contract import FastAPIExtractor
+from kb.extract.semantic.describe import describe_snapshot
+from kb.extract.semantic.grounding import validate_claims
+from kb.store import models as m
+from kb.store.queries import provenance_for_artifact
+
+REAL = "OrderOut"  # appears in the fixture (schemas.py + the routes' response_model)
+FAKE = "nonexistent_symbol_xyz"  # appears nowhere -> must be dropped as a hallucination
+
+
+class _StubProvider:
+    """Canned LLMProvider double: each reply cites one real symbol and one invented symbol."""
+
+    model_id = "stub:describe-test"
+
+    def complete(self, system: str, user: str, *, max_tokens: int = 1024) -> str:
+        # Arguments are ignored: the reply is the same for every artifact, which is what lets
+        # the gate run deterministically without an API key.
+        grounded = {"text": f"returns {REAL}", "symbol": REAL}
+        fabricated = {"text": "calls a fabricated helper", "symbol": FAKE}
+        reply = {
+            "summary": "Stub description.",
+            "claims": [grounded, fabricated],
+        }
+        return json.dumps(reply)
+
+
+def _index(engine: Engine, tmp_path: Path) -> str:
+    sha = make_git_repo(tmp_path, [FILES])[0]  # [FILES] is one file-set; use the first sha
+    index_commit(
+        engine, str(tmp_path), sha, extractors=[FastAPIExtractor()], first_party_root="src"
+    )
+    return sha  # the sha that was indexed, for the caller to describe / query
+
+
+def test_validator_drops_fabricated_symbol() -> None:
+    # Unit-level check of the gate alone: literal span text, no database, no provider.
+    span_text = "class OrderOut(BaseModel):\n    id: int\n"
+    candidates = [{"text": "a", "symbol": REAL}, {"text": "b", "symbol": FAKE}]
+    kept, dropped = validate_claims(candidates, [span_text], ["app.schemas.OrderOut"])
+    assert [claim["symbol"] for claim in kept] == [REAL]
+    assert [claim["symbol"] for claim in dropped] == [FAKE]
+
+
+def test_describe_stores_only_grounded_claims(engine: Engine, tmp_path: Path) -> None:
+    sha = _index(engine, tmp_path)
+    result = describe_snapshot(engine, sha, _StubProvider())
+
+    assert result.described > 0
+    assert result.dropped_claims > 0  # at least one fabricated claim was dropped
+
+    join = m.snapshot_entry.join(
+        m.artifact, m.artifact.c.artifact_id == m.snapshot_entry.c.artifact_id
+    )
+    with engine.connect() as conn:
+        rows = conn.execute(
+            select(
+                m.artifact.c.logical_key,
+                m.artifact.c.payload,
+                m.artifact.c.is_deterministic,
+            )
+            .select_from(join)
+            .where(m.snapshot_entry.c.sha == sha, m.artifact.c.kind == "description")
+        ).all()
+        assert rows  # the per-row checks below would pass vacuously on an empty result
+        for row in rows:
+            symbols = [c["symbol"] for c in row.payload["claims"]]
+            assert REAL in symbols  # the grounded claim survives
+            assert FAKE not in symbols  # adversarial: the hallucinated claim is never stored
+            assert row.is_deterministic is False  # surfaced as llm_grounded
+            prov_files = {p.file_path for p in provenance_for_artifact(conn, sha, row.logical_key)}
+            assert prov_files  # grounded on its target's spans (>= 1 file)
diff --git a/src/kb/extract/semantic/__init__.py b/src/kb/extract/semantic/__init__.py
new file mode 100644
index 0000000..5bff449
--- /dev/null
+++ b/src/kb/extract/semantic/__init__.py
@@ -0,0 +1,7 @@
+"""LLM-grounded semantic extraction (DESIGN.md §4, §9) — the first model-backed knowledge layer.
+
+Runs as a separate, key-gated pass (``kb describe``), never on the deterministic ``kb index`` path.
+Every produced claim is validated against the artifact's own grounding spans by a deterministic
+sub-property gate (``grounding.validate_claims``); unvalidated claims are dropped — the
+anti-hallucination invariant, enforced without a model in the loop so it is gateable in CI.
+"""
diff --git a/src/kb/extract/semantic/describe.py b/src/kb/extract/semantic/describe.py
new file mode 100644
index 0000000..51e29a7
--- /dev/null
+++ b/src/kb/extract/semantic/describe.py
@@ -0,0 +1,116 @@
+"""LLM-grounded NL descriptions over a snapshot — a separate, key-gated pass (DESIGN.md §4, §9).
+
+For each ``api_route`` / ``entity`` artifact, an LLM writes a short summary plus structured claims;
+each claim is validated against the artifact's own grounding spans (``grounding.validate_claims``),
+unvalidated claims are dropped, and — if anything survives — a ``description`` artifact is stored
+grounded on the SAME spans (role ``describes``, ``is_deterministic=False``). Never on the
+``kb index`` path. Idempotent per (model_id, prompt_version): both fold into ``artifact_id``.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass
+from typing import Any
+
+from sqlalchemy import Engine, select
+
+from kb.extract.base import DerivedEdge, ExtractedArtifact
+from kb.extract.semantic.grounding import validate_claims
+from kb.llm.providers import LLMProvider
+from kb.store import models as m
+from kb.store.queries import spans_for_artifact
+from kb.store.writer import write_grounded_artifact, write_snapshot_entry
+
+EXTRACTOR_ID = "llm_describe"
+EXTRACTOR_VERSION = "1"
+PROMPT_VERSION = "1"
+DESCRIBE_KINDS = ("api_route", "entity")
+
+_SYSTEM = (
+    "You describe a code artifact using ONLY the provided source spans. Respond with STRICT JSON "
+    'and nothing else: {"summary": "<= 2 sentences", "claims": [{"text": "...", "symbol": '
+    '"<one identifier that appears verbatim in the code>"}]}. Every claim must cite a real '
+    "identifier from the code (a function, class, field, or parameter name); never invent names."
+)
+
+
+@dataclass(frozen=True)
+class DescribeResult:
+    sha: str  # snapshot the pass ran against
+    described: int  # number of description artifacts written
+    dropped_claims: int  # claims rejected by validate_claims, summed over all targets
+
+
+def describe_snapshot(engine: Engine, sha: str, provider: LLMProvider) -> DescribeResult:
+    """Describe ``sha``'s api_route / entity artifacts; store only grounded descriptions.
+
+    Returns how many descriptions were written and how many claims ``validate_claims`` dropped.
+    """
+    join = m.snapshot_entry.join(
+        m.artifact, m.artifact.c.artifact_id == m.snapshot_entry.c.artifact_id
+    )
+    described = dropped_total = 0
+    with engine.begin() as conn:  # one transaction covers the reads and every description write
+        targets = conn.execute(
+            select(m.artifact.c.logical_key, m.artifact.c.kind, m.artifact.c.payload)
+            .select_from(join)
+            .where(m.snapshot_entry.c.sha == sha, m.artifact.c.kind.in_(list(DESCRIBE_KINDS)))
+            .order_by(m.artifact.c.logical_key)
+        ).all()
+        for target in targets:
+            spans = spans_for_artifact(conn, sha, target.logical_key)
+            if not spans:
+                continue
+            prompt = _build_prompt(target.kind, target.payload, spans)
+            data = _parse_json(provider.complete(_SYSTEM, prompt, max_tokens=600))
+            if data is None:
+                continue
+            raw = data.get("claims")  # a malformed reply may carry null / a non-list here
+            raw_claims = [c for c in raw if isinstance(c, dict)] if isinstance(raw, list) else []
+            kept, dropped = validate_claims(
+                raw_claims, [s.raw_text for s in spans], [s.fq_symbol_path for s in spans]
+            )
+            dropped_total += len(dropped)
+            if not kept:
+                continue  # nothing grounded survives -> store nothing (anti-hallucination)
+            artifact = ExtractedArtifact(
+                kind="description",
+                logical_key=f"desc:{target.logical_key}",
+                payload={
+                    "target_logical_key": target.logical_key,
+                    "target_kind": target.kind,
+                    "summary": str(data.get("summary") or "")[:500],  # null -> "", not "None"
+                    "claims": kept,
+                    "dropped_claims": len(dropped),
+                },
+                derived_from=[DerivedEdge(s.span_id, "describes") for s in spans],
+                extractor_id=EXTRACTOR_ID,
+                extractor_version=EXTRACTOR_VERSION,
+                prompt_version=PROMPT_VERSION,
+                model_id=provider.model_id,
+                is_deterministic=False,
+                confidence=len(kept) / (len(kept) + len(dropped)),  # kept is non-empty here
+            )
+            artifact_id = write_grounded_artifact(conn, artifact)
+            write_snapshot_entry(conn, sha, artifact.logical_key, artifact_id)
+            described += 1
+    return DescribeResult(sha=sha, described=described, dropped_claims=dropped_total)
+
+
+def _build_prompt(kind: str, payload: dict[str, Any], spans: list[Any]) -> str:
+    facts = json.dumps(payload, default=str)[:800]  # payload JSON, hard-capped at 800 chars
+    body = "\n\n".join(f"# {s.fq_symbol_path}\n{s.raw_text}" for s in spans)  # per-span blocks
+    return f"Artifact kind: {kind}\nKnown facts: {facts}\n\nSource spans:\n{body}"
+
+
+def _parse_json(raw: str) -> dict[str, Any] | None:
+    # Slice from the first "{" to the last "}" (greedy, DOTALL) so code fences or prose around
+    # the JSON are ignored; anything that is not a decodable JSON object yields None.
+    found = re.search(r"\{.*\}", raw, re.S)
+    try:
+        parsed = json.loads(found.group(0)) if found else None
+    except json.JSONDecodeError:
+        return None
+    return parsed if isinstance(parsed, dict) else None
diff --git a/src/kb/extract/semantic/grounding.py b/src/kb/extract/semantic/grounding.py
new file mode 100644
index 0000000..196eebf
--- /dev/null
+++ b/src/kb/extract/semantic/grounding.py
@@ -0,0 +1,53 @@
+"""Deterministic sub-property gate for LLM-grounded claims (DESIGN.md §9 — the semantic hard floor).
+
+A claim is kept only if the code identifier it cites actually appears in the artifact's grounding
+spans (their source text or a fully-qualified symbol path). Everything else is dropped. No model is
+in the loop here, so the anti-hallucination invariant is enforced deterministically and gated in CI
+without an API key.
+"""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Sequence
+from typing import Any
+
+_IDENT = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
+
+
+def _tokens(texts: Sequence[str]) -> set[str]:
+    """Collect every identifier-shaped token (per ``_IDENT``) found in any of ``texts``."""
+    found: set[str] = set()
+    found.update(*(_IDENT.findall(text) for text in texts))
+    return found
+
+
+def _innermost_identifier(symbol: str) -> str | None:
+    """Return the last identifier token of ``symbol`` — ``Order`` for ``shop.models.Order``,
+    ``OrderOut`` for ``List[OrderOut]`` — or None when ``symbol`` holds no identifier at all."""
+    idents = _IDENT.findall(symbol)
+    return idents.pop() if idents else None
+
+
+def validate_claims(
+    claims: Sequence[dict[str, Any]],
+    span_texts: Sequence[str],
+    fq_paths: Sequence[str],
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    """Partition ``claims`` into ``(kept, dropped)``, preserving input order within each list.
+
+    The vocabulary is every identifier token in ``span_texts`` plus every ``fq_paths`` component.
+    A claim is kept only when its ``symbol`` is a string whose last identifier token belongs to
+    that vocabulary; a missing, non-string or unmatched ``symbol`` sends the claim to ``dropped``.
+    """
+    vocabulary = _tokens(span_texts) | _tokens(fq_paths)
+
+    def is_grounded(claim: dict[str, Any]) -> bool:
+        cited = claim.get("symbol")
+        if not isinstance(cited, str):
+            return False
+        return _innermost_identifier(cited) in vocabulary
+
+    verdicts = [(claim, is_grounded(claim)) for claim in claims]
+    kept = [claim for claim, ok in verdicts if ok]
+    return kept, [claim for claim, ok in verdicts if not ok]
diff --git a/src/kb/mcp/records.py b/src/kb/mcp/records.py
index 0a6fa34..327edb7 100644
--- a/src/kb/mcp/records.py
+++ b/src/kb/mcp/records.py
@@ -73,6 +73,9 @@ def summarize(kind: str, payload: dict[str, Any]) -> str:
         framework = payload.get("framework", "?")
         n_fields = len(payload.get("fields", []))
         return f"{payload.get('qualified_name', '?')} ({framework}, {n_fields} fields)"
+    if kind == "description":
+        summary = str(payload.get("summary", "")).strip().splitlines()
+        return summary[0] if summary else f"description of {payload.get('target_logical_key', '?')}"
     return kind
 
 
diff --git a/src/kb/store/queries.py b/src/kb/store/queries.py
index c0e4e09..1d4e896 100644
--- a/src/kb/store/queries.py
+++ b/src/kb/store/queries.py
@@ -192,6 +192,46 @@ def provenance_for_artifact(conn: Connection, sha: str, logical_key: str) -> lis
     return [ProvenanceRow(*row) for row in rows]
 
 
+@dataclass(frozen=True)
+class ArtifactSpanRow:
+    span_id: bytes  # code_span.span_id
+    fq_symbol_path: str  # code_span.fq_symbol_path
+    raw_text: str  # the span's source text at this sha (input + ground-truth for the LLM describer)
+
+
+def spans_for_artifact(conn: Connection, sha: str, logical_key: str) -> list[ArtifactSpanRow]:
+    """The grounding spans of the ``(sha, logical_key)`` artifact, with id + fq path + source text.
+
+    Feeds the LLM-grounded describer: the spans are both the prompt context and the deterministic
+    ground truth its claims are validated against (DESIGN.md §9).
+    """
+    join = (
+        m.snapshot_entry.join(  # snapshot entry -> the spans its artifact is derived from
+            m.artifact_derived_from,
+            m.artifact_derived_from.c.artifact_id == m.snapshot_entry.c.artifact_id,
+        )
+        .join(m.code_span, m.code_span.c.span_id == m.artifact_derived_from.c.span_id)
+        .join(
+            m.span_occurrence,
+            and_(
+                m.span_occurrence.c.span_id == m.artifact_derived_from.c.span_id,
+                m.span_occurrence.c.sha == sha,  # take raw_text from this sha's occurrence row
+            ),
+        )
+    )
+    rows = conn.execute(
+        select(
+            m.code_span.c.span_id,
+            m.code_span.c.fq_symbol_path,
+            m.span_occurrence.c.raw_text,
+        )
+        .select_from(join)
+        .where(m.snapshot_entry.c.sha == sha, m.snapshot_entry.c.logical_key == logical_key)
+        .order_by(m.code_span.c.fq_symbol_path)  # rows come back sorted by fq path
+    ).all()
+    return [ArtifactSpanRow(r.span_id, r.fq_symbol_path, r.raw_text) for r in rows]
+
+
 def _like_literal(value: str, suffix: str) -> str:
     escaped = value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
     return escaped + suffix