diff --git a/SOURCES_CONTRACT.md b/SOURCES_CONTRACT.md
new file mode 100644
index 0000000..553aaae
--- /dev/null
+++ b/SOURCES_CONTRACT.md
@@ -0,0 +1,147 @@
+# `sources/` Input Contract
+
+This document defines the **stable input contract** for the `sources/` directory:
+the format a programmatic producer (for example, **a downstream rule importer**) writes to drive
+`llmwiki compile` without going through the interactive `llmwiki ingest` command.
+
+Anything that can write a markdown file with the frontmatter described here can feed
+the compiler. The compiler treats `sources/` as the single source of truth for what
+to compile; it never reaches back to the original URL/file.
+
+> Stability: the **field names, semantics, slug/filename rules, and
+> `MAX_SOURCE_CHARS` limit below are stable.** New optional frontmatter fields may be
+> added over time; existing fields will not change meaning without a contract revision.
+
+---
+
+## File layout
+
+Each source is one UTF-8 markdown file in `sources/`:
+
+```
+sources/
+  retrieval-augmented-generation.md
+  some-design-doc.md
+```
+
+A source file is:
+
+```markdown
+---
+title: Retrieval-Augmented Generation
+source: https://example.com/rag
+ingestedAt: 2026-05-31T12:00:00.000Z
+sourceType: web
+---
+
+<body markdown — the actual content to compile>
+```
+
+The body is everything after the closing `---` of the YAML frontmatter block. The
+compiler hashes the **entire file** (frontmatter + body) with SHA-256 to detect
+changes; see [Change detection](#change-detection-incremental-compile).
+
+Only files ending in `.md` are scanned. Other files in `sources/` are ignored.
+
+---
+
+## Frontmatter fields
+
+The frontmatter is a single YAML block delimited by `---` lines at the very top of
+the file.
+
+### Required
+
+| Field        | Type   | Meaning |
+|--------------|--------|---------|
+| `title`      | string | Human-readable title. Drives the **filename slug** (see below) and the wiki page title. Must contain at least one letter or digit, otherwise the slug is empty and the write is rejected. |
+| `source`     | string | Source identity (URL, file path, or any stable producer-chosen URI). Used for re-ingest idempotency and basename-collision disambiguation. For a git-log producer this would be e.g. a commit URL or `repo@sha:path`. |
+| `ingestedAt` | string | ISO-8601 timestamp of when the source was captured. |
+
+### Optional
+
+| Field          | Type    | Meaning |
+|----------------|---------|---------|
+| `sourceType`   | string  | Origin tag. One of `web`, `file`, `image`, `pdf`, `transcript`. Persisted for downstream tooling and human readers. A programmatic producer that does not map cleanly to one of these should pick the closest (`file` is the safe generic default). |
+| `truncated`    | boolean | `true` when the body was truncated to fit `MAX_SOURCE_CHARS`. Omit when the body is complete. |
+| `originalChars`| number  | Original character count **before** truncation. Set this together with `truncated: true` so consumers can see how much was dropped. Omit when `truncated` is absent. |
+
+Producers may include additional YAML keys; the compiler ignores unrecognized
+frontmatter fields rather than failing. Do not rely on unspecified fields surviving
+into the compiled output.
+
+---
+
+## Filename and slug rules
+
+The filename a producer chooses should match how `llmwiki ingest` would name it, so
+re-ingest stays idempotent:
+
+1. **Slug** is derived from `title` by lowercasing, transliterating to ASCII-ish
+   kebab-case, and stripping characters that are not letters/digits/hyphens. A title
+   that slugifies to the empty string (e.g. pure punctuation/emoji) is **rejected** —
+   choose a title with at least one letter or digit.
+2. The default filename is `<slug>.md`.
+3. **Basename collisions:** if `<slug>.md` already exists for a *different* `source`,
+   the disambiguated name is `<slug>-<8-hex>.md`, where `<8-hex>` is the first 8 hex
+   chars of `sha256(source)`. Re-writing the *same* `source` overwrites `<slug>.md`
+   in place (the existing file's frontmatter `source` is consulted first), so a
+   producer that re-emits an updated version of the same source must keep `source`
+   identical to overwrite rather than fork.
+
+A producer that does not want to replicate the slug algorithm may instead write any
+stable filename of its own choosing (`<producer-chosen-name>.md`) — the compiler keys
+change detection off the **filename + file hash**, not the slug. The slug rules above
+only matter for staying filename-compatible with `llmwiki ingest` output.
+
+---
+
+## Size limit: `MAX_SOURCE_CHARS`
+
+The compiler-facing size ceiling is **`MAX_SOURCE_CHARS = 100_000` characters** of
+body content (see `src/utils/constants.ts`). Producers should:
+
+- Truncate the body to at most `MAX_SOURCE_CHARS` characters.
+- When truncating, set `truncated: true` and `originalChars: <pre-truncation length>`
+  in the frontmatter.
+
+Very short bodies (under `MIN_SOURCE_CHARS = 50`) compile but are low-signal; the
+interactive ingester warns on them. A programmatic producer should avoid emitting
+near-empty sources.
+
+---
+
+## Change detection (incremental compile)
+
+`llmwiki compile` is incremental. It records each source file's SHA-256 hash in
+`.llmwiki/state.json` under `sources[<filename>] = { hash, concepts, compiledAt }`.
+On the next compile, a source whose hash is unchanged is **skipped**.
+
+Consequences for a producer:
+
+- To trigger recompilation of a source, change its file contents (frontmatter or
+  body) so the hash changes.
+- Writing a byte-identical file is a no-op for that source.
+- Deleting a source file marks its owned wiki page(s) orphaned on the next compile.
+
+These same per-source hashes are surfaced in the JSON export as each page's
+`sourceHashes`, and the export envelope carries `modelId` / `promptVersion`, so a
+downstream consumer can audit which source bytes and which model/prompt produced a
+page (the W4 provenance stamp).
+
+---
+
+## Future: a `git`-log adapter
+
+W1 documents the contract only; no new connector ships with it. The natural next
+connector for a downstream rule importer is a **git-log adapter** that walks commit history and
+emits one `sources/*.md` per commit (or per changed file), with:
+
+- `title` = commit subject (or `path @ short-sha`),
+- `source` = a stable commit/blob URI,
+- `ingestedAt` = commit timestamp,
+- `sourceType: file`,
+- body = the commit message and/or diff hunk, truncated to `MAX_SOURCE_CHARS`.
+
+Such an adapter is purely a producer of files in this format; it requires no compiler
+changes because it targets this stable contract.
diff --git a/src/cli.ts b/src/cli.ts
index 1038336..705ece2 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -29,6 +29,7 @@ import reviewListCommand from "./commands/review-list.js";
 import reviewShowCommand from "./commands/review-show.js";
 import reviewApproveCommand from "./commands/review-approve.js";
 import reviewRejectCommand from "./commands/review-reject.js";
+import { registerRulesCommand } from "./commands/rules-register.js";
 import nextCommand from "./commands/next.js";
 import quickstartCommand, { type QuickstartOptions } from "./commands/quickstart.js";
 import contextCommand, { type ContextCommandOptions } from "./commands/context.js";
@@ -160,6 +161,8 @@ reviewCommand
     }
   });
 
+registerRulesCommand(program, requireProvider);
+
 program
   .command("query <question>")
   .description("Ask a question against the wiki")
diff --git a/src/commands/rules-register.ts b/src/commands/rules-register.ts
new file mode 100644
index 0000000..04564c7
--- /dev/null
+++ b/src/commands/rules-register.ts
@@ -0,0 +1,96 @@
+/**
+ * Commander registration for `llmwiki rules ...`.
+ *
+ * Keeping the rule-pipeline command tree outside `src/cli.ts` prevents the
+ * entrypoint from becoming the dumping ground for every nested command while
+ * leaving the actual rule actions in `commands/rules.ts`.
+ */
+
+import type { Command } from "commander";
+import {
+  rulesApproveCommand,
+  rulesExportCommand,
+  rulesExtractCommand,
+  rulesListCommand,
+  rulesRejectCommand,
+} from "./rules.js";
+
+/** Provider guard injected by the CLI entrypoint; invoked with no arguments before `rules extract` runs. */
+type RequireProvider = () => void;
+
+/**
+ * Register the `rules` command group and its `extract`, `list`, `approve`, `reject`, and `export` subcommands.
+ * @param program - Root Commander program the `rules` group is attached to.
+ * @param requireProvider - CLI provider guard; only `rules extract` receives and calls it.
+ */
+export function registerRulesCommand(program: Command, requireProvider: RequireProvider): void {
+  const rulesCommand = program
+    .command("rules")
+    .description(
+      "Extract, review, and export machine-actionable RuleCandidate records for a downstream rule importer",
+    );
+
+  registerExtract(rulesCommand, requireProvider);
+  registerList(rulesCommand);
+  registerApprove(rulesCommand);
+  registerReject(rulesCommand);
+  registerExport(rulesCommand);
+}
+
+/** Register `rules extract`: runs the provider guard, then `rulesExtractCommand`, both inside the shared error wrapper. */
+function registerExtract(rulesCommand: Command, requireProvider: RequireProvider): void {
+  rulesCommand
+    .command("extract")
+    .description("Extract rule candidates from changed sources (writes .llmwiki/rule-candidates/)")
+    .action(async () =>
+      runRulesAction(async () => {
+        requireProvider(); // called inside the wrapper so a throw from the guard is reported like any other error
+        await rulesExtractCommand();
+      }),
+    );
+}
+
+/** Register `rules list`, which takes no arguments and delegates to `rulesListCommand` inside the shared error wrapper. */
+function registerList(rulesCommand: Command): void {
+  rulesCommand
+    .command("list")
+    .description("List pending rule candidates")
+    .action(async () => runRulesAction(() => rulesListCommand()));
+}
+
+/** Register `rules approve <id>`; the required `<id>` argument is forwarded as-is to `rulesApproveCommand`. */
+function registerApprove(rulesCommand: Command): void {
+  rulesCommand
+    .command("approve <id>")
+    .description("Approve a rule candidate (status -> approved)")
+    .action(async (id: string) => runRulesAction(() => rulesApproveCommand(id)));
+}
+
+/** Register `rules reject <id>`; the required `<id>` argument is forwarded as-is to `rulesRejectCommand`. */
+function registerReject(rulesCommand: Command): void {
+  rulesCommand
+    .command("reject <id>")
+    .description("Reject a rule candidate (status -> rejected, archived)")
+    .action(async (id: string) => runRulesAction(() => rulesRejectCommand(id)));
+}
+
+/** Register `rules export`; the parsed options (optional `--scope`) are passed unvalidated to `rulesExportCommand`. */
+function registerExport(rulesCommand: Command): void {
+  rulesCommand
+    .command("export")
+    .description("Emit rule candidates as a JSON array for the rule importer (dist/exports/rule-candidates.json)")
+    .option("--scope <scope>", "approved (default), proposed, or all")
+    .action(async (options: { scope?: string }) =>
+      runRulesAction(() => rulesExportCommand(options)),
+    );
+}
+
+/** Shared CLI error wrapper for the rule command group: awaits `work`; on any throw/rejection prints an `Error:` line to stderr and exits with code 1. */
+async function runRulesAction(work: () => Promise<void>): Promise<void> {
+  try {
+    await work();
+  } catch (err) {
+    console.error(`\x1b[31mError:\x1b[0m ${err instanceof Error ? err.message : err}`); // ANSI-red "Error:" label; non-Error throws are interpolated as-is
+    process.exit(1); // hard exit, unlike the `process.exitCode = 1` soft failures set by the rule actions
+  }
+}
diff --git a/src/commands/rules.ts b/src/commands/rules.ts
new file mode 100644
index 0000000..d51b3ee
--- /dev/null
+++ b/src/commands/rules.ts
@@ -0,0 +1,181 @@
+/**
+ * Commander actions for `llmwiki rules …` (rule pipeline).
+ *
+ * The rule-candidate lifecycle mirrors the concept review flow but emits
+ * structured `RuleCandidate` records for a downstream rule importer instead of prose pages:
+ *
+ *   rules extract            — LLM-extract rules from changed sources into
+ *                              .llmwiki/rule-candidates/<id>.json (status proposed)
+ *   rules list               — list pending candidates
+ *   rules approve <id>       — flip status → approved (in place)
+ *   rules reject  <id>       — flip status → rejected, archive out of pending
+ *   rules export [--scope]   — write the candidate array as JSON for the rule importer
+ *
+ * Mutations run under `.llmwiki/lock` to serialize against a concurrent
+ * extract/approve/reject, matching the concept review lock discipline.
+ */
+
+import { existsSync } from "fs";
+import path from "path";
+import { atomicWrite } from "../utils/markdown.js";
+import { acquireLock, releaseLock } from "../utils/lock.js";
+import * as output from "../utils/output.js";
+import { SOURCES_DIR } from "../utils/constants.js";
+import {
+  extractRuleCandidates,
+  type RuleExtractionResult,
+} from "../compiler/rule-extractor.js";
+import {
+  archiveRuleCandidate,
+  readRuleCandidate,
+  setRuleCandidateStatus,
+} from "../compiler/rule-candidates.js";
+import { candidateFileId } from "../utils/candidate-store.js";
+import {
+  RULE_EXPORT_SCOPES,
+  buildRuleCandidatesJson,
+  collectRuleCandidatesForExport,
+  type RuleExportScope,
+} from "../export/rule-candidates-json.js";
+
+/** Output path for `rules export`, relative to the project root (`rulesExportCommand` joins it onto `process.cwd()`). */
+const RULE_EXPORT_PATH = "dist/exports/rule-candidates.json";
+
+/**
+ * Extract rule candidates for the project at `process.cwd()` while holding the lock. A missing sources/ folder only
+ * warns and returns; failing to acquire the lock sets exit code 1. Provider availability is enforced by the caller's CLI guard.
+ */
+export async function rulesExtractCommand(): Promise<void> {
+  const root = process.cwd();
+  if (!existsSync(path.join(root, SOURCES_DIR))) {
+    output.status("!", output.warn("No sources found. Run `llmwiki ingest <url>` first."));
+    return; // not treated as a failure: the exit code is left untouched
+  }
+
+  const locked = await acquireLock(root);
+  if (!locked) {
+    output.status("!", output.error("Could not acquire lock. Try again later."));
+    process.exitCode = 1;
+    return;
+  }
+  try {
+    output.header("Extracting rule candidates");
+    reportExtraction(await extractRuleCandidates(root));
+  } finally {
+    await releaseLock(root); // released even when extraction or reporting throws
+  }
+}
+
+/** Print each extraction note, one "ready" line per candidate id, then a summary with the candidate and processed-source counts. */
+function reportExtraction(result: RuleExtractionResult): void {
+  for (const note of result.notes) output.status("i", output.dim(note));
+  for (const candidate of result.candidates) {
+    output.status("?", output.info(`Rule candidate ready: ${candidate.id}`));
+  }
+  output.status(
+    "✓",
+    output.success(
+      `${result.candidates.length} rule candidate(s) from ${result.processedSources.length} source(s).`,
+    ),
+  );
+}
+
+/** List candidates in the "proposed" export scope as `<id> [<confidence>] — <proposed title>`, or print a notice when there are none. */
+export async function rulesListCommand(): Promise<void> {
+  const pending = await collectRuleCandidatesForExport(process.cwd(), "proposed");
+  if (pending.length === 0) {
+    output.status("i", output.dim("No pending rule candidates."));
+    return;
+  }
+  for (const c of pending) {
+    output.status(
+      "?",
+      output.info(`${c.id} [${c.confidence}] — ${c.proposed.title}`),
+    );
+  }
+}
+
+/** Approve a candidate by flipping its status to "approved" under the lock; a falsy status result is surfaced by `mutateUnderLock` as "removed during review". */
+export async function rulesApproveCommand(id: string): Promise<void> {
+  await mutateUnderLock(id, async (root, fileId) => {
+    const updated = await setRuleCandidateStatus(root, fileId, "approved");
+    if (!updated) return false;
+    output.status("+", output.success(`Approved rule candidate ${updated.id}.`));
+    return true;
+  });
+}
+
+/** Reject a candidate under the lock: flip status to "rejected", then archive it out of pending and print a confirmation. */
+export async function rulesRejectCommand(id: string): Promise<void> {
+  await mutateUnderLock(id, async (root, fileId) => {
+    const updated = await setRuleCandidateStatus(root, fileId, "rejected");
+    if (!updated) return false;
+    await archiveRuleCandidate(root, fileId); // NOTE(review): any result is ignored, so the "archived" message below prints regardless — confirm intended
+    output.status("-", output.warn(`Rejected rule candidate ${updated.id} — archived.`));
+    return true;
+  });
+}
+
+/**
+ * Export rule candidates as a JSON array for the rule importer. Defaults to approved-only;
+ * `--scope proposed|all` changes the selection. Writes dist/exports/rule-candidates.json under
+ * `process.cwd()` via `atomicWrite`. An unrecognized scope makes the returned promise reject (see `resolveScope`).
+ */
+export async function rulesExportCommand(options: { scope?: string } = {}): Promise<void> {
+  const root = process.cwd();
+  const scope = resolveScope(options.scope);
+  const candidates = await collectRuleCandidatesForExport(root, scope);
+  const outPath = path.join(root, RULE_EXPORT_PATH);
+  await atomicWrite(outPath, buildRuleCandidatesJson(candidates));
+  output.status(
+    "+",
+    output.success(`Exported ${candidates.length} rule candidate(s) → ${output.source(outPath)}`),
+  );
+}
+
+/** Validate the --scope flag. Undefined or empty input defaults to "approved"; any value not listed in RULE_EXPORT_SCOPES throws an Error. */
+function resolveScope(raw: string | undefined): RuleExportScope {
+  if (!raw) return "approved";
+  if (!(RULE_EXPORT_SCOPES as readonly string[]).includes(raw)) {
+    throw new Error(
+      `Unknown --scope value "${raw}". Valid: ${RULE_EXPORT_SCOPES.join(", ")}`,
+    );
+  }
+  return raw as RuleExportScope; // safe to assert: membership in RULE_EXPORT_SCOPES was checked just above
+}
+
+/**
+ * Shared approve/reject skeleton: pre-check the candidate exists, acquire the lock, run `underLock` (a `false`
+ * result means the candidate vanished after the pre-check — the TOCTOU guard), then always release the lock.
+ * Sets exit code 1 when the candidate is missing at either check or when the lock cannot be acquired.
+ */
+async function mutateUnderLock(
+  id: string,
+  underLock: (root: string, fileId: string) => Promise<boolean>,
+): Promise<void> {
+  const root = process.cwd();
+  const fileId = candidateFileId(id); // NOTE(review): presumably maps the user-supplied id to its on-disk file id — confirm
+
+  const preCheck = await readRuleCandidate(root, fileId); // unlocked fast-fail; the callback re-validates under the lock
+  if (!preCheck) {
+    output.status("!", output.error(`Rule candidate not found: ${id}`));
+    process.exitCode = 1;
+    return;
+  }
+
+  const locked = await acquireLock(root);
+  if (!locked) {
+    output.status("!", output.error("Could not acquire lock. Try again later."));
+    process.exitCode = 1;
+    return;
+  }
+  try {
+    const ok = await underLock(root, fileId);
+    if (!ok) {
+      output.status("!", output.error(`Rule candidate ${id} was removed during review.`));
+      process.exitCode = 1;
+    }
+  } finally {
+    await releaseLock(root); // released even when the mutation throws
+  }
+}
diff --git a/src/compiler/candidates.ts b/src/compiler/candidates.ts
index eb0d2d1..1493c54 100644
--- a/src/compiler/candidates.ts
+++ b/src/compiler/candidates.ts
@@ -12,11 +12,15 @@
  * called again at approval time.
  */
 
-import { readdir, rename, unlink, writeFile, mkdir } from "fs/promises";
+import { unlink } from "fs/promises";
 import { existsSync } from "fs";
 import path from "path";
 import { randomBytes } from "crypto";
 import { atomicWrite, safeReadFile } from "../utils/markdown.js";
+import {
+  listCandidateFileIds,
+  moveCandidateToArchive,
+} from "../utils/candidate-store.js";
 import * as output from "../utils/output.js";
 import {
   CANDIDATES_DIR,
@@ -189,13 +193,9 @@ function isValidCandidate(value: unknown): value is ReviewCandidate {
  */
 export async function listCandidates(root: string): Promise<ReviewCandidate[]> {
   const dir = path.join(root, CANDIDATES_DIR);
-  if (!existsSync(dir)) return [];
-
-  const entries = await readdir(dir, { withFileTypes: true });
+  const ids = await listCandidateFileIds(dir);
   const candidates: ReviewCandidate[] = [];
-  for (const entry of entries) {
-    if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
-    const id = entry.name.slice(0, -CANDIDATE_EXT.length);
+  for (const id of ids) {
     const candidate = await readCandidate(root, id);
     if (candidate) candidates.push(candidate);
   }
@@ -231,18 +231,5 @@ export async function deleteCandidate(root: string, id: string): Promise<boolean
  * @returns True when the candidate was found and archived.
  */
 export async function archiveCandidate(root: string, id: string): Promise<boolean> {
-  const sourcePath = candidatePath(root, id);
-  if (!existsSync(sourcePath)) return false;
-
-  const target = archivePath(root, id);
-  await mkdir(path.dirname(target), { recursive: true });
-  // Copy via writeFile + unlink to support cross-filesystem rename failures.
-  try {
-    await rename(sourcePath, target);
-  } catch {
-    const raw = await safeReadFile(sourcePath);
-    await writeFile(target, raw, "utf-8");
-    await unlink(sourcePath);
-  }
-  return true;
+  return moveCandidateToArchive(candidatePath(root, id), archivePath(root, id));
 }
diff --git a/src/compiler/delta.ts b/src/compiler/delta.ts
new file mode 100644
index 0000000..949b35d
--- /dev/null
+++ b/src/compiler/delta.ts
@@ -0,0 +1,85 @@
+/**
+ * Programmatic incremental compile delta.
+ *
+ * Exposes {@link compileDelta}: a library entry point that runs the normal
+ * hash-gated compile and then returns ONLY the export pages that changed in
+ * that run, instead of the full corpus. A caller (e.g. a downstream rule importer) can
+ * poll this after each ingest and ship just the deltas to its downstream
+ * store, without diffing the whole wiki itself.
+ *
+ * The change set is driven entirely by the compiler's existing
+ * SHA-256-over-source change detection (`detectChanges` / `.llmwiki/state.json`
+ * via `src/compiler/hasher.ts`). `compileAndReport` already returns the slugs
+ * it (re)wrote this run on `CompileResult.pages`; we intersect those with the
+ * freshly-collected export pages so the delta carries the same enriched,
+ * provenance-stamped {@link ExportPage} shape the full JSON export emits.
+ *
+ * When nothing changed since the persisted state, `CompileResult.pages` is
+ * empty and the returned delta is empty too — the "unchanged ⇒ empty delta"
+ * contract the W5 test pins.
+ */
+
+import { compileAndReport } from "./index.js";
+import { collectExportPages } from "../export/collect.js";
+import type { ExportPage } from "../export/types.js";
+import type { CompileOptions } from "../utils/types.js";
+
+/** Options for {@link compileDelta}: an alias of `CompileOptions`, forwarded unchanged to `compileAndReport`. */
+export type CompileDeltaOptions = CompileOptions;
+
+/** Result of an incremental delta compile: the changed export pages plus counters copied from the compile result. */
+export interface CompileDeltaResult {
+  /**
+   * Export pages that changed in this run — new or recompiled. Carries the
+   * full {@link ExportPage} shape (provenance hashes included) so the caller
+   * can persist deltas through the same contract as a full export.
+   */
+  changedPages: ExportPage[];
+  /** Slugs of `changedPages`, index-aligned with it (collection order; reportedly title-sorted — confirm in `collectExportPages`). */
+  changedSlugs: string[];
+  /** Count of sources (re)compiled this run. */
+  compiled: number;
+  /** Count of unchanged sources skipped this run. */
+  skipped: number;
+  /** Count of sources whose pages were orphaned by deletion this run. */
+  deleted: number;
+  /** Non-fatal errors collected during the compile. */
+  errors: string[];
+}
+
+/**
+ * Run an incremental compile and return only the pages that changed.
+ *
+ * Reuses the compiler's hash-gated change detection: sources whose SHA-256
+ * matches the persisted `.llmwiki/state.json` entry are skipped, so a second
+ * call with an up-to-date state yields an empty `changedPages`. Adding or
+ * editing a source yields exactly that source's page(s) in the delta.
+ *
+ * @param root - Project root directory.
+ * @param options - Optional pipeline overrides (forwarded to compile).
+ * @returns Changed export pages plus run counts; reported slugs with no matching collected `concepts` page are silently dropped.
+ */
+export async function compileDelta(
+  root: string,
+  options: CompileDeltaOptions = {},
+): Promise<CompileDeltaResult> {
+  const result = await compileAndReport(root, options);
+  const changedSlugSet = new Set(result.pages);
+
+  // `result.pages` are concept/seed slugs, all written under wiki/concepts.
+  // Match on (pageDirectory, slug), not bare slug, so a saved query that
+  // happens to share a slug with a changed concept is never mis-included.
+  const allPages = await collectExportPages(root);
+  const changedPages = allPages.filter(
+    (page) => page.pageDirectory === "concepts" && changedSlugSet.has(page.slug),
+  );
+
+  return {
+    changedPages,
+    changedSlugs: changedPages.map((page) => page.slug),
+    compiled: result.compiled,
+    skipped: result.skipped,
+    deleted: result.deleted,
+    errors: result.errors,
+  };
+}
diff --git a/src/compiler/index.ts b/src/compiler/index.ts
index b7e209d..f716292 100644
--- a/src/compiler/index.ts
+++ b/src/compiler/index.ts
@@ -46,6 +46,7 @@ import { resolveLinks } from "./resolver.js";
 import { generateIndex } from "./indexgen.js";
 import { buildBudgetedCombinedContent, type SourceSlice } from "./prompt-budget.js";
 import { addObsidianMeta, generateMOC } from "./obsidian.js";
+import { addModelProvenanceMeta } from "./provenance.js";
 import { updateEmbeddings } from "../utils/embeddings.js";
 import { writeCandidate } from "./candidates.js";
 import {
@@ -700,6 +701,7 @@ async function generateSingleSeedPage(
   };
   const frontmatterFields: Record<string, unknown> = { ...typedFields };
   addObsidianMeta(frontmatterFields, seed.title, []);
+  addModelProvenanceMeta(frontmatterFields);
   const frontmatter = buildFrontmatter(frontmatterFields);
   const error = await writePageIfValid(pagePath, `${frontmatter}\n\n${pageBody}\n`, seed.title);
   return error ? { slug, error } : { slug };
diff --git a/src/compiler/page-renderer.ts b/src/compiler/page-renderer.ts
index 9ddc1a5..73b2767 100644
--- a/src/compiler/page-renderer.ts
+++ b/src/compiler/page-renderer.ts
@@ -18,7 +18,7 @@ import {
 import { callClaude } from "../utils/llm.js";
 import { buildPagePrompt } from "./prompts.js";
 import { addObsidianMeta } from "./obsidian.js";
-import { addProvenanceMeta, reportContradictionWarnings } from "./provenance.js";
+import { addModelProvenanceMeta, addProvenanceMeta, reportContradictionWarnings } from "./provenance.js";
 import { CONCEPTS_DIR } from "../utils/constants.js";
 import type { SchemaConfig } from "../schema/index.js";
 import type { ExtractedConcept } from "../utils/types.js";
@@ -94,6 +94,7 @@ function buildMergedFrontmatter(
   };
   addObsidianMeta(frontmatterFields, entry.concept.concept, entry.concept.tags ?? []);
   addProvenanceMeta(frontmatterFields, entry.concept);
+  addModelProvenanceMeta(frontmatterFields);
   return buildFrontmatter(frontmatterFields);
 }
 
diff --git a/src/compiler/prompt-budget.ts b/src/compiler/prompt-budget.ts
index 97d3fbe..b05c1c0 100644
--- a/src/compiler/prompt-budget.ts
+++ b/src/compiler/prompt-budget.ts
@@ -90,6 +90,25 @@ function numberLines(content: string): string {
     .join("\n");
 }
 
+/**
+ * Clip a single source's raw content to the active prompt budget and prepend line
+ * numbers, so a prompt that asks the model for line spans actually shows the model
+ * numbered lines. The cap is applied before the truncation marker and the numbering are added, so
+ * the returned string can be longer than the budget. Used by the rule extractor, which feeds one source per call.
+ *
+ * @param file - Source filename, for the truncation warning only.
+ * @param content - Raw source content.
+ * @returns Numbered (and, when over budget, truncated) content.
+ */
+export function budgetAndNumberSource(file: string, content: string): string {
+  const budget = resolvePromptBudgetChars();
+  if (content.length <= budget) {
+    return numberLines(content);
+  }
+  warnTruncation(file, content.length, 1, budget, budget);
+  return numberLines(content.slice(0, budget) + TRUNCATION_MARKER); // marker is appended first, so it is numbered along with the kept content
+}
+
 /** Render the slice list using the same `--- SOURCE: ---` headers the LLM is taught to read. */
 function formatSlices(slices: SourceSlice[]): string {
   return slices
diff --git a/src/compiler/prompts.ts b/src/compiler/prompts.ts
index ec24948..bb57bfe 100644
--- a/src/compiler/prompts.ts
+++ b/src/compiler/prompts.ts
@@ -24,6 +24,18 @@ function withLangLine(...lines: string[]): string[] {
   return lang ? [...lines, lang] : lines;
 }
 
+/**
+ * Named version of the extraction + page-generation prompt contract.
+ *
+ * Bump this whenever the wording of the extraction tool schema, the
+ * extraction system prompt, or the page-generation prompt changes in a way
+ * that could alter compiled page content. The provenance stamp written into
+ * page frontmatter (`promptVersion`, set by `addModelProvenanceMeta`) carries this value so a
+ * downstream auditor can distinguish pages produced under different prompt
+ * generations even when the model id is identical. Format is `vMAJOR`.
+ */
+export const PROMPT_VERSION = "v1";
+
 /** Allowed provenance state strings emitted by the LLM tool schema. */
 const PROVENANCE_STATE_VALUES: ProvenanceState[] = [
   "extracted",
diff --git a/src/compiler/provenance.ts b/src/compiler/provenance.ts
index 0d49f33..a5b9735 100644
--- a/src/compiler/provenance.ts
+++ b/src/compiler/provenance.ts
@@ -8,8 +8,24 @@
  */
 
 import * as output from "../utils/output.js";
+import { resolveActiveModelId } from "../utils/provider.js";
+import { PROMPT_VERSION } from "./prompts.js";
 import type { ExtractedConcept } from "../utils/types.js";
 
+/**
+ * Stamp compile-time lineage onto a page's frontmatter: the model id that the
+ * active provider would use and the named prompt-contract version. Written when
+ * the page is (re)generated, so it records the model/prompt that actually
+ * produced the page's current content — unlike an export-time env read, which
+ * can attribute a page to a model that never touched it. Surfaced per-page in
+ * the JSON export (`ExportPage.modelId` / `promptVersion`).
+ * @param fields - Mutable frontmatter record for a page; its `modelId` and `promptVersion` keys are set in place (overwritten if present).
+ */
+export function addModelProvenanceMeta(fields: Record<string, unknown>): void {
+  fields.modelId = resolveActiveModelId();
+  fields.promptVersion = PROMPT_VERSION;
+}
+
 /**
  * Copy provenance metadata fields from an extracted concept onto the
  * frontmatter record, omitting fields the LLM did not provide so existing
diff --git a/src/compiler/rule-candidates.ts b/src/compiler/rule-candidates.ts
new file mode 100644
index 0000000..62e2458
--- /dev/null
+++ b/src/compiler/rule-candidates.ts
@@ -0,0 +1,398 @@
+/**
+ * RuleCandidate persistence for the llmwiki rule-extraction pipeline (rule pipeline).
+ *
+ * Parallel to `candidates.ts` (the concept review queue) but for structured
+ * `RuleCandidate` records. `llmwiki rules extract` writes one JSON file per
+ * candidate under `.llmwiki/rule-candidates/<id>.json`; `rules approve`/`reject`
+ * flip `status` (and archive rejects); `rules export` emits the array the rule importer
+ * consumes. The full candidate is stored on disk so approval is a pure
+ * status flip — the LLM is never called again at approval time.
+ *
+ * Candidate JSON is the canonical import shape for the rule importer: camelCase keys, tagged
+ * evidence, lowercase status/confidence. Do not reshape it for local use.
+ */
+
+import path from "path";
+import { createHash } from "node:crypto";
+import { atomicWrite, safeReadFile, slugify } from "../utils/markdown.js";
+import {
+  CANDIDATE_JSON_EXT,
+  candidateFileId,
+  listCandidateFileIds,
+  moveCandidateToArchive,
+} from "../utils/candidate-store.js";
+import {
+  RULE_CANDIDATES_DIR,
+  RULE_CANDIDATES_ARCHIVE_DIR,
+} from "../utils/constants.js";
+import type {
+  EvidenceRef,
+  RuleCandidate,
+  RuleConfidence,
+  RuleProvenance,
+  RuleStatus,
+} from "../utils/rule-types.js";
+
+/** Confidence literals accepted by the runtime shape check (`candidateEnumShapeError`). */
+const CONFIDENCE_VALUES: readonly RuleConfidence[] = ["low", "medium", "high"];
+
+/** Status literals accepted by the runtime shape check (`candidateEnumShapeError`). */
+const STATUS_VALUES: readonly RuleStatus[] = ["proposed", "approved", "rejected"];
+
+/** Checks one tagged evidence object at runtime; returns an error message, or null when the shape is valid. */
+type EvidenceShapeChecker = (ref: Record<string, unknown>) => string | null;
+
+/** Path, under `root`, of the pending-candidate JSON file for the given file id. */
+function ruleCandidatePath(root: string, id: string): string {
+  return path.join(root, RULE_CANDIDATES_DIR, id + CANDIDATE_JSON_EXT);
+}
+
+/** Path, under `root`, of the archived JSON file for the given candidate file id. */
+function ruleArchivePath(root: string, id: string): string {
+  return path.join(root, RULE_CANDIDATES_ARCHIVE_DIR, id + CANDIDATE_JSON_EXT);
+}
+
+/** Rule importer contract caps and id patterns (mirrored from the rule-import contract's rule_candidate_validation.rs). */
+const CATEGORY_CAP = 64;
+const TITLE_CAP = 256;
+const PREDICATE_CAP = 512;
+const EVIDENCE_REF_CAP = 1024;
+const MAX_EVIDENCE_PER_CANDIDATE = 64;
+const CANDIDATE_ID_RE = /^rulecand\.[a-z0-9_]+\.[a-z0-9-]+$/;
+const RULE_ID_RE = /^rule\.[a-z0-9_]+\.[a-z0-9-]+$/;
+
+/**
+ * Coerce a raw LLM category into the rule importer's category alphabet `[a-z0-9_]+`.
+ * Lowercases, collapses every run of other characters to one underscore ("code review"
+ * becomes "code_review"), strips edge underscores, falls back to "general" when nothing
+ * is left, and finally truncates to the category cap.
+ * @param raw - The model-supplied category string.
+ */
+export function sanitizeRuleCategory(raw: string): string {
+  const underscored = raw.toLowerCase().replace(/[^a-z0-9_]+/g, "_");
+  const trimmed = underscored.replace(/^_+|_+$/g, "");
+  return (trimmed === "" ? "general" : trimmed).slice(0, CATEGORY_CAP);
+}
+
+/**
+ * Build a collision-resistant slug segment in the rule importer's slug alphabet `[a-z0-9-]+`.
+ * The slugified title (or the literal "rule" when the title slugifies to nothing) is
+ * suffixed with the first 8 hex characters of the SHA-256 of the content signature, so
+ * two rules sharing a title but differing in signature get different ids/files.
+ * @param title - The rule title.
+ * @param contentSignature - Stable per-rule signature (e.g. source + when + then).
+ */
+export function buildRuleSlug(title: string, contentSignature: string): string {
+  const digest = createHash("sha256").update(contentSignature).digest("hex");
+  const prefix = slugify(title) || "rule";
+  return `${prefix}-${digest.slice(0, 8)}`;
+}
+
+/**
+ * Producer-side mirror of the rule importer's import gate. Checks, in order: runtime shape,
+ * id patterns, id/category agreement, field caps, evidence count, and per-ref evidence rules.
+ * @param c - The candidate to validate.
+ * @returns The first problem found as a message, or null when the candidate is importable.
+ */
+export function validateRuleCandidate(c: RuleCandidate): string | null {
+  const shapeProblem = ruleCandidateShapeError(c);
+  if (shapeProblem) return shapeProblem;
+  const { id, proposed, evidence } = c;
+  if (!CANDIDATE_ID_RE.test(id)) return `candidate id "${id}" violates ${CANDIDATE_ID_RE}`;
+  if (!RULE_ID_RE.test(proposed.id)) return `proposed rule id "${proposed.id}" violates ${RULE_ID_RE}`;
+  const idTail = id.slice("rulecand.".length);
+  if (proposed.id !== `rule.${idTail}`) {
+    return `proposed rule id "${proposed.id}" does not match candidate id "${id}"`;
+  }
+  if (!id.startsWith(`rulecand.${proposed.category}.`)) {
+    return `candidate id "${id}" does not match category "${proposed.category}"`;
+  }
+  const overCap = firstFieldOverCap(c);
+  if (overCap) return overCap;
+  if (evidence.length > MAX_EVIDENCE_PER_CANDIDATE) {
+    return `too many evidence refs: ${evidence.length} (max ${MAX_EVIDENCE_PER_CANDIDATE})`;
+  }
+  return firstEvidenceError(evidence);
+}
+
+/** Check the runtime shape of an untrusted value before contract validation dereferences its fields. */
+function ruleCandidateShapeError(value: unknown): string | null {
+  if (typeof value !== "object" || !value) return "candidate must be an object";
+  const record = value as Record<string, unknown>;
+  const topLevelError =
+    candidateScalarShapeError(record) ?? candidateEnumShapeError(record);
+  const nestedError = (): string | null =>
+    proposedRuleShapeError(record.proposed) ?? candidateEvidenceShapeError(record.evidence);
+  return topLevelError ?? nestedError() ?? provenanceShapeError(record.provenance);
+}
+
+/** Verify that the top-level `id` and `createdAt` fields are strings. */
+function candidateScalarShapeError(c: Record<string, unknown>): string | null {
+  const { id, createdAt } = c;
+  if (typeof id !== "string") return "candidate id must be a string";
+  return typeof createdAt === "string" ? null : "createdAt must be a string";
+}
+
+/** Verify that `confidence` and `status` each hold one of their allowed literal values. */
+function candidateEnumShapeError(c: Record<string, unknown>): string | null {
+  const confidenceOk = CONFIDENCE_VALUES.includes(c.confidence as RuleConfidence);
+  if (!confidenceOk) return "invalid confidence";
+  return STATUS_VALUES.includes(c.status as RuleStatus) ? null : "invalid status";
+}
+
+/** Require `evidence` to be an array, then shape-check each of its refs. */
+function candidateEvidenceShapeError(value: unknown): string | null {
+  const refs = Array.isArray(value) ? value : null;
+  return refs === null ? "evidence must be an array" : firstEvidenceShapeError(refs);
+}
+
+/** Verify `proposed` is an object whose text fields are strings and whose `version` is exactly 1. */
+function proposedRuleShapeError(value: unknown): string | null {
+  if (typeof value !== "object" || !value) return "proposed rule must be an object";
+  const rule = value as Record<string, unknown>;
+  const stringFields = ["id", "category", "title", "description", "when", "then"];
+  const badField = stringFields.find((name) => typeof rule[name] !== "string");
+  if (badField !== undefined) return `proposed.${badField} must be a string`;
+  if (rule.version !== 1) return "proposed.version must be 1";
+  return null;
+}
+
+/** Report the first capped proposed-rule field (category, title, when, then) that is too long, or null. */
+function firstFieldOverCap(c: RuleCandidate): string | null {
+  const { category, title, when, then } = c.proposed;
+  const limits: Array<{ name: string; text: string; cap: number }> = [
+    { name: "category", text: category, cap: CATEGORY_CAP },
+    { name: "title", text: title, cap: TITLE_CAP },
+    { name: "when", text: when, cap: PREDICATE_CAP },
+    { name: "then", text: then, cap: PREDICATE_CAP },
+  ];
+  const offender = limits.find(({ text, cap }) => text.length > cap);
+  if (!offender) return null;
+  return `${offender.name} exceeds ${offender.cap} chars`;
+}
+
+/** Return the first contract violation among the evidence refs (scheme/path/length), or null when all pass. */
+function firstEvidenceError(evidence: EvidenceRef[]): string | null {
+  let found: string | null = null;
+  for (const ref of evidence) {
+    found = evidenceRefError(ref);
+    if (found) break;
+  }
+  return found || null;
+}
+
+/** Shape-check each evidence ref in order and return the first error, or null when every ref is well-formed. */
+function firstEvidenceShapeError(evidence: unknown[]): string | null {
+  let found: string | null = null;
+  for (const ref of evidence) {
+    found = evidenceShapeError(ref);
+    if (found) break;
+  }
+  return found || null;
+}
+
+/** Runtime shape validation for one evidence ref; `kind` is matched against own keys only, so "__proto__" or "toString" is just an unknown kind. */
+function evidenceShapeError(ref: unknown): string | null {
+  if (!ref || typeof ref !== "object") return "evidence ref must be an object";
+  const candidate = ref as Record<string, unknown>;
+  if (typeof candidate.kind !== "string") return "evidence kind must be a string";
+  const checkEvidenceShape = Object.prototype.hasOwnProperty.call(EVIDENCE_SHAPE_CHECKERS, candidate.kind) ? EVIDENCE_SHAPE_CHECKERS[candidate.kind] : undefined;
+  return checkEvidenceShape ? checkEvidenceShape(candidate) : "unknown evidence kind";
+}
+
+/** Shape checker per evidence `kind` tag (audit, file, memory, url); each returns an error message or null. */
+const EVIDENCE_SHAPE_CHECKERS: Record<string, EvidenceShapeChecker> = {
+  audit: (ref) => requiredStringField(ref, "auditId", "audit evidence requires auditId"),
+  file: fileEvidenceShapeError,
+  memory: (ref) => requiredStringField(ref, "memoryId", "memory evidence requires memoryId"),
+  url: (ref) => requiredStringField(ref, "url", "url evidence requires url"),
+};
+
+/** Return null when `value[field]` is a string, otherwise the supplied error message. */
+function requiredStringField(
+  value: Record<string, unknown>,
+  field: string,
+  message: string,
+): string | null {
+  return typeof value[field] !== "string" ? message : null;
+}
+
+/** Verify file evidence has a string `path` and that `lineStart`/`lineEnd`, when present, are numbers. */
+function fileEvidenceShapeError(ref: Record<string, unknown>): string | null {
+  const { lineStart, lineEnd } = ref;
+  if (typeof ref.path !== "string") return "file evidence requires path";
+  if (lineStart !== undefined && typeof lineStart !== "number") return "lineStart must be a number";
+  return lineEnd === undefined || typeof lineEnd === "number" ? null : "lineEnd must be a number";
+}
+
+/** Verify provenance is an object with a string `source` and, when present, string `modelId`/`modelVersion`. */
+function provenanceShapeError(value: unknown): string | null {
+  if (typeof value !== "object" || !value) return "provenance must be an object";
+  const { source, modelId, modelVersion } = value as Record<string, unknown>;
+  if (typeof source !== "string") return "provenance.source must be a string";
+  const isOptionalString = (field: unknown): boolean => field === undefined || typeof field === "string";
+  if (!isOptionalString(modelId)) return "provenance.modelId must be a string";
+  return isOptionalString(modelVersion) ? null : "provenance.modelVersion must be a string";
+}
+
+/** Apply the rule importer's per-ref check to `url` and `file` evidence; every other kind passes. */
+function evidenceRefError(ref: EvidenceRef): string | null {
+  if (ref.kind === "file") return fileEvidenceError(ref.path);
+  if (ref.kind !== "url") return null;
+  return urlEvidenceError(ref.url);
+}
+
+/** Validate url evidence: it must fit the reference cap and start with `https://`. */
+function urlEvidenceError(url: string): string | null {
+  const tooLong = url.length > EVIDENCE_REF_CAP;
+  if (tooLong) return `evidence url exceeds ${EVIDENCE_REF_CAP} chars`;
+  return url.startsWith("https://") ? null : `url evidence must be https: ${url}`;
+}
+
+/** Validate file evidence: the path must fit the reference cap and pass the unsafe-path check. */
+function fileEvidenceError(filePath: string): string | null {
+  const withinCap = filePath.length <= EVIDENCE_REF_CAP;
+  if (!withinCap) return `evidence path exceeds ${EVIDENCE_REF_CAP} chars`;
+  return isUnsafeEvidencePath(filePath) ? `unsafe evidence path: ${filePath}` : null;
+}
+
+/** Flag paths that start with `/` or `\`, start with a drive letter and colon, or contain a `..` segment. */
+function isUnsafeEvidencePath(p: string): boolean {
+  const rooted = ["/", "\\"].some((lead) => p.startsWith(lead)) || /^[a-zA-Z]:/.test(p);
+  return rooted || p.split(/[/\\]/).includes("..");
+}
+
+/** Input to `buildRuleCandidate`: the extracted rule fields; ids, `status`, `version`, and `createdAt` are added there. */
+export interface RuleCandidateDraft {
+  category: string; // middle segment of both generated ids
+  slug: string; // last segment of both generated ids
+  title: string;
+  description: string;
+  when: string;
+  then: string;
+  evidence: EvidenceRef[];
+  provenance: RuleProvenance;
+  confidence: RuleConfidence;
+}
+
+/**
+ * Assemble a RuleCandidate (`rulecand.<category>.<slug>` / `rule.<category>.<slug>`, status `proposed`, version 1).
+ * @param draft - The extracted rule fields.
+ * @param createdAt - RFC3339 creation timestamp (injected for determinism).
+ */
+export function buildRuleCandidate(
+  draft: RuleCandidateDraft,
+  createdAt: string,
+): RuleCandidate {
+  const idTail = `${draft.category}.${draft.slug}`;
+  const proposed: RuleCandidate["proposed"] = {
+    id: `rule.${idTail}`,
+    category: draft.category,
+    title: draft.title,
+    description: draft.description,
+    when: draft.when,
+    then: draft.then,
+    version: 1,
+  };
+  return {
+    id: `rulecand.${idTail}`,
+    proposed,
+    evidence: draft.evidence,
+    provenance: draft.provenance,
+    confidence: draft.confidence,
+    status: "proposed",
+    createdAt,
+  };
+}
+
+/**
+ * Serialize a rule candidate as 2-space-indented JSON and write it via `atomicWrite`.
+ * The file id comes from `candidateFileId(candidate.id)`.
+ * @param root - Project root directory.
+ * @param candidate - Fully-formed candidate to write.
+ * @returns The path the candidate was written to.
+ */
+export async function writeRuleCandidate(
+  root: string,
+  candidate: RuleCandidate,
+): Promise<string> {
+  const target = ruleCandidatePath(root, candidateFileId(candidate.id));
+  const serialized = JSON.stringify(candidate, null, 2);
+  await atomicWrite(target, serialized);
+  return target;
+}
+
+/** Type guard for parsed candidate files: a value qualifies only when `validateRuleCandidate` reports no error. */
+function isValidRuleCandidate(value: unknown): value is RuleCandidate {
+  return validateRuleCandidate(value as RuleCandidate) === null;
+}
+
+/** Load one candidate JSON file; resolves to null when nothing is read, the JSON is unparsable, or validation fails. */
+export async function readRuleCandidate(
+  root: string,
+  fileId: string,
+): Promise<RuleCandidate | null> {
+  const text = await safeReadFile(ruleCandidatePath(root, fileId));
+  if (!text) return null;
+  try {
+    const value: unknown = JSON.parse(text);
+    return isValidRuleCandidate(value) ? value : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Load every readable, valid rule candidate listed for the candidates directory,
+ * ordered by createdAt and then by id so the result order is stable.
+ * @param root - Project root directory.
+ */
+export async function listRuleCandidates(root: string): Promise<RuleCandidate[]> {
+  const fileIds = await listCandidateFileIds(path.join(root, RULE_CANDIDATES_DIR));
+  const loaded: RuleCandidate[] = [];
+  for (const fileId of fileIds) {
+    const entry = await readRuleCandidate(root, fileId);
+    if (entry !== null) loaded.push(entry);
+  }
+
+  const byCreatedThenId = (a: RuleCandidate, b: RuleCandidate): number => {
+    const byTime = a.createdAt.localeCompare(b.createdAt);
+    return byTime !== 0 ? byTime : a.id.localeCompare(b.id);
+  };
+  return loaded.sort(byCreatedThenId);
+}
+
+/**
+ * Rewrite a candidate's file with a new status, leaving every other field as read.
+ * Nothing is written when the candidate cannot be read (missing or invalid file).
+ * @param root - Project root directory.
+ * @param fileId - Filesystem id of the candidate (dotted id with `.`→`-`).
+ * @param status - New status to set.
+ * @returns The updated candidate, or null when it could not be read.
+ */
+export async function setRuleCandidateStatus(
+  root: string,
+  fileId: string,
+  status: RuleStatus,
+): Promise<RuleCandidate | null> {
+  const current = await readRuleCandidate(root, fileId);
+  if (current === null) return null;
+  const next: RuleCandidate = { ...current, status };
+  const target = ruleCandidatePath(root, fileId);
+  const serialized = JSON.stringify(next, null, 2);
+  await atomicWrite(target, serialized);
+  return next;
+}
+
+/**
+ * Move a candidate's JSON file to its archive path so rejected proposals stay auditable.
+ * Only moves the file; the "rejected" status flip is done beforehand via {@link setRuleCandidateStatus}.
+ * @param root - Project root directory.
+ * @param fileId - Filesystem id of the candidate.
+ * @returns True when the candidate existed and was moved.
+ */
+export async function archiveRuleCandidate(
+  root: string,
+  fileId: string,
+): Promise<boolean> {
+  const from = ruleCandidatePath(root, fileId);
+  const to = ruleArchivePath(root, fileId);
+  return moveCandidateToArchive(from, to);
+}
diff --git a/src/compiler/rule-extractor.ts b/src/compiler/rule-extractor.ts
new file mode 100644
index 0000000..c1a2bf3
--- /dev/null
+++ b/src/compiler/rule-extractor.ts
@@ -0,0 +1,252 @@
+/**
+ * Rule-extraction orchestrator (rule pipeline).
+ *
+ * Drives the `RuleCandidate` producer half of the learning loop: for each
+ * changed source file (gated by the same SHA-256 change detection the concept
+ * compiler uses), call the LLM with the rule-extraction tool, map each
+ * extracted rule into a `RuleCandidate`, and persist it under
+ * `.llmwiki/rule-candidates/`.
+ *
+ * Provenance is stamped with the active model id (W4's `resolveActiveModelId`)
+ * and the rule-prompt version so each recommendation is auditable even though
+ * the extraction itself is nondeterministic. The createdAt timestamp is the
+ * only nondeterministic field by design (RFC3339 wall-clock).
+ */
+
+import { readFile } from "fs/promises";
+import path from "path";
+import { detectChanges } from "./hasher.js";
+import { parseFrontmatter, slugify } from "../utils/markdown.js";
+import { callClaude } from "../utils/llm.js";
+import { resolveActiveModelId } from "../utils/provider.js";
+import { budgetAndNumberSource } from "./prompt-budget.js";
+import { SOURCES_DIR } from "../utils/constants.js";
+import {
+  readRuleState,
+  updateRuleSourceState,
+} from "./rule-state.js";
+import {
+  RULE_EXTRACTION_TOOL,
+  RULE_PROMPT_VERSION,
+  buildRuleExtractionPrompt,
+  parseRules,
+  type ExtractedRule,
+} from "./rule-prompts.js";
+import {
+  buildRuleCandidate,
+  buildRuleSlug,
+  readRuleCandidate,
+  sanitizeRuleCategory,
+  validateRuleCandidate,
+  writeRuleCandidate,
+} from "./rule-candidates.js";
+import { candidateFileId } from "../utils/candidate-store.js";
+import { createHash } from "node:crypto";
+import type { EvidenceRef, RuleCandidate, RuleProvenance } from "../utils/rule-types.js";
+
+/** Producer tag written to `provenance.source` by `buildProvenance` for every candidate in a run. */
+const PROVENANCE_SOURCE = "llm-wiki-compiler";
+
+/** Structured outcome of a rules-extraction run, for CLI + programmatic use. */
+export interface RuleExtractionResult {
+  /** Source files processed this run (those reported as new or changed against the rule state). */
+  processedSources: string[];
+  /** Candidates actually written this run; kept or dropped ones are reported in `notes` instead. */
+  candidates: RuleCandidate[];
+  /** Non-fatal messages: a source that yielded no rules, a preserved decision, or a dropped candidate. */
+  notes: string[];
+}
+
+/** Type guard: true when the value is a string beginning with `http://` or `https://` (case-insensitive). */
+function isUrl(value: unknown): value is string {
+  return typeof value !== "string" ? false : /^https?:\/\//i.test(value);
+}
+
+/**
+ * Build the single-element evidence list for an extracted rule.
+ *
+ * When the source's `source` frontmatter value is an http(s) URL the result is
+ * one `url` ref; otherwise it is one `file` ref keyed on the source filename,
+ * carrying whichever line anchors the rule supplied that fall within `maxLine`.
+ * @param sourceFile - Source filename used as the file evidence path.
+ * @param sourceMeta - Parsed frontmatter of the source.
+ * @param rule - The extracted rule, possibly with line-span hints.
+ * @param maxLine - Highest line number considered in bounds.
+ */
+function buildEvidence(
+  sourceFile: string,
+  sourceMeta: Record<string, unknown>,
+  rule: ExtractedRule,
+  maxLine: number,
+): EvidenceRef[] {
+  const { source: origin } = sourceMeta;
+  if (isUrl(origin)) return [{ kind: "url", url: origin }];
+
+  // An anchor past the content shown to the model is treated as a
+  // hallucination rather than evidence, so it is left off the ref.
+  const inBounds = (line: number | undefined): line is number =>
+    line !== undefined && line <= maxLine;
+  const fileRef: EvidenceRef = { kind: "file", path: sourceFile };
+  if (inBounds(rule.evidenceLineStart)) fileRef.lineStart = rule.evidenceLineStart;
+  if (inBounds(rule.evidenceLineEnd)) fileRef.lineEnd = rule.evidenceLineEnd;
+  return [fileRef];
+}
+
+/** Assemble the provenance stamp: producer tag, active model id, and rule-prompt version (stored as `modelVersion`). */
+function buildProvenance(): RuleProvenance {
+  const modelId = resolveActiveModelId();
+  const modelVersion = RULE_PROMPT_VERSION;
+  return {
+    source: PROVENANCE_SOURCE, modelId, modelVersion,
+  };
+}
+
+/**
+ * Send the rule-extraction prompt and tool to the LLM and parse what comes back.
+ * @param numberedContent - Source content with 1-based line numbers already
+ *   prepended (and clipped to the prompt budget), so the model's line-span
+ *   citations refer to anchors it can actually see.
+ * @returns The rules that `parseRules` accepts from the tool output.
+ */
+async function extractRulesFromContent(numberedContent: string): Promise<ExtractedRule[]> {
+  const toolOutput = await callClaude({
+    system: buildRuleExtractionPrompt(numberedContent),
+    messages: [{ role: "user", content: "Extract the actionable rules from this source." }],
+    tools: [RULE_EXTRACTION_TOOL],
+  });
+  return parseRules(toolOutput);
+}
+
+/** An extracted rule plus `maxLine`, the line count of the numbered content sent to the model; used to bound evidence spans. */
+interface RuleInContext {
+  rule: ExtractedRule;
+  maxLine: number;
+}
+
+/**
+ * Turn one extracted rule into a RuleCandidate. The category is normalized to
+ * the rule importer's `[a-z0-9_]` alphabet, and the slug carries a hash of the
+ * source filename plus the rule's when/then/description so distinct rules get
+ * distinct ids/files. createdAt is injected by the caller for a single
+ * consistent timestamp per run.
+ */
+function candidateForRule(
+  sourceFile: string,
+  sourceMeta: Record<string, unknown>,
+  context: RuleInContext,
+  provenance: RuleProvenance,
+  createdAt: string,
+): RuleCandidate {
+  const { rule, maxLine } = context;
+  const { title, description, when, then, confidence } = rule;
+  const signature = [sourceFile, when, then, description].join("\n");
+  const draft: Parameters<typeof buildRuleCandidate>[0] = {
+    category: sanitizeRuleCategory(rule.category),
+    slug: buildRuleSlug(title, signature),
+    title,
+    description,
+    when,
+    then,
+    evidence: buildEvidence(sourceFile, sourceMeta, rule, maxLine),
+    provenance,
+    confidence,
+  };
+  return buildRuleCandidate(draft, createdAt);
+}
+
+/** Process one source file: read and hash it, number it, extract rules, and build (but not persist) candidates. */
+async function extractForSource(
+  root: string,
+  sourceFile: string,
+  provenance: RuleProvenance,
+  createdAt: string,
+): Promise<{ candidates: RuleCandidate[]; note?: string; hash: string }> {
+  const sourcePath = path.join(root, SOURCES_DIR, sourceFile);
+  const raw = await readFile(sourcePath, "utf-8");
+  const hash = createHash("sha256").update(raw).digest("hex");
+  const { meta } = parseFrontmatter(raw);
+  const numbered = budgetAndNumberSource(sourceFile, raw);
+  const maxLine = numbered.split("\n").length;
+  const rules = await extractRulesFromContent(numbered);
+  if (rules.length === 0) return { candidates: [], note: `No rules extracted from ${sourceFile}`, hash };
+  const candidates: RuleCandidate[] = [];
+  for (const rule of rules) {
+    if (slugify(rule.title).length === 0) continue;
+    candidates.push(candidateForRule(sourceFile, meta, { rule, maxLine }, provenance, createdAt));
+  }
+  return { candidates, hash };
+}
+
+/**
+ * List source filenames whose change status is "new" or "changed" relative to the
+ * rule-extraction state read by `readRuleState` (its own cursor, not the concept compiler's).
+ */
+async function changedSources(root: string): Promise<string[]> {
+  const changes = await detectChanges(root, await readRuleState(root));
+  const pending: string[] = [];
+  for (const change of changes) {
+    if (change.status === "new" || change.status === "changed") pending.push(change.file);
+  }
+  return pending;
+}
+
+/**
+ * Extract rule candidates for every changed source and persist them.
+ *
+ * @param root - Project root directory.
+ * @param createdAt - RFC3339 timestamp injected once per run for determinism in
+ *   tests; defaults to the current wall-clock time.
+ * @returns Structured result with processed sources, written candidates, notes.
+ */
+export async function extractRuleCandidates(
+  root: string,
+  createdAt: string = new Date().toISOString(),
+): Promise<RuleExtractionResult> {
+  const provenance = buildProvenance();
+  const sources = await changedSources(root);
+  const result: RuleExtractionResult = { processedSources: sources, candidates: [], notes: [] };
+
+  for (const sourceFile of sources) {
+    const { candidates, note, hash } = await extractForSource(root, sourceFile, provenance, createdAt);
+    if (note) result.notes.push(note);
+    for (const candidate of candidates) {
+      const written = await persistCandidate(root, candidate, result.notes);
+      if (written) result.candidates.push(candidate);
+    }
+    // The rule cursor advances even when a source produced no candidates, so
+    // the same unchanged source is not re-extracted (and approvals are never
+    // re-fired) on the next run.
+    await updateRuleSourceState(root, sourceFile, {
+      hash,
+      concepts: [],
+      compiledAt: createdAt,
+    });
+  }
+
+  return result;
+}
+
+/**
+ * Write a freshly-extracted candidate unless a readable on-disk copy is already approved/rejected
+ * (kept as-is, with a note) or the candidate fails `validateRuleCandidate` (dropped, with a note).
+ * @returns True when the candidate was written.
+ */
+async function persistCandidate(
+  root: string,
+  candidate: RuleCandidate,
+  notes: string[],
+): Promise<boolean> {
+  const { id } = candidate;
+  const onDisk = await readRuleCandidate(root, candidateFileId(id));
+  if (onDisk !== null && onDisk.status !== "proposed") {
+    notes.push(`Kept ${onDisk.status} candidate ${id} (re-extraction did not overwrite it).`);
+    return false;
+  }
+  const rejection = validateRuleCandidate(candidate);
+  if (!rejection) {
+    await writeRuleCandidate(root, candidate);
+    return true;
+  }
+  notes.push(`Dropped candidate ${id}: ${rejection}`);
+  return false;
+}
diff --git a/src/compiler/rule-prompts.ts b/src/compiler/rule-prompts.ts
new file mode 100644
index 0000000..657f4cb
--- /dev/null
+++ b/src/compiler/rule-prompts.ts
@@ -0,0 +1,226 @@
+/**
+ * Rule-extraction prompt + tool schema (rule pipeline).
+ *
+ * Sibling of `prompts.ts`'s concept-extraction contract. Where concept
+ * extraction yields prose wiki pages, rule extraction yields structured
+ * `RuleCandidate.proposed` fields: a machine-actionable proposed rule with a
+ * trigger predicate (`when`), an action discriminator (`then`), a category,
+ * and an extraction confidence. The compiler maps this tool output into
+ * `RuleCandidate` records that a downstream rule importer imports for human approval.
+ *
+ * The `when`/`then` language is interpreter-defined: a concise human-readable
+ * condition/action string is sufficient at this stage.
+ */
+
+import type { RuleConfidence } from "../utils/rule-types.js";
+import { languageDirective } from "../utils/output-language.js";
+
+/**
+ * Named version of the rule-extraction prompt + tool contract.
+ *
+ * The rule extractor stamps this value onto `RuleCandidate.provenance.modelVersion`
+ * so a downstream auditor can tell apart candidates produced under different
+ * prompt generations even when the model id is identical. Bump it on any wording
+ * change that could alter extracted rules. Format is `vMAJOR`.
+ */
+export const RULE_PROMPT_VERSION = "v1";
+
+/** Confidence levels offered as the tool schema's `confidence` enum and accepted by `isValidRawRule`. */
+const RULE_CONFIDENCE_VALUES: RuleConfidence[] = ["low", "medium", "high"];
+
+/**
+ * Tool definition (name, description, JSON `input_schema`) for the rule-extraction call.
+ * The schema takes a `rules` array; each item requires category, title, description,
+ * when, then, and confidence, and may carry evidenceLineStart/evidenceLineEnd numbers.
+ */
+export const RULE_EXTRACTION_TOOL = {
+  name: "extract_rules",
+  description:
+    "Extract machine-actionable workflow rules from a source document. " +
+    "A rule is a reusable guideline a team would want enforced.",
+  input_schema: {
+    type: "object" as const,
+    properties: {
+      rules: {
+        type: "array",
+        items: {
+          type: "object",
+          properties: {
+            category: {
+              type: "string",
+              description:
+                "Coarse grouping for the rule, lowercase (e.g. 'process', 'security', 'docs', 'testing').",
+            },
+            title: {
+              type: "string",
+              description: "Short human-readable rule title.",
+            },
+            description: {
+              type: "string",
+              description: "What the rule enforces and why it matters.",
+            },
+            when: {
+              type: "string",
+              description:
+                "Trigger predicate — a concise condition string describing when the rule should fire.",
+            },
+            then: {
+              type: "string",
+              description:
+                "Action discriminator — what should happen when the rule fires (e.g. 'warn', 'block', 'suggest <x>').",
+            },
+            confidence: {
+              type: "string",
+              enum: RULE_CONFIDENCE_VALUES,
+              description:
+                "Extraction confidence: 'high' if directly stated, 'medium' if synthesised, 'low' if speculative.",
+            },
+            evidenceLineStart: {
+              type: "number",
+              description:
+                "Optional 1-based start line in the numbered source content that supports this rule.",
+            },
+            evidenceLineEnd: {
+              type: "number",
+              description: "Optional 1-based end line supporting this rule.",
+            },
+          },
+          required: ["category", "title", "description", "when", "then", "confidence"],
+        },
+      },
+    },
+    required: ["rules"],
+  },
+};
+
+/** Return the given prompt lines, with the output-language directive appended when one is set. */
+function withLangLine(...lines: string[]): string[] {
+  const directive = languageDirective();
+  return !directive ? lines : [...lines, directive];
+}
+
+/**
+ * Build the system prompt for the rule-extraction phase: a role preamble (plus
+ * the output-language directive when set), the per-field instructions, and the
+ * source text under a `--- SOURCE DOCUMENT ---` marker.
+ * @param sourceContent - Source text to embed; the prompt refers to it as numbered content.
+ * @returns System prompt string for the extraction call.
+ */
+export function buildRuleExtractionPrompt(sourceContent: string): string {
+  const preamble = withLangLine(
+    "You are a rule extraction engine for a team-memory system. Analyze the",
+    "source document and identify 1-6 reusable, enforceable workflow rules a",
+    "team would want a guidance system to apply automatically.",
+    "A good rule is specific, actionable, and triggerable — not a vague value.",
+    "Use the extract_rules tool to return your findings.",
+  );
+  const fieldGuide = [
+    "For every rule emit:",
+    "  - category: lowercase coarse grouping (process, security, docs, testing, …).",
+    "  - title + description: what the rule enforces and why.",
+    "  - when: a concise trigger predicate (the condition that fires the rule).",
+    "  - then: the action to take when it fires.",
+    "  - confidence: 'high' if the source states it directly, 'medium' if you",
+    "    synthesised it, 'low' if it is speculative.",
+    "  - evidenceLineStart/evidenceLineEnd: the source line range supporting the",
+    "    rule when you can identify it (1-based, from the numbered content below).",
+  ];
+  return [
+    ...preamble, "", ...fieldGuide, "",
+    "--- SOURCE DOCUMENT ---", "", sourceContent,
+  ].join("\n");
+}
+
+/** One `rules` entry as parsed from the tool JSON; every field is `unknown` until `isValidRawRule` has checked it. */
+interface RawRule {
+  category: unknown;
+  title: unknown;
+  description: unknown;
+  when: unknown;
+  then: unknown;
+  confidence: unknown;
+  evidenceLineStart?: unknown;
+  evidenceLineEnd?: unknown;
+}
+
+/** A validated rule as produced by `mapRawRule`: trimmed strings, a known confidence, optional positive-integer line anchors. */
+export interface ExtractedRule {
+  category: string;
+  title: string;
+  description: string;
+  when: string;
+  then: string;
+  confidence: RuleConfidence;
+  evidenceLineStart?: number;
+  evidenceLineEnd?: number;
+}
+
+/** True when category, title, description, when, and then are all strings with non-whitespace content. */
+function hasRequiredStrings(r: RawRule): boolean {
+  const isFilled = (f: unknown): boolean => typeof f === "string" && f.trim() !== "";
+  return [r.category, r.title, r.description, r.when, r.then].every(isFilled);
+}
+
+/** True when the raw rule has all required strings and a `confidence` from the allowed set. */
+function isValidRawRule(r: RawRule): boolean {
+  if (!hasRequiredStrings(r)) return false;
+  const { confidence } = r;
+  const isKnownLevel =
+    typeof confidence === "string" && RULE_CONFIDENCE_VALUES.includes(confidence as RuleConfidence);
+  return isKnownLevel;
+}
+
+/** Accept a line value only when it is a positive integer number; anything else becomes undefined. */
+function coerceLine(value: unknown): number | undefined {
+  if (typeof value !== "number") return undefined;
+  if (!Number.isInteger(value) || value <= 0) return undefined;
+  return value;
+}
+
+/** Convert an already-validated raw rule into an ExtractedRule: trim the strings, then attach any consistent line span. */
+function mapRawRule(r: RawRule): ExtractedRule {
+  const text = (field: unknown): string => (field as string).trim();
+  const rule: ExtractedRule = {
+    category: text(r.category), title: text(r.title),
+    description: text(r.description), when: text(r.when),
+    then: text(r.then), confidence: r.confidence as RuleConfidence,
+  };
+  const start = coerceLine(r.evidenceLineStart);
+  const end = coerceLine(r.evidenceLineEnd);
+  assignEvidenceSpan(rule, start, end);
+  return rule;
+}
+
+/**
+ * Attach a line span to a rule only when it is internally consistent: an inverted
+ * span (end < start) sets neither anchor, while a lone start or lone end is still
+ * carried as a single-anchor hint.
+ */
+function assignEvidenceSpan(
+  rule: ExtractedRule,
+  start: number | undefined,
+  end: number | undefined,
+): void {
+  const inverted = start !== undefined && end !== undefined && end < start;
+  if (inverted) return;
+  if (start !== undefined) rule.evidenceLineStart = start;
+  if (end !== undefined) rule.evidenceLineEnd = end;
+}
+
+/**
+ * Parse the JSON tool output from rule extraction into typed objects. Never throws: malformed JSON,
+ * a non-object payload (e.g. `null`), a non-array `rules`, and invalid or non-object entries are all dropped.
+ * @param toolOutput - Raw JSON string returned from the extract_rules tool.
+ * @returns Array of validated ExtractedRule objects (empty when nothing usable was found).
+ */
+export function parseRules(toolOutput: string): ExtractedRule[] {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(toolOutput);
+  } catch {
+    return [];
+  }
+  const rules: unknown = (parsed as { rules?: unknown } | null)?.rules;
+  const entries: unknown[] = Array.isArray(rules) ? rules : [];
+  return entries.filter((r): r is RawRule => typeof r === "object" && r !== null && isValidRawRule(r as RawRule)).map(mapRawRule);
+}
diff --git a/src/compiler/rule-state.ts b/src/compiler/rule-state.ts
new file mode 100644
index 0000000..f0052fe
--- /dev/null
+++ b/src/compiler/rule-state.ts
@@ -0,0 +1,67 @@
+/**
+ * Rule-extraction change-detection state (rule pipeline).
+ *
+ * `llmwiki rules extract` must decide which sources to (re)process on its own
+ * cadence, independent of the concept compiler. If it reused `.llmwiki/state.json`
+ * a source already compiled into pages would be considered "unchanged" and
+ * yield zero rule candidates, while a source not yet compiled would be
+ * re-extracted on every run. This module persists a parallel per-source hash
+ * map in `.llmwiki/rule-state.json` so rule extraction advances its own cursor.
+ *
+ * The shape mirrors {@link WikiState} (so `detectChanges` can compare against
+ * it directly), but only the per-source `hash` is meaningful here — `concepts`
+ * is always empty because rule extraction produces candidates, not pages.
+ */
+
+import { readFile, writeFile, rename, mkdir } from "fs/promises";
+import { existsSync } from "fs";
+import path from "path";
+import { LLMWIKI_DIR, RULE_STATE_FILE } from "../utils/constants.js";
+import type { WikiState, SourceState } from "../utils/types.js";
+
+/** Build a blank rule-extraction state: version 1, empty `indexHash`, no recorded sources. */
+function emptyRuleState(): WikiState {
+  return { version: 1, indexHash: "", sources: {} };
+}
+
+/**
+ * Read `.llmwiki/rule-state.json`. A missing, unreadable, or mis-shaped file (no `sources`
+ * object) yields an empty state: a corrupt cursor never blocks extraction, it all just looks new.
+ * @param root - Project root directory.
+ */
+export async function readRuleState(root: string): Promise<WikiState> {
+  const filePath = path.join(root, RULE_STATE_FILE);
+  if (!existsSync(filePath)) return emptyRuleState();
+  try {
+    const parsed = JSON.parse(await readFile(filePath, "utf-8")) as WikiState | null;
+    const sources: unknown = parsed?.sources;
+    if (parsed && sources && typeof sources === "object" && !Array.isArray(sources)) return parsed;
+  } catch { /* unreadable or malformed JSON: fall through to the empty state */ }
+  return emptyRuleState();
+}
+
+/** Persist rule-state.json via a sibling `.tmp` file that is then renamed into place. */
+async function writeRuleState(root: string, state: WikiState): Promise<void> {
+  const target = path.join(root, RULE_STATE_FILE);
+  const staging = `${target}.tmp`;
+  await mkdir(path.join(root, LLMWIKI_DIR), { recursive: true });
+  await writeFile(staging, JSON.stringify(state, null, 2), "utf-8");
+  await rename(staging, target);
+}
+
+/**
+ * Mark a source as handled by rule extraction: load the rule state, store `entry`
+ * under `sourceFile`, and write it back so a later `rules extract` can skip the source.
+ * @param root - Project root directory.
+ * @param sourceFile - Source filename within `sources/`.
+ * @param entry - The source's hash + processing timestamp.
+ */
+export async function updateRuleSourceState(
+  root: string,
+  sourceFile: string,
+  entry: SourceState,
+): Promise<void> {
+  const current = await readRuleState(root);
+  current.sources[sourceFile] = entry;
+  await writeRuleState(root, current);
+}
diff --git a/src/export/collect.ts b/src/export/collect.ts
index 2081fb6..a4c6252 100644
--- a/src/export/collect.ts
+++ b/src/export/collect.ts
@@ -19,6 +19,12 @@ import { flattenCitations } from "../context/provenance.js";
 import type { PageKind } from "../schema/types.js";
 import type { ProvenanceState, ContradictionRef } from "../utils/types.js";
 import { CONCEPTS_DIR, QUERIES_DIR } from "../utils/constants.js";
+import {
+  hashPageBody,
+  resolveSourceHashes,
+  sourceHashLookupFromSnapshot,
+  type SourceHashLookup,
+} from "./provenance.js";
 import type { ExportPage, PageDirectory } from "./types.js";
 
 export { extractWikilinkSlugs };
@@ -85,9 +91,14 @@ function readPageKind(meta: Record<string, unknown>): PageKind | undefined {
  * (path, kind, advisory*, citations, aliases) are populated here so
  * every export format gets the same enriched payload.
  */
-function toExportPage(raw: RawWikiPage, snapshot: FreshnessSnapshot): ExportPage {
+function toExportPage(
+  raw: RawWikiPage,
+  snapshot: FreshnessSnapshot,
+  sourceHashes: SourceHashLookup,
+): ExportPage {
   const meta = raw.frontmatter;
   const aliases = readStringArray(meta, "aliases");
+  const sources = readStringArray(meta, "sources");
   const freshness = computeFreshness(
     { slug: raw.slug, pageDirectory: raw.pageDirectory, frontmatter: meta },
     snapshot,
@@ -98,7 +109,7 @@ function toExportPage(raw: RawWikiPage, snapshot: FreshnessSnapshot): ExportPage
     pageDirectory: raw.pageDirectory,
     path: buildPagePath(raw.pageDirectory, raw.slug),
     summary: typeof meta.summary === "string" ? meta.summary : "",
-    sources: readStringArray(meta, "sources"),
+    sources,
     tags: readStringArray(meta, "tags"),
     createdAt: typeof meta.createdAt === "string" ? meta.createdAt : new Date().toISOString(),
     updatedAt: typeof meta.updatedAt === "string" ? meta.updatedAt : new Date().toISOString(),
@@ -113,6 +124,10 @@ function toExportPage(raw: RawWikiPage, snapshot: FreshnessSnapshot): ExportPage
     freshnessStatus: freshness.freshnessStatus,
     contradicted: freshness.contradicted,
     archived: freshness.archived,
+    contentHash: hashPageBody(raw.body),
+    sourceHashes: resolveSourceHashes(sources, sourceHashes),
+    ...(typeof meta.modelId === "string" ? { modelId: meta.modelId } : {}),
+    ...(typeof meta.promptVersion === "string" ? { promptVersion: meta.promptVersion } : {}),
   };
 }
 
@@ -126,9 +141,10 @@ function toExportPage(raw: RawWikiPage, snapshot: FreshnessSnapshot): ExportPage
 export async function collectExportPages(root: string): Promise<ExportPage[]> {
   const raw = await collectRawWikiPages(root);
   const snapshot = await buildFreshnessSnapshot(root);
+  const sourceHashes = sourceHashLookupFromSnapshot(snapshot);
   const kept = raw.filter((page) => page.parseStatus.hasTitle && !page.parseStatus.orphaned);
   const pages = kept
-    .map((page) => toExportPage(page, snapshot))
+    .map((page) => toExportPage(page, snapshot, sourceHashes))
     .filter((page) => page.freshnessStatus !== "orphaned");
   pages.sort((a, b) => a.title.localeCompare(b.title));
   return pages;
diff --git a/src/export/json-export.ts b/src/export/json-export.ts
index a48e65d..1e323f3 100644
--- a/src/export/json-export.ts
+++ b/src/export/json-export.ts
@@ -7,7 +7,17 @@
  * additional transformation.
  *
  * Schema:
- *   { exportedAt, pageCount, projectId?, pages: ExportPage[] }
+ *   { schemaVersion, exportedAt, pageCount, projectId?, pages: ExportPage[] }
+ *
+ * W4 provenance lives PER PAGE (`ExportPage.modelId` / `promptVersion` plus
+ * `contentHash` / `sourceHashes`), stamped into each page at compile time.
+ * It is deliberately not summarized at the envelope level: a single
+ * export-time model id would misattribute pages compiled under a different
+ * model, which is exactly the lineage bug this avoids.
+ *
+ * `schemaVersion` lets downstream consumers (e.g. the rule importer) pin to a known
+ * contract. Increment when a breaking field change lands; additive fields
+ * do not require a bump.
  *
  * `projectId` is the optional bridge identifier. When present it pins the
  * on-disk export to a stable identity that downstream consumers (the
@@ -19,8 +29,19 @@
 import { validateProjectId } from "./project-id.js";
 import type { ExportPage } from "./types.js";
 
+/**
+ * Monotonically-incremented envelope version.
+ * Bump when a breaking field change lands; additive additions do not require a bump.
+ */
+export const EXPORT_SCHEMA_VERSION = 1;
+
 /** Top-level shape of the JSON export file. */
 interface JsonExportDocument {
+  /**
+   * Contract version for downstream consumers. Start at 1; increment only on
+   * breaking envelope changes so consumers can pin a supported range.
+   */
+  schemaVersion: number;
   exportedAt: string;
   pageCount: number;
   /** Optional bridge identifier. See `src/export/project-id.ts` for the validation rule. */
@@ -48,6 +69,7 @@ export function buildJsonExport(
   options: BuildJsonExportOptions = {},
 ): string {
   const doc: JsonExportDocument = {
+    schemaVersion: EXPORT_SCHEMA_VERSION,
     exportedAt: new Date().toISOString(),
     pageCount: pages.length,
     pages,
diff --git a/src/export/provenance.ts b/src/export/provenance.ts
new file mode 100644
index 0000000..0edba31
--- /dev/null
+++ b/src/export/provenance.ts
@@ -0,0 +1,77 @@
+/**
+ * Export provenance helpers (export provenance).
+ *
+ * Surfaces the auditable lineage a downstream consumer (e.g. a downstream rule importer)
+ * needs to answer "this page came from these sources, via this model and
+ * prompt version":
+ *
+ *  - {@link hashPageBody} derives a deterministic SHA-256 over a page body so
+ *    a consumer can detect content drift without re-reading the markdown.
+ *  - {@link sourceHashLookupFromSnapshot} reuses a freshness snapshot's
+ *    recorded per-source SHA-256 hashes as a filename → hash map.
+ *  - {@link resolveSourceHashes} maps a page's `sources` frontmatter list to
+ *    those committed hashes, preserving order and de-duplicating.
+ *
+ * The hashes here are the SAME digests `hasher.ts` writes to state.json — we
+ * surface them rather than recompute, so the export stays consistent with the
+ * compiler's own change-detection view and stays deterministic (no filesystem
+ * re-reads, no wall-clock, no map-iteration order dependence).
+ */
+
+import { createHash } from "node:crypto";
+import type { FreshnessSnapshot } from "../freshness/types.js";
+
+/** Map of source filename → committed SHA-256 hash from state.json. */
+export type SourceHashLookup = Record<string, string>;
+
+/**
+ * Hex SHA-256 of a page body. Depends only on `body`: equal strings always give equal digests.
+ * @param body - Full markdown page body (without frontmatter).
+ * @returns Hex-encoded SHA-256 digest.
+ */
+export function hashPageBody(body: string): string {
+  const hasher = createHash("sha256");
+  hasher.update(body, "utf-8");
+  return hasher.digest("hex");
+}
+
+/**
+ * Flatten a freshness snapshot into a filename -> hash map by copying each
+ * source's `recordedHash`. Reuses data the snapshot already holds, so no
+ * additional state read happens here.
+ */
+export function sourceHashLookupFromSnapshot(snapshot: FreshnessSnapshot): SourceHashLookup {
+  const byFile: SourceHashLookup = {};
+  Object.entries(snapshot.sources).forEach(([file, source]) => {
+    byFile[file] = source.recordedHash;
+  });
+  return byFile;
+}
+
+/**
+ * Resolve the source-file SHA-256 hashes a page derived from.
+ *
+ * Maps each entry in the page's `sources` frontmatter list to its committed
+ * hash. Sources without a recorded string hash (a source missing from the
+ * lookup, or a filename such as `constructor` that only matches an inherited
+ * `Object.prototype` member) contribute nothing. Order follows the `sources`
+ * list and duplicate hashes collapse.
+ *
+ * @param sources - Source filenames cited by the page (frontmatter `sources`).
+ * @param lookup - Filename → hash map from {@link sourceHashLookupFromSnapshot}.
+ * @returns Ordered, de-duplicated list of source hashes.
+ */
+export function resolveSourceHashes(
+  sources: string[],
+  lookup: SourceHashLookup,
+): string[] {
+  // A Set keeps first-insertion order, giving ordered de-duplication in one pass.
+  const hashes = new Set<string>();
+  for (const file of sources) {
+    // `lookup` is a plain object, so a filename like "constructor" or "__proto__"
+    // resolves to an inherited non-string member; accept genuine string hashes only.
+    const hash: unknown = lookup[file];
+    if (typeof hash === "string") hashes.add(hash);
+  }
+  return [...hashes];
+}
diff --git a/src/export/rule-candidates-json.ts b/src/export/rule-candidates-json.ts
new file mode 100644
index 0000000..5e0f080
--- /dev/null
+++ b/src/export/rule-candidates-json.ts
@@ -0,0 +1,52 @@
+/**
+ * Rule-candidate JSON export (rule pipeline).
+ *
+ * Emits the persisted `RuleCandidate` records as a JSON array for a downstream rule importer
+ * to import. The array element shape is the canonical rule importer contract, verbatim
+ * — camelCase keys, tagged evidence, lowercase status/confidence — because the
+ * on-disk candidates are already stored in that shape. Export is a pure read +
+ * filter + serialize with no LLM calls.
+ *
+ * Approved candidates remain in `.llmwiki/rule-candidates/` with `status:
+ * "approved"` (only rejects are archived out), so export reads the live
+ * candidate directory and filters by status scope.
+ */
+
+import { listRuleCandidates } from "../compiler/rule-candidates.js";
+import type { RuleCandidate } from "../utils/rule-types.js";
+
+/** Which candidates to include in an export. */
+export type RuleExportScope = "approved" | "proposed" | "all";
+
+/** Valid export scopes, used to validate CLI input. */
+export const RULE_EXPORT_SCOPES: readonly RuleExportScope[] = [
+  "approved",
+  "proposed",
+  "all",
+];
+
+/**
+ * Load every rule candidate and keep those whose status matches `scope`; "all"
+ * returns the list untouched, in whatever order {@link listRuleCandidates} yields.
+ * @param root - Project root directory.
+ * @param scope - "approved" (default), "proposed", or "all".
+ * @returns The candidates matching the scope.
+ */
+export async function collectRuleCandidatesForExport(
+  root: string,
+  scope: RuleExportScope = "approved",
+): Promise<RuleCandidate[]> {
+  const candidates = await listRuleCandidates(root);
+  return scope === "all"
+    ? candidates
+    : candidates.filter((candidate) => candidate.status === scope);
+}
+
+/**
+ * Render `candidates` as a 2-space-indented JSON array string ending in a newline.
+ * @returns The serialized array, for the rule importer's RuleCandidate[] contract.
+ */
+export function buildRuleCandidatesJson(candidates: RuleCandidate[]): string {
+  const pretty = JSON.stringify(candidates, null, 2);
+  return pretty + "\n";
+}
diff --git a/src/export/types.ts b/src/export/types.ts
index 3edb831..9fcb298 100644
--- a/src/export/types.ts
+++ b/src/export/types.ts
@@ -107,6 +107,34 @@ export interface ExportPage {
   contradicted: boolean;
   /** True when the page is explicitly archived (`archived: true` frontmatter). */
   archived: boolean;
+  /**
+   * Deterministic SHA-256 (hex) of {@link ExportPage.body}. Lets a
+   * downstream auditor (export provenance) detect content drift and verify that an
+   * imported page still matches what the compiler exported, without
+   * re-reading the markdown. Stable for identical bodies.
+   */
+  contentHash: string;
+  /**
+   * SHA-256 hashes of the source files this page derived from — the same
+   * per-source digests the compiler records in `.llmwiki/state.json` for
+   * change detection. Resolved from the page's `sources` list; ordered and
+   * de-duplicated. Empty when a page has no recorded sources (e.g. seed
+   * pages). Lets an auditor tie a page back to exact source bytes.
+   */
+  sourceHashes: string[];
+  /**
+   * Model id that produced this page's current content, stamped into the
+   * page's frontmatter at compile time (export provenance). Unlike an export-time env
+   * read, this is true per-page lineage: a page compiled by model A keeps
+   * `modelId: A` even if the exporter's env later points at model B. Absent
+   * for pages compiled before provenance stamping shipped.
+   */
+  modelId?: string;
+  /**
+   * Named prompt-contract version the page was compiled under (export provenance),
+   * stamped at compile time. Absent for pre-provenance pages.
+   */
+  promptVersion?: string;
 }
 
 /**
diff --git a/src/utils/candidate-store.ts b/src/utils/candidate-store.ts
new file mode 100644
index 0000000..82813ca
--- /dev/null
+++ b/src/utils/candidate-store.ts
@@ -0,0 +1,72 @@
+/**
+ * Shared filesystem primitives for candidate queues.
+ *
+ * Both the concept review queue (`compiler/candidates.ts`) and the rule
+ * candidate queue (`compiler/rule-candidates.ts`) persist one JSON file per
+ * candidate under a directory, list those files, and move rejected records
+ * into an archive subdirectory. These two operations were identical across the
+ * queues; extracting them here removes the duplication while keeping each
+ * queue's own id/shape logic local to its module.
+ */
+
+import { readdir, rename, unlink, writeFile, mkdir } from "fs/promises";
+import { existsSync } from "fs";
+import path from "path";
+import { safeReadFile } from "./markdown.js";
+
+/** Extension used for all candidate JSON files. */
+export const CANDIDATE_JSON_EXT = ".json";
+
+/**
+ * Turn a dotted candidate id into a single filesystem-safe path segment.
+ * Every character outside `[a-zA-Z0-9._-]` becomes `_`; dots are PRESERVED.
+ * Collapsing dots to `-` (the old behavior) made `rulecand.a.b-c` and
+ * `rulecand.a-b.c` map to the same file, silently overwriting one candidate
+ * with the other. Keeping dots is meant to keep the mapping injective for the
+ * ids this codebase emits (category in `[a-z0-9_]`, slug in `[a-z0-9-]`).
+ * @param candidateId - The dotted candidate id.
+ */
+export function candidateFileId(candidateId: string): string {
+  const unsafe = /[^a-zA-Z0-9._-]/g;
+  return candidateId.replace(unsafe, "_");
+}
+
+/**
+ * Collect the file ids — basename minus the `.json` extension — of the
+ * candidate JSON files sitting directly in `dir`. Entries that are not
+ * regular files (for example an `archive/` subdirectory) and files with any
+ * other extension are skipped. Ids are returned in the order `readdir`
+ * reports them; no sorting happens here.
+ * @param dir - Absolute path to the candidate directory.
+ * @returns The ids, or an empty list when the directory does not exist.
+ */
+export async function listCandidateFileIds(dir: string): Promise<string[]> {
+  if (!existsSync(dir)) return [];
+  const dirents = await readdir(dir, { withFileTypes: true });
+  return dirents
+    .filter((dirent) => dirent.isFile() && dirent.name.endsWith(CANDIDATE_JSON_EXT))
+    .map((dirent) => dirent.name.slice(0, -CANDIDATE_JSON_EXT.length));
+}
+
+/**
+ * Move a candidate JSON file to `targetPath`, creating its directory if needed. Any `rename`
+ * failure (not only cross-filesystem) falls back to read + write + unlink; a missing source
+ * returns false. NOTE(review): confirm `safeReadFile` cannot mask a read error as empty text.
+ * @param sourcePath - Absolute path of the pending candidate file.
+ * @param targetPath - Absolute archive destination path.
+ */
+export async function moveCandidateToArchive(
+  sourcePath: string,
+  targetPath: string,
+): Promise<boolean> {
+  if (!existsSync(sourcePath)) return false;
+  await mkdir(path.dirname(targetPath), { recursive: true });
+  try {
+    await rename(sourcePath, targetPath);
+  } catch {
+    const raw = await safeReadFile(sourcePath);
+    await writeFile(targetPath, raw, "utf-8");
+    await unlink(sourcePath);
+  }
+  return true;
+}
diff --git a/src/utils/constants.ts b/src/utils/constants.ts
index 11d1e7f..9f36278 100644
--- a/src/utils/constants.ts
+++ b/src/utils/constants.ts
@@ -99,6 +99,19 @@ export const CANDIDATES_DIR = ".llmwiki/candidates";
 /** Rejected review candidates archived for audit (not deleted). */
 export const CANDIDATES_ARCHIVE_DIR = ".llmwiki/candidates/archive";
 
+/**
+ * Per-source hashes already processed by `rules extract` (rule pipeline). Kept
+ * separate from STATE_FILE so rule extraction and concept compilation advance
+ * their change-detection cursors independently.
+ */
+export const RULE_STATE_FILE = ".llmwiki/rule-state.json";
+
+/** Pending rule candidates (rule pipeline) awaiting approve/reject. */
+export const RULE_CANDIDATES_DIR = ".llmwiki/rule-candidates";
+
+/** Rejected rule candidates archived for audit (not deleted). */
+export const RULE_CANDIDATES_ARCHIVE_DIR = ".llmwiki/rule-candidates/archive";
+
 /** Number of most similar pages to return from embedding-based pre-filter. */
 export const EMBEDDING_TOP_K = 15;
 
diff --git a/src/utils/provider.ts b/src/utils/provider.ts
index ddb688b..cf8efcf 100644
--- a/src/utils/provider.ts
+++ b/src/utils/provider.ts
@@ -157,3 +157,20 @@ function getProviderName(): string {
 export function getActiveProviderName(): string {
   return getProviderName();
 }
+
+/**
+ * Report the model id for the currently selected provider without
+ * constructing a provider instance (which can require API credentials).
+ *
+ * For "anthropic" the value from `resolveAnthropicModelFromEnv()` wins when
+ * it is not null/undefined, falling back to `PROVIDER_MODELS.anthropic`;
+ * every other provider name is passed to `getModelForProvider`. Intended to
+ * mirror the model resolution in {@link getProvider} so the export
+ * provenance stamp names the model an actual compile call would use.
+ */
+export function resolveActiveModelId(): string {
+  const active = getProviderName();
+  return active === "anthropic"
+    ? (resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic)
+    : getModelForProvider(active as "openai" | "ollama" | "minimax" | "copilot");
+}
diff --git a/src/utils/rule-types.ts b/src/utils/rule-types.ts
new file mode 100644
index 0000000..1396456
--- /dev/null
+++ b/src/utils/rule-types.ts
@@ -0,0 +1,82 @@
+/**
+ * RuleCandidate protocol types (rule pipeline).
+ *
+ * These shapes mirror a downstream rule importer's `RuleCandidate` import contract exactly:
+ * the compiler is the "recommend rules" producer in the learning loop, emitting
+ * machine-actionable proposed rules that the rule importer imports and a human approves.
+ *
+ * Wire encoding is camelCase JSON. Evidence is a tagged union discriminated by
+ * `kind`. Status and confidence are lowercase string literals. Because these
+ * records cross the boundary into the rule importer, the field names and casing here are
+ * load-bearing — do not rename them for local convenience.
+ */
+
+/** Confidence in an extracted rule, on the rule importer's three-level scale. */
+export type RuleConfidence = "low" | "medium" | "high";
+
+/**
+ * Lifecycle status of a candidate. Newly extracted candidates are always
+ * `proposed`; `approve`/`reject` flip them to `approved`/`rejected`.
+ */
+export type RuleStatus = "proposed" | "approved" | "rejected";
+
+/**
+ * Evidence reference backing a proposed rule. Tagged union discriminated by
+ * `kind`. For compiled-wiki sources the producer emits `url` (when the source
+ * is a URL) or `file` (referencing the page's `sources` filename).
+ */
+export type EvidenceRef =
+  | { kind: "file"; path: string; lineStart?: number; lineEnd?: number }
+  | { kind: "memory"; memoryId: string }
+  | { kind: "audit"; auditId: string }
+  | { kind: "url"; url: string };
+
+/** The rule that a candidate becomes when approved. Starts at version 1. */
+export interface ProposedRule {
+  /** Stable rule id: `rule.<category>.<slug>`. */
+  id: string;
+  /** Coarse grouping (e.g. "process", "security", "docs"). */
+  category: string;
+  /** Short human-readable rule title. */
+  title: string;
+  /** Longer description of what the rule enforces and why. */
+  description: string;
+  /** Trigger predicate — a concise, interpreter-defined condition string. */
+  when: string;
+  /** Action discriminator emitted when the rule fires. */
+  then: string;
+  /** Monotonic rule version; new rules start at 1. */
+  version: number;
+}
+
+/** Lineage of a candidate so a downstream auditor can trace its origin. */
+export interface RuleProvenance {
+  /** Producing system; always "llm-wiki-compiler" for this surface. */
+  source: string;
+  /** Model id the extraction ran against (W4 `resolveActiveModelId`). */
+  modelId?: string;
+  /** Prompt/model version stamp for auditability. */
+  modelVersion?: string;
+}
+
+/**
+ * A proposed rule awaiting human approval in the rule importer. Persisted as JSON under
+ * `.llmwiki/rule-candidates/<id>.json` and exported as a JSON array for the rule importer
+ * to consume.
+ */
+export interface RuleCandidate {
+  /** Candidate id: `rulecand.<category>.<slug>`. */
+  id: string;
+  /** The rule this candidate becomes on approval. */
+  proposed: ProposedRule;
+  /** Evidence backing the proposal. */
+  evidence: EvidenceRef[];
+  /** Where this candidate came from. */
+  provenance: RuleProvenance;
+  /** Extraction confidence. */
+  confidence: RuleConfidence;
+  /** Lifecycle status: proposed → approved | rejected. */
+  status: RuleStatus;
+  /** RFC3339 timestamp recorded when the candidate was created. */
+  createdAt: string;
+}
diff --git a/test/bridge-export-contract.test.ts b/test/bridge-export-contract.test.ts
index ff7c90c..12b25f2 100644
--- a/test/bridge-export-contract.test.ts
+++ b/test/bridge-export-contract.test.ts
@@ -19,7 +19,7 @@ import path from "path";
 import { writePage } from "./fixtures/write-page.js";
 import { makeTempRoot } from "./fixtures/temp-root.js";
 import { collectExportPages } from "../src/export/collect.js";
-import { buildJsonExport } from "../src/export/json-export.js";
+import { buildJsonExport, EXPORT_SCHEMA_VERSION } from "../src/export/json-export.js";
 import {
   PROJECT_ID_PATTERN,
   validateProjectId,
@@ -42,6 +42,7 @@ interface BridgeExportPage {
 }
 
 interface BridgeExportEnvelope {
+  schemaVersion: number;
   exportedAt: string;
   pageCount: number;
   projectId?: string;
@@ -54,6 +55,13 @@ function findPage(envelope: BridgeExportEnvelope, slug: string): BridgeExportPag
   return page;
 }
 
+/** Temp root seeded with one minimal concept page — the common envelope-test setup. */
+async function rootWithOnePage(suffix: string): Promise<string> {
+  const root = await makeTempRoot(suffix);
+  await writePage(path.join(root, "wiki/concepts"), "p", { title: "P", summary: "s" }, "Body.\n");
+  return root;
+}
+
 describe("bridge export contract — collectExportPages + buildJsonExport", () => {
   it("populates path, kind, citations, and freshnessStatus for a basic concept", async () => {
     const root = await makeTempRoot("basic");
@@ -144,31 +152,28 @@ describe("bridge export contract — collectExportPages + buildJsonExport", () =
 
 describe("bridge export contract — projectId envelope field", () => {
   it("omits projectId from the envelope when none is supplied", async () => {
-    const root = await makeTempRoot("noproj");
-    await writePage(
-      path.join(root, "wiki/concepts"),
-      "p",
-      { title: "P", summary: "s" },
-      "Body.\n",
-    );
+    const root = await rootWithOnePage("noproj");
     const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as BridgeExportEnvelope;
     expect(env.projectId).toBeUndefined();
   });
 
   it("embeds a valid projectId in the envelope", async () => {
-    const root = await makeTempRoot("proj");
-    await writePage(
-      path.join(root, "wiki/concepts"),
-      "p",
-      { title: "P", summary: "s" },
-      "Body.\n",
-    );
+    const root = await rootWithOnePage("proj");
     const pages = await collectExportPages(root);
     const env = JSON.parse(buildJsonExport(pages, { projectId: "my-kb" })) as BridgeExportEnvelope;
     expect(env.projectId).toBe("my-kb");
   });
 });
 
+describe("bridge export contract — schemaVersion envelope field", () => {
+  it("emits schemaVersion equal to EXPORT_SCHEMA_VERSION on every build", async () => {
+    const root = await rootWithOnePage("schemaversion");
+    const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as BridgeExportEnvelope;
+    expect(env.schemaVersion).toBe(EXPORT_SCHEMA_VERSION);
+    expect(env.schemaVersion).toBe(1);
+  });
+});
+
 describe("bridge export contract — validateProjectId", () => {
   it("accepts canonical kebab-case identifiers", () => {
     for (const id of ["a", "kb", "my-kb", "team-foo-2024", "abc123"]) {
diff --git a/test/compile-delta.test.ts b/test/compile-delta.test.ts
new file mode 100644
index 0000000..50403e6
--- /dev/null
+++ b/test/compile-delta.test.ts
@@ -0,0 +1,93 @@
+/**
+ * Tests for the W5 programmatic incremental compile delta (`compileDelta`).
+ *
+ * Verifies the hash-gated delta contract a downstream consumer relies on:
+ *
+ *  - A first delta compile of a single source returns that source's page.
+ *  - A second delta compile with the now-up-to-date state returns an EMPTY
+ *    delta (nothing changed ⇒ nothing to ship).
+ *  - Adding a new source yields ONLY that new source's page in the delta.
+ *
+ * Strategy mirrors compile-provenance.test.ts: stub AnthropicProvider so the
+ * extraction tool and page-generation calls are deterministic and no real
+ * API is hit. The extraction title is derived from the source filename so
+ * each source maps to a distinct, predictable slug.
+ */
+
+import { describe, it, expect, vi } from "vitest";
+import { writeFile } from "fs/promises";
+import path from "path";
+import { compileDelta } from "../src/compiler/delta.js";
+import { AnthropicProvider } from "../src/providers/anthropic.js";
+import { useCompileProject } from "./fixtures/compile-project.js";
+
+const FIRST_SOURCE = "alpha.md";
+const FIRST_TITLE = "Alpha Topic";
+const FIRST_SLUG = "alpha-topic";
+const SECOND_SOURCE = "beta.md";
+const SECOND_TITLE = "Beta Topic";
+const SECOND_SLUG = "beta-topic";
+
+/** Serialized extraction payload containing a single new concept named `title`. */
+function extractionFor(title: string): string {
+  const concept = { concept: title, summary: `Summary of ${title}.`, is_new: true };
+  const payload = { concepts: [concept] };
+  return JSON.stringify(payload);
+}
+
+const STUB_BODY = "Body content for the topic. ^[alpha.md]";
+
+/**
+ * Replace the Anthropic provider's network calls with deterministic stubs.
+ * `toolCall` answers with the SECOND_TITLE extraction when the system prompt
+ * contains SECOND_TITLE or the substring "beta", and with the FIRST_TITLE
+ * extraction otherwise — the routing relies on the compiler handling one
+ * source per call. `complete` always resolves to STUB_BODY, so both sources
+ * receive the same page body.
+ */
+function stubProvider(): void {
+  const routesToSecond = (system: string): boolean =>
+    system.includes(SECOND_TITLE) || system.includes("beta");
+  vi.spyOn(AnthropicProvider.prototype, "toolCall").mockImplementation(
+    async (system: string) => extractionFor(routesToSecond(system) ? SECOND_TITLE : FIRST_TITLE),
+  );
+  vi.spyOn(AnthropicProvider.prototype, "complete").mockResolvedValue(STUB_BODY);
+}
+
+describe("compileDelta — incremental change-gated delta", () => {
+  const ctx = useCompileProject({
+    dirSuffix: "delta",
+    sourceFile: FIRST_SOURCE,
+    sourceContent: `# ${FIRST_TITLE}\n\nAbout alpha.`,
+  });
+
+  it("returns the compiled page on first run, then an empty delta when unchanged", async () => {
+    stubProvider();
+
+    const first = await compileDelta(ctx.dir);
+    expect(first.changedSlugs).toContain(FIRST_SLUG);
+    expect(first.compiled).toBe(1);
+
+    const second = await compileDelta(ctx.dir);
+    expect(second.changedPages).toEqual([]);
+    expect(second.changedSlugs).toEqual([]);
+    expect(second.skipped).toBe(1);
+  });
+
+  it("returns only the newly added source's page in the delta", async () => {
+    stubProvider();
+
+    await compileDelta(ctx.dir);
+
+    await writeFile(
+      path.join(ctx.dir, "sources", SECOND_SOURCE),
+      `# ${SECOND_TITLE}\n\nAbout beta.`,
+      "utf-8",
+    );
+
+    const delta = await compileDelta(ctx.dir);
+    expect(delta.changedSlugs).toEqual([SECOND_SLUG]);
+    expect(delta.changedPages).toHaveLength(1);
+    expect(delta.changedPages[0]?.slug).toBe(SECOND_SLUG);
+  });
+});
diff --git a/test/export-provenance.test.ts b/test/export-provenance.test.ts
new file mode 100644
index 0000000..920a9d3
--- /dev/null
+++ b/test/export-provenance.test.ts
@@ -0,0 +1,178 @@
+/**
+ * Unit tests for the W4 export provenance stamp.
+ *
+ * Verifies the auditable lineage fields a downstream consumer (for example, a rule importer)
+ * relies on:
+ *
+ *  - Each page carries `modelId` / `promptVersion` surfaced from its compile-time
+ *    frontmatter — true per-page lineage, NOT a single export-time env read.
+ *  - Each page carries a deterministic `contentHash` over its body and the
+ *    `sourceHashes` it derived from (surfaced from `.llmwiki/state.json`).
+ *  - `contentHash` is stable for the same body and changes when the body does.
+ */
+
+import { describe, it, expect } from "vitest";
+import { createHash } from "node:crypto";
+import { existsSync } from "fs";
+import { mkdir, writeFile } from "fs/promises";
+import path from "path";
+import { writePage } from "./fixtures/write-page.js";
+import { makeTempRoot } from "./fixtures/temp-root.js";
+import { collectExportPages } from "../src/export/collect.js";
+import { buildJsonExport } from "../src/export/json-export.js";
+
+interface ProvenancePage {
+  slug: string;
+  body: string;
+  contentHash: string;
+  sourceHashes: string[];
+  modelId?: string;
+  promptVersion?: string;
+}
+
+interface ProvenanceEnvelope {
+  pages: ProvenancePage[];
+}
+
+/** SHA-256 of `text` (encoded as UTF-8), hex-encoded; used to recompute the expected body hash. */
+function sha256(text: string): string {
+  return createHash("sha256").update(Buffer.from(text, "utf-8")).digest("hex");
+}
+
+/** Seed `.llmwiki/state.json` under `root`, recording each source filename with its fixed hash. */
+async function writeState(root: string, sources: Record<string, string>): Promise<void> {
+  const stateDir = path.join(root, ".llmwiki");
+  await mkdir(stateDir, { recursive: true });
+  // Each recorded source gets the same empty concept list and a fixed timestamp.
+  const recorded = Object.entries(sources).map(
+    ([file, hash]) => [file, { hash, concepts: [], compiledAt: "2024-01-01T00:00:00.000Z" }] as const,
+  );
+  const payload = JSON.stringify({
+    version: 1,
+    indexHash: "",
+    sources: Object.fromEntries(recorded),
+  });
+  await writeFile(path.join(stateDir, "state.json"), payload, "utf-8");
+}
+
+/** Write a `.llmwiki/state.json` under `root` whose contents (`{not-json`) are not valid JSON. */
+async function writeCorruptState(root: string): Promise<void> {
+  await mkdir(path.join(root, ".llmwiki"), { recursive: true });
+  await writeFile(path.join(root, ".llmwiki/state.json"), "{not-json", "utf-8");
+}
+
+function findPage(env: ProvenanceEnvelope, slug: string): ProvenancePage {
+  const [match] = env.pages.filter((page) => page.slug === slug);
+  if (match === undefined) throw new Error(`expected page "${slug}" in export`);
+  return match;
+}
+
+describe("export provenance — per-page modelId + promptVersion", () => {
+  it("surfaces the compile-time modelId/promptVersion stamped in frontmatter", async () => {
+    const root = await makeTempRoot("prov-perpage");
+    await writePage(
+      path.join(root, "wiki/concepts"),
+      "retrieval",
+      { title: "Retrieval", summary: "x", sources: [], modelId: "model-a", promptVersion: "v1" },
+      "Body.\n",
+    );
+    const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope;
+    // Both stamps must match the values written into this page's frontmatter.
+    expect(findPage(env, "retrieval").modelId).toBe("model-a");
+    expect(findPage(env, "retrieval").promptVersion).toBe("v1");
+  });
+
+  it("omits provenance for pages compiled before stamping shipped", async () => {
+    const root = await makeTempRoot("prov-legacy");
+    await writePage(
+      path.join(root, "wiki/concepts"),
+      "legacy",
+      { title: "Legacy", summary: "s", sources: [] },
+      "Body.\n",
+    );
+    const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope;
+    // This page's frontmatter carries neither field, so neither may appear in the export.
+    expect(findPage(env, "legacy").modelId).toBeUndefined();
+    expect(findPage(env, "legacy").promptVersion).toBeUndefined();
+  });
+
+  it("keeps a page's modelId even when a different model exports it", async () => {
+    const root = await makeTempRoot("prov-noenv");
+    const frontmatter = { title: "Pinned", summary: "s", sources: [], modelId: "compiled-by-A", promptVersion: "v1" };
+    await writePage(path.join(root, "wiki/concepts"), "pinned", frontmatter, "Body.\n");
+    // Snapshot the env first: restoring (not deleting) keeps pre-existing settings intact.
+    const saved = { LLMWIKI_PROVIDER: process.env.LLMWIKI_PROVIDER, LLMWIKI_MODEL: process.env.LLMWIKI_MODEL };
+    process.env.LLMWIKI_PROVIDER = "anthropic";
+    process.env.LLMWIKI_MODEL = "exporting-with-B";
+    try {
+      const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope;
+      expect(findPage(env, "pinned").modelId).toBe("compiled-by-A");
+    } finally {
+      for (const [key, value] of Object.entries(saved)) {
+        if (value === undefined) delete process.env[key];
+        else process.env[key] = value;
+      }
+    }
+  });
+});
+
+describe("export provenance — per-page contentHash + sourceHashes", () => {
+  it("emits a deterministic body hash and resolves source hashes from state", async () => {
+    const root = await makeTempRoot("prov-page");
+    const paperHash = "a".repeat(64);
+    const otherHash = "b".repeat(64);
+    await writeState(root, { "paper.md": paperHash, "other.md": otherHash });
+    await writePage(
+      path.join(root, "wiki/concepts"),
+      "retrieval",
+      { title: "Retrieval", summary: "x", sources: ["paper.md", "other.md"] },
+      "Retrieval is selective lookup.",
+    );
+    const exported = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope;
+    const { contentHash, body, sourceHashes } = findPage(exported, "retrieval");
+    expect(contentHash).toBe(sha256(body));
+    expect(sourceHashes).toEqual([paperHash, otherHash]);
+  });
+
+  it("keeps contentHash stable for the same body across builds", async () => {
+    const root = await makeTempRoot("prov-stable");
+    await writePage(
+      path.join(root, "wiki/concepts"),
+      "stable",
+      { title: "Stable", summary: "s", sources: [] },
+      "Identical body content.\n",
+    );
+    const buildOnce = async (): Promise<ProvenanceEnvelope> =>
+      JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope;
+    const [first, second] = [await buildOnce(), await buildOnce()];
+    expect(findPage(first, "stable").contentHash).toBe(findPage(second, "stable").contentHash);
+  });
+
+  it("omits unrecorded sources from sourceHashes (empty when none recorded)", async () => {
+    const root = await makeTempRoot("prov-nosrc");
+    // No state file is written here and the page itself lists no sources.
+    const conceptsDir = path.join(root, "wiki/concepts");
+    const frontmatter = { title: "Seedlike", summary: "s", sources: [] };
+    await writePage(conceptsDir, "seedlike", frontmatter, "Body.\n");
+
+    const exported = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope;
+    const { sourceHashes } = findPage(exported, "seedlike");
+    expect(sourceHashes).toEqual([]);
+  });
+
+  it("does not write a .bak when state.json is corrupt", async () => {
+    const root = await makeTempRoot("prov-corrupt-state");
+    const backupPath = path.join(root, ".llmwiki/state.json.bak");
+    await writeCorruptState(root);
+    await writePage(
+      path.join(root, "wiki/concepts"),
+      "safe-export",
+      { title: "Safe Export", summary: "s", sources: ["a.md"] },
+      "Body.\n",
+    );
+    const exported = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope;
+    // The unreadable state yields no hashes, and the export must leave no backup file behind.
+    expect(findPage(exported, "safe-export").sourceHashes).toEqual([]);
+    expect(existsSync(backupPath)).toBe(false);
+  });
+});
diff --git a/test/fixtures/rule-extraction.ts b/test/fixtures/rule-extraction.ts
new file mode 100644
index 0000000..90024d6
--- /dev/null
+++ b/test/fixtures/rule-extraction.ts
@@ -0,0 +1,68 @@
+/**
+ * Shared fixtures for the rule-extraction tests.
+ *
+ * Stubs the LLM tool call so extraction is deterministic and offline, and
+ * seeds a single source file with the provider env the model-id resolver
+ * needs. Reused across the rule-candidate and rule-extraction-state suites so
+ * the stub/seed boilerplate lives in one place.
+ */
+
+import { vi, afterEach } from "vitest";
+import { writeFile } from "fs/promises";
+import path from "path";
+
+/** Provider env vars that {@link seedRuleSource} sets and must not leak. */
+const PROVIDER_ENV_KEYS = ["LLMWIKI_PROVIDER", "ANTHROPIC_API_KEY"] as const;
+
+/**
+ * Register an `afterEach` hook that puts the provider env vars back to the
+ * values they had when this function was called (deleting any that were
+ * unset). Call it once at the top of a suite that uses
+ * {@link seedRuleSource}, so that helper's env mutation cannot leak onward.
+ */
+export function restoreProviderEnvAfterEach(): void {
+  const snapshot = new Map<string, string | undefined>();
+  for (const key of PROVIDER_ENV_KEYS) snapshot.set(key, process.env[key]);
+  afterEach(() => {
+    snapshot.forEach((original, key) => {
+      if (original !== undefined) process.env[key] = original;
+      else delete process.env[key];
+    });
+  });
+}
+
+/**
+ * Replace `callClaude` with a spy that answers any tool-bearing call with a
+ * single canned rule (serialized as JSON) and any tool-less call with "".
+ * @param category - Category reported by the canned rule (default "Process").
+ * @param evidenceLineEnd - End line the canned rule cites (default 2); pass a
+ *   large value to exercise out-of-bounds span dropping.
+ */
+export async function stubRuleExtraction(category = "Process", evidenceLineEnd = 2): Promise<void> {
+  const cannedRule = {
+    category,
+    title: "Require tests before merge",
+    description: "All PRs must include passing tests.",
+    when: "a pull request is opened without test changes",
+    then: "warn",
+    confidence: "high",
+    evidenceLineStart: 1,
+    evidenceLineEnd,
+  };
+  const cannedReply = JSON.stringify({ rules: [cannedRule] });
+  const llm = await import("../../src/utils/llm.js");
+  vi.spyOn(llm, "callClaude").mockImplementation(async ({ tools }) =>
+    tools && tools.length > 0 ? cannedReply : "",
+  );
+}
+
+/**
+ * Set the provider env (provider "anthropic", key "test-key") and write the
+ * two-line `sources/guide.md` fixture under `dir`.
+ */
+export async function seedRuleSource(dir: string): Promise<void> {
+  Object.assign(process.env, { LLMWIKI_PROVIDER: "anthropic", ANTHROPIC_API_KEY: "test-key" });
+  const guidePath = path.join(dir, "sources", "guide.md");
+  const guideText = "Always run the test suite before merging a change.\nNo exceptions.";
+  await writeFile(guidePath, guideText, "utf-8");
+}
diff --git a/test/prompt-budget.test.ts b/test/prompt-budget.test.ts
index 874a39b..d1ab6ad 100644
--- a/test/prompt-budget.test.ts
+++ b/test/prompt-budget.test.ts
@@ -8,6 +8,7 @@
 
 import { describe, it, expect, afterEach } from "vitest";
 import {
+  budgetAndNumberSource,
   buildBudgetedCombinedContent,
   resolvePromptBudgetChars,
   type SourceSlice,
@@ -20,6 +21,21 @@ afterEach(() => {
   delete process.env[ENV_KEY];
 });
 
+describe("budgetAndNumberSource", () => {
+  it("prepends 1-based line numbers so line-span prompts have real anchors", () => {
+    const output = budgetAndNumberSource("guide.md", "first\nsecond");
+    for (const anchor of ["1 | first", "2 | second"]) expect(output).toContain(anchor);
+  });
+
+  it("clips content past the budget so extraction never blows the prompt window", () => {
+    const oversized = "x".repeat(500);
+    process.env[ENV_KEY] = "10";
+    const output = budgetAndNumberSource("guide.md", oversized);
+    expect(output).toContain("truncated for prompt budget");
+    expect(output.length).toBeLessThan(500);
+  });
+});
+
 describe("resolvePromptBudgetChars", () => {
   it("returns the default when env is unset", () => {
     expect(resolvePromptBudgetChars()).toBe(DEFAULT_PROMPT_BUDGET_CHARS);
diff --git a/test/rule-candidates.test.ts b/test/rule-candidates.test.ts
new file mode 100644
index 0000000..301e460
--- /dev/null
+++ b/test/rule-candidates.test.ts
@@ -0,0 +1,148 @@
+/**
+ * Tests for the rule-candidate pipeline: extraction → candidate →
+ * approve → export. The LLM tool call is stubbed via vi.spyOn on the shared
+ * `callClaude` helper (the same mock pattern used by review.test.ts), so no
+ * network call is made and the extracted rule is deterministic.
+ *
+ * The shape assertions verify the emitted record matches a downstream rule importer's
+ * `RuleCandidate` contract exactly: camelCase keys, `status: "proposed"`,
+ * tagged evidence, the `proposed` rule fields, and a stamped provenance.modelId.
+ */
+
+import { describe, it, expect } from "vitest";
+import { mkdir, writeFile } from "fs/promises";
+import path from "path";
+import { extractRuleCandidates } from "../src/compiler/rule-extractor.js";
+import {
+  listRuleCandidates,
+  setRuleCandidateStatus,
+  validateRuleCandidate,
+} from "../src/compiler/rule-candidates.js";
+import {
+  buildRuleCandidatesJson,
+  collectRuleCandidatesForExport,
+} from "../src/export/rule-candidates-json.js";
+import { candidateFileId } from "../src/utils/candidate-store.js";
+import { useTempRoot } from "./fixtures/temp-root.js";
+import {
+  restoreProviderEnvAfterEach,
+  seedRuleSource as seedSource,
+  stubRuleExtraction,
+} from "./fixtures/rule-extraction.js";
+import type { RuleCandidate } from "../src/utils/rule-types.js";
+
+const FIXED_NOW = "2026-05-31T00:00:00.000Z";
+
+/** Candidate ids carry a content-hash suffix so distinct rules never collide. */
+const CANDIDATE_ID_RE = /^rulecand\.process\.require-tests-before-merge-[a-f0-9]{8}$/;
+
+restoreProviderEnvAfterEach();
+
+describe("rule-candidate extraction", () => {
+  const workspace = useTempRoot(["sources"]);
+
+  it("emits a RuleCandidate matching the rule-import contract shape", async () => {
+    await seedSource(workspace.dir);
+    await stubRuleExtraction();
+
+    // The stubbed tool call returns exactly one rule.
+    const { candidates } = await extractRuleCandidates(workspace.dir, FIXED_NOW);
+    expect(candidates).toHaveLength(1);
+    const emitted = candidates[0]!;
+
+    // Scalar fields first, then validation, then the nested contract pieces.
+    expect(emitted.id).toMatch(CANDIDATE_ID_RE);
+    expect(emitted).toMatchObject({ status: "proposed", confidence: "high", createdAt: FIXED_NOW });
+    expect(validateRuleCandidate(emitted)).toBeNull();
+    assertProposedRule(emitted);
+    assertEvidenceAndProvenance(emitted);
+  });
+
+  it("persists the candidate JSON and lists it back", async () => {
+    await seedSource(workspace.dir);
+    await stubRuleExtraction();
+    await extractRuleCandidates(workspace.dir, FIXED_NOW);
+    const persisted = await listRuleCandidates(workspace.dir);
+    expect(persisted).toHaveLength(1);
+    const only = persisted[0]!;
+    expect(only.proposed.id).toBe(`rule.${only.id.slice("rulecand.".length)}`);
+  });
+});
+
+describe("rule-candidate approve + export", () => {
+  const workspace = useTempRoot(["sources"]);
+
+  it("approve flips status to approved", async () => {
+    await seedSource(workspace.dir);
+    await stubRuleExtraction();
+    const extraction = await extractRuleCandidates(workspace.dir, FIXED_NOW);
+    const approvedFileId = candidateFileId(extraction.candidates[0]!.id);
+
+    // Both the returned record and the re-listed record must show the new status.
+    const returned = await setRuleCandidateStatus(workspace.dir, approvedFileId, "approved");
+    expect(returned!.status).toBe("approved");
+    const [relisted] = await listRuleCandidates(workspace.dir);
+    expect(relisted!.status).toBe("approved");
+  });
+
+  it("export emits a JSON array of approved RuleCandidate records", async () => {
+    await seedSource(workspace.dir);
+    await stubRuleExtraction();
+    const extraction = await extractRuleCandidates(workspace.dir, FIXED_NOW);
+    await setRuleCandidateStatus(workspace.dir, candidateFileId(extraction.candidates[0]!.id), "approved");
+
+    const exportable = await collectRuleCandidatesForExport(workspace.dir, "approved");
+    const parsed = JSON.parse(buildRuleCandidatesJson(exportable)) as RuleCandidate[];
+    expect(Array.isArray(parsed)).toBe(true);
+    expect(parsed).toHaveLength(1);
+    const exported = parsed[0]!;
+    expect(exported.status).toBe("approved");
+    expect(exported.proposed.version).toBe(1);
+    expect(exported.evidence[0]).toEqual({ kind: "file", path: "guide.md", lineStart: 1, lineEnd: 2 });
+  });
+
+  it("skips malformed persisted candidates instead of exporting them", async () => {
+    // Marked approved, but `proposed` is an empty object and `evidence` is empty.
+    const malformed = {
+      id: "rulecand.process.bad",
+      proposed: {},
+      evidence: [],
+      provenance: { source: "llm-wiki-compiler" },
+      confidence: "high",
+      status: "approved",
+      createdAt: FIXED_NOW,
+    };
+
+    const candidatesDir = path.join(workspace.dir, ".llmwiki/rule-candidates");
+    await mkdir(candidatesDir, { recursive: true });
+    const malformedPath = path.join(candidatesDir, "rulecand-process-bad.json");
+    await writeFile(malformedPath, JSON.stringify(malformed), "utf-8");
+
+    const exportable = await collectRuleCandidatesForExport(workspace.dir, "approved");
+    expect(exportable).toEqual([]);
+  });
+});
+
+/** Check that the `proposed` rule equals the expected contract object, with its id derived from the candidate id. */
+function assertProposedRule({ id, proposed }: RuleCandidate): void {
+  expect(proposed).toEqual({
+    id: `rule.${id.slice("rulecand.".length)}`,
+    category: "process",
+    title: "Require tests before merge",
+    description: "All PRs must include passing tests.",
+    when: "a pull request is opened without test changes",
+    then: "warn",
+    version: 1,
+  });
+}
+
+/** Check the single file-evidence ref and the provenance stamp (source, non-empty string modelId, modelVersion). */
+function assertEvidenceAndProvenance(candidate: RuleCandidate): void {
+  const { evidence, provenance } = candidate;
+  expect(evidence).toEqual([{ kind: "file", path: "guide.md", lineStart: 1, lineEnd: 2 }]);
+
+  expect(provenance.source).toBe("llm-wiki-compiler");
+  expect(typeof provenance.modelId).toBe("string");
+  expect(provenance.modelId!.length).toBeGreaterThan(0);
+  expect(provenance.modelVersion).toBe("v1");
+}
diff --git a/test/rule-extraction-state.test.ts b/test/rule-extraction-state.test.ts
new file mode 100644
index 0000000..8b5582e
--- /dev/null
+++ b/test/rule-extraction-state.test.ts
@@ -0,0 +1,90 @@
+/**
+ * Tests for rule-extraction state + approval-preservation (the rule pipeline blockers).
+ *
+ * `rules extract` must (a) advance its OWN change-detection cursor in
+ * `.llmwiki/rule-state.json` so an unchanged source is not re-extracted every
+ * run, and (b) never overwrite a human's approve/reject decision. The LLM tool
+ * call is stubbed via vi.spyOn so extraction is deterministic and offline.
+ */
+
+import { describe, it, expect, vi, afterEach } from "vitest";
+import { rm } from "fs/promises";
+import path from "path";
+import { extractRuleCandidates } from "../src/compiler/rule-extractor.js";
+import {
+  listRuleCandidates,
+  setRuleCandidateStatus,
+} from "../src/compiler/rule-candidates.js";
+import { candidateFileId } from "../src/utils/candidate-store.js";
+import { RULE_STATE_FILE } from "../src/utils/constants.js";
+import { useTempRoot } from "./fixtures/temp-root.js";
+import {
+  restoreProviderEnvAfterEach,
+  seedRuleSource as seedSource,
+  stubRuleExtraction,
+} from "./fixtures/rule-extraction.js";
+
+const NOW = "2026-05-31T00:00:00.000Z";
+
+/** Out-of-bounds end line (past the 2-line source) to exercise span dropping. */
+const OUT_OF_BOUNDS_END = 9999;
+
+restoreProviderEnvAfterEach();
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+describe("rule extraction state cursor", () => {
+  const workspace = useTempRoot(["sources"]);
+
+  it("does not re-process an unchanged source on a second run", async () => {
+    await seedSource(workspace.dir);
+    await stubRuleExtraction();
+    // The first pass processes guide.md; the second must find nothing left to do.
+    const { processedSources: firstPass } = await extractRuleCandidates(workspace.dir, NOW);
+    expect(firstPass).toEqual(["guide.md"]);
+    const secondRun = await extractRuleCandidates(workspace.dir, NOW);
+    expect(secondRun.processedSources).toEqual([]);
+    expect(secondRun.candidates).toHaveLength(0);
+  });
+});
+
+describe("approval preservation", () => {
+  const workspace = useTempRoot(["sources"]);
+
+  it("does not overwrite an approved candidate when the source is re-extracted", async () => {
+    await seedSource(workspace.dir);
+    await stubRuleExtraction();
+    const firstRun = await extractRuleCandidates(workspace.dir, NOW);
+    const approvedId = candidateFileId(firstRun.candidates[0]!.id);
+    await setRuleCandidateStatus(workspace.dir, approvedId, "approved");
+
+    // Removing the cursor file forces the unchanged source to be extracted again.
+    await rm(path.join(workspace.dir, RULE_STATE_FILE), { force: true });
+    const rerun = await extractRuleCandidates(workspace.dir, NOW);
+    const persisted = await listRuleCandidates(workspace.dir);
+    expect(persisted).toHaveLength(1);
+    expect(persisted[0]!.status).toBe("approved");
+    expect(rerun.candidates).toHaveLength(0);
+    expect(rerun.notes.some((note) => note.includes("approved"))).toBe(true);
+  });
+});
+
+describe("evidence span bounding + category sanitization", () => {
+  const workspace = useTempRoot(["sources"]);
+
+  it("drops an out-of-bounds evidence line and emits an import-valid id", async () => {
+    await seedSource(workspace.dir);
+    await stubRuleExtraction("Code Review", OUT_OF_BOUNDS_END);
+    const extraction = await extractRuleCandidates(workspace.dir, NOW);
+    const { id, evidence } = extraction.candidates[0]!;
+
+    // "Code Review" must surface as the `code_review` id segment.
+    expect(id).toMatch(/^rulecand\.code_review\.[a-z0-9-]+$/);
+    // The cited end line (9999) lies past the 2-line source, so it must be dropped.
+    const firstRef = evidence[0]!;
+    expect(firstRef.kind).toBe("file");
+    expect("lineEnd" in firstRef ? firstRef.lineEnd : undefined).toBeUndefined();
+  });
+});
diff --git a/test/rule-validation.test.ts b/test/rule-validation.test.ts
new file mode 100644
index 0000000..5cf76ae
--- /dev/null
+++ b/test/rule-validation.test.ts
@@ -0,0 +1,104 @@
+/**
+ * Unit tests for the rule-candidate id/category/validation helpers (rule pipeline).
+ *
+ * These guard the producer ↔ rule-importer contract: category alphabet, collision-free
+ * ids, evidence-span sanity, and the producer-side mirror of the rule importer's import
+ * gate (so the compiler never "successfully" emits a candidate the rule importer rejects).
+ */
+
+import { describe, it, expect } from "vitest";
+import {
+  buildRuleCandidate,
+  buildRuleSlug,
+  sanitizeRuleCategory,
+  validateRuleCandidate,
+} from "../src/compiler/rule-candidates.js";
+import { parseRules } from "../src/compiler/rule-prompts.js";
+import type { RuleCandidate } from "../src/utils/rule-types.js";
+
+const NOW = "2026-05-31T00:00:00.000Z";
+
+function candidate(category: string, slug: string): RuleCandidate {
+  // Placeholder rule text; only the category and slug differ between tests.
+  const fields: Parameters<typeof buildRuleCandidate>[0] = {
+    category,
+    slug,
+    title: "T",
+    description: "d",
+    when: "w",
+    then: "warn",
+    evidence: [{ kind: "file", path: "guide.md" }],
+    provenance: { source: "llm-wiki-compiler" },
+    confidence: "high",
+  };
+
+  return buildRuleCandidate(fields, NOW);
+}
+
+describe("sanitizeRuleCategory", () => {
+  it("collapses hyphen/space runs to underscores (the rule importer's [a-z0-9_] alphabet)", () => {
+    const cases = { "Code Review": "code_review", "ci/cd pipeline": "ci_cd_pipeline" };
+    for (const [raw, sanitized] of Object.entries(cases)) expect(sanitizeRuleCategory(raw)).toBe(sanitized);
+  });
+
+  it("falls back to 'general' for an empty result", () => {
+    expect(sanitizeRuleCategory("!!!")).toBe("general");
+  });
+});
+
+describe("buildRuleSlug", () => {
+  it("appends an 8-hex content hash so same-title rules never collide", () => {
+    const fromSourceA = buildRuleSlug("Require tests", "sourceA\nwhen\nthen");
+    const fromSourceB = buildRuleSlug("Require tests", "sourceB\nwhen\nthen");
+    expect(fromSourceA).toMatch(/^require-tests-[a-f0-9]{8}$/);
+    expect(fromSourceA).not.toBe(fromSourceB);
+  });
+});
+
+describe("validateRuleCandidate", () => {
+  const hashedSlug = "x-abcd1234";
+  it("accepts a sanitized multi-word category", () => {
+    expect(validateRuleCandidate(candidate("code_review", hashedSlug))).toBeNull();
+  });
+
+  it("rejects a hyphen in the category segment (the rule importer would refuse it)", () => {
+    expect(validateRuleCandidate(candidate("code-review", hashedSlug))).toContain("candidate id");
+  });
+
+  it("rejects non-https url evidence", () => {
+    const insecure: RuleCandidate = { ...candidate("process", hashedSlug), evidence: [{ kind: "url", url: "http://example.com" }] };
+    expect(validateRuleCandidate(insecure)).toContain("https");
+  });
+
+  it("rejects an over-cap predicate", () => {
+    const verbose = candidate("process", hashedSlug);
+    verbose.proposed.when = "x".repeat(513);
+    expect(validateRuleCandidate(verbose)).toContain("when");
+  });
+
+  it("rejects malformed proposed-rule objects before export", () => {
+    const gutted = candidate("process", hashedSlug) as unknown as Record<string, unknown>;
+    gutted.proposed = {};
+    expect(validateRuleCandidate(gutted as RuleCandidate)).toContain("proposed.id");
+  });
+
+  it("rejects mismatched candidate/proposed ids", () => {
+    const mismatched = candidate("process", hashedSlug);
+    mismatched.proposed.id = "rule.other.x-abcd1234";
+    expect(validateRuleCandidate(mismatched)).toContain("does not match");
+  });
+});
+
+describe("parseRules evidence-span sanity", () => {
+  it("drops an inverted span (end < start) rather than emitting it", () => {
+    // Start line 40 with end line 7: the span runs backwards.
+    const invertedRule = {
+      category: "process", title: "T", description: "d", when: "w", then: "warn",
+      confidence: "high", evidenceLineStart: 40, evidenceLineEnd: 7,
+    };
+
+    const [only] = parseRules(JSON.stringify({ rules: [invertedRule] }));
+    expect(only!.evidenceLineStart).toBeUndefined();
+    expect(only!.evidenceLineEnd).toBeUndefined();
+  });
+});
diff --git a/test/rules-integration.test.ts b/test/rules-integration.test.ts
new file mode 100644
index 0000000..bb3d305
--- /dev/null
+++ b/test/rules-integration.test.ts
@@ -0,0 +1,125 @@
+/**
+ * CLI-level integration tests for the `llmwiki rules` subcommand family (rule pipeline).
+ *
+ * Spawns real subprocesses via the shared run-cli fixture so the full CLI
+ * surface (Commander routing, exit codes, stdout/stderr) is exercised without
+ * mocking internal modules. Candidate JSON is written manually so list /
+ * approve / reject / export need no LLM call; `rules extract` is tested only
+ * for its credential-failure path (a real extraction would need an API key).
+ */
+
+import { describe, it, expect } from "vitest";
+import path from "path";
+import { mkdir, rm, writeFile, readFile } from "fs/promises";
+import { tmpdir } from "os";
+import { runCLI, expectCLIExit, expectCLIFailure } from "./fixtures/run-cli.js";
+import type { RuleCandidate } from "../src/utils/rule-types.js";
+
+/** Create a disposable temp workspace with a sources/ sub-folder; the random tag keeps same-millisecond calls from sharing a directory. */
+async function makeWorkspace(suffix: string): Promise<string> {
+  const cwd = path.join(tmpdir(), `llmwiki-rules-${suffix}-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+  await mkdir(path.join(cwd, "sources"), { recursive: true });
+  return cwd;
+}
+
+/** Build a fixed, already-approved candidate (candidate id and proposed-rule id share one suffix) for the export/list fixtures; intended to be import-valid. */
+function makeCandidate(): RuleCandidate {
+  return {
+    id: "rulecand.process.require-tests-abcd1234",
+    proposed: {
+      id: "rule.process.require-tests-abcd1234",
+      category: "process",
+      title: "Require tests",
+      description: "PRs need tests.",
+      when: "a PR is opened",
+      then: "warn",
+      version: 1,
+    },
+    evidence: [{ kind: "file", path: "guide.md", lineStart: 1, lineEnd: 2 }],
+    provenance: { source: "llm-wiki-compiler", modelId: "m", modelVersion: "v1" },
+    confidence: "high",
+    status: "approved",
+    createdAt: "2026-05-31T00:00:00.000Z",
+  };
+}
+
+/** Serialize `candidate` as 2-space-indented JSON to `.llmwiki/rule-candidates/<id>.json` under `cwd`. */
+async function writeCandidate(cwd: string, candidate: RuleCandidate): Promise<void> {
+  const candidatesDir = path.join(cwd, ".llmwiki", "rule-candidates");
+  await mkdir(candidatesDir, { recursive: true });
+  await writeFile(path.join(candidatesDir, `${candidate.id}.json`), JSON.stringify(candidate, null, 2), "utf-8");
+}
+
+describe("rules CLI integration", () => {
+  const CLI_TIMEOUT_MS = 30_000;
+
+  /** Run `body` inside a fresh workspace and always remove that workspace afterwards. */
+  async function inWorkspace(suffix: string, body: (cwd: string) => Promise<void>): Promise<void> {
+    const cwd = await makeWorkspace(suffix);
+    try {
+      await body(cwd);
+    } finally {
+      await rm(cwd, { recursive: true, force: true });
+    }
+  }
+
+  it("rules list on a fresh project exits 0 and reports no pending candidates", async () => {
+    await inWorkspace("list-empty", async (cwd) => {
+      // No candidate files have been written into this workspace.
+      const result = await runCLI(["rules", "list"], cwd);
+      expectCLIExit(result, 0);
+      expect(result.stdout.toLowerCase()).toContain("no pending");
+    });
+  }, CLI_TIMEOUT_MS);
+
+  it("rules approve with a missing id exits non-zero with a not-found error", async () => {
+    await inWorkspace("approve-missing", async (cwd) => {
+      // The message may land on either stream, so both are searched.
+      const result = await runCLI(["rules", "approve", "rulecand.x.does-not-exist"], cwd);
+      expectCLIFailure(result);
+      expect(`${result.stdout}${result.stderr}`.toLowerCase()).toContain("not found");
+    });
+  }, CLI_TIMEOUT_MS);
+
+  it("rules reject with a missing id exits non-zero", async () => {
+    await inWorkspace("reject-missing", async (cwd) => {
+      // Only the failing exit status is checked here, not the message.
+      const result = await runCLI(["rules", "reject", "rulecand.x.does-not-exist"], cwd);
+      expectCLIFailure(result);
+    });
+  }, CLI_TIMEOUT_MS);
+
+  it("rules export with an invalid --scope exits non-zero with a guidance message", async () => {
+    await inWorkspace("export-bad-scope", async (cwd) => {
+      // The guidance may land on either stream, so both are searched.
+      const result = await runCLI(["rules", "export", "--scope", "bogus"], cwd);
+      expectCLIFailure(result);
+      expect(`${result.stdout}${result.stderr}`).toContain("scope");
+    });
+  }, CLI_TIMEOUT_MS);
+
+  it("rules export writes the candidate array to the default output path", async () => {
+    await inWorkspace("export-writes", async (cwd) => {
+      // One approved candidate on disk is the only thing available to export.
+      await writeCandidate(cwd, makeCandidate());
+      const result = await runCLI(["rules", "export", "--scope", "approved"], cwd);
+      expectCLIExit(result, 0);
+      const written = await readFile(path.join(cwd, "dist/exports/rule-candidates.json"), "utf-8");
+      const parsed = JSON.parse(written) as RuleCandidate[];
+      expect(parsed).toHaveLength(1);
+      expect(parsed[0]!.id).toBe("rulecand.process.require-tests-abcd1234");
+    });
+  }, CLI_TIMEOUT_MS);
+
+  it("rules extract fails with a credential error when no API key is set", async () => {
+    await inWorkspace("extract-no-key", async (cwd) => {
+      // Both credential variables are passed to the CLI run as empty strings.
+      await writeFile(path.join(cwd, "sources", "guide.md"), "Always run tests.\nNo exceptions.\n", "utf-8");
+      const result = await runCLI(["rules", "extract"], cwd, {
+        ANTHROPIC_API_KEY: "",
+        ANTHROPIC_AUTH_TOKEN: "",
+      });
+      expectCLIFailure(result);
+    });
+  }, CLI_TIMEOUT_MS);
+});
diff --git a/vitest.config.ts b/vitest.config.ts
index 5979585..6805d8c 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -8,6 +8,14 @@ export default defineConfig({
     globals: true,
     testTimeout: TEST_TIMEOUT_MS,
     hookTimeout: HOOK_TIMEOUT_MS,
+    // Many integration tests spawn a CLI subprocess. With one worker per core
+    // each ALSO spawning a node process, the machine is oversubscribed ~2x and
+    // subprocess spawns get starved past their timeout — a non-deterministic
+    // failure whose victim varies per run. Cap workers to half the cores so
+    // each worker+subprocess pair fits, independent of how many subprocess
+    // tests exist.
+    maxWorkers: "50%",
+    minWorkers: 1,
     // Don't pick up tests from sibling worktrees living under local worktree dirs.
     // Worktrees share the parent's working directory tree, so without this
     // exclude vitest discovers and runs every feature branch's tests.