diff --git a/SOURCES_CONTRACT.md b/SOURCES_CONTRACT.md new file mode 100644 index 0000000..553aaae --- /dev/null +++ b/SOURCES_CONTRACT.md @@ -0,0 +1,147 @@ +# `sources/` Input Contract + +This document defines the **stable input contract** for the `sources/` directory: +the format a programmatic producer (for example, **a downstream rule importer**) writes to drive +`llmwiki compile` without going through the interactive `llmwiki ingest` command. + +Anything that can write a markdown file with the frontmatter described here can feed +the compiler. The compiler treats `sources/` as the single source of truth for what +to compile; it never reaches back to the original URL/file. + +> Stability: the **field names, semantics, slug/filename rules, and +> `MAX_SOURCE_CHARS` limit below are stable.** New optional frontmatter fields may be +> added over time; existing fields will not change meaning without a contract revision. + +--- + +## File layout + +Each source is one UTF-8 markdown file in `sources/`: + +``` +sources/ + retrieval-augmented-generation.md + some-design-doc.md +``` + +A source file is: + +```markdown +--- +title: Retrieval-Augmented Generation +source: https://example.com/rag +ingestedAt: 2026-05-31T12:00:00.000Z +sourceType: web +--- + + +``` + +The body is everything after the closing `---` of the YAML frontmatter block. The +compiler hashes the **entire file** (frontmatter + body) with SHA-256 to detect +changes; see [Change detection](#change-detection-incremental-compile). + +Only files ending in `.md` are scanned. Other files in `sources/` are ignored. + +--- + +## Frontmatter fields + +The frontmatter is a single YAML block delimited by `---` lines at the very top of +the file. + +### Required + +| Field | Type | Meaning | +|--------------|--------|---------| +| `title` | string | Human-readable title. Drives the **filename slug** (see below) and the wiki page title. 
Must contain at least one letter or digit, otherwise the slug is empty and the write is rejected. | +| `source` | string | Source identity (URL, file path, or any stable producer-chosen URI). Used for re-ingest idempotency and basename-collision disambiguation. For a git-log producer this would be e.g. a commit URL or `repo@sha:path`. | +| `ingestedAt` | string | ISO-8601 timestamp of when the source was captured. | + +### Optional + +| Field | Type | Meaning | +|----------------|---------|---------| +| `sourceType` | string | Origin tag. One of `web`, `file`, `image`, `pdf`, `transcript`. Persisted for downstream tooling and human readers. A programmatic producer that does not map cleanly to one of these should pick the closest (`file` is the safe generic default). | +| `truncated` | boolean | `true` when the body was truncated to fit `MAX_SOURCE_CHARS`. Omit when the body is complete. | +| `originalChars`| number | Original character count **before** truncation. Set this together with `truncated: true` so consumers can see how much was dropped. Omit when `truncated` is absent. | + +Producers may include additional YAML keys; the compiler ignores unrecognized +frontmatter fields rather than failing. Do not rely on unspecified fields surviving +into the compiled output. + +--- + +## Filename and slug rules + +The filename a producer chooses should match how `llmwiki ingest` would name it, so +re-ingest stays idempotent: + +1. **Slug** is derived from `title` by lowercasing, transliterating to ASCII-ish + kebab-case, and stripping characters that are not letters/digits/hyphens. A title + that slugifies to the empty string (e.g. pure punctuation/emoji) is **rejected** — + choose a title with at least one letter or digit. +2. The default filename is `<slug>.md`. +3. **Basename collisions:** if `<slug>.md` already exists for a *different* `source`, + the disambiguated name is `<slug>-<8-hex>.md`, where `<8-hex>` is the first 8 hex + chars of `sha256(source)`. 
Re-writing the *same* `source` overwrites `<slug>.md` + in place (the existing file's frontmatter `source` is consulted first), so a + producer that re-emits an updated version of the same source must keep `source` + identical to overwrite rather than fork. + +A producer that does not want to replicate the slug algorithm may simply write a +stable `<name>.md` of its own choosing — the compiler keys change +detection off the **filename + file hash**, not the slug. The slug rules above only +matter for staying byte-compatible with `llmwiki ingest` output. + +--- + +## Size limit: `MAX_SOURCE_CHARS` + +The compiler-facing size ceiling is **`MAX_SOURCE_CHARS = 100_000` characters** of +body content (see `src/utils/constants.ts`). Producers should: + +- Truncate the body to at most `MAX_SOURCE_CHARS` characters. +- When truncating, set `truncated: true` and `originalChars: <original length>` + in the frontmatter. + +Very short bodies (under `MIN_SOURCE_CHARS = 50`) compile but are low-signal; the +interactive ingester warns on them. A programmatic producer should avoid emitting +near-empty sources. + +--- + +## Change detection (incremental compile) + +`llmwiki compile` is incremental. It records each source file's SHA-256 hash in +`.llmwiki/state.json` under `sources[<filename>] = { hash, concepts, compiledAt }`. +On the next compile, a source whose hash is unchanged is **skipped**. + +Consequences for a producer: + +- To trigger recompilation of a source, change its file contents (frontmatter or + body) so the hash changes. +- Writing a byte-identical file is a no-op for that source. +- Deleting a source file marks its owned wiki page(s) orphaned on the next compile. + +These same per-source hashes are surfaced in the JSON export as each page's +`sourceHashes`, and the export envelope carries `modelId` / `promptVersion`, so a +downstream consumer can audit which source bytes and which model/prompt produced a +page (the W4 provenance stamp). 
+ +--- + +## Future: a `git`-log adapter + +W1 documents the contract only; no new connector ships with it. The natural next +connector for a downstream rule importer is a **git-log adapter** that walks commit history and +emits one `sources/*.md` per commit (or per changed file), with: + +- `title` = commit subject (or `path @ short-sha`), +- `source` = a stable commit/blob URI, +- `ingestedAt` = commit timestamp, +- `sourceType: file`, +- body = the commit message and/or diff hunk, truncated to `MAX_SOURCE_CHARS`. + +Such an adapter is purely a producer of files in this format; it requires no compiler +changes because it targets this stable contract. diff --git a/src/cli.ts b/src/cli.ts index 1038336..705ece2 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -29,6 +29,7 @@ import reviewListCommand from "./commands/review-list.js"; import reviewShowCommand from "./commands/review-show.js"; import reviewApproveCommand from "./commands/review-approve.js"; import reviewRejectCommand from "./commands/review-reject.js"; +import { registerRulesCommand } from "./commands/rules-register.js"; import nextCommand from "./commands/next.js"; import quickstartCommand, { type QuickstartOptions } from "./commands/quickstart.js"; import contextCommand, { type ContextCommandOptions } from "./commands/context.js"; @@ -160,6 +161,8 @@ reviewCommand } }); +registerRulesCommand(program, requireProvider); + program .command("query ") .description("Ask a question against the wiki") diff --git a/src/commands/rules-register.ts b/src/commands/rules-register.ts new file mode 100644 index 0000000..04564c7 --- /dev/null +++ b/src/commands/rules-register.ts @@ -0,0 +1,96 @@ +/** + * Commander registration for `llmwiki rules ...`. + * + * Keeping the rule-pipeline command tree outside `src/cli.ts` prevents the + * entrypoint from becoming the dumping ground for every nested command while + * leaving the actual rule actions in `commands/rules.ts`. 
+ */ + +import type { Command } from "commander"; +import { + rulesApproveCommand, + rulesExportCommand, + rulesExtractCommand, + rulesListCommand, + rulesRejectCommand, +} from "./rules.js"; + +/** Provider guard injected by the CLI entrypoint; called only by the `rules extract` action, before extraction starts. */ +type RequireProvider = () => void; + +/** + * Register the `rules` command group and its five subcommands (extract, list, approve, reject, export). + * @param program - Root Commander program. + * @param requireProvider - CLI provider guard; wired into `rules extract` only, the other subcommands never call it. + */ +export function registerRulesCommand(program: Command, requireProvider: RequireProvider): void { + const rulesCommand = program + .command("rules") + .description( + "Extract, review, and export machine-actionable RuleCandidate records for a downstream rule importer", + ); + + registerExtract(rulesCommand, requireProvider); + registerList(rulesCommand); + registerApprove(rulesCommand); + registerReject(rulesCommand); + registerExport(rulesCommand); +} + +/** Register `rules extract`: the action runs the provider guard first, then the extraction command, both inside the shared error wrapper. */ +function registerExtract(rulesCommand: Command, requireProvider: RequireProvider): void { + rulesCommand + .command("extract") + .description("Extract rule candidates from changed sources (writes .llmwiki/rule-candidates/)") + .action(async () => + runRulesAction(async () => { + requireProvider(); + await rulesExtractCommand(); + }), + ); +} + +/** Register `rules list`, which takes no arguments and runs inside the shared error wrapper. */ +function registerList(rulesCommand: Command): void { + rulesCommand + .command("list") + .description("List pending rule candidates") + .action(async () => runRulesAction(() => rulesListCommand())); +} + +/** Register `rules approve`, forwarding the positional candidate id to the approve action. */ +function registerApprove(rulesCommand: Command): void { + rulesCommand + .command("approve ") + .description("Approve a rule candidate (status -> approved)") + .action(async (id: string) => runRulesAction(() => rulesApproveCommand(id))); +} + +/** Register `rules reject`. 
*/ +function registerReject(rulesCommand: Command): void { + rulesCommand + .command("reject ") + .description("Reject a rule candidate (status -> rejected, archived)") + .action(async (id: string) => runRulesAction(() => rulesRejectCommand(id))); +} + +/** Register `rules export` with its optional `--scope` flag; the parsed options object is passed straight to the export action. */ +function registerExport(rulesCommand: Command): void { + rulesCommand + .command("export") + .description("Emit rule candidates as a JSON array for the rule importer (dist/exports/rule-candidates.json)") + .option("--scope ", "approved (default), proposed, or all") + .action(async (options: { scope?: string }) => + runRulesAction(() => rulesExportCommand(options)), + ); +} + +/** Shared CLI error wrapper for the rule command group: awaits `work` and, if it throws, prints the error (message only for Error instances) to stderr behind a red "Error:" prefix, then exits with code 1. */ +async function runRulesAction(work: () => Promise): Promise { + try { + await work(); + } catch (err) { + console.error(`\x1b[31mError:\x1b[0m ${err instanceof Error ? err.message : err}`); + process.exit(1); + } +} diff --git a/src/commands/rules.ts b/src/commands/rules.ts new file mode 100644 index 0000000..d51b3ee --- /dev/null +++ b/src/commands/rules.ts @@ -0,0 +1,181 @@ +/** + * Commander actions for `llmwiki rules …` (rule pipeline). + * + * The rule-candidate lifecycle mirrors the concept review flow but emits + * structured `RuleCandidate` records for a downstream rule importer instead of prose pages: + * + * rules extract — LLM-extract rules from changed sources into + * .llmwiki/rule-candidates/<id>.json (status proposed) + * rules list — list pending candidates + * rules approve <id> — flip status → approved (in place) + * rules reject <id> — flip status → rejected, archive out of pending + * rules export [--scope] — write the candidate array as JSON for the rule importer + * + * Mutations run under `.llmwiki/lock` to serialize against a concurrent + * extract/approve/reject, matching the concept review lock discipline. 
+ */ + +import { existsSync } from "fs"; +import path from "path"; +import { atomicWrite } from "../utils/markdown.js"; +import { acquireLock, releaseLock } from "../utils/lock.js"; +import * as output from "../utils/output.js"; +import { SOURCES_DIR } from "../utils/constants.js"; +import { + extractRuleCandidates, + type RuleExtractionResult, +} from "../compiler/rule-extractor.js"; +import { + archiveRuleCandidate, + readRuleCandidate, + setRuleCandidateStatus, +} from "../compiler/rule-candidates.js"; +import { candidateFileId } from "../utils/candidate-store.js"; +import { + RULE_EXPORT_SCOPES, + buildRuleCandidatesJson, + collectRuleCandidatesForExport, + type RuleExportScope, +} from "../export/rule-candidates-json.js"; + +/** Default output path for `rules export`, relative to the project root (the export command joins it onto the current working directory). */ +const RULE_EXPORT_PATH = "dist/exports/rule-candidates.json"; + +/** + * Extract rule candidates from changed sources while holding the project lock. Warns and returns early when the sources/ folder is missing; sets exit code 1 when the lock cannot be acquired; always releases the lock afterwards. + * Also needs an available provider (the caller's CLI guard enforces that before this runs). + */ +export async function rulesExtractCommand(): Promise { + const root = process.cwd(); + if (!existsSync(path.join(root, SOURCES_DIR))) { + output.status("!", output.warn("No sources found. Run `llmwiki ingest ` first.")); + return; + } + + const locked = await acquireLock(root); + if (!locked) { + output.status("!", output.error("Could not acquire lock. Try again later.")); + process.exitCode = 1; + return; + } + try { + output.header("Extracting rule candidates"); + reportExtraction(await extractRuleCandidates(root)); + } finally { + await releaseLock(root); + } +} + +/** Print extraction notes, each candidate, and a one-line summary. 
*/ +function reportExtraction(result: RuleExtractionResult): void { + for (const note of result.notes) output.status("i", output.dim(note)); + for (const candidate of result.candidates) { + output.status("?", output.info(`Rule candidate ready: ${candidate.id}`)); + } + output.status( + "✓", + output.success( + `${result.candidates.length} rule candidate(s) from ${result.processedSources.length} source(s).`, + ), + ); +} + +/** List pending rule candidates (those returned for the "proposed" export scope), one line each: id, confidence, and proposed-rule title. Prints a notice instead when there are none. */ +export async function rulesListCommand(): Promise { + const pending = await collectRuleCandidatesForExport(process.cwd(), "proposed"); + if (pending.length === 0) { + output.status("i", output.dim("No pending rule candidates.")); + return; + } + for (const c of pending) { + output.status( + "?", + output.info(`${c.id} [${c.confidence}] — ${c.proposed.title}`), + ); + } +} + +/** Approve a candidate by flipping its status to "approved" under the lock. Hands `false` back to the lock helper when the status update returns nothing (presumably the candidate is gone), so the helper reports the failure. */ +export async function rulesApproveCommand(id: string): Promise { + await mutateUnderLock(id, async (root, fileId) => { + const updated = await setRuleCandidateStatus(root, fileId, "approved"); + if (!updated) return false; + output.status("+", output.success(`Approved rule candidate ${updated.id}.`)); + return true; + }); +} + +/** Reject a candidate: flip status to "rejected" then archive it out of pending, both under the lock. Hands `false` back to the lock helper, without archiving, when the status update returns nothing. */ +export async function rulesRejectCommand(id: string): Promise { + await mutateUnderLock(id, async (root, fileId) => { + const updated = await setRuleCandidateStatus(root, fileId, "rejected"); + if (!updated) return false; + await archiveRuleCandidate(root, fileId); + output.status("-", output.warn(`Rejected rule candidate ${updated.id} — archived.`)); + return true; + }); +} + +/** + * Export rule candidates as a JSON array for the rule importer. Defaults to approved-only; + * `--scope proposed|all` widens the selection. Writes to + * dist/exports/rule-candidates.json. 
+ */ +export async function rulesExportCommand(options: { scope?: string } = {}): Promise { + const root = process.cwd(); + const scope = resolveScope(options.scope); + const candidates = await collectRuleCandidatesForExport(root, scope); + const outPath = path.join(root, RULE_EXPORT_PATH); + await atomicWrite(outPath, buildRuleCandidatesJson(candidates)); + output.status( + "+", + output.success(`Exported ${candidates.length} rule candidate(s) → ${output.source(outPath)}`), + ); +} + +/** Validate the --scope flag against RULE_EXPORT_SCOPES, defaulting to "approved" when the flag is absent or empty. Throws an Error listing the valid values on any other input. */ +function resolveScope(raw: string | undefined): RuleExportScope { + if (!raw) return "approved"; + if (!(RULE_EXPORT_SCOPES as readonly string[]).includes(raw)) { + throw new Error( + `Unknown --scope value "${raw}". Valid: ${RULE_EXPORT_SCOPES.join(", ")}`, + ); + } + return raw as RuleExportScope; +} + +/** + * Shared approve/reject skeleton: pre-check the candidate exists, acquire the + * lock, re-read under it (TOCTOU guard), run the mutation, release. Sets exit + * code 1 when the candidate is missing at either check or when the lock cannot be acquired; the lock is released even if the mutation throws. + */ +async function mutateUnderLock( + id: string, + underLock: (root: string, fileId: string) => Promise, +): Promise { + const root = process.cwd(); + const fileId = candidateFileId(id); + + const preCheck = await readRuleCandidate(root, fileId); + if (!preCheck) { + output.status("!", output.error(`Rule candidate not found: ${id}`)); + process.exitCode = 1; + return; + } + + const locked = await acquireLock(root); + if (!locked) { + output.status("!", output.error("Could not acquire lock. 
Try again later.")); + process.exitCode = 1; + return; + } + try { + const ok = await underLock(root, fileId); + if (!ok) { + output.status("!", output.error(`Rule candidate ${id} was removed during review.`)); + process.exitCode = 1; + } + } finally { + await releaseLock(root); + } +} diff --git a/src/compiler/candidates.ts b/src/compiler/candidates.ts index eb0d2d1..1493c54 100644 --- a/src/compiler/candidates.ts +++ b/src/compiler/candidates.ts @@ -12,11 +12,15 @@ * called again at approval time. */ -import { readdir, rename, unlink, writeFile, mkdir } from "fs/promises"; +import { unlink } from "fs/promises"; import { existsSync } from "fs"; import path from "path"; import { randomBytes } from "crypto"; import { atomicWrite, safeReadFile } from "../utils/markdown.js"; +import { + listCandidateFileIds, + moveCandidateToArchive, +} from "../utils/candidate-store.js"; import * as output from "../utils/output.js"; import { CANDIDATES_DIR, @@ -189,13 +193,9 @@ function isValidCandidate(value: unknown): value is ReviewCandidate { */ export async function listCandidates(root: string): Promise { const dir = path.join(root, CANDIDATES_DIR); - if (!existsSync(dir)) return []; - - const entries = await readdir(dir, { withFileTypes: true }); + const ids = await listCandidateFileIds(dir); const candidates: ReviewCandidate[] = []; - for (const entry of entries) { - if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue; - const id = entry.name.slice(0, -CANDIDATE_EXT.length); + for (const id of ids) { const candidate = await readCandidate(root, id); if (candidate) candidates.push(candidate); } @@ -231,18 +231,5 @@ export async function deleteCandidate(root: string, id: string): Promise { - const sourcePath = candidatePath(root, id); - if (!existsSync(sourcePath)) return false; - - const target = archivePath(root, id); - await mkdir(path.dirname(target), { recursive: true }); - // Copy via writeFile + unlink to support cross-filesystem rename failures. 
- try { - await rename(sourcePath, target); - } catch { - const raw = await safeReadFile(sourcePath); - await writeFile(target, raw, "utf-8"); - await unlink(sourcePath); - } - return true; + return moveCandidateToArchive(candidatePath(root, id), archivePath(root, id)); } diff --git a/src/compiler/delta.ts b/src/compiler/delta.ts new file mode 100644 index 0000000..949b35d --- /dev/null +++ b/src/compiler/delta.ts @@ -0,0 +1,85 @@ +/** + * Programmatic incremental compile delta. + * + * Exposes {@link compileDelta}: a library entry point that runs the normal + * hash-gated compile and then returns ONLY the export pages that changed in + * that run, instead of the full corpus. A caller (e.g. a downstream rule importer) can + * poll this after each ingest and ship just the deltas to its downstream + * store, without diffing the whole wiki itself. + * + * The change set is driven entirely by the compiler's existing + * SHA-256-over-source change detection (`detectChanges` / `.llmwiki/state.json` + * via `src/compiler/hasher.ts`). `compileAndReport` already returns the slugs + * it (re)wrote this run on `CompileResult.pages`; we intersect those with the + * freshly-collected export pages so the delta carries the same enriched, + * provenance-stamped {@link ExportPage} shape the full JSON export emits. + * + * When nothing changed since the persisted state, `CompileResult.pages` is + * empty and the returned delta is empty too — the "unchanged ⇒ empty delta" + * contract the W5 test pins. + */ + +import { compileAndReport } from "./index.js"; +import { collectExportPages } from "../export/collect.js"; +import type { ExportPage } from "../export/types.js"; +import type { CompileOptions } from "../utils/types.js"; + +/** Options for {@link compileDelta}: an alias of `CompileOptions`, forwarded unchanged to the compile pipeline. */ +export type CompileDeltaOptions = CompileOptions; + +/** Result of an incremental delta compile. 
*/ +export interface CompileDeltaResult { + /** + * Export pages (from the `concepts` page directory only) that changed in this run — new or recompiled. Carries the + * full {@link ExportPage} shape (provenance hashes included) so the caller + * can persist deltas through the same contract as a full export. + */ + changedPages: ExportPage[]; + /** Slugs of the changed pages, in the same order as `changedPages` (collection order; reportedly title-sorted — confirm in the collector). */ + changedSlugs: string[]; + /** Count of sources (re)compiled this run, copied from the compile result. */ + compiled: number; + /** Count of unchanged sources skipped this run, copied from the compile result. */ + skipped: number; + /** Count of sources whose pages were orphaned by deletion this run, copied from the compile result. */ + deleted: number; + /** Non-fatal errors collected during the compile. */ + errors: string[]; +} + +/** + * Run an incremental compile and return only the pages that changed. + * + * Reuses the compiler's hash-gated change detection: sources whose SHA-256 + * matches the persisted `.llmwiki/state.json` entry are skipped, so a second + * call with an up-to-date state yields an empty `changedPages`. Adding or + * editing a source yields exactly that source's page(s) in the delta. + * + * @param root - Project root directory. + * @param options - Optional pipeline overrides (forwarded to compile). + * @returns The changed export pages and their slugs, plus run counts and non-fatal errors. + */ +export async function compileDelta( + root: string, + options: CompileDeltaOptions = {}, +): Promise { + const result = await compileAndReport(root, options); + const changedSlugSet = new Set(result.pages); + + // `result.pages` are concept/seed slugs, all written under wiki/concepts. + // Match on (pageDirectory, slug), not bare slug, so a saved query that + // happens to share a slug with a changed concept is never mis-included. 
+ const allPages = await collectExportPages(root); + const changedPages = allPages.filter( + (page) => page.pageDirectory === "concepts" && changedSlugSet.has(page.slug), + ); + + return { + changedPages, + changedSlugs: changedPages.map((page) => page.slug), + compiled: result.compiled, + skipped: result.skipped, + deleted: result.deleted, + errors: result.errors, + }; +} diff --git a/src/compiler/index.ts b/src/compiler/index.ts index b7e209d..f716292 100644 --- a/src/compiler/index.ts +++ b/src/compiler/index.ts @@ -46,6 +46,7 @@ import { resolveLinks } from "./resolver.js"; import { generateIndex } from "./indexgen.js"; import { buildBudgetedCombinedContent, type SourceSlice } from "./prompt-budget.js"; import { addObsidianMeta, generateMOC } from "./obsidian.js"; +import { addModelProvenanceMeta } from "./provenance.js"; import { updateEmbeddings } from "../utils/embeddings.js"; import { writeCandidate } from "./candidates.js"; import { @@ -700,6 +701,7 @@ async function generateSingleSeedPage( }; const frontmatterFields: Record = { ...typedFields }; addObsidianMeta(frontmatterFields, seed.title, []); + addModelProvenanceMeta(frontmatterFields); const frontmatter = buildFrontmatter(frontmatterFields); const error = await writePageIfValid(pagePath, `${frontmatter}\n\n${pageBody}\n`, seed.title); return error ? 
{ slug, error } : { slug }; diff --git a/src/compiler/page-renderer.ts b/src/compiler/page-renderer.ts index 9ddc1a5..73b2767 100644 --- a/src/compiler/page-renderer.ts +++ b/src/compiler/page-renderer.ts @@ -18,7 +18,7 @@ import { import { callClaude } from "../utils/llm.js"; import { buildPagePrompt } from "./prompts.js"; import { addObsidianMeta } from "./obsidian.js"; -import { addProvenanceMeta, reportContradictionWarnings } from "./provenance.js"; +import { addModelProvenanceMeta, addProvenanceMeta, reportContradictionWarnings } from "./provenance.js"; import { CONCEPTS_DIR } from "../utils/constants.js"; import type { SchemaConfig } from "../schema/index.js"; import type { ExtractedConcept } from "../utils/types.js"; @@ -94,6 +94,7 @@ function buildMergedFrontmatter( }; addObsidianMeta(frontmatterFields, entry.concept.concept, entry.concept.tags ?? []); addProvenanceMeta(frontmatterFields, entry.concept); + addModelProvenanceMeta(frontmatterFields); return buildFrontmatter(frontmatterFields); } diff --git a/src/compiler/prompt-budget.ts b/src/compiler/prompt-budget.ts index 97d3fbe..b05c1c0 100644 --- a/src/compiler/prompt-budget.ts +++ b/src/compiler/prompt-budget.ts @@ -90,6 +90,25 @@ function numberLines(content: string): string { .join("\n"); } +/** + * Clip a single source's content to the active prompt budget and prepend line + * numbers, so a prompt that asks the model for line spans actually shows the + * model numbered lines. Only the raw content is clipped to the budget: the truncation marker and the line numbering are added after clipping, so the returned string can be longer than the budget. Used by the rule + * extractor, which feeds one source per call rather than a merged concept. + * + * @param file - Source filename, for the truncation warning only. + * @param content - Raw source content. + * @returns Numbered (and, when over budget, truncated) content. 
+ */ +export function budgetAndNumberSource(file: string, content: string): string { + const budget = resolvePromptBudgetChars(); + if (content.length <= budget) { + return numberLines(content); + } + warnTruncation(file, content.length, 1, budget, budget); + return numberLines(content.slice(0, budget) + TRUNCATION_MARKER); +} + /** Render the slice list using the same `--- SOURCE: ---` headers the LLM is taught to read. */ function formatSlices(slices: SourceSlice[]): string { return slices diff --git a/src/compiler/prompts.ts b/src/compiler/prompts.ts index ec24948..bb57bfe 100644 --- a/src/compiler/prompts.ts +++ b/src/compiler/prompts.ts @@ -24,6 +24,18 @@ function withLangLine(...lines: string[]): string[] { return lang ? [...lines, lang] : lines; } +/** + * Named version of the extraction + page-generation prompt contract. + * + * Bump this whenever the wording of the extraction tool schema, the + * extraction system prompt, or the page-generation prompt changes in a way + * that could alter compiled page content. The export provenance stamp + * (`promptVersion` in the JSON export envelope) carries this value so a + * downstream auditor can distinguish pages produced under different prompt + * generations even when the model id is identical. Format is `vMAJOR`. + */ +export const PROMPT_VERSION = "v1"; + /** Allowed provenance state strings emitted by the LLM tool schema. 
*/ const PROVENANCE_STATE_VALUES: ProvenanceState[] = [ "extracted", diff --git a/src/compiler/provenance.ts b/src/compiler/provenance.ts index 0d49f33..a5b9735 100644 --- a/src/compiler/provenance.ts +++ b/src/compiler/provenance.ts @@ -8,8 +8,24 @@ */ import * as output from "../utils/output.js"; +import { resolveActiveModelId } from "../utils/provider.js"; +import { PROMPT_VERSION } from "./prompts.js"; import type { ExtractedConcept } from "../utils/types.js"; +/** + * Stamp compile-time lineage onto a page's frontmatter: the model id that the + * active provider would use and the named prompt-contract version. Written when + * the page is (re)generated, so it records the model/prompt that actually + * produced the page's current content — unlike an export-time env read, which + * can attribute a page to a model that never touched it. Surfaced per-page in + * the JSON export (`ExportPage.modelId` / `promptVersion`). + * @param fields - Mutable frontmatter record being assembled for a page; its `modelId` and `promptVersion` keys are set (overwriting any existing values). + */ +export function addModelProvenanceMeta(fields: Record): void { + fields.modelId = resolveActiveModelId(); + fields.promptVersion = PROMPT_VERSION; +} + /** * Copy provenance metadata fields from an extracted concept onto the * frontmatter record, omitting fields the LLM did not provide so existing diff --git a/src/compiler/rule-candidates.ts b/src/compiler/rule-candidates.ts new file mode 100644 index 0000000..62e2458 --- /dev/null +++ b/src/compiler/rule-candidates.ts @@ -0,0 +1,398 @@ +/** + * RuleCandidate persistence for the llmwiki rule-extraction pipeline. + * + * Parallel to `candidates.ts` (the concept review queue) but for structured + * `RuleCandidate` records. `llmwiki rules extract` writes one JSON file per + * candidate under `.llmwiki/rule-candidates/<id>.json`; `rules approve`/`reject` + * flip `status` (and archive rejects); `rules export` emits the array the rule importer + * consumes. 
The full candidate is stored on disk so approval is a pure + * status flip — the LLM is never called again at approval time. + * + * Candidate JSON is the canonical import shape for the rule importer: camelCase keys, tagged + * evidence, lowercase status/confidence. Do not reshape it for local use. + */ + +import path from "path"; +import { createHash } from "node:crypto"; +import { atomicWrite, safeReadFile, slugify } from "../utils/markdown.js"; +import { + CANDIDATE_JSON_EXT, + candidateFileId, + listCandidateFileIds, + moveCandidateToArchive, +} from "../utils/candidate-store.js"; +import { + RULE_CANDIDATES_DIR, + RULE_CANDIDATES_ARCHIVE_DIR, +} from "../utils/constants.js"; +import type { + EvidenceRef, + RuleCandidate, + RuleConfidence, + RuleProvenance, + RuleStatus, +} from "../utils/rule-types.js"; + +/** Allowed confidence values, used by the on-disk validity guard. */ +const CONFIDENCE_VALUES: readonly RuleConfidence[] = ["low", "medium", "high"]; + +/** Allowed status values, used by the on-disk validity guard. */ +const STATUS_VALUES: readonly RuleStatus[] = ["proposed", "approved", "rejected"]; + +/** Runtime evidence shape checker for a tagged evidence variant; returns a string (an error message, following this file's error-or-null convention) or null. */ +type EvidenceShapeChecker = (ref: Record) => string | null; + +/** Path to a rule candidate's JSON file under `root` (absolute only when `root` is absolute). */ +function ruleCandidatePath(root: string, id: string): string { + return path.join(root, RULE_CANDIDATES_DIR, `${id}${CANDIDATE_JSON_EXT}`); +} + +/** Path to the archived JSON file for a rejected rule candidate under `root` (absolute only when `root` is absolute). */ +function ruleArchivePath(root: string, id: string): string { + return path.join(root, RULE_CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_JSON_EXT}`); +} + +/** Rule-importer contract caps (mirrored from the rule-import contract's rule_candidate_validation.rs). 
*/ +const CATEGORY_CAP = 64; +const TITLE_CAP = 256; +const PREDICATE_CAP = 512; +const EVIDENCE_REF_CAP = 1024; +const MAX_EVIDENCE_PER_CANDIDATE = 64; +const CANDIDATE_ID_RE = /^rulecand\.[a-z0-9_]+\.[a-z0-9-]+$/; +const RULE_ID_RE = /^rule\.[a-z0-9_]+\.[a-z0-9-]+$/; + +/** + * Normalize a raw LLM category into the rule importer's category alphabet `[a-z0-9_]+`. + * The rule importer rejects hyphens in the category segment, but `slugify` emits them, so + * a multi-word category ("code review") must collapse to underscores + * ("code_review") or the candidate is silently dropped at import. Falls back to "general" when nothing survives cleaning, and truncates the result to CATEGORY_CAP characters. + * @param raw - The model-supplied category string. + */ +export function sanitizeRuleCategory(raw: string): string { + const cleaned = raw.toLowerCase().replace(/[^a-z0-9_]+/g, "_").replace(/^_+|_+$/g, ""); + return (cleaned || "general").slice(0, CATEGORY_CAP); +} + +/** + * Build a collision-resistant slug segment in the rule importer's slug alphabet `[a-z0-9-]+`. + * Appends the first 8 hex chars of the SHA-256 of a content signature (source identity + rule + * body) so two rules with the same title — across sources or from similar LLM + * outputs — are very unlikely to collapse onto the same candidate id/file. When the title slugifies to the empty string the result is `rule-` followed by that hash. + * @param title - The rule title. + * @param contentSignature - Stable per-rule signature (e.g. source + when + then). + */ +export function buildRuleSlug(title: string, contentSignature: string): string { + const base = slugify(title); + const hash = createHash("sha256").update(contentSignature).digest("hex").slice(0, 8); + return base ? `${base}-${hash}` : `rule-${hash}`; +} + +/** + * Producer-side mirror of the rule importer's import gate. Returns an error string when a + * candidate would be rejected at import (bad id/category alphabet, oversized + * field, non-https url, unsafe evidence path, too many refs), or null when it + * is importable. Keeps the producer from "successfully" emitting candidates + * the rule importer silently refuses. + * @param c - The candidate to validate. 
+ */ +export function validateRuleCandidate(c: RuleCandidate): string | null { + const shapeError = ruleCandidateShapeError(c); + if (shapeError) return shapeError; + if (!CANDIDATE_ID_RE.test(c.id)) return `candidate id "${c.id}" violates ${CANDIDATE_ID_RE}`; + if (!RULE_ID_RE.test(c.proposed.id)) return `proposed rule id "${c.proposed.id}" violates ${RULE_ID_RE}`; + if (c.proposed.id !== `rule.${c.id.slice("rulecand.".length)}`) { + return `proposed rule id "${c.proposed.id}" does not match candidate id "${c.id}"`; + } + if (!c.id.startsWith(`rulecand.${c.proposed.category}.`)) { + return `candidate id "${c.id}" does not match category "${c.proposed.category}"`; + } + const capError = firstFieldOverCap(c); + if (capError) return capError; + if (c.evidence.length > MAX_EVIDENCE_PER_CANDIDATE) { + return `too many evidence refs: ${c.evidence.length} (max ${MAX_EVIDENCE_PER_CANDIDATE})`; + } + return firstEvidenceError(c.evidence); +} + +/** Runtime shape validation before contract-specific validation reads fields. */ +function ruleCandidateShapeError(value: unknown): string | null { + if (!value || typeof value !== "object") return "candidate must be an object"; + const c = value as Record; + return candidateScalarShapeError(c) + ?? candidateEnumShapeError(c) + ?? proposedRuleShapeError(c.proposed) + ?? candidateEvidenceShapeError(c.evidence) + ?? provenanceShapeError(c.provenance); +} + +/** Runtime shape validation for top-level scalar candidate fields. */ +function candidateScalarShapeError(c: Record): string | null { + if (typeof c.id !== "string") return "candidate id must be a string"; + if (typeof c.createdAt !== "string") return "createdAt must be a string"; + return null; +} + +/** Runtime shape validation for top-level candidate enum fields. 
*/
function candidateEnumShapeError(c: Record<string, unknown>): string | null {
  if (!CONFIDENCE_VALUES.includes(c.confidence as RuleConfidence)) return "invalid confidence";
  if (!STATUS_VALUES.includes(c.status as RuleStatus)) return "invalid status";
  return null;
}

/**
 * Runtime shape validation for the top-level evidence array.
 * @param value - The candidate's untyped `evidence` field.
 * @returns An error message, or null when it is an array of well-shaped refs.
 */
function candidateEvidenceShapeError(value: unknown): string | null {
  if (!Array.isArray(value)) return "evidence must be an array";
  return firstEvidenceShapeError(value);
}

/**
 * Runtime shape validation for the proposed rule object.
 * Requires the six text fields to be strings and `version` to be exactly 1.
 * @param value - The candidate's untyped `proposed` field.
 * @returns An error message naming the first bad field, or null.
 */
function proposedRuleShapeError(value: unknown): string | null {
  if (!value || typeof value !== "object") return "proposed rule must be an object";
  const proposed = value as Record<string, unknown>;
  for (const field of ["id", "category", "title", "description", "when", "then"]) {
    if (typeof proposed[field] !== "string") return `proposed.${field} must be a string`;
  }
  if (proposed.version !== 1) return "proposed.version must be 1";
  return null;
}

/**
 * First over-cap proposed-rule field, or null.
 * Checks category, title, when and then against their caps.
 * @param c - A candidate whose shape has already been validated.
 */
function firstFieldOverCap(c: RuleCandidate): string | null {
  const checks: Array<[string, string, number]> = [
    ["category", c.proposed.category, CATEGORY_CAP],
    ["title", c.proposed.title, TITLE_CAP],
    ["when", c.proposed.when, PREDICATE_CAP],
    ["then", c.proposed.then, PREDICATE_CAP],
  ];
  for (const [name, value, cap] of checks) {
    if (value.length > cap) return `${name} exceeds ${cap} chars`;
  }
  return null;
}

/** First evidence ref that the rule importer would reject (scheme/path/length), or null. */
function firstEvidenceError(evidence: EvidenceRef[]): string | null {
  for (const ref of evidence) {
    const error = evidenceRefError(ref);
    if (error) return error;
  }
  return null;
}

/** Runtime shape validation for every evidence ref.
*/
function firstEvidenceShapeError(evidence: unknown[]): string | null {
  for (const ref of evidence) {
    const error = evidenceShapeError(ref);
    if (error) return error;
  }
  return null;
}

/**
 * Runtime shape validation for one evidence ref.
 * @param ref - Untrusted evidence entry.
 * @returns An error message, or null when the ref has a known `kind` and that
 * kind's required fields.
 */
function evidenceShapeError(ref: unknown): string | null {
  if (!ref || typeof ref !== "object") return "evidence ref must be an object";
  const candidate = ref as Record<string, unknown>;
  if (typeof candidate.kind !== "string") return "evidence kind must be a string";
  // Own-property check: a plain index lookup would also resolve names inherited
  // from Object.prototype ("hasOwnProperty", "constructor", "__proto__", …),
  // letting an unknown kind either pass validation or throw on the call.
  if (!Object.prototype.hasOwnProperty.call(EVIDENCE_SHAPE_CHECKERS, candidate.kind)) {
    return "unknown evidence kind";
  }
  return EVIDENCE_SHAPE_CHECKERS[candidate.kind](candidate);
}

/** Runtime shape validation for each tagged evidence variant, keyed by `kind`. */
const EVIDENCE_SHAPE_CHECKERS: Record<string, EvidenceShapeChecker> = {
  audit: (ref) => requiredStringField(ref, "auditId", "audit evidence requires auditId"),
  file: fileEvidenceShapeError,
  memory: (ref) => requiredStringField(ref, "memoryId", "memory evidence requires memoryId"),
  url: (ref) => requiredStringField(ref, "url", "url evidence requires url"),
};

/**
 * Require a string field inside an on-disk tagged object.
 * @param value - The untyped object to inspect.
 * @param field - Name of the field that must be a string.
 * @param message - Error message returned when it is not.
 */
function requiredStringField(
  value: Record<string, unknown>,
  field: string,
  message: string,
): string | null {
  return typeof value[field] === "string" ? null : message;
}

/**
 * Runtime shape validation for file evidence, including optional line spans.
 * `path` must be a string; `lineStart` / `lineEnd` may be absent but must be
 * numbers when present.
 */
function fileEvidenceShapeError(ref: Record<string, unknown>): string | null {
  if (typeof ref.path !== "string") return "file evidence requires path";
  if (ref.lineStart !== undefined && typeof ref.lineStart !== "number") return "lineStart must be a number";
  if (ref.lineEnd !== undefined && typeof ref.lineEnd !== "number") return "lineEnd must be a number";
  return null;
}

/** Runtime shape validation for provenance.
*/
function provenanceShapeError(value: unknown): string | null {
  if (!value || typeof value !== "object") return "provenance must be an object";
  const provenance = value as Record<string, unknown>;
  if (typeof provenance.source !== "string") return "provenance.source must be a string";
  // modelId / modelVersion are optional, but must be strings when present.
  if (provenance.modelId !== undefined && typeof provenance.modelId !== "string") return "provenance.modelId must be a string";
  if (provenance.modelVersion !== undefined && typeof provenance.modelVersion !== "string") return "provenance.modelVersion must be a string";
  return null;
}

/**
 * the rule importer's per-ref check for the two network/filesystem-backed evidence kinds.
 * Only `url` and `file` refs are inspected here; other kinds return null.
 */
function evidenceRefError(ref: EvidenceRef): string | null {
  if (ref.kind === "url") return urlEvidenceError(ref.url);
  if (ref.kind === "file") return fileEvidenceError(ref.path);
  return null;
}

/** Url evidence must be https and within the reference cap. */
function urlEvidenceError(url: string): string | null {
  if (url.length > EVIDENCE_REF_CAP) return `evidence url exceeds ${EVIDENCE_REF_CAP} chars`;
  // Case-sensitive prefix match: only a literal lowercase "https://" passes.
  if (!url.startsWith("https://")) return `url evidence must be https: ${url}`;
  return null;
}

/** File evidence must be a safe relative path within the reference cap. */
function fileEvidenceError(filePath: string): string | null {
  if (filePath.length > EVIDENCE_REF_CAP) return `evidence path exceeds ${EVIDENCE_REF_CAP} chars`;
  if (isUnsafeEvidencePath(filePath)) return `unsafe evidence path: ${filePath}`;
  return null;
}

/**
 * Reject absolute paths, Windows drive/UNC roots, and any `..` traversal segment.
 * @param p - Evidence path as stored on the candidate.
 * @returns True when the path must not be emitted.
 */
function isUnsafeEvidencePath(p: string): boolean {
  if (p.startsWith("/") || p.startsWith("\\") || /^[a-zA-Z]:/.test(p)) return true;
  // Split on both separators so `a\..\b` is caught as well as `a/../b`.
  return p.split(/[/\\]/).some((seg) => seg === "..");
}

/** Input shape for assembling a new candidate (id/status/createdAt derived here).
*/
export interface RuleCandidateDraft {
  category: string;
  slug: string;
  title: string;
  description: string;
  when: string;
  then: string;
  evidence: EvidenceRef[];
  provenance: RuleProvenance;
  confidence: RuleConfidence;
}

/**
 * Assemble a RuleCandidate from a draft. Ids follow the rule importer's convention
 * (`rulecand.<category>.<slug>` / `rule.<category>.<slug>`), status starts at
 * `proposed`, and version starts at 1.
 * @param draft - The extracted rule fields.
 * @param createdAt - RFC3339 creation timestamp (injected for determinism).
 * @returns The assembled candidate; nothing is validated or written here.
 */
export function buildRuleCandidate(
  draft: RuleCandidateDraft,
  createdAt: string,
): RuleCandidate {
  return {
    id: `rulecand.${draft.category}.${draft.slug}`,
    proposed: {
      id: `rule.${draft.category}.${draft.slug}`,
      category: draft.category,
      title: draft.title,
      description: draft.description,
      when: draft.when,
      then: draft.then,
      version: 1,
    },
    evidence: draft.evidence,
    provenance: draft.provenance,
    confidence: draft.confidence,
    status: "proposed",
    createdAt,
  };
}

/**
 * Persist a rule candidate as JSON. The filename is derived from the id with
 * `.` replaced by `-` so it is a safe single path segment.
 * @param root - Project root directory.
 * @param candidate - Fully-formed candidate to write.
 * @returns The path the candidate was written to.
 */
export async function writeRuleCandidate(
  root: string,
  candidate: RuleCandidate,
): Promise<string> {
  const fileId = candidateFileId(candidate.id);
  const target = ruleCandidatePath(root, fileId);
  await atomicWrite(target, JSON.stringify(candidate, null, 2));
  return target;
}

/**
 * Defensive type-guard so corrupted candidate files don't blow up the CLI.
 * A value counts as a candidate only when the full import-gate validation
 * returns no error.
 */
function isValidRuleCandidate(value: unknown): value is RuleCandidate {
  return validateRuleCandidate(value as RuleCandidate) === null;
}

/** Read one candidate JSON file. Returns null when missing or malformed.
*/
export async function readRuleCandidate(
  root: string,
  fileId: string,
): Promise<RuleCandidate | null> {
  const raw = await safeReadFile(ruleCandidatePath(root, fileId));
  if (!raw) return null;
  try {
    const parsed: unknown = JSON.parse(raw);
    // Validation runs inside the try so a throw from it is also reported as null.
    return isValidRuleCandidate(parsed) ? parsed : null;
  } catch {
    return null;
  }
}

/**
 * List every pending rule candidate, sorted by createdAt then id so the order
 * is deterministic. Skips non-JSON entries (e.g. the archive subdirectory).
 * Files that fail to read or validate are left out.
 * @param root - Project root directory.
 * @returns The readable candidates in sorted order.
 */
export async function listRuleCandidates(root: string): Promise<RuleCandidate[]> {
  const dir = path.join(root, RULE_CANDIDATES_DIR);
  const fileIds = await listCandidateFileIds(dir);
  const candidates: RuleCandidate[] = [];
  for (const fileId of fileIds) {
    const candidate = await readRuleCandidate(root, fileId);
    if (candidate) candidates.push(candidate);
  }

  candidates.sort(
    (a, b) => a.createdAt.localeCompare(b.createdAt) || a.id.localeCompare(b.id),
  );
  return candidates;
}

/**
 * Flip a pending candidate's status in place and rewrite its file.
 * @param root - Project root directory.
 * @param fileId - Filesystem id of the candidate (dotted id with `.`→`-`).
 * @param status - New status to set.
 * @returns The updated candidate, or null when it did not exist.
 */
export async function setRuleCandidateStatus(
  root: string,
  fileId: string,
  status: RuleStatus,
): Promise<RuleCandidate | null> {
  const candidate = await readRuleCandidate(root, fileId);
  if (!candidate) return null;
  const updated: RuleCandidate = { ...candidate, status };
  await atomicWrite(
    ruleCandidatePath(root, fileId),
    JSON.stringify(updated, null, 2),
  );
  return updated;
}

/**
 * Archive a candidate into the archive subdirectory so rejected proposals stay
 * auditable. The status flip to "rejected" happens before this via
 * {@link setRuleCandidateStatus}; here we only move the file.
 * @returns True when the candidate existed and was moved.
+ */ +export async function archiveRuleCandidate( + root: string, + fileId: string, +): Promise { + return moveCandidateToArchive( + ruleCandidatePath(root, fileId), + ruleArchivePath(root, fileId), + ); +} diff --git a/src/compiler/rule-extractor.ts b/src/compiler/rule-extractor.ts new file mode 100644 index 0000000..c1a2bf3 --- /dev/null +++ b/src/compiler/rule-extractor.ts @@ -0,0 +1,252 @@ +/** + * Rule-extraction orchestrator (rule pipeline). + * + * Drives the `RuleCandidate` producer half of the learning loop: for each + * changed source file (gated by the same SHA-256 change detection the concept + * compiler uses), call the LLM with the rule-extraction tool, map each + * extracted rule into a `RuleCandidate`, and persist it under + * `.llmwiki/rule-candidates/`. + * + * Provenance is stamped with the active model id (W4's `resolveActiveModelId`) + * and the rule-prompt version so each recommendation is auditable even though + * the extraction itself is nondeterministic. The createdAt timestamp is the + * only nondeterministic field by design (RFC3339 wall-clock). 
+ */ + +import { readFile } from "fs/promises"; +import path from "path"; +import { detectChanges } from "./hasher.js"; +import { parseFrontmatter, slugify } from "../utils/markdown.js"; +import { callClaude } from "../utils/llm.js"; +import { resolveActiveModelId } from "../utils/provider.js"; +import { budgetAndNumberSource } from "./prompt-budget.js"; +import { SOURCES_DIR } from "../utils/constants.js"; +import { + readRuleState, + updateRuleSourceState, +} from "./rule-state.js"; +import { + RULE_EXTRACTION_TOOL, + RULE_PROMPT_VERSION, + buildRuleExtractionPrompt, + parseRules, + type ExtractedRule, +} from "./rule-prompts.js"; +import { + buildRuleCandidate, + buildRuleSlug, + readRuleCandidate, + sanitizeRuleCategory, + validateRuleCandidate, + writeRuleCandidate, +} from "./rule-candidates.js"; +import { candidateFileId } from "../utils/candidate-store.js"; +import { createHash } from "node:crypto"; +import type { EvidenceRef, RuleCandidate, RuleProvenance } from "../utils/rule-types.js"; + +/** Producer tag stamped on every candidate's provenance. */ +const PROVENANCE_SOURCE = "llm-wiki-compiler"; + +/** Structured outcome of a rules-extraction run, for CLI + programmatic use. */ +export interface RuleExtractionResult { + /** Source files processed (changed/new since last state). */ + processedSources: string[]; + /** Candidates written this run. */ + candidates: RuleCandidate[]; + /** Non-fatal problems (e.g. a source that yielded no rules). */ + notes: string[]; +} + +/** Determine whether a source's `source` frontmatter field is a URL. */ +function isUrl(value: unknown): value is string { + return typeof value === "string" && /^https?:\/\//i.test(value); +} + +/** + * Build the evidence list for an extracted rule from its source file. + * + * URL-origin sources emit a `url` evidence ref; everything else emits a `file` + * ref keyed on the source filename, carrying the extraction's line span when + * present. 
Exactly one evidence ref is produced per rule so the contract stays + * predictable for the rule importer. + */ +function buildEvidence( + sourceFile: string, + sourceMeta: Record, + rule: ExtractedRule, + maxLine: number, +): EvidenceRef[] { + const origin = sourceMeta.source; + if (isUrl(origin)) { + return [{ kind: "url", url: origin }]; + } + const fileRef: EvidenceRef = { kind: "file", path: sourceFile }; + // Drop spans pointing past the content actually shown to the model — an + // out-of-bounds anchor is a hallucination, not evidence. + if (rule.evidenceLineStart !== undefined && rule.evidenceLineStart <= maxLine) { + fileRef.lineStart = rule.evidenceLineStart; + } + if (rule.evidenceLineEnd !== undefined && rule.evidenceLineEnd <= maxLine) { + fileRef.lineEnd = rule.evidenceLineEnd; + } + return [fileRef]; +} + +/** Build the provenance stamp shared by every candidate from a run. */ +function buildProvenance(): RuleProvenance { + return { + source: PROVENANCE_SOURCE, + modelId: resolveActiveModelId(), + modelVersion: RULE_PROMPT_VERSION, + }; +} + +/** + * Call the LLM with the rule-extraction tool and parse the result. + * @param numberedContent - Source content with 1-based line numbers already + * prepended (and clipped to the prompt budget), so the model's line-span + * citations refer to anchors it can actually see. + */ +async function extractRulesFromContent(numberedContent: string): Promise { + const system = buildRuleExtractionPrompt(numberedContent); + const raw = await callClaude({ + system, + messages: [{ role: "user", content: "Extract the actionable rules from this source." }], + tools: [RULE_EXTRACTION_TOOL], + }); + return parseRules(raw); +} + +/** A rule paired with the source line count the model was shown, for span bounding. */ +interface RuleInContext { + rule: ExtractedRule; + maxLine: number; +} + +/** + * Build a candidate for a single extracted rule. 
The category is normalized to + * the rule importer's `[a-z0-9_]` alphabet and the slug carries a content-hash suffix so + * distinct rules never collide on the same id/file. createdAt is injected by + * the caller for a single consistent timestamp per run. + */ +function candidateForRule( + sourceFile: string, + sourceMeta: Record, + context: RuleInContext, + provenance: RuleProvenance, + createdAt: string, +): RuleCandidate { + const { rule, maxLine } = context; + const signature = `${sourceFile}\n${rule.when}\n${rule.then}\n${rule.description}`; + return buildRuleCandidate( + { + category: sanitizeRuleCategory(rule.category), + slug: buildRuleSlug(rule.title, signature), + title: rule.title, + description: rule.description, + when: rule.when, + then: rule.then, + evidence: buildEvidence(sourceFile, sourceMeta, rule, maxLine), + provenance, + confidence: rule.confidence, + }, + createdAt, + ); +} + +/** Process one source file end-to-end: read, number, extract, build candidates. */ +async function extractForSource( + root: string, + sourceFile: string, + provenance: RuleProvenance, + createdAt: string, +): Promise<{ candidates: RuleCandidate[]; note?: string; hash: string }> { + const sourcePath = path.join(root, SOURCES_DIR, sourceFile); + const raw = await readFile(sourcePath, "utf-8"); + const hash = createHash("sha256").update(raw).digest("hex"); + const { meta } = parseFrontmatter(raw); + const numbered = budgetAndNumberSource(sourceFile, raw); + const maxLine = numbered.split("\n").length; + const rules = await extractRulesFromContent(numbered); + if (rules.length === 0) { + return { candidates: [], note: `No rules extracted from ${sourceFile}`, hash }; + } + const candidates = rules + .filter((rule) => slugify(rule.title).length > 0) + .map((rule) => candidateForRule(sourceFile, meta, { rule, maxLine }, provenance, createdAt)); + return { candidates, hash }; +} + +/** + * Source filenames that are new or changed since rule extraction last ran. 
+ * Compares against `.llmwiki/rule-state.json` — NOT the concept compiler's + * state — so extraction has an independent change-detection cursor. + */ +async function changedSources(root: string): Promise { + const state = await readRuleState(root); + const changes = await detectChanges(root, state); + return changes + .filter((c) => c.status === "new" || c.status === "changed") + .map((c) => c.file); +} + +/** + * Extract rule candidates for every changed source and persist them. + * + * @param root - Project root directory. + * @param createdAt - RFC3339 timestamp injected once per run for determinism in + * tests; defaults to the current wall-clock time. + * @returns Structured result with processed sources, written candidates, notes. + */ +export async function extractRuleCandidates( + root: string, + createdAt: string = new Date().toISOString(), +): Promise { + const provenance = buildProvenance(); + const sources = await changedSources(root); + + const candidates: RuleCandidate[] = []; + const notes: string[] = []; + for (const sourceFile of sources) { + const outcome = await extractForSource(root, sourceFile, provenance, createdAt); + if (outcome.note) notes.push(outcome.note); + for (const candidate of outcome.candidates) { + if (await persistCandidate(root, candidate, notes)) candidates.push(candidate); + } + // Advance the rule cursor whether or not this source yielded candidates, so + // an unchanged source is not re-extracted (and approvals are never re-fired) + // on the next run. + await updateRuleSourceState(root, sourceFile, { + hash: outcome.hash, + concepts: [], + compiledAt: createdAt, + }); + } + + return { processedSources: sources, candidates, notes }; +} + +/** + * Persist a freshly-extracted candidate, refusing to clobber a human decision. + * Returns true when the candidate was written. 
An existing candidate that has + * already been approved or rejected is preserved as-is; a candidate that would + * fail the rule importer's import gate is dropped with a note instead of being emitted. + */ +async function persistCandidate( + root: string, + candidate: RuleCandidate, + notes: string[], +): Promise { + const existing = await readRuleCandidate(root, candidateFileId(candidate.id)); + if (existing && existing.status !== "proposed") { + notes.push(`Kept ${existing.status} candidate ${candidate.id} (re-extraction did not overwrite it).`); + return false; + } + const invalid = validateRuleCandidate(candidate); + if (invalid) { + notes.push(`Dropped candidate ${candidate.id}: ${invalid}`); + return false; + } + await writeRuleCandidate(root, candidate); + return true; +} diff --git a/src/compiler/rule-prompts.ts b/src/compiler/rule-prompts.ts new file mode 100644 index 0000000..657f4cb --- /dev/null +++ b/src/compiler/rule-prompts.ts @@ -0,0 +1,226 @@ +/** + * Rule-extraction prompt + tool schema (rule pipeline). + * + * Sibling of `prompts.ts`'s concept-extraction contract. Where concept + * extraction yields prose wiki pages, rule extraction yields structured + * `RuleCandidate.proposed` fields: a machine-actionable proposed rule with a + * trigger predicate (`when`), an action discriminator (`then`), a category, + * and an extraction confidence. The compiler maps this tool output into + * `RuleCandidate` records that a downstream rule importer imports for human approval. + * + * The `when`/`then` language is interpreter-defined: a concise human-readable + * condition/action string is sufficient at this stage. + */ + +import type { RuleConfidence } from "../utils/rule-types.js"; +import { languageDirective } from "../utils/output-language.js"; + +/** + * Named version of the rule-extraction prompt + tool contract. 
+ * + * Stamped onto `RuleCandidate.provenance.modelVersion` so a downstream auditor + * can distinguish candidates produced under different prompt generations even + * when the model id is identical. Bump on any wording change that could alter + * extracted rules. Format is `vMAJOR`. + */ +export const RULE_PROMPT_VERSION = "v1"; + +/** Allowed confidence levels emitted by the rule-extraction tool schema. */ +const RULE_CONFIDENCE_VALUES: RuleConfidence[] = ["low", "medium", "high"]; + +/** + * Anthropic Tool definition for extracting actionable rules from a source. + * Each extracted rule maps to a `RuleCandidate.proposed` plus `confidence` + * and optional evidence hints (line spans within the source). + */ +export const RULE_EXTRACTION_TOOL = { + name: "extract_rules", + description: + "Extract machine-actionable workflow rules from a source document. " + + "A rule is a reusable guideline a team would want enforced.", + input_schema: { + type: "object" as const, + properties: { + rules: { + type: "array", + items: { + type: "object", + properties: { + category: { + type: "string", + description: + "Coarse grouping for the rule, lowercase (e.g. 'process', 'security', 'docs', 'testing').", + }, + title: { + type: "string", + description: "Short human-readable rule title.", + }, + description: { + type: "string", + description: "What the rule enforces and why it matters.", + }, + when: { + type: "string", + description: + "Trigger predicate — a concise condition string describing when the rule should fire.", + }, + then: { + type: "string", + description: + "Action discriminator — what should happen when the rule fires (e.g. 
'warn', 'block', 'suggest ').", + }, + confidence: { + type: "string", + enum: RULE_CONFIDENCE_VALUES, + description: + "Extraction confidence: 'high' if directly stated, 'medium' if synthesised, 'low' if speculative.", + }, + evidenceLineStart: { + type: "number", + description: + "Optional 1-based start line in the numbered source content that supports this rule.", + }, + evidenceLineEnd: { + type: "number", + description: "Optional 1-based end line supporting this rule.", + }, + }, + required: ["category", "title", "description", "when", "then", "confidence"], + }, + }, + }, + required: ["rules"], + }, +}; + +/** Build optional prompt lines, splicing the output-language directive when set. */ +function withLangLine(...lines: string[]): string[] { + const lang = languageDirective(); + return lang ? [...lines, lang] : lines; +} + +/** + * Build the system prompt for the rule-extraction phase. + * Instructs the LLM to identify reusable, enforceable workflow rules. + * @param sourceContent - Full text of the source document. + * @returns System prompt string for the extraction call. + */ +export function buildRuleExtractionPrompt(sourceContent: string): string { + return [ + ...withLangLine( + "You are a rule extraction engine for a team-memory system. 
Analyze the", + "source document and identify 1-6 reusable, enforceable workflow rules a", + "team would want a guidance system to apply automatically.", + "A good rule is specific, actionable, and triggerable — not a vague value.", + "Use the extract_rules tool to return your findings.", + ), + "", + "For every rule emit:", + " - category: lowercase coarse grouping (process, security, docs, testing, …).", + " - title + description: what the rule enforces and why.", + " - when: a concise trigger predicate (the condition that fires the rule).", + " - then: the action to take when it fires.", + " - confidence: 'high' if the source states it directly, 'medium' if you", + " synthesised it, 'low' if it is speculative.", + " - evidenceLineStart/evidenceLineEnd: the source line range supporting the", + " rule when you can identify it (1-based, from the numbered content below).", + "", + "--- SOURCE DOCUMENT ---", + "", + sourceContent, + ].join("\n"); +} + +/** Raw rule shape as it arrives from the tool JSON, before validation. */ +interface RawRule { + category: unknown; + title: unknown; + description: unknown; + when: unknown; + then: unknown; + confidence: unknown; + evidenceLineStart?: unknown; + evidenceLineEnd?: unknown; +} + +/** A validated extracted rule with normalized field types. */ +export interface ExtractedRule { + category: string; + title: string; + description: string; + when: string; + then: string; + confidence: RuleConfidence; + evidenceLineStart?: number; + evidenceLineEnd?: number; +} + +/** True when every required string field is a non-empty string. */ +function hasRequiredStrings(r: RawRule): boolean { + const fields = [r.category, r.title, r.description, r.when, r.then]; + return fields.every((f) => typeof f === "string" && f.trim().length > 0); +} + +/** True if the raw rule passes validation (required strings + known confidence). 
*/
function isValidRawRule(r: RawRule): boolean {
  if (!hasRequiredStrings(r)) return false;
  if (typeof r.confidence !== "string") return false;
  return RULE_CONFIDENCE_VALUES.includes(r.confidence as RuleConfidence);
}

/**
 * Accept a line number only when it is a positive integer.
 * @returns The value itself, or undefined when it is absent or invalid.
 */
function coerceLine(value: unknown): number | undefined {
  if (typeof value !== "number") return undefined;
  if (!Number.isInteger(value) || value <= 0) return undefined;
  return value;
}

/**
 * Turn a raw rule that already passed validation into an ExtractedRule,
 * trimming every text field and attaching the line span when usable.
 */
function mapRawRule(r: RawRule): ExtractedRule {
  const text = (field: unknown): string => (field as string).trim();
  const rule: ExtractedRule = {
    category: text(r.category),
    title: text(r.title),
    description: text(r.description),
    when: text(r.when),
    then: text(r.then),
    confidence: r.confidence as RuleConfidence,
  };
  const start = coerceLine(r.evidenceLineStart);
  const end = coerceLine(r.evidenceLineEnd);
  assignEvidenceSpan(rule, start, end);
  return rule;
}

/**
 * Copy a line span onto the rule unless it is inverted. When both ends are
 * present and end < start, neither is attached; a start or an end on its own
 * is attached as a single-anchor hint.
 * @param rule - Rule to mutate.
 * @param start - Validated start line, if any.
 * @param end - Validated end line, if any.
 */
function assignEvidenceSpan(
  rule: ExtractedRule,
  start: number | undefined,
  end: number | undefined,
): void {
  const inverted = start !== undefined && end !== undefined && end < start;
  if (inverted) return;
  if (start !== undefined) rule.evidenceLineStart = start;
  if (end !== undefined) rule.evidenceLineEnd = end;
}

/**
 * Parse the JSON tool output from rule extraction into typed objects.
 * Malformed JSON or invalid entries are dropped rather than throwing so a
 * single bad rule never aborts a whole source's extraction.
 * @param toolOutput - Raw JSON string returned from the extract_rules tool.
 * @returns Array of validated ExtractedRule objects.
+ */ +export function parseRules(toolOutput: string): ExtractedRule[] { + let parsed: { rules?: RawRule[] }; + try { + parsed = JSON.parse(toolOutput) as { rules?: RawRule[] }; + } catch { + return []; + } + const rules: RawRule[] = parsed.rules ?? []; + return rules.filter(isValidRawRule).map(mapRawRule); +} diff --git a/src/compiler/rule-state.ts b/src/compiler/rule-state.ts new file mode 100644 index 0000000..f0052fe --- /dev/null +++ b/src/compiler/rule-state.ts @@ -0,0 +1,67 @@ +/** + * Rule-extraction change-detection state (rule pipeline). + * + * `llmwiki rules extract` must decide which sources to (re)process on its own + * cadence, independent of the concept compiler. If it reused `.llmwiki/state.json` + * a source already compiled into pages would be considered "unchanged" and + * yield zero rule candidates, while a source not yet compiled would be + * re-extracted on every run. This module persists a parallel per-source hash + * map in `.llmwiki/rule-state.json` so rule extraction advances its own cursor. + * + * The shape mirrors {@link WikiState} (so `detectChanges` can compare against + * it directly), but only the per-source `hash` is meaningful here — `concepts` + * is always empty because rule extraction produces candidates, not pages. + */ + +import { readFile, writeFile, rename, mkdir } from "fs/promises"; +import { existsSync } from "fs"; +import path from "path"; +import { LLMWIKI_DIR, RULE_STATE_FILE } from "../utils/constants.js"; +import type { WikiState, SourceState } from "../utils/types.js"; + +/** A fresh, empty rule-extraction state. */ +function emptyRuleState(): WikiState { + return { version: 1, indexHash: "", sources: {} }; +} + +/** + * Read `.llmwiki/rule-state.json`, returning an empty state when it is missing + * or unreadable (a corrupt cursor must never block extraction — it just means + * everything looks new). + * @param root - Project root directory. 
+ */ +export async function readRuleState(root: string): Promise { + const filePath = path.join(root, RULE_STATE_FILE); + if (!existsSync(filePath)) return emptyRuleState(); + try { + return JSON.parse(await readFile(filePath, "utf-8")) as WikiState; + } catch { + return emptyRuleState(); + } +} + +/** Atomically write rule-state.json (write .tmp then rename). */ +async function writeRuleState(root: string, state: WikiState): Promise { + await mkdir(path.join(root, LLMWIKI_DIR), { recursive: true }); + const filePath = path.join(root, RULE_STATE_FILE); + const tmpPath = `${filePath}.tmp`; + await writeFile(tmpPath, JSON.stringify(state, null, 2), "utf-8"); + await rename(tmpPath, filePath); +} + +/** + * Record that a source was processed by rule extraction at the given hash, so + * a subsequent `rules extract` skips it until the source changes again. + * @param root - Project root directory. + * @param sourceFile - Source filename within `sources/`. + * @param entry - The source's hash + processing timestamp. 
+ */ +export async function updateRuleSourceState( + root: string, + sourceFile: string, + entry: SourceState, +): Promise { + const state = await readRuleState(root); + state.sources[sourceFile] = entry; + await writeRuleState(root, state); +} diff --git a/src/export/collect.ts b/src/export/collect.ts index 2081fb6..a4c6252 100644 --- a/src/export/collect.ts +++ b/src/export/collect.ts @@ -19,6 +19,12 @@ import { flattenCitations } from "../context/provenance.js"; import type { PageKind } from "../schema/types.js"; import type { ProvenanceState, ContradictionRef } from "../utils/types.js"; import { CONCEPTS_DIR, QUERIES_DIR } from "../utils/constants.js"; +import { + hashPageBody, + resolveSourceHashes, + sourceHashLookupFromSnapshot, + type SourceHashLookup, +} from "./provenance.js"; import type { ExportPage, PageDirectory } from "./types.js"; export { extractWikilinkSlugs }; @@ -85,9 +91,14 @@ function readPageKind(meta: Record): PageKind | undefined { * (path, kind, advisory*, citations, aliases) are populated here so * every export format gets the same enriched payload. */ -function toExportPage(raw: RawWikiPage, snapshot: FreshnessSnapshot): ExportPage { +function toExportPage( + raw: RawWikiPage, + snapshot: FreshnessSnapshot, + sourceHashes: SourceHashLookup, +): ExportPage { const meta = raw.frontmatter; const aliases = readStringArray(meta, "aliases"); + const sources = readStringArray(meta, "sources"); const freshness = computeFreshness( { slug: raw.slug, pageDirectory: raw.pageDirectory, frontmatter: meta }, snapshot, @@ -98,7 +109,7 @@ function toExportPage(raw: RawWikiPage, snapshot: FreshnessSnapshot): ExportPage pageDirectory: raw.pageDirectory, path: buildPagePath(raw.pageDirectory, raw.slug), summary: typeof meta.summary === "string" ? meta.summary : "", - sources: readStringArray(meta, "sources"), + sources, tags: readStringArray(meta, "tags"), createdAt: typeof meta.createdAt === "string" ? 
meta.createdAt : new Date().toISOString(), updatedAt: typeof meta.updatedAt === "string" ? meta.updatedAt : new Date().toISOString(), @@ -113,6 +124,10 @@ function toExportPage(raw: RawWikiPage, snapshot: FreshnessSnapshot): ExportPage freshnessStatus: freshness.freshnessStatus, contradicted: freshness.contradicted, archived: freshness.archived, + contentHash: hashPageBody(raw.body), + sourceHashes: resolveSourceHashes(sources, sourceHashes), + ...(typeof meta.modelId === "string" ? { modelId: meta.modelId } : {}), + ...(typeof meta.promptVersion === "string" ? { promptVersion: meta.promptVersion } : {}), }; } @@ -126,9 +141,10 @@ function toExportPage(raw: RawWikiPage, snapshot: FreshnessSnapshot): ExportPage export async function collectExportPages(root: string): Promise { const raw = await collectRawWikiPages(root); const snapshot = await buildFreshnessSnapshot(root); + const sourceHashes = sourceHashLookupFromSnapshot(snapshot); const kept = raw.filter((page) => page.parseStatus.hasTitle && !page.parseStatus.orphaned); const pages = kept - .map((page) => toExportPage(page, snapshot)) + .map((page) => toExportPage(page, snapshot, sourceHashes)) .filter((page) => page.freshnessStatus !== "orphaned"); pages.sort((a, b) => a.title.localeCompare(b.title)); return pages; diff --git a/src/export/json-export.ts b/src/export/json-export.ts index a48e65d..1e323f3 100644 --- a/src/export/json-export.ts +++ b/src/export/json-export.ts @@ -7,7 +7,17 @@ * additional transformation. * * Schema: - * { exportedAt, pageCount, projectId?, pages: ExportPage[] } + * { schemaVersion, exportedAt, pageCount, projectId?, pages: ExportPage[] } + * + * W4 provenance lives PER PAGE (`ExportPage.modelId` / `promptVersion` plus + * `contentHash` / `sourceHashes`), stamped into each page at compile time. 
+ * It is deliberately not summarized at the envelope level: a single + * export-time model id would misattribute pages compiled under a different + * model, which is exactly the lineage bug this avoids. + * + * `schemaVersion` lets downstream consumers (e.g. the rule importer) pin to a known + * contract. Increment when a breaking field change lands; additive fields + * do not require a bump. * * `projectId` is the optional bridge identifier. When present it pins the * on-disk export to a stable identity that downstream consumers (the @@ -19,8 +29,19 @@ import { validateProjectId } from "./project-id.js"; import type { ExportPage } from "./types.js"; +/** + * Monotonically-incremented envelope version. + * Bump when a breaking field change lands; additive additions do not require a bump. + */ +export const EXPORT_SCHEMA_VERSION = 1; + /** Top-level shape of the JSON export file. */ interface JsonExportDocument { + /** + * Contract version for downstream consumers. Start at 1; increment only on + * breaking envelope changes so consumers can pin a supported range. + */ + schemaVersion: number; exportedAt: string; pageCount: number; /** Optional bridge identifier. See `src/export/project-id.ts` for the validation rule. */ @@ -48,6 +69,7 @@ export function buildJsonExport( options: BuildJsonExportOptions = {}, ): string { const doc: JsonExportDocument = { + schemaVersion: EXPORT_SCHEMA_VERSION, exportedAt: new Date().toISOString(), pageCount: pages.length, pages, diff --git a/src/export/provenance.ts b/src/export/provenance.ts new file mode 100644 index 0000000..0edba31 --- /dev/null +++ b/src/export/provenance.ts @@ -0,0 +1,77 @@ +/** + * Export provenance helpers (export provenance). + * + * Surfaces the auditable lineage a downstream consumer (e.g. 
a downstream rule importer)
+ * needs to answer "this page came from these sources, via this model and
+ * prompt version":
+ *
+ * - {@link hashPageBody} derives a deterministic SHA-256 over a page body so
+ *   a consumer can detect content drift without re-reading the markdown.
+ * - {@link sourceHashLookupFromSnapshot} reuses a freshness snapshot's
+ *   recorded per-source SHA-256 hashes as a filename → hash map.
+ * - {@link resolveSourceHashes} maps a page's `sources` frontmatter list to
+ *   those committed hashes, preserving order and de-duplicating.
+ *
+ * The hashes here are the SAME digests `hasher.ts` writes to state.json — we
+ * surface them rather than recompute, so the export stays consistent with the
+ * compiler's own change-detection view and stays deterministic (no filesystem
+ * re-reads, no wall-clock, no map-iteration order dependence).
+ */
+
+import { createHash } from "node:crypto";
+import type { FreshnessSnapshot } from "../freshness/types.js";
+
+/** Map of source filename → committed SHA-256 hash from state.json. */
+export type SourceHashLookup = Record<string, string>;
+
+/**
+ * Deterministic SHA-256 (hex) of a page body. Stable for identical input:
+ * the same body string always yields the same digest regardless of when or
+ * where the export runs. The string is hashed as its UTF-8 byte encoding.
+ * @param body - Full markdown page body (without frontmatter).
+ * @returns Hex-encoded SHA-256 digest.
+ */
+export function hashPageBody(body: string): string {
+  return createHash("sha256").update(body, "utf-8").digest("hex");
+}
+
+/**
+ * Build a filename -> source-hash lookup from an existing freshness snapshot.
+ * The snapshot already contains state.json's recorded hashes, so export callers
+ * that compute freshness can reuse the same read-only state pass.
+ */
+export function sourceHashLookupFromSnapshot(snapshot: FreshnessSnapshot): SourceHashLookup {
+  // Object.fromEntries defines own data properties, so even a key such as
+  // "__proto__" is stored as an entry instead of hitting the prototype setter.
+  return Object.fromEntries(
+    Object.entries(snapshot.sources).map(([file, source]) => [file, source.recordedHash]),
+  );
+}
+
+/**
+ * Resolve the source-file SHA-256 hashes a page derived from.
+ *
+ * Maps each entry in the page's `sources` frontmatter list to its committed
+ * hash. Sources without a recorded hash (e.g. seed pages with an empty
+ * source list, or a source removed from state) contribute nothing. Only the
+ * lookup's OWN keys count, so a source literally named `constructor` or
+ * `toString` never resolves to an inherited Object.prototype member. Order
+ * follows the `sources` list and duplicates collapse, so the output is
+ * deterministic for a given (sources, state) pair.
+ *
+ * @param sources - Source filenames cited by the page (frontmatter `sources`).
+ * @param lookup - Filename → hash map from {@link sourceHashLookupFromSnapshot}.
+ * @returns Ordered, de-duplicated list of source hashes.
+ */
+export function resolveSourceHashes(
+  sources: string[],
+  lookup: SourceHashLookup,
+): string[] {
+  const hashes: string[] = [];
+  const seen = new Set<string>();
+  for (const file of sources) {
+    // `lookup[file]` alone would return inherited members for prototype names.
+    if (!Object.prototype.hasOwnProperty.call(lookup, file)) continue;
+    const hash = lookup[file];
+    // Skip entries with no usable hash and hashes already emitted.
+    if (typeof hash !== "string" || seen.has(hash)) continue;
+    seen.add(hash);
+    hashes.push(hash);
+  }
+  return hashes;
+}
diff --git a/src/export/rule-candidates-json.ts b/src/export/rule-candidates-json.ts
new file mode 100644
index 0000000..5e0f080
--- /dev/null
+++ b/src/export/rule-candidates-json.ts
@@ -0,0 +1,52 @@
+/**
+ * Rule-candidate JSON export (rule pipeline).
+ *
+ * Emits the persisted `RuleCandidate` records as a JSON array for a downstream rule importer
+ * to import. The array element shape is the canonical rule-importer contract, verbatim
+ * — camelCase keys, tagged evidence, lowercase status/confidence — because the
+ * on-disk candidates are already stored in that shape. Export is a pure read +
+ * filter + serialize with no LLM calls.
+ *
+ * Approved candidates remain in `.llmwiki/rule-candidates/` with `status:
+ * "approved"` (only rejects are archived out), so export reads the live
+ * candidate directory and filters by status scope.
+ */
+
+import { listRuleCandidates } from "../compiler/rule-candidates.js";
+import type { RuleCandidate } from "../utils/rule-types.js";
+
+/** Which candidates to include in an export. */
+export type RuleExportScope = "approved" | "proposed" | "all";
+
+/** Valid export scopes, used to validate CLI input. */
+export const RULE_EXPORT_SCOPES: readonly RuleExportScope[] = [
+  "approved",
+  "proposed",
+  "all",
+];
+
+/**
+ * Collect rule candidates for export, filtered by status scope. Ordering is
+ * the deterministic createdAt-then-id order from {@link listRuleCandidates}.
+ *
+ * @param root - Project root directory.
+ * @param scope - "approved" (default), "proposed", or "all".
+ * @returns The candidates matching the scope.
+ */
+export async function collectRuleCandidatesForExport(
+  root: string,
+  scope: RuleExportScope = "approved",
+): Promise<RuleCandidate[]> {
+  const all = await listRuleCandidates(root);
+  // "all" is the only scope that is not itself a candidate status.
+  if (scope === "all") return all;
+  return all.filter((c) => c.status === scope);
+}
+
+/**
+ * Serialize rule candidates as a pretty-printed JSON array string.
+ * @param candidates - Candidates to serialize.
+ * @returns JSON array string (2-space indent, trailing newline) matching the
+ *   rule importer's RuleCandidate[] contract.
+ */
+export function buildRuleCandidatesJson(candidates: RuleCandidate[]): string {
+  return `${JSON.stringify(candidates, null, 2)}\n`;
+}
diff --git a/src/export/types.ts b/src/export/types.ts index 3edb831..9fcb298 100644 --- a/src/export/types.ts +++ b/src/export/types.ts @@ -107,6 +107,34 @@ export interface ExportPage { contradicted: boolean; /** True when the page is explicitly archived (`archived: true` frontmatter). */ archived: boolean; + /** + * Deterministic SHA-256 (hex) of {@link ExportPage.body}.
Lets a + * downstream auditor (export provenance) detect content drift and verify that an + * imported page still matches what the compiler exported, without + * re-reading the markdown. Stable for identical bodies. + */ + contentHash: string; + /** + * SHA-256 hashes of the source files this page derived from — the same + * per-source digests the compiler records in `.llmwiki/state.json` for + * change detection. Resolved from the page's `sources` list; ordered and + * de-duplicated. Empty when a page has no recorded sources (e.g. seed + * pages). Lets an auditor tie a page back to exact source bytes. + */ + sourceHashes: string[]; + /** + * Model id that produced this page's current content, stamped into the + * page's frontmatter at compile time (export provenance). Unlike an export-time env + * read, this is true per-page lineage: a page compiled by model A keeps + * `modelId: A` even if the exporter's env later points at model B. Absent + * for pages compiled before provenance stamping shipped. + */ + modelId?: string; + /** + * Named prompt-contract version the page was compiled under (export provenance), + * stamped at compile time. Absent for pre-provenance pages. + */ + promptVersion?: string; } /** diff --git a/src/utils/candidate-store.ts b/src/utils/candidate-store.ts new file mode 100644 index 0000000..82813ca --- /dev/null +++ b/src/utils/candidate-store.ts @@ -0,0 +1,72 @@ +/** + * Shared filesystem primitives for candidate queues. + * + * Both the concept review queue (`compiler/candidates.ts`) and the rule + * candidate queue (`compiler/rule-candidates.ts`) persist one JSON file per + * candidate under a directory, list those files, and move rejected records + * into an archive subdirectory. These two operations were identical across the + * queues; extracting them here removes the duplication while keeping each + * queue's own id/shape logic local to its module. 
+ */
+
+import { readdir, rename, unlink, writeFile, mkdir } from "fs/promises";
+import { existsSync } from "fs";
+import path from "path";
+import { safeReadFile } from "./markdown.js";
+
+/** Extension used for all candidate JSON files. */
+export const CANDIDATE_JSON_EXT = ".json";
+
+/**
+ * Turn a dotted candidate id into a single filesystem-safe path segment.
+ *
+ * Only characters outside `[a-zA-Z0-9._-]` are replaced (with `_`); dots and
+ * letter case are PRESERVED. Collapsing dots to `-` (the old behavior) made
+ * `rulecand.a.b-c` and `rulecand.a-b.c` map to the same file, silently
+ * overwriting one candidate with the other. Keeping dots makes the mapping
+ * injective for the ids this codebase emits (category in `[a-z0-9_]`, slug in
+ * `[a-z0-9-]`).
+ * @param candidateId - The dotted candidate id.
+ * @returns The id with every disallowed character replaced by `_`.
+ */
+export function candidateFileId(candidateId: string): string {
+  return candidateId.replace(/[^a-zA-Z0-9._-]/g, "_");
+}
+
+/**
+ * List the file ids (basename without `.json`) of every candidate JSON file in
+ * a directory, ignoring subdirectories (e.g. an `archive/` folder). Returns an
+ * empty list when the directory does not exist.
+ * @param dir - Absolute path to the candidate directory.
+ * @returns File ids in the order `readdir` reports them.
+ */
+export async function listCandidateFileIds(dir: string): Promise<string[]> {
+  if (!existsSync(dir)) return [];
+  const entries = await readdir(dir, { withFileTypes: true });
+  return entries
+    .filter((entry) => entry.isFile() && entry.name.endsWith(CANDIDATE_JSON_EXT))
+    .map((entry) => entry.name.slice(0, -CANDIDATE_JSON_EXT.length));
+}
+
+/**
+ * Move a candidate JSON file into an archive location, creating the archive
+ * directory if needed. Falls back to copy + unlink when `rename` fails across
+ * filesystems. Returns false when the source file does not exist.
+ * @param sourcePath - Absolute path of the pending candidate file.
+ * @param targetPath - Absolute archive destination path.
+ */
+export async function moveCandidateToArchive(
+  sourcePath: string,
+  targetPath: string,
+): Promise<boolean> {
+  if (!existsSync(sourcePath)) return false;
+  await mkdir(path.dirname(targetPath), { recursive: true });
+  try {
+    // Fast path: a rename moves the file in one step.
+    await rename(sourcePath, targetPath);
+  } catch {
+    // Fallback: re-create the file at the target, then remove the original.
+    // NOTE(review): this runs for ANY rename failure, not only cross-device
+    // (EXDEV) errors. Confirm that safeReadFile cannot yield empty content on a
+    // read failure, or the archive copy could be written empty before the unlink.
+    const raw = await safeReadFile(sourcePath);
+    await writeFile(targetPath, raw, "utf-8");
+    await unlink(sourcePath);
+  }
+  return true;
+}
diff --git a/src/utils/constants.ts b/src/utils/constants.ts index 11d1e7f..9f36278 100644 --- a/src/utils/constants.ts +++ b/src/utils/constants.ts @@ -99,6 +99,19 @@ export const CANDIDATES_DIR = ".llmwiki/candidates"; /** Rejected review candidates archived for audit (not deleted). */ export const CANDIDATES_ARCHIVE_DIR = ".llmwiki/candidates/archive"; +/** + * Per-source hashes already processed by `rules extract` (rule pipeline). Kept + * separate from STATE_FILE so rule extraction and concept compilation advance + * their change-detection cursors independently. + */ +export const RULE_STATE_FILE = ".llmwiki/rule-state.json"; + +/** Pending rule candidates (rule pipeline) awaiting approve/reject. */ +export const RULE_CANDIDATES_DIR = ".llmwiki/rule-candidates"; + +/** Rejected rule candidates archived for audit (not deleted). */ +export const RULE_CANDIDATES_ARCHIVE_DIR = ".llmwiki/rule-candidates/archive"; + /** Number of most similar pages to return from embedding-based pre-filter. */ export const EMBEDDING_TOP_K = 15; diff --git a/src/utils/provider.ts b/src/utils/provider.ts index ddb688b..cf8efcf 100644 --- a/src/utils/provider.ts +++ b/src/utils/provider.ts @@ -157,3 +157,20 @@ function getProviderName(): string { export function getActiveProviderName(): string { return getProviderName(); } + +/** + * Resolve the model id the compile pipeline would call, without + * instantiating a provider (which can require API credentials).
+ * + * Used by the export provenance stamp so a downstream auditor can tie a + * compiled page back to the exact model that produced it. Mirrors the + * per-provider model resolution in {@link getProvider} so the reported id + * matches what an actual compile call would use. + */ +export function resolveActiveModelId(): string { + const providerName = getProviderName(); + if (providerName === "anthropic") { + return resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic; + } + return getModelForProvider(providerName as "openai" | "ollama" | "minimax" | "copilot"); +} diff --git a/src/utils/rule-types.ts b/src/utils/rule-types.ts new file mode 100644 index 0000000..1396456 --- /dev/null +++ b/src/utils/rule-types.ts @@ -0,0 +1,82 @@ +/** + * RuleCandidate protocol types (rule pipeline). + * + * These shapes mirror a downstream rule importer's `RuleCandidate` import contract exactly: + * the compiler is the "recommend rules" producer in the learning loop, emitting + * machine-actionable proposed rules that the rule importer imports and a human approves. + * + * Wire encoding is camelCase JSON. Evidence is a tagged union discriminated by + * `kind`. Status and confidence are lowercase string literals. Because these + * records cross the boundary into the rule importer, the field names and casing here are + * load-bearing — do not rename them for local convenience. + */ + +/** Confidence in an extracted rule, on the rule importer's three-level scale. */ +export type RuleConfidence = "low" | "medium" | "high"; + +/** + * Lifecycle status of a candidate. Newly extracted candidates are always + * `proposed`; `approve`/`reject` flip them to `approved`/`rejected`. + */ +export type RuleStatus = "proposed" | "approved" | "rejected"; + +/** + * Evidence reference backing a proposed rule. Tagged union discriminated by + * `kind`. For compiled-wiki sources the producer emits `url` (when the source + * is a URL) or `file` (referencing the page's `sources` filename). 
+ */
+export type EvidenceRef =
+  | { kind: "file"; path: string; lineStart?: number; lineEnd?: number }
+  | { kind: "memory"; memoryId: string }
+  | { kind: "audit"; auditId: string }
+  | { kind: "url"; url: string };
+
+/** The rule that a candidate becomes when approved. Starts at version 1. */
+export interface ProposedRule {
+  /** Stable rule id: `rule.<category>.<slug>` (placeholders reconstructed from the parallel `rulecand.` id form — confirm). */
+  id: string;
+  /** Coarse grouping (e.g. "process", "security", "docs"). */
+  category: string;
+  /** Short human-readable rule title. */
+  title: string;
+  /** Longer description of what the rule enforces and why. */
+  description: string;
+  /** Trigger predicate — a concise, interpreter-defined condition string. */
+  when: string;
+  /** Action discriminator emitted when the rule fires. */
+  then: string;
+  /** Monotonic rule version; new rules start at 1. */
+  version: number;
+}
+
+/** Lineage of a candidate so a downstream auditor can trace its origin. */
+export interface RuleProvenance {
+  /** Producing system; always "llm-wiki-compiler" for this surface. */
+  source: string;
+  /** Model id the extraction ran against (W4 `resolveActiveModelId`). */
+  modelId?: string;
+  /** Prompt/model version stamp for auditability. */
+  modelVersion?: string;
+}
+
+/**
+ * A proposed rule awaiting human approval in the rule importer. Persisted as JSON under
+ * `.llmwiki/rule-candidates/<id>.json` and exported as a JSON array for the rule importer
+ * to consume.
+ */
+export interface RuleCandidate {
+  /** Candidate id: `rulecand.<category>.<slug>`. */
+  id: string;
+  /** The rule this candidate becomes on approval. */
+  proposed: ProposedRule;
+  /** Evidence backing the proposal. */
+  evidence: EvidenceRef[];
+  /** Where this candidate came from. */
+  provenance: RuleProvenance;
+  /** Extraction confidence. */
+  confidence: RuleConfidence;
+  /** Lifecycle status: proposed → approved | rejected. */
+  status: RuleStatus;
+  /** RFC3339 timestamp recorded when the candidate was created.
*/ + createdAt: string; +} diff --git a/test/bridge-export-contract.test.ts b/test/bridge-export-contract.test.ts index ff7c90c..12b25f2 100644 --- a/test/bridge-export-contract.test.ts +++ b/test/bridge-export-contract.test.ts @@ -19,7 +19,7 @@ import path from "path"; import { writePage } from "./fixtures/write-page.js"; import { makeTempRoot } from "./fixtures/temp-root.js"; import { collectExportPages } from "../src/export/collect.js"; -import { buildJsonExport } from "../src/export/json-export.js"; +import { buildJsonExport, EXPORT_SCHEMA_VERSION } from "../src/export/json-export.js"; import { PROJECT_ID_PATTERN, validateProjectId, @@ -42,6 +42,7 @@ interface BridgeExportPage { } interface BridgeExportEnvelope { + schemaVersion: number; exportedAt: string; pageCount: number; projectId?: string; @@ -54,6 +55,13 @@ function findPage(envelope: BridgeExportEnvelope, slug: string): BridgeExportPag return page; } +/** Temp root seeded with one minimal concept page — the common envelope-test setup. 
*/ +async function rootWithOnePage(suffix: string): Promise { + const root = await makeTempRoot(suffix); + await writePage(path.join(root, "wiki/concepts"), "p", { title: "P", summary: "s" }, "Body.\n"); + return root; +} + describe("bridge export contract — collectExportPages + buildJsonExport", () => { it("populates path, kind, citations, and freshnessStatus for a basic concept", async () => { const root = await makeTempRoot("basic"); @@ -144,31 +152,28 @@ describe("bridge export contract — collectExportPages + buildJsonExport", () = describe("bridge export contract — projectId envelope field", () => { it("omits projectId from the envelope when none is supplied", async () => { - const root = await makeTempRoot("noproj"); - await writePage( - path.join(root, "wiki/concepts"), - "p", - { title: "P", summary: "s" }, - "Body.\n", - ); + const root = await rootWithOnePage("noproj"); const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as BridgeExportEnvelope; expect(env.projectId).toBeUndefined(); }); it("embeds a valid projectId in the envelope", async () => { - const root = await makeTempRoot("proj"); - await writePage( - path.join(root, "wiki/concepts"), - "p", - { title: "P", summary: "s" }, - "Body.\n", - ); + const root = await rootWithOnePage("proj"); const pages = await collectExportPages(root); const env = JSON.parse(buildJsonExport(pages, { projectId: "my-kb" })) as BridgeExportEnvelope; expect(env.projectId).toBe("my-kb"); }); }); +describe("bridge export contract — schemaVersion envelope field", () => { + it("emits schemaVersion equal to EXPORT_SCHEMA_VERSION on every build", async () => { + const root = await rootWithOnePage("schemaversion"); + const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as BridgeExportEnvelope; + expect(env.schemaVersion).toBe(EXPORT_SCHEMA_VERSION); + expect(env.schemaVersion).toBe(1); + }); +}); + describe("bridge export contract — validateProjectId", () => { it("accepts canonical 
kebab-case identifiers", () => { for (const id of ["a", "kb", "my-kb", "team-foo-2024", "abc123"]) { diff --git a/test/compile-delta.test.ts b/test/compile-delta.test.ts new file mode 100644 index 0000000..50403e6 --- /dev/null +++ b/test/compile-delta.test.ts @@ -0,0 +1,93 @@ +/** + * Tests for the W5 programmatic incremental compile delta (`compileDelta`). + * + * Verifies the hash-gated delta contract a downstream consumer relies on: + * + * - A first delta compile of a single source returns that source's page. + * - A second delta compile with the now-up-to-date state returns an EMPTY + * delta (nothing changed ⇒ nothing to ship). + * - Adding a new source yields ONLY that new source's page in the delta. + * + * Strategy mirrors compile-provenance.test.ts: stub AnthropicProvider so the + * extraction tool and page-generation calls are deterministic and no real + * API is hit. The extraction title is derived from the source filename so + * each source maps to a distinct, predictable slug. + */ + +import { describe, it, expect, vi } from "vitest"; +import { writeFile } from "fs/promises"; +import path from "path"; +import { compileDelta } from "../src/compiler/delta.js"; +import { AnthropicProvider } from "../src/providers/anthropic.js"; +import { useCompileProject } from "./fixtures/compile-project.js"; + +const FIRST_SOURCE = "alpha.md"; +const FIRST_TITLE = "Alpha Topic"; +const FIRST_SLUG = "alpha-topic"; +const SECOND_SOURCE = "beta.md"; +const SECOND_TITLE = "Beta Topic"; +const SECOND_SLUG = "beta-topic"; + +/** Extraction JSON for one concept titled `title`. */ +function extractionFor(title: string): string { + return JSON.stringify({ + concepts: [{ concept: title, summary: `Summary of ${title}.`, is_new: true }], + }); +} + +const STUB_BODY = "Body content for the topic. ^[alpha.md]"; + +/** + * Stub the provider so toolCall returns extraction keyed on the source + * currently being processed. 
The compiler reads one source at a time, so we + * route by inspecting the system prompt for the source's title marker. + */ +function stubProvider(): void { + vi.spyOn(AnthropicProvider.prototype, "toolCall").mockImplementation( + async (system: string) => { + if (system.includes(SECOND_TITLE) || system.includes("beta")) { + return extractionFor(SECOND_TITLE); + } + return extractionFor(FIRST_TITLE); + }, + ); + vi.spyOn(AnthropicProvider.prototype, "complete").mockResolvedValue(STUB_BODY); +} + +describe("compileDelta — incremental change-gated delta", () => { + const ctx = useCompileProject({ + dirSuffix: "delta", + sourceFile: FIRST_SOURCE, + sourceContent: `# ${FIRST_TITLE}\n\nAbout alpha.`, + }); + + it("returns the compiled page on first run, then an empty delta when unchanged", async () => { + stubProvider(); + + const first = await compileDelta(ctx.dir); + expect(first.changedSlugs).toContain(FIRST_SLUG); + expect(first.compiled).toBe(1); + + const second = await compileDelta(ctx.dir); + expect(second.changedPages).toEqual([]); + expect(second.changedSlugs).toEqual([]); + expect(second.skipped).toBe(1); + }); + + it("returns only the newly added source's page in the delta", async () => { + stubProvider(); + + await compileDelta(ctx.dir); + + await writeFile( + path.join(ctx.dir, "sources", SECOND_SOURCE), + `# ${SECOND_TITLE}\n\nAbout beta.`, + "utf-8", + ); + + const delta = await compileDelta(ctx.dir); + expect(delta.changedSlugs).toEqual([SECOND_SLUG]); + expect(delta.changedPages).toHaveLength(1); + expect(delta.changedPages[0]?.slug).toBe(SECOND_SLUG); + }); +}); diff --git a/test/export-provenance.test.ts b/test/export-provenance.test.ts new file mode 100644 index 0000000..920a9d3 --- /dev/null +++ b/test/export-provenance.test.ts @@ -0,0 +1,178 @@ +/** + * Unit tests for the W4 export provenance stamp. 
+ * + * Verifies the auditable lineage fields a downstream consumer (a downstream rule importer) + * relies on: + * + * - Each page carries `modelId` / `promptVersion` surfaced from its compile-time + * frontmatter — true per-page lineage, NOT a single export-time env read. + * - Each page carries a deterministic `contentHash` over its body and the + * `sourceHashes` it derived from (surfaced from `.llmwiki/state.json`). + * - `contentHash` is stable for the same body and changes when the body does. + */ + +import { describe, it, expect } from "vitest"; +import { createHash } from "node:crypto"; +import { existsSync } from "fs"; +import { mkdir, writeFile } from "fs/promises"; +import path from "path"; +import { writePage } from "./fixtures/write-page.js"; +import { makeTempRoot } from "./fixtures/temp-root.js"; +import { collectExportPages } from "../src/export/collect.js"; +import { buildJsonExport } from "../src/export/json-export.js"; + +interface ProvenancePage { + slug: string; + body: string; + contentHash: string; + sourceHashes: string[]; + modelId?: string; + promptVersion?: string; +} + +interface ProvenanceEnvelope { + pages: ProvenancePage[]; +} + +/** Hex SHA-256 of a string — mirror of the export's body hash for assertions. */ +function sha256(text: string): string { + return createHash("sha256").update(text, "utf-8").digest("hex"); +} + +/** Write a `.llmwiki/state.json` mapping source filenames to fixed hashes. */ +async function writeState(root: string, sources: Record): Promise { + await mkdir(path.join(root, ".llmwiki"), { recursive: true }); + const state = { + version: 1, + indexHash: "", + sources: Object.fromEntries( + Object.entries(sources).map(([file, hash]) => [ + file, + { hash, concepts: [], compiledAt: "2024-01-01T00:00:00.000Z" }, + ]), + ), + }; + await writeFile(path.join(root, ".llmwiki/state.json"), JSON.stringify(state), "utf-8"); +} + +/** Write a corrupt `.llmwiki/state.json` fixture. 
*/ +async function writeCorruptState(root: string): Promise { + await mkdir(path.join(root, ".llmwiki"), { recursive: true }); + await writeFile(path.join(root, ".llmwiki/state.json"), "{not-json", "utf-8"); +} + +function findPage(env: ProvenanceEnvelope, slug: string): ProvenancePage { + const page = env.pages.find((p) => p.slug === slug); + if (!page) throw new Error(`expected page "${slug}" in export`); + return page; +} + +describe("export provenance — per-page modelId + promptVersion", () => { + it("surfaces the compile-time modelId/promptVersion stamped in frontmatter", async () => { + const root = await makeTempRoot("prov-perpage"); + await writePage( + path.join(root, "wiki/concepts"), + "retrieval", + { title: "Retrieval", summary: "x", sources: [], modelId: "model-a", promptVersion: "v1" }, + "Body.\n", + ); + const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope; + + expect(findPage(env, "retrieval").modelId).toBe("model-a"); + expect(findPage(env, "retrieval").promptVersion).toBe("v1"); + }); + + it("omits provenance for pages compiled before stamping shipped", async () => { + const root = await makeTempRoot("prov-legacy"); + await writePage( + path.join(root, "wiki/concepts"), + "legacy", + { title: "Legacy", summary: "s", sources: [] }, + "Body.\n", + ); + const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope; + + expect(findPage(env, "legacy").modelId).toBeUndefined(); + expect(findPage(env, "legacy").promptVersion).toBeUndefined(); + }); + + it("keeps a page's modelId even when a different model exports it", async () => { + const root = await makeTempRoot("prov-noenv"); + await writePage( + path.join(root, "wiki/concepts"), + "pinned", + { title: "Pinned", summary: "s", sources: [], modelId: "compiled-by-A", promptVersion: "v1" }, + "Body.\n", + ); + process.env.LLMWIKI_PROVIDER = "anthropic"; + process.env.LLMWIKI_MODEL = "exporting-with-B"; + try { + const env = 
JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope; + expect(findPage(env, "pinned").modelId).toBe("compiled-by-A"); + } finally { + delete process.env.LLMWIKI_PROVIDER; + delete process.env.LLMWIKI_MODEL; + } + }); +}); + +describe("export provenance — per-page contentHash + sourceHashes", () => { + it("emits a deterministic body hash and resolves source hashes from state", async () => { + const root = await makeTempRoot("prov-page"); + const body = "Retrieval is selective lookup."; + await writeState(root, { "paper.md": "a".repeat(64), "other.md": "b".repeat(64) }); + await writePage( + path.join(root, "wiki/concepts"), + "retrieval", + { title: "Retrieval", summary: "x", sources: ["paper.md", "other.md"] }, + body, + ); + const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope; + const page = findPage(env, "retrieval"); + + expect(page.contentHash).toBe(sha256(page.body)); + expect(page.sourceHashes).toEqual(["a".repeat(64), "b".repeat(64)]); + }); + + it("keeps contentHash stable for the same body across builds", async () => { + const root = await makeTempRoot("prov-stable"); + await writePage( + path.join(root, "wiki/concepts"), + "stable", + { title: "Stable", summary: "s", sources: [] }, + "Identical body content.\n", + ); + const first = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope; + const second = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope; + + expect(findPage(first, "stable").contentHash).toBe(findPage(second, "stable").contentHash); + }); + + it("omits unrecorded sources from sourceHashes (empty when none recorded)", async () => { + const root = await makeTempRoot("prov-nosrc"); + await writePage( + path.join(root, "wiki/concepts"), + "seedlike", + { title: "Seedlike", summary: "s", sources: [] }, + "Body.\n", + ); + const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope; 
+ expect(findPage(env, "seedlike").sourceHashes).toEqual([]); + }); + + it("does not write a .bak when state.json is corrupt", async () => { + const root = await makeTempRoot("prov-corrupt-state"); + await writeCorruptState(root); + await writePage( + path.join(root, "wiki/concepts"), + "safe-export", + { title: "Safe Export", summary: "s", sources: ["a.md"] }, + "Body.\n", + ); + + const env = JSON.parse(buildJsonExport(await collectExportPages(root))) as ProvenanceEnvelope; + + expect(findPage(env, "safe-export").sourceHashes).toEqual([]); + expect(existsSync(path.join(root, ".llmwiki/state.json.bak"))).toBe(false); + }); +}); diff --git a/test/fixtures/rule-extraction.ts b/test/fixtures/rule-extraction.ts new file mode 100644 index 0000000..90024d6 --- /dev/null +++ b/test/fixtures/rule-extraction.ts @@ -0,0 +1,68 @@ +/** + * Shared fixtures for the rule-extraction tests. + * + * Stubs the LLM tool call so extraction is deterministic and offline, and + * seeds a single source file with the provider env the model-id resolver + * needs. Reused across the rule-candidate and rule-extraction-state suites so + * the stub/seed boilerplate lives in one place. + */ + +import { vi, afterEach } from "vitest"; +import { writeFile } from "fs/promises"; +import path from "path"; + +/** Provider env vars that {@link seedRuleSource} sets and must not leak. */ +const PROVIDER_ENV_KEYS = ["LLMWIKI_PROVIDER", "ANTHROPIC_API_KEY"] as const; + +/** + * Snapshot the provider env at call time and restore it after every test in + * the calling file. Call once at the top of any suite that uses + * {@link seedRuleSource}, so the env mutation never leaks into other files and + * makes their credential-dependent assertions order-dependent. 
+ */ +export function restoreProviderEnvAfterEach(): void { + const saved = Object.fromEntries(PROVIDER_ENV_KEYS.map((k) => [k, process.env[k]])); + afterEach(() => { + for (const key of PROVIDER_ENV_KEYS) { + const value = saved[key]; + if (value === undefined) delete process.env[key]; + else process.env[key] = value; + } + }); +} + +/** + * Stub `callClaude` so the extract_rules tool returns one deterministic rule. + * @param category - Category the stubbed rule reports (default "Process"). + * @param evidenceLineEnd - End line the model "cites" (default 2; pass a large + * value to exercise out-of-bounds span dropping). + */ +export async function stubRuleExtraction(category = "Process", evidenceLineEnd = 2): Promise<void> { + const llm = await import("../../src/utils/llm.js"); + vi.spyOn(llm, "callClaude").mockImplementation(async ({ tools }) => { + if (!tools || tools.length === 0) return ""; + return JSON.stringify({ + rules: [{ + category, + title: "Require tests before merge", + description: "All PRs must include passing tests.", + when: "a pull request is opened without test changes", + then: "warn", + confidence: "high", + evidenceLineStart: 1, + evidenceLineEnd, + }], + }); + }); +} + +/** Seed `sources/guide.md` and set the provider env for model-id resolution.
*/ +export async function seedRuleSource(dir: string): Promise<void> { + process.env.LLMWIKI_PROVIDER = "anthropic"; + process.env.ANTHROPIC_API_KEY = "test-key"; + await writeFile( + path.join(dir, "sources", "guide.md"), + "Always run the test suite before merging a change.\nNo exceptions.", + "utf-8", + ); +} diff --git a/test/prompt-budget.test.ts b/test/prompt-budget.test.ts index 874a39b..d1ab6ad 100644 --- a/test/prompt-budget.test.ts +++ b/test/prompt-budget.test.ts @@ -8,6 +8,7 @@ import { describe, it, expect, afterEach } from "vitest"; import { + budgetAndNumberSource, buildBudgetedCombinedContent, resolvePromptBudgetChars, type SourceSlice, @@ -20,6 +21,21 @@ afterEach(() => { delete process.env[ENV_KEY]; }); +describe("budgetAndNumberSource", () => { + it("prepends 1-based line numbers so line-span prompts have real anchors", () => { + const numbered = budgetAndNumberSource("guide.md", "first\nsecond"); + expect(numbered).toContain("1 | first"); + expect(numbered).toContain("2 | second"); + }); + + it("clips content past the budget so extraction never blows the prompt window", () => { + process.env[ENV_KEY] = "10"; + const numbered = budgetAndNumberSource("guide.md", "x".repeat(500)); + expect(numbered).toContain("truncated for prompt budget"); + expect(numbered.length).toBeLessThan(500); + }); +}); + describe("resolvePromptBudgetChars", () => { it("returns the default when env is unset", () => { expect(resolvePromptBudgetChars()).toBe(DEFAULT_PROMPT_BUDGET_CHARS); diff --git a/test/rule-candidates.test.ts b/test/rule-candidates.test.ts new file mode 100644 index 0000000..301e460 --- /dev/null +++ b/test/rule-candidates.test.ts @@ -0,0 +1,148 @@ +/** + * Tests for the rule-candidate pipeline: extraction → candidate → + * approve → export. The LLM tool call is stubbed via vi.spyOn on the shared + * `callClaude` helper (the same mock pattern used by review.test.ts), so no + * network call is made and the extracted rule is deterministic.
+ * + * The shape assertions verify the emitted record matches a downstream rule importer's + * `RuleCandidate` contract exactly: camelCase keys, `status: "proposed"`, + * tagged evidence, the `proposed` rule fields, and a stamped provenance.modelId. + */ + +import { describe, it, expect } from "vitest"; +import { mkdir, writeFile } from "fs/promises"; +import path from "path"; +import { extractRuleCandidates } from "../src/compiler/rule-extractor.js"; +import { + listRuleCandidates, + setRuleCandidateStatus, + validateRuleCandidate, +} from "../src/compiler/rule-candidates.js"; +import { + buildRuleCandidatesJson, + collectRuleCandidatesForExport, +} from "../src/export/rule-candidates-json.js"; +import { candidateFileId } from "../src/utils/candidate-store.js"; +import { useTempRoot } from "./fixtures/temp-root.js"; +import { + restoreProviderEnvAfterEach, + seedRuleSource as seedSource, + stubRuleExtraction, +} from "./fixtures/rule-extraction.js"; +import type { RuleCandidate } from "../src/utils/rule-types.js"; + +const FIXED_NOW = "2026-05-31T00:00:00.000Z"; + +/** Candidate ids carry a content-hash suffix so distinct rules never collide. 
*/ +const CANDIDATE_ID_RE = /^rulecand\.process\.require-tests-before-merge-[a-f0-9]{8}$/; + +restoreProviderEnvAfterEach(); + +describe("rule-candidate extraction", () => { + const ctx = useTempRoot(["sources"]); + + it("emits a RuleCandidate matching the rule-import contract shape", async () => { + await seedSource(ctx.dir); + await stubRuleExtraction(); + + const result = await extractRuleCandidates(ctx.dir, FIXED_NOW); + expect(result.candidates).toHaveLength(1); + const candidate = result.candidates[0]!; + + expect(candidate.id).toMatch(CANDIDATE_ID_RE); + expect(candidate.status).toBe("proposed"); + expect(candidate.confidence).toBe("high"); + expect(candidate.createdAt).toBe(FIXED_NOW); + expect(validateRuleCandidate(candidate)).toBeNull(); + assertProposedRule(candidate); + assertEvidenceAndProvenance(candidate); + }); + + it("persists the candidate JSON and lists it back", async () => { + await seedSource(ctx.dir); + await stubRuleExtraction(); + + await extractRuleCandidates(ctx.dir, FIXED_NOW); + const listed = await listRuleCandidates(ctx.dir); + expect(listed).toHaveLength(1); + expect(listed[0]!.proposed.id).toBe(`rule.${listed[0]!.id.slice("rulecand.".length)}`); + }); +}); + +describe("rule-candidate approve + export", () => { + const ctx = useTempRoot(["sources"]); + + it("approve flips status to approved", async () => { + await seedSource(ctx.dir); + await stubRuleExtraction(); + const { candidates } = await extractRuleCandidates(ctx.dir, FIXED_NOW); + const fileId = candidateFileId(candidates[0]!.id); + + const updated = await setRuleCandidateStatus(ctx.dir, fileId, "approved"); + expect(updated!.status).toBe("approved"); + + const listed = await listRuleCandidates(ctx.dir); + expect(listed[0]!.status).toBe("approved"); + }); + + it("export emits a JSON array of approved RuleCandidate records", async () => { + await seedSource(ctx.dir); + await stubRuleExtraction(); + const { candidates } = await extractRuleCandidates(ctx.dir, FIXED_NOW); + const 
fileId = candidateFileId(candidates[0]!.id); + await setRuleCandidateStatus(ctx.dir, fileId, "approved"); + + const approved = await collectRuleCandidatesForExport(ctx.dir, "approved"); + const json = JSON.parse(buildRuleCandidatesJson(approved)) as RuleCandidate[]; + expect(Array.isArray(json)).toBe(true); + expect(json).toHaveLength(1); + expect(json[0]!.status).toBe("approved"); + expect(json[0]!.proposed.version).toBe(1); + expect(json[0]!.evidence[0]).toEqual({ kind: "file", path: "guide.md", lineStart: 1, lineEnd: 2 }); + }); + + it("skips malformed persisted candidates instead of exporting them", async () => { + await mkdir(path.join(ctx.dir, ".llmwiki/rule-candidates"), { recursive: true }); + await writeFile( + path.join(ctx.dir, ".llmwiki/rule-candidates/rulecand-process-bad.json"), + JSON.stringify({ + id: "rulecand.process.bad", + proposed: {}, + evidence: [], + provenance: { source: "llm-wiki-compiler" }, + confidence: "high", + status: "approved", + createdAt: FIXED_NOW, + }), + "utf-8", + ); + + const approved = await collectRuleCandidatesForExport(ctx.dir, "approved"); + + expect(approved).toEqual([]); + }); +}); + +/** Assert the `proposed` rule sub-object matches the contract. */ +function assertProposedRule(candidate: RuleCandidate): void { + expect(candidate.proposed).toEqual({ + id: `rule.${candidate.id.slice("rulecand.".length)}`, + category: "process", + title: "Require tests before merge", + description: "All PRs must include passing tests.", + when: "a pull request is opened without test changes", + then: "warn", + version: 1, + }); +} + +/** Assert tagged evidence + provenance stamp (modelId from W4 resolver). 
*/ +function assertEvidenceAndProvenance(candidate: RuleCandidate): void { + expect(candidate.evidence).toEqual([ + { kind: "file", path: "guide.md", lineStart: 1, lineEnd: 2 }, + ]); + expect(candidate.provenance.source).toBe("llm-wiki-compiler"); + expect(typeof candidate.provenance.modelId).toBe("string"); + expect(candidate.provenance.modelId!.length).toBeGreaterThan(0); + expect(candidate.provenance.modelVersion).toBe("v1"); +} diff --git a/test/rule-extraction-state.test.ts b/test/rule-extraction-state.test.ts new file mode 100644 index 0000000..8b5582e --- /dev/null +++ b/test/rule-extraction-state.test.ts @@ -0,0 +1,90 @@ +/** + * Tests for rule-extraction state + approval-preservation (the rule pipeline blockers). + * + * `rules extract` must (a) advance its OWN change-detection cursor in + * `.llmwiki/rule-state.json` so an unchanged source is not re-extracted every + * run, and (b) never overwrite a human's approve/reject decision. The LLM tool + * call is stubbed via vi.spyOn so extraction is deterministic and offline. + */ + +import { describe, it, expect, vi, afterEach } from "vitest"; +import { rm } from "fs/promises"; +import path from "path"; +import { extractRuleCandidates } from "../src/compiler/rule-extractor.js"; +import { + listRuleCandidates, + setRuleCandidateStatus, +} from "../src/compiler/rule-candidates.js"; +import { candidateFileId } from "../src/utils/candidate-store.js"; +import { RULE_STATE_FILE } from "../src/utils/constants.js"; +import { useTempRoot } from "./fixtures/temp-root.js"; +import { + restoreProviderEnvAfterEach, + seedRuleSource as seedSource, + stubRuleExtraction, +} from "./fixtures/rule-extraction.js"; + +const NOW = "2026-05-31T00:00:00.000Z"; + +/** Out-of-bounds end line (past the 2-line source) to exercise span dropping. 
*/ +const OUT_OF_BOUNDS_END = 9999; + +restoreProviderEnvAfterEach(); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("rule extraction state cursor", () => { + const ctx = useTempRoot(["sources"]); + + it("does not re-process an unchanged source on a second run", async () => { + await seedSource(ctx.dir); + await stubRuleExtraction(); + const first = await extractRuleCandidates(ctx.dir, NOW); + expect(first.processedSources).toEqual(["guide.md"]); + + const second = await extractRuleCandidates(ctx.dir, NOW); + expect(second.processedSources).toEqual([]); + expect(second.candidates).toHaveLength(0); + }); +}); + +describe("approval preservation", () => { + const ctx = useTempRoot(["sources"]); + + it("does not overwrite an approved candidate when the source is re-extracted", async () => { + await seedSource(ctx.dir); + await stubRuleExtraction(); + const { candidates } = await extractRuleCandidates(ctx.dir, NOW); + await setRuleCandidateStatus(ctx.dir, candidateFileId(candidates[0]!.id), "approved"); + + // Force re-extraction of the same (unchanged) source by clearing the cursor. + await rm(path.join(ctx.dir, RULE_STATE_FILE), { force: true }); + const rerun = await extractRuleCandidates(ctx.dir, NOW); + + const listed = await listRuleCandidates(ctx.dir); + expect(listed).toHaveLength(1); + expect(listed[0]!.status).toBe("approved"); + expect(rerun.candidates).toHaveLength(0); + expect(rerun.notes.some((n) => n.includes("approved"))).toBe(true); + }); +}); + +describe("evidence span bounding + category sanitization", () => { + const ctx = useTempRoot(["sources"]); + + it("drops an out-of-bounds evidence line and emits an import-valid id", async () => { + await seedSource(ctx.dir); + await stubRuleExtraction("Code Review", OUT_OF_BOUNDS_END); + const { candidates } = await extractRuleCandidates(ctx.dir, NOW); + const candidate = candidates[0]!; + + // category had a space -> underscored segment; id passes the rule importer's regex. 
+ expect(candidate.id).toMatch(/^rulecand\.code_review\.[a-z0-9-]+$/); + // evidenceLineEnd was 9999 (past the 2-line source) -> dropped. + const ref = candidate.evidence[0]!; + expect(ref.kind).toBe("file"); + expect("lineEnd" in ref ? ref.lineEnd : undefined).toBeUndefined(); + }); +}); diff --git a/test/rule-validation.test.ts b/test/rule-validation.test.ts new file mode 100644 index 0000000..5cf76ae --- /dev/null +++ b/test/rule-validation.test.ts @@ -0,0 +1,104 @@ +/** + * Unit tests for the rule-candidate id/category/validation helpers (rule pipeline). + * + * These guard the producer↔the rule importer contract: category alphabet, collision-free + * ids, evidence-span sanity, and the producer-side mirror of the rule importer's import + * gate (so the compiler never "successfully" emits a candidate the rule importer rejects). + */ + +import { describe, it, expect } from "vitest"; +import { + buildRuleCandidate, + buildRuleSlug, + sanitizeRuleCategory, + validateRuleCandidate, +} from "../src/compiler/rule-candidates.js"; +import { parseRules } from "../src/compiler/rule-prompts.js"; +import type { RuleCandidate } from "../src/utils/rule-types.js"; + +const NOW = "2026-05-31T00:00:00.000Z"; + +function candidate(category: string, slug: string): RuleCandidate { + return buildRuleCandidate( + { + category, + slug, + title: "T", + description: "d", + when: "w", + then: "warn", + evidence: [{ kind: "file", path: "guide.md" }], + provenance: { source: "llm-wiki-compiler" }, + confidence: "high", + }, + NOW, + ); +} + +describe("sanitizeRuleCategory", () => { + it("collapses hyphen/space runs to underscores (the rule importer's [a-z0-9_] alphabet)", () => { + expect(sanitizeRuleCategory("Code Review")).toBe("code_review"); + expect(sanitizeRuleCategory("ci/cd pipeline")).toBe("ci_cd_pipeline"); + }); + + it("falls back to 'general' for an empty result", () => { + expect(sanitizeRuleCategory("!!!")).toBe("general"); + }); +}); + +describe("buildRuleSlug", () => { + 
it("appends an 8-hex content hash so same-title rules never collide", () => { + const a = buildRuleSlug("Require tests", "sourceA\nwhen\nthen"); + const b = buildRuleSlug("Require tests", "sourceB\nwhen\nthen"); + expect(a).toMatch(/^require-tests-[a-f0-9]{8}$/); + expect(a).not.toBe(b); + }); +}); + +describe("validateRuleCandidate", () => { + it("accepts a sanitized multi-word category", () => { + expect(validateRuleCandidate(candidate("code_review", "x-abcd1234"))).toBeNull(); + }); + + it("rejects a hyphen in the category segment (the rule importer would refuse it)", () => { + expect(validateRuleCandidate(candidate("code-review", "x-abcd1234"))).toContain("candidate id"); + }); + + it("rejects non-https url evidence", () => { + const c = candidate("process", "x-abcd1234"); + c.evidence = [{ kind: "url", url: "http://example.com" }]; + expect(validateRuleCandidate(c)).toContain("https"); + }); + + it("rejects an over-cap predicate", () => { + const c = candidate("process", "x-abcd1234"); + c.proposed.when = "x".repeat(513); + expect(validateRuleCandidate(c)).toContain("when"); + }); + + it("rejects malformed proposed-rule objects before export", () => { + const c = candidate("process", "x-abcd1234") as unknown as Record<string, unknown>; + c.proposed = {}; + expect(validateRuleCandidate(c as RuleCandidate)).toContain("proposed.id"); + }); + + it("rejects mismatched candidate/proposed ids", () => { + const c = candidate("process", "x-abcd1234"); + c.proposed.id = "rule.other.x-abcd1234"; + expect(validateRuleCandidate(c)).toContain("does not match"); + }); +}); + +describe("parseRules evidence-span sanity", () => { + it("drops an inverted span (end < start) rather than emitting it", () => { + const raw = JSON.stringify({ + rules: [{ + category: "process", title: "T", description: "d", when: "w", then: "warn", + confidence: "high", evidenceLineStart: 40, evidenceLineEnd: 7, + }], + }); + const [rule] = parseRules(raw); + expect(rule!.evidenceLineStart).toBeUndefined(); +
expect(rule!.evidenceLineEnd).toBeUndefined(); + }); +}); diff --git a/test/rules-integration.test.ts b/test/rules-integration.test.ts new file mode 100644 index 0000000..bb3d305 --- /dev/null +++ b/test/rules-integration.test.ts @@ -0,0 +1,125 @@ +/** + * CLI-level integration tests for the `llmwiki rules` subcommand family (rule pipeline). + * + * Spawns real subprocesses via the shared run-cli fixture so the full CLI + * surface (Commander routing, exit codes, stdout/stderr) is exercised without + * mocking internal modules. Candidate JSON is written manually so list / + * approve / reject / export need no LLM call; `rules extract` is tested only + * for its credential-failure path (a real extraction would need an API key). + */ + +import { describe, it, expect } from "vitest"; +import path from "path"; +import { mkdir, rm, writeFile, readFile } from "fs/promises"; +import { tmpdir } from "os"; +import { runCLI, expectCLIExit, expectCLIFailure } from "./fixtures/run-cli.js"; +import type { RuleCandidate } from "../src/utils/rule-types.js"; + +/** Create a disposable temp workspace with a sources/ sub-folder. */ +async function makeWorkspace(suffix: string): Promise<string> { + const cwd = path.join(tmpdir(), `llmwiki-rules-${suffix}-${Date.now()}`); + await mkdir(path.join(cwd, "sources"), { recursive: true }); + return cwd; +} + +/** A minimal, import-valid approved candidate for export/list fixtures.
*/ +function makeCandidate(): RuleCandidate { + return { + id: "rulecand.process.require-tests-abcd1234", + proposed: { + id: "rule.process.require-tests-abcd1234", + category: "process", + title: "Require tests", + description: "PRs need tests.", + when: "a PR is opened", + then: "warn", + version: 1, + }, + evidence: [{ kind: "file", path: "guide.md", lineStart: 1, lineEnd: 2 }], + provenance: { source: "llm-wiki-compiler", modelId: "m", modelVersion: "v1" }, + confidence: "high", + status: "approved", + createdAt: "2026-05-31T00:00:00.000Z", + }; +} + +/** Write a candidate JSON into the pending rule-candidate directory. */ +async function writeCandidate(cwd: string, candidate: RuleCandidate): Promise<void> { + const dir = path.join(cwd, ".llmwiki", "rule-candidates"); + await mkdir(dir, { recursive: true }); + await writeFile(path.join(dir, `${candidate.id}.json`), JSON.stringify(candidate, null, 2), "utf-8"); +} + +describe("rules CLI integration", () => { + it("rules list on a fresh project exits 0 and reports no pending candidates", async () => { + const cwd = await makeWorkspace("list-empty"); + try { + const result = await runCLI(["rules", "list"], cwd); + expectCLIExit(result, 0); + expect(result.stdout.toLowerCase()).toContain("no pending"); + } finally { + await rm(cwd, { recursive: true, force: true }); + } + }, 30_000); + + it("rules approve with a missing id exits non-zero with a not-found error", async () => { + const cwd = await makeWorkspace("approve-missing"); + try { + const result = await runCLI(["rules", "approve", "rulecand.x.does-not-exist"], cwd); + expectCLIFailure(result); + expect(`${result.stdout}${result.stderr}`.toLowerCase()).toContain("not found"); + } finally { + await rm(cwd, { recursive: true, force: true }); + } + }, 30_000); + + it("rules reject with a missing id exits non-zero", async () => { + const cwd = await makeWorkspace("reject-missing"); + try { + const result = await runCLI(["rules", "reject", "rulecand.x.does-not-exist"],
cwd); + expectCLIFailure(result); + } finally { + await rm(cwd, { recursive: true, force: true }); + } + }, 30_000); + + it("rules export with an invalid --scope exits non-zero with a guidance message", async () => { + const cwd = await makeWorkspace("export-bad-scope"); + try { + const result = await runCLI(["rules", "export", "--scope", "bogus"], cwd); + expectCLIFailure(result); + expect(`${result.stdout}${result.stderr}`).toContain("scope"); + } finally { + await rm(cwd, { recursive: true, force: true }); + } + }, 30_000); + + it("rules export writes the candidate array to the default output path", async () => { + const cwd = await makeWorkspace("export-writes"); + try { + await writeCandidate(cwd, makeCandidate()); + const result = await runCLI(["rules", "export", "--scope", "approved"], cwd); + expectCLIExit(result, 0); + const written = await readFile(path.join(cwd, "dist/exports/rule-candidates.json"), "utf-8"); + const parsed = JSON.parse(written) as RuleCandidate[]; + expect(parsed).toHaveLength(1); + expect(parsed[0]!.id).toBe("rulecand.process.require-tests-abcd1234"); + } finally { + await rm(cwd, { recursive: true, force: true }); + } + }, 30_000); + + it("rules extract fails with a credential error when no API key is set", async () => { + const cwd = await makeWorkspace("extract-no-key"); + try { + await writeFile(path.join(cwd, "sources", "guide.md"), "Always run tests.\nNo exceptions.\n", "utf-8"); + const result = await runCLI(["rules", "extract"], cwd, { + ANTHROPIC_API_KEY: "", + ANTHROPIC_AUTH_TOKEN: "", + }); + expectCLIFailure(result); + } finally { + await rm(cwd, { recursive: true, force: true }); + } + }, 30_000); +}); diff --git a/vitest.config.ts b/vitest.config.ts index 5979585..6805d8c 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -8,6 +8,14 @@ export default defineConfig({ globals: true, testTimeout: TEST_TIMEOUT_MS, hookTimeout: HOOK_TIMEOUT_MS, + // Many integration tests spawn a CLI subprocess. 
With one worker per core + // each ALSO spawning a node process, the machine is oversubscribed ~2x and + // subprocess spawns get starved past their timeout — a non-deterministic + // failure whose victim varies per run. Cap workers to half the cores so + // each worker+subprocess pair fits, independent of how many subprocess + // tests exist. + maxWorkers: "50%", + minWorkers: 1, // Don't pick up tests from sibling worktrees living under local worktree dirs. // Worktrees share the parent's working directory tree, so without this // exclude vitest discovers and runs every feature branch's tests.