From 7f88ca7e5e5379f3d4b71df385ba71e86c62bb40 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Fri, 16 Jan 2026 12:14:02 +0100 Subject: [PATCH 001/106] feat: save documentation info to SQLite databse This PR adds a SQLite database that contains all of the documentation info. --- DocGen4.lean | 1 + DocGen4/DB.lean | 729 ++++++++++++++++++++++++++++++++++++++ DocGen4/Process/Base.lean | 12 + Main.lean | 8 + lake-manifest.json | 24 +- lakefile.lean | 6 +- 6 files changed, 777 insertions(+), 3 deletions(-) create mode 100644 DocGen4/DB.lean diff --git a/DocGen4.lean b/DocGen4.lean index 70ad4a31..98f8cca0 100644 --- a/DocGen4.lean +++ b/DocGen4.lean @@ -6,3 +6,4 @@ Authors: Henrik Böving import DocGen4.Process import DocGen4.Load import DocGen4.Output +import DocGen4.DB diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean new file mode 100644 index 00000000..36e463d9 --- /dev/null +++ b/DocGen4/DB.lean @@ -0,0 +1,729 @@ +import DocGen4.Process +import SQLite + +namespace DocGen4.DB + +section +open Lean Widget Elab +open SQLite.Blob + +inductive SortFormer where + | type | prop | sort +deriving ToJson, FromJson, BEq, Hashable, Repr + +inductive RenderedCode.Tag where + | keyword + | string + | const (name : Lean.Name) + | sort (former : Option SortFormer) +deriving BEq, Hashable, Repr + +instance : ToBinary RenderedCode.Tag where + serializer + | .keyword, b => b.push 0 + | .string, b => b.push 1 + | .const n, b => b.push 2 |> ToBinary.serializer n + | .sort none, b => b.push 3 + | .sort (some .type), b => b.push 4 + | .sort (some .prop), b => b.push 5 + | .sort (some .sort), b => b.push 6 + +instance : FromBinary RenderedCode.Tag where + deserializer := do + match (← .byte) with + | 0 => return .keyword + | 1 => return .string + | 2 => .const <$> FromBinary.deserializer + | 3 => return .sort none + | 4 => return .sort (some .type) + | 5 => return .sort (some .prop) + | 6 => return .sort (some .sort) + | other => throw s!"Expected 0...7 for `Tag`, got {other}" 
+ +partial instance [ToBinary α] : ToBinary (Lean.Widget.TaggedText α) where + serializer := go +where + go + | .text s, b => b.push 0 |> ToBinary.serializer s + | .tag a t, b => b.push 1 |> ToBinary.serializer a |> go t + | .append xs, b => + have : ToBinary (Lean.Widget.TaggedText α) := ⟨go⟩ + b.push 2 |> ToBinary.serializer xs + +partial instance [FromBinary α] : FromBinary (Lean.Widget.TaggedText α) where + deserializer := go +where + go := do + match (← .byte) with + | 0 => .text <$> FromBinary.deserializer + | 1 => .tag <$> FromBinary.deserializer <*> go + | 2 => + have : FromBinary (Lean.Widget.TaggedText α) := ⟨go⟩ + .append <$> FromBinary.deserializer + | other => throw s!"Expected 0...3 for `TaggedText`, got {other}" + +def RenderedCode := Lean.Widget.TaggedText RenderedCode.Tag +deriving Inhabited, BEq, Repr, ToBinary, FromBinary + +def RenderedCode.empty : RenderedCode := .append #[] + +open Lean.Widget in +mutual +partial def RenderedCode.pushRight (xs : Array RenderedCode) (x : RenderedCode) : Array RenderedCode := + if xs.size = 0 then #[x] + else xs.modify (xs.size - 1) (·.appendImpl x) + +partial def RenderedCode.pushLeft (x : RenderedCode) (xs : Array RenderedCode) : Array RenderedCode := + if xs.size = 0 then #[x] + else xs.modify 0 x.appendImpl + +partial def RenderedCode.appendImpl : RenderedCode → RenderedCode → RenderedCode + | .text "", x => x + | x, .text "" => x + | .append #[], x => x + | x, .append #[] => x + | .append xs, .append ys => .append (xs ++ ys) + | .append xs, y => .append (pushRight xs y) + | x, .append ys => .append (pushLeft x ys) + | .text x, .text y => .text (x ++ y) + | x, y => .append #[x, y] +end + +instance : Append RenderedCode := ⟨RenderedCode.appendImpl⟩ + +/-- +In Lean syntax declarations the following pattern is quite common: +``` +syntax term " + " term : term +``` +that is, we place spaces around the operator in the middle. 
When the +`InfoTree` framework provides us with information about what source token +corresponds to which identifier it will thus say that `" + "` corresponds to +`HAdd.hadd`. This is however not the way we want this to be linked, in the HTML +only `+` should be linked, taking care of this is what this function is +responsible for. +-/ +-- TODO dedup with original location +def splitWhitespaces (s : String) : String × String × String := + let length := s.length + let s := s.trimAsciiStart + let front := "".pushn ' ' (length - s.positions.count) + let length := s.positions.count + let s := s.trimAsciiEnd.copy + let back := "".pushn ' ' (length - s.length) + (front, s, back) + +def findWs (s : String.Slice) : s.Pos := go s.startPos +where + go (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else if (i.get h).isWhitespace then go (i.next h) + else i + termination_by i + +-- This doesn't fail on malformed strings because it's better to give the user some feedback than +-- none here. This tokenization is just to highlight keywords correctly. 
+def findString (s : String.Slice) : s.Pos := start s.startPos +where + start (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else if (i.get h) == '"' then contents (i.next h) + else i + contents (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else if (i.get h) == '\\' then escape (i.next h) + else if (i.get h) == '"' then i.next h + else contents (i.next h) + termination_by i + escape (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else contents (i.next h) + termination_by i + +def findOther (s : String.Slice) : s.Pos := go s.startPos +where + go (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else + let c := i.get h + if c == '"' then i + else if c.isWhitespace then i + else go (i.next h) + termination_by i + +def tokenize (txt : String) : RenderedCode := Id.run do + let mut todo := txt.drop 0 + let mut toks : RenderedCode := .empty + while !todo.isEmpty do + if todo.startsWith Char.isWhitespace then + let i := findWs todo + let ws := todo.sliceTo i + todo := todo.sliceFrom i + toks := toks ++ .text ws.copy + continue + else if todo.startsWith '"' then + let i := findString todo + let str := todo.sliceTo i + todo := todo.sliceFrom i + toks := toks ++ .tag .string (.text str.copy) + else + let i := findOther todo + let tok := todo.sliceTo i + todo := todo.sliceFrom i + let tok := tok.copy + if tok ∈ kws then + toks := toks ++ .tag .keyword (.text tok) + else + toks := toks ++ .text tok + continue + return toks +where + tokenEnder (str : String.Slice) : Bool := str.front?.map Char.isAlphanum |>.getD true + kws := ["let", "fun", "do", "match", "with", "if", "then", "else", "break", "continue", "for", "in", "mut"] + +partial def renderTagged + (doc : CodeWithInfos) : + RenderedCode := Id.run do + match doc with + | .text txt => + return tokenize txt + | .tag i t => + let {ctx := _, info, children := _} := i.info.val + match info with + | .ofTermInfo termInfo => + match termInfo.expr with + | .const n _ => + -- TODO replicate blacklist logic + 
match t with + | .text t => + let (front, t, back) := splitWhitespaces t + return .append #[.text front, .tag (.const n) (.text t), .text back] + | _ => + .tag (.const n) <$> renderTagged t + | .sort _u => + match t with + | .text t => + let sortPrefix :: rest := t.splitOn " " | unreachable! + let sortFormer := match sortPrefix with + | "Type" => some .type + | "Prop" => some .prop + | "Sort" => some .sort + | _ => none + let mut restStr := String.intercalate " " rest + if restStr.length != 0 then + restStr := " " ++ restStr + return .append #[.tag (.sort sortFormer) (.text sortPrefix), .text restStr] + | _ => + .tag (.sort none) <$> renderTagged t + | _ => renderTagged t + | _ => renderTagged t + | .append xs => xs.mapM renderTagged <&> (·.foldl (init := .empty) (· ++ ·)) + + +end + +def getDb (dbFile : System.FilePath) : IO SQLite := do + -- SQLite atomically creates the DB file, and the schema and journal settings here are applied + -- idempotently. This avoids DB creation race conditions. + let db ← SQLite.openWith dbFile .readWriteCreate + db.exec "PRAGMA busy_timeout = 5000" + db.exec "PRAGMA journal_mode = WAL" + db.exec "PRAGMA foreign_keys = ON" + db.transaction (db.exec ddl) + return db +where + ddl := + r#" +PRAGMA journal_mode = WAL; + +-- Modules table +CREATE TABLE IF NOT EXISTS modules ( + name TEXT PRIMARY KEY, + source_url TEXT +); + +-- Direct imports +CREATE TABLE IF NOT EXISTS module_imports ( + importer TEXT NOT NULL, + imported TEXT NOT NULL, + PRIMARY KEY (importer, imported), + FOREIGN KEY (importer) REFERENCES modules(name) ON DELETE CASCADE + -- There's no + -- FOREIGN KEY (imported) REFERENCES modules(name) + -- because docs are built incrementally. +); + +-- Index for reverse queries: "what imports this module?" 
+CREATE INDEX IF NOT EXISTS idx_module_imports_imported ON module_imports(imported); + +CREATE TABLE IF NOT EXISTS module_items ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + item_type TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS declaration_ranges ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + start_line INTEGER NOT NULL, + start_column INTEGER NOT NULL, + start_utf16 INTEGER NOT NULL, + end_line INTEGER NOT NULL, + end_column INTEGER NOT NULL, + end_utf16 INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS markdown_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + text TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS name_info ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + kind TEXT, + name TEXT NOT NULL, + type TEXT NOT NULL, + doc TEXT, + sorried INTEGER NOT NULL, + render INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS axioms ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + is_unsafe INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS constructors ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + type_position INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE + FOREIGN KEY (module_name, type_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT 
EXISTS inductives ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + is_unsafe INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS class_inductives ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + is_unsafe INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS opaques ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + safety TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS definitions ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + is_unsafe INTEGER NOT NULL, + hints TEXT NOT NULL, + is_noncomputable INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS definition_equations ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + code TEXT NOT NULL, + sequence INTEGER NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS instances ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + class_name TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS instance_args ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + type_name TEXT NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES instances(module_name, 
position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS structures ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + is_class INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS structure_parents ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + projection_fn TEXT NOT NULL, + type TEXT NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES structures(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS structure_constructors ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + name TEXT NOT NULL, + type TEXT NOT NULL, + doc TEXT, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS structure_fields ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + name TEXT NOT NULL, + type TEXT NOT NULL, + doc TEXT, + render INTEGER NOT NULL, + sequence INTEGER NOT NULL, + is_direct INTEGER NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); +"# + +structure DB where + sqlite : SQLite + deleteModule (modName : String) : IO Unit + saveModule (modName : String) (sourceUrl? 
: Option String) : IO Unit + saveImport (modName : String) (imported : Lean.Name) : IO Unit + saveMarkdownDocstring (modName : String) (position : Int64) (text : String) : IO Unit + saveDeclarationRange (modName : String) (position : Int64) (declRange : Lean.DeclarationRange) : IO Unit + saveInfo (modName : String) (position : Int64) (kind : String) (info : Process.Info) : IO Unit + saveAxiom (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit + saveOpaque (modName : String) (position : Int64) (safety : Lean.DefinitionSafety) : IO Unit + saveDefinition (modName : String) (position : Int64) (isUnsafe : Bool) (hints : Lean.ReducibilityHints) (isNonComputable : Bool) : IO Unit + saveDefinitionEquation (modName : String) (position : Int64) (code : Lean.Widget.CodeWithInfos) (sequence : Int64) : IO Unit + saveInstance (modName : String) (position : Int64) (className : String) : IO Unit + saveInstanceArg (modName : String) (position : Int64) (sequence : Int64) (typeName : String) : IO Unit + saveInductive (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit + saveConstructor (modName : String) (position : Int64) (typePosition : Int64) : IO Unit + saveClassInductive (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit + saveStructure (modName : String) (position : Int64) (isClass : Bool) : IO Unit + saveStructureConstructor (modName : String) (position : Int64) (name : String) (type : Lean.Widget.CodeWithInfos) (doc : Option String) : IO Unit + saveStructureParent (modName : String) (position : Int64) (sequence : Int32) (projectionFn : String) (type : Lean.Widget.CodeWithInfos) : IO Unit + saveStructureField (modName : String) (position : Int64) (sequence : Int64) (name : String) (type : Lean.Widget.CodeWithInfos) (doc : Option String) (render : Bool) (isDirect : Bool) : IO Unit + +instance : Coe DB SQLite where + coe := DB.sqlite + +private def run (stmt : SQLite.Stmt) : IO Unit := do + stmt.exec + stmt.reset + stmt.clearBindings + 
+def _root_.SQLite.Stmt.bind [SQLite.NullableQueryParam α] (stmt : SQLite.Stmt) (index : Int32) (param : α) : IO Unit := do + SQLite.NullableQueryParam.bind stmt index param + +instance : SQLite.QueryParam Lean.DefinitionSafety where + bind stmt index safety := + SQLite.QueryParam.bind stmt index <| + match safety with + | .safe => "safe" + | .unsafe => "unsafe" + | .partial => "partial" + +instance : SQLite.QueryParam Lean.ReducibilityHints where + bind stmt index + | .opaque => SQLite.QueryParam.bind stmt index "opaque" + | .abbrev => SQLite.QueryParam.bind stmt index "abbrev" + | .regular i => SQLite.QueryParam.bind stmt index i.toNat.toInt64 + +open SQLite.Blob in +instance : SQLite.QueryParam Lean.Widget.CodeWithInfos where + bind stmt index code := Id.run do + let str := ToBinary.serializer (renderTagged code) .empty + SQLite.QueryParam.bind stmt index str + +def ensureDb (dbFile : System.FilePath) : IO DB := do + let sqlite ← getDb dbFile + let deleteModuleStmt ← sqlite.prepare "DELETE FROM modules WHERE name = ?" + let deleteModule modName := do + deleteModuleStmt.bind 1 modName + run deleteModuleStmt + let saveModuleStmt ← sqlite.prepare "INSERT INTO modules (name, source_url) VALUES (?, ?)" + let saveModule modName sourceUrl? := do + saveModuleStmt.bind 1 modName + saveModuleStmt.bind 2 sourceUrl? + run saveModuleStmt + -- This is INSERT OR IGNORE because the module system often results in multiple imports of the same module (e.g. 
as meta) + let saveImportStmt ← sqlite.prepare "INSERT OR IGNORE INTO module_imports (importer, imported) VALUES (?, ?)" + let saveImport modName imported := do + saveImportStmt.bind 1 modName + saveImportStmt.bind 2 imported.toString + run saveImportStmt + let saveMarkdownDocstringStmt ← sqlite.prepare "INSERT INTO markdown_docstrings (module_name, position, text) VALUES (?, ?, ?)" + let saveMarkdownDocstring modName position text := do + saveMarkdownDocstringStmt.bind 1 modName + saveMarkdownDocstringStmt.bind 2 position + saveMarkdownDocstringStmt.bind 3 text + run saveMarkdownDocstringStmt + let saveDeclarationRangeStmt ← + sqlite.prepare + "INSERT INTO declaration_ranges (module_name, position, start_line, start_column, start_utf16, end_line, end_column, end_utf16) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" + let saveDeclarationRange modName position (declRange : Lean.DeclarationRange) := do + saveDeclarationRangeStmt.bind 1 modName + saveDeclarationRangeStmt.bind 2 position + saveDeclarationRangeStmt.bind 3 declRange.pos.line + saveDeclarationRangeStmt.bind 4 declRange.pos.column + saveDeclarationRangeStmt.bind 5 declRange.charUtf16 + saveDeclarationRangeStmt.bind 6 declRange.endPos.line + saveDeclarationRangeStmt.bind 7 declRange.endPos.column + saveDeclarationRangeStmt.bind 8 declRange.endCharUtf16 + run saveDeclarationRangeStmt + let saveInfoStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, doc, sorried, render) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" + let saveInfo modName position kind (info : Process.Info) := do + saveInfoStmt.bind 1 modName + saveInfoStmt.bind 2 position + saveInfoStmt.bind 3 kind + saveInfoStmt.bind 4 info.name.toString + saveInfoStmt.bind 5 info.type + saveInfoStmt.bind 6 info.doc + saveInfoStmt.bind 7 info.sorried + saveInfoStmt.bind 8 info.render + run saveInfoStmt + let saveAxiomStmt ← sqlite.prepare "INSERT INTO axioms (module_name, position, is_unsafe) VALUES (?, ?, ?)" + let saveAxiom modName position 
isUnsafe := do + saveAxiomStmt.bind 1 modName + saveAxiomStmt.bind 2 position + saveAxiomStmt.bind 3 isUnsafe + run saveAxiomStmt + let saveOpaqueStmt ← sqlite.prepare "INSERT INTO opaques (module_name, position, safety) VALUES (?, ?, ?)" + let saveOpaque modName position safety := do + saveOpaqueStmt.bind 1 modName + saveOpaqueStmt.bind 2 position + saveOpaqueStmt.bind 3 safety + run saveOpaqueStmt + let saveDefinitionStmt ← sqlite.prepare "INSERT INTO definitions (module_name, position, is_unsafe, hints, is_noncomputable) VALUES (?, ?, ?, ?, ?)" + let saveDefinition modName position isUnsafe hints isNonComputable := do + saveDefinitionStmt.bind 1 modName + saveDefinitionStmt.bind 2 position + saveDefinitionStmt.bind 3 isUnsafe + saveDefinitionStmt.bind 4 hints + saveDefinitionStmt.bind 5 isNonComputable + run saveDefinitionStmt + let saveDefinitionEquationStmt ← sqlite.prepare "INSERT INTO definition_equations (module_name, position, code, sequence) VALUES (?, ?, ?, ?)" + let saveDefinitionEquation modName position code sequence := do + saveDefinitionEquationStmt.bind 1 modName + saveDefinitionEquationStmt.bind 2 position + saveDefinitionEquationStmt.bind 3 code + saveDefinitionEquationStmt.bind 4 sequence + run saveDefinitionEquationStmt + let saveInstanceStmt ← sqlite.prepare "INSERT INTO instances (module_name, position, class_name) VALUES (?, ?, ?)" + let saveInstance modName position className := do + saveInstanceStmt.bind 1 modName + saveInstanceStmt.bind 2 position + saveInstanceStmt.bind 3 className + run saveInstanceStmt + let saveInstanceArgStmt ← sqlite.prepare "INSERT INTO instance_args (module_name, position, sequence, type_name) VALUES (?, ?, ?, ?)" + let saveInstanceArg modName position sequence typeName := do + saveInstanceArgStmt.bind 1 modName + saveInstanceArgStmt.bind 2 position + saveInstanceArgStmt.bind 3 sequence + saveInstanceArgStmt.bind 4 typeName + run saveInstanceArgStmt + let saveInductiveStmt ← sqlite.prepare "INSERT INTO inductives 
(module_name, position, is_unsafe) VALUES (?, ?, ?)" + let saveInductive modName position isUnsafe := do + saveInductiveStmt.bind 1 modName + saveInductiveStmt.bind 2 position + saveInductiveStmt.bind 3 isUnsafe + run saveInductiveStmt + let saveConstructorStmt ← sqlite.prepare "INSERT INTO constructors (module_name, position, type_position) VALUES (?, ?, ?)" + let saveConstructor modName position typePosition := do + saveConstructorStmt.bind 1 modName + saveConstructorStmt.bind 2 position + saveConstructorStmt.bind 3 typePosition + run saveConstructorStmt + let saveClassInductiveStmt ← sqlite.prepare "INSERT INTO class_inductives (module_name, position, is_unsafe) VALUES (?, ?, ?)" + let saveClassInductive modName position isUnsafe := do + saveClassInductiveStmt.bind 1 modName + saveClassInductiveStmt.bind 2 position + saveClassInductiveStmt.bind 3 isUnsafe + run saveClassInductiveStmt + let saveStructureStmt ← sqlite.prepare "INSERT INTO structures (module_name, position, is_class) VALUES (?, ?, ?)" + let saveStructure modName position isClass := do + saveStructureStmt.bind 1 modName + saveStructureStmt.bind 2 position + saveStructureStmt.bind 3 isClass + run saveStructureStmt + let saveStructureConstructorStmt ← sqlite.prepare "INSERT INTO structure_constructors (module_name, position, name, type, doc) VALUES (?, ?, ?, ?, ?)" + let saveStructureConstructor modName position name type doc := do + saveStructureConstructorStmt.bind 1 modName + saveStructureConstructorStmt.bind 2 position + saveStructureConstructorStmt.bind 3 name + saveStructureConstructorStmt.bind 4 type + saveStructureConstructorStmt.bind 5 doc + run saveStructureConstructorStmt + let saveStructureParentStmt ← sqlite.prepare "INSERT INTO structure_parents (module_name, position, sequence, projection_fn, type) VALUES (?, ?, ?, ?, ?)" + let saveStructureParent modName position sequence projectionFn type := do + saveStructureParentStmt.bind 1 modName + saveStructureParentStmt.bind 2 position + 
saveStructureParentStmt.bind 3 sequence + saveStructureParentStmt.bind 4 projectionFn + saveStructureParentStmt.bind 5 type + run saveStructureParentStmt + let saveStructureFieldStmt ← sqlite.prepare "INSERT INTO structure_fields (module_name, position, sequence, name, type, doc, render, is_direct) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" + let saveStructureField modName position sequence name type doc render isDirect := do + saveStructureFieldStmt.bind 1 modName + saveStructureFieldStmt.bind 2 position + saveStructureFieldStmt.bind 3 sequence + saveStructureFieldStmt.bind 4 name + saveStructureFieldStmt.bind 5 type + saveStructureFieldStmt.bind 6 doc + saveStructureFieldStmt.bind 7 render + saveStructureFieldStmt.bind 8 isDirect + run saveStructureFieldStmt + pure { + sqlite, + deleteModule, + saveModule, + saveImport, + saveMarkdownDocstring, + saveDeclarationRange, + saveInfo, + saveAxiom, + saveOpaque, + saveDefinition, + saveDefinitionEquation, + saveInstance, + saveInstanceArg, + saveInductive, + saveConstructor, + saveClassInductive, + saveStructure, + saveStructureConstructor, + saveStructureParent, + saveStructureField + } + + + +end DB + +open DB + +def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) (dbFile : String) (sourceUrl? : Option String) : IO Unit := do + let dbFile := buildDir / dbFile + let db ← ensureDb dbFile + let ms1 ← IO.monoMsNow + db.sqlite.transaction do + for (modName, modInfo) in doc.moduleInfo do + let modName := modName.toString + db.deleteModule modName + db.saveModule modName sourceUrl? 
+ for imported in modInfo.imports do + db.saveImport modName imported + let mut i : Int64 := 0 + for mem in modInfo.members do + let pos := i + i := i + 1 + match mem with + | .modDoc doc => + db.saveDeclarationRange modName pos doc.declarationRange + db.saveMarkdownDocstring modName pos doc.doc + | .docInfo info => + let baseInfo := info.toInfo + db.saveInfo modName pos (infoKind info) baseInfo + db.saveDeclarationRange modName pos baseInfo.declarationRange + match info with + | .axiomInfo info => + db.saveAxiom modName pos info.isUnsafe + | .theoremInfo _info => -- No extra info here + pure () + | .opaqueInfo info => + db.saveOpaque modName pos info.definitionSafety + | .definitionInfo info => + db.saveDefinition modName pos info.isUnsafe info.hints info.isNonComputable + if let some eqns := info.equations then + for h : j in 0...eqns.size do + db.saveDefinitionEquation modName pos eqns[j] j.toInt64 + | .instanceInfo info => + db.saveInstance modName pos info.className.toString + for h : j in 0...info.typeNames.size do + db.saveInstanceArg modName pos j.toInt64 info.typeNames[j].toString + | .inductiveInfo info => + db.saveInductive modName pos info.isUnsafe + for ctor in info.ctors do + let cpos := i + i := i + 1 + db.saveInfo modName cpos "constructor" ctor + db.saveDeclarationRange modName cpos ctor.declarationRange + db.saveConstructor modName cpos pos + | .structureInfo info => + i := (← (saveStructureInfo false info db modName pos).run i).2 + | .classInfo info => + i := (← (saveStructureInfo true info db modName pos).run i).2 + | .classInductiveInfo info => + db.saveClassInductive modName pos info.isUnsafe + for ctor in info.ctors do + let cpos := i + i := i + 1 + db.saveInfo modName cpos "constructor" ctor + db.saveDeclarationRange modName cpos ctor.declarationRange + db.saveConstructor modName cpos pos + | .ctorInfo info => + -- Here we do nothing because they were inserted along with the inductive + pure () + let ms2 ← IO.monoMsNow + (← IO.FS.Handle.mk 
"db-timing" .append).write <| s!"{doc.moduleInfo.keysArray}\t{ms2 - ms1}ms\n".toUTF8 + pure () + +where + saveStructureInfo (isClass : Bool) (info : Process.StructureInfo) (db : DB) (modName : String) (pos : Int64) : StateT Int64 IO Unit := do + db.saveStructure modName pos isClass + db.saveStructureConstructor modName pos info.ctor.name.toString info.ctor.type info.ctor.doc + let mut seq : Int32 := 0 + for parent in info.parents do + db.saveStructureParent modName pos seq parent.projFn.toString parent.type + seq := seq + 1 + for field in info.fieldInfo do + let fpos ← get + modify (· + 1) + db.saveStructureField modName pos fpos field.name.toString field.type field.doc field.render field.isDirect + + infoKind : Process.DocInfo → String + | .axiomInfo _ => "axiom" + | .theoremInfo info => "theorem" + | .opaqueInfo info => "opaque" + | .definitionInfo info => "definition" + | .instanceInfo info => "instance" + | .inductiveInfo info => "inductive" + | .structureInfo info => "structure" + | .classInfo info => "class" + | .classInductiveInfo info => "class inductive" + | .ctorInfo info => "constructor" diff --git a/DocGen4/Process/Base.lean b/DocGen4/Process/Base.lean index 2caf0652..00f14207 100644 --- a/DocGen4/Process/Base.lean +++ b/DocGen4/Process/Base.lean @@ -192,6 +192,18 @@ inductive DocInfo where | ctorInfo (info : ConstructorInfo) : DocInfo deriving Inhabited +def DocInfo.toInfo : DocInfo → Info + | .axiomInfo info => info.toInfo + | .theoremInfo info => info.toInfo + | .opaqueInfo info => info.toInfo + | .definitionInfo info => info.toInfo + | .instanceInfo info => info.toInfo + | .inductiveInfo info => info.toInfo + | .structureInfo info => info.toInfo + | .classInfo info => info.toInfo + | .classInductiveInfo info => info.toInfo + | .ctorInfo info => info + /-- Turns an `Expr` into a pretty printed `CodeWithInfos`. 
-/ diff --git a/Main.lean b/Main.lean index b42aa823..5a88a5e2 100644 --- a/Main.lean +++ b/Main.lean @@ -21,10 +21,13 @@ def runSingleCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! String | none => ".lake/build" + let dbFile? := p.flag? "db" |>.map (·.as! String) let relevantModules := #[p.positionalArg! "module" |>.as! String |> String.toName] let sourceUri := p.positionalArg! "sourceUri" |>.as! String let (doc, hierarchy) ← load <| .analyzeConcreteModules relevantModules let baseConfig ← getSimpleBaseContext buildDir hierarchy + if let some dbFile := dbFile? then + updateModuleDb doc buildDir dbFile (some sourceUri) discard <| htmlOutputResults baseConfig doc (some sourceUri) return 0 @@ -41,10 +44,13 @@ def runGenCoreCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! String | none => ".lake/build" + let dbFile? := p.flag? "db" |>.map (·.as! String) let manifestOutput? := (p.flag? "manifest").map (·.as! String) let module := p.positionalArg! "module" |>.as! String |> String.toName let (doc, hierarchy) ← load <| .analyzePrefixModules module let baseConfig ← getSimpleBaseContext buildDir hierarchy + if let some dbFile := dbFile? then + updateModuleDb doc buildDir dbFile none let outputs ← htmlOutputResults baseConfig doc none if let .some manifestOutput := manifestOutput? then IO.FS.writeFile manifestOutput (Lean.toJson outputs).compress @@ -80,6 +86,7 @@ def singleCmd := `[Cli| FLAGS: b, build : String; "Build directory." + db : String; "Database" ARGS: module : String; "The module to generate the HTML for. Does not have to be part of topLevelModules." @@ -100,6 +107,7 @@ def genCoreCmd := `[Cli| FLAGS: b, build : String; "Build directory." + db : String; "Database" m, manifest : String; "Manifest output, to list all the files generated." 
ARGS: diff --git a/lake-manifest.json b/lake-manifest.json index dda84a58..2e2b7ebc 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -1,7 +1,17 @@ {"version": "1.1.0", "packagesDir": ".lake/packages", "packages": - [{"url": "https://github.com/leanprover/lean4-cli", + [{"url": "https://github.com/david-christiansen/leansqlite", + "type": "git", + "subDir": null, + "scope": "", + "rev": "64e35cc2cc0959f4ff371933a86ac298f20bf1fe", + "name": "leansqlite", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": false, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover/lean4-cli", "type": "git", "subDir": null, "scope": "", @@ -40,6 +50,16 @@ "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": false, - "configFile": "lakefile.lean"}], + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/plausible", + "type": "git", + "subDir": null, + "scope": "", + "rev": "b3dd6c3ebc0a71685e86bea9223be39ea4c299fb", + "name": "plausible", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}], "name": "«doc-gen4»", "lakeDir": ".lake"} diff --git a/lakefile.lean b/lakefile.lean index 8e61872b..665a39ee 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -23,6 +23,9 @@ require «UnicodeBasic» from git require Cli from git "https://github.com/leanprover/lean4-cli" @ "main" +require leansqlite from git + "https://github.com/david-christiansen/leansqlite" @ "main" + /-- Obtain the subdirectory of the Lean package relative to the root of the enclosing git repository. 
-/ @@ -240,7 +243,7 @@ module_facet docs (mod) : DepSet FilePath := do let srcUri ← uriJob.await proc { cmd := exeFile.toString - args := #["single", "--build", buildDir.toString, mod.name.toString, srcUri] + args := #["single", "--build", buildDir.toString, "--db", "lean-docs.db", mod.name.toString, srcUri] env := ← getAugmentedEnv } return DepSet.mk #[docFile] docDeps @@ -259,6 +262,7 @@ def coreTarget (component : Lean.Name) : FetchM (Job <| Array FilePath) := do cmd := exeFile.toString args := #["genCore", component.toString, "--build", buildDir.toString, + "--db", "lean-docs.db", "--manifest", manifestFile.toString] env := ← getAugmentedEnv } From 9272ece1130db2fcb4e854a1d0053033f367d185 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 20 Jan 2026 12:35:36 +0100 Subject: [PATCH 002/106] Try multiple DBs Experiment to see if slowdown due to database file contention --- DocGen4/DB.lean | 3 --- lakefile.lean | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 36e463d9..281bf866 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -640,7 +640,6 @@ open DB def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) (dbFile : String) (sourceUrl? 
: Option String) : IO Unit := do let dbFile := buildDir / dbFile let db ← ensureDb dbFile - let ms1 ← IO.monoMsNow db.sqlite.transaction do for (modName, modInfo) in doc.moduleInfo do let modName := modName.toString @@ -699,8 +698,6 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( | .ctorInfo info => -- Here we do nothing because they were inserted along with the inductive pure () - let ms2 ← IO.monoMsNow - (← IO.FS.Handle.mk "db-timing" .append).write <| s!"{doc.moduleInfo.keysArray}\t{ms2 - ms1}ms\n".toUTF8 pure () where diff --git a/lakefile.lean b/lakefile.lean index 665a39ee..07136f43 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -243,7 +243,7 @@ module_facet docs (mod) : DepSet FilePath := do let srcUri ← uriJob.await proc { cmd := exeFile.toString - args := #["single", "--build", buildDir.toString, "--db", "lean-docs.db", mod.name.toString, srcUri] + args := #["single", "--build", buildDir.toString, "--db", s!"lean-docs-{mod.name}.db", mod.name.toString, srcUri] env := ← getAugmentedEnv } return DepSet.mk #[docFile] docDeps @@ -262,7 +262,7 @@ def coreTarget (component : Lean.Name) : FetchM (Job <| Array FilePath) := do cmd := exeFile.toString args := #["genCore", component.toString, "--build", buildDir.toString, - "--db", "lean-docs.db", + "--db", s!"lean-core-docs-{component}.db", "--manifest", manifestFile.toString] env := ← getAugmentedEnv } From 418fa515c8f42a021c64b1a1ed39865a641c3418 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 20 Jan 2026 13:10:04 +0100 Subject: [PATCH 003/106] Try not generating equations in DB --- DocGen4/DB.lean | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 281bf866..deed9090 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -668,9 +668,9 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( db.saveOpaque modName pos info.definitionSafety | .definitionInfo info => 
db.saveDefinition modName pos info.isUnsafe info.hints info.isNonComputable - if let some eqns := info.equations then - for h : j in 0...eqns.size do - db.saveDefinitionEquation modName pos eqns[j] j.toInt64 + -- if let some eqns := info.equations then + -- for h : j in 0...eqns.size do + -- db.saveDefinitionEquation modName pos eqns[j] j.toInt64 | .instanceInfo info => db.saveInstance modName pos info.className.toString for h : j in 0...info.typeNames.size do From 76667eab11224dd4802082f70744a5aafa0d144b Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 20 Jan 2026 13:33:51 +0100 Subject: [PATCH 004/106] try getting Mathlib cache earlier --- scripts/bench/mathlib-docs/run | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index b355964d..1fb17a2e 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -25,6 +25,9 @@ lake +"$TOOLCHAIN" new mathproject math-lax cd mathproject +# Get Mathlib cache +lake exe cache get + # Add a dependency to the doc-gen4 checkout cat >> lakefile.toml < Date: Tue, 20 Jan 2026 14:00:48 +0100 Subject: [PATCH 005/106] It seems to build modules and not just make docs. Try this. 
--- scripts/bench/mathlib-docs/run | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index 1fb17a2e..863863cf 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -41,6 +41,7 @@ MATHLIB_NO_CACHE_ON_UPDATE=1 lake update doc-gen4 # Build DocGen4 and its executable first (we want to measure docs generation, not tool building) lake build DocGen4 +lake build Mathlib lake build doc-gen4 popd From c90bd81bd58b6bcff0533a4ed375bc882518d0a4 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 20 Jan 2026 14:22:57 +0100 Subject: [PATCH 006/106] Revert "Try not generating equations in DB" This reverts commit 418fa515c8f42a021c64b1a1ed39865a641c3418. --- DocGen4/DB.lean | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index deed9090..281bf866 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -668,9 +668,9 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( db.saveOpaque modName pos info.definitionSafety | .definitionInfo info => db.saveDefinition modName pos info.isUnsafe info.hints info.isNonComputable - -- if let some eqns := info.equations then - -- for h : j in 0...eqns.size do - -- db.saveDefinitionEquation modName pos eqns[j] j.toInt64 + if let some eqns := info.equations then + for h : j in 0...eqns.size do + db.saveDefinitionEquation modName pos eqns[j] j.toInt64 | .instanceInfo info => db.saveInstance modName pos info.className.toString for h : j in 0...info.typeNames.size do From e1d2a8b5425755e6502170cca8e90bf7493c0b04 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 20 Jan 2026 14:23:00 +0100 Subject: [PATCH 007/106] Revert "Try multiple DBs" This reverts commit 9272ece1130db2fcb4e854a1d0053033f367d185. 
--- DocGen4/DB.lean | 3 +++ lakefile.lean | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 281bf866..36e463d9 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -640,6 +640,7 @@ open DB def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) (dbFile : String) (sourceUrl? : Option String) : IO Unit := do let dbFile := buildDir / dbFile let db ← ensureDb dbFile + let ms1 ← IO.monoMsNow db.sqlite.transaction do for (modName, modInfo) in doc.moduleInfo do let modName := modName.toString @@ -698,6 +699,8 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( | .ctorInfo info => -- Here we do nothing because they were inserted along with the inductive pure () + let ms2 ← IO.monoMsNow + (← IO.FS.Handle.mk "db-timing" .append).write <| s!"{doc.moduleInfo.keysArray}\t{ms2 - ms1}ms\n".toUTF8 pure () where diff --git a/lakefile.lean b/lakefile.lean index 07136f43..665a39ee 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -243,7 +243,7 @@ module_facet docs (mod) : DepSet FilePath := do let srcUri ← uriJob.await proc { cmd := exeFile.toString - args := #["single", "--build", buildDir.toString, "--db", s!"lean-docs-{mod.name}.db", mod.name.toString, srcUri] + args := #["single", "--build", buildDir.toString, "--db", "lean-docs.db", mod.name.toString, srcUri] env := ← getAugmentedEnv } return DepSet.mk #[docFile] docDeps @@ -262,7 +262,7 @@ def coreTarget (component : Lean.Name) : FetchM (Job <| Array FilePath) := do cmd := exeFile.toString args := #["genCore", component.toString, "--build", buildDir.toString, - "--db", s!"lean-core-docs-{component}.db", + "--db", "lean-docs.db", "--manifest", manifestFile.toString] env := ← getAugmentedEnv } From 1d77c52722ff70a551f459843f2e87b51cfe4a04 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 26 Jan 2026 09:34:19 +0100 Subject: [PATCH 008/106] feat: serialize Verso docstrings Extensions are not 
presently handled, but the fallback data are saved. --- DocGen4/DB.lean | 317 +++++++++++++++++++++++++++++----- DocGen4/Output/DocString.lean | 15 +- DocGen4/Output/Module.lean | 2 +- DocGen4/Output/ToJson.lean | 2 +- DocGen4/Process/Analyze.lean | 4 +- DocGen4/Process/Base.lean | 2 +- DocGen4/Process/DocInfo.lean | 14 +- DocGen4/Process/NameInfo.lean | 58 ++++++- 8 files changed, 361 insertions(+), 53 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 36e463d9..a63a4336 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -3,6 +3,205 @@ import SQLite namespace DocGen4.DB +section +open Lean +open SQLite.Blob + +structure DocstringDataHandler where + serialize : Serializer Dynamic + deserialize : Deserializer Dynamic + +structure DocstringValues where + inlines : NameMap DocstringDataHandler := {} + blocks : NameMap DocstringDataHandler := {} + +def toBinaryElabInline (vals : DocstringValues) : Serializer ElabInline + | { name, val }, b => + match vals.inlines.get? name with + | none => b.push 0 |> ToBinary.serializer name + | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val + +def toBinaryElabBlock (vals : DocstringValues) : Serializer ElabBlock + | { name, val }, b => + match vals.blocks.get? name with + | none => b.push 0 |> ToBinary.serializer name + | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val + +structure Unknown where +deriving BEq, Hashable, Ord, DecidableEq, Inhabited, TypeName + +instance : Subsingleton Unknown where + allEq := by intros; rfl + +def fromBinaryElabInline (vals : DocstringValues) : Deserializer ElabInline := do + match (← Deserializer.byte) with + | 0 => + let name ← FromBinary.deserializer + pure { name := `unknown ++ name, val := .mk Unknown.mk } + | 1 => + let name ← FromBinary.deserializer + match vals.inlines.get? 
name with + | none => pure { name := `unknown ++ name, val := .mk Unknown.mk } + | some d => + let val ← d.deserialize + pure { name, val } + | other => throw s!"Expected 0 or 1 for `ElabInline`'s tag, got `{other}`" + +def fromBinaryElabBlock (vals : DocstringValues) : Deserializer ElabBlock := do + match (← Deserializer.byte) with + | 0 => + let name ← FromBinary.deserializer + pure { name := `unknown ++ name, val := .mk Unknown.mk } + | 1 => + let name ← FromBinary.deserializer + match vals.blocks.get? name with + | none => pure { name := `unknown ++ name, val := .mk Unknown.mk } + | some d => + let val ← d.deserialize + pure { name, val } + | other => throw s!"Expected 0 or 1 for `ElabBlock`'s tag, got `{other}`" + +partial instance [ToBinary i] : ToBinary (Doc.Inline i) where + serializer := go +where + go + | .text s, b => b.push 0 |> ToBinary.serializer s + | .linebreak s, b => b.push 1 |> ToBinary.serializer s + | .emph xs, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 2 |> ToBinary.serializer xs + | .bold xs, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 3 |> ToBinary.serializer xs + | .code s, b => + b.push 4 |> ToBinary.serializer s + | .math .inline s, b => b.push 5 |> ToBinary.serializer s + | .math .display s, b => b.push 6 |> ToBinary.serializer s + | .link xs url, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 7 |> ToBinary.serializer xs |> ToBinary.serializer url + | .footnote name xs, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 8 |> ToBinary.serializer name |> ToBinary.serializer xs + | .image alt url, b => b.push 9 |> ToBinary.serializer alt |> ToBinary.serializer url + | .concat xs, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 10 |> ToBinary.serializer xs + | .other container content, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 11 |> ToBinary.serializer container |> ToBinary.serializer content + +partial instance [FromBinary i] : FromBinary (Doc.Inline i) where + deserializer := go +where 
go := do + have : FromBinary (Doc.Inline i) := ⟨go⟩ + match (← .byte) with + | 0 => .text <$> FromBinary.deserializer + | 1 => .linebreak <$> FromBinary.deserializer + | 2 => .emph <$> FromBinary.deserializer + | 3 => .bold <$> FromBinary.deserializer + | 4 => .code <$> FromBinary.deserializer + | 5 => .math .inline <$> FromBinary.deserializer + | 6 => .math .display <$> FromBinary.deserializer + | 7 => .link <$> FromBinary.deserializer <*> FromBinary.deserializer + | 8 => .footnote <$> FromBinary.deserializer <*> FromBinary.deserializer + | 9 => .image <$> FromBinary.deserializer <*> FromBinary.deserializer + | 10 => .concat <$> FromBinary.deserializer + | 11 => .other <$> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected a tag for `Doc.Inline` in 0...12, got {other}" + + +partial instance [ToBinary i] [ToBinary b] : ToBinary (Doc.Block i b) where + serializer := go +where + go + | .para xs, bs => bs.push 0 |> ToBinary.serializer xs + | .code s, bs => bs.push 1 |> ToBinary.serializer s + | .concat xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 2 |> ToBinary.serializer xs + | .ul xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 3 |> ToBinary.serializer (xs.map (·.contents)) + | .ol n xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 4 |> ToBinary.serializer n |> ToBinary.serializer (xs.map (·.contents)) + | .dl xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 5 |> ToBinary.serializer (xs.map (fun i => (i.term, i.desc))) + | .blockquote xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 6 |> ToBinary.serializer xs + | .other container content, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 7 |> ToBinary.serializer container |> ToBinary.serializer content + + +partial instance [FromBinary i] [FromBinary b] : FromBinary (Doc.Block i b) where + deserializer := go +where go := do + have : FromBinary (Doc.Block i b) := ⟨go⟩ + match (← .byte) with + | 0 => .para <$> 
FromBinary.deserializer + | 1 => .code <$> FromBinary.deserializer + | 2 => .concat <$> FromBinary.deserializer + | 3 => + let xss : Array (Array (Doc.Block i b)) ← FromBinary.deserializer + return .ul <| xss.map (⟨·⟩) + | 4 => + let n ← FromBinary.deserializer + let xss : Array (Array (Doc.Block i b)) ← FromBinary.deserializer + return .ol n <| xss.map (⟨·⟩) + | 5 => + let items : Array (_ × _) ← FromBinary.deserializer + return .dl <| items.map (fun x => Doc.DescItem.mk x.1 x.2) + | 6 => .blockquote <$> FromBinary.deserializer + | 7 => .other <$> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected a tag for `Doc.Block` in 0...8, got {other}" + +partial instance [ToBinary i] [ToBinary b] [ToBinary p] : ToBinary (Doc.Part i p b) where + serializer := go +where + go + | .mk title titleString metadata content subParts, bs => + have : ToBinary (Doc.Part i p b) := ⟨go⟩ + bs + |> ToBinary.serializer title + |> ToBinary.serializer titleString + |> ToBinary.serializer metadata + |> ToBinary.serializer content + |> ToBinary.serializer subParts + +partial instance [FromBinary i] [FromBinary b] [FromBinary p] : FromBinary (Doc.Part i p b) where + deserializer := go +where + go := do + have : FromBinary (Doc.Part i p b) := ⟨go⟩ + .mk + <$> FromBinary.deserializer + <*> FromBinary.deserializer + <*> FromBinary.deserializer + <*> FromBinary.deserializer + <*> FromBinary.deserializer + +instance : ToBinary VersoDocString where + serializer + | {text, subsections}, b => + -- TODO customizable handling of Verso docstring extension data + have : ToBinary ElabInline := ⟨toBinaryElabInline {}⟩ + have : ToBinary ElabBlock := ⟨toBinaryElabBlock {}⟩ + b |> ToBinary.serializer text |> ToBinary.serializer subsections + +instance : FromBinary VersoDocString where + deserializer := do + -- TODO customizable handling of Verso docstring extension data + have : FromBinary ElabInline := ⟨fromBinaryElabInline {}⟩ + have : FromBinary ElabBlock := 
⟨fromBinaryElabBlock {}⟩ + .mk <$> FromBinary.deserializer <*> FromBinary.deserializer + +instance : SQLite.QueryParam VersoDocString := .asBlob + +end section open Lean Widget Elab open SQLite.Blob @@ -219,7 +418,6 @@ partial def renderTagged | _ => renderTagged t | .append xs => xs.mapM renderTagged <&> (·.foldl (init := .empty) (· ++ ·)) - end def getDb (dbFile : System.FilePath) : IO SQLite := do @@ -285,13 +483,20 @@ CREATE TABLE IF NOT EXISTS markdown_docstrings ( FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE ); +CREATE TABLE IF NOT EXISTS verso_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + content BLOB NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + CREATE TABLE IF NOT EXISTS name_info ( module_name TEXT NOT NULL, position INTEGER NOT NULL, kind TEXT, name TEXT NOT NULL, type TEXT NOT NULL, - doc TEXT, sorried INTEGER NOT NULL, render INTEGER NOT NULL, PRIMARY KEY (module_name, position), @@ -395,10 +600,10 @@ CREATE TABLE IF NOT EXISTS structure_parents ( CREATE TABLE IF NOT EXISTS structure_constructors ( module_name TEXT NOT NULL, - position INTEGER NOT NULL, + position INTEGER NOT NULL, -- The structure's position + ctor_position INTEGER NOT NULL, name TEXT NOT NULL, - type TEXT NOT NULL, - doc TEXT, + type BLOB NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); @@ -408,7 +613,6 @@ CREATE TABLE IF NOT EXISTS structure_fields ( position INTEGER NOT NULL, name TEXT NOT NULL, type TEXT NOT NULL, - doc TEXT, render INTEGER NOT NULL, sequence INTEGER NOT NULL, is_direct INTEGER NOT NULL, @@ -417,12 +621,19 @@ CREATE TABLE IF NOT EXISTS structure_fields ( ); "# +def withTableName (tableName : String) (act : IO α) : IO α := + try + act + catch + | e => throw <| .userError s!"Exception while modifying `{tableName}`: {e.toString}" + structure 
DB where sqlite : SQLite deleteModule (modName : String) : IO Unit saveModule (modName : String) (sourceUrl? : Option String) : IO Unit saveImport (modName : String) (imported : Lean.Name) : IO Unit saveMarkdownDocstring (modName : String) (position : Int64) (text : String) : IO Unit + saveVersoDocstring (modName : String) (position : Int64) (text : Lean.VersoDocString) : IO Unit saveDeclarationRange (modName : String) (position : Int64) (declRange : Lean.DeclarationRange) : IO Unit saveInfo (modName : String) (position : Int64) (kind : String) (info : Process.Info) : IO Unit saveAxiom (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit @@ -435,9 +646,14 @@ structure DB where saveConstructor (modName : String) (position : Int64) (typePosition : Int64) : IO Unit saveClassInductive (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit saveStructure (modName : String) (position : Int64) (isClass : Bool) : IO Unit - saveStructureConstructor (modName : String) (position : Int64) (name : String) (type : Lean.Widget.CodeWithInfos) (doc : Option String) : IO Unit + saveStructureConstructor (modName : String) (position : Int64) (ctorPos : Int64) (info : Process.NameInfo) : IO Unit saveStructureParent (modName : String) (position : Int64) (sequence : Int32) (projectionFn : String) (type : Lean.Widget.CodeWithInfos) : IO Unit - saveStructureField (modName : String) (position : Int64) (sequence : Int64) (name : String) (type : Lean.Widget.CodeWithInfos) (doc : Option String) (render : Bool) (isDirect : Bool) : IO Unit + saveStructureField (modName : String) (position : Int64) (sequence : Int64) (name : String) (type : Lean.Widget.CodeWithInfos) (render : Bool) (isDirect : Bool) : IO Unit + +def DB.saveDocstring (db : DB) (modName : String) (position : Int64) (text : String ⊕ Lean.VersoDocString) : IO Unit := + match text with + | .inl md => db.saveMarkdownDocstring modName position md + | .inr v => db.saveVersoDocstring modName position v instance : 
Coe DB SQLite where coe := DB.sqlite @@ -473,30 +689,36 @@ instance : SQLite.QueryParam Lean.Widget.CodeWithInfos where def ensureDb (dbFile : System.FilePath) : IO DB := do let sqlite ← getDb dbFile let deleteModuleStmt ← sqlite.prepare "DELETE FROM modules WHERE name = ?" - let deleteModule modName := do + let deleteModule modName := withTableName "modules" do deleteModuleStmt.bind 1 modName run deleteModuleStmt let saveModuleStmt ← sqlite.prepare "INSERT INTO modules (name, source_url) VALUES (?, ?)" - let saveModule modName sourceUrl? := do + let saveModule modName sourceUrl? := withTableName "modules" do saveModuleStmt.bind 1 modName saveModuleStmt.bind 2 sourceUrl? run saveModuleStmt -- This is INSERT OR IGNORE because the module system often results in multiple imports of the same module (e.g. as meta) let saveImportStmt ← sqlite.prepare "INSERT OR IGNORE INTO module_imports (importer, imported) VALUES (?, ?)" - let saveImport modName imported := do + let saveImport modName imported := withTableName "module_imports" do saveImportStmt.bind 1 modName saveImportStmt.bind 2 imported.toString run saveImportStmt let saveMarkdownDocstringStmt ← sqlite.prepare "INSERT INTO markdown_docstrings (module_name, position, text) VALUES (?, ?, ?)" - let saveMarkdownDocstring modName position text := do + let saveMarkdownDocstring modName position text := withTableName "markdown_docstrings" do saveMarkdownDocstringStmt.bind 1 modName saveMarkdownDocstringStmt.bind 2 position saveMarkdownDocstringStmt.bind 3 text run saveMarkdownDocstringStmt + let saveVersoDocstringStmt ← sqlite.prepare "INSERT INTO verso_docstrings (module_name, position, content) VALUES (?, ?, ?)" + let saveVersoDocstring modName position text := withTableName "verso_docstrings" do + saveVersoDocstringStmt.bind 1 modName + saveVersoDocstringStmt.bind 2 position + saveVersoDocstringStmt.bind 3 text + run saveVersoDocstringStmt let saveDeclarationRangeStmt ← sqlite.prepare "INSERT INTO declaration_ranges 
(module_name, position, start_line, start_column, start_utf16, end_line, end_column, end_utf16) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" - let saveDeclarationRange modName position (declRange : Lean.DeclarationRange) := do + let saveDeclarationRange modName position (declRange : Lean.DeclarationRange) := withTableName "declaration_ranges" do saveDeclarationRangeStmt.bind 1 modName saveDeclarationRangeStmt.bind 2 position saveDeclarationRangeStmt.bind 3 declRange.pos.line @@ -506,31 +728,34 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveDeclarationRangeStmt.bind 7 declRange.endPos.column saveDeclarationRangeStmt.bind 8 declRange.endCharUtf16 run saveDeclarationRangeStmt - let saveInfoStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, doc, sorried, render) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" - let saveInfo modName position kind (info : Process.Info) := do + let saveInfoStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, sorried, render) VALUES (?, ?, ?, ?, ?, ?, ?)" + let saveInfo modName position kind (info : Process.Info) := withTableName "name_info" do saveInfoStmt.bind 1 modName saveInfoStmt.bind 2 position saveInfoStmt.bind 3 kind saveInfoStmt.bind 4 info.name.toString saveInfoStmt.bind 5 info.type - saveInfoStmt.bind 6 info.doc - saveInfoStmt.bind 7 info.sorried - saveInfoStmt.bind 8 info.render + saveInfoStmt.bind 6 info.sorried + saveInfoStmt.bind 7 info.render run saveInfoStmt + match info.doc with + | some (.inl md) => saveMarkdownDocstring modName position md + | some (.inr v) => saveVersoDocstring modName position v + | none => pure () let saveAxiomStmt ← sqlite.prepare "INSERT INTO axioms (module_name, position, is_unsafe) VALUES (?, ?, ?)" - let saveAxiom modName position isUnsafe := do + let saveAxiom modName position isUnsafe := withTableName "axioms" do saveAxiomStmt.bind 1 modName saveAxiomStmt.bind 2 position saveAxiomStmt.bind 3 isUnsafe run saveAxiomStmt let saveOpaqueStmt 
← sqlite.prepare "INSERT INTO opaques (module_name, position, safety) VALUES (?, ?, ?)" - let saveOpaque modName position safety := do + let saveOpaque modName position safety := withTableName "opaques" do saveOpaqueStmt.bind 1 modName saveOpaqueStmt.bind 2 position saveOpaqueStmt.bind 3 safety run saveOpaqueStmt let saveDefinitionStmt ← sqlite.prepare "INSERT INTO definitions (module_name, position, is_unsafe, hints, is_noncomputable) VALUES (?, ?, ?, ?, ?)" - let saveDefinition modName position isUnsafe hints isNonComputable := do + let saveDefinition modName position isUnsafe hints isNonComputable := withTableName "definitions" do saveDefinitionStmt.bind 1 modName saveDefinitionStmt.bind 2 position saveDefinitionStmt.bind 3 isUnsafe @@ -538,75 +763,79 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveDefinitionStmt.bind 5 isNonComputable run saveDefinitionStmt let saveDefinitionEquationStmt ← sqlite.prepare "INSERT INTO definition_equations (module_name, position, code, sequence) VALUES (?, ?, ?, ?)" - let saveDefinitionEquation modName position code sequence := do + let saveDefinitionEquation modName position code sequence := withTableName "definition_equations" do saveDefinitionEquationStmt.bind 1 modName saveDefinitionEquationStmt.bind 2 position saveDefinitionEquationStmt.bind 3 code saveDefinitionEquationStmt.bind 4 sequence run saveDefinitionEquationStmt let saveInstanceStmt ← sqlite.prepare "INSERT INTO instances (module_name, position, class_name) VALUES (?, ?, ?)" - let saveInstance modName position className := do + let saveInstance modName position className := withTableName "instances" do saveInstanceStmt.bind 1 modName saveInstanceStmt.bind 2 position saveInstanceStmt.bind 3 className run saveInstanceStmt let saveInstanceArgStmt ← sqlite.prepare "INSERT INTO instance_args (module_name, position, sequence, type_name) VALUES (?, ?, ?, ?)" - let saveInstanceArg modName position sequence typeName := do + let saveInstanceArg modName position 
sequence typeName := withTableName "instance_args" do saveInstanceArgStmt.bind 1 modName saveInstanceArgStmt.bind 2 position saveInstanceArgStmt.bind 3 sequence saveInstanceArgStmt.bind 4 typeName run saveInstanceArgStmt let saveInductiveStmt ← sqlite.prepare "INSERT INTO inductives (module_name, position, is_unsafe) VALUES (?, ?, ?)" - let saveInductive modName position isUnsafe := do + let saveInductive modName position isUnsafe := withTableName "inductives" do saveInductiveStmt.bind 1 modName saveInductiveStmt.bind 2 position saveInductiveStmt.bind 3 isUnsafe run saveInductiveStmt let saveConstructorStmt ← sqlite.prepare "INSERT INTO constructors (module_name, position, type_position) VALUES (?, ?, ?)" - let saveConstructor modName position typePosition := do + let saveConstructor modName position typePosition := withTableName "constructors" do saveConstructorStmt.bind 1 modName saveConstructorStmt.bind 2 position saveConstructorStmt.bind 3 typePosition run saveConstructorStmt let saveClassInductiveStmt ← sqlite.prepare "INSERT INTO class_inductives (module_name, position, is_unsafe) VALUES (?, ?, ?)" - let saveClassInductive modName position isUnsafe := do + let saveClassInductive modName position isUnsafe := withTableName "class_inductives" do saveClassInductiveStmt.bind 1 modName saveClassInductiveStmt.bind 2 position saveClassInductiveStmt.bind 3 isUnsafe run saveClassInductiveStmt let saveStructureStmt ← sqlite.prepare "INSERT INTO structures (module_name, position, is_class) VALUES (?, ?, ?)" - let saveStructure modName position isClass := do + let saveStructure modName position isClass := withTableName "structures" do saveStructureStmt.bind 1 modName saveStructureStmt.bind 2 position saveStructureStmt.bind 3 isClass run saveStructureStmt - let saveStructureConstructorStmt ← sqlite.prepare "INSERT INTO structure_constructors (module_name, position, name, type, doc) VALUES (?, ?, ?, ?, ?)" - let saveStructureConstructor modName position name type doc := do 
+ let saveStructureConstructorStmt ← sqlite.prepare "INSERT INTO structure_constructors (module_name, position, ctor_position, name, type) VALUES (?, ?, ?, ?, ?)" + let saveStructureConstructor modName position ctorPos info := withTableName "structure_constructors" do saveStructureConstructorStmt.bind 1 modName saveStructureConstructorStmt.bind 2 position - saveStructureConstructorStmt.bind 3 name - saveStructureConstructorStmt.bind 4 type - saveStructureConstructorStmt.bind 5 doc + saveStructureConstructorStmt.bind 3 ctorPos + saveStructureConstructorStmt.bind 4 info.name.toString + saveStructureConstructorStmt.bind 5 info.type run saveStructureConstructorStmt + match info.doc with + | some (.inl md) => saveMarkdownDocstring modName ctorPos md + | some (.inr v) => saveVersoDocstring modName ctorPos v + | none => pure () + let saveStructureParentStmt ← sqlite.prepare "INSERT INTO structure_parents (module_name, position, sequence, projection_fn, type) VALUES (?, ?, ?, ?, ?)" - let saveStructureParent modName position sequence projectionFn type := do + let saveStructureParent modName position sequence projectionFn type := withTableName "structure_parents" do saveStructureParentStmt.bind 1 modName saveStructureParentStmt.bind 2 position saveStructureParentStmt.bind 3 sequence saveStructureParentStmt.bind 4 projectionFn saveStructureParentStmt.bind 5 type run saveStructureParentStmt - let saveStructureFieldStmt ← sqlite.prepare "INSERT INTO structure_fields (module_name, position, sequence, name, type, doc, render, is_direct) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" - let saveStructureField modName position sequence name type doc render isDirect := do + let saveStructureFieldStmt ← sqlite.prepare "INSERT INTO structure_fields (module_name, position, sequence, name, type, render, is_direct) VALUES (?, ?, ?, ?, ?, ?, ?)" + let saveStructureField modName position sequence name type render isDirect := withTableName "structure_fields" do saveStructureFieldStmt.bind 1 modName 
saveStructureFieldStmt.bind 2 position saveStructureFieldStmt.bind 3 sequence saveStructureFieldStmt.bind 4 name saveStructureFieldStmt.bind 5 type - saveStructureFieldStmt.bind 6 doc - saveStructureFieldStmt.bind 7 render - saveStructureFieldStmt.bind 8 isDirect + saveStructureFieldStmt.bind 6 render + saveStructureFieldStmt.bind 7 isDirect run saveStructureFieldStmt pure { sqlite, @@ -614,6 +843,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveModule, saveImport, saveMarkdownDocstring, + saveVersoDocstring, saveDeclarationRange, saveInfo, saveAxiom, @@ -631,8 +861,6 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveStructureField } - - end DB open DB @@ -706,15 +934,18 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( where saveStructureInfo (isClass : Bool) (info : Process.StructureInfo) (db : DB) (modName : String) (pos : Int64) : StateT Int64 IO Unit := do db.saveStructure modName pos isClass - db.saveStructureConstructor modName pos info.ctor.name.toString info.ctor.type info.ctor.doc + modify (· + 1) + db.saveStructureConstructor modName pos (← get) info.ctor let mut seq : Int32 := 0 for parent in info.parents do db.saveStructureParent modName pos seq parent.projFn.toString parent.type seq := seq + 1 + modify (· + 1) for field in info.fieldInfo do let fpos ← get modify (· + 1) - db.saveStructureField modName pos fpos field.name.toString field.type field.doc field.render field.isDirect + db.saveStructureField modName pos fpos field.name.toString field.type field.render field.isDirect + if let some doc := field.doc then db.saveDocstring modName fpos doc infoKind : Process.DocInfo → String | .axiomInfo _ => "axiom" diff --git a/DocGen4/Output/DocString.lean b/DocGen4/Output/DocString.lean index 92222fad..8eb482ad 100644 --- a/DocGen4/Output/DocString.lean +++ b/DocGen4/Output/DocString.lean @@ -258,7 +258,11 @@ partial def findAllReferences (refsMap : Std.HashMap String BibItem) (s : String ret /-- 
Convert docstring to Html. -/ -def docStringToHtml (docString : String) (funName : String) : HtmlM (Array Html) := do +def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : HtmlM (Array Html) := do + let docString := + match docString with + | .inl md => md + | .inr v => toMarkdown v let refsMarkdown := "\n\n" ++ (String.join <| (findAllReferences (← read).refsMap docString).toList.map fun s => s!"[{s}]: references.html#ref_{s}\n") @@ -276,6 +280,13 @@ def docStringToHtml (docString : String) (funName : String) : HtmlM (Array Html) | .none => addError <| "Error: failed to parse markdown:\n" ++ docString return #[.raw "Error: failed to parse markdown: ", .text docString] - +where + -- TODO: natively render Verso docstrings + toMarkdown : VersoDocString → String + | .mk bs ps => Doc.MarkdownM.run' do + for b in bs do + Doc.ToMarkdown.toMarkdown b + for p in ps do + Doc.ToMarkdown.toMarkdown p end Output end DocGen4 diff --git a/DocGen4/Output/Module.lean b/DocGen4/Output/Module.lean index f53ad9cd..2fda2e44 100644 --- a/DocGen4/Output/Module.lean +++ b/DocGen4/Output/Module.lean @@ -117,7 +117,7 @@ as HTML. def modDocToHtml (mdoc : ModuleDoc) : HtmlM Html := do pure
- [← docStringToHtml mdoc.doc ""] + [← docStringToHtml (.inl mdoc.doc) ""]
/-- diff --git a/DocGen4/Output/ToJson.lean b/DocGen4/Output/ToJson.lean index 294ba6bb..5dd0e23e 100644 --- a/DocGen4/Output/ToJson.lean +++ b/DocGen4/Output/ToJson.lean @@ -120,7 +120,7 @@ def JsonIndex.addModule (index : JsonIndex) (module : JsonModule) : BaseHtmlM Js def DocInfo.toJson (sourceLinker : Option DeclarationRange → String) (info : Process.DocInfo) : HtmlM JsonDeclaration := do let name := info.getName.toString let kind := info.getKind - let doc := info.getDocString.getD "" + let doc := info.getMarkdownDocString.getD "" let docLink ← declNameToLink info.getName let sourceLink := sourceLinker info.getDeclarationRange let line := info.getDeclarationRange.pos.line diff --git a/DocGen4/Process/Analyze.lean b/DocGen4/Process/Analyze.lean index c6f1f398..6c0802d2 100644 --- a/DocGen4/Process/Analyze.lean +++ b/DocGen4/Process/Analyze.lean @@ -73,9 +73,9 @@ def getName : ModuleMember → Name | docInfo i => i.getName | modDoc _ => Name.anonymous -def getDocString : ModuleMember → Option String +def getDocString : ModuleMember → Option (String ⊕ VersoDocString) | docInfo i => i.getDocString -| modDoc i => i.doc +| modDoc i => some (.inl i.doc) def shouldRender : ModuleMember → Bool | docInfo i => i.shouldRender diff --git a/DocGen4/Process/Base.lean b/DocGen4/Process/Base.lean index 00f14207..0b4d633a 100644 --- a/DocGen4/Process/Base.lean +++ b/DocGen4/Process/Base.lean @@ -29,7 +29,7 @@ structure NameInfo where /-- The doc string of the name if it exists. 
-/ - doc : Option String + doc : Option (String ⊕ VersoDocString) deriving Inhabited /-- diff --git a/DocGen4/Process/DocInfo.lean b/DocGen4/Process/DocInfo.lean index cd8e9954..5d5ee24a 100644 --- a/DocGen4/Process/DocInfo.lean +++ b/DocGen4/Process/DocInfo.lean @@ -105,7 +105,7 @@ def getAttrs : DocInfo → Array String | classInductiveInfo i => i.attrs | ctorInfo i => i.attrs -def getDocString : DocInfo → Option String +def getDocString : DocInfo → Option (String ⊕ VersoDocString) | axiomInfo i => i.doc | theoremInfo i => i.doc | opaqueInfo i => i.doc @@ -117,6 +117,18 @@ def getDocString : DocInfo → Option String | classInductiveInfo i => i.doc | ctorInfo i => i.doc +def getMarkdownDocString (i : DocInfo) : Option String := + i.getDocString.map fun + | .inl md => md + | .inr v => toMarkdown v +where + toMarkdown : VersoDocString → String + | .mk bs ps => Doc.MarkdownM.run' do + for b in bs do + Doc.ToMarkdown.toMarkdown b + for p in ps do + Doc.ToMarkdown.toMarkdown p + def shouldRender : DocInfo → Bool | axiomInfo i => i.render | theoremInfo i => i.render diff --git a/DocGen4/Process/NameInfo.lean b/DocGen4/Process/NameInfo.lean index 8946fc7c..96f25e8a 100644 --- a/DocGen4/Process/NameInfo.lean +++ b/DocGen4/Process/NameInfo.lean @@ -11,9 +11,63 @@ import DocGen4.Process.Attributes namespace DocGen4.Process open Lean Meta +open Lean.Parser.Tactic.Doc in +/-- Gets the rendered extensions for the given canonical tactic name as Verso content -/ +def getTacticExtensionText (env : Environment) (tactic : Name) : Option (Doc.Block ElabInline ElabBlock) := + let exts := getTacticExtensions env tactic + if exts.size == 0 then none + else + some <| .concat #[ + .para #[.text "Extensions:"], + .ul <| exts.map (⟨#[.para #[.text ·]]⟩) + ] + +open Lean.Parser.Term.Doc in +/-- +Renders the recommended spellings for the given declaration into Verso content for appending to +the docstring. 
+-/ +def getRecommendedSpellingText (env : Environment) (declName : Name) : Option (Doc.Block ElabInline ElabBlock) := Id.run do + let spellings := getRecommendedSpellingsForName env declName + if spellings.size == 0 then none + else some <| .concat #[ + .para #[.text "Conventions for notations in identifiers:"], + .ul (spellings.map bullet) + ] +where + bullet (spelling : RecommendedSpelling) : Doc.ListItem (Doc.Block ElabInline ElabBlock) := + let firstLine : Array (Doc.Inline ElabInline) := #[ + .text "The recommended spelling of ", + .code spelling.«notation», + .text " in identifiers is ", + .code spelling.recommendedSpelling + ] + let additionalInfoLines := spelling.additionalInformation?.map (·.split '\n' |>.toStringList) + .mk <| (#[.para ·]) <| match additionalInfoLines with + | none | some [] => firstLine ++ #[.text ".", .linebreak "\n", .linebreak "\n"] + | some [l] => firstLine ++ #[.text s!" ({l.trimAsciiEnd}).", .linebreak "\n", .linebreak "\n"] + | some ls => firstLine ++ #[.text ".", .linebreak "\n", .linebreak "\n", .text (String.join ls), .linebreak "\n", .linebreak "\n"] + + +open Lean.Parser.Tactic.Doc in +open Lean.Parser.Term.Doc in +def getDocString? (env : Environment) (name : Name) : IO (Option (String ⊕ VersoDocString)) := do + let name := alternativeOfTactic env name |>.getD name + match (← findInternalDocString? 
env name) with + | none => return none + | some (.inl markdown) => + let exts := getTacticExtensionString env name + let spellings := getRecommendedSpellingString env name + return some <| .inl <| markdown ++ exts ++ spellings + | some (.inr verso) => + let exts := getTacticExtensionText env name |>.map (#[·]) |>.getD #[] + let spellings := getRecommendedSpellingText env name |>.map (#[·]) |>.getD #[] + return some <| .inr <| { verso with text := verso.text ++ exts ++ spellings } + + def NameInfo.ofTypedName (n : Name) (t : Expr) : MetaM NameInfo := do let env ← getEnv - return { name := n, type := ← prettyPrintTerm t, doc := ← findDocString? env n} + return { name := n, type := ← prettyPrintTerm t, doc := ← getDocString? env n} /-- Pretty prints a `Lean.Parser.Term.bracketedBinder`. @@ -66,7 +120,7 @@ def Info.ofTypedName (n : Name) (t : Expr) : MetaM Info := do -- TODO: Maybe selection range is more relevant? Figure this out in the future | some range => return { - toNameInfo := { name := n, type, doc := ← findDocString? (← getEnv) n}, + toNameInfo := { name := n, type, doc := ← getDocString? (← getEnv) n}, args, declarationRange := range.range, attrs := ← getAllAttributes n From ec4cf3e501b76c1dd1915e87c7a2b7e03ede95d9 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 27 Jan 2026 06:43:07 +0100 Subject: [PATCH 009/106] chore: render code to HTML via RenderedCode This is the first step towards rendering HTML from the DB instead of directly. The serializable version of CodeWithInfos used here can be saved in the DB. The generated HTML is the same, modulo commit hashes and external URLs. 
--- DocGen4/DB.lean | 218 +--------------------------------- DocGen4/Output/Base.lean | 93 +++++++-------- DocGen4/RenderedCode.lean | 242 ++++++++++++++++++++++++++++++++++++++ lake-manifest.json | 2 +- 4 files changed, 289 insertions(+), 266 deletions(-) create mode 100644 DocGen4/RenderedCode.lean diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index a63a4336..3643437c 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -1,4 +1,5 @@ import DocGen4.Process +import DocGen4.RenderedCode import SQLite namespace DocGen4.DB @@ -201,223 +202,6 @@ instance : FromBinary VersoDocString where instance : SQLite.QueryParam VersoDocString := .asBlob -end -section -open Lean Widget Elab -open SQLite.Blob - -inductive SortFormer where - | type | prop | sort -deriving ToJson, FromJson, BEq, Hashable, Repr - -inductive RenderedCode.Tag where - | keyword - | string - | const (name : Lean.Name) - | sort (former : Option SortFormer) -deriving BEq, Hashable, Repr - -instance : ToBinary RenderedCode.Tag where - serializer - | .keyword, b => b.push 0 - | .string, b => b.push 1 - | .const n, b => b.push 2 |> ToBinary.serializer n - | .sort none, b => b.push 3 - | .sort (some .type), b => b.push 4 - | .sort (some .prop), b => b.push 5 - | .sort (some .sort), b => b.push 6 - -instance : FromBinary RenderedCode.Tag where - deserializer := do - match (← .byte) with - | 0 => return .keyword - | 1 => return .string - | 2 => .const <$> FromBinary.deserializer - | 3 => return .sort none - | 4 => return .sort (some .type) - | 5 => return .sort (some .prop) - | 6 => return .sort (some .sort) - | other => throw s!"Expected 0...7 for `Tag`, got {other}" - -partial instance [ToBinary α] : ToBinary (Lean.Widget.TaggedText α) where - serializer := go -where - go - | .text s, b => b.push 0 |> ToBinary.serializer s - | .tag a t, b => b.push 1 |> ToBinary.serializer a |> go t - | .append xs, b => - have : ToBinary (Lean.Widget.TaggedText α) := ⟨go⟩ - b.push 2 |> ToBinary.serializer xs - -partial 
instance [FromBinary α] : FromBinary (Lean.Widget.TaggedText α) where - deserializer := go -where - go := do - match (← .byte) with - | 0 => .text <$> FromBinary.deserializer - | 1 => .tag <$> FromBinary.deserializer <*> go - | 2 => - have : FromBinary (Lean.Widget.TaggedText α) := ⟨go⟩ - .append <$> FromBinary.deserializer - | other => throw s!"Expected 0...3 for `TaggedText`, got {other}" - -def RenderedCode := Lean.Widget.TaggedText RenderedCode.Tag -deriving Inhabited, BEq, Repr, ToBinary, FromBinary - -def RenderedCode.empty : RenderedCode := .append #[] - -open Lean.Widget in -mutual -partial def RenderedCode.pushRight (xs : Array RenderedCode) (x : RenderedCode) : Array RenderedCode := - if xs.size = 0 then #[x] - else xs.modify (xs.size - 1) (·.appendImpl x) - -partial def RenderedCode.pushLeft (x : RenderedCode) (xs : Array RenderedCode) : Array RenderedCode := - if xs.size = 0 then #[x] - else xs.modify 0 x.appendImpl - -partial def RenderedCode.appendImpl : RenderedCode → RenderedCode → RenderedCode - | .text "", x => x - | x, .text "" => x - | .append #[], x => x - | x, .append #[] => x - | .append xs, .append ys => .append (xs ++ ys) - | .append xs, y => .append (pushRight xs y) - | x, .append ys => .append (pushLeft x ys) - | .text x, .text y => .text (x ++ y) - | x, y => .append #[x, y] -end - -instance : Append RenderedCode := ⟨RenderedCode.appendImpl⟩ - -/-- -In Lean syntax declarations the following pattern is quite common: -``` -syntax term " + " term : term -``` -that is, we place spaces around the operator in the middle. When the -`InfoTree` framework provides us with information about what source token -corresponds to which identifier it will thus say that `" + "` corresponds to -`HAdd.hadd`. This is however not the way we want this to be linked, in the HTML -only `+` should be linked, taking care of this is what this function is -responsible for. 
--/ --- TODO dedup with original location -def splitWhitespaces (s : String) : String × String × String := - let length := s.length - let s := s.trimAsciiStart - let front := "".pushn ' ' (length - s.positions.count) - let length := s.positions.count - let s := s.trimAsciiEnd.copy - let back := "".pushn ' ' (length - s.length) - (front, s, back) - -def findWs (s : String.Slice) : s.Pos := go s.startPos -where - go (i : s.Pos) : s.Pos := - if h : i = s.endPos then i - else if (i.get h).isWhitespace then go (i.next h) - else i - termination_by i - --- This doesn't fail on malformed strings because it's better to give the user some feedback than --- none here. This tokenization is just to highlight keywords correctly. -def findString (s : String.Slice) : s.Pos := start s.startPos -where - start (i : s.Pos) : s.Pos := - if h : i = s.endPos then i - else if (i.get h) == '"' then contents (i.next h) - else i - contents (i : s.Pos) : s.Pos := - if h : i = s.endPos then i - else if (i.get h) == '\\' then escape (i.next h) - else if (i.get h) == '"' then i.next h - else contents (i.next h) - termination_by i - escape (i : s.Pos) : s.Pos := - if h : i = s.endPos then i - else contents (i.next h) - termination_by i - -def findOther (s : String.Slice) : s.Pos := go s.startPos -where - go (i : s.Pos) : s.Pos := - if h : i = s.endPos then i - else - let c := i.get h - if c == '"' then i - else if c.isWhitespace then i - else go (i.next h) - termination_by i - -def tokenize (txt : String) : RenderedCode := Id.run do - let mut todo := txt.drop 0 - let mut toks : RenderedCode := .empty - while !todo.isEmpty do - if todo.startsWith Char.isWhitespace then - let i := findWs todo - let ws := todo.sliceTo i - todo := todo.sliceFrom i - toks := toks ++ .text ws.copy - continue - else if todo.startsWith '"' then - let i := findString todo - let str := todo.sliceTo i - todo := todo.sliceFrom i - toks := toks ++ .tag .string (.text str.copy) - else - let i := findOther todo - let tok := 
todo.sliceTo i - todo := todo.sliceFrom i - let tok := tok.copy - if tok ∈ kws then - toks := toks ++ .tag .keyword (.text tok) - else - toks := toks ++ .text tok - continue - return toks -where - tokenEnder (str : String.Slice) : Bool := str.front?.map Char.isAlphanum |>.getD true - kws := ["let", "fun", "do", "match", "with", "if", "then", "else", "break", "continue", "for", "in", "mut"] - -partial def renderTagged - (doc : CodeWithInfos) : - RenderedCode := Id.run do - match doc with - | .text txt => - return tokenize txt - | .tag i t => - let {ctx := _, info, children := _} := i.info.val - match info with - | .ofTermInfo termInfo => - match termInfo.expr with - | .const n _ => - -- TODO replicate blacklist logic - match t with - | .text t => - let (front, t, back) := splitWhitespaces t - return .append #[.text front, .tag (.const n) (.text t), .text back] - | _ => - .tag (.const n) <$> renderTagged t - | .sort _u => - match t with - | .text t => - let sortPrefix :: rest := t.splitOn " " | unreachable! 
- let sortFormer := match sortPrefix with - | "Type" => some .type - | "Prop" => some .prop - | "Sort" => some .sort - | _ => none - let mut restStr := String.intercalate " " rest - if restStr.length != 0 then - restStr := " " ++ restStr - return .append #[.tag (.sort sortFormer) (.text sortPrefix), .text restStr] - | _ => - .tag (.sort none) <$> renderTagged t - | _ => renderTagged t - | _ => renderTagged t - | .append xs => xs.mapM renderTagged <&> (·.foldl (init := .empty) (· ++ ·)) - end def getDb (dbFile : System.FilePath) : IO SQLite := do diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index c964892f..42795b65 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -5,6 +5,7 @@ Authors: Henrik Böving -/ import DocGen4.Process import DocGen4.Output.ToHtmlFormat +import DocGen4.RenderedCode namespace DocGen4.Output @@ -280,64 +281,60 @@ def splitWhitespaces (s : String) : String × String × String := (front, s, back) /-- -Implementation for `infoFormatToHtml`. - -Returns (1) whether the HTML contains an anchor tag and (2) the resulting HTML. +Convert RenderedCode to HTML with declaration links. +Returns (hasAnchor, html) where hasAnchor indicates if the result contains an anchor tag. +This is used to avoid creating nested anchors (invalid HTML). -/ -private partial def infoFormatToHtmlAux (i : CodeWithInfos) : HtmlM (Bool × Array Html) := do - match i with +partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM (Bool × Array Html) := do + match code with | .text t => return (false, #[t]) - | .append tt => tt.foldlM (fun (a?, acc) t => do - let (a?', acc') ← infoFormatToHtmlAux t - return (a? 
|| a?', acc ++ acc')) (false, #[]) - | .tag a t => - match a.info.val.info with - | Info.ofTermInfo i => - let cleanExpr := i.expr.consumeMData - match cleanExpr with - | .const name _ => - -- TODO: this is some very primitive blacklisting but real Blacklisting needs MetaM - -- find a better solution - if (← getResult).name2ModIdx.contains name then - match t with - | .text t => - let (front, t, back) := splitWhitespaces t - let elem := {t} - return (true, #[Html.text front, elem, Html.text back]) - | _ => - toHtmlMaybeLink t (← declNameToLink name) + | .append xs => + xs.foldlM (init := (false, #[])) fun (a?, acc) t => do + let (a?', acc') ← renderedCodeToHtmlAux t + pure (a? || a?', acc ++ acc') + | .tag tag inner => + let (innerHasAnchor, innerHtml) ← renderedCodeToHtmlAux inner + match tag with + | .const name => + if (← getResult).name2ModIdx.contains name then + let link ← declNameToLink name + -- Avoid nested anchors: if inner content already has anchors, don't wrap again + -- Match original behavior: no fn wrapper when const is in name2ModIdx + if innerHasAnchor then + return (true, innerHtml) else - toHtmlWrapFn t - | .sort _ => - match t with - | .text t => - let sortPrefix :: rest := t.splitOn " " | unreachable! 
- let sortLink := {sortPrefix} - let mut restStr := String.intercalate " " rest - if restStr.length != 0 then - restStr := " " ++ restStr - return (true, #[sortLink, Html.text restStr]) - | _ => - toHtmlMaybeLink t s!"{← getRoot}foundational_types.html" - | _ => toHtmlWrapFn t - | _ => toHtmlWrapFn t + return (true, #[[innerHtml]]) + else + return (innerHasAnchor, fn innerHtml) + | .sort _ => + let link := s!"{← getRoot}foundational_types.html" + -- Avoid nested anchors + -- Match original behavior: no fn wrapper when creating sort link + if innerHasAnchor then + return (true, innerHtml) + else + return (true, #[[innerHtml]]) + -- For Phase 1 compatibility: treat keyword/string as plain content (no extra styling) + -- This matches the original infoFormatToHtml behavior + | .keyword => return (innerHasAnchor, innerHtml) + | .string => return (innerHasAnchor, innerHtml) + | .otherExpr => return (innerHasAnchor, fn innerHtml) where - toHtmlWrapFn (t : TaggedText SubexprInfo) : HtmlM (Bool × Array Html) := do - let (a?, acc) ← infoFormatToHtmlAux t - return (a?, #[[acc]]) - toHtmlMaybeLink (t : TaggedText SubexprInfo) (link : String) : HtmlM (Bool × Array Html) := do - let (a?, acc) ← infoFormatToHtmlAux t - if a? then - return (true, acc) - else - return (true, #[[acc]]) + fn (html : Array Html) : Array Html := #[[html]] + +/-- +Convert RenderedCode to HTML with declaration links. +-/ +def renderedCodeToHtml (code : RenderedCode) : HtmlM (Array Html) := + Prod.snd <$> renderedCodeToHtmlAux code /- Turns a `CodeWithInfos` object, that is basically a Lean syntax tree with information about what the identifiers mean, into an HTML object that links to as much information as possible. 
-/ -def infoFormatToHtml (i : CodeWithInfos) : HtmlM (Array Html) := Prod.snd <$> infoFormatToHtmlAux i +def infoFormatToHtml (i : CodeWithInfos) : HtmlM (Array Html) := + renderedCodeToHtml (renderTagged i) def baseHtmlHeadDeclarations : BaseHtmlM (Array Html) := do return #[ diff --git a/DocGen4/RenderedCode.lean b/DocGen4/RenderedCode.lean new file mode 100644 index 00000000..4a7ea58c --- /dev/null +++ b/DocGen4/RenderedCode.lean @@ -0,0 +1,242 @@ +/- +Copyright (c) 2021 Henrik Böving. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Henrik Böving +-/ +import Lean +import SQLite + +namespace DocGen4 + +open Lean Widget Elab +open SQLite.Blob + +/-- +Used in `RenderedCode.Tag` to track what kind of sort this is. +-/ +inductive SortFormer where + | type | prop | sort +deriving ToJson, FromJson, BEq, Hashable, Repr + +/-- +Tags for code elements in rendered code. Used to indicate semantic meaning +for HTML rendering (linking, syntax highlighting). 
+-/ +inductive RenderedCode.Tag where + | keyword + | string + | const (name : Lean.Name) + | sort (former : Option SortFormer) + | otherExpr +deriving BEq, Hashable, Repr + +instance : ToBinary RenderedCode.Tag where + serializer + | .keyword, b => b.push 0 + | .string, b => b.push 1 + | .const n, b => b.push 2 |> ToBinary.serializer n + | .sort none, b => b.push 3 + | .sort (some .type), b => b.push 4 + | .sort (some .prop), b => b.push 5 + | .sort (some .sort), b => b.push 6 + | .otherExpr, b => b.push 7 + +instance : FromBinary RenderedCode.Tag where + deserializer := do + match (← .byte) with + | 0 => return .keyword + | 1 => return .string + | 2 => .const <$> FromBinary.deserializer + | 3 => return .sort none + | 4 => return .sort (some .type) + | 5 => return .sort (some .prop) + | 6 => return .sort (some .sort) + | 7 => return .otherExpr + | other => throw s!"Expected 0...8 for `Tag`, got {other}" + +partial instance [ToBinary α] : ToBinary (Lean.Widget.TaggedText α) where + serializer := go +where + go + | .text s, b => b.push 0 |> ToBinary.serializer s + | .tag a t, b => b.push 1 |> ToBinary.serializer a |> go t + | .append xs, b => + have : ToBinary (Lean.Widget.TaggedText α) := ⟨go⟩ + b.push 2 |> ToBinary.serializer xs + +partial instance [FromBinary α] : FromBinary (Lean.Widget.TaggedText α) where + deserializer := go +where + go := do + match (← .byte) with + | 0 => .text <$> FromBinary.deserializer + | 1 => .tag <$> FromBinary.deserializer <*> go + | 2 => + have : FromBinary (Lean.Widget.TaggedText α) := ⟨go⟩ + .append <$> FromBinary.deserializer + | other => throw s!"Expected 0...3 for `TaggedText`, got {other}" + +/-- +A simplified representation of code with semantic tags for rendering. +Unlike `CodeWithInfos`, this only contains the information needed for HTML rendering +(links to declarations, syntax highlighting) and can be serialized to/from the database. 
+-/ +def RenderedCode := Lean.Widget.TaggedText RenderedCode.Tag +deriving Inhabited, BEq, Repr, ToBinary, FromBinary + +def RenderedCode.empty : RenderedCode := .append #[] + +open Lean.Widget in +mutual +partial def RenderedCode.pushRight (xs : Array RenderedCode) (x : RenderedCode) : Array RenderedCode := + if xs.size = 0 then #[x] + else xs.modify (xs.size - 1) (·.appendImpl x) + +partial def RenderedCode.pushLeft (x : RenderedCode) (xs : Array RenderedCode) : Array RenderedCode := + if xs.size = 0 then #[x] + else xs.modify 0 x.appendImpl + +partial def RenderedCode.appendImpl : RenderedCode → RenderedCode → RenderedCode + | .text "", x => x + | x, .text "" => x + | .append #[], x => x + | x, .append #[] => x + | .append xs, .append ys => .append (xs ++ ys) + | .append xs, y => .append (pushRight xs y) + | x, .append ys => .append (pushLeft x ys) + | .text x, .text y => .text (x ++ y) + | x, y => .append #[x, y] +end + +instance : Append RenderedCode := ⟨RenderedCode.appendImpl⟩ + +/-- +In Lean syntax declarations the following pattern is quite common: +``` +syntax term " + " term : term +``` +that is, we place spaces around the operator in the middle. When the +`InfoTree` framework provides us with information about what source token +corresponds to which identifier it will thus say that `" + "` corresponds to +`HAdd.hadd`. This is however not the way we want this to be linked, in the HTML +only `+` should be linked, taking care of this is what this function is +responsible for. 
+-/ +def splitWhitespaces (s : String) : String × String × String := + let length := s.length + let s := s.trimAsciiStart + let front := "".pushn ' ' (length - s.positions.count) + let length := s.positions.count + let s := s.trimAsciiEnd.copy + let back := "".pushn ' ' (length - s.length) + (front, s, back) + +private def findWs (s : String.Slice) : s.Pos := go s.startPos +where + go (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else if (i.get h).isWhitespace then go (i.next h) + else i + termination_by i + +-- This doesn't fail on malformed strings because it's better to give the user some feedback than +-- none here. This tokenization is just to highlight keywords correctly. +private def findString (s : String.Slice) : s.Pos := start s.startPos +where + start (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else if (i.get h) == '"' then contents (i.next h) + else i + contents (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else if (i.get h) == '\\' then escape (i.next h) + else if (i.get h) == '"' then i.next h + else contents (i.next h) + termination_by i + escape (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else contents (i.next h) + termination_by i + +private def findOther (s : String.Slice) : s.Pos := go s.startPos +where + go (i : s.Pos) : s.Pos := + if h : i = s.endPos then i + else + let c := i.get h + if c == '"' then i + else if c.isWhitespace then i + else go (i.next h) + termination_by i + +private def tokenize (txt : String) : RenderedCode := Id.run do + let mut todo := txt.drop 0 + let mut toks : RenderedCode := .empty + while !todo.isEmpty do + if todo.startsWith Char.isWhitespace then + let i := findWs todo + let ws := todo.sliceTo i + todo := todo.sliceFrom i + toks := toks ++ .text ws.copy + continue + else if todo.startsWith '"' then + let i := findString todo + let str := todo.sliceTo i + todo := todo.sliceFrom i + toks := toks ++ .tag .string (.text str.copy) + else + let i := findOther todo + let tok := 
todo.sliceTo i + todo := todo.sliceFrom i + let tok := tok.copy + if tok ∈ kws then + toks := toks ++ .tag .keyword (.text tok) + else + toks := toks ++ .text tok + continue + return toks +where + tokenEnder (str : String.Slice) : Bool := str.front?.map Char.isAlphanum |>.getD true + kws := ["let", "fun", "do", "match", "with", "if", "then", "else", "break", "continue", "for", "in", "mut"] + +/-- +Convert `CodeWithInfos` (from Lean's pretty printer) to `RenderedCode` +by extracting only the information needed for HTML rendering. +-/ +partial def renderTagged (doc : CodeWithInfos) : RenderedCode := Id.run do + match doc with + | .text txt => + return tokenize txt + | .tag i t => + let {ctx := _, info, children := _} := i.info.val + match info with + | .ofTermInfo termInfo => + match termInfo.expr with + | .const n _ => + -- TODO replicate blacklist logic + match t with + | .text t => + let (front, t, back) := splitWhitespaces t + return .append #[.text front, .tag (.const n) (.text t), .text back] + | _ => + .tag (.const n) <$> renderTagged t + | .sort _u => + match t with + | .text t => + let sortPrefix :: rest := t.splitOn " " | unreachable! 
+ let sortFormer := match sortPrefix with + | "Type" => some .type + | "Prop" => some .prop + | "Sort" => some .sort + | _ => none + let mut restStr := String.intercalate " " rest + if restStr.length != 0 then + restStr := " " ++ restStr + return .append #[.tag (.sort sortFormer) (.text sortPrefix), .text restStr] + | _ => + .tag (.sort none) <$> renderTagged t + | _ => .tag .otherExpr <$> renderTagged t + | _ => .tag .otherExpr <$> renderTagged t + | .append xs => xs.mapM renderTagged <&> (·.foldl (init := .empty) (· ++ ·)) + +end DocGen4 diff --git a/lake-manifest.json b/lake-manifest.json index 2e2b7ebc..7e91a092 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -25,7 +25,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "cff8377dbe50aae42cbd04213d5b3dacf742c3ba", + "rev": "8668e1ab7c987fb8ed1349f14c3b7b60bd5f27b6", "name": "UnicodeBasic", "manifestFile": "lake-manifest.json", "inputRev": "main", From 9489cfdbb5de4dedc53723ebf36f6ef751ebbc21 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 27 Jan 2026 07:55:37 +0100 Subject: [PATCH 010/106] refactor: save RenderedCode instead of CodeWithInfos This is preliminary to generating HTML from the database. The output is still unchanged, modulo commit hashes and source URLs. 
--- DocGen4/DB.lean | 16 ++++++++-------- DocGen4/Output/Arg.lean | 2 +- DocGen4/Output/Definition.lean | 4 ++-- DocGen4/Output/Inductive.lean | 4 ++-- DocGen4/Output/Module.lean | 4 ++-- DocGen4/Output/Structure.lean | 4 ++-- DocGen4/Process/Base.lean | 15 ++++++++------- DocGen4/Process/DefinitionInfo.lean | 6 +++--- DocGen4/Process/DocInfo.lean | 2 +- DocGen4/Process/NameInfo.lean | 9 +++++---- 10 files changed, 34 insertions(+), 32 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 3643437c..8e1a9db7 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -423,7 +423,7 @@ structure DB where saveAxiom (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit saveOpaque (modName : String) (position : Int64) (safety : Lean.DefinitionSafety) : IO Unit saveDefinition (modName : String) (position : Int64) (isUnsafe : Bool) (hints : Lean.ReducibilityHints) (isNonComputable : Bool) : IO Unit - saveDefinitionEquation (modName : String) (position : Int64) (code : Lean.Widget.CodeWithInfos) (sequence : Int64) : IO Unit + saveDefinitionEquation (modName : String) (position : Int64) (code : RenderedCode) (sequence : Int64) : IO Unit saveInstance (modName : String) (position : Int64) (className : String) : IO Unit saveInstanceArg (modName : String) (position : Int64) (sequence : Int64) (typeName : String) : IO Unit saveInductive (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit @@ -431,8 +431,8 @@ structure DB where saveClassInductive (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit saveStructure (modName : String) (position : Int64) (isClass : Bool) : IO Unit saveStructureConstructor (modName : String) (position : Int64) (ctorPos : Int64) (info : Process.NameInfo) : IO Unit - saveStructureParent (modName : String) (position : Int64) (sequence : Int32) (projectionFn : String) (type : Lean.Widget.CodeWithInfos) : IO Unit - saveStructureField (modName : String) (position : Int64) (sequence : Int64) (name : String) (type : 
Lean.Widget.CodeWithInfos) (render : Bool) (isDirect : Bool) : IO Unit + saveStructureParent (modName : String) (position : Int64) (sequence : Int32) (projectionFn : String) (type : RenderedCode) : IO Unit + saveStructureField (modName : String) (position : Int64) (sequence : Int64) (name : String) (type : RenderedCode) (render : Bool) (isDirect : Bool) : IO Unit def DB.saveDocstring (db : DB) (modName : String) (position : Int64) (text : String ⊕ Lean.VersoDocString) : IO Unit := match text with @@ -465,9 +465,9 @@ instance : SQLite.QueryParam Lean.ReducibilityHints where | .regular i => SQLite.QueryParam.bind stmt index i.toNat.toInt64 open SQLite.Blob in -instance : SQLite.QueryParam Lean.Widget.CodeWithInfos where +instance : SQLite.QueryParam RenderedCode where bind stmt index code := Id.run do - let str := ToBinary.serializer (renderTagged code) .empty + let str := ToBinary.serializer code .empty SQLite.QueryParam.bind stmt index str def ensureDb (dbFile : System.FilePath) : IO DB := do @@ -547,7 +547,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveDefinitionStmt.bind 5 isNonComputable run saveDefinitionStmt let saveDefinitionEquationStmt ← sqlite.prepare "INSERT INTO definition_equations (module_name, position, code, sequence) VALUES (?, ?, ?, ?)" - let saveDefinitionEquation modName position code sequence := withTableName "definition_equations" do + let saveDefinitionEquation modName position (code : RenderedCode) sequence := withTableName "definition_equations" do saveDefinitionEquationStmt.bind 1 modName saveDefinitionEquationStmt.bind 2 position saveDefinitionEquationStmt.bind 3 code @@ -604,7 +604,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do | none => pure () let saveStructureParentStmt ← sqlite.prepare "INSERT INTO structure_parents (module_name, position, sequence, projection_fn, type) VALUES (?, ?, ?, ?, ?)" - let saveStructureParent modName position sequence projectionFn type := withTableName "structure_parents" do + let 
saveStructureParent modName position sequence projectionFn (type : RenderedCode) := withTableName "structure_parents" do saveStructureParentStmt.bind 1 modName saveStructureParentStmt.bind 2 position saveStructureParentStmt.bind 3 sequence @@ -612,7 +612,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveStructureParentStmt.bind 5 type run saveStructureParentStmt let saveStructureFieldStmt ← sqlite.prepare "INSERT INTO structure_fields (module_name, position, sequence, name, type, render, is_direct) VALUES (?, ?, ?, ?, ?, ?, ?)" - let saveStructureField modName position sequence name type render isDirect := withTableName "structure_fields" do + let saveStructureField modName position sequence name (type : RenderedCode) render isDirect := withTableName "structure_fields" do saveStructureFieldStmt.bind 1 modName saveStructureFieldStmt.bind 2 position saveStructureFieldStmt.bind 3 sequence diff --git a/DocGen4/Output/Arg.lean b/DocGen4/Output/Arg.lean index 3d1c66c5..536f7268 100644 --- a/DocGen4/Output/Arg.lean +++ b/DocGen4/Output/Arg.lean @@ -10,7 +10,7 @@ Render an `Arg` as HTML, adding opacity effects etc. depending on what type of binder it has. -/ def argToHtml (arg : Process.Arg) : HtmlM Html := do - let node ← infoFormatToHtml arg.binder + let node ← renderedCodeToHtml arg.binder let inner := [node] let html := Html.element "span" false #[("class", "decl_args")] #[inner] if arg.implicit then diff --git a/DocGen4/Output/Definition.lean b/DocGen4/Output/Definition.lean index c468cebf..5027f775 100644 --- a/DocGen4/Output/Definition.lean +++ b/DocGen4/Output/Definition.lean @@ -11,8 +11,8 @@ open Lean Widget /-- This is basically an arbitrary number that seems to work okay. -/ def equationLimit : Nat := 200 -def equationToHtml (c : CodeWithInfos) : HtmlM Html := do - return
  • [← infoFormatToHtml c]
  • +def equationToHtml (c : RenderedCode) : HtmlM Html := do + return
  • [← renderedCodeToHtml c]
  • /-- Attempt to render all `simp` equations for this definition. At a size diff --git a/DocGen4/Output/Inductive.lean b/DocGen4/Output/Inductive.lean index 042c8aa5..ca3ddd34 100644 --- a/DocGen4/Output/Inductive.lean +++ b/DocGen4/Output/Inductive.lean @@ -24,13 +24,13 @@ def ctorToHtml (c : Process.ConstructorInfo) : HtmlM Html := do let renderedDoc ← docStringToHtml doc name pure
  • - {shortName} [args] {" : "} [← infoFormatToHtml c.type] + {shortName} [args] {" : "} [← renderedCodeToHtml c.type]
    [renderedDoc]
  • else pure
  • - {shortName} [args] {" : "} [← infoFormatToHtml c.type] + {shortName} [args] {" : "} [← renderedCodeToHtml c.type]
  • def inductiveToHtml (i : Process.InductiveInfo) : HtmlM (Array Html) := do diff --git a/DocGen4/Output/Module.lean b/DocGen4/Output/Module.lean index 2fda2e44..c6e4d3da 100644 --- a/DocGen4/Output/Module.lean +++ b/DocGen4/Output/Module.lean @@ -32,7 +32,7 @@ def structureInfoHeader (s : Process.StructureInfo) : HtmlM (Array Html) := do for parent in s.parents, i in [0:s.parents.size] do if i > 0 then parents := parents.push (Html.text ", ") - parents := parents ++ (← infoFormatToHtml parent.type) + parents := parents ++ (← renderedCodeToHtml parent.type) nodes := nodes ++ parents return nodes @@ -57,7 +57,7 @@ def docInfoHeader (doc : DocInfo) : HtmlM Html := do | _ => nodes := nodes nodes := nodes.push <| Html.element "span" true #[("class", "decl_args")] #[" :"] - nodes := nodes.push
    [← infoFormatToHtml doc.getType]
    + nodes := nodes.push
    [← renderedCodeToHtml doc.getType]
    return
    [nodes]
    /-- diff --git a/DocGen4/Output/Structure.lean b/DocGen4/Output/Structure.lean index d6579ece..7bf513cd 100644 --- a/DocGen4/Output/Structure.lean +++ b/DocGen4/Output/Structure.lean @@ -25,13 +25,13 @@ def fieldToHtml (f : Process.FieldInfo) : HtmlM Html := do pure #[] pure
  • -
    {shortName} [args] {" : "} [← infoFormatToHtml f.type]
    +
    {shortName} [args] {" : "} [← renderedCodeToHtml f.type]
    [doc]
  • else pure
  • -
    {shortName} [args] {" : "} [← infoFormatToHtml f.type]
    +
    {shortName} [args] {" : "} [← renderedCodeToHtml f.type]
  • /-- diff --git a/DocGen4/Process/Base.lean b/DocGen4/Process/Base.lean index 0b4d633a..49ec9fd2 100644 --- a/DocGen4/Process/Base.lean +++ b/DocGen4/Process/Base.lean @@ -5,6 +5,7 @@ Authors: Henrik Böving -/ import Lean +import DocGen4.RenderedCode namespace DocGen4.Process open Lean Widget Meta @@ -25,7 +26,7 @@ structure NameInfo where /-- The pretty printed type of this name. -/ - type : CodeWithInfos + type : RenderedCode /-- The doc string of the name if it exists. -/ @@ -39,7 +40,7 @@ structure Arg where /-- The pretty printed binder syntax itself. -/ - binder : CodeWithInfos + binder : RenderedCode /-- Whether the binder is implicit. -/ @@ -101,7 +102,7 @@ Information about a `def` declaration, note that partial defs are handled by `Op structure DefinitionInfo extends Info where isUnsafe : Bool hints : ReducibilityHints - equations : Option (Array CodeWithInfos) + equations : Option (Array RenderedCode) isNonComputable : Bool deriving Inhabited @@ -145,7 +146,7 @@ structure StructureParentInfo where /-- Name of the projection function. -/ projFn : Name /-- Pretty printed type. -/ - type : CodeWithInfos + type : RenderedCode /-- Information about a `structure` declaration. @@ -205,9 +206,9 @@ def DocInfo.toInfo : DocInfo → Info | .ctorInfo info => info /-- -Turns an `Expr` into a pretty printed `CodeWithInfos`. +Turns an `Expr` into a pretty printed `RenderedCode`. 
-/ -def prettyPrintTerm (expr : Expr) : MetaM CodeWithInfos := do +def prettyPrintTerm (expr : Expr) : MetaM RenderedCode := do let ⟨fmt, infos⟩ ← PrettyPrinter.ppExprWithInfos expr let tt := TaggedText.prettyTagged fmt let ctx := { @@ -219,7 +220,7 @@ def prettyPrintTerm (expr : Expr) : MetaM CodeWithInfos := do fileMap := default, ngen := ← getNGen } - tagCodeInfos ctx infos tt + return renderTagged (← tagCodeInfos ctx infos tt) def isInstance (declName : Name) : MetaM Bool := do return (instanceExtension.getState (← getEnv)).instanceNames.contains declName diff --git a/DocGen4/Process/DefinitionInfo.lean b/DocGen4/Process/DefinitionInfo.lean index 1d08a9a5..9ac8b549 100644 --- a/DocGen4/Process/DefinitionInfo.lean +++ b/DocGen4/Process/DefinitionInfo.lean @@ -20,14 +20,14 @@ def valueToEq (v : DefinitionVal) : MetaM Expr := withLCtx {} {} do let type ← mkForallFVars xs type return type -def prettyPrintEquation (expr : Expr) : MetaM CodeWithInfos := +def prettyPrintEquation (expr : Expr) : MetaM RenderedCode := Meta.forallTelescope expr.consumeMData (fun _ e => prettyPrintTerm e) -def processEq (eq : Name) : MetaM CodeWithInfos := do +def processEq (eq : Name) : MetaM RenderedCode := do let type ← (mkConstWithFreshMVarLevels eq >>= inferType) prettyPrintEquation type -def computeEquations? (v : DefinitionVal) : AnalyzeM (Array CodeWithInfos) := do +def computeEquations? (v : DefinitionVal) : AnalyzeM (Array RenderedCode) := do unless (← read).genEquations do return #[] let eqs? ← getEqnsFor? v.name match eqs? 
with diff --git a/DocGen4/Process/DocInfo.lean b/DocGen4/Process/DocInfo.lean index 5d5ee24a..452190a0 100644 --- a/DocGen4/Process/DocInfo.lean +++ b/DocGen4/Process/DocInfo.lean @@ -57,7 +57,7 @@ def getKind : DocInfo → String | classInductiveInfo _ => "class" | ctorInfo _ => "ctor" -- TODO: kind ctor support in js -def getType : DocInfo → CodeWithInfos +def getType : DocInfo → RenderedCode | axiomInfo i => i.type | theoremInfo i => i.type | opaqueInfo i => i.type diff --git a/DocGen4/Process/NameInfo.lean b/DocGen4/Process/NameInfo.lean index 96f25e8a..7e69e7dc 100644 --- a/DocGen4/Process/NameInfo.lean +++ b/DocGen4/Process/NameInfo.lean @@ -7,6 +7,7 @@ import Lean import DocGen4.Process.Base import DocGen4.Process.Attributes +import DocGen4.RenderedCode namespace DocGen4.Process open Lean Meta @@ -72,7 +73,7 @@ def NameInfo.ofTypedName (n : Name) (t : Expr) : MetaM NameInfo := do /-- Pretty prints a `Lean.Parser.Term.bracketedBinder`. -/ -private def prettyPrintBinder (stx : Syntax) (infos : SubExpr.PosMap Elab.Info) : MetaM Widget.CodeWithInfos := do +private def prettyPrintBinder (stx : Syntax) (infos : SubExpr.PosMap Elab.Info) : MetaM RenderedCode := do let fmt ← PrettyPrinter.format Parser.Term.bracketedBinder.formatter stx let tt := Widget.TaggedText.prettyTagged fmt let ctx := { @@ -84,9 +85,9 @@ private def prettyPrintBinder (stx : Syntax) (infos : SubExpr.PosMap Elab.Info) fileMap := default, ngen := ← getNGen } - Widget.tagCodeInfos ctx infos tt + return renderTagged (← Widget.tagCodeInfos ctx infos tt) -private def prettyPrintTermStx (stx : Term) (infos : SubExpr.PosMap Elab.Info) : MetaM Widget.CodeWithInfos := do +private def prettyPrintTermStx (stx : Term) (infos : SubExpr.PosMap Elab.Info) : MetaM RenderedCode := do let fmt ← PrettyPrinter.formatTerm stx let tt := Widget.TaggedText.prettyTagged fmt let ctx := { @@ -98,7 +99,7 @@ private def prettyPrintTermStx (stx : Term) (infos : SubExpr.PosMap Elab.Info) : fileMap := default, ngen := ← getNGen 
} - Widget.tagCodeInfos ctx infos tt + return renderTagged (← Widget.tagCodeInfos ctx infos tt) def Info.ofTypedName (n : Name) (t : Expr) : MetaM Info := do -- Use the main signature delaborator. We need to run sanitization, parenthesization, and formatting ourselves From 931fd6a034677414e2a889dbfea4d2e11389301c Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Fri, 30 Jan 2026 10:50:01 +0100 Subject: [PATCH 011/106] chore: check differences between HTML output The scripts indicate that the output is the same, modulo minor differences in automatic linking --- DocGen4/DB.lean | 722 ++++++++++++- DocGen4/Output/Base.lean | 68 +- DocGen4/Process/DocInfo.lean | 29 +- Main.lean | 60 +- lake-manifest.json | 2 +- lakefile.lean | 29 + scripts/check_diff_soup.py | 1849 ++++++++++++++++++++++++++++++++++ scripts/compare_docs.py | 789 +++++++++++++++ scripts/compare_pre_post.py | 1224 ++++++++++++++++++++++ 9 files changed, 4730 insertions(+), 42 deletions(-) create mode 100755 scripts/check_diff_soup.py create mode 100644 scripts/compare_docs.py create mode 100644 scripts/compare_pre_post.py diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 8e1a9db7..04e4e345 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -211,7 +211,11 @@ def getDb (dbFile : System.FilePath) : IO SQLite := do db.exec "PRAGMA busy_timeout = 5000" db.exec "PRAGMA journal_mode = WAL" db.exec "PRAGMA foreign_keys = ON" - db.transaction (db.exec ddl) + try + db.transaction (db.exec ddl) + catch + | e => + throw <| .userError s!"Exception while creating schema: {e}" return db where ddl := @@ -295,6 +299,14 @@ CREATE TABLE IF NOT EXISTS axioms ( FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); +-- Internal names (like recursors) that aren't rendered but should link to a rendered declaration +CREATE TABLE IF NOT EXISTS internal_names ( + name TEXT NOT NULL PRIMARY KEY, + target_module TEXT NOT NULL, + target_position INTEGER NOT NULL, + 
FOREIGN KEY (target_module, target_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + CREATE TABLE IF NOT EXISTS constructors ( module_name TEXT NOT NULL, position INTEGER NOT NULL, @@ -334,6 +346,7 @@ CREATE TABLE IF NOT EXISTS definitions ( is_unsafe INTEGER NOT NULL, hints TEXT NOT NULL, is_noncomputable INTEGER NOT NULL, + has_equations INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); @@ -347,6 +360,15 @@ CREATE TABLE IF NOT EXISTS definition_equations ( FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); +-- Trigger to ensure has_equations is true when equations are inserted +CREATE TRIGGER IF NOT EXISTS ensure_has_equations_on_insert +AFTER INSERT ON definition_equations +BEGIN + UPDATE definitions + SET has_equations = 1 + WHERE module_name = NEW.module_name AND position = NEW.position AND has_equations = 0; +END; + CREATE TABLE IF NOT EXISTS instances ( module_name TEXT NOT NULL, position INTEGER NOT NULL, @@ -395,13 +417,44 @@ CREATE TABLE IF NOT EXISTS structure_constructors ( CREATE TABLE IF NOT EXISTS structure_fields ( module_name TEXT NOT NULL, position INTEGER NOT NULL, - name TEXT NOT NULL, - type TEXT NOT NULL, - render INTEGER NOT NULL, sequence INTEGER NOT NULL, + proj_name TEXT NOT NULL, + type BLOB NOT NULL, is_direct INTEGER NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE + -- Note: No FK on proj_name because the projection function may be in a different module + -- (for inherited fields) that hasn't been processed yet. The JOIN at load time handles this. 
+); + +CREATE TABLE IF NOT EXISTS structure_field_args ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + field_sequence INTEGER NOT NULL, + arg_sequence INTEGER NOT NULL, + binder BLOB NOT NULL, + is_implicit INTEGER NOT NULL, + PRIMARY KEY (module_name, position, field_sequence, arg_sequence), + FOREIGN KEY (module_name, position, field_sequence) REFERENCES structure_fields(module_name, position, sequence) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS declaration_args ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + binder BLOB NOT NULL, + is_implicit INTEGER NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS declaration_attrs ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + attr TEXT NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); "# @@ -422,7 +475,7 @@ structure DB where saveInfo (modName : String) (position : Int64) (kind : String) (info : Process.Info) : IO Unit saveAxiom (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit saveOpaque (modName : String) (position : Int64) (safety : Lean.DefinitionSafety) : IO Unit - saveDefinition (modName : String) (position : Int64) (isUnsafe : Bool) (hints : Lean.ReducibilityHints) (isNonComputable : Bool) : IO Unit + saveDefinition (modName : String) (position : Int64) (isUnsafe : Bool) (hints : Lean.ReducibilityHints) (isNonComputable : Bool) (hasEquations : Bool) : IO Unit saveDefinitionEquation (modName : String) (position : Int64) (code : RenderedCode) (sequence : Int64) : IO Unit saveInstance (modName : String) (position : Int64) (className : String) : IO Unit saveInstanceArg (modName : String) (position : Int64) (sequence : Int64) (typeName : String) : 
IO Unit @@ -431,8 +484,15 @@ structure DB where saveClassInductive (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit saveStructure (modName : String) (position : Int64) (isClass : Bool) : IO Unit saveStructureConstructor (modName : String) (position : Int64) (ctorPos : Int64) (info : Process.NameInfo) : IO Unit + /-- Save minimal info to name_info for name lookups (not for rendering) -/ + saveNameOnly (modName : String) (position : Int64) (kind : String) (name : Lean.Name) (type : RenderedCode) (declRange : Lean.DeclarationRange) : IO Unit saveStructureParent (modName : String) (position : Int64) (sequence : Int32) (projectionFn : String) (type : RenderedCode) : IO Unit - saveStructureField (modName : String) (position : Int64) (sequence : Int64) (name : String) (type : RenderedCode) (render : Bool) (isDirect : Bool) : IO Unit + saveStructureField (modName : String) (position : Int64) (sequence : Int64) (projName : String) (type : RenderedCode) (isDirect : Bool) : IO Unit + saveStructureFieldArg (modName : String) (position : Int64) (fieldSeq : Int64) (argSeq : Int64) (binder : RenderedCode) (isImplicit : Bool) : IO Unit + saveArg (modName : String) (position : Int64) (sequence : Int64) (binder : RenderedCode) (isImplicit : Bool) : IO Unit + saveAttr (modName : String) (position : Int64) (sequence : Int64) (attr : String) : IO Unit + /-- Save an internal name (like a recursor) that should link to its target declaration -/ + saveInternalName (name : Lean.Name) (targetModule : String) (targetPosition : Int64) : IO Unit def DB.saveDocstring (db : DB) (modName : String) (position : Int64) (text : String ⊕ Lean.VersoDocString) : IO Unit := match text with @@ -513,6 +573,8 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveDeclarationRangeStmt.bind 8 declRange.endCharUtf16 run saveDeclarationRangeStmt let saveInfoStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, sorried, render) VALUES (?, ?, ?, ?, ?, ?, ?)" 
+ let saveArgStmt' ← sqlite.prepare "INSERT INTO declaration_args (module_name, position, sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?)" + let saveAttrStmt' ← sqlite.prepare "INSERT INTO declaration_attrs (module_name, position, sequence, attr) VALUES (?, ?, ?, ?)" let saveInfo modName position kind (info : Process.Info) := withTableName "name_info" do saveInfoStmt.bind 1 modName saveInfoStmt.bind 2 position @@ -526,6 +588,25 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do | some (.inl md) => saveMarkdownDocstring modName position md | some (.inr v) => saveVersoDocstring modName position v | none => pure () + -- Save args + for h : j in 0...info.args.size do + let arg := info.args[j] + withTableName "declaration_args" do + saveArgStmt'.bind 1 modName + saveArgStmt'.bind 2 position + saveArgStmt'.bind 3 j.toInt64 + saveArgStmt'.bind 4 arg.binder + saveArgStmt'.bind 5 arg.implicit + run saveArgStmt' + -- Save attrs + for h : j in 0...info.attrs.size do + let attr := info.attrs[j] + withTableName "declaration_attrs" do + saveAttrStmt'.bind 1 modName + saveAttrStmt'.bind 2 position + saveAttrStmt'.bind 3 j.toInt64 + saveAttrStmt'.bind 4 attr + run saveAttrStmt' let saveAxiomStmt ← sqlite.prepare "INSERT INTO axioms (module_name, position, is_unsafe) VALUES (?, ?, ?)" let saveAxiom modName position isUnsafe := withTableName "axioms" do saveAxiomStmt.bind 1 modName @@ -538,13 +619,14 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveOpaqueStmt.bind 2 position saveOpaqueStmt.bind 3 safety run saveOpaqueStmt - let saveDefinitionStmt ← sqlite.prepare "INSERT INTO definitions (module_name, position, is_unsafe, hints, is_noncomputable) VALUES (?, ?, ?, ?, ?)" - let saveDefinition modName position isUnsafe hints isNonComputable := withTableName "definitions" do + let saveDefinitionStmt ← sqlite.prepare "INSERT INTO definitions (module_name, position, is_unsafe, hints, is_noncomputable, has_equations) VALUES (?, ?, ?, ?, ?, ?)" + let saveDefinition 
modName position isUnsafe hints isNonComputable hasEquations := withTableName "definitions" do saveDefinitionStmt.bind 1 modName saveDefinitionStmt.bind 2 position saveDefinitionStmt.bind 3 isUnsafe saveDefinitionStmt.bind 4 hints saveDefinitionStmt.bind 5 isNonComputable + saveDefinitionStmt.bind 6 hasEquations run saveDefinitionStmt let saveDefinitionEquationStmt ← sqlite.prepare "INSERT INTO definition_equations (module_name, position, code, sequence) VALUES (?, ?, ?, ?)" let saveDefinitionEquation modName position (code : RenderedCode) sequence := withTableName "definition_equations" do @@ -611,16 +693,58 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveStructureParentStmt.bind 4 projectionFn saveStructureParentStmt.bind 5 type run saveStructureParentStmt - let saveStructureFieldStmt ← sqlite.prepare "INSERT INTO structure_fields (module_name, position, sequence, name, type, render, is_direct) VALUES (?, ?, ?, ?, ?, ?, ?)" - let saveStructureField modName position sequence name (type : RenderedCode) render isDirect := withTableName "structure_fields" do + -- Store projection function name directly; lookup happens at load time + let saveStructureFieldStmt ← sqlite.prepare "INSERT INTO structure_fields (module_name, position, sequence, proj_name, type, is_direct) VALUES (?, ?, ?, ?, ?, ?)" + let saveStructureField modName position sequence projName (type : RenderedCode) isDirect := withTableName "structure_fields" do saveStructureFieldStmt.bind 1 modName saveStructureFieldStmt.bind 2 position saveStructureFieldStmt.bind 3 sequence - saveStructureFieldStmt.bind 4 name + saveStructureFieldStmt.bind 4 projName saveStructureFieldStmt.bind 5 type - saveStructureFieldStmt.bind 6 render - saveStructureFieldStmt.bind 7 isDirect + saveStructureFieldStmt.bind 6 isDirect run saveStructureFieldStmt + let saveStructureFieldArgStmt ← sqlite.prepare "INSERT INTO structure_field_args (module_name, position, field_sequence, arg_sequence, binder, is_implicit) VALUES 
(?, ?, ?, ?, ?, ?)" + let saveStructureFieldArg modName position fieldSeq argSeq (binder : RenderedCode) isImplicit := withTableName "structure_field_args" do + saveStructureFieldArgStmt.bind 1 modName + saveStructureFieldArgStmt.bind 2 position + saveStructureFieldArgStmt.bind 3 fieldSeq + saveStructureFieldArgStmt.bind 4 argSeq + saveStructureFieldArgStmt.bind 5 binder + saveStructureFieldArgStmt.bind 6 isImplicit + run saveStructureFieldArgStmt + let saveArgStmt ← sqlite.prepare "INSERT INTO declaration_args (module_name, position, sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?)" + let saveArg modName position sequence (binder : RenderedCode) isImplicit := withTableName "declaration_args" do + saveArgStmt.bind 1 modName + saveArgStmt.bind 2 position + saveArgStmt.bind 3 sequence + saveArgStmt.bind 4 binder + saveArgStmt.bind 5 isImplicit + run saveArgStmt + let saveAttrStmt ← sqlite.prepare "INSERT INTO declaration_attrs (module_name, position, sequence, attr) VALUES (?, ?, ?, ?)" + let saveAttr modName position sequence attr := withTableName "declaration_attrs" do + saveAttrStmt.bind 1 modName + saveAttrStmt.bind 2 position + saveAttrStmt.bind 3 sequence + saveAttrStmt.bind 4 attr + run saveAttrStmt + -- For saving minimal info to name_info for name lookups only (not rendering) + let saveNameOnlyStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, sorried, render) VALUES (?, ?, ?, ?, ?, 0, 0)" + let saveNameOnly modName position kind (name : Lean.Name) (type : RenderedCode) (declRange : Lean.DeclarationRange) := withTableName "name_info" do + saveNameOnlyStmt.bind 1 modName + saveNameOnlyStmt.bind 2 position + saveNameOnlyStmt.bind 3 kind + saveNameOnlyStmt.bind 4 name.toString + saveNameOnlyStmt.bind 5 type + run saveNameOnlyStmt + -- Also save declaration range + saveDeclarationRange modName position declRange + -- For saving internal names (like recursors) that link to their target declaration + let 
saveInternalNameStmt ← sqlite.prepare "INSERT OR IGNORE INTO internal_names (name, target_module, target_position) VALUES (?, ?, ?)" + let saveInternalName (name : Lean.Name) (targetModule : String) (targetPosition : Int64) := withTableName "internal_names" do + saveInternalNameStmt.bind 1 name.toString + saveInternalNameStmt.bind 2 targetModule + saveInternalNameStmt.bind 3 targetPosition + run saveInternalNameStmt pure { sqlite, deleteModule, @@ -642,7 +766,12 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveStructure, saveStructureConstructor, saveStructureParent, - saveStructureField + saveStructureField, + saveStructureFieldArg, + saveArg, + saveAttr, + saveNameOnly, + saveInternalName } end DB @@ -653,7 +782,9 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( let dbFile := buildDir / dbFile let db ← ensureDb dbFile let ms1 ← IO.monoMsNow - db.sqlite.transaction do + db.sqlite.transaction (mode := .immediate) do + -- Collect structure field info to save in second pass (after all declarations are in name_info) + let mut pendingStructureFields : Array (String × Int64 × Process.StructureInfo) := #[] for (modName, modInfo) in doc.moduleInfo do let modName := modName.toString db.deleteModule modName @@ -670,8 +801,10 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( db.saveMarkdownDocstring modName pos doc.doc | .docInfo info => let baseInfo := info.toInfo - db.saveInfo modName pos (infoKind info) baseInfo - db.saveDeclarationRange modName pos baseInfo.declarationRange + -- Skip saving ctorInfo here - they're saved along with their parent inductive + if !info.isCtorInfo then + db.saveInfo modName pos (infoKind info) baseInfo + db.saveDeclarationRange modName pos baseInfo.declarationRange match info with | .axiomInfo info => db.saveAxiom modName pos info.isUnsafe @@ -680,16 +813,24 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( | .opaqueInfo info => 
db.saveOpaque modName pos info.definitionSafety | .definitionInfo info => - db.saveDefinition modName pos info.isUnsafe info.hints info.isNonComputable + db.saveDefinition modName pos info.isUnsafe info.hints info.isNonComputable info.equations.isSome if let some eqns := info.equations then for h : j in 0...eqns.size do db.saveDefinitionEquation modName pos eqns[j] j.toInt64 | .instanceInfo info => + -- Save definition data (InstanceInfo extends DefinitionInfo) + db.saveDefinition modName pos info.isUnsafe info.hints info.isNonComputable info.equations.isSome + if let some eqns := info.equations then + for h : j in 0...eqns.size do + db.saveDefinitionEquation modName pos eqns[j] j.toInt64 + -- Save instance-specific data db.saveInstance modName pos info.className.toString for h : j in 0...info.typeNames.size do db.saveInstanceArg modName pos j.toInt64 info.typeNames[j].toString | .inductiveInfo info => db.saveInductive modName pos info.isUnsafe + -- Save recursors (main + aux) as internal names linking to this inductive + saveRecursors doc.name2ModIdx db modName pos info.name for ctor in info.ctors do let cpos := i i := i + 1 @@ -697,11 +838,17 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( db.saveDeclarationRange modName cpos ctor.declarationRange db.saveConstructor modName cpos pos | .structureInfo info => - i := (← (saveStructureInfo false info db modName pos).run i).2 + -- First pass: save structure metadata (not fields) + i := (← (saveStructureMetadata false info db modName pos doc.name2ModIdx).run i).2 + pendingStructureFields := pendingStructureFields.push (modName, pos, info) | .classInfo info => - i := (← (saveStructureInfo true info db modName pos).run i).2 + -- First pass: save structure metadata (not fields) + i := (← (saveStructureMetadata true info db modName pos doc.name2ModIdx).run i).2 + pendingStructureFields := pendingStructureFields.push (modName, pos, info) | .classInductiveInfo info => db.saveClassInductive 
modName pos info.isUnsafe + -- Save recursors (main + aux) as internal names linking to this class inductive + saveRecursors doc.name2ModIdx db modName pos info.name for ctor in info.ctors do let cpos := i i := i + 1 @@ -711,25 +858,69 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( | .ctorInfo info => -- Here we do nothing because they were inserted along with the inductive pure () + -- Second pass: save structure fields (now that all projection functions are in name_info) + for (modName, pos, info) in pendingStructureFields do + saveStructureFields info db modName pos let ms2 ← IO.monoMsNow (← IO.FS.Handle.mk "db-timing" .append).write <| s!"{doc.moduleInfo.keysArray}\t{ms2 - ms1}ms\n".toUTF8 pure () where - saveStructureInfo (isClass : Bool) (info : Process.StructureInfo) (db : DB) (modName : String) (pos : Int64) : StateT Int64 IO Unit := do + -- Save all recursors (main + aux) for an inductive type + -- Uses name2ModIdx (from env.const2ModIdx) to check if names exist + saveRecursors (name2ModIdx : Std.HashMap Lean.Name Lean.ModuleIdx) (db : DB) (modName : String) (pos : Int64) (indName : Lean.Name) : IO Unit := do + -- Save the main recursor + db.saveInternalName (Lean.mkRecName indName) modName pos + -- Save aux recursors if they exist in the environment + for auxName in [Lean.mkCasesOnName indName, Lean.mkRecOnName indName, Lean.mkBRecOnName indName] do + if name2ModIdx.contains auxName then + db.saveInternalName auxName modName pos + -- Special case: Eq and HEq also have ndrec and ndrecOn (non-dependent versions) + if indName == `Eq then + db.saveInternalName `Eq.ndrec modName pos + db.saveInternalName `Eq.ndrecOn modName pos + if indName == `HEq then + db.saveInternalName `HEq.ndrec modName pos + db.saveInternalName `HEq.ndrecOn modName pos + + -- First pass: save structure metadata (not fields) + saveStructureMetadata (isClass : Bool) (info : Process.StructureInfo) (db : DB) (modName : String) (pos : Int64) (name2ModIdx 
: Std.HashMap Lean.Name Lean.ModuleIdx) : StateT Int64 IO Unit := do db.saveStructure modName pos isClass + -- Save recursors for this structure + saveRecursors name2ModIdx db modName pos info.name modify (· + 1) - db.saveStructureConstructor modName pos (← get) info.ctor + let ctorPos := ← get + db.saveStructureConstructor modName pos ctorPos info.ctor + -- Also save to name_info for name lookups (constructor is not rendered separately) + -- Use the structure's declaration range for the constructor + db.saveNameOnly modName ctorPos "constructor" info.ctor.name info.ctor.type info.declarationRange let mut seq : Int32 := 0 for parent in info.parents do db.saveStructureParent modName pos seq parent.projFn.toString parent.type seq := seq + 1 modify (· + 1) + -- Skip field count to maintain position consistency + -- Note: We don't save projection functions here because: + -- - Direct fields (isDirect = true): projection function is saved when the definition is processed + -- - Inherited fields (isDirect = false): projection function is in a different module and will + -- be saved when that module is processed. The lookup at load time handles missing projections. 
+ modify (· + info.fieldInfo.size.toInt64) + + -- Second pass: save structure fields (after all projection functions are in name_info) + -- The INSERT...SELECT in saveStructureField handles the projection function lookup + saveStructureFields (info : Process.StructureInfo) (db : DB) (modName : String) (pos : Int64) : IO Unit := do + let mut fieldSeq : Int64 := 0 for field in info.fieldInfo do - let fpos ← get - modify (· + 1) - db.saveStructureField modName pos fpos field.name.toString field.type field.render field.isDirect - if let some doc := field.doc then db.saveDocstring modName fpos doc + let projName := field.name.toString + db.saveStructureField modName pos fieldSeq projName field.type field.isDirect + -- Save projection function name to internal_names so it can be linked + -- (projection functions like _private.*.field link to their parent structure) + db.saveInternalName field.name modName pos + -- Save field args to structure_field_args + for h : j in 0...field.args.size do + let arg := field.args[j] + db.saveStructureFieldArg modName pos fieldSeq j.toInt64 arg.binder arg.implicit + fieldSeq := fieldSeq + 1 infoKind : Process.DocInfo → String | .axiomInfo _ => "axiom" @@ -742,3 +933,482 @@ where | .classInfo info => "class" | .classInductiveInfo info => "class inductive" | .ctorInfo info => "constructor" + +/-! ## DB Reading -/ + +section Reading +open Lean SQLite.Blob + +/-- Open a database for reading. -/ +def openDbForReading (dbFile : System.FilePath) : IO SQLite := do + let db ← SQLite.openWith dbFile .readonly + db.exec "PRAGMA busy_timeout = 50000" + return db + +/-- Read RenderedCode from a blob. -/ +def readRenderedCode (blob : ByteArray) : IO RenderedCode := do + match fromBinary blob with + | .ok code => return code + | .error e => throw <| IO.userError s!"Failed to deserialize RenderedCode: {e}" + +/-- Read VersoDocString from a blob. 
-/
def readVersoDocString (blob : ByteArray) : IO VersoDocString := do
  match fromBinary blob with
  | .error e => throw <| IO.userError s!"Failed to deserialize VersoDocString: {e}"
  | .ok doc => return doc

/-- Get all module names from the database. -/
def getModuleNames (db : SQLite) : IO (Array Name) := withTableName "modules (B)" do
  let query ← db.prepare "SELECT name FROM modules ORDER BY name"
  let mut found : Array Name := #[]
  while (← query.step) do
    found := found.push (← query.columnText 0).toName
  return found

/-- Get all module source URLs from the database. -/
def getModuleSourceUrls (db : SQLite) : IO (Std.HashMap Name String) := withTableName "modules (C)" do
  let query ← db.prepare "SELECT name, source_url FROM modules WHERE source_url IS NOT NULL"
  let mut mapping : Std.HashMap Name String := {}
  while (← query.step) do
    let modName := (← query.columnText 0).toName
    let url ← query.columnText 1
    mapping := mapping.insert modName url
  return mapping

/-- Get all module imports from the database. -/
def getModuleImports (db : SQLite) (moduleName : Name) : IO (Array Name) := withTableName "module_imports" do
  let query ← db.prepare "SELECT imported FROM module_imports WHERE importer = ?"
  query.bind 1 moduleName.toString
  let mut found : Array Name := #[]
  while (← query.step) do
    found := found.push (← query.columnText 0).toName
  return found

/-- Build the name-to-module index needed for cross-linking. -/
def buildName2ModIdx (db : SQLite) (moduleNames : Array Name) : IO (Std.HashMap Name ModuleIdx) := do
  -- Index of each module in `moduleNames`; insertion order assigns the indices.
  let mut modNameToIdx : Std.HashMap Name ModuleIdx := {}
  for modName in moduleNames do
    modNameToIdx := modNameToIdx.insert modName modNameToIdx.size
  -- Every declaration in `name_info` maps to the index of its defining module.
  let declQuery ← db.prepare "SELECT name, module_name FROM name_info"
  let mut index : Std.HashMap Name ModuleIdx := {}
  while (← declQuery.step) do
    let declName := (← declQuery.columnText 0).toName
    let declModule := (← declQuery.columnText 1).toName
    match modNameToIdx[declModule]? with
    | some idx => index := index.insert declName idx
    | none => pure ()
  -- Internal names (like recursors) map to their target's module. `name_info`
  -- entries take precedence, so only add names that are not already present.
  let internalQuery ← db.prepare "SELECT name, target_module FROM internal_names"
  while (← internalQuery.step) do
    let internalName := (← internalQuery.columnText 0).toName
    if !index.contains internalName then
      let targetModule := (← internalQuery.columnText 1).toName
      if let some idx := modNameToIdx[targetModule]? then
        index := index.insert internalName idx
  return index

/-- Load declaration arguments from the database. -/
def loadArgs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Process.Arg) := withTableName "declaration_args" do
  let query ← db.prepare "SELECT binder, is_implicit FROM declaration_args WHERE module_name = ? AND position = ? ORDER BY sequence"
  query.bind 1 moduleName
  query.bind 2 position
  let mut found : Array Process.Arg := #[]
  while (← query.step) do
    let binder ← readRenderedCode (← query.columnBlob 0)
    let implicit := (← query.columnInt64 1) != 0
    found := found.push { binder, implicit }
  return found

/-- Load declaration attributes from the database.
-/
def loadAttrs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array String) := withTableName "declaration_attrs" do
  let query ← db.prepare "SELECT attr FROM declaration_attrs WHERE module_name = ? AND position = ? ORDER BY sequence"
  query.bind 1 moduleName
  query.bind 2 position
  let mut found : Array String := #[]
  while (← query.step) do
    found := found.push (← query.columnText 0)
  return found

/-- Load a docstring from the database. -/
def loadDocstring (db : SQLite) (moduleName : String) (position : Int64) : IO (Option (String ⊕ VersoDocString)) := withTableName "markdown_docstrings verso_docstrings" do
  -- A declaration has at most one docstring; Markdown is checked first.
  let mdQuery ← db.prepare "SELECT text FROM markdown_docstrings WHERE module_name = ? AND position = ?"
  mdQuery.bind 1 moduleName
  mdQuery.bind 2 position
  if (← mdQuery.step) then
    return some (.inl (← mdQuery.columnText 0))
  -- Otherwise fall back to a serialized Verso docstring.
  let versoQuery ← db.prepare "SELECT content FROM verso_docstrings WHERE module_name = ? AND position = ?"
  versoQuery.bind 1 moduleName
  versoQuery.bind 2 position
  if (← versoQuery.step) then
    let doc ← readVersoDocString (← versoQuery.columnBlob 0)
    return some (.inr doc)
  return none

/-- Load a declaration range from the database. -/
def loadDeclarationRange (db : SQLite) (moduleName : String) (position : Int64) : IO (Option DeclarationRange) := withTableName "declaration_ranges" do
  let query ← db.prepare "SELECT start_line, start_column, start_utf16, end_line, end_column, end_utf16 FROM declaration_ranges WHERE module_name = ? AND position = ?"
  query.bind 1 moduleName
  query.bind 2 position
  if !(← query.step) then
    return none
  -- Columns are stored as integers; clamp negatives defensively when widening to Nat.
  let startLine := (← query.columnInt64 0).toNatClampNeg
  let startColumn := (← query.columnInt64 1).toNatClampNeg
  let startUtf16 := (← query.columnInt64 2).toNatClampNeg
  let endLine := (← query.columnInt64 3).toNatClampNeg
  let endColumn := (← query.columnInt64 4).toNatClampNeg
  let endUtf16 := (← query.columnInt64 5).toNatClampNeg
  return some {
    pos := ⟨startLine, startColumn⟩
    charUtf16 := startUtf16
    endPos := ⟨endLine, endColumn⟩
    endCharUtf16 := endUtf16
  }

/-- Load base Info from the database row. Fails if the declaration range is missing. -/
def loadInfo (db : SQLite) (moduleName : String) (position : Int64) (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : IO Process.Info := do
  let type ← readRenderedCode typeBlob
  let doc ← loadDocstring db moduleName position
  let args ← loadArgs db moduleName position
  let attrs ← loadAttrs db moduleName position
  let some declarationRange ← loadDeclarationRange db moduleName position
    | throw <| IO.userError s!"Missing declaration range for {name}"
  return { name, type, doc, args, declarationRange, attrs, sorried, render }

/-- Load definition equations from the database.
    Takes hasEquations flag to distinguish `none` from `some #[]`. -/
def loadEquations (db : SQLite) (moduleName : String) (position : Int64) (hasEquations : Bool) : IO (Option (Array RenderedCode)) := withTableName "definition_equations" do
  if !hasEquations then return none
  let query ← db.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? ORDER BY sequence"
  query.bind 1 moduleName
  query.bind 2 position
  let mut found : Array RenderedCode := #[]
  while (← query.step) do
    found := found.push (← readRenderedCode (← query.columnBlob 0))
  return some found

/-- Load instance type names from the database.
-/
def loadInstanceArgs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Name) := do
  let query ← db.prepare "SELECT type_name FROM instance_args WHERE module_name = ? AND position = ? ORDER BY sequence"
  query.bind 1 moduleName
  query.bind 2 position
  let mut found : Array Name := #[]
  while (← query.step) do
    found := found.push (← query.columnText 0).toName
  return found

/-- Load structure parents from the database. -/
def loadStructureParents (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Process.StructureParentInfo) := do
  let query ← db.prepare "SELECT projection_fn, type FROM structure_parents WHERE module_name = ? AND position = ? ORDER BY sequence"
  query.bind 1 moduleName
  query.bind 2 position
  let mut found : Array Process.StructureParentInfo := #[]
  while (← query.step) do
    let projFn := (← query.columnText 0).toName
    let type ← readRenderedCode (← query.columnBlob 1)
    found := found.push { projFn, type }
  return found

/-- Load structure field args from the database. -/
def loadStructureFieldArgs (db : SQLite) (moduleName : String) (position : Int64) (fieldSeq : Int64) : IO (Array Process.Arg) := do
  let query ← db.prepare "SELECT binder, is_implicit FROM structure_field_args WHERE module_name = ? AND position = ? AND field_sequence = ? ORDER BY arg_sequence"
  query.bind 1 moduleName
  query.bind 2 position
  query.bind 3 fieldSeq
  let mut found : Array Process.Arg := #[]
  while (← query.step) do
    let binder ← readRenderedCode (← query.columnBlob 0)
    let implicit := (← query.columnInt64 1) != 0
    found := found.push { binder, implicit }
  return found

/-- Load structure fields from the database.
-/
def loadStructureFields (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Process.FieldInfo) := do
  -- Get structure fields; projection-function details are joined in by name below.
  let stmt ← db.prepare "SELECT sequence, proj_name, type, is_direct FROM structure_fields WHERE module_name = ? AND position = ? ORDER BY sequence"
  stmt.bind 1 moduleName
  stmt.bind 2 position
  let mut fields := #[]
  while (← stmt.step) do
    let fieldSeq := ← stmt.columnInt64 0
    let name := (← stmt.columnText 1).toName
    let typeBlob ← stmt.columnBlob 2
    let type ← readRenderedCode typeBlob
    let isDirect := (← stmt.columnInt64 3) != 0
    -- Look up the projection function by name to get its module, position, and
    -- render flag in one query (previously `render` was fetched with a second
    -- query against the very same `name_info` row).
    let projStmt ← db.prepare "SELECT module_name, position, render FROM name_info WHERE name = ? LIMIT 1"
    projStmt.bind 1 name.toString
    let (doc, attrs, declRange, render) ← if (← projStmt.step) then do
      let projModName ← projStmt.columnText 0
      let projPos ← projStmt.columnInt64 1
      let render := (← projStmt.columnInt64 2) != 0
      -- Load the projection function's docstring, attrs, and declaration range.
      let doc ← loadDocstring db projModName projPos
      let attrs ← loadAttrs db projModName projPos
      let declRange ← loadDeclarationRange db projModName projPos
      pure (doc, attrs, declRange, render)
    else
      -- Projection function not found in name_info - use defaults.
      -- This can happen for inherited fields whose parent module wasn't processed.
      pure (none, #[], none, true)
    -- Load field-specific args from structure_field_args.
    let args ← loadStructureFieldArgs db moduleName position fieldSeq
    fields := fields.push {
      name
      type
      doc
      args
      declarationRange := declRange.getD default
      attrs
      render
      isDirect
    }
  return fields

/-- Load the constructor of a structure from the database.
    The constructor's own docstring lives at `ctor_position`. -/
def loadStructureConstructor (db : SQLite) (moduleName : String) (position : Int64) : IO (Option Process.NameInfo) := do
  let stmt ← db.prepare "SELECT name, type, ctor_position FROM structure_constructors WHERE module_name = ? AND position = ?"
  stmt.bind 1 moduleName
  stmt.bind 2 position
  if (← stmt.step) then
    let name := (← stmt.columnText 0).toName
    let typeBlob ← stmt.columnBlob 1
    let ctorPos ← stmt.columnInt64 2
    let type ← readRenderedCode typeBlob
    let doc ← loadDocstring db moduleName ctorPos
    return some { name, type, doc }
  return none

/-- Load constructors for an inductive type, ordered by their DB position. -/
def loadConstructors (db : SQLite) (moduleName : String) (position : Int64) : IO (List Process.ConstructorInfo) := do
  let stmt ← db.prepare "SELECT c.position FROM constructors c WHERE c.module_name = ? AND c.type_position = ? ORDER BY c.position"
  stmt.bind 1 moduleName
  stmt.bind 2 position
  -- Accumulate into an array to avoid the quadratic `xs ++ [x]` list append.
  let mut ctors := #[]
  while (← stmt.step) do
    let ctorPos ← stmt.columnInt64 0
    -- Now load the full info for this constructor.
    let infoStmt ← db.prepare "SELECT name, type, sorried, render FROM name_info WHERE module_name = ? AND position = ?"
    infoStmt.bind 1 moduleName
    infoStmt.bind 2 ctorPos
    if (← infoStmt.step) then
      let name := (← infoStmt.columnText 0).toName
      let typeBlob ← infoStmt.columnBlob 1
      let sorried := (← infoStmt.columnInt64 2) != 0
      let render := (← infoStmt.columnInt64 3) != 0
      let info ← loadInfo db moduleName ctorPos name typeBlob sorried render
      ctors := ctors.push info
  return ctors.toList

/-- Parse a `ReducibilityHints` value as serialized in the `definitions` table. -/
private def parseHints (s : String) : ReducibilityHints :=
  match s with
  | "opaque" => .opaque
  | "abbrev" => .abbrev
  | other => .regular (other.toNat?.getD 0 |>.toUInt32)

/-- Read the `definitions` row shared by the `definition` and `instance` kinds.
    Returns `(isUnsafe, hints, isNonComputable, hasEquations)`, or `none` when
    no row exists for the given module/position. -/
private def loadDefinitionRow (db : SQLite) (moduleName : String) (position : Int64) : IO (Option (Bool × ReducibilityHints × Bool × Bool)) := do
  let stmt ← db.prepare "SELECT is_unsafe, hints, is_noncomputable, has_equations FROM definitions WHERE module_name = ? AND position = ?"
  stmt.bind 1 moduleName
  stmt.bind 2 position
  if (← stmt.step) then
    let isUnsafe := (← stmt.columnInt64 0) != 0
    let hints := parseHints (← stmt.columnText 1)
    let isNonComputable := (← stmt.columnInt64 2) != 0
    let hasEquations := (← stmt.columnInt64 3) != 0
    return some (isUnsafe, hints, isNonComputable, hasEquations)
  return none

/-- Load a DocInfo from the database based on its kind. Returns `none` when the
    kind is unknown or the kind-specific row is missing. -/
def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : String)
    (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : IO (Option Process.DocInfo) := do
  let info ← loadInfo db moduleName position name typeBlob sorried render
  match kind with
  | "axiom" =>
    let stmt ← db.prepare "SELECT is_unsafe FROM axioms WHERE module_name = ? AND position = ?"
    stmt.bind 1 moduleName
    stmt.bind 2 position
    if (← stmt.step) then
      let isUnsafe := (← stmt.columnInt64 0) != 0
      return some <| .axiomInfo { toInfo := info, isUnsafe }
    return none
  | "theorem" =>
    return some <| .theoremInfo { toInfo := info }
  | "opaque" =>
    let stmt ← db.prepare "SELECT safety FROM opaques WHERE module_name = ? AND position = ?"
    stmt.bind 1 moduleName
    stmt.bind 2 position
    if (← stmt.step) then
      let safetyStr ← stmt.columnText 0
      let safety := match safetyStr with
        | "unsafe" => .unsafe
        | "partial" => .partial
        | _ => .safe
      return some <| .opaqueInfo { toInfo := info, definitionSafety := safety }
    return none
  | "definition" =>
    let some (isUnsafe, hints, isNonComputable, hasEquations) ← loadDefinitionRow db moduleName position
      | return none
    let equations ← loadEquations db moduleName position hasEquations
    return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable }
  | "instance" =>
    let instStmt ← db.prepare "SELECT class_name FROM instances WHERE module_name = ? AND position = ?"
    instStmt.bind 1 moduleName
    instStmt.bind 2 position
    if (← instStmt.step) then
      let className := (← instStmt.columnText 0).toName
      -- An instance is also a definition; the shared row carries the rest.
      let some (isUnsafe, hints, isNonComputable, hasEquations) ← loadDefinitionRow db moduleName position
        | return none
      let equations ← loadEquations db moduleName position hasEquations
      let typeNames ← loadInstanceArgs db moduleName position
      return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable, className, typeNames }
    return none
  | "inductive" =>
    let stmt ← db.prepare "SELECT is_unsafe FROM inductives WHERE module_name = ? AND position = ?"
    stmt.bind 1 moduleName
    stmt.bind 2 position
    if (← stmt.step) then
      let isUnsafe := (← stmt.columnInt64 0) != 0
      let ctors ← loadConstructors db moduleName position
      return some <| .inductiveInfo { toInfo := info, isUnsafe, ctors }
    return none
  | "structure" =>
    -- The selected `is_class` column is unused: the query only checks row existence.
    let stmt ← db.prepare "SELECT is_class FROM structures WHERE module_name = ? AND position = ?"
    stmt.bind 1 moduleName
    stmt.bind 2 position
    if (← stmt.step) then
      let parents ← loadStructureParents db moduleName position
      let fieldInfo ← loadStructureFields db moduleName position
      let some ctor ← loadStructureConstructor db moduleName position
        | return none
      return some <| .structureInfo { toInfo := info, fieldInfo, parents, ctor }
    return none
  | "class" =>
    -- Same row shape as "structure"; the kind string decides the DocInfo variant.
    let stmt ← db.prepare "SELECT is_class FROM structures WHERE module_name = ? AND position = ?"
    stmt.bind 1 moduleName
    stmt.bind 2 position
    if (← stmt.step) then
      let parents ← loadStructureParents db moduleName position
      let fieldInfo ← loadStructureFields db moduleName position
      let some ctor ← loadStructureConstructor db moduleName position
        | return none
      return some <| .classInfo { toInfo := info, fieldInfo, parents, ctor }
    return none
  | "class inductive" =>
    let stmt ← db.prepare "SELECT is_unsafe FROM class_inductives WHERE module_name = ? AND position = ?"
    stmt.bind 1 moduleName
    stmt.bind 2 position
    if (← stmt.step) then
      let isUnsafe := (← stmt.columnInt64 0) != 0
      let ctors ← loadConstructors db moduleName position
      return some <| .classInductiveInfo { toInfo := info, isUnsafe, ctors }
    return none
  | "constructor" =>
    -- Constructors are handled as part of their parent inductive.
    return some <| .ctorInfo info
  | _ =>
    return none

/-- Load a module from the database.
-/
def loadModule (db : SQLite) (moduleName : Name) : IO Process.Module := do
  let modNameStr := moduleName.toString
  let imports ← getModuleImports db moduleName
  -- Collect every declaration recorded for this module, keyed by its DB
  -- position; the key is later used as a deterministic ordering tiebreaker.
  let declQuery ← db.prepare "
    SELECT n.position, n.kind, n.name, n.type, n.sorried, n.render
    FROM name_info n
    WHERE n.module_name = ?"
  declQuery.bind 1 modNameStr
  let mut collected : Array (Int64 × Process.ModuleMember) := #[]
  while (← declQuery.step) do
    let position ← declQuery.columnInt64 0
    let kind ← declQuery.columnText 1
    let name := (← declQuery.columnText 2).toName
    let typeBlob ← declQuery.columnBlob 3
    let sorried := (← declQuery.columnInt64 4) != 0
    let render := (← declQuery.columnInt64 5) != 0
    if let some docInfo ← loadDocInfo db modNameStr position kind name typeBlob sorried render then
      collected := collected.push (position, .docInfo docInfo)
  -- Module-level docstrings are Markdown rows at positions that do not belong
  -- to any declaration of this module.
  let docQuery ← db.prepare "
    SELECT m.position, m.text
    FROM markdown_docstrings m
    WHERE m.module_name = ?
    AND m.position NOT IN (SELECT position FROM name_info WHERE module_name = ?)"
  docQuery.bind 1 modNameStr
  docQuery.bind 2 modNameStr
  while (← docQuery.step) do
    let position ← docQuery.columnInt64 0
    let doc ← docQuery.columnText 1
    if let some declRange ← loadDeclarationRange db modNameStr position then
      collected := collected.push (position, .modDoc { doc, declarationRange := declRange })
  -- Order members by source position (line, column); fall back to the DB
  -- position so that members sharing a source position (e.g. auto-generated
  -- declarations) still sort deterministically.
  let ordered := collected.qsort fun (dbPos₁, member₁) (dbPos₂, member₂) =>
    let src₁ := member₁.getDeclarationRange.pos
    let src₂ := member₂.getDeclarationRange.pos
    if Position.lt src₁ src₂ then true
    else if Position.lt src₂ src₁ then false
    else dbPos₁ < dbPos₂
  return { name := moduleName, members := ordered.map (·.2), imports }

/-- Load all modules from the database. -/
def loadAllModules (db : SQLite) (moduleNames : Array Name) : IO (Std.HashMap Name Process.Module) := do
  let mut loaded : Std.HashMap Name Process.Module := {}
  for modName in moduleNames do
    loaded := loaded.insert modName (← loadModule db modName)
  return loaded

/-- Result of loading from the database, including source URLs. -/
structure LoadFromDbResult where
  result : Process.AnalyzerResult
  sourceUrls : Std.HashMap Name String

/-- Load a complete AnalyzerResult from the database. -/
def loadFromDb (dbFile : System.FilePath) : IO LoadFromDbResult := do
  let db ← openDbForReading dbFile
  let moduleNames ← getModuleNames db
  let sourceUrls ← getModuleSourceUrls db
  let name2ModIdx ← buildName2ModIdx db moduleNames
  let moduleInfo ← loadAllModules db moduleNames
  return { result := { name2ModIdx, moduleNames, moduleInfo }, sourceUrls }

end Reading

/--
For a name, try to find a linkable target by stripping suffix components
that are numeric or start with `_`. Returns the first name found in name2ModIdx,
or none if nothing is found.
-/
private def findLinkableParent (name2ModIdx : Std.HashMap Name ModuleIdx) (name : Name) : Option Name :=
  match name with
  | .anonymous => none
  -- Numeric components never resolve on their own; keep stripping.
  | .num parent _ => findLinkableParent name2ModIdx parent
  | .str parent s =>
    -- Components starting with `_` are compiler-generated, so skip straight to
    -- the parent; otherwise link this name if it is known, else keep stripping.
    if s.startsWith "_" then
      findLinkableParent name2ModIdx parent
    else if name2ModIdx.contains name then
      some name
    else
      findLinkableParent name2ModIdx parent

/--
Extract the module name from a private name prefix like `_private.Init.Prelude.0`.
Returns the module name (e.g., `Init.Prelude`).
-/
private def moduleFromPrivatePrefix (pfx : Name) : Name :=
  match pfx with
  | .num parent 0 => stripHeader parent
  | _ => .anonymous
where
  -- Rebuild the name component-by-component, cutting off the `_private` header.
  stripHeader (n : Name) : Name :=
    match n with
    | .str parent s =>
      if parent == Lean.privateHeader then
        .str .anonymous s
      else
        .str (stripHeader parent) s
    | _ => .anonymous
Auxiliary names like `Std.Do.Option.instWPMonad._proof_2` + let nameToSearch := Lean.privateToUserName? name |>.getD name + match findLinkableParent name2ModIdx nameToSearch with + | some target => + let link ← declNameToLink target + if innerHasAnchor then + return (true, innerHtml) + else + return (true, #[[innerHtml]]) + | none => + -- For private names, fall back to linking to the module itself (no anchor) + match Lean.privatePrefix? name with + | some pfx => + let modName := moduleFromPrivatePrefix pfx + if modName != .anonymous then + let link ← moduleNameToLink modName + if innerHasAnchor then + return (true, innerHtml) + else + return (true, #[[innerHtml]]) + else + return (innerHasAnchor, fn innerHtml) + | none => + return (innerHasAnchor, fn innerHtml) | .sort _ => let link := s!"{← getRoot}foundational_types.html" -- Avoid nested anchors diff --git a/DocGen4/Process/DocInfo.lean b/DocGen4/Process/DocInfo.lean index 452190a0..31201881 100644 --- a/DocGen4/Process/DocInfo.lean +++ b/DocGen4/Process/DocInfo.lean @@ -139,9 +139,26 @@ def shouldRender : DocInfo → Bool | structureInfo i => i.render | classInfo i => i.render | classInductiveInfo i => i.render -| ctorInfo i => i.render +| ctorInfo _ => false -- Constructors are rendered as part of their parent inductive + +def isCtorInfo : DocInfo → Bool +| ctorInfo _ => true +| _ => false + +/-- Returns `true` if `declName` is a field projection or a parent projection for a structure. -/ +def isProjFn (declName : Name) : MetaM Bool := do + let env ← getEnv + match declName with + | Name.str parent name => + let some si := getStructureInfo? env parent | return false + return getProjFnForField? 
env parent (Name.mkSimple name) == declName + || (si.parentInfo.any fun pi => pi.projFn == declName) + | _ => return false def isBlackListed (declName : Name) : MetaM Bool := do + -- Don't blacklist projection functions - we need them for structure field references + if ← isProjFn declName then + return false match ← findDeclarationRanges? declName with | some _ => let env ← getEnv @@ -154,16 +171,6 @@ def isBlackListed (declName : Name) : MetaM Bool := do -- TODO: Evaluate whether filtering out declarations without range is sensible | none => return true -/-- Returns `true` if `declName` is a field projection or a parent projection for a structure. -/ -def isProjFn (declName : Name) : MetaM Bool := do - let env ← getEnv - match declName with - | Name.str parent name => - let some si := getStructureInfo? env parent | return false - return getProjFnForField? env parent (Name.mkSimple name) == declName - || (si.parentInfo.any fun pi => pi.projFn == declName) - | _ => return false - def ofConstant : (Name × ConstantInfo) → AnalyzeM (Option DocInfo) := fun (name, info) => do if ← isBlackListed name then diff --git a/Main.lean b/Main.lean index 5a88a5e2..abdcb938 100644 --- a/Main.lean +++ b/Main.lean @@ -2,7 +2,7 @@ import DocGen4 import Lean import Cli -open DocGen4 Lean Cli +open DocGen4 DocGen4.DB DocGen4.Output Lean Cli def getTopLevelModules (p : Parsed) : IO (List String) := do let topLevelModules := p.variableArgsAs! String |>.toList @@ -61,6 +61,50 @@ def runDocGenCmd (_p : Parsed) : IO UInt32 := do IO.println "https://github.com/leanprover/doc-gen4" return 0 +/-- A source linker that uses URLs from the database, falling back to core module URLs -/ +def dbSourceLinker (sourceUrls : Std.HashMap Name String) (_gitUrl? 
: Option String) (module : Name) : Option DeclarationRange → String := + let root := module.getRoot + let leanHash := Lean.githash + if root == `Lean ∨ root == `Init ∨ root == `Std then + let parts := module.components.map (Name.toString (escape := false)) + let path := "/".intercalate parts + Output.SourceLinker.mkGithubSourceLinker s!"https://github.com/leanprover/lean4/blob/{leanHash}/src/{path}.lean" + else if root == `Lake then + let parts := module.components.map (Name.toString (escape := false)) + let path := "/".intercalate parts + Output.SourceLinker.mkGithubSourceLinker s!"https://github.com/leanprover/lean4/blob/{leanHash}/src/lake/{path}.lean" + else + -- Look up source URL from database + match sourceUrls[module]? with + | some url => + if url.startsWith "vscode://file/" then + Output.SourceLinker.mkVscodeSourceLinker url + else if url.startsWith "https://github.com" then + Output.SourceLinker.mkGithubSourceLinker url + else + fun _ => url + | none => + -- Fallback for modules without source URL + fun _ => "#" + +def runFromDbCmd (p : Parsed) : IO UInt32 := do + let buildDir := match p.flag? "build" with + | some dir => dir.as! String + | none => ".lake/build/doc-from-db" -- Different default for DB-generated docs + let dbPath := p.positionalArg! "db" |>.as! String + IO.println s!"Loading documentation from database: {dbPath}" + let dbResult ← loadFromDb dbPath + let result := dbResult.result + IO.println s!"Loaded {result.moduleNames.size} modules with {result.name2ModIdx.size} declarations" + -- Add `references` pseudo-module to hierarchy since references.html is always generated + let hierarchy := Hierarchy.fromArray (result.moduleNames.push `references) + let baseConfig ← getSimpleBaseContext buildDir hierarchy + IO.println s!"Generating HTML to: {buildDir}" + discard <| htmlOutputResults baseConfig result none (sourceLinker? := some (dbSourceLinker dbResult.sourceUrls)) + htmlOutputIndex baseConfig + IO.println "Done!" 
+ return 0 + def runBibPrepassCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! String @@ -135,6 +179,17 @@ def headerDataCmd := `[Cli| b, build : String; "Build directory." ] +def fromDbCmd := `[Cli| + fromDb VIA runFromDbCmd; + "Generate all HTML documentation from a SQLite database. Output goes to a separate directory for easy comparison with traditional generation." + + FLAGS: + b, build : String; "Output directory for generated docs (default: .lake/build/doc-from-db)" + + ARGS: + db : String; "Path to the SQLite database" +] + def docGenCmd : Cmd := `[Cli| "doc-gen4" VIA runDocGenCmd; ["0.1.0"] "A documentation generator for Lean 4." @@ -144,7 +199,8 @@ def docGenCmd : Cmd := `[Cli| indexCmd; genCoreCmd; bibPrepassCmd; - headerDataCmd + headerDataCmd; + fromDbCmd ] def main (args : List String) : IO UInt32 := diff --git a/lake-manifest.json b/lake-manifest.json index 7e91a092..382af8cd 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -5,7 +5,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "64e35cc2cc0959f4ff371933a86ac298f20bf1fe", + "rev": "5f76f5af0df8b908a91083be81027d03fb96c7fc", "name": "leansqlite", "manifestFile": "lake-manifest.json", "inputRev": "main", diff --git a/lakefile.lean b/lakefile.lean index 665a39ee..5a1a7fe3 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -346,3 +346,32 @@ library_facet docsHeader (lib) : FilePath := do args := #["headerData", "--build", buildDir.toString] } return dataFile + +/-- Generate documentation from the SQLite database. +This facet depends on the regular `docs` facet to ensure the database is populated first, +then generates HTML from the database into a separate directory for comparison. 
-/
library_facet dbdocs (lib) : FilePath := do
  -- The regular `docs` facet both builds the HTML and populates the database,
  -- so it has to run first.
  let docsJob ← fetch <| lib.facet `docs
  let exeJob ← «doc-gen4».fetch
  let buildDir := (← getRootPackage).buildDir
  let dbPath := buildDir / "lean-docs.db"
  let outputDir := buildDir / "doc-from-db"
  let outputDataDir := outputDir / "doc-data"
  let outputMarker := outputDir / "doc" / "index.html"
  docsJob.bindM fun _ => do
    exeJob.mapM fun exeFile => do
      buildFileUnlessUpToDate' outputMarker do
        -- Copy references.json to the DB output directory so navbar includes references link
        IO.FS.createDirAll outputDataDir
        let refsSource := buildDir / "doc-data" / "references.json"
        let refsTarget := outputDataDir / "references.json"
        if ← refsSource.pathExists then
          IO.FS.writeFile refsTarget (← IO.FS.readFile refsSource)
        logInfo s!"Generating documentation from database: {dbPath}"
        proc {
          cmd := exeFile.toString
          args := #["fromDb", "--build", outputDir.toString, dbPath.toString]
          env := ← getAugmentedEnv
        }
      return outputMarker

#!/usr/bin/env -S uv run --script
# /// script
# dependencies = ["beautifulsoup4", "lxml"]
# ///
"""
HTML Documentation Diff Comparison Tool

Compares two documentation directories, parsing HTML with BeautifulSoup
and supporting declarative rules for allowed differences.
+ +Usage: + uv run scripts/check_diff_soup.py +""" + +from __future__ import annotations + +import argparse +import difflib +import re +import sys +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable +from urllib.parse import unquote, urljoin + +from bs4 import BeautifulSoup, NavigableString, Tag + +# Parser configuration - uses lxml for speed. +# To use built-in parser instead, remove lxml from dependencies above +# and change PARSER to "html.parser" +PARSER = "lxml" + +# ANSI color codes for TTY output +IS_TTY = sys.stdout.isatty() +RED = "\033[91m" if IS_TTY else "" +GREEN = "\033[92m" if IS_TTY else "" +RESET = "\033[0m" if IS_TTY else "" +RED_BG = "\033[41m" if IS_TTY else "" +GREEN_BG = "\033[42m" if IS_TTY else "" +BOLD = "\033[1m" if IS_TTY else "" + + +def log(msg: str = "") -> None: + """Print a message and flush stdout immediately.""" + print(msg, flush=True) + + +# ============================================================================= +# Data Classes +# ============================================================================= + + +@dataclass +class DiffContext: + """Context object passed to rules for evaluating differences.""" + + file_path: str # Relative path of HTML file being compared + old_elem: Tag | None # Element in old version (None if added) + new_elem: Tag | None # Element in new version (None if removed) + old_ancestors: list[Tag] # Parent chain from old tree (nearest first) + new_ancestors: list[Tag] # Parent chain from new tree (nearest first) + diff_type: str # "attribute", "text", "element_replaced", "element_added", "element_removed" + attribute_name: str | None # For attribute diffs + old_value: Any # Old value (attribute value, text, etc.) 
+ new_value: Any # New value + old_targets: set[str] # Valid link targets in old docs + new_targets: set[str] # Valid link targets in new docs + + def _resolve_href(self, href: str) -> str: + """Resolve an href relative to the current file path.""" + if not href or href.startswith(("#", "javascript:", "mailto:", "data:")): + # Fragment-only or special URLs + if href.startswith("#"): + return f"{self.file_path}{href}" + return href + + # Handle external URLs + if href.startswith(("http://", "https://", "//")): + return href + + # Resolve relative path + current_dir = str(Path(self.file_path).parent) + if current_dir == ".": + resolved = href + else: + resolved = urljoin(self.file_path, href) + + # Normalize the path + resolved = unquote(resolved) + parts = resolved.split("#", 1) + path_part = parts[0] + + # Normalize path (resolve .. and .) + try: + normalized = str(Path(path_part).as_posix()) + # Handle paths that go above root + if normalized.startswith(".."): + normalized = path_part + except Exception: + normalized = path_part + + if len(parts) > 1: + return f"{normalized}#{parts[1]}" + return normalized + + def old_href_is_broken(self) -> bool: + """Check if old element's href pointed to nonexistent target.""" + if self.old_elem is None: + return False + href = self.old_elem.get("href") + if not href: + return False + + # Skip external links + if str(href).startswith(("http://", "https://", "//", "javascript:", "mailto:")): + return False + + resolved = self._resolve_href(str(href)) + + # Check if target exists + if resolved in self.old_targets: + return False + + # Check file without fragment + path_only = resolved.split("#")[0] + if path_only in self.old_targets: + # File exists, but fragment might not + if "#" in resolved: + return resolved not in self.old_targets + return False + + return True + + def new_href_is_broken(self) -> bool: + """Check if new element's href points to nonexistent target.""" + if self.new_elem is None: + return False + href = 
self.new_elem.get("href") + if not href: + return False + + # Skip external links + if str(href).startswith(("http://", "https://", "//", "javascript:", "mailto:")): + return False + + resolved = self._resolve_href(str(href)) + + # Check if target exists + if resolved in self.new_targets: + return False + + # Check file without fragment + path_only = resolved.split("#")[0] + if path_only in self.new_targets: + if "#" in resolved: + return resolved not in self.new_targets + return False + + return True + + def has_ancestor(self, selector: str) -> bool: + """Check if any ancestor in old tree matches CSS selector (simple matching).""" + # Parse simple selectors: tag, .class, tag.class, #id + tag_name = None + class_name = None + id_name = None + + if "#" in selector: + parts = selector.split("#", 1) + tag_name = parts[0] if parts[0] else None + id_name = parts[1] + elif "." in selector: + parts = selector.split(".", 1) + tag_name = parts[0] if parts[0] else None + class_name = parts[1] + else: + tag_name = selector + + for ancestor in self.old_ancestors: + if not isinstance(ancestor, Tag): + continue + + if tag_name and ancestor.name != tag_name: + continue + if class_name and class_name not in ancestor.get("class", []): + continue + if id_name and ancestor.get("id") != id_name: + continue + + return True + + return False + + +@dataclass +class Difference: + """Represents a detected difference between two HTML documents.""" + + file_path: str + diff_type: str + old_elem: Tag | None + new_elem: Tag | None + old_ancestors: list[Tag] + new_ancestors: list[Tag] + attribute_name: str | None = None + old_value: Any = None + new_value: Any = None + accepted: bool = False + reason: str | None = None + + +@dataclass +class FileComparisonResult: + """Result of comparing a single HTML file.""" + + file_path: str + differences: list[Difference] = field(default_factory=list) + error: str | None = None + elapsed_ms: float = 0.0 + + +# 
============================================================================= +# Rules +# ============================================================================= + +# Rule type: takes DiffContext, returns reason string if allowed, None if not handled +Rule = Callable[[DiffContext], str | None] + + +def allow_href_change_if_old_broken(ctx: DiffContext) -> str | None: + """Allow href changes on tags if old version pointed at nonexistent target.""" + if ctx.diff_type != "attribute" or ctx.attribute_name != "href": + return None + if ctx.old_elem and ctx.old_elem.name == "a" and ctx.old_href_is_broken(): + return "href changed because old target was broken" + return None + + +def allow_a_to_span_if_broken(ctx: DiffContext) -> str | None: + """Allow to be replaced by if link was broken.""" + if ctx.diff_type != "element_replaced": + return None + if ( + ctx.old_elem + and ctx.old_elem.name == "a" + and ctx.new_elem + and ctx.new_elem.name == "span" + and "fn" in ctx.new_elem.get("class", []) + ): + if ctx.old_href_is_broken(): + return " with broken link replaced by " + return None + + +def allow_unwrap_broken_link(ctx: DiffContext) -> str | None: + """Allow broken tags to be unwrapped (contents remain, tag removed).""" + if ctx.diff_type != "element_removed": + return None + if ctx.old_elem and ctx.old_elem.name == "a" and ctx.old_href_is_broken(): + return "broken tag unwrapped" + return None + + +def _check_href_valid(href: str, file_path: str, targets: set[str]) -> bool: + """Check if an href target exists in the given target set.""" + if not href: + return False + # Skip external links + if href.startswith(("http://", "https://", "//", "javascript:", "mailto:", "#")): + return True # External/anchor links are considered valid + + # Resolve relative path + from urllib.parse import unquote, urljoin + + current_dir = str(Path(file_path).parent) + if current_dir == ".": + resolved = href + else: + resolved = urljoin(file_path, href) + + resolved = 
unquote(resolved) + parts = resolved.split("#", 1) + path_part = parts[0] + + try: + normalized = str(Path(path_part).as_posix()) + if normalized.startswith(".."): + normalized = path_part + except Exception: + normalized = path_part + + if len(parts) > 1: + resolved = f"{normalized}#{parts[1]}" + else: + resolved = normalized + + # Check if target exists + if resolved in targets: + return True + # If there's a fragment, the exact target must exist + # (don't fall back to just the file) + if "#" in resolved: + return False + # No fragment - check if file exists + return resolved in targets + + +def allow_added_link_with_valid_target(ctx: DiffContext) -> str | None: + """Allow added elements or replacements with if their href target exists.""" + if ctx.diff_type not in ("element_added", "element_replaced"): + return None + if not ctx.new_elem or ctx.new_elem.name != "a": + return None + + href = ctx.new_elem.get("href") + if not href: + return None + + # Skip external links + if str(href).startswith(("http://", "https://", "//", "javascript:", "mailto:")): + return None + + # Check if target exists (inverse of new_href_is_broken) + if not ctx.new_href_is_broken(): + return "added link with valid target" + + return None + + +def allow_span_fn_to_link(ctx: DiffContext) -> str | None: + """Allow to be replaced by if the link target is valid.""" + if ctx.diff_type != "element_replaced": + return None + if not ctx.old_elem or ctx.old_elem.name != "span": + return None + if "fn" not in ctx.old_elem.get("class", []): + return None + if not ctx.new_elem or ctx.new_elem.name != "a": + return None + + # Check if the new link has a valid target + if not ctx.new_href_is_broken(): + return " replaced by link with valid target" + + return None + + +def allow_duplicate_li_removal_in_imports(ctx: DiffContext) -> str | None: + """Allow changes inside
 <div class="imports"> that remove duplicate
  • elements.""" + if not ctx.has_ancestor("div.imports"): + return None + + # Find the
  • ') + search_pos = next_close + 6 + + # Extract and process the signature div + sig_div = content[div_start:div_end] if depth == 0 else content[div_start:] + result_parts.append(strip_links_in_section(sig_div, in_signature=True)) + pos = div_end if depth == 0 else len(content) + + content = ''.join(result_parts) + + return content + + +def compare_html_files(file1: Path, file2: Path, base_dir1: Path, base_dir2: Path, rel_path: str, + valid_targets1: Optional[set[str]] = None, + valid_targets2: Optional[set[str]] = None) -> tuple[bool, str]: + """ + Compare two HTML files. + Returns (are_equal, diff_summary). + + Args: + file1: Path to first file (traditional) + file2: Path to second file (DB-generated) + base_dir1: Base directory of traditional docs + base_dir2: Base directory of DB-generated docs + rel_path: Relative path of the file within the docs + valid_targets1: Pre-built anchor index for traditional docs + valid_targets2: Pre-built anchor index for DB-generated docs + """ + try: + content1 = file1.read_text(encoding='utf-8') + content2 = file2.read_text(encoding='utf-8') + except Exception as e: + return False, f"Error reading files: {e}" + + # First try exact match + if content1 == content2: + return True, "" + + # Normalize with tidy + tidy1 = tidy_html(content1) + tidy2 = tidy_html(content2) + + if tidy1 is None or tidy2 is None: + return False, "tidy failed to process one or both files" + + rel_path_obj = Path(rel_path) + + # Step 1: Check for broken links that are unique to DB version + # If traditional also has the same broken links, that's not a DB-specific issue + broken_in_db = set(find_broken_links(tidy2, base_dir2, base_dir2 / rel_path_obj, valid_targets2)) + broken_in_trad = set(find_broken_links(tidy1, base_dir1, base_dir1 / rel_path_obj, valid_targets1)) + db_only_broken = broken_in_db - broken_in_trad + if db_only_broken: + return False, f"DB has {len(db_only_broken)} broken link(s): {', '.join(sorted(db_only_broken)[:3])}" + + # Step 2: 
Check that DB has all valid links from traditional + missing_links = find_missing_links( + tidy1, tidy2, + base_dir1, base_dir2, + base_dir1 / rel_path_obj, base_dir2 / rel_path_obj, + valid_targets1 + ) + if missing_links: + return False, f"DB missing {len(missing_links)} link(s): {', '.join(missing_links[:3])}" + + # Step 2.5: Check that link text matches (catches boundary changes) + text_mismatches = find_link_text_mismatches( + tidy1, tidy2, + base_dir1, base_dir2, + base_dir1 / rel_path_obj, base_dir2 / rel_path_obj, + valid_targets1 + ) + if text_mismatches: + return False, f"Link text mismatch: {text_mismatches[0]}" + + # Step 3: Normalize both by stripping all internal links + # This makes comparison ignore link presence differences (DB having more links is fine) + norm1 = normalize_html(tidy1, base_dir1, base_dir1 / rel_path_obj, valid_targets1, strip_all_internal_links=True) + norm2 = normalize_html(tidy2, base_dir2, base_dir2 / rel_path_obj, valid_targets2, strip_all_internal_links=True) + + if norm1 == norm2: + return True, "" + + # Generate a summary of differences + diff = list(difflib.unified_diff( + norm1.splitlines(keepends=True), + norm2.splitlines(keepends=True), + fromfile='traditional', + tofile='db-generated', + n=1, # context lines + )) + + if not diff: + return True, "" + + # Count changed lines + additions = sum(1 for line in diff if line.startswith('+') and not line.startswith('+++')) + deletions = sum(1 for line in diff if line.startswith('-') and not line.startswith('---')) + + return False, f"+{additions}/-{deletions} lines" + + +def compare_directories(traditional_dir: Path, db_dir: Path, verbose: bool = False) -> ComparisonResult: + """Compare all HTML files in two directories.""" + result = ComparisonResult() + + traditional_files = get_all_html_files(traditional_dir) + db_files = get_all_html_files(db_dir) + + # Find files only in one directory + result.only_in_traditional = sorted(traditional_files - db_files) + result.only_in_db = 
sorted(db_files - traditional_files) + + # Build anchor indices for both directories (for link validation) + if verbose: + print("Building anchor index for traditional docs...", file=sys.stderr) + valid_targets1 = build_anchor_index(traditional_dir, verbose) + if verbose: + print("Building anchor index for DB-generated docs...", file=sys.stderr) + valid_targets2 = build_anchor_index(db_dir, verbose) + + # Compare common files + common_files = sorted(traditional_files & db_files) + total = len(common_files) + + for i, rel_path in enumerate(common_files, 1): + if verbose: + print(f"\rComparing {i}/{total}: {rel_path[:60]:<60}", end="", file=sys.stderr) + + file1 = traditional_dir / rel_path + file2 = db_dir / rel_path + + try: + are_equal, diff_summary = compare_html_files( + file1, file2, traditional_dir, db_dir, rel_path, + valid_targets1, valid_targets2 + ) + if are_equal: + result.identical.append(rel_path) + else: + result.different.append((rel_path, diff_summary)) + except Exception as e: + result.errors.append((rel_path, str(e))) + + if verbose: + print("\r" + " " * 80 + "\r", end="", file=sys.stderr) + + return result + + +def print_report(result: ComparisonResult, show_identical: bool = False): + """Print a comparison report.""" + print("=" * 70) + print("HTML Documentation Comparison Report") + print("=" * 70) + print() + + # Summary + total_files = ( + len(result.identical) + + len(result.different) + + len(result.only_in_traditional) + + len(result.only_in_db) + ) + print(f"Total files examined: {total_files}") + print(f" Identical: {len(result.identical)}") + print(f" Different: {len(result.different)}") + print(f" Only in traditional: {len(result.only_in_traditional)}") + print(f" Only in DB-generated: {len(result.only_in_db)}") + print(f" Errors: {len(result.errors)}") + print() + + # Files only in traditional + if result.only_in_traditional: + print("-" * 70) + print(f"Files only in traditional ({len(result.only_in_traditional)}):") + for f in 
result.only_in_traditional[:20]: + print(f" {f}") + if len(result.only_in_traditional) > 20: + print(f" ... and {len(result.only_in_traditional) - 20} more") + print() + + # Files only in DB-generated + if result.only_in_db: + print("-" * 70) + print(f"Files only in DB-generated ({len(result.only_in_db)}):") + for f in result.only_in_db[:20]: + print(f" {f}") + if len(result.only_in_db) > 20: + print(f" ... and {len(result.only_in_db) - 20} more") + print() + + # Different files + if result.different: + print("-" * 70) + print(f"Different files ({len(result.different)}):") + for f, summary in result.different[:50]: + print(f" {f}: {summary}") + if len(result.different) > 50: + print(f" ... and {len(result.different) - 50} more") + print() + + # Errors + if result.errors: + print("-" * 70) + print(f"Errors ({len(result.errors)}):") + for f, err in result.errors[:20]: + print(f" {f}: {err}") + if len(result.errors) > 20: + print(f" ... and {len(result.errors) - 20} more") + print() + + # Identical files (if requested) + if show_identical and result.identical: + print("-" * 70) + print(f"Identical files ({len(result.identical)}):") + for f in result.identical[:50]: + print(f" {f}") + if len(result.identical) > 50: + print(f" ... 
and {len(result.identical) - 50} more") + print() + + # Final verdict + print("=" * 70) + if not result.different and not result.only_in_traditional and not result.only_in_db and not result.errors: + print("PASS: All files are identical!") + return 0 + else: + print("FAIL: Differences found") + return 1 + + +def show_diff(traditional_dir: Path, db_dir: Path, rel_path: str): + """Show detailed diff for a specific file.""" + file1 = traditional_dir / rel_path + file2 = db_dir / rel_path + + if not file1.exists(): + print(f"File not found in traditional: {rel_path}") + return + if not file2.exists(): + print(f"File not found in DB-generated: {rel_path}") + return + + # Build anchor indices for link validation + print("Building anchor indices...", file=sys.stderr) + valid_targets1 = build_anchor_index(traditional_dir) + valid_targets2 = build_anchor_index(db_dir) + + content1 = file1.read_text(encoding='utf-8') + content2 = file2.read_text(encoding='utf-8') + + tidy1 = tidy_html(content1) or content1 + tidy2 = tidy_html(content2) or content2 + + rel_path_obj = Path(rel_path) + + # Check for broken links in DB version + broken_links = find_broken_links(tidy2, db_dir, db_dir / rel_path_obj, valid_targets2) + if broken_links: + print(f"WARNING: DB has {len(broken_links)} broken link(s):", file=sys.stderr) + for link in broken_links[:10]: + print(f" {link}", file=sys.stderr) + + # Check for link text mismatches + text_mismatches = find_link_text_mismatches( + tidy1, tidy2, + traditional_dir, db_dir, + traditional_dir / rel_path_obj, db_dir / rel_path_obj, + valid_targets1 + ) + if text_mismatches: + print(f"WARNING: {len(text_mismatches)} link text mismatch(es):", file=sys.stderr) + for mismatch in text_mismatches[:10]: + print(f" {mismatch}", file=sys.stderr) + + # Normalize both by stripping internal links + norm1 = normalize_html(tidy1, traditional_dir, traditional_dir / rel_path_obj, valid_targets1, strip_all_internal_links=True) + norm2 = normalize_html(tidy2, db_dir, 
db_dir / rel_path_obj, valid_targets2, strip_all_internal_links=True) + + diff = difflib.unified_diff( + norm1.splitlines(keepends=True), + norm2.splitlines(keepends=True), + fromfile=f'traditional/{rel_path}', + tofile=f'db-generated/{rel_path}', + n=3, + ) + + for line in diff: + print(line, end='') + + +def main(): + parser = argparse.ArgumentParser( + description="Compare HTML documentation output between traditional and DB-generated docs." + ) + parser.add_argument( + "traditional_dir", + nargs="?", + default=".lake/build/doc", + help="Directory containing traditional docs (default: .lake/build/doc)", + ) + parser.add_argument( + "db_dir", + nargs="?", + default=".lake/build/doc-from-db/doc", + help="Directory containing DB-generated docs (default: .lake/build/doc-from-db/doc)", + ) + parser.add_argument( + "-v", "--verbose", + action="store_true", + help="Show progress during comparison", + ) + parser.add_argument( + "--show-identical", + action="store_true", + help="Also list identical files in the report", + ) + parser.add_argument( + "--diff", + metavar="FILE", + help="Show detailed diff for a specific file (relative path)", + ) + + args = parser.parse_args() + + traditional_dir = Path(args.traditional_dir) + db_dir = Path(args.db_dir) + + if not traditional_dir.exists(): + print(f"Error: Traditional docs directory not found: {traditional_dir}", file=sys.stderr) + sys.exit(1) + + if not db_dir.exists(): + print(f"Error: DB-generated docs directory not found: {db_dir}", file=sys.stderr) + sys.exit(1) + + if args.diff: + show_diff(traditional_dir, db_dir, args.diff) + return + + result = compare_directories(traditional_dir, db_dir, verbose=args.verbose) + exit_code = print_report(result, show_identical=args.show_identical) + sys.exit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/scripts/compare_pre_post.py b/scripts/compare_pre_post.py new file mode 100644 index 00000000..a2a3c7a3 --- /dev/null +++ b/scripts/compare_pre_post.py @@ -0,0 
+1,1224 @@ +# /// script +# dependencies = ["lxml", "xmldiff"] +# /// +""" +Compare two directories of HTML documentation output, producing tree-based diffs +and filtering for allowed vs non-allowed differences. +""" + +import argparse +import re +import sys +from concurrent.futures import ProcessPoolExecutor, as_completed +from dataclasses import dataclass +from functools import partial +from pathlib import Path +from typing import Callable, Iterator + +from lxml import html +from lxml.html import HtmlElement +from xmldiff import main as xmldiff_main +from xmldiff.actions import ( + DeleteAttrib, + DeleteNode, + InsertAttrib, + InsertNode, + MoveNode, + RenameAttrib, + RenameNode, + UpdateAttrib, + UpdateTextAfter, + UpdateTextIn, +) + + +# ============================================================================= +# LINK TARGET COLLECTION +# ============================================================================= + + +@dataclass +class LinkTargets: + """Valid link targets in a directory.""" + + files: set[Path] # All HTML files (relative paths) + ids: dict[Path, set[str]] # file -> set of IDs in that file + + def has_target(self, href: str, from_file: Path) -> bool: + """Check if a link href has a valid target.""" + if not href or href.startswith(("http://", "https://", "mailto:", "javascript:")): + return True # External links assumed valid + + # Parse href into file and fragment parts + if "#" in href: + file_part, fragment = href.split("#", 1) + else: + file_part, fragment = href, None + + # Determine target file + if file_part: + # Relative file link + target_file = (from_file.parent / file_part).resolve() + # Normalize to relative path + try: + # Find matching file in our set + for f in self.files: + if f.resolve() == target_file or str(f) == file_part: + target_file = f + break + else: + # Try resolving relative to from_file + rel_target = from_file.parent / file_part + if rel_target in self.files: + target_file = rel_target + else: + return False # 
File not found + except Exception: + return False + else: + # Same-file anchor link + target_file = from_file + + # Check fragment if present + if fragment: + file_ids = self.ids.get(target_file, set()) + return fragment in file_ids + + return target_file in self.files + + +def _extract_ids(tree: HtmlElement) -> set[str]: + """Extract all id attribute values from an HTML tree.""" + ids = set() + try: + for elem in tree.xpath("//*[@id]"): + if isinstance(elem, HtmlElement): + id_val = elem.get("id") + if id_val: + ids.add(id_val) + except Exception: + pass + return ids + + +def collect_link_targets(directory: Path, files: set[Path]) -> LinkTargets: + """Collect all valid link targets from HTML files in a directory.""" + ids: dict[Path, set[str]] = {} + + for rel_path in files: + file_path = directory / rel_path + try: + with open(file_path, "rb") as f: + tree = html.parse(f).getroot() + if tree is not None: + ids[rel_path] = _extract_ids(tree) + except Exception: + ids[rel_path] = set() + + return LinkTargets(files=files, ids=ids) + + +# ============================================================================= +# RULE SYSTEM +# ============================================================================= + + +@dataclass +class DiffContext: + """Context for evaluating whether a difference is allowed.""" + + action: object # xmldiff action + old_tree: HtmlElement | None + new_tree: HtmlElement | None + old_dir: Path + new_dir: Path + rel_path: Path # relative path of the file being compared + old_targets: LinkTargets | None = None # Valid link targets in old directory + new_targets: LinkTargets | None = None # Valid link targets in new directory + + +class Rule: + """Base class for composable diff rules.""" + + def __init__( + self, + predicate: Callable[[DiffContext], bool], + reason: str | None = None, + ): + self._predicate = predicate + self._reason = reason + + def __call__(self, ctx: DiffContext) -> bool: + return self._predicate(ctx) + + def 
check_with_reason(self, ctx: DiffContext) -> str | None: + """Check if rule matches and return the reason if it does.""" + if self._predicate(ctx): + return self._reason or "unknown reason" + return None + + def __and__(self, other: "Rule") -> "Rule": + def combined(ctx: DiffContext) -> bool: + return self(ctx) and other(ctx) + # Use the more specific rule's reason (the second one) + return Rule(combined, other._reason or self._reason) + + def __or__(self, other: "Rule") -> "Rule": + def combined(ctx: DiffContext) -> bool: + return self(ctx) or other(ctx) + # For OR, we need to check which one matched + combined_rule = Rule(combined) + def get_reason(ctx: DiffContext) -> str | None: + if self(ctx): + return self._reason + if other(ctx): + return other._reason + return None + combined_rule.check_with_reason = get_reason # type: ignore + return combined_rule + + def __invert__(self) -> "Rule": + return Rule(lambda ctx: not self(ctx), f"not {self._reason}" if self._reason else None) + + def with_reason(self, reason: str) -> "Rule": + """Return a copy of this rule with the given reason.""" + return Rule(self._predicate, reason) + + +def make_rule(predicate: Callable[[DiffContext], bool], reason: str | None = None) -> Rule: + """Create a rule from a predicate function.""" + return Rule(predicate, reason) + + +# ----------------------------------------------------------------------------- +# Base predicates +# ----------------------------------------------------------------------------- + + +def _is_deletion(ctx: DiffContext) -> bool: + """Check if action is a deletion.""" + return isinstance(ctx.action, (DeleteNode, DeleteAttrib)) + + +def _is_insertion(ctx: DiffContext) -> bool: + """Check if action is an insertion.""" + return isinstance(ctx.action, (InsertNode, InsertAttrib)) + + +def _is_text_change(ctx: DiffContext) -> bool: + """Check if action is a text change.""" + return isinstance(ctx.action, (UpdateTextIn, UpdateTextAfter)) + + +is_deletion = 
Rule(_is_deletion, "deletion") +is_insertion = Rule(_is_insertion, "insertion") +is_text_change = Rule(_is_text_change, "text change") + + +def _get_node_by_xpath(tree: HtmlElement | None, xpath: str) -> HtmlElement | None: + """Get a node from the tree by xpath.""" + if tree is None: + return None + try: + results = tree.xpath(xpath) + if results and len(results) > 0: + return results[0] + except Exception: + pass + return None + + +def _get_action_node(ctx: DiffContext) -> HtmlElement | None: + """Get the node affected by the action.""" + action = ctx.action + tree = ctx.old_tree if _is_deletion(ctx) else ctx.new_tree + + if tree is None: + return None + + node_path = None + if hasattr(action, "node"): + node_path = action.node + elif hasattr(action, "target"): + node_path = action.target + + if node_path is None: + return None + + return _get_node_by_xpath(tree, node_path) + + +def _whitespace_only(ctx: DiffContext) -> bool: + """Check if the change is whitespace-only.""" + action = ctx.action + + if isinstance(action, UpdateTextIn): + # For UpdateTextIn, we need to compare with what was there before + # The action contains the new text; we need to check if normalizing both + # old and new would result in the same content + if ctx.old_tree is None: + return False + node = _get_node_by_xpath(ctx.old_tree, action.node) + if node is None: + return False + old_content = node.text or "" + new_content = action.text or "" + return old_content.strip() == new_content.strip() + + if isinstance(action, UpdateTextAfter): + if ctx.old_tree is None: + return False + node = _get_node_by_xpath(ctx.old_tree, action.node) + if node is None: + return False + old_content = node.tail or "" + new_content = action.text or "" + return old_content.strip() == new_content.strip() + + return False + + +whitespace_only = Rule(_whitespace_only, "whitespace-only change") + + +def _is_no_change(ctx: DiffContext) -> bool: + """Check if this is a false-positive change where old and new values 
are identical.""" + action = ctx.action + + if isinstance(action, UpdateAttrib): + old_value = _get_attrib(ctx.old_tree, action.node, action.name) + new_value = action.value + return old_value == new_value + + if isinstance(action, InsertAttrib): + # If attribute already exists with same value in old tree, it's not a real change + old_value = _get_attrib(ctx.old_tree, action.node, action.name) + return old_value == action.value + + if isinstance(action, DeleteAttrib): + # If attribute doesn't exist in old tree, it's not a real change + old_value = _get_attrib(ctx.old_tree, action.node, action.name) + return old_value is None + + if isinstance(action, (UpdateTextIn, UpdateTextAfter)): + if isinstance(action, UpdateTextIn): + old_value = _get_element_text(ctx.old_tree, action.node) + else: + old_value = _get_element_tail(ctx.old_tree, action.node) + new_value = action.text + return old_value == new_value + + return False + + +no_actual_change = Rule(_is_no_change, "no actual change") + + +def _is_broken_link(ctx: DiffContext) -> bool: + """ + Check if the deleted element is a link that was already broken in the old version. + A link removal is allowed if the link didn't have a valid target in the old directory. + Only applies to deletions. 
+ """ + if not _is_deletion(ctx): + return False + + node = _get_action_node(ctx) + if node is None: + return False + + # Check if it's an tag or contains one + links = [] + if node.tag == "a": + links = [node] + else: + links = node.xpath(".//a[@href]") + + if not links: + return False + + # If we have pre-collected targets, use them + if ctx.old_targets is not None: + for link in links: + href = link.get("href", "") + if not href: + continue + # Check if this link had a valid target in the old directory + if not ctx.old_targets.has_target(href, ctx.rel_path): + return True # Link was broken in old version + return False + + # Fallback: check filesystem directly (less accurate) + for link in links: + href = link.get("href", "") + if not href: + continue + + # Skip external links + if href.startswith(("http://", "https://", "mailto:", "javascript:")): + continue + + # Check if the target exists in old directory + if href.startswith("#"): + # Anchor link - check if ID exists in the old document + anchor_id = href[1:] + if ctx.old_tree is not None: + found = ctx.old_tree.xpath(f'//*[@id="{anchor_id}"]') + if not found: + return True # Broken anchor link + else: + # File link - check if file exists in old directory + file_part = href.split("#")[0] + if file_part: + target_path = ctx.old_dir / ctx.rel_path.parent / file_part + if not target_path.exists(): + return True # Broken file link + + return False + + +broken_link = Rule(_is_broken_link, "broken link in old version") + + +def _is_broken_link_href_change(ctx: DiffContext) -> bool: + """ + Check if this is a change to an href attribute where the old href was broken. + Allows fixing broken links by changing their href to a valid target. 
+ """ + action = ctx.action + + # Only applies to UpdateAttrib on href + if not isinstance(action, UpdateAttrib): + return False + if action.name != "href": + return False + + # Get the old href value + old_href = _get_attrib(ctx.old_tree, action.node, "href") + if not old_href: + return False + + # Check if old href was broken + if ctx.old_targets is not None: + # Use pre-collected targets + return not ctx.old_targets.has_target(old_href, ctx.rel_path) + else: + # Fallback: check filesystem + if old_href.startswith(("http://", "https://", "mailto:", "javascript:")): + return False + if old_href.startswith("#"): + anchor_id = old_href[1:] + if ctx.old_tree is not None: + found = ctx.old_tree.xpath(f'//*[@id="{anchor_id}"]') + return not found + else: + file_part = old_href.split("#")[0] + if file_part: + target_path = ctx.old_dir / ctx.rel_path.parent / file_part + return not target_path.exists() + return False + + +broken_link_href_change = Rule(_is_broken_link_href_change, "href change on broken link") + + +def in_context(selector: str) -> Rule: + """ + Create a rule that checks if the affected node is within a given context. 
+ Selector format: "tag.class" or "tag" or ".class" + """ + + def check_context(ctx: DiffContext) -> bool: + node = _get_action_node(ctx) + if node is None: + return False + + # Parse selector + parts = selector.split(".") + tag = parts[0] if parts[0] else None + classes = parts[1:] if len(parts) > 1 else [] + + # Walk up the ancestor chain + current = node.getparent() + while current is not None: + if not isinstance(current, HtmlElement): + current = current.getparent() if hasattr(current, "getparent") else None + continue + + # Check tag match + if tag and current.tag != tag: + current = current.getparent() + continue + + # Check class match + if classes: + node_classes = (current.get("class") or "").split() + if not all(c in node_classes for c in classes): + current = current.getparent() + continue + + return True + + return False + + return Rule(check_context) + + +# ============================================================================= +# ALLOWED DIFFERENCE RULES +# ============================================================================= + +# Declare all allowed-diff rules here for easy auditing +ALLOWED_RULES: list[Rule] = [ + # False positives where old == new + no_actual_change, + # Whitespace-only text changes are always allowed + whitespace_only, + # Broken links may be removed + (is_deletion & broken_link).with_reason("removal of broken link"), + # Broken link hrefs may be changed + broken_link_href_change, +] + + +def check_allowed_difference(ctx: DiffContext) -> str | None: + """Check if a difference is allowed and return the reason if so.""" + for rule in ALLOWED_RULES: + reason = rule.check_with_reason(ctx) + if reason: + return reason + return None + + +def is_allowed_difference(ctx: DiffContext) -> bool: + """Check if a difference is allowed by any of the rules.""" + return check_allowed_difference(ctx) is not None + + +# ============================================================================= +# DIRECTORY COMPARISON +# 
============================================================================= + + +def find_html_files(directory: Path) -> set[Path]: + """Find all .html files in a directory, returning relative paths.""" + return {p.relative_to(directory) for p in directory.rglob("*.html")} + + +@dataclass +class FileCategories: + """Categorization of files between two directories.""" + + only_in_old: set[Path] + only_in_new: set[Path] + in_both: set[Path] + + +def categorize_files(old_dir: Path, new_dir: Path) -> FileCategories: + """Categorize files as only-in-old, only-in-new, or in-both.""" + old_files = find_html_files(old_dir) + new_files = find_html_files(new_dir) + + return FileCategories( + only_in_old=old_files - new_files, + only_in_new=new_files - old_files, + in_both=old_files & new_files, + ) + + +def parse_html(path: Path) -> HtmlElement | None: + """Parse an HTML file into a tree.""" + try: + with open(path, "rb") as f: + return html.parse(f).getroot() + except Exception as e: + print(f"Warning: Failed to parse {path}: {e}", file=sys.stderr) + return None + + +def _convert_moves_to_delete_insert( + actions: list, old_tree: HtmlElement, new_tree: HtmlElement +) -> list: + """Convert MoveNode actions to DeleteNode + InsertNode pairs. + + xmldiff detects moves even when nodes are far apart in the document, + which isn't semantically meaningful for our use case. Converting to + delete+insert makes the diff easier to understand. 
+ """ + result = [] + for action in actions: + if isinstance(action, MoveNode): + # Get the tag of the moved node from the new tree + node = _get_node_by_xpath(new_tree, action.node) + tag = node.tag if node is not None else "unknown" + + # Create DeleteNode for the old location + result.append(DeleteNode(node=action.node)) + + # Create InsertNode for the new location + result.append(InsertNode(target=action.target, tag=tag, position=action.position)) + else: + result.append(action) + return result + + +def _xpath_sort_key(xpath: str) -> list[tuple[str, int]]: + """Convert xpath to a sortable key with proper numeric ordering. + + E.g., "/html/body/div[10]/p[2]" -> [("html", 1), ("body", 1), ("div", 10), ("p", 2)] + """ + result = [] + # Split by / and parse each segment + for segment in xpath.split("/"): + if not segment: + continue + # Match tag name and optional index + match = re.match(r"([a-zA-Z0-9_-]+)(?:\[(\d+)\])?", segment) + if match: + tag = match.group(1) + idx = int(match.group(2)) if match.group(2) else 1 + result.append((tag, idx)) + return result + + +def _get_action_xpath(action: object) -> str: + """Get the primary xpath from an action for sorting purposes.""" + if isinstance(action, InsertNode): + return action.target + elif hasattr(action, "node"): + return action.node + return "" + + +def _sort_actions_by_xpath(actions: list) -> list: + """Sort actions by their xpath position in document order.""" + return sorted(actions, key=lambda a: _xpath_sort_key(_get_action_xpath(a))) + + +@dataclass +class FormattedDiff: + """A formatted difference for display.""" + description: str # Multi-line formatted description + reason: str | None = None # Why this diff is allowed (if applicable) + + +@dataclass +class FileDiff: + """Differences found in a single file.""" + + rel_path: Path + allowed: list[FormattedDiff] # Allowed differences (formatted) + not_allowed: list[FormattedDiff] # Non-allowed differences (formatted) + parse_error: bool = False + + +def 
diff_file( + old_dir: Path, + new_dir: Path, + old_targets: LinkTargets | None, + new_targets: LinkTargets | None, + rel_path: Path, +) -> FileDiff: + """Compute differences for a single file.""" + old_path = old_dir / rel_path + new_path = new_dir / rel_path + + old_tree = parse_html(old_path) + new_tree = parse_html(new_path) + + if old_tree is None or new_tree is None: + return FileDiff(rel_path=rel_path, allowed=[], not_allowed=[], parse_error=True) + + try: + actions = xmldiff_main.diff_trees(old_tree, new_tree) + except Exception as e: + print(f"Warning: Failed to diff {rel_path}: {e}", file=sys.stderr) + return FileDiff(rel_path=rel_path, allowed=[], not_allowed=[], parse_error=True) + + # Convert MoveNode to DeleteNode + InsertNode for clearer semantics + actions = _convert_moves_to_delete_insert(actions, old_tree, new_tree) + + # Sort by xpath for consistent document-order output + actions = _sort_actions_by_xpath(actions) + + allowed = [] + not_allowed = [] + + for action in actions: + ctx = DiffContext( + action=action, + old_tree=old_tree, + new_tree=new_tree, + old_dir=old_dir, + new_dir=new_dir, + rel_path=rel_path, + old_targets=old_targets, + new_targets=new_targets, + ) + + # Format the action while we have the trees + # Check if this diff is allowed and get the reason + reason = check_allowed_difference(ctx) + formatted = FormattedDiff( + description=format_action(action, old_tree, new_tree), + reason=reason, + ) + + if reason: + allowed.append(formatted) + else: + not_allowed.append(formatted) + + return FileDiff(rel_path=rel_path, allowed=allowed, not_allowed=not_allowed) + + +def diff_all_files( + old_dir: Path, + new_dir: Path, + files: set[Path], + old_targets: LinkTargets | None = None, + new_targets: LinkTargets | None = None, +) -> Iterator[FileDiff]: + """Diff all files in parallel, yielding results in sorted order as available.""" + sorted_files = sorted(files) + if not sorted_files: + return + + diff_fn = partial(diff_file, old_dir, 
new_dir, old_targets, new_targets) + file_to_idx = {f: i for i, f in enumerate(sorted_files)} + results: list[FileDiff | None] = [None] * len(sorted_files) + next_to_yield = 0 + + with ProcessPoolExecutor() as executor: + futures = {executor.submit(diff_fn, f): f for f in sorted_files} + + for future in as_completed(futures): + f = futures[future] + idx = file_to_idx[f] + results[idx] = future.result() + + # Yield all consecutive ready results + while next_to_yield < len(results) and results[next_to_yield] is not None: + yield results[next_to_yield] # type: ignore + results[next_to_yield] = None # Free memory + next_to_yield += 1 + + +# ============================================================================= +# OUTPUT +# ============================================================================= + + +def _get_parent_xpath(xpath: str, levels: int = 1) -> str: + """Get the xpath of an ancestor by going up N levels.""" + parts = xpath.rsplit("/", levels) + return parts[0] if parts[0] else "/" + + +def _render_tree_summary( + elem: HtmlElement, + max_depth: int = 2, + max_children: int = 3, + indent: int = 0, +) -> list[str]: + """Render a tree structure as HTML-like output.""" + lines = [] + prefix = " " * indent + + # Build opening tag with all attributes + tag = elem.tag + attr_strs = [] + for attr, value in elem.attrib.items(): + attr_strs.append(f'{attr}="{value}"') + + if attr_strs: + open_tag = f"<{tag} {' '.join(attr_strs)}>" + else: + open_tag = f"<{tag}>" + + # Get text content (preserve whitespace to show real differences) + text = elem.text or "" + children = [c for c in elem if isinstance(c, HtmlElement)] + + # Format based on content + if not children and not text: + # Empty element + lines.append(f"{prefix}{open_tag}") + elif not children: + # Text-only element + truncated = _truncate_text(text, 60) + lines.append(f"{prefix}{open_tag}{truncated}") + else: + # Element with children + lines.append(f"{prefix}{open_tag}") + if text.strip(): + # Text 
before first child (only show if non-whitespace) + truncated = _truncate_text(text, 60) + lines.append(f"{prefix} {truncated}") + + # Recurse into children (limited) + if max_depth > 0: + for child in children[:max_children]: + lines.extend(_render_tree_summary(child, max_depth - 1, max_children, indent + 1)) + if len(children) > max_children: + lines.append(f"{prefix} ... ({len(children) - max_children} more)") + else: + lines.append(f"{prefix} ...") + + lines.append(f"{prefix}") + + return lines + + +def _get_element_html(tree: HtmlElement | None, xpath: str, max_len: int = 200) -> str | None: + """Get the HTML string for an element at the given xpath.""" + if tree is None: + return None + try: + results = tree.xpath(xpath) + if results and len(results) > 0: + elem = results[0] + if isinstance(elem, HtmlElement): + html_str = html.tostring(elem, encoding="unicode") + if len(html_str) > max_len: + html_str = html_str[:max_len] + "..." + return html_str + else: + # Text node or other + return repr(elem)[:max_len] + except Exception: + pass + return None + + +def _get_element_text(tree: HtmlElement | None, xpath: str) -> str | None: + """Get the text content of an element at the given xpath.""" + if tree is None: + return None + try: + results = tree.xpath(xpath) + if results and len(results) > 0: + elem = results[0] + if isinstance(elem, HtmlElement): + return elem.text + return str(elem) + except Exception: + pass + return None + + +def _get_element_tail(tree: HtmlElement | None, xpath: str) -> str | None: + """Get the tail text of an element at the given xpath.""" + if tree is None: + return None + try: + results = tree.xpath(xpath) + if results and len(results) > 0: + elem = results[0] + if isinstance(elem, HtmlElement): + return elem.tail + except Exception: + pass + return None + + +def _get_attrib(tree: HtmlElement | None, xpath: str, attr: str) -> str | None: + """Get an attribute value from an element.""" + if tree is None: + return None + try: + results = 
tree.xpath(xpath) + if results and len(results) > 0: + elem = results[0] + if isinstance(elem, HtmlElement): + return elem.get(attr) + except Exception: + pass + return None + + +def _truncate_text(s: str, max_len: int = 80) -> str: + """Truncate text intelligently for display. + + - If no whitespace, don't truncate + - If truncation needed, break at first whitespace after max_len + """ + if len(s) <= max_len: + return s + # If no whitespace, don't truncate + if " " not in s and "\t" not in s and "\n" not in s: + return s + # Find first whitespace after max_len + for i in range(max_len, len(s)): + if s[i] in " \t\n": + return s[:i] + "..." + # No whitespace found after max_len, return full string + return s + + +def _truncate(s: str | None, max_len: int = 100) -> str: + """Truncate a string for display.""" + if s is None: + return "" + return _truncate_text(s, max_len) + return s + + +def _add_element_context( + lines: list[str], + old_tree: HtmlElement | None, + new_tree: HtmlElement | None, + xpath: str, +) -> None: + """Add before/after tree context for an element at xpath.""" + # Get parent xpath for context (1 level up) + parent_xpath = _get_parent_xpath(xpath, 1) + + # Show old tree context + if old_tree is not None: + try: + parents = old_tree.xpath(parent_xpath) + if parents and isinstance(parents[0], HtmlElement): + lines.append(" Before:") + for line in _render_tree_summary(parents[0], max_depth=1, max_children=3): + lines.append(f" - {line}") + except Exception: + pass + + # Show new tree context + if new_tree is not None: + try: + parents = new_tree.xpath(parent_xpath) + if parents and isinstance(parents[0], HtmlElement): + lines.append(" After:") + for line in _render_tree_summary(parents[0], max_depth=1, max_children=3): + lines.append(f" + {line}") + except Exception: + pass + + +def format_action( + action: object, + old_tree: HtmlElement | None = None, + new_tree: HtmlElement | None = None, +) -> str: + """Format a diff action for display with 
before/after content.""" + action_type = type(action).__name__ + lines = [] + + if isinstance(action, DeleteNode): + xpath = action.node + old_html = _get_element_html(old_tree, xpath) + lines.append(f"{action_type} at {xpath}") + if old_html: + lines.append(f" - {_truncate(old_html)}") + + elif isinstance(action, InsertNode): + target = action.target + tag = action.tag + position = action.position + lines.append(f"{action_type}: <{tag}> into {target} at position {position}") + + # Show the parent context with siblings around the insertion point + if new_tree is not None: + try: + parents = new_tree.xpath(target) + if parents and isinstance(parents[0], HtmlElement): + parent = parents[0] + children = [c for c in parent if isinstance(c, HtmlElement)] + + # Build context showing siblings around insertion + lines.append(" Inserted into:") + + # Show parent opening tag + attr_strs = [f'{a}="{v}"' for a, v in parent.attrib.items()] + if attr_strs: + lines.append(f" <{parent.tag} {' '.join(attr_strs)}>") + else: + lines.append(f" <{parent.tag}>") + + # Show elided content before + if position > 1: + lines.append(f" ... ({position - 1} siblings)") + + # Show left sibling if exists + if position > 0 and position - 1 < len(children): + for cl in _render_tree_summary(children[position - 1], max_depth=0): + lines.append(f" {cl}") + + # Show inserted element (highlighted) + if position < len(children): + for cl in _render_tree_summary(children[position], max_depth=1): + lines.append(f" >>> {cl}") + + # Show right sibling if exists + if position + 1 < len(children): + for cl in _render_tree_summary(children[position + 1], max_depth=0): + lines.append(f" {cl}") + + # Show elided content after + remaining = len(children) - position - 2 + if remaining > 0: + lines.append(f" ... 
({remaining} more siblings)") + + lines.append(f" ") + except Exception: + pass + + elif isinstance(action, MoveNode): + xpath = action.node + target = action.target + lines.append(f"{action_type}: {xpath} -> {target}") + + # Show context from old tree (parent of moved node) + old_parent_xpath = _get_parent_xpath(xpath, 1) + if old_tree is not None: + try: + old_parents = old_tree.xpath(old_parent_xpath) + if old_parents and isinstance(old_parents[0], HtmlElement): + lines.append(" Before:") + for line in _render_tree_summary(old_parents[0], max_depth=2): + lines.append(f" - {line}") + except Exception: + pass + + # Show context from new tree (target location) + new_parent_xpath = _get_parent_xpath(target, 1) if "/" in target else target + if new_tree is not None: + try: + new_parents = new_tree.xpath(new_parent_xpath) + if new_parents and isinstance(new_parents[0], HtmlElement): + lines.append(" After:") + for line in _render_tree_summary(new_parents[0], max_depth=2): + lines.append(f" + {line}") + except Exception: + pass + + elif isinstance(action, UpdateTextIn): + xpath = action.node + old_text = _get_element_text(old_tree, xpath) + new_text = action.text or "" + lines.append(f"{action_type} at {xpath}") + lines.append(f" text: {_truncate(repr(old_text))} -> {_truncate(repr(new_text))}") + # Show element context + _add_element_context(lines, old_tree, new_tree, xpath) + + elif isinstance(action, UpdateTextAfter): + xpath = action.node + old_text = _get_element_tail(old_tree, xpath) + new_text = action.text or "" + lines.append(f"{action_type} at {xpath}") + lines.append(f" tail: {_truncate(repr(old_text))} -> {_truncate(repr(new_text))}") + _add_element_context(lines, old_tree, new_tree, xpath) + + elif isinstance(action, InsertAttrib): + xpath = action.node + attr = action.name + value = action.value + lines.append(f"{action_type} at {xpath}") + lines.append(f" + @{attr}={_truncate(repr(value))}") + _add_element_context(lines, old_tree, new_tree, xpath) + + 
elif isinstance(action, DeleteAttrib): + xpath = action.node + attr = action.name + old_value = _get_attrib(old_tree, xpath, attr) + lines.append(f"{action_type} at {xpath}") + lines.append(f" - @{attr}={_truncate(repr(old_value))}") + _add_element_context(lines, old_tree, new_tree, xpath) + + elif isinstance(action, UpdateAttrib): + xpath = action.node + attr = action.name + old_value = _get_attrib(old_tree, xpath, attr) + new_value = action.value + lines.append(f"{action_type} at {xpath}") + lines.append(f" @{attr}: {_truncate(repr(old_value))} -> {_truncate(repr(new_value))}") + _add_element_context(lines, old_tree, new_tree, xpath) + + elif isinstance(action, RenameAttrib): + xpath = action.node + old_name = action.name + new_name = action.new_name if hasattr(action, "new_name") else "?" + lines.append(f"{action_type} at {xpath}") + lines.append(f" @{old_name} -> @{new_name}") + _add_element_context(lines, old_tree, new_tree, xpath) + + elif isinstance(action, RenameNode): + xpath = action.node + new_tag = action.tag + lines.append(f"{action_type} at {xpath} -> <{new_tag}>") + _add_element_context(lines, old_tree, new_tree, xpath) + + else: + lines.append(str(action)) + + return "\n".join(lines) + + +@dataclass +class Summary: + """Summary of comparison results.""" + + only_in_old: int + only_in_new: int + identical: int + allowed_only: int + has_real_diff: int + parse_errors: int + + +def print_file_diff(diff: FileDiff, verbose: bool) -> None: + """Print details for a single file diff.""" + print(f"\n--- {diff.rel_path} ---") + + if diff.parse_error: + print(" [PARSE ERROR]") + return + + if diff.not_allowed: + print(f" Non-allowed differences ({len(diff.not_allowed)}):") + for fd in diff.not_allowed[:10]: # Limit output + # Indent multi-line output + indented = "\n".join(" " + line for line in fd.description.split("\n")) + print(f" ! {indented.strip()}") + if len(diff.not_allowed) > 10: + print(f" ... 
and {len(diff.not_allowed) - 10} more") + + if verbose and diff.allowed: + print(f" Allowed differences ({len(diff.allowed)}):") + for fd in diff.allowed[:10]: # Limit output + reason_str = f" [{fd.reason}]" if fd.reason else "" + indented = "\n".join(" " + line for line in fd.description.split("\n")) + print(f" ~ {indented.strip()}{reason_str}") + if len(diff.allowed) > 10: + print(f" ... and {len(diff.allowed) - 10} more") + + +def print_results( + categories: FileCategories, + diffs: Iterator[FileDiff], + verbose: bool, +) -> Summary: + """Stream file diffs as they arrive, then print summary at the end.""" + identical = 0 + allowed_only = 0 + has_real_diff = 0 + parse_errors = 0 + printed_header = False + + # Stream file details as they arrive + for diff in diffs: + # Categorize and count + if diff.parse_error: + parse_errors += 1 + should_print = True + elif not diff.allowed and not diff.not_allowed: + identical += 1 + should_print = False + elif diff.not_allowed: + has_real_diff += 1 + should_print = True + else: + allowed_only += 1 + should_print = verbose + + # Print if needed + if should_print: + if not printed_header: + print("FILE DETAILS:") + printed_header = True + print_file_diff(diff, verbose) + + # Print files only in old/new + if categories.only_in_old: + print("\nFILES ONLY IN OLD:") + for f in sorted(categories.only_in_old): + print(f" - {f}") + + if categories.only_in_new: + print("\nFILES ONLY IN NEW:") + for f in sorted(categories.only_in_new): + print(f" + {f}") + + # Print summary at the end + print("\n" + "=" * 60) + print("COMPARISON SUMMARY") + print("=" * 60) + print(f"Files only in old: {len(categories.only_in_old)}") + print(f"Files only in new: {len(categories.only_in_new)}") + print(f"Files in both: {len(categories.in_both)}") + print("-" * 40) + print(f" Identical: {identical}") + print(f" Allowed diffs: {allowed_only}") + print(f" Real diffs: {has_real_diff}") + print(f" Parse errors: {parse_errors}") + print("=" * 60) + + return 
Summary( + only_in_old=len(categories.only_in_old), + only_in_new=len(categories.only_in_new), + identical=identical, + allowed_only=allowed_only, + has_real_diff=has_real_diff, + parse_errors=parse_errors, + ) + + +# ============================================================================= +# MAIN +# ============================================================================= + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Compare two directories of HTML documentation output" + ) + parser.add_argument("old_dir", type=Path, help="Path to old/original documentation") + parser.add_argument("new_dir", type=Path, help="Path to new documentation") + parser.add_argument( + "-v", "--verbose", action="store_true", + help="Show details including allowed differences" + ) + + args = parser.parse_args() + + old_dir: Path = args.old_dir.resolve() + new_dir: Path = args.new_dir.resolve() + + if not old_dir.is_dir(): + print(f"Error: {old_dir} is not a directory", file=sys.stderr) + return 1 + + if not new_dir.is_dir(): + print(f"Error: {new_dir} is not a directory", file=sys.stderr) + return 1 + + # Categorize files + categories = categorize_files(old_dir, new_dir) + + # Collect valid link targets from both directories + print("Collecting link targets...", file=sys.stderr) + old_files = find_html_files(old_dir) + new_files = find_html_files(new_dir) + old_targets = collect_link_targets(old_dir, old_files) + new_targets = collect_link_targets(new_dir, new_files) + print(f" Old: {len(old_files)} files, {sum(len(ids) for ids in old_targets.ids.values())} IDs", file=sys.stderr) + print(f" New: {len(new_files)} files, {sum(len(ids) for ids in new_targets.ids.values())} IDs", file=sys.stderr) + + # Diff files and stream results + diffs = diff_all_files(old_dir, new_dir, categories.in_both, old_targets, new_targets) + + # Print results (streams as diffs arrive, summary at end) + summary = print_results(categories, diffs, args.verbose) + + # Return exit 
code + if summary.has_real_diff > 0 or summary.parse_errors > 0: + return 1 + if summary.only_in_old > 0 or summary.only_in_new > 0: + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 9af029370faeb29053102f036c6e564658c8018c Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Fri, 30 Jan 2026 14:23:05 +0100 Subject: [PATCH 012/106] feat: save hashes for material in DB This is to serve as a basis for incremental builds of the DB with no HTML output in the future. --- DocGen4/DB.lean | 181 ++++++++++++++++++++--------------- DocGen4/Process/Analyze.lean | 6 +- DocGen4/Process/Base.lean | 56 +++++++++-- DocGen4/RenderedCode.lean | 4 +- Main.lean | 12 ++- 5 files changed, 165 insertions(+), 94 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 04e4e345..c09ab1ab 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -204,11 +204,18 @@ instance : SQLite.QueryParam VersoDocString := .asBlob end +/-- Hash of a module's input data, used for Lake trace integration. + Changes to module content cause changes to the hash file, triggering rebuilds of dependents. -/ +structure ModuleHash where + moduleName : String + hash : UInt64 + deriving Lean.ToJson, Lean.FromJson, Inhabited + def getDb (dbFile : System.FilePath) : IO SQLite := do -- SQLite atomically creates the DB file, and the schema and journal settings here are applied -- idempotently. This avoids DB creation race conditions. 
let db ← SQLite.openWith dbFile .readWriteCreate - db.exec "PRAGMA busy_timeout = 5000" + db.exec "PRAGMA busy_timeout = 60000" -- 60 seconds for parallel builds db.exec "PRAGMA journal_mode = WAL" db.exec "PRAGMA foreign_keys = ON" try @@ -307,6 +314,9 @@ CREATE TABLE IF NOT EXISTS internal_names ( FOREIGN KEY (target_module, target_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); +-- Index for CASCADE deletes: when name_info rows are deleted, find matching internal_names +CREATE INDEX IF NOT EXISTS idx_internal_names_target ON internal_names(target_module, target_position); + CREATE TABLE IF NOT EXISTS constructors ( module_name TEXT NOT NULL, position INTEGER NOT NULL, @@ -316,6 +326,9 @@ CREATE TABLE IF NOT EXISTS constructors ( FOREIGN KEY (module_name, type_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); +-- Index for CASCADE deletes on the second FK (type_position) +CREATE INDEX IF NOT EXISTS idx_constructors_type_pos ON constructors(module_name, type_position); + CREATE TABLE IF NOT EXISTS inductives ( module_name TEXT NOT NULL, position INTEGER NOT NULL, @@ -458,11 +471,14 @@ CREATE TABLE IF NOT EXISTS declaration_attrs ( ); "# -def withTableName (tableName : String) (act : IO α) : IO α := +def withDbContext (context : String) (act : IO α) : IO α := do + let ms ← IO.monoMsNow try act catch - | e => throw <| .userError s!"Exception while modifying `{tableName}`: {e.toString}" + | e => + let ms' ← IO.monoMsNow + throw <| .userError s!"Exception in `{context}` after {ms' - ms}ms: {e.toString}" structure DB where sqlite : SQLite @@ -533,28 +549,28 @@ instance : SQLite.QueryParam RenderedCode where def ensureDb (dbFile : System.FilePath) : IO DB := do let sqlite ← getDb dbFile let deleteModuleStmt ← sqlite.prepare "DELETE FROM modules WHERE name = ?" 
- let deleteModule modName := withTableName "modules" do + let deleteModule modName := withDbContext "write:delete:modules" do deleteModuleStmt.bind 1 modName run deleteModuleStmt let saveModuleStmt ← sqlite.prepare "INSERT INTO modules (name, source_url) VALUES (?, ?)" - let saveModule modName sourceUrl? := withTableName "modules" do + let saveModule modName sourceUrl? := withDbContext "write:insert:modules" do saveModuleStmt.bind 1 modName saveModuleStmt.bind 2 sourceUrl? run saveModuleStmt -- This is INSERT OR IGNORE because the module system often results in multiple imports of the same module (e.g. as meta) let saveImportStmt ← sqlite.prepare "INSERT OR IGNORE INTO module_imports (importer, imported) VALUES (?, ?)" - let saveImport modName imported := withTableName "module_imports" do + let saveImport modName imported := withDbContext "write:insert:module_imports" do saveImportStmt.bind 1 modName saveImportStmt.bind 2 imported.toString run saveImportStmt let saveMarkdownDocstringStmt ← sqlite.prepare "INSERT INTO markdown_docstrings (module_name, position, text) VALUES (?, ?, ?)" - let saveMarkdownDocstring modName position text := withTableName "markdown_docstrings" do + let saveMarkdownDocstring modName position text := withDbContext "write:insert:markdown_docstrings" do saveMarkdownDocstringStmt.bind 1 modName saveMarkdownDocstringStmt.bind 2 position saveMarkdownDocstringStmt.bind 3 text run saveMarkdownDocstringStmt let saveVersoDocstringStmt ← sqlite.prepare "INSERT INTO verso_docstrings (module_name, position, content) VALUES (?, ?, ?)" - let saveVersoDocstring modName position text := withTableName "verso_docstrings" do + let saveVersoDocstring modName position text := withDbContext "write:insert:verso_docstrings" do saveVersoDocstringStmt.bind 1 modName saveVersoDocstringStmt.bind 2 position saveVersoDocstringStmt.bind 3 text @@ -562,7 +578,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do let saveDeclarationRangeStmt ← sqlite.prepare "INSERT 
INTO declaration_ranges (module_name, position, start_line, start_column, start_utf16, end_line, end_column, end_utf16) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" - let saveDeclarationRange modName position (declRange : Lean.DeclarationRange) := withTableName "declaration_ranges" do + let saveDeclarationRange modName position (declRange : Lean.DeclarationRange) := withDbContext "write:insert:declaration_ranges" do saveDeclarationRangeStmt.bind 1 modName saveDeclarationRangeStmt.bind 2 position saveDeclarationRangeStmt.bind 3 declRange.pos.line @@ -575,7 +591,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do let saveInfoStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, sorried, render) VALUES (?, ?, ?, ?, ?, ?, ?)" let saveArgStmt' ← sqlite.prepare "INSERT INTO declaration_args (module_name, position, sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?)" let saveAttrStmt' ← sqlite.prepare "INSERT INTO declaration_attrs (module_name, position, sequence, attr) VALUES (?, ?, ?, ?)" - let saveInfo modName position kind (info : Process.Info) := withTableName "name_info" do + let saveInfo modName position kind (info : Process.Info) := withDbContext "write:insert:name_info" do saveInfoStmt.bind 1 modName saveInfoStmt.bind 2 position saveInfoStmt.bind 3 kind @@ -591,7 +607,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do -- Save args for h : j in 0...info.args.size do let arg := info.args[j] - withTableName "declaration_args" do + withDbContext "write:insert:declaration_args:info" do saveArgStmt'.bind 1 modName saveArgStmt'.bind 2 position saveArgStmt'.bind 3 j.toInt64 @@ -601,26 +617,26 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do -- Save attrs for h : j in 0...info.attrs.size do let attr := info.attrs[j] - withTableName "declaration_attrs" do + withDbContext "write:insert:declaration_attrs:info" do saveAttrStmt'.bind 1 modName saveAttrStmt'.bind 2 position saveAttrStmt'.bind 3 j.toInt64 saveAttrStmt'.bind 4 attr 
run saveAttrStmt' let saveAxiomStmt ← sqlite.prepare "INSERT INTO axioms (module_name, position, is_unsafe) VALUES (?, ?, ?)" - let saveAxiom modName position isUnsafe := withTableName "axioms" do + let saveAxiom modName position isUnsafe := withDbContext "write:insert:axioms" do saveAxiomStmt.bind 1 modName saveAxiomStmt.bind 2 position saveAxiomStmt.bind 3 isUnsafe run saveAxiomStmt let saveOpaqueStmt ← sqlite.prepare "INSERT INTO opaques (module_name, position, safety) VALUES (?, ?, ?)" - let saveOpaque modName position safety := withTableName "opaques" do + let saveOpaque modName position safety := withDbContext "write:insert:opaques" do saveOpaqueStmt.bind 1 modName saveOpaqueStmt.bind 2 position saveOpaqueStmt.bind 3 safety run saveOpaqueStmt let saveDefinitionStmt ← sqlite.prepare "INSERT INTO definitions (module_name, position, is_unsafe, hints, is_noncomputable, has_equations) VALUES (?, ?, ?, ?, ?, ?)" - let saveDefinition modName position isUnsafe hints isNonComputable hasEquations := withTableName "definitions" do + let saveDefinition modName position isUnsafe hints isNonComputable hasEquations := withDbContext "write:insert:definitions" do saveDefinitionStmt.bind 1 modName saveDefinitionStmt.bind 2 position saveDefinitionStmt.bind 3 isUnsafe @@ -629,51 +645,51 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveDefinitionStmt.bind 6 hasEquations run saveDefinitionStmt let saveDefinitionEquationStmt ← sqlite.prepare "INSERT INTO definition_equations (module_name, position, code, sequence) VALUES (?, ?, ?, ?)" - let saveDefinitionEquation modName position (code : RenderedCode) sequence := withTableName "definition_equations" do + let saveDefinitionEquation modName position (code : RenderedCode) sequence := withDbContext "write:insert:definition_equations" do saveDefinitionEquationStmt.bind 1 modName saveDefinitionEquationStmt.bind 2 position saveDefinitionEquationStmt.bind 3 code saveDefinitionEquationStmt.bind 4 sequence run 
saveDefinitionEquationStmt let saveInstanceStmt ← sqlite.prepare "INSERT INTO instances (module_name, position, class_name) VALUES (?, ?, ?)" - let saveInstance modName position className := withTableName "instances" do + let saveInstance modName position className := withDbContext "write:insert:instances" do saveInstanceStmt.bind 1 modName saveInstanceStmt.bind 2 position saveInstanceStmt.bind 3 className run saveInstanceStmt let saveInstanceArgStmt ← sqlite.prepare "INSERT INTO instance_args (module_name, position, sequence, type_name) VALUES (?, ?, ?, ?)" - let saveInstanceArg modName position sequence typeName := withTableName "instance_args" do + let saveInstanceArg modName position sequence typeName := withDbContext "write:insert:instance_args" do saveInstanceArgStmt.bind 1 modName saveInstanceArgStmt.bind 2 position saveInstanceArgStmt.bind 3 sequence saveInstanceArgStmt.bind 4 typeName run saveInstanceArgStmt let saveInductiveStmt ← sqlite.prepare "INSERT INTO inductives (module_name, position, is_unsafe) VALUES (?, ?, ?)" - let saveInductive modName position isUnsafe := withTableName "inductives" do + let saveInductive modName position isUnsafe := withDbContext "write:insert:inductives" do saveInductiveStmt.bind 1 modName saveInductiveStmt.bind 2 position saveInductiveStmt.bind 3 isUnsafe run saveInductiveStmt let saveConstructorStmt ← sqlite.prepare "INSERT INTO constructors (module_name, position, type_position) VALUES (?, ?, ?)" - let saveConstructor modName position typePosition := withTableName "constructors" do + let saveConstructor modName position typePosition := withDbContext "write:insert:constructors" do saveConstructorStmt.bind 1 modName saveConstructorStmt.bind 2 position saveConstructorStmt.bind 3 typePosition run saveConstructorStmt let saveClassInductiveStmt ← sqlite.prepare "INSERT INTO class_inductives (module_name, position, is_unsafe) VALUES (?, ?, ?)" - let saveClassInductive modName position isUnsafe := withTableName 
"class_inductives" do + let saveClassInductive modName position isUnsafe := withDbContext "write:insert:class_inductives" do saveClassInductiveStmt.bind 1 modName saveClassInductiveStmt.bind 2 position saveClassInductiveStmt.bind 3 isUnsafe run saveClassInductiveStmt let saveStructureStmt ← sqlite.prepare "INSERT INTO structures (module_name, position, is_class) VALUES (?, ?, ?)" - let saveStructure modName position isClass := withTableName "structures" do + let saveStructure modName position isClass := withDbContext "write:insert:structures" do saveStructureStmt.bind 1 modName saveStructureStmt.bind 2 position saveStructureStmt.bind 3 isClass run saveStructureStmt let saveStructureConstructorStmt ← sqlite.prepare "INSERT INTO structure_constructors (module_name, position, ctor_position, name, type) VALUES (?, ?, ?, ?, ?)" - let saveStructureConstructor modName position ctorPos info := withTableName "structure_constructors" do + let saveStructureConstructor modName position ctorPos info := withDbContext "write:insert:structure_constructors" do saveStructureConstructorStmt.bind 1 modName saveStructureConstructorStmt.bind 2 position saveStructureConstructorStmt.bind 3 ctorPos @@ -686,7 +702,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do | none => pure () let saveStructureParentStmt ← sqlite.prepare "INSERT INTO structure_parents (module_name, position, sequence, projection_fn, type) VALUES (?, ?, ?, ?, ?)" - let saveStructureParent modName position sequence projectionFn (type : RenderedCode) := withTableName "structure_parents" do + let saveStructureParent modName position sequence projectionFn (type : RenderedCode) := withDbContext "write:insert:structure_parents" do saveStructureParentStmt.bind 1 modName saveStructureParentStmt.bind 2 position saveStructureParentStmt.bind 3 sequence @@ -695,7 +711,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do run saveStructureParentStmt -- Store projection function name directly; lookup happens at load time 
let saveStructureFieldStmt ← sqlite.prepare "INSERT INTO structure_fields (module_name, position, sequence, proj_name, type, is_direct) VALUES (?, ?, ?, ?, ?, ?)" - let saveStructureField modName position sequence projName (type : RenderedCode) isDirect := withTableName "structure_fields" do + let saveStructureField modName position sequence projName (type : RenderedCode) isDirect := withDbContext "write:insert:structure_fields" do saveStructureFieldStmt.bind 1 modName saveStructureFieldStmt.bind 2 position saveStructureFieldStmt.bind 3 sequence @@ -704,7 +720,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveStructureFieldStmt.bind 6 isDirect run saveStructureFieldStmt let saveStructureFieldArgStmt ← sqlite.prepare "INSERT INTO structure_field_args (module_name, position, field_sequence, arg_sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?, ?)" - let saveStructureFieldArg modName position fieldSeq argSeq (binder : RenderedCode) isImplicit := withTableName "structure_field_args" do + let saveStructureFieldArg modName position fieldSeq argSeq (binder : RenderedCode) isImplicit := withDbContext "write:insert:structure_field_args" do saveStructureFieldArgStmt.bind 1 modName saveStructureFieldArgStmt.bind 2 position saveStructureFieldArgStmt.bind 3 fieldSeq @@ -713,7 +729,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveStructureFieldArgStmt.bind 6 isImplicit run saveStructureFieldArgStmt let saveArgStmt ← sqlite.prepare "INSERT INTO declaration_args (module_name, position, sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?)" - let saveArg modName position sequence (binder : RenderedCode) isImplicit := withTableName "declaration_args" do + let saveArg modName position sequence (binder : RenderedCode) isImplicit := withDbContext "write:insert:declaration_args" do saveArgStmt.bind 1 modName saveArgStmt.bind 2 position saveArgStmt.bind 3 sequence @@ -721,7 +737,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveArgStmt.bind 5 
isImplicit run saveArgStmt let saveAttrStmt ← sqlite.prepare "INSERT INTO declaration_attrs (module_name, position, sequence, attr) VALUES (?, ?, ?, ?)" - let saveAttr modName position sequence attr := withTableName "declaration_attrs" do + let saveAttr modName position sequence attr := withDbContext "write:insert:declaration_attrs" do saveAttrStmt.bind 1 modName saveAttrStmt.bind 2 position saveAttrStmt.bind 3 sequence @@ -729,7 +745,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do run saveAttrStmt -- For saving minimal info to name_info for name lookups only (not rendering) let saveNameOnlyStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, sorried, render) VALUES (?, ?, ?, ?, ?, 0, 0)" - let saveNameOnly modName position kind (name : Lean.Name) (type : RenderedCode) (declRange : Lean.DeclarationRange) := withTableName "name_info" do + let saveNameOnly modName position kind (name : Lean.Name) (type : RenderedCode) (declRange : Lean.DeclarationRange) := withDbContext "write:insert:name_info:nameonly" do saveNameOnlyStmt.bind 1 modName saveNameOnlyStmt.bind 2 position saveNameOnlyStmt.bind 3 kind @@ -740,7 +756,7 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveDeclarationRange modName position declRange -- For saving internal names (like recursors) that link to their target declaration let saveInternalNameStmt ← sqlite.prepare "INSERT OR IGNORE INTO internal_names (name, target_module, target_position) VALUES (?, ?, ?)" - let saveInternalName (name : Lean.Name) (targetModule : String) (targetPosition : Int64) := withTableName "internal_names" do + let saveInternalName (name : Lean.Name) (targetModule : String) (targetPosition : Int64) := withDbContext "write:insert:internal_names" do saveInternalNameStmt.bind 1 name.toString saveInternalNameStmt.bind 2 targetModule saveInternalNameStmt.bind 3 targetPosition @@ -778,91 +794,98 @@ end DB open DB -def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : 
System.FilePath) (dbFile : String) (sourceUrl? : Option String) : IO Unit := do +def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) (dbFile : String) + (hashDir : System.FilePath) (sourceUrl? : Option String) : IO Unit := do let dbFile := buildDir / dbFile let db ← ensureDb dbFile - let ms1 ← IO.monoMsNow - db.sqlite.transaction (mode := .immediate) do - -- Collect structure field info to save in second pass (after all declarations are in name_info) - let mut pendingStructureFields : Array (String × Int64 × Process.StructureInfo) := #[] - for (modName, modInfo) in doc.moduleInfo do - let modName := modName.toString - db.deleteModule modName - db.saveModule modName sourceUrl? + IO.FS.createDirAll hashDir + for (modName, modInfo) in doc.moduleInfo do + let modNameStr := modName.toString + -- Hash input BEFORE transaction (no DB lock needed) + let inputHash := hash modInfo + -- Each module gets its own transaction to reduce lock contention + let _ ← withDbContext s!"transaction:immediate:{modNameStr}" <| db.sqlite.transaction (mode := .immediate) do + -- Collect structure field info to save in second pass (after all declarations are in name_info) + let mut pendingStructureFields : Array (Int64 × Process.StructureInfo) := #[] + db.deleteModule modNameStr + db.saveModule modNameStr sourceUrl? 
for imported in modInfo.imports do - db.saveImport modName imported + db.saveImport modNameStr imported let mut i : Int64 := 0 for mem in modInfo.members do let pos := i i := i + 1 match mem with | .modDoc doc => - db.saveDeclarationRange modName pos doc.declarationRange - db.saveMarkdownDocstring modName pos doc.doc + db.saveDeclarationRange modNameStr pos doc.declarationRange + db.saveMarkdownDocstring modNameStr pos doc.doc | .docInfo info => let baseInfo := info.toInfo -- Skip saving ctorInfo here - they're saved along with their parent inductive if !info.isCtorInfo then - db.saveInfo modName pos (infoKind info) baseInfo - db.saveDeclarationRange modName pos baseInfo.declarationRange + db.saveInfo modNameStr pos (infoKind info) baseInfo + db.saveDeclarationRange modNameStr pos baseInfo.declarationRange match info with | .axiomInfo info => - db.saveAxiom modName pos info.isUnsafe + db.saveAxiom modNameStr pos info.isUnsafe | .theoremInfo _info => -- No extra info here pure () | .opaqueInfo info => - db.saveOpaque modName pos info.definitionSafety + db.saveOpaque modNameStr pos info.definitionSafety | .definitionInfo info => - db.saveDefinition modName pos info.isUnsafe info.hints info.isNonComputable info.equations.isSome + db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable info.equations.isSome if let some eqns := info.equations then for h : j in 0...eqns.size do - db.saveDefinitionEquation modName pos eqns[j] j.toInt64 + db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 | .instanceInfo info => -- Save definition data (InstanceInfo extends DefinitionInfo) - db.saveDefinition modName pos info.isUnsafe info.hints info.isNonComputable info.equations.isSome + db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable info.equations.isSome if let some eqns := info.equations then for h : j in 0...eqns.size do - db.saveDefinitionEquation modName pos eqns[j] j.toInt64 + db.saveDefinitionEquation modNameStr pos 
eqns[j] j.toInt64 -- Save instance-specific data - db.saveInstance modName pos info.className.toString + db.saveInstance modNameStr pos info.className.toString for h : j in 0...info.typeNames.size do - db.saveInstanceArg modName pos j.toInt64 info.typeNames[j].toString + db.saveInstanceArg modNameStr pos j.toInt64 info.typeNames[j].toString | .inductiveInfo info => - db.saveInductive modName pos info.isUnsafe + db.saveInductive modNameStr pos info.isUnsafe -- Save recursors (main + aux) as internal names linking to this inductive - saveRecursors doc.name2ModIdx db modName pos info.name + saveRecursors doc.name2ModIdx db modNameStr pos info.name for ctor in info.ctors do let cpos := i i := i + 1 - db.saveInfo modName cpos "constructor" ctor - db.saveDeclarationRange modName cpos ctor.declarationRange - db.saveConstructor modName cpos pos + db.saveInfo modNameStr cpos "constructor" ctor + db.saveDeclarationRange modNameStr cpos ctor.declarationRange + db.saveConstructor modNameStr cpos pos | .structureInfo info => -- First pass: save structure metadata (not fields) - i := (← (saveStructureMetadata false info db modName pos doc.name2ModIdx).run i).2 - pendingStructureFields := pendingStructureFields.push (modName, pos, info) + i := (← (saveStructureMetadata false info db modNameStr pos doc.name2ModIdx).run i).2 + pendingStructureFields := pendingStructureFields.push (pos, info) | .classInfo info => -- First pass: save structure metadata (not fields) - i := (← (saveStructureMetadata true info db modName pos doc.name2ModIdx).run i).2 - pendingStructureFields := pendingStructureFields.push (modName, pos, info) + i := (← (saveStructureMetadata true info db modNameStr pos doc.name2ModIdx).run i).2 + pendingStructureFields := pendingStructureFields.push (pos, info) | .classInductiveInfo info => - db.saveClassInductive modName pos info.isUnsafe + db.saveClassInductive modNameStr pos info.isUnsafe -- Save recursors (main + aux) as internal names linking to this class 
inductive - saveRecursors doc.name2ModIdx db modName pos info.name + saveRecursors doc.name2ModIdx db modNameStr pos info.name for ctor in info.ctors do let cpos := i i := i + 1 - db.saveInfo modName cpos "constructor" ctor - db.saveDeclarationRange modName cpos ctor.declarationRange - db.saveConstructor modName cpos pos + db.saveInfo modNameStr cpos "constructor" ctor + db.saveDeclarationRange modNameStr cpos ctor.declarationRange + db.saveConstructor modNameStr cpos pos | .ctorInfo info => -- Here we do nothing because they were inserted along with the inductive pure () - -- Second pass: save structure fields (now that all projection functions are in name_info) - for (modName, pos, info) in pendingStructureFields do - saveStructureFields info db modName pos - let ms2 ← IO.monoMsNow - (← IO.FS.Handle.mk "db-timing" .append).write <| s!"{doc.moduleInfo.keysArray}\t{ms2 - ms1}ms\n".toUTF8 + -- Second pass: save structure fields (now that all projection functions are in name_info) + for (pos, info) in pendingStructureFields do + saveStructureFields info db modNameStr pos + pure () + -- Write hash file AFTER transaction commits successfully + let hashFile := hashDir / s!"{modNameStr}.dbhash" + let moduleHash : ModuleHash := { moduleName := modNameStr, hash := inputHash } + IO.FS.writeFile hashFile (Lean.toJson moduleHash).pretty pure () where @@ -958,7 +981,7 @@ def readVersoDocString (blob : ByteArray) : IO VersoDocString := do | .error e => throw <| IO.userError s!"Failed to deserialize VersoDocString: {e}" /-- Get all module names from the database. 
-/ -def getModuleNames (db : SQLite) : IO (Array Name) := withTableName "modules (B)" do +def getModuleNames (db : SQLite) : IO (Array Name) := withDbContext "read:modules:names" do let stmt ← db.prepare "SELECT name FROM modules ORDER BY name" let mut names := #[] while (← stmt.step) do @@ -967,7 +990,7 @@ def getModuleNames (db : SQLite) : IO (Array Name) := withTableName "modules (B) return names /-- Get all module source URLs from the database. -/ -def getModuleSourceUrls (db : SQLite) : IO (Std.HashMap Name String) := withTableName "modules (C)" do +def getModuleSourceUrls (db : SQLite) : IO (Std.HashMap Name String) := withDbContext "read:modules:source_urls" do let stmt ← db.prepare "SELECT name, source_url FROM modules WHERE source_url IS NOT NULL" let mut urls : Std.HashMap Name String := {} while (← stmt.step) do @@ -977,7 +1000,7 @@ def getModuleSourceUrls (db : SQLite) : IO (Std.HashMap Name String) := withTabl return urls /-- Get all module imports from the database. -/ -def getModuleImports (db : SQLite) (moduleName : Name) : IO (Array Name) := withTableName "module_imports" do +def getModuleImports (db : SQLite) (moduleName : Name) : IO (Array Name) := withDbContext "read:module_imports" do let stmt ← db.prepare "SELECT imported FROM module_imports WHERE importer = ?" stmt.bind 1 moduleName.toString let mut imports := #[] @@ -1012,7 +1035,7 @@ def buildName2ModIdx (db : SQLite) (moduleNames : Array Name) : IO (Std.HashMap return result /-- Load declaration arguments from the database. -/ -def loadArgs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Process.Arg) := withTableName "declaration_args" do +def loadArgs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Process.Arg) := withDbContext "read:declaration_args" do let stmt ← db.prepare "SELECT binder, is_implicit FROM declaration_args WHERE module_name = ? AND position = ? 
ORDER BY sequence" stmt.bind 1 moduleName stmt.bind 2 position @@ -1025,7 +1048,7 @@ def loadArgs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array return args /-- Load declaration attributes from the database. -/ -def loadAttrs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array String) := withTableName "declaration_attrs" do +def loadAttrs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array String) := withDbContext "read:declaration_attrs" do let stmt ← db.prepare "SELECT attr FROM declaration_attrs WHERE module_name = ? AND position = ? ORDER BY sequence" stmt.bind 1 moduleName stmt.bind 2 position @@ -1036,7 +1059,7 @@ def loadAttrs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array return attrs /-- Load a docstring from the database. -/ -def loadDocstring (db : SQLite) (moduleName : String) (position : Int64) : IO (Option (String ⊕ VersoDocString)) := withTableName "markdown_docstrings verso_docstrings" do +def loadDocstring (db : SQLite) (moduleName : String) (position : Int64) : IO (Option (String ⊕ VersoDocString)) := withDbContext "read:docstrings" do -- Try markdown first let mdStmt ← db.prepare "SELECT text FROM markdown_docstrings WHERE module_name = ? AND position = ?" mdStmt.bind 1 moduleName @@ -1055,7 +1078,7 @@ def loadDocstring (db : SQLite) (moduleName : String) (position : Int64) : IO (O return none /-- Load a declaration range from the database. -/ -def loadDeclarationRange (db : SQLite) (moduleName : String) (position : Int64) : IO (Option DeclarationRange) := withTableName "declaration_ranges" do +def loadDeclarationRange (db : SQLite) (moduleName : String) (position : Int64) : IO (Option DeclarationRange) := withDbContext "read:declaration_ranges" do let stmt ← db.prepare "SELECT start_line, start_column, start_utf16, end_line, end_column, end_utf16 FROM declaration_ranges WHERE module_name = ? AND position = ?" 
stmt.bind 1 moduleName stmt.bind 2 position @@ -1095,7 +1118,7 @@ def loadInfo (db : SQLite) (moduleName : String) (position : Int64) (name : Name /-- Load definition equations from the database. Takes hasEquations flag to distinguish `none` from `some #[]`. -/ -def loadEquations (db : SQLite) (moduleName : String) (position : Int64) (hasEquations : Bool) : IO (Option (Array RenderedCode)) := withTableName "definition_equations" do +def loadEquations (db : SQLite) (moduleName : String) (position : Int64) (hasEquations : Bool) : IO (Option (Array RenderedCode)) := withDbContext "read:definition_equations" do if !hasEquations then return none let stmt ← db.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? ORDER BY sequence" stmt.bind 1 moduleName diff --git a/DocGen4/Process/Analyze.lean b/DocGen4/Process/Analyze.lean index 6c0802d2..ee9f2e11 100644 --- a/DocGen4/Process/Analyze.lean +++ b/DocGen4/Process/Analyze.lean @@ -16,13 +16,15 @@ namespace DocGen4.Process open Lean Meta +deriving instance Hashable for ModuleDoc + /-- Member of a module, either a declaration or some module doc string. -/ inductive ModuleMember where | docInfo (info : DocInfo) : ModuleMember | modDoc (doc : ModuleDoc) : ModuleMember -deriving Inhabited +deriving Inhabited, Hashable /-- A Lean module. @@ -37,7 +39,7 @@ structure Module where -/ members : Array ModuleMember imports : Array Name - deriving Inhabited + deriving Inhabited, Hashable /-- The result of running a full doc-gen analysis on a project. 
diff --git a/DocGen4/Process/Base.lean b/DocGen4/Process/Base.lean index 49ec9fd2..9cd25aac 100644 --- a/DocGen4/Process/Base.lean +++ b/DocGen4/Process/Base.lean @@ -15,6 +15,35 @@ structure DocGenOptions where abbrev AnalyzeM : Type → Type := ReaderT DocGenOptions MetaM +-- BOGUS INSTANCE: good enough for here, though +local instance : Hashable ElabInline := ⟨fun x => hash x.name⟩ + +-- BOGUS INSTANCE: good enough for here, though +local instance : Hashable ElabBlock := ⟨fun x => hash x.name⟩ + +deriving instance Hashable for Doc.Inline + +deriving instance Hashable for Doc.ListItem + +deriving instance Hashable for Doc.DescItem + +deriving instance Hashable for Doc.Block + +deriving instance Hashable for Doc.Part + +instance : Hashable Empty := ⟨nofun⟩ + +instance : Hashable VersoDocString where + hash x := mixHash (hash x.text) (hash x.subsections) + +deriving instance Hashable for Sum + +deriving instance Hashable for Position + +deriving instance Hashable for DeclarationRange + + + /-- Stores information about a typed name. -/ @@ -31,7 +60,7 @@ structure NameInfo where The doc string of the name if it exists. -/ doc : Option (String ⊕ VersoDocString) - deriving Inhabited + deriving Inhabited, Hashable /-- An argument to a declaration, e.g. the `(x : Nat)` in `def foo (x : Nat) := x`. @@ -45,6 +74,7 @@ structure Arg where Whether the binder is implicit. -/ implicit : Bool + deriving Hashable /-- A base structure for information about a declaration. @@ -70,20 +100,22 @@ structure Info extends NameInfo where Whether this info item should be rendered -/ render : Bool := true - deriving Inhabited + deriving Inhabited, Hashable /-- Information about an `axiom` declaration. -/ structure AxiomInfo extends Info where isUnsafe : Bool - deriving Inhabited + deriving Inhabited, Hashable /-- Information about a `theorem` declaration. 
-/ structure TheoremInfo extends Info - deriving Inhabited + deriving Inhabited, Hashable + +deriving instance Hashable for DefinitionSafety /-- Information about an `opaque` declaration. @@ -94,7 +126,9 @@ structure OpaqueInfo extends Info where since the actual definition for a partial def is hidden behind an inaccessible value. -/ definitionSafety : DefinitionSafety - deriving Inhabited + deriving Inhabited, Hashable + +deriving instance Hashable for ReducibilityHints /-- Information about a `def` declaration, note that partial defs are handled by `OpaqueInfo`. @@ -104,7 +138,7 @@ structure DefinitionInfo extends Info where hints : ReducibilityHints equations : Option (Array RenderedCode) isNonComputable : Bool - deriving Inhabited + deriving Inhabited, Hashable /-- Information about an `instance` declaration. @@ -112,7 +146,7 @@ Information about an `instance` declaration. structure InstanceInfo extends DefinitionInfo where className : Name typeNames : Array Name - deriving Inhabited + deriving Inhabited, Hashable /-- Information about a constructor of an inductive type @@ -128,7 +162,7 @@ structure InductiveInfo extends Info where -/ ctors : List ConstructorInfo isUnsafe : Bool - deriving Inhabited + deriving Inhabited, Hashable /-- Stores information about a structure field. @@ -138,6 +172,7 @@ structure FieldInfo extends Info where Whether or not this field is new to this structure, or instead whether it was inherited from a parent. -/ isDirect : Bool + deriving Hashable /-- Information about a `structure` parent. @@ -147,6 +182,7 @@ structure StructureParentInfo where projFn : Name /-- Pretty printed type. -/ type : RenderedCode + deriving Hashable /-- Information about a `structure` declaration. @@ -164,7 +200,7 @@ structure StructureInfo extends Info where The constructor of the structure. -/ ctor : NameInfo - deriving Inhabited + deriving Inhabited, Hashable /-- Information about a `class` declaration. 
@@ -191,7 +227,7 @@ inductive DocInfo where | classInfo (info : ClassInfo) : DocInfo | classInductiveInfo (info : ClassInductiveInfo) : DocInfo | ctorInfo (info : ConstructorInfo) : DocInfo - deriving Inhabited + deriving Inhabited, Hashable def DocInfo.toInfo : DocInfo → Info | .axiomInfo info => info.toInfo diff --git a/DocGen4/RenderedCode.lean b/DocGen4/RenderedCode.lean index 4a7ea58c..9c64fc62 100644 --- a/DocGen4/RenderedCode.lean +++ b/DocGen4/RenderedCode.lean @@ -76,13 +76,15 @@ where .append <$> FromBinary.deserializer | other => throw s!"Expected 0...3 for `TaggedText`, got {other}" +deriving instance Hashable for TaggedText + /-- A simplified representation of code with semantic tags for rendering. Unlike `CodeWithInfos`, this only contains the information needed for HTML rendering (links to declarations, syntax highlighting) and can be serialized to/from the database. -/ def RenderedCode := Lean.Widget.TaggedText RenderedCode.Tag -deriving Inhabited, BEq, Repr, ToBinary, FromBinary +deriving Inhabited, BEq, Repr, ToBinary, FromBinary, Hashable def RenderedCode.empty : RenderedCode := .append #[] diff --git a/Main.lean b/Main.lean index abdcb938..e35dd5d1 100644 --- a/Main.lean +++ b/Main.lean @@ -22,12 +22,15 @@ def runSingleCmd (p : Parsed) : IO UInt32 := do | some dir => dir.as! String | none => ".lake/build" let dbFile? := p.flag? "db" |>.map (·.as! String) + let hashDir := match p.flag? "hash-dir" with + | some dir => dir.as! String + | none => s!"{buildDir}/doc-hashes" let relevantModules := #[p.positionalArg! "module" |>.as! String |> String.toName] let sourceUri := p.positionalArg! "sourceUri" |>.as! String let (doc, hierarchy) ← load <| .analyzeConcreteModules relevantModules let baseConfig ← getSimpleBaseContext buildDir hierarchy if let some dbFile := dbFile? 
then - updateModuleDb doc buildDir dbFile (some sourceUri) + updateModuleDb doc buildDir dbFile hashDir (some sourceUri) discard <| htmlOutputResults baseConfig doc (some sourceUri) return 0 @@ -45,12 +48,15 @@ def runGenCoreCmd (p : Parsed) : IO UInt32 := do | some dir => dir.as! String | none => ".lake/build" let dbFile? := p.flag? "db" |>.map (·.as! String) + let hashDir := match p.flag? "hash-dir" with + | some dir => dir.as! String + | none => s!"{buildDir}/doc-hashes" let manifestOutput? := (p.flag? "manifest").map (·.as! String) let module := p.positionalArg! "module" |>.as! String |> String.toName let (doc, hierarchy) ← load <| .analyzePrefixModules module let baseConfig ← getSimpleBaseContext buildDir hierarchy if let some dbFile := dbFile? then - updateModuleDb doc buildDir dbFile none + updateModuleDb doc buildDir dbFile hashDir none let outputs ← htmlOutputResults baseConfig doc none if let .some manifestOutput := manifestOutput? then IO.FS.writeFile manifestOutput (Lean.toJson outputs).compress @@ -131,6 +137,7 @@ def singleCmd := `[Cli| FLAGS: b, build : String; "Build directory." db : String; "Database" + "hash-dir" : String; "Directory for module hash files (default: /doc-hashes)" ARGS: module : String; "The module to generate the HTML for. Does not have to be part of topLevelModules." @@ -152,6 +159,7 @@ def genCoreCmd := `[Cli| FLAGS: b, build : String; "Build directory." db : String; "Database" + "hash-dir" : String; "Directory for module hash files (default: /doc-hashes)" m, manifest : String; "Manifest output, to list all the files generated." 
ARGS: From 0160e76934e788e0b149694368143dfccf4f34ef Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Fri, 30 Jan 2026 14:44:41 +0100 Subject: [PATCH 013/106] chore: print timing for html from db --- Main.lean | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Main.lean b/Main.lean index e35dd5d1..6de6ff0f 100644 --- a/Main.lean +++ b/Main.lean @@ -98,16 +98,26 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do | some dir => dir.as! String | none => ".lake/build/doc-from-db" -- Different default for DB-generated docs let dbPath := p.positionalArg! "db" |>.as! String + let start ← IO.monoMsNow IO.println s!"Loading documentation from database: {dbPath}" let dbResult ← loadFromDb dbPath let result := dbResult.result + IO.println s!"Loading took {(← IO.monoMsNow) - start}ms" IO.println s!"Loaded {result.moduleNames.size} modules with {result.name2ModIdx.size} declarations" -- Add `references` pseudo-module to hierarchy since references.html is always generated + let start ← IO.monoMsNow let hierarchy := Hierarchy.fromArray (result.moduleNames.push `references) + IO.println s!"Hierarchy took {(← IO.monoMsNow) - start}ms" + let start ← IO.monoMsNow let baseConfig ← getSimpleBaseContext buildDir hierarchy + IO.println s!"Context took {(← IO.monoMsNow) - start}ms" + let start ← IO.monoMsNow IO.println s!"Generating HTML to: {buildDir}" discard <| htmlOutputResults baseConfig result none (sourceLinker? := some (dbSourceLinker dbResult.sourceUrls)) + IO.println s!"HTML took {(← IO.monoMsNow) - start}ms" + let start ← IO.monoMsNow htmlOutputIndex baseConfig + IO.println s!"HTML index took {(← IO.monoMsNow) - start}ms" IO.println "Done!" 
return 0 From 5bd12d0abf777ef033eff88d4323ac868c7766b1 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Fri, 30 Jan 2026 17:16:12 +0100 Subject: [PATCH 014/106] feat: try parallel generation from db --- DocGen4/DB.lean | 13 ++++++++ DocGen4/Output.lean | 72 ++++++++++++++++++++++++++++++++++++++++++++- Main.lean | 20 ++++++++----- 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index c09ab1ab..69741122 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -1434,4 +1434,17 @@ def loadFromDb (dbFile : System.FilePath) : IO LoadFromDbResult := do let moduleInfo ← loadAllModules db moduleNames return { result := { name2ModIdx, moduleNames, moduleInfo }, sourceUrls } +/-- Shared index data needed for cross-module linking, without loading full module contents. -/ +structure SharedIndex where + moduleNames : Array Name + sourceUrls : Std.HashMap Name String + name2ModIdx : Std.HashMap Name ModuleIdx + +/-- Load just the shared index (fast) - only what's needed for cross-module linking. -/ +def loadSharedIndex (db : SQLite) : IO SharedIndex := do + let moduleNames ← getModuleNames db + let sourceUrls ← getModuleSourceUrls db + let name2ModIdx ← buildName2ModIdx db moduleNames + return { moduleNames, sourceUrls, name2ModIdx } + end Reading diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 99417554..428ee667 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -5,6 +5,7 @@ Authors: Henrik Böving -/ import Lean import DocGen4.Process +import DocGen4.DB import DocGen4.Output.Base import DocGen4.Output.Index import DocGen4.Output.Module @@ -97,7 +98,9 @@ def htmlOutputResults (baseConfig : SiteBaseContext) (result : AnalyzerResult) ( let config : SiteContext := { result := result sourceLinker := (sourceLinker?.getD SourceLinker.sourceLinker) sourceUrl? 
- refsMap := .ofList (baseConfig.refs.map fun x => (x.citekey, x)).toList + refsMap := + Std.HashMap.emptyWithCapacity baseConfig.refs.size + |>.insertMany (baseConfig.refs.iter.map fun x => (x.citekey, x)) declarationDecorator := declarationDecorator?.getD defaultDeclarationDecorator } @@ -128,6 +131,73 @@ def htmlOutputResults (baseConfig : SiteBaseContext) (result : AnalyzerResult) ( outputs := outputs.push relFilePath return outputs +/-- Generate HTML for all modules in parallel. + Each task loads its module from DB, renders HTML, and writes output files. + The shared index provides cross-module linking without loading all module data upfront. -/ +def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.FilePath) + (shared : SharedIndex) (sourceLinker? : Option SourceLinkerFn := none) + (declarationDecorator? : Option DeclarationDecoratorFn := none) : IO (Array System.FilePath) := do + FS.createDirAll <| basePath baseConfig.buildDir + FS.createDirAll <| declarationsBasePath baseConfig.buildDir + + -- Spawn one task per module, each returning its output file path + let tasks ← shared.moduleNames.mapM fun modName => IO.asTask do + -- Each task opens its own DB connection (SQLite handles concurrent readers well) + let db ← openDbForReading dbPath + let module ← loadModule db modName + + -- Build a minimal AnalyzerResult with just this module's info + let result : AnalyzerResult := { + name2ModIdx := shared.name2ModIdx + moduleNames := shared.moduleNames + moduleInfo := ({} : Std.HashMap Name Process.Module).insert modName module + } + + let config : SiteContext := { + result := result + sourceLinker := (sourceLinker?.getD SourceLinker.sourceLinker) none + refsMap := Std.HashMap.emptyWithCapacity baseConfig.refs.size |>.insertMany (baseConfig.refs.iter.map fun x => (x.citekey, x)) + declarationDecorator := declarationDecorator?.getD defaultDeclarationDecorator + } + + -- path: 'basePath/module/components/till/last.html' + -- The last component 
is the file name, so we drop it from the depth to root. + let baseConfig' := { baseConfig with + depthToRoot := modName.components.dropLast.length + currentName := some modName + } + + -- Render HTML + let (moduleHtml, cfg) := moduleToHtml module |>.run {} config baseConfig' + if not cfg.errors.isEmpty then + throw <| IO.userError s!"There are errors when generating HTML for '{modName}': {cfg.errors}" + + -- Write HTML file + let relFilePath := basePathComponent / moduleNameToFile modName + let filePath := baseConfig.buildDir / relFilePath + if let .some d := filePath.parent then + FS.createDirAll d + FS.writeFile filePath moduleHtml.toString + + -- Write backrefs JSON + FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"backrefs-{module.name}.json") + (toString (toJson cfg.backrefs)) + + -- Generate declaration data JSON for search + let (jsonDecls, _) := Module.toJson module |>.run {} config baseConfig' + FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") + jsonDecls.compress + + return relFilePath + + -- Wait for all tasks and collect output paths + let mut outputs := #[] + for task in tasks do + match ← IO.wait task with + | .ok path => outputs := outputs.push path + | .error e => throw e + return outputs + def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : IO SiteBaseContext := do let contents ← FS.readFile (declarationsBasePath buildDir / "references.json") <|> (pure "[]") match Json.parse contents with diff --git a/Main.lean b/Main.lean index 6de6ff0f..ae26f023 100644 --- a/Main.lean +++ b/Main.lean @@ -98,22 +98,26 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do | some dir => dir.as! String | none => ".lake/build/doc-from-db" -- Different default for DB-generated docs let dbPath := p.positionalArg! "db" |>.as! 
String + + -- Phase 1: Load shared index (fast - just names and cross-references) let start ← IO.monoMsNow - IO.println s!"Loading documentation from database: {dbPath}" - let dbResult ← loadFromDb dbPath - let result := dbResult.result - IO.println s!"Loading took {(← IO.monoMsNow) - start}ms" - IO.println s!"Loaded {result.moduleNames.size} modules with {result.name2ModIdx.size} declarations" + IO.println s!"Loading shared index from database: {dbPath}" + let db ← openDbForReading dbPath + let shared ← loadSharedIndex db + IO.println s!"Index loaded in {(← IO.monoMsNow) - start}ms ({shared.name2ModIdx.size} declarations, {shared.moduleNames.size} modules)" + -- Add `references` pseudo-module to hierarchy since references.html is always generated let start ← IO.monoMsNow - let hierarchy := Hierarchy.fromArray (result.moduleNames.push `references) + let hierarchy := Hierarchy.fromArray (shared.moduleNames.push `references) IO.println s!"Hierarchy took {(← IO.monoMsNow) - start}ms" let start ← IO.monoMsNow let baseConfig ← getSimpleBaseContext buildDir hierarchy IO.println s!"Context took {(← IO.monoMsNow) - start}ms" + + -- Phase 2: Parallel HTML generation (one task per module) let start ← IO.monoMsNow - IO.println s!"Generating HTML to: {buildDir}" - discard <| htmlOutputResults baseConfig result none (sourceLinker? := some (dbSourceLinker dbResult.sourceUrls)) + IO.println s!"Generating HTML in parallel to: {buildDir}" + discard <| htmlOutputResultsParallel baseConfig dbPath shared (sourceLinker? 
:= some (dbSourceLinker shared.sourceUrls)) IO.println s!"HTML took {(← IO.monoMsNow) - start}ms" let start ← IO.monoMsNow htmlOutputIndex baseConfig From b9c77db13d40a7d6daeed055a6eb2685a54c8f66 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Fri, 30 Jan 2026 22:36:19 +0100 Subject: [PATCH 015/106] chore: use database-driven HTML in benchmark --- scripts/bench/mathlib-docs/run | 2 +- scripts/bench/own-docs/run | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index 863863cf..e1da0c98 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -48,4 +48,4 @@ popd # Benchmark documentation generation env DOCGEN_SRC="file" "$REPO_ROOT/$BENCH/measure.py" -t mathlib-docs -m instructions -m maxrss -m task-clock -m wall-clock -- \ - lake --dir "$TMPDIR/mathproject" build Mathlib:docs + lake --dir "$TMPDIR/mathproject" build Mathlib:dbdocs diff --git a/scripts/bench/own-docs/run b/scripts/bench/own-docs/run index 8e80df29..a5486aec 100755 --- a/scripts/bench/own-docs/run +++ b/scripts/bench/own-docs/run @@ -13,4 +13,4 @@ lake build DocGen4 lake build doc-gen4 env DOCGEN_SRC="file" "$BENCH/measure.py" -t own-docs -m instructions -m maxrss -m task-clock -m wall-clock -- \ - lake build DocGen4:docs + lake build DocGen4:dbdocs From 61cb4085086b1c2914a0387a58f1c46f06e474bb Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 2 Feb 2026 13:41:21 +0100 Subject: [PATCH 016/106] chore: remove legacy HTML output --- DocGen4/DB.lean | 16 +-- DocGen4/Load.lean | 6 +- DocGen4/Output.lean | 55 -------- DocGen4/Process/Analyze.lean | 6 +- Main.lean | 54 +++----- README.md | 12 +- lakefile.lean | 221 ++++++++++++++++----------------- scripts/bench/mathlib-docs/run | 2 +- scripts/bench/own-docs/run | 2 +- 9 files changed, 136 insertions(+), 238 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 69741122..87c2c598 100644 --- 
a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -204,13 +204,6 @@ instance : SQLite.QueryParam VersoDocString := .asBlob end -/-- Hash of a module's input data, used for Lake trace integration. - Changes to module content cause changes to the hash file, triggering rebuilds of dependents. -/ -structure ModuleHash where - moduleName : String - hash : UInt64 - deriving Lean.ToJson, Lean.FromJson, Inhabited - def getDb (dbFile : System.FilePath) : IO SQLite := do -- SQLite atomically creates the DB file, and the schema and journal settings here are applied -- idempotently. This avoids DB creation race conditions. @@ -795,14 +788,11 @@ end DB open DB def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) (dbFile : String) - (hashDir : System.FilePath) (sourceUrl? : Option String) : IO Unit := do + (sourceUrl? : Option String) : IO Unit := do let dbFile := buildDir / dbFile let db ← ensureDb dbFile - IO.FS.createDirAll hashDir for (modName, modInfo) in doc.moduleInfo do let modNameStr := modName.toString - -- Hash input BEFORE transaction (no DB lock needed) - let inputHash := hash modInfo -- Each module gets its own transaction to reduce lock contention let _ ← withDbContext s!"transaction:immediate:{modNameStr}" <| db.sqlite.transaction (mode := .immediate) do -- Collect structure field info to save in second pass (after all declarations are in name_info) @@ -882,10 +872,6 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( for (pos, info) in pendingStructureFields do saveStructureFields info db modNameStr pos pure () - -- Write hash file AFTER transaction commits successfully - let hashFile := hashDir / s!"{modNameStr}.dbhash" - let moduleHash : ModuleHash := { moduleName := modNameStr, hash := inputHash } - IO.FS.writeFile hashFile (Lean.toJson moduleHash).pretty pure () where diff --git a/DocGen4/Load.lean b/DocGen4/Load.lean index 233c608e..03d6c278 100644 --- a/DocGen4/Load.lean +++ b/DocGen4/Load.lean @@ -14,15 
+14,11 @@ def envOfImports (imports : Array Name) : IO Environment := do unsafe Lean.enableInitializersExecution importModules (imports.map (Import.mk · false true false)) Options.empty (leakEnv := true) (loadExts := true) -def loadInit (imports : Array Name) : IO Hierarchy := do - let env ← envOfImports imports - pure <| Hierarchy.fromArray env.header.moduleNames - /-- Load a list of modules from the current Lean search path into an `Environment` to process for documentation. -/ -def load (task : Process.AnalyzeTask) : IO (Process.AnalyzerResult × Hierarchy) := do +def load (task : Process.AnalyzeTask) : IO Process.AnalyzerResult := do initSearchPath (← findSysroot) let env ← envOfImports task.getLoad let config := { diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 428ee667..d75d1bf8 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -84,53 +84,9 @@ def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do for (fileName, content) in findStatic do FS.writeFile (findBasePath config.buildDir / fileName) content -def htmlOutputDeclarationDatas (buildDir : System.FilePath) (result : AnalyzerResult) : HtmlT IO Unit := do - for (_, mod) in result.moduleInfo.toArray do - let jsonDecls ← Module.toJson mod - FS.writeFile (declarationsBasePath buildDir / s!"declaration-data-{mod.name}.bmp") (toJson jsonDecls).compress - /-- Custom source linker type: given an optional source URL and module name, returns a function from declaration range to URL -/ abbrev SourceLinkerFn := Option String → Name → Option DeclarationRange → String -def htmlOutputResults (baseConfig : SiteBaseContext) (result : AnalyzerResult) (sourceUrl? : Option String) - (sourceLinker? : Option SourceLinkerFn := none) - (declarationDecorator? : Option DeclarationDecoratorFn := none) : IO (Array System.FilePath) := do - let config : SiteContext := { - result := result - sourceLinker := (sourceLinker?.getD SourceLinker.sourceLinker) sourceUrl? 
- refsMap := - Std.HashMap.emptyWithCapacity baseConfig.refs.size - |>.insertMany (baseConfig.refs.iter.map fun x => (x.citekey, x)) - declarationDecorator := declarationDecorator?.getD defaultDeclarationDecorator - } - - FS.createDirAll <| basePath baseConfig.buildDir - FS.createDirAll <| declarationsBasePath baseConfig.buildDir - - discard <| htmlOutputDeclarationDatas baseConfig.buildDir result |>.run {} config baseConfig - - let mut outputs := #[] - for (modName, module) in result.moduleInfo.toArray do - let relFilePath := basePathComponent / moduleNameToFile modName - let filePath := baseConfig.buildDir / relFilePath - -- path: 'basePath/module/components/till/last.html' - -- The last component is the file name, so we drop it from the depth to root. - let baseConfig := { baseConfig with - depthToRoot := modName.components.dropLast.length - currentName := some modName - } - let (moduleHtml, cfg) := moduleToHtml module |>.run {} config baseConfig - if not cfg.errors.isEmpty then - throw <| IO.userError s!"There are errors when generating '{filePath}': {cfg.errors}" - if let .some d := filePath.parent then - FS.createDirAll d - FS.writeFile filePath moduleHtml.toString - FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"backrefs-{module.name}.json") (toString (toJson cfg.backrefs)) - -- The output paths need to be relative to the build directory, as they are stored in a build - -- artifact. - outputs := outputs.push relFilePath - return outputs - /-- Generate HTML for all modules in parallel. Each task loads its module from DB, renders HTML, and writes output files. The shared index provides cross-module linking without loading all module data upfront. -/ @@ -253,15 +209,4 @@ def headerDataOutput (buildDir : System.FilePath) : IO Unit := do FS.createDirAll declarationDir FS.writeFile (declarationDir / "header-data.bmp") finalHeaderJson.compress -/-- -The main entrypoint for outputting the documentation HTML based on an -`AnalyzerResult`. 
--/ -def htmlOutput (buildDir : System.FilePath) (result : AnalyzerResult) (hierarchy : Hierarchy) - (sourceUrl? : Option String) (sourceLinker? : Option SourceLinkerFn := none) - (declarationDecorator? : Option DeclarationDecoratorFn := none) : IO Unit := do - let baseConfig ← getSimpleBaseContext buildDir hierarchy - discard <| htmlOutputResults baseConfig result sourceUrl? sourceLinker? declarationDecorator? - htmlOutputIndex baseConfig - end DocGen4 diff --git a/DocGen4/Process/Analyze.lean b/DocGen4/Process/Analyze.lean index ee9f2e11..e470c072 100644 --- a/DocGen4/Process/Analyze.lean +++ b/DocGen4/Process/Analyze.lean @@ -114,7 +114,7 @@ def mkOptions : IO DocGenOptions := do Run the doc-gen analysis on all modules that are loaded into the `Environment` of this `MetaM` run and mentioned by the `AnalyzeTask`. -/ -def process (task : AnalyzeTask) : MetaM (AnalyzerResult × Hierarchy) := do +def process (task : AnalyzeTask) : MetaM AnalyzerResult := do let env ← getEnv let allModules := env.header.moduleNames let relevantModules := @@ -164,13 +164,11 @@ def process (task : AnalyzeTask) : MetaM (AnalyzerResult × Hierarchy) := do for (moduleName, module) in res.toArray do res := res.insert moduleName {module with members := module.members.qsort ModuleMember.order} - let hierarchy := Hierarchy.fromArray allModules - let analysis := { + return { name2ModIdx := env.const2ModIdx, moduleNames := allModules, moduleInfo := res, } - return (analysis, hierarchy) def filterDocInfo (ms : Array ModuleMember) : Array DocInfo := ms.filterMap filter diff --git a/Main.lean b/Main.lean index ae26f023..e68376c9 100644 --- a/Main.lean +++ b/Main.lean @@ -21,17 +21,11 @@ def runSingleCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! String | none => ".lake/build" - let dbFile? := p.flag? "db" |>.map (·.as! String) - let hashDir := match p.flag? "hash-dir" with - | some dir => dir.as! 
String - | none => s!"{buildDir}/doc-hashes" + let dbFile := p.positionalArg! "db" |>.as! String let relevantModules := #[p.positionalArg! "module" |>.as! String |> String.toName] let sourceUri := p.positionalArg! "sourceUri" |>.as! String - let (doc, hierarchy) ← load <| .analyzeConcreteModules relevantModules - let baseConfig ← getSimpleBaseContext buildDir hierarchy - if let some dbFile := dbFile? then - updateModuleDb doc buildDir dbFile hashDir (some sourceUri) - discard <| htmlOutputResults baseConfig doc (some sourceUri) + let doc ← load <| .analyzeConcreteModules relevantModules + updateModuleDb doc buildDir dbFile (some sourceUri) return 0 def runIndexCmd (p : Parsed) : IO UInt32 := do @@ -47,19 +41,10 @@ def runGenCoreCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! String | none => ".lake/build" - let dbFile? := p.flag? "db" |>.map (·.as! String) - let hashDir := match p.flag? "hash-dir" with - | some dir => dir.as! String - | none => s!"{buildDir}/doc-hashes" - let manifestOutput? := (p.flag? "manifest").map (·.as! String) + let dbFile := p.positionalArg! "db" |>.as! String let module := p.positionalArg! "module" |>.as! String |> String.toName - let (doc, hierarchy) ← load <| .analyzePrefixModules module - let baseConfig ← getSimpleBaseContext buildDir hierarchy - if let some dbFile := dbFile? then - updateModuleDb doc buildDir dbFile hashDir none - let outputs ← htmlOutputResults baseConfig doc none - if let .some manifestOutput := manifestOutput? then - IO.FS.writeFile manifestOutput (Lean.toJson outputs).compress + let doc ← load <| .analyzePrefixModules module + updateModuleDb doc buildDir dbFile none return 0 def runDocGenCmd (_p : Parsed) : IO UInt32 := do @@ -96,8 +81,9 @@ def dbSourceLinker (sourceUrls : Std.HashMap Name String) (_gitUrl? : Option Str def runFromDbCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! 
String - | none => ".lake/build/doc-from-db" -- Different default for DB-generated docs + | none => ".lake/build" let dbPath := p.positionalArg! "db" |>.as! String + let manifestOutput? := (p.flag? "manifest").map (·.as! String) -- Phase 1: Load shared index (fast - just names and cross-references) let start ← IO.monoMsNow @@ -117,12 +103,14 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do -- Phase 2: Parallel HTML generation (one task per module) let start ← IO.monoMsNow IO.println s!"Generating HTML in parallel to: {buildDir}" - discard <| htmlOutputResultsParallel baseConfig dbPath shared (sourceLinker? := some (dbSourceLinker shared.sourceUrls)) + let outputs ← htmlOutputResultsParallel baseConfig dbPath shared (sourceLinker? := some (dbSourceLinker shared.sourceUrls)) IO.println s!"HTML took {(← IO.monoMsNow) - start}ms" let start ← IO.monoMsNow htmlOutputIndex baseConfig IO.println s!"HTML index took {(← IO.monoMsNow) - start}ms" IO.println "Done!" + if let .some manifestOutput := manifestOutput? then + IO.FS.writeFile manifestOutput (Lean.toJson outputs).compress return 0 def runBibPrepassCmd (p : Parsed) : IO UInt32 := do @@ -146,15 +134,14 @@ def runBibPrepassCmd (p : Parsed) : IO UInt32 := do def singleCmd := `[Cli| single VIA runSingleCmd; - "Only generate the documentation for the module it was given, might contain broken links unless all documentation is generated." + "Populate the database with documentation for the specified module." FLAGS: b, build : String; "Build directory." - db : String; "Database" - "hash-dir" : String; "Directory for module hash files (default: /doc-hashes)" ARGS: - module : String; "The module to generate the HTML for. Does not have to be part of topLevelModules." + module : String; "The module to document." 
+ db : String; "Path to the SQLite database (relative to build dir)" sourceUri : String; "The sourceUri as computed by the Lake facet" ] @@ -168,16 +155,14 @@ def indexCmd := `[Cli| def genCoreCmd := `[Cli| genCore VIA runGenCoreCmd; - "Generate documentation for the specified Lean core module as they are not lake projects." + "Populate the database with documentation for the specified Lean core module (Init, Std, Lake, Lean)." FLAGS: b, build : String; "Build directory." - db : String; "Database" - "hash-dir" : String; "Directory for module hash files (default: /doc-hashes)" - m, manifest : String; "Manifest output, to list all the files generated." ARGS: - module : String; "The module to generate the HTML for." + module : String; "The core module prefix to document (e.g., Init, Lean)." + db : String; "Path to the SQLite database (relative to build dir)" ] def bibPrepassCmd := `[Cli| @@ -203,10 +188,11 @@ def headerDataCmd := `[Cli| def fromDbCmd := `[Cli| fromDb VIA runFromDbCmd; - "Generate all HTML documentation from a SQLite database. Output goes to a separate directory for easy comparison with traditional generation." + "Generate all HTML documentation from a SQLite database." FLAGS: - b, build : String; "Output directory for generated docs (default: .lake/build/doc-from-db)" + b, build : String; "Build directory (default: .lake/build)" + m, manifest : String; "Manifest output file, listing all generated HTML files." ARGS: db : String; "Path to the SQLite database" diff --git a/README.md b/README.md index 8cbd6c87..c3c366b9 100644 --- a/README.md +++ b/README.md @@ -31,18 +31,14 @@ rev = "main" 4. If your parent project has dependencies you want to run `lake update YourLibraryName` within `docbuild` whenever you update the dependencies of your parent project. 
-After this setup step you can generate documentation for an entire library and all files imported -by that library using the following command within `docbuild`: +After this setup step you can generate documentation for all libraries in your package and its dependencies +using the following command within `docbuild`: ``` -lake build YourLibraryName:docs -``` -If you have multiple libraries you want to generate full documentation for: -``` -lake build Test:docs YourLibraryName:docs +lake build :docs ``` Note that `doc-gen4` currently always generates documentation for `Lean`, `Init`, `Lake` and `Std` -in addition to the provided targets. +as dependencies of your package. The root of the built docs will be `docbuild/.lake/build/doc/index.html`. However, due to the "Same Origin Policy", the generated website will be partially broken if you just diff --git a/lakefile.lean b/lakefile.lean index c738d4d4..433923d9 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -27,7 +27,7 @@ require leansqlite from git "https://github.com/david-christiansen/leansqlite" @ "main" /-- -Obtain the subdirectory of the Lean package relative to the root of the enclosing git repository. +Obtains the subdirectory of the Lean package relative to the root of the enclosing git repository. -/ def getGitSubDirectory (directory : System.FilePath := "." ) : IO System.FilePath := do let out ← IO.Process.output { @@ -46,7 +46,7 @@ def getGitSubDirectory (directory : System.FilePath := "." ) : IO System.FilePat return if subdir == "".toSlice then "." else subdir.dropEnd 1 |>.copy /-- -Obtain the Github URL of a project by parsing the origin remote. +Obtains the GitHub URL of a project by parsing the origin remote. -/ def getGitRemoteUrl (directory : System.FilePath := "." ) (remote : String := "origin") : IO String := do let out ← IO.Process.output { @@ -61,7 +61,7 @@ def getGitRemoteUrl (directory : System.FilePath := "." 
) (remote : String := "o return out.stdout.trimAsciiEnd.copy /-- -Obtain the git commit hash of the project that is currently getting analyzed. +Obtains the Git commit hash of the project that is currently getting analyzed. -/ def getProjectCommit (directory : System.FilePath := "." ) : IO String := do let out ← IO.Process.output { @@ -110,7 +110,7 @@ def UriSource.parse : IO UriSource := do /-! Note that all URIs can use `/` even when the system path separator is `\`. -/ -/-- The github URI of the source code of the package. -/ +/-- The GitHub URI of the source code of the package. -/ package_facet srcUri.github (pkg) : String := Job.async do let url ← getGitRemoteUrl pkg.dir "origin" let .some baseUrl := getGithubBaseUrl url @@ -163,7 +163,7 @@ private def makeModuleSrcUriFacet (mod : Module) (which : Lean.Name) libUri.mapM (sync := true) fun libUri => do return mod.name.components.foldl (init := libUri) (·.push '/' ++ ·.toString (escape := False)) ++ ".lean" -/-- The github URI of the source code of the module. -/ +/-- The GitHub URI of the source code of the module. -/ module_facet srcUri.github (mod) : String := makeModuleSrcUriFacet mod `srcUri.github /-- The `vscode://` URI of the source code of the module. -/ module_facet srcUri.vscode (mod) : String := makeModuleSrcUriFacet mod `srcUri.vscode @@ -201,96 +201,117 @@ target bibPrepass : FilePath := do return outputFile /-- -Direct and transitive dependencies. +Places the module's documentation content into the package's documentation database. -Loosely inspired by bazel's [depset](https://bazel.build/rules/lib/builtins/depset). 
-/ -abbrev DepSet (α) [Hashable α] [BEq α] := Array α × OrdHashSet α - -namespace DepSet -variable {α} [Hashable α] [BEq α] - -def mk (direct : Array α) (trans : Array (DepSet α)) : DepSet α := Id.run do - let mut deps := OrdHashSet.mkEmpty 0 - for (direct, trans) in trans do - deps := deps.appendArray direct - deps := deps.append trans - return (direct, deps) - -/-- Flatten a set of dependencies into a single list. -/ -def toArray (d : DepSet α) : Array α := d.1 ++ d.2.toArray - -instance [Lean.ToJson α] : Lean.ToJson (OrdHashSet α) where toJson x := Lean.toJson x.toArray -instance [QueryText α] : Lake.QueryText (DepSet α) where queryText d := Lake.QueryText.queryText d.toArray - -end DepSet - -module_facet docs (mod) : DepSet FilePath := do +Returns a marker file that indicates the database has been populated for this module. +The marker file participates in Lake's dependency tracking, allowing for incremental updates. +-/ +module_facet docInfo (mod) : FilePath := do let exeJob ← «doc-gen4».fetch let bibPrepassJob ← bibPrepass.fetch let modJob ← mod.leanArts.fetch - -- Build all documentation imported modules + -- Build all documentation for imported modules let imports ← (← mod.imports.fetch).await - let depDocJobs := Job.collectArray <| ← imports.mapM fun mod => fetch <| mod.facet `docs + let depDocJobs := Job.mixArray <| ← imports.mapM fun mod => fetch <| mod.facet `docInfo let buildDir := (← getRootPackage).buildDir - let docFile := mod.filePath (buildDir / "doc") "html" - depDocJobs.bindM fun docDeps => do + let markerFile := buildDir / "doc-data" / s!"{mod.name}.doc" + depDocJobs.bindM fun _ => do bibPrepassJob.bindM fun _ => do exeJob.bindM fun exeFile => do modJob.mapM fun _ => do - buildFileUnlessUpToDate' docFile do - -- hack: do this here to avoid having to save the git output anywhere else + buildFileUnlessUpToDate' markerFile do let uriJob ← fetch <| mod.facet `srcUri let srcUri ← uriJob.await proc { cmd := exeFile.toString - args := #["single", 
"--build", buildDir.toString, "--db", "lean-docs.db", mod.name.toString, srcUri] + args := #["single", "--build", buildDir.toString, mod.name.toString, "doc/api-docs.db", srcUri] env := ← getAugmentedEnv } - return DepSet.mk #[docFile] docDeps + IO.FS.createDirAll markerFile.parent.get! + IO.FS.writeFile markerFile "" + return markerFile -def coreTarget (component : Lean.Name) : FetchM (Job <| Array FilePath) := do +def coreTarget (component : Lean.Name) : FetchM (Job FilePath) := do let exeJob ← «doc-gen4».fetch let bibPrepassJob ← bibPrepass.fetch - let dataPath := (← getRootPackage).buildDir / "doc-data" - let manifestFile := (← getRootPackage).buildDir / s!"{component}-manifest.json" - let dataFile := dataPath / s!"declaration-data-{component}.bmp" let buildDir := (← getRootPackage).buildDir + let markerFile := buildDir / "doc-data" / s!"core-{component}.doc" bibPrepassJob.bindM fun _ => do exeJob.mapM fun exeFile => do - buildFileUnlessUpToDate' manifestFile do + buildFileUnlessUpToDate' markerFile do proc { cmd := exeFile.toString - args := #["genCore", component.toString, - "--build", buildDir.toString, - "--db", "lean-docs.db", - "--manifest", manifestFile.toString] + args := #["genCore", "--build", buildDir.toString, component.toString, "doc/api-docs.db"] env := ← getAugmentedEnv } - addTrace (← computeTrace dataFile) - match Lean.Json.parse <| ← IO.FS.readFile manifestFile with - | .error e => ELog.error s!"Could not parse json from {manifestFile}: {e}" - | .ok manifestData => - match Lean.fromJson? manifestData with - | .error e => ELog.error s!"Could not parse an array from {manifestFile}: {e}" - | .ok (deps : Array System.FilePath) => - return deps.map (buildDir / ·) + IO.FS.createDirAll markerFile.parent.get! + IO.FS.writeFile markerFile "" + return markerFile +/-- +Populates the database with documentation data for core Lean. 
Returns a set of marker files that +indicate that the database has been updated for the corresponding modules, allowing Lake to track +changes and dependencies. +-/ target coreDocs : Array FilePath := do let coreComponents := #[`Init, `Std, `Lake, `Lean] return ← (Job.collectArray <| ← coreComponents.mapM coreTarget).mapM fun deps => - return deps.flatten + return deps -/-- A facet to generate the docs for a library. Returns all the filepaths that are required to -deploy a doc archive as a starting website. -/ -library_facet docs (lib) : Array FilePath := do +/-- +Populates the database with information for all modules in a library. +-/ +library_facet docInfo (lib) : Array FilePath := do let mods ← (← lib.modules.fetch).await - let moduleJobs := Job.collectArray <| ← mods.mapM (fetch <| ·.facet `docs) + let moduleJobs := Job.collectArray <| ← mods.mapM (fetch <| ·.facet `docInfo) + moduleJobs.mapM fun modDeps => + return modDeps + +/-- +A facet to collect docInfo dependencies for a package (no HTML generation). +This populates the database with all module data and core docs. +Returns the database file path. 
+-/ +package_facet docInfo (pkg) : FilePath := do + let libs := pkg.leanLibs + let libDocJobs := Job.collectArray <| ← libs.mapM (fetch <| ·.facet `docInfo) let coreJobs ← coreDocs.fetch + let dbPath := pkg.buildDir / "doc" / "api-docs.db" + coreJobs.bindM fun _ => do + libDocJobs.mapM fun _ => + return dbPath + +library_facet docsHeader (lib) : FilePath := do + -- Depend on the package docs facet to ensure HTML is generated first + let pkgDocsJob ← fetch <| lib.pkg.facet `docs let exeJob ← «doc-gen4».fetch - let bibPrepassJob ← bibPrepass.fetch -- Shared with DocGen4.Output let buildDir := (← getRootPackage).buildDir let basePath := buildDir / "doc" + let dataFile := basePath / "declarations" / "header-data.bmp" + exeJob.bindM fun exeFile => do + pkgDocsJob.mapM fun _ => do + buildFileUnlessUpToDate' dataFile do + logInfo "Documentation header indexing" + proc { + cmd := exeFile.toString + args := #["headerData", "--build", buildDir.toString] + } + return dataFile + +/-- +Generates all documentation from the SQLite database for the package. This facet depends on the +package facet `docInfo` to ensure the database is populated, then generates HTML from the database. 
+-/ +package_facet docs (pkg) : Array FilePath := do + -- Depend on docInfo to ensure DB is populated + let docInfoJob ← fetch <| pkg.facet `docInfo + let exeJob ← «doc-gen4».fetch + let bibPrepassJob ← bibPrepass.fetch + let buildDir := pkg.buildDir + let basePath := buildDir / "doc" + let dbPath := buildDir / "doc" / "api-docs.db" + let manifestFile := buildDir / "doc-manifest.json" let dataFile := basePath / "declarations" / "declaration-data.bmp" let staticFiles := #[ basePath / "style.css", @@ -317,63 +338,33 @@ library_facet docs (lib) : Array FilePath := do ] bibPrepassJob.bindM fun _ => do exeJob.bindM fun exeFile => do - coreJobs.bindM fun coreDeps => do - moduleJobs.mapM fun modDeps => do - buildFileUnlessUpToDate' dataFile do - logInfo "Documentation indexing" - proc { - cmd := exeFile.toString - args := #["index", "--build", buildDir.toString] - } - let traces ← staticFiles.mapM computeTrace - addTrace <| mixTraceArray traces - return (DepSet.mk (#[dataFile] ++ staticFiles) (modDeps.push (.mk coreDeps #[]))).toArray - -library_facet docsHeader (lib) : FilePath := do - let mods ← (← lib.modules.fetch).await - let moduleJobs := Job.mixArray <| ← mods.mapM (fetch <| ·.facet `docs) - let exeJob ← «doc-gen4».fetch - let coreJobs ← coreDocs.fetch - -- Shared with DocGen4.Output - let buildDir := (← getRootPackage).buildDir - let basePath := buildDir / "doc" - let dataFile := basePath / "declarations" / "header-data.bmp" - exeJob.bindM fun exeFile => do - coreJobs.bindM fun _ => do - moduleJobs.mapM fun _ => do + docInfoJob.mapM fun _ => do buildFileUnlessUpToDate' dataFile do - logInfo "Documentation header indexing" + logInfo "Generating documentation from database" proc { cmd := exeFile.toString - args := #["headerData", "--build", buildDir.toString] + args := #["fromDb", "--build", buildDir.toString, "--manifest", manifestFile.toString, dbPath.toString] + env := ← getAugmentedEnv } - return dataFile - -/-- Generate documentation from the SQLite database. 
-This facet depends on the regular `docs` facet to ensure the database is populated first, -then generates HTML from the database into a separate directory for comparison. -/ -library_facet dbdocs (lib) : FilePath := do - -- First, ensure the regular docs are built (which populates the DB) - let docsJob ← fetch <| lib.facet `docs - let exeJob ← «doc-gen4».fetch - let buildDir := (← getRootPackage).buildDir - let dbPath := buildDir / "lean-docs.db" - let outputDir := buildDir / "doc-from-db" - let outputDataDir := outputDir / "doc-data" - let outputMarker := outputDir / "doc" / "index.html" - docsJob.bindM fun _ => do - exeJob.mapM fun exeFile => do - buildFileUnlessUpToDate' outputMarker do - -- Copy references.json to the DB output directory so navbar includes references link - IO.FS.createDirAll outputDataDir - let srcRefsFile := buildDir / "doc-data" / "references.json" - let dstRefsFile := outputDataDir / "references.json" - if ← srcRefsFile.pathExists then - IO.FS.writeFile dstRefsFile (← IO.FS.readFile srcRefsFile) - logInfo s!"Generating documentation from database: {dbPath}" - proc { - cmd := exeFile.toString - args := #["fromDb", "--build", outputDir.toString, dbPath.toString] - env := ← getAugmentedEnv - } - return outputMarker + -- Read manifest and return file list + let traces ← staticFiles.mapM computeTrace + addTrace <| mixTraceArray traces + match Lean.Json.parse <| ← IO.FS.readFile manifestFile with + | .error _ => return staticFiles + | .ok manifestData => + match Lean.fromJson? manifestData with + | .error _ => return staticFiles + | .ok (deps : Array System.FilePath) => + return (#[dataFile] ++ staticFiles ++ deps.map (buildDir / ·)) + +/-- Helper facet that informs users to build docs at package level. -/ +module_facet docs (_mod) : Unit := Job.async do + logInfo "To build documentation, run: lake build :docs" + logInfo "This builds docs for the entire package at once." 
+ return () + +/-- Helper facet that informs users to build docs at package level. -/ +library_facet docs (_lib) : Unit := Job.async do + logInfo "To build documentation, run: lake build :docs" + logInfo "This builds docs for the entire package at once." + return () diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index e1da0c98..7afddbf0 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -48,4 +48,4 @@ popd # Benchmark documentation generation env DOCGEN_SRC="file" "$REPO_ROOT/$BENCH/measure.py" -t mathlib-docs -m instructions -m maxrss -m task-clock -m wall-clock -- \ - lake --dir "$TMPDIR/mathproject" build Mathlib:dbdocs + lake --dir "$TMPDIR/mathproject" build :docs diff --git a/scripts/bench/own-docs/run b/scripts/bench/own-docs/run index a5486aec..6bf6f643 100755 --- a/scripts/bench/own-docs/run +++ b/scripts/bench/own-docs/run @@ -13,4 +13,4 @@ lake build DocGen4 lake build doc-gen4 env DOCGEN_SRC="file" "$BENCH/measure.py" -t own-docs -m instructions -m maxrss -m task-clock -m wall-clock -- \ - lake build DocGen4:dbdocs + lake build :docs From 8409ddbf8b9c1d68f576bc9f0ab69248786bb251 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 2 Feb 2026 13:46:39 +0100 Subject: [PATCH 017/106] chore: dead code removal Remove all code that is no longer needed with the new generation path. --- DocGen4/DB.lean | 22 ------------------ DocGen4/Process/Hierarchy.lean | 42 ---------------------------------- Main.lean | 24 ------------------- 3 files changed, 88 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 87c2c598..dba4cb9f 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -1398,28 +1398,6 @@ def loadModule (db : SQLite) (moduleName : Name) : IO Process.Module := do else pos1 < pos2 -- Tiebreaker: use DB position return { name := moduleName, members := sortedMembers.map (·.2), imports } -/-- Load all modules from the database. 
-/ -def loadAllModules (db : SQLite) (moduleNames : Array Name) : IO (Std.HashMap Name Process.Module) := do - let mut modules : Std.HashMap Name Process.Module := {} - for modName in moduleNames do - let module ← loadModule db modName - modules := modules.insert modName module - return modules - -/-- Result of loading from the database, including source URLs. -/ -structure LoadFromDbResult where - result : Process.AnalyzerResult - sourceUrls : Std.HashMap Name String - -/-- Load a complete AnalyzerResult from the database. -/ -def loadFromDb (dbFile : System.FilePath) : IO LoadFromDbResult := do - let db ← openDbForReading dbFile - let moduleNames ← getModuleNames db - let sourceUrls ← getModuleSourceUrls db - let name2ModIdx ← buildName2ModIdx db moduleNames - let moduleInfo ← loadAllModules db moduleNames - return { result := { name2ModIdx, moduleNames, moduleInfo }, sourceUrls } - /-- Shared index data needed for cross-module linking, without loading full module contents. -/ structure SharedIndex where moduleNames : Array Name diff --git a/DocGen4/Process/Hierarchy.lean b/DocGen4/Process/Hierarchy.lean index 1d927426..d3f3c7b6 100644 --- a/DocGen4/Process/Hierarchy.lean +++ b/DocGen4/Process/Hierarchy.lean @@ -77,47 +77,5 @@ partial def insert! (h : Hierarchy) (n : Name) : Hierarchy := Id.run do partial def fromArray (names : Array Name) : Hierarchy := names.foldl insert! 
(empty anonymous false) -def baseDirBlackList : Std.HashSet String := - Std.HashSet.ofList [ - "404.html", - "color-scheme.js", - "declaration-data.js", - "declarations", - "expand-nav.js", - "find", - "foundational_types.html", - "how-about.js", - "index.html", - "jump-src.js", - "mathjax-config.js", - "navbar.html", - "nav.js", - "search.html", - "search.js", - "src", - "style.css", - "favicon.svg" - ] - -partial def fromDirectoryAux (dir : System.FilePath) (previous : Name) : IO (Array Name) := do - let mut children := #[] - for entry in ← System.FilePath.readDir dir do - if ← entry.path.isDir then - children := children ++ (← fromDirectoryAux entry.path (.str previous entry.fileName)) - else if entry.path.extension = some "html" then - children := children.push <| .str previous (entry.fileName.dropEnd ".html".length).copy - return children - -def fromDirectory (dir : System.FilePath) : IO Hierarchy := do - let mut children := #[] - for entry in ← System.FilePath.readDir dir do - if baseDirBlackList.contains entry.fileName then - continue - else if ← entry.path.isDir then - children := children ++ (← fromDirectoryAux entry.path (.mkSimple entry.fileName)) - else if entry.path.extension = some "html" then - children := children.push <| .mkSimple (entry.fileName.dropEnd ".html".length).copy - return Hierarchy.fromArray children - end Hierarchy end DocGen4 diff --git a/Main.lean b/Main.lean index e68376c9..470826b0 100644 --- a/Main.lean +++ b/Main.lean @@ -4,12 +4,6 @@ import Cli open DocGen4 DocGen4.DB DocGen4.Output Lean Cli -def getTopLevelModules (p : Parsed) : IO (List String) := do - let topLevelModules := p.variableArgsAs! String |>.toList - if topLevelModules.length == 0 then - throw <| IO.userError "No topLevelModules provided." - return topLevelModules - def runHeaderDataCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! 
String @@ -28,15 +22,6 @@ def runSingleCmd (p : Parsed) : IO UInt32 := do updateModuleDb doc buildDir dbFile (some sourceUri) return 0 -def runIndexCmd (p : Parsed) : IO UInt32 := do - let buildDir := match p.flag? "build" with - | some dir => dir.as! String - | none => ".lake/build" - let hierarchy ← Hierarchy.fromDirectory (Output.basePath buildDir) - let baseConfig ← getSimpleBaseContext buildDir hierarchy - htmlOutputIndex baseConfig - return 0 - def runGenCoreCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! String @@ -145,14 +130,6 @@ def singleCmd := `[Cli| sourceUri : String; "The sourceUri as computed by the Lake facet" ] -def indexCmd := `[Cli| - index VIA runIndexCmd; - "Index the documentation that has been generated by single." - - FLAGS: - b, build : String; "Build directory." -] - def genCoreCmd := `[Cli| genCore VIA runGenCoreCmd; "Populate the database with documentation for the specified Lean core module (Init, Std, Lake, Lean)." @@ -204,7 +181,6 @@ def docGenCmd : Cmd := `[Cli| SUBCOMMANDS: singleCmd; - indexCmd; genCoreCmd; bibPrepassCmd; headerDataCmd; From af8611d9682a7682942cb60c548b711fefa3e9d0 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 2 Feb 2026 13:55:34 +0100 Subject: [PATCH 018/106] chore: better naming --- DocGen4/DB.lean | 8 ++++---- DocGen4/Output.lean | 10 +++++----- Main.lean | 12 ++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index dba4cb9f..ec555053 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -1398,14 +1398,14 @@ def loadModule (db : SQLite) (moduleName : Name) : IO Process.Module := do else pos1 < pos2 -- Tiebreaker: use DB position return { name := moduleName, members := sortedMembers.map (·.2), imports } -/-- Shared index data needed for cross-module linking, without loading full module contents. 
-/ -structure SharedIndex where +/-- Context needed for cross-module linking, without loading full module contents. -/ +structure LinkingContext where moduleNames : Array Name sourceUrls : Std.HashMap Name String name2ModIdx : Std.HashMap Name ModuleIdx -/-- Load just the shared index (fast) - only what's needed for cross-module linking. -/ -def loadSharedIndex (db : SQLite) : IO SharedIndex := do +/-- Load the linking context from the database. -/ +def loadLinkingContext (db : SQLite) : IO LinkingContext := do let moduleNames ← getModuleNames db let sourceUrls ← getModuleSourceUrls db let name2ModIdx ← buildName2ModIdx db moduleNames diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index d75d1bf8..67d55804 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -89,23 +89,23 @@ abbrev SourceLinkerFn := Option String → Name → Option DeclarationRange → /-- Generate HTML for all modules in parallel. Each task loads its module from DB, renders HTML, and writes output files. - The shared index provides cross-module linking without loading all module data upfront. -/ + The linking context provides cross-module linking without loading all module data upfront. -/ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.FilePath) - (shared : SharedIndex) (sourceLinker? : Option SourceLinkerFn := none) + (linkCtx : LinkingContext) (sourceLinker? : Option SourceLinkerFn := none) (declarationDecorator? 
: Option DeclarationDecoratorFn := none) : IO (Array System.FilePath) := do FS.createDirAll <| basePath baseConfig.buildDir FS.createDirAll <| declarationsBasePath baseConfig.buildDir -- Spawn one task per module, each returning its output file path - let tasks ← shared.moduleNames.mapM fun modName => IO.asTask do + let tasks ← linkCtx.moduleNames.mapM fun modName => IO.asTask do -- Each task opens its own DB connection (SQLite handles concurrent readers well) let db ← openDbForReading dbPath let module ← loadModule db modName -- Build a minimal AnalyzerResult with just this module's info let result : AnalyzerResult := { - name2ModIdx := shared.name2ModIdx - moduleNames := shared.moduleNames + name2ModIdx := linkCtx.name2ModIdx + moduleNames := linkCtx.moduleNames moduleInfo := ({} : Std.HashMap Name Process.Module).insert modName module } diff --git a/Main.lean b/Main.lean index 470826b0..123db0e2 100644 --- a/Main.lean +++ b/Main.lean @@ -70,16 +70,16 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do let dbPath := p.positionalArg! "db" |>.as! String let manifestOutput? := (p.flag? "manifest").map (·.as! 
String) - -- Phase 1: Load shared index (fast - just names and cross-references) + -- Phase 1: Load linking context (fast - module names, source URLs, declaration locations) let start ← IO.monoMsNow - IO.println s!"Loading shared index from database: {dbPath}" + IO.println s!"Loading linking context from database: {dbPath}" let db ← openDbForReading dbPath - let shared ← loadSharedIndex db - IO.println s!"Index loaded in {(← IO.monoMsNow) - start}ms ({shared.name2ModIdx.size} declarations, {shared.moduleNames.size} modules)" + let linkCtx ← loadLinkingContext db + IO.println s!"Linking context loaded in {(← IO.monoMsNow) - start}ms ({linkCtx.name2ModIdx.size} declarations, {linkCtx.moduleNames.size} modules)" -- Add `references` pseudo-module to hierarchy since references.html is always generated let start ← IO.monoMsNow - let hierarchy := Hierarchy.fromArray (shared.moduleNames.push `references) + let hierarchy := Hierarchy.fromArray (linkCtx.moduleNames.push `references) IO.println s!"Hierarchy took {(← IO.monoMsNow) - start}ms" let start ← IO.monoMsNow let baseConfig ← getSimpleBaseContext buildDir hierarchy @@ -88,7 +88,7 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do -- Phase 2: Parallel HTML generation (one task per module) let start ← IO.monoMsNow IO.println s!"Generating HTML in parallel to: {buildDir}" - let outputs ← htmlOutputResultsParallel baseConfig dbPath shared (sourceLinker? := some (dbSourceLinker shared.sourceUrls)) + let outputs ← htmlOutputResultsParallel baseConfig dbPath linkCtx (sourceLinker? 
:= some (dbSourceLinker linkCtx.sourceUrls)) IO.println s!"HTML took {(← IO.monoMsNow) - start}ms" let start ← IO.monoMsNow htmlOutputIndex baseConfig From f45fd31fc9629d01076cd9602a12f3305fc9d4f8 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 2 Feb 2026 13:59:04 +0100 Subject: [PATCH 019/106] fix: update build command in workflow --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e0bdd3bb..31ece23d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,13 +23,13 @@ jobs: run: | export LEAN_ABORT_ON_PANIC=1 # to ensure that the `--query` test below has a baseline to compare against. - rm -rf .lake/build/docs - lake build DocGen4:docs - + rm -rf .lake/build/doc + lake build :docs + - name: Check `--query` output shell: bash # enables pipefail run: | export LEAN_ABORT_ON_PANIC=1 - lake query DocGen4:docs | sort > expected.txt + lake query :docs | sort > expected.txt find "$(pwd)/.lake/build/doc" -type f ! -name '*.trace' ! -name '*.hash' | sort > actual.txt diff actual.txt expected.txt From c169a17fcb2fc75a840112b8ac70e6545da8b273 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 2 Feb 2026 14:14:46 +0100 Subject: [PATCH 020/106] fix: don't put database in HTML dir --- lakefile.lean | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lakefile.lean b/lakefile.lean index 433923d9..e7f23a76 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -224,7 +224,7 @@ module_facet docInfo (mod) : FilePath := do let srcUri ← uriJob.await proc { cmd := exeFile.toString - args := #["single", "--build", buildDir.toString, mod.name.toString, "doc/api-docs.db", srcUri] + args := #["single", "--build", buildDir.toString, mod.name.toString, "api-docs.db", srcUri] env := ← getAugmentedEnv } IO.FS.createDirAll markerFile.parent.get! 
@@ -241,7 +241,7 @@ def coreTarget (component : Lean.Name) : FetchM (Job FilePath) := do buildFileUnlessUpToDate' markerFile do proc { cmd := exeFile.toString - args := #["genCore", "--build", buildDir.toString, component.toString, "doc/api-docs.db"] + args := #["genCore", "--build", buildDir.toString, component.toString, "api-docs.db"] env := ← getAugmentedEnv } IO.FS.createDirAll markerFile.parent.get! @@ -276,7 +276,7 @@ package_facet docInfo (pkg) : FilePath := do let libs := pkg.leanLibs let libDocJobs := Job.collectArray <| ← libs.mapM (fetch <| ·.facet `docInfo) let coreJobs ← coreDocs.fetch - let dbPath := pkg.buildDir / "doc" / "api-docs.db" + let dbPath := pkg.buildDir / "api-docs.db" coreJobs.bindM fun _ => do libDocJobs.mapM fun _ => return dbPath @@ -310,7 +310,7 @@ package_facet docs (pkg) : Array FilePath := do let bibPrepassJob ← bibPrepass.fetch let buildDir := pkg.buildDir let basePath := buildDir / "doc" - let dbPath := buildDir / "doc" / "api-docs.db" + let dbPath := buildDir / "api-docs.db" let manifestFile := buildDir / "doc-manifest.json" let dataFile := basePath / "declarations" / "declaration-data.bmp" let staticFiles := #[ From 569cd63bbe54e9d7d112c115a5d6967a6d616ad0 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 2 Feb 2026 14:58:19 +0100 Subject: [PATCH 021/106] fix: build docs for dependencies' contents --- lakefile.lean | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lakefile.lean b/lakefile.lean index e7f23a76..4fcc60a9 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -269,12 +269,14 @@ library_facet docInfo (lib) : Array FilePath := do /-- A facet to collect docInfo dependencies for a package (no HTML generation). -This populates the database with all module data and core docs. +This populates the database with all module data and core docs for all packages +in the workspace (including dependencies). Returns the database file path. 
-/ package_facet docInfo (pkg) : FilePath := do - let libs := pkg.leanLibs - let libDocJobs := Job.collectArray <| ← libs.mapM (fetch <| ·.facet `docInfo) + let ws ← getWorkspace + let allLibs := ws.packages.flatMap (·.leanLibs) + let libDocJobs := Job.collectArray <| ← allLibs.mapM (fetch <| ·.facet `docInfo) let coreJobs ← coreDocs.fetch let dbPath := pkg.buildDir / "api-docs.db" coreJobs.bindM fun _ => do From fa6ae514c83a7ece266b120814098eeb75663166 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 2 Feb 2026 19:54:16 +0100 Subject: [PATCH 022/106] fix: get an updated Plausible to avoid version compat issue in benchmark --- scripts/bench/mathlib-docs/run | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index 7afddbf0..6af94a4c 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -34,8 +34,14 @@ cat >> lakefile.toml <> lakefile.toml + # Update doc-gen4 dependency MATHLIB_NO_CACHE_ON_UPDATE=1 lake update doc-gen4 From e216fd21a9fbea7f7ce7bcfed46272944ffc074a Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 3 Feb 2026 11:05:18 +0100 Subject: [PATCH 023/106] feat: detect mismatched schemas --- DocGen4/DB.lean | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index ccce303a..4d0413e0 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -4,6 +4,43 @@ import SQLite namespace DocGen4.DB +open Lean in +/-- Extract a deterministic string representation of an inductive type for hashing. + Includes constructor names and their types. -/ +private def inductiveRepr (env : Environment) (name : Name) : String := Id.run do + let some (.inductInfo info) := env.find? name | return s!"not found: {name}" + let mut s := s!"inductive {name} : {info.type}\n" + for ctor in info.ctors do + let some (.ctorInfo ctorInfo) := env.find? 
ctor | continue + let ctorName := ctor.replacePrefix name .anonymous + s := s ++ s!" | {ctorName} : {ctorInfo.type}\n" + return s + +namespace Internals +open Lean Elab Term in +/-- String representation of inductive type definitions, computed at compile time. -/ +scoped elab "inductiveRepr![" types:ident,* "]" : term => do + let env ← getEnv + let mut reprs : Array String := #[] + for type in types.getElems do + let name ← resolveGlobalConstNoOverload type + reprs := reprs.push (inductiveRepr env name) + return .lit (.strVal (String.intercalate "\n" reprs.toList)) +end Internals + +open Internals in +open Lean.Widget in +/-- +The datatypes that are serialized to the database. If they change, then the database should be +rebuilt. +-/ +def serializedCodeTypeDefs : String := + inductiveRepr![ + SortFormer, + RenderedCode.Tag, + TaggedText + ] + section open Lean open SQLite.Blob @@ -216,6 +253,34 @@ def getDb (dbFile : System.FilePath) : IO SQLite := do catch | e => throw <| .userError s!"Exception while creating schema: {e}" + -- Check schema version via DDL hash and type definition hash + let ddlHash := toString ddl.hash + let typeHash := toString serializedCodeTypeDefs.hash + let stmt ← db.prepare "SELECT key, value FROM schema_meta" + let mut storedDdlHash : Option String := none + let mut storedTypeHash : Option String := none + while ← stmt.step do + let key ← stmt.columnText 0 + let value ← stmt.columnText 1 + if key == "ddl_hash" then storedDdlHash := some value + if key == "type_hash" then storedTypeHash := some value + match storedDdlHash, storedTypeHash with + | none, none => + -- New database, store the hashes + db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('ddl_hash', '{ddlHash}')" + db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('type_hash', '{typeHash}')" + | some stored, _ => + if stored != ddlHash then + throw <| .userError s!"Database schema is outdated (DDL hash mismatch). Run `lake clean` or delete '{dbFile}' and rebuild." 
+ match storedTypeHash with + | none => + -- Older DB without type hash, add it + db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('type_hash', '{typeHash}')" + | some storedType => + if storedType != typeHash then + throw <| .userError s!"Database schema is outdated (serialized type definitions changed). Run `lake clean` or delete '{dbFile}' and rebuild." + | none, some _ => -- Shouldn't happen, but handle gracefully + db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('ddl_hash', '{ddlHash}')" return db where ddl := @@ -479,6 +544,11 @@ CREATE TABLE IF NOT EXISTS tactic_tags ( PRIMARY KEY (module_name, internal_name, tag), FOREIGN KEY (module_name, internal_name) REFERENCES tactics(module_name, internal_name) ON DELETE CASCADE ); + +CREATE TABLE IF NOT EXISTS schema_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); "# def withDbContext (context : String) (act : IO α) : IO α := do From 8f11577daf45dff81a2a0d12796c160e30753ddf Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 3 Feb 2026 12:22:31 +0100 Subject: [PATCH 024/106] fix: make facets work as before --- DocGen4/DB.lean | 23 ++++++++ DocGen4/Output.lean | 87 +++++++++++++++++++++++++++++-- Main.lean | 32 ++++++++++-- README.md | 12 +++-- lakefile.lean | 95 +++++++++++----------------------- scripts/bench/mathlib-docs/run | 8 +-- scripts/bench/own-docs/run | 2 +- 7 files changed, 175 insertions(+), 84 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 4d0413e0..d2813baa 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -1539,4 +1539,27 @@ def loadLinkingContext (db : SQLite) : IO LinkingContext := do let name2ModIdx ← buildName2ModIdx db moduleNames return { moduleNames, sourceUrls, name2ModIdx } +/-- Get transitive closure of imports for given modules using recursive CTE. 
-/ +def getTransitiveImports (db : SQLite) (modules : Array Name) : IO (Array Name) := withDbContext "read:transitive_imports" do + if modules.isEmpty then return #[] + -- Build the VALUES clause for starting modules + let placeholders := ", ".intercalate (modules.toList.map fun _ => "(?)") + let sql := s!" + WITH RECURSIVE transitive_imports(name) AS ( + VALUES {placeholders} + UNION + SELECT mi.imported FROM module_imports mi + JOIN transitive_imports ti ON mi.importer = ti.name + ) + SELECT DISTINCT name FROM transitive_imports" + let stmt ← db.prepare sql + -- Bind all module names + for h : i in [0:modules.size] do + stmt.bind (i.toInt32 + 1) modules[i].toString + let mut result := #[] + while (← stmt.step) do + let name := (← stmt.columnText 0).toName + result := result.push name + return result + end Reading diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 9598d2e5..a93ef490 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -92,15 +92,18 @@ abbrev SourceLinkerFn := Option String → Name → Option DeclarationRange → /-- Generate HTML for all modules in parallel. Each task loads its module from DB, renders HTML, and writes output files. - The linking context provides cross-module linking without loading all module data upfront. -/ + The linking context provides cross-module linking without loading all module data upfront. + When `targetModules` is provided, only those modules are rendered (but linking uses all modules). -/ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.FilePath) - (linkCtx : LinkingContext) (sourceLinker? : Option SourceLinkerFn := none) + (linkCtx : LinkingContext) + (targetModules : Array Name := linkCtx.moduleNames) + (sourceLinker? : Option SourceLinkerFn := none) (declarationDecorator? 
: Option DeclarationDecoratorFn := none) : IO (Array System.FilePath) := do FS.createDirAll <| basePath baseConfig.buildDir FS.createDirAll <| declarationsBasePath baseConfig.buildDir -- Spawn one task per module, each returning its output file path - let tasks ← linkCtx.moduleNames.mapM fun modName => IO.asTask do + let tasks ← targetModules.mapM fun modName => IO.asTask do -- Each task opens its own DB connection (SQLite handles concurrent readers well) let db ← openDbForReading dbPath let module ← loadModule db modName @@ -214,4 +217,82 @@ def headerDataOutput (buildDir : System.FilePath) : IO Unit := do FS.createDirAll declarationDir FS.writeFile (declarationDir / "header-data.bmp") finalHeaderJson.compress +/-- Convert HTML file path to module name: doc/A/B/C.html -> `A.B.C -/ +def htmlPathToModuleName (docDir : System.FilePath) (htmlPath : System.FilePath) : Option Name := + -- Get relative path from doc directory + let docDirStr := docDir.toString + let htmlPathStr := htmlPath.toString + -- Strip the doc directory prefix (handle both with and without trailing separator) + let relPath? := + if htmlPathStr.startsWith (docDirStr ++ "/") then + some (htmlPathStr.drop (docDirStr.length + 1)) + else if htmlPathStr.startsWith docDirStr then + some (htmlPathStr.drop docDirStr.length) + else + none + relPath?.bind fun relPath => + -- Remove .html extension + if relPath.endsWith ".html" then + let withoutExt := relPath.dropEnd 5 + -- Convert path separators to dots + let name := withoutExt.replace "/" "." |>.replace "\\" "." 
+ some name.toName + else + none + +/-- Scan for existing module HTML files under docDir -/ +partial def scanModuleHtmlFiles (docDir : System.FilePath) : IO (Array Name) := do + -- Files/directories to skip (not module HTML files) + let skipFiles := ["index.html", "404.html", "navbar.html", "search.html", + "foundational_types.html", "references.html", "tactics.html"] + let skipDirs := ["find", "declarations", "src"] + + let rec scanDir (dir : System.FilePath) : IO (Array Name) := do + let mut result := #[] + if !(← dir.pathExists) then return result + for entry in ← System.FilePath.readDir dir do + let entryPath := entry.root / entry.fileName + if ← entryPath.isDir then + -- Skip special directories + if skipDirs.contains entry.fileName then continue + result := result ++ (← scanDir entryPath) + else if entry.fileName.endsWith ".html" then + -- Skip special files + if skipFiles.contains entry.fileName then continue + -- Convert file path to module name + if let some modName := htmlPathToModuleName docDir entryPath then + result := result.push modName + return result + + scanDir docDir + +/-- Rebuild navbar.html by scanning existing HTML files on disk. + This enables incremental builds where subsequent builds include modules from previous builds. -/ +def updateNavbarFromDisk (buildDir : System.FilePath) : IO Unit := do + let docDir := basePath buildDir + -- Scan for all existing module HTML files + let existingModules ← scanModuleHtmlFiles docDir + -- Add `references` pseudo-module for navbar + let allModules := existingModules.push `references + -- Build hierarchy from all found modules + let hierarchy := Hierarchy.fromArray allModules + -- Load references for base context + let contents ← FS.readFile (declarationsBasePath buildDir / "references.json") <|> (pure "[]") + let refs : Array BibItem ← match Json.parse contents with + | .error _ => pure #[] + | .ok jsonContent => + match fromJson? 
jsonContent with + | .error _ => pure #[] + | .ok refs => pure refs + let baseConfig : SiteBaseContext := { + buildDir := buildDir + depthToRoot := 0 + currentName := none + hierarchy := hierarchy + refs := refs + } + -- Regenerate navbar + let navbarHtml := ReaderT.run navbar baseConfig |>.toString + FS.writeFile (docDir / "navbar.html") navbarHtml + end DocGen4 diff --git a/Main.lean b/Main.lean index 123db0e2..1e759bb9 100644 --- a/Main.lean +++ b/Main.lean @@ -69,6 +69,7 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do | none => ".lake/build" let dbPath := p.positionalArg! "db" |>.as! String let manifestOutput? := (p.flag? "manifest").map (·.as! String) + let moduleRoots := (p.variableArgsAs! String).map String.toName -- Phase 1: Load linking context (fast - module names, source URLs, declaration locations) let start ← IO.monoMsNow @@ -77,9 +78,21 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do let linkCtx ← loadLinkingContext db IO.println s!"Linking context loaded in {(← IO.monoMsNow) - start}ms ({linkCtx.name2ModIdx.size} declarations, {linkCtx.moduleNames.size} modules)" + -- Determine which modules to generate HTML for + let targetModules ← + if moduleRoots.isEmpty then + -- No roots specified: generate for all modules (existing behavior) + pure linkCtx.moduleNames + else + -- Roots specified: compute transitive closure + let start ← IO.monoMsNow + let transitiveModules ← getTransitiveImports db moduleRoots + IO.println s!"Computed transitive closure of {moduleRoots.size} roots: {transitiveModules.size} modules in {(← IO.monoMsNow) - start}ms" + pure transitiveModules + -- Add `references` pseudo-module to hierarchy since references.html is always generated let start ← IO.monoMsNow - let hierarchy := Hierarchy.fromArray (linkCtx.moduleNames.push `references) + let hierarchy := Hierarchy.fromArray (targetModules.push `references) IO.println s!"Hierarchy took {(← IO.monoMsNow) - start}ms" let start ← IO.monoMsNow let baseConfig ← getSimpleBaseContext 
buildDir hierarchy @@ -88,11 +101,19 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do -- Phase 2: Parallel HTML generation (one task per module) let start ← IO.monoMsNow IO.println s!"Generating HTML in parallel to: {buildDir}" - let outputs ← htmlOutputResultsParallel baseConfig dbPath linkCtx (sourceLinker? := some (dbSourceLinker linkCtx.sourceUrls)) + let outputs ← htmlOutputResultsParallel baseConfig dbPath linkCtx targetModules (sourceLinker? := some (dbSourceLinker linkCtx.sourceUrls)) IO.println s!"HTML took {(← IO.monoMsNow) - start}ms" + + -- When module roots are specified, update navbar from disk (includes modules from previous builds) + -- Otherwise, generate full index for complete build let start ← IO.monoMsNow - htmlOutputIndex baseConfig - IO.println s!"HTML index took {(← IO.monoMsNow) - start}ms" + if moduleRoots.isEmpty then + htmlOutputIndex baseConfig + IO.println s!"HTML index took {(← IO.monoMsNow) - start}ms" + else + updateNavbarFromDisk buildDir + IO.println s!"Navbar update took {(← IO.monoMsNow) - start}ms" + IO.println "Done!" if let .some manifestOutput := manifestOutput? then IO.FS.writeFile manifestOutput (Lean.toJson outputs).compress @@ -165,7 +186,7 @@ def headerDataCmd := `[Cli| def fromDbCmd := `[Cli| fromDb VIA runFromDbCmd; - "Generate all HTML documentation from a SQLite database." + "Generate HTML documentation from a SQLite database." FLAGS: b, build : String; "Build directory (default: .lake/build)" @@ -173,6 +194,7 @@ def fromDbCmd := `[Cli| ARGS: db : String; "Path to the SQLite database" + ...modules : String; "Optional: Module roots to generate docs for (computes transitive closure)" ] def docGenCmd : Cmd := `[Cli| diff --git a/README.md b/README.md index c3c366b9..8cbd6c87 100644 --- a/README.md +++ b/README.md @@ -31,14 +31,18 @@ rev = "main" 4. If your parent project has dependencies you want to run `lake update YourLibraryName` within `docbuild` whenever you update the dependencies of your parent project. 
-After this setup step you can generate documentation for all libraries in your package and its dependencies -using the following command within `docbuild`: +After this setup step you can generate documentation for an entire library and all files imported +by that library using the following command within `docbuild`: ``` -lake build :docs +lake build YourLibraryName:docs +``` +If you have multiple libraries you want to generate full documentation for: +``` +lake build Test:docs YourLibraryName:docs ``` Note that `doc-gen4` currently always generates documentation for `Lean`, `Init`, `Lake` and `Std` -as dependencies of your package. +in addition to the provided targets. The root of the built docs will be `docbuild/.lake/build/doc/index.html`. However, due to the "Same Origin Policy", the generated website will be partially broken if you just diff --git a/lakefile.lean b/lakefile.lean index 67582fe6..07a8cd9f 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -304,73 +304,40 @@ library_facet docsHeader (lib) : FilePath := do } return dataFile -/-- -Generates all documentation from the SQLite database for the package. This facet depends on the -package facet `docInfo` to ensure the database is populated, then generates HTML from the database. --/ -package_facet docs (pkg) : Array FilePath := do - -- Depend on docInfo to ensure DB is populated - let docInfoJob ← fetch <| pkg.facet `docInfo + +/-- Generate HTML for this module and its transitive imports. 
-/ +module_facet docs (mod) : Unit := do let exeJob ← «doc-gen4».fetch let bibPrepassJob ← bibPrepass.fetch - let buildDir := pkg.buildDir - let basePath := buildDir / "doc" + let docInfoJob ← fetch <| mod.facet `docInfo + + let buildDir := (← getRootPackage).buildDir let dbPath := buildDir / "api-docs.db" - let manifestFile := buildDir / "doc-manifest.json" - let dataFile := basePath / "declarations" / "declaration-data.bmp" - let staticFiles := #[ - basePath / "style.css", - basePath / "favicon.svg", - basePath / "declaration-data.js", - basePath / "color-scheme.js", - basePath / "nav.js", - basePath / "jump-src.js", - basePath / "expand-nav.js", - basePath / "how-about.js", - basePath / "search.js", - basePath / "mathjax-config.js", - basePath / "instances.js", - basePath / "importedBy.js", - basePath / "index.html", - basePath / "404.html", - basePath / "navbar.html", - basePath / "search.html", - basePath / "foundational_types.html", - basePath / "references.html", - basePath / "references.bib", - basePath / "tactics.html", - basePath / "find" / "index.html", - basePath / "find" / "find.js" - ] + bibPrepassJob.bindM fun _ => do exeJob.bindM fun exeFile => do docInfoJob.mapM fun _ => do - buildFileUnlessUpToDate' dataFile do - logInfo "Generating documentation from database" - proc { - cmd := exeFile.toString - args := #["fromDb", "--build", buildDir.toString, "--manifest", manifestFile.toString, dbPath.toString] - env := ← getAugmentedEnv - } - -- Read manifest and return file list - let traces ← staticFiles.mapM computeTrace - addTrace <| mixTraceArray traces - match Lean.Json.parse <| ← IO.FS.readFile manifestFile with - | .error _ => return staticFiles - | .ok manifestData => - match Lean.fromJson? manifestData with - | .error _ => return staticFiles - | .ok (deps : Array System.FilePath) => - return (#[dataFile] ++ staticFiles ++ deps.map (buildDir / ·)) - -/-- Helper facet that informs users to build docs at package level. 
-/ -module_facet docs (_mod) : Unit := Job.async do - logInfo "To build documentation, run: lake build :docs" - logInfo "This builds docs for the entire package at once." - return () - -/-- Helper facet that informs users to build docs at package level. -/ -library_facet docs (_lib) : Unit := Job.async do - logInfo "To build documentation, run: lake build :docs" - logInfo "This builds docs for the entire package at once." - return () + logInfo s!"Generating documentation for {mod.name} and dependencies" + proc { + cmd := exeFile.toString + args := #["fromDb", "--build", buildDir.toString, dbPath.toString, mod.name.toString] + env := ← getAugmentedEnv + } + +/-- Generate HTML for all modules in this library. -/ +library_facet docs (lib) : Unit := do + let coreJob ← coreDocs.fetch + let mods ← (← lib.modules.fetch).await + let jobs ← mods.mapM fun mod => fetch <| mod.facet `docs + coreJob.bindM fun _ => do + Job.collectArray jobs |>.mapM fun _ => pure () + +/-- +Generates documentation for the package's default library targets. Builds the `docs` facet of each +library, which in turn generates HTML for each module. 
+-/ +package_facet docs (pkg) : Unit := do + let defaultTargets := pkg.defaultTargets + let libs := pkg.leanLibs.filter fun lib => defaultTargets.contains lib.name + let jobs ← libs.mapM fun lib => fetch <| lib.facet `docs + Job.collectArray jobs |>.mapM fun _ => pure () diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index 6af94a4c..863863cf 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -34,14 +34,8 @@ cat >> lakefile.toml <> lakefile.toml - # Update doc-gen4 dependency MATHLIB_NO_CACHE_ON_UPDATE=1 lake update doc-gen4 @@ -54,4 +48,4 @@ popd # Benchmark documentation generation env DOCGEN_SRC="file" "$REPO_ROOT/$BENCH/measure.py" -t mathlib-docs -m instructions -m maxrss -m task-clock -m wall-clock -- \ - lake --dir "$TMPDIR/mathproject" build :docs + lake --dir "$TMPDIR/mathproject" build Mathlib:docs diff --git a/scripts/bench/own-docs/run b/scripts/bench/own-docs/run index 6bf6f643..8e80df29 100755 --- a/scripts/bench/own-docs/run +++ b/scripts/bench/own-docs/run @@ -13,4 +13,4 @@ lake build DocGen4 lake build doc-gen4 env DOCGEN_SRC="file" "$BENCH/measure.py" -t own-docs -m instructions -m maxrss -m task-clock -m wall-clock -- \ - lake build :docs + lake build DocGen4:docs From 0f42d49e53c8f9314186896ad903468fe615a355 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 3 Feb 2026 12:33:35 +0100 Subject: [PATCH 025/106] Revert CI change --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 31ece23d..e0bdd3bb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,13 +23,13 @@ jobs: run: | export LEAN_ABORT_ON_PANIC=1 # to ensure that the `--query` test below has a baseline to compare against. 
- rm -rf .lake/build/doc - lake build :docs - + rm -rf .lake/build/docs + lake build DocGen4:docs + - name: Check `--query` output shell: bash # enables pipefail run: | export LEAN_ABORT_ON_PANIC=1 - lake query :docs | sort > expected.txt + lake query DocGen4:docs | sort > expected.txt find "$(pwd)/.lake/build/doc" -type f ! -name '*.trace' ! -name '*.hash' | sort > actual.txt diff actual.txt expected.txt From 8d513bf75c7311fa666b745800dc4a9aec8c2c87 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 3 Feb 2026 15:29:12 +0100 Subject: [PATCH 026/106] fix core deps --- lakefile.lean | 64 ++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/lakefile.lean b/lakefile.lean index 07a8cd9f..dc1531d6 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -203,37 +203,6 @@ target bibPrepass : FilePath := do } return outputFile -/-- -Places the module's documentation content into the package's documentation database. - -Returns a marker file that indicates the database has been populated for this module. -The marker file participates in Lake's dependency tracking, allowing for incremental updates. 
--/ -module_facet docInfo (mod) : FilePath := do - let exeJob ← «doc-gen4».fetch - let bibPrepassJob ← bibPrepass.fetch - let modJob ← mod.leanArts.fetch - -- Build all documentation for imported modules - let imports ← (← mod.imports.fetch).await - let depDocJobs := Job.mixArray <| ← imports.mapM fun mod => fetch <| mod.facet `docInfo - let buildDir := (← getRootPackage).buildDir - let markerFile := buildDir / "doc-data" / s!"{mod.name}.doc" - depDocJobs.bindM fun _ => do - bibPrepassJob.bindM fun _ => do - exeJob.bindM fun exeFile => do - modJob.mapM fun _ => do - buildFileUnlessUpToDate' markerFile do - let uriJob ← fetch <| mod.facet `srcUri - let srcUri ← uriJob.await - proc { - cmd := exeFile.toString - args := #["single", "--build", buildDir.toString, mod.name.toString, "api-docs.db", srcUri] - env := ← getAugmentedEnv - } - IO.FS.createDirAll markerFile.parent.get! - IO.FS.writeFile markerFile "" - return markerFile - def coreTarget (component : Lean.Name) : FetchM (Job FilePath) := do let exeJob ← «doc-gen4».fetch let bibPrepassJob ← bibPrepass.fetch @@ -261,6 +230,39 @@ target coreDocs : Array FilePath := do return ← (Job.collectArray <| ← coreComponents.mapM coreTarget).mapM fun deps => return deps +/-- +Places the module's documentation content into the package's documentation database. + +Returns a marker file that indicates the database has been populated for this module. +The marker file participates in Lake's dependency tracking, allowing for incremental updates. 
+-/ +module_facet docInfo (mod) : FilePath := do + let exeJob ← «doc-gen4».fetch + let bibPrepassJob ← bibPrepass.fetch + let coreJob ← coreDocs.fetch + let modJob ← mod.leanArts.fetch + -- Build all documentation for imported modules + let imports ← (← mod.imports.fetch).await + let depDocJobs := Job.mixArray <| ← imports.mapM fun mod => fetch <| mod.facet `docInfo + let buildDir := (← getRootPackage).buildDir + let markerFile := buildDir / "doc-data" / s!"{mod.name}.doc" + coreJob.bindM fun _ => do + depDocJobs.bindM fun _ => do + bibPrepassJob.bindM fun _ => do + exeJob.bindM fun exeFile => do + modJob.mapM fun _ => do + buildFileUnlessUpToDate' markerFile do + let uriJob ← fetch <| mod.facet `srcUri + let srcUri ← uriJob.await + proc { + cmd := exeFile.toString + args := #["single", "--build", buildDir.toString, mod.name.toString, "api-docs.db", srcUri] + env := ← getAugmentedEnv + } + IO.FS.createDirAll markerFile.parent.get! + IO.FS.writeFile markerFile "" + return markerFile + /-- Populates the database with information for all modules in a library. -/ From 0ad430ae24b851fdf660ee141fef1f2e292b7c89 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 3 Feb 2026 16:14:38 +0100 Subject: [PATCH 027/106] group html jobs --- lakefile.lean | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/lakefile.lean b/lakefile.lean index dc1531d6..e0962fae 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -307,39 +307,44 @@ library_facet docsHeader (lib) : FilePath := do return dataFile -/-- Generate HTML for this module and its transitive imports. -/ -module_facet docs (mod) : Unit := do +/-- +Generate HTML documentation for the given root modules. +Fetches docInfo for all roots, ensures core docs are built, then runs a single `fromDb` process. 
+-/ +def generateHtmlDocs (rootMods : Array Module) (description : String) : FetchM (Job Unit) := do let exeJob ← «doc-gen4».fetch let bibPrepassJob ← bibPrepass.fetch - let docInfoJob ← fetch <| mod.facet `docInfo - + let coreJob ← coreDocs.fetch + let docInfoJobs := Job.collectArray <| ← rootMods.mapM (fetch <| ·.facet `docInfo) let buildDir := (← getRootPackage).buildDir let dbPath := buildDir / "api-docs.db" + let rootNames := rootMods.map (·.name) + coreJob.bindM fun _ => do + docInfoJobs.bindM fun _ => do + bibPrepassJob.bindM fun _ => do + exeJob.mapM fun exeFile => do + logInfo description + proc { + cmd := exeFile.toString + args := #["fromDb", "--build", buildDir.toString, dbPath.toString] ++ rootNames.map (·.toString) + env := ← getAugmentedEnv + } - bibPrepassJob.bindM fun _ => do - exeJob.bindM fun exeFile => do - docInfoJob.mapM fun _ => do - logInfo s!"Generating documentation for {mod.name} and dependencies" - proc { - cmd := exeFile.toString - args := #["fromDb", "--build", buildDir.toString, dbPath.toString, mod.name.toString] - env := ← getAugmentedEnv - } +/-- Generate HTML for this module and its transitive imports. -/ +module_facet docs (mod) : Unit := do + generateHtmlDocs #[mod] s!"Generating documentation for {mod.name} and dependencies" /-- Generate HTML for all modules in this library. -/ library_facet docs (lib) : Unit := do - let coreJob ← coreDocs.fetch - let mods ← (← lib.modules.fetch).await - let jobs ← mods.mapM fun mod => fetch <| mod.facet `docs - coreJob.bindM fun _ => do - Job.collectArray jobs |>.mapM fun _ => pure () + let rootMods := lib.rootModules + generateHtmlDocs rootMods s!"Generating documentation for {lib.name} ({rootMods.size} root modules)" /-- -Generates documentation for the package's default library targets. Builds the `docs` facet of each -library, which in turn generates HTML for each module. +Generates documentation for the package's default library targets. 
Runs a single HTML generation +process for all root modules across all default libraries. -/ package_facet docs (pkg) : Unit := do let defaultTargets := pkg.defaultTargets let libs := pkg.leanLibs.filter fun lib => defaultTargets.contains lib.name - let jobs ← libs.mapM fun lib => fetch <| lib.facet `docs - Job.collectArray jobs |>.mapM fun _ => pure () + let rootMods := libs.flatMap (·.rootModules) + generateHtmlDocs rootMods s!"Generating documentation for {pkg.baseName} ({rootMods.size} root modules)" From 58628b37db3ab20c7e52902069e2b1629d5c3780 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 3 Feb 2026 16:19:27 +0100 Subject: [PATCH 028/106] bump timeout values for huge builds with db write contention --- DocGen4/DB.lean | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index d2813baa..6582b6c8 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -245,7 +245,7 @@ def getDb (dbFile : System.FilePath) : IO SQLite := do -- SQLite atomically creates the DB file, and the schema and journal settings here are applied -- idempotently. This avoids DB creation race conditions. let db ← SQLite.openWith dbFile .readWriteCreate - db.exec "PRAGMA busy_timeout = 60000" -- 60 seconds for parallel builds + db.exec "PRAGMA busy_timeout = 86400000" -- 24 hours - effectively no timeout for parallel builds db.exec "PRAGMA journal_mode = WAL" db.exec "PRAGMA foreign_keys = ON" try @@ -1057,7 +1057,7 @@ open Lean SQLite.Blob /-- Open a database for reading. -/ def openDbForReading (dbFile : System.FilePath) : IO SQLite := do let db ← SQLite.openWith dbFile .readonly - db.exec "PRAGMA busy_timeout = 50000" + db.exec "PRAGMA busy_timeout = 86400000" -- 24 hours - effectively no timeout return db /-- Read RenderedCode from a blob. 
-/ From b519df8d62000e8d9df765d35da934fa543576a4 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 06:23:44 +0100 Subject: [PATCH 029/106] fix: return file lists again --- lakefile.lean | 61 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/lakefile.lean b/lakefile.lean index e0962fae..530150fe 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -310,32 +310,73 @@ library_facet docsHeader (lib) : FilePath := do /-- Generate HTML documentation for the given root modules. Fetches docInfo for all roots, ensures core docs are built, then runs a single `fromDb` process. +Returns an array of all generated file paths. -/ -def generateHtmlDocs (rootMods : Array Module) (description : String) : FetchM (Job Unit) := do +def generateHtmlDocs (rootMods : Array Module) (description : String) : FetchM (Job (Array FilePath)) := do let exeJob ← «doc-gen4».fetch let bibPrepassJob ← bibPrepass.fetch let coreJob ← coreDocs.fetch let docInfoJobs := Job.collectArray <| ← rootMods.mapM (fetch <| ·.facet `docInfo) let buildDir := (← getRootPackage).buildDir + let basePath := buildDir / "doc" let dbPath := buildDir / "api-docs.db" + let dataFile := basePath / "declarations" / "declaration-data.bmp" + let staticFiles := #[ + basePath / "style.css", + basePath / "favicon.svg", + basePath / "declaration-data.js", + basePath / "color-scheme.js", + basePath / "nav.js", + basePath / "jump-src.js", + basePath / "expand-nav.js", + basePath / "how-about.js", + basePath / "search.js", + basePath / "mathjax-config.js", + basePath / "instances.js", + basePath / "importedBy.js", + basePath / "index.html", + basePath / "404.html", + basePath / "navbar.html", + basePath / "search.html", + basePath / "foundational_types.html", + basePath / "references.html", + basePath / "references.bib", + basePath / "tactics.html", + basePath / "find" / "index.html", + basePath / "find" / "find.js" + ] let rootNames := 
rootMods.map (·.name) + let manifestFile := buildDir / "doc-manifest.json" coreJob.bindM fun _ => do docInfoJobs.bindM fun _ => do bibPrepassJob.bindM fun _ => do exeJob.mapM fun exeFile => do - logInfo description - proc { - cmd := exeFile.toString - args := #["fromDb", "--build", buildDir.toString, dbPath.toString] ++ rootNames.map (·.toString) - env := ← getAugmentedEnv - } + buildFileUnlessUpToDate' dataFile do + logInfo description + proc { + cmd := exeFile.toString + args := #["fromDb", "--build", buildDir.toString, "--manifest", manifestFile.toString, dbPath.toString] ++ rootNames.map (·.toString) + env := ← getAugmentedEnv + } + let traces ← staticFiles.mapM computeTrace + addTrace <| mixTraceArray traces + -- We read the manifest to determine which HTML files were generated because we only + -- pass root module names to fromDb, which computes the transitive closure internally. + -- This avoids passing potentially thousands of module names on the command line. + match Lean.Json.parse <| ← IO.FS.readFile manifestFile with + | .error _ => return #[dataFile] ++ staticFiles + | .ok manifestData => + match Lean.fromJson? manifestData with + | .error _ => return #[dataFile] ++ staticFiles + | .ok (manifestDeps : Array System.FilePath) => + return #[dataFile] ++ staticFiles ++ manifestDeps.map (buildDir / ·) /-- Generate HTML for this module and its transitive imports. -/ -module_facet docs (mod) : Unit := do +module_facet docs (mod) : Array FilePath := do generateHtmlDocs #[mod] s!"Generating documentation for {mod.name} and dependencies" /-- Generate HTML for all modules in this library. -/ -library_facet docs (lib) : Unit := do +library_facet docs (lib) : Array FilePath := do let rootMods := lib.rootModules generateHtmlDocs rootMods s!"Generating documentation for {lib.name} ({rootMods.size} root modules)" @@ -343,7 +384,7 @@ library_facet docs (lib) : Unit := do Generates documentation for the package's default library targets. 
Runs a single HTML generation process for all root modules across all default libraries. -/ -package_facet docs (pkg) : Unit := do +package_facet docs (pkg) : Array FilePath := do let defaultTargets := pkg.defaultTargets let libs := pkg.leanLibs.filter fun lib => defaultTargets.contains lib.name let rootMods := libs.flatMap (·.rootModules) From ee4d2361d4cc7361b20f51e61bd61c68c5dde7cc Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 08:09:38 +0100 Subject: [PATCH 030/106] Files again --- Main.lean | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Main.lean b/Main.lean index 1e759bb9..352c4d65 100644 --- a/Main.lean +++ b/Main.lean @@ -104,15 +104,15 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do let outputs ← htmlOutputResultsParallel baseConfig dbPath linkCtx targetModules (sourceLinker? := some (dbSourceLinker linkCtx.sourceUrls)) IO.println s!"HTML took {(← IO.monoMsNow) - start}ms" - -- When module roots are specified, update navbar from disk (includes modules from previous builds) - -- Otherwise, generate full index for complete build + -- Generate the search index (declaration-data.bmp) let start ← IO.monoMsNow - if moduleRoots.isEmpty then - htmlOutputIndex baseConfig - IO.println s!"HTML index took {(← IO.monoMsNow) - start}ms" - else - updateNavbarFromDisk buildDir - IO.println s!"Navbar update took {(← IO.monoMsNow) - start}ms" + htmlOutputIndex baseConfig + IO.println s!"HTML index took {(← IO.monoMsNow) - start}ms" + + -- Update navbar to include all modules on disk + let start ← IO.monoMsNow + updateNavbarFromDisk buildDir + IO.println s!"Navbar update took {(← IO.monoMsNow) - start}ms" IO.println "Done!" if let .some manifestOutput := manifestOutput? 
then From 64ced84dbec43d94bf58413fef9d54505c682c7f Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 08:22:56 +0100 Subject: [PATCH 031/106] chore: try WITHOUT ROWID --- DocGen4/DB.lean | 54 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 6582b6c8..c4cead7a 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -291,7 +291,7 @@ PRAGMA journal_mode = WAL; CREATE TABLE IF NOT EXISTS modules ( name TEXT PRIMARY KEY, source_url TEXT -); +) WITHOUT ROWID; -- Direct imports CREATE TABLE IF NOT EXISTS module_imports ( @@ -302,7 +302,7 @@ CREATE TABLE IF NOT EXISTS module_imports ( -- There's no -- FOREIGN KEY (imported) REFERENCES modules(name) -- because docs are built incrementally. -); +) WITHOUT ROWID; -- Index for reverse queries: "what imports this module?" CREATE INDEX IF NOT EXISTS idx_module_imports_imported ON module_imports(imported); @@ -313,7 +313,7 @@ CREATE TABLE IF NOT EXISTS module_items ( item_type TEXT NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS declaration_ranges ( module_name TEXT NOT NULL, @@ -326,7 +326,7 @@ CREATE TABLE IF NOT EXISTS declaration_ranges ( end_utf16 INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS markdown_docstrings ( module_name TEXT NOT NULL, @@ -334,7 +334,7 @@ CREATE TABLE IF NOT EXISTS markdown_docstrings ( text TEXT NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS verso_docstrings ( module_name TEXT NOT NULL, @@ -342,7 +342,7 @@ CREATE TABLE IF NOT EXISTS verso_docstrings ( content BLOB NOT NULL, PRIMARY KEY (module_name, position), FOREIGN 
KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS name_info ( module_name TEXT NOT NULL, @@ -354,7 +354,7 @@ CREATE TABLE IF NOT EXISTS name_info ( render INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS axioms ( module_name TEXT NOT NULL, @@ -362,7 +362,7 @@ CREATE TABLE IF NOT EXISTS axioms ( is_unsafe INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; -- Internal names (like recursors) that aren't rendered but should link to a rendered declaration CREATE TABLE IF NOT EXISTS internal_names ( @@ -370,7 +370,7 @@ CREATE TABLE IF NOT EXISTS internal_names ( target_module TEXT NOT NULL, target_position INTEGER NOT NULL, FOREIGN KEY (target_module, target_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; -- Index for CASCADE deletes: when name_info rows are deleted, find matching internal_names CREATE INDEX IF NOT EXISTS idx_internal_names_target ON internal_names(target_module, target_position); @@ -382,7 +382,7 @@ CREATE TABLE IF NOT EXISTS constructors ( PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE FOREIGN KEY (module_name, type_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; -- Index for CASCADE deletes on the second FK (type_position) CREATE INDEX IF NOT EXISTS idx_constructors_type_pos ON constructors(module_name, type_position); @@ -393,7 +393,7 @@ CREATE TABLE IF NOT EXISTS inductives ( is_unsafe INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE 
IF NOT EXISTS class_inductives ( module_name TEXT NOT NULL, @@ -401,7 +401,7 @@ CREATE TABLE IF NOT EXISTS class_inductives ( is_unsafe INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS opaques ( module_name TEXT NOT NULL, @@ -409,7 +409,7 @@ CREATE TABLE IF NOT EXISTS opaques ( safety TEXT NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS definitions ( module_name TEXT NOT NULL, @@ -420,7 +420,7 @@ CREATE TABLE IF NOT EXISTS definitions ( has_equations INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS definition_equations ( module_name TEXT NOT NULL, @@ -429,7 +429,7 @@ CREATE TABLE IF NOT EXISTS definition_equations ( sequence INTEGER NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; -- Trigger to ensure has_equations is true when equations are inserted CREATE TRIGGER IF NOT EXISTS ensure_has_equations_on_insert @@ -446,7 +446,7 @@ CREATE TABLE IF NOT EXISTS instances ( class_name TEXT NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS instance_args ( module_name TEXT NOT NULL, @@ -455,7 +455,7 @@ CREATE TABLE IF NOT EXISTS instance_args ( type_name TEXT NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES instances(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS 
structures ( module_name TEXT NOT NULL, @@ -463,7 +463,7 @@ CREATE TABLE IF NOT EXISTS structures ( is_class INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS structure_parents ( module_name TEXT NOT NULL, @@ -473,7 +473,7 @@ CREATE TABLE IF NOT EXISTS structure_parents ( type TEXT NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES structures(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS structure_constructors ( module_name TEXT NOT NULL, @@ -483,7 +483,7 @@ CREATE TABLE IF NOT EXISTS structure_constructors ( type BLOB NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS structure_fields ( module_name TEXT NOT NULL, @@ -496,7 +496,7 @@ CREATE TABLE IF NOT EXISTS structure_fields ( FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -- Note: No FK on proj_name because the projection function may be in a different module -- (for inherited fields) that hasn't been processed yet. The JOIN at load time handles this. 
-); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS structure_field_args ( module_name TEXT NOT NULL, @@ -507,7 +507,7 @@ CREATE TABLE IF NOT EXISTS structure_field_args ( is_implicit INTEGER NOT NULL, PRIMARY KEY (module_name, position, field_sequence, arg_sequence), FOREIGN KEY (module_name, position, field_sequence) REFERENCES structure_fields(module_name, position, sequence) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS declaration_args ( module_name TEXT NOT NULL, @@ -517,7 +517,7 @@ CREATE TABLE IF NOT EXISTS declaration_args ( is_implicit INTEGER NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS declaration_attrs ( module_name TEXT NOT NULL, @@ -526,7 +526,7 @@ CREATE TABLE IF NOT EXISTS declaration_attrs ( attr TEXT NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS tactics ( module_name TEXT NOT NULL, @@ -535,7 +535,7 @@ CREATE TABLE IF NOT EXISTS tactics ( doc_string TEXT NOT NULL, PRIMARY KEY (module_name, internal_name), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS tactic_tags ( module_name TEXT NOT NULL, @@ -543,12 +543,12 @@ CREATE TABLE IF NOT EXISTS tactic_tags ( tag TEXT NOT NULL, PRIMARY KEY (module_name, internal_name, tag), FOREIGN KEY (module_name, internal_name) REFERENCES tactics(module_name, internal_name) ON DELETE CASCADE -); +) WITHOUT ROWID; CREATE TABLE IF NOT EXISTS schema_meta ( key TEXT PRIMARY KEY, value TEXT NOT NULL -); +) WITHOUT ROWID; "# def withDbContext (context : String) (act : IO α) : IO α := do From 75e23c6a6589324762afe3d2fe64337fbe91d8e0 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 08:39:48 +0100 
Subject: [PATCH 032/106] Revert "chore: try WITHOUT ROWID" This reverts commit 64ced84dbec43d94bf58413fef9d54505c682c7f. It didn't make things faster. --- DocGen4/DB.lean | 54 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index c4cead7a..6582b6c8 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -291,7 +291,7 @@ PRAGMA journal_mode = WAL; CREATE TABLE IF NOT EXISTS modules ( name TEXT PRIMARY KEY, source_url TEXT -) WITHOUT ROWID; +); -- Direct imports CREATE TABLE IF NOT EXISTS module_imports ( @@ -302,7 +302,7 @@ CREATE TABLE IF NOT EXISTS module_imports ( -- There's no -- FOREIGN KEY (imported) REFERENCES modules(name) -- because docs are built incrementally. -) WITHOUT ROWID; +); -- Index for reverse queries: "what imports this module?" CREATE INDEX IF NOT EXISTS idx_module_imports_imported ON module_imports(imported); @@ -313,7 +313,7 @@ CREATE TABLE IF NOT EXISTS module_items ( item_type TEXT NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS declaration_ranges ( module_name TEXT NOT NULL, @@ -326,7 +326,7 @@ CREATE TABLE IF NOT EXISTS declaration_ranges ( end_utf16 INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS markdown_docstrings ( module_name TEXT NOT NULL, @@ -334,7 +334,7 @@ CREATE TABLE IF NOT EXISTS markdown_docstrings ( text TEXT NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS verso_docstrings ( module_name TEXT NOT NULL, @@ -342,7 +342,7 @@ CREATE TABLE IF NOT EXISTS verso_docstrings ( content BLOB NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON 
DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS name_info ( module_name TEXT NOT NULL, @@ -354,7 +354,7 @@ CREATE TABLE IF NOT EXISTS name_info ( render INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS axioms ( module_name TEXT NOT NULL, @@ -362,7 +362,7 @@ CREATE TABLE IF NOT EXISTS axioms ( is_unsafe INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); -- Internal names (like recursors) that aren't rendered but should link to a rendered declaration CREATE TABLE IF NOT EXISTS internal_names ( @@ -370,7 +370,7 @@ CREATE TABLE IF NOT EXISTS internal_names ( target_module TEXT NOT NULL, target_position INTEGER NOT NULL, FOREIGN KEY (target_module, target_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); -- Index for CASCADE deletes: when name_info rows are deleted, find matching internal_names CREATE INDEX IF NOT EXISTS idx_internal_names_target ON internal_names(target_module, target_position); @@ -382,7 +382,7 @@ CREATE TABLE IF NOT EXISTS constructors ( PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE FOREIGN KEY (module_name, type_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); -- Index for CASCADE deletes on the second FK (type_position) CREATE INDEX IF NOT EXISTS idx_constructors_type_pos ON constructors(module_name, type_position); @@ -393,7 +393,7 @@ CREATE TABLE IF NOT EXISTS inductives ( is_unsafe INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS class_inductives ( module_name 
TEXT NOT NULL, @@ -401,7 +401,7 @@ CREATE TABLE IF NOT EXISTS class_inductives ( is_unsafe INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS opaques ( module_name TEXT NOT NULL, @@ -409,7 +409,7 @@ CREATE TABLE IF NOT EXISTS opaques ( safety TEXT NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS definitions ( module_name TEXT NOT NULL, @@ -420,7 +420,7 @@ CREATE TABLE IF NOT EXISTS definitions ( has_equations INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS definition_equations ( module_name TEXT NOT NULL, @@ -429,7 +429,7 @@ CREATE TABLE IF NOT EXISTS definition_equations ( sequence INTEGER NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); -- Trigger to ensure has_equations is true when equations are inserted CREATE TRIGGER IF NOT EXISTS ensure_has_equations_on_insert @@ -446,7 +446,7 @@ CREATE TABLE IF NOT EXISTS instances ( class_name TEXT NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS instance_args ( module_name TEXT NOT NULL, @@ -455,7 +455,7 @@ CREATE TABLE IF NOT EXISTS instance_args ( type_name TEXT NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES instances(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS structures ( module_name TEXT NOT NULL, @@ -463,7 
+463,7 @@ CREATE TABLE IF NOT EXISTS structures ( is_class INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS structure_parents ( module_name TEXT NOT NULL, @@ -473,7 +473,7 @@ CREATE TABLE IF NOT EXISTS structure_parents ( type TEXT NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES structures(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS structure_constructors ( module_name TEXT NOT NULL, @@ -483,7 +483,7 @@ CREATE TABLE IF NOT EXISTS structure_constructors ( type BLOB NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS structure_fields ( module_name TEXT NOT NULL, @@ -496,7 +496,7 @@ CREATE TABLE IF NOT EXISTS structure_fields ( FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -- Note: No FK on proj_name because the projection function may be in a different module -- (for inherited fields) that hasn't been processed yet. The JOIN at load time handles this. 
-) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS structure_field_args ( module_name TEXT NOT NULL, @@ -507,7 +507,7 @@ CREATE TABLE IF NOT EXISTS structure_field_args ( is_implicit INTEGER NOT NULL, PRIMARY KEY (module_name, position, field_sequence, arg_sequence), FOREIGN KEY (module_name, position, field_sequence) REFERENCES structure_fields(module_name, position, sequence) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS declaration_args ( module_name TEXT NOT NULL, @@ -517,7 +517,7 @@ CREATE TABLE IF NOT EXISTS declaration_args ( is_implicit INTEGER NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS declaration_attrs ( module_name TEXT NOT NULL, @@ -526,7 +526,7 @@ CREATE TABLE IF NOT EXISTS declaration_attrs ( attr TEXT NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS tactics ( module_name TEXT NOT NULL, @@ -535,7 +535,7 @@ CREATE TABLE IF NOT EXISTS tactics ( doc_string TEXT NOT NULL, PRIMARY KEY (module_name, internal_name), FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS tactic_tags ( module_name TEXT NOT NULL, @@ -543,12 +543,12 @@ CREATE TABLE IF NOT EXISTS tactic_tags ( tag TEXT NOT NULL, PRIMARY KEY (module_name, internal_name, tag), FOREIGN KEY (module_name, internal_name) REFERENCES tactics(module_name, internal_name) ON DELETE CASCADE -) WITHOUT ROWID; +); CREATE TABLE IF NOT EXISTS schema_meta ( key TEXT PRIMARY KEY, value TEXT NOT NULL -) WITHOUT ROWID; +); "# def withDbContext (context : String) (act : IO α) : IO α := do From 52cd13ed9abd63c28a8a130ac9379d64b95e63a3 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 08:41:05 +0100 
Subject: [PATCH 033/106] try turning off synchronous writes --- DocGen4/DB.lean | 1 + 1 file changed, 1 insertion(+) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 6582b6c8..26f1701d 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -247,6 +247,7 @@ def getDb (dbFile : System.FilePath) : IO SQLite := do let db ← SQLite.openWith dbFile .readWriteCreate db.exec "PRAGMA busy_timeout = 86400000" -- 24 hours - effectively no timeout for parallel builds db.exec "PRAGMA journal_mode = WAL" + db.exec "PRAGMA synchronous = OFF" db.exec "PRAGMA foreign_keys = ON" try db.transaction (db.exec ddl) From 2e5f95b0a25e9be9a857dd666b08036f000ec4f0 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 08:56:28 +0100 Subject: [PATCH 034/106] simplify handling of equations --- DocGen4/DB.lean | 43 ++++++++++++++----------------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 26f1701d..4b9057f9 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -418,7 +418,6 @@ CREATE TABLE IF NOT EXISTS definitions ( is_unsafe INTEGER NOT NULL, hints TEXT NOT NULL, is_noncomputable INTEGER NOT NULL, - has_equations INTEGER NOT NULL, PRIMARY KEY (module_name, position), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); @@ -432,15 +431,6 @@ CREATE TABLE IF NOT EXISTS definition_equations ( FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); --- Trigger to ensure has_equations is true when equations are inserted -CREATE TRIGGER IF NOT EXISTS ensure_has_equations_on_insert -AFTER INSERT ON definition_equations -BEGIN - UPDATE definitions - SET has_equations = 1 - WHERE module_name = NEW.module_name AND position = NEW.position AND has_equations = 0; -END; - CREATE TABLE IF NOT EXISTS instances ( module_name TEXT NOT NULL, position INTEGER NOT NULL, @@ -572,7 +562,7 @@ structure DB where saveInfo (modName 
: String) (position : Int64) (kind : String) (info : Process.Info) : IO Unit saveAxiom (modName : String) (position : Int64) (isUnsafe : Bool) : IO Unit saveOpaque (modName : String) (position : Int64) (safety : Lean.DefinitionSafety) : IO Unit - saveDefinition (modName : String) (position : Int64) (isUnsafe : Bool) (hints : Lean.ReducibilityHints) (isNonComputable : Bool) (hasEquations : Bool) : IO Unit + saveDefinition (modName : String) (position : Int64) (isUnsafe : Bool) (hints : Lean.ReducibilityHints) (isNonComputable : Bool) : IO Unit saveDefinitionEquation (modName : String) (position : Int64) (code : RenderedCode) (sequence : Int64) : IO Unit saveInstance (modName : String) (position : Int64) (className : String) : IO Unit saveInstanceArg (modName : String) (position : Int64) (sequence : Int64) (typeName : String) : IO Unit @@ -718,14 +708,13 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveOpaqueStmt.bind 2 position saveOpaqueStmt.bind 3 safety run saveOpaqueStmt - let saveDefinitionStmt ← sqlite.prepare "INSERT INTO definitions (module_name, position, is_unsafe, hints, is_noncomputable, has_equations) VALUES (?, ?, ?, ?, ?, ?)" - let saveDefinition modName position isUnsafe hints isNonComputable hasEquations := withDbContext "write:insert:definitions" do + let saveDefinitionStmt ← sqlite.prepare "INSERT INTO definitions (module_name, position, is_unsafe, hints, is_noncomputable) VALUES (?, ?, ?, ?, ?)" + let saveDefinition modName position isUnsafe hints isNonComputable := withDbContext "write:insert:definitions" do saveDefinitionStmt.bind 1 modName saveDefinitionStmt.bind 2 position saveDefinitionStmt.bind 3 isUnsafe saveDefinitionStmt.bind 4 hints saveDefinitionStmt.bind 5 isNonComputable - saveDefinitionStmt.bind 6 hasEquations run saveDefinitionStmt let saveDefinitionEquationStmt ← sqlite.prepare "INSERT INTO definition_equations (module_name, position, code, sequence) VALUES (?, ?, ?, ?)" let saveDefinitionEquation modName position 
(code : RenderedCode) sequence := withDbContext "write:insert:definition_equations" do @@ -927,13 +916,13 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( | .opaqueInfo info => db.saveOpaque modNameStr pos info.definitionSafety | .definitionInfo info => - db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable info.equations.isSome + db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable if let some eqns := info.equations then for h : j in 0...eqns.size do db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 | .instanceInfo info => -- Save definition data (InstanceInfo extends DefinitionInfo) - db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable info.equations.isSome + db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable if let some eqns := info.equations then for h : j in 0...eqns.size do db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 @@ -1210,17 +1199,15 @@ def loadInfo (db : SQLite) (moduleName : String) (position : Int64) (name : Name } /-- Load definition equations from the database. - Takes hasEquations flag to distinguish `none` from `some #[]`. -/ -def loadEquations (db : SQLite) (moduleName : String) (position : Int64) (hasEquations : Bool) : IO (Option (Array RenderedCode)) := withDbContext "read:definition_equations" do - if !hasEquations then return none + Returns `none` if no equations exist, `some eqns` otherwise. -/ +def loadEquations (db : SQLite) (moduleName : String) (position : Int64) : IO (Option (Array RenderedCode)) := withDbContext "read:definition_equations" do let stmt ← db.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? 
ORDER BY sequence" stmt.bind 1 moduleName stmt.bind 2 position - let mut eqns := #[] + if !(← stmt.step) then return none + let mut eqns := #[← readRenderedCode (← stmt.columnBlob 0)] while (← stmt.step) do - let blob ← stmt.columnBlob 0 - let code ← readRenderedCode blob - eqns := eqns.push code + eqns := eqns.push (← readRenderedCode (← stmt.columnBlob 0)) return some eqns /-- Load instance type names from the database. -/ @@ -1375,19 +1362,18 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S return some <| .opaqueInfo { toInfo := info, definitionSafety := safety } return none | "definition" => - let stmt ← db.prepare "SELECT is_unsafe, hints, is_noncomputable, has_equations FROM definitions WHERE module_name = ? AND position = ?" + let stmt ← db.prepare "SELECT is_unsafe, hints, is_noncomputable FROM definitions WHERE module_name = ? AND position = ?" stmt.bind 1 moduleName stmt.bind 2 position if (← stmt.step) then let isUnsafe := (← stmt.columnInt64 0) != 0 let hintsStr ← stmt.columnText 1 let isNonComputable := (← stmt.columnInt64 2) != 0 - let hasEquations := (← stmt.columnInt64 3) != 0 let hints : ReducibilityHints := match hintsStr with | "opaque" => .opaque | "abbrev" => .abbrev | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let equations ← loadEquations db moduleName position hasEquations + let equations ← loadEquations db moduleName position return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable } return none | "instance" => @@ -1396,19 +1382,18 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S instStmt.bind 2 position if (← instStmt.step) then let className := (← instStmt.columnText 0).toName - let defStmt ← db.prepare "SELECT is_unsafe, hints, is_noncomputable, has_equations FROM definitions WHERE module_name = ? AND position = ?" + let defStmt ← db.prepare "SELECT is_unsafe, hints, is_noncomputable FROM definitions WHERE module_name = ? 
AND position = ?" defStmt.bind 1 moduleName defStmt.bind 2 position if (← defStmt.step) then let isUnsafe := (← defStmt.columnInt64 0) != 0 let hintsStr ← defStmt.columnText 1 let isNonComputable := (← defStmt.columnInt64 2) != 0 - let hasEquations := (← defStmt.columnInt64 3) != 0 let hints : ReducibilityHints := match hintsStr with | "opaque" => .opaque | "abbrev" => .abbrev | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let equations ← loadEquations db moduleName position hasEquations + let equations ← loadEquations db moduleName position let typeNames ← loadInstanceArgs db moduleName position return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable, className, typeNames } return none From 8fdc1ce1ccd34539f9774394cdbe57a731887663 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 09:44:22 +0100 Subject: [PATCH 035/106] Batch up transactions for core docs Hopefully this enables greater concurrency --- DocGen4/DB.lean | 216 +++++++++++++++++++++++++++++------------------- 1 file changed, 133 insertions(+), 83 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 4b9057f9..8b1377c2 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -1,9 +1,54 @@ import DocGen4.Process import DocGen4.RenderedCode import SQLite +import Std.Data.Iterators namespace DocGen4.DB +section + +open Std Iterators + +structure ChunkArray α where + array : Array α + chunkSize : Nat + curr : Nat + chunkSize_gt_zero : chunkSize > 0 := by grind + curr_valid : curr ≤ array.size := by grind + +def chunkedM {m : Type u → Type v} (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := + IterM.mk (ChunkArray.mk xs n 0) m (Array α) + +def chunked (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := + IterM.mk (ChunkArray.mk xs n 0) Id (Array α) + +def ChunkArray.PlausibleStep (it : IterM (α := ChunkArray α) m (Array α)) : + (step : IterStep (IterM (α := ChunkArray α) m (Array α)) (Array α)) → Prop + | .yield it' v => + 
it.internalState.curr < it.internalState.array.size ∧ + it.internalState.array = it'.internalState.array ∧ + it.internalState.chunkSize = it'.internalState.chunkSize ∧ + it.internalState.curr < it'.internalState.curr ∧ + v.size ≤ it.internalState.chunkSize + | .done => it.internalState.curr = it.internalState.array.size + | .skip .. => False + +instance [Pure m] : Iterator (ChunkArray α) m (Array α) where + IsPlausibleStep := ChunkArray.PlausibleStep + step it := + let { internalState := { array, chunkSize, chunkSize_gt_zero, curr, curr_valid } } := it + if h : curr = array.size then + pure <| .deflate <| .done h + else + let curr' := curr + chunkSize + let curr'' := if curr' > array.size then array.size else curr' + let it' : IterM (α := ChunkArray α) _ _ := ⟨{ array, chunkSize, curr := curr'' }⟩ + pure <| .deflate <| .yield it' (array.extract curr curr'') (by grind [ChunkArray.PlausibleStep]) + +instance [Pure m] [Monad n] : IteratorLoop (ChunkArray α) (β := Array α) m n := IteratorLoop.defaultImplementation + +end + open Lean in /-- Extract a deterministic string representation of an inductive type for hashing. Includes constructor names and their types. -/ @@ -884,90 +929,95 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( (sourceUrl? : Option String) : IO Unit := do let dbFile := buildDir / dbFile let db ← ensureDb dbFile - for (modName, modInfo) in doc.moduleInfo do - let modNameStr := modName.toString + for batch in chunked doc.moduleInfo.toArray 100 do -- Each module gets its own transaction to reduce lock contention - let _ ← withDbContext s!"transaction:immediate:{modNameStr}" <| db.sqlite.transaction (mode := .immediate) do - -- Collect structure field info to save in second pass (after all declarations are in name_info) - let mut pendingStructureFields : Array (Int64 × Process.StructureInfo) := #[] - db.deleteModule modNameStr - db.saveModule modNameStr sourceUrl? 
- for imported in modInfo.imports do - db.saveImport modNameStr imported - let mut i : Int64 := 0 - for mem in modInfo.members do - let pos := i - i := i + 1 - match mem with - | .modDoc doc => - db.saveDeclarationRange modNameStr pos doc.declarationRange - db.saveMarkdownDocstring modNameStr pos doc.doc - | .docInfo info => - let baseInfo := info.toInfo - -- Skip saving ctorInfo here - they're saved along with their parent inductive - if !info.isCtorInfo then - db.saveInfo modNameStr pos (infoKind info) baseInfo - db.saveDeclarationRange modNameStr pos baseInfo.declarationRange - match info with - | .axiomInfo info => - db.saveAxiom modNameStr pos info.isUnsafe - | .theoremInfo _info => -- No extra info here - pure () - | .opaqueInfo info => - db.saveOpaque modNameStr pos info.definitionSafety - | .definitionInfo info => - db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable - if let some eqns := info.equations then - for h : j in 0...eqns.size do - db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 - | .instanceInfo info => - -- Save definition data (InstanceInfo extends DefinitionInfo) - db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable - if let some eqns := info.equations then - for h : j in 0...eqns.size do - db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 - -- Save instance-specific data - db.saveInstance modNameStr pos info.className.toString - for h : j in 0...info.typeNames.size do - db.saveInstanceArg modNameStr pos j.toInt64 info.typeNames[j].toString - | .inductiveInfo info => - db.saveInductive modNameStr pos info.isUnsafe - -- Save recursors (main + aux) as internal names linking to this inductive - saveRecursors doc.name2ModIdx db modNameStr pos info.name - for ctor in info.ctors do - let cpos := i - i := i + 1 - db.saveInfo modNameStr cpos "constructor" ctor - db.saveDeclarationRange modNameStr cpos ctor.declarationRange - db.saveConstructor modNameStr cpos pos - | 
.structureInfo info => - -- First pass: save structure metadata (not fields) - i := (← (saveStructureMetadata false info db modNameStr pos doc.name2ModIdx).run i).2 - pendingStructureFields := pendingStructureFields.push (pos, info) - | .classInfo info => - -- First pass: save structure metadata (not fields) - i := (← (saveStructureMetadata true info db modNameStr pos doc.name2ModIdx).run i).2 - pendingStructureFields := pendingStructureFields.push (pos, info) - | .classInductiveInfo info => - db.saveClassInductive modNameStr pos info.isUnsafe - -- Save recursors (main + aux) as internal names linking to this class inductive - saveRecursors doc.name2ModIdx db modNameStr pos info.name - for ctor in info.ctors do - let cpos := i - i := i + 1 - db.saveInfo modNameStr cpos "constructor" ctor - db.saveDeclarationRange modNameStr cpos ctor.declarationRange - db.saveConstructor modNameStr cpos pos - | .ctorInfo info => - -- Here we do nothing because they were inserted along with the inductive - pure () - -- Second pass: save structure fields (now that all projection functions are in name_info) - for (pos, info) in pendingStructureFields do - saveStructureFields info db modNameStr pos - -- Save tactics defined in this module - for tactic in modInfo.tactics do - db.saveTactic modNameStr tactic - pure () + let ctxStr := + if h : batch.size = 1 then batch[0].1.toString + else if h : batch.size = 0 then "none" + else s!"{batch[0].1}-{batch[batch.size-1].1}" + let _ ← withDbContext s!"transaction:immediate:{ctxStr}" <| db.sqlite.transaction (mode := .immediate) do + for (modName, modInfo) in doc.moduleInfo do + let modNameStr := modName.toString + -- Collect structure field info to save in second pass (after all declarations are in name_info) + let mut pendingStructureFields : Array (Int64 × Process.StructureInfo) := #[] + db.deleteModule modNameStr + db.saveModule modNameStr sourceUrl? 
+ for imported in modInfo.imports do + db.saveImport modNameStr imported + let mut i : Int64 := 0 + for mem in modInfo.members do + let pos := i + i := i + 1 + match mem with + | .modDoc doc => + db.saveDeclarationRange modNameStr pos doc.declarationRange + db.saveMarkdownDocstring modNameStr pos doc.doc + | .docInfo info => + let baseInfo := info.toInfo + -- Skip saving ctorInfo here - they're saved along with their parent inductive + if !info.isCtorInfo then + db.saveInfo modNameStr pos (infoKind info) baseInfo + db.saveDeclarationRange modNameStr pos baseInfo.declarationRange + match info with + | .axiomInfo info => + db.saveAxiom modNameStr pos info.isUnsafe + | .theoremInfo _info => -- No extra info here + pure () + | .opaqueInfo info => + db.saveOpaque modNameStr pos info.definitionSafety + | .definitionInfo info => + db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable + if let some eqns := info.equations then + for h : j in 0...eqns.size do + db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 + | .instanceInfo info => + -- Save definition data (InstanceInfo extends DefinitionInfo) + db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable + if let some eqns := info.equations then + for h : j in 0...eqns.size do + db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 + -- Save instance-specific data + db.saveInstance modNameStr pos info.className.toString + for h : j in 0...info.typeNames.size do + db.saveInstanceArg modNameStr pos j.toInt64 info.typeNames[j].toString + | .inductiveInfo info => + db.saveInductive modNameStr pos info.isUnsafe + -- Save recursors (main + aux) as internal names linking to this inductive + saveRecursors doc.name2ModIdx db modNameStr pos info.name + for ctor in info.ctors do + let cpos := i + i := i + 1 + db.saveInfo modNameStr cpos "constructor" ctor + db.saveDeclarationRange modNameStr cpos ctor.declarationRange + db.saveConstructor modNameStr cpos pos + | 
.structureInfo info => + -- First pass: save structure metadata (not fields) + i := (← (saveStructureMetadata false info db modNameStr pos doc.name2ModIdx).run i).2 + pendingStructureFields := pendingStructureFields.push (pos, info) + | .classInfo info => + -- First pass: save structure metadata (not fields) + i := (← (saveStructureMetadata true info db modNameStr pos doc.name2ModIdx).run i).2 + pendingStructureFields := pendingStructureFields.push (pos, info) + | .classInductiveInfo info => + db.saveClassInductive modNameStr pos info.isUnsafe + -- Save recursors (main + aux) as internal names linking to this class inductive + saveRecursors doc.name2ModIdx db modNameStr pos info.name + for ctor in info.ctors do + let cpos := i + i := i + 1 + db.saveInfo modNameStr cpos "constructor" ctor + db.saveDeclarationRange modNameStr cpos ctor.declarationRange + db.saveConstructor modNameStr cpos pos + | .ctorInfo info => + -- Here we do nothing because they were inserted along with the inductive + pure () + -- Second pass: save structure fields (now that all projection functions are in name_info) + for (pos, info) in pendingStructureFields do + saveStructureFields info db modNameStr pos + -- Save tactics defined in this module + for tactic in modInfo.tactics do + db.saveTactic modNameStr tactic + pure () pure () where From 28f670b5a84004aa55348f3c5854326e41512ced Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 10:08:09 +0100 Subject: [PATCH 036/106] fix batching and use subarrays for them --- DocGen4/DB.lean | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 8b1377c2..aee35f3f 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -17,23 +17,24 @@ structure ChunkArray α where curr_valid : curr ≤ array.size := by grind def chunkedM {m : Type u → Type v} (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := - IterM.mk (ChunkArray.mk xs n 0) m (Array α) + IterM.mk 
(ChunkArray.mk xs n 0) m (Subarray α) def chunked (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := - IterM.mk (ChunkArray.mk xs n 0) Id (Array α) + IterM.mk (ChunkArray.mk xs n 0) Id (Subarray α) -def ChunkArray.PlausibleStep (it : IterM (α := ChunkArray α) m (Array α)) : - (step : IterStep (IterM (α := ChunkArray α) m (Array α)) (Array α)) → Prop +def ChunkArray.PlausibleStep (it : IterM (α := ChunkArray α) m (Subarray α)) : + (step : IterStep (IterM (α := ChunkArray α) m (Subarray α)) (Subarray α)) → Prop | .yield it' v => it.internalState.curr < it.internalState.array.size ∧ it.internalState.array = it'.internalState.array ∧ it.internalState.chunkSize = it'.internalState.chunkSize ∧ it.internalState.curr < it'.internalState.curr ∧ - v.size ≤ it.internalState.chunkSize + v.size ≤ it.internalState.chunkSize ∧ + v.array = it.internalState.array | .done => it.internalState.curr = it.internalState.array.size | .skip .. => False -instance [Pure m] : Iterator (ChunkArray α) m (Array α) where +instance [Pure m] : Iterator (ChunkArray α) m (Subarray α) where IsPlausibleStep := ChunkArray.PlausibleStep step it := let { internalState := { array, chunkSize, chunkSize_gt_zero, curr, curr_valid } } := it @@ -43,9 +44,9 @@ instance [Pure m] : Iterator (ChunkArray α) m (Array α) where let curr' := curr + chunkSize let curr'' := if curr' > array.size then array.size else curr' let it' : IterM (α := ChunkArray α) _ _ := ⟨{ array, chunkSize, curr := curr'' }⟩ - pure <| .deflate <| .yield it' (array.extract curr curr'') (by grind [ChunkArray.PlausibleStep]) + pure <| .deflate <| .yield it' (array[curr...curr'']) (by grind [ChunkArray.PlausibleStep]) -instance [Pure m] [Monad n] : IteratorLoop (ChunkArray α) (β := Array α) m n := IteratorLoop.defaultImplementation +instance [Pure m] [Monad n] : IteratorLoop (ChunkArray α) (β := Subarray α) m n := IteratorLoop.defaultImplementation end @@ -925,6 +926,7 @@ end DB open DB + def updateModuleDb (doc : Process.AnalyzerResult) 
(buildDir : System.FilePath) (dbFile : String) (sourceUrl? : Option String) : IO Unit := do let dbFile := buildDir / dbFile @@ -934,9 +936,10 @@ def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) ( let ctxStr := if h : batch.size = 1 then batch[0].1.toString else if h : batch.size = 0 then "none" - else s!"{batch[0].1}-{batch[batch.size-1].1}" + else s!"{batch[0].1}-{batch[batch.size-1].1} ({batch.size} modules)" + let _ ← withDbContext s!"transaction:immediate:{ctxStr}" <| db.sqlite.transaction (mode := .immediate) do - for (modName, modInfo) in doc.moduleInfo do + for (modName, modInfo) in batch do let modNameStr := modName.toString -- Collect structure field info to save in second pass (after all declarations are in name_info) let mut pendingStructureFields : Array (Int64 × Process.StructureInfo) := #[] From bc3e727b6aa219edf706d9a931db3df075f384ac Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 10:32:04 +0100 Subject: [PATCH 037/106] timing prints --- Main.lean | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Main.lean b/Main.lean index 352c4d65..2ccac0ca 100644 --- a/Main.lean +++ b/Main.lean @@ -18,8 +18,12 @@ def runSingleCmd (p : Parsed) : IO UInt32 := do let dbFile := p.positionalArg! "db" |>.as! String let relevantModules := #[p.positionalArg! "module" |>.as! String |> String.toName] let sourceUri := p.positionalArg! "sourceUri" |>.as! 
String + let t0 ← IO.monoMsNow let doc ← load <| .analyzeConcreteModules relevantModules + let t1 ← IO.monoMsNow updateModuleDb doc buildDir dbFile (some sourceUri) + let t2 ← IO.monoMsNow + IO.eprintln s!"[timing] {relevantModules[0]!}: load={t1-t0}ms db={t2-t1}ms total={t2-t0}ms" return 0 def runGenCoreCmd (p : Parsed) : IO UInt32 := do From d9e74035f9eb8aa2f2349ec1c62b52f14504b009 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 13:17:32 +0100 Subject: [PATCH 038/106] try just building Mathlib DB --- scripts/bench/mathlib-docs/run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index 863863cf..f5d35ca4 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -48,4 +48,4 @@ popd # Benchmark documentation generation env DOCGEN_SRC="file" "$REPO_ROOT/$BENCH/measure.py" -t mathlib-docs -m instructions -m maxrss -m task-clock -m wall-clock -- \ - lake --dir "$TMPDIR/mathproject" build Mathlib:docs + lake --dir "$TMPDIR/mathproject" build Mathlib:docInfo From 21e4d9b2ab52f579cf862e29858a38b730d7d788 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 13:47:10 +0100 Subject: [PATCH 039/106] Chunk up HTML generation --- DocGen4/DB.lean | 47 +-------------------------------------- DocGen4/Helpers.lean | 52 ++++++++++++++++++++++++++++++++++++++++++++ DocGen4/Output.lean | 5 +++-- 3 files changed, 56 insertions(+), 48 deletions(-) create mode 100644 DocGen4/Helpers.lean diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index aee35f3f..45f92612 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -1,55 +1,10 @@ import DocGen4.Process import DocGen4.RenderedCode import SQLite -import Std.Data.Iterators +import DocGen4.Helpers namespace DocGen4.DB -section - -open Std Iterators - -structure ChunkArray α where - array : Array α - chunkSize : Nat - curr : Nat - chunkSize_gt_zero : chunkSize > 0 := by grind - 
curr_valid : curr ≤ array.size := by grind - -def chunkedM {m : Type u → Type v} (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := - IterM.mk (ChunkArray.mk xs n 0) m (Subarray α) - -def chunked (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := - IterM.mk (ChunkArray.mk xs n 0) Id (Subarray α) - -def ChunkArray.PlausibleStep (it : IterM (α := ChunkArray α) m (Subarray α)) : - (step : IterStep (IterM (α := ChunkArray α) m (Subarray α)) (Subarray α)) → Prop - | .yield it' v => - it.internalState.curr < it.internalState.array.size ∧ - it.internalState.array = it'.internalState.array ∧ - it.internalState.chunkSize = it'.internalState.chunkSize ∧ - it.internalState.curr < it'.internalState.curr ∧ - v.size ≤ it.internalState.chunkSize ∧ - v.array = it.internalState.array - | .done => it.internalState.curr = it.internalState.array.size - | .skip .. => False - -instance [Pure m] : Iterator (ChunkArray α) m (Subarray α) where - IsPlausibleStep := ChunkArray.PlausibleStep - step it := - let { internalState := { array, chunkSize, chunkSize_gt_zero, curr, curr_valid } } := it - if h : curr = array.size then - pure <| .deflate <| .done h - else - let curr' := curr + chunkSize - let curr'' := if curr' > array.size then array.size else curr' - let it' : IterM (α := ChunkArray α) _ _ := ⟨{ array, chunkSize, curr := curr'' }⟩ - pure <| .deflate <| .yield it' (array[curr...curr'']) (by grind [ChunkArray.PlausibleStep]) - -instance [Pure m] [Monad n] : IteratorLoop (ChunkArray α) (β := Subarray α) m n := IteratorLoop.defaultImplementation - -end - open Lean in /-- Extract a deterministic string representation of an inductive type for hashing. Includes constructor names and their types. 
-/ diff --git a/DocGen4/Helpers.lean b/DocGen4/Helpers.lean new file mode 100644 index 00000000..1daa3aae --- /dev/null +++ b/DocGen4/Helpers.lean @@ -0,0 +1,52 @@ +import Std.Data.Iterators + +namespace DocGen4 + +open Std Iterators + +structure ChunkArray α where + array : Array α + chunkSize : Nat + curr : Nat + chunkSize_gt_zero : chunkSize > 0 := by grind + curr_valid : curr ≤ array.size := by grind + +def chunkedM {m : Type u → Type v} (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := + IterM.mk (ChunkArray.mk xs n 0) m (Subarray α) + +def chunked (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := + IterM.mk (ChunkArray.mk xs n 0) Id (Subarray α) + +def ChunkArray.PlausibleStep (it : IterM (α := ChunkArray α) m (Subarray α)) : + (step : IterStep (IterM (α := ChunkArray α) m (Subarray α)) (Subarray α)) → Prop + | .yield it' v => + it.internalState.curr < it.internalState.array.size ∧ + it.internalState.array = it'.internalState.array ∧ + it.internalState.chunkSize = it'.internalState.chunkSize ∧ + it.internalState.curr < it'.internalState.curr ∧ + v.size ≤ it.internalState.chunkSize ∧ + v.array = it.internalState.array + | .done => it.internalState.curr = it.internalState.array.size + | .skip .. 
=> False + +instance [Pure m] : Iterator (ChunkArray α) m (Subarray α) where + IsPlausibleStep := ChunkArray.PlausibleStep + step it := + let { internalState := { array, chunkSize, chunkSize_gt_zero, curr, curr_valid } } := it + if h : curr = array.size then + pure <| .deflate <| .done h + else + let curr' := curr + chunkSize + let curr'' := if curr' > array.size then array.size else curr' + let it' : IterM (α := ChunkArray α) _ _ := ⟨{ array, chunkSize, curr := curr'' }⟩ + pure <| .deflate <| .yield it' (array[curr...curr'']) (by grind [ChunkArray.PlausibleStep]) + +instance [Pure m] [Monad n] : IteratorLoop (ChunkArray α) (β := Subarray α) m n := IteratorLoop.defaultImplementation + +def chunksOf (xs : Array α) (size : Nat) (_ok : size > 0 := by grind) : Array (Array α) := Id.run do + let mut out := #[] + let mut n := 0 + while n < out.size do + out := out.push <| xs.extract n (n + size) + n := n + size + return out diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index a93ef490..3e5a6860 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -18,6 +18,7 @@ import DocGen4.Output.Search import DocGen4.Output.Tactics import DocGen4.Output.ToJson import DocGen4.Output.FoundationalTypes +import DocGen4.Helpers namespace DocGen4 @@ -102,8 +103,8 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.createDirAll <| basePath baseConfig.buildDir FS.createDirAll <| declarationsBasePath baseConfig.buildDir - -- Spawn one task per module, each returning its output file path - let tasks ← targetModules.mapM fun modName => IO.asTask do + -- Spawn one task per 100 modules, each returning its output file path + let tasks ← (chunksOf targetModules 100).flatMapM fun mods => mods.mapM fun modName => IO.asTask do -- Each task opens its own DB connection (SQLite handles concurrent readers well) let db ← openDbForReading dbPath let module ← loadModule db modName From 48817ad03e569d2e297cdfc3d647fe60286f6b65 Mon Sep 17 00:00:00 2001 From: 
David Thrane Christiansen Date: Wed, 4 Feb 2026 13:47:33 +0100 Subject: [PATCH 040/106] Revert "try just building Mathlib DB" This reverts commit d9e74035f9eb8aa2f2349ec1c62b52f14504b009. --- scripts/bench/mathlib-docs/run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index f5d35ca4..863863cf 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -48,4 +48,4 @@ popd # Benchmark documentation generation env DOCGEN_SRC="file" "$REPO_ROOT/$BENCH/measure.py" -t mathlib-docs -m instructions -m maxrss -m task-clock -m wall-clock -- \ - lake --dir "$TMPDIR/mathproject" build Mathlib:docInfo + lake --dir "$TMPDIR/mathproject" build Mathlib:docs From 57be658b6f6cbc6c1112d9c4a2662c1beaad982f Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 14:15:07 +0100 Subject: [PATCH 041/106] Try single-threaded HTML output --- DocGen4/Output.lean | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 3e5a6860..66188a28 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -103,8 +103,10 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.createDirAll <| basePath baseConfig.buildDir FS.createDirAll <| declarationsBasePath baseConfig.buildDir + let mut outputs := #[] -- Spawn one task per 100 modules, each returning its output file path - let tasks ← (chunksOf targetModules 100).flatMapM fun mods => mods.mapM fun modName => IO.asTask do + + for modName in targetModules do -- Each task opens its own DB connection (SQLite handles concurrent readers well) let db ← openDbForReading dbPath let module ← loadModule db modName @@ -152,14 +154,8 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") jsonDecls.compress - 
return relFilePath + outputs := outputs.push relFilePath - -- Wait for all tasks and collect output paths - let mut outputs := #[] - for task in tasks do - match ← IO.wait task with - | .ok path => outputs := outputs.push path - | .error e => throw e return outputs def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : From 4e30ff28391c1e6ec4c8733743fd5bec6d865712 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 14:43:23 +0100 Subject: [PATCH 042/106] Revert "Try single-threaded HTML output" This reverts commit 57be658b6f6cbc6c1112d9c4a2662c1beaad982f. --- DocGen4/Output.lean | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 66188a28..3e5a6860 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -103,10 +103,8 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.createDirAll <| basePath baseConfig.buildDir FS.createDirAll <| declarationsBasePath baseConfig.buildDir - let mut outputs := #[] -- Spawn one task per 100 modules, each returning its output file path - - for modName in targetModules do + let tasks ← (chunksOf targetModules 100).flatMapM fun mods => mods.mapM fun modName => IO.asTask do -- Each task opens its own DB connection (SQLite handles concurrent readers well) let db ← openDbForReading dbPath let module ← loadModule db modName @@ -154,8 +152,14 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") jsonDecls.compress - outputs := outputs.push relFilePath + return relFilePath + -- Wait for all tasks and collect output paths + let mut outputs := #[] + for task in tasks do + match ← IO.wait task with + | .ok path => outputs := outputs.push path + | .error e => throw e return outputs def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : 
Hierarchy) : From f47fbb40a38fb9fd6ba0df47ff0aa87eea9aa171 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 14:43:35 +0100 Subject: [PATCH 043/106] Revert "timing prints" This reverts commit bc3e727b6aa219edf706d9a931db3df075f384ac. --- Main.lean | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Main.lean b/Main.lean index 2ccac0ca..352c4d65 100644 --- a/Main.lean +++ b/Main.lean @@ -18,12 +18,8 @@ def runSingleCmd (p : Parsed) : IO UInt32 := do let dbFile := p.positionalArg! "db" |>.as! String let relevantModules := #[p.positionalArg! "module" |>.as! String |> String.toName] let sourceUri := p.positionalArg! "sourceUri" |>.as! String - let t0 ← IO.monoMsNow let doc ← load <| .analyzeConcreteModules relevantModules - let t1 ← IO.monoMsNow updateModuleDb doc buildDir dbFile (some sourceUri) - let t2 ← IO.monoMsNow - IO.eprintln s!"[timing] {relevantModules[0]!}: load={t1-t0}ms db={t2-t1}ms total={t2-t0}ms" return 0 def runGenCoreCmd (p : Parsed) : IO UInt32 := do From 5913631b43bf49daa9118405fcee7202cdf20b8e Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 4 Feb 2026 14:46:19 +0100 Subject: [PATCH 044/106] Remove timing print code --- Main.lean | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/Main.lean b/Main.lean index 352c4d65..95c344b3 100644 --- a/Main.lean +++ b/Main.lean @@ -71,50 +71,29 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do let manifestOutput? := (p.flag? "manifest").map (·.as! String) let moduleRoots := (p.variableArgsAs! 
String).map String.toName - -- Phase 1: Load linking context (fast - module names, source URLs, declaration locations) - let start ← IO.monoMsNow - IO.println s!"Loading linking context from database: {dbPath}" + -- Load linking context (module names, source URLs, declaration locations) let db ← openDbForReading dbPath let linkCtx ← loadLinkingContext db - IO.println s!"Linking context loaded in {(← IO.monoMsNow) - start}ms ({linkCtx.name2ModIdx.size} declarations, {linkCtx.moduleNames.size} modules)" -- Determine which modules to generate HTML for let targetModules ← if moduleRoots.isEmpty then - -- No roots specified: generate for all modules (existing behavior) pure linkCtx.moduleNames else - -- Roots specified: compute transitive closure - let start ← IO.monoMsNow - let transitiveModules ← getTransitiveImports db moduleRoots - IO.println s!"Computed transitive closure of {moduleRoots.size} roots: {transitiveModules.size} modules in {(← IO.monoMsNow) - start}ms" - pure transitiveModules + getTransitiveImports db moduleRoots -- Add `references` pseudo-module to hierarchy since references.html is always generated - let start ← IO.monoMsNow let hierarchy := Hierarchy.fromArray (targetModules.push `references) - IO.println s!"Hierarchy took {(← IO.monoMsNow) - start}ms" - let start ← IO.monoMsNow let baseConfig ← getSimpleBaseContext buildDir hierarchy - IO.println s!"Context took {(← IO.monoMsNow) - start}ms" - -- Phase 2: Parallel HTML generation (one task per module) - let start ← IO.monoMsNow - IO.println s!"Generating HTML in parallel to: {buildDir}" + -- Parallel HTML generation let outputs ← htmlOutputResultsParallel baseConfig dbPath linkCtx targetModules (sourceLinker? 
:= some (dbSourceLinker linkCtx.sourceUrls)) - IO.println s!"HTML took {(← IO.monoMsNow) - start}ms" -- Generate the search index (declaration-data.bmp) - let start ← IO.monoMsNow htmlOutputIndex baseConfig - IO.println s!"HTML index took {(← IO.monoMsNow) - start}ms" -- Update navbar to include all modules on disk - let start ← IO.monoMsNow updateNavbarFromDisk buildDir - IO.println s!"Navbar update took {(← IO.monoMsNow) - start}ms" - - IO.println "Done!" if let .some manifestOutput := manifestOutput? then IO.FS.writeFile manifestOutput (Lean.toJson outputs).compress return 0 From 3dc65148b7ddcda22e29e36660fb32591e49b4c9 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Fri, 6 Feb 2026 11:01:50 +0100 Subject: [PATCH 045/106] Start cleanup --- DocGen4/DB.lean | 501 +++++++++++---------------------- DocGen4/DB/VersoDocString.lean | 207 ++++++++++++++ 2 files changed, 370 insertions(+), 338 deletions(-) create mode 100644 DocGen4/DB/VersoDocString.lean diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 45f92612..adc0c8fe 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -1,13 +1,17 @@ -import DocGen4.Process + import DocGen4.RenderedCode import SQLite import DocGen4.Helpers +import DocGen4.DB.VersoDocString namespace DocGen4.DB open Lean in -/-- Extract a deterministic string representation of an inductive type for hashing. - Includes constructor names and their types. -/ +/-- +Extracts a deterministic string representation of an inductive type, which is used to invalidate +database schemas in which blobs implicitly depend on serializations of datatypes. Includes +constructor names and their types. +-/ private def inductiveRepr (env : Environment) (name : Name) : String := Id.run do let some (.inductInfo info) := env.find? 
name | return s!"not found: {name}" let mut s := s!"inductive {name} : {info.type}\n" @@ -19,7 +23,9 @@ private def inductiveRepr (env : Environment) (name : Name) : String := Id.run d namespace Internals open Lean Elab Term in -/-- String representation of inductive type definitions, computed at compile time. -/ +/-- +Gets a string representation of inductive type definitions, computed at compile time. +-/ scoped elab "inductiveRepr![" types:ident,* "]" : term => do let env ← getEnv let mut reprs : Array String := #[] @@ -42,206 +48,6 @@ def serializedCodeTypeDefs : String := TaggedText ] -section -open Lean -open SQLite.Blob - -structure DocstringDataHandler where - serialize : Serializer Dynamic - deserialize : Deserializer Dynamic - -structure DocstringValues where - inlines : NameMap DocstringDataHandler := {} - blocks : NameMap DocstringDataHandler := {} - -def toBinaryElabInline (vals : DocstringValues) : Serializer ElabInline - | { name, val }, b => - match vals.inlines.get? name with - | none => b.push 0 |> ToBinary.serializer name - | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val - -def toBinaryElabBlock (vals : DocstringValues) : Serializer ElabBlock - | { name, val }, b => - match vals.blocks.get? name with - | none => b.push 0 |> ToBinary.serializer name - | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val - -structure Unknown where -deriving BEq, Hashable, Ord, DecidableEq, Inhabited, TypeName - -instance : Subsingleton Unknown where - allEq := by intros; rfl - -def fromBinaryElabInline (vals : DocstringValues) : Deserializer ElabInline := do - match (← Deserializer.byte) with - | 0 => - let name ← FromBinary.deserializer - pure { name := `unknown ++ name, val := .mk Unknown.mk } - | 1 => - let name ← FromBinary.deserializer - match vals.inlines.get? 
name with - | none => pure { name := `unknown ++ name, val := .mk Unknown.mk } - | some d => - let val ← d.deserialize - pure { name, val } - | other => throw s!"Expected 0 or 1 for `ElabInline`'s tag, got `{other}`" - -def fromBinaryElabBlock (vals : DocstringValues) : Deserializer ElabBlock := do - match (← Deserializer.byte) with - | 0 => - let name ← FromBinary.deserializer - pure { name := `unknown ++ name, val := .mk Unknown.mk } - | 1 => - let name ← FromBinary.deserializer - match vals.blocks.get? name with - | none => pure { name := `unknown ++ name, val := .mk Unknown.mk } - | some d => - let val ← d.deserialize - pure { name, val } - | other => throw s!"Expected 0 or 1 for `ElabBlock`'s tag, got `{other}`" - -partial instance [ToBinary i] : ToBinary (Doc.Inline i) where - serializer := go -where - go - | .text s, b => b.push 0 |> ToBinary.serializer s - | .linebreak s, b => b.push 1 |> ToBinary.serializer s - | .emph xs, b => - have : ToBinary (Doc.Inline i) := ⟨go⟩ - b.push 2 |> ToBinary.serializer xs - | .bold xs, b => - have : ToBinary (Doc.Inline i) := ⟨go⟩ - b.push 3 |> ToBinary.serializer xs - | .code s, b => - b.push 4 |> ToBinary.serializer s - | .math .inline s, b => b.push 5 |> ToBinary.serializer s - | .math .display s, b => b.push 6 |> ToBinary.serializer s - | .link xs url, b => - have : ToBinary (Doc.Inline i) := ⟨go⟩ - b.push 7 |> ToBinary.serializer xs |> ToBinary.serializer url - | .footnote name xs, b => - have : ToBinary (Doc.Inline i) := ⟨go⟩ - b.push 8 |> ToBinary.serializer name |> ToBinary.serializer xs - | .image alt url, b => b.push 9 |> ToBinary.serializer alt |> ToBinary.serializer url - | .concat xs, b => - have : ToBinary (Doc.Inline i) := ⟨go⟩ - b.push 10 |> ToBinary.serializer xs - | .other container content, b => - have : ToBinary (Doc.Inline i) := ⟨go⟩ - b.push 11 |> ToBinary.serializer container |> ToBinary.serializer content - -partial instance [FromBinary i] : FromBinary (Doc.Inline i) where - deserializer := go -where 
go := do - have : FromBinary (Doc.Inline i) := ⟨go⟩ - match (← .byte) with - | 0 => .text <$> FromBinary.deserializer - | 1 => .linebreak <$> FromBinary.deserializer - | 2 => .emph <$> FromBinary.deserializer - | 3 => .bold <$> FromBinary.deserializer - | 4 => .code <$> FromBinary.deserializer - | 5 => .math .inline <$> FromBinary.deserializer - | 6 => .math .display <$> FromBinary.deserializer - | 7 => .link <$> FromBinary.deserializer <*> FromBinary.deserializer - | 8 => .footnote <$> FromBinary.deserializer <*> FromBinary.deserializer - | 9 => .image <$> FromBinary.deserializer <*> FromBinary.deserializer - | 10 => .concat <$> FromBinary.deserializer - | 11 => .other <$> FromBinary.deserializer <*> FromBinary.deserializer - | other => throw s!"Expected a tag for `Doc.Inline` in 0...12, got {other}" - - -partial instance [ToBinary i] [ToBinary b] : ToBinary (Doc.Block i b) where - serializer := go -where - go - | .para xs, bs => bs.push 0 |> ToBinary.serializer xs - | .code s, bs => bs.push 1 |> ToBinary.serializer s - | .concat xs, bs => - have : ToBinary (Doc.Block i b) := ⟨go⟩ - bs.push 2 |> ToBinary.serializer xs - | .ul xs, bs => - have : ToBinary (Doc.Block i b) := ⟨go⟩ - bs.push 3 |> ToBinary.serializer (xs.map (·.contents)) - | .ol n xs, bs => - have : ToBinary (Doc.Block i b) := ⟨go⟩ - bs.push 4 |> ToBinary.serializer n |> ToBinary.serializer (xs.map (·.contents)) - | .dl xs, bs => - have : ToBinary (Doc.Block i b) := ⟨go⟩ - bs.push 5 |> ToBinary.serializer (xs.map (fun i => (i.term, i.desc))) - | .blockquote xs, bs => - have : ToBinary (Doc.Block i b) := ⟨go⟩ - bs.push 6 |> ToBinary.serializer xs - | .other container content, bs => - have : ToBinary (Doc.Block i b) := ⟨go⟩ - bs.push 7 |> ToBinary.serializer container |> ToBinary.serializer content - - -partial instance [FromBinary i] [FromBinary b] : FromBinary (Doc.Block i b) where - deserializer := go -where go := do - have : FromBinary (Doc.Block i b) := ⟨go⟩ - match (← .byte) with - | 0 => .para <$> 
FromBinary.deserializer - | 1 => .code <$> FromBinary.deserializer - | 2 => .concat <$> FromBinary.deserializer - | 3 => - let xss : Array (Array (Doc.Block i b)) ← FromBinary.deserializer - return .ul <| xss.map (⟨·⟩) - | 4 => - let n ← FromBinary.deserializer - let xss : Array (Array (Doc.Block i b)) ← FromBinary.deserializer - return .ol n <| xss.map (⟨·⟩) - | 5 => - let items : Array (_ × _) ← FromBinary.deserializer - return .dl <| items.map (fun x => Doc.DescItem.mk x.1 x.2) - | 6 => .blockquote <$> FromBinary.deserializer - | 7 => .other <$> FromBinary.deserializer <*> FromBinary.deserializer - | other => throw s!"Expected a tag for `Doc.Block` in 0...8, got {other}" - -partial instance [ToBinary i] [ToBinary b] [ToBinary p] : ToBinary (Doc.Part i p b) where - serializer := go -where - go - | .mk title titleString metadata content subParts, bs => - have : ToBinary (Doc.Part i p b) := ⟨go⟩ - bs - |> ToBinary.serializer title - |> ToBinary.serializer titleString - |> ToBinary.serializer metadata - |> ToBinary.serializer content - |> ToBinary.serializer subParts - -partial instance [FromBinary i] [FromBinary b] [FromBinary p] : FromBinary (Doc.Part i p b) where - deserializer := go -where - go := do - have : FromBinary (Doc.Part i p b) := ⟨go⟩ - .mk - <$> FromBinary.deserializer - <*> FromBinary.deserializer - <*> FromBinary.deserializer - <*> FromBinary.deserializer - <*> FromBinary.deserializer - -instance : ToBinary VersoDocString where - serializer - | {text, subsections}, b => - -- TODO customizable handling of Verso docstring extension data - have : ToBinary ElabInline := ⟨toBinaryElabInline {}⟩ - have : ToBinary ElabBlock := ⟨toBinaryElabBlock {}⟩ - b |> ToBinary.serializer text |> ToBinary.serializer subsections - -instance : FromBinary VersoDocString where - deserializer := do - -- TODO customizable handling of Verso docstring extension data - have : FromBinary ElabInline := ⟨fromBinaryElabInline {}⟩ - have : FromBinary ElabBlock := 
⟨fromBinaryElabBlock {}⟩ - .mk <$> FromBinary.deserializer <*> FromBinary.deserializer - -instance : SQLite.QueryParam VersoDocString := .asBlob - -end - def getDb (dbFile : System.FilePath) : IO SQLite := do -- SQLite atomically creates the DB file, and the schema and journal settings here are applied -- idempotently. This avoids DB creation race conditions. @@ -543,10 +349,11 @@ CREATE TABLE IF NOT EXISTS schema_meta ( ); "# -def withDbContext (context : String) (act : IO α) : IO α := do +def withDbContext [MonadLiftT BaseIO m] [MonadControlT IO m] [Monad m] (context : String) (act : m α) : m α := + controlAt IO fun runInBase => do let ms ← IO.monoMsNow try - act + runInBase act catch | e => let ms' ← IO.monoMsNow @@ -620,7 +427,8 @@ instance : SQLite.QueryParam RenderedCode where let str := ToBinary.serializer code .empty SQLite.QueryParam.bind stmt index str -def ensureDb (dbFile : System.FilePath) : IO DB := do +def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do + have := versoDocStringQueryParam values let sqlite ← getDb dbFile let deleteModuleStmt ← sqlite.prepare "DELETE FROM modules WHERE name = ?" let deleteModule modName := withDbContext "write:delete:modules" do @@ -877,105 +685,121 @@ def ensureDb (dbFile : System.FilePath) : IO DB := do saveTactic } +structure DBM.Context where + values : DocstringValues + db : DB + +abbrev DBM α := ReaderT DBM.Context IO α + +def DBM.run (values : DocstringValues) (dbFile : System.FilePath) (act : DBM α) : IO α := do + let db ← ensureDb values dbFile + ReaderT.run act { values, db } + +def withDB (f : DB → DBM α) : DBM α := do f (← read).db + +def withSQLite (f : SQLite → DBM α) : DBM α := do f (← read).db + end DB open DB -def updateModuleDb (doc : Process.AnalyzerResult) (buildDir : System.FilePath) (dbFile : String) +def updateModuleDb (values : DocstringValues) + (doc : Process.AnalyzerResult) + (buildDir : System.FilePath) (dbFile : String) (sourceUrl? 
: Option String) : IO Unit := do let dbFile := buildDir / dbFile - let db ← ensureDb dbFile - for batch in chunked doc.moduleInfo.toArray 100 do - -- Each module gets its own transaction to reduce lock contention - let ctxStr := - if h : batch.size = 1 then batch[0].1.toString - else if h : batch.size = 0 then "none" - else s!"{batch[0].1}-{batch[batch.size-1].1} ({batch.size} modules)" - - let _ ← withDbContext s!"transaction:immediate:{ctxStr}" <| db.sqlite.transaction (mode := .immediate) do - for (modName, modInfo) in batch do - let modNameStr := modName.toString - -- Collect structure field info to save in second pass (after all declarations are in name_info) - let mut pendingStructureFields : Array (Int64 × Process.StructureInfo) := #[] - db.deleteModule modNameStr - db.saveModule modNameStr sourceUrl? - for imported in modInfo.imports do - db.saveImport modNameStr imported - let mut i : Int64 := 0 - for mem in modInfo.members do - let pos := i - i := i + 1 - match mem with - | .modDoc doc => - db.saveDeclarationRange modNameStr pos doc.declarationRange - db.saveMarkdownDocstring modNameStr pos doc.doc - | .docInfo info => - let baseInfo := info.toInfo - -- Skip saving ctorInfo here - they're saved along with their parent inductive - if !info.isCtorInfo then - db.saveInfo modNameStr pos (infoKind info) baseInfo - db.saveDeclarationRange modNameStr pos baseInfo.declarationRange - match info with - | .axiomInfo info => - db.saveAxiom modNameStr pos info.isUnsafe - | .theoremInfo _info => -- No extra info here - pure () - | .opaqueInfo info => - db.saveOpaque modNameStr pos info.definitionSafety - | .definitionInfo info => - db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable - if let some eqns := info.equations then - for h : j in 0...eqns.size do - db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 - | .instanceInfo info => - -- Save definition data (InstanceInfo extends DefinitionInfo) - db.saveDefinition modNameStr pos 
info.isUnsafe info.hints info.isNonComputable - if let some eqns := info.equations then - for h : j in 0...eqns.size do - db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 - -- Save instance-specific data - db.saveInstance modNameStr pos info.className.toString - for h : j in 0...info.typeNames.size do - db.saveInstanceArg modNameStr pos j.toInt64 info.typeNames[j].toString - | .inductiveInfo info => - db.saveInductive modNameStr pos info.isUnsafe - -- Save recursors (main + aux) as internal names linking to this inductive - saveRecursors doc.name2ModIdx db modNameStr pos info.name - for ctor in info.ctors do - let cpos := i - i := i + 1 - db.saveInfo modNameStr cpos "constructor" ctor - db.saveDeclarationRange modNameStr cpos ctor.declarationRange - db.saveConstructor modNameStr cpos pos - | .structureInfo info => - -- First pass: save structure metadata (not fields) - i := (← (saveStructureMetadata false info db modNameStr pos doc.name2ModIdx).run i).2 - pendingStructureFields := pendingStructureFields.push (pos, info) - | .classInfo info => - -- First pass: save structure metadata (not fields) - i := (← (saveStructureMetadata true info db modNameStr pos doc.name2ModIdx).run i).2 - pendingStructureFields := pendingStructureFields.push (pos, info) - | .classInductiveInfo info => - db.saveClassInductive modNameStr pos info.isUnsafe - -- Save recursors (main + aux) as internal names linking to this class inductive - saveRecursors doc.name2ModIdx db modNameStr pos info.name - for ctor in info.ctors do - let cpos := i - i := i + 1 - db.saveInfo modNameStr cpos "constructor" ctor - db.saveDeclarationRange modNameStr cpos ctor.declarationRange - db.saveConstructor modNameStr cpos pos - | .ctorInfo info => - -- Here we do nothing because they were inserted along with the inductive - pure () - -- Second pass: save structure fields (now that all projection functions are in name_info) - for (pos, info) in pendingStructureFields do - saveStructureFields info db 
modNameStr pos - -- Save tactics defined in this module - for tactic in modInfo.tactics do - db.saveTactic modNameStr tactic - pure () + DBM.run values dbFile <| withDB fun db => do + for batch in chunked doc.moduleInfo.toArray 100 do + -- Each module gets its own transaction to reduce lock contention + let ctxStr := + if h : batch.size = 1 then batch[0].1.toString + else if h : batch.size = 0 then "none" + else s!"{batch[0].1}-{batch[batch.size-1].1} ({batch.size} modules)" + + let _ ← withDbContext s!"transaction:immediate:{ctxStr}" <| db.sqlite.transaction (mode := .immediate) do + for (modName, modInfo) in batch do + let modNameStr := modName.toString + -- Collect structure field info to save in second pass (after all declarations are in name_info) + let mut pendingStructureFields : Array (Int64 × Process.StructureInfo) := #[] + db.deleteModule modNameStr + db.saveModule modNameStr sourceUrl? + for imported in modInfo.imports do + db.saveImport modNameStr imported + let mut i : Int64 := 0 + for mem in modInfo.members do + let pos := i + i := i + 1 + match mem with + | .modDoc doc => + db.saveDeclarationRange modNameStr pos doc.declarationRange + db.saveMarkdownDocstring modNameStr pos doc.doc + | .docInfo info => + let baseInfo := info.toInfo + -- Skip saving ctorInfo here - they're saved along with their parent inductive + if !info.isCtorInfo then + db.saveInfo modNameStr pos (infoKind info) baseInfo + db.saveDeclarationRange modNameStr pos baseInfo.declarationRange + match info with + | .axiomInfo info => + db.saveAxiom modNameStr pos info.isUnsafe + | .theoremInfo _info => -- No extra info here + pure () + | .opaqueInfo info => + db.saveOpaque modNameStr pos info.definitionSafety + | .definitionInfo info => + db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable + if let some eqns := info.equations then + for h : j in 0...eqns.size do + db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 + | .instanceInfo info => + -- Save 
definition data (InstanceInfo extends DefinitionInfo) + db.saveDefinition modNameStr pos info.isUnsafe info.hints info.isNonComputable + if let some eqns := info.equations then + for h : j in 0...eqns.size do + db.saveDefinitionEquation modNameStr pos eqns[j] j.toInt64 + -- Save instance-specific data + db.saveInstance modNameStr pos info.className.toString + for h : j in 0...info.typeNames.size do + db.saveInstanceArg modNameStr pos j.toInt64 info.typeNames[j].toString + | .inductiveInfo info => + db.saveInductive modNameStr pos info.isUnsafe + -- Save recursors (main + aux) as internal names linking to this inductive + saveRecursors doc.name2ModIdx db modNameStr pos info.name + for ctor in info.ctors do + let cpos := i + i := i + 1 + db.saveInfo modNameStr cpos "constructor" ctor + db.saveDeclarationRange modNameStr cpos ctor.declarationRange + db.saveConstructor modNameStr cpos pos + | .structureInfo info => + -- First pass: save structure metadata (not fields) + i := (← (saveStructureMetadata false info db modNameStr pos doc.name2ModIdx).run i).2 + pendingStructureFields := pendingStructureFields.push (pos, info) + | .classInfo info => + -- First pass: save structure metadata (not fields) + i := (← (saveStructureMetadata true info db modNameStr pos doc.name2ModIdx).run i).2 + pendingStructureFields := pendingStructureFields.push (pos, info) + | .classInductiveInfo info => + db.saveClassInductive modNameStr pos info.isUnsafe + -- Save recursors (main + aux) as internal names linking to this class inductive + saveRecursors doc.name2ModIdx db modNameStr pos info.name + for ctor in info.ctors do + let cpos := i + i := i + 1 + db.saveInfo modNameStr cpos "constructor" ctor + db.saveDeclarationRange modNameStr cpos ctor.declarationRange + db.saveConstructor modNameStr cpos pos + | .ctorInfo info => + -- Here we do nothing because they were inserted along with the inductive + pure () + -- Second pass: save structure fields (now that all projection functions are in 
name_info) + for (pos, info) in pendingStructureFields do + saveStructureFields info db modNameStr pos + -- Save tactics defined in this module + for tactic in modInfo.tactics do + db.saveTactic modNameStr tactic + pure () pure () where @@ -1065,7 +889,8 @@ def readRenderedCode (blob : ByteArray) : IO RenderedCode := do | .error e => throw <| IO.userError s!"Failed to deserialize RenderedCode: {e}" /-- Read VersoDocString from a blob. -/ -def readVersoDocString (blob : ByteArray) : IO VersoDocString := do +def readVersoDocString (blob : ByteArray) : DBM VersoDocString := do + have := versoDocStringFromBinary (← read).values match fromBinary blob with | .ok doc => return doc | .error e => throw <| IO.userError s!"Failed to deserialize VersoDocString: {e}" @@ -1125,7 +950,7 @@ def buildName2ModIdx (db : SQLite) (moduleNames : Array Name) : IO (Std.HashMap return result /-- Load declaration arguments from the database. -/ -def loadArgs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Process.Arg) := withDbContext "read:declaration_args" do +def loadArgs (moduleName : String) (position : Int64) : DBM (Array Process.Arg) := withDbContext "read:declaration_args" <| withSQLite fun db => do let stmt ← db.prepare "SELECT binder, is_implicit FROM declaration_args WHERE module_name = ? AND position = ? ORDER BY sequence" stmt.bind 1 moduleName stmt.bind 2 position @@ -1138,7 +963,7 @@ def loadArgs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array return args /-- Load declaration attributes from the database. -/ -def loadAttrs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array String) := withDbContext "read:declaration_attrs" do +def loadAttrs (moduleName : String) (position : Int64) : DBM (Array String) := withDbContext "read:declaration_attrs" <| withSQLite fun db => do let stmt ← db.prepare "SELECT attr FROM declaration_attrs WHERE module_name = ? AND position = ? 
ORDER BY sequence" stmt.bind 1 moduleName stmt.bind 2 position @@ -1149,7 +974,7 @@ def loadAttrs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array return attrs /-- Load a docstring from the database. -/ -def loadDocstring (db : SQLite) (moduleName : String) (position : Int64) : IO (Option (String ⊕ VersoDocString)) := withDbContext "read:docstrings" do +def loadDocstring (moduleName : String) (position : Int64) : DBM (Option (String ⊕ VersoDocString)) := withDbContext "read:docstrings" <| withSQLite fun db => do -- Try markdown first let mdStmt ← db.prepare "SELECT text FROM markdown_docstrings WHERE module_name = ? AND position = ?" mdStmt.bind 1 moduleName @@ -1163,12 +988,12 @@ def loadDocstring (db : SQLite) (moduleName : String) (position : Int64) : IO (O versoStmt.bind 2 position if (← versoStmt.step) then let blob ← versoStmt.columnBlob 0 - let doc ← readVersoDocString blob + let doc ← readVersoDocString blob return some (.inr doc) return none /-- Load a declaration range from the database. -/ -def loadDeclarationRange (db : SQLite) (moduleName : String) (position : Int64) : IO (Option DeclarationRange) := withDbContext "read:declaration_ranges" do +def loadDeclarationRange (moduleName : String) (position : Int64) : DBM (Option DeclarationRange) := withDbContext "read:declaration_ranges" <| withSQLite fun db => do let stmt ← db.prepare "SELECT start_line, start_column, start_utf16, end_line, end_column, end_utf16 FROM declaration_ranges WHERE module_name = ? AND position = ?" stmt.bind 1 moduleName stmt.bind 2 position @@ -1188,12 +1013,12 @@ def loadDeclarationRange (db : SQLite) (moduleName : String) (position : Int64) return none /-- Load base Info from the database row. 
-/ -def loadInfo (db : SQLite) (moduleName : String) (position : Int64) (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : IO Process.Info := do +def loadInfo (moduleName : String) (position : Int64) (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : DBM Process.Info := do let type ← readRenderedCode typeBlob - let doc ← loadDocstring db moduleName position - let args ← loadArgs db moduleName position - let attrs ← loadAttrs db moduleName position - let some declRange ← loadDeclarationRange db moduleName position + let doc ← loadDocstring moduleName position + let args ← loadArgs moduleName position + let attrs ← loadAttrs moduleName position + let some declRange ← loadDeclarationRange moduleName position | throw <| IO.userError s!"Missing declaration range for {name}" return { name @@ -1208,7 +1033,7 @@ def loadInfo (db : SQLite) (moduleName : String) (position : Int64) (name : Name /-- Load definition equations from the database. Returns `none` if no equations exist, `some eqns` otherwise. -/ -def loadEquations (db : SQLite) (moduleName : String) (position : Int64) : IO (Option (Array RenderedCode)) := withDbContext "read:definition_equations" do +def loadEquations (moduleName : String) (position : Int64) : DBM (Option (Array RenderedCode)) := withDbContext "read:definition_equations" <| withSQLite fun db => do let stmt ← db.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? ORDER BY sequence" stmt.bind 1 moduleName stmt.bind 2 position @@ -1219,7 +1044,7 @@ def loadEquations (db : SQLite) (moduleName : String) (position : Int64) : IO (O return some eqns /-- Load instance type names from the database. 
-/ -def loadInstanceArgs (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Name) := do +def loadInstanceArgs (moduleName : String) (position : Int64) : DBM (Array Name) := withSQLite fun db => do let stmt ← db.prepare "SELECT type_name FROM instance_args WHERE module_name = ? AND position = ? ORDER BY sequence" stmt.bind 1 moduleName stmt.bind 2 position @@ -1230,7 +1055,7 @@ def loadInstanceArgs (db : SQLite) (moduleName : String) (position : Int64) : IO return typeNames /-- Load structure parents from the database. -/ -def loadStructureParents (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Process.StructureParentInfo) := do +def loadStructureParents (moduleName : String) (position : Int64) : DBM (Array Process.StructureParentInfo) := withSQLite fun db => do let stmt ← db.prepare "SELECT projection_fn, type FROM structure_parents WHERE module_name = ? AND position = ? ORDER BY sequence" stmt.bind 1 moduleName stmt.bind 2 position @@ -1257,7 +1082,7 @@ def loadStructureFieldArgs (db : SQLite) (moduleName : String) (position : Int64 return args /-- Load structure fields from the database. -/ -def loadStructureFields (db : SQLite) (moduleName : String) (position : Int64) : IO (Array Process.FieldInfo) := do +def loadStructureFields (moduleName : String) (position : Int64) : DBM (Array Process.FieldInfo) := withSQLite fun db => do -- Get structure fields and look up projection function info by name let stmt ← db.prepare "SELECT sequence, proj_name, type, is_direct FROM structure_fields WHERE module_name = ? AND position = ? 
ORDER BY sequence" stmt.bind 1 moduleName @@ -1276,9 +1101,9 @@ def loadStructureFields (db : SQLite) (moduleName : String) (position : Int64) : let projModName ← projStmt.columnText 0 let projPos ← projStmt.columnInt64 1 -- Load projection function's docstring, attrs, and declaration range - let doc ← loadDocstring db projModName projPos - let attrs ← loadAttrs db projModName projPos - let declRange ← loadDeclarationRange db projModName projPos + let doc ← loadDocstring projModName projPos + let attrs ← loadAttrs projModName projPos + let declRange ← loadDeclarationRange projModName projPos -- Get render flag from projection function's name_info let render ← do let renderStmt ← db.prepare "SELECT render FROM name_info WHERE module_name = ? AND position = ?" @@ -1308,7 +1133,7 @@ def loadStructureFields (db : SQLite) (moduleName : String) (position : Int64) : return fields /-- Load structure constructor from the database. -/ -def loadStructureConstructor (db : SQLite) (moduleName : String) (position : Int64) : IO (Option Process.NameInfo) := do +def loadStructureConstructor (moduleName : String) (position : Int64) : DBM (Option Process.NameInfo) := withSQLite fun db => do let stmt ← db.prepare "SELECT name, type, ctor_position FROM structure_constructors WHERE module_name = ? AND position = ?" stmt.bind 1 moduleName stmt.bind 2 position @@ -1317,12 +1142,12 @@ def loadStructureConstructor (db : SQLite) (moduleName : String) (position : Int let typeBlob ← stmt.columnBlob 1 let ctorPos ← stmt.columnInt64 2 let type ← readRenderedCode typeBlob - let doc ← loadDocstring db moduleName ctorPos + let doc ← loadDocstring moduleName ctorPos return some { name, type, doc } return none /-- Load constructors for an inductive type. 
-/ -def loadConstructors (db : SQLite) (moduleName : String) (position : Int64) : IO (List Process.ConstructorInfo) := do +def loadConstructors (moduleName : String) (position : Int64) : DBM (List Process.ConstructorInfo) := withSQLite fun db => do let stmt ← db.prepare "SELECT c.position FROM constructors c WHERE c.module_name = ? AND c.type_position = ? ORDER BY c.position" stmt.bind 1 moduleName stmt.bind 2 position @@ -1338,14 +1163,14 @@ def loadConstructors (db : SQLite) (moduleName : String) (position : Int64) : IO let typeBlob ← infoStmt.columnBlob 1 let sorried := (← infoStmt.columnInt64 2) != 0 let render := (← infoStmt.columnInt64 3) != 0 - let info ← loadInfo db moduleName ctorPos name typeBlob sorried render + let info ← loadInfo moduleName ctorPos name typeBlob sorried render ctors := ctors ++ [info] return ctors /-- Load a DocInfo from the database based on its kind. -/ -def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : String) - (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : IO (Option Process.DocInfo) := do - let info ← loadInfo db moduleName position name typeBlob sorried render +def loadDocInfo (moduleName : String) (position : Int64) (kind : String) + (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : DBM (Option Process.DocInfo) := withSQLite fun db => do + let info ← loadInfo moduleName position name typeBlob sorried render match kind with | "axiom" => let stmt ← db.prepare "SELECT is_unsafe FROM axioms WHERE module_name = ? AND position = ?" 
@@ -1381,7 +1206,7 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S | "opaque" => .opaque | "abbrev" => .abbrev | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let equations ← loadEquations db moduleName position + let equations ← loadEquations moduleName position return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable } return none | "instance" => @@ -1401,8 +1226,8 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S | "opaque" => .opaque | "abbrev" => .abbrev | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let equations ← loadEquations db moduleName position - let typeNames ← loadInstanceArgs db moduleName position + let equations ← loadEquations moduleName position + let typeNames ← loadInstanceArgs moduleName position return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable, className, typeNames } return none | "inductive" => @@ -1411,7 +1236,7 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S stmt.bind 2 position if (← stmt.step) then let isUnsafe := (← stmt.columnInt64 0) != 0 - let ctors ← loadConstructors db moduleName position + let ctors ← loadConstructors moduleName position return some <| .inductiveInfo { toInfo := info, isUnsafe, ctors } return none | "structure" => @@ -1419,9 +1244,9 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S stmt.bind 1 moduleName stmt.bind 2 position if (← stmt.step) then - let parents ← loadStructureParents db moduleName position - let fieldInfo ← loadStructureFields db moduleName position - let some ctor ← loadStructureConstructor db moduleName position + let parents ← loadStructureParents moduleName position + let fieldInfo ← loadStructureFields moduleName position + let some ctor ← loadStructureConstructor moduleName position | return none return some <| .structureInfo { toInfo := info, fieldInfo, parents, ctor } return 
none @@ -1430,9 +1255,9 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S stmt.bind 1 moduleName stmt.bind 2 position if (← stmt.step) then - let parents ← loadStructureParents db moduleName position - let fieldInfo ← loadStructureFields db moduleName position - let some ctor ← loadStructureConstructor db moduleName position + let parents ← loadStructureParents moduleName position + let fieldInfo ← loadStructureFields moduleName position + let some ctor ← loadStructureConstructor moduleName position | return none return some <| .classInfo { toInfo := info, fieldInfo, parents, ctor } return none @@ -1442,7 +1267,7 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S stmt.bind 2 position if (← stmt.step) then let isUnsafe := (← stmt.columnInt64 0) != 0 - let ctors ← loadConstructors db moduleName position + let ctors ← loadConstructors moduleName position return some <| .classInductiveInfo { toInfo := info, isUnsafe, ctors } return none | "constructor" => @@ -1452,7 +1277,7 @@ def loadDocInfo (db : SQLite) (moduleName : String) (position : Int64) (kind : S return none /-- Load a module from the database. -/ -def loadModule (db : SQLite) (moduleName : Name) : IO Process.Module := do +def loadModule (moduleName : Name) : DBM Process.Module := withSQLite fun db => do let modNameStr := moduleName.toString let imports ← getModuleImports db moduleName -- Load all members (declarations and module docs) with their positions. 
@@ -1472,7 +1297,7 @@ def loadModule (db : SQLite) (moduleName : Name) : IO Process.Module := do let typeBlob ← stmt.columnBlob 3 let sorried := (← stmt.columnInt64 4) != 0 let render := (← stmt.columnInt64 5) != 0 - if let some docInfo ← loadDocInfo db modNameStr position kind name typeBlob sorried render then + if let some docInfo ← loadDocInfo modNameStr position kind name typeBlob sorried render then members := members.push (position, .docInfo docInfo) -- Load module docs let mdStmt ← db.prepare " @@ -1485,7 +1310,7 @@ def loadModule (db : SQLite) (moduleName : Name) : IO Process.Module := do while (← mdStmt.step) do let position ← mdStmt.columnInt64 0 let doc ← mdStmt.columnText 1 - if let some declRange ← loadDeclarationRange db modNameStr position then + if let some declRange ← loadDeclarationRange modNameStr position then members := members.push (position, .modDoc { doc, declarationRange := declRange }) -- Sort by (declaration range, position) to maintain deterministic ordering. -- Primary key: declaration range position (line, column) using Position.lt diff --git a/DocGen4/DB/VersoDocString.lean b/DocGen4/DB/VersoDocString.lean new file mode 100644 index 00000000..0c0c55f5 --- /dev/null +++ b/DocGen4/DB/VersoDocString.lean @@ -0,0 +1,207 @@ +import DocGen4.Process +import DocGen4.RenderedCode +import SQLite +import DocGen4.Helpers + +namespace DocGen4.DB + +open Lean +open SQLite.Blob + + + +structure DocstringDataHandler where + serialize : Serializer Dynamic + deserialize : Deserializer Dynamic + +structure DocstringValues where + inlines : NameMap DocstringDataHandler := {} + blocks : NameMap DocstringDataHandler := {} + +def toBinaryElabInline (vals : DocstringValues) : Serializer ElabInline + | { name, val }, b => + match vals.inlines.get? 
name with + | none => b.push 0 |> ToBinary.serializer name + | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val + +def toBinaryElabBlock (vals : DocstringValues) : Serializer ElabBlock + | { name, val }, b => + match vals.blocks.get? name with + | none => b.push 0 |> ToBinary.serializer name + | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val + +structure Unknown where +deriving BEq, Hashable, Ord, DecidableEq, Inhabited, TypeName + +instance : Subsingleton Unknown where + allEq := by intros; rfl + +def fromBinaryElabInline (vals : DocstringValues) : Deserializer ElabInline := do + match (← Deserializer.byte) with + | 0 => + let name ← FromBinary.deserializer + pure { name := `unknown ++ name, val := .mk Unknown.mk } + | 1 => + let name ← FromBinary.deserializer + match vals.inlines.get? name with + | none => pure { name := `unknown ++ name, val := .mk Unknown.mk } + | some d => + let val ← d.deserialize + pure { name, val } + | other => throw s!"Expected 0 or 1 for `ElabInline`'s tag, got `{other}`" + +def fromBinaryElabBlock (vals : DocstringValues) : Deserializer ElabBlock := do + match (← Deserializer.byte) with + | 0 => + let name ← FromBinary.deserializer + pure { name := `unknown ++ name, val := .mk Unknown.mk } + | 1 => + let name ← FromBinary.deserializer + match vals.blocks.get? 
name with + | none => pure { name := `unknown ++ name, val := .mk Unknown.mk } + | some d => + let val ← d.deserialize + pure { name, val } + | other => throw s!"Expected 0 or 1 for `ElabBlock`'s tag, got `{other}`" + +partial instance [ToBinary i] : ToBinary (Doc.Inline i) where + serializer := go +where + go + | .text s, b => b.push 0 |> ToBinary.serializer s + | .linebreak s, b => b.push 1 |> ToBinary.serializer s + | .emph xs, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 2 |> ToBinary.serializer xs + | .bold xs, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 3 |> ToBinary.serializer xs + | .code s, b => + b.push 4 |> ToBinary.serializer s + | .math .inline s, b => b.push 5 |> ToBinary.serializer s + | .math .display s, b => b.push 6 |> ToBinary.serializer s + | .link xs url, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 7 |> ToBinary.serializer xs |> ToBinary.serializer url + | .footnote name xs, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 8 |> ToBinary.serializer name |> ToBinary.serializer xs + | .image alt url, b => b.push 9 |> ToBinary.serializer alt |> ToBinary.serializer url + | .concat xs, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 10 |> ToBinary.serializer xs + | .other container content, b => + have : ToBinary (Doc.Inline i) := ⟨go⟩ + b.push 11 |> ToBinary.serializer container |> ToBinary.serializer content + +partial instance [FromBinary i] : FromBinary (Doc.Inline i) where + deserializer := go +where go := do + have : FromBinary (Doc.Inline i) := ⟨go⟩ + match (← .byte) with + | 0 => .text <$> FromBinary.deserializer + | 1 => .linebreak <$> FromBinary.deserializer + | 2 => .emph <$> FromBinary.deserializer + | 3 => .bold <$> FromBinary.deserializer + | 4 => .code <$> FromBinary.deserializer + | 5 => .math .inline <$> FromBinary.deserializer + | 6 => .math .display <$> FromBinary.deserializer + | 7 => .link <$> FromBinary.deserializer <*> FromBinary.deserializer + | 8 => .footnote <$> 
FromBinary.deserializer <*> FromBinary.deserializer + | 9 => .image <$> FromBinary.deserializer <*> FromBinary.deserializer + | 10 => .concat <$> FromBinary.deserializer + | 11 => .other <$> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected a tag for `Doc.Inline` in 0...12, got {other}" + + +partial instance [ToBinary i] [ToBinary b] : ToBinary (Doc.Block i b) where + serializer := go +where + go + | .para xs, bs => bs.push 0 |> ToBinary.serializer xs + | .code s, bs => bs.push 1 |> ToBinary.serializer s + | .concat xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 2 |> ToBinary.serializer xs + | .ul xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 3 |> ToBinary.serializer (xs.map (·.contents)) + | .ol n xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 4 |> ToBinary.serializer n |> ToBinary.serializer (xs.map (·.contents)) + | .dl xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 5 |> ToBinary.serializer (xs.map (fun i => (i.term, i.desc))) + | .blockquote xs, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 6 |> ToBinary.serializer xs + | .other container content, bs => + have : ToBinary (Doc.Block i b) := ⟨go⟩ + bs.push 7 |> ToBinary.serializer container |> ToBinary.serializer content + + +partial instance [FromBinary i] [FromBinary b] : FromBinary (Doc.Block i b) where + deserializer := go +where go := do + have : FromBinary (Doc.Block i b) := ⟨go⟩ + match (← .byte) with + | 0 => .para <$> FromBinary.deserializer + | 1 => .code <$> FromBinary.deserializer + | 2 => .concat <$> FromBinary.deserializer + | 3 => + let xss : Array (Array (Doc.Block i b)) ← FromBinary.deserializer + return .ul <| xss.map (⟨·⟩) + | 4 => + let n ← FromBinary.deserializer + let xss : Array (Array (Doc.Block i b)) ← FromBinary.deserializer + return .ol n <| xss.map (⟨·⟩) + | 5 => + let items : Array (_ × _) ← FromBinary.deserializer + return .dl <| items.map (fun x => Doc.DescItem.mk x.1 x.2) + | 6 
=> .blockquote <$> FromBinary.deserializer + | 7 => .other <$> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected a tag for `Doc.Block` in 0...8, got {other}" + +partial instance [ToBinary i] [ToBinary b] [ToBinary p] : ToBinary (Doc.Part i p b) where + serializer := go +where + go + | .mk title titleString metadata content subParts, bs => + have : ToBinary (Doc.Part i p b) := ⟨go⟩ + bs + |> ToBinary.serializer title + |> ToBinary.serializer titleString + |> ToBinary.serializer metadata + |> ToBinary.serializer content + |> ToBinary.serializer subParts + +partial instance [FromBinary i] [FromBinary b] [FromBinary p] : FromBinary (Doc.Part i p b) where + deserializer := go +where + go := do + have : FromBinary (Doc.Part i p b) := ⟨go⟩ + .mk + <$> FromBinary.deserializer + <*> FromBinary.deserializer + <*> FromBinary.deserializer + <*> FromBinary.deserializer + <*> FromBinary.deserializer + +def versoDocStringToBinary (values : DocstringValues) : ToBinary VersoDocString where + serializer + | {text, subsections}, b => + -- TODO customizable handling of Verso docstring extension data + have : ToBinary ElabInline := ⟨toBinaryElabInline values⟩ + have : ToBinary ElabBlock := ⟨toBinaryElabBlock values⟩ + b |> ToBinary.serializer text |> ToBinary.serializer subsections + +def versoDocStringFromBinary (values : DocstringValues) : FromBinary VersoDocString where + deserializer := do + -- TODO customizable handling of Verso docstring extension data + have : FromBinary ElabInline := ⟨fromBinaryElabInline values⟩ + have : FromBinary ElabBlock := ⟨fromBinaryElabBlock values⟩ + .mk <$> FromBinary.deserializer <*> FromBinary.deserializer + +def versoDocStringQueryParam (values : DocstringValues) : SQLite.QueryParam VersoDocString := + have := versoDocStringToBinary values + .asBlob From 55f22f39c3358961f3797e6e99716cd16733de89 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Sat, 7 Feb 2026 11:51:51 +0100 Subject: [PATCH 046/106] 
chore: refactor/reorganize for clarity --- DocGen4/DB.lean | 961 ++++------------------------------------- DocGen4/DB/Read.lean | 544 +++++++++++++++++++++++ DocGen4/DB/Schema.lean | 350 +++++++++++++++ DocGen4/Output.lean | 6 +- Main.lean | 10 +- 5 files changed, 993 insertions(+), 878 deletions(-) create mode 100644 DocGen4/DB/Read.lean create mode 100644 DocGen4/DB/Schema.lean diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index adc0c8fe..2027c463 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -3,363 +3,12 @@ import DocGen4.RenderedCode import SQLite import DocGen4.Helpers import DocGen4.DB.VersoDocString +import DocGen4.DB.Schema +import DocGen4.DB.Read namespace DocGen4.DB -open Lean in -/-- -Extracts a deterministic string representation of an inductive type, which is used to invalidate -database schemas in which blobs implicitly depend on serializations of datatypes. Includes -constructor names and their types. --/ -private def inductiveRepr (env : Environment) (name : Name) : String := Id.run do - let some (.inductInfo info) := env.find? name | return s!"not found: {name}" - let mut s := s!"inductive {name} : {info.type}\n" - for ctor in info.ctors do - let some (.ctorInfo ctorInfo) := env.find? ctor | continue - let ctorName := ctor.replacePrefix name .anonymous - s := s ++ s!" | {ctorName} : {ctorInfo.type}\n" - return s - -namespace Internals -open Lean Elab Term in -/-- -Gets a string representation of inductive type definitions, computed at compile time. --/ -scoped elab "inductiveRepr![" types:ident,* "]" : term => do - let env ← getEnv - let mut reprs : Array String := #[] - for type in types.getElems do - let name ← resolveGlobalConstNoOverload type - reprs := reprs.push (inductiveRepr env name) - return .lit (.strVal (String.intercalate "\n" reprs.toList)) -end Internals - -open Internals in -open Lean.Widget in -/-- -The datatypes that are serialized to the database. If they change, then the database should be -rebuilt. 
--/ -def serializedCodeTypeDefs : String := - inductiveRepr![ - SortFormer, - RenderedCode.Tag, - TaggedText - ] - -def getDb (dbFile : System.FilePath) : IO SQLite := do - -- SQLite atomically creates the DB file, and the schema and journal settings here are applied - -- idempotently. This avoids DB creation race conditions. - let db ← SQLite.openWith dbFile .readWriteCreate - db.exec "PRAGMA busy_timeout = 86400000" -- 24 hours - effectively no timeout for parallel builds - db.exec "PRAGMA journal_mode = WAL" - db.exec "PRAGMA synchronous = OFF" - db.exec "PRAGMA foreign_keys = ON" - try - db.transaction (db.exec ddl) - catch - | e => - throw <| .userError s!"Exception while creating schema: {e}" - -- Check schema version via DDL hash and type definition hash - let ddlHash := toString ddl.hash - let typeHash := toString serializedCodeTypeDefs.hash - let stmt ← db.prepare "SELECT key, value FROM schema_meta" - let mut storedDdlHash : Option String := none - let mut storedTypeHash : Option String := none - while ← stmt.step do - let key ← stmt.columnText 0 - let value ← stmt.columnText 1 - if key == "ddl_hash" then storedDdlHash := some value - if key == "type_hash" then storedTypeHash := some value - match storedDdlHash, storedTypeHash with - | none, none => - -- New database, store the hashes - db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('ddl_hash', '{ddlHash}')" - db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('type_hash', '{typeHash}')" - | some stored, _ => - if stored != ddlHash then - throw <| .userError s!"Database schema is outdated (DDL hash mismatch). Run `lake clean` or delete '{dbFile}' and rebuild." - match storedTypeHash with - | none => - -- Older DB without type hash, add it - db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('type_hash', '{typeHash}')" - | some storedType => - if storedType != typeHash then - throw <| .userError s!"Database schema is outdated (serialized type definitions changed). 
Run `lake clean` or delete '{dbFile}' and rebuild." - | none, some _ => -- Shouldn't happen, but handle gracefully - db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('ddl_hash', '{ddlHash}')" - return db -where - ddl := - r#" -PRAGMA journal_mode = WAL; - --- Modules table -CREATE TABLE IF NOT EXISTS modules ( - name TEXT PRIMARY KEY, - source_url TEXT -); - --- Direct imports -CREATE TABLE IF NOT EXISTS module_imports ( - importer TEXT NOT NULL, - imported TEXT NOT NULL, - PRIMARY KEY (importer, imported), - FOREIGN KEY (importer) REFERENCES modules(name) ON DELETE CASCADE - -- There's no - -- FOREIGN KEY (imported) REFERENCES modules(name) - -- because docs are built incrementally. -); - --- Index for reverse queries: "what imports this module?" -CREATE INDEX IF NOT EXISTS idx_module_imports_imported ON module_imports(imported); - -CREATE TABLE IF NOT EXISTS module_items ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - item_type TEXT NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS declaration_ranges ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - start_line INTEGER NOT NULL, - start_column INTEGER NOT NULL, - start_utf16 INTEGER NOT NULL, - end_line INTEGER NOT NULL, - end_column INTEGER NOT NULL, - end_utf16 INTEGER NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS markdown_docstrings ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - text TEXT NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS verso_docstrings ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - content BLOB NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); - 
-CREATE TABLE IF NOT EXISTS name_info ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - kind TEXT, - name TEXT NOT NULL, - type TEXT NOT NULL, - sorried INTEGER NOT NULL, - render INTEGER NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS axioms ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - is_unsafe INTEGER NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - --- Internal names (like recursors) that aren't rendered but should link to a rendered declaration -CREATE TABLE IF NOT EXISTS internal_names ( - name TEXT NOT NULL PRIMARY KEY, - target_module TEXT NOT NULL, - target_position INTEGER NOT NULL, - FOREIGN KEY (target_module, target_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - --- Index for CASCADE deletes: when name_info rows are deleted, find matching internal_names -CREATE INDEX IF NOT EXISTS idx_internal_names_target ON internal_names(target_module, target_position); - -CREATE TABLE IF NOT EXISTS constructors ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - type_position INTEGER NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE - FOREIGN KEY (module_name, type_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - --- Index for CASCADE deletes on the second FK (type_position) -CREATE INDEX IF NOT EXISTS idx_constructors_type_pos ON constructors(module_name, type_position); - -CREATE TABLE IF NOT EXISTS inductives ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - is_unsafe INTEGER NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT 
EXISTS class_inductives ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - is_unsafe INTEGER NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS opaques ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - safety TEXT NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS definitions ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - is_unsafe INTEGER NOT NULL, - hints TEXT NOT NULL, - is_noncomputable INTEGER NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS definition_equations ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - code TEXT NOT NULL, - sequence INTEGER NOT NULL, - PRIMARY KEY (module_name, position, sequence), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS instances ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - class_name TEXT NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS instance_args ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - sequence INTEGER NOT NULL, - type_name TEXT NOT NULL, - PRIMARY KEY (module_name, position, sequence), - FOREIGN KEY (module_name, position) REFERENCES instances(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS structures ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - is_class INTEGER NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, 
position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS structure_parents ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - sequence INTEGER NOT NULL, - projection_fn TEXT NOT NULL, - type TEXT NOT NULL, - PRIMARY KEY (module_name, position, sequence), - FOREIGN KEY (module_name, position) REFERENCES structures(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS structure_constructors ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, -- The structure's position - ctor_position INTEGER NOT NULL, - name TEXT NOT NULL, - type BLOB NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS structure_fields ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - sequence INTEGER NOT NULL, - proj_name TEXT NOT NULL, - type BLOB NOT NULL, - is_direct INTEGER NOT NULL, - PRIMARY KEY (module_name, position, sequence), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE - -- Note: No FK on proj_name because the projection function may be in a different module - -- (for inherited fields) that hasn't been processed yet. The JOIN at load time handles this. 
-); - -CREATE TABLE IF NOT EXISTS structure_field_args ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - field_sequence INTEGER NOT NULL, - arg_sequence INTEGER NOT NULL, - binder BLOB NOT NULL, - is_implicit INTEGER NOT NULL, - PRIMARY KEY (module_name, position, field_sequence, arg_sequence), - FOREIGN KEY (module_name, position, field_sequence) REFERENCES structure_fields(module_name, position, sequence) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS declaration_args ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - sequence INTEGER NOT NULL, - binder BLOB NOT NULL, - is_implicit INTEGER NOT NULL, - PRIMARY KEY (module_name, position, sequence), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS declaration_attrs ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - sequence INTEGER NOT NULL, - attr TEXT NOT NULL, - PRIMARY KEY (module_name, position, sequence), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS tactics ( - module_name TEXT NOT NULL, - internal_name TEXT NOT NULL, - user_name TEXT NOT NULL, - doc_string TEXT NOT NULL, - PRIMARY KEY (module_name, internal_name), - FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS tactic_tags ( - module_name TEXT NOT NULL, - internal_name TEXT NOT NULL, - tag TEXT NOT NULL, - PRIMARY KEY (module_name, internal_name, tag), - FOREIGN KEY (module_name, internal_name) REFERENCES tactics(module_name, internal_name) ON DELETE CASCADE -); - -CREATE TABLE IF NOT EXISTS schema_meta ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL -); -"# - -def withDbContext [MonadLiftT BaseIO m] [MonadControlT IO m] [Monad m] (context : String) (act : m α) : m α := - controlAt IO fun runInBase => do - let ms ← IO.monoMsNow - try - runInBase act - catch - | e => - let ms' ← IO.monoMsNow - 
throw <| .userError s!"Exception in `{context}` after {ms' - ms}ms: {e.toString}" - -structure DB where +structure DB extends ReadOps where sqlite : SQLite deleteModule (modName : String) : IO Unit saveModule (modName : String) (sourceUrl? : Option String) : IO Unit @@ -391,6 +40,7 @@ structure DB where /-- Save a tactic defined in this module -/ saveTactic (modName : String) (tactic : Process.TacticInfo Process.MarkdownDocstring) : IO Unit + def DB.saveDocstring (db : DB) (modName : String) (position : Int64) (text : String ⊕ Lean.VersoDocString) : IO Unit := match text with | .inl md => db.saveMarkdownDocstring modName position md @@ -404,9 +54,6 @@ private def run (stmt : SQLite.Stmt) : IO Unit := do stmt.reset stmt.clearBindings -def _root_.SQLite.Stmt.bind [SQLite.NullableQueryParam α] (stmt : SQLite.Stmt) (index : Int32) (param : α) : IO Unit := do - SQLite.NullableQueryParam.bind stmt index param - instance : SQLite.QueryParam Lean.DefinitionSafety where bind stmt index safety := SQLite.QueryParam.bind stmt index <| @@ -655,7 +302,8 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do saveTacticTagStmt.bind 2 tactic.internalName.toString saveTacticTagStmt.bind 3 tag.toString run saveTacticTagStmt - pure { + let readOps ← mkReadOps sqlite values + pure { readOps with sqlite, deleteModule, saveModule, @@ -699,6 +347,91 @@ def withDB (f : DB → DBM α) : DBM α := do f (← read).db def withSQLite (f : SQLite → DBM α) : DBM α := do f (← read).db +private def readonlyError : IO α := throw (IO.userError "DB opened for reading only") + +def openForReading (dbFile : System.FilePath) (values : DocstringValues := {}) : IO DB := do + let sqlite ← SQLite.openWith dbFile .readonly + sqlite.exec "PRAGMA busy_timeout = 86400000" + let readOps ← mkReadOps sqlite values + pure { + sqlite, + deleteModule := fun _ => readonlyError, + saveModule := fun _ _ => readonlyError, + saveImport := fun _ _ => readonlyError, + saveMarkdownDocstring := fun _ _ _ 
=> readonlyError, + saveVersoDocstring := fun _ _ _ => readonlyError, + saveDeclarationRange := fun _ _ _ => readonlyError, + saveInfo := fun _ _ _ _ => readonlyError, + saveAxiom := fun _ _ _ => readonlyError, + saveOpaque := fun _ _ _ => readonlyError, + saveDefinition := fun _ _ _ _ _ => readonlyError, + saveDefinitionEquation := fun _ _ _ _ => readonlyError, + saveInstance := fun _ _ _ => readonlyError, + saveInstanceArg := fun _ _ _ _ => readonlyError, + saveInductive := fun _ _ _ => readonlyError, + saveConstructor := fun _ _ _ => readonlyError, + saveClassInductive := fun _ _ _ => readonlyError, + saveStructure := fun _ _ _ => readonlyError, + saveStructureConstructor := fun _ _ _ _ => readonlyError, + saveNameOnly := fun _ _ _ _ _ _ => readonlyError, + saveStructureParent := fun _ _ _ _ _ => readonlyError, + saveStructureField := fun _ _ _ _ _ _ => readonlyError, + saveStructureFieldArg := fun _ _ _ _ _ _ => readonlyError, + saveArg := fun _ _ _ _ _ => readonlyError, + saveAttr := fun _ _ _ _ => readonlyError, + saveInternalName := fun _ _ _ => readonlyError, + saveTactic := fun _ _ => readonlyError, + getModuleNames := readOps.getModuleNames, + getModuleSourceUrls := readOps.getModuleSourceUrls, + getModuleImports := readOps.getModuleImports, + buildName2ModIdx := readOps.buildName2ModIdx, + loadModule := readOps.loadModule, + } + +/-! ## DB Reading -/ + +section Reading +open Lean + +/-- Context needed for cross-module linking, without loading full module contents. -/ +structure LinkingContext where + moduleNames : Array Name + sourceUrls : Std.HashMap Name String + name2ModIdx : Std.HashMap Name ModuleIdx + +/-- Load the linking context from the database. 
-/ +def DB.loadLinkingContext (db : DB) : IO LinkingContext := do + let moduleNames ← db.getModuleNames + let sourceUrls ← db.getModuleSourceUrls + let name2ModIdx ← db.buildName2ModIdx moduleNames + return { moduleNames, sourceUrls, name2ModIdx } + +/-- +Get transitive closure of imports for given modules using recursive CTE. +Uses dynamic SQL (variable number of placeholders) so cannot be pre-prepared. +-/ +def DB.getTransitiveImports (db : DB) (modules : Array Name) : IO (Array Name) := withDbContext "read:transitive_imports" do + if modules.isEmpty then return #[] + let placeholders := ", ".intercalate (modules.toList.map fun _ => "(?)") + let sql := s!" + WITH RECURSIVE transitive_imports(name) AS ( + VALUES {placeholders} + UNION + SELECT mi.imported FROM module_imports mi + JOIN transitive_imports ti ON mi.importer = ti.name + ) + SELECT DISTINCT name FROM transitive_imports" + let stmt ← db.sqlite.prepare sql + for h : i in [0:modules.size] do + stmt.bind (i.toInt32 + 1) modules[i].toString + let mut result := #[] + while (← stmt.step) do + let name := (← stmt.columnText 0).toName + result := result.push name + return result + +end Reading + end DB open DB @@ -870,515 +603,3 @@ where | .classInfo info => "class" | .classInductiveInfo info => "class inductive" | .ctorInfo info => "constructor" - -/-! ## DB Reading -/ - -section Reading -open Lean SQLite.Blob - -/-- Open a database for reading. -/ -def openDbForReading (dbFile : System.FilePath) : IO SQLite := do - let db ← SQLite.openWith dbFile .readonly - db.exec "PRAGMA busy_timeout = 86400000" -- 24 hours - effectively no timeout - return db - -/-- Read RenderedCode from a blob. -/ -def readRenderedCode (blob : ByteArray) : IO RenderedCode := do - match fromBinary blob with - | .ok code => return code - | .error e => throw <| IO.userError s!"Failed to deserialize RenderedCode: {e}" - -/-- Read VersoDocString from a blob. 
-/ -def readVersoDocString (blob : ByteArray) : DBM VersoDocString := do - have := versoDocStringFromBinary (← read).values - match fromBinary blob with - | .ok doc => return doc - | .error e => throw <| IO.userError s!"Failed to deserialize VersoDocString: {e}" - -/-- Get all module names from the database. -/ -def getModuleNames (db : SQLite) : IO (Array Name) := withDbContext "read:modules:names" do - let stmt ← db.prepare "SELECT name FROM modules ORDER BY name" - let mut names := #[] - while (← stmt.step) do - let name := (← stmt.columnText 0).toName - names := names.push name - return names - -/-- Get all module source URLs from the database. -/ -def getModuleSourceUrls (db : SQLite) : IO (Std.HashMap Name String) := withDbContext "read:modules:source_urls" do - let stmt ← db.prepare "SELECT name, source_url FROM modules WHERE source_url IS NOT NULL" - let mut urls : Std.HashMap Name String := {} - while (← stmt.step) do - let name := (← stmt.columnText 0).toName - let url ← stmt.columnText 1 - urls := urls.insert name url - return urls - -/-- Get all module imports from the database. -/ -def getModuleImports (db : SQLite) (moduleName : Name) : IO (Array Name) := withDbContext "read:module_imports" do - let stmt ← db.prepare "SELECT imported FROM module_imports WHERE importer = ?" - stmt.bind 1 moduleName.toString - let mut imports := #[] - while (← stmt.step) do - let name := (← stmt.columnText 0).toName - imports := imports.push name - return imports - -/-- Build the name-to-module index needed for cross-linking. 
-/ -def buildName2ModIdx (db : SQLite) (moduleNames : Array Name) : IO (Std.HashMap Name ModuleIdx) := do - -- First build a map from module name string to index - let modNameToIdx : Std.HashMap Name ModuleIdx := - moduleNames.foldl (init := {}) fun acc modName => - acc.insert modName acc.size - -- Now query all names and their modules - let stmt ← db.prepare "SELECT name, module_name FROM name_info" - let mut result : Std.HashMap Name ModuleIdx := {} - while (← stmt.step) do - let name := (← stmt.columnText 0).toName - let moduleName := (← stmt.columnText 1).toName - if let some idx := modNameToIdx[moduleName]? then - result := result.insert name idx - -- Also add internal names (like recursors) that map to their target's module. - -- Only add if not already in result (name_info entries take precedence). - let internalStmt ← db.prepare "SELECT name, target_module FROM internal_names" - while (← internalStmt.step) do - let name := (← internalStmt.columnText 0).toName - if !result.contains name then - let targetModule := (← internalStmt.columnText 1).toName - if let some idx := modNameToIdx[targetModule]? then - result := result.insert name idx - return result - -/-- Load declaration arguments from the database. -/ -def loadArgs (moduleName : String) (position : Int64) : DBM (Array Process.Arg) := withDbContext "read:declaration_args" <| withSQLite fun db => do - let stmt ← db.prepare "SELECT binder, is_implicit FROM declaration_args WHERE module_name = ? AND position = ? ORDER BY sequence" - stmt.bind 1 moduleName - stmt.bind 2 position - let mut args := #[] - while (← stmt.step) do - let binderBlob ← stmt.columnBlob 0 - let binder ← readRenderedCode binderBlob - let isImplicit := (← stmt.columnInt64 1) != 0 - args := args.push { binder, implicit := isImplicit } - return args - -/-- Load declaration attributes from the database. 
-/ -def loadAttrs (moduleName : String) (position : Int64) : DBM (Array String) := withDbContext "read:declaration_attrs" <| withSQLite fun db => do - let stmt ← db.prepare "SELECT attr FROM declaration_attrs WHERE module_name = ? AND position = ? ORDER BY sequence" - stmt.bind 1 moduleName - stmt.bind 2 position - let mut attrs := #[] - while (← stmt.step) do - let attr ← stmt.columnText 0 - attrs := attrs.push attr - return attrs - -/-- Load a docstring from the database. -/ -def loadDocstring (moduleName : String) (position : Int64) : DBM (Option (String ⊕ VersoDocString)) := withDbContext "read:docstrings" <| withSQLite fun db => do - -- Try markdown first - let mdStmt ← db.prepare "SELECT text FROM markdown_docstrings WHERE module_name = ? AND position = ?" - mdStmt.bind 1 moduleName - mdStmt.bind 2 position - if (← mdStmt.step) then - let text ← mdStmt.columnText 0 - return some (.inl text) - -- Try verso - let versoStmt ← db.prepare "SELECT content FROM verso_docstrings WHERE module_name = ? AND position = ?" - versoStmt.bind 1 moduleName - versoStmt.bind 2 position - if (← versoStmt.step) then - let blob ← versoStmt.columnBlob 0 - let doc ← readVersoDocString blob - return some (.inr doc) - return none - -/-- Load a declaration range from the database. -/ -def loadDeclarationRange (moduleName : String) (position : Int64) : DBM (Option DeclarationRange) := withDbContext "read:declaration_ranges" <| withSQLite fun db => do - let stmt ← db.prepare "SELECT start_line, start_column, start_utf16, end_line, end_column, end_utf16 FROM declaration_ranges WHERE module_name = ? AND position = ?" 
- stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let startLine := (← stmt.columnInt64 0).toNatClampNeg - let startCol := (← stmt.columnInt64 1).toNatClampNeg - let startUtf16 := (← stmt.columnInt64 2).toNatClampNeg - let endLine := (← stmt.columnInt64 3).toNatClampNeg - let endCol := (← stmt.columnInt64 4).toNatClampNeg - let endUtf16 := (← stmt.columnInt64 5).toNatClampNeg - return some { - pos := ⟨startLine, startCol⟩ - charUtf16 := startUtf16 - endPos := ⟨endLine, endCol⟩ - endCharUtf16 := endUtf16 - } - return none - -/-- Load base Info from the database row. -/ -def loadInfo (moduleName : String) (position : Int64) (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : DBM Process.Info := do - let type ← readRenderedCode typeBlob - let doc ← loadDocstring moduleName position - let args ← loadArgs moduleName position - let attrs ← loadAttrs moduleName position - let some declRange ← loadDeclarationRange moduleName position - | throw <| IO.userError s!"Missing declaration range for {name}" - return { - name - type - doc - args - declarationRange := declRange - attrs - sorried - render - } - -/-- Load definition equations from the database. - Returns `none` if no equations exist, `some eqns` otherwise. -/ -def loadEquations (moduleName : String) (position : Int64) : DBM (Option (Array RenderedCode)) := withDbContext "read:definition_equations" <| withSQLite fun db => do - let stmt ← db.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? ORDER BY sequence" - stmt.bind 1 moduleName - stmt.bind 2 position - if !(← stmt.step) then return none - let mut eqns := #[← readRenderedCode (← stmt.columnBlob 0)] - while (← stmt.step) do - eqns := eqns.push (← readRenderedCode (← stmt.columnBlob 0)) - return some eqns - -/-- Load instance type names from the database. 
-/ -def loadInstanceArgs (moduleName : String) (position : Int64) : DBM (Array Name) := withSQLite fun db => do - let stmt ← db.prepare "SELECT type_name FROM instance_args WHERE module_name = ? AND position = ? ORDER BY sequence" - stmt.bind 1 moduleName - stmt.bind 2 position - let mut typeNames := #[] - while (← stmt.step) do - let typeName := (← stmt.columnText 0).toName - typeNames := typeNames.push typeName - return typeNames - -/-- Load structure parents from the database. -/ -def loadStructureParents (moduleName : String) (position : Int64) : DBM (Array Process.StructureParentInfo) := withSQLite fun db => do - let stmt ← db.prepare "SELECT projection_fn, type FROM structure_parents WHERE module_name = ? AND position = ? ORDER BY sequence" - stmt.bind 1 moduleName - stmt.bind 2 position - let mut parents := #[] - while (← stmt.step) do - let projFn := (← stmt.columnText 0).toName - let typeBlob ← stmt.columnBlob 1 - let type ← readRenderedCode typeBlob - parents := parents.push { projFn, type } - return parents - -/-- Load structure field args from the database. -/ -def loadStructureFieldArgs (db : SQLite) (moduleName : String) (position : Int64) (fieldSeq : Int64) : IO (Array Process.Arg) := do - let stmt ← db.prepare "SELECT binder, is_implicit FROM structure_field_args WHERE module_name = ? AND position = ? AND field_sequence = ? ORDER BY arg_sequence" - stmt.bind 1 moduleName - stmt.bind 2 position - stmt.bind 3 fieldSeq - let mut args := #[] - while (← stmt.step) do - let binderBlob ← stmt.columnBlob 0 - let binder ← readRenderedCode binderBlob - let isImplicit := (← stmt.columnInt64 1) != 0 - args := args.push { binder, implicit := isImplicit } - return args - -/-- Load structure fields from the database. 
-/ -def loadStructureFields (moduleName : String) (position : Int64) : DBM (Array Process.FieldInfo) := withSQLite fun db => do - -- Get structure fields and look up projection function info by name - let stmt ← db.prepare "SELECT sequence, proj_name, type, is_direct FROM structure_fields WHERE module_name = ? AND position = ? ORDER BY sequence" - stmt.bind 1 moduleName - stmt.bind 2 position - let mut fields := #[] - while (← stmt.step) do - let fieldSeq := ← stmt.columnInt64 0 - let name := (← stmt.columnText 1).toName - let typeBlob ← stmt.columnBlob 2 - let type ← readRenderedCode typeBlob - let isDirect := (← stmt.columnInt64 3) != 0 - -- Look up projection function by name to get its module and position - let projStmt ← db.prepare "SELECT module_name, position FROM name_info WHERE name = ? LIMIT 1" - projStmt.bind 1 name.toString - let (doc, attrs, declRange, render) ← if (← projStmt.step) then do - let projModName ← projStmt.columnText 0 - let projPos ← projStmt.columnInt64 1 - -- Load projection function's docstring, attrs, and declaration range - let doc ← loadDocstring projModName projPos - let attrs ← loadAttrs projModName projPos - let declRange ← loadDeclarationRange projModName projPos - -- Get render flag from projection function's name_info - let render ← do - let renderStmt ← db.prepare "SELECT render FROM name_info WHERE module_name = ? AND position = ?" 
- renderStmt.bind 1 projModName - renderStmt.bind 2 projPos - if (← renderStmt.step) then - pure ((← renderStmt.columnInt64 0) != 0) - else - pure true - pure (doc, attrs, declRange, render) - else - -- Projection function not found in name_info - use defaults - -- This can happen for inherited fields whose parent module wasn't processed - pure (none, #[], none, true) - -- Load field-specific args from structure_field_args - let args ← loadStructureFieldArgs db moduleName position fieldSeq - fields := fields.push { - name - type - doc - args - declarationRange := declRange.getD default - attrs - render - isDirect - } - return fields - -/-- Load structure constructor from the database. -/ -def loadStructureConstructor (moduleName : String) (position : Int64) : DBM (Option Process.NameInfo) := withSQLite fun db => do - let stmt ← db.prepare "SELECT name, type, ctor_position FROM structure_constructors WHERE module_name = ? AND position = ?" - stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let name := (← stmt.columnText 0).toName - let typeBlob ← stmt.columnBlob 1 - let ctorPos ← stmt.columnInt64 2 - let type ← readRenderedCode typeBlob - let doc ← loadDocstring moduleName ctorPos - return some { name, type, doc } - return none - -/-- Load constructors for an inductive type. -/ -def loadConstructors (moduleName : String) (position : Int64) : DBM (List Process.ConstructorInfo) := withSQLite fun db => do - let stmt ← db.prepare "SELECT c.position FROM constructors c WHERE c.module_name = ? AND c.type_position = ? ORDER BY c.position" - stmt.bind 1 moduleName - stmt.bind 2 position - let mut ctors := [] - while (← stmt.step) do - let ctorPos ← stmt.columnInt64 0 - -- Now load the full info for this constructor - let infoStmt ← db.prepare "SELECT name, type, sorried, render FROM name_info WHERE module_name = ? AND position = ?" 
- infoStmt.bind 1 moduleName - infoStmt.bind 2 ctorPos - if (← infoStmt.step) then - let name := (← infoStmt.columnText 0).toName - let typeBlob ← infoStmt.columnBlob 1 - let sorried := (← infoStmt.columnInt64 2) != 0 - let render := (← infoStmt.columnInt64 3) != 0 - let info ← loadInfo moduleName ctorPos name typeBlob sorried render - ctors := ctors ++ [info] - return ctors - -/-- Load a DocInfo from the database based on its kind. -/ -def loadDocInfo (moduleName : String) (position : Int64) (kind : String) - (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : DBM (Option Process.DocInfo) := withSQLite fun db => do - let info ← loadInfo moduleName position name typeBlob sorried render - match kind with - | "axiom" => - let stmt ← db.prepare "SELECT is_unsafe FROM axioms WHERE module_name = ? AND position = ?" - stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let isUnsafe := (← stmt.columnInt64 0) != 0 - return some <| .axiomInfo { toInfo := info, isUnsafe } - return none - | "theorem" => - return some <| .theoremInfo { toInfo := info } - | "opaque" => - let stmt ← db.prepare "SELECT safety FROM opaques WHERE module_name = ? AND position = ?" - stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let safetyStr ← stmt.columnText 0 - let safety := match safetyStr with - | "unsafe" => .unsafe - | "partial" => .partial - | _ => .safe - return some <| .opaqueInfo { toInfo := info, definitionSafety := safety } - return none - | "definition" => - let stmt ← db.prepare "SELECT is_unsafe, hints, is_noncomputable FROM definitions WHERE module_name = ? AND position = ?" 
- stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let isUnsafe := (← stmt.columnInt64 0) != 0 - let hintsStr ← stmt.columnText 1 - let isNonComputable := (← stmt.columnInt64 2) != 0 - let hints : ReducibilityHints := match hintsStr with - | "opaque" => .opaque - | "abbrev" => .abbrev - | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let equations ← loadEquations moduleName position - return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable } - return none - | "instance" => - let instStmt ← db.prepare "SELECT class_name FROM instances WHERE module_name = ? AND position = ?" - instStmt.bind 1 moduleName - instStmt.bind 2 position - if (← instStmt.step) then - let className := (← instStmt.columnText 0).toName - let defStmt ← db.prepare "SELECT is_unsafe, hints, is_noncomputable FROM definitions WHERE module_name = ? AND position = ?" - defStmt.bind 1 moduleName - defStmt.bind 2 position - if (← defStmt.step) then - let isUnsafe := (← defStmt.columnInt64 0) != 0 - let hintsStr ← defStmt.columnText 1 - let isNonComputable := (← defStmt.columnInt64 2) != 0 - let hints : ReducibilityHints := match hintsStr with - | "opaque" => .opaque - | "abbrev" => .abbrev - | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let equations ← loadEquations moduleName position - let typeNames ← loadInstanceArgs moduleName position - return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable, className, typeNames } - return none - | "inductive" => - let stmt ← db.prepare "SELECT is_unsafe FROM inductives WHERE module_name = ? AND position = ?" - stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let isUnsafe := (← stmt.columnInt64 0) != 0 - let ctors ← loadConstructors moduleName position - return some <| .inductiveInfo { toInfo := info, isUnsafe, ctors } - return none - | "structure" => - let stmt ← db.prepare "SELECT is_class FROM structures WHERE module_name = ? AND position = ?" 
- stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let parents ← loadStructureParents moduleName position - let fieldInfo ← loadStructureFields moduleName position - let some ctor ← loadStructureConstructor moduleName position - | return none - return some <| .structureInfo { toInfo := info, fieldInfo, parents, ctor } - return none - | "class" => - let stmt ← db.prepare "SELECT is_class FROM structures WHERE module_name = ? AND position = ?" - stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let parents ← loadStructureParents moduleName position - let fieldInfo ← loadStructureFields moduleName position - let some ctor ← loadStructureConstructor moduleName position - | return none - return some <| .classInfo { toInfo := info, fieldInfo, parents, ctor } - return none - | "class inductive" => - let stmt ← db.prepare "SELECT is_unsafe FROM class_inductives WHERE module_name = ? AND position = ?" - stmt.bind 1 moduleName - stmt.bind 2 position - if (← stmt.step) then - let isUnsafe := (← stmt.columnInt64 0) != 0 - let ctors ← loadConstructors moduleName position - return some <| .classInductiveInfo { toInfo := info, isUnsafe, ctors } - return none - | "constructor" => - -- Constructors are handled as part of their parent inductive - return some <| .ctorInfo info - | _ => - return none - -/-- Load a module from the database. -/ -def loadModule (moduleName : Name) : DBM Process.Module := withSQLite fun db => do - let modNameStr := moduleName.toString - let imports ← getModuleImports db moduleName - -- Load all members (declarations and module docs) with their positions. - -- We'll sort by (declaration range, position) to maintain deterministic ordering - -- even when multiple declarations have the same position (which happens for - -- auto-generated declarations like instance defaults). - let stmt ← db.prepare " - SELECT n.position, n.kind, n.name, n.type, n.sorried, n.render - FROM name_info n - WHERE n.module_name = ?" 
- stmt.bind 1 modNameStr - let mut members : Array (Int64 × Process.ModuleMember) := #[] - while (← stmt.step) do - let position ← stmt.columnInt64 0 - let kind ← stmt.columnText 1 - let name := (← stmt.columnText 2).toName - let typeBlob ← stmt.columnBlob 3 - let sorried := (← stmt.columnInt64 4) != 0 - let render := (← stmt.columnInt64 5) != 0 - if let some docInfo ← loadDocInfo modNameStr position kind name typeBlob sorried render then - members := members.push (position, .docInfo docInfo) - -- Load module docs - let mdStmt ← db.prepare " - SELECT m.position, m.text - FROM markdown_docstrings m - WHERE m.module_name = ? - AND m.position NOT IN (SELECT position FROM name_info WHERE module_name = ?)" - mdStmt.bind 1 modNameStr - mdStmt.bind 2 modNameStr - while (← mdStmt.step) do - let position ← mdStmt.columnInt64 0 - let doc ← mdStmt.columnText 1 - if let some declRange ← loadDeclarationRange modNameStr position then - members := members.push (position, .modDoc { doc, declarationRange := declRange }) - -- Sort by (declaration range, position) to maintain deterministic ordering. - -- Primary key: declaration range position (line, column) using Position.lt - -- Secondary key: DB position (to break ties when ranges are equal) - let sortedMembers := members.qsort fun (pos1, m1) (pos2, m2) => - let r1 := m1.getDeclarationRange.pos - let r2 := m2.getDeclarationRange.pos - if Position.lt r1 r2 then true - else if Position.lt r2 r1 then false - else pos1 < pos2 -- Tiebreaker: use DB position - -- Load tactics defined in this module - let tacStmt ← db.prepare " - SELECT internal_name, user_name, doc_string - FROM tactics - WHERE module_name = ?" - tacStmt.bind 1 modNameStr - let tagStmt ← db.prepare " - SELECT tag FROM tactic_tags - WHERE module_name = ? AND internal_name = ?" 
- let mut tactics : Array (Process.TacticInfo Process.MarkdownDocstring) := #[] - while (← tacStmt.step) do - let internalName := (← tacStmt.columnText 0).toName - let userName ← tacStmt.columnText 1 - let docString ← tacStmt.columnText 2 - -- Load tags for this tactic - tagStmt.bind 1 modNameStr - tagStmt.bind 2 internalName.toString - let mut tags : Array Name := #[] - while (← tagStmt.step) do - tags := tags.push (← tagStmt.columnText 0).toName - tagStmt.reset - tactics := tactics.push { internalName, userName, tags, docString, definingModule := moduleName } - return { name := moduleName, members := sortedMembers.map (·.2), imports, tactics } - -/-- Context needed for cross-module linking, without loading full module contents. -/ -structure LinkingContext where - moduleNames : Array Name - sourceUrls : Std.HashMap Name String - name2ModIdx : Std.HashMap Name ModuleIdx - -/-- Load the linking context from the database. -/ -def loadLinkingContext (db : SQLite) : IO LinkingContext := do - let moduleNames ← getModuleNames db - let sourceUrls ← getModuleSourceUrls db - let name2ModIdx ← buildName2ModIdx db moduleNames - return { moduleNames, sourceUrls, name2ModIdx } - -/-- Get transitive closure of imports for given modules using recursive CTE. -/ -def getTransitiveImports (db : SQLite) (modules : Array Name) : IO (Array Name) := withDbContext "read:transitive_imports" do - if modules.isEmpty then return #[] - -- Build the VALUES clause for starting modules - let placeholders := ", ".intercalate (modules.toList.map fun _ => "(?)") - let sql := s!" 
- WITH RECURSIVE transitive_imports(name) AS ( - VALUES {placeholders} - UNION - SELECT mi.imported FROM module_imports mi - JOIN transitive_imports ti ON mi.importer = ti.name - ) - SELECT DISTINCT name FROM transitive_imports" - let stmt ← db.prepare sql - -- Bind all module names - for h : i in [0:modules.size] do - stmt.bind (i.toInt32 + 1) modules[i].toString - let mut result := #[] - while (← stmt.step) do - let name := (← stmt.columnText 0).toName - result := result.push name - return result - -end Reading diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean new file mode 100644 index 00000000..a784a87f --- /dev/null +++ b/DocGen4/DB/Read.lean @@ -0,0 +1,544 @@ + +import DocGen4.RenderedCode +import SQLite +import DocGen4.DB.VersoDocString + +namespace DocGen4.DB + +def withDbContext [MonadLiftT BaseIO m] [MonadControlT IO m] [Monad m] (context : String) (act : m α) : m α := + controlAt IO fun runInBase => do + let ms ← IO.monoMsNow + try + runInBase act + catch + | e => + let ms' ← IO.monoMsNow + throw <| .userError s!"Exception in `{context}` after {ms' - ms}ms: {e.toString}" + +structure ReadOps where + getModuleNames : IO (Array Lean.Name) + getModuleSourceUrls : IO (Std.HashMap Lean.Name String) + getModuleImports : Lean.Name → IO (Array Lean.Name) + buildName2ModIdx : Array Lean.Name → IO (Std.HashMap Lean.Name Lean.ModuleIdx) + loadModule : Lean.Name → IO Process.Module + +private def done (stmt : SQLite.Stmt) : IO Unit := do + stmt.reset + stmt.clearBindings + +def _root_.SQLite.Stmt.bind [SQLite.NullableQueryParam α] (stmt : SQLite.Stmt) (index : Int32) (param : α) : IO Unit := do + SQLite.NullableQueryParam.bind stmt index param + +open SQLite.Blob in +/-- Read RenderedCode from a blob. 
-/ +def readRenderedCode (blob : ByteArray) : IO RenderedCode := do + match fromBinary blob with + | .ok code => return code + | .error e => throw <| IO.userError s!"Failed to deserialize RenderedCode: {e}" + + +open Lean SQLite.Blob in +private structure ReadStmts where + values : DocstringValues + loadArgsStmt : SQLite.Stmt + loadAttrsStmt : SQLite.Stmt + readMdDocstringStmt : SQLite.Stmt + readVersoDocstringStmt : SQLite.Stmt + loadDeclRangeStmt : SQLite.Stmt + loadEqnsStmt : SQLite.Stmt + loadInstanceArgsStmt : SQLite.Stmt + loadStructureParentsStmt : SQLite.Stmt + loadFieldArgsStmt : SQLite.Stmt + loadFieldsStmt : SQLite.Stmt + lookupProjStmt : SQLite.Stmt + lookupRenderStmt : SQLite.Stmt + loadStructCtorStmt : SQLite.Stmt + loadCtorPosStmt : SQLite.Stmt + loadCtorInfoStmt : SQLite.Stmt + readAxiomStmt : SQLite.Stmt + readOpaqueStmt : SQLite.Stmt + readDefinitionStmt : SQLite.Stmt + readInstanceStmt : SQLite.Stmt + readInductiveStmt : SQLite.Stmt + readStructureStmt : SQLite.Stmt + readClassInductiveStmt : SQLite.Stmt + getModuleNamesStmt : SQLite.Stmt + getModuleSourceUrlsStmt : SQLite.Stmt + getModuleImportsStmt : SQLite.Stmt + buildNameInfoStmt : SQLite.Stmt + buildInternalNamesStmt : SQLite.Stmt + loadModuleStmt : SQLite.Stmt + loadModuleDocsStmt : SQLite.Stmt + loadTacticsStmt : SQLite.Stmt + loadTacticTagsStmt : SQLite.Stmt + +open Lean SQLite.Blob in +private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO ReadStmts := do + let loadArgsStmt ← sqlite.prepare "SELECT binder, is_implicit FROM declaration_args WHERE module_name = ? AND position = ? ORDER BY sequence" + let loadAttrsStmt ← sqlite.prepare "SELECT attr FROM declaration_attrs WHERE module_name = ? AND position = ? ORDER BY sequence" + let readMdDocstringStmt ← sqlite.prepare "SELECT text FROM markdown_docstrings WHERE module_name = ? AND position = ?" + let readVersoDocstringStmt ← sqlite.prepare "SELECT content FROM verso_docstrings WHERE module_name = ? 
AND position = ?" + let loadDeclRangeStmt ← sqlite.prepare "SELECT start_line, start_column, start_utf16, end_line, end_column, end_utf16 FROM declaration_ranges WHERE module_name = ? AND position = ?" + let loadEqnsStmt ← sqlite.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? ORDER BY sequence" + let loadInstanceArgsStmt ← sqlite.prepare "SELECT type_name FROM instance_args WHERE module_name = ? AND position = ? ORDER BY sequence" + let loadStructureParentsStmt ← sqlite.prepare "SELECT projection_fn, type FROM structure_parents WHERE module_name = ? AND position = ? ORDER BY sequence" + let loadFieldArgsStmt ← sqlite.prepare "SELECT binder, is_implicit FROM structure_field_args WHERE module_name = ? AND position = ? AND field_sequence = ? ORDER BY arg_sequence" + let loadFieldsStmt ← sqlite.prepare "SELECT sequence, proj_name, type, is_direct FROM structure_fields WHERE module_name = ? AND position = ? ORDER BY sequence" + let lookupProjStmt ← sqlite.prepare "SELECT module_name, position FROM name_info WHERE name = ? LIMIT 1" + let lookupRenderStmt ← sqlite.prepare "SELECT render FROM name_info WHERE module_name = ? AND position = ?" + let loadStructCtorStmt ← sqlite.prepare "SELECT name, type, ctor_position FROM structure_constructors WHERE module_name = ? AND position = ?" + let loadCtorPosStmt ← sqlite.prepare "SELECT c.position FROM constructors c WHERE c.module_name = ? AND c.type_position = ? ORDER BY c.position" + let loadCtorInfoStmt ← sqlite.prepare "SELECT name, type, sorried, render FROM name_info WHERE module_name = ? AND position = ?" + let readAxiomStmt ← sqlite.prepare "SELECT is_unsafe FROM axioms WHERE module_name = ? AND position = ?" + let readOpaqueStmt ← sqlite.prepare "SELECT safety FROM opaques WHERE module_name = ? AND position = ?" + let readDefinitionStmt ← sqlite.prepare "SELECT is_unsafe, hints, is_noncomputable FROM definitions WHERE module_name = ? AND position = ?" 
+ let readInstanceStmt ← sqlite.prepare "SELECT class_name FROM instances WHERE module_name = ? AND position = ?" + let readInductiveStmt ← sqlite.prepare "SELECT is_unsafe FROM inductives WHERE module_name = ? AND position = ?" + let readStructureStmt ← sqlite.prepare "SELECT is_class FROM structures WHERE module_name = ? AND position = ?" + let readClassInductiveStmt ← sqlite.prepare "SELECT is_unsafe FROM class_inductives WHERE module_name = ? AND position = ?" + let getModuleNamesStmt ← sqlite.prepare "SELECT name FROM modules ORDER BY name" + let getModuleSourceUrlsStmt ← sqlite.prepare "SELECT name, source_url FROM modules WHERE source_url IS NOT NULL" + let getModuleImportsStmt ← sqlite.prepare "SELECT imported FROM module_imports WHERE importer = ?" + let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info" + let buildInternalNamesStmt ← sqlite.prepare "SELECT name, target_module FROM internal_names" + let loadModuleStmt ← sqlite.prepare "SELECT n.position, n.kind, n.name, n.type, n.sorried, n.render FROM name_info n WHERE n.module_name = ?" + let loadModuleDocsStmt ← sqlite.prepare "SELECT m.position, m.text FROM markdown_docstrings m WHERE m.module_name = ? AND m.position NOT IN (SELECT position FROM name_info WHERE module_name = ?)" + let loadTacticsStmt ← sqlite.prepare "SELECT internal_name, user_name, doc_string FROM tactics WHERE module_name = ?" + let loadTacticTagsStmt ← sqlite.prepare "SELECT tag FROM tactic_tags WHERE module_name = ? AND internal_name = ?" 
+ pure { + values, loadArgsStmt, loadAttrsStmt, readMdDocstringStmt, readVersoDocstringStmt, + loadDeclRangeStmt, loadEqnsStmt, loadInstanceArgsStmt, loadStructureParentsStmt, + loadFieldArgsStmt, loadFieldsStmt, lookupProjStmt, lookupRenderStmt, + loadStructCtorStmt, loadCtorPosStmt, loadCtorInfoStmt, + readAxiomStmt, readOpaqueStmt, readDefinitionStmt, readInstanceStmt, + readInductiveStmt, readStructureStmt, readClassInductiveStmt, + getModuleNamesStmt, getModuleSourceUrlsStmt, getModuleImportsStmt, + buildNameInfoStmt, buildInternalNamesStmt, + loadModuleStmt, loadModuleDocsStmt, loadTacticsStmt, loadTacticTagsStmt + } + +open Lean SQLite.Blob in +private def ReadStmts.loadArgs (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Array Process.Arg) := withDbContext "read:declaration_args" do + s.loadArgsStmt.bind 1 moduleName + s.loadArgsStmt.bind 2 position + let mut args := #[] + while (← s.loadArgsStmt.step) do + let binderBlob ← s.loadArgsStmt.columnBlob 0 + let binder ← readRenderedCode binderBlob + let isImplicit := (← s.loadArgsStmt.columnInt64 1) != 0 + args := args.push { binder, implicit := isImplicit } + done s.loadArgsStmt + return args + +open Lean SQLite.Blob in +private def ReadStmts.loadAttrs (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Array String) := withDbContext "read:declaration_attrs" do + s.loadAttrsStmt.bind 1 moduleName + s.loadAttrsStmt.bind 2 position + let mut attrs := #[] + while (← s.loadAttrsStmt.step) do + let attr ← s.loadAttrsStmt.columnText 0 + attrs := attrs.push attr + done s.loadAttrsStmt + return attrs + +open Lean SQLite.Blob in +private def ReadStmts.loadDocstring (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Option (String ⊕ VersoDocString)) := withDbContext "read:docstrings" do + s.readMdDocstringStmt.bind 1 moduleName + s.readMdDocstringStmt.bind 2 position + if (← s.readMdDocstringStmt.step) then + let text ← s.readMdDocstringStmt.columnText 0 + done s.readMdDocstringStmt 
+ return some (.inl text) + done s.readMdDocstringStmt + s.readVersoDocstringStmt.bind 1 moduleName + s.readVersoDocstringStmt.bind 2 position + if (← s.readVersoDocstringStmt.step) then + let blob ← s.readVersoDocstringStmt.columnBlob 0 + done s.readVersoDocstringStmt + have := versoDocStringFromBinary s.values + match fromBinary blob with + | .ok doc => return some (.inr doc) + | .error e => throw <| IO.userError s!"Failed to deserialize VersoDocString: {e}" + done s.readVersoDocstringStmt + return none + +open Lean SQLite.Blob in +private def ReadStmts.loadDeclarationRange (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Option DeclarationRange) := withDbContext "read:declaration_ranges" do + s.loadDeclRangeStmt.bind 1 moduleName + s.loadDeclRangeStmt.bind 2 position + if (← s.loadDeclRangeStmt.step) then + let startLine := (← s.loadDeclRangeStmt.columnInt64 0).toNatClampNeg + let startCol := (← s.loadDeclRangeStmt.columnInt64 1).toNatClampNeg + let startUtf16 := (← s.loadDeclRangeStmt.columnInt64 2).toNatClampNeg + let endLine := (← s.loadDeclRangeStmt.columnInt64 3).toNatClampNeg + let endCol := (← s.loadDeclRangeStmt.columnInt64 4).toNatClampNeg + let endUtf16 := (← s.loadDeclRangeStmt.columnInt64 5).toNatClampNeg + done s.loadDeclRangeStmt + return some { + pos := ⟨startLine, startCol⟩ + charUtf16 := startUtf16 + endPos := ⟨endLine, endCol⟩ + endCharUtf16 := endUtf16 + } + done s.loadDeclRangeStmt + return none + +open Lean SQLite.Blob in +private def ReadStmts.loadInfo (s : ReadStmts) (moduleName : String) (position : Int64) (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : IO Process.Info := do + let type ← readRenderedCode typeBlob + let doc ← s.loadDocstring moduleName position + let args ← s.loadArgs moduleName position + let attrs ← s.loadAttrs moduleName position + let some declRange ← s.loadDeclarationRange moduleName position + | throw <| IO.userError s!"Missing declaration range for {name}" + return { name, type, 
doc, args, declarationRange := declRange, attrs, sorried, render } + +open Lean SQLite.Blob in +private def ReadStmts.loadEquations (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Option (Array RenderedCode)) := withDbContext "read:definition_equations" do + s.loadEqnsStmt.bind 1 moduleName + s.loadEqnsStmt.bind 2 position + if !(← s.loadEqnsStmt.step) then + done s.loadEqnsStmt + return none + let mut eqns := #[← readRenderedCode (← s.loadEqnsStmt.columnBlob 0)] + while (← s.loadEqnsStmt.step) do + eqns := eqns.push (← readRenderedCode (← s.loadEqnsStmt.columnBlob 0)) + done s.loadEqnsStmt + return some eqns + +open Lean SQLite.Blob in +private def ReadStmts.loadInstanceArgs (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Array Name) := do + s.loadInstanceArgsStmt.bind 1 moduleName + s.loadInstanceArgsStmt.bind 2 position + let mut typeNames := #[] + while (← s.loadInstanceArgsStmt.step) do + let typeName := (← s.loadInstanceArgsStmt.columnText 0).toName + typeNames := typeNames.push typeName + done s.loadInstanceArgsStmt + return typeNames + +open Lean SQLite.Blob in +private def ReadStmts.loadStructureParents (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Array Process.StructureParentInfo) := do + s.loadStructureParentsStmt.bind 1 moduleName + s.loadStructureParentsStmt.bind 2 position + let mut parents := #[] + while (← s.loadStructureParentsStmt.step) do + let projFn := (← s.loadStructureParentsStmt.columnText 0).toName + let typeBlob ← s.loadStructureParentsStmt.columnBlob 1 + let type ← readRenderedCode typeBlob + parents := parents.push { projFn, type } + done s.loadStructureParentsStmt + return parents + +open Lean SQLite.Blob in +private def ReadStmts.loadStructureFieldArgs (s : ReadStmts) (moduleName : String) (position : Int64) (fieldSeq : Int64) : IO (Array Process.Arg) := do + s.loadFieldArgsStmt.bind 1 moduleName + s.loadFieldArgsStmt.bind 2 position + s.loadFieldArgsStmt.bind 3 fieldSeq + let mut args := 
#[] + while (← s.loadFieldArgsStmt.step) do + let binderBlob ← s.loadFieldArgsStmt.columnBlob 0 + let binder ← readRenderedCode binderBlob + let isImplicit := (← s.loadFieldArgsStmt.columnInt64 1) != 0 + args := args.push { binder, implicit := isImplicit } + done s.loadFieldArgsStmt + return args + +open Lean SQLite.Blob in +private def ReadStmts.loadStructureFields (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Array Process.FieldInfo) := do + s.loadFieldsStmt.bind 1 moduleName + s.loadFieldsStmt.bind 2 position + let mut fields := #[] + while (← s.loadFieldsStmt.step) do + let fieldSeq ← s.loadFieldsStmt.columnInt64 0 + let name := (← s.loadFieldsStmt.columnText 1).toName + let typeBlob ← s.loadFieldsStmt.columnBlob 2 + let type ← readRenderedCode typeBlob + let isDirect := (← s.loadFieldsStmt.columnInt64 3) != 0 + s.lookupProjStmt.bind 1 name.toString + let (doc, attrs, declRange, render) ← if (← s.lookupProjStmt.step) then do + let projModName ← s.lookupProjStmt.columnText 0 + let projPos ← s.lookupProjStmt.columnInt64 1 + done s.lookupProjStmt + let doc ← s.loadDocstring projModName projPos + let attrs ← s.loadAttrs projModName projPos + let declRange ← s.loadDeclarationRange projModName projPos + let render ← do + s.lookupRenderStmt.bind 1 projModName + s.lookupRenderStmt.bind 2 projPos + let r ← if (← s.lookupRenderStmt.step) then + pure ((← s.lookupRenderStmt.columnInt64 0) != 0) + else + pure true + done s.lookupRenderStmt + pure r + pure (doc, attrs, declRange, render) + else do + done s.lookupProjStmt + pure (none, #[], none, true) + let args ← s.loadStructureFieldArgs moduleName position fieldSeq + fields := fields.push { + name, type, doc, args, declarationRange := declRange.getD default, + attrs, render, isDirect + } + done s.loadFieldsStmt + return fields + +open Lean SQLite.Blob in +private def ReadStmts.loadStructureConstructor (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Option Process.NameInfo) := do + 
s.loadStructCtorStmt.bind 1 moduleName + s.loadStructCtorStmt.bind 2 position + if (← s.loadStructCtorStmt.step) then + let name := (← s.loadStructCtorStmt.columnText 0).toName + let typeBlob ← s.loadStructCtorStmt.columnBlob 1 + let ctorPos ← s.loadStructCtorStmt.columnInt64 2 + done s.loadStructCtorStmt + let type ← readRenderedCode typeBlob + let doc ← s.loadDocstring moduleName ctorPos + return some { name, type, doc } + done s.loadStructCtorStmt + return none + +open Lean SQLite.Blob in +private def ReadStmts.loadConstructors (s : ReadStmts) (moduleName : String) (position : Int64) : IO (List Process.ConstructorInfo) := do + s.loadCtorPosStmt.bind 1 moduleName + s.loadCtorPosStmt.bind 2 position + let mut ctorPositions := #[] + while (← s.loadCtorPosStmt.step) do + ctorPositions := ctorPositions.push (← s.loadCtorPosStmt.columnInt64 0) + done s.loadCtorPosStmt + let mut ctors := [] + for ctorPos in ctorPositions do + s.loadCtorInfoStmt.bind 1 moduleName + s.loadCtorInfoStmt.bind 2 ctorPos + if (← s.loadCtorInfoStmt.step) then + let name := (← s.loadCtorInfoStmt.columnText 0).toName + let typeBlob ← s.loadCtorInfoStmt.columnBlob 1 + let sorried := (← s.loadCtorInfoStmt.columnInt64 2) != 0 + let render := (← s.loadCtorInfoStmt.columnInt64 3) != 0 + done s.loadCtorInfoStmt + let info ← s.loadInfo moduleName ctorPos name typeBlob sorried render + ctors := ctors ++ [info] + else + done s.loadCtorInfoStmt + return ctors + +open Lean SQLite.Blob in +private def ReadStmts.loadDocInfo (s : ReadStmts) (moduleName : String) (position : Int64) (kind : String) + (name : Name) (typeBlob : ByteArray) (sorried : Bool) (render : Bool) : IO (Option Process.DocInfo) := do + let info ← s.loadInfo moduleName position name typeBlob sorried render + match kind with + | "axiom" => readAxiom info + | "theorem" => return some <| .theoremInfo { toInfo := info } + | "opaque" => readOpaque info + | "definition" => readDefinition info + | "instance" => readInstance info + | "inductive" => 
readInductive info + | "structure" => readStructure .structureInfo info + | "class" => readStructure .classInfo info + | "class inductive" => readClassInductive info + | "constructor" => return some <| .ctorInfo info + | _ => return none +where + readAxiom (info : Process.Info) : IO (Option Process.DocInfo) := do + s.readAxiomStmt.bind 1 moduleName + s.readAxiomStmt.bind 2 position + if (← s.readAxiomStmt.step) then + let isUnsafe := (← s.readAxiomStmt.columnInt64 0) != 0 + done s.readAxiomStmt + return some <| .axiomInfo { toInfo := info, isUnsafe } + done s.readAxiomStmt + return none + readOpaque (info : Process.Info) : IO (Option Process.DocInfo) := do + s.readOpaqueStmt.bind 1 moduleName + s.readOpaqueStmt.bind 2 position + if (← s.readOpaqueStmt.step) then + let safetyStr ← s.readOpaqueStmt.columnText 0 + done s.readOpaqueStmt + let safety := match safetyStr with + | "unsafe" => .unsafe + | "partial" => .partial + | _ => .safe + return some <| .opaqueInfo { toInfo := info, definitionSafety := safety } + done s.readOpaqueStmt + return none + readDefinition (info : Process.Info) : IO (Option Process.DocInfo) := do + s.readDefinitionStmt.bind 1 moduleName + s.readDefinitionStmt.bind 2 position + if (← s.readDefinitionStmt.step) then + let isUnsafe := (← s.readDefinitionStmt.columnInt64 0) != 0 + let hintsStr ← s.readDefinitionStmt.columnText 1 + let isNonComputable := (← s.readDefinitionStmt.columnInt64 2) != 0 + done s.readDefinitionStmt + let hints : ReducibilityHints := match hintsStr with + | "opaque" => .opaque + | "abbrev" => .abbrev + | s => .regular (s.toNat?.getD 0 |>.toUInt32) + let equations ← s.loadEquations moduleName position + return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable } + done s.readDefinitionStmt + return none + readInstance (info : Process.Info) : IO (Option Process.DocInfo) := do + s.readInstanceStmt.bind 1 moduleName + s.readInstanceStmt.bind 2 position + if (← s.readInstanceStmt.step) then + 
let className := (← s.readInstanceStmt.columnText 0).toName + done s.readInstanceStmt + s.readDefinitionStmt.bind 1 moduleName + s.readDefinitionStmt.bind 2 position + if (← s.readDefinitionStmt.step) then + let isUnsafe := (← s.readDefinitionStmt.columnInt64 0) != 0 + let hintsStr ← s.readDefinitionStmt.columnText 1 + let isNonComputable := (← s.readDefinitionStmt.columnInt64 2) != 0 + done s.readDefinitionStmt + let hints : ReducibilityHints := match hintsStr with + | "opaque" => .opaque + | "abbrev" => .abbrev + | s => .regular (s.toNat?.getD 0 |>.toUInt32) + let equations ← s.loadEquations moduleName position + let typeNames ← s.loadInstanceArgs moduleName position + return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable, className, typeNames } + done s.readDefinitionStmt + else + done s.readInstanceStmt + return none + readInductive (info : Process.Info) : IO (Option Process.DocInfo) := do + s.readInductiveStmt.bind 1 moduleName + s.readInductiveStmt.bind 2 position + if (← s.readInductiveStmt.step) then + let isUnsafe := (← s.readInductiveStmt.columnInt64 0) != 0 + done s.readInductiveStmt + let ctors ← s.loadConstructors moduleName position + return some <| .inductiveInfo { toInfo := info, isUnsafe, ctors } + done s.readInductiveStmt + return none + readStructure (mk : Process.StructureInfo → Process.DocInfo) (info : Process.Info) : IO (Option Process.DocInfo) := do + s.readStructureStmt.bind 1 moduleName + s.readStructureStmt.bind 2 position + if (← s.readStructureStmt.step) then + done s.readStructureStmt + let parents ← s.loadStructureParents moduleName position + let fieldInfo ← s.loadStructureFields moduleName position + let some ctor ← s.loadStructureConstructor moduleName position + | return none + return some <| mk { toInfo := info, fieldInfo, parents, ctor } + done s.readStructureStmt + return none + readClassInductive (info : Process.Info) : IO (Option Process.DocInfo) := do + s.readClassInductiveStmt.bind 1 
moduleName + s.readClassInductiveStmt.bind 2 position + if (← s.readClassInductiveStmt.step) then + let isUnsafe := (← s.readClassInductiveStmt.columnInt64 0) != 0 + done s.readClassInductiveStmt + let ctors ← s.loadConstructors moduleName position + return some <| .classInductiveInfo { toInfo := info, isUnsafe, ctors } + done s.readClassInductiveStmt + return none + +open Lean SQLite.Blob in +private def ReadStmts.getModuleNames (s : ReadStmts) : IO (Array Name) := withDbContext "read:modules:names" do + let mut names := #[] + while (← s.getModuleNamesStmt.step) do + let name := (← s.getModuleNamesStmt.columnText 0).toName + names := names.push name + done s.getModuleNamesStmt + return names + +open Lean SQLite.Blob in +private def ReadStmts.getModuleSourceUrls (s : ReadStmts) : IO (Std.HashMap Name String) := withDbContext "read:modules:source_urls" do + let mut urls : Std.HashMap Name String := {} + while (← s.getModuleSourceUrlsStmt.step) do + let name := (← s.getModuleSourceUrlsStmt.columnText 0).toName + let url ← s.getModuleSourceUrlsStmt.columnText 1 + urls := urls.insert name url + done s.getModuleSourceUrlsStmt + return urls + +open Lean SQLite.Blob in +private def ReadStmts.getModuleImports (s : ReadStmts) (moduleName : Name) : IO (Array Name) := withDbContext "read:module_imports" do + s.getModuleImportsStmt.bind 1 moduleName.toString + let mut imports := #[] + while (← s.getModuleImportsStmt.step) do + let name := (← s.getModuleImportsStmt.columnText 0).toName + imports := imports.push name + done s.getModuleImportsStmt + return imports + +open Lean SQLite.Blob in +private def ReadStmts.buildName2ModIdx (s : ReadStmts) (moduleNames : Array Name) : IO (Std.HashMap Name ModuleIdx) := do + let modNameToIdx : Std.HashMap Name ModuleIdx := + moduleNames.foldl (init := {}) fun acc modName => + acc.insert modName acc.size + let mut result : Std.HashMap Name ModuleIdx := {} + while (← s.buildNameInfoStmt.step) do + let name := (← s.buildNameInfoStmt.columnText 
0).toName + let moduleName := (← s.buildNameInfoStmt.columnText 1).toName + if let some idx := modNameToIdx[moduleName]? then + result := result.insert name idx + done s.buildNameInfoStmt + while (← s.buildInternalNamesStmt.step) do + let name := (← s.buildInternalNamesStmt.columnText 0).toName + if !result.contains name then + let targetModule := (← s.buildInternalNamesStmt.columnText 1).toName + if let some idx := modNameToIdx[targetModule]? then + result := result.insert name idx + done s.buildInternalNamesStmt + return result + +open Lean SQLite.Blob in +private def ReadStmts.loadModule (s : ReadStmts) (moduleName : Name) : IO Process.Module := do + let modNameStr := moduleName.toString + let imports ← s.getModuleImports moduleName + s.loadModuleStmt.bind 1 modNameStr + let mut members : Array (Int64 × Process.ModuleMember) := #[] + while (← s.loadModuleStmt.step) do + let position ← s.loadModuleStmt.columnInt64 0 + let kind ← s.loadModuleStmt.columnText 1 + let name := (← s.loadModuleStmt.columnText 2).toName + let typeBlob ← s.loadModuleStmt.columnBlob 3 + let sorried := (← s.loadModuleStmt.columnInt64 4) != 0 + let render := (← s.loadModuleStmt.columnInt64 5) != 0 + if let some docInfo ← s.loadDocInfo modNameStr position kind name typeBlob sorried render then + members := members.push (position, .docInfo docInfo) + done s.loadModuleStmt + s.loadModuleDocsStmt.bind 1 modNameStr + s.loadModuleDocsStmt.bind 2 modNameStr + while (← s.loadModuleDocsStmt.step) do + let position ← s.loadModuleDocsStmt.columnInt64 0 + let doc ← s.loadModuleDocsStmt.columnText 1 + if let some declRange ← s.loadDeclarationRange modNameStr position then + members := members.push (position, .modDoc { doc, declarationRange := declRange }) + done s.loadModuleDocsStmt + let sortedMembers := members.qsort fun (pos1, m1) (pos2, m2) => + let r1 := m1.getDeclarationRange.pos + let r2 := m2.getDeclarationRange.pos + if Position.lt r1 r2 then true + else if Position.lt r2 r1 then false + else 
pos1 < pos2 + s.loadTacticsStmt.bind 1 modNameStr + let mut tactics : Array (Process.TacticInfo Process.MarkdownDocstring) := #[] + while (← s.loadTacticsStmt.step) do + let internalName := (← s.loadTacticsStmt.columnText 0).toName + let userName ← s.loadTacticsStmt.columnText 1 + let docString ← s.loadTacticsStmt.columnText 2 + s.loadTacticTagsStmt.bind 1 modNameStr + s.loadTacticTagsStmt.bind 2 internalName.toString + let mut tags : Array Name := #[] + while (← s.loadTacticTagsStmt.step) do + tags := tags.push (← s.loadTacticTagsStmt.columnText 0).toName + done s.loadTacticTagsStmt + tactics := tactics.push { internalName, userName, tags, docString, definingModule := moduleName } + done s.loadTacticsStmt + return { name := moduleName, members := sortedMembers.map (·.2), imports, tactics } + +def mkReadOps (sqlite : SQLite) (values : DocstringValues) : IO ReadOps := do + let s ← ReadStmts.prepare sqlite values + pure { + getModuleNames := s.getModuleNames + getModuleSourceUrls := s.getModuleSourceUrls + getModuleImports := s.getModuleImports + buildName2ModIdx := s.buildName2ModIdx + loadModule := s.loadModule + } + +end DocGen4.DB diff --git a/DocGen4/DB/Schema.lean b/DocGen4/DB/Schema.lean new file mode 100644 index 00000000..19a0dd0d --- /dev/null +++ b/DocGen4/DB/Schema.lean @@ -0,0 +1,350 @@ + +import DocGen4.RenderedCode +import SQLite + +namespace DocGen4.DB + +open Lean in +/-- +Extracts a deterministic string representation of an inductive type, which is used to invalidate +database schemas in which blobs implicitly depend on serializations of datatypes. Includes +constructor names and their types. +-/ +private def inductiveRepr (env : Environment) (name : Name) : String := Id.run do + let some (.inductInfo info) := env.find? name | return s!"not found: {name}" + let mut s := s!"inductive {name} : {info.type}\n" + for ctor in info.ctors do + let some (.ctorInfo ctorInfo) := env.find? 
ctor | continue + let ctorName := ctor.replacePrefix name .anonymous + s := s ++ s!" | {ctorName} : {ctorInfo.type}\n" + return s + +namespace Internals +open Lean Elab Term in +/-- +Gets a string representation of inductive type definitions, computed at compile time. +-/ +scoped elab "inductiveRepr![" types:ident,* "]" : term => do + let env ← getEnv + let mut reprs : Array String := #[] + for type in types.getElems do + let name ← resolveGlobalConstNoOverload type + reprs := reprs.push (inductiveRepr env name) + return .lit (.strVal (String.intercalate "\n" reprs.toList)) +end Internals + +open Internals in +open Lean.Widget in +/-- +The datatypes that are serialized to the database. If they change, then the database should be +rebuilt. +-/ +def serializedCodeTypeDefs : String := + inductiveRepr![ + SortFormer, + RenderedCode.Tag, + TaggedText + ] + +def getDb (dbFile : System.FilePath) : IO SQLite := do + -- SQLite atomically creates the DB file, and the schema and journal settings here are applied + -- idempotently. This avoids DB creation race conditions. 
+  let db ← SQLite.openWith dbFile .readWriteCreate
+  db.exec "PRAGMA busy_timeout = 86400000" -- 24 hours - effectively no timeout for parallel builds
+  db.exec "PRAGMA journal_mode = WAL"
+  db.exec "PRAGMA synchronous = OFF"
+  db.exec "PRAGMA foreign_keys = ON"
+  try
+    db.transaction (db.exec ddl)
+  catch
+    | e =>
+      throw <| .userError s!"Exception while creating schema: {e}"
+  -- Check schema version via DDL hash and type definition hash
+  let ddlHash := toString ddl.hash
+  let typeHash := toString serializedCodeTypeDefs.hash
+  let stmt ← db.prepare "SELECT key, value FROM schema_meta"
+  let mut storedDdlHash : Option String := none
+  let mut storedTypeHash : Option String := none
+  while ← stmt.step do
+    let key ← stmt.columnText 0
+    let value ← stmt.columnText 1
+    if key == "ddl_hash" then storedDdlHash := some value
+    if key == "type_hash" then storedTypeHash := some value
+  match storedDdlHash, storedTypeHash with
+  | none, none =>
+    -- New database, store the hashes (OR IGNORE keeps concurrent initialization idempotent)
+    db.exec s!"INSERT OR IGNORE INTO schema_meta (key, value) VALUES ('ddl_hash', '{ddlHash}')"
+    db.exec s!"INSERT OR IGNORE INTO schema_meta (key, value) VALUES ('type_hash', '{typeHash}')"
+  | some stored, _ =>
+    if stored != ddlHash then
+      throw <| .userError s!"Database schema is outdated (DDL hash mismatch). Run `lake clean` or delete '{dbFile}' and rebuild."
+    match storedTypeHash with
+    | none =>
+      -- Older DB without type hash, add it
+      db.exec s!"INSERT OR IGNORE INTO schema_meta (key, value) VALUES ('type_hash', '{typeHash}')"
+    | some storedType =>
+      if storedType != typeHash then
+        throw <| .userError s!"Database schema is outdated (serialized type definitions changed). Run `lake clean` or delete '{dbFile}' and rebuild."
+ | none, some _ => -- Shouldn't happen, but handle gracefully + db.exec s!"INSERT INTO schema_meta (key, value) VALUES ('ddl_hash', '{ddlHash}')" + return db +where + ddl := + r#" +PRAGMA journal_mode = WAL; + +-- Modules table +CREATE TABLE IF NOT EXISTS modules ( + name TEXT PRIMARY KEY, + source_url TEXT +); + +-- Direct imports +CREATE TABLE IF NOT EXISTS module_imports ( + importer TEXT NOT NULL, + imported TEXT NOT NULL, + PRIMARY KEY (importer, imported), + FOREIGN KEY (importer) REFERENCES modules(name) ON DELETE CASCADE + -- There's no + -- FOREIGN KEY (imported) REFERENCES modules(name) + -- because docs are built incrementally. +); + +-- Index for reverse queries: "what imports this module?" +CREATE INDEX IF NOT EXISTS idx_module_imports_imported ON module_imports(imported); + +CREATE TABLE IF NOT EXISTS module_items ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + item_type TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS declaration_ranges ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + start_line INTEGER NOT NULL, + start_column INTEGER NOT NULL, + start_utf16 INTEGER NOT NULL, + end_line INTEGER NOT NULL, + end_column INTEGER NOT NULL, + end_utf16 INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS markdown_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + text TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS verso_docstrings ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + content BLOB NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS name_info ( + module_name 
 TEXT NOT NULL,
+  position INTEGER NOT NULL,
+  kind TEXT,
+  name TEXT NOT NULL,
+  type TEXT NOT NULL,
+  sorried INTEGER NOT NULL,
+  render INTEGER NOT NULL,
+  PRIMARY KEY (module_name, position),
+  FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE
+);
+
+CREATE TABLE IF NOT EXISTS axioms (
+  module_name TEXT NOT NULL,
+  position INTEGER NOT NULL,
+  is_unsafe INTEGER NOT NULL,
+  PRIMARY KEY (module_name, position),
+  FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE
+);
+
+-- Internal names (like recursors) that aren't rendered but should link to a rendered declaration
+CREATE TABLE IF NOT EXISTS internal_names (
+  name TEXT NOT NULL PRIMARY KEY,
+  target_module TEXT NOT NULL,
+  target_position INTEGER NOT NULL,
+  FOREIGN KEY (target_module, target_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE
+);
+
+-- Index for CASCADE deletes: when name_info rows are deleted, find matching internal_names
+CREATE INDEX IF NOT EXISTS idx_internal_names_target ON internal_names(target_module, target_position);
+
+CREATE TABLE IF NOT EXISTS constructors (
+  module_name TEXT NOT NULL,
+  position INTEGER NOT NULL,
+  type_position INTEGER NOT NULL,
+  PRIMARY KEY (module_name, position),
+  FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE,
+  FOREIGN KEY (module_name, type_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE
+);
+
+-- Index for CASCADE deletes on the second FK (type_position)
+CREATE INDEX IF NOT EXISTS idx_constructors_type_pos ON constructors(module_name, type_position);
+
+CREATE TABLE IF NOT EXISTS inductives (
+  module_name TEXT NOT NULL,
+  position INTEGER NOT NULL,
+  is_unsafe INTEGER NOT NULL,
+  PRIMARY KEY (module_name, position),
+  FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE
+);
+
+CREATE TABLE IF NOT EXISTS class_inductives (
+  module_name TEXT NOT
NULL, + position INTEGER NOT NULL, + is_unsafe INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS opaques ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + safety TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS definitions ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + is_unsafe INTEGER NOT NULL, + hints TEXT NOT NULL, + is_noncomputable INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS definition_equations ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + code TEXT NOT NULL, + sequence INTEGER NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS instances ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + class_name TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS instance_args ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + type_name TEXT NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES instances(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS structures ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + is_class INTEGER NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF 
NOT EXISTS structure_parents ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + projection_fn TEXT NOT NULL, + type TEXT NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES structures(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS structure_constructors ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, -- The structure's position + ctor_position INTEGER NOT NULL, + name TEXT NOT NULL, + type BLOB NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS structure_fields ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + proj_name TEXT NOT NULL, + type BLOB NOT NULL, + is_direct INTEGER NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE + -- Note: No FK on proj_name because the projection function may be in a different module + -- (for inherited fields) that hasn't been processed yet. The JOIN at load time handles this. 
+); + +CREATE TABLE IF NOT EXISTS structure_field_args ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + field_sequence INTEGER NOT NULL, + arg_sequence INTEGER NOT NULL, + binder BLOB NOT NULL, + is_implicit INTEGER NOT NULL, + PRIMARY KEY (module_name, position, field_sequence, arg_sequence), + FOREIGN KEY (module_name, position, field_sequence) REFERENCES structure_fields(module_name, position, sequence) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS declaration_args ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + binder BLOB NOT NULL, + is_implicit INTEGER NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS declaration_attrs ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + sequence INTEGER NOT NULL, + attr TEXT NOT NULL, + PRIMARY KEY (module_name, position, sequence), + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS tactics ( + module_name TEXT NOT NULL, + internal_name TEXT NOT NULL, + user_name TEXT NOT NULL, + doc_string TEXT NOT NULL, + PRIMARY KEY (module_name, internal_name), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS tactic_tags ( + module_name TEXT NOT NULL, + internal_name TEXT NOT NULL, + tag TEXT NOT NULL, + PRIMARY KEY (module_name, internal_name, tag), + FOREIGN KEY (module_name, internal_name) REFERENCES tactics(module_name, internal_name) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS schema_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); +"# + +end DocGen4.DB diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 3e5a6860..c20193d9 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -22,7 +22,7 @@ import DocGen4.Helpers namespace DocGen4 -open Lean IO System 
Output Process +open Lean IO System Output Process DB def collectBackrefs (buildDir : System.FilePath) : IO (Array BackrefItem) := do let mut backrefs : Array BackrefItem := #[] @@ -106,8 +106,8 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi -- Spawn one task per 100 modules, each returning its output file path let tasks ← (chunksOf targetModules 100).flatMapM fun mods => mods.mapM fun modName => IO.asTask do -- Each task opens its own DB connection (SQLite handles concurrent readers well) - let db ← openDbForReading dbPath - let module ← loadModule db modName + let db ← DB.openForReading dbPath + let module ← db.loadModule modName -- Build a minimal AnalyzerResult with just this module's info let result : AnalyzerResult := { diff --git a/Main.lean b/Main.lean index 95c344b3..3c6bd73e 100644 --- a/Main.lean +++ b/Main.lean @@ -19,7 +19,7 @@ def runSingleCmd (p : Parsed) : IO UInt32 := do let relevantModules := #[p.positionalArg! "module" |>.as! String |> String.toName] let sourceUri := p.positionalArg! "sourceUri" |>.as! String let doc ← load <| .analyzeConcreteModules relevantModules - updateModuleDb doc buildDir dbFile (some sourceUri) + updateModuleDb {} doc buildDir dbFile (some sourceUri) return 0 def runGenCoreCmd (p : Parsed) : IO UInt32 := do @@ -29,7 +29,7 @@ def runGenCoreCmd (p : Parsed) : IO UInt32 := do let dbFile := p.positionalArg! "db" |>.as! String let module := p.positionalArg! "module" |>.as! String |> String.toName let doc ← load <| .analyzePrefixModules module - updateModuleDb doc buildDir dbFile none + updateModuleDb {} doc buildDir dbFile none return 0 def runDocGenCmd (_p : Parsed) : IO UInt32 := do @@ -72,15 +72,15 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do let moduleRoots := (p.variableArgsAs! 
String).map String.toName -- Load linking context (module names, source URLs, declaration locations) - let db ← openDbForReading dbPath - let linkCtx ← loadLinkingContext db + let db ← DB.openForReading dbPath + let linkCtx ← db.loadLinkingContext -- Determine which modules to generate HTML for let targetModules ← if moduleRoots.isEmpty then pure linkCtx.moduleNames else - getTransitiveImports db moduleRoots + db.getTransitiveImports moduleRoots -- Add `references` pseudo-module to hierarchy since references.html is always generated let hierarchy := Hierarchy.fromArray (targetModules.push `references) From f2ad0648d3694c8b1b462b0e3cc89b178af8c8ab Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 9 Feb 2026 12:16:01 +0100 Subject: [PATCH 047/106] wip handlers --- DocGen4/DB/VersoDocString.lean | 473 +++++++++++++++++++++++++++++++-- Main.lean | 4 +- 2 files changed, 447 insertions(+), 30 deletions(-) diff --git a/DocGen4/DB/VersoDocString.lean b/DocGen4/DB/VersoDocString.lean index 0c0c55f5..f198f8d5 100644 --- a/DocGen4/DB/VersoDocString.lean +++ b/DocGen4/DB/VersoDocString.lean @@ -15,20 +15,18 @@ structure DocstringDataHandler where deserialize : Deserializer Dynamic structure DocstringValues where - inlines : NameMap DocstringDataHandler := {} - blocks : NameMap DocstringDataHandler := {} + handlers : NameMap DocstringDataHandler := {} + +private def toBinaryElab (vals : DocstringValues) (name : Name) (val : Dynamic) (b : ByteArray) : ByteArray := + match vals.handlers.get? name with + | none => b.push 0 |> ToBinary.serializer name + | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val def toBinaryElabInline (vals : DocstringValues) : Serializer ElabInline - | { name, val }, b => - match vals.inlines.get? 
name with - | none => b.push 0 |> ToBinary.serializer name - | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val + | { name, val }, b => toBinaryElab vals name val b def toBinaryElabBlock (vals : DocstringValues) : Serializer ElabBlock - | { name, val }, b => - match vals.blocks.get? name with - | none => b.push 0 |> ToBinary.serializer name - | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val + | { name, val }, b => toBinaryElab vals name val b structure Unknown where deriving BEq, Hashable, Ord, DecidableEq, Inhabited, TypeName @@ -36,33 +34,27 @@ deriving BEq, Hashable, Ord, DecidableEq, Inhabited, TypeName instance : Subsingleton Unknown where allEq := by intros; rfl -def fromBinaryElabInline (vals : DocstringValues) : Deserializer ElabInline := do +private def fromBinaryElab (vals : DocstringValues) (label : String) : Deserializer (Name × Dynamic) := do match (← Deserializer.byte) with | 0 => let name ← FromBinary.deserializer - pure { name := `unknown ++ name, val := .mk Unknown.mk } + pure (`unknown ++ name, .mk Unknown.mk) | 1 => let name ← FromBinary.deserializer - match vals.inlines.get? name with - | none => pure { name := `unknown ++ name, val := .mk Unknown.mk } + match vals.handlers.get? name with + | none => pure (`unknown ++ name, .mk Unknown.mk) | some d => let val ← d.deserialize - pure { name, val } - | other => throw s!"Expected 0 or 1 for `ElabInline`'s tag, got `{other}`" + pure (name, val) + | other => throw s!"Expected 0 or 1 for `{label}`'s tag, got `{other}`" + +def fromBinaryElabInline (vals : DocstringValues) : Deserializer ElabInline := do + let (name, val) ← fromBinaryElab vals "ElabInline" + pure { name, val } def fromBinaryElabBlock (vals : DocstringValues) : Deserializer ElabBlock := do - match (← Deserializer.byte) with - | 0 => - let name ← FromBinary.deserializer - pure { name := `unknown ++ name, val := .mk Unknown.mk } - | 1 => - let name ← FromBinary.deserializer - match vals.blocks.get? 
name with - | none => pure { name := `unknown ++ name, val := .mk Unknown.mk } - | some d => - let val ← d.deserialize - pure { name, val } - | other => throw s!"Expected 0 or 1 for `ElabBlock`'s tag, got `{other}`" + let (name, val) ← fromBinaryElab vals "ElabBlock" + pure { name, val } partial instance [ToBinary i] : ToBinary (Doc.Inline i) where serializer := go @@ -205,3 +197,428 @@ def versoDocStringFromBinary (values : DocstringValues) : FromBinary VersoDocStr def versoDocStringQueryParam (values : DocstringValues) : SQLite.QueryParam VersoDocString := have := versoDocStringToBinary values .asBlob + +/-! ## Builtin Data.* handlers -/ + +section BuiltinHandlers +open Doc + +private def mkHandler (α : Type) [TypeName α] [ToBinary α] [FromBinary α] : Name × DocstringDataHandler := + (TypeName.typeName α, + { serialize := fun dyn b => + match dyn.get? α with + | some val => ToBinary.serializer val b + | none => b -- should not happen if names match + deserialize := Dynamic.mk <$> (FromBinary.deserializer : Deserializer α) }) + +partial instance : ToBinary Format where + serializer := go +where go + | .nil, b => b.push 0 + | .line, b => b.push 1 + | .align force, b => b.push 2 |> ToBinary.serializer force + | .text s, b => b.push 3 |> ToBinary.serializer s + | .nest n f, b => + have : ToBinary Format := ⟨go⟩ + b.push 4 |> ToBinary.serializer n |> ToBinary.serializer f + | .append f₁ f₂, b => + have : ToBinary Format := ⟨go⟩ + b.push 5 |> ToBinary.serializer f₁ |> ToBinary.serializer f₂ + | .group f behavior, b => + have : ToBinary Format := ⟨go⟩ + let behaviorTag : UInt8 := match behavior with | .allOrNone => 0 | .fill => 1 + b.push 6 |> ToBinary.serializer f |> (·.push behaviorTag) + | .tag n f, b => + have : ToBinary Format := ⟨go⟩ + b.push 7 |> ToBinary.serializer n |> ToBinary.serializer f + +partial instance : FromBinary Format where + deserializer := go +where go := do + have : FromBinary Format := ⟨go⟩ + match (← Deserializer.byte) with + | 0 => pure .nil + 
| 1 => pure .line + | 2 => .align <$> FromBinary.deserializer + | 3 => .text <$> FromBinary.deserializer + | 4 => .nest <$> FromBinary.deserializer <*> FromBinary.deserializer + | 5 => .append <$> FromBinary.deserializer <*> FromBinary.deserializer + | 6 => + let f ← go + let behaviorTag ← Deserializer.byte + let behavior := if behaviorTag == 1 then .fill else .allOrNone + pure (.group f behavior) + | 7 => .tag <$> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected tag 0..7 for Format, got {other}" + +private instance : ToBinary FVarId where + serializer fv b := ToBinary.serializer fv.name b +private instance : FromBinary FVarId where + deserializer := FVarId.mk <$> FromBinary.deserializer + +private instance : ToBinary MVarId where + serializer mv b := ToBinary.serializer mv.name b +private instance : FromBinary MVarId where + deserializer := MVarId.mk <$> FromBinary.deserializer + +private instance : ToBinary LevelMVarId where + serializer lmv b := ToBinary.serializer lmv.name b +private instance : FromBinary LevelMVarId where + deserializer := LevelMVarId.mk <$> FromBinary.deserializer + +partial instance : ToBinary Level where + serializer := go +where go + | .zero, b => b.push 0 + | .succ l, b => + have : ToBinary Level := ⟨go⟩ + b.push 1 |> ToBinary.serializer l + | .max l₁ l₂, b => + have : ToBinary Level := ⟨go⟩ + b.push 2 |> ToBinary.serializer l₁ |> ToBinary.serializer l₂ + | .imax l₁ l₂, b => + have : ToBinary Level := ⟨go⟩ + b.push 3 |> ToBinary.serializer l₁ |> ToBinary.serializer l₂ + | .param name, b => b.push 4 |> ToBinary.serializer name + | .mvar id, b => b.push 5 |> ToBinary.serializer id + +partial instance : FromBinary Level where + deserializer := go +where go := do + have : FromBinary Level := ⟨go⟩ + match (← Deserializer.byte) with + | 0 => pure .zero + | 1 => .succ <$> FromBinary.deserializer + | 2 => .max <$> FromBinary.deserializer <*> FromBinary.deserializer + | 3 => .imax <$> FromBinary.deserializer <*> 
FromBinary.deserializer + | 4 => .param <$> FromBinary.deserializer + | 5 => .mvar <$> FromBinary.deserializer + | other => throw s!"Expected tag 0..5 for Level, got {other}" + +private instance : ToBinary Literal where + serializer + | .natVal n, b => b.push 0 |> ToBinary.serializer n + | .strVal s, b => b.push 1 |> ToBinary.serializer s +private instance : FromBinary Literal where + deserializer := do + match (← Deserializer.byte) with + | 0 => .natVal <$> FromBinary.deserializer + | 1 => .strVal <$> FromBinary.deserializer + | other => throw s!"Expected tag 0..1 for Literal, got {other}" + +private instance : ToBinary BinderInfo where + serializer + | .default, b => b.push 0 + | .implicit, b => b.push 1 + | .strictImplicit, b => b.push 2 + | .instImplicit, b => b.push 3 +private instance : FromBinary BinderInfo where + deserializer := do + match (← Deserializer.byte) with + | 0 => pure .default + | 1 => pure .implicit + | 2 => pure .strictImplicit + | 3 => pure .instImplicit + | other => throw s!"Expected tag 0..3 for BinderInfo, got {other}" + +-- SourceInfo: original → canonical synthetic (lossy), synthetic and none preserved +private instance : ToBinary SourceInfo where + serializer + | .original _leading pos _trailing endPos, b => + b.push 0 |> ToBinary.serializer pos.byteIdx |> ToBinary.serializer endPos.byteIdx + | .synthetic pos endPos canonical, b => + b.push 1 |> ToBinary.serializer pos.byteIdx |> ToBinary.serializer endPos.byteIdx |> ToBinary.serializer canonical + | .none, b => b.push 2 +private instance : FromBinary SourceInfo where + deserializer := do + match (← Deserializer.byte) with + | 0 => + let pos ← FromBinary.deserializer + let endPos ← FromBinary.deserializer + pure (.synthetic ⟨pos⟩ ⟨endPos⟩ true) + | 1 => + let pos ← FromBinary.deserializer + let endPos ← FromBinary.deserializer + let canonical ← FromBinary.deserializer + pure (.synthetic ⟨pos⟩ ⟨endPos⟩ canonical) + | 2 => pure .none + | other => throw s!"Expected tag 0..2 for 
SourceInfo, got {other}" + +private instance : ToBinary Substring.Raw where + serializer ss b := b |> ToBinary.serializer ss.str |> ToBinary.serializer ss.startPos.byteIdx |> ToBinary.serializer ss.stopPos.byteIdx +private instance : FromBinary Substring.Raw where + deserializer := do + let str ← FromBinary.deserializer + let startPos ← FromBinary.deserializer + let stopPos ← FromBinary.deserializer + pure ⟨str, ⟨startPos⟩, ⟨stopPos⟩⟩ + +private instance : ToBinary Syntax.Preresolved where + serializer + | .namespace name, b => b.push 0 |> ToBinary.serializer name + | .decl name fields, b => b.push 1 |> ToBinary.serializer name |> ToBinary.serializer fields +private instance : FromBinary Syntax.Preresolved where + deserializer := do + match (← Deserializer.byte) with + | 0 => .namespace <$> FromBinary.deserializer + | 1 => .decl <$> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected tag 0..1 for Syntax.Preresolved, got {other}" + +partial instance : ToBinary Syntax where + serializer := go +where go + | .missing, b => b.push 0 + | .node info kind args, b => + have : ToBinary Syntax := ⟨go⟩ + b.push 1 |> ToBinary.serializer info |> ToBinary.serializer kind |> ToBinary.serializer args + | .atom info val, b => b.push 2 |> ToBinary.serializer info |> ToBinary.serializer val + | .ident info rawVal val preresolved, b => + b.push 3 |> ToBinary.serializer info |> ToBinary.serializer rawVal |> ToBinary.serializer val |> ToBinary.serializer preresolved + +partial instance : FromBinary Syntax where + deserializer := go +where go := do + have : FromBinary Syntax := ⟨go⟩ + match (← Deserializer.byte) with + | 0 => pure .missing + | 1 => .node <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | 2 => .atom <$> FromBinary.deserializer <*> FromBinary.deserializer + | 3 => .ident <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw 
s!"Expected tag 0..3 for Syntax, got {other}" + +private instance : ToBinary DataValue where + serializer + | .ofString s, b => b.push 0 |> ToBinary.serializer s + | .ofBool v, b => b.push 1 |> ToBinary.serializer v + | .ofName n, b => b.push 2 |> ToBinary.serializer n + | .ofNat n, b => b.push 3 |> ToBinary.serializer n + | .ofInt i, b => b.push 4 |> ToBinary.serializer i + | .ofSyntax stx, b => b.push 5 |> ToBinary.serializer stx +private instance : FromBinary DataValue where + deserializer := do + match (← Deserializer.byte) with + | 0 => .ofString <$> FromBinary.deserializer + | 1 => .ofBool <$> FromBinary.deserializer + | 2 => .ofName <$> FromBinary.deserializer + | 3 => .ofNat <$> FromBinary.deserializer + | 4 => .ofInt <$> FromBinary.deserializer + | 5 => .ofSyntax <$> FromBinary.deserializer + | other => throw s!"Expected tag 0..5 for DataValue, got {other}" + +private instance : ToBinary KVMap where + serializer kv b := ToBinary.serializer kv.entries b +private instance : FromBinary KVMap where + deserializer := KVMap.mk <$> FromBinary.deserializer + +partial instance : ToBinary Expr where + serializer := go +where go + | .bvar n, b => b.push 0 |> ToBinary.serializer n + | .fvar id, b => b.push 1 |> ToBinary.serializer id + | .mvar id, b => b.push 2 |> ToBinary.serializer id + | .sort l, b => b.push 3 |> ToBinary.serializer l + | .const name ls, b => b.push 4 |> ToBinary.serializer name |> ToBinary.serializer ls + | .app f a, b => + have : ToBinary Expr := ⟨go⟩ + b.push 5 |> ToBinary.serializer f |> ToBinary.serializer a + | .lam name ty body bi, b => + have : ToBinary Expr := ⟨go⟩ + b.push 6 |> ToBinary.serializer name |> ToBinary.serializer ty |> ToBinary.serializer body |> ToBinary.serializer bi + | .forallE name ty body bi, b => + have : ToBinary Expr := ⟨go⟩ + b.push 7 |> ToBinary.serializer name |> ToBinary.serializer ty |> ToBinary.serializer body |> ToBinary.serializer bi + | .letE name ty val body nonDep, b => + have : ToBinary Expr := ⟨go⟩ + 
b.push 8 |> ToBinary.serializer name |> ToBinary.serializer ty |> ToBinary.serializer val |> ToBinary.serializer body |> ToBinary.serializer nonDep + | .lit l, b => b.push 9 |> ToBinary.serializer l + | .mdata md e, b => + have : ToBinary Expr := ⟨go⟩ + b.push 10 |> ToBinary.serializer md |> ToBinary.serializer e + | .proj name idx e, b => + have : ToBinary Expr := ⟨go⟩ + b.push 11 |> ToBinary.serializer name |> ToBinary.serializer idx |> ToBinary.serializer e + +partial instance : FromBinary Expr where + deserializer := go +where go := do + have : FromBinary Expr := ⟨go⟩ + match (← Deserializer.byte) with + | 0 => .bvar <$> FromBinary.deserializer + | 1 => .fvar <$> FromBinary.deserializer + | 2 => .mvar <$> FromBinary.deserializer + | 3 => .sort <$> FromBinary.deserializer + | 4 => .const <$> FromBinary.deserializer <*> FromBinary.deserializer + | 5 => .app <$> FromBinary.deserializer <*> FromBinary.deserializer + | 6 => .lam <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | 7 => .forallE <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | 8 => .letE <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | 9 => .lit <$> FromBinary.deserializer + | 10 => .mdata <$> FromBinary.deserializer <*> FromBinary.deserializer + | 11 => .proj <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected tag 0..11 for Expr, got {other}" + +private instance : ToBinary LocalDeclKind where + serializer + | .default, b => b.push 0 + | .implDetail, b => b.push 1 + | .auxDecl, b => b.push 2 +private instance : FromBinary LocalDeclKind where + deserializer := do + match (← Deserializer.byte) with + | 0 => pure .default + | 1 => pure .implDetail + | 2 => pure .auxDecl + | other => throw s!"Expected tag 0..2 for 
LocalDeclKind, got {other}" + +private instance : ToBinary LocalDecl where + serializer + | .cdecl idx fvarId userName type bi kind, b => + b.push 0 |> ToBinary.serializer idx |> ToBinary.serializer fvarId |> ToBinary.serializer userName + |> ToBinary.serializer type |> ToBinary.serializer bi |> ToBinary.serializer kind + | .ldecl idx fvarId userName type val nonDep kind, b => + b.push 1 |> ToBinary.serializer idx |> ToBinary.serializer fvarId |> ToBinary.serializer userName + |> ToBinary.serializer type |> ToBinary.serializer val |> ToBinary.serializer nonDep |> ToBinary.serializer kind +private instance : FromBinary LocalDecl where + deserializer := do + match (← Deserializer.byte) with + | 0 => .cdecl <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | 1 => .ldecl <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected tag 0..1 for LocalDecl, got {other}" + +private instance : ToBinary LocalContext where + serializer lctx b := + let decls := lctx.decls.toArray + let auxDecls := lctx.auxDeclToFullName.toArray + b |> ToBinary.serializer decls |> ToBinary.serializer auxDecls +private instance : FromBinary LocalContext where + deserializer := do + let decls : Array (Option LocalDecl) ← FromBinary.deserializer + let auxDecls : Array (FVarId × Name) ← FromBinary.deserializer + let mut lctx := LocalContext.empty + for d? in decls do + match d? 
with + | some d => lctx := lctx.addDecl d + | none => lctx := { lctx with decls := lctx.decls.push none } + for (fv, name) in auxDecls do + lctx := { lctx with auxDeclToFullName := lctx.auxDeclToFullName.insert fv name } + pure lctx + +private instance : ToBinary DocHighlight where + serializer + | .const name sig, b => b.push 0 |> ToBinary.serializer name |> ToBinary.serializer sig + | .var userName fvarId type, b => b.push 1 |> ToBinary.serializer userName |> ToBinary.serializer fvarId |> ToBinary.serializer type + | .field name sig, b => b.push 2 |> ToBinary.serializer name |> ToBinary.serializer sig + | .option name declName, b => b.push 3 |> ToBinary.serializer name |> ToBinary.serializer declName + | .keyword, b => b.push 4 + | .literal kind type?, b => b.push 5 |> ToBinary.serializer kind |> ToBinary.serializer type? +private instance : FromBinary DocHighlight where + deserializer := do + match (← Deserializer.byte) with + | 0 => .const <$> FromBinary.deserializer <*> FromBinary.deserializer + | 1 => .var <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer + | 2 => .field <$> FromBinary.deserializer <*> FromBinary.deserializer + | 3 => .option <$> FromBinary.deserializer <*> FromBinary.deserializer + | 4 => pure .keyword + | 5 => .literal <$> FromBinary.deserializer <*> FromBinary.deserializer + | other => throw s!"Expected tag 0..5 for DocHighlight, got {other}" + +private instance : ToBinary DocCode where + serializer dc b := ToBinary.serializer dc.code b +private instance : FromBinary DocCode where + deserializer := DocCode.mk <$> FromBinary.deserializer + +-- Simple single-Name types +private instance : ToBinary Data.Const where + serializer d b := ToBinary.serializer d.name b +private instance : FromBinary Data.Const where + deserializer := .mk <$> FromBinary.deserializer + +private instance : ToBinary Data.Tactic where + serializer d b := ToBinary.serializer d.name b +private instance : FromBinary Data.Tactic where + 
deserializer := .mk <$> FromBinary.deserializer + +private instance : ToBinary Data.ConvTactic where + serializer d b := ToBinary.serializer d.name b +private instance : FromBinary Data.ConvTactic where + deserializer := .mk <$> FromBinary.deserializer + +private instance : ToBinary Data.SyntaxCat where + serializer d b := ToBinary.serializer d.name b +private instance : FromBinary Data.SyntaxCat where + deserializer := .mk <$> FromBinary.deserializer + +private instance : ToBinary Data.ModuleName where + serializer d b := ToBinary.serializer d.module b +private instance : FromBinary Data.ModuleName where + deserializer := .mk <$> FromBinary.deserializer + +-- Two-Name type +private instance : ToBinary Data.Option where + serializer d b := b |> ToBinary.serializer d.name |> ToBinary.serializer d.declName +private instance : FromBinary Data.Option where + deserializer := .mk <$> FromBinary.deserializer <*> FromBinary.deserializer + +-- Syntax types +private instance : ToBinary Data.Attributes where + serializer d b := ToBinary.serializer d.stx b +private instance : FromBinary Data.Attributes where + deserializer := .mk <$> FromBinary.deserializer + +private instance : ToBinary Data.Attribute where + serializer d b := ToBinary.serializer d.stx b +private instance : FromBinary Data.Attribute where + deserializer := .mk <$> FromBinary.deserializer + +private instance : ToBinary Data.Syntax where + serializer d b := b |> ToBinary.serializer d.category |> ToBinary.serializer d.stx +private instance : FromBinary Data.Syntax where + deserializer := .mk <$> FromBinary.deserializer <*> FromBinary.deserializer + +private instance : ToBinary Data.Local where + serializer d b := b |> ToBinary.serializer d.name |> ToBinary.serializer d.fvarId + |> ToBinary.serializer d.lctx |> ToBinary.serializer d.type +private instance : FromBinary Data.Local where + deserializer := .mk <$> FromBinary.deserializer <*> FromBinary.deserializer + <*> FromBinary.deserializer <*> 
FromBinary.deserializer + +-- DocCode types +private instance : ToBinary Data.LeanBlock where + serializer d b := ToBinary.serializer d.commands b +private instance : FromBinary Data.LeanBlock where + deserializer := .mk <$> FromBinary.deserializer + +private instance : ToBinary Data.LeanTerm where + serializer d b := ToBinary.serializer d.term b +private instance : FromBinary Data.LeanTerm where + deserializer := .mk <$> FromBinary.deserializer + +private instance : ToBinary Data.SetOption where + serializer d b := ToBinary.serializer d.term b +private instance : FromBinary Data.SetOption where + deserializer := .mk <$> FromBinary.deserializer + +def builtinDocstringValues : DocstringValues where + handlers := Id.run do + let mut m : NameMap DocstringDataHandler := {} + for (n, h) in [ + mkHandler Data.Const, + mkHandler Data.Local, + mkHandler Data.Tactic, + mkHandler Data.ConvTactic, + mkHandler Data.Attributes, + mkHandler Data.Attribute, + mkHandler Data.Option, + mkHandler Data.SyntaxCat, + mkHandler Data.Syntax, + mkHandler Data.ModuleName, + mkHandler Data.LeanBlock, + mkHandler Data.LeanTerm, + mkHandler Data.SetOption + ] do + m := m.insert n h + m + +end BuiltinHandlers diff --git a/Main.lean b/Main.lean index 3c6bd73e..03a142c0 100644 --- a/Main.lean +++ b/Main.lean @@ -19,7 +19,7 @@ def runSingleCmd (p : Parsed) : IO UInt32 := do let relevantModules := #[p.positionalArg! "module" |>.as! String |> String.toName] let sourceUri := p.positionalArg! "sourceUri" |>.as! String let doc ← load <| .analyzeConcreteModules relevantModules - updateModuleDb {} doc buildDir dbFile (some sourceUri) + updateModuleDb builtinDocstringValues doc buildDir dbFile (some sourceUri) return 0 def runGenCoreCmd (p : Parsed) : IO UInt32 := do @@ -29,7 +29,7 @@ def runGenCoreCmd (p : Parsed) : IO UInt32 := do let dbFile := p.positionalArg! "db" |>.as! String let module := p.positionalArg! "module" |>.as! 
String |> String.toName let doc ← load <| .analyzePrefixModules module - updateModuleDb {} doc buildDir dbFile none + updateModuleDb builtinDocstringValues doc buildDir dbFile none return 0 def runDocGenCmd (_p : Parsed) : IO UInt32 := do From bd98baed0c54b0a91ecad86123fdaeb146a8e11e Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 15:41:25 +0100 Subject: [PATCH 048/106] chore: use upstream instances --- DocGen4/DB/VersoDocString.lean | 414 ++++----------------------------- lake-manifest.json | 2 +- 2 files changed, 44 insertions(+), 372 deletions(-) diff --git a/DocGen4/DB/VersoDocString.lean b/DocGen4/DB/VersoDocString.lean index f198f8d5..4bf817c2 100644 --- a/DocGen4/DB/VersoDocString.lean +++ b/DocGen4/DB/VersoDocString.lean @@ -211,401 +211,73 @@ private def mkHandler (α : Type) [TypeName α] [ToBinary α] [FromBinary α] : | none => b -- should not happen if names match deserialize := Dynamic.mk <$> (FromBinary.deserializer : Deserializer α) }) -partial instance : ToBinary Format where - serializer := go -where go - | .nil, b => b.push 0 - | .line, b => b.push 1 - | .align force, b => b.push 2 |> ToBinary.serializer force - | .text s, b => b.push 3 |> ToBinary.serializer s - | .nest n f, b => - have : ToBinary Format := ⟨go⟩ - b.push 4 |> ToBinary.serializer n |> ToBinary.serializer f - | .append f₁ f₂, b => - have : ToBinary Format := ⟨go⟩ - b.push 5 |> ToBinary.serializer f₁ |> ToBinary.serializer f₂ - | .group f behavior, b => - have : ToBinary Format := ⟨go⟩ - let behaviorTag : UInt8 := match behavior with | .allOrNone => 0 | .fill => 1 - b.push 6 |> ToBinary.serializer f |> (·.push behaviorTag) - | .tag n f, b => - have : ToBinary Format := ⟨go⟩ - b.push 7 |> ToBinary.serializer n |> ToBinary.serializer f - -partial instance : FromBinary Format where - deserializer := go -where go := do - have : FromBinary Format := ⟨go⟩ - match (← Deserializer.byte) with - | 0 => pure .nil - | 1 => pure .line - | 2 => .align <$> 
FromBinary.deserializer - | 3 => .text <$> FromBinary.deserializer - | 4 => .nest <$> FromBinary.deserializer <*> FromBinary.deserializer - | 5 => .append <$> FromBinary.deserializer <*> FromBinary.deserializer - | 6 => - let f ← go - let behaviorTag ← Deserializer.byte - let behavior := if behaviorTag == 1 then .fill else .allOrNone - pure (.group f behavior) - | 7 => .tag <$> FromBinary.deserializer <*> FromBinary.deserializer - | other => throw s!"Expected tag 0..7 for Format, got {other}" - -private instance : ToBinary FVarId where - serializer fv b := ToBinary.serializer fv.name b -private instance : FromBinary FVarId where - deserializer := FVarId.mk <$> FromBinary.deserializer - -private instance : ToBinary MVarId where - serializer mv b := ToBinary.serializer mv.name b -private instance : FromBinary MVarId where - deserializer := MVarId.mk <$> FromBinary.deserializer - -private instance : ToBinary LevelMVarId where - serializer lmv b := ToBinary.serializer lmv.name b -private instance : FromBinary LevelMVarId where - deserializer := LevelMVarId.mk <$> FromBinary.deserializer - -partial instance : ToBinary Level where - serializer := go -where go - | .zero, b => b.push 0 - | .succ l, b => - have : ToBinary Level := ⟨go⟩ - b.push 1 |> ToBinary.serializer l - | .max l₁ l₂, b => - have : ToBinary Level := ⟨go⟩ - b.push 2 |> ToBinary.serializer l₁ |> ToBinary.serializer l₂ - | .imax l₁ l₂, b => - have : ToBinary Level := ⟨go⟩ - b.push 3 |> ToBinary.serializer l₁ |> ToBinary.serializer l₂ - | .param name, b => b.push 4 |> ToBinary.serializer name - | .mvar id, b => b.push 5 |> ToBinary.serializer id - -partial instance : FromBinary Level where - deserializer := go -where go := do - have : FromBinary Level := ⟨go⟩ - match (← Deserializer.byte) with - | 0 => pure .zero - | 1 => .succ <$> FromBinary.deserializer - | 2 => .max <$> FromBinary.deserializer <*> FromBinary.deserializer - | 3 => .imax <$> FromBinary.deserializer <*> FromBinary.deserializer - | 4 => .param 
<$> FromBinary.deserializer - | 5 => .mvar <$> FromBinary.deserializer - | other => throw s!"Expected tag 0..5 for Level, got {other}" - -private instance : ToBinary Literal where - serializer - | .natVal n, b => b.push 0 |> ToBinary.serializer n - | .strVal s, b => b.push 1 |> ToBinary.serializer s -private instance : FromBinary Literal where - deserializer := do - match (← Deserializer.byte) with - | 0 => .natVal <$> FromBinary.deserializer - | 1 => .strVal <$> FromBinary.deserializer - | other => throw s!"Expected tag 0..1 for Literal, got {other}" - -private instance : ToBinary BinderInfo where - serializer - | .default, b => b.push 0 - | .implicit, b => b.push 1 - | .strictImplicit, b => b.push 2 - | .instImplicit, b => b.push 3 -private instance : FromBinary BinderInfo where - deserializer := do - match (← Deserializer.byte) with - | 0 => pure .default - | 1 => pure .implicit - | 2 => pure .strictImplicit - | 3 => pure .instImplicit - | other => throw s!"Expected tag 0..3 for BinderInfo, got {other}" - -- SourceInfo: original → canonical synthetic (lossy), synthetic and none preserved private instance : ToBinary SourceInfo where serializer | .original _leading pos _trailing endPos, b => - b.push 0 |> ToBinary.serializer pos.byteIdx |> ToBinary.serializer endPos.byteIdx + b.push 0 |> ToBinary.serializer pos.byteIdx |> ToBinary.serializer endPos.byteIdx |> ToBinary.serializer true | .synthetic pos endPos canonical, b => - b.push 1 |> ToBinary.serializer pos.byteIdx |> ToBinary.serializer endPos.byteIdx |> ToBinary.serializer canonical - | .none, b => b.push 2 + b.push 0 |> ToBinary.serializer pos.byteIdx |> ToBinary.serializer endPos.byteIdx |> ToBinary.serializer canonical + | .none, b => b.push 1 private instance : FromBinary SourceInfo where deserializer := do match (← Deserializer.byte) with | 0 => - let pos ← FromBinary.deserializer - let endPos ← FromBinary.deserializer - pure (.synthetic ⟨pos⟩ ⟨endPos⟩ true) - | 1 => let pos ← FromBinary.deserializer let 
endPos ← FromBinary.deserializer let canonical ← FromBinary.deserializer pure (.synthetic ⟨pos⟩ ⟨endPos⟩ canonical) - | 2 => pure .none - | other => throw s!"Expected tag 0..2 for SourceInfo, got {other}" + | 1 => pure .none + | other => throw s!"Expected tag 0 or 1 for SourceInfo, got {other}" -private instance : ToBinary Substring.Raw where - serializer ss b := b |> ToBinary.serializer ss.str |> ToBinary.serializer ss.startPos.byteIdx |> ToBinary.serializer ss.stopPos.byteIdx -private instance : FromBinary Substring.Raw where - deserializer := do - let str ← FromBinary.deserializer - let startPos ← FromBinary.deserializer - let stopPos ← FromBinary.deserializer - pure ⟨str, ⟨startPos⟩, ⟨stopPos⟩⟩ +private instance : ToBinary Substring.Raw := .via (Substring.Raw.toString) -private instance : ToBinary Syntax.Preresolved where - serializer - | .namespace name, b => b.push 0 |> ToBinary.serializer name - | .decl name fields, b => b.push 1 |> ToBinary.serializer name |> ToBinary.serializer fields -private instance : FromBinary Syntax.Preresolved where - deserializer := do - match (← Deserializer.byte) with - | 0 => .namespace <$> FromBinary.deserializer - | 1 => .decl <$> FromBinary.deserializer <*> FromBinary.deserializer - | other => throw s!"Expected tag 0..1 for Syntax.Preresolved, got {other}" +private instance : FromBinary Substring.Raw := + .via fun (s : String) => { str := s, startPos := s.rawStartPos, stopPos := s.rawEndPos : Substring.Raw} -partial instance : ToBinary Syntax where - serializer := go -where go - | .missing, b => b.push 0 - | .node info kind args, b => - have : ToBinary Syntax := ⟨go⟩ - b.push 1 |> ToBinary.serializer info |> ToBinary.serializer kind |> ToBinary.serializer args - | .atom info val, b => b.push 2 |> ToBinary.serializer info |> ToBinary.serializer val - | .ident info rawVal val preresolved, b => - b.push 3 |> ToBinary.serializer info |> ToBinary.serializer rawVal |> ToBinary.serializer val |> ToBinary.serializer preresolved - 
-partial instance : FromBinary Syntax where - deserializer := go -where go := do - have : FromBinary Syntax := ⟨go⟩ - match (← Deserializer.byte) with - | 0 => pure .missing - | 1 => .node <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | 2 => .atom <$> FromBinary.deserializer <*> FromBinary.deserializer - | 3 => .ident <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | other => throw s!"Expected tag 0..3 for Syntax, got {other}" +deriving instance ToBinary, FromBinary for Syntax.Preresolved -private instance : ToBinary DataValue where - serializer - | .ofString s, b => b.push 0 |> ToBinary.serializer s - | .ofBool v, b => b.push 1 |> ToBinary.serializer v - | .ofName n, b => b.push 2 |> ToBinary.serializer n - | .ofNat n, b => b.push 3 |> ToBinary.serializer n - | .ofInt i, b => b.push 4 |> ToBinary.serializer i - | .ofSyntax stx, b => b.push 5 |> ToBinary.serializer stx -private instance : FromBinary DataValue where - deserializer := do - match (← Deserializer.byte) with - | 0 => .ofString <$> FromBinary.deserializer - | 1 => .ofBool <$> FromBinary.deserializer - | 2 => .ofName <$> FromBinary.deserializer - | 3 => .ofNat <$> FromBinary.deserializer - | 4 => .ofInt <$> FromBinary.deserializer - | 5 => .ofSyntax <$> FromBinary.deserializer - | other => throw s!"Expected tag 0..5 for DataValue, got {other}" - -private instance : ToBinary KVMap where - serializer kv b := ToBinary.serializer kv.entries b -private instance : FromBinary KVMap where - deserializer := KVMap.mk <$> FromBinary.deserializer - -partial instance : ToBinary Expr where - serializer := go -where go - | .bvar n, b => b.push 0 |> ToBinary.serializer n - | .fvar id, b => b.push 1 |> ToBinary.serializer id - | .mvar id, b => b.push 2 |> ToBinary.serializer id - | .sort l, b => b.push 3 |> ToBinary.serializer l - | .const name ls, b => b.push 4 |> ToBinary.serializer name |> ToBinary.serializer ls - 
| .app f a, b => - have : ToBinary Expr := ⟨go⟩ - b.push 5 |> ToBinary.serializer f |> ToBinary.serializer a - | .lam name ty body bi, b => - have : ToBinary Expr := ⟨go⟩ - b.push 6 |> ToBinary.serializer name |> ToBinary.serializer ty |> ToBinary.serializer body |> ToBinary.serializer bi - | .forallE name ty body bi, b => - have : ToBinary Expr := ⟨go⟩ - b.push 7 |> ToBinary.serializer name |> ToBinary.serializer ty |> ToBinary.serializer body |> ToBinary.serializer bi - | .letE name ty val body nonDep, b => - have : ToBinary Expr := ⟨go⟩ - b.push 8 |> ToBinary.serializer name |> ToBinary.serializer ty |> ToBinary.serializer val |> ToBinary.serializer body |> ToBinary.serializer nonDep - | .lit l, b => b.push 9 |> ToBinary.serializer l - | .mdata md e, b => - have : ToBinary Expr := ⟨go⟩ - b.push 10 |> ToBinary.serializer md |> ToBinary.serializer e - | .proj name idx e, b => - have : ToBinary Expr := ⟨go⟩ - b.push 11 |> ToBinary.serializer name |> ToBinary.serializer idx |> ToBinary.serializer e - -partial instance : FromBinary Expr where - deserializer := go -where go := do - have : FromBinary Expr := ⟨go⟩ - match (← Deserializer.byte) with - | 0 => .bvar <$> FromBinary.deserializer - | 1 => .fvar <$> FromBinary.deserializer - | 2 => .mvar <$> FromBinary.deserializer - | 3 => .sort <$> FromBinary.deserializer - | 4 => .const <$> FromBinary.deserializer <*> FromBinary.deserializer - | 5 => .app <$> FromBinary.deserializer <*> FromBinary.deserializer - | 6 => .lam <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | 7 => .forallE <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | 8 => .letE <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | 9 => .lit <$> FromBinary.deserializer - | 10 => .mdata <$> FromBinary.deserializer <*> 
FromBinary.deserializer - | 11 => .proj <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | other => throw s!"Expected tag 0..11 for Expr, got {other}" - -private instance : ToBinary LocalDeclKind where - serializer - | .default, b => b.push 0 - | .implDetail, b => b.push 1 - | .auxDecl, b => b.push 2 -private instance : FromBinary LocalDeclKind where - deserializer := do - match (← Deserializer.byte) with - | 0 => pure .default - | 1 => pure .implDetail - | 2 => pure .auxDecl - | other => throw s!"Expected tag 0..2 for LocalDeclKind, got {other}" +deriving instance ToBinary, FromBinary for Syntax -private instance : ToBinary LocalDecl where - serializer - | .cdecl idx fvarId userName type bi kind, b => - b.push 0 |> ToBinary.serializer idx |> ToBinary.serializer fvarId |> ToBinary.serializer userName - |> ToBinary.serializer type |> ToBinary.serializer bi |> ToBinary.serializer kind - | .ldecl idx fvarId userName type val nonDep kind, b => - b.push 1 |> ToBinary.serializer idx |> ToBinary.serializer fvarId |> ToBinary.serializer userName - |> ToBinary.serializer type |> ToBinary.serializer val |> ToBinary.serializer nonDep |> ToBinary.serializer kind -private instance : FromBinary LocalDecl where - deserializer := do - match (← Deserializer.byte) with - | 0 => .cdecl <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | 1 => .ldecl <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | other => throw s!"Expected tag 0..1 for LocalDecl, got {other}" - -private instance : ToBinary LocalContext where - serializer lctx b := - let decls := lctx.decls.toArray - let auxDecls := lctx.auxDeclToFullName.toArray - b |> ToBinary.serializer decls |> ToBinary.serializer auxDecls 
-private instance : FromBinary LocalContext where - deserializer := do - let decls : Array (Option LocalDecl) ← FromBinary.deserializer - let auxDecls : Array (FVarId × Name) ← FromBinary.deserializer - let mut lctx := LocalContext.empty - for d? in decls do - match d? with - | some d => lctx := lctx.addDecl d - | none => lctx := { lctx with decls := lctx.decls.push none } - for (fv, name) in auxDecls do - lctx := { lctx with auxDeclToFullName := lctx.auxDeclToFullName.insert fv name } - pure lctx - -private instance : ToBinary DocHighlight where - serializer - | .const name sig, b => b.push 0 |> ToBinary.serializer name |> ToBinary.serializer sig - | .var userName fvarId type, b => b.push 1 |> ToBinary.serializer userName |> ToBinary.serializer fvarId |> ToBinary.serializer type - | .field name sig, b => b.push 2 |> ToBinary.serializer name |> ToBinary.serializer sig - | .option name declName, b => b.push 3 |> ToBinary.serializer name |> ToBinary.serializer declName - | .keyword, b => b.push 4 - | .literal kind type?, b => b.push 5 |> ToBinary.serializer kind |> ToBinary.serializer type? 
-private instance : FromBinary DocHighlight where - deserializer := do - match (← Deserializer.byte) with - | 0 => .const <$> FromBinary.deserializer <*> FromBinary.deserializer - | 1 => .var <$> FromBinary.deserializer <*> FromBinary.deserializer <*> FromBinary.deserializer - | 2 => .field <$> FromBinary.deserializer <*> FromBinary.deserializer - | 3 => .option <$> FromBinary.deserializer <*> FromBinary.deserializer - | 4 => pure .keyword - | 5 => .literal <$> FromBinary.deserializer <*> FromBinary.deserializer - | other => throw s!"Expected tag 0..5 for DocHighlight, got {other}" - -private instance : ToBinary DocCode where - serializer dc b := ToBinary.serializer dc.code b -private instance : FromBinary DocCode where - deserializer := DocCode.mk <$> FromBinary.deserializer - --- Simple single-Name types -private instance : ToBinary Data.Const where - serializer d b := ToBinary.serializer d.name b -private instance : FromBinary Data.Const where - deserializer := .mk <$> FromBinary.deserializer - -private instance : ToBinary Data.Tactic where - serializer d b := ToBinary.serializer d.name b -private instance : FromBinary Data.Tactic where - deserializer := .mk <$> FromBinary.deserializer - -private instance : ToBinary Data.ConvTactic where - serializer d b := ToBinary.serializer d.name b -private instance : FromBinary Data.ConvTactic where - deserializer := .mk <$> FromBinary.deserializer - -private instance : ToBinary Data.SyntaxCat where - serializer d b := ToBinary.serializer d.name b -private instance : FromBinary Data.SyntaxCat where - deserializer := .mk <$> FromBinary.deserializer - -private instance : ToBinary Data.ModuleName where - serializer d b := ToBinary.serializer d.module b -private instance : FromBinary Data.ModuleName where - deserializer := .mk <$> FromBinary.deserializer - --- Two-Name type -private instance : ToBinary Data.Option where - serializer d b := b |> ToBinary.serializer d.name |> ToBinary.serializer d.declName -private instance : 
FromBinary Data.Option where - deserializer := .mk <$> FromBinary.deserializer <*> FromBinary.deserializer - --- Syntax types -private instance : ToBinary Data.Attributes where - serializer d b := ToBinary.serializer d.stx b -private instance : FromBinary Data.Attributes where - deserializer := .mk <$> FromBinary.deserializer - -private instance : ToBinary Data.Attribute where - serializer d b := ToBinary.serializer d.stx b -private instance : FromBinary Data.Attribute where - deserializer := .mk <$> FromBinary.deserializer - -private instance : ToBinary Data.Syntax where - serializer d b := b |> ToBinary.serializer d.category |> ToBinary.serializer d.stx -private instance : FromBinary Data.Syntax where - deserializer := .mk <$> FromBinary.deserializer <*> FromBinary.deserializer - -private instance : ToBinary Data.Local where - serializer d b := b |> ToBinary.serializer d.name |> ToBinary.serializer d.fvarId - |> ToBinary.serializer d.lctx |> ToBinary.serializer d.type -private instance : FromBinary Data.Local where - deserializer := .mk <$> FromBinary.deserializer <*> FromBinary.deserializer - <*> FromBinary.deserializer <*> FromBinary.deserializer - --- DocCode types -private instance : ToBinary Data.LeanBlock where - serializer d b := ToBinary.serializer d.commands b -private instance : FromBinary Data.LeanBlock where - deserializer := .mk <$> FromBinary.deserializer - -private instance : ToBinary Data.LeanTerm where - serializer d b := ToBinary.serializer d.term b -private instance : FromBinary Data.LeanTerm where - deserializer := .mk <$> FromBinary.deserializer - -private instance : ToBinary Data.SetOption where - serializer d b := ToBinary.serializer d.term b -private instance : FromBinary Data.SetOption where - deserializer := .mk <$> FromBinary.deserializer +deriving instance ToBinary, FromBinary for DataValue + +deriving instance ToBinary, FromBinary for KVMap + +deriving instance ToBinary, FromBinary for Expr + +deriving instance ToBinary, FromBinary 
for DocHighlight + +deriving instance ToBinary, FromBinary for DocCode + +deriving instance ToBinary, FromBinary for Data.Const + +deriving instance ToBinary, FromBinary for Data.Tactic + +deriving instance ToBinary, FromBinary for Data.ConvTactic + +deriving instance ToBinary, FromBinary for Data.SyntaxCat + +deriving instance ToBinary, FromBinary for Data.ModuleName + +deriving instance ToBinary, FromBinary for Data.Option + +deriving instance ToBinary, FromBinary for Data.Attributes + +deriving instance ToBinary, FromBinary for Data.Attribute + +deriving instance ToBinary, FromBinary for Data.Syntax + +deriving instance ToBinary, FromBinary for Data.LeanBlock + +deriving instance ToBinary, FromBinary for Data.LeanTerm + +deriving instance ToBinary, FromBinary for Data.SetOption def builtinDocstringValues : DocstringValues where handlers := Id.run do let mut m : NameMap DocstringDataHandler := {} for (n, h) in [ mkHandler Data.Const, - mkHandler Data.Local, mkHandler Data.Tactic, mkHandler Data.ConvTactic, mkHandler Data.Attributes, diff --git a/lake-manifest.json b/lake-manifest.json index e01cad0d..1ea389c2 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -15,7 +15,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "5f76f5af0df8b908a91083be81027d03fb96c7fc", + "rev": "2765af4d645eb7a482f6555ac4e5cf9fb69e1ab1", "name": "leansqlite", "manifestFile": "lake-manifest.json", "inputRev": "main", From d13ca31237976e0195b86c6e4d68dd45d77b6db9 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 20:24:53 +0100 Subject: [PATCH 049/106] fix: actually chunk contents --- DocGen4/Helpers.lean | 8 ++++---- DocGen4/Output.lean | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/DocGen4/Helpers.lean b/DocGen4/Helpers.lean index 1daa3aae..97496b5b 100644 --- a/DocGen4/Helpers.lean +++ b/DocGen4/Helpers.lean @@ -43,10 +43,10 @@ instance [Pure m] : Iterator (ChunkArray α) m (Subarray α) where instance [Pure m] [Monad 
n] : IteratorLoop (ChunkArray α) (β := Subarray α) m n := IteratorLoop.defaultImplementation -def chunksOf (xs : Array α) (size : Nat) (_ok : size > 0 := by grind) : Array (Array α) := Id.run do +def chunksOf (xs : Array α) (chunkSize : Nat) (_ok : chunkSize > 0 := by grind) : Array (Array α) := Id.run do let mut out := #[] let mut n := 0 - while n < out.size do - out := out.push <| xs.extract n (n + size) - n := n + size + while n < xs.size do + out := out.push <| xs.extract n (n + chunkSize) + n := n + chunkSize return out diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 80f9e267..b2185f4e 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -104,7 +104,7 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.createDirAll <| declarationsBasePath baseConfig.buildDir -- Spawn one task per 100 modules, each returning its output file path - let tasks ← (chunksOf targetModules 100).flatMapM fun mods => mods.mapM fun modName => IO.asTask do + let tasks ← (chunksOf targetModules 100).mapM fun mods => IO.asTask do mods.mapM fun modName => do -- Each task opens its own DB connection (SQLite handles concurrent readers well) let db ← DB.openForReading dbPath let module ← db.loadModule modName @@ -157,8 +157,8 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi -- Wait for all tasks and collect output paths let mut outputs := #[] for task in tasks do - match ← IO.wait task with - | .ok path => outputs := outputs.push path + match (← IO.wait task) with + | .ok paths => outputs := outputs ++ paths | .error e => throw e return outputs From b645b4df911402c016b2504902e7971bb8939d35 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 21:03:55 +0100 Subject: [PATCH 050/106] fix: serialization --- DocGen4/DB.lean | 2 +- DocGen4/DB/VersoDocString.lean | 9 +++++++-- DocGen4/Output.lean | 2 +- Main.lean | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git 
a/DocGen4/DB.lean b/DocGen4/DB.lean index 2027c463..21d7b3f9 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -349,7 +349,7 @@ def withSQLite (f : SQLite → DBM α) : DBM α := do f (← read).db private def readonlyError : IO α := throw (IO.userError "DB opened for reading only") -def openForReading (dbFile : System.FilePath) (values : DocstringValues := {}) : IO DB := do +def openForReading (dbFile : System.FilePath) (values : DocstringValues) : IO DB := do let sqlite ← SQLite.openWith dbFile .readonly sqlite.exec "PRAGMA busy_timeout = 86400000" let readOps ← mkReadOps sqlite values diff --git a/DocGen4/DB/VersoDocString.lean b/DocGen4/DB/VersoDocString.lean index 4bf817c2..f81fe1f2 100644 --- a/DocGen4/DB/VersoDocString.lean +++ b/DocGen4/DB/VersoDocString.lean @@ -20,7 +20,9 @@ structure DocstringValues where private def toBinaryElab (vals : DocstringValues) (name : Name) (val : Dynamic) (b : ByteArray) : ByteArray := match vals.handlers.get? name with | none => b.push 0 |> ToBinary.serializer name - | some s => b.push 1 |> ToBinary.serializer name |> s.serialize val + | some s => + let payload := s.serialize val .empty + b.push 1 |> ToBinary.serializer name |> ToBinary.serializer payload.size |> (· ++ payload) def toBinaryElabInline (vals : DocstringValues) : Serializer ElabInline | { name, val }, b => toBinaryElab vals name val b @@ -41,8 +43,11 @@ private def fromBinaryElab (vals : DocstringValues) (label : String) : Deseriali pure (`unknown ++ name, .mk Unknown.mk) | 1 => let name ← FromBinary.deserializer + let len : Nat ← FromBinary.deserializer match vals.handlers.get? 
name with - | none => pure (`unknown ++ name, .mk Unknown.mk) + | none => + let _ ← Deserializer.nbytes len + pure (`unknown ++ name, .mk Unknown.mk) | some d => let val ← d.deserialize pure (name, val) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index b2185f4e..f2a6ea36 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -106,7 +106,7 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi -- Spawn one task per 100 modules, each returning its output file path let tasks ← (chunksOf targetModules 100).mapM fun mods => IO.asTask do mods.mapM fun modName => do -- Each task opens its own DB connection (SQLite handles concurrent readers well) - let db ← DB.openForReading dbPath + let db ← DB.openForReading dbPath builtinDocstringValues let module ← db.loadModule modName -- Build a minimal AnalyzerResult with just this module's info diff --git a/Main.lean b/Main.lean index 03a142c0..e3855309 100644 --- a/Main.lean +++ b/Main.lean @@ -72,7 +72,7 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do let moduleRoots := (p.variableArgsAs! 
String).map String.toName -- Load linking context (module names, source URLs, declaration locations) - let db ← DB.openForReading dbPath + let db ← DB.openForReading dbPath builtinDocstringValues let linkCtx ← db.loadLinkingContext -- Determine which modules to generate HTML for From 5e9aed2a5d1cbaeb1ef9eeec683bf803232193a1 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 21:17:28 +0100 Subject: [PATCH 051/106] chore: cleanups --- DocGen4/DB/Read.lean | 14 +++++++++----- DocGen4/DB/Schema.lean | 16 ++++------------ DocGen4/Helpers.lean | 3 --- DocGen4/Output/Base.lean | 21 --------------------- DocGen4/Output/DocString.lean | 11 ++--------- DocGen4/Process/Base.lean | 7 +++++++ DocGen4/Process/DocInfo.lean | 9 +-------- DocGen4/RenderedCode.lean | 1 - 8 files changed, 23 insertions(+), 59 deletions(-) diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index a784a87f..53795313 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -338,7 +338,9 @@ private def ReadStmts.loadDocInfo (s : ReadStmts) (moduleName : String) (positio | "class" => readStructure .classInfo info | "class inductive" => readClassInductive info | "constructor" => return some <| .ctorInfo info - | _ => return none + | other => + IO.eprintln s!"warning: unknown declaration kind '{other}' for '{name}' in module '{moduleName}'; skipping" + return none where readAxiom (info : Process.Info) : IO (Option Process.DocInfo) := do s.readAxiomStmt.bind 1 moduleName @@ -498,16 +500,18 @@ private def ReadStmts.loadModule (s : ReadStmts) (moduleName : Name) : IO Proces let typeBlob ← s.loadModuleStmt.columnBlob 3 let sorried := (← s.loadModuleStmt.columnInt64 4) != 0 let render := (← s.loadModuleStmt.columnInt64 5) != 0 - if let some docInfo ← s.loadDocInfo modNameStr position kind name typeBlob sorried render then - members := members.push (position, .docInfo docInfo) + match (← s.loadDocInfo modNameStr position kind name typeBlob sorried render) with + | some 
docInfo => members := members.push (position, .docInfo docInfo) + | none => IO.eprintln s!"warning: failed to load declaration '{name}' (kind '{kind}') at position {position} in module '{modNameStr}'; skipping" done s.loadModuleStmt s.loadModuleDocsStmt.bind 1 modNameStr s.loadModuleDocsStmt.bind 2 modNameStr while (← s.loadModuleDocsStmt.step) do let position ← s.loadModuleDocsStmt.columnInt64 0 let doc ← s.loadModuleDocsStmt.columnText 1 - if let some declRange ← s.loadDeclarationRange modNameStr position then - members := members.push (position, .modDoc { doc, declarationRange := declRange }) + match (← s.loadDeclarationRange modNameStr position) with + | some declRange => members := members.push (position, .modDoc { doc, declarationRange := declRange }) + | none => IO.eprintln s!"warning: missing declaration range for module docstring at position {position} in module '{modNameStr}'; skipping" done s.loadModuleDocsStmt let sortedMembers := members.qsort fun (pos1, m1) (pos2, m2) => let r1 := m1.getDeclarationRange.pos diff --git a/DocGen4/DB/Schema.lean b/DocGen4/DB/Schema.lean index 19a0dd0d..faffc2df 100644 --- a/DocGen4/DB/Schema.lean +++ b/DocGen4/DB/Schema.lean @@ -113,14 +113,6 @@ CREATE TABLE IF NOT EXISTS module_imports ( -- Index for reverse queries: "what imports this module?" 
CREATE INDEX IF NOT EXISTS idx_module_imports_imported ON module_imports(imported); -CREATE TABLE IF NOT EXISTS module_items ( - module_name TEXT NOT NULL, - position INTEGER NOT NULL, - item_type TEXT NOT NULL, - PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE -); - CREATE TABLE IF NOT EXISTS declaration_ranges ( module_name TEXT NOT NULL, position INTEGER NOT NULL, @@ -155,7 +147,7 @@ CREATE TABLE IF NOT EXISTS name_info ( position INTEGER NOT NULL, kind TEXT, name TEXT NOT NULL, - type TEXT NOT NULL, + type BLOB NOT NULL, sorried INTEGER NOT NULL, render INTEGER NOT NULL, PRIMARY KEY (module_name, position), @@ -186,7 +178,7 @@ CREATE TABLE IF NOT EXISTS constructors ( position INTEGER NOT NULL, type_position INTEGER NOT NULL, PRIMARY KEY (module_name, position), - FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE + FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE, FOREIGN KEY (module_name, type_position) REFERENCES name_info(module_name, position) ON DELETE CASCADE ); @@ -230,7 +222,7 @@ CREATE TABLE IF NOT EXISTS definitions ( CREATE TABLE IF NOT EXISTS definition_equations ( module_name TEXT NOT NULL, position INTEGER NOT NULL, - code TEXT NOT NULL, + code BLOB NOT NULL, sequence INTEGER NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE @@ -266,7 +258,7 @@ CREATE TABLE IF NOT EXISTS structure_parents ( position INTEGER NOT NULL, sequence INTEGER NOT NULL, projection_fn TEXT NOT NULL, - type TEXT NOT NULL, + type BLOB NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES structures(module_name, position) ON DELETE CASCADE ); diff --git a/DocGen4/Helpers.lean b/DocGen4/Helpers.lean index 97496b5b..52a0c1f3 100644 --- a/DocGen4/Helpers.lean +++ 
b/DocGen4/Helpers.lean @@ -11,9 +11,6 @@ structure ChunkArray α where chunkSize_gt_zero : chunkSize > 0 := by grind curr_valid : curr ≤ array.size := by grind -def chunkedM {m : Type u → Type v} (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := - IterM.mk (ChunkArray.mk xs n 0) m (Subarray α) - def chunked (xs : Array α) (n : Nat) (ok : n > 0 := by grind) := IterM.mk (ChunkArray.mk xs n 0) Id (Subarray α) diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index c82c5326..fc6501ca 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -259,27 +259,6 @@ def declNameToHtmlBreakWithinLink (name : Name) : HtmlM Html := do [breakWithin name.toString] -/-- -In Lean syntax declarations the following pattern is quite common: -``` -syntax term " + " term : term -``` -that is, we place spaces around the operator in the middle. When the -`InfoTree` framework provides us with information about what source token -corresponds to which identifier it will thus say that `" + "` corresponds to -`HAdd.hadd`. This is however not the way we want this to be linked, in the HTML -only `+` should be linked, taking care of this is what this function is -responsible for. --/ -def splitWhitespaces (s : String) : String × String × String := - let length := s.length - let s := s.trimAsciiStart - let front := "".pushn ' ' (length - s.positions.count) - let length := s.positions.count - let s := s.trimAsciiEnd.copy - let back := "".pushn ' ' (length - s.length) - (front, s, back) - /-- For a name, try to find a linkable target by stripping suffix components that are numeric or start with `_`. 
Returns the first name found in name2ModIdx, diff --git a/DocGen4/Output/DocString.lean b/DocGen4/Output/DocString.lean index 3877d09f..f757d2df 100644 --- a/DocGen4/Output/DocString.lean +++ b/DocGen4/Output/DocString.lean @@ -366,7 +366,8 @@ def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : let docString := match docString with | .inl md => md - | .inr v => toMarkdown v + -- TODO: natively render Verso docstrings + | .inr v => versoDocToMarkdown v let refsMarkdown := "\n\n" ++ (String.join <| (findAllReferences (← read).refsMap docString).toList.map fun s => s!"[{s}]: references.html#ref_{s}\n") @@ -380,13 +381,5 @@ def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : | .none => addError <| "Error: failed to parse markdown:\n" ++ docString return #[.raw "Error: failed to parse markdown: ", .text docString] -where - -- TODO: natively render Verso docstrings - toMarkdown : VersoDocString → String - | .mk bs ps => Doc.MarkdownM.run' do - for b in bs do - Doc.ToMarkdown.toMarkdown b - for p in ps do - Doc.ToMarkdown.toMarkdown p end Output end DocGen4 diff --git a/DocGen4/Process/Base.lean b/DocGen4/Process/Base.lean index 9cd25aac..90866e3e 100644 --- a/DocGen4/Process/Base.lean +++ b/DocGen4/Process/Base.lean @@ -42,6 +42,13 @@ deriving instance Hashable for Position deriving instance Hashable for DeclarationRange +def versoDocToMarkdown (v : VersoDocString) : String := + let { text, subsections } := v + Doc.MarkdownM.run' do + for b in text do + Doc.ToMarkdown.toMarkdown b + for p in subsections do + Doc.ToMarkdown.toMarkdown p /-- diff --git a/DocGen4/Process/DocInfo.lean b/DocGen4/Process/DocInfo.lean index 31201881..67669ec2 100644 --- a/DocGen4/Process/DocInfo.lean +++ b/DocGen4/Process/DocInfo.lean @@ -120,14 +120,7 @@ def getDocString : DocInfo → Option (String ⊕ VersoDocString) def getMarkdownDocString (i : DocInfo) : Option String := i.getDocString.map fun | .inl md => md - | .inr v => toMarkdown v 
-where - toMarkdown : VersoDocString → String - | .mk bs ps => Doc.MarkdownM.run' do - for b in bs do - Doc.ToMarkdown.toMarkdown b - for p in ps do - Doc.ToMarkdown.toMarkdown p + | .inr v => versoDocToMarkdown v def shouldRender : DocInfo → Bool | axiomInfo i => i.render diff --git a/DocGen4/RenderedCode.lean b/DocGen4/RenderedCode.lean index 9c64fc62..85e03a77 100644 --- a/DocGen4/RenderedCode.lean +++ b/DocGen4/RenderedCode.lean @@ -198,7 +198,6 @@ private def tokenize (txt : String) : RenderedCode := Id.run do continue return toks where - tokenEnder (str : String.Slice) : Bool := str.front?.map Char.isAlphanum |>.getD true kws := ["let", "fun", "do", "match", "with", "if", "then", "else", "break", "continue", "for", "in", "mut"] /-- From 8a025f1fa5b0932893583dcfed0e762ed6b05e8c Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 21:23:01 +0100 Subject: [PATCH 052/106] chore: use zstd for benchmark-produced data --- scripts/bench/mathlib-docs/run | 2 +- scripts/bench/own-docs/run | 2 +- scripts/bench/run | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index 306026de..b6d586c4 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -54,4 +54,4 @@ TAR_ARGS=(doc) if [ -f "$TMPDIR/mathproject/.lake/build/api-docs.db" ]; then TAR_ARGS+=(api-docs.db) fi -tar czf "$REPO_ROOT/mathlib-docs.tar.gz" -C "$TMPDIR/mathproject/.lake/build" "${TAR_ARGS[@]}" +tar cf - -C "$TMPDIR/mathproject/.lake/build" "${TAR_ARGS[@]}" | zstd -o "$REPO_ROOT/mathlib-docs.tar.zst" diff --git a/scripts/bench/own-docs/run b/scripts/bench/own-docs/run index 515dd996..da4d2b56 100755 --- a/scripts/bench/own-docs/run +++ b/scripts/bench/own-docs/run @@ -19,4 +19,4 @@ TAR_ARGS=(doc) if [ -f .lake/build/api-docs.db ]; then TAR_ARGS+=(api-docs.db) fi -tar czf own-docs.tar.gz -C .lake/build "${TAR_ARGS[@]}" +tar cf - -C .lake/build "${TAR_ARGS[@]}" | zstd -o 
own-docs.tar.zst diff --git a/scripts/bench/run b/scripts/bench/run index c415834a..e3dc477e 100755 --- a/scripts/bench/run +++ b/scripts/bench/run @@ -12,7 +12,7 @@ echo "Running benchmark: mathlib-docs" if [ -n "${IN_RADAR:-}" ]; then SHA=$(git rev-parse HEAD) BASE_URL="https://speed.lean-lang.org/doc-gen4-out/$SHA" - for f in own-docs.tar.gz mathlib-docs.tar.gz; do + for f in own-docs.tar.zst mathlib-docs.tar.zst; do URL="$BASE_URL/$f" echo "Uploading $f to $URL" curl -T "$f" "$URL" From cafaf11823fcb773729144c342874137dcb6d5b9 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 21:43:45 +0100 Subject: [PATCH 053/106] Revert "chore: use zstd for benchmark-produced data" This reverts commit 8a025f1fa5b0932893583dcfed0e762ed6b05e8c. --- scripts/bench/mathlib-docs/run | 2 +- scripts/bench/own-docs/run | 2 +- scripts/bench/run | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index b6d586c4..306026de 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -54,4 +54,4 @@ TAR_ARGS=(doc) if [ -f "$TMPDIR/mathproject/.lake/build/api-docs.db" ]; then TAR_ARGS+=(api-docs.db) fi -tar cf - -C "$TMPDIR/mathproject/.lake/build" "${TAR_ARGS[@]}" | zstd -o "$REPO_ROOT/mathlib-docs.tar.zst" +tar czf "$REPO_ROOT/mathlib-docs.tar.gz" -C "$TMPDIR/mathproject/.lake/build" "${TAR_ARGS[@]}" diff --git a/scripts/bench/own-docs/run b/scripts/bench/own-docs/run index da4d2b56..515dd996 100755 --- a/scripts/bench/own-docs/run +++ b/scripts/bench/own-docs/run @@ -19,4 +19,4 @@ TAR_ARGS=(doc) if [ -f .lake/build/api-docs.db ]; then TAR_ARGS+=(api-docs.db) fi -tar cf - -C .lake/build "${TAR_ARGS[@]}" | zstd -o own-docs.tar.zst +tar czf own-docs.tar.gz -C .lake/build "${TAR_ARGS[@]}" diff --git a/scripts/bench/run b/scripts/bench/run index e3dc477e..c415834a 100755 --- a/scripts/bench/run +++ b/scripts/bench/run @@ -12,7 +12,7 @@ echo "Running benchmark: 
mathlib-docs" if [ -n "${IN_RADAR:-}" ]; then SHA=$(git rev-parse HEAD) BASE_URL="https://speed.lean-lang.org/doc-gen4-out/$SHA" - for f in own-docs.tar.zst mathlib-docs.tar.zst; do + for f in own-docs.tar.gz mathlib-docs.tar.gz; do URL="$BASE_URL/$f" echo "Uploading $f to $URL" curl -T "$f" "$URL" From 8dc6c3663d1ccec9cce8772801758bb33802aab3 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 22:33:00 +0100 Subject: [PATCH 054/106] Reapply "chore: use zstd for benchmark-produced data" This reverts commit cafaf11823fcb773729144c342874137dcb6d5b9. --- scripts/bench/mathlib-docs/run | 2 +- scripts/bench/own-docs/run | 2 +- scripts/bench/run | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index 306026de..b6d586c4 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -54,4 +54,4 @@ TAR_ARGS=(doc) if [ -f "$TMPDIR/mathproject/.lake/build/api-docs.db" ]; then TAR_ARGS+=(api-docs.db) fi -tar czf "$REPO_ROOT/mathlib-docs.tar.gz" -C "$TMPDIR/mathproject/.lake/build" "${TAR_ARGS[@]}" +tar cf - -C "$TMPDIR/mathproject/.lake/build" "${TAR_ARGS[@]}" | zstd -o "$REPO_ROOT/mathlib-docs.tar.zst" diff --git a/scripts/bench/own-docs/run b/scripts/bench/own-docs/run index 515dd996..da4d2b56 100755 --- a/scripts/bench/own-docs/run +++ b/scripts/bench/own-docs/run @@ -19,4 +19,4 @@ TAR_ARGS=(doc) if [ -f .lake/build/api-docs.db ]; then TAR_ARGS+=(api-docs.db) fi -tar czf own-docs.tar.gz -C .lake/build "${TAR_ARGS[@]}" +tar cf - -C .lake/build "${TAR_ARGS[@]}" | zstd -o own-docs.tar.zst diff --git a/scripts/bench/run b/scripts/bench/run index c415834a..e3dc477e 100755 --- a/scripts/bench/run +++ b/scripts/bench/run @@ -12,7 +12,7 @@ echo "Running benchmark: mathlib-docs" if [ -n "${IN_RADAR:-}" ]; then SHA=$(git rev-parse HEAD) BASE_URL="https://speed.lean-lang.org/doc-gen4-out/$SHA" - for f in own-docs.tar.gz mathlib-docs.tar.gz; do + for f in 
own-docs.tar.zst mathlib-docs.tar.zst; do URL="$BASE_URL/$f" echo "Uploading $f to $URL" curl -T "$f" "$URL" From 6d413a1b2f049d4571bb4357f3f4ed87f5fc4f00 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 22:34:33 +0100 Subject: [PATCH 055/106] Try bigger chunks --- DocGen4/Output.lean | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index f2a6ea36..fd760c7c 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -103,8 +103,8 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.createDirAll <| basePath baseConfig.buildDir FS.createDirAll <| declarationsBasePath baseConfig.buildDir - -- Spawn one task per 100 modules, each returning its output file path - let tasks ← (chunksOf targetModules 100).mapM fun mods => IO.asTask do mods.mapM fun modName => do + -- Spawn one task per 500 modules, each returning its output file path + let tasks ← (chunksOf targetModules 500).mapM fun mods => IO.asTask do mods.mapM fun modName => do -- Each task opens its own DB connection (SQLite handles concurrent readers well) let db ← DB.openForReading dbPath builtinDocstringValues let module ← db.loadModule modName From f1d4513a55c92f053aca18670f7cafd03d5f4b76 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 23:03:01 +0100 Subject: [PATCH 056/106] Don't deserialize long equations --- DocGen4/DB.lean | 5 +++-- DocGen4/DB/Read.lean | 41 ++++++++++++++++++++++++---------- DocGen4/DB/Schema.lean | 1 + DocGen4/Output/Definition.lean | 10 +++------ DocGen4/Process/Base.lean | 4 ++++ DocGen4/RenderedCode.lean | 5 +++++ 6 files changed, 45 insertions(+), 21 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 21d7b3f9..be187cd0 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -172,12 +172,13 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do saveDefinitionStmt.bind 4 hints 
saveDefinitionStmt.bind 5 isNonComputable run saveDefinitionStmt - let saveDefinitionEquationStmt ← sqlite.prepare "INSERT INTO definition_equations (module_name, position, code, sequence) VALUES (?, ?, ?, ?)" + let saveDefinitionEquationStmt ← sqlite.prepare "INSERT INTO definition_equations (module_name, position, code, text_length, sequence) VALUES (?, ?, ?, ?, ?)" let saveDefinitionEquation modName position (code : RenderedCode) sequence := withDbContext "write:insert:definition_equations" do saveDefinitionEquationStmt.bind 1 modName saveDefinitionEquationStmt.bind 2 position saveDefinitionEquationStmt.bind 3 code - saveDefinitionEquationStmt.bind 4 sequence + saveDefinitionEquationStmt.bind 4 (RenderedCode.textLength code).toInt64 + saveDefinitionEquationStmt.bind 5 sequence run saveDefinitionEquationStmt let saveInstanceStmt ← sqlite.prepare "INSERT INTO instances (module_name, position, class_name) VALUES (?, ?, ?)" let saveInstance modName position className := withDbContext "write:insert:instances" do diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index 53795313..f64096e4 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -1,5 +1,6 @@ import DocGen4.RenderedCode +import DocGen4.Process.Base import SQLite import DocGen4.DB.VersoDocString @@ -79,7 +80,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO let readMdDocstringStmt ← sqlite.prepare "SELECT text FROM markdown_docstrings WHERE module_name = ? AND position = ?" let readVersoDocstringStmt ← sqlite.prepare "SELECT content FROM verso_docstrings WHERE module_name = ? AND position = ?" let loadDeclRangeStmt ← sqlite.prepare "SELECT start_line, start_column, start_utf16, end_line, end_column, end_utf16 FROM declaration_ranges WHERE module_name = ? AND position = ?" - let loadEqnsStmt ← sqlite.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? 
ORDER BY sequence" + let loadEqnsStmt ← sqlite.prepare "SELECT CASE WHEN text_length < ? THEN code ELSE NULL END FROM definition_equations WHERE module_name = ? AND position = ? ORDER BY sequence" let loadInstanceArgsStmt ← sqlite.prepare "SELECT type_name FROM instance_args WHERE module_name = ? AND position = ? ORDER BY sequence" let loadStructureParentsStmt ← sqlite.prepare "SELECT projection_fn, type FROM structure_parents WHERE module_name = ? AND position = ? ORDER BY sequence" let loadFieldArgsStmt ← sqlite.prepare "SELECT binder, is_implicit FROM structure_field_args WHERE module_name = ? AND position = ? AND field_sequence = ? ORDER BY arg_sequence" @@ -194,17 +195,33 @@ private def ReadStmts.loadInfo (s : ReadStmts) (moduleName : String) (position : return { name, type, doc, args, declarationRange := declRange, attrs, sorried, render } open Lean SQLite.Blob in -private def ReadStmts.loadEquations (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Option (Array RenderedCode)) := withDbContext "read:definition_equations" do - s.loadEqnsStmt.bind 1 moduleName - s.loadEqnsStmt.bind 2 position +private def ReadStmts.loadEquations (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Option (Array RenderedCode) × Bool) := withDbContext "read:definition_equations" do + s.loadEqnsStmt.bind 1 Process.equationLimit.toInt64 + s.loadEqnsStmt.bind 2 moduleName + s.loadEqnsStmt.bind 3 position if !(← s.loadEqnsStmt.step) then done s.loadEqnsStmt - return none - let mut eqns := #[← readRenderedCode (← s.loadEqnsStmt.columnBlob 0)] + return (none, false) + let mut eqns := #[] + let mut wereOmitted := false + let processRow : IO (Option RenderedCode) := do + let colType ← s.loadEqnsStmt.columnType 0 + if colType == .null then + return none + else + let blob ← s.loadEqnsStmt.columnBlob 0 + return some (← readRenderedCode blob) + match (← processRow) with + | some code => eqns := eqns.push code + | none => wereOmitted := true while (← 
s.loadEqnsStmt.step) do - eqns := eqns.push (← readRenderedCode (← s.loadEqnsStmt.columnBlob 0)) + match (← processRow) with + | some code => eqns := eqns.push code + | none => wereOmitted := true done s.loadEqnsStmt - return some eqns + if eqns.isEmpty && !wereOmitted then + return (none, false) + return (some eqns, wereOmitted) open Lean SQLite.Blob in private def ReadStmts.loadInstanceArgs (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Array Name) := do @@ -376,8 +393,8 @@ where | "opaque" => .opaque | "abbrev" => .abbrev | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let equations ← s.loadEquations moduleName position - return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable } + let (equations, equationsWereOmitted) ← s.loadEquations moduleName position + return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, equationsWereOmitted, isNonComputable } done s.readDefinitionStmt return none readInstance (info : Process.Info) : IO (Option Process.DocInfo) := do @@ -397,9 +414,9 @@ where | "opaque" => .opaque | "abbrev" => .abbrev | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let equations ← s.loadEquations moduleName position + let (equations, equationsWereOmitted) ← s.loadEquations moduleName position let typeNames ← s.loadInstanceArgs moduleName position - return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, isNonComputable, className, typeNames } + return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, equationsWereOmitted, isNonComputable, className, typeNames } done s.readDefinitionStmt else done s.readInstanceStmt diff --git a/DocGen4/DB/Schema.lean b/DocGen4/DB/Schema.lean index faffc2df..a0cd6490 100644 --- a/DocGen4/DB/Schema.lean +++ b/DocGen4/DB/Schema.lean @@ -223,6 +223,7 @@ CREATE TABLE IF NOT EXISTS definition_equations ( module_name TEXT NOT NULL, position INTEGER NOT NULL, code BLOB NOT NULL, + text_length INTEGER NOT NULL, sequence 
INTEGER NOT NULL, PRIMARY KEY (module_name, position, sequence), FOREIGN KEY (module_name, position) REFERENCES name_info(module_name, position) ON DELETE CASCADE diff --git a/DocGen4/Output/Definition.lean b/DocGen4/Output/Definition.lean index 5027f775..c95c39cd 100644 --- a/DocGen4/Output/Definition.lean +++ b/DocGen4/Output/Definition.lean @@ -8,9 +8,6 @@ namespace Output open scoped DocGen4.Jsx open Lean Widget -/-- This is basically an arbitrary number that seems to work okay. -/ -def equationLimit : Nat := 200 - def equationToHtml (c : RenderedCode) : HtmlM Html := do return
  • [← renderedCodeToHtml c]
  • @@ -23,14 +20,13 @@ defined in `equationLimit` we stop trying since they: def equationsToHtml (i : Process.DefinitionInfo) : HtmlM (Array Html) := do if let some eqs := i.equations then let equationsHtml ← eqs.mapM equationToHtml - let filteredEquationsHtml := equationsHtml.filter (·.textLength < equationLimit) - if equationsHtml.size ≠ filteredEquationsHtml.size then + if i.equationsWereOmitted then return #[
    Equations
    • One or more equations did not get rendered due to their size.
    • - [filteredEquationsHtml] + [equationsHtml]
    ] @@ -39,7 +35,7 @@ def equationsToHtml (i : Process.DefinitionInfo) : HtmlM (Array Html) := do
    Equations
      - [filteredEquationsHtml] + [equationsHtml]
    ] diff --git a/DocGen4/Process/Base.lean b/DocGen4/Process/Base.lean index 90866e3e..953948e4 100644 --- a/DocGen4/Process/Base.lean +++ b/DocGen4/Process/Base.lean @@ -137,6 +137,9 @@ structure OpaqueInfo extends Info where deriving instance Hashable for ReducibilityHints +/-- The maximum string length of equations before they are omitted from rendering. -/ +def equationLimit : Nat := 200 + /-- Information about a `def` declaration, note that partial defs are handled by `OpaqueInfo`. -/ @@ -144,6 +147,7 @@ structure DefinitionInfo extends Info where isUnsafe : Bool hints : ReducibilityHints equations : Option (Array RenderedCode) + equationsWereOmitted : Bool := false isNonComputable : Bool deriving Inhabited, Hashable diff --git a/DocGen4/RenderedCode.lean b/DocGen4/RenderedCode.lean index 85e03a77..fe14f887 100644 --- a/DocGen4/RenderedCode.lean +++ b/DocGen4/RenderedCode.lean @@ -88,6 +88,11 @@ deriving Inhabited, BEq, Repr, ToBinary, FromBinary, Hashable def RenderedCode.empty : RenderedCode := .append #[] +partial def RenderedCode.textLength : RenderedCode → Nat + | .text s => s.length + | .tag _ inner => textLength inner + | .append xs => xs.foldl (init := 0) fun len x => len + textLength x + open Lean.Widget in mutual partial def RenderedCode.pushRight (xs : Array RenderedCode) (x : RenderedCode) : Array RenderedCode := From 5fdf9c40fb9c8d9de2f40e7f107786d1e8155251 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 23:37:41 +0100 Subject: [PATCH 057/106] Don't even save long equations --- DocGen4/DB.lean | 7 +++++-- DocGen4/DB/Read.lean | 8 +++----- DocGen4/DB/Schema.lean | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index be187cd0..cedbb8c0 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -174,10 +174,13 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do run saveDefinitionStmt let saveDefinitionEquationStmt ← sqlite.prepare 
"INSERT INTO definition_equations (module_name, position, code, text_length, sequence) VALUES (?, ?, ?, ?, ?)" let saveDefinitionEquation modName position (code : RenderedCode) sequence := withDbContext "write:insert:definition_equations" do + let textLength := RenderedCode.textLength code saveDefinitionEquationStmt.bind 1 modName saveDefinitionEquationStmt.bind 2 position - saveDefinitionEquationStmt.bind 3 code - saveDefinitionEquationStmt.bind 4 (RenderedCode.textLength code).toInt64 + saveDefinitionEquationStmt.bind 3 <| + if textLength < Process.equationLimit then some code + else none + saveDefinitionEquationStmt.bind 4 textLength.toInt64 saveDefinitionEquationStmt.bind 5 sequence run saveDefinitionEquationStmt let saveInstanceStmt ← sqlite.prepare "INSERT INTO instances (module_name, position, class_name) VALUES (?, ?, ?)" diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index f64096e4..8b2b5046 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -1,6 +1,5 @@ import DocGen4.RenderedCode -import DocGen4.Process.Base import SQLite import DocGen4.DB.VersoDocString @@ -80,7 +79,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO let readMdDocstringStmt ← sqlite.prepare "SELECT text FROM markdown_docstrings WHERE module_name = ? AND position = ?" let readVersoDocstringStmt ← sqlite.prepare "SELECT content FROM verso_docstrings WHERE module_name = ? AND position = ?" let loadDeclRangeStmt ← sqlite.prepare "SELECT start_line, start_column, start_utf16, end_line, end_column, end_utf16 FROM declaration_ranges WHERE module_name = ? AND position = ?" - let loadEqnsStmt ← sqlite.prepare "SELECT CASE WHEN text_length < ? THEN code ELSE NULL END FROM definition_equations WHERE module_name = ? AND position = ? ORDER BY sequence" + let loadEqnsStmt ← sqlite.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? 
ORDER BY sequence" let loadInstanceArgsStmt ← sqlite.prepare "SELECT type_name FROM instance_args WHERE module_name = ? AND position = ? ORDER BY sequence" let loadStructureParentsStmt ← sqlite.prepare "SELECT projection_fn, type FROM structure_parents WHERE module_name = ? AND position = ? ORDER BY sequence" let loadFieldArgsStmt ← sqlite.prepare "SELECT binder, is_implicit FROM structure_field_args WHERE module_name = ? AND position = ? AND field_sequence = ? ORDER BY arg_sequence" @@ -196,9 +195,8 @@ private def ReadStmts.loadInfo (s : ReadStmts) (moduleName : String) (position : open Lean SQLite.Blob in private def ReadStmts.loadEquations (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Option (Array RenderedCode) × Bool) := withDbContext "read:definition_equations" do - s.loadEqnsStmt.bind 1 Process.equationLimit.toInt64 - s.loadEqnsStmt.bind 2 moduleName - s.loadEqnsStmt.bind 3 position + s.loadEqnsStmt.bind 1 moduleName + s.loadEqnsStmt.bind 2 position if !(← s.loadEqnsStmt.step) then done s.loadEqnsStmt return (none, false) diff --git a/DocGen4/DB/Schema.lean b/DocGen4/DB/Schema.lean index a0cd6490..e79be8eb 100644 --- a/DocGen4/DB/Schema.lean +++ b/DocGen4/DB/Schema.lean @@ -222,7 +222,7 @@ CREATE TABLE IF NOT EXISTS definitions ( CREATE TABLE IF NOT EXISTS definition_equations ( module_name TEXT NOT NULL, position INTEGER NOT NULL, - code BLOB NOT NULL, + code BLOB, text_length INTEGER NOT NULL, sequence INTEGER NOT NULL, PRIMARY KEY (module_name, position, sequence), From f3a89f77a25450218976ae780524a691d6193693 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Tue, 10 Feb 2026 23:56:23 +0100 Subject: [PATCH 058/106] Less copying in escape --- DocGen4/Output/ToHtmlFormat.lean | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/DocGen4/Output/ToHtmlFormat.lean b/DocGen4/Output/ToHtmlFormat.lean index 91b8abdf..f7fca246 100644 --- a/DocGen4/Output/ToHtmlFormat.lean +++ 
b/DocGen4/Output/ToHtmlFormat.lean @@ -32,16 +32,16 @@ instance : Coe String Html := namespace Html -def escapePairs : Array (String × String) := - #[ - ("&", "&"), - ("<", "<"), - (">", ">"), - ("\"", """) - ] - def escape (s : String) : String := - escapePairs.foldl (fun acc (o, r) => acc.replace o r) s + if s.any (fun c => c == '&' || c == '<' || c == '>' || c == '"') then + s.foldl (init := "") fun + | out, '&' => out ++ "&" + | out, '<' => out ++ "<" + | out, '>' => out ++ ">" + | out, '"' => out ++ """ + | out, c => out.push c + else + s -- TODO: remove the following 3 functions -- once is fixed From 5042ed1dc128aa775fa81ab32d6d10e9068b7982 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 00:24:03 +0100 Subject: [PATCH 059/106] Revert "Less copying in escape" This reverts commit f3a89f77a25450218976ae780524a691d6193693. It was slower. --- DocGen4/Output/ToHtmlFormat.lean | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/DocGen4/Output/ToHtmlFormat.lean b/DocGen4/Output/ToHtmlFormat.lean index f7fca246..91b8abdf 100644 --- a/DocGen4/Output/ToHtmlFormat.lean +++ b/DocGen4/Output/ToHtmlFormat.lean @@ -32,16 +32,16 @@ instance : Coe String Html := namespace Html +def escapePairs : Array (String × String) := + #[ + ("&", "&"), + ("<", "<"), + (">", ">"), + ("\"", """) + ] + def escape (s : String) : String := - if s.any (fun c => c == '&' || c == '<' || c == '>' || c == '"') then - s.foldl (init := "") fun - | out, '&' => out ++ "&" - | out, '<' => out ++ "<" - | out, '>' => out ++ ">" - | out, '"' => out ++ """ - | out, c => out.push c - else - s + escapePairs.foldl (fun acc (o, r) => acc.replace o r) s -- TODO: remove the following 3 functions -- once is fixed From 7c939bd6a55741c46df6842c2e51fefc07c005fa Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 10:06:40 +0100 Subject: [PATCH 060/106] experiment: emit HTML directly rather than allocating trees and strings --- 
DocGen4/Output.lean | 97 ++++---- DocGen4/Output/Arg.lean | 13 +- DocGen4/Output/Base.lean | 329 ++++++++++++++++---------- DocGen4/Output/Class.lean | 13 +- DocGen4/Output/ClassInductive.lean | 2 +- DocGen4/Output/Definition.lean | 40 ++-- DocGen4/Output/DocString.lean | 194 +++++++-------- DocGen4/Output/Find.lean | 20 +- DocGen4/Output/FoundationalTypes.lean | 10 +- DocGen4/Output/Index.lean | 4 +- DocGen4/Output/Inductive.lean | 37 ++- DocGen4/Output/Module.lean | 184 +++++++------- DocGen4/Output/Navbar.lean | 121 +++++----- DocGen4/Output/NotFound.lean | 4 +- DocGen4/Output/References.lean | 44 ++-- DocGen4/Output/Search.lean | 7 +- DocGen4/Output/Structure.lean | 69 +++--- DocGen4/Output/Tactics.lean | 45 ++-- DocGen4/Output/Template.lean | 93 ++++---- DocGen4/Output/ToHtmlFormat.lean | 120 ++++------ DocGen4/Output/ToJson.lean | 5 +- Main.lean | 2 +- 22 files changed, 727 insertions(+), 726 deletions(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index fd760c7c..c9cd36db 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -39,24 +39,29 @@ def collectBackrefs (buildDir : System.FilePath) : IO (Array BackrefItem) := do | .ok (arr : Array BackrefItem) => backrefs := backrefs ++ arr return backrefs -def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do - let findBasePath (buildDir : System.FilePath) := basePath buildDir / "find" +def htmlOutputSetup (config : SiteBaseConfig) : IO Unit := do + let bp := basePath config.buildDir + let findBasePath := bp / "find" -- Base structure - FS.createDirAll <| basePath config.buildDir - FS.createDirAll <| findBasePath config.buildDir + FS.createDirAll bp + FS.createDirAll findBasePath FS.createDirAll <| srcBasePath config.buildDir FS.createDirAll <| declarationsBasePath config.buildDir - -- All the doc-gen static stuff - let indexHtml := ReaderT.run index config |>.toString - let notFoundHtml := ReaderT.run notFound config |>.toString - let foundationalTypesHtml := ReaderT.run 
foundationalTypes config |>.toString - let navbarHtml := ReaderT.run navbar config |>.toString - let searchHtml := ReaderT.run search config |>.toString - let referencesHtml := ReaderT.run (references (← collectBackrefs config.buildDir)) config |>.toString - let tacticsHtml := ReaderT.run (tactics (← loadTacticsJSON config.buildDir)) config |>.toString - let docGenStatic := #[ + -- HTML pages written directly to files + let run action path := runHtmlToFile action config path + run index (bp / "index.html") + run notFound (bp / "404.html") + run foundationalTypes (bp / "foundational_types.html") + run navbar (bp / "navbar.html") + run search (bp / "search.html") + run (references (← collectBackrefs config.buildDir)) (bp / "references.html") + run (tactics (← loadTacticsJSON config.buildDir)) (bp / "tactics.html") + runHtmlToFile find { config with depthToRoot := 1 } (findBasePath / "index.html") + + -- Static assets + let staticFiles := #[ ("style.css", styleCss), ("favicon.svg", faviconSvg), ("declaration-data.js", declarationDataCenterJs), @@ -65,28 +70,14 @@ def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do ("jump-src.js", jumpSrcJs), ("expand-nav.js", expandNavJs), ("how-about.js", howAboutJs), - ("search.html", searchHtml), ("search.js", searchJs), ("mathjax-config.js", mathjaxConfigJs), ("instances.js", instancesJs), ("importedBy.js", importedByJs), - ("index.html", indexHtml), - ("foundational_types.html", foundationalTypesHtml), - ("404.html", notFoundHtml), - ("navbar.html", navbarHtml), - ("references.html", referencesHtml), - ("tactics.html", tacticsHtml), - ] - for (fileName, content) in docGenStatic do - FS.writeFile (basePath config.buildDir / fileName) content - - let findHtml := ReaderT.run find { config with depthToRoot := 1 } |>.toString - let findStatic := #[ - ("index.html", findHtml), - ("find.js", findJs) ] - for (fileName, content) in findStatic do - FS.writeFile (findBasePath config.buildDir / fileName) content + for (fileName, 
content) in staticFiles do + FS.writeFile (bp / fileName) content + FS.writeFile (findBasePath / "find.js") findJs /-- Custom source linker type: given an optional source URL and module name, returns a function from declaration range to URL -/ abbrev SourceLinkerFn := Option String → Name → Option DeclarationRange → String @@ -95,7 +86,7 @@ abbrev SourceLinkerFn := Option String → Name → Option DeclarationRange → Each task loads its module from DB, renders HTML, and writes output files. The linking context provides cross-module linking without loading all module data upfront. When `targetModules` is provided, only those modules are rendered (but linking uses all modules). -/ -def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.FilePath) +def htmlOutputResultsParallel (baseConfig : SiteBaseConfig) (dbPath : System.FilePath) (linkCtx : LinkingContext) (targetModules : Array Name := linkCtx.moduleNames) (sourceLinker? : Option SourceLinkerFn := none) @@ -125,21 +116,26 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi -- path: 'basePath/module/components/till/last.html' -- The last component is the file name, so we drop it from the depth to root. 
- let moduleConfig := { baseConfig with - depthToRoot := modName.components.dropLast.length - currentName := some modName - } - let (moduleHtml, cfg) := moduleToHtml module |>.run {} config moduleConfig - let (tactics, cfg) := module.tactics.mapM TacticInfo.docStringToHtml |>.run cfg config baseConfig - if not cfg.errors.isEmpty then - throw <| IO.userError s!"There are errors when generating HTML for '{modName}': {cfg.errors}" - - -- Write HTML file let relFilePath := basePathComponent / moduleNameToFile modName let filePath := baseConfig.buildDir / relFilePath if let .some d := filePath.parent then FS.createDirAll d - FS.writeFile filePath moduleHtml.toString + + let handle ← FS.Handle.mk filePath .write + let fileStream := FS.Stream.ofHandle handle + let moduleConfig : SiteBaseContext := { + toSiteBaseConfig := { baseConfig with + depthToRoot := modName.components.dropLast.length + currentName := some modName + } + stream := fileStream + } + let (_, cfg) ← moduleToHtml module |>.run {} config moduleConfig + + -- Run tactic docstring rendering + let (tactics, cfg) ← module.tactics.mapM TacticInfo.docStringToHtml |>.eval cfg config baseConfig + if not cfg.errors.isEmpty then + throw <| IO.userError s!"There are errors when generating HTML for '{modName}': {cfg.errors}" -- Write backrefs JSON FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"backrefs-{module.name}.json") @@ -148,7 +144,7 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi saveTacticsJSON (declarationsBasePath baseConfig.buildDir / s!"tactics-{module.name}.json") tactics -- Generate declaration data JSON for search - let (jsonDecls, _) := Module.toJson module |>.run {} config baseConfig + let (jsonDecls, _) ← Module.toJson module |>.eval {} config baseConfig FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") jsonDecls.compress @@ -162,8 +158,8 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) 
(dbPath : System.Fi | .error e => throw e return outputs -def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : - IO SiteBaseContext := do +def getSimpleBaseConfig (buildDir : System.FilePath) (hierarchy : Hierarchy) : + IO SiteBaseConfig := do let contents ← FS.readFile (declarationsBasePath buildDir / "references.json") <|> (pure "[]") match Json.parse contents with | .error err => @@ -181,9 +177,11 @@ def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : refs := refs } -def htmlOutputIndex (baseConfig : SiteBaseContext) : IO Unit := do +def htmlOutputIndex (baseConfig : SiteBaseConfig) : IO Unit := do htmlOutputSetup baseConfig + let ref ← IO.mkRef {} + let ctx : SiteBaseContext := { toSiteBaseConfig := baseConfig, stream := IO.FS.Stream.ofBuffer ref } let mut index : JsonIndex := {} for entry in ← System.FilePath.readDir (declarationsBasePath baseConfig.buildDir) do if entry.fileName.startsWith "declaration-data-" && entry.fileName.endsWith ".bmp" then @@ -196,7 +194,7 @@ def htmlOutputIndex (baseConfig : SiteBaseContext) : IO Unit := do | .error err => throw <| IO.userError s!"failed to parse file '{entry.path}': {err}" | .ok (module : JsonModule) => - index := index.addModule module |>.run baseConfig + index ← (index.addModule module).run ctx let finalJson := toJson index -- The root JSON for find @@ -285,7 +283,7 @@ def updateNavbarFromDisk (buildDir : System.FilePath) : IO Unit := do match fromJson? 
jsonContent with | .error _ => pure #[] | .ok refs => pure refs - let baseConfig : SiteBaseContext := { + let baseConfig : SiteBaseConfig := { buildDir := buildDir depthToRoot := 0 currentName := none @@ -293,7 +291,6 @@ def updateNavbarFromDisk (buildDir : System.FilePath) : IO Unit := do refs := refs } -- Regenerate navbar - let navbarHtml := ReaderT.run navbar baseConfig |>.toString - FS.writeFile (docDir / "navbar.html") navbarHtml + runHtmlToFile navbar baseConfig (docDir / "navbar.html") end DocGen4 diff --git a/DocGen4/Output/Arg.lean b/DocGen4/Output/Arg.lean index 536f7268..d9f04ddf 100644 --- a/DocGen4/Output/Arg.lean +++ b/DocGen4/Output/Arg.lean @@ -9,14 +9,15 @@ open scoped DocGen4.Jsx Render an `Arg` as HTML, adding opacity effects etc. depending on what type of binder it has. -/ -def argToHtml (arg : Process.Arg) : HtmlM Html := do - let node ← renderedCodeToHtml arg.binder - let inner := [node] - let html := Html.element "span" false #[("class", "decl_args")] #[inner] +def argToHtml (arg : Process.Arg) : HtmlM Unit := do + let inner : HtmlM Unit := + ( + {renderedCodeToHtml arg.binder} + ) if arg.implicit then - return {html} + ({inner}) else - return html + inner end Output end DocGen4 diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index fc6501ca..47ef6d50 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -10,6 +10,7 @@ import DocGen4.RenderedCode namespace DocGen4.Output open scoped DocGen4.Jsx +open DocGen4 (Raw escape) open Lean System Widget Elab Process def basePathComponent := "doc" @@ -44,15 +45,14 @@ structure BackrefItem where deriving FromJson, ToJson, Inhabited /-- -The context used in the `BaseHtmlM` monad for HTML templating. +Site configuration without a rendering stream. Used by callers that only need site metadata, but +won't generate HTML, such as JSON generation. -/ -structure SiteBaseContext where - +structure SiteBaseConfig where /-- The build directory (provided by lake). 
-/ buildDir : System.FilePath - /-- The module hierarchy as a tree structure. -/ @@ -71,17 +71,96 @@ structure SiteBaseContext where -/ refs : Array BibItem +/-- +The context used in the `BaseHtmlM` monad for HTML templating. +Extends `SiteBaseConfig` with a stream for writing HTML output. +-/ +structure SiteBaseContext extends SiteBaseConfig where + /-- + The stream to write HTML output to. + -/ + stream : IO.FS.Stream + +def setCurrentName (name : Name) (ctx : SiteBaseContext) := { ctx with currentName := some name } +def SiteBaseConfig.setCurrentName (name : Name) (cfg : SiteBaseConfig) := { cfg with currentName := some name } + +abbrev BaseHtmlT := ReaderT SiteBaseContext +abbrev BaseHtmlM := BaseHtmlT IO + +/-! ## Write helpers + +These are the concrete implementations referenced by JSX macro expansion. +They read the stream from `SiteBaseContext` via the reader monad. -/ + +section +variable [Monad m] [MonadReaderOf SiteBaseContext m] [MonadLiftT IO m] + +@[always_inline, inline] def putHtml (s : String) : m Unit := do + (← read).stream.putStr s + +@[always_inline, inline] def putEscaped (s : String) : m Unit := putHtml (escape s) + + + +namespace Html + +/-- Writes escaped text content to the HTML stream. -/ +@[always_inline, inline] def text (s : String) : m Unit := putEscaped s + +/-- Writes raw (unescaped) HTML content to the stream. -/ +@[always_inline, inline] def rawText (s : String) : m Unit := putHtml s + +end Html + +def putOpenTag (tag : String) (attrs : Array (String × String)) : m Unit := do + putHtml s!"<{tag}" + for (k, v) in attrs do putHtml s!" {k}=\"{escape v}\"" + putHtml ">" + +@[always_inline, inline] def putCloseTag (tag : String) : m Unit := putHtml s!"" + +namespace Html + +/-- Writes an HTML element with open/close tags wrapping a body action. 
-/ +def element (tag : String) (attrs : Array (String × String)) (body : m Unit) : m Unit := do + putOpenTag tag attrs + body + putCloseTag tag + +end Html + +scoped instance : Coe String (m Unit) where + coe s := putEscaped s + +scoped instance : Coe Raw (m Unit) where + coe r := putHtml r.html + +end + +/-- +The writable state used in the `HtmlM` monad for HTML templating. +-/ +structure SiteState where + /-- + The list of back references, as an array. + -/ + backrefs : Array BackrefItem := #[] + /-- + The errors occurred during the process. + -/ + errors : String := "" + /-- Declaration decorator function type: given a module name, declaration name, and declaration kind, -returns optional extra HTML to inject into the declaration's rendering. -This enables external tools to add badges, links, or other decorations to declarations. +writes optional extra HTML to inject into the declaration's rendering. This enables external tools +to add badges, links, or other decorations to declarations. -/ -abbrev DeclarationDecoratorFn := Name → Name → String → Array Html +abbrev DeclarationDecoratorFn := Name → Name → String → BaseHtmlM Unit /-- The default declaration decorator that produces no extra HTML. -/ -def defaultDeclarationDecorator : DeclarationDecoratorFn := fun _ _ _ => #[] +def defaultDeclarationDecorator : DeclarationDecoratorFn := fun _ _ _ => pure () /-- The read-only context used in the `HtmlM` monad for HTML templating. @@ -101,45 +180,22 @@ structure SiteContext where refsMap : Std.HashMap String BibItem /-- A function to decorate declarations with extra HTML (e.g., verification badges). - Receives (moduleName, declarationName, declarationKind) and returns extra HTML. + Receives (moduleName, declarationName, declarationKind) and writes extra HTML. Defaults to producing no extra HTML. -/ declarationDecorator : DeclarationDecoratorFn := defaultDeclarationDecorator -/-- -The writable state used in the `HtmlM` monad for HTML templating. 
--/ -structure SiteState where - /-- - The list of back references, as an array. - -/ - backrefs : Array BackrefItem := #[] - /-- - The errors occurred during the process. - -/ - errors : String := "" - -def setCurrentName (name : Name) (ctx : SiteBaseContext) := {ctx with currentName := some name} - -abbrev BaseHtmlT := ReaderT SiteBaseContext -abbrev BaseHtmlM := BaseHtmlT Id - abbrev HtmlT (m) := StateT SiteState <| ReaderT SiteContext <| BaseHtmlT m -abbrev HtmlM := HtmlT Id +abbrev HtmlM := HtmlT IO def HtmlT.run (x : HtmlT m α) (state : SiteState) (ctx : SiteContext) (baseCtx : SiteBaseContext) : m (α × SiteState) := StateT.run x state |>.run ctx |>.run baseCtx def HtmlM.run (x : HtmlM α) (state : SiteState) (ctx : SiteContext) - (baseCtx : SiteBaseContext) : α × SiteState := - StateT.run x state |>.run ctx |>.run baseCtx |>.run + (baseCtx : SiteBaseContext) : IO (α × SiteState) := + HtmlT.run x state ctx baseCtx -instance [Monad m] : MonadLift HtmlM (HtmlT m) where - monadLift x := do return (x.run (← getThe SiteState) (← readThe SiteContext) (← readThe SiteBaseContext)).1 - -instance [Monad m] : MonadLift BaseHtmlM (BaseHtmlT m) where - monadLift x := do return x.run (← readThe SiteBaseContext) /-- Add a backref of the given `citekey` and `funName` to current document, and returns it. -/ def addBackref (citekey funName : String) : HtmlM BackrefItem := do @@ -157,14 +213,13 @@ def addError (err : String) : HtmlM Unit := do modify fun cfg => { cfg with errors := cfg.errors ++ err ++ "\n" } /-- -Obtains the root URL as a relative one to the current depth. +Obtains the root URL relative to the given depth. 
-/ def getRoot : BaseHtmlM String := do - let rec go: Nat -> String - | 0 => "./" - | Nat.succ n' => "../" ++ go n' - let d <- SiteBaseContext.depthToRoot <$> read - return (go d) + let rec go : Nat → String + | 0 => "./" + | n + 1 => "../" ++ go n + return go (← read).depthToRoot def getHierarchy : BaseHtmlM Hierarchy := do return (← read).hierarchy def getCurrentName : BaseHtmlM (Option Name) := do return (← read).currentName @@ -172,28 +227,19 @@ def getResult : HtmlM AnalyzerResult := do return (← read).result def getSourceUrl (module : Name) (range : Option DeclarationRange): HtmlM String := do return (← read).sourceLinker module range def getDeclarationDecorator : HtmlM DeclarationDecoratorFn := do return (← read).declarationDecorator -/-- -If a template is meant to be extended because it for example only provides the -header but no real content this is the way to fill the template with content. -This is untyped so HtmlM and BaseHtmlM can be mixed. --/ -def templateExtends {α β} {m} [Bind m] (base : α → m β) (new : m α) : m β := - new >>= base - -def templateLiftExtends {α β} {m n} [Bind m] [MonadLiftT n m] (base : α → n β) (new : m α) : m β := - new >>= (monadLift ∘ base) /-- Returns the doc-gen4 link to a module name. -/ def moduleNameToLink (n : Name) : BaseHtmlM String := do + let root ← getRoot let parts := n.components.map (Name.toString (escape := False)) - return (← getRoot) ++ (parts.intersperse "/").foldl (· ++ ·) "" ++ ".html" + return root ++ (parts.intersperse "/").foldl (· ++ ·) "" ++ ".html" /-- Returns the HTML doc-gen4 link to a module name. -/ -def moduleToHtmlLink (module : Name) : BaseHtmlM Html := do - return {module.toString} +def moduleToHtmlLink (module : Name) : BaseHtmlM Unit := do + {module.toString} /-- Returns the path to the HTML file that contains information about a module. @@ -236,28 +282,28 @@ def declNameToLink (name : Name) : HtmlM String := do /-- Returns the HTML doc-gen4 link to a declaration name. 
-/ -def declNameToHtmlLink (name : Name) : HtmlM Html := do - return {name.toString} +def declNameToHtmlLink (name : Name) : HtmlM Unit := do + {name.toString} /-- -Returns a name splitted into parts. -Together with "break_within" CSS class this helps browser to break a name -nicely. +Writes a name split into parts, each wrapped in a span. +Together with "break_within" CSS class this helps browser to break a name nicely. -/ -def breakWithin (name: String) : (Array Html) := - name.splitOn "." - |> .map (fun (s: String) => {s}) - |> .intersperse "." - |> List.toArray +def breakWithin [Monad m] [MonadReaderOf SiteBaseContext m] [MonadLiftT IO m] (name: String) : m Unit := do + let parts := name.splitOn "." + for part in parts, i in [:parts.length] do + if i > 0 then + Html.text "." + ({part}) /-- Returns the HTML doc-gen4 link to a declaration name with "break_within" set as class. -/ -def declNameToHtmlBreakWithinLink (name : Name) : HtmlM Html := do - return - [breakWithin name.toString] - +def declNameToHtmlBreakWithinLink (name : Name) : HtmlM Unit := do + + {breakWithin name.toString} + /-- For a name, try to find a linkable target by stripping suffix components @@ -296,97 +342,140 @@ where .str (go parent) s | _ => .anonymous +/-- Captures the HTML output of an HtmlM action and also returns its result. -/ +def captureHtmlWith (action : HtmlM α) : HtmlM (String × α) := do + let ref ← IO.mkRef {} + let bufStream := IO.FS.Stream.ofBuffer ref + let result ← withTheReader SiteBaseContext (fun ctx => { ctx with stream := bufStream }) action + return (String.fromUTF8! (← ref.get).data, result) + +/-- Captures the HTML output of an HtmlM action as a string. -/ +def captureHtml (action : HtmlM Unit) : HtmlM String := do + return (← captureHtmlWith action).1 + +/-- Captures the HTML output of a BaseHtmlM action as a string. 
-/ +def captureBaseHtml (action : BaseHtmlM Unit) : BaseHtmlM String := do + let ref ← IO.mkRef {} + let bufStream := IO.FS.Stream.ofBuffer ref + withTheReader SiteBaseContext (fun ctx => { ctx with stream := bufStream }) action + return String.fromUTF8! (← ref.get).data + +/-- Runs an HtmlM action inside a BaseHtmlM context using an IO.Ref for state. -/ +def runHtmlInBase (action : HtmlM Unit) (siteCtx : SiteContext) (stateRef : IO.Ref SiteState) : BaseHtmlM Unit := do + let baseCtx ← read + let s ← stateRef.get + let ((), s') ← HtmlM.run action s siteCtx baseCtx + stateRef.set s' + +/-- Runs a BaseHtmlM action, writing output directly to a file. -/ +def runHtmlToFile (action : BaseHtmlM Unit) (config : SiteBaseConfig) (path : FilePath) : IO Unit := do + let handle ← IO.FS.Handle.mk path .write + let fileStream := IO.FS.Stream.ofHandle handle + action.run { toSiteBaseConfig := config, stream := fileStream } + +/-- Runs an HtmlM action for its return value, auto-providing a buffer stream. -/ +def HtmlM.eval (x : HtmlM α) (state : SiteState) (ctx : SiteContext) + (config : SiteBaseConfig) : IO (α × SiteState) := do + let ref ← IO.mkRef {} + let bufStream := IO.FS.Stream.ofBuffer ref + HtmlM.run x state ctx { toSiteBaseConfig := config, stream := bufStream } + /-- -Convert RenderedCode to HTML with declaration links. -Returns (hasAnchor, html) where hasAnchor indicates if the result contains an anchor tag. -This is used to avoid creating nested anchors (invalid HTML). +Converts RenderedCode to HTML with declaration links. Returns `true` if the result contains an +anchor tag and thus shouldn't be wrapped by a surrounding one. 
-/ -partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM (Bool × Array Html) := do +partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM Bool := do match code with - | .text t => return (false, #[t]) + | .text t => + Html.text t + return false | .append xs => - xs.foldlM (init := (false, #[])) fun (a?, acc) t => do - let (a?', acc') ← renderedCodeToHtmlAux t - pure (a? || a?', acc ++ acc') + let mut hasAnchor := false + for t in xs do + let a? ← renderedCodeToHtmlAux t + hasAnchor := hasAnchor || a? + return hasAnchor | .tag tag inner => - let (innerHasAnchor, innerHtml) ← renderedCodeToHtmlAux inner match tag with | .const name => let name2ModIdx := (← getResult).name2ModIdx + -- Always capture inner to a string since we may need to wrap it + let (innerStr, innerHasAnchor) ← captureHtmlWith (renderedCodeToHtmlAux inner) if name2ModIdx.contains name then - let link ← declNameToLink name - -- Avoid nested anchors: if inner content already has anchors, don't wrap again - -- Match original behavior: no fn wrapper when const is in name2ModIdx if innerHasAnchor then - return (true, innerHtml) + Html.rawText innerStr + return true else - return (true, #[[innerHtml]]) + let link ← declNameToLink name + ({Html.rawText innerStr}) + return true else - -- Name not in name2ModIdx - try to find a linkable parent - -- This handles both: - -- 1. Private names like `_private.Init.Prelude.0.Lean.Name.hash._proof_1` - -- 2. Auxiliary names like `Std.Do.Option.instWPMonad._proof_2` let nameToSearch := Lean.privateToUserName? name |>.getD name match findLinkableParent name2ModIdx nameToSearch with | some target => - let link ← declNameToLink target if innerHasAnchor then - return (true, innerHtml) + Html.rawText innerStr + return true else - return (true, #[[innerHtml]]) + let link ← declNameToLink target + ({Html.rawText innerStr}) + return true | none => - -- For private names, fall back to linking to the module itself (no anchor) match Lean.privatePrefix? 
name with | some pfx => let modName := moduleFromPrivatePrefix pfx if modName != .anonymous then - let link ← moduleNameToLink modName if innerHasAnchor then - return (true, innerHtml) + Html.rawText innerStr + return true else - return (true, #[[innerHtml]]) + let link ← moduleNameToLink modName + ({Html.rawText innerStr}) + return true else - return (innerHasAnchor, fn innerHtml) + ({Html.rawText innerStr}) + return innerHasAnchor | none => - return (innerHasAnchor, fn innerHtml) + ({Html.rawText innerStr}) + return innerHasAnchor | .sort _ => - let link := s!"{← getRoot}foundational_types.html" - -- Avoid nested anchors - -- Match original behavior: no fn wrapper when creating sort link + let (innerStr, innerHasAnchor) ← captureHtmlWith (renderedCodeToHtmlAux inner) if innerHasAnchor then - return (true, innerHtml) + Html.rawText innerStr + return true else - return (true, #[[innerHtml]]) - -- For Phase 1 compatibility: treat keyword/string as plain content (no extra styling) - -- This matches the original infoFormatToHtml behavior - | .keyword => return (innerHasAnchor, innerHtml) - | .string => return (innerHasAnchor, innerHtml) - | .otherExpr => return (innerHasAnchor, fn innerHtml) -where - fn (html : Array Html) : Array Html := #[[html]] + let link := s!"{← getRoot}foundational_types.html" + ({Html.rawText innerStr}) + return true + | .keyword => + renderedCodeToHtmlAux inner + | .string => + renderedCodeToHtmlAux inner + | .otherExpr => + let (innerStr, innerHasAnchor) ← captureHtmlWith (renderedCodeToHtmlAux inner) + ({Html.rawText innerStr}) + return innerHasAnchor /-- -Convert RenderedCode to HTML with declaration links. +Converts RenderedCode to HTML with declaration links. 
-/ -def renderedCodeToHtml (code : RenderedCode) : HtmlM (Array Html) := - Prod.snd <$> renderedCodeToHtmlAux code +def renderedCodeToHtml (code : RenderedCode) : HtmlM Unit := do + let _ ← renderedCodeToHtmlAux code /- Turns a `CodeWithInfos` object, that is basically a Lean syntax tree with information about what the identifiers mean, into an HTML object that links to as much information as possible. -/ -def infoFormatToHtml (i : CodeWithInfos) : HtmlM (Array Html) := +def infoFormatToHtml (i : CodeWithInfos) : HtmlM Unit := renderedCodeToHtml (renderTagged i) -def baseHtmlHeadDeclarations : BaseHtmlM (Array Html) := do - return #[ - , - , - , - , - , - - ] +def baseHtmlHeadDeclarations : BaseHtmlM Unit := do + () + () + () + () + () + () end DocGen4.Output diff --git a/DocGen4/Output/Class.lean b/DocGen4/Output/Class.lean index 521afc3c..34074d43 100644 --- a/DocGen4/Output/Class.lean +++ b/DocGen4/Output/Class.lean @@ -8,14 +8,13 @@ namespace Output open scoped DocGen4.Jsx open Lean -def classInstancesToHtml (className : Name) : HtmlM Html := do - pure -
    - Instances -
      -
      +def classInstancesToHtml (className : Name) : HtmlM Unit := do +
      + Instances +
        +
        -def classToHtml (i : Process.ClassInfo) : HtmlM (Array Html) := do +def classToHtml (i : Process.ClassInfo) : HtmlM Unit := do structureToHtml i end Output diff --git a/DocGen4/Output/ClassInductive.lean b/DocGen4/Output/ClassInductive.lean index 746b0054..06a70836 100644 --- a/DocGen4/Output/ClassInductive.lean +++ b/DocGen4/Output/ClassInductive.lean @@ -7,7 +7,7 @@ import DocGen4.Process namespace DocGen4 namespace Output -def classInductiveToHtml (i : Process.ClassInductiveInfo) : HtmlM (Array Html) := do +def classInductiveToHtml (i : Process.ClassInductiveInfo) : HtmlM Unit := do inductiveToHtml i end Output diff --git a/DocGen4/Output/Definition.lean b/DocGen4/Output/Definition.lean index c95c39cd..68bba962 100644 --- a/DocGen4/Output/Definition.lean +++ b/DocGen4/Output/Definition.lean @@ -8,8 +8,8 @@ namespace Output open scoped DocGen4.Jsx open Lean Widget -def equationToHtml (c : RenderedCode) : HtmlM Html := do - return
      • [← renderedCodeToHtml c]
      • +def equationToHtml (c : RenderedCode) : HtmlM Unit := do +
      • {renderedCodeToHtml c}
      • /-- Attempt to render all `simp` equations for this definition. At a size @@ -17,31 +17,23 @@ defined in `equationLimit` we stop trying since they: - are too ugly to read most of the time - take too long -/ -def equationsToHtml (i : Process.DefinitionInfo) : HtmlM (Array Html) := do +def equationsToHtml (i : Process.DefinitionInfo) : HtmlM Unit := do if let some eqs := i.equations then - let equationsHtml ← eqs.mapM equationToHtml if i.equationsWereOmitted then - return #[ -
        - Equations -
          -
        • One or more equations did not get rendered due to their size.
        • - [equationsHtml] -
        -
        - ] +
        + Equations +
          +
        • One or more equations did not get rendered due to their size.
        • + {eqs.forM equationToHtml} +
        +
        else - return #[ -
        - Equations -
          - [equationsHtml] -
        -
        - ] - else - return #[] +
        + Equations +
          + {eqs.forM equationToHtml} +
        +
        end Output end DocGen4 - diff --git a/DocGen4/Output/DocString.lean b/DocGen4/Output/DocString.lean index f757d2df..28762505 100644 --- a/DocGen4/Output/DocString.lean +++ b/DocGen4/Output/DocString.lean @@ -7,6 +7,9 @@ open Lean DocGen4.Process namespace DocGen4 namespace Output +open scoped DocGen4.Jsx +open DocGen4 (Raw escape) + /-- Auxiliary function for `splitAround`. -/ @[specialize] partial def splitAroundAux (s : String) (p : Char → Bool) (b i : String.Pos.Raw) (r : List String) : List String := if String.Pos.Raw.atEnd s i then @@ -153,25 +156,23 @@ def isLeanCode (lang : Array MD4Lean.AttrText) : Bool := /-- Automatically adds intra-documentation links for code content. -/ -def autoLinkInline (ss : Array String) : HtmlM (Array Html) := do - let mut result : Array Html := #[] +def autoLinkInline (ss : Array String) : HtmlM Unit := do for s in ss do let parts := splitAround s unicodeToSplit for part in parts do match ← nameToLink? part with | some link => - result := result.push <| Html.element "a" true #[("href", link)] #[Html.text part] + ({part}) | none => let sHead := part.dropEndWhile (· != '.') |>.copy let sTail := part.takeEndWhile (· != '.') |>.copy match ← nameToLink? sTail with | some link => if !sHead.isEmpty then - result := result.push <| Html.text sHead - result := result.push <| Html.element "a" true #[("href", link)] #[Html.text sTail] + Html.text sHead + ({sTail}) | none => - result := result.push <| Html.text part - return result + Html.text part where unicodeToSplit (c : Char) : Bool := -- separator (`Z`), other (`C`) @@ -183,25 +184,17 @@ mutual Renders a single `MD4Lean.Text` inline element to HTML, while processing custom extensions such as bibliography items. `inLink` suppresses auto-linking inside `` to avoid nested anchors. 
-/ -partial def renderText (t : MD4Lean.Text) (funName : String) (inLink : Bool := false) : HtmlM (Array Html) := do +partial def renderText (t : MD4Lean.Text) (funName : String) (inLink : Bool := false) : HtmlM Unit := do match t with - | .normal s => return #[Html.text s] - | .nullchar => return #[Html.raw "\uFFFD"] - | .br _ => return #[Html.raw "
        \n"] -- This avoids

        , which is incorrect HTML5 - | .softbr _ => return #[Html.raw "\n"] - | .entity s => return #[Html.raw s] - | .em ts => - let inner ← renderTexts ts funName inLink - return #[Html.element "em" true #[] inner] - | .strong ts => - let inner ← renderTexts ts funName inLink - return #[Html.element "strong" true #[] inner] - | .u ts => - let inner ← renderTexts ts funName inLink - return #[Html.element "u" true #[] inner] - | .del ts => - let inner ← renderTexts ts funName inLink - return #[Html.element "del" true #[] inner] + | .normal s => Html.text s + | .nullchar => Html.rawText "\uFFFD" + | .br _ => Html.rawText "
        \n" -- This avoids

        , which is incorrect HTML5 + | .softbr _ => Html.rawText "\n" + | .entity s => Html.rawText s + | .em ts => ({renderTexts ts funName inLink}) + | .strong ts => ({renderTexts ts funName inLink}) + | .u ts => ({renderTexts ts funName inLink}) + | .del ts => ({renderTexts ts funName inLink}) | .a href title _isAuto children => let hrefStr := attrTextToString href let titleStr := attrTextToString title @@ -210,138 +203,109 @@ partial def renderText (t : MD4Lean.Text) (funName : String) (inLink : Bool := f match bibitem with | .some bibitem => let newBackref ← addBackref bibitem.citekey funName - let childrenHtml ← renderTexts children funName (inLink := true) let changeName : Bool := if let #[.normal s] := children then s == bibitem.citekey else false - let newChildren : Array Html := - if changeName then #[Html.text bibitem.tag] else childrenHtml - let mut attrs : Array (String × String) := #[("href", extHref)] - attrs := attrs.push ("title", bibitem.plaintext) - attrs := attrs.push ("id", s!"_backref_{newBackref.index}") - return #[Html.element "a" true attrs newChildren] + (
        + {if changeName then Html.text bibitem.tag + else renderTexts children funName (inLink := true)} + ) | .none => - let childrenHtml ← renderTexts children funName (inLink := true) let mut attrs : Array (String × String) := #[("href", extHref)] if !titleStr.isEmpty then attrs := attrs.push ("title", titleStr) - return #[Html.element "a" true attrs childrenHtml] + ({renderTexts children funName (inLink := true)}) | .img src title alt => - let srcStr := Html.escape (attrTextToString src) - let titleStr := Html.escape (attrTextToString title) + let srcStr := escape (attrTextToString src) + let titleStr := escape (attrTextToString title) let altTexts := alt.toList.map textToPlaintext - let altStr := Html.escape (String.join altTexts) + let altStr := escape (String.join altTexts) let mut s := s!"\"{altStr}\""" - return #[Html.raw s] + Html.rawText s | .code ss => - let inner ← if inLink then - pure #[Html.text (String.join ss.toList)] - else - autoLinkInline ss - return #[Html.element "code" true #[] inner] + ( + {if inLink then Html.text (String.join ss.toList) + else autoLinkInline ss} + ) -- Math is rendered with dollar signs because MathJax will later render them | .latexMath ss => let content := String.join ss.toList - return #[Html.raw s!"${Html.escape content}$"] + Html.rawText s!"${escape content}$" -- Math is rendered with dollar signs because MathJax will later render them | .latexMathDisplay ss => let content := String.join ss.toList - return #[Html.raw s!"$${Html.escape content}$$"] + Html.rawText s!"$${escape content}$$" | .wikiLink target children => - let inner ← renderTexts children funName inLink let targetStr := attrTextToString target - return #[Html.element "x-wikilink" true #[("data-target", targetStr)] inner] + Html.element "x-wikilink" #[("data-target", targetStr)] (renderTexts children funName inLink) /-- Render an array of `MD4Lean.Text` inline elements to HTML. 
-/ -partial def renderTexts (texts : Array MD4Lean.Text) (funName : String) (inLink : Bool := false) : HtmlM (Array Html) := do - let mut result : Array Html := #[] +partial def renderTexts (texts : Array MD4Lean.Text) (funName : String) (inLink : Bool := false) : HtmlM Unit := do for t in texts do - result := result ++ (← renderText t funName inLink) - return result + renderText t funName inLink /-- Render a single `MD4Lean.Block` element to HTML. -/ -partial def renderBlock (block : MD4Lean.Block) (funName : String) (tight : Bool := false) : HtmlM (Array Html) := do +partial def renderBlock (block : MD4Lean.Block) (funName : String) (tight : Bool := false) : HtmlM Unit := do match block with | .p texts => - let inner ← renderTexts texts funName if tight then - return inner + renderTexts texts funName else - return #[Html.element "p" true #[] inner] + (

        {renderTexts texts funName}

        ) | .ul isTight _mark items => - let mut lis : Array Html := #[] - for item in items do - let liHtml ← renderLi item funName isTight - lis := lis ++ liHtml - return #[Html.element "ul" true #[] lis] + (
          {items.forM fun item => renderLi item funName isTight}
        ) | .ol isTight start _mark items => - let mut lis : Array Html := #[] - for item in items do - let liHtml ← renderLi item funName isTight - lis := lis ++ liHtml let attrs : Array (String × String) := if start != 1 then #[("start", toString start)] else #[] - return #[Html.element "ol" true attrs lis] - | .hr => return #[Html.raw "
        \n"] + (
          {items.forM fun item => renderLi item funName isTight}
        ) + | .hr => Html.rawText "
        \n" | .header level texts => + -- Dynamic tag name requires Html.element let id := mdGetHeadingId texts - let inner ← renderTexts texts funName - let anchor := Html.element "a" true #[("class", "hover-link"), ("href", s!"#{id}")] #[Html.text "#"] - let children := inner.push (Html.text " ") |>.push anchor let tag := s!"h{level}" - return #[Html.element tag true #[("id", id), ("class", "markdown-heading")] children] + Html.element tag #[("id", id), ("class", "markdown-heading")] do + renderTexts texts funName + Html.text " " + (#) | .code _info lang _fenceChar content => - let langStr := attrTextToString lang let codeAttrs : Array (String × String) := + let langStr := attrTextToString lang if !langStr.isEmpty then #[("class", s!"language-{langStr}")] else #[] - let inner : Array Html ← - if isLeanCode lang then - autoLinkInline content - else - pure #[Html.text (String.join content.toList)] - let codeElem := Html.element "code" true codeAttrs inner - return #[Html.element "pre" true #[] #[codeElem]] + (
        
        +      {if isLeanCode lang then autoLinkInline content
        +       else Html.text (String.join content.toList)}
        +    
        ) | .html content => - return #[Html.raw (String.join content.toList)] + Html.rawText (String.join content.toList) | .blockquote blocks => - let mut inner : Array Html := #[] - for b in blocks do - inner := inner ++ (← renderBlock b funName) - return #[Html.element "blockquote" true #[] inner] + (
        {blocks.forM (renderBlock · funName)}
        ) | .table head body => - let mut headCells : Array Html := #[] - for cell in head do - let cellHtml ← renderTexts cell funName - headCells := headCells.push (Html.element "th" true #[] cellHtml) - let headRow := Html.element "tr" true #[] headCells - let thead := Html.element "thead" true #[] #[headRow] - let mut bodyRows : Array Html := #[] - for row in body do - let mut rowCells : Array Html := #[] - for cell in row do - let cellHtml ← renderTexts cell funName - rowCells := rowCells.push (Html.element "td" true #[] cellHtml) - bodyRows := bodyRows.push (Html.element "tr" true #[] rowCells) - let tbody := Html.element "tbody" true #[] bodyRows - return #[Html.element "table" true #[] #[thead, tbody]] + ( + + {head.forM fun cell => ()} + + + {body.forM fun row => + ({row.forM fun cell => ()})} + +
        {renderTexts cell funName}
        {renderTexts cell funName}
        ) /-- Render a list item to HTML. -/ -partial def renderLi (li : MD4Lean.Li MD4Lean.Block) (funName : String) (tight : Bool) : HtmlM (Array Html) := do - let mut inner : Array Html := #[] - if li.isTask then - let checked := li.taskChar == some 'x' || li.taskChar == some 'X' - if checked then - inner := inner.push (Html.raw "") - else - inner := inner.push (Html.raw "") - for b in li.contents do - inner := inner ++ (← renderBlock b funName tight) - return #[Html.element "li" true #[] inner] +partial def renderLi (li : MD4Lean.Li MD4Lean.Block) (funName : String) (tight : Bool) : HtmlM Unit := do + (
      • + {do if li.isTask then + let checked := li.taskChar == some 'x' || li.taskChar == some 'X' + if checked then + Html.rawText "" + else + Html.rawText ""} + {li.contents.forM fun b => renderBlock b funName tight} +
      • ) end @@ -361,8 +325,8 @@ partial def findAllReferences (refsMap : Std.HashMap String BibItem) (s : String else ret -/-- Convert docstring to Html. -/ -def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : HtmlM (Array Html) := do +/-- Convert docstring to Html, writing directly to stream. -/ +def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : HtmlM Unit := do let docString := match docString with | .inl md => md @@ -374,12 +338,12 @@ def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : let flags := MD4Lean.MD_DIALECT_GITHUB ||| MD4Lean.MD_FLAG_LATEXMATHSPANS ||| MD4Lean.MD_FLAG_NOHTML match MD4Lean.parse (docString ++ refsMarkdown) flags with | .some doc => - let mut result : Array Html := #[] for block in doc.blocks do - result := result ++ (← renderBlock block funName) - return result + renderBlock block funName | .none => addError <| "Error: failed to parse markdown:\n" ++ docString - return #[.raw "Error: failed to parse markdown: ", .text docString] + (Error: failed to parse markdown: ) + Html.text docString + end Output end DocGen4 diff --git a/DocGen4/Output/Find.lean b/DocGen4/Output/Find.lean index 726a1f88..e6437ac8 100644 --- a/DocGen4/Output/Find.lean +++ b/DocGen4/Output/Find.lean @@ -5,17 +5,17 @@ namespace Output open scoped DocGen4.Jsx open Lean +open DocGen4 (Raw) -def find : BaseHtmlM Html := do - pure - - - - - - - - +def find : BaseHtmlM Unit := do + + + + + + + + end Output end DocGen4 diff --git a/DocGen4/Output/FoundationalTypes.lean b/DocGen4/Output/FoundationalTypes.lean index 41ffdf2d..7f8419ce 100644 --- a/DocGen4/Output/FoundationalTypes.lean +++ b/DocGen4/Output/FoundationalTypes.lean @@ -5,8 +5,8 @@ namespace DocGen4.Output open scoped DocGen4.Jsx -def foundationalTypes : BaseHtmlM Html := templateLiftExtends (baseHtml "Foundational Types") do - pure <| +def foundationalTypes : BaseHtmlM Unit := do + baseHtmlGenerator "Foundational Types" do

        Foundational Types

        @@ -18,15 +18,15 @@ def foundationalTypes : BaseHtmlM Html := templateLiftExtends (baseHtml "Foundat

        Sort u

        Sort u is the type of types in Lean, and Sort u : Sort (u + 1).

        - {← instancesForToHtml `_builtin_sortu} + {instancesForToHtml `_builtin_sortu}

        Type u

        Type u is notation for Sort (u + 1).

        - {← instancesForToHtml `_builtin_typeu} + {instancesForToHtml `_builtin_typeu}

        Prop

        Prop is notation for Sort 0.

        - {← instancesForToHtml `_builtin_prop} + {instancesForToHtml `_builtin_prop}

        Pi types, {"(a : α) → β a"}

        The type of dependent functions is known as a pi type. diff --git a/DocGen4/Output/Index.lean b/DocGen4/Output/Index.lean index 95febccf..a6f14b23 100644 --- a/DocGen4/Output/Index.lean +++ b/DocGen4/Output/Index.lean @@ -11,8 +11,8 @@ namespace Output open scoped DocGen4.Jsx -def index : BaseHtmlM Html := do templateExtends (baseHtml "Index") <| - pure <| +def index : BaseHtmlM Unit := do + baseHtmlGenerator "Index" do

        Welcome to the documentation page

        diff --git a/DocGen4/Output/Inductive.lean b/DocGen4/Output/Inductive.lean index ca3ddd34..f3080252 100644 --- a/DocGen4/Output/Inductive.lean +++ b/DocGen4/Output/Inductive.lean @@ -9,33 +9,28 @@ namespace Output open scoped DocGen4.Jsx open Lean -def instancesForToHtml (typeName : Name) : BaseHtmlM Html := do - pure -
        - Instances For -
          -
          +def instancesForToHtml (typeName : Name) : BaseHtmlM Unit := do +
          + Instances For +
            +
            -def ctorToHtml (c : Process.ConstructorInfo) : HtmlM Html := do +def ctorToHtml (c : Process.ConstructorInfo) : HtmlM Unit := do let shortName := c.name.componentsRev.head!.toString let name := c.name.toString - let args ← c.args.mapM argToHtml + let args := c.args.forM argToHtml if let some doc := c.doc then - let renderedDoc ← docStringToHtml doc name - pure -
          • - {shortName} [args] {" : "} [← renderedCodeToHtml c.type] -
            [renderedDoc]
            -
          • + (
          • + {shortName} {args} {" : "} {renderedCodeToHtml c.type} +
            {docStringToHtml doc name}
            +
          • ) else - pure -
          • - {shortName} [args] {" : "} [← renderedCodeToHtml c.type] -
          • + (
          • + {shortName} {args} {" : "} {renderedCodeToHtml c.type} +
          • ) -def inductiveToHtml (i : Process.InductiveInfo) : HtmlM (Array Html) := do - let constructorsHtml :=
              [← i.ctors.toArray.mapM ctorToHtml]
            - return #[constructorsHtml] +def inductiveToHtml (i : Process.InductiveInfo) : HtmlM Unit := do +
              {i.ctors.forM ctorToHtml}
            end Output end DocGen4 diff --git a/DocGen4/Output/Module.lean b/DocGen4/Output/Module.lean index c6e4d3da..802e7a48 100644 --- a/DocGen4/Output/Module.lean +++ b/DocGen4/Output/Module.lean @@ -19,120 +19,115 @@ namespace Output open scoped DocGen4.Jsx open Lean Process +open DocGen4 (Raw) /-- Render the structures this structure extends from as HTML so it can be added to the top level. -/ -def structureInfoHeader (s : Process.StructureInfo) : HtmlM (Array Html) := do - let mut nodes := #[] +def structureInfoHeader (s : Process.StructureInfo) : HtmlM Unit := do if s.parents.size > 0 then - nodes := nodes.push extends - let mut parents := #[Html.text " "] + (extends) + Html.text " " for parent in s.parents, i in [0:s.parents.size] do if i > 0 then - parents := parents.push (Html.text ", ") - parents := parents ++ (← renderedCodeToHtml parent.type) - nodes := nodes ++ parents - return nodes + Html.text ", " + renderedCodeToHtml parent.type /-- Render the general header of a declaration containing its declaration type and name. -/ -def docInfoHeader (doc : DocInfo) : HtmlM Html := do - let mut nodes := #[] - nodes := nodes.push <| Html.element "span" false #[("class", "decl_kind")] #[doc.getKindDescription] - -- TODO: Can we inline if-then-else and avoid repeating here? - if doc.getSorried then - nodes := nodes.push {← declNameToHtmlBreakWithinLink doc.getName} - else - nodes := nodes.push {← declNameToHtmlBreakWithinLink doc.getName} - for arg in doc.getArgs do - nodes := nodes.push (← argToHtml arg) - - match doc with - | DocInfo.structureInfo i => nodes := nodes.append (← structureInfoHeader i) - | DocInfo.classInfo i => nodes := nodes.append (← structureInfoHeader i) - | _ => nodes := nodes - - nodes := nodes.push <| Html.element "span" true #[("class", "decl_args")] #[" :"] - nodes := nodes.push
            [← renderedCodeToHtml doc.getType]
            - return
            [nodes]
            +def docInfoHeader (doc : DocInfo) : HtmlM Unit := do +
            + {doc.getKindDescription} + {if doc.getSorried then + ( + {" "}{declNameToHtmlBreakWithinLink doc.getName}{" "} + ) + else + ( + {" "}{declNameToHtmlBreakWithinLink doc.getName}{" "} + )} + {doc.getArgs.forM argToHtml} + {match doc with + | DocInfo.structureInfo i => structureInfoHeader i + | DocInfo.classInfo i => structureInfoHeader i + | _ => pure ()} + : +
            {renderedCodeToHtml doc.getType}
            +
            /-- The main entry point for rendering a single declaration inside a given module. -/ -def docInfoToHtml (module : Name) (doc : DocInfo) : HtmlM Html := do +def docInfoToHtml (module : Name) (doc : DocInfo) : HtmlM Unit := do -- basic info like headers, types, structure fields, etc. - let docInfoHtml ← match doc with + let docInfoHtml : HtmlM Unit := match doc with | DocInfo.inductiveInfo i => inductiveToHtml i | DocInfo.structureInfo i => structureToHtml i | DocInfo.classInfo i => classToHtml i | DocInfo.classInductiveInfo i => classInductiveToHtml i - | _ => pure #[] - -- rendered doc stirng - let docStringHtml ← match doc.getDocString with + | _ => pure () + -- rendered doc string + let docStringHtml : HtmlM Unit := match doc.getDocString with | some s => docStringToHtml s doc.getName.toString - | none => pure #[] + | none => pure () -- extra information like equations and instances - let extraInfoHtml ← match doc with - | DocInfo.classInfo i => pure #[← classInstancesToHtml i.name] - | DocInfo.definitionInfo i => pure ((← equationsToHtml i) ++ #[← instancesForToHtml i.name]) + let extraInfoHtml : HtmlM Unit := match doc with + | DocInfo.classInfo i => do classInstancesToHtml i.name + | DocInfo.definitionInfo i => do equationsToHtml i; instancesForToHtml i.name | DocInfo.instanceInfo i => equationsToHtml i.toDefinitionInfo - | DocInfo.classInductiveInfo i => pure #[← classInstancesToHtml i.name] - | DocInfo.inductiveInfo i => pure #[← instancesForToHtml i.name] - | DocInfo.structureInfo i => pure #[← instancesForToHtml i.name] - | _ => pure #[] + | DocInfo.classInductiveInfo i => classInstancesToHtml i.name + | DocInfo.inductiveInfo i => instancesForToHtml i.name + | DocInfo.structureInfo i => instancesForToHtml i.name + | _ => pure () let attrs := doc.getAttrs - let attrsHtml := - if attrs.size > 0 then + let attrsHtml : HtmlM Unit := + if attrs.size > 0 then do let attrStr := "@[" ++ String.intercalate ", " doc.getAttrs.toList ++ "]" - 
#[Html.element "div" false #[("class", "attributes")] #[attrStr]] + (
            {attrStr}
            ) else - #[] + pure () -- custom decoration (e.g., verification badges from external tools) let decorator ← getDeclarationDecorator - let decoratorHtml := decorator module doc.getName doc.getKind let cssClass := "decl" ++ if doc.getSorried then " sorried" else "" - pure -
            -
            - - [decoratorHtml] - [attrsHtml] - {← docInfoHeader doc} - [docStringHtml] - [docInfoHtml] - [extraInfoHtml] + (
            +
            + + {decorator module doc.getName doc.getKind} + {attrsHtml} + {docInfoHeader doc} + {docStringHtml} + {docInfoHtml} + {extraInfoHtml}
            +
            ) /-- Rendering a module doc string, that is the ones with an ! after the opener as HTML. -/ -def modDocToHtml (mdoc : ModuleDoc) : HtmlM Html := do - pure -
            - [← docStringToHtml (.inl mdoc.doc) ""] -
            +def modDocToHtml (mdoc : ModuleDoc) : HtmlM Unit := do +
            + {docStringToHtml (.inl mdoc.doc) ""} +
            /-- Render a module member, that is either a module doc string or a declaration as HTML. -/ -def moduleMemberToHtml (module : Name) (member : ModuleMember) : HtmlM Html := do +def moduleMemberToHtml (module : Name) (member : ModuleMember) : HtmlM Unit := do match member with | ModuleMember.docInfo d => docInfoToHtml module d | ModuleMember.modDoc d => modDocToHtml d -def declarationToNavLink (module : Name) : Html := +def declarationToNavLink [Monad m] [MonadReaderOf SiteBaseContext m] [MonadLiftT IO m] (module : Name) : m Unit := do @@ -147,44 +142,49 @@ def getImports (module : Name) : HtmlM (Array Name) := do Sort the list of all modules this one is importing, linkify it and return the HTML. -/ -def importsHtml (moduleName : Name) : HtmlM (Array Html) := do +def importsHtml (moduleName : Name) : HtmlM Unit := do let imports := (← getImports moduleName).qsort Name.lt - imports.mapM (fun i => do return
          • {← moduleToHtmlLink i}
          • ) + for i in imports do +
          • {moduleToHtmlLink i}
          • /-- Render the internal nav bar (the thing on the right on all module pages). -/ -def internalNav (members : Array Name) (moduleName : Name) : HtmlM Html := do - pure - +def internalNav (members : Array Name) (moduleName : Name) : HtmlM Unit := do + /-- The main entry point to rendering the HTML for an entire module. -/ -def moduleToHtml (module : Process.Module) : HtmlM Html := withTheReader SiteBaseContext (setCurrentName module.name) do +def moduleToHtml (module : Process.Module) : HtmlM Unit := withTheReader SiteBaseContext (setCurrentName module.name) do let relevantMembers := module.members.filter Process.ModuleMember.shouldRender - let memberDocs ← relevantMembers.mapM (moduleMemberToHtml module.name) let memberNames := filterDocInfo relevantMembers |>.map DocInfo.getName - templateLiftExtends (baseHtmlGenerator module.name.toString) <| pure #[ - ← internalNav memberNames module.name, - Html.element "main" false #[] memberDocs - ] + let siteCtx ← readThe SiteContext + let stateRef ← IO.mkRef (← get) + liftM (baseHtmlGenerator module.name.toString do + runHtmlInBase (internalNav memberNames module.name) siteCtx stateRef + (
            + {relevantMembers.forM fun member => + runHtmlInBase (moduleMemberToHtml module.name member) siteCtx stateRef} +
            ) + : BaseHtmlM Unit) + set (← stateRef.get) end Output end DocGen4 diff --git a/DocGen4/Output/Navbar.lean b/DocGen4/Output/Navbar.lean index 61ad534b..50387fbd 100644 --- a/DocGen4/Output/Navbar.lean +++ b/DocGen4/Output/Navbar.lean @@ -13,96 +13,83 @@ namespace Output open Lean open scoped DocGen4.Jsx -def moduleListFile (file : Name) : BaseHtmlM Html := do - return
            +def moduleListFile (file : Name) : BaseHtmlM Unit := do + let cls := if (← getCurrentName) == file then "nav_link visible" else "nav_link" + ( +
            ) /-- Build the HTML tree representing the module hierarchy. -/ -partial def moduleListDir (h : Hierarchy) : BaseHtmlM Html := do +partial def moduleListDir (h : Hierarchy) : BaseHtmlM Unit := do let children := Array.mk (h.getChildren.toList.map Prod.snd) let dirs := children.filter (fun c => c.getChildren.toList.length != 0) let files := children.filter (fun c => Hierarchy.isFile c && c.getChildren.toList.length = 0) |>.map Hierarchy.getName - let dirNodes ← dirs.mapM moduleListDir - let fileNodes ← files.mapM moduleListFile let moduleLink ← moduleNameToLink h.getName - let summary ← do - if h.isFile then - pure {s!"{h.getName.getString!} ("}file) + let openAttr := if (← getCurrentName).any (h.getName.isPrefixOf ·) then #[("open", "")] else #[] + let detailsAttrs := #[("class", "nav_sect"), ("data-path", moduleLink)] ++ openAttr + (
            + {if h.isFile then + ({s!"{h.getName.getString!} ("}file)) else - pure {h.getName.getString!} - pure - + ({h.getName.getString!})} + {dirs.forM moduleListDir} + {files.forM moduleListFile} +
            ) /-- Return a list of top level modules, linkified and rendered as HTML -/ -def moduleList : BaseHtmlM Html := do +def moduleList : BaseHtmlM Unit := do let hierarchy ← getHierarchy - let mut list := Array.empty - for (_, cs) in hierarchy.getChildren do - list := list.push <| ← moduleListDir cs - return
            [list]
            + (
            + {do for (_, cs) in hierarchy.getChildren do + moduleListDir cs} +
            ) /-- The main entry point to rendering the navbar on the left hand side. -/ -def navbar : BaseHtmlM Html := do - /- - TODO: Add these in later - - - - - -/ - let mut staticPages : Array Html := #[ - , - , - , - ] - let config ← read - if not config.refs.isEmpty then - staticPages := staticPages.push - pure - - - [← baseHtmlHeadDeclarations] +def navbar : BaseHtmlM Unit := do + ( + + {baseHtmlHeadDeclarations} - - - - + + + + - -
            diff --git a/DocGen4/Output/Structure.lean b/DocGen4/Output/Structure.lean index 7bf513cd..4acb21b2 100644 --- a/DocGen4/Output/Structure.lean +++ b/DocGen4/Output/Structure.lean @@ -12,49 +12,48 @@ open Lean /-- Render a single field consisting of its documentation, its name and its type as HTML. -/ -def fieldToHtml (f : Process.FieldInfo) : HtmlM Html := do +def fieldToHtml (f : Process.FieldInfo) : HtmlM Unit := do let shortName := f.name.componentsRev.head!.toString let name := f.name.toString - let args ← f.args.mapM argToHtml if f.isDirect then - let doc : Array HTML ← - if let some doc := f.doc then - let renderedDoc ← docStringToHtml doc name - pure #[
            [renderedDoc]
            ] - else - pure #[] - pure -
          • -
            {shortName} [args] {" : "} [← renderedCodeToHtml f.type]
            - [doc] -
          • + if let some doc := f.doc then + (
          • +
            + {shortName} {f.args.forM argToHtml} {" : "} {renderedCodeToHtml f.type} +
            +
            {docStringToHtml doc name}
            +
          • ) + else + (
          • +
            + {shortName} {f.args.forM argToHtml} {" : "} {renderedCodeToHtml f.type} +
            +
          • ) else - pure -
          • -
            {shortName} [args] {" : "} [← renderedCodeToHtml f.type]
            -
          • + (
          • +
            + {shortName} + {f.args.forM argToHtml} {" : "} {renderedCodeToHtml f.type} +
            +
          • ) /-- Render all information about a structure as HTML. -/ -def structureToHtml (i : Process.StructureInfo) : HtmlM (Array Html) := do - let structureHtml ← do - if Name.isSuffixOf `mk i.ctor.name then - pure -
              - [← i.fieldInfo.mapM fieldToHtml] -
            - else - let ctorShortName := i.ctor.name.componentsRev.head!.toString - pure -
              -
            • {s!"{ctorShortName} "} :: (
            • -
                - [← i.fieldInfo.mapM fieldToHtml] -
              -
            • )
            • -
            - return #[structureHtml] +def structureToHtml (i : Process.StructureInfo) : HtmlM Unit := do + if Name.isSuffixOf `mk i.ctor.name then + (
              + {i.fieldInfo.forM fieldToHtml} +
            ) + else + let ctorShortName := i.ctor.name.componentsRev.head!.toString + (
              +
            • {s!"{ctorShortName} "} :: (
            • +
                + {i.fieldInfo.forM fieldToHtml} +
              +
            • )
            • +
            ) end Output end DocGen4 diff --git a/DocGen4/Output/Tactics.lean b/DocGen4/Output/Tactics.lean index a3917be6..695674cd 100644 --- a/DocGen4/Output/Tactics.lean +++ b/DocGen4/Output/Tactics.lean @@ -9,36 +9,37 @@ import DocGen4.Output.Module namespace DocGen4.Process open scoped DocGen4.Jsx -open DocGen4 Output Lean +open DocGen4 (Raw) +open DocGen4.Output +open Lean /-- Render the HTML for a single tactic. -/ -def TacticInfo.docStringToHtml (tac : TacticInfo MarkdownDocstring) : Output.HtmlM (TacticInfo Html) := do - return { - tac with - docString :=

            [← Output.docStringToHtml (.inl tac.docString) tac.internalName.toString]

            - } +def TacticInfo.docStringToHtml (tac : TacticInfo MarkdownDocstring) : Output.HtmlM (TacticInfo String) := do + let captured ← Output.captureHtml do +

            {Output.docStringToHtml (.inl tac.docString) tac.internalName.toString}

            + return { tac with docString := captured } /-- Render the HTML for a single tactic. -/ -def TacticInfo.toHtml (tac : TacticInfo Html) : Output.BaseHtmlM Html := do +def TacticInfo.toHtml (tac : TacticInfo String) : Output.BaseHtmlM Unit := do let internalName := tac.internalName.toString let defLink := (← moduleNameToLink tac.definingModule) ++ "#" ++ internalName let tags := ", ".intercalate (tac.tags.map (·.toString)).qsort.toList - return
            + (

            {tac.userName}

            - {tac.docString} + {Raw.mk tac.docString}
            Tags:
            {tags}
            Defined in module:
            {tac.definingModule.toString}
            -
            +
            ) -def TacticInfo.navLink (tac : TacticInfo α) : Html := +def TacticInfo.navLink (tac : TacticInfo α) : Output.BaseHtmlM Unit := do

            {tac.userName}

            end DocGen4.Process @@ -47,23 +48,23 @@ namespace DocGen4.Output open scoped DocGen4.Jsx open Lean Process +open DocGen4 (Raw) /-- Render the HTML for the tactics listing page. -/ -def tactics (tacticInfo : Array (TacticInfo Html)) : BaseHtmlM Html := do - let sectionsHtml ← tacticInfo.mapM (· |>.toHtml) - templateLiftExtends (baseHtmlGenerator "Tactics") <| pure #[ +def tactics (tacticInfo : Array (TacticInfo String)) : BaseHtmlM Unit := do + baseHtmlGenerator "Tactics" do , - Html.element "main" false #[] ( - #[

            The tactic language is a special-purpose programming language for constructing proofs, indicated using the keyword by.

            ] ++ - sectionsHtml) - ] + {tacticInfo.forM (·.navLink)} + + (
            +

            The tactic language is a special-purpose programming language for constructing proofs, indicated using the keyword by.

            + {tacticInfo.forM (·.toHtml)} +
            ) -def loadTacticsJSON (buildDir : System.FilePath) : IO (Array (TacticInfo Html)) := do +def loadTacticsJSON (buildDir : System.FilePath) : IO (Array (TacticInfo String)) := do let mut result : Array (TacticInfo _) := #[] for entry in ← System.FilePath.readDir (declarationsBasePath buildDir) do if entry.fileName.startsWith "tactics-" && entry.fileName.endsWith ".json" then @@ -83,7 +84,7 @@ def loadTacticsJSON (buildDir : System.FilePath) : IO (Array (TacticInfo Html)) This `abbrev` exists as a type-checking wrapper around `toJson`, ensuring `loadTacticsJSON` gets objects in the expected format. -/ -abbrev saveTacticsJSON (fileName : System.FilePath) (tacticInfo : Array (TacticInfo Html)) : IO Unit := do +abbrev saveTacticsJSON (fileName : System.FilePath) (tacticInfo : Array (TacticInfo String)) : IO Unit := do if tacticInfo.size > 0 then IO.FS.writeFile fileName (toString (toJson tacticInfo)) diff --git a/DocGen4/Output/Template.lean b/DocGen4/Output/Template.lean index 921100fd..5b030331 100644 --- a/DocGen4/Output/Template.lean +++ b/DocGen4/Output/Template.lean @@ -10,61 +10,52 @@ namespace DocGen4 namespace Output open scoped DocGen4.Jsx +open DocGen4 (Raw) /-- The HTML template used for all pages. -/ -def baseHtmlGenerator (title : String) (site : Array Html) : BaseHtmlM Html := do - let moduleConstant := - if let some module := ← getCurrentName then - #[] - else - #[] - pure - - - [← baseHtmlHeadDeclarations] - - {title} - - - - - - [moduleConstant] - - - - - - - - - - - - -
            -

            Documentation

            -

            [breakWithin title]

            -
            - {.raw " "} - -
            -
            - - [site] - - - - - -/-- -A comfortability wrapper around `baseHtmlGenerator`. --/ -def baseHtml (title : String) (site : Html) : BaseHtmlM Html := baseHtmlGenerator title #[site] +def baseHtmlGenerator (title : String) (writeContent : BaseHtmlM Unit) : BaseHtmlM Unit := do + ( + + {baseHtmlHeadDeclarations} + + {title} + + + + + + {do if let some module := ← getCurrentName then + ()} + + + + + + + + + + + + +
            +

            Documentation

            +

            {breakWithin title}

            +
            + {Raw.mk " "} + +
            +
            + + {writeContent} + + + + ) end Output end DocGen4 diff --git a/DocGen4/Output/ToHtmlFormat.lean b/DocGen4/Output/ToHtmlFormat.lean index 91b8abdf..7bdcd92a 100644 --- a/DocGen4/Output/ToHtmlFormat.lean +++ b/DocGen4/Output/ToHtmlFormat.lean @@ -7,30 +7,19 @@ Authors: Wojciech Nawrocki, Sebastian Ullrich, Henrik Böving import Lean.Data.Json import Lean.Data.Xml import Lean.Parser +import Lean.Elab.Term /-! This module defines: -- a representation of HTML trees -- together with a JSX-like DSL for writing them -- and widget support for visualizing any type as HTML. -/ +- streaming HTML write helpers +- a JSX-like DSL that expands to monadic writes -/ namespace DocGen4 open Lean -inductive Html where - -- TODO(WN): it's nameless for shorter JSON; re-add names when we have deriving strategies for From/ToJson - -- element (tag : String) (flatten : Bool) (attrs : Array HtmlAttribute) (children : Array Html) - | element : String → Bool → Array (String × String) → Array Html → Html - /-- A text node, which will be escaped in the output -/ - | text : String → Html - /-- An arbitrary string containing HTML -/ - | raw : String → Html - deriving Repr, BEq, Inhabited, FromJson, ToJson - -instance : Coe String Html := - ⟨Html.text⟩ - -namespace Html +/-- A raw HTML string that should not be escaped. -/ +structure Raw where + html : String def escapePairs : Array (String × String) := #[ @@ -43,11 +32,15 @@ def escapePairs : Array (String × String) := def escape (s : String) : String := escapePairs.foldl (fun acc (o, r) => acc.replace o r) s +namespace Html +def escape := @DocGen4.escape +end Html + -- TODO: remove the following 3 functions -- once is fixed def _root_.Lean.Xml.Attributes.toStringEscaped (as : Xml.Attributes) : String := - as.foldl (fun s n v => s ++ s!" {n}=\"{Html.escape v}\"") "" + as.foldl (fun s n v => s ++ s!" 
{n}=\"{DocGen4.escape v}\"") "" mutual @@ -57,7 +50,7 @@ partial def _root_.Lean.Xml.eToStringEscaped : Xml.Element → String partial def _root_.Lean.Xml.cToStringEscaped : Xml.Content → String | .Element e => eToStringEscaped e | .Comment c => s!"" -| .Character c => Html.escape c +| .Character c => DocGen4.escape c end @@ -73,31 +66,6 @@ partial def _root_.Lean.Xml.cToPlaintext : Xml.Content → String end -def attributesToString (attrs : Array (String × String)) :String := - attrs.foldl (fun acc (k, v) => acc ++ " " ++ k ++ "=\"" ++ escape v ++ "\"") "" - --- TODO: Termination proof -partial def toStringAux : Html → String -| element tag false attrs #[text s] => s!"<{tag}{attributesToString attrs}>{escape s}\n" -| element tag false attrs #[raw s] => s!"<{tag}{attributesToString attrs}>{s}\n" -| element tag false attrs #[child] => s!"<{tag}{attributesToString attrs}>\n{child.toStringAux}\n" -| element tag false attrs children => s!"<{tag}{attributesToString attrs}>\n{children.foldl (· ++ toStringAux ·) ""}\n" -| element tag true attrs children => s!"<{tag}{attributesToString attrs}>{children.foldl (· ++ toStringAux ·) ""}" -| text s => escape s -| raw s => s - -def toString (html : Html) : String := - html.toStringAux.trimAsciiEnd.copy - -partial def textLength : Html → Nat -| raw s => s.length -- measures lengths of escape sequences too! 
-| text s => s.length -| element _ _ _ children => - let lengths := children.map textLength - lengths.foldl Nat.add 0 - -end Html - namespace Jsx open Parser PrettyPrinter @@ -149,35 +117,51 @@ def translateAttrs (attrs : Array (TSyntax `DocGen4.Jsx.jsxAttr)) : MacroM (TSyn | _ => Macro.throwUnsupported return as -private def htmlHelper (n : Syntax) (children : Array Syntax) (m : Syntax) : MacroM (String × (TSyntax `term)):= do - unless n.getId == m.getId do - withRef m <| Macro.throwError s!"Leading and trailing part of tags don't match: '{n}', '{m}'" - let mut cs ← `(#[]) - for child in children do - cs ← match child with - | `(jsxChild|$t:jsxText) => `(($cs).push (Html.text $(quote t.raw[0]!.getAtomVal))) - -- TODO(WN): elab as list of children if type is `t Html` where `Foldable t` - | `(jsxChild|{$t}) => `(($cs).push ($t : Html)) - | `(jsxChild|[$t]) => `($cs ++ ($t : Array Html)) - | `(jsxChild|$e:jsxElement) => `(($cs).push ($e:jsxElement : Html)) - | _ => Macro.throwUnsupported - let tag := toString n.getId - pure <| (tag, cs) +private def mkN (s : String) : Lean.Ident := + mkIdent (`DocGen4 ++ `Output ++ Name.mkSimple s) + +private def mkApp (fn : Ident) (args : Array (TSyntax `term)) : MacroM (TSyntax `term) := do + args.foldlM (fun acc arg => `($acc $arg)) (fn : TSyntax `term) macro_rules | `(<$n $attrs* />) => do - let kind := quote (toString n.getId) - let attrs ← translateAttrs attrs - `(Html.element $kind true $attrs #[]) + let tag : TSyntax `term := quote (toString n.getId) + let atSyn ← translateAttrs attrs + let pot := mkN "putOpenTag" + let pct := mkN "putCloseTag" + let openCall ← mkApp pot #[tag, atSyn] + let closeCall ← mkApp pct #[tag] + `(do ($openCall); ($closeCall)) | `(<$n $attrs* >$children*) => do - let (tag, children) ← htmlHelper n children m - `(Html.element $(quote tag) true $(← translateAttrs attrs) $children) + unless n.getId == m.getId do + withRef m <| Macro.throwError s!"Leading and trailing part of tags don't match: '{n}', 
'{m}'" + let atSyn ← translateAttrs attrs + let tag : TSyntax `term := quote (toString n.getId) + let pot := mkN "putOpenTag" + let pct := mkN "putCloseTag" + let pe := mkN "putEscaped" + let openCall ← mkApp pot #[tag, atSyn] + let closeCall ← mkApp pct #[tag] + let mut stmts : Array (TSyntax `Lean.Parser.Term.doSeqItem) := #[] + stmts := stmts.push (← `(Lean.Parser.Term.doSeqItem| ($openCall))) + for child in children do + let stmt ← match child with + | `(jsxChild|$t:jsxText) => + let s : TSyntax `term := quote t.raw[0]!.getAtomVal + let call ← mkApp pe #[s] + `(Lean.Parser.Term.doSeqItem| ($call)) + | `(jsxChild|{$t}) => + `(Lean.Parser.Term.doSeqItem| ($t)) + | `(jsxChild|[$t]) => + `(Lean.Parser.Term.doSeqItem| for _x in ($t : Array _) do _x) + | `(jsxChild|$e:jsxElement) => + `(Lean.Parser.Term.doSeqItem| $e:jsxElement) + | _ => Macro.throwUnsupported + stmts := stmts.push stmt + stmts := stmts.push (← `(Lean.Parser.Term.doSeqItem| ($closeCall))) + `(do $stmts*) -end Jsx -/-- A type which implements `ToHtmlFormat` will be visualized -as the resulting HTML in editors which support it. 
-/ -class ToHtmlFormat (α : Type u) where - formatHtml : α → Html +end Jsx end DocGen4 diff --git a/DocGen4/Output/ToJson.lean b/DocGen4/Output/ToJson.lean index 5dd0e23e..2d103320 100644 --- a/DocGen4/Output/ToJson.lean +++ b/DocGen4/Output/ToJson.lean @@ -107,8 +107,7 @@ def JsonIndex.addModule (index : JsonIndex) (module : JsonModule) : BaseHtmlM Js | some i => pure i | none => let impLink ← moduleNameToLink (String.toName imp) - let indexedModule := { url := impLink, importedBy := #[] } - pure indexedModule + pure { url := impLink, importedBy := #[] } index := { index with modules := index.modules.insert @@ -124,7 +123,7 @@ def DocInfo.toJson (sourceLinker : Option DeclarationRange → String) (info : P let docLink ← declNameToLink info.getName let sourceLink := sourceLinker info.getDeclarationRange let line := info.getDeclarationRange.pos.line - let header := (← docInfoHeader info).toString + let header ← captureHtml (docInfoHeader info) let info := { name, kind, doc, docLink, sourceLink, line } return { info, header } diff --git a/Main.lean b/Main.lean index e3855309..dd583d72 100644 --- a/Main.lean +++ b/Main.lean @@ -84,7 +84,7 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do -- Add `references` pseudo-module to hierarchy since references.html is always generated let hierarchy := Hierarchy.fromArray (targetModules.push `references) - let baseConfig ← getSimpleBaseContext buildDir hierarchy + let baseConfig ← getSimpleBaseConfig buildDir hierarchy -- Parallel HTML generation let outputs ← htmlOutputResultsParallel baseConfig dbPath linkCtx targetModules (sourceLinker? 
:= some (dbSourceLinker linkCtx.sourceUrls)) From 5e87fd81b4ef80a4ae46e6e0e2f1c1871761ef0c Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 10:36:31 +0100 Subject: [PATCH 061/106] Informative exceptions --- DocGen4/Output.lean | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index c9cd36db..8259c980 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -207,8 +207,15 @@ def headerDataOutput (buildDir : System.FilePath) : IO Unit := do for entry in ← System.FilePath.readDir (declarationsBasePath buildDir) do if entry.fileName.startsWith "declaration-data-" && entry.fileName.endsWith ".bmp" then let fileContent ← FS.readFile entry.path - let .ok jsonContent := Json.parse fileContent | unreachable! - let .ok (module : JsonModule) := fromJson? jsonContent | unreachable! + let jsonContent ← + match Json.parse fileContent with + | .ok c => pure c + | .error e => throw <| IO.userError s!"failed to parse JSON in {entry.path}: {e}" + let (module : JsonModule) ← + match fromJson? 
jsonContent with + | .ok v => pure v + | .error e => + throw <| IO.userError s!"failed to deserialize JsonModule from {entry.path}: {e}" headerIndex := headerIndex.addModule module let finalHeaderJson := toJson headerIndex From 76baef3c29ee933a39ed0c643635ff2f2b11e5bd Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 10:37:31 +0100 Subject: [PATCH 062/106] DB simplifications --- DocGen4/DB.lean | 51 +++++++++++++------------------------------- DocGen4/DB/Read.lean | 27 +++++++++-------------- 2 files changed, 25 insertions(+), 53 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index cedbb8c0..9d0bfc33 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -33,8 +33,6 @@ structure DB extends ReadOps where saveStructureParent (modName : String) (position : Int64) (sequence : Int32) (projectionFn : String) (type : RenderedCode) : IO Unit saveStructureField (modName : String) (position : Int64) (sequence : Int64) (projName : String) (type : RenderedCode) (isDirect : Bool) : IO Unit saveStructureFieldArg (modName : String) (position : Int64) (fieldSeq : Int64) (argSeq : Int64) (binder : RenderedCode) (isImplicit : Bool) : IO Unit - saveArg (modName : String) (position : Int64) (sequence : Int64) (binder : RenderedCode) (isImplicit : Bool) : IO Unit - saveAttr (modName : String) (position : Int64) (sequence : Int64) (attr : String) : IO Unit /-- Save an internal name (like a recursor) that should link to its target declaration -/ saveInternalName (name : Lean.Name) (targetModule : String) (targetPosition : Int64) : IO Unit /-- Save a tactic defined in this module -/ @@ -118,8 +116,8 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do saveDeclarationRangeStmt.bind 8 declRange.endCharUtf16 run saveDeclarationRangeStmt let saveInfoStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, sorried, render) VALUES (?, ?, ?, ?, ?, ?, ?)" - let saveArgStmt' ← sqlite.prepare 
"INSERT INTO declaration_args (module_name, position, sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?)" - let saveAttrStmt' ← sqlite.prepare "INSERT INTO declaration_attrs (module_name, position, sequence, attr) VALUES (?, ?, ?, ?)" + let saveArgStmt ← sqlite.prepare "INSERT INTO declaration_args (module_name, position, sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?)" + let saveAttrStmt ← sqlite.prepare "INSERT INTO declaration_attrs (module_name, position, sequence, attr) VALUES (?, ?, ?, ?)" let saveInfo modName position kind (info : Process.Info) := withDbContext "write:insert:name_info" do saveInfoStmt.bind 1 modName saveInfoStmt.bind 2 position @@ -136,22 +134,22 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do -- Save args for h : j in 0...info.args.size do let arg := info.args[j] - withDbContext "write:insert:declaration_args:info" do - saveArgStmt'.bind 1 modName - saveArgStmt'.bind 2 position - saveArgStmt'.bind 3 j.toInt64 - saveArgStmt'.bind 4 arg.binder - saveArgStmt'.bind 5 arg.implicit - run saveArgStmt' + withDbContext "write:insert:declaration_args" do + saveArgStmt.bind 1 modName + saveArgStmt.bind 2 position + saveArgStmt.bind 3 j.toInt64 + saveArgStmt.bind 4 arg.binder + saveArgStmt.bind 5 arg.implicit + run saveArgStmt -- Save attrs for h : j in 0...info.attrs.size do let attr := info.attrs[j] - withDbContext "write:insert:declaration_attrs:info" do - saveAttrStmt'.bind 1 modName - saveAttrStmt'.bind 2 position - saveAttrStmt'.bind 3 j.toInt64 - saveAttrStmt'.bind 4 attr - run saveAttrStmt' + withDbContext "write:insert:declaration_attrs" do + saveAttrStmt.bind 1 modName + saveAttrStmt.bind 2 position + saveAttrStmt.bind 3 j.toInt64 + saveAttrStmt.bind 4 attr + run saveAttrStmt let saveAxiomStmt ← sqlite.prepare "INSERT INTO axioms (module_name, position, is_unsafe) VALUES (?, ?, ?)" let saveAxiom modName position isUnsafe := withDbContext "write:insert:axioms" do saveAxiomStmt.bind 1 modName @@ 
-260,21 +258,6 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do saveStructureFieldArgStmt.bind 5 binder saveStructureFieldArgStmt.bind 6 isImplicit run saveStructureFieldArgStmt - let saveArgStmt ← sqlite.prepare "INSERT INTO declaration_args (module_name, position, sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?)" - let saveArg modName position sequence (binder : RenderedCode) isImplicit := withDbContext "write:insert:declaration_args" do - saveArgStmt.bind 1 modName - saveArgStmt.bind 2 position - saveArgStmt.bind 3 sequence - saveArgStmt.bind 4 binder - saveArgStmt.bind 5 isImplicit - run saveArgStmt - let saveAttrStmt ← sqlite.prepare "INSERT INTO declaration_attrs (module_name, position, sequence, attr) VALUES (?, ?, ?, ?)" - let saveAttr modName position sequence attr := withDbContext "write:insert:declaration_attrs" do - saveAttrStmt.bind 1 modName - saveAttrStmt.bind 2 position - saveAttrStmt.bind 3 sequence - saveAttrStmt.bind 4 attr - run saveAttrStmt -- For saving minimal info to name_info for name lookups only (not rendering) let saveNameOnlyStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, sorried, render) VALUES (?, ?, ?, ?, ?, 0, 0)" let saveNameOnly modName position kind (name : Lean.Name) (type : RenderedCode) (declRange : Lean.DeclarationRange) := withDbContext "write:insert:name_info:nameonly" do @@ -330,8 +313,6 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do saveStructureParent, saveStructureField, saveStructureFieldArg, - saveArg, - saveAttr, saveNameOnly, saveInternalName, saveTactic @@ -381,8 +362,6 @@ def openForReading (dbFile : System.FilePath) (values : DocstringValues) : IO DB saveStructureParent := fun _ _ _ _ _ => readonlyError, saveStructureField := fun _ _ _ _ _ _ => readonlyError, saveStructureFieldArg := fun _ _ _ _ _ _ => readonlyError, - saveArg := fun _ _ _ _ _ => readonlyError, - saveAttr := fun _ _ _ _ => 
readonlyError, saveInternalName := fun _ _ _ => readonlyError, saveTactic := fun _ _ => readonlyError, getModuleNames := readOps.getModuleNames, diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index 8b2b5046..c2038870 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -379,7 +379,7 @@ where return some <| .opaqueInfo { toInfo := info, definitionSafety := safety } done s.readOpaqueStmt return none - readDefinition (info : Process.Info) : IO (Option Process.DocInfo) := do + readDefinitionData : IO (Option (Bool × ReducibilityHints × Bool × Option (Array RenderedCode) × Bool)) := do s.readDefinitionStmt.bind 1 moduleName s.readDefinitionStmt.bind 2 position if (← s.readDefinitionStmt.step) then @@ -392,30 +392,23 @@ where | "abbrev" => .abbrev | s => .regular (s.toNat?.getD 0 |>.toUInt32) let (equations, equationsWereOmitted) ← s.loadEquations moduleName position - return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, equationsWereOmitted, isNonComputable } + return some (isUnsafe, hints, isNonComputable, equations, equationsWereOmitted) done s.readDefinitionStmt return none + readDefinition (info : Process.Info) : IO (Option Process.DocInfo) := do + let some (isUnsafe, hints, isNonComputable, equations, equationsWereOmitted) ← readDefinitionData + | return none + return some <| .definitionInfo { toInfo := info, isUnsafe, hints, equations, equationsWereOmitted, isNonComputable } readInstance (info : Process.Info) : IO (Option Process.DocInfo) := do s.readInstanceStmt.bind 1 moduleName s.readInstanceStmt.bind 2 position if (← s.readInstanceStmt.step) then let className := (← s.readInstanceStmt.columnText 0).toName done s.readInstanceStmt - s.readDefinitionStmt.bind 1 moduleName - s.readDefinitionStmt.bind 2 position - if (← s.readDefinitionStmt.step) then - let isUnsafe := (← s.readDefinitionStmt.columnInt64 0) != 0 - let hintsStr ← s.readDefinitionStmt.columnText 1 - let isNonComputable := (← 
s.readDefinitionStmt.columnInt64 2) != 0 - done s.readDefinitionStmt - let hints : ReducibilityHints := match hintsStr with - | "opaque" => .opaque - | "abbrev" => .abbrev - | s => .regular (s.toNat?.getD 0 |>.toUInt32) - let (equations, equationsWereOmitted) ← s.loadEquations moduleName position - let typeNames ← s.loadInstanceArgs moduleName position - return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, equationsWereOmitted, isNonComputable, className, typeNames } - done s.readDefinitionStmt + let some (isUnsafe, hints, isNonComputable, equations, equationsWereOmitted) ← readDefinitionData + | return none + let typeNames ← s.loadInstanceArgs moduleName position + return some <| .instanceInfo { toInfo := info, isUnsafe, hints, equations, equationsWereOmitted, isNonComputable, className, typeNames } else done s.readInstanceStmt return none From a706e59c4dc8401e6fd6160da839e9895465eced Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 10:37:52 +0100 Subject: [PATCH 063/106] Revert "experiment: emit HTML directly rather than allocating trees and strings" This reverts commit 7c939bd6a55741c46df6842c2e51fefc07c005fa. It wasn't meaningfully faster. 
--- DocGen4/Output.lean | 97 ++++---- DocGen4/Output/Arg.lean | 13 +- DocGen4/Output/Base.lean | 329 ++++++++++---------------- DocGen4/Output/Class.lean | 13 +- DocGen4/Output/ClassInductive.lean | 2 +- DocGen4/Output/Definition.lean | 40 ++-- DocGen4/Output/DocString.lean | 194 ++++++++------- DocGen4/Output/Find.lean | 20 +- DocGen4/Output/FoundationalTypes.lean | 10 +- DocGen4/Output/Index.lean | 4 +- DocGen4/Output/Inductive.lean | 37 +-- DocGen4/Output/Module.lean | 184 +++++++------- DocGen4/Output/Navbar.lean | 121 +++++----- DocGen4/Output/NotFound.lean | 4 +- DocGen4/Output/References.lean | 44 ++-- DocGen4/Output/Search.lean | 7 +- DocGen4/Output/Structure.lean | 69 +++--- DocGen4/Output/Tactics.lean | 45 ++-- DocGen4/Output/Template.lean | 93 ++++---- DocGen4/Output/ToHtmlFormat.lean | 120 ++++++---- DocGen4/Output/ToJson.lean | 5 +- Main.lean | 2 +- 22 files changed, 726 insertions(+), 727 deletions(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 8259c980..3855982f 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -39,29 +39,24 @@ def collectBackrefs (buildDir : System.FilePath) : IO (Array BackrefItem) := do | .ok (arr : Array BackrefItem) => backrefs := backrefs ++ arr return backrefs -def htmlOutputSetup (config : SiteBaseConfig) : IO Unit := do - let bp := basePath config.buildDir - let findBasePath := bp / "find" +def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do + let findBasePath (buildDir : System.FilePath) := basePath buildDir / "find" -- Base structure - FS.createDirAll bp - FS.createDirAll findBasePath + FS.createDirAll <| basePath config.buildDir + FS.createDirAll <| findBasePath config.buildDir FS.createDirAll <| srcBasePath config.buildDir FS.createDirAll <| declarationsBasePath config.buildDir - -- HTML pages written directly to files - let run action path := runHtmlToFile action config path - run index (bp / "index.html") - run notFound (bp / "404.html") - run foundationalTypes (bp / 
"foundational_types.html") - run navbar (bp / "navbar.html") - run search (bp / "search.html") - run (references (← collectBackrefs config.buildDir)) (bp / "references.html") - run (tactics (← loadTacticsJSON config.buildDir)) (bp / "tactics.html") - runHtmlToFile find { config with depthToRoot := 1 } (findBasePath / "index.html") - - -- Static assets - let staticFiles := #[ + -- All the doc-gen static stuff + let indexHtml := ReaderT.run index config |>.toString + let notFoundHtml := ReaderT.run notFound config |>.toString + let foundationalTypesHtml := ReaderT.run foundationalTypes config |>.toString + let navbarHtml := ReaderT.run navbar config |>.toString + let searchHtml := ReaderT.run search config |>.toString + let referencesHtml := ReaderT.run (references (← collectBackrefs config.buildDir)) config |>.toString + let tacticsHtml := ReaderT.run (tactics (← loadTacticsJSON config.buildDir)) config |>.toString + let docGenStatic := #[ ("style.css", styleCss), ("favicon.svg", faviconSvg), ("declaration-data.js", declarationDataCenterJs), @@ -70,14 +65,28 @@ def htmlOutputSetup (config : SiteBaseConfig) : IO Unit := do ("jump-src.js", jumpSrcJs), ("expand-nav.js", expandNavJs), ("how-about.js", howAboutJs), + ("search.html", searchHtml), ("search.js", searchJs), ("mathjax-config.js", mathjaxConfigJs), ("instances.js", instancesJs), ("importedBy.js", importedByJs), + ("index.html", indexHtml), + ("foundational_types.html", foundationalTypesHtml), + ("404.html", notFoundHtml), + ("navbar.html", navbarHtml), + ("references.html", referencesHtml), + ("tactics.html", tacticsHtml), + ] + for (fileName, content) in docGenStatic do + FS.writeFile (basePath config.buildDir / fileName) content + + let findHtml := ReaderT.run find { config with depthToRoot := 1 } |>.toString + let findStatic := #[ + ("index.html", findHtml), + ("find.js", findJs) ] - for (fileName, content) in staticFiles do - FS.writeFile (bp / fileName) content - FS.writeFile (findBasePath / "find.js") 
findJs + for (fileName, content) in findStatic do + FS.writeFile (findBasePath config.buildDir / fileName) content /-- Custom source linker type: given an optional source URL and module name, returns a function from declaration range to URL -/ abbrev SourceLinkerFn := Option String → Name → Option DeclarationRange → String @@ -86,7 +95,7 @@ abbrev SourceLinkerFn := Option String → Name → Option DeclarationRange → Each task loads its module from DB, renders HTML, and writes output files. The linking context provides cross-module linking without loading all module data upfront. When `targetModules` is provided, only those modules are rendered (but linking uses all modules). -/ -def htmlOutputResultsParallel (baseConfig : SiteBaseConfig) (dbPath : System.FilePath) +def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.FilePath) (linkCtx : LinkingContext) (targetModules : Array Name := linkCtx.moduleNames) (sourceLinker? : Option SourceLinkerFn := none) @@ -116,26 +125,21 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseConfig) (dbPath : System.Fil -- path: 'basePath/module/components/till/last.html' -- The last component is the file name, so we drop it from the depth to root. 
+ let moduleConfig := { baseConfig with + depthToRoot := modName.components.dropLast.length + currentName := some modName + } + let (moduleHtml, cfg) := moduleToHtml module |>.run {} config moduleConfig + let (tactics, cfg) := module.tactics.mapM TacticInfo.docStringToHtml |>.run cfg config baseConfig + if not cfg.errors.isEmpty then + throw <| IO.userError s!"There are errors when generating HTML for '{modName}': {cfg.errors}" + + -- Write HTML file let relFilePath := basePathComponent / moduleNameToFile modName let filePath := baseConfig.buildDir / relFilePath if let .some d := filePath.parent then FS.createDirAll d - - let handle ← FS.Handle.mk filePath .write - let fileStream := FS.Stream.ofHandle handle - let moduleConfig : SiteBaseContext := { - toSiteBaseConfig := { baseConfig with - depthToRoot := modName.components.dropLast.length - currentName := some modName - } - stream := fileStream - } - let (_, cfg) ← moduleToHtml module |>.run {} config moduleConfig - - -- Run tactic docstring rendering - let (tactics, cfg) ← module.tactics.mapM TacticInfo.docStringToHtml |>.eval cfg config baseConfig - if not cfg.errors.isEmpty then - throw <| IO.userError s!"There are errors when generating HTML for '{modName}': {cfg.errors}" + FS.writeFile filePath moduleHtml.toString -- Write backrefs JSON FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"backrefs-{module.name}.json") @@ -144,7 +148,7 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseConfig) (dbPath : System.Fil saveTacticsJSON (declarationsBasePath baseConfig.buildDir / s!"tactics-{module.name}.json") tactics -- Generate declaration data JSON for search - let (jsonDecls, _) ← Module.toJson module |>.eval {} config baseConfig + let (jsonDecls, _) := Module.toJson module |>.run {} config baseConfig FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") jsonDecls.compress @@ -158,8 +162,8 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseConfig) (dbPath 
: System.Fil | .error e => throw e return outputs -def getSimpleBaseConfig (buildDir : System.FilePath) (hierarchy : Hierarchy) : - IO SiteBaseConfig := do +def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : + IO SiteBaseContext := do let contents ← FS.readFile (declarationsBasePath buildDir / "references.json") <|> (pure "[]") match Json.parse contents with | .error err => @@ -177,11 +181,9 @@ def getSimpleBaseConfig (buildDir : System.FilePath) (hierarchy : Hierarchy) : refs := refs } -def htmlOutputIndex (baseConfig : SiteBaseConfig) : IO Unit := do +def htmlOutputIndex (baseConfig : SiteBaseContext) : IO Unit := do htmlOutputSetup baseConfig - let ref ← IO.mkRef {} - let ctx : SiteBaseContext := { toSiteBaseConfig := baseConfig, stream := IO.FS.Stream.ofBuffer ref } let mut index : JsonIndex := {} for entry in ← System.FilePath.readDir (declarationsBasePath baseConfig.buildDir) do if entry.fileName.startsWith "declaration-data-" && entry.fileName.endsWith ".bmp" then @@ -194,7 +196,7 @@ def htmlOutputIndex (baseConfig : SiteBaseConfig) : IO Unit := do | .error err => throw <| IO.userError s!"failed to parse file '{entry.path}': {err}" | .ok (module : JsonModule) => - index ← (index.addModule module).run ctx + index := index.addModule module |>.run baseConfig let finalJson := toJson index -- The root JSON for find @@ -290,7 +292,7 @@ def updateNavbarFromDisk (buildDir : System.FilePath) : IO Unit := do match fromJson? 
jsonContent with | .error _ => pure #[] | .ok refs => pure refs - let baseConfig : SiteBaseConfig := { + let baseConfig : SiteBaseContext := { buildDir := buildDir depthToRoot := 0 currentName := none @@ -298,6 +300,7 @@ def updateNavbarFromDisk (buildDir : System.FilePath) : IO Unit := do refs := refs } -- Regenerate navbar - runHtmlToFile navbar baseConfig (docDir / "navbar.html") + let navbarHtml := ReaderT.run navbar baseConfig |>.toString + FS.writeFile (docDir / "navbar.html") navbarHtml end DocGen4 diff --git a/DocGen4/Output/Arg.lean b/DocGen4/Output/Arg.lean index d9f04ddf..536f7268 100644 --- a/DocGen4/Output/Arg.lean +++ b/DocGen4/Output/Arg.lean @@ -9,15 +9,14 @@ open scoped DocGen4.Jsx Render an `Arg` as HTML, adding opacity effects etc. depending on what type of binder it has. -/ -def argToHtml (arg : Process.Arg) : HtmlM Unit := do - let inner : HtmlM Unit := - ( - {renderedCodeToHtml arg.binder} - ) +def argToHtml (arg : Process.Arg) : HtmlM Html := do + let node ← renderedCodeToHtml arg.binder + let inner := [node] + let html := Html.element "span" false #[("class", "decl_args")] #[inner] if arg.implicit then - ({inner}) + return {html} else - inner + return html end Output end DocGen4 diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index 47ef6d50..fc6501ca 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -10,7 +10,6 @@ import DocGen4.RenderedCode namespace DocGen4.Output open scoped DocGen4.Jsx -open DocGen4 (Raw escape) open Lean System Widget Elab Process def basePathComponent := "doc" @@ -45,14 +44,15 @@ structure BackrefItem where deriving FromJson, ToJson, Inhabited /-- -Site configuration without a rendering stream. Used by callers that only need site metadata, but -won't generate HTML, such as JSON generation. +The context used in the `BaseHtmlM` monad for HTML templating. -/ -structure SiteBaseConfig where +structure SiteBaseContext where + /-- The build directory (provided by lake). 
-/ buildDir : System.FilePath + /-- The module hierarchy as a tree structure. -/ @@ -71,96 +71,17 @@ structure SiteBaseConfig where -/ refs : Array BibItem -/-- -The context used in the `BaseHtmlM` monad for HTML templating. -Extends `SiteBaseConfig` with a stream for writing HTML output. --/ -structure SiteBaseContext extends SiteBaseConfig where - /-- - The stream to write HTML output to. - -/ - stream : IO.FS.Stream - -def setCurrentName (name : Name) (ctx : SiteBaseContext) := { ctx with currentName := some name } -def SiteBaseConfig.setCurrentName (name : Name) (cfg : SiteBaseConfig) := { cfg with currentName := some name } - -abbrev BaseHtmlT := ReaderT SiteBaseContext -abbrev BaseHtmlM := BaseHtmlT IO - -/-! ## Write helpers - -These are the concrete implementations referenced by JSX macro expansion. -They read the stream from `SiteBaseContext` via the reader monad. -/ - -section -variable [Monad m] [MonadReaderOf SiteBaseContext m] [MonadLiftT IO m] - -@[always_inline, inline] def putHtml (s : String) : m Unit := do - (← read).stream.putStr s - -@[always_inline, inline] def putEscaped (s : String) : m Unit := putHtml (escape s) - - - -namespace Html - -/-- Writes escaped text content to the HTML stream. -/ -@[always_inline, inline] def text (s : String) : m Unit := putEscaped s - -/-- Writes raw (unescaped) HTML content to the stream. -/ -@[always_inline, inline] def rawText (s : String) : m Unit := putHtml s - -end Html - -def putOpenTag (tag : String) (attrs : Array (String × String)) : m Unit := do - putHtml s!"<{tag}" - for (k, v) in attrs do putHtml s!" {k}=\"{escape v}\"" - putHtml ">" - -@[always_inline, inline] def putCloseTag (tag : String) : m Unit := putHtml s!"" - -namespace Html - -/-- Writes an HTML element with open/close tags wrapping a body action. 
-/ -def element (tag : String) (attrs : Array (String × String)) (body : m Unit) : m Unit := do - putOpenTag tag attrs - body - putCloseTag tag - -end Html - -scoped instance : Coe String (m Unit) where - coe s := putEscaped s - -scoped instance : Coe Raw (m Unit) where - coe r := putHtml r.html - -end - -/-- -The writable state used in the `HtmlM` monad for HTML templating. --/ -structure SiteState where - /-- - The list of back references, as an array. - -/ - backrefs : Array BackrefItem := #[] - /-- - The errors occurred during the process. - -/ - errors : String := "" - /-- Declaration decorator function type: given a module name, declaration name, and declaration kind, -writes optional extra HTML to inject into the declaration's rendering. This enables external tools -to add badges, links, or other decorations to declarations. +returns optional extra HTML to inject into the declaration's rendering. +This enables external tools to add badges, links, or other decorations to declarations. -/ -abbrev DeclarationDecoratorFn := Name → Name → String → BaseHtmlM Unit +abbrev DeclarationDecoratorFn := Name → Name → String → Array Html /-- The default declaration decorator that produces no extra HTML. -/ -def defaultDeclarationDecorator : DeclarationDecoratorFn := fun _ _ _ => pure () +def defaultDeclarationDecorator : DeclarationDecoratorFn := fun _ _ _ => #[] /-- The read-only context used in the `HtmlM` monad for HTML templating. @@ -180,22 +101,45 @@ structure SiteContext where refsMap : Std.HashMap String BibItem /-- A function to decorate declarations with extra HTML (e.g., verification badges). - Receives (moduleName, declarationName, declarationKind) and writes extra HTML. + Receives (moduleName, declarationName, declarationKind) and returns extra HTML. Defaults to producing no extra HTML. -/ declarationDecorator : DeclarationDecoratorFn := defaultDeclarationDecorator +/-- +The writable state used in the `HtmlM` monad for HTML templating. 
+-/ +structure SiteState where + /-- + The list of back references, as an array. + -/ + backrefs : Array BackrefItem := #[] + /-- + The errors occurred during the process. + -/ + errors : String := "" + +def setCurrentName (name : Name) (ctx : SiteBaseContext) := {ctx with currentName := some name} + +abbrev BaseHtmlT := ReaderT SiteBaseContext +abbrev BaseHtmlM := BaseHtmlT Id + abbrev HtmlT (m) := StateT SiteState <| ReaderT SiteContext <| BaseHtmlT m -abbrev HtmlM := HtmlT IO +abbrev HtmlM := HtmlT Id def HtmlT.run (x : HtmlT m α) (state : SiteState) (ctx : SiteContext) (baseCtx : SiteBaseContext) : m (α × SiteState) := StateT.run x state |>.run ctx |>.run baseCtx def HtmlM.run (x : HtmlM α) (state : SiteState) (ctx : SiteContext) - (baseCtx : SiteBaseContext) : IO (α × SiteState) := - HtmlT.run x state ctx baseCtx + (baseCtx : SiteBaseContext) : α × SiteState := + StateT.run x state |>.run ctx |>.run baseCtx |>.run +instance [Monad m] : MonadLift HtmlM (HtmlT m) where + monadLift x := do return (x.run (← getThe SiteState) (← readThe SiteContext) (← readThe SiteBaseContext)).1 + +instance [Monad m] : MonadLift BaseHtmlM (BaseHtmlT m) where + monadLift x := do return x.run (← readThe SiteBaseContext) /-- Add a backref of the given `citekey` and `funName` to current document, and returns it. -/ def addBackref (citekey funName : String) : HtmlM BackrefItem := do @@ -213,13 +157,14 @@ def addError (err : String) : HtmlM Unit := do modify fun cfg => { cfg with errors := cfg.errors ++ err ++ "\n" } /-- -Obtains the root URL relative to the given depth. +Obtains the root URL as a relative one to the current depth. 
-/ def getRoot : BaseHtmlM String := do - let rec go : Nat → String - | 0 => "./" - | n + 1 => "../" ++ go n - return go (← read).depthToRoot + let rec go: Nat -> String + | 0 => "./" + | Nat.succ n' => "../" ++ go n' + let d <- SiteBaseContext.depthToRoot <$> read + return (go d) def getHierarchy : BaseHtmlM Hierarchy := do return (← read).hierarchy def getCurrentName : BaseHtmlM (Option Name) := do return (← read).currentName @@ -227,19 +172,28 @@ def getResult : HtmlM AnalyzerResult := do return (← read).result def getSourceUrl (module : Name) (range : Option DeclarationRange): HtmlM String := do return (← read).sourceLinker module range def getDeclarationDecorator : HtmlM DeclarationDecoratorFn := do return (← read).declarationDecorator +/-- +If a template is meant to be extended because it for example only provides the +header but no real content this is the way to fill the template with content. +This is untyped so HtmlM and BaseHtmlM can be mixed. +-/ +def templateExtends {α β} {m} [Bind m] (base : α → m β) (new : m α) : m β := + new >>= base + +def templateLiftExtends {α β} {m n} [Bind m] [MonadLiftT n m] (base : α → n β) (new : m α) : m β := + new >>= (monadLift ∘ base) /-- Returns the doc-gen4 link to a module name. -/ def moduleNameToLink (n : Name) : BaseHtmlM String := do - let root ← getRoot let parts := n.components.map (Name.toString (escape := False)) - return root ++ (parts.intersperse "/").foldl (· ++ ·) "" ++ ".html" + return (← getRoot) ++ (parts.intersperse "/").foldl (· ++ ·) "" ++ ".html" /-- Returns the HTML doc-gen4 link to a module name. -/ -def moduleToHtmlLink (module : Name) : BaseHtmlM Unit := do - {module.toString} +def moduleToHtmlLink (module : Name) : BaseHtmlM Html := do + return {module.toString} /-- Returns the path to the HTML file that contains information about a module. @@ -282,28 +236,28 @@ def declNameToLink (name : Name) : HtmlM String := do /-- Returns the HTML doc-gen4 link to a declaration name. 
-/ -def declNameToHtmlLink (name : Name) : HtmlM Unit := do -  {name.toString} +def declNameToHtmlLink (name : Name) : HtmlM Html := do +  return {name.toString}  /-- -Writes a name split into parts, each wrapped in a span. -Together with "break_within" CSS class this helps browser to break a name nicely. +Returns a name split into parts. +Together with "break_within" CSS class this helps the browser to break a name +nicely. -/ -def breakWithin [Monad m] [MonadReaderOf SiteBaseContext m] [MonadLiftT IO m] (name: String) : m Unit := do -  let parts := name.splitOn "." -  for part in parts, i in [:parts.length] do -    if i > 0 then -      Html.text "." -    ({part}) +def breakWithin (name: String) : (Array Html) := +  name.splitOn "." +  |> .map (fun (s: String) => {s}) +  |> .intersperse "." +  |> List.toArray  /-- Returns the HTML doc-gen4 link to a declaration name with "break_within" set as class. -/ -def declNameToHtmlBreakWithinLink (name : Name) : HtmlM Unit := do -  -    {breakWithin name.toString} -   +def declNameToHtmlBreakWithinLink (name : Name) : HtmlM Html := do +  return +    [breakWithin name.toString] +   /-- For a name, try to find a linkable target by stripping suffix components 
-/ -def captureBaseHtml (action : BaseHtmlM Unit) : BaseHtmlM String := do - let ref ← IO.mkRef {} - let bufStream := IO.FS.Stream.ofBuffer ref - withTheReader SiteBaseContext (fun ctx => { ctx with stream := bufStream }) action - return String.fromUTF8! (← ref.get).data - -/-- Runs an HtmlM action inside a BaseHtmlM context using an IO.Ref for state. -/ -def runHtmlInBase (action : HtmlM Unit) (siteCtx : SiteContext) (stateRef : IO.Ref SiteState) : BaseHtmlM Unit := do - let baseCtx ← read - let s ← stateRef.get - let ((), s') ← HtmlM.run action s siteCtx baseCtx - stateRef.set s' - -/-- Runs a BaseHtmlM action, writing output directly to a file. -/ -def runHtmlToFile (action : BaseHtmlM Unit) (config : SiteBaseConfig) (path : FilePath) : IO Unit := do - let handle ← IO.FS.Handle.mk path .write - let fileStream := IO.FS.Stream.ofHandle handle - action.run { toSiteBaseConfig := config, stream := fileStream } - -/-- Runs an HtmlM action for its return value, auto-providing a buffer stream. -/ -def HtmlM.eval (x : HtmlM α) (state : SiteState) (ctx : SiteContext) - (config : SiteBaseConfig) : IO (α × SiteState) := do - let ref ← IO.mkRef {} - let bufStream := IO.FS.Stream.ofBuffer ref - HtmlM.run x state ctx { toSiteBaseConfig := config, stream := bufStream } - /-- -Converts RenderedCode to HTML with declaration links. Returns `true` if the result contains an -anchor tag and thus shouldn't be wrapped by a surrounding one. +Convert RenderedCode to HTML with declaration links. +Returns (hasAnchor, html) where hasAnchor indicates if the result contains an anchor tag. +This is used to avoid creating nested anchors (invalid HTML). -/ -partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM Bool := do +partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM (Bool × Array Html) := do match code with - | .text t => - Html.text t - return false + | .text t => return (false, #[t]) | .append xs => - let mut hasAnchor := false - for t in xs do - let a? 
← renderedCodeToHtmlAux t - hasAnchor := hasAnchor || a? - return hasAnchor + xs.foldlM (init := (false, #[])) fun (a?, acc) t => do + let (a?', acc') ← renderedCodeToHtmlAux t + pure (a? || a?', acc ++ acc') | .tag tag inner => + let (innerHasAnchor, innerHtml) ← renderedCodeToHtmlAux inner match tag with | .const name => let name2ModIdx := (← getResult).name2ModIdx - -- Always capture inner to a string since we may need to wrap it - let (innerStr, innerHasAnchor) ← captureHtmlWith (renderedCodeToHtmlAux inner) if name2ModIdx.contains name then + let link ← declNameToLink name + -- Avoid nested anchors: if inner content already has anchors, don't wrap again + -- Match original behavior: no fn wrapper when const is in name2ModIdx if innerHasAnchor then - Html.rawText innerStr - return true + return (true, innerHtml) else - let link ← declNameToLink name - ({Html.rawText innerStr}) - return true + return (true, #[[innerHtml]]) else + -- Name not in name2ModIdx - try to find a linkable parent + -- This handles both: + -- 1. Private names like `_private.Init.Prelude.0.Lean.Name.hash._proof_1` + -- 2. Auxiliary names like `Std.Do.Option.instWPMonad._proof_2` let nameToSearch := Lean.privateToUserName? name |>.getD name match findLinkableParent name2ModIdx nameToSearch with | some target => + let link ← declNameToLink target if innerHasAnchor then - Html.rawText innerStr - return true + return (true, innerHtml) else - let link ← declNameToLink target - ({Html.rawText innerStr}) - return true + return (true, #[[innerHtml]]) | none => + -- For private names, fall back to linking to the module itself (no anchor) match Lean.privatePrefix? 
name with | some pfx => let modName := moduleFromPrivatePrefix pfx if modName != .anonymous then + let link ← moduleNameToLink modName if innerHasAnchor then - Html.rawText innerStr - return true + return (true, innerHtml) else - let link ← moduleNameToLink modName - ({Html.rawText innerStr}) - return true + return (true, #[[innerHtml]]) else - ({Html.rawText innerStr}) - return innerHasAnchor + return (innerHasAnchor, fn innerHtml) | none => - ({Html.rawText innerStr}) - return innerHasAnchor + return (innerHasAnchor, fn innerHtml) | .sort _ => - let (innerStr, innerHasAnchor) ← captureHtmlWith (renderedCodeToHtmlAux inner) + let link := s!"{← getRoot}foundational_types.html" + -- Avoid nested anchors + -- Match original behavior: no fn wrapper when creating sort link if innerHasAnchor then - Html.rawText innerStr - return true + return (true, innerHtml) else - let link := s!"{← getRoot}foundational_types.html" - ({Html.rawText innerStr}) - return true - | .keyword => - renderedCodeToHtmlAux inner - | .string => - renderedCodeToHtmlAux inner - | .otherExpr => - let (innerStr, innerHasAnchor) ← captureHtmlWith (renderedCodeToHtmlAux inner) - ({Html.rawText innerStr}) - return innerHasAnchor + return (true, #[[innerHtml]]) + -- For Phase 1 compatibility: treat keyword/string as plain content (no extra styling) + -- This matches the original infoFormatToHtml behavior + | .keyword => return (innerHasAnchor, innerHtml) + | .string => return (innerHasAnchor, innerHtml) + | .otherExpr => return (innerHasAnchor, fn innerHtml) +where + fn (html : Array Html) : Array Html := #[[html]] /-- -Converts RenderedCode to HTML with declaration links. +Convert RenderedCode to HTML with declaration links. 
-/ -def renderedCodeToHtml (code : RenderedCode) : HtmlM Unit := do - let _ ← renderedCodeToHtmlAux code +def renderedCodeToHtml (code : RenderedCode) : HtmlM (Array Html) := + Prod.snd <$> renderedCodeToHtmlAux code /- Turns a `CodeWithInfos` object, that is basically a Lean syntax tree with information about what the identifiers mean, into an HTML object that links to as much information as possible. -/ -def infoFormatToHtml (i : CodeWithInfos) : HtmlM Unit := +def infoFormatToHtml (i : CodeWithInfos) : HtmlM (Array Html) := renderedCodeToHtml (renderTagged i) -def baseHtmlHeadDeclarations : BaseHtmlM Unit := do - () - () - () - () - () - () +def baseHtmlHeadDeclarations : BaseHtmlM (Array Html) := do + return #[ + , + , + , + , + , + + ] end DocGen4.Output diff --git a/DocGen4/Output/Class.lean b/DocGen4/Output/Class.lean index 34074d43..521afc3c 100644 --- a/DocGen4/Output/Class.lean +++ b/DocGen4/Output/Class.lean @@ -8,13 +8,14 @@ namespace Output open scoped DocGen4.Jsx open Lean -def classInstancesToHtml (className : Name) : HtmlM Unit := do -
            - Instances -
              -
              +def classInstancesToHtml (className : Name) : HtmlM Html := do + pure +
              + Instances +
                +
                -def classToHtml (i : Process.ClassInfo) : HtmlM Unit := do +def classToHtml (i : Process.ClassInfo) : HtmlM (Array Html) := do structureToHtml i end Output diff --git a/DocGen4/Output/ClassInductive.lean b/DocGen4/Output/ClassInductive.lean index 06a70836..746b0054 100644 --- a/DocGen4/Output/ClassInductive.lean +++ b/DocGen4/Output/ClassInductive.lean @@ -7,7 +7,7 @@ import DocGen4.Process namespace DocGen4 namespace Output -def classInductiveToHtml (i : Process.ClassInductiveInfo) : HtmlM Unit := do +def classInductiveToHtml (i : Process.ClassInductiveInfo) : HtmlM (Array Html) := do inductiveToHtml i end Output diff --git a/DocGen4/Output/Definition.lean b/DocGen4/Output/Definition.lean index 68bba962..c95c39cd 100644 --- a/DocGen4/Output/Definition.lean +++ b/DocGen4/Output/Definition.lean @@ -8,8 +8,8 @@ namespace Output open scoped DocGen4.Jsx open Lean Widget -def equationToHtml (c : RenderedCode) : HtmlM Unit := do -
              • {renderedCodeToHtml c}
              • +def equationToHtml (c : RenderedCode) : HtmlM Html := do + return
              • [← renderedCodeToHtml c]
              • /-- Attempt to render all `simp` equations for this definition. At a size @@ -17,23 +17,31 @@ defined in `equationLimit` we stop trying since they: - are too ugly to read most of the time - take too long -/ -def equationsToHtml (i : Process.DefinitionInfo) : HtmlM Unit := do +def equationsToHtml (i : Process.DefinitionInfo) : HtmlM (Array Html) := do if let some eqs := i.equations then + let equationsHtml ← eqs.mapM equationToHtml if i.equationsWereOmitted then -
                - Equations -
                  -
                • One or more equations did not get rendered due to their size.
                • - {eqs.forM equationToHtml} -
                -
                + return #[ +
                + Equations +
                  +
                • One or more equations did not get rendered due to their size.
                • + [equationsHtml] +
                +
                + ] else -
                - Equations -
                  - {eqs.forM equationToHtml} -
                -
                + return #[ +
                + Equations +
                  + [equationsHtml] +
                +
                + ] + else + return #[] end Output end DocGen4 + diff --git a/DocGen4/Output/DocString.lean b/DocGen4/Output/DocString.lean index 28762505..f757d2df 100644 --- a/DocGen4/Output/DocString.lean +++ b/DocGen4/Output/DocString.lean @@ -7,9 +7,6 @@ open Lean DocGen4.Process namespace DocGen4 namespace Output -open scoped DocGen4.Jsx -open DocGen4 (Raw escape) - /-- Auxiliary function for `splitAround`. -/ @[specialize] partial def splitAroundAux (s : String) (p : Char → Bool) (b i : String.Pos.Raw) (r : List String) : List String := if String.Pos.Raw.atEnd s i then @@ -156,23 +153,25 @@ def isLeanCode (lang : Array MD4Lean.AttrText) : Bool := /-- Automatically adds intra-documentation links for code content. -/ -def autoLinkInline (ss : Array String) : HtmlM Unit := do +def autoLinkInline (ss : Array String) : HtmlM (Array Html) := do + let mut result : Array Html := #[] for s in ss do let parts := splitAround s unicodeToSplit for part in parts do match ← nameToLink? part with | some link => - ({part}) + result := result.push <| Html.element "a" true #[("href", link)] #[Html.text part] | none => let sHead := part.dropEndWhile (· != '.') |>.copy let sTail := part.takeEndWhile (· != '.') |>.copy match ← nameToLink? sTail with | some link => if !sHead.isEmpty then - Html.text sHead - ({sTail}) + result := result.push <| Html.text sHead + result := result.push <| Html.element "a" true #[("href", link)] #[Html.text sTail] | none => - Html.text part + result := result.push <| Html.text part + return result where unicodeToSplit (c : Char) : Bool := -- separator (`Z`), other (`C`) @@ -184,17 +183,25 @@ mutual Renders a single `MD4Lean.Text` inline element to HTML, while processing custom extensions such as bibliography items. `inLink` suppresses auto-linking inside `` to avoid nested anchors. 
-/ -partial def renderText (t : MD4Lean.Text) (funName : String) (inLink : Bool := false) : HtmlM Unit := do +partial def renderText (t : MD4Lean.Text) (funName : String) (inLink : Bool := false) : HtmlM (Array Html) := do match t with - | .normal s => Html.text s - | .nullchar => Html.rawText "\uFFFD" - | .br _ => Html.rawText "
                \n" -- This avoids

                , which is incorrect HTML5 - | .softbr _ => Html.rawText "\n" - | .entity s => Html.rawText s - | .em ts => ({renderTexts ts funName inLink}) - | .strong ts => ({renderTexts ts funName inLink}) - | .u ts => ({renderTexts ts funName inLink}) - | .del ts => ({renderTexts ts funName inLink}) + | .normal s => return #[Html.text s] + | .nullchar => return #[Html.raw "\uFFFD"] + | .br _ => return #[Html.raw "
                \n"] -- This avoids

                , which is incorrect HTML5 + | .softbr _ => return #[Html.raw "\n"] + | .entity s => return #[Html.raw s] + | .em ts => + let inner ← renderTexts ts funName inLink + return #[Html.element "em" true #[] inner] + | .strong ts => + let inner ← renderTexts ts funName inLink + return #[Html.element "strong" true #[] inner] + | .u ts => + let inner ← renderTexts ts funName inLink + return #[Html.element "u" true #[] inner] + | .del ts => + let inner ← renderTexts ts funName inLink + return #[Html.element "del" true #[] inner] | .a href title _isAuto children => let hrefStr := attrTextToString href let titleStr := attrTextToString title @@ -203,109 +210,138 @@ partial def renderText (t : MD4Lean.Text) (funName : String) (inLink : Bool := f match bibitem with | .some bibitem => let newBackref ← addBackref bibitem.citekey funName + let childrenHtml ← renderTexts children funName (inLink := true) let changeName : Bool := if let #[.normal s] := children then s == bibitem.citekey else false - (
                - {if changeName then Html.text bibitem.tag - else renderTexts children funName (inLink := true)} - ) + let newChildren : Array Html := + if changeName then #[Html.text bibitem.tag] else childrenHtml + let mut attrs : Array (String × String) := #[("href", extHref)] + attrs := attrs.push ("title", bibitem.plaintext) + attrs := attrs.push ("id", s!"_backref_{newBackref.index}") + return #[Html.element "a" true attrs newChildren] | .none => + let childrenHtml ← renderTexts children funName (inLink := true) let mut attrs : Array (String × String) := #[("href", extHref)] if !titleStr.isEmpty then attrs := attrs.push ("title", titleStr) - ({renderTexts children funName (inLink := true)}) + return #[Html.element "a" true attrs childrenHtml] | .img src title alt => - let srcStr := escape (attrTextToString src) - let titleStr := escape (attrTextToString title) + let srcStr := Html.escape (attrTextToString src) + let titleStr := Html.escape (attrTextToString title) let altTexts := alt.toList.map textToPlaintext - let altStr := escape (String.join altTexts) + let altStr := Html.escape (String.join altTexts) let mut s := s!"\"{altStr}\""" - Html.rawText s + return #[Html.raw s] | .code ss => - ( - {if inLink then Html.text (String.join ss.toList) - else autoLinkInline ss} - ) + let inner ← if inLink then + pure #[Html.text (String.join ss.toList)] + else + autoLinkInline ss + return #[Html.element "code" true #[] inner] -- Math is rendered with dollar signs because MathJax will later render them | .latexMath ss => let content := String.join ss.toList - Html.rawText s!"${escape content}$" + return #[Html.raw s!"${Html.escape content}$"] -- Math is rendered with dollar signs because MathJax will later render them | .latexMathDisplay ss => let content := String.join ss.toList - Html.rawText s!"$${escape content}$$" + return #[Html.raw s!"$${Html.escape content}$$"] | .wikiLink target children => + let inner ← renderTexts children funName inLink let targetStr := 
attrTextToString target - Html.element "x-wikilink" #[("data-target", targetStr)] (renderTexts children funName inLink) + return #[Html.element "x-wikilink" true #[("data-target", targetStr)] inner] /-- Render an array of `MD4Lean.Text` inline elements to HTML. -/ -partial def renderTexts (texts : Array MD4Lean.Text) (funName : String) (inLink : Bool := false) : HtmlM Unit := do +partial def renderTexts (texts : Array MD4Lean.Text) (funName : String) (inLink : Bool := false) : HtmlM (Array Html) := do + let mut result : Array Html := #[] for t in texts do - renderText t funName inLink + result := result ++ (← renderText t funName inLink) + return result /-- Render a single `MD4Lean.Block` element to HTML. -/ -partial def renderBlock (block : MD4Lean.Block) (funName : String) (tight : Bool := false) : HtmlM Unit := do +partial def renderBlock (block : MD4Lean.Block) (funName : String) (tight : Bool := false) : HtmlM (Array Html) := do match block with | .p texts => + let inner ← renderTexts texts funName if tight then - renderTexts texts funName + return inner else - (

                {renderTexts texts funName}

                ) + return #[Html.element "p" true #[] inner] | .ul isTight _mark items => - (
                  {items.forM fun item => renderLi item funName isTight}
                ) + let mut lis : Array Html := #[] + for item in items do + let liHtml ← renderLi item funName isTight + lis := lis ++ liHtml + return #[Html.element "ul" true #[] lis] | .ol isTight start _mark items => + let mut lis : Array Html := #[] + for item in items do + let liHtml ← renderLi item funName isTight + lis := lis ++ liHtml let attrs : Array (String × String) := if start != 1 then #[("start", toString start)] else #[] - (
                  {items.forM fun item => renderLi item funName isTight}
                ) - | .hr => Html.rawText "
                \n" + return #[Html.element "ol" true attrs lis] + | .hr => return #[Html.raw "
                \n"] | .header level texts => - -- Dynamic tag name requires Html.element let id := mdGetHeadingId texts + let inner ← renderTexts texts funName + let anchor := Html.element "a" true #[("class", "hover-link"), ("href", s!"#{id}")] #[Html.text "#"] + let children := inner.push (Html.text " ") |>.push anchor let tag := s!"h{level}" - Html.element tag #[("id", id), ("class", "markdown-heading")] do - renderTexts texts funName - Html.text " " - (#) + return #[Html.element tag true #[("id", id), ("class", "markdown-heading")] children] | .code _info lang _fenceChar content => + let langStr := attrTextToString lang let codeAttrs : Array (String × String) := - let langStr := attrTextToString lang if !langStr.isEmpty then #[("class", s!"language-{langStr}")] else #[] - (
                
                -      {if isLeanCode lang then autoLinkInline content
                -       else Html.text (String.join content.toList)}
                -    
                ) + let inner : Array Html ← + if isLeanCode lang then + autoLinkInline content + else + pure #[Html.text (String.join content.toList)] + let codeElem := Html.element "code" true codeAttrs inner + return #[Html.element "pre" true #[] #[codeElem]] | .html content => - Html.rawText (String.join content.toList) + return #[Html.raw (String.join content.toList)] | .blockquote blocks => - (
                {blocks.forM (renderBlock · funName)}
                ) + let mut inner : Array Html := #[] + for b in blocks do + inner := inner ++ (← renderBlock b funName) + return #[Html.element "blockquote" true #[] inner] | .table head body => - ( - - {head.forM fun cell => ()} - - - {body.forM fun row => - ({row.forM fun cell => ()})} - -
                {renderTexts cell funName}
                {renderTexts cell funName}
                ) + let mut headCells : Array Html := #[] + for cell in head do + let cellHtml ← renderTexts cell funName + headCells := headCells.push (Html.element "th" true #[] cellHtml) + let headRow := Html.element "tr" true #[] headCells + let thead := Html.element "thead" true #[] #[headRow] + let mut bodyRows : Array Html := #[] + for row in body do + let mut rowCells : Array Html := #[] + for cell in row do + let cellHtml ← renderTexts cell funName + rowCells := rowCells.push (Html.element "td" true #[] cellHtml) + bodyRows := bodyRows.push (Html.element "tr" true #[] rowCells) + let tbody := Html.element "tbody" true #[] bodyRows + return #[Html.element "table" true #[] #[thead, tbody]] /-- Render a list item to HTML. -/ -partial def renderLi (li : MD4Lean.Li MD4Lean.Block) (funName : String) (tight : Bool) : HtmlM Unit := do - (
              • - {do if li.isTask then - let checked := li.taskChar == some 'x' || li.taskChar == some 'X' - if checked then - Html.rawText "" - else - Html.rawText ""} - {li.contents.forM fun b => renderBlock b funName tight} -
              • ) +partial def renderLi (li : MD4Lean.Li MD4Lean.Block) (funName : String) (tight : Bool) : HtmlM (Array Html) := do + let mut inner : Array Html := #[] + if li.isTask then + let checked := li.taskChar == some 'x' || li.taskChar == some 'X' + if checked then + inner := inner.push (Html.raw "") + else + inner := inner.push (Html.raw "") + for b in li.contents do + inner := inner ++ (← renderBlock b funName tight) + return #[Html.element "li" true #[] inner] end @@ -325,8 +361,8 @@ partial def findAllReferences (refsMap : Std.HashMap String BibItem) (s : String else ret -/-- Convert docstring to Html, writing directly to stream. -/ -def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : HtmlM Unit := do +/-- Convert docstring to Html. -/ +def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : HtmlM (Array Html) := do let docString := match docString with | .inl md => md @@ -338,12 +374,12 @@ def docStringToHtml (docString : String ⊕ VersoDocString) (funName : String) : let flags := MD4Lean.MD_DIALECT_GITHUB ||| MD4Lean.MD_FLAG_LATEXMATHSPANS ||| MD4Lean.MD_FLAG_NOHTML match MD4Lean.parse (docString ++ refsMarkdown) flags with | .some doc => + let mut result : Array Html := #[] for block in doc.blocks do - renderBlock block funName + result := result ++ (← renderBlock block funName) + return result | .none => addError <| "Error: failed to parse markdown:\n" ++ docString - (Error: failed to parse markdown: ) - Html.text docString - + return #[.raw "Error: failed to parse markdown: ", .text docString] end Output end DocGen4 diff --git a/DocGen4/Output/Find.lean b/DocGen4/Output/Find.lean index e6437ac8..726a1f88 100644 --- a/DocGen4/Output/Find.lean +++ b/DocGen4/Output/Find.lean @@ -5,17 +5,17 @@ namespace Output open scoped DocGen4.Jsx open Lean -open DocGen4 (Raw) -def find : BaseHtmlM Unit := do - - - - - - - - +def find : BaseHtmlM Html := do + pure + + + + + + + + end Output end DocGen4 diff --git 
a/DocGen4/Output/FoundationalTypes.lean b/DocGen4/Output/FoundationalTypes.lean index 7f8419ce..41ffdf2d 100644 --- a/DocGen4/Output/FoundationalTypes.lean +++ b/DocGen4/Output/FoundationalTypes.lean @@ -5,8 +5,8 @@ namespace DocGen4.Output open scoped DocGen4.Jsx -def foundationalTypes : BaseHtmlM Unit := do - baseHtmlGenerator "Foundational Types" do +def foundationalTypes : BaseHtmlM Html := templateLiftExtends (baseHtml "Foundational Types") do + pure <|

                Foundational Types

                @@ -18,15 +18,15 @@ def foundationalTypes : BaseHtmlM Unit := do

                Sort u

                Sort u is the type of types in Lean, and Sort u : Sort (u + 1).

                - {instancesForToHtml `_builtin_sortu} + {← instancesForToHtml `_builtin_sortu}

                Type u

                Type u is notation for Sort (u + 1).

                - {instancesForToHtml `_builtin_typeu} + {← instancesForToHtml `_builtin_typeu}

                Prop

                Prop is notation for Sort 0.

                - {instancesForToHtml `_builtin_prop} + {← instancesForToHtml `_builtin_prop}

                Pi types, {"(a : α) → β a"}

                The type of dependent functions is known as a pi type. diff --git a/DocGen4/Output/Index.lean b/DocGen4/Output/Index.lean index a6f14b23..95febccf 100644 --- a/DocGen4/Output/Index.lean +++ b/DocGen4/Output/Index.lean @@ -11,8 +11,8 @@ namespace Output open scoped DocGen4.Jsx -def index : BaseHtmlM Unit := do - baseHtmlGenerator "Index" do +def index : BaseHtmlM Html := do templateExtends (baseHtml "Index") <| + pure <|

                Welcome to the documentation page

                diff --git a/DocGen4/Output/Inductive.lean b/DocGen4/Output/Inductive.lean index f3080252..ca3ddd34 100644 --- a/DocGen4/Output/Inductive.lean +++ b/DocGen4/Output/Inductive.lean @@ -9,28 +9,33 @@ namespace Output open scoped DocGen4.Jsx open Lean -def instancesForToHtml (typeName : Name) : BaseHtmlM Unit := do -
                - Instances For -
                  -
                  +def instancesForToHtml (typeName : Name) : BaseHtmlM Html := do + pure +
                  + Instances For +
                    +
                    -def ctorToHtml (c : Process.ConstructorInfo) : HtmlM Unit := do +def ctorToHtml (c : Process.ConstructorInfo) : HtmlM Html := do let shortName := c.name.componentsRev.head!.toString let name := c.name.toString - let args := c.args.forM argToHtml + let args ← c.args.mapM argToHtml if let some doc := c.doc then - (
                  • - {shortName} {args} {" : "} {renderedCodeToHtml c.type} -
                    {docStringToHtml doc name}
                    -
                  • ) + let renderedDoc ← docStringToHtml doc name + pure +
                  • + {shortName} [args] {" : "} [← renderedCodeToHtml c.type] +
                    [renderedDoc]
                    +
                  • else - (
                  • - {shortName} {args} {" : "} {renderedCodeToHtml c.type} -
                  • ) + pure +
                  • + {shortName} [args] {" : "} [← renderedCodeToHtml c.type] +
                  • -def inductiveToHtml (i : Process.InductiveInfo) : HtmlM Unit := do -
                      {i.ctors.forM ctorToHtml}
                    +def inductiveToHtml (i : Process.InductiveInfo) : HtmlM (Array Html) := do + let constructorsHtml :=
                      [← i.ctors.toArray.mapM ctorToHtml]
                    + return #[constructorsHtml] end Output end DocGen4 diff --git a/DocGen4/Output/Module.lean b/DocGen4/Output/Module.lean index 802e7a48..c6e4d3da 100644 --- a/DocGen4/Output/Module.lean +++ b/DocGen4/Output/Module.lean @@ -19,115 +19,120 @@ namespace Output open scoped DocGen4.Jsx open Lean Process -open DocGen4 (Raw) /-- Render the structures this structure extends from as HTML so it can be added to the top level. -/ -def structureInfoHeader (s : Process.StructureInfo) : HtmlM Unit := do +def structureInfoHeader (s : Process.StructureInfo) : HtmlM (Array Html) := do + let mut nodes := #[] if s.parents.size > 0 then - (extends) - Html.text " " + nodes := nodes.push extends + let mut parents := #[Html.text " "] for parent in s.parents, i in [0:s.parents.size] do if i > 0 then - Html.text ", " - renderedCodeToHtml parent.type + parents := parents.push (Html.text ", ") + parents := parents ++ (← renderedCodeToHtml parent.type) + nodes := nodes ++ parents + return nodes /-- Render the general header of a declaration containing its declaration type and name. -/ -def docInfoHeader (doc : DocInfo) : HtmlM Unit := do -
                    - {doc.getKindDescription} - {if doc.getSorried then - ( - {" "}{declNameToHtmlBreakWithinLink doc.getName}{" "} - ) - else - ( - {" "}{declNameToHtmlBreakWithinLink doc.getName}{" "} - )} - {doc.getArgs.forM argToHtml} - {match doc with - | DocInfo.structureInfo i => structureInfoHeader i - | DocInfo.classInfo i => structureInfoHeader i - | _ => pure ()} - : -
                    {renderedCodeToHtml doc.getType}
                    -
                    +def docInfoHeader (doc : DocInfo) : HtmlM Html := do + let mut nodes := #[] + nodes := nodes.push <| Html.element "span" false #[("class", "decl_kind")] #[doc.getKindDescription] + -- TODO: Can we inline if-then-else and avoid repeating here? + if doc.getSorried then + nodes := nodes.push {← declNameToHtmlBreakWithinLink doc.getName} + else + nodes := nodes.push {← declNameToHtmlBreakWithinLink doc.getName} + for arg in doc.getArgs do + nodes := nodes.push (← argToHtml arg) + + match doc with + | DocInfo.structureInfo i => nodes := nodes.append (← structureInfoHeader i) + | DocInfo.classInfo i => nodes := nodes.append (← structureInfoHeader i) + | _ => nodes := nodes + + nodes := nodes.push <| Html.element "span" true #[("class", "decl_args")] #[" :"] + nodes := nodes.push
                    [← renderedCodeToHtml doc.getType]
                    + return
                    [nodes]
                    /-- The main entry point for rendering a single declaration inside a given module. -/ -def docInfoToHtml (module : Name) (doc : DocInfo) : HtmlM Unit := do +def docInfoToHtml (module : Name) (doc : DocInfo) : HtmlM Html := do -- basic info like headers, types, structure fields, etc. - let docInfoHtml : HtmlM Unit := match doc with + let docInfoHtml ← match doc with | DocInfo.inductiveInfo i => inductiveToHtml i | DocInfo.structureInfo i => structureToHtml i | DocInfo.classInfo i => classToHtml i | DocInfo.classInductiveInfo i => classInductiveToHtml i - | _ => pure () - -- rendered doc string - let docStringHtml : HtmlM Unit := match doc.getDocString with + | _ => pure #[] + -- rendered doc stirng + let docStringHtml ← match doc.getDocString with | some s => docStringToHtml s doc.getName.toString - | none => pure () + | none => pure #[] -- extra information like equations and instances - let extraInfoHtml : HtmlM Unit := match doc with - | DocInfo.classInfo i => do classInstancesToHtml i.name - | DocInfo.definitionInfo i => do equationsToHtml i; instancesForToHtml i.name + let extraInfoHtml ← match doc with + | DocInfo.classInfo i => pure #[← classInstancesToHtml i.name] + | DocInfo.definitionInfo i => pure ((← equationsToHtml i) ++ #[← instancesForToHtml i.name]) | DocInfo.instanceInfo i => equationsToHtml i.toDefinitionInfo - | DocInfo.classInductiveInfo i => classInstancesToHtml i.name - | DocInfo.inductiveInfo i => instancesForToHtml i.name - | DocInfo.structureInfo i => instancesForToHtml i.name - | _ => pure () + | DocInfo.classInductiveInfo i => pure #[← classInstancesToHtml i.name] + | DocInfo.inductiveInfo i => pure #[← instancesForToHtml i.name] + | DocInfo.structureInfo i => pure #[← instancesForToHtml i.name] + | _ => pure #[] let attrs := doc.getAttrs - let attrsHtml : HtmlM Unit := - if attrs.size > 0 then do + let attrsHtml := + if attrs.size > 0 then let attrStr := "@[" ++ String.intercalate ", " doc.getAttrs.toList ++ "]" - (
                    {attrStr}
                    ) + #[Html.element "div" false #[("class", "attributes")] #[attrStr]] else - pure () + #[] -- custom decoration (e.g., verification badges from external tools) let decorator ← getDeclarationDecorator + let decoratorHtml := decorator module doc.getName doc.getKind let cssClass := "decl" ++ if doc.getSorried then " sorried" else "" - (
                    -
                    - ) /-- Rendering a module doc string, that is the ones with an ! after the opener as HTML. -/ -def modDocToHtml (mdoc : ModuleDoc) : HtmlM Unit := do -
                    - {docStringToHtml (.inl mdoc.doc) ""} -
                    +def modDocToHtml (mdoc : ModuleDoc) : HtmlM Html := do + pure +
                    + [← docStringToHtml (.inl mdoc.doc) ""] +
                    /-- Render a module member, that is either a module doc string or a declaration as HTML. -/ -def moduleMemberToHtml (module : Name) (member : ModuleMember) : HtmlM Unit := do +def moduleMemberToHtml (module : Name) (member : ModuleMember) : HtmlM Html := do match member with | ModuleMember.docInfo d => docInfoToHtml module d | ModuleMember.modDoc d => modDocToHtml d -def declarationToNavLink [Monad m] [MonadReaderOf SiteBaseContext m] [MonadLiftT IO m] (module : Name) : m Unit := do +def declarationToNavLink (module : Name) : Html := @@ -142,49 +147,44 @@ def getImports (module : Name) : HtmlM (Array Name) := do Sort the list of all modules this one is importing, linkify it and return the HTML. -/ -def importsHtml (moduleName : Name) : HtmlM Unit := do +def importsHtml (moduleName : Name) : HtmlM (Array Html) := do let imports := (← getImports moduleName).qsort Name.lt - for i in imports do -
                  • {moduleToHtmlLink i}
                  • + imports.mapM (fun i => do return
                  • {← moduleToHtmlLink i}
                  • ) /-- Render the internal nav bar (the thing on the right on all module pages). -/ -def internalNav (members : Array Name) (moduleName : Name) : HtmlM Unit := do - +def internalNav (members : Array Name) (moduleName : Name) : HtmlM Html := do + pure + /-- The main entry point to rendering the HTML for an entire module. -/ -def moduleToHtml (module : Process.Module) : HtmlM Unit := withTheReader SiteBaseContext (setCurrentName module.name) do +def moduleToHtml (module : Process.Module) : HtmlM Html := withTheReader SiteBaseContext (setCurrentName module.name) do let relevantMembers := module.members.filter Process.ModuleMember.shouldRender + let memberDocs ← relevantMembers.mapM (moduleMemberToHtml module.name) let memberNames := filterDocInfo relevantMembers |>.map DocInfo.getName - let siteCtx ← readThe SiteContext - let stateRef ← IO.mkRef (← get) - liftM (baseHtmlGenerator module.name.toString do - runHtmlInBase (internalNav memberNames module.name) siteCtx stateRef - (
                    - {relevantMembers.forM fun member => - runHtmlInBase (moduleMemberToHtml module.name member) siteCtx stateRef} -
                    ) - : BaseHtmlM Unit) - set (← stateRef.get) + templateLiftExtends (baseHtmlGenerator module.name.toString) <| pure #[ + ← internalNav memberNames module.name, + Html.element "main" false #[] memberDocs + ] end Output end DocGen4 diff --git a/DocGen4/Output/Navbar.lean b/DocGen4/Output/Navbar.lean index 50387fbd..61ad534b 100644 --- a/DocGen4/Output/Navbar.lean +++ b/DocGen4/Output/Navbar.lean @@ -13,83 +13,96 @@ namespace Output open Lean open scoped DocGen4.Jsx -def moduleListFile (file : Name) : BaseHtmlM Unit := do - let cls := if (← getCurrentName) == file then "nav_link visible" else "nav_link" - (
                    +def moduleListFile (file : Name) : BaseHtmlM Html := do + return ) +
                    /-- Build the HTML tree representing the module hierarchy. -/ -partial def moduleListDir (h : Hierarchy) : BaseHtmlM Unit := do +partial def moduleListDir (h : Hierarchy) : BaseHtmlM Html := do let children := Array.mk (h.getChildren.toList.map Prod.snd) let dirs := children.filter (fun c => c.getChildren.toList.length != 0) let files := children.filter (fun c => Hierarchy.isFile c && c.getChildren.toList.length = 0) |>.map Hierarchy.getName + let dirNodes ← dirs.mapM moduleListDir + let fileNodes ← files.mapM moduleListFile let moduleLink ← moduleNameToLink h.getName - let openAttr := if (← getCurrentName).any (h.getName.isPrefixOf ·) then #[("open", "")] else #[] - let detailsAttrs := #[("class", "nav_sect"), ("data-path", moduleLink)] ++ openAttr - (
                    - {if h.isFile then - ({s!"{h.getName.getString!} ("}file)) + let summary ← do + if h.isFile then + pure {s!"{h.getName.getString!} ("}file) else - ({h.getName.getString!})} - {dirs.forM moduleListDir} - {files.forM moduleListFile} -
                    ) + pure {h.getName.getString!} + pure + /-- Return a list of top level modules, linkified and rendered as HTML -/ -def moduleList : BaseHtmlM Unit := do +def moduleList : BaseHtmlM Html := do let hierarchy ← getHierarchy - (
                    - {do for (_, cs) in hierarchy.getChildren do - moduleListDir cs} -
                    ) + let mut list := Array.empty + for (_, cs) in hierarchy.getChildren do + list := list.push <| ← moduleListDir cs + return
                    [list]
                    /-- The main entry point to rendering the navbar on the left hand side. -/ -def navbar : BaseHtmlM Unit := do - ( - - {baseHtmlHeadDeclarations} +def navbar : BaseHtmlM Html := do + /- + TODO: Add these in later + + + + + -/ + let mut staticPages : Array Html := #[ + , + , + , + ] + let config ← read + if not config.refs.isEmpty then + staticPages := staticPages.push + pure + + + [← baseHtmlHeadDeclarations] - - - - + + + + - - - - ) + + end Output end DocGen4 diff --git a/DocGen4/Output/NotFound.lean b/DocGen4/Output/NotFound.lean index 29e370bd..976ba062 100644 --- a/DocGen4/Output/NotFound.lean +++ b/DocGen4/Output/NotFound.lean @@ -14,8 +14,8 @@ open scoped DocGen4.Jsx /-- Render the 404 page. -/ -def notFound : BaseHtmlM Unit := do - baseHtmlGenerator "404" do +def notFound : BaseHtmlM Html := do templateExtends (baseHtml "404") <| + pure <|

                    404 Not Found

                    Unfortunately, the page you were looking for is no longer here.

                    diff --git a/DocGen4/Output/References.lean b/DocGen4/Output/References.lean index 29c1dd6c..e78a88e5 100644 --- a/DocGen4/Output/References.lean +++ b/DocGen4/Output/References.lean @@ -62,35 +62,33 @@ def disableBibFile (buildDir : System.FilePath) : IO Unit := do namespace Output open scoped DocGen4.Jsx -open DocGen4 (Raw) -def refItem (ref : BibItem) (backrefs : Array BackrefItem) : BaseHtmlM Unit := do +def refItem (ref : BibItem) (backrefs : Array BackrefItem) : BaseHtmlM Html := do let backrefs := backrefs.filter (fun x => x.citekey == ref.citekey) - let backrefLinks : BaseHtmlM Unit := do - if !backrefs.isEmpty then - ( - {do for i in [:backrefs.size] do - let backref := backrefs[i]! - let href := s!"{← moduleNameToLink backref.modName}#_backref_{backref.index}" - let title := s!"File: {backref.modName}" ++ - if backref.funName.isEmpty then "" else s!"\nLocation: {backref.funName}" - Html.rawText " " - ({s!"[{i + 1}]"})} - ) - (
                  • - {ref.tag} - {Raw.mk " "} - {Raw.mk ref.html} - {backrefLinks} -
                  • ) - -def references (backrefs : Array BackrefItem) : BaseHtmlM Unit := do - baseHtmlGenerator "References" do + let toHtml (i : Nat) (backref : BackrefItem) : BaseHtmlM (Array Html) := do + let href := s!"{← moduleNameToLink backref.modName}#_backref_{backref.index}" + let title := s!"File: {backref.modName}" ++ + if backref.funName.isEmpty then "" else s!"\nLocation: {backref.funName}" + pure #[.raw " ", {.text s!"[{i + 1}]"}] + let backrefHtml : Html ← (do + if backrefs.isEmpty then + pure (.raw "") + else + pure [(← backrefs.mapIdxM toHtml).foldl (· ++ ·) #[]]) + pure <| +
                  • + {.text ref.tag} + {.raw " "}{.raw ref.html}{backrefHtml} +
                  • + +def references (backrefs : Array BackrefItem) : + BaseHtmlM Html := templateLiftExtends (baseHtml "References") do + pure <|

                    References

                      - {(← read).refs.forM (refItem · backrefs)} + [← (← read).refs.mapM (refItem · backrefs)]
                    diff --git a/DocGen4/Output/Search.lean b/DocGen4/Output/Search.lean index 92543bca..17fb0564 100644 --- a/DocGen4/Output/Search.lean +++ b/DocGen4/Output/Search.lean @@ -10,10 +10,9 @@ namespace DocGen4 namespace Output open scoped DocGen4.Jsx -open DocGen4 (Raw) -def search : BaseHtmlM Unit := do - baseHtmlGenerator "Search" do +def search : BaseHtmlM Html := do templateExtends (baseHtml "Search") <| do + pure

                    Search Results

                    @@ -39,7 +38,7 @@ def search : BaseHtmlM Unit := do
                    diff --git a/DocGen4/Output/Structure.lean b/DocGen4/Output/Structure.lean index 4acb21b2..7bf513cd 100644 --- a/DocGen4/Output/Structure.lean +++ b/DocGen4/Output/Structure.lean @@ -12,48 +12,49 @@ open Lean /-- Render a single field consisting of its documentation, its name and its type as HTML. -/ -def fieldToHtml (f : Process.FieldInfo) : HtmlM Unit := do +def fieldToHtml (f : Process.FieldInfo) : HtmlM Html := do let shortName := f.name.componentsRev.head!.toString let name := f.name.toString + let args ← f.args.mapM argToHtml if f.isDirect then - if let some doc := f.doc then - (
                  • -
                    - {shortName} {f.args.forM argToHtml} {" : "} {renderedCodeToHtml f.type} -
                    -
                    {docStringToHtml doc name}
                    -
                  • ) - else - (
                  • -
                    - {shortName} {f.args.forM argToHtml} {" : "} {renderedCodeToHtml f.type} -
                    -
                  • ) + let doc : Array HTML ← + if let some doc := f.doc then + let renderedDoc ← docStringToHtml doc name + pure #[
                    [renderedDoc]
                    ] + else + pure #[] + pure +
                  • +
                    {shortName} [args] {" : "} [← renderedCodeToHtml f.type]
                    + [doc] +
                  • else - (
                  • -
                    - {shortName} - {f.args.forM argToHtml} {" : "} {renderedCodeToHtml f.type} -
                    -
                  • ) + pure +
                  • +
                    {shortName} [args] {" : "} [← renderedCodeToHtml f.type]
                    +
                  • /-- Render all information about a structure as HTML. -/ -def structureToHtml (i : Process.StructureInfo) : HtmlM Unit := do - if Name.isSuffixOf `mk i.ctor.name then - (
                      - {i.fieldInfo.forM fieldToHtml} -
                    ) - else - let ctorShortName := i.ctor.name.componentsRev.head!.toString - (
                      -
                    • {s!"{ctorShortName} "} :: (
                    • -
                        - {i.fieldInfo.forM fieldToHtml} -
                      -
                    • )
                    • -
                    ) +def structureToHtml (i : Process.StructureInfo) : HtmlM (Array Html) := do + let structureHtml ← do + if Name.isSuffixOf `mk i.ctor.name then + pure +
                      + [← i.fieldInfo.mapM fieldToHtml] +
                    + else + let ctorShortName := i.ctor.name.componentsRev.head!.toString + pure +
                      +
                    • {s!"{ctorShortName} "} :: (
                    • +
                        + [← i.fieldInfo.mapM fieldToHtml] +
                      +
                    • )
                    • +
                    + return #[structureHtml] end Output end DocGen4 diff --git a/DocGen4/Output/Tactics.lean b/DocGen4/Output/Tactics.lean index 695674cd..a3917be6 100644 --- a/DocGen4/Output/Tactics.lean +++ b/DocGen4/Output/Tactics.lean @@ -9,37 +9,36 @@ import DocGen4.Output.Module namespace DocGen4.Process open scoped DocGen4.Jsx -open DocGen4 (Raw) -open DocGen4.Output -open Lean +open DocGen4 Output Lean /-- Render the HTML for a single tactic. -/ -def TacticInfo.docStringToHtml (tac : TacticInfo MarkdownDocstring) : Output.HtmlM (TacticInfo String) := do - let captured ← Output.captureHtml do -

                    {Output.docStringToHtml (.inl tac.docString) tac.internalName.toString}

                    - return { tac with docString := captured } +def TacticInfo.docStringToHtml (tac : TacticInfo MarkdownDocstring) : Output.HtmlM (TacticInfo Html) := do + return { + tac with + docString :=

                    [← Output.docStringToHtml (.inl tac.docString) tac.internalName.toString]

                    + } /-- Render the HTML for a single tactic. -/ -def TacticInfo.toHtml (tac : TacticInfo String) : Output.BaseHtmlM Unit := do +def TacticInfo.toHtml (tac : TacticInfo Html) : Output.BaseHtmlM Html := do let internalName := tac.internalName.toString let defLink := (← moduleNameToLink tac.definingModule) ++ "#" ++ internalName let tags := ", ".intercalate (tac.tags.map (·.toString)).qsort.toList - (
                    + return

                    {tac.userName}

                    - {Raw.mk tac.docString} + {tac.docString}
                    Tags:
                    {tags}
                    Defined in module:
                    {tac.definingModule.toString}
                    -
                    ) +
                    -def TacticInfo.navLink (tac : TacticInfo α) : Output.BaseHtmlM Unit := do +def TacticInfo.navLink (tac : TacticInfo α) : Html :=

                    {tac.userName}

                    end DocGen4.Process @@ -48,23 +47,23 @@ namespace DocGen4.Output open scoped DocGen4.Jsx open Lean Process -open DocGen4 (Raw) /-- Render the HTML for the tactics listing page. -/ -def tactics (tacticInfo : Array (TacticInfo String)) : BaseHtmlM Unit := do - baseHtmlGenerator "Tactics" do +def tactics (tacticInfo : Array (TacticInfo Html)) : BaseHtmlM Html := do + let sectionsHtml ← tacticInfo.mapM (· |>.toHtml) + templateLiftExtends (baseHtmlGenerator "Tactics") <| pure #[ - (
                    -

                    The tactic language is a special-purpose programming language for constructing proofs, indicated using the keyword by.

                    - {tacticInfo.forM (·.toHtml)} -
                    ) + [tacticInfo.map (· |>.navLink)] + , + Html.element "main" false #[] ( + #[

                    The tactic language is a special-purpose programming language for constructing proofs, indicated using the keyword by.

                    ] ++ + sectionsHtml) + ] -def loadTacticsJSON (buildDir : System.FilePath) : IO (Array (TacticInfo String)) := do +def loadTacticsJSON (buildDir : System.FilePath) : IO (Array (TacticInfo Html)) := do let mut result : Array (TacticInfo _) := #[] for entry in ← System.FilePath.readDir (declarationsBasePath buildDir) do if entry.fileName.startsWith "tactics-" && entry.fileName.endsWith ".json" then @@ -84,7 +83,7 @@ def loadTacticsJSON (buildDir : System.FilePath) : IO (Array (TacticInfo String) This `abbrev` exists as a type-checking wrapper around `toJson`, ensuring `loadTacticsJSON` gets objects in the expected format. -/ -abbrev saveTacticsJSON (fileName : System.FilePath) (tacticInfo : Array (TacticInfo String)) : IO Unit := do +abbrev saveTacticsJSON (fileName : System.FilePath) (tacticInfo : Array (TacticInfo Html)) : IO Unit := do if tacticInfo.size > 0 then IO.FS.writeFile fileName (toString (toJson tacticInfo)) diff --git a/DocGen4/Output/Template.lean b/DocGen4/Output/Template.lean index 5b030331..921100fd 100644 --- a/DocGen4/Output/Template.lean +++ b/DocGen4/Output/Template.lean @@ -10,52 +10,61 @@ namespace DocGen4 namespace Output open scoped DocGen4.Jsx -open DocGen4 (Raw) /-- The HTML template used for all pages. -/ -def baseHtmlGenerator (title : String) (writeContent : BaseHtmlM Unit) : BaseHtmlM Unit := do - ( - - {baseHtmlHeadDeclarations} - - {title} - - - - - - {do if let some module := ← getCurrentName then - ()} - - - - - - - - - - - - -
                    -

                    Documentation

                    -

                    {breakWithin title}

                    -
                    - {Raw.mk " "} - -
                    -
                    - - {writeContent} - - - - ) +def baseHtmlGenerator (title : String) (site : Array Html) : BaseHtmlM Html := do + let moduleConstant := + if let some module := ← getCurrentName then + #[] + else + #[] + pure + + + [← baseHtmlHeadDeclarations] + + {title} + + + + + + [moduleConstant] + + + + + + + + + + + + +
                    +

                    Documentation

                    +

                    [breakWithin title]

                    +
                    + {.raw " "} + +
                    +
                    + + [site] + + + + + +/-- +A comfortability wrapper around `baseHtmlGenerator`. +-/ +def baseHtml (title : String) (site : Html) : BaseHtmlM Html := baseHtmlGenerator title #[site] end Output end DocGen4 diff --git a/DocGen4/Output/ToHtmlFormat.lean b/DocGen4/Output/ToHtmlFormat.lean index 7bdcd92a..91b8abdf 100644 --- a/DocGen4/Output/ToHtmlFormat.lean +++ b/DocGen4/Output/ToHtmlFormat.lean @@ -7,19 +7,30 @@ Authors: Wojciech Nawrocki, Sebastian Ullrich, Henrik Böving import Lean.Data.Json import Lean.Data.Xml import Lean.Parser -import Lean.Elab.Term /-! This module defines: -- streaming HTML write helpers -- a JSX-like DSL that expands to monadic writes -/ +- a representation of HTML trees +- together with a JSX-like DSL for writing them +- and widget support for visualizing any type as HTML. -/ namespace DocGen4 open Lean -/-- A raw HTML string that should not be escaped. -/ -structure Raw where - html : String +inductive Html where + -- TODO(WN): it's nameless for shorter JSON; re-add names when we have deriving strategies for From/ToJson + -- element (tag : String) (flatten : Bool) (attrs : Array HtmlAttribute) (children : Array Html) + | element : String → Bool → Array (String × String) → Array Html → Html + /-- A text node, which will be escaped in the output -/ + | text : String → Html + /-- An arbitrary string containing HTML -/ + | raw : String → Html + deriving Repr, BEq, Inhabited, FromJson, ToJson + +instance : Coe String Html := + ⟨Html.text⟩ + +namespace Html def escapePairs : Array (String × String) := #[ @@ -32,15 +43,11 @@ def escapePairs : Array (String × String) := def escape (s : String) : String := escapePairs.foldl (fun acc (o, r) => acc.replace o r) s -namespace Html -def escape := @DocGen4.escape -end Html - -- TODO: remove the following 3 functions -- once is fixed def _root_.Lean.Xml.Attributes.toStringEscaped (as : Xml.Attributes) : String := - as.foldl (fun s n v => s ++ s!" 
{n}=\"{DocGen4.escape v}\"") "" + as.foldl (fun s n v => s ++ s!" {n}=\"{Html.escape v}\"") "" mutual @@ -50,7 +57,7 @@ partial def _root_.Lean.Xml.eToStringEscaped : Xml.Element → String partial def _root_.Lean.Xml.cToStringEscaped : Xml.Content → String | .Element e => eToStringEscaped e | .Comment c => s!"" -| .Character c => DocGen4.escape c +| .Character c => Html.escape c end @@ -66,6 +73,31 @@ partial def _root_.Lean.Xml.cToPlaintext : Xml.Content → String end +def attributesToString (attrs : Array (String × String)) :String := + attrs.foldl (fun acc (k, v) => acc ++ " " ++ k ++ "=\"" ++ escape v ++ "\"") "" + +-- TODO: Termination proof +partial def toStringAux : Html → String +| element tag false attrs #[text s] => s!"<{tag}{attributesToString attrs}>{escape s}\n" +| element tag false attrs #[raw s] => s!"<{tag}{attributesToString attrs}>{s}\n" +| element tag false attrs #[child] => s!"<{tag}{attributesToString attrs}>\n{child.toStringAux}\n" +| element tag false attrs children => s!"<{tag}{attributesToString attrs}>\n{children.foldl (· ++ toStringAux ·) ""}\n" +| element tag true attrs children => s!"<{tag}{attributesToString attrs}>{children.foldl (· ++ toStringAux ·) ""}" +| text s => escape s +| raw s => s + +def toString (html : Html) : String := + html.toStringAux.trimAsciiEnd.copy + +partial def textLength : Html → Nat +| raw s => s.length -- measures lengths of escape sequences too! 
+| text s => s.length +| element _ _ _ children => + let lengths := children.map textLength + lengths.foldl Nat.add 0 + +end Html + namespace Jsx open Parser PrettyPrinter @@ -117,51 +149,35 @@ def translateAttrs (attrs : Array (TSyntax `DocGen4.Jsx.jsxAttr)) : MacroM (TSyn | _ => Macro.throwUnsupported return as -private def mkN (s : String) : Lean.Ident := - mkIdent (`DocGen4 ++ `Output ++ Name.mkSimple s) - -private def mkApp (fn : Ident) (args : Array (TSyntax `term)) : MacroM (TSyntax `term) := do - args.foldlM (fun acc arg => `($acc $arg)) (fn : TSyntax `term) +private def htmlHelper (n : Syntax) (children : Array Syntax) (m : Syntax) : MacroM (String × (TSyntax `term)):= do + unless n.getId == m.getId do + withRef m <| Macro.throwError s!"Leading and trailing part of tags don't match: '{n}', '{m}'" + let mut cs ← `(#[]) + for child in children do + cs ← match child with + | `(jsxChild|$t:jsxText) => `(($cs).push (Html.text $(quote t.raw[0]!.getAtomVal))) + -- TODO(WN): elab as list of children if type is `t Html` where `Foldable t` + | `(jsxChild|{$t}) => `(($cs).push ($t : Html)) + | `(jsxChild|[$t]) => `($cs ++ ($t : Array Html)) + | `(jsxChild|$e:jsxElement) => `(($cs).push ($e:jsxElement : Html)) + | _ => Macro.throwUnsupported + let tag := toString n.getId + pure <| (tag, cs) macro_rules | `(<$n $attrs* />) => do - let tag : TSyntax `term := quote (toString n.getId) - let atSyn ← translateAttrs attrs - let pot := mkN "putOpenTag" - let pct := mkN "putCloseTag" - let openCall ← mkApp pot #[tag, atSyn] - let closeCall ← mkApp pct #[tag] - `(do ($openCall); ($closeCall)) + let kind := quote (toString n.getId) + let attrs ← translateAttrs attrs + `(Html.element $kind true $attrs #[]) | `(<$n $attrs* >$children*) => do - unless n.getId == m.getId do - withRef m <| Macro.throwError s!"Leading and trailing part of tags don't match: '{n}', '{m}'" - let atSyn ← translateAttrs attrs - let tag : TSyntax `term := quote (toString n.getId) - let pot := mkN 
"putOpenTag" - let pct := mkN "putCloseTag" - let pe := mkN "putEscaped" - let openCall ← mkApp pot #[tag, atSyn] - let closeCall ← mkApp pct #[tag] - let mut stmts : Array (TSyntax `Lean.Parser.Term.doSeqItem) := #[] - stmts := stmts.push (← `(Lean.Parser.Term.doSeqItem| ($openCall))) - for child in children do - let stmt ← match child with - | `(jsxChild|$t:jsxText) => - let s : TSyntax `term := quote t.raw[0]!.getAtomVal - let call ← mkApp pe #[s] - `(Lean.Parser.Term.doSeqItem| ($call)) - | `(jsxChild|{$t}) => - `(Lean.Parser.Term.doSeqItem| ($t)) - | `(jsxChild|[$t]) => - `(Lean.Parser.Term.doSeqItem| for _x in ($t : Array _) do _x) - | `(jsxChild|$e:jsxElement) => - `(Lean.Parser.Term.doSeqItem| $e:jsxElement) - | _ => Macro.throwUnsupported - stmts := stmts.push stmt - stmts := stmts.push (← `(Lean.Parser.Term.doSeqItem| ($closeCall))) - `(do $stmts*) - + let (tag, children) ← htmlHelper n children m + `(Html.element $(quote tag) true $(← translateAttrs attrs) $children) end Jsx +/-- A type which implements `ToHtmlFormat` will be visualized +as the resulting HTML in editors which support it. 
-/ +class ToHtmlFormat (α : Type u) where + formatHtml : α → Html + end DocGen4 diff --git a/DocGen4/Output/ToJson.lean b/DocGen4/Output/ToJson.lean index 2d103320..5dd0e23e 100644 --- a/DocGen4/Output/ToJson.lean +++ b/DocGen4/Output/ToJson.lean @@ -107,7 +107,8 @@ def JsonIndex.addModule (index : JsonIndex) (module : JsonModule) : BaseHtmlM Js | some i => pure i | none => let impLink ← moduleNameToLink (String.toName imp) - pure { url := impLink, importedBy := #[] } + let indexedModule := { url := impLink, importedBy := #[] } + pure indexedModule index := { index with modules := index.modules.insert @@ -123,7 +124,7 @@ def DocInfo.toJson (sourceLinker : Option DeclarationRange → String) (info : P let docLink ← declNameToLink info.getName let sourceLink := sourceLinker info.getDeclarationRange let line := info.getDeclarationRange.pos.line - let header ← captureHtml (docInfoHeader info) + let header := (← docInfoHeader info).toString let info := { name, kind, doc, docLink, sourceLink, line } return { info, header } diff --git a/Main.lean b/Main.lean index dd583d72..e3855309 100644 --- a/Main.lean +++ b/Main.lean @@ -84,7 +84,7 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do -- Add `references` pseudo-module to hierarchy since references.html is always generated let hierarchy := Hierarchy.fromArray (targetModules.push `references) - let baseConfig ← getSimpleBaseConfig buildDir hierarchy + let baseConfig ← getSimpleBaseContext buildDir hierarchy -- Parallel HTML generation let outputs ← htmlOutputResultsParallel baseConfig dbPath linkCtx targetModules (sourceLinker? 
:= some (dbSourceLinker linkCtx.sourceUrls)) From a468da650dee8148a3de969785025f2c1b0c1f38 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 10:53:26 +0100 Subject: [PATCH 064/106] Attempt to optimize structure reading --- DocGen4/DB/Read.lean | 66 +++++++++++++++++++++++++----------------- DocGen4/DB/Schema.lean | 3 ++ 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index c2038870..41477868 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -50,6 +50,7 @@ private structure ReadStmts where loadStructureParentsStmt : SQLite.Stmt loadFieldArgsStmt : SQLite.Stmt loadFieldsStmt : SQLite.Stmt + loadFieldsJoinedStmt : SQLite.Stmt lookupProjStmt : SQLite.Stmt lookupRenderStmt : SQLite.Stmt loadStructCtorStmt : SQLite.Stmt @@ -84,6 +85,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO let loadStructureParentsStmt ← sqlite.prepare "SELECT projection_fn, type FROM structure_parents WHERE module_name = ? AND position = ? ORDER BY sequence" let loadFieldArgsStmt ← sqlite.prepare "SELECT binder, is_implicit FROM structure_field_args WHERE module_name = ? AND position = ? AND field_sequence = ? ORDER BY arg_sequence" let loadFieldsStmt ← sqlite.prepare "SELECT sequence, proj_name, type, is_direct FROM structure_fields WHERE module_name = ? AND position = ? 
ORDER BY sequence" + let loadFieldsJoinedStmt ← sqlite.prepare "SELECT f.sequence, f.proj_name, f.type, f.is_direct, n.module_name, n.position, n.render, md.text, v.content, dr.start_line, dr.start_column, dr.start_utf16, dr.end_line, dr.end_column, dr.end_utf16 FROM structure_fields f LEFT JOIN name_info n ON n.name = f.proj_name LEFT JOIN markdown_docstrings md ON md.module_name = n.module_name AND md.position = n.position LEFT JOIN verso_docstrings v ON v.module_name = n.module_name AND v.position = n.position LEFT JOIN declaration_ranges dr ON dr.module_name = n.module_name AND dr.position = n.position WHERE f.module_name = ? AND f.position = ? ORDER BY f.sequence" let lookupProjStmt ← sqlite.prepare "SELECT module_name, position FROM name_info WHERE name = ? LIMIT 1" let lookupRenderStmt ← sqlite.prepare "SELECT render FROM name_info WHERE module_name = ? AND position = ?" let loadStructCtorStmt ← sqlite.prepare "SELECT name, type, ctor_position FROM structure_constructors WHERE module_name = ? AND position = ?" 
@@ -108,7 +110,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO pure { values, loadArgsStmt, loadAttrsStmt, readMdDocstringStmt, readVersoDocstringStmt, loadDeclRangeStmt, loadEqnsStmt, loadInstanceArgsStmt, loadStructureParentsStmt, - loadFieldArgsStmt, loadFieldsStmt, lookupProjStmt, lookupRenderStmt, + loadFieldArgsStmt, loadFieldsStmt, loadFieldsJoinedStmt, lookupProjStmt, lookupRenderStmt, loadStructCtorStmt, loadCtorPosStmt, loadCtorInfoStmt, readAxiomStmt, readOpaqueStmt, readDefinitionStmt, readInstanceStmt, readInductiveStmt, readStructureStmt, readClassInductiveStmt, @@ -261,42 +263,52 @@ private def ReadStmts.loadStructureFieldArgs (s : ReadStmts) (moduleName : Strin open Lean SQLite.Blob in private def ReadStmts.loadStructureFields (s : ReadStmts) (moduleName : String) (position : Int64) : IO (Array Process.FieldInfo) := do - s.loadFieldsStmt.bind 1 moduleName - s.loadFieldsStmt.bind 2 position + let stmt := s.loadFieldsJoinedStmt + stmt.bind 1 moduleName + stmt.bind 2 position let mut fields := #[] - while (← s.loadFieldsStmt.step) do - let fieldSeq ← s.loadFieldsStmt.columnInt64 0 - let name := (← s.loadFieldsStmt.columnText 1).toName - let typeBlob ← s.loadFieldsStmt.columnBlob 2 + while (← stmt.step) do + let fieldSeq ← stmt.columnInt64 0 + let name := (← stmt.columnText 1).toName + let typeBlob ← stmt.columnBlob 2 let type ← readRenderedCode typeBlob - let isDirect := (← s.loadFieldsStmt.columnInt64 3) != 0 - s.lookupProjStmt.bind 1 name.toString - let (doc, attrs, declRange, render) ← if (← s.lookupProjStmt.step) then do - let projModName ← s.lookupProjStmt.columnText 0 - let projPos ← s.lookupProjStmt.columnInt64 1 - done s.lookupProjStmt - let doc ← s.loadDocstring projModName projPos - let attrs ← s.loadAttrs projModName projPos - let declRange ← s.loadDeclarationRange projModName projPos - let render ← do - s.lookupRenderStmt.bind 1 projModName - s.lookupRenderStmt.bind 2 projPos - let r ← if (← 
s.lookupRenderStmt.step) then - pure ((← s.lookupRenderStmt.columnInt64 0) != 0) + let isDirect := (← stmt.columnInt64 3) != 0 + -- Columns 4-14 come from LEFT JOINs on name_info, markdown_docstrings, verso_docstrings, declaration_ranges + let projFound := (← stmt.columnType 4) != .null + let (doc, attrs, declRange, render) ← if projFound then do + let projModName ← stmt.columnText 4 + let projPos ← stmt.columnInt64 5 + let render := (← stmt.columnInt64 6) != 0 + let doc ← do + if (← stmt.columnType 7) != .null then + pure <| some <| Sum.inl (← stmt.columnText 7) + else if (← stmt.columnType 8) != .null then + let blob ← stmt.columnBlob 8 + have := versoDocStringFromBinary s.values + match fromBinary blob with + | .ok doc => pure <| some <| Sum.inr doc + | .error e => throw <| IO.userError s!"Failed to deserialize VersoDocString: {e}" else - pure true - done s.lookupRenderStmt - pure r + pure none + let declRange ← if (← stmt.columnType 9) != .null then + pure <| some { + pos := ⟨(← stmt.columnInt64 9).toNatClampNeg, (← stmt.columnInt64 10).toNatClampNeg⟩ + charUtf16 := (← stmt.columnInt64 11).toNatClampNeg + endPos := ⟨(← stmt.columnInt64 12).toNatClampNeg, (← stmt.columnInt64 13).toNatClampNeg⟩ + endCharUtf16 := (← stmt.columnInt64 14).toNatClampNeg + } + else + pure none + let attrs ← s.loadAttrs projModName projPos pure (doc, attrs, declRange, render) - else do - done s.lookupProjStmt + else pure (none, #[], none, true) let args ← s.loadStructureFieldArgs moduleName position fieldSeq fields := fields.push { name, type, doc, args, declarationRange := declRange.getD default, attrs, render, isDirect } - done s.loadFieldsStmt + done stmt return fields open Lean SQLite.Blob in diff --git a/DocGen4/DB/Schema.lean b/DocGen4/DB/Schema.lean index e79be8eb..607f69e3 100644 --- a/DocGen4/DB/Schema.lean +++ b/DocGen4/DB/Schema.lean @@ -154,6 +154,9 @@ CREATE TABLE IF NOT EXISTS name_info ( FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE ); +-- Index 
for lookups by declaration name (e.g. structure field projection lookups) +CREATE INDEX IF NOT EXISTS idx_name_info_name ON name_info(name); + CREATE TABLE IF NOT EXISTS axioms ( module_name TEXT NOT NULL, position INTEGER NOT NULL, From 3c25ecbdb31fd495ba99f6fc3246444e0d145419 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 11:31:23 +0100 Subject: [PATCH 065/106] fix: share DB in tasks --- DocGen4/Output.lean | 82 +++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 3855982f..e9edb099 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -103,56 +103,58 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.createDirAll <| basePath baseConfig.buildDir FS.createDirAll <| declarationsBasePath baseConfig.buildDir - -- Spawn one task per 500 modules, each returning its output file path - let tasks ← (chunksOf targetModules 500).mapM fun mods => IO.asTask do mods.mapM fun modName => do + let chunkSize := if targetModules.size > 20 then targetModules.size / 20 else 1 + -- Spawn about 20 tasks + let tasks ← (chunksOf targetModules chunkSize).mapM fun mods => IO.asTask do -- Each task opens its own DB connection (SQLite handles concurrent readers well) let db ← DB.openForReading dbPath builtinDocstringValues - let module ← db.loadModule modName + mods.mapM fun modName => do + let module ← db.loadModule modName - -- Build a minimal AnalyzerResult with just this module's info - let result : AnalyzerResult := { - name2ModIdx := linkCtx.name2ModIdx - moduleNames := linkCtx.moduleNames - moduleInfo := ({} : Std.HashMap Name Process.Module).insert modName module - } + -- Build a minimal AnalyzerResult with just this module's info + let result : AnalyzerResult := { + name2ModIdx := linkCtx.name2ModIdx + moduleNames := linkCtx.moduleNames + moduleInfo := ({} : Std.HashMap Name Process.Module).insert modName 
module + } - let config : SiteContext := { - result := result - sourceLinker := (sourceLinker?.getD SourceLinker.sourceLinker) none - refsMap := Std.HashMap.emptyWithCapacity baseConfig.refs.size |>.insertMany (baseConfig.refs.iter.map fun x => (x.citekey, x)) - declarationDecorator := declarationDecorator?.getD defaultDeclarationDecorator - } + let config : SiteContext := { + result := result + sourceLinker := (sourceLinker?.getD SourceLinker.sourceLinker) none + refsMap := Std.HashMap.emptyWithCapacity baseConfig.refs.size |>.insertMany (baseConfig.refs.iter.map fun x => (x.citekey, x)) + declarationDecorator := declarationDecorator?.getD defaultDeclarationDecorator + } - -- path: 'basePath/module/components/till/last.html' - -- The last component is the file name, so we drop it from the depth to root. - let moduleConfig := { baseConfig with - depthToRoot := modName.components.dropLast.length - currentName := some modName - } - let (moduleHtml, cfg) := moduleToHtml module |>.run {} config moduleConfig - let (tactics, cfg) := module.tactics.mapM TacticInfo.docStringToHtml |>.run cfg config baseConfig - if not cfg.errors.isEmpty then - throw <| IO.userError s!"There are errors when generating HTML for '{modName}': {cfg.errors}" + -- path: 'basePath/module/components/till/last.html' + -- The last component is the file name, so we drop it from the depth to root. 
+ let moduleConfig := { baseConfig with + depthToRoot := modName.components.dropLast.length + currentName := some modName + } + let (moduleHtml, cfg) := moduleToHtml module |>.run {} config moduleConfig + let (tactics, cfg) := module.tactics.mapM TacticInfo.docStringToHtml |>.run cfg config baseConfig + if not cfg.errors.isEmpty then + throw <| IO.userError s!"There are errors when generating HTML for '{modName}': {cfg.errors}" - -- Write HTML file - let relFilePath := basePathComponent / moduleNameToFile modName - let filePath := baseConfig.buildDir / relFilePath - if let .some d := filePath.parent then - FS.createDirAll d - FS.writeFile filePath moduleHtml.toString + -- Write HTML file + let relFilePath := basePathComponent / moduleNameToFile modName + let filePath := baseConfig.buildDir / relFilePath + if let .some d := filePath.parent then + FS.createDirAll d + FS.writeFile filePath moduleHtml.toString - -- Write backrefs JSON - FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"backrefs-{module.name}.json") - (toString (toJson cfg.backrefs)) + -- Write backrefs JSON + FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"backrefs-{module.name}.json") + (toString (toJson cfg.backrefs)) - saveTacticsJSON (declarationsBasePath baseConfig.buildDir / s!"tactics-{module.name}.json") tactics + saveTacticsJSON (declarationsBasePath baseConfig.buildDir / s!"tactics-{module.name}.json") tactics - -- Generate declaration data JSON for search - let (jsonDecls, _) := Module.toJson module |>.run {} config baseConfig - FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") - jsonDecls.compress + -- Generate declaration data JSON for search + let (jsonDecls, _) := Module.toJson module |>.run {} config baseConfig + FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") + jsonDecls.compress - return relFilePath + return relFilePath -- Wait for all tasks and collect output paths 
let mut outputs := #[] From 3c848f229d8888c9715b93be61db25a95e062d69 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 12:50:14 +0100 Subject: [PATCH 066/106] Single-pass string escape This was 15% on a profile, now it's better --- DocGen4/Output/ToHtmlFormat.lean | 33 ++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/DocGen4/Output/ToHtmlFormat.lean b/DocGen4/Output/ToHtmlFormat.lean index 91b8abdf..ea5f340a 100644 --- a/DocGen4/Output/ToHtmlFormat.lean +++ b/DocGen4/Output/ToHtmlFormat.lean @@ -32,16 +32,29 @@ instance : Coe String Html := namespace Html -def escapePairs : Array (String × String) := - #[ - ("&", "&"), - ("<", "<"), - (">", ">"), - ("\"", """) - ] - -def escape (s : String) : String := - escapePairs.foldl (fun acc (o, r) => acc.replace o r) s + +def escape (s : String) : String := Id.run do + let mut out := "" + let mut i := s.startPos + let mut j := s.startPos + while h : j ≠ s.endPos do + let c := j.get h + if let some esc := subst c then + out := out ++ s.extract i j ++ esc + j := j.next h + i := j + else + j := j.next h + if i = s.startPos then s -- no escaping needed, return original + else out ++ s.extract i j +where + subst : Char → Option String + | '&' => some "&" + | '<' => some "<" + | '>' => some ">" + | '"' => some """ + | _ => none + -- TODO: remove the following 3 functions -- once is fixed From 4ff68a89b1ad0672c16c0f7134abe90812fec3d5 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 13:06:29 +0100 Subject: [PATCH 067/106] Don't round-trip index to/from disk --- DocGen4/Output.lean | 34 ++++++++++++++-------------------- DocGen4/Output/ToJson.lean | 8 +++++--- Main.lean | 4 ++-- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index e9edb099..3fef2d6e 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -99,7 +99,7 @@ def htmlOutputResultsParallel (baseConfig : 
SiteBaseContext) (dbPath : System.Fi (linkCtx : LinkingContext) (targetModules : Array Name := linkCtx.moduleNames) (sourceLinker? : Option SourceLinkerFn := none) - (declarationDecorator? : Option DeclarationDecoratorFn := none) : IO (Array System.FilePath) := do + (declarationDecorator? : Option DeclarationDecoratorFn := none) : IO (Array System.FilePath × Array JsonModule) := do FS.createDirAll <| basePath baseConfig.buildDir FS.createDirAll <| declarationsBasePath baseConfig.buildDir @@ -150,19 +150,23 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi saveTacticsJSON (declarationsBasePath baseConfig.buildDir / s!"tactics-{module.name}.json") tactics -- Generate declaration data JSON for search - let (jsonDecls, _) := Module.toJson module |>.run {} config baseConfig + let (jsonModule, _) := moduleToJsonModule module |>.run {} config baseConfig FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") - jsonDecls.compress + (ToJson.toJson jsonModule).compress - return relFilePath + return (relFilePath, jsonModule) - -- Wait for all tasks and collect output paths + -- Wait for all tasks and collect output paths and modules let mut outputs := #[] + let mut jsonModules := #[] for task in tasks do match (← IO.wait task) with - | .ok paths => outputs := outputs ++ paths + | .ok results => + for (path, jsonMod) in results do + outputs := outputs.push path + jsonModules := jsonModules.push jsonMod | .error e => throw e - return outputs + return (outputs, jsonModules) def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : IO SiteBaseContext := do @@ -183,22 +187,12 @@ def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : refs := refs } -def htmlOutputIndex (baseConfig : SiteBaseContext) : IO Unit := do +def htmlOutputIndex (baseConfig : SiteBaseContext) (modules : Array JsonModule) : IO Unit := do htmlOutputSetup baseConfig let mut index : JsonIndex 
:= {} - for entry in ← System.FilePath.readDir (declarationsBasePath baseConfig.buildDir) do - if entry.fileName.startsWith "declaration-data-" && entry.fileName.endsWith ".bmp" then - let fileContent ← FS.readFile entry.path - match Json.parse fileContent with - | .error err => - throw <| IO.userError s!"failed to parse file '{entry.path}' as json: {err}" - | .ok jsonContent => - match fromJson? jsonContent with - | .error err => - throw <| IO.userError s!"failed to parse file '{entry.path}': {err}" - | .ok (module : JsonModule) => - index := index.addModule module |>.run baseConfig + for module in modules do + index := index.addModule module |>.run baseConfig let finalJson := toJson index -- The root JSON for find diff --git a/DocGen4/Output/ToJson.lean b/DocGen4/Output/ToJson.lean index 5dd0e23e..73873738 100644 --- a/DocGen4/Output/ToJson.lean +++ b/DocGen4/Output/ToJson.lean @@ -128,7 +128,7 @@ def DocInfo.toJson (sourceLinker : Option DeclarationRange → String) (info : P let info := { name, kind, doc, docLink, sourceLink, line } return { info, header } -def Process.Module.toJson (module : Process.Module) : HtmlM Json := do +def moduleToJsonModule (module : Process.Module) : HtmlM JsonModule := do let mut jsonDecls := [] let mut instances := #[] let sourceLinker := (← read).sourceLinker module.name @@ -141,12 +141,14 @@ def Process.Module.toJson (module : Process.Module) : HtmlM Json := do className := i.className.toString typeNames := i.typeNames.map Name.toString } - let jsonMod : JsonModule := { + return { name := module.name.toString, declarations := jsonDecls, instances, imports := module.imports.map Name.toString } - return ToJson.toJson jsonMod + +def Process.Module.toJson (module : Process.Module) : HtmlM Json := do + return ToJson.toJson (← moduleToJsonModule module) end DocGen4.Output diff --git a/Main.lean b/Main.lean index e3855309..87ab5b67 100644 --- a/Main.lean +++ b/Main.lean @@ -87,10 +87,10 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do 
let baseConfig ← getSimpleBaseContext buildDir hierarchy -- Parallel HTML generation - let outputs ← htmlOutputResultsParallel baseConfig dbPath linkCtx targetModules (sourceLinker? := some (dbSourceLinker linkCtx.sourceUrls)) + let (outputs, jsonModules) ← htmlOutputResultsParallel baseConfig dbPath linkCtx targetModules (sourceLinker? := some (dbSourceLinker linkCtx.sourceUrls)) -- Generate the search index (declaration-data.bmp) - htmlOutputIndex baseConfig + htmlOutputIndex baseConfig jsonModules -- Update navbar to include all modules on disk updateNavbarFromDisk buildDir From 476d60422218c2bee68f5c103ca288302b1abb90 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 13:47:13 +0100 Subject: [PATCH 068/106] Do round trip those that didn't just get built --- DocGen4/Output.lean | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 3fef2d6e..8d740540 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -190,8 +190,30 @@ def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : def htmlOutputIndex (baseConfig : SiteBaseContext) (modules : Array JsonModule) : IO Unit := do htmlOutputSetup baseConfig + -- Build a set of module names we just generated (already in memory) + let freshModuleNames : Std.HashSet String := modules.foldl (init := {}) fun s m => s.insert m.name + + -- Load per-module data from disk for modules NOT in the current task set. + -- This enables incremental builds: prior runs wrote declaration-data-{module}.bmp files, + -- and we merge them so the unified search index covers all modules. 
+ let mut diskModules : Array JsonModule := #[] + for entry in ← System.FilePath.readDir (declarationsBasePath baseConfig.buildDir) do + if entry.fileName.startsWith "declaration-data-" && entry.fileName.endsWith ".bmp" then + -- Extract module name from filename: "declaration-data-Foo.Bar.bmp" -> "Foo.Bar" + let modName := entry.fileName.drop "declaration-data-".length |>.dropEnd ".bmp".length |>.toString + if freshModuleNames.contains modName then continue + let fileContent ← FS.readFile entry.path + match Json.parse fileContent with + | .error _ => continue + | .ok jsonContent => + match fromJson? jsonContent with + | .error _ => continue + | .ok (module : JsonModule) => + diskModules := diskModules.push module + + let allModules := modules ++ diskModules let mut index : JsonIndex := {} - for module in modules do + for module in allModules do index := index.addModule module |>.run baseConfig let finalJson := toJson index From 5550f29a8fd82a8a9437bf0480efa1d4e4cd455a Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 14:51:46 +0100 Subject: [PATCH 069/106] Upload non-corrupted db after bench --- Main.lean | 9 +++++++++ scripts/bench/mathlib-docs/run | 2 ++ scripts/bench/own-docs/run | 2 ++ 3 files changed, 13 insertions(+) diff --git a/Main.lean b/Main.lean index 87ab5b67..4864919e 100644 --- a/Main.lean +++ b/Main.lean @@ -63,6 +63,12 @@ def dbSourceLinker (sourceUrls : Std.HashMap Name String) (_gitUrl? : Option Str -- Fallback for modules without source URL fun _ => "#" +/-- Flush the WAL so the database file is self-contained. Connection is closed on return. -/ +def walCheckpoint (dbPath : String) : IO Unit := do + let db ← SQLite.open dbPath + db.exec "PRAGMA wal_checkpoint(TRUNCATE)" + db.exec "PRAGMA optimize" + def runFromDbCmd (p : Parsed) : IO UInt32 := do let buildDir := match p.flag? "build" with | some dir => dir.as! String @@ -71,6 +77,9 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do let manifestOutput? := (p.flag? 
"manifest").map (·.as! String) let moduleRoots := (p.variableArgsAs! String).map String.toName + -- Flush WAL so the database file is self-contained for concurrent readers + walCheckpoint dbPath + -- Load linking context (module names, source URLs, declaration locations) let db ← DB.openForReading dbPath builtinDocstringValues let linkCtx ← db.loadLinkingContext diff --git a/scripts/bench/mathlib-docs/run b/scripts/bench/mathlib-docs/run index b6d586c4..d2e4e461 100755 --- a/scripts/bench/mathlib-docs/run +++ b/scripts/bench/mathlib-docs/run @@ -52,6 +52,8 @@ env DOCGEN_SRC="file" "$REPO_ROOT/$BENCH/measure.py" -t mathlib-docs -m instruct TAR_ARGS=(doc) if [ -f "$TMPDIR/mathproject/.lake/build/api-docs.db" ]; then + # Compact the DB into a single portable file (removes WAL/journal dependency) + sqlite3 "$TMPDIR/mathproject/.lake/build/api-docs.db" "VACUUM" TAR_ARGS+=(api-docs.db) fi tar cf - -C "$TMPDIR/mathproject/.lake/build" "${TAR_ARGS[@]}" | zstd -o "$REPO_ROOT/mathlib-docs.tar.zst" diff --git a/scripts/bench/own-docs/run b/scripts/bench/own-docs/run index da4d2b56..6dc8f6c2 100755 --- a/scripts/bench/own-docs/run +++ b/scripts/bench/own-docs/run @@ -17,6 +17,8 @@ env DOCGEN_SRC="file" "$BENCH/measure.py" -t own-docs -m instructions -m maxrss TAR_ARGS=(doc) if [ -f .lake/build/api-docs.db ]; then + # Compact the DB into a single portable file (removes WAL/journal dependency) + sqlite3 .lake/build/api-docs.db "VACUUM" TAR_ARGS+=(api-docs.db) fi tar cf - -C .lake/build "${TAR_ARGS[@]}" | zstd -o own-docs.tar.zst From 236977ded6a67585b7415734b7c29b99c1dc5092 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 16:32:58 +0100 Subject: [PATCH 070/106] Save positions as ints This allows correct sorting in SQLite --- DocGen4/DB.lean | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 9d0bfc33..10947cc7 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -108,12 +108,12 @@ 
def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do let saveDeclarationRange modName position (declRange : Lean.DeclarationRange) := withDbContext "write:insert:declaration_ranges" do saveDeclarationRangeStmt.bind 1 modName saveDeclarationRangeStmt.bind 2 position - saveDeclarationRangeStmt.bind 3 declRange.pos.line - saveDeclarationRangeStmt.bind 4 declRange.pos.column - saveDeclarationRangeStmt.bind 5 declRange.charUtf16 - saveDeclarationRangeStmt.bind 6 declRange.endPos.line - saveDeclarationRangeStmt.bind 7 declRange.endPos.column - saveDeclarationRangeStmt.bind 8 declRange.endCharUtf16 + saveDeclarationRangeStmt.bind 3 declRange.pos.line.toInt64 + saveDeclarationRangeStmt.bind 4 declRange.pos.column.toInt64 + saveDeclarationRangeStmt.bind 5 declRange.charUtf16.toInt64 + saveDeclarationRangeStmt.bind 6 declRange.endPos.line.toInt64 + saveDeclarationRangeStmt.bind 7 declRange.endPos.column.toInt64 + saveDeclarationRangeStmt.bind 8 declRange.endCharUtf16.toInt64 run saveDeclarationRangeStmt let saveInfoStmt ← sqlite.prepare "INSERT INTO name_info (module_name, position, kind, name, type, sorried, render) VALUES (?, ?, ?, ?, ?, ?, ?)" let saveArgStmt ← sqlite.prepare "INSERT INTO declaration_args (module_name, position, sequence, binder, is_implicit) VALUES (?, ?, ?, ?, ?)" From 606268608cecff293e0c2d9e2c79850488038334 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 17:09:35 +0100 Subject: [PATCH 071/106] Try synchronous mode to see performance impact --- DocGen4/DB/Schema.lean | 1 - 1 file changed, 1 deletion(-) diff --git a/DocGen4/DB/Schema.lean b/DocGen4/DB/Schema.lean index 607f69e3..4676edb5 100644 --- a/DocGen4/DB/Schema.lean +++ b/DocGen4/DB/Schema.lean @@ -52,7 +52,6 @@ def getDb (dbFile : System.FilePath) : IO SQLite := do let db ← SQLite.openWith dbFile .readWriteCreate db.exec "PRAGMA busy_timeout = 86400000" -- 24 hours - effectively no timeout for parallel builds db.exec "PRAGMA 
journal_mode = WAL" - db.exec "PRAGMA synchronous = OFF" db.exec "PRAGMA foreign_keys = ON" try db.transaction (db.exec ddl) From c093b639a58ed365652469ac437689384f7fde0e Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 20:56:48 +0100 Subject: [PATCH 072/106] Match prior ordering --- DocGen4/DB.lean | 19 ++++++++--- DocGen4/DB/Read.lean | 75 +++++++++++++++++++++--------------------- DocGen4/DB/Schema.lean | 14 ++++++-- 3 files changed, 63 insertions(+), 45 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 10947cc7..8eae9a3a 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -14,6 +14,7 @@ structure DB extends ReadOps where saveModule (modName : String) (sourceUrl? : Option String) : IO Unit saveImport (modName : String) (imported : Lean.Name) : IO Unit saveMarkdownDocstring (modName : String) (position : Int64) (text : String) : IO Unit + saveModuleDoc (modName : String) (position : Int64) (text : String) : IO Unit saveVersoDocstring (modName : String) (position : Int64) (text : Lean.VersoDocString) : IO Unit saveDeclarationRange (modName : String) (position : Int64) (declRange : Lean.DeclarationRange) : IO Unit saveInfo (modName : String) (position : Int64) (kind : String) (info : Process.Info) : IO Unit @@ -90,14 +91,20 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do saveImportStmt.bind 1 modName saveImportStmt.bind 2 imported.toString run saveImportStmt - let saveMarkdownDocstringStmt ← sqlite.prepare "INSERT INTO markdown_docstrings (module_name, position, text) VALUES (?, ?, ?)" - let saveMarkdownDocstring modName position text := withDbContext "write:insert:markdown_docstrings" do + let saveMarkdownDocstringStmt ← sqlite.prepare "INSERT INTO declaration_markdown_docstrings (module_name, position, text) VALUES (?, ?, ?)" + let saveMarkdownDocstring modName position text := withDbContext "write:insert:declaration_markdown_docstrings" do saveMarkdownDocstringStmt.bind 1 
modName saveMarkdownDocstringStmt.bind 2 position saveMarkdownDocstringStmt.bind 3 text run saveMarkdownDocstringStmt - let saveVersoDocstringStmt ← sqlite.prepare "INSERT INTO verso_docstrings (module_name, position, content) VALUES (?, ?, ?)" - let saveVersoDocstring modName position text := withDbContext "write:insert:verso_docstrings" do + let saveModuleDocStmt ← sqlite.prepare "INSERT INTO module_docs_markdown (module_name, position, text) VALUES (?, ?, ?)" + let saveModuleDoc modName position text := withDbContext "write:insert:module_docs_markdown" do + saveModuleDocStmt.bind 1 modName + saveModuleDocStmt.bind 2 position + saveModuleDocStmt.bind 3 text + run saveModuleDocStmt + let saveVersoDocstringStmt ← sqlite.prepare "INSERT INTO declaration_verso_docstrings (module_name, position, content) VALUES (?, ?, ?)" + let saveVersoDocstring modName position text := withDbContext "write:insert:declaration_verso_docstrings" do saveVersoDocstringStmt.bind 1 modName saveVersoDocstringStmt.bind 2 position saveVersoDocstringStmt.bind 3 text @@ -296,6 +303,7 @@ def ensureDb (values : DocstringValues) (dbFile : System.FilePath) : IO DB := do saveModule, saveImport, saveMarkdownDocstring, + saveModuleDoc, saveVersoDocstring, saveDeclarationRange, saveInfo, @@ -344,6 +352,7 @@ def openForReading (dbFile : System.FilePath) (values : DocstringValues) : IO DB saveModule := fun _ _ => readonlyError, saveImport := fun _ _ => readonlyError, saveMarkdownDocstring := fun _ _ _ => readonlyError, + saveModuleDoc := fun _ _ _ => readonlyError, saveVersoDocstring := fun _ _ _ => readonlyError, saveDeclarationRange := fun _ _ _ => readonlyError, saveInfo := fun _ _ _ _ => readonlyError, @@ -449,7 +458,7 @@ def updateModuleDb (values : DocstringValues) match mem with | .modDoc doc => db.saveDeclarationRange modNameStr pos doc.declarationRange - db.saveMarkdownDocstring modNameStr pos doc.doc + db.saveModuleDoc modNameStr pos doc.doc | .docInfo info => let baseInfo := info.toInfo -- 
Skip saving ctorInfo here - they're saved along with their parent inductive diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index 41477868..2f47032d 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -68,8 +68,7 @@ private structure ReadStmts where getModuleImportsStmt : SQLite.Stmt buildNameInfoStmt : SQLite.Stmt buildInternalNamesStmt : SQLite.Stmt - loadModuleStmt : SQLite.Stmt - loadModuleDocsStmt : SQLite.Stmt + loadModuleMembersStmt : SQLite.Stmt loadTacticsStmt : SQLite.Stmt loadTacticTagsStmt : SQLite.Stmt @@ -77,15 +76,15 @@ open Lean SQLite.Blob in private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO ReadStmts := do let loadArgsStmt ← sqlite.prepare "SELECT binder, is_implicit FROM declaration_args WHERE module_name = ? AND position = ? ORDER BY sequence" let loadAttrsStmt ← sqlite.prepare "SELECT attr FROM declaration_attrs WHERE module_name = ? AND position = ? ORDER BY sequence" - let readMdDocstringStmt ← sqlite.prepare "SELECT text FROM markdown_docstrings WHERE module_name = ? AND position = ?" - let readVersoDocstringStmt ← sqlite.prepare "SELECT content FROM verso_docstrings WHERE module_name = ? AND position = ?" + let readMdDocstringStmt ← sqlite.prepare "SELECT text FROM declaration_markdown_docstrings WHERE module_name = ? AND position = ?" + let readVersoDocstringStmt ← sqlite.prepare "SELECT content FROM declaration_verso_docstrings WHERE module_name = ? AND position = ?" let loadDeclRangeStmt ← sqlite.prepare "SELECT start_line, start_column, start_utf16, end_line, end_column, end_utf16 FROM declaration_ranges WHERE module_name = ? AND position = ?" let loadEqnsStmt ← sqlite.prepare "SELECT code FROM definition_equations WHERE module_name = ? AND position = ? ORDER BY sequence" let loadInstanceArgsStmt ← sqlite.prepare "SELECT type_name FROM instance_args WHERE module_name = ? AND position = ? 
ORDER BY sequence" let loadStructureParentsStmt ← sqlite.prepare "SELECT projection_fn, type FROM structure_parents WHERE module_name = ? AND position = ? ORDER BY sequence" let loadFieldArgsStmt ← sqlite.prepare "SELECT binder, is_implicit FROM structure_field_args WHERE module_name = ? AND position = ? AND field_sequence = ? ORDER BY arg_sequence" let loadFieldsStmt ← sqlite.prepare "SELECT sequence, proj_name, type, is_direct FROM structure_fields WHERE module_name = ? AND position = ? ORDER BY sequence" - let loadFieldsJoinedStmt ← sqlite.prepare "SELECT f.sequence, f.proj_name, f.type, f.is_direct, n.module_name, n.position, n.render, md.text, v.content, dr.start_line, dr.start_column, dr.start_utf16, dr.end_line, dr.end_column, dr.end_utf16 FROM structure_fields f LEFT JOIN name_info n ON n.name = f.proj_name LEFT JOIN markdown_docstrings md ON md.module_name = n.module_name AND md.position = n.position LEFT JOIN verso_docstrings v ON v.module_name = n.module_name AND v.position = n.position LEFT JOIN declaration_ranges dr ON dr.module_name = n.module_name AND dr.position = n.position WHERE f.module_name = ? AND f.position = ? ORDER BY f.sequence" + let loadFieldsJoinedStmt ← sqlite.prepare "SELECT f.sequence, f.proj_name, f.type, f.is_direct, n.module_name, n.position, n.render, md.text, v.content, dr.start_line, dr.start_column, dr.start_utf16, dr.end_line, dr.end_column, dr.end_utf16 FROM structure_fields f LEFT JOIN name_info n ON n.name = f.proj_name LEFT JOIN declaration_markdown_docstrings md ON md.module_name = n.module_name AND md.position = n.position LEFT JOIN declaration_verso_docstrings v ON v.module_name = n.module_name AND v.position = n.position LEFT JOIN declaration_ranges dr ON dr.module_name = n.module_name AND dr.position = n.position WHERE f.module_name = ? AND f.position = ? ORDER BY f.sequence" let lookupProjStmt ← sqlite.prepare "SELECT module_name, position FROM name_info WHERE name = ? 
LIMIT 1" let lookupRenderStmt ← sqlite.prepare "SELECT render FROM name_info WHERE module_name = ? AND position = ?" let loadStructCtorStmt ← sqlite.prepare "SELECT name, type, ctor_position FROM structure_constructors WHERE module_name = ? AND position = ?" @@ -103,8 +102,13 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO let getModuleImportsStmt ← sqlite.prepare "SELECT imported FROM module_imports WHERE importer = ?" let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info" let buildInternalNamesStmt ← sqlite.prepare "SELECT name, target_module FROM internal_names" - let loadModuleStmt ← sqlite.prepare "SELECT n.position, n.kind, n.name, n.type, n.sorried, n.render FROM name_info n WHERE n.module_name = ?" - let loadModuleDocsStmt ← sqlite.prepare "SELECT m.position, m.text FROM markdown_docstrings m WHERE m.module_name = ? AND m.position NOT IN (SELECT position FROM name_info WHERE module_name = ?)" + let loadModuleMembersStmt ← sqlite.prepare + "SELECT position, kind, name, type, sorried, render, NULL as mod_doc \ + FROM name_info WHERE module_name = ? \ + UNION ALL \ + SELECT position, NULL, NULL, NULL, 0, 0, text \ + FROM module_docs_markdown WHERE module_name = ? \ + ORDER BY position" let loadTacticsStmt ← sqlite.prepare "SELECT internal_name, user_name, doc_string FROM tactics WHERE module_name = ?" let loadTacticTagsStmt ← sqlite.prepare "SELECT tag FROM tactic_tags WHERE module_name = ? AND internal_name = ?" 
pure { @@ -116,7 +120,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO readInductiveStmt, readStructureStmt, readClassInductiveStmt, getModuleNamesStmt, getModuleSourceUrlsStmt, getModuleImportsStmt, buildNameInfoStmt, buildInternalNamesStmt, - loadModuleStmt, loadModuleDocsStmt, loadTacticsStmt, loadTacticTagsStmt + loadModuleMembersStmt, loadTacticsStmt, loadTacticTagsStmt } open Lean SQLite.Blob in @@ -273,7 +277,7 @@ private def ReadStmts.loadStructureFields (s : ReadStmts) (moduleName : String) let typeBlob ← stmt.columnBlob 2 let type ← readRenderedCode typeBlob let isDirect := (← stmt.columnInt64 3) != 0 - -- Columns 4-14 come from LEFT JOINs on name_info, markdown_docstrings, verso_docstrings, declaration_ranges + -- Columns 4-14 come from LEFT JOINs on name_info, declaration_markdown_docstrings, declaration_verso_docstrings, declaration_ranges let projFound := (← stmt.columnType 4) != .null let (doc, attrs, declRange, render) ← if projFound then do let projModName ← stmt.columnText 4 @@ -511,34 +515,29 @@ open Lean SQLite.Blob in private def ReadStmts.loadModule (s : ReadStmts) (moduleName : Name) : IO Process.Module := do let modNameStr := moduleName.toString let imports ← s.getModuleImports moduleName - s.loadModuleStmt.bind 1 modNameStr - let mut members : Array (Int64 × Process.ModuleMember) := #[] - while (← s.loadModuleStmt.step) do - let position ← s.loadModuleStmt.columnInt64 0 - let kind ← s.loadModuleStmt.columnText 1 - let name := (← s.loadModuleStmt.columnText 2).toName - let typeBlob ← s.loadModuleStmt.columnBlob 3 - let sorried := (← s.loadModuleStmt.columnInt64 4) != 0 - let render := (← s.loadModuleStmt.columnInt64 5) != 0 - match (← s.loadDocInfo modNameStr position kind name typeBlob sorried render) with - | some docInfo => members := members.push (position, .docInfo docInfo) - | none => IO.eprintln s!"warning: failed to load declaration '{name}' (kind '{kind}') at position {position} in module 
'{modNameStr}'; skipping" - done s.loadModuleStmt - s.loadModuleDocsStmt.bind 1 modNameStr - s.loadModuleDocsStmt.bind 2 modNameStr - while (← s.loadModuleDocsStmt.step) do - let position ← s.loadModuleDocsStmt.columnInt64 0 - let doc ← s.loadModuleDocsStmt.columnText 1 - match (← s.loadDeclarationRange modNameStr position) with - | some declRange => members := members.push (position, .modDoc { doc, declarationRange := declRange }) - | none => IO.eprintln s!"warning: missing declaration range for module docstring at position {position} in module '{modNameStr}'; skipping" - done s.loadModuleDocsStmt - let sortedMembers := members.qsort fun (pos1, m1) (pos2, m2) => - let r1 := m1.getDeclarationRange.pos - let r2 := m2.getDeclarationRange.pos - if Position.lt r1 r2 then true - else if Position.lt r2 r1 then false - else pos1 < pos2 + -- Single query returns declarations and standalone module docs in position order + s.loadModuleMembersStmt.bind 1 modNameStr + s.loadModuleMembersStmt.bind 2 modNameStr + let mut members : Array Process.ModuleMember := #[] + while (← s.loadModuleMembersStmt.step) do + let position ← s.loadModuleMembersStmt.columnInt64 0 + if (← s.loadModuleMembersStmt.columnNull 1) then + -- Standalone module doc (kind column is NULL) + let doc ← s.loadModuleMembersStmt.columnText 6 + match (← s.loadDeclarationRange modNameStr position) with + | some declRange => members := members.push (.modDoc { doc, declarationRange := declRange }) + | none => IO.eprintln s!"warning: missing declaration range for module docstring at position {position} in module '{modNameStr}'; skipping" + else + -- Declaration + let kind ← s.loadModuleMembersStmt.columnText 1 + let name := (← s.loadModuleMembersStmt.columnText 2).toName + let typeBlob ← s.loadModuleMembersStmt.columnBlob 3 + let sorried := (← s.loadModuleMembersStmt.columnInt64 4) != 0 + let render := (← s.loadModuleMembersStmt.columnInt64 5) != 0 + match (← s.loadDocInfo modNameStr position kind name typeBlob 
sorried render) with + | some docInfo => members := members.push (.docInfo docInfo) + | none => IO.eprintln s!"warning: failed to load declaration '{name}' (kind '{kind}') at position {position} in module '{modNameStr}'; skipping" + done s.loadModuleMembersStmt s.loadTacticsStmt.bind 1 modNameStr let mut tactics : Array (Process.TacticInfo Process.MarkdownDocstring) := #[] while (← s.loadTacticsStmt.step) do @@ -553,7 +552,7 @@ private def ReadStmts.loadModule (s : ReadStmts) (moduleName : Name) : IO Proces done s.loadTacticTagsStmt tactics := tactics.push { internalName, userName, tags, docString, definingModule := moduleName } done s.loadTacticsStmt - return { name := moduleName, members := sortedMembers.map (·.2), imports, tactics } + return { name := moduleName, members, imports, tactics } def mkReadOps (sqlite : SQLite) (values : DocstringValues) : IO ReadOps := do let s ← ReadStmts.prepare sqlite values diff --git a/DocGen4/DB/Schema.lean b/DocGen4/DB/Schema.lean index 4676edb5..d6bf0727 100644 --- a/DocGen4/DB/Schema.lean +++ b/DocGen4/DB/Schema.lean @@ -125,7 +125,7 @@ CREATE TABLE IF NOT EXISTS declaration_ranges ( FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE ); -CREATE TABLE IF NOT EXISTS markdown_docstrings ( +CREATE TABLE IF NOT EXISTS declaration_markdown_docstrings ( module_name TEXT NOT NULL, position INTEGER NOT NULL, text TEXT NOT NULL, @@ -133,7 +133,17 @@ CREATE TABLE IF NOT EXISTS markdown_docstrings ( FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE ); -CREATE TABLE IF NOT EXISTS verso_docstrings ( +CREATE TABLE IF NOT EXISTS module_docs_markdown ( + module_name TEXT NOT NULL, + position INTEGER NOT NULL, + text TEXT NOT NULL, + PRIMARY KEY (module_name, position), + FOREIGN KEY (module_name) REFERENCES modules(name) ON DELETE CASCADE +); + +-- TODO: Add module_docs_verso table for Lean.VersoModuleDocs.Snippet + +CREATE TABLE IF NOT EXISTS declaration_verso_docstrings ( module_name TEXT NOT NULL, 
position INTEGER NOT NULL, content BLOB NOT NULL, From 88a29d9134562d36fb6d28815c0aac6e9e01c706 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Wed, 11 Feb 2026 23:33:13 +0100 Subject: [PATCH 073/106] fix: always add core docs to HTML output --- lakefile.lean | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lakefile.lean b/lakefile.lean index 530150fe..a34bd7d2 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -345,7 +345,8 @@ def generateHtmlDocs (rootMods : Array Module) (description : String) : FetchM ( basePath / "find" / "index.html", basePath / "find" / "find.js" ] - let rootNames := rootMods.map (·.name) + let coreRoots := #[`Init, `Std, `Lake, `Lean] + let rootNames := rootMods.map (·.name) ++ coreRoots let manifestFile := buildDir / "doc-manifest.json" coreJob.bindM fun _ => do docInfoJobs.bindM fun _ => do From 28503499c3dd5e4e572b89c1139ee7ddcdd65c76 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Thu, 12 Feb 2026 00:01:31 +0100 Subject: [PATCH 074/106] chore: eliminate JSON trip and fix ordering of tactics --- DocGen4/DB.lean | 1 + DocGen4/DB/Read.lean | 29 ++++++++++++++++++++++++++++- DocGen4/Output.lean | 11 ++++------- DocGen4/Output/Tactics.lean | 24 ------------------------ Main.lean | 14 +++++++++++++- 5 files changed, 46 insertions(+), 33 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 8eae9a3a..b138a5aa 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -378,6 +378,7 @@ def openForReading (dbFile : System.FilePath) (values : DocstringValues) : IO DB getModuleImports := readOps.getModuleImports, buildName2ModIdx := readOps.buildName2ModIdx, loadModule := readOps.loadModule, + loadAllTactics := readOps.loadAllTactics, } /-! 
## DB Reading -/ diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index 2f47032d..4e2b2d53 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -21,6 +21,7 @@ structure ReadOps where getModuleImports : Lean.Name → IO (Array Lean.Name) buildName2ModIdx : Array Lean.Name → IO (Std.HashMap Lean.Name Lean.ModuleIdx) loadModule : Lean.Name → IO Process.Module + loadAllTactics : IO (Array (Process.TacticInfo Process.MarkdownDocstring)) private def done (stmt : SQLite.Stmt) : IO Unit := do stmt.reset @@ -71,6 +72,8 @@ private structure ReadStmts where loadModuleMembersStmt : SQLite.Stmt loadTacticsStmt : SQLite.Stmt loadTacticTagsStmt : SQLite.Stmt + loadAllTacticsStmt : SQLite.Stmt + loadAllTacticTagsStmt : SQLite.Stmt open Lean SQLite.Blob in private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO ReadStmts := do @@ -111,6 +114,10 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO ORDER BY position" let loadTacticsStmt ← sqlite.prepare "SELECT internal_name, user_name, doc_string FROM tactics WHERE module_name = ?" let loadTacticTagsStmt ← sqlite.prepare "SELECT tag FROM tactic_tags WHERE module_name = ? AND internal_name = ?" + let loadAllTacticsStmt ← sqlite.prepare + "SELECT module_name, internal_name, user_name, doc_string FROM tactics \ + ORDER BY user_name, module_name, internal_name" + let loadAllTacticTagsStmt ← sqlite.prepare "SELECT tag FROM tactic_tags WHERE module_name = ? AND internal_name = ?" 
pure { values, loadArgsStmt, loadAttrsStmt, readMdDocstringStmt, readVersoDocstringStmt, loadDeclRangeStmt, loadEqnsStmt, loadInstanceArgsStmt, loadStructureParentsStmt, @@ -120,7 +127,8 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO readInductiveStmt, readStructureStmt, readClassInductiveStmt, getModuleNamesStmt, getModuleSourceUrlsStmt, getModuleImportsStmt, buildNameInfoStmt, buildInternalNamesStmt, - loadModuleMembersStmt, loadTacticsStmt, loadTacticTagsStmt + loadModuleMembersStmt, loadTacticsStmt, loadTacticTagsStmt, + loadAllTacticsStmt, loadAllTacticTagsStmt } open Lean SQLite.Blob in @@ -554,6 +562,24 @@ private def ReadStmts.loadModule (s : ReadStmts) (moduleName : Name) : IO Proces done s.loadTacticsStmt return { name := moduleName, members, imports, tactics } +open Lean in +private def ReadStmts.loadAllTactics (s : ReadStmts) : IO (Array (Process.TacticInfo Process.MarkdownDocstring)) := withDbContext "read:all_tactics" do + let mut tactics : Array (Process.TacticInfo Process.MarkdownDocstring) := #[] + while (← s.loadAllTacticsStmt.step) do + let moduleName := (← s.loadAllTacticsStmt.columnText 0).toName + let internalName := (← s.loadAllTacticsStmt.columnText 1).toName + let userName ← s.loadAllTacticsStmt.columnText 2 + let docString ← s.loadAllTacticsStmt.columnText 3 + s.loadAllTacticTagsStmt.bind 1 moduleName.toString + s.loadAllTacticTagsStmt.bind 2 internalName.toString + let mut tags : Array Name := #[] + while (← s.loadAllTacticTagsStmt.step) do + tags := tags.push (← s.loadAllTacticTagsStmt.columnText 0).toName + done s.loadAllTacticTagsStmt + tactics := tactics.push { internalName, userName, tags, docString, definingModule := moduleName } + done s.loadAllTacticsStmt + return tactics + def mkReadOps (sqlite : SQLite) (values : DocstringValues) : IO ReadOps := do let s ← ReadStmts.prepare sqlite values pure { @@ -562,6 +588,7 @@ def mkReadOps (sqlite : SQLite) (values : DocstringValues) : IO ReadOps := do 
getModuleImports := s.getModuleImports buildName2ModIdx := s.buildName2ModIdx loadModule := s.loadModule + loadAllTactics := s.loadAllTactics } end DocGen4.DB diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 8d740540..08e32f80 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -39,7 +39,7 @@ def collectBackrefs (buildDir : System.FilePath) : IO (Array BackrefItem) := do | .ok (arr : Array BackrefItem) => backrefs := backrefs ++ arr return backrefs -def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do +def htmlOutputSetup (config : SiteBaseContext) (tacticInfo : Array (Process.TacticInfo Html)) : IO Unit := do let findBasePath (buildDir : System.FilePath) := basePath buildDir / "find" -- Base structure @@ -55,7 +55,7 @@ def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do let navbarHtml := ReaderT.run navbar config |>.toString let searchHtml := ReaderT.run search config |>.toString let referencesHtml := ReaderT.run (references (← collectBackrefs config.buildDir)) config |>.toString - let tacticsHtml := ReaderT.run (tactics (← loadTacticsJSON config.buildDir)) config |>.toString + let tacticsHtml := ReaderT.run (tactics tacticInfo) config |>.toString let docGenStatic := #[ ("style.css", styleCss), ("favicon.svg", faviconSvg), @@ -132,7 +132,6 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi currentName := some modName } let (moduleHtml, cfg) := moduleToHtml module |>.run {} config moduleConfig - let (tactics, cfg) := module.tactics.mapM TacticInfo.docStringToHtml |>.run cfg config baseConfig if not cfg.errors.isEmpty then throw <| IO.userError s!"There are errors when generating HTML for '{modName}': {cfg.errors}" @@ -147,8 +146,6 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"backrefs-{module.name}.json") (toString (toJson cfg.backrefs)) - saveTacticsJSON (declarationsBasePath 
baseConfig.buildDir / s!"tactics-{module.name}.json") tactics - -- Generate declaration data JSON for search let (jsonModule, _) := moduleToJsonModule module |>.run {} config baseConfig FS.writeFile (declarationsBasePath baseConfig.buildDir / s!"declaration-data-{module.name}.bmp") @@ -187,8 +184,8 @@ def getSimpleBaseContext (buildDir : System.FilePath) (hierarchy : Hierarchy) : refs := refs } -def htmlOutputIndex (baseConfig : SiteBaseContext) (modules : Array JsonModule) : IO Unit := do - htmlOutputSetup baseConfig +def htmlOutputIndex (baseConfig : SiteBaseContext) (modules : Array JsonModule) (tacticInfo : Array (Process.TacticInfo Html)) : IO Unit := do + htmlOutputSetup baseConfig tacticInfo -- Build a set of module names we just generated (already in memory) let freshModuleNames : Std.HashSet String := modules.foldl (init := {}) fun s m => s.insert m.name diff --git a/DocGen4/Output/Tactics.lean b/DocGen4/Output/Tactics.lean index a3917be6..44998067 100644 --- a/DocGen4/Output/Tactics.lean +++ b/DocGen4/Output/Tactics.lean @@ -63,29 +63,5 @@ def tactics (tacticInfo : Array (TacticInfo Html)) : BaseHtmlM Html := do sectionsHtml) ] -def loadTacticsJSON (buildDir : System.FilePath) : IO (Array (TacticInfo Html)) := do - let mut result : Array (TacticInfo _) := #[] - for entry in ← System.FilePath.readDir (declarationsBasePath buildDir) do - if entry.fileName.startsWith "tactics-" && entry.fileName.endsWith ".json" then - let fileContent ← IO.FS.readFile entry.path - match Json.parse fileContent with - | .error err => - throw <| IO.userError s!"failed to parse file '{entry.path}' as json: {err}" - | .ok jsonContent => - match fromJson? jsonContent with - | .error err => - throw <| IO.userError s!"failed to parse file '{entry.path}': {err}" - | .ok (arr : Array (TacticInfo _)) => result := result ++ arr - return result.qsort (lt := (·.userName < ·.userName)) - -/-- Save sections of supplementary pages declared in a specific module. 
- -This `abbrev` exists as a type-checking wrapper around `toJson`, ensuring `loadTacticsJSON` gets -objects in the expected format. --/ -abbrev saveTacticsJSON (fileName : System.FilePath) (tacticInfo : Array (TacticInfo Html)) : IO Unit := do - if tacticInfo.size > 0 then - IO.FS.writeFile fileName (toString (toJson tacticInfo)) - end Output end DocGen4 diff --git a/Main.lean b/Main.lean index 4864919e..a88dbdf4 100644 --- a/Main.lean +++ b/Main.lean @@ -98,8 +98,20 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do -- Parallel HTML generation let (outputs, jsonModules) ← htmlOutputResultsParallel baseConfig dbPath linkCtx targetModules (sourceLinker? := some (dbSourceLinker linkCtx.sourceUrls)) + -- Load all tactics from DB in sorted order and convert markdown docstrings to HTML + let allTacticsRaw ← db.loadAllTactics + let refsMap : Std.HashMap String BibItem := + Std.HashMap.emptyWithCapacity baseConfig.refs.size |>.insertMany + (baseConfig.refs.iter.map fun x => (x.citekey, x)) + let minimalSiteCtx : SiteContext := { + result := { name2ModIdx := linkCtx.name2ModIdx, moduleNames := linkCtx.moduleNames, moduleInfo := {} } + sourceLinker := fun _ _ => "#" + refsMap := refsMap + } + let (allTactics, _) := allTacticsRaw.mapM Process.TacticInfo.docStringToHtml |>.run {} minimalSiteCtx baseConfig + -- Generate the search index (declaration-data.bmp) - htmlOutputIndex baseConfig jsonModules + htmlOutputIndex baseConfig jsonModules allTactics -- Update navbar to include all modules on disk updateNavbarFromDisk buildDir From 18a4266e6ab92f12b05da6fafe16156114b7d73c Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Thu, 12 Feb 2026 00:20:51 +0100 Subject: [PATCH 075/106] Slight increase in eqns robustness --- DocGen4/Output/Definition.lean | 1 + 1 file changed, 1 insertion(+) diff --git a/DocGen4/Output/Definition.lean b/DocGen4/Output/Definition.lean index c95c39cd..b1bf48a3 100644 --- a/DocGen4/Output/Definition.lean +++ b/DocGen4/Output/Definition.lean 
@@ -19,6 +19,7 @@ defined in `equationLimit` we stop trying since they: -/ def equationsToHtml (i : Process.DefinitionInfo) : HtmlM (Array Html) := do if let some eqs := i.equations then + if eqs.isEmpty && !i.equationsWereOmitted then return #[] let equationsHtml ← eqs.mapM equationToHtml if i.equationsWereOmitted then return #[ From f7fd765e314132e76904eded05d2e685b060f5c4 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Thu, 12 Feb 2026 01:56:15 +0100 Subject: [PATCH 076/106] fix: markdown postprocessing --- DocGen4/Process/NameInfo.lean | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/DocGen4/Process/NameInfo.lean b/DocGen4/Process/NameInfo.lean index 7e69e7dc..1225aa40 100644 --- a/DocGen4/Process/NameInfo.lean +++ b/DocGen4/Process/NameInfo.lean @@ -56,14 +56,12 @@ def getDocString? (env : Environment) (name : Name) : IO (Option (String ⊕ Ver let name := alternativeOfTactic env name |>.getD name match (← findInternalDocString? env name) with | none => return none - | some (.inl markdown) => - let exts := getTacticExtensionString env name - let spellings := getRecommendedSpellingString env name - return some <| .inl <| markdown ++ exts ++ spellings | some (.inr verso) => let exts := getTacticExtensionText env name |>.map (#[·]) |>.getD #[] let spellings := getRecommendedSpellingText env name |>.map (#[·]) |>.getD #[] return some <| .inr <| { verso with text := verso.text ++ exts ++ spellings } + | some (.inl _) => + return (·.map .inl) (← Lean.findDocString? 
env name) def NameInfo.ofTypedName (n : Name) (t : Expr) : MetaM NameInfo := do From bfac47f3ced255a480cb4d4490c523ed06ed2434 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 16 Feb 2026 06:14:27 +0100 Subject: [PATCH 077/106] fix: only use rendered decls as link targets --- DocGen4/DB/Read.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index 4e2b2d53..135b31a9 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -103,7 +103,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO let getModuleNamesStmt ← sqlite.prepare "SELECT name FROM modules ORDER BY name" let getModuleSourceUrlsStmt ← sqlite.prepare "SELECT name, source_url FROM modules WHERE source_url IS NOT NULL" let getModuleImportsStmt ← sqlite.prepare "SELECT imported FROM module_imports WHERE importer = ?" - let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info" + let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info WHERE render = 1" let buildInternalNamesStmt ← sqlite.prepare "SELECT name, target_module FROM internal_names" let loadModuleMembersStmt ← sqlite.prepare "SELECT position, kind, name, type, sorried, render, NULL as mod_doc \ From d74c3b3c780647fa7fdd9b32b664e331c1e0e621 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 16 Feb 2026 07:39:30 +0100 Subject: [PATCH 078/106] fix: only link to rendered names Otherwise use a heuristic to link to the name. 
--- DocGen4/DB.lean | 5 ++++- DocGen4/DB/Read.lean | 16 ++++++++++++++-- DocGen4/Output.lean | 1 + DocGen4/Output/Base.lean | 33 ++++++++++++++++++--------------- DocGen4/Process/Analyze.lean | 8 ++++++++ Main.lean | 2 +- 6 files changed, 46 insertions(+), 19 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index b138a5aa..58388f1b 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -377,6 +377,7 @@ def openForReading (dbFile : System.FilePath) (values : DocstringValues) : IO DB getModuleSourceUrls := readOps.getModuleSourceUrls, getModuleImports := readOps.getModuleImports, buildName2ModIdx := readOps.buildName2ModIdx, + buildRenderedNames := readOps.buildRenderedNames, loadModule := readOps.loadModule, loadAllTactics := readOps.loadAllTactics, } @@ -391,13 +392,15 @@ structure LinkingContext where moduleNames : Array Name sourceUrls : Std.HashMap Name String name2ModIdx : Std.HashMap Name ModuleIdx + renderedNames : Std.HashSet Name /-- Load the linking context from the database. -/ def DB.loadLinkingContext (db : DB) : IO LinkingContext := do let moduleNames ← db.getModuleNames let sourceUrls ← db.getModuleSourceUrls let name2ModIdx ← db.buildName2ModIdx moduleNames - return { moduleNames, sourceUrls, name2ModIdx } + let renderedNames ← db.buildRenderedNames + return { moduleNames, sourceUrls, name2ModIdx, renderedNames } /-- Get transitive closure of imports for given modules using recursive CTE. 
diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index 135b31a9..e57d1a61 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -20,6 +20,7 @@ structure ReadOps where getModuleSourceUrls : IO (Std.HashMap Lean.Name String) getModuleImports : Lean.Name → IO (Array Lean.Name) buildName2ModIdx : Array Lean.Name → IO (Std.HashMap Lean.Name Lean.ModuleIdx) + buildRenderedNames : IO (Std.HashSet Lean.Name) loadModule : Lean.Name → IO Process.Module loadAllTactics : IO (Array (Process.TacticInfo Process.MarkdownDocstring)) @@ -74,6 +75,7 @@ private structure ReadStmts where loadTacticTagsStmt : SQLite.Stmt loadAllTacticsStmt : SQLite.Stmt loadAllTacticTagsStmt : SQLite.Stmt + buildRenderedNamesStmt : SQLite.Stmt open Lean SQLite.Blob in private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO ReadStmts := do @@ -103,7 +105,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO let getModuleNamesStmt ← sqlite.prepare "SELECT name FROM modules ORDER BY name" let getModuleSourceUrlsStmt ← sqlite.prepare "SELECT name, source_url FROM modules WHERE source_url IS NOT NULL" let getModuleImportsStmt ← sqlite.prepare "SELECT imported FROM module_imports WHERE importer = ?" - let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info WHERE render = 1" + let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info" let buildInternalNamesStmt ← sqlite.prepare "SELECT name, target_module FROM internal_names" let loadModuleMembersStmt ← sqlite.prepare "SELECT position, kind, name, type, sorried, render, NULL as mod_doc \ @@ -118,6 +120,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO "SELECT module_name, internal_name, user_name, doc_string FROM tactics \ ORDER BY user_name, module_name, internal_name" let loadAllTacticTagsStmt ← sqlite.prepare "SELECT tag FROM tactic_tags WHERE module_name = ? AND internal_name = ?" 
+ let buildRenderedNamesStmt ← sqlite.prepare "SELECT name FROM name_info WHERE render = 1" pure { values, loadArgsStmt, loadAttrsStmt, readMdDocstringStmt, readVersoDocstringStmt, loadDeclRangeStmt, loadEqnsStmt, loadInstanceArgsStmt, loadStructureParentsStmt, @@ -128,7 +131,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO getModuleNamesStmt, getModuleSourceUrlsStmt, getModuleImportsStmt, buildNameInfoStmt, buildInternalNamesStmt, loadModuleMembersStmt, loadTacticsStmt, loadTacticTagsStmt, - loadAllTacticsStmt, loadAllTacticTagsStmt + loadAllTacticsStmt, loadAllTacticTagsStmt, buildRenderedNamesStmt } open Lean SQLite.Blob in @@ -519,6 +522,14 @@ private def ReadStmts.buildName2ModIdx (s : ReadStmts) (moduleNames : Array Name done s.buildInternalNamesStmt return result +open Lean in +private def ReadStmts.buildRenderedNames (s : ReadStmts) : IO (Std.HashSet Name) := do + let mut result : Std.HashSet Name := {} + while (← s.buildRenderedNamesStmt.step) do + result := result.insert (← s.buildRenderedNamesStmt.columnText 0).toName + done s.buildRenderedNamesStmt + return result + open Lean SQLite.Blob in private def ReadStmts.loadModule (s : ReadStmts) (moduleName : Name) : IO Process.Module := do let modNameStr := moduleName.toString @@ -587,6 +598,7 @@ def mkReadOps (sqlite : SQLite) (values : DocstringValues) : IO ReadOps := do getModuleSourceUrls := s.getModuleSourceUrls getModuleImports := s.getModuleImports buildName2ModIdx := s.buildName2ModIdx + buildRenderedNames := s.buildRenderedNames loadModule := s.loadModule loadAllTactics := s.loadAllTactics } diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 08e32f80..d2a14499 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -116,6 +116,7 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi name2ModIdx := linkCtx.name2ModIdx moduleNames := linkCtx.moduleNames moduleInfo := ({} : Std.HashMap Name Process.Module).insert modName 
module + renderedNames := linkCtx.renderedNames } let config : SiteContext := { diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index fc6501ca..276222db 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -261,21 +261,23 @@ def declNameToHtmlBreakWithinLink (name : Name) : HtmlM Html := do /-- For a name, try to find a linkable target by stripping suffix components -that are numeric or start with `_`. Returns the first name found in name2ModIdx, +that are numeric or start with `_`. Returns the first name found in `targets`, or none if nothing is found. -/ -private def findLinkableParent (name2ModIdx : Std.HashMap Name ModuleIdx) (name : Name) : Option Name := - match name with +private def findLinkableParent (name : Name) (targets : Std.HashSet Name) : Option Name := + go name +where + isLinkable n := targets.contains n + go : Name → Option Name | .str parent s => - -- If this component starts with _ or is numeric-like, try the parent if s.startsWith "_" then - findLinkableParent name2ModIdx parent - else if name2ModIdx.contains name then - some name + go parent + else if isLinkable (.str parent s) then + some (.str parent s) else - findLinkableParent name2ModIdx parent + go parent | .num parent _ => - findLinkableParent name2ModIdx parent + go parent | .anonymous => none /-- @@ -312,22 +314,23 @@ partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM (Bool × Array H let (innerHasAnchor, innerHtml) ← renderedCodeToHtmlAux inner match tag with | .const name => - let name2ModIdx := (← getResult).name2ModIdx - if name2ModIdx.contains name then + let { renderedNames, .. 
} ← getResult + let hasAnchor := renderedNames.contains name + if hasAnchor then let link ← declNameToLink name -- Avoid nested anchors: if inner content already has anchors, don't wrap again - -- Match original behavior: no fn wrapper when const is in name2ModIdx if innerHasAnchor then return (true, innerHtml) else return (true, #[[innerHtml]]) else - -- Name not in name2ModIdx - try to find a linkable parent - -- This handles both: + -- Name not rendered - find an appropriate rendered target. + -- This handles: -- 1. Private names like `_private.Init.Prelude.0.Lean.Name.hash._proof_1` -- 2. Auxiliary names like `Std.Do.Option.instWPMonad._proof_2` + -- 3. Constructors, projection functions, and other non-rendered declarations let nameToSearch := Lean.privateToUserName? name |>.getD name - match findLinkableParent name2ModIdx nameToSearch with + match findLinkableParent nameToSearch renderedNames with | some target => let link ← declNameToLink target if innerHasAnchor then diff --git a/DocGen4/Process/Analyze.lean b/DocGen4/Process/Analyze.lean index 94468065..0f93437e 100644 --- a/DocGen4/Process/Analyze.lean +++ b/DocGen4/Process/Analyze.lean @@ -86,6 +86,11 @@ structure AnalyzerResult where A map from module names to information about these modules. -/ moduleInfo : Std.HashMap Name Module + /-- + Names that have rendered declarations (i.e., will have a `
                    ` anchor). + Used to decide whether to generate a direct link or fall through to redirect logic. + -/ + renderedNames : Std.HashSet Name deriving Inhabited namespace ModuleMember @@ -216,6 +221,9 @@ def process (task : AnalyzeTask) : MetaM AnalyzerResult := do name2ModIdx := env.const2ModIdx, moduleNames := allModules, moduleInfo := res, + -- This field is used during HTML rendering, but not while populating the DB, so it can be empty + -- here: + renderedNames := {} } def filterDocInfo (ms : Array ModuleMember) : Array DocInfo := diff --git a/Main.lean b/Main.lean index a88dbdf4..66c4e4c5 100644 --- a/Main.lean +++ b/Main.lean @@ -104,7 +104,7 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do Std.HashMap.emptyWithCapacity baseConfig.refs.size |>.insertMany (baseConfig.refs.iter.map fun x => (x.citekey, x)) let minimalSiteCtx : SiteContext := { - result := { name2ModIdx := linkCtx.name2ModIdx, moduleNames := linkCtx.moduleNames, moduleInfo := {} } + result := { linkCtx with moduleNames := linkCtx.moduleNames, moduleInfo := {} } sourceLinker := fun _ _ => "#" refsMap := refsMap } From 188f7938473c0910c18788698ec1c135815404e6 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 16 Feb 2026 11:05:41 +0100 Subject: [PATCH 079/106] Revert "fix: only link to rendered names" This reverts commit d74c3b3c780647fa7fdd9b32b664e331c1e0e621. 
--- DocGen4/DB.lean | 5 +---- DocGen4/DB/Read.lean | 16 ++-------------- DocGen4/Output.lean | 1 - DocGen4/Output/Base.lean | 33 +++++++++++++++------------------ DocGen4/Process/Analyze.lean | 8 -------- Main.lean | 2 +- 6 files changed, 19 insertions(+), 46 deletions(-) diff --git a/DocGen4/DB.lean b/DocGen4/DB.lean index 58388f1b..b138a5aa 100644 --- a/DocGen4/DB.lean +++ b/DocGen4/DB.lean @@ -377,7 +377,6 @@ def openForReading (dbFile : System.FilePath) (values : DocstringValues) : IO DB getModuleSourceUrls := readOps.getModuleSourceUrls, getModuleImports := readOps.getModuleImports, buildName2ModIdx := readOps.buildName2ModIdx, - buildRenderedNames := readOps.buildRenderedNames, loadModule := readOps.loadModule, loadAllTactics := readOps.loadAllTactics, } @@ -392,15 +391,13 @@ structure LinkingContext where moduleNames : Array Name sourceUrls : Std.HashMap Name String name2ModIdx : Std.HashMap Name ModuleIdx - renderedNames : Std.HashSet Name /-- Load the linking context from the database. -/ def DB.loadLinkingContext (db : DB) : IO LinkingContext := do let moduleNames ← db.getModuleNames let sourceUrls ← db.getModuleSourceUrls let name2ModIdx ← db.buildName2ModIdx moduleNames - let renderedNames ← db.buildRenderedNames - return { moduleNames, sourceUrls, name2ModIdx, renderedNames } + return { moduleNames, sourceUrls, name2ModIdx } /-- Get transitive closure of imports for given modules using recursive CTE. 
diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index e57d1a61..135b31a9 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -20,7 +20,6 @@ structure ReadOps where getModuleSourceUrls : IO (Std.HashMap Lean.Name String) getModuleImports : Lean.Name → IO (Array Lean.Name) buildName2ModIdx : Array Lean.Name → IO (Std.HashMap Lean.Name Lean.ModuleIdx) - buildRenderedNames : IO (Std.HashSet Lean.Name) loadModule : Lean.Name → IO Process.Module loadAllTactics : IO (Array (Process.TacticInfo Process.MarkdownDocstring)) @@ -75,7 +74,6 @@ private structure ReadStmts where loadTacticTagsStmt : SQLite.Stmt loadAllTacticsStmt : SQLite.Stmt loadAllTacticTagsStmt : SQLite.Stmt - buildRenderedNamesStmt : SQLite.Stmt open Lean SQLite.Blob in private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO ReadStmts := do @@ -105,7 +103,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO let getModuleNamesStmt ← sqlite.prepare "SELECT name FROM modules ORDER BY name" let getModuleSourceUrlsStmt ← sqlite.prepare "SELECT name, source_url FROM modules WHERE source_url IS NOT NULL" let getModuleImportsStmt ← sqlite.prepare "SELECT imported FROM module_imports WHERE importer = ?" - let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info" + let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info WHERE render = 1" let buildInternalNamesStmt ← sqlite.prepare "SELECT name, target_module FROM internal_names" let loadModuleMembersStmt ← sqlite.prepare "SELECT position, kind, name, type, sorried, render, NULL as mod_doc \ @@ -120,7 +118,6 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO "SELECT module_name, internal_name, user_name, doc_string FROM tactics \ ORDER BY user_name, module_name, internal_name" let loadAllTacticTagsStmt ← sqlite.prepare "SELECT tag FROM tactic_tags WHERE module_name = ? AND internal_name = ?" 
- let buildRenderedNamesStmt ← sqlite.prepare "SELECT name FROM name_info WHERE render = 1" pure { values, loadArgsStmt, loadAttrsStmt, readMdDocstringStmt, readVersoDocstringStmt, loadDeclRangeStmt, loadEqnsStmt, loadInstanceArgsStmt, loadStructureParentsStmt, @@ -131,7 +128,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO getModuleNamesStmt, getModuleSourceUrlsStmt, getModuleImportsStmt, buildNameInfoStmt, buildInternalNamesStmt, loadModuleMembersStmt, loadTacticsStmt, loadTacticTagsStmt, - loadAllTacticsStmt, loadAllTacticTagsStmt, buildRenderedNamesStmt + loadAllTacticsStmt, loadAllTacticTagsStmt } open Lean SQLite.Blob in @@ -522,14 +519,6 @@ private def ReadStmts.buildName2ModIdx (s : ReadStmts) (moduleNames : Array Name done s.buildInternalNamesStmt return result -open Lean in -private def ReadStmts.buildRenderedNames (s : ReadStmts) : IO (Std.HashSet Name) := do - let mut result : Std.HashSet Name := {} - while (← s.buildRenderedNamesStmt.step) do - result := result.insert (← s.buildRenderedNamesStmt.columnText 0).toName - done s.buildRenderedNamesStmt - return result - open Lean SQLite.Blob in private def ReadStmts.loadModule (s : ReadStmts) (moduleName : Name) : IO Process.Module := do let modNameStr := moduleName.toString @@ -598,7 +587,6 @@ def mkReadOps (sqlite : SQLite) (values : DocstringValues) : IO ReadOps := do getModuleSourceUrls := s.getModuleSourceUrls getModuleImports := s.getModuleImports buildName2ModIdx := s.buildName2ModIdx - buildRenderedNames := s.buildRenderedNames loadModule := s.loadModule loadAllTactics := s.loadAllTactics } diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index d2a14499..08e32f80 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -116,7 +116,6 @@ def htmlOutputResultsParallel (baseConfig : SiteBaseContext) (dbPath : System.Fi name2ModIdx := linkCtx.name2ModIdx moduleNames := linkCtx.moduleNames moduleInfo := ({} : Std.HashMap Name Process.Module).insert modName 
module - renderedNames := linkCtx.renderedNames } let config : SiteContext := { diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index 276222db..fc6501ca 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -261,23 +261,21 @@ def declNameToHtmlBreakWithinLink (name : Name) : HtmlM Html := do /-- For a name, try to find a linkable target by stripping suffix components -that are numeric or start with `_`. Returns the first name found in `targets`, +that are numeric or start with `_`. Returns the first name found in name2ModIdx, or none if nothing is found. -/ -private def findLinkableParent (name : Name) (targets : Std.HashSet Name) : Option Name := - go name -where - isLinkable n := targets.contains n - go : Name → Option Name +private def findLinkableParent (name2ModIdx : Std.HashMap Name ModuleIdx) (name : Name) : Option Name := + match name with | .str parent s => + -- If this component starts with _ or is numeric-like, try the parent if s.startsWith "_" then - go parent - else if isLinkable (.str parent s) then - some (.str parent s) + findLinkableParent name2ModIdx parent + else if name2ModIdx.contains name then + some name else - go parent + findLinkableParent name2ModIdx parent | .num parent _ => - go parent + findLinkableParent name2ModIdx parent | .anonymous => none /-- @@ -314,23 +312,22 @@ partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM (Bool × Array H let (innerHasAnchor, innerHtml) ← renderedCodeToHtmlAux inner match tag with | .const name => - let { renderedNames, .. 
} ← getResult - let hasAnchor := renderedNames.contains name - if hasAnchor then + let name2ModIdx := (← getResult).name2ModIdx + if name2ModIdx.contains name then let link ← declNameToLink name -- Avoid nested anchors: if inner content already has anchors, don't wrap again + -- Match original behavior: no fn wrapper when const is in name2ModIdx if innerHasAnchor then return (true, innerHtml) else return (true, #[[innerHtml]]) else - -- Name not rendered - find an appropriate rendered target. - -- This handles: + -- Name not in name2ModIdx - try to find a linkable parent + -- This handles both: -- 1. Private names like `_private.Init.Prelude.0.Lean.Name.hash._proof_1` -- 2. Auxiliary names like `Std.Do.Option.instWPMonad._proof_2` - -- 3. Constructors, projection functions, and other non-rendered declarations let nameToSearch := Lean.privateToUserName? name |>.getD name - match findLinkableParent nameToSearch renderedNames with + match findLinkableParent name2ModIdx nameToSearch with | some target => let link ← declNameToLink target if innerHasAnchor then diff --git a/DocGen4/Process/Analyze.lean b/DocGen4/Process/Analyze.lean index 0f93437e..94468065 100644 --- a/DocGen4/Process/Analyze.lean +++ b/DocGen4/Process/Analyze.lean @@ -86,11 +86,6 @@ structure AnalyzerResult where A map from module names to information about these modules. -/ moduleInfo : Std.HashMap Name Module - /-- - Names that have rendered declarations (i.e., will have a `
                    ` anchor). - Used to decide whether to generate a direct link or fall through to redirect logic. - -/ - renderedNames : Std.HashSet Name deriving Inhabited namespace ModuleMember @@ -221,9 +216,6 @@ def process (task : AnalyzeTask) : MetaM AnalyzerResult := do name2ModIdx := env.const2ModIdx, moduleNames := allModules, moduleInfo := res, - -- This field is used during HTML rendering, but not while populating the DB, so it can be empty - -- here: - renderedNames := {} } def filterDocInfo (ms : Array ModuleMember) : Array DocInfo := diff --git a/Main.lean b/Main.lean index 66c4e4c5..a88dbdf4 100644 --- a/Main.lean +++ b/Main.lean @@ -104,7 +104,7 @@ def runFromDbCmd (p : Parsed) : IO UInt32 := do Std.HashMap.emptyWithCapacity baseConfig.refs.size |>.insertMany (baseConfig.refs.iter.map fun x => (x.citekey, x)) let minimalSiteCtx : SiteContext := { - result := { linkCtx with moduleNames := linkCtx.moduleNames, moduleInfo := {} } + result := { name2ModIdx := linkCtx.name2ModIdx, moduleNames := linkCtx.moduleNames, moduleInfo := {} } sourceLinker := fun _ _ => "#" refsMap := refsMap } From bad1961e1ddfe67a3fa21c8cb4b53372a6cc3cd2 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 16 Feb 2026 11:10:44 +0100 Subject: [PATCH 080/106] Alternate private name approach --- DocGen4/Output/Base.lean | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index fc6501ca..367c7b72 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -313,10 +313,9 @@ partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM (Bool × Array H match tag with | .const name => let name2ModIdx := (← getResult).name2ModIdx - if name2ModIdx.contains name then + if name2ModIdx.contains name && (Lean.privatePrefix? 
name).isNone then let link ← declNameToLink name -- Avoid nested anchors: if inner content already has anchors, don't wrap again - -- Match original behavior: no fn wrapper when const is in name2ModIdx if innerHasAnchor then return (true, innerHtml) else @@ -339,7 +338,7 @@ partial def renderedCodeToHtmlAux (code : RenderedCode) : HtmlM (Bool × Array H match Lean.privatePrefix? name with | some pfx => let modName := moduleFromPrivatePrefix pfx - if modName != .anonymous then + if modName != Name.anonymous then let link ← moduleNameToLink modName if innerHasAnchor then return (true, innerHtml) From d2293c751a69f774d0393c144de56e7e24ed6d46 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 16 Feb 2026 11:19:45 +0100 Subject: [PATCH 081/106] Revert "fix: only use rendered decls as link targets" This reverts commit bfac47f3ced255a480cb4d4490c523ed06ed2434. --- DocGen4/DB/Read.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DocGen4/DB/Read.lean b/DocGen4/DB/Read.lean index 135b31a9..4e2b2d53 100644 --- a/DocGen4/DB/Read.lean +++ b/DocGen4/DB/Read.lean @@ -103,7 +103,7 @@ private def ReadStmts.prepare (sqlite : SQLite) (values : DocstringValues) : IO let getModuleNamesStmt ← sqlite.prepare "SELECT name FROM modules ORDER BY name" let getModuleSourceUrlsStmt ← sqlite.prepare "SELECT name, source_url FROM modules WHERE source_url IS NOT NULL" let getModuleImportsStmt ← sqlite.prepare "SELECT imported FROM module_imports WHERE importer = ?" 
- let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info WHERE render = 1" + let buildNameInfoStmt ← sqlite.prepare "SELECT name, module_name FROM name_info" let buildInternalNamesStmt ← sqlite.prepare "SELECT name, target_module FROM internal_names" let loadModuleMembersStmt ← sqlite.prepare "SELECT position, kind, name, type, sorried, render, NULL as mod_doc \ From 7136299f83c816c23c840014c2a620997f1aba4c Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 16 Feb 2026 15:41:38 +0100 Subject: [PATCH 082/106] fix: don't attempt to auto-link private names --- DocGen4/Output/DocString.lean | 12 +- DocGen4/Output/Module.lean | 2 +- DocGen4/Output/ToJson.lean | 2 +- DocGen4/Process/Analyze.lean | 8 +- scripts/check_diff_soup.py | 587 ++++++++++++++++++++++++---------- 5 files changed, 434 insertions(+), 177 deletions(-) diff --git a/DocGen4/Output/DocString.lean b/DocGen4/Output/DocString.lean index f757d2df..48203af3 100644 --- a/DocGen4/Output/DocString.lean +++ b/DocGen4/Output/DocString.lean @@ -38,7 +38,7 @@ def nameToLink? (s : String) : HtmlM (Option String) := do return (← getRoot) ++ s.dropEnd 5 ++ ".html" else if let some name := Lean.Syntax.decodeNameLit ("`" ++ s) then -- with exactly the same name - if res.name2ModIdx.contains name then + if res.name2ModIdx.contains name && !isPrivateName name then declNameToLink name -- module name else if res.moduleNames.contains name then @@ -47,10 +47,14 @@ def nameToLink? (s : String) : HtmlM (Option String) := do else match (← getCurrentName) with | some currentName => - match res.moduleInfo[currentName]! |>.members |> filterDocInfo |>.find? (sameEnd ·.getName name) with - | some info => + let info? := + res.moduleInfo[currentName]! |>.members |>.iter + |> filterDocInfo + |>.filter (!isPrivateName ·.getName) + |>.find? (sameEnd ·.getName name) + if let some info := info? 
then declNameToLink info.getName - | _ => return none + else return none | _ => return none else return none diff --git a/DocGen4/Output/Module.lean b/DocGen4/Output/Module.lean index 42622fe4..744ff486 100644 --- a/DocGen4/Output/Module.lean +++ b/DocGen4/Output/Module.lean @@ -179,7 +179,7 @@ The main entry point to rendering the HTML for an entire module. def moduleToHtml (module : Process.Module) : HtmlM Html := withTheReader SiteBaseContext (setCurrentName module.name) do let relevantMembers := module.members.filter Process.ModuleMember.shouldRender let memberDocs ← relevantMembers.mapM (moduleMemberToHtml module.name) - let memberNames := filterDocInfo relevantMembers |>.map DocInfo.getName + let memberNames := filterDocInfo relevantMembers.iter |>.map DocInfo.getName |>.toArray templateLiftExtends (baseHtmlGenerator module.name.toString) <| pure #[ ← internalNav memberNames module.name, Html.element "main" false #[] memberDocs diff --git a/DocGen4/Output/ToJson.lean b/DocGen4/Output/ToJson.lean index 73873738..520785f3 100644 --- a/DocGen4/Output/ToJson.lean +++ b/DocGen4/Output/ToJson.lean @@ -132,7 +132,7 @@ def moduleToJsonModule (module : Process.Module) : HtmlM JsonModule := do let mut jsonDecls := [] let mut instances := #[] let sourceLinker := (← read).sourceLinker module.name - let declInfo := Process.filterDocInfo module.members + let declInfo := Process.filterDocInfo module.members.iter for decl in declInfo do jsonDecls := (← DocInfo.toJson sourceLinker decl) :: jsonDecls if let .instanceInfo i := decl then diff --git a/DocGen4/Process/Analyze.lean b/DocGen4/Process/Analyze.lean index 94468065..19c6e4f6 100644 --- a/DocGen4/Process/Analyze.lean +++ b/DocGen4/Process/Analyze.lean @@ -218,10 +218,10 @@ def process (task : AnalyzeTask) : MetaM AnalyzerResult := do moduleInfo := res, } -def filterDocInfo (ms : Array ModuleMember) : Array DocInfo := - ms.filterMap filter - where - filter : ModuleMember → Option DocInfo +open Std (Iterator Iter) + +def 
filterDocInfo [Iterator α Id ModuleMember] (ms : @Iter α ModuleMember) := + ms.filterMap fun | ModuleMember.docInfo i => some i | _ => none diff --git a/scripts/check_diff_soup.py b/scripts/check_diff_soup.py index b35dcf6b..9b159afe 100755 --- a/scripts/check_diff_soup.py +++ b/scripts/check_diff_soup.py @@ -16,7 +16,9 @@ import argparse import difflib +import json import re +import sqlite3 import sys import time from concurrent.futures import ThreadPoolExecutor, as_completed @@ -263,6 +265,17 @@ def allow_unwrap_broken_link(ctx: DiffContext) -> str | None: return None +def _lean_file_hrefs_equivalent(href1: str, href2: str) -> bool: + """Check if two file:/// .lean hrefs are equivalent up to temp dir prefix.""" + if not href1.startswith("file:///") or not href2.startswith("file:///"): + return False + if not href1.endswith(".lean") or not href2.endswith(".lean"): + return False + if "/.lake/" not in href1 or "/.lake/" not in href2: + return False + return href1[href1.find("/.lake/"):] == href2[href2.find("/.lake/"):] + + def _check_href_valid(href: str, file_path: str, targets: set[str]) -> bool: """Check if an href target exists in the given target set.""" if not href: @@ -347,6 +360,157 @@ def allow_span_fn_to_link(ctx: DiffContext) -> str | None: return None +def allow_lean_file_href_change(ctx: DiffContext) -> str | None: + """Allow href changes on file:/// URLs to .lean files if they match from .lake onwards.""" + if ctx.diff_type != "attribute" or ctx.attribute_name != "href": + return None + old_val = str(ctx.old_value or "") + new_val = str(ctx.new_value or "") + if not old_val.startswith("file:///") or not new_val.startswith("file:///"): + return None + if not old_val.endswith(".lean") or not new_val.endswith(".lean"): + return None + # Compare from .lake onwards + old_suffix = old_val[old_val.find("/.lake/"):] if "/.lake/" in old_val else old_val + new_suffix = new_val[new_val.find("/.lake/"):] if "/.lake/" in new_val else new_val + if old_suffix == 
new_suffix: + return "file:/// .lean href differs only in temp directory prefix" + return None + + +# DB-backed declaration source position data, loaded via --db flag. +# Maps (module_name, decl_name) → (start_line, start_column). +_db_decl_positions: dict[tuple[str, str], tuple[int, int]] = {} + + +def load_db_decl_positions(db_path: Path) -> dict[tuple[str, str], tuple[int, int]]: + """Load declaration source positions from the doc-gen4 SQLite database.""" + conn = sqlite3.connect(str(db_path)) + cursor = conn.execute( + "SELECT n.module_name, n.name, r.start_line, r.start_column " + "FROM name_info n JOIN declaration_ranges r " + "USING (module_name, position)" + ) + result = {} + for module_name, name, start_line, start_column in cursor: + result[(module_name, name)] = (start_line, start_column) + conn.close() + return result + + +def _file_path_to_module(file_path: str) -> str: + """Convert HTML file path to Lean module name (e.g. 'Init/Core.html' → 'Init.Core').""" + return file_path.removesuffix(".html").replace("/", ".") + + +def _find_enclosing_decl(elem: Tag | None, ancestors: list[Tag]) -> Tag | None: + """Find the enclosing
                    from an element or its ancestors.""" + if elem and isinstance(elem, Tag) and elem.name == "div" and "decl" in elem.get("class", []): + return elem + for a in ancestors: + if isinstance(a, Tag) and a.name == "div" and "decl" in a.get("class", []): + return a + return None + + +def _extract_decl_name_from_context(elem: Tag | None, ancestors: list[Tag]) -> str | None: + """Extract declaration name from either a
                    or an context.""" + # Check for enclosing
                    + decl = _find_enclosing_decl(elem, ancestors) + if decl is not None: + return decl.get("id") + + # Check if the element itself is (or contains) a link with a fragment-only href + if elem and isinstance(elem, Tag): + a = elem if elem.name == "a" else elem.find("a") + if a: + href = a.get("href", "") + if isinstance(href, str) and href.startswith("#"): + return href[1:] + + # Check ancestors for with fragment href (handles children of nav links) + for a in ancestors: + if isinstance(a, Tag) and a.name == "a": + href = a.get("href", "") + if isinstance(href, str) and href.startswith("#"): + return href[1:] + + return None + + +def allow_reorder_same_source_position(ctx: DiffContext) -> str | None: + """Accept diffs caused by reordering declarations that share the same source position. + + Uses the doc-gen4 database (loaded via --db) to look up the actual source line/column + for each declaration. If two swapped declarations originate from the same source position, + their ordering is non-deterministic and the diff is acceptable. + + Handles both declaration bodies (
                    ) and navigation links (). + """ + if not _db_decl_positions: + return None + + old_name = _extract_decl_name_from_context(ctx.old_elem, ctx.old_ancestors) + new_name = _extract_decl_name_from_context(ctx.new_elem, ctx.new_ancestors) + + if not old_name or not new_name: + return None + + # Same declaration, not a reordering + if old_name == new_name: + return None + + module = _file_path_to_module(ctx.file_path) + old_pos = _db_decl_positions.get((module, str(old_name))) + new_pos = _db_decl_positions.get((module, str(new_name))) + + if old_pos and new_pos and old_pos == new_pos: + return f"reordering of declarations at same source position (line {old_pos[0]})" + + return None + + +def allow_empty_equations_removal(ctx: DiffContext) -> str | None: + """Accept diffs caused by removal of empty equations sections (no equation items). + + The empty
                    Equations
                      + gets removed in the new version. Because the diff tool matches elements positionally, + this shows up as attribute/text diffs (old Equations paired with next sibling) rather + than a clean element_removed. + """ + + def is_empty_equations_details(elem: Tag) -> bool: + """Check if elem is
                      with Equations summary and no
                    • items.""" + if not isinstance(elem, Tag) or elem.name != "details": + return False + summary = elem.find("summary") + if not summary or summary.get_text(strip=True) != "Equations": + return False + eq_list = elem.find("ul", class_="equations") + if eq_list is None: + return True # No list at all + return len(eq_list.find_all("li", recursive=False)) == 0 + + # Check old element and its ancestors for an empty equations
                      + if ctx.old_elem and is_empty_equations_details(ctx.old_elem): + return "removal of empty equations section" + for ancestor in ctx.old_ancestors: + if is_empty_equations_details(ancestor): + return "removal of empty equations section" + + # Handle cascading positional shift: when an empty equations
                      is removed, + # the next sibling shifts position and appears as "element_removed" at the end. + # Check if any sibling in the old parent was an empty equations
                      . + if ctx.diff_type == "element_removed" and ctx.old_ancestors: + old_parent = ctx.old_ancestors[0] + if isinstance(old_parent, Tag): + for sibling in old_parent.children: + if isinstance(sibling, Tag) and is_empty_equations_details(sibling): + return "cascading from removal of empty equations section" + + return None + + def allow_duplicate_li_removal_in_imports(ctx: DiffContext) -> str | None: """Allow changes inside
                      that remove duplicate
                    • elements.""" if not ctx.has_ancestor("div.imports"): @@ -389,12 +553,15 @@ def get_li_contents(parent: Tag) -> list[str]: # Note: elements are handled specially by compare_code_elements() in compare_trees() # These rules handle differences outside of elements RULES: list[Rule] = [ + allow_lean_file_href_change, allow_href_change_if_old_broken, allow_a_to_span_if_broken, allow_span_fn_to_link, allow_unwrap_broken_link, allow_added_link_with_valid_target, + allow_empty_equations_removal, allow_duplicate_li_removal_in_imports, + allow_reorder_same_source_position, ] @@ -458,8 +625,12 @@ def compare_directories( # Regex patterns for extracting link targets (faster than full HTML parsing) # Require attributes to be inside complete HTML tags: # Use non-greedy [^>]*? to match the first id/name attribute in the tag -ID_PATTERN = re.compile(r'<\w[^>]*?\bid=["\']([^"\']+)["\'][^>]*>', re.IGNORECASE) -NAME_PATTERN = re.compile(r'<\w[^>]*?\bname=["\']([^"\']+)["\'][^>]*>', re.IGNORECASE) +# Two patterns per attribute: one for double-quoted, one for single-quoted values, +# so that a ' inside id="foo'" is captured correctly. 
+ID_PATTERN_DQ = re.compile(r'<\w[^>]*?\bid="([^"]+)"[^>]*>', re.IGNORECASE) +ID_PATTERN_SQ = re.compile(r"<\w[^>]*?\bid='([^']+)'[^>]*>", re.IGNORECASE) +NAME_PATTERN_DQ = re.compile(r'<\w[^>]*?\bname="([^"]+)"[^>]*>', re.IGNORECASE) +NAME_PATTERN_SQ = re.compile(r"<\w[^>]*?\bname='([^']+)'[^>]*>", re.IGNORECASE) def extract_targets_from_file(directory: Path, rel_path: str) -> set[str]: @@ -471,11 +642,15 @@ def extract_targets_from_file(directory: Path, rel_path: str) -> set[str]: content = file_path.read_text(encoding="utf-8", errors="replace") # Extract id attributes - for match in ID_PATTERN.finditer(content): + for match in ID_PATTERN_DQ.finditer(content): + targets.add(f"{rel_path}#{match.group(1)}") + for match in ID_PATTERN_SQ.finditer(content): targets.add(f"{rel_path}#{match.group(1)}") # Extract name attributes - for match in NAME_PATTERN.finditer(content): + for match in NAME_PATTERN_DQ.finditer(content): + targets.add(f"{rel_path}#{match.group(1)}") + for match in NAME_PATTERN_SQ.finditer(content): targets.add(f"{rel_path}#{match.group(1)}") except Exception as e: @@ -552,6 +727,9 @@ def wrappers_compatible(old_w: Tag | None, new_w: Tag | None) -> bool: new_href = str(new_w.get('href', '')) if old_href == new_href: return True + # Allow file:/// .lean hrefs that match from .lake onwards + if _lean_file_hrefs_equivalent(old_href, new_href): + return True # Allow href change if new link is valid (old can be valid or broken) return _check_href_valid(new_href, file_path, new_targets) elif old_w.name == 'span': @@ -636,25 +814,24 @@ def advance(stack: deque) -> tuple[str | None, Tag | None, bool]: # Main loop: consume text from both sides, verifying wrappers match while True: - # Get more text if needed + # Strip leading whitespace, then get more text if needed. + # We loop because fetched text may be whitespace-only (e.g., " " between tags). 
+ old_text = old_text.lstrip() while not old_text and old_stack: text, tag, done = advance(old_stack) if text: - old_text = text + old_text = text.lstrip() elif done: break + new_text = new_text.lstrip() while not new_text and new_stack: text, tag, done = advance(new_stack) if text: - new_text = text + new_text = text.lstrip() elif done: break - # Strip leading whitespace from both - old_text = old_text.lstrip() - new_text = new_text.lstrip() - # Check for completion if not old_text and not new_text: if not old_stack and not new_stack: @@ -949,6 +1126,144 @@ def should_use_structural_comparison(node: Tag) -> bool: return differences +def compare_tactics_page( + old_soup: BeautifulSoup, + new_soup: BeautifulSoup, + file_path: str, + old_targets: set[str], + new_targets: set[str], +) -> list[Difference] | None: + """Semantic comparison for tactics.html. + + Instead of positional comparison (which produces spurious diffs when + same-userName tactics are reordered), this: + 1. Matches tactic entries by internalName (the div id) + 2. Compares matched pairs with normal tree comparison + rules + 3. Verifies both pages are sorted by userName + 4. Ignores ordering within the same userName group + + Returns a list of Differences, or None to fall through to normal comparison. 
+ """ + old_main = (old_soup.body or old_soup).find("main") + new_main = (new_soup.body or new_soup).find("main") + if old_main is None or new_main is None: + return None + + def extract_tactic_divs(main_elem: Tag) -> dict[str, Tag]: + """Extract {internalName: div} from tactic divs.""" + result: dict[str, Tag] = {} + for div in main_elem.find_all("div", id=True, recursive=False): + if div.find("h2") is not None: + result[div.get("id", "")] = div + return result + + def get_user_names_in_order(main_elem: Tag) -> list[str]: + """Get userName sequence from tactic divs.""" + return [h2.get_text(strip=True) + for div in main_elem.find_all("div", id=True, recursive=False) + if (h2 := div.find("h2")) is not None] + + old_divs = extract_tactic_divs(old_main) + new_divs = extract_tactic_divs(new_main) + + # Must have the same set of tactic entries + if set(old_divs.keys()) != set(new_divs.keys()): + return None + + # Both must be sorted by userName (ignoring order within same userName) + old_names = get_user_names_in_order(old_main) + new_names = get_user_names_in_order(new_main) + if sorted(old_names) != old_names or sorted(new_names) != new_names: + return None + + # Compare matched tactic entries by internalName using normal tree comparison + differences: list[Difference] = [] + for internal_name, old_div in old_divs.items(): + new_div = new_divs[internal_name] + diffs = compare_trees( + old_div, new_div, file_path, + [old_main], [new_main], + old_targets, new_targets, + ) + differences.extend(diffs) + + # Compare non-tactic children of
                      (the intro paragraph etc.) positionally + old_non_tactic = [c for c in get_significant_children(old_main) + if not (isinstance(c, Tag) and c.name == "div" and c.get("id") and c.find("h2"))] + new_non_tactic = [c for c in get_significant_children(new_main) + if not (isinstance(c, Tag) and c.name == "div" and c.get("id") and c.find("h2"))] + for i in range(max(len(old_non_tactic), len(new_non_tactic))): + old_child = old_non_tactic[i] if i < len(old_non_tactic) else None + new_child = new_non_tactic[i] if i < len(new_non_tactic) else None + differences.extend(compare_trees( + old_child, new_child, file_path, + [old_main], [new_main], + old_targets, new_targets, + )) + + # Compare nav section: match nav links by href target rather than position + old_nav = (old_soup.body or old_soup).find("nav", class_="internal_nav") + new_nav = (new_soup.body or new_soup).find("nav", class_="internal_nav") + if old_nav and new_nav: + def extract_nav_entries(nav: Tag) -> dict[str, Tag]: + """Extract {href:

                      element} from nav links.""" + result: dict[str, Tag] = {} + for p in nav.find_all("p", recursive=False): + a = p.find("a") + if a and a.get("href", "").startswith("#"): + result[a["href"]] = p + return result + + old_nav_entries = extract_nav_entries(old_nav) + new_nav_entries = extract_nav_entries(new_nav) + + # Compare matched nav entries + for href in old_nav_entries: + if href in new_nav_entries: + diffs = compare_trees( + old_nav_entries[href], new_nav_entries[href], file_path, + [old_nav], [new_nav], + old_targets, new_targets, + ) + differences.extend(diffs) + + # Report any entries only in one side + for href in set(old_nav_entries) - set(new_nav_entries): + differences.append(Difference( + file_path=file_path, diff_type="element_removed", + old_elem=old_nav_entries[href], new_elem=None, + old_ancestors=[old_nav], new_ancestors=[new_nav], + )) + for href in set(new_nav_entries) - set(old_nav_entries): + differences.append(Difference( + file_path=file_path, diff_type="element_added", + old_elem=None, new_elem=new_nav_entries[href], + old_ancestors=[old_nav], new_ancestors=[new_nav], + )) + + # Compare the "return to top" link and any other non-

                      children + old_other = [c for c in get_significant_children(old_nav) if not (isinstance(c, Tag) and c.name == "p" and c.find("a"))] + new_other = [c for c in get_significant_children(new_nav) if not (isinstance(c, Tag) and c.name == "p" and c.find("a"))] + for i in range(max(len(old_other), len(new_other))): + oc = old_other[i] if i < len(old_other) else None + nc = new_other[i] if i < len(new_other) else None + differences.extend(compare_trees(oc, nc, file_path, [old_nav], [new_nav], old_targets, new_targets)) + + # Compare everything outside main and nav (rest of body) + old_body = old_soup.body or old_soup + new_body = new_soup.body or new_soup + old_rest = [c for c in get_significant_children(old_body) + if not (isinstance(c, Tag) and (c.name == "main" or (c.name == "nav" and "internal_nav" in c.get("class", []))))] + new_rest = [c for c in get_significant_children(new_body) + if not (isinstance(c, Tag) and (c.name == "main" or (c.name == "nav" and "internal_nav" in c.get("class", []))))] + for i in range(max(len(old_rest), len(new_rest))): + oc = old_rest[i] if i < len(old_rest) else None + nc = new_rest[i] if i < len(new_rest) else None + differences.extend(compare_trees(oc, nc, file_path, [], [], old_targets, new_targets)) + + return differences + + def compare_html_files( file_path: str, dir1: Path, @@ -967,6 +1282,14 @@ def compare_html_files( old_soup = BeautifulSoup(old_content, PARSER) new_soup = BeautifulSoup(new_content, PARSER) + # Semantic comparison for tactics.html + if file_path == "tactics.html": + tactics_result = compare_tactics_page(old_soup, new_soup, file_path, old_targets, new_targets) + if tactics_result is not None: + result.differences = tactics_result + result.elapsed_ms = (time.perf_counter() - start_time) * 1000 + return result + # Compare from body if exists, otherwise from root old_body = old_soup.body or old_soup new_body = new_soup.body or new_soup @@ -1254,155 +1577,6 @@ def print_summary( log(f" Files only 
in dir2: {files_only_in_dir2}") -# ============================================================================= -# Main -# ============================================================================= - - -def main() -> int: - parser = argparse.ArgumentParser( - description="Compare two HTML documentation directories", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("dir1", type=Path, help="First documentation directory (old)") - parser.add_argument("dir2", type=Path, help="Second documentation directory (new)") - parser.add_argument( - "-v", "--verbose", action="store_true", help="Show accepted differences too" - ) - parser.add_argument( - "-j", - "--jobs", - type=int, - default=8, - help="Number of parallel jobs (default: 8)", - ) - - args = parser.parse_args() - - if not args.dir1.is_dir(): - print(f"Error: {args.dir1} is not a directory", file=sys.stderr) - return 1 - - if not args.dir2.is_dir(): - print(f"Error: {args.dir2} is not a directory", file=sys.stderr) - return 1 - - total_start = time.perf_counter() - log(f"Comparing {args.dir1} vs {args.dir2}") - - # Step 1: Compare directory contents - step_start = time.perf_counter() - html_only_dir1, html_only_dir2, html_both, other_only_dir1, other_only_dir2 = compare_directories( - args.dir1, args.dir2 - ) - step_elapsed = (time.perf_counter() - step_start) * 1000 - - log(f"Scanning directories... 
({step_elapsed:.1f}ms)") - log(f" HTML files in both: {len(html_both)}") - log(f" HTML files only in dir1: {len(html_only_dir1)}") - log(f" HTML files only in dir2: {len(html_only_dir2)}") - - # Step 2: Extract link targets - step_start = time.perf_counter() - all_html_dir1 = html_both | html_only_dir1 - all_html_dir2 = html_both | html_only_dir2 - old_targets = extract_link_targets(args.dir1, all_html_dir1, args.jobs) - new_targets = extract_link_targets(args.dir2, all_html_dir2, args.jobs) - step_elapsed = (time.perf_counter() - step_start) * 1000 - - log(f"Extracting link targets... ({step_elapsed:.1f}ms)") - log(f" Targets in dir1: {len(old_targets)}") - log(f" Targets in dir2: {len(new_targets)}") - - # Report HTML files only in one directory (skip non-HTML files) - has_errors = False - files_only_dir1 = sorted(html_only_dir1) - files_only_dir2 = sorted(html_only_dir2) - - if files_only_dir1: - log("\n" + "=" * 60) - log("FILES ONLY IN DIR1:") - log("=" * 60) - for f in files_only_dir1: - log(f" {f}") - has_errors = True - - if files_only_dir2: - log("\n" + "=" * 60) - log("FILES ONLY IN DIR2:") - log("=" * 60) - for f in files_only_dir2: - log(f" {f}") - has_errors = True - - # Step 3: Compare HTML files in parallel, printing results in deterministic order - log(f"\nComparing {len(html_both)} HTML files...") - - total_rejected = 0 - total_accepted = 0 - files_with_diffs = 0 - files_compared = 0 - - # Sort file paths for deterministic output order - sorted_files = sorted(html_both) - results_by_path: dict[str, FileComparisonResult] = {} - next_to_print = 0 # Index into sorted_files of next file to print - - try: - with ThreadPoolExecutor(max_workers=args.jobs) as executor: - futures = { - executor.submit( - compare_html_files, file_path, args.dir1, args.dir2, old_targets, new_targets - ): file_path - for file_path in sorted_files - } - - for future in as_completed(futures): - file_path = futures[future] - result = future.result() - # Evaluate rules - 
result.differences = evaluate_rules( - result.differences, RULES, old_targets, new_targets - ) - results_by_path[file_path] = result - - # Print all consecutive results that are ready, in sorted order - while next_to_print < len(sorted_files): - next_path = sorted_files[next_to_print] - if next_path not in results_by_path: - break # Next file in order isn't ready yet - - res = results_by_path.pop(next_path) - rejected, accepted, has_error = print_file_report(res, args.verbose) - total_rejected += rejected - total_accepted += accepted - if rejected or has_error: - has_errors = True - if rejected or accepted: - files_with_diffs += 1 - files_compared += 1 - next_to_print += 1 - - except KeyboardInterrupt: - log("\n\nInterrupted! Cancelling pending tasks...") - executor.shutdown(wait=False, cancel_futures=True) - log(f"Processed {files_compared} of {len(sorted_files)} files before interrupt.") - return 130 # Standard exit code for SIGINT - - # Step 4: Print summary - total_elapsed = (time.perf_counter() - total_start) * 1000 - print_summary( - total_files=len(html_both), - files_with_diffs=files_with_diffs, - total_rejected=total_rejected, - total_accepted=total_accepted, - files_only_in_dir1=len(files_only_dir1), - files_only_in_dir2=len(files_only_dir2), - total_elapsed_ms=total_elapsed, - ) - - return 1 if has_errors else 0 - # ============================================================================= # Tests for compare_code_elements @@ -1651,6 +1825,15 @@ def test( old_targets={"Valid.html#foo"}, # Link was valid ) + # Test 23: Multiple words becoming links (e.g., "forget₂ CommMonCat MonCat") + test( + "multiple words wrapped in links", + "forget₂ CommMonCat MonCat", + 'forget₂ CommMonCat MonCat', + expected_ok=True, + new_targets={"CommMonCat.html#CommMonCat", "MonCat.html#MonCat"}, + ) + log(f"\n{passed} passed, {failed} failed") return 0 if failed == 0 else 1 @@ -1684,6 +1867,24 @@ def main_cli() -> int: default=8, help="Number of parallel jobs (default: 
8)", ) + compare_parser.add_argument( + "--restrict", + type=str, + default=None, + help="Only compare HTML files under this subdirectory (link targets still use full dirs)", + ) + compare_parser.add_argument( + "--cache-link-targets", + type=Path, + default=None, + help="Cache file for link targets (loads if exists, creates otherwise)", + ) + compare_parser.add_argument( + "--db", + type=Path, + default=None, + help="doc-gen4 SQLite database for source position lookups (enables reorder acceptance)", + ) # Test command subparsers.add_parser("test", help="Run unit tests for compare_code_elements") @@ -1718,6 +1919,24 @@ def main(args: argparse.Namespace | None = None) -> int: default=8, help="Number of parallel jobs (default: 8)", ) + parser.add_argument( + "--restrict", + type=str, + default=None, + help="Only compare HTML files under this subdirectory (link targets still use full dirs)", + ) + parser.add_argument( + "--cache-link-targets", + type=Path, + default=None, + help="Cache file for link targets (loads if exists, creates otherwise)", + ) + parser.add_argument( + "--db", + type=Path, + default=None, + help="doc-gen4 SQLite database for source position lookups (enables reorder acceptance)", + ) args = parser.parse_args() if not args.dir1.is_dir(): @@ -1728,6 +1947,16 @@ def main(args: argparse.Namespace | None = None) -> int: print(f"Error: {args.dir2} is not a directory", file=sys.stderr) return 1 + # Load DB declaration positions if provided + global _db_decl_positions + db_path = getattr(args, "db", None) + if db_path: + if not db_path.exists(): + print(f"Error: database {db_path} not found", file=sys.stderr) + return 1 + _db_decl_positions = load_db_decl_positions(db_path) + log(f"Loaded {len(_db_decl_positions)} declaration positions from {db_path}") + total_start = time.perf_counter() log(f"Comparing {args.dir1} vs {args.dir2}") @@ -1743,18 +1972,42 @@ def main(args: argparse.Namespace | None = None) -> int: log(f" HTML files only in dir1: 
{len(html_only_dir1)}") log(f" HTML files only in dir2: {len(html_only_dir2)}") - # Step 2: Extract link targets + # Step 2: Extract link targets (or load from cache) step_start = time.perf_counter() - all_html_dir1 = html_both | html_only_dir1 - all_html_dir2 = html_both | html_only_dir2 - old_targets = extract_link_targets(args.dir1, all_html_dir1, args.jobs) - new_targets = extract_link_targets(args.dir2, all_html_dir2, args.jobs) - step_elapsed = (time.perf_counter() - step_start) * 1000 + cache_file = getattr(args, "cache_link_targets", None) + if cache_file and cache_file.exists(): + import pickle + with open(cache_file, "rb") as f: + cached = pickle.load(f) + old_targets = cached["old"] + new_targets = cached["new"] + step_elapsed = (time.perf_counter() - step_start) * 1000 + log(f"Loaded link targets from cache '{cache_file}' ({step_elapsed:.1f}ms)") + else: + all_html_dir1 = html_both | html_only_dir1 + all_html_dir2 = html_both | html_only_dir2 + old_targets = extract_link_targets(args.dir1, all_html_dir1, args.jobs) + new_targets = extract_link_targets(args.dir2, all_html_dir2, args.jobs) + step_elapsed = (time.perf_counter() - step_start) * 1000 + log(f"Extracting link targets... ({step_elapsed:.1f}ms)") + if cache_file: + import pickle + with open(cache_file, "wb") as f: + pickle.dump({"old": old_targets, "new": new_targets}, f) + log(f"Saved link targets to cache '{cache_file}'") - log(f"Extracting link targets... 
({step_elapsed:.1f}ms)") log(f" Targets in dir1: {len(old_targets)}") log(f" Targets in dir2: {len(new_targets)}") + # Restrict comparison to subdirectory if requested + restrict = getattr(args, "restrict", None) + if restrict: + prefix = restrict.rstrip("/") + "/" + html_both = {f for f in html_both if f.startswith(prefix) or f == restrict} + html_only_dir1 = {f for f in html_only_dir1 if f.startswith(prefix) or f == restrict} + html_only_dir2 = {f for f in html_only_dir2 if f.startswith(prefix) or f == restrict} + log(f"Restricted to '{restrict}': {len(html_both)} files to compare") + # Report HTML files only in one directory (skip non-HTML files) has_errors = False files_only_dir1 = sorted(html_only_dir1) From 52bd09dac89c9fa704334504c9b6b7c032a187be Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 16 Feb 2026 19:24:35 +0100 Subject: [PATCH 083/106] fix: link targets for parent projections --- DocGen4/Output/Module.lean | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DocGen4/Output/Module.lean b/DocGen4/Output/Module.lean index 744ff486..02ebc6cd 100644 --- a/DocGen4/Output/Module.lean +++ b/DocGen4/Output/Module.lean @@ -31,7 +31,9 @@ def structureInfoHeader (s : Process.StructureInfo) : HtmlM (Array Html) := do for parent in s.parents, i in [0:s.parents.size] do if i > 0 then parents := parents.push (Html.text ", ") - parents := parents ++ (← renderedCodeToHtml parent.type) + let parentHtml ← renderedCodeToHtml parent.type + parents := parents.push + [parentHtml] nodes := nodes ++ parents return nodes From 3ac4817e742ff27e29894f58ff8c2cf652e891d9 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 16 Feb 2026 19:48:54 +0100 Subject: [PATCH 084/106] better linking --- DocGen4/Output/DocString.lean | 46 ++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/DocGen4/Output/DocString.lean b/DocGen4/Output/DocString.lean index 48203af3..34c70d07 100644 --- 
a/DocGen4/Output/DocString.lean +++ b/DocGen4/Output/DocString.lean @@ -26,6 +26,40 @@ namespace Output -/ def splitAround (s : String) (p : Char → Bool) : List String := splitAroundAux s p 0 0 [] +private def isAutoGeneratedSuffix (s : String) : Bool := + s == "rec" || s == "recOn" || s == "casesOn" || s == "noConfusion" || + s == "noConfusionType" || s == "below" || s == "brecOn" || + s == "ibelow" || s == "binductionOn" + +/-- +Check if a name has an HTML anchor in its module's documentation. +Names get anchors if they are rendered members, constructors, direct fields, +or parent projections. +-/ +private def hasHtmlAnchor (res : AnalyzerResult) (name : Name) : Bool := + if let some modIdx := res.name2ModIdx[name]? then + let modName := res.moduleNames[modIdx.toNat]! + if let some mod := res.moduleInfo[modName]? then + mod.members.any fun + | .docInfo d => + -- Rendered declarations get

                      + if d.getName == name && d.shouldRender then true + -- Constructors get