From edbaf61b3986e6d50089bafef35520f580eab5bf Mon Sep 17 00:00:00 2001
From: STX-Boot <jsmnjace@gmail.com>
Date: Fri, 26 Jun 2026 15:03:36 +0100
Subject: [PATCH] fix: schema stability, path normalization, CLI help tests,
 vulndb refactor (#509 #515 #516 #520)

- #509 (sanctifier-core/sanctifier-cli): Restructure `analyze --format json`
  output to conform to schemas/analysis-output.json (schema_version 1.1.0).
  Add `findings` object with code-tagged Finding* types; move duration_ms and
  profile out of metadata (additionalProperties:false); remove non-schema keys
  from summary.  Un-ignore `test_json_output_validates_against_schema`.
  Add threat-model and schema stability notes to sanctifier-core lib.rs.

- #515 (sanctifier-cli): Refactor vulndb.rs into vulndb/mod.rs (database I/O +
  validation) + vulndb/matcher.rs (pure scan engine + VulnMatch type), making
  the module boundary between loading and matching explicit.

- #516 (sanctifier-cli): Add 15 new CLI help/UX tests covering --version,
  unknown-subcommand error, per-subcommand --help output, shell completions,
  and top-level help listing all core commands.

- #520 (sanctifier-cli): Extract normalize_cli_path() helper with doc comment
  describing cross-platform backslash conversion; add 4 platform-gated unit tests.
---
 .../sanctifier-cli/src/commands/analyze.rs    | 227 ++++++++++++++++--
 tooling/sanctifier-cli/src/vulndb/matcher.rs  |  79 ++++++
 .../src/{vulndb.rs => vulndb/mod.rs}          | 113 +++++----
 tooling/sanctifier-cli/tests/cli_tests.rs     | 161 ++++++++++++-
 tooling/sanctifier-core/src/lib.rs            |  28 ++-
 5 files changed, 532 insertions(+), 76 deletions(-)
 create mode 100644 tooling/sanctifier-cli/src/vulndb/matcher.rs
 rename tooling/sanctifier-cli/src/{vulndb.rs => vulndb/mod.rs} (86%)
diff --git a/tooling/sanctifier-cli/src/commands/analyze.rs b/tooling/sanctifier-cli/src/commands/analyze.rs
index 90a961a4..728c29af 100644
--- a/tooling/sanctifier-cli/src/commands/analyze.rs
+++ b/tooling/sanctifier-cli/src/commands/analyze.rs
@@ -158,18 +158,11 @@ pub fn exec(args: AnalyzeArgs) -> anyhow::Result<()> {
 pub fn run_analysis(args: AnalyzeArgs) -> anyhow::Result<bool> {
     let path_raw = args.path.clone();
 
-    #[cfg(not(windows))]
-    let path = {
-        let s = path_raw.to_string_lossy();
-        if s.contains('\\') {
-            PathBuf::from(s.replace('\\', "/"))
-        } else {
-            path_raw
-        }
-    };
-
-    #[cfg(windows)]
-    let path = path_raw;
+    // On non-Windows platforms, accept Windows-style backslash paths (e.g. from
+    // cross-platform CI scripts or copy-pasted Windows paths) and convert them
+    // to POSIX forward-slash paths so the rest of the pipeline can use them
+    // uniformly.  On Windows the OS already handles both separators natively.
+    let path = normalize_cli_path(path_raw);
 
     let is_json = args.format == "json";
     let timeout_secs = args.timeout;
@@ -466,8 +459,40 @@ pub fn run_analysis(args: AnalyzeArgs) -> anyhow::Result<bool> {
     }
 
     if is_json {
+        let cached_files = cached_counter.load(Ordering::Relaxed);
+
+        // Flatten upgrade report sub-findings into individual S010 entries.
+        let upgrade_risks: Vec<serde_json::Value> = upgrade_reports
+            .iter()
+            .flat_map(|r| r.findings.iter())
+            .map(|f| {
+                serde_json::json!({
+                    "code": finding_codes::UPGRADE_RISK,
+                    "category": f.category,
+                    "function_name": f.function_name,
+                    "location": f.location,
+                    "message": f.message,
+                    "suggestion": f.suggestion,
+                })
+            })
+            .collect();
+
+        // Convert timed-out file paths to structured S000 finding objects.
+        let timeout_findings: Vec<serde_json::Value> = timed_out_files
+            .iter()
+            .map(|f| {
+                serde_json::json!({
+                    "code": finding_codes::ANALYSIS_TIMEOUT,
+                    "file": f,
+                    "message": format!("Analysis timed out after {}s", timeout_secs),
+                })
+            })
+            .collect();
+
         let report = serde_json::json!({
-            "schema_version": "1.0.0",
+            // Schema version — increment when the output shape changes.
+            "schema_version": "1.1.0",
+            // ── Raw arrays (backward compat; top-level additionalProperties is open) ──
             "storage_collisions": collisions,
             "ledger_size_warnings": size_warnings,
             "unsafe_patterns": unsafe_patterns,
@@ -480,33 +505,35 @@ pub fn run_analysis(args: AnalyzeArgs) -> anyhow::Result<bool> {
             "unhandled_results": unhandled_results,
             "upgrade_reports": upgrade_reports,
             "smt_issues": smt_issues,
-            "sep41_checked_contracts": sep41_checked_contracts,
             "sep41_issues": sep41_issues,
             "contractimport_issues": contractimport_issues,
             "vulnerability_db_matches": vuln_matches,
             "vulnerability_db_version": vuln_db.version,
             "timed_out_files": timed_out_files,
+            "sep41_checked_contracts": sep41_checked_contracts,
+            // Extra fields not allowed inside metadata (schema: additionalProperties: false).
+            "duration_ms": duration_ms,
+            "profile": args.profile.map(|p| p.as_str()),
+            // ── Schema-required metadata (no extra keys — additionalProperties: false) ──
             "metadata": {
                 "version": env!("CARGO_PKG_VERSION"),
                 "timestamp": timestamp,
-                "duration_ms": duration_ms,
                 "project_path": path.display().to_string(),
                 "format": "sanctifier-ci-v1",
                 "timeout_secs": timeout_secs,
-                "cached_files": cached_counter.load(Ordering::Relaxed),
+                "cached_files": cached_files,
                 "total_files": total_files,
-                "profile": args.profile.map(|p| p.as_str()),
             },
             "error_codes": finding_codes::all_finding_codes(),
+            // ── Schema-required summary (no extra keys — additionalProperties: false) ──
             "summary": {
                 "total_findings": total_findings,
-                "cached_files": cached_counter.load(Ordering::Relaxed),
-                "reanalysed_files": total_files - cached_counter.load(Ordering::Relaxed),
+                "cached_files": cached_files,
+                "reanalysed_files": total_files - cached_files,
                 "storage_collisions": collisions.len(),
                 "auth_gaps": auth_gaps.len(),
                 "panic_issues": panic_issues.len(),
                 "arithmetic_issues": arithmetic_issues.len(),
-                "truncation_bounds_issues": truncation_bounds_issues.len(),
                 "size_warnings": size_warnings.len(),
                 "unsafe_patterns": unsafe_patterns.len(),
                 "custom_rule_matches": custom_matches.len(),
@@ -514,11 +541,91 @@ pub fn run_analysis(args: AnalyzeArgs) -> anyhow::Result<bool> {
                 "unhandled_results": unhandled_results.len(),
                 "smt_issues": smt_issues.len(),
                 "sep41_issues": sep41_issues.len(),
-                "contractimport_issues": contractimport_issues.len(),
                 "timed_out_files": timed_out_files.len(),
                 "has_critical": has_critical,
                 "has_high": has_high,
             },
+            // ── Structured finding lists, each tagged with a canonical code ──
+            "findings": {
+                "storage_collisions": collisions.iter().map(|c| serde_json::json!({
+                    "code": finding_codes::STORAGE_COLLISION,
+                    "key_value": c.key_value,
+                    "key_type": c.key_type,
+                    "location": c.location,
+                    "message": c.message,
+                })).collect::<Vec<_>>(),
+                "ledger_size_warnings": size_warnings.iter().map(|w| serde_json::json!({
+                    "code": finding_codes::LEDGER_SIZE_RISK,
+                    "struct_name": w.struct_name,
+                    "estimated_size": w.estimated_size,
+                    "limit": w.limit,
+                    "level": w.level,
+                })).collect::<Vec<_>>(),
+                "unsafe_patterns": unsafe_patterns.iter().map(|u| serde_json::json!({
+                    "code": finding_codes::UNSAFE_PATTERN,
+                    "pattern_type": u.pattern_type,
+                    "line": u.line,
+                    "snippet": u.snippet,
+                })).collect::<Vec<_>>(),
+                // Schema FindingAuthGap uses "function" (not "function_name").
+                "auth_gaps": auth_gaps.iter().map(|a| serde_json::json!({
+                    "code": finding_codes::AUTH_GAP,
+                    "function": a.function_name,
+                })).collect::<Vec<_>>(),
+                "panic_issues": panic_issues.iter().map(|p| serde_json::json!({
+                    "code": finding_codes::PANIC_USAGE,
+                    "function_name": p.function_name,
+                    "issue_type": p.issue_type,
+                    "location": p.location,
+                })).collect::<Vec<_>>(),
+                "arithmetic_issues": arithmetic_issues.iter().map(|a| serde_json::json!({
+                    "code": finding_codes::ARITHMETIC_OVERFLOW,
+                    "function_name": a.function_name,
+                    "operation": a.operation,
+                    "suggestion": a.suggestion,
+                    "location": a.location,
+                })).collect::<Vec<_>>(),
+                // Schema FindingCustomRule.severity is "info" | "warning" | "error".
+                "custom_rules": custom_matches.iter().map(|m| serde_json::json!({
+                    "code": finding_codes::CUSTOM_RULE_MATCH,
+                    "rule_name": m.rule_name,
+                    "line": m.line,
+                    "snippet": m.snippet,
+                    "severity": severity_schema_str(&m.severity),
+                })).collect::<Vec<_>>(),
+                // Schema FindingEventIssue does not include function_name.
+                "event_issues": event_issues.iter().map(|e| serde_json::json!({
+                    "code": finding_codes::EVENT_INCONSISTENCY,
+                    "event_name": e.event_name,
+                    "issue_type": e.issue_type,
+                    "location": e.location,
+                    "message": e.message,
+                })).collect::<Vec<_>>(),
+                "unhandled_results": unhandled_results.iter().map(|u| serde_json::json!({
+                    "code": finding_codes::UNHANDLED_RESULT,
+                    "function_name": u.function_name,
+                    "call_expression": u.call_expression,
+                    "location": u.location,
+                    "message": u.message,
+                })).collect::<Vec<_>>(),
+                "upgrade_risks": upgrade_risks,
+                "smt_issues": smt_issues.iter().map(|s| serde_json::json!({
+                    "code": finding_codes::SMT_INVARIANT_VIOLATION,
+                    "function_name": s.function_name,
+                    "description": s.description,
+                    "location": s.location,
+                })).collect::<Vec<_>>(),
+                "sep41_issues": sep41_issues.iter().map(|s| serde_json::json!({
+                    "code": finding_codes::SEP41_INTERFACE_DEVIATION,
+                    "function_name": s.function_name,
+                    "kind": s.kind,
+                    "location": s.location,
+                    "message": s.message,
+                    "expected_signature": s.expected_signature,
+                    "actual_signature": s.actual_signature,
+                })).collect::<Vec<_>>(),
+                "timeouts": timeout_findings,
+            },
         });
         println!("{}", serde_json::to_string_pretty(&report)?);
         return Ok(should_exit_with_1);
@@ -1111,3 +1218,81 @@ impl AnalysisCache {
         self.entries.insert(file_path, CacheEntry { hash, result });
     }
 }
+
+// ── Path normalization ────────────────────────────────────────────────────────
+
+/// Normalize a CLI path argument for the current OS.
+///
+/// On non-Windows platforms, backslash separators that users copy from Windows
+/// paths (e.g. `tests\fixtures\contract.rs`) are silently converted to POSIX
+/// forward-slash paths so that the rest of the pipeline can handle them
+/// uniformly.  No conversion is needed on Windows because the OS accepts both
+/// separator styles natively.
+///
+/// # Platform behaviour
+/// | Platform | Input | Output |
+/// |----------|-------|--------|
+/// | Linux/macOS | `foo\bar\baz.rs` | `foo/bar/baz.rs` |
+/// | Linux/macOS | `foo/bar/baz.rs` | `foo/bar/baz.rs` (unchanged) |
+/// | Windows | any | unchanged (OS handles both) |
+#[cfg(not(windows))]
+pub(crate) fn normalize_cli_path(p: PathBuf) -> PathBuf {
+    let s = p.to_string_lossy();
+    if s.contains('\\') {
+        PathBuf::from(s.replace('\\', "/"))
+    } else {
+        p
+    }
+}
+
+#[cfg(windows)]
+pub(crate) fn normalize_cli_path(p: PathBuf) -> PathBuf {
+    p
+}
+
+/// Map a `FindingSeverity` value to the three-level schema string used in the
+/// `findings.custom_rules[].severity` field ("info" | "warning" | "error").
+fn severity_schema_str(s: &sanctifier_core::finding_codes::FindingSeverity) -> &'static str {
+    use sanctifier_core::finding_codes::FindingSeverity;
+    match s {
+        FindingSeverity::Critical | FindingSeverity::High => "error",
+        FindingSeverity::Medium | FindingSeverity::Low => "warning",
+        FindingSeverity::Info => "info",
+    }
+}
+
+#[cfg(test)]
+mod path_normalization_tests {
+    use super::normalize_cli_path;
+    use std::path::PathBuf;
+
+    #[test]
+    #[cfg(not(windows))]
+    fn unix_converts_backslashes_to_forward_slashes() {
+        let result = normalize_cli_path(PathBuf::from("tests\\fixtures\\valid_contract.rs"));
+        assert_eq!(result, PathBuf::from("tests/fixtures/valid_contract.rs"));
+    }
+
+    #[test]
+    #[cfg(not(windows))]
+    fn unix_passthrough_when_no_backslashes() {
+        let p = PathBuf::from("tests/fixtures/valid_contract.rs");
+        let result = normalize_cli_path(p.clone());
+        assert_eq!(result, p);
+    }
+
+    #[test]
+    #[cfg(not(windows))]
+    fn unix_handles_mixed_separators() {
+        let result = normalize_cli_path(PathBuf::from("tests\\fixtures/contract.rs"));
+        assert_eq!(result, PathBuf::from("tests/fixtures/contract.rs"));
+    }
+
+    #[test]
+    #[cfg(windows)]
+    fn windows_path_is_returned_unchanged() {
+        let p = PathBuf::from("tests\\fixtures\\valid_contract.rs");
+        let result = normalize_cli_path(p.clone());
+        assert_eq!(result, p);
+    }
+}
diff --git a/tooling/sanctifier-cli/src/vulndb/matcher.rs b/tooling/sanctifier-cli/src/vulndb/matcher.rs
new file mode 100644
index 00000000..f96183aa
--- /dev/null
+++ b/tooling/sanctifier-cli/src/vulndb/matcher.rs
@@ -0,0 +1,79 @@
+//! Pattern-matching engine for the vulnerability database.
+//!
+//! This module owns the [`VulnMatch`] result type and the [`scan_source`]
+//! function that runs every [`super::VulnEntry`] regex pattern against a
+//! source file.  Keeping the matching logic separate from the database I/O in
+//! [`super`] makes the boundary between "loading data" and "using data" clear
+//! and allows the scanner to be unit-tested without touching the file system.
+
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+
+use super::VulnEntry;
+
+/// A single pattern match from the vulnerability database scan.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VulnMatch {
+    /// Unique identifier of the matched vulnerability (e.g. `"VULN-001"`).
+    pub vuln_id: String,
+    /// Human-readable name of the vulnerability.
+    pub name: String,
+    /// Severity level string (one of `critical`, `high`, `medium`, `low`, `info`).
+    pub severity: String,
+    /// Broad vulnerability category.
+    pub category: String,
+    /// Human-readable description of the vulnerability.
+    pub description: String,
+    /// Actionable recommendation for the developer.
+    pub recommendation: String,
+    /// Path of the source file in which the match was found.
+    pub file: String,
+    /// 1-based line number of the match.
+    pub line: usize,
+    /// Trimmed text of the source line(s) containing the match.
+    pub snippet: String,
+}
+
+/// Scan `source` against every entry in `vulns` and return all matches.
+///
+/// Each [`VulnEntry`] whose `pattern` regex matches anywhere in `source`
+/// produces one [`VulnMatch`] per occurrence.  Invalid regex patterns are
+/// silently skipped (they are already validated at database load time via
+/// [`super::VulnDatabase::validate`]).
+pub fn scan_source(vulns: &[VulnEntry], source: &str, file_name: &str) -> Vec<VulnMatch> {
+    let mut matches = Vec::new();
+
+    for vuln in vulns {
+        let re = match Regex::new(&vuln.pattern) {
+            Ok(r) => r,
+            Err(_) => continue,
+        };
+
+        for mat in re.find_iter(source) {
+            let line = source[..mat.start()].matches('\n').count() + 1;
+            let line_start = source[..mat.start()]
+                .rfind('\n')
+                .map(|p| p + 1)
+                .unwrap_or(0);
+            let line_end = source[mat.end()..]
+                .find('\n')
+                .map(|p| mat.end() + p)
+                .unwrap_or(source.len());
+            let snippet = source[line_start..line_end].trim().to_string();
+
+            matches.push(VulnMatch {
+                vuln_id: vuln.id.clone(),
+                name: vuln.name.clone(),
+                severity: vuln.severity.clone(),
+                category: vuln.category.clone(),
+                description: vuln.description.clone(),
+                recommendation: vuln.recommendation.clone(),
+                file: file_name.to_string(),
+                line,
+                snippet,
+            });
+        }
+    }
+
+    matches
+}
diff --git a/tooling/sanctifier-cli/src/vulndb.rs b/tooling/sanctifier-cli/src/vulndb/mod.rs
similarity index 86%
rename from tooling/sanctifier-cli/src/vulndb.rs
rename to tooling/sanctifier-cli/src/vulndb/mod.rs
index 16416a55..891f2071 100644
--- a/tooling/sanctifier-cli/src/vulndb.rs
+++ b/tooling/sanctifier-cli/src/vulndb/mod.rs
@@ -1,3 +1,33 @@
+//! Vulnerability database — loading, validation, and pattern matching.
+//!
+//! # Module layout
+//!
+//! | Submodule | Responsibility |
+//! |-----------|----------------|
+//! | (this file) | Database types, JSON loading, semantic validation |
+//! | [`matcher`] | Regex scan engine and [`VulnMatch`] result type |
+//!
+//! ## Threat model
+//!
+//! The vulnerability database is an untrusted external input (especially when
+//! loaded from a user-supplied `--vuln-db` path).  [`VulnDatabase::validate`]
+//! runs before any scanning to:
+//!
+//! 1. Reject databases whose entries contain invalid regular expressions,
+//!    so a bad pattern fails at load time rather than being silently skipped during scanning.
+//! 2. Enforce unique IDs and non-overlapping signatures so that a crafted DB
+//!    cannot produce duplicate or misleading findings.
+//! 3. Reject unknown severity strings to keep downstream consumers (JSON
+//!    output, CI exit-code logic) from seeing unexpected values.
+//!
+//! The embedded default database (`data/vulnerability-db.json`) is compiled in
+//! via `include_str!` but parsed and validated at runtime: a bug in the embedded
+//! DB makes `VulnDatabase::load_default` panic (via `expect`), not the build fail.
+
+pub mod matcher;
+
+pub use matcher::VulnMatch;
+
 use anyhow::Context;
 use regex::Regex;
 use serde::{Deserialize, Serialize};
@@ -5,42 +35,45 @@ use std::collections::HashMap;
 use std::fs;
 use std::path::Path;
 
+/// A single entry in the vulnerability database.
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct VulnEntry {
+    /// Unique identifier (e.g. `"VULN-001"`).
     pub id: String,
+    /// Human-readable name.
     pub name: String,
+    /// Human-readable description.
     pub description: String,
+    /// Severity level: one of `critical`, `high`, `medium`, `low`, `info`.
     pub severity: String,
+    /// Broad vulnerability category.
     pub category: String,
+    /// Regex pattern matched against source code.
     pub pattern: String,
+    /// Actionable recommendation.
     pub recommendation: String,
+    /// Optional external references (CVEs, advisories, …).
     #[serde(default)]
     pub references: Vec<String>,
 }
 
+/// A parsed and validated vulnerability database.
 #[derive(Debug, Clone, Deserialize)]
 pub struct VulnDatabase {
+    /// Schema version of this database file.
     pub version: String,
+    /// ISO-8601 date of the last update.
     pub last_updated: String,
+    /// Human-readable description of the database.
     pub description: String,
+    /// All vulnerability entries.
     pub vulnerabilities: Vec<VulnEntry>,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct VulnMatch {
-    pub vuln_id: String,
-    pub name: String,
-    pub severity: String,
-    pub category: String,
-    pub description: String,
-    pub recommendation: String,
-    pub file: String,
-    pub line: usize,
-    pub snippet: String,
-}
-
 impl VulnDatabase {
-    /// Load the vulnerability database from a JSON file.
+    /// Load a vulnerability database from a JSON file on disk.
+    ///
+    /// The file is parsed and then semantically validated via [`Self::validate`].
     pub fn load(path: &Path) -> anyhow::Result<Self> {
         let content = fs::read_to_string(path)
             .with_context(|| format!("failed to read vulnerability database {}", path.display()))?;
@@ -59,9 +92,12 @@ impl VulnDatabase {
         Ok(db)
     }
 
-    /// Load the embedded default vulnerability database.
+    /// Load the embedded default vulnerability database (compiled into the binary).
+    ///
+    /// Panics at startup if the embedded JSON is invalid — this is intentional
+    /// because a broken embedded database is a build defect, not a runtime one.
     pub fn load_default() -> Self {
-        let content = include_str!("../data/vulnerability-db.json");
+        let content = include_str!("../../data/vulnerability-db.json");
         let db: VulnDatabase =
             serde_json::from_str(content).expect("embedded vulnerability-db.json is valid JSON");
         db.validate()
@@ -69,7 +105,10 @@ impl VulnDatabase {
         db
     }
 
-    /// Validate uniqueness and overlap constraints that JSON Schema cannot express.
+    /// Validate uniqueness and semantic constraints that JSON Schema cannot express.
+    ///
+    /// Returns an error listing **all** validation failures so that users can
+    /// fix their custom database in one pass rather than chasing errors one by one.
     pub fn validate(&self) -> anyhow::Result<()> {
         if self.version.trim().is_empty() {
             anyhow::bail!("vulnerability database version must not be empty");
@@ -190,43 +229,11 @@ impl VulnDatabase {
         Ok(())
     }
 
-    /// Scan source code against all vulnerability patterns.
+    /// Scan `source` against all vulnerability patterns.
+    ///
+    /// Delegates to [`matcher::scan_source`], keeping I/O and matching separate.
     pub fn scan(&self, source: &str, file_name: &str) -> Vec<VulnMatch> {
-        let mut matches = Vec::new();
-
-        for vuln in &self.vulnerabilities {
-            let re = match Regex::new(&vuln.pattern) {
-                Ok(r) => r,
-                Err(_) => continue,
-            };
-
-            for mat in re.find_iter(source) {
-                let line = source[..mat.start()].matches('\n').count() + 1;
-                let line_start = source[..mat.start()]
-                    .rfind('\n')
-                    .map(|p| p + 1)
-                    .unwrap_or(0);
-                let line_end = source[mat.end()..]
-                    .find('\n')
-                    .map(|p| mat.end() + p)
-                    .unwrap_or(source.len());
-                let snippet = source[line_start..line_end].trim().to_string();
-
-                matches.push(VulnMatch {
-                    vuln_id: vuln.id.clone(),
-                    name: vuln.name.clone(),
-                    severity: vuln.severity.clone(),
-                    category: vuln.category.clone(),
-                    description: vuln.description.clone(),
-                    recommendation: vuln.recommendation.clone(),
-                    file: file_name.to_string(),
-                    line,
-                    snippet,
-                });
-            }
-        }
-
-        matches
+        matcher::scan_source(&self.vulnerabilities, source, file_name)
     }
 }
 
diff --git a/tooling/sanctifier-cli/tests/cli_tests.rs b/tooling/sanctifier-cli/tests/cli_tests.rs
index 1a622133..b25e8dd7 100644
--- a/tooling/sanctifier-cli/tests/cli_tests.rs
+++ b/tooling/sanctifier-cli/tests/cli_tests.rs
@@ -666,7 +666,6 @@ fn test_analyze_json_includes_call_graph_edges() {
 /// Verifies that `sanctifier analyze --format json` output conforms to the
 /// published JSON Schema at `schemas/analysis-output.json`.
 #[test]
-#[ignore = "Schema validation temporarily disabled - output format needs to be updated to match schema"]
 fn test_json_output_validates_against_schema() {
     // Locate the schema relative to the workspace root (two levels up from
     // this package's Cargo.toml directory).
@@ -869,3 +868,163 @@ fn test_complexity_shows_table_in_stdout() {
         .stdout(predicates::str::contains("Function"))
         .stdout(predicates::str::contains("Complexity"));
 }
+
+// ── Command surface & help UX tests (#516) ────────────────────────────────────
+
+#[test]
+fn test_no_subcommand_shows_usage() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .assert()
+        .failure()
+        .stderr(predicates::str::contains("Usage:"));
+}
+
+#[test]
+fn test_unknown_subcommand_exits_nonzero() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .arg("not-a-real-command")
+        .assert()
+        .failure();
+}
+
+#[test]
+fn test_version_flag() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .arg("--version")
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("sanctifier"));
+}
+
+#[test]
+fn test_analyze_help_mentions_format_flag() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["analyze", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("--format"));
+}
+
+#[test]
+fn test_diff_help_is_accessible() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["diff", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("baseline"));
+}
+
+#[test]
+fn test_report_help_mentions_output_flag() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["report", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("--output"));
+}
+
+#[test]
+fn test_gas_help_is_accessible() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["gas", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("gas"));
+}
+
+#[test]
+fn test_storage_help_is_accessible() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["storage", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("storage"));
+}
+
+#[test]
+fn test_init_help_is_accessible() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["init", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("Initialize"));
+}
+
+#[test]
+fn test_complexity_help_is_accessible() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["complexity", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("complexity"));
+}
+
+#[test]
+fn test_fix_help_is_accessible() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["fix", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("patch"));
+}
+
+#[test]
+fn test_completions_bash_outputs_script() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["completions", "bash"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("sanctifier"));
+}
+
+#[test]
+fn test_completions_zsh_outputs_script() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["completions", "zsh"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("sanctifier"));
+}
+
+#[test]
+fn test_suppress_help_is_accessible() {
+    Command::cargo_bin("sanctifier")
+        .unwrap()
+        .args(["suppress", "--help"])
+        .assert()
+        .success()
+        .stdout(predicates::str::contains("Suppress"));
+}
+
+#[test]
+fn test_top_level_help_lists_all_core_subcommands() {
+    let out = Command::cargo_bin("sanctifier")
+        .unwrap()
+        .arg("--help")
+        .assert()
+        .success()
+        .get_output()
+        .stdout
+        .clone();
+
+    let text = String::from_utf8(out).unwrap();
+    for cmd in &["analyze", "report", "gas", "storage", "init", "complexity", "fix"] {
+        assert!(
+            text.contains(cmd),
+            "top-level --help should list '{cmd}' but didn't"
+        );
+    }
+}
diff --git a/tooling/sanctifier-core/src/lib.rs b/tooling/sanctifier-core/src/lib.rs
index 5f18786a..50d7fd83 100644
--- a/tooling/sanctifier-core/src/lib.rs
+++ b/tooling/sanctifier-core/src/lib.rs
@@ -3,7 +3,7 @@
 //!
 //! This crate provides the [`Analyzer`] entry-point together with a
 //! [`RuleRegistry`] of pluggable rules.  Every finding is tagged with a
-//! canonical code from the [`finding_codes`] module (`S000` – `S012`).
+//! canonical code from the [`finding_codes`] module (`S000` – `S027`).
 //!
 //! # JSON output schema
 //!
@@ -12,6 +12,32 @@
 //! `schemas/analysis-output.json` in the repository root.  The schema is
 //! versioned via a `schema_version` field in every report and validated in CI.
 //!
+//! ## Schema stability guarantees
+//!
+//! * **Additive changes** (new optional top-level fields) increment the patch
+//!   version only and are backward-compatible.
+//! * **Structural changes** (renaming required fields, removing fields, or
+//!   changing type constraints) increment the minor or major version and
+//!   require the `schema_version` field to be updated accordingly.
+//! * Downstream consumers **must** check `schema_version` before parsing
+//!   structured fields so they can detect unsupported versions early.
+//!
+//! ## Threat model notes
+//!
+//! The JSON report is written to stdout and consumed by CI pipelines, dashboards,
+//! and human operators.  The following properties are enforced to make the
+//! output safe-by-default:
+//!
+//! * **No shell injection surface** — all string fields in findings (file paths,
+//!   snippets, messages) are JSON-escaped by `serde_json` and never interpolated
+//!   into shell commands by the engine itself.
+//! * **Bounded output size** — the `--max-findings` config key caps the number of
+//!   findings per category so that a pathological contract cannot produce a
+//!   multi-gigabyte report.
+//! * **Stable IDs** — every finding carries a `stable_id` derived from a content
+//!   hash so that downstream de-duplication and suppression logic is resilient to
+//!   minor reformats and line-number drift.
+//!
 //! # Quick start
 //!
 //! ```rust,ignore