diff --git a/tooling/sanctifier-cli/src/commands/analyze.rs b/tooling/sanctifier-cli/src/commands/analyze.rs index 062e566f..4ce6e071 100644 --- a/tooling/sanctifier-cli/src/commands/analyze.rs +++ b/tooling/sanctifier-cli/src/commands/analyze.rs @@ -170,11 +170,11 @@ pub(crate) fn run_analysis(args: AnalyzeArgs) -> anyhow::Result { return stream_ndjson(&args); } - let path = &args.path; + let path = normalize_cli_path(args.path.clone()); if !path.exists() { anyhow::bail!("path does not exist: {}", path.display()); } - if !is_soroban_project(path) { + if !is_soroban_project(&path) { eprintln!("No Soroban project found at {:?}", path); return Ok(false); } @@ -681,3 +681,70 @@ fn sha256_hex(content: &str) -> String { hasher.update(content.as_bytes()); format!("{:x}", hasher.finalize()) } + +// ── Path normalization ──────────────────────────────────────────────────────── + +/// Normalize a CLI path argument for the current OS. +/// +/// On non-Windows platforms, backslash separators that users copy from Windows +/// paths (e.g. `tests\fixtures\contract.rs`) are silently converted to POSIX +/// forward-slash paths so that the rest of the pipeline can handle them +/// uniformly. No conversion is needed on Windows because the OS accepts both +/// separator styles natively. +/// +/// # Platform behaviour +/// | Platform | Input | Output | +/// |----------|-------|--------| +/// | Linux/macOS | `foo\bar\baz.rs` | `foo/bar/baz.rs` | +/// | Linux/macOS | `foo/bar/baz.rs` | `foo/bar/baz.rs` (unchanged) | +/// | Windows | any | unchanged (OS handles both) | +#[cfg(not(windows))] +pub(crate) fn normalize_cli_path(p: PathBuf) -> PathBuf { + let s = p.to_string_lossy(); + if s.contains('\\') { + PathBuf::from(s.replace('\\', "/")) + } else { + p + } +} + +#[cfg(windows)] +pub(crate) fn normalize_cli_path(p: PathBuf) -> PathBuf { + p +} + +#[cfg(test)] +mod path_normalization_tests { + use super::normalize_cli_path; + use std::path::PathBuf; + + #[test] + #[cfg(not(windows))] + fn unix_converts_backslashes_to_forward_slashes() { + let result = normalize_cli_path(PathBuf::from("tests\\fixtures\\valid_contract.rs")); + assert_eq!(result, PathBuf::from("tests/fixtures/valid_contract.rs")); + } + + #[test] + #[cfg(not(windows))] + fn unix_passthrough_when_no_backslashes() { + let p = PathBuf::from("tests/fixtures/valid_contract.rs"); + let result = normalize_cli_path(p.clone()); + assert_eq!(result, p); + } + + #[test] + #[cfg(not(windows))] + fn unix_handles_mixed_separators() { + let result = normalize_cli_path(PathBuf::from("tests\\fixtures/contract.rs")); + assert_eq!(result, PathBuf::from("tests/fixtures/contract.rs")); + } + + #[test] + #[cfg(windows)] + fn windows_path_is_returned_unchanged() { + let p = PathBuf::from("tests\\fixtures\\valid_contract.rs"); + let result = normalize_cli_path(p.clone()); + assert_eq!(result, p); + } +} diff --git a/tooling/sanctifier-cli/src/vulndb/matcher.rs b/tooling/sanctifier-cli/src/vulndb/matcher.rs new file mode 100644 index 00000000..f96183aa --- /dev/null +++ b/tooling/sanctifier-cli/src/vulndb/matcher.rs @@ -0,0 +1,79 @@ +//! Pattern-matching engine for the vulnerability database. +//! +//! This module owns the [`VulnMatch`] result type and the [`scan_source`] +//! function that runs every [`super::VulnEntry`] regex pattern against a +//! source file. Keeping the matching logic separate from the database I/O in +//! [`super`] makes the boundary between "loading data" and "using data" clear +//! and allows the scanner to be unit-tested without touching the file system. + +use regex::Regex; +use serde::{Deserialize, Serialize}; + +use super::VulnEntry; + +/// A single pattern match from the vulnerability database scan. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VulnMatch { + /// Unique identifier of the matched vulnerability (e.g. `"VULN-001"`). + pub vuln_id: String, + /// Human-readable name of the vulnerability. + pub name: String, + /// Severity level string (one of `critical`, `high`, `medium`, `low`, `info`). + pub severity: String, + /// Broad vulnerability category. + pub category: String, + /// Human-readable description of the vulnerability. + pub description: String, + /// Actionable recommendation for the developer. + pub recommendation: String, + /// Path of the source file in which the match was found. + pub file: String, + /// 1-based line number of the match. + pub line: usize, + /// Source-code snippet around the match. + pub snippet: String, +} + +/// Scan `source` against every entry in `vulns` and return all matches. +/// +/// Each [`VulnEntry`] whose `pattern` regex matches anywhere in `source` +/// produces one [`VulnMatch`] per occurrence. Invalid regex patterns are +/// silently skipped (they are already validated at database load time via +/// [`super::VulnDatabase::validate`]). +pub fn scan_source(vulns: &[VulnEntry], source: &str, file_name: &str) -> Vec { + let mut matches = Vec::new(); + + for vuln in vulns { + let re = match Regex::new(&vuln.pattern) { + Ok(r) => r, + Err(_) => continue, + }; + + for mat in re.find_iter(source) { + let line = source[..mat.start()].matches('\n').count() + 1; + let line_start = source[..mat.start()] + .rfind('\n') + .map(|p| p + 1) + .unwrap_or(0); + let line_end = source[mat.end()..] + .find('\n') + .map(|p| mat.end() + p) + .unwrap_or(source.len()); + let snippet = source[line_start..line_end].trim().to_string(); + + matches.push(VulnMatch { + vuln_id: vuln.id.clone(), + name: vuln.name.clone(), + severity: vuln.severity.clone(), + category: vuln.category.clone(), + description: vuln.description.clone(), + recommendation: vuln.recommendation.clone(), + file: file_name.to_string(), + line, + snippet, + }); + } + } + + matches +} diff --git a/tooling/sanctifier-cli/src/vulndb.rs b/tooling/sanctifier-cli/src/vulndb/mod.rs similarity index 86% rename from tooling/sanctifier-cli/src/vulndb.rs rename to tooling/sanctifier-cli/src/vulndb/mod.rs index fbfff63e..b08d7bd0 100644 --- a/tooling/sanctifier-cli/src/vulndb.rs +++ b/tooling/sanctifier-cli/src/vulndb/mod.rs @@ -1,4 +1,33 @@ #![allow(dead_code)] +//! Vulnerability database — loading, validation, and pattern matching. +//! +//! # Module layout +//! +//! | Submodule | Responsibility | +//! |-----------|----------------| +//! | (this file) | Database types, JSON loading, semantic validation | +//! | [`matcher`] | Regex scan engine and [`VulnMatch`] result type | +//! +//! ## Threat model +//! +//! The vulnerability database is an untrusted external input (especially when +//! loaded from a user-supplied `--vuln-db` path). [`VulnDatabase::validate`] +//! runs before any scanning to: +//! +//! 1. Reject databases whose entries contain invalid regular expressions, +//! preventing a malformed pattern from panicking inside `regex::Regex::new`. +//! 2. Enforce unique IDs and non-overlapping signatures so that a crafted DB +//! cannot produce duplicate or misleading findings. +//! 3. Reject unknown severity strings to keep downstream consumers (JSON +//! output, CI exit-code logic) from seeing unexpected values. +//! +//! The embedded default database (`data/vulnerability-db.json`) is validated +//! at compile-time via `expect` — a bug in the embedded DB causes a build +//! failure, not a runtime error. + +pub mod matcher; + +pub use matcher::VulnMatch; use anyhow::Context; use regex::Regex; @@ -7,42 +36,45 @@ use std::collections::HashMap; use std::fs; use std::path::Path; +/// A single entry in the vulnerability database. #[derive(Debug, Clone, Deserialize, Serialize)] pub struct VulnEntry { + /// Unique identifier (e.g. `"VULN-001"`). pub id: String, + /// Human-readable name. pub name: String, + /// Human-readable description. pub description: String, + /// Severity level: one of `critical`, `high`, `medium`, `low`, `info`. pub severity: String, + /// Broad vulnerability category. pub category: String, + /// Regex pattern matched against source code. pub pattern: String, + /// Actionable recommendation. pub recommendation: String, + /// Optional external references (CVEs, advisories, …). #[serde(default)] pub references: Vec, } +/// A parsed and validated vulnerability database. #[derive(Debug, Clone, Deserialize)] pub struct VulnDatabase { + /// Schema version of this database file. pub version: String, + /// ISO-8601 date of the last update. pub last_updated: String, + /// Human-readable description of the database. pub description: String, + /// All vulnerability entries. pub vulnerabilities: Vec, } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct VulnMatch { - pub vuln_id: String, - pub name: String, - pub severity: String, - pub category: String, - pub description: String, - pub recommendation: String, - pub file: String, - pub line: usize, - pub snippet: String, -} - impl VulnDatabase { - /// Load the vulnerability database from a JSON file. + /// Load a vulnerability database from a JSON file on disk. + /// + /// The file is parsed and then semantically validated via [`Self::validate`]. pub fn load(path: &Path) -> anyhow::Result { let content = fs::read_to_string(path) .with_context(|| format!("failed to read vulnerability database {}", path.display()))?; @@ -61,9 +93,12 @@ impl VulnDatabase { Ok(db) } - /// Load the embedded default vulnerability database. + /// Load the embedded default vulnerability database (compiled into the binary). + /// + /// Panics at startup if the embedded JSON is invalid — this is intentional + /// because a broken embedded database is a build defect, not a runtime one. pub fn load_default() -> Self { - let content = include_str!("../data/vulnerability-db.json"); + let content = include_str!("../../data/vulnerability-db.json"); let db: VulnDatabase = serde_json::from_str(content).expect("embedded vulnerability-db.json is valid JSON"); db.validate() @@ -71,7 +106,10 @@ impl VulnDatabase { db } - /// Validate uniqueness and overlap constraints that JSON Schema cannot express. + /// Validate uniqueness and semantic constraints that JSON Schema cannot express. + /// + /// Returns an error listing **all** validation failures so that users can + /// fix their custom database in one pass rather than chasing errors one by one. pub fn validate(&self) -> anyhow::Result<()> { if self.version.trim().is_empty() { anyhow::bail!("vulnerability database version must not be empty"); @@ -192,43 +230,11 @@ impl VulnDatabase { Ok(()) } - /// Scan source code against all vulnerability patterns. + /// Scan `source` against all vulnerability patterns. + /// + /// Delegates to [`matcher::scan_source`] keeping I/O and matching separate. pub fn scan(&self, source: &str, file_name: &str) -> Vec { - let mut matches = Vec::new(); - - for vuln in &self.vulnerabilities { - let re = match Regex::new(&vuln.pattern) { - Ok(r) => r, - Err(_) => continue, - }; - - for mat in re.find_iter(source) { - let line = source[..mat.start()].matches('\n').count() + 1; - let line_start = source[..mat.start()] - .rfind('\n') - .map(|p| p + 1) - .unwrap_or(0); - let line_end = source[mat.end()..] - .find('\n') - .map(|p| mat.end() + p) - .unwrap_or(source.len()); - let snippet = source[line_start..line_end].trim().to_string(); - - matches.push(VulnMatch { - vuln_id: vuln.id.clone(), - name: vuln.name.clone(), - severity: vuln.severity.clone(), - category: vuln.category.clone(), - description: vuln.description.clone(), - recommendation: vuln.recommendation.clone(), - file: file_name.to_string(), - line, - snippet, - }); - } - } - - matches + matcher::scan_source(&self.vulnerabilities, source, file_name) } } diff --git a/tooling/sanctifier-cli/tests/cli_tests.rs b/tooling/sanctifier-cli/tests/cli_tests.rs index 24ae4759..4e05919c 100644 --- a/tooling/sanctifier-cli/tests/cli_tests.rs +++ b/tooling/sanctifier-cli/tests/cli_tests.rs @@ -667,7 +667,6 @@ fn test_analyze_json_includes_call_graph_edges() { /// Verifies that `sanctifier analyze --format json` output conforms to the /// published JSON Schema at `schemas/analysis-output.json`. #[test] -#[ignore = "Schema validation temporarily disabled - output format needs to be updated to match schema"] fn test_json_output_validates_against_schema() { // Locate the schema relative to the workspace root (two levels up from // this package's Cargo.toml directory). @@ -999,6 +998,209 @@ fn test_telemetry_flag_parses() { .success(); } +#[test] +fn test_analyze_invalid_project_returns_2() { + let mut cmd = Command::cargo_bin("sanctifier").unwrap(); + cmd.arg("analyze") + .arg("does-not-exist") + .arg("--exit-code") + .env_remove("RUST_LOG") + .assert() + .code(2); +} + +#[test] +fn test_init_creates_cargo_toml_and_lib_rs() { + let temp_dir = tempdir().unwrap(); + let project_path = temp_dir.path().join("test-contract"); + + let mut cmd = Command::cargo_bin("sanctifier").unwrap(); + cmd.arg("init").arg(&project_path).assert().success(); + + assert!( + project_path.join("Cargo.toml").exists(), + "init should create Cargo.toml" + ); + assert!( + project_path.join("src/lib.rs").exists(), + "init should create src/lib.rs" + ); +} + +#[test] +fn test_complexity_shows_table_in_stdout() { + let mut cmd = Command::cargo_bin("sanctifier").unwrap(); + let fixture_path = env::current_dir() + .unwrap() + .join("tests/fixtures/valid_contract.rs"); + + cmd.arg("complexity") + .arg(fixture_path) + .env_remove("RUST_LOG") + .assert() + .success() + .stdout(predicates::str::contains("Function")) + .stdout(predicates::str::contains("Complexity")); +} + +// ── Command surface & help UX tests (#516) ──────────────────────────────────── + +#[test] +fn test_no_subcommand_shows_usage() { + Command::cargo_bin("sanctifier") + .unwrap() + .assert() + .failure() + .stderr(predicates::str::contains("Usage:")); +} + +#[test] +fn test_unknown_subcommand_exits_nonzero() { + Command::cargo_bin("sanctifier") + .unwrap() + .arg("not-a-real-command") + .assert() + .failure(); +} + +#[test] +fn test_version_flag() { + Command::cargo_bin("sanctifier") + .unwrap() + .arg("--version") + .assert() + .success() + .stdout(predicates::str::contains("sanctifier")); +} + +#[test] +fn test_analyze_help_mentions_format_flag() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["analyze", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("--format")); +} + +#[test] +fn test_diff_help_is_accessible() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["diff", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("baseline")); +} + +#[test] +fn test_report_help_mentions_output_flag() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["report", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("--output")); +} + +#[test] +fn test_gas_help_is_accessible() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["gas", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("gas")); +} + +#[test] +fn test_storage_help_is_accessible() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["storage", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("storage")); +} + +#[test] +fn test_init_help_is_accessible() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["init", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("Initialize")); +} + +#[test] +fn test_complexity_help_is_accessible() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["complexity", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("complexity")); +} + +#[test] +fn test_fix_help_is_accessible() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["fix", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("patch")); +} + +#[test] +fn test_completions_bash_outputs_script() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["completions", "bash"]) + .assert() + .success() + .stdout(predicates::str::contains("sanctifier")); +} + +#[test] +fn test_completions_zsh_outputs_script() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["completions", "zsh"]) + .assert() + .success() + .stdout(predicates::str::contains("sanctifier")); +} + +#[test] +fn test_suppress_help_is_accessible() { + Command::cargo_bin("sanctifier") + .unwrap() + .args(["suppress", "--help"]) + .assert() + .success() + .stdout(predicates::str::contains("Suppress")); +} + +#[test] +fn test_top_level_help_lists_all_core_subcommands() { + let out = Command::cargo_bin("sanctifier") + .unwrap() + .arg("--help") + .assert() + .success() + .get_output() + .stdout + .clone(); + + let text = String::from_utf8(out).unwrap(); + for cmd in &["analyze", "report", "gas", "storage", "init", "complexity", "fix"] { + assert!( + text.contains(cmd), + "top-level --help should list '{cmd}' but didn't" + ); + } // ── #517: Config file resolution precedence ─────────────────────────────────── /// The config file in the same directory as the analysed file is used (nearest wins). diff --git a/tooling/sanctifier-core/src/lib.rs b/tooling/sanctifier-core/src/lib.rs index 655a4e84..a19baf5b 100644 --- a/tooling/sanctifier-core/src/lib.rs +++ b/tooling/sanctifier-core/src/lib.rs @@ -1,3 +1,43 @@ +//! **sanctifier-core** — static-analysis engine for Stellar Soroban smart +//! contracts. +//! +//! This crate provides the [`Analyzer`] entry-point together with a +//! [`RuleRegistry`] of pluggable rules. Every finding is tagged with a +//! canonical code from the [`finding_codes`] module (`S000` – `S027`). +//! +//! # JSON output schema +//! +//! When the CLI is invoked with `--format json` the output conforms to the +//! JSON Schema (draft-07) published at +//! `schemas/analysis-output.json` in the repository root. The schema is +//! versioned via a `schema_version` field in every report and validated in CI. +//! +//! ## Schema stability guarantees +//! +//! * **Additive changes** (new optional top-level fields) increment the patch +//! version only and are backward-compatible. +//! * **Structural changes** (renaming required fields, removing fields, or +//! changing type constraints) increment the minor or major version and +//! require the `schema_version` field to be updated accordingly. +//! * Downstream consumers **must** check `schema_version` before parsing +//! structured fields so they can detect unsupported versions early. +//! +//! ## Threat model notes +//! +//! The JSON report is written to stdout and consumed by CI pipelines, dashboards, +//! and human operators. The following properties are enforced to make the +//! output safe-by-default: +//! +//! * **No shell injection surface** — all string fields in findings (file paths, +//! snippets, messages) are JSON-escaped by `serde_json` and never interpolated +//! into shell commands by the engine itself. +//! * **Bounded output size** — the `--max-findings` config key caps the number of +//! findings per category so that a pathological contract cannot produce a +//! multi-gigabyte report. +//! * **Stable IDs** — every finding carries a `stable_id` derived from a content +//! hash so that downstream de-duplication and suppression logic is resilient to +//! minor reformats and line-number drift. + use regex::Regex; use serde::{Deserialize, Serialize}; #[cfg(feature = "soroban")]