Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 69 additions & 2 deletions tooling/sanctifier-cli/src/commands/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,11 @@ pub(crate) fn run_analysis(args: AnalyzeArgs) -> anyhow::Result<bool> {
return stream_ndjson(&args);
}

let path = &args.path;
let path = normalize_cli_path(args.path.clone());
if !path.exists() {
anyhow::bail!("path does not exist: {}", path.display());
}
if !is_soroban_project(path) {
if !is_soroban_project(&path) {
eprintln!("No Soroban project found at {:?}", path);
return Ok(false);
}
Expand Down Expand Up @@ -681,3 +681,70 @@ fn sha256_hex(content: &str) -> String {
hasher.update(content.as_bytes());
format!("{:x}", hasher.finalize())
}

// ── Path normalization ────────────────────────────────────────────────────────

/// Normalize a CLI path argument for the current OS.
///
/// On non-Windows platforms, backslash separators that users copy from Windows
/// paths (e.g. `tests\fixtures\contract.rs`) are silently converted to POSIX
/// forward-slash paths so that the rest of the pipeline can handle them
/// uniformly. No conversion is needed on Windows because the OS accepts both
/// separator styles natively.
///
/// # Platform behaviour
/// | Platform | Input | Output |
/// |----------|-------|--------|
/// | Linux/macOS | `foo\bar\baz.rs` | `foo/bar/baz.rs` |
/// | Linux/macOS | `foo/bar/baz.rs` | `foo/bar/baz.rs` (unchanged) |
/// | Windows | any | unchanged (OS handles both) |
#[cfg(not(windows))]
pub(crate) fn normalize_cli_path(p: PathBuf) -> PathBuf {
let s = p.to_string_lossy();
if s.contains('\\') {
PathBuf::from(s.replace('\\', "/"))
} else {
p
}
}

#[cfg(windows)]
pub(crate) fn normalize_cli_path(p: PathBuf) -> PathBuf {
p
}

#[cfg(test)]
mod path_normalization_tests {
use super::normalize_cli_path;
use std::path::PathBuf;

#[test]
#[cfg(not(windows))]
fn unix_converts_backslashes_to_forward_slashes() {
let result = normalize_cli_path(PathBuf::from("tests\\fixtures\\valid_contract.rs"));
assert_eq!(result, PathBuf::from("tests/fixtures/valid_contract.rs"));
}

#[test]
#[cfg(not(windows))]
fn unix_passthrough_when_no_backslashes() {
let p = PathBuf::from("tests/fixtures/valid_contract.rs");
let result = normalize_cli_path(p.clone());
assert_eq!(result, p);
}

#[test]
#[cfg(not(windows))]
fn unix_handles_mixed_separators() {
let result = normalize_cli_path(PathBuf::from("tests\\fixtures/contract.rs"));
assert_eq!(result, PathBuf::from("tests/fixtures/contract.rs"));
}

#[test]
#[cfg(windows)]
fn windows_path_is_returned_unchanged() {
let p = PathBuf::from("tests\\fixtures\\valid_contract.rs");
let result = normalize_cli_path(p.clone());
assert_eq!(result, p);
}
}
79 changes: 79 additions & 0 deletions tooling/sanctifier-cli/src/vulndb/matcher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//! Pattern-matching engine for the vulnerability database.
//!
//! This module owns the [`VulnMatch`] result type and the [`scan_source`]
//! function that runs every [`super::VulnEntry`] regex pattern against a
//! source file. Keeping the matching logic separate from the database I/O in
//! [`super`] makes the boundary between "loading data" and "using data" clear
//! and allows the scanner to be unit-tested without touching the file system.

use regex::Regex;
use serde::{Deserialize, Serialize};

use super::VulnEntry;

/// A single pattern match from the vulnerability database scan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnMatch {
/// Unique identifier of the matched vulnerability (e.g. `"VULN-001"`).
pub vuln_id: String,
/// Human-readable name of the vulnerability.
pub name: String,
/// Severity level string (one of `critical`, `high`, `medium`, `low`, `info`).
pub severity: String,
/// Broad vulnerability category.
pub category: String,
/// Human-readable description of the vulnerability.
pub description: String,
/// Actionable recommendation for the developer.
pub recommendation: String,
/// Path of the source file in which the match was found.
pub file: String,
/// 1-based line number of the match.
pub line: usize,
/// Source-code snippet around the match.
pub snippet: String,
}

/// Scan `source` against every entry in `vulns` and return all matches.
///
/// Each [`VulnEntry`] whose `pattern` regex matches anywhere in `source`
/// produces one [`VulnMatch`] per occurrence. Invalid regex patterns are
/// silently skipped (they are already validated at database load time via
/// [`super::VulnDatabase::validate`]).
pub fn scan_source(vulns: &[VulnEntry], source: &str, file_name: &str) -> Vec<VulnMatch> {
let mut matches = Vec::new();

for vuln in vulns {
let re = match Regex::new(&vuln.pattern) {
Ok(r) => r,
Err(_) => continue,
};

for mat in re.find_iter(source) {
let line = source[..mat.start()].matches('\n').count() + 1;
let line_start = source[..mat.start()]
.rfind('\n')
.map(|p| p + 1)
.unwrap_or(0);
let line_end = source[mat.end()..]
.find('\n')
.map(|p| mat.end() + p)
.unwrap_or(source.len());
let snippet = source[line_start..line_end].trim().to_string();

matches.push(VulnMatch {
vuln_id: vuln.id.clone(),
name: vuln.name.clone(),
severity: vuln.severity.clone(),
category: vuln.category.clone(),
description: vuln.description.clone(),
recommendation: vuln.recommendation.clone(),
file: file_name.to_string(),
line,
snippet,
});
}
}

matches
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,33 @@
#![allow(dead_code)]
//! Vulnerability database — loading, validation, and pattern matching.
//!
//! # Module layout
//!
//! | Submodule | Responsibility |
//! |-----------|----------------|
//! | (this file) | Database types, JSON loading, semantic validation |
//! | [`matcher`] | Regex scan engine and [`VulnMatch`] result type |
//!
//! ## Threat model
//!
//! The vulnerability database is an untrusted external input (especially when
//! loaded from a user-supplied `--vuln-db` path). [`VulnDatabase::validate`]
//! runs before any scanning to:
//!
//! 1. Reject databases whose entries contain invalid regular expressions,
//! preventing a malformed pattern from panicking inside `regex::Regex::new`.
//! 2. Enforce unique IDs and non-overlapping signatures so that a crafted DB
//! cannot produce duplicate or misleading findings.
//! 3. Reject unknown severity strings to keep downstream consumers (JSON
//! output, CI exit-code logic) from seeing unexpected values.
//!
//! The embedded default database (`data/vulnerability-db.json`) is validated
//! at compile-time via `expect` — a bug in the embedded DB causes a build
//! failure, not a runtime error.

pub mod matcher;

pub use matcher::VulnMatch;

use anyhow::Context;
use regex::Regex;
Expand All @@ -7,42 +36,45 @@ use std::collections::HashMap;
use std::fs;
use std::path::Path;

/// A single entry in the vulnerability database.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct VulnEntry {
/// Unique identifier (e.g. `"VULN-001"`).
pub id: String,
/// Human-readable name.
pub name: String,
/// Human-readable description.
pub description: String,
/// Severity level: one of `critical`, `high`, `medium`, `low`, `info`.
pub severity: String,
/// Broad vulnerability category.
pub category: String,
/// Regex pattern matched against source code.
pub pattern: String,
/// Actionable recommendation.
pub recommendation: String,
/// Optional external references (CVEs, advisories, …).
#[serde(default)]
pub references: Vec<String>,
}

/// A parsed and validated vulnerability database.
#[derive(Debug, Clone, Deserialize)]
pub struct VulnDatabase {
/// Schema version of this database file.
pub version: String,
/// ISO-8601 date of the last update.
pub last_updated: String,
/// Human-readable description of the database.
pub description: String,
/// All vulnerability entries.
pub vulnerabilities: Vec<VulnEntry>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnMatch {
pub vuln_id: String,
pub name: String,
pub severity: String,
pub category: String,
pub description: String,
pub recommendation: String,
pub file: String,
pub line: usize,
pub snippet: String,
}

impl VulnDatabase {
/// Load the vulnerability database from a JSON file.
/// Load a vulnerability database from a JSON file on disk.
///
/// The file is parsed and then semantically validated via [`Self::validate`].
pub fn load(path: &Path) -> anyhow::Result<Self> {
let content = fs::read_to_string(path)
.with_context(|| format!("failed to read vulnerability database {}", path.display()))?;
Expand All @@ -61,17 +93,23 @@ impl VulnDatabase {
Ok(db)
}

/// Load the embedded default vulnerability database.
/// Load the embedded default vulnerability database (compiled into the binary).
///
/// Panics at startup if the embedded JSON is invalid — this is intentional
/// because a broken embedded database is a build defect, not a runtime one.
pub fn load_default() -> Self {
let content = include_str!("../data/vulnerability-db.json");
let content = include_str!("../../data/vulnerability-db.json");
let db: VulnDatabase =
serde_json::from_str(content).expect("embedded vulnerability-db.json is valid JSON");
db.validate()
.expect("embedded vulnerability-db.json passes semantic validation");
db
}

/// Validate uniqueness and overlap constraints that JSON Schema cannot express.
/// Validate uniqueness and semantic constraints that JSON Schema cannot express.
///
/// Returns an error listing **all** validation failures so that users can
/// fix their custom database in one pass rather than chasing errors one by one.
pub fn validate(&self) -> anyhow::Result<()> {
if self.version.trim().is_empty() {
anyhow::bail!("vulnerability database version must not be empty");
Expand Down Expand Up @@ -192,43 +230,11 @@ impl VulnDatabase {
Ok(())
}

/// Scan source code against all vulnerability patterns.
/// Scan `source` against all vulnerability patterns.
///
/// Delegates to [`matcher::scan_source`] keeping I/O and matching separate.
pub fn scan(&self, source: &str, file_name: &str) -> Vec<VulnMatch> {
let mut matches = Vec::new();

for vuln in &self.vulnerabilities {
let re = match Regex::new(&vuln.pattern) {
Ok(r) => r,
Err(_) => continue,
};

for mat in re.find_iter(source) {
let line = source[..mat.start()].matches('\n').count() + 1;
let line_start = source[..mat.start()]
.rfind('\n')
.map(|p| p + 1)
.unwrap_or(0);
let line_end = source[mat.end()..]
.find('\n')
.map(|p| mat.end() + p)
.unwrap_or(source.len());
let snippet = source[line_start..line_end].trim().to_string();

matches.push(VulnMatch {
vuln_id: vuln.id.clone(),
name: vuln.name.clone(),
severity: vuln.severity.clone(),
category: vuln.category.clone(),
description: vuln.description.clone(),
recommendation: vuln.recommendation.clone(),
file: file_name.to_string(),
line,
snippet,
});
}
}

matches
matcher::scan_source(&self.vulnerabilities, source, file_name)
}
}

Expand Down
Loading
Loading