diff --git a/README.md b/README.md index 2815e286..1a4fec8a 100644 --- a/README.md +++ b/README.md @@ -215,6 +215,29 @@ See [Engine::get_coverage_report](https://docs.rs/regorus/latest/regorus/struct. Policy coverage information is useful for debugging your policy as well as to write tests for your policy so that all lines of the policy are exercised by the tests. +## Type analysis + +Regorus contains an experimental structural type analysis engine that can be used to reason about rule outputs, +expression constants, schema-backed types, and diagnostics before executing a policy. Applications can opt in by +calling `Engine::enable_type_checking`, optionally providing input and data schemas or limiting the analysis to +specific entrypoints via the `TypeChecker` API. + +The `regorus` example binary exposes this functionality through the new `analyze` subcommand: + +```bash +$ regorus analyze -d examples/server/allowed_server.rego \ + --input-schema examples/server/input.schema.json \ + -e data.example +``` + +This produces a structured report that lists inferred rule facts, expression-level facts, provenance, and any +diagnostics that were discovered. Pass `--verbose` to include dependency graphs and specialization summaries, or use +`-b`/`--bundles` to analyze entire policy directories. + +End-to-end regression tests for the analyzer live under `src/tests/type_analysis` with YAML fixtures located in +`tests/type_analysis`. Each case describes modules, optional schemas, and the expected facts or diagnostics; running +`cargo test type_analysis::run` executes the full suite. + ## ACI Policies Regorus successfully passes the ACI policy test-suite. It is fast and can run each of the tests in a few milliseconds. diff --git a/examples/helpers/mod.rs b/examples/helpers/mod.rs new file mode 100644 index 00000000..fb5ac2b4 --- /dev/null +++ b/examples/helpers/mod.rs @@ -0,0 +1 @@ +pub mod type_analysis; diff --git a/examples/helpers/type_analysis.rs b/examples/helpers/type_analysis.rs new file mode 100644 index 00000000..2d74eb52 --- /dev/null +++ b/examples/helpers/type_analysis.rs @@ -0,0 +1,1540 @@ +use std::collections::BTreeMap; + +use anyhow::{anyhow, bail, Result}; +use regorus::type_analysis::model::{ + ConstantValue, PathSegment, SourceOrigin, SourceRoot, TypeDiagnosticSeverity, TypeFact, + TypeProvenance, +}; +use regorus::type_analysis::result::{ + DefinitionSummary, DependencyKind, RuleBodyKind, RuleBodySummary, RuleKind, + RuleSpecializationRecord, +}; +use regorus::type_analysis::{StructuralType, TypeAnalysisResult, TypeDescriptor}; +use regorus::unstable::{ + ArithOp, AssignOp, BinOp, BoolOp, Expr, Literal, LiteralStmt, Query, Rule, RuleHead, +}; +use regorus::{get_path_string, Schema}; + +use crate::add_policy_from_file; + +#[allow(unused_variables)] +pub fn rego_type_analysis( + bundles: &[String], + files: &[String], + input_schema: Option, + data_schema: Option, + entrypoints: Vec, + v0: bool, + verbose: bool, +) -> Result<()> { + // Create engine. + let mut engine = regorus::Engine::new(); + engine.set_rego_v0(v0); + + // Load files from given bundles. + for dir in bundles.iter() { + let entries = + std::fs::read_dir(dir).or_else(|e| bail!("failed to read bundle {dir}.\n{e}"))?; + for entry in entries { + let entry = entry.or_else(|e| bail!("failed to unwrap entry. 
{e}"))?; + let path = entry.path(); + + match (path.is_file(), path.extension()) { + (true, Some(ext)) if ext == "rego" => {} + _ => continue, + } + + let _package = add_policy_from_file(&mut engine, entry.path().display().to_string())?; + } + } + + // Load given policy files. + for file in files.iter() { + if file.ends_with(".rego") { + let _package = add_policy_from_file(&mut engine, file.clone())?; + } else { + bail!("Type analysis only accepts .rego files. Got: {file}"); + } + } + + // Enable type checking on the engine + engine.enable_type_checking(); + + // Set entrypoints if provided + if !entrypoints.is_empty() { + if let Some(checker) = engine.get_type_checker_mut() { + checker.set_entrypoints(entrypoints); + } + } + + // Load schemas if provided and set them on the type checker + if let Some(path) = input_schema { + let schema_str = std::fs::read_to_string(&path) + .map_err(|e| anyhow!("failed to read input schema {path}: {e}"))?; + let schema = Schema::from_json_str(&schema_str) + .map_err(|e| anyhow!("failed to parse input schema: {:?}", e))?; + + if let Some(checker) = engine.get_type_checker_mut() { + checker.set_input_schema(schema); + } + } + + if let Some(path) = data_schema { + let schema_str = std::fs::read_to_string(&path) + .map_err(|e| anyhow!("failed to read data schema {path}: {e}"))?; + let schema = Schema::from_json_str(&schema_str) + .map_err(|e| anyhow!("failed to parse data schema: {:?}", e))?; + + if let Some(checker) = engine.get_type_checker_mut() { + checker.set_data_schema(schema); + } + } + + // Run type checking (this will automatically run loop hoisting) + let _diagnostics = engine.type_check()?; + + // Get the type analysis result and clone it to avoid borrow conflicts + let requested_entrypoints = { + let checker = engine + .get_type_checker() + .ok_or_else(|| anyhow!("Type checker not available"))?; + checker + .get_entrypoints() + .map(|eps| eps.to_vec()) + .unwrap_or_default() + }; + + let (result, modules) = { + let checker = engine + .get_type_checker() + .ok_or_else(|| anyhow!("Type checker not available"))?; + let result = checker + .get_result() + .ok_or_else(|| anyhow!("Type analysis results not available"))? 
+ .clone(); + let modules_ref = engine.get_modules(); + (result, modules_ref.clone()) + }; + + // Print entrypoint filtering statistics if applicable + if !requested_entrypoints.is_empty() { + println!("\n=== Entrypoint Filtering ===\n"); + println!("šŸ“ Requested entrypoints: {}", requested_entrypoints.len()); + for ep in &requested_entrypoints { + println!(" • {}", ep); + } + + // Get reachable rules from lookup + let reachable_rules: Vec = result + .expressions + .facts + .reachable_rules() + .cloned() + .collect(); + if !reachable_rules.is_empty() { + println!("\nšŸŽÆ Reachable rules: {}", reachable_rules.len()); + + // Count total rules in all modules + let total_rules: usize = modules.iter().map(|m| m.policy.len()).sum(); + let analyzed_percent = if total_rules > 0 { + (reachable_rules.len() * 100) / total_rules + } else { + 0 + }; + + println!("šŸ“Š Total rules in policy: {}", total_rules); + println!("⚔ Analysis coverage: {}%", analyzed_percent); + + if verbose { + println!("\nReachable rules:"); + for rule_path in &reachable_rules { + println!(" • {}", rule_path); + } + } + } + println!("\n=== Type Analysis Results ===\n"); + } + println!("\n=== Type Analysis Results ===\n"); + // Get reachable rules set for filtering (if entrypoint filtering is active) + let reachable_rules: Option> = + if !requested_entrypoints.is_empty() { + Some( + result + .expressions + .facts + .reachable_rules() + .cloned() + .collect(), + ) + } else { + None + }; + let reachable_rules = reachable_rules.as_ref(); + + if !result.diagnostics.is_empty() { + println!("āš ļø Diagnostics:"); + for diag in &result.diagnostics { + let severity = match diag.severity { + TypeDiagnosticSeverity::Error => "error", + TypeDiagnosticSeverity::Warning => "warning", + }; + println!( + " [{}] Line {}, Col {}: {}", + severity, diag.line, diag.col, diag.message + ); + } + println!(); + } + + // Print rule information + for (module_idx, module) in modules.iter().enumerate() { + if module.policy.is_empty() { + continue; + } + + let module_path = get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + println!("šŸ“¦ Module: {}", module_path); + println!("{}", "─".repeat(80)); + + // Rule analysis info not yet available in TypeAnalysisResult + // TODO: Populate rules field in TypeAnalysisResult.from_analysis_state + for (rule_idx, rule) in module.policy.iter().enumerate() { + // Check if this is a default rule + let is_default = matches!(rule.as_ref(), Rule::Default { .. }); + + // Check if this rule has bodies (depends on runtime evaluation) + let has_bodies = match rule.as_ref() { + Rule::Spec { bodies, .. } => !bodies.is_empty(), + Rule::Default { .. } => false, + }; + + // Get rule head expression + let refr = match rule.as_ref() { + Rule::Spec { head, .. } => match head { + RuleHead::Compr { refr, .. } + | RuleHead::Set { refr, .. } + | RuleHead::Func { refr, .. } => Some(refr), + }, + Rule::Default { refr, .. 
} => Some(refr), + }; + + if let Some(refr) = refr { + let rule_path = get_path_string(refr.as_ref(), Some(&module_path)) + .unwrap_or_else(|_| format!("rule_{}", rule_idx)); + + // Skip if entrypoint filtering is active and this rule is not reachable + if let Some(reachable) = reachable_rules { + if !reachable.contains(&rule_path) { + continue; + } + } + + let rule_summary_ref = result + .rules + .modules + .get(module_idx) + .and_then(|module_summary| module_summary.rules.get(rule_idx)); + + // Get rule kind icon + let icon = if is_default { + "šŸ”¹" + } else if matches!( + rule.as_ref(), + Rule::Spec { + head: RuleHead::Func { .. }, + .. + } + ) { + "ʒ" + } else { + "•" + }; + + // Print rule header without location (only definitions have locations) + print!("\n {} {}", icon, rule_path); + if is_default { + print!(" (default)"); + } + println!(); + + // In verbose mode, show aggregated info + if verbose { + if let Some(rule_summary) = rule_summary_ref { + if let Some(agg) = &rule_summary.aggregated_head_fact { + println!(" Aggregated: {}", format_fact_summary(agg)); + } + } + } + + // Show definitions + if let Some(rule_summary) = rule_summary_ref { + let is_function_rule = rule_summary.kind == RuleKind::Function; + + for (def_idx, definition) in rule_summary.definitions.iter().enumerate() { + let def_location = definition + .span + .as_ref() + .map(|span| format!(" → {}", span.format())) + .unwrap_or_default(); + + // Show definition header with location (only for multiple definitions) + if rule_summary.definitions.len() > 1 { + println!(" Definition #{}{}", def_idx + 1, def_location); + } + + // Show type and values + let bodies_available = has_bodies && !definition.bodies.is_empty(); + let show_bodies = bodies_available && !is_function_rule; + + if let Some(fact) = &definition.aggregated_head_fact { + print!(" Type: "); + print_type_descriptor(&fact.descriptor); + println!(); + + if show_bodies { + print_definition_bodies( + module_idx, + rule.as_ref(), + definition, + &result, + verbose, + ); + } else if bodies_available && is_function_rule { + if rule_summary.specializations.is_empty() { + println!(" Bodies: "); + } + } else if let ConstantValue::Known(value) = &fact.constant { + println!(" Constant: {}", serde_json::to_string(&value)?); + } + } else if show_bodies { + println!(" Type: "); + print_definition_bodies( + module_idx, + rule.as_ref(), + definition, + &result, + verbose, + ); + } else if bodies_available && is_function_rule { + println!(" Type: "); + if rule_summary.specializations.is_empty() { + println!(" Bodies: "); + } + } else { + println!(" Type: "); + } + + // In verbose mode, show additional details + if verbose { + if let Some(fact) = &definition.aggregated_head_fact { + if !fact.origins.is_empty() { + print!(" Origins: "); + for (i, origin) in fact.origins.iter().enumerate() { + if i > 0 { + print!(", "); + } + print!("{:?}:{:?}", origin.root, origin.path); + } + println!(); + } + } + } + } + + // Show specializations for function rules + if !rule_summary.specializations.is_empty() { + if verbose + && !rule_summary.aggregated_parameter_facts.is_empty() + && rule_summary + .aggregated_parameter_facts + .iter() + .any(|fact| fact.is_some()) + { + println!(" Parameter types (union):"); + for (arg_idx, fact) in + rule_summary.aggregated_parameter_facts.iter().enumerate() + { + match fact { + Some(fact) => println!( + " arg {}: {}", + arg_idx + 1, + format_fact_summary(fact) + ), + None => println!(" arg {}: ", arg_idx + 1), + } + } + } + + println!( + " 
Specializations: {}", + rule_summary.specializations.len() + ); + for spec in &rule_summary.specializations { + let signature_display = + format_specialization_signature(&rule_path, spec); + let head_display = spec + .head_fact + .as_ref() + .map(format_fact_summary) + .unwrap_or_else(|| "Unknown".to_owned()); + println!(" - {} → {}", signature_display, head_display); + + if let Some(constant) = &spec.constant_value { + println!(" Constant value: {}", constant); + } + + let body_entries = collect_specialization_bodies( + rule.as_ref(), + &rule_summary.definitions, + spec, + ); + if !body_entries.is_empty() { + println!(" Bodies:"); + for entry in &body_entries { + let location = entry + .body + .span + .as_ref() + .map(|span| format!(" ({})", span.format())) + .unwrap_or_default(); + + let fact = entry.specialized_fact.or(entry.fallback_fact); + println!( + " - {}{}: {}", + entry.label, + location, + render_body_value(fact, entry.body.is_constant) + ); + } + } + + if !verbose { + continue; + } + + let overlay = SpecializationFactOverlay::new(&result, &spec.expr_facts); + + for entry in &body_entries { + if let Some(verbose_info) = collect_body_verbose_info( + spec.signature.module_idx, + entry.definition_rule, + entry.body_idx, + &overlay, + ) { + print_body_verbose_details( + 9, + entry.label.as_str(), + &verbose_info, + ); + } + } + + if !spec.expr_facts.is_empty() { + println!(" Expression facts:"); + for (fact_module_idx, exprs) in &spec.expr_facts { + let module_label = modules + .get(*fact_module_idx as usize) + .and_then(|m| { + get_path_string(m.package.refr.as_ref(), Some("data")) + .ok() + }) + .unwrap_or_else(|| format!("module {}", fact_module_idx)); + println!(" [{}]", module_label); + for (expr_idx, expr_fact) in exprs { + println!( + " expr #{}: {}", + expr_idx, + format_fact_summary(expr_fact) + ); + } + } + } + } + } + + // Show dependencies in verbose mode + if verbose { + if !rule_summary.input_dependencies.is_empty() { + println!(" Input dependencies:"); + for origin in &rule_summary.input_dependencies { + println!(" - {}", format_origin(origin)); + } + } + + if !rule_summary.rule_dependencies.is_empty() { + println!(" Rule dependencies:"); + for dep in &rule_summary.rule_dependencies { + let kind_label = describe_dependency_kind(&dep.kind); + println!(" - {} ({})", dep.target, kind_label); + + if let Some(target_summary) = result.rules.by_path.get(&dep.target) + { + if !target_summary.specializations.is_empty() { + println!( + " Specializations: {}", + target_summary.specializations.len() + ); + for spec in &target_summary.specializations { + let sig_display = + format_specialization_signature(&dep.target, spec); + let head_display = spec + .head_fact + .as_ref() + .map(format_fact_summary) + .unwrap_or_else(|| "Unknown".to_owned()); + println!( + " • {} → {}", + sig_display, head_display + ); + } + } + } + } + } + } + } + } + } + + println!(); + } + + Ok(()) +} + +fn format_origin(origin: &SourceOrigin) -> String { + let mut path = match origin.root { + SourceRoot::Input => String::from("input"), + SourceRoot::Data => String::from("data"), + }; + + for segment in &origin.path { + match segment { + PathSegment::Field(name) => { + path.push('.'); + path.push_str(name); + } + PathSegment::Index(idx) => { + path.push('['); + path.push_str(&idx.to_string()); + path.push(']'); + } + PathSegment::Any => path.push_str("[*]"), + } + } + + if origin.derived { + path.push_str(" (derived)"); + } + + path +} + +fn describe_dependency_kind(kind: &DependencyKind) -> &'static str { + 
match kind { + DependencyKind::StaticCall => "static", + DependencyKind::DynamicCall => "dynamic", + DependencyKind::DefaultLink => "default link", + } +} + +fn format_specialization_signature(rule_path: &str, spec: &RuleSpecializationRecord) -> String { + let args: Vec = spec + .parameter_facts + .iter() + .map(format_fact_summary) + .collect(); + + let joined = if args.is_empty() { + String::new() + } else { + args.join(", ") + }; + + format!("{}({})", rule_path, joined) +} + +fn describe_type_descriptor(descriptor: &TypeDescriptor) -> String { + match descriptor { + TypeDescriptor::Schema(schema) => describe_schema(schema), + TypeDescriptor::Structural(st) => describe_structural_type(st), + } +} + +fn describe_schema(schema: &Schema) -> String { + describe_schema_type(schema.as_type()) +} + +fn describe_schema_type(ty: ®orus::schema::Type) -> String { + use regorus::schema::Type as SchemaType; + + match ty { + SchemaType::Any { .. } => "Any".to_owned(), + SchemaType::Integer { .. } => "Integer".to_owned(), + SchemaType::Number { .. } => "Number".to_owned(), + SchemaType::Boolean { .. } => "Boolean".to_owned(), + SchemaType::Null { .. } => "Null".to_owned(), + SchemaType::String { .. } => "String".to_owned(), + SchemaType::Array { items, .. } => { + format!("Array[{}]", describe_schema_type(items.as_type())) + } + SchemaType::Set { items, .. } => { + format!("Set[{}]", describe_schema_type(items.as_type())) + } + SchemaType::Object { + properties, + required, + additional_properties, + .. + } => { + if properties.is_empty() && additional_properties.is_none() { + return "Object".to_owned(); + } + + let mut parts: Vec = Vec::new(); + for (name, schema) in properties.iter() { + let is_required = required + .as_ref() + .map(|reqs| { + reqs.iter() + .any(|req_name| req_name.as_ref() == name.as_ref()) + }) + .unwrap_or(false); + + let optional_marker = if is_required { "" } else { "?" }; + parts.push(format!( + "{}{}: {}", + name, + optional_marker, + describe_schema_type(schema.as_type()) + )); + } + + if let Some(additional) = additional_properties { + if !matches!(additional.as_type(), SchemaType::Any { .. }) { + parts.push(format!( + "additional: {}", + describe_schema_type(additional.as_type()) + )); + } + } + + format!("Object{{{}}}", parts.join(", ")) + } + SchemaType::Const { value, .. } => value + .to_json_str() + .map(|s| format!("Const({s})")) + .unwrap_or_else(|_| "Const".to_owned()), + SchemaType::Enum { values, .. 
} => { + let rendered: Vec = values + .iter() + .map(|v| v.to_json_str().unwrap_or_else(|_| "?".to_owned())) + .collect(); + format!("Enum[{}]", rendered.join(" | ")) + } + SchemaType::AnyOf(variants) => { + let rendered: Vec = variants + .iter() + .map(|variant| describe_schema_type(variant.as_type())) + .collect(); + format!("AnyOf[{}]", rendered.join(" | ")) + } + } +} + +fn describe_structural_type(st: &StructuralType) -> String { + describe_structural_type_impl(st) +} + +fn describe_structural_type_impl(st: &StructuralType) -> String { + match st { + StructuralType::Any => "Any".to_owned(), + StructuralType::Boolean => "Boolean".to_owned(), + StructuralType::Number => "Number".to_owned(), + StructuralType::Integer => "Integer".to_owned(), + StructuralType::String => "String".to_owned(), + StructuralType::Null => "Null".to_owned(), + StructuralType::Array(elem) => { + format!("Array[{}]", describe_structural_type_impl(elem)) + } + StructuralType::Set(elem) => { + format!("Set[{}]", describe_structural_type_impl(elem)) + } + StructuralType::Object(shape) => { + if shape.fields.is_empty() { + "Object".to_owned() + } else { + let mut parts: Vec = Vec::new(); + for (name, ty) in &shape.fields { + parts.push(format!("{}: {}", name, describe_structural_type_impl(ty))); + } + format!("Object{{{}}}", parts.join(", ")) + } + } + StructuralType::Union(types) => { + let rendered: Vec = types.iter().map(describe_structural_type_impl).collect(); + format!("Union[{}]", rendered.join(", ")) + } + StructuralType::Enum(values) => { + let rendered: Vec = values + .iter() + .map(|value| { + serde_json::to_string(value).unwrap_or_else(|_| format!("{:?}", value)) + }) + .collect(); + format!("Enum[{}]", rendered.join(", ")) + } + StructuralType::Unknown => "Unknown".to_owned(), + } +} + +fn print_type_descriptor(descriptor: &TypeDescriptor) { + let rendered = describe_type_descriptor(descriptor); + print!("{}", rendered); +} + +#[allow(dead_code)] +fn print_structural_type(st: &StructuralType) { + print_structural_type_impl(st); +} + +fn print_structural_type_impl(st: &StructuralType) { + match st { + StructuralType::Any => print!("Any"), + StructuralType::Boolean => print!("Boolean"), + StructuralType::Number => print!("Number"), + StructuralType::Integer => print!("Integer"), + StructuralType::String => print!("String"), + StructuralType::Null => print!("Null"), + StructuralType::Array(elem) => { + print!("Array["); + print_structural_type_impl(elem); + print!("]"); + } + StructuralType::Set(elem) => { + print!("Set["); + print_structural_type_impl(elem); + print!("]"); + } + StructuralType::Object(shape) => { + if shape.fields.is_empty() { + print!("Object"); + } else { + print!("Object{{"); + for (i, (name, ty)) in shape.fields.iter().enumerate() { + if i > 0 { + print!(", "); + } + print!("{}: ", name); + print_structural_type_impl(ty); + } + print!("}}"); + } + } + StructuralType::Union(types) => { + print!("Union["); + for (i, t) in types.iter().enumerate() { + if i > 0 { + print!(", "); + } + print_structural_type_impl(t); + } + print!("]"); + } + StructuralType::Enum(values) => { + print!("Enum["); + for (i, v) in values.iter().enumerate() { + if i > 0 { + print!(", "); + } + if let Ok(s) = v.as_string() { + print!("{:?}", s.as_ref()); + } else { + print!( + "{}", + serde_json::to_string(v).unwrap_or_else(|_| "?".to_string()) + ); + } + } + print!("]"); + } + StructuralType::Unknown => print!("Unknown"), + } +} + +trait ExprFactSource { + fn get_expr_fact(&self, module_idx: u32, expr_idx: u32) -> 
Option; +} + +impl ExprFactSource for TypeAnalysisResult { + fn get_expr_fact(&self, module_idx: u32, expr_idx: u32) -> Option { + self.expressions + .facts + .get_expr(module_idx, expr_idx) + .cloned() + } +} + +struct SpecializationFactOverlay<'a> { + base: &'a TypeAnalysisResult, + overrides: &'a BTreeMap>, +} + +impl<'a> SpecializationFactOverlay<'a> { + fn new( + base: &'a TypeAnalysisResult, + overrides: &'a BTreeMap>, + ) -> Self { + Self { base, overrides } + } +} + +impl<'a> ExprFactSource for SpecializationFactOverlay<'a> { + fn get_expr_fact(&self, module_idx: u32, expr_idx: u32) -> Option { + if let Some(exprs) = self.overrides.get(&module_idx) { + if let Some(fact) = exprs.get(&expr_idx) { + return Some(fact.clone()); + } + } + + self.base.get_expr_fact(module_idx, expr_idx) + } +} + +struct RuleVerboseInfo { + locals: Vec, + statements: Vec, +} + +#[derive(Clone)] +struct LocalDisplay { + name: String, + fact: Option, +} + +struct StatementDisplay { + summary: String, + fact_lines: Vec, +} + +struct LocalCollector { + order: Vec, + entries: BTreeMap, +} + +impl LocalCollector { + fn new() -> Self { + Self { + order: Vec::new(), + entries: BTreeMap::new(), + } + } + + fn note(&mut self, name: String, fact: Option) { + let entry = self.entries.entry(name.clone()).or_insert_with(|| { + self.order.push(name.clone()); + LocalDisplay { name, fact: None } + }); + + if let Some(fact) = fact { + entry.fact = Some(fact); + } + } + + fn into_vec(self) -> Vec { + let mut displays = Vec::new(); + let LocalCollector { order, entries } = self; + for name in order { + if let Some(entry) = entries.get(&name) { + displays.push(entry.clone()); + } + } + displays + } +} + +fn process_query_statements( + module_idx: u32, + query: &Query, + facts: &impl ExprFactSource, + indent: usize, + locals: &mut LocalCollector, +) -> Vec { + let mut out = Vec::new(); + for stmt in &query.stmts { + out.extend(process_literal(module_idx, stmt, facts, indent, locals)); + } + out +} + +fn process_literal( + module_idx: u32, + stmt: &LiteralStmt, + facts: &impl ExprFactSource, + indent: usize, + locals: &mut LocalCollector, +) -> Vec { + let mut displays = Vec::new(); + let indent_str = " ".repeat(indent); + let summary = format!("{}{}", indent_str, summarize_literal(&stmt.literal)); + let mut fact_lines = Vec::new(); + + match &stmt.literal { + Literal::SomeVars { vars, .. } => { + for span in vars { + locals.note(span.text().to_owned(), None); + } + } + Literal::SomeIn { + key, + value, + collection, + .. + } => { + if let Some(key_expr) = key { + if let Some(name) = extract_var_name(key_expr.as_ref()) { + let fact = get_fact(facts, module_idx, key_expr.as_ref()); + if let Some(fact) = fact.clone() { + fact_lines.push(format_fact_entry("key", &fact)); + } + locals.note(name, fact); + } else { + collect_expr_fact_lines( + module_idx, + key_expr.as_ref(), + facts, + &mut fact_lines, + locals, + ); + } + } + + if let Some(name) = extract_var_name(value.as_ref()) { + let fact = get_fact(facts, module_idx, value.as_ref()); + if let Some(fact) = fact.clone() { + fact_lines.push(format_fact_entry("value", &fact)); + } + locals.note(name, fact); + } else { + collect_expr_fact_lines(module_idx, value.as_ref(), facts, &mut fact_lines, locals); + } + + if let Some(fact) = get_fact(facts, module_idx, collection.as_ref()) { + fact_lines.push(format_fact_entry("collection", &fact)); + } + } + Literal::Expr { expr, .. 
} => { + collect_expr_fact_lines(module_idx, expr.as_ref(), facts, &mut fact_lines, locals); + } + Literal::NotExpr { expr, .. } => { + collect_expr_fact_lines(module_idx, expr.as_ref(), facts, &mut fact_lines, locals); + } + Literal::Every { + key, + value, + domain, + query, + .. + } => { + if let Some(key_span) = key { + locals.note(key_span.text().to_owned(), None); + } + locals.note(value.text().to_owned(), None); + + if let Some(fact) = get_fact(facts, module_idx, domain.as_ref()) { + fact_lines.push(format_fact_entry("domain", &fact)); + } + + let mut nested = + process_query_statements(module_idx, query.as_ref(), facts, indent + 2, locals); + displays.append(&mut nested); + } + } + + displays.push(StatementDisplay { + summary, + fact_lines, + }); + displays +} + +fn summarize_literal(literal: &Literal) -> String { + match literal { + Literal::SomeVars { vars, .. } => { + let names: Vec = vars.iter().map(|span| span.text().to_owned()).collect(); + format!("some {}", names.join(", ")) + } + Literal::SomeIn { + key, + value, + collection, + .. + } => { + let value_part = format_expr(value.as_ref()); + let key_part = key + .as_ref() + .map(|expr| format_expr(expr.as_ref())) + .map(|k| format!("{}, ", k)) + .unwrap_or_default(); + let collection_part = format_expr(collection.as_ref()); + format!("some {}{} in {}", key_part, value_part, collection_part) + } + Literal::Expr { expr, .. } => format_expr(expr.as_ref()), + Literal::NotExpr { expr, .. } => format!("not {}", format_expr(expr.as_ref())), + Literal::Every { + key, value, domain, .. + } => { + let key_part = key + .as_ref() + .map(|span| format!("{}, ", span.text())) + .unwrap_or_default(); + let value_part = value.text(); + let domain_part = format_expr(domain.as_ref()); + format!("every {}{} in {}", key_part, value_part, domain_part) + } + } +} + +fn format_expr(expr: &Expr) -> String { + match expr { + Expr::Var { value, .. } => value + .as_string() + .map(|s| s.as_ref().to_owned()) + .unwrap_or_else(|_| "".to_owned()), + Expr::String { value, .. } + | Expr::RawString { value, .. } + | Expr::Number { value, .. } + | Expr::Bool { value, .. } + | Expr::Null { value, .. } => { + serde_json::to_string(value).unwrap_or_else(|_| "".to_owned()) + } + Expr::Array { items, .. } => { + let parts: Vec = items + .iter() + .map(|item| format_expr(item.as_ref())) + .collect(); + format!("[{}]", parts.join(", ")) + } + Expr::Set { items, .. } => { + let parts: Vec = items + .iter() + .map(|item| format_expr(item.as_ref())) + .collect(); + format!("{{{}}}", parts.join(", ")) + } + Expr::Object { fields, .. } => { + let mut parts = Vec::new(); + for (name_span, key_expr, value_expr) in fields { + let key_text = name_span.text(); + let key = if key_text.is_empty() { + format_expr(key_expr.as_ref()) + } else { + key_text.to_owned() + }; + parts.push(format!("{}: {}", key, format_expr(value_expr.as_ref()))); + } + format!("{{{}}}", parts.join(", ")) + } + Expr::ArrayCompr { term, .. } => format!("[{} | ...]", format_expr(term.as_ref())), + Expr::SetCompr { term, .. } => format!("{{{} | ...}}", format_expr(term.as_ref())), + Expr::ObjectCompr { key, value, .. } => format!( + "{{{}: {} | ...}}", + format_expr(key.as_ref()), + format_expr(value.as_ref()) + ), + Expr::Call { fcn, params, .. } => { + let args: Vec = + params.iter().map(|p| format_expr(p.as_ref())).collect(); + format!("{}({})", format_expr(fcn.as_ref()), args.join(", ")) + } + Expr::UnaryExpr { expr, .. 
} => { + format!("unary({})", format_expr(expr.as_ref())) + } + Expr::RefDot { refr, field, .. } => { + let base = format_expr(refr.as_ref()); + let field_name = field + .as_ref() + .map(|(span, value)| { + value + .as_string() + .map(|s| s.as_ref().to_owned()) + .unwrap_or_else(|_| { + let text = span.text(); + if text.is_empty() { + "".to_owned() + } else { + text.to_owned() + } + }) + }) + .unwrap_or_else(|| "".to_owned()); + format!("{}.{}", base, field_name) + } + Expr::RefBrack { refr, index, .. } => { + format!( + "{}[{}]", + format_expr(refr.as_ref()), + format_expr(index.as_ref()) + ) + } + Expr::BinExpr { op, lhs, rhs, .. } => format!( + "{} {} {}", + format_expr(lhs.as_ref()), + format_bin_op(op), + format_expr(rhs.as_ref()) + ), + Expr::BoolExpr { op, lhs, rhs, .. } => format!( + "{} {} {}", + format_expr(lhs.as_ref()), + format_bool_op(op), + format_expr(rhs.as_ref()) + ), + Expr::ArithExpr { op, lhs, rhs, .. } => format!( + "{} {} {}", + format_expr(lhs.as_ref()), + format_arith_op(op), + format_expr(rhs.as_ref()) + ), + Expr::AssignExpr { lhs, op, rhs, .. } => format!( + "{} {} {}", + format_expr(lhs.as_ref()), + format_assign_op(op), + format_expr(rhs.as_ref()) + ), + Expr::Membership { + key, + value, + collection, + .. + } => { + let collection_text = format_expr(collection.as_ref()); + let value_text = format_expr(value.as_ref()); + if let Some(key_expr) = key { + format!( + "{}, {} in {}", + format_expr(key_expr.as_ref()), + value_text, + collection_text + ) + } else { + format!("{} in {}", value_text, collection_text) + } + } + #[cfg(feature = "rego-extensions")] + Expr::OrExpr { lhs, rhs, .. } => format!( + "{} or {}", + format_expr(lhs.as_ref()), + format_expr(rhs.as_ref()) + ), + } +} + +fn format_bool_op(op: &BoolOp) -> &'static str { + match op { + BoolOp::Lt => "<", + BoolOp::Le => "<=", + BoolOp::Eq => "==", + BoolOp::Ge => ">=", + BoolOp::Gt => ">", + BoolOp::Ne => "!=", + } +} + +fn format_arith_op(op: &ArithOp) -> &'static str { + match op { + ArithOp::Add => "+", + ArithOp::Sub => "-", + ArithOp::Mul => "*", + ArithOp::Div => "/", + ArithOp::Mod => "%", + } +} + +fn format_bin_op(op: &BinOp) -> &'static str { + match op { + BinOp::Intersection => "∩", + BinOp::Union => "∪", + } +} + +fn format_assign_op(op: &AssignOp) -> &'static str { + match op { + AssignOp::Eq => ":=", + AssignOp::ColEq => ":=", + } +} + +fn collect_expr_fact_lines( + module_idx: u32, + expr: &Expr, + facts: &impl ExprFactSource, + fact_lines: &mut Vec, + locals: &mut LocalCollector, +) { + fn record_fact_line( + label: &str, + module_idx: u32, + expr: &Expr, + facts: &impl ExprFactSource, + fact_lines: &mut Vec, + locals: &mut LocalCollector, + ) -> Option { + if let Some(fact) = get_fact(facts, module_idx, expr) { + if let Some(name) = extract_var_name(expr) { + locals.note(name, Some(fact.clone())); + } + fact_lines.push(format_fact_entry(label, &fact)); + Some(fact) + } else { + if let Some(name) = extract_var_name(expr) { + locals.note(name, None); + } + None + } + } + + match expr { + Expr::Var { .. } => { + let _ = record_fact_line("var", module_idx, expr, facts, fact_lines, locals); + } + Expr::AssignExpr { lhs, rhs, .. } => { + let _ = record_fact_line("rhs", module_idx, rhs.as_ref(), facts, fact_lines, locals); + let _ = record_fact_line("lhs", module_idx, lhs.as_ref(), facts, fact_lines, locals); + let _ = record_fact_line("expr", module_idx, expr, facts, fact_lines, locals); + } + Expr::BoolExpr { lhs, rhs, .. } + | Expr::BinExpr { lhs, rhs, .. 
} + | Expr::ArithExpr { lhs, rhs, .. } => { + let _ = record_fact_line("lhs", module_idx, lhs.as_ref(), facts, fact_lines, locals); + let _ = record_fact_line("rhs", module_idx, rhs.as_ref(), facts, fact_lines, locals); + let _ = record_fact_line("expr", module_idx, expr, facts, fact_lines, locals); + } + Expr::Call { params, .. } => { + let _ = record_fact_line("call", module_idx, expr, facts, fact_lines, locals); + for (idx, param) in params.iter().enumerate() { + let _ = record_fact_line( + &format!("arg{}", idx), + module_idx, + param.as_ref(), + facts, + fact_lines, + locals, + ); + } + } + Expr::RefDot { refr, .. } => { + let _ = record_fact_line("expr", module_idx, expr, facts, fact_lines, locals); + let _ = record_fact_line("base", module_idx, refr.as_ref(), facts, fact_lines, locals); + } + Expr::RefBrack { refr, index, .. } => { + let _ = record_fact_line("expr", module_idx, expr, facts, fact_lines, locals); + let base_fact = + record_fact_line("base", module_idx, refr.as_ref(), facts, fact_lines, locals); + let index_fact = record_fact_line( + "index", + module_idx, + index.as_ref(), + facts, + fact_lines, + locals, + ); + if index_fact.is_none() { + if let Some(name) = extract_var_name(index.as_ref()) { + if let Some(fallback) = derive_index_fact(base_fact.as_ref(), index.as_ref()) { + locals.note(name.clone(), Some(fallback.clone())); + fact_lines.push(format_fact_entry("index", &fallback)); + } else { + locals.note(name, None); + } + } + } + } + _ => { + let _ = record_fact_line("expr", module_idx, expr, facts, fact_lines, locals); + } + } +} + +fn get_fact(facts: &impl ExprFactSource, module_idx: u32, expr: &Expr) -> Option { + facts.get_expr_fact(module_idx, expr.eidx()) +} + +fn derive_index_fact(base_fact: Option<&TypeFact>, index_expr: &Expr) -> Option { + if !matches!(index_expr, Expr::Var { .. }) { + return None; + } + + let base_fact = base_fact?; + let structural_type = match &base_fact.descriptor { + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + TypeDescriptor::Structural(st) => st.clone(), + }; + + let mut inferred = match structural_type { + StructuralType::Array(_) | StructuralType::Set(_) => StructuralType::Integer, + StructuralType::Object(_) => StructuralType::String, + _ => StructuralType::Any, + }; + + if matches!(inferred, StructuralType::Any) { + let has_field_segment = base_fact.origins.iter().any(|origin| { + origin + .path + .iter() + .any(|segment| matches!(segment, PathSegment::Field(_))) + }); + inferred = if has_field_segment { + StructuralType::String + } else { + StructuralType::Integer + }; + } + + let mut fact = TypeFact { + descriptor: TypeDescriptor::Structural(inferred), + constant: ConstantValue::Unknown, + provenance: TypeProvenance::Propagated, + origins: mark_origins_derived(&base_fact.origins), + specialization_hits: Vec::new(), + }; + + if fact.origins.is_empty() { + fact.origins = base_fact.origins.clone(); + } + + Some(fact) +} + +fn mark_origins_derived(origins: &[SourceOrigin]) -> Vec { + origins + .iter() + .map(|origin| { + let mut updated = origin.clone(); + updated.derived = true; + updated + }) + .collect() +} + +fn extract_var_name(expr: &Expr) -> Option { + if let Expr::Var { value, .. 
} = expr { + value.as_string().map(|s| s.as_ref().to_owned()).ok() + } else { + None + } +} + +fn format_local_fact_suffix(fact: &Option) -> String { + match fact { + Some(fact) => format!(" :: {}", format_fact_summary(fact)), + None => " :: ".to_owned(), + } +} + +fn format_fact_entry(label: &str, fact: &TypeFact) -> String { + format!("{}: {}", label, format_fact_summary(fact)) +} + +fn format_fact_summary(fact: &TypeFact) -> String { + let mut parts = Vec::new(); + parts.push(format!( + "type={}", + describe_type_descriptor(&fact.descriptor) + )); + + if let ConstantValue::Known(value) = &fact.constant { + if let Ok(rendered) = serde_json::to_string(value) { + parts.push(format!("const={}", rendered)); + } + } + + parts.push(format!("prov={:?}", fact.provenance)); + + if !fact.origins.is_empty() { + let origin_texts: Vec = fact.origins.iter().map(format_origin).collect(); + parts.push(format!("origins={}", origin_texts.join(" | "))); + } + + parts.join(", ") +} + +fn format_body_label(body: &RuleBodySummary) -> String { + match body.kind { + RuleBodyKind::Primary => format!("body #{}", body.body_idx + 1), + RuleBodyKind::Else => format!("else body #{}", body.body_idx), + } +} + +fn render_body_value(fact: Option<&TypeFact>, is_constant: bool) -> String { + let label = if is_constant { "Constant" } else { "Value" }; + + match fact { + Some(fact) => { + if let ConstantValue::Known(value) = &fact.constant { + if let Ok(rendered) = serde_json::to_string(value) { + return format!("{}: {}", label, rendered); + } + } + + format!("{}: {}", label, describe_type_descriptor(&fact.descriptor)) + } + None => format!("{}: Unknown", label), + } +} + +struct SpecializationBodyEntry<'a> { + label: String, + body: &'a RuleBodySummary, + specialized_fact: Option<&'a TypeFact>, + fallback_fact: Option<&'a TypeFact>, + definition_rule: &'a Rule, + body_idx: usize, +} + +fn collect_specialization_bodies<'a>( + rule: &'a Rule, + definitions: &'a [DefinitionSummary], + spec: &'a RuleSpecializationRecord, +) -> Vec> { + let module_facts = spec.expr_facts.get(&spec.signature.module_idx); + let mut entries = Vec::new(); + + for definition in definitions { + if definition.bodies.is_empty() { + continue; + } + + for (idx, body) in definition.bodies.iter().enumerate() { + let specialized_fact = body + .value_expr_idx + .and_then(|expr_idx| module_facts.and_then(|facts| facts.get(&expr_idx))); + + entries.push(SpecializationBodyEntry { + label: format_body_label(body), + body, + specialized_fact, + fallback_fact: body.value_fact.as_ref(), + definition_rule: rule, + body_idx: idx, + }); + } + } + + entries +} + +fn collect_body_verbose_info( + module_idx: u32, + rule: &Rule, + body_idx: usize, + facts: &impl ExprFactSource, +) -> Option { + match rule { + Rule::Spec { bodies, .. 
} => bodies.get(body_idx).map(|body| { + let mut locals = LocalCollector::new(); + let mut statements = Vec::new(); + + if let Some(assign) = &body.assign { + let expr = assign.value.as_ref(); + let mut fact_lines = Vec::new(); + collect_expr_fact_lines(module_idx, expr, facts, &mut fact_lines, &mut locals); + statements.push(StatementDisplay { + summary: format!("assign {}", format_expr(expr)), + fact_lines, + }); + } + + statements.extend(process_query_statements( + module_idx, + body.query.as_ref(), + facts, + 0, + &mut locals, + )); + + RuleVerboseInfo { + locals: locals.into_vec(), + statements, + } + }), + _ => None, + } +} + +fn print_definition_bodies( + module_idx: usize, + rule: &Rule, + definition: &DefinitionSummary, + result: &TypeAnalysisResult, + verbose: bool, +) { + if definition.bodies.is_empty() { + return; + } + + println!(" Bodies:"); + let module_idx = module_idx as u32; + + for body in &definition.bodies { + let label = format_body_label(body); + let location = body + .span + .as_ref() + .map(|span| format!(" ({})", span.format())) + .unwrap_or_default(); + println!( + " - {}{}: {}", + label.as_str(), + location, + render_body_value(body.value_fact.as_ref(), body.is_constant) + ); + + if verbose { + if let Some(verbose_info) = + collect_body_verbose_info(module_idx, rule, body.body_idx, result) + { + print_body_verbose_details(7, label.as_str(), &verbose_info); + } + } + } +} + +fn print_body_verbose_details(indent: usize, label: &str, info: &RuleVerboseInfo) { + if info.locals.is_empty() && info.statements.is_empty() { + return; + } + + let prefix = " ".repeat(indent); + println!("{}{} details:", prefix, label); + + if !info.locals.is_empty() { + let locals_prefix = " ".repeat(indent + 2); + println!("{}Locals:", locals_prefix); + let entry_prefix = " ".repeat(indent + 4); + for local in &info.locals { + println!( + "{}- {}{}", + entry_prefix, + local.name, + format_local_fact_suffix(&local.fact) + ); + } + } + + if !info.statements.is_empty() { + let statements_prefix = " ".repeat(indent + 2); + println!("{}Statements:", statements_prefix); + let entry_prefix = " ".repeat(indent + 4); + for statement in &info.statements { + println!("{}- {}", entry_prefix, statement.summary); + if !statement.fact_lines.is_empty() { + let fact_prefix = " ".repeat(indent + 6); + for fact_line in &statement.fact_lines { + println!("{}{}", fact_prefix, fact_line); + } + } + } + } +} diff --git a/examples/regorus.rs b/examples/regorus.rs index 10bb3061..891c5fd3 100644 --- a/examples/regorus.rs +++ b/examples/regorus.rs @@ -3,6 +3,8 @@ use anyhow::{anyhow, bail, Result}; +mod helpers; + #[allow(dead_code)] fn read_file(path: &String) -> Result { std::fs::read_to_string(path).map_err(|_| anyhow!("could not read {path}")) @@ -25,7 +27,7 @@ fn read_value_from_json_file(path: &String) -> Result { regorus::Value::from_json_str(&read_file(path)?) } -fn add_policy_from_file(engine: &mut regorus::Engine, path: String) -> Result { +pub(crate) fn add_policy_from_file(engine: &mut regorus::Engine, path: String) -> Result { #[cfg(feature = "std")] return engine.add_policy_from_file(path); @@ -267,6 +269,37 @@ enum RegorusCommand { #[arg(long)] v0: bool, }, + + /// Analyze types in Rego policies. + Analyze { + /// Directories containing Rego files. + #[arg(long, short, value_name = "bundle")] + bundles: Vec, + + /// Policy files. + #[arg(long, short, value_name = "policy.rego")] + data: Vec, + + /// Input schema file (JSON). 
+ #[arg(long, value_name = "schema.json")] + input_schema: Option, + + /// Data schema file (JSON). + #[arg(long, value_name = "schema.json")] + data_schema: Option, + + /// Only analyze rules reachable from these paths (e.g., data.package.rule). + #[arg(long, short = 'e', value_name = "RULE_PATH")] + entrypoints: Vec, + + /// Turn on Rego language v0. + #[arg(long)] + v0: bool, + + /// Verbose output (show dependencies and origins). + #[arg(long, short)] + verbose: bool, + }, } #[derive(clap::Parser)] @@ -306,5 +339,22 @@ fn main() -> Result<()> { RegorusCommand::Lex { file, verbose } => rego_lex(file, verbose), RegorusCommand::Parse { file, v0 } => rego_parse(file, v0), RegorusCommand::Ast { file } => rego_ast(file), + RegorusCommand::Analyze { + bundles, + data, + input_schema, + data_schema, + entrypoints, + v0, + verbose, + } => helpers::type_analysis::rego_type_analysis( + &bundles, + &data, + input_schema, + data_schema, + entrypoints, + v0, + verbose, + ), } } diff --git a/examples/server/allowed_server.rego b/examples/server/allowed_server.rego index b8c0e79b..1e6fdd0e 100644 --- a/examples/server/allowed_server.rego +++ b/examples/server/allowed_server.rego @@ -1,26 +1,29 @@ package example -default allow := false # unless otherwise defined, allow is false +#default allow := false # unless otherwise defined, allow is false -allow := true if { # allow is true if... - count(violation) == 0 # there are zero violations. +allow := r if { # allow is true if... + r := { + "allow": count(violation) == 0, # there are zero violations. + "violations": violation # return the violations found. + } } -violation[server.id] if { # a server is in the violation set if... +violation contains server.id if { # a server is in the violation set if... some server public_server[server] # it exists in the 'public_server' set and... server.protocols[_] == "http" # it contains the insecure "http" protocol. } -violation[server.id] if { # a server is in the violation set if... +violation contains server.id if { # a server is in the violation set if... server := input.servers[_] # it exists in the input.servers collection and... server.protocols[_] == "telnet" # it contains the "telnet" protocol. } -public_server[server]if { # a server exists in the public_server set if... +public_server contains server if { # a server exists in the public_server set if... some i, j server := input.servers[_] # it exists in the input.servers collection and... server.ports[_] == input.ports[i].id # it references a port in the input.ports collection and... input.ports[i].network == input.networks[j].id # the port references a network in the input.networks collection and... input.networks[j].public # the network is public. 
-} \ No newline at end of file +} diff --git a/examples/server/input.schema.json b/examples/server/input.schema.json new file mode 100644 index 00000000..56527897 --- /dev/null +++ b/examples/server/input.schema.json @@ -0,0 +1,72 @@ +{ + "type": "object", + "properties": { + "servers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the server" + }, + "name": { + "type": "string", + "description": "Human-readable server name" + }, + "protocols": { + "type": "array", + "items": { + "enum": ["http", "https", "ssh", "telnet", "ftp", "sftp"], + "description": "Protocol name" + }, + "description": "List of protocols supported by the server" + }, + "ports": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of port IDs this server is listening on" + } + }, + "required": ["id", "protocols", "ports"] + } + }, + "ports": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the port" + }, + "network": { + "type": "string", + "description": "Network ID this port belongs to" + } + }, + "required": ["id", "network"] + } + }, + "networks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the network" + }, + "public": { + "type": "boolean", + "description": "Whether this network is publicly accessible" + } + }, + "required": ["id", "public"] + } + } + }, + "required": ["servers", "ports", "networks"] +} diff --git a/src/ast.rs b/src/ast.rs index 4e16f500..59398865 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -202,7 +202,8 @@ pub enum Expr { RefDot { span: Span, refr: Ref, - field: (Span, Value), + #[cfg_attr(feature = "ast", serde(skip_serializing_if = "Option::is_none"))] + field: Option<(Span, Value)>, eidx: u32, }, @@ -474,6 +475,41 @@ pub struct Module { pub num_statements: u32, // Number of queries in the module. pub num_queries: u32, + // Position lookup table: maps (byte_offset) -> expr_idx for quick hover/completion + #[cfg_attr(feature = "ast", serde(skip))] + pub expr_positions: alloc::vec::Vec<(u32, u32, u32)>, // (line, col, eidx) - 1-based } pub type ExprRef = Ref; + +impl Module { + /// Find the expression at a given position using the position lookup table. + /// Returns the expr_idx of the expression at or just before the position. + /// Line and column are 1-based (matching Span convention). + /// Uses binary search for O(log n) performance. + /// + /// Note: VS Code/LSP provides Position with 0-based line/character. + /// Callers must convert: `find_expr_at_position(vscode_line + 1, vscode_char + 1)` + /// + /// Strategy: Return the rightmost (most recently parsed) expression at or before + /// the cursor position. When multiple expressions start at the same position (e.g., + /// nested expressions), the later ones in the table are the outer/parent expressions, + /// so we return the last match which gives us the innermost context for hover. + pub fn find_expr_at_position(&self, line: usize, col: usize) -> Option { + let line = line as u32; + let col = col as u32; + + // Binary search for the rightmost expression at or before (line, col). + // The table is sorted by (line, col), with eidx as tiebreaker for stability. 
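+        // Worked example (illustrative): with expr_positions [(1, 1, 0), (1, 5, 1), (2, 3, 2)]
+        // and a lookup at line 1, col 7, partition_point returns 2, so eidx 1 is reported
+        // (the rightmost expression starting at or before the cursor).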
+ let idx = self + .expr_positions + .partition_point(|(eline, ecol, _)| *eline < line || (*eline == line && *ecol <= col)); + + // partition_point returns the index where we'd insert, so we need idx - 1 + if idx > 0 { + self.expr_positions.get(idx - 1).map(|(_, _, eidx)| *eidx) + } else { + None + } + } +} diff --git a/src/engine.rs b/src/engine.rs index 3f90f04d..7cc2b93a 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -2,11 +2,12 @@ // Licensed under the MIT License. use crate::ast::*; -use crate::compiled_policy::CompiledPolicy; +use crate::compiled_policy::{CompiledPolicy, CompiledPolicyData}; use crate::interpreter::*; use crate::lexer::*; use crate::parser::*; use crate::scheduler::*; +use crate::type_checker::TypeChecker; use crate::utils::gather_functions; use crate::value::*; use crate::*; @@ -22,6 +23,8 @@ pub struct Engine { interpreter: Interpreter, prepared: bool, rego_v1: bool, + type_checker: Option, + tolerant_parse: bool, } #[cfg(feature = "azure_policy")] @@ -60,6 +63,14 @@ impl Default for Engine { } } +/// Bundled artifacts used by the type analyzer when operating on an engine. +pub(crate) type TypeAnalysisContext = ( + Rc>>, + Option>, + Option>, + Rc, +); + impl Engine { /// Create an instance of [Engine]. pub fn new() -> Self { @@ -68,6 +79,23 @@ impl Engine { interpreter: Interpreter::new(), prepared: false, rego_v1: true, + type_checker: None, + tolerant_parse: false, + } + } + + /// Create an engine seeded with an existing module set. + /// + /// This is used internally by components that already hold parsed modules + /// and want to reuse the engine's preparation pipeline without reparsing. + pub(crate) fn new_with_modules(modules: Rc>>) -> Self { + Self { + modules, + interpreter: Interpreter::new(), + prepared: false, + rego_v1: true, + type_checker: None, + tolerant_parse: false, } } @@ -100,6 +128,11 @@ impl Engine { self.rego_v1 = !rego_v0; } + /// Enable or disable tolerant parsing. Intended for IDE scenarios. + pub fn set_tolerant_parsing(&mut self, enable: bool) { + self.tolerant_parse = enable; + } + /// Add a policy. /// /// The policy file will be parsed and converted to AST representation. @@ -133,6 +166,9 @@ impl Engine { Rc::make_mut(&mut self.modules).push(module.clone()); // if policies change, interpreter needs to be prepared again self.prepared = false; + if let Some(type_checker) = self.type_checker.as_mut() { + type_checker.set_modules(self.modules.clone()); + } Interpreter::get_path_string(&module.package.refr, Some("data")) } @@ -166,6 +202,9 @@ impl Engine { Rc::make_mut(&mut self.modules).push(module.clone()); // if policies change, interpreter needs to be prepared again self.prepared = false; + if let Some(type_checker) = self.type_checker.as_mut() { + type_checker.set_modules(self.modules.clone()); + } Interpreter::get_path_string(&module.package.refr, Some("data")) } @@ -381,6 +420,152 @@ impl Engine { self.add_data(Value::from_json_str(data_json)?) } + /// Enable type checking for the policies. + /// + /// When enabled, the engine will create a TypeChecker and run type analysis + /// during policy preparation. This can help catch type errors early. 
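+    /// Input and data schemas can be attached with
+    /// [`get_type_checker_mut()`](Self::get_type_checker_mut) before evaluation.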
+ /// + /// # Example + /// ``` + /// # use regorus::*; + /// # fn main() -> anyhow::Result<()> { + /// let mut engine = Engine::new(); + /// engine.enable_type_checking(); + /// + /// engine.add_policy( + /// "test.rego".to_string(), + /// r#" + /// package test + /// allow = input.value > 10 + /// "#.to_string())?; + /// + /// // Type analysis will run during prepare_for_eval + /// engine.eval_query("data.test.allow".to_string(), false)?; + /// + /// // Access type analysis results + /// if let Some(checker) = engine.get_type_checker() { + /// println!("Type analysis found {} diagnostics", + /// checker.diagnostic_count().unwrap_or(0)); + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn enable_type_checking(&mut self) { + if self.type_checker.is_none() { + self.type_checker = Some(TypeChecker::new(self.modules.clone())); + } + } + + /// Disable type checking. + /// + /// This will remove the TypeChecker and prevent type analysis from running. + pub fn disable_type_checking(&mut self) { + self.type_checker = None; + } + + /// Check if type checking is enabled. + pub fn is_type_checking_enabled(&self) -> bool { + self.type_checker.is_some() + } + + /// Get a reference to the TypeChecker if type checking is enabled. + /// + /// Returns `None` if type checking hasn't been enabled via [`enable_type_checking()`](Self::enable_type_checking). + /// + /// # Example + /// ``` + /// # use regorus::*; + /// # fn main() -> anyhow::Result<()> { + /// let mut engine = Engine::new(); + /// engine.enable_type_checking(); + /// + /// engine.add_policy( + /// "test.rego".to_string(), + /// "package test\nallow = true".to_string())?; + /// + /// // Prepare the engine (this will run type analysis) + /// engine.eval_query("data.test.allow".to_string(), false)?; + /// + /// if let Some(checker) = engine.get_type_checker() { + /// if let Some(result) = checker.get_result() { + /// println!("Diagnostics: {:?}", result.diagnostics); + /// } + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn get_type_checker(&self) -> Option<&TypeChecker> { + self.type_checker.as_ref() + } + + /// Get a mutable reference to the TypeChecker if type checking is enabled. + /// + /// This allows setting input/data schemas on the type checker. + /// + /// # Example + /// ``` + /// # use regorus::*; + /// # fn main() -> anyhow::Result<()> { + /// let mut engine = Engine::new(); + /// engine.enable_type_checking(); + /// + /// // Set input schema + /// # #[cfg(feature = "jsonschema")] + /// if let Some(checker) = engine.get_type_checker_mut() { + /// let schema = Schema::from_json_str( + /// r#"{"type": "object", "properties": {"value": {"type": "integer"}}}"# + /// ).map_err(|e| anyhow::anyhow!("{e}"))?; + /// checker.set_input_schema(schema); + /// } + /// + /// engine.add_policy( + /// "test.rego".to_string(), + /// "package test\nallow = input.value > 10".to_string())?; + /// + /// // Type analysis will use the schema + /// engine.eval_query("data.test.allow".to_string(), false)?; + /// # Ok(()) + /// # } + /// ``` + pub fn get_type_checker_mut(&mut self) -> Option<&mut TypeChecker> { + self.type_checker.as_mut() + } + + /// Run type checking on the loaded policies. + /// + /// This is a convenience method that enables type checking if not already enabled, + /// runs the type analysis, and returns any diagnostics found. 
+ /// + /// # Example + /// ``` + /// # use regorus::*; + /// # fn main() -> anyhow::Result<()> { + /// let mut engine = Engine::new(); + /// + /// engine.add_policy( + /// "test.rego".to_string(), + /// "package test\nallow = true".to_string())?; + /// + /// // Run type checking + /// let diagnostics = engine.type_check()?; + /// println!("Found {} type issues", diagnostics.len()); + /// # Ok(()) + /// # } + /// ``` + pub fn type_check(&mut self) -> Result> { + self.enable_type_checking(); + + if let Some(checker) = self.type_checker.as_mut() { + checker.check()?; + Ok(checker + .get_result() + .map(|r| r.diagnostics.clone()) + .unwrap_or_default()) + } else { + Ok(vec![]) + } + } + /// Set whether builtins should raise errors strictly or not. /// /// Regorus differs from OPA in that by default builtins will @@ -954,7 +1139,17 @@ impl Engine { let hoister = LoopHoister::new_with_schedule(schedule.clone()); let loop_lookup = hoister.populate_with_extra_capacity(&self.modules, 0)?; - self.interpreter.set_loop_hoisting_table(loop_lookup); + self.interpreter + .set_loop_hoisting_table(loop_lookup.clone()); + + // Run type checking if enabled + if let Some(checker) = self.type_checker.as_mut() { + // Update modules in case they've changed + checker.set_modules(self.modules.clone()); + + // Run type analysis (hoister will be run internally if needed) + checker.check()?; + } // Set schedule after hoisting completes self.interpreter.set_schedule(Some(schedule)); @@ -1402,6 +1597,9 @@ impl Engine { if self.rego_v1 { parser.enable_rego_v1()?; } + if self.tolerant_parse { + parser.enable_tolerant_parsing(); + } Ok(parser) } @@ -1416,6 +1614,43 @@ impl Engine { interpreter: Interpreter::new_from_compiled_policy(compiled_policy), rego_v1: true, // Value doesn't matter since this is used only for policy parsing prepared: true, + type_checker: None, + tolerant_parse: false, } } + + /// Get the context needed for type analysis. + /// Returns (modules, schedule, loop_lookup, compiled_policy) for use by TypeAnalyzer. + /// The engine will prepare itself if needed. Returns None if preparation fails. + pub(crate) fn get_type_analysis_context(&mut self) -> Option { + if self.prepare_for_eval(false, false).is_err() { + return None; + } + + let compiled_policy = self.interpreter.get_compiled_policy().clone(); + let schedule = compiled_policy.schedule.clone(); + let loop_lookup = Some(Rc::new(compiled_policy.loop_hoisting_table.clone())); + + Some((self.modules.clone(), schedule, loop_lookup, compiled_policy)) + } + + /// Try to evaluate a rule as a constant value. + /// + /// This is used by the type analyzer to determine if a rule can be constant-folded. + /// Returns Some(Value) if the rule evaluates to a constant, None if it requires runtime inputs. 
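+    ///
+    /// Illustrative sketch of crate-internal usage (the rule path below is hypothetical):
+    ///
+    /// ```ignore
+    /// // `engine` is a mutable Engine that already has policies added.
+    /// if let Some(value) = engine.try_eval_rule_constant("data.example.pi") {
+    ///     // The rule folded to a constant without touching input or external data.
+    ///     println!("constant value: {value:?}");
+    /// }
+    /// ```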
+ pub(crate) fn try_eval_rule_constant( + &mut self, + rule_path: &str, + ) -> Option { + // Prepare the engine if not already prepared + if self.prepare_for_eval(false, false).is_err() { + return None; + } + + // Clean state before evaluation + self.interpreter.clean_internal_evaluation_state(); + + // Delegate to interpreter + self.interpreter.try_eval_rule_constant(rule_path) + } } diff --git a/src/interpreter.rs b/src/interpreter.rs index 5a8f0a7f..ea84d7ea 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -95,6 +95,9 @@ pub struct Interpreter { active_rules: Vec>, builtins_cache: BTreeMap<(&'static str, Vec), Value>, no_rules_lookup: bool, + pub(crate) constant_folding: bool, + constant_fold_input_accessed: bool, + constant_fold_external_data_accessed: bool, } impl Default for Interpreter { @@ -142,6 +145,9 @@ impl Clone for Interpreter { query_module: None, module: None, no_rules_lookup: false, + constant_folding: false, + constant_fold_input_accessed: false, + constant_fold_external_data_accessed: false, } } } @@ -212,6 +218,9 @@ impl Interpreter { active_rules: Vec::default(), builtins_cache: BTreeMap::default(), no_rules_lookup: false, + constant_folding: false, + constant_fold_input_accessed: false, + constant_fold_external_data_accessed: false, traces: None, extensions: Map::default(), @@ -225,6 +234,27 @@ impl Interpreter { } } + #[inline] + fn reset_constant_fold_usage_flags(&mut self) { + self.constant_fold_input_accessed = false; + self.constant_fold_external_data_accessed = false; + } + + #[inline] + fn mark_constant_fold_input_access(&mut self) { + self.constant_fold_input_accessed = true; + } + + #[inline] + fn mark_constant_fold_external_data_access(&mut self) { + self.constant_fold_external_data_accessed = true; + } + + #[inline] + fn constant_fold_used_runtime_inputs(&self) -> bool { + self.constant_fold_input_accessed || self.constant_fold_external_data_accessed + } + /// Create a new Interpreter from a compiled policy. pub fn new_from_compiled_policy(compiled_policy: Rc) -> Self { Self { @@ -249,6 +279,9 @@ impl Interpreter { active_rules: Vec::default(), builtins_cache: BTreeMap::default(), no_rules_lookup: false, + constant_folding: false, + constant_fold_input_accessed: false, + constant_fold_external_data_accessed: false, traces: None, #[cfg(feature = "coverage")] @@ -324,6 +357,44 @@ impl Interpreter { }; } + /// Attempts to evaluate a rule in constant-folding mode. + /// This is used by the type analyzer to determine if a rule can be constant-folded. + /// Returns Some(Value) if successful, None if the rule needs runtime values. 
+ pub(crate) fn try_eval_rule_constant(&mut self, rule_path: &str) -> Option { + // Enable constant folding mode + let prev_constant_folding = self.constant_folding; + self.constant_folding = true; + self.reset_constant_fold_usage_flags(); + + // Try to evaluate the rule + let result = match self.ensure_rule_evaluated(rule_path.to_owned()) { + Ok(()) => { + // Extract the value from data + let path_parts: Vec<&str> = rule_path.split('.').skip(1).collect(); + let value = Self::get_value_chained(self.data.clone(), &path_parts); + + // Return None if undefined (couldn't be constant folded) + if value == Value::Undefined { + None + } else { + Some(value) + } + } + Err(_) => { + // Evaluation failed (needs input/data or has other issues) + None + } + }; + + // Restore constant folding mode + self.constant_folding = prev_constant_folding; + if self.constant_fold_used_runtime_inputs() { + None + } else { + result + } + } + pub fn set_strict_builtin_errors(&mut self, b: bool) { self.compiled_policy_mut().strict_builtin_errors = b; } @@ -495,8 +566,11 @@ impl Interpreter { } // Accumulate chained . field accesses. Expr::RefDot { refr, field, .. } => { + let (field_span, _) = field + .as_ref() + .ok_or_else(|| refr.span().error("incomplete reference"))?; expr = refr; - path.push(field.0.text()); + path.push(field_span.text()); } Expr::RefBrack { refr, index, .. } => match index.as_ref() { // refr["field"] is the same as refr.field @@ -1531,7 +1605,10 @@ impl Interpreter { expr = refr; } Expr::RefDot { refr, field, .. } => { - comps.push(Value::String(field.0.text().into())); + let (field_span, _) = field + .as_ref() + .ok_or_else(|| refr.span().error("incomplete reference"))?; + comps.push(Value::from(field_span.text())); expr = refr; } _ => { @@ -2781,6 +2858,10 @@ impl Interpreter { // Handle input. if name.text() == "input" { + if self.constant_folding { + self.mark_constant_fold_input_access(); + bail!(span.error("input not available in constant folding mode")); + } return Ok(Self::get_value_chained(self.input.clone(), fields)); } @@ -2794,6 +2875,28 @@ impl Interpreter { // Ensure that rules are evaluated if name.text() == "data" { + // In constant folding mode, check if this is a rule path + if self.constant_folding { + let mut is_rule_path = false; + + // Check if any prefix of the path corresponds to a rule + for i in (1..fields.len() + 1).rev() { + let check_path = "data.".to_owned() + &fields[0..i].join("."); + if self.compiled_policy.rules.contains_key(&check_path) + || self.compiled_policy.default_rules.contains_key(&check_path) + { + is_rule_path = true; + break; + } + } + + // If not a rule path, it's external data - bail out + if !is_rule_path { + self.mark_constant_fold_external_data_access(); + bail!(span.error("external data not available in constant folding mode")); + } + } + if self.is_processed(fields)? { return Ok(Self::get_value_chained(self.data.clone(), fields)); } @@ -3172,7 +3275,10 @@ impl Interpreter { while let Some(e) = expr { match e { Expr::RefDot { refr, field, .. } => { - comps.push(field.0.text()); + let (field_span, _) = field + .as_ref() + .ok_or_else(|| refr.span().error("incomplete reference"))?; + comps.push(field_span.text()); expr = Some(refr); } Expr::RefBrack { refr, index, .. } @@ -3671,7 +3777,10 @@ impl Interpreter { refr } Expr::RefDot { refr, field, .. 
} => { - components.push(field.0.text().into()); + let (field_span, _) = field + .as_ref() + .ok_or_else(|| refr.span().error("incomplete reference"))?; + components.push(field_span.text().into()); refr } _ => break, @@ -3790,7 +3899,9 @@ impl Interpreter { let target = match &import.r#as { Some(s) => s.text(), _ => match import.refr.as_ref() { - Expr::RefDot { field, .. } => field.0.text(), + Expr::RefDot { field, .. } => { + field.as_ref().map(|(span, _)| span.text()).unwrap_or("") + } Expr::RefBrack { index, .. } => match index.as_ref() { Expr::String { span: s, .. } => s.text(), _ => "", diff --git a/src/interpreter/target/infer.rs b/src/interpreter/target/infer.rs index a135ea2a..7eb36084 100644 --- a/src/interpreter/target/infer.rs +++ b/src/interpreter/target/infer.rs @@ -171,16 +171,18 @@ fn extract_input_field_access(expr: &Expr, _expected_field: &str) -> Option { - if let ( - Expr::Var { - value: Value::String(var_name), - .. - }, - Value::String(field_name), - ) = (refr.as_ref(), &field.1) - { - if var_name.as_ref() == "input" { - return Some(field_name.clone()); + if let Some((_, field_value)) = field.as_ref() { + if let ( + Expr::Var { + value: Value::String(var_name), + .. + }, + Value::String(field_name), + ) = (refr.as_ref(), field_value) + { + if var_name.as_ref() == "input" { + return Some(field_name.clone()); + } } } } diff --git a/src/lexer.rs b/src/lexer.rs index e792cb5e..0730756f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -196,6 +196,29 @@ impl Source { } } + pub fn offset_to_line_col(&self, offset: u32) -> (u32, u32) { + for (idx, (start, end)) in self.src.lines.iter().enumerate() { + let start_u32 = *start; + let end_u32 = *end; + let is_last_line = idx == self.src.lines.len().saturating_sub(1); + if offset < end_u32 || (is_last_line && offset <= end_u32) { + let line_idx = idx as u32 + 1; + let slice_start = start_u32 as usize; + let slice_end = core::cmp::min(offset, end_u32) as usize; + let slice = &self.src.contents[slice_start..slice_end]; + let col = slice.chars().count() as u32 + 1; + return (line_idx, col); + } + } + + if self.src.lines.is_empty() { + return (1, 1); + } + + let line_idx = self.src.lines.len() as u32; + (line_idx, 1) + } + pub fn message(&self, line: u32, col: u32, kind: &str, msg: &str) -> String { if line as usize > self.src.lines.len() { return format!("{}: invalid line {} specified", self.src.file, line); diff --git a/src/lib.rs b/src/lib.rs index 269d1e1c..3bd72e00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,17 +39,18 @@ mod query; #[cfg(feature = "azure_policy")] pub mod registry; mod scheduler; -#[cfg(feature = "azure_policy")] -mod schema; +pub mod schema; #[cfg(feature = "azure_policy")] pub mod target; +pub mod type_analysis; +mod type_checker; mod utils; mod value; #[cfg(feature = "azure_policy")] pub use { compile::compile_policy_for_target, - schema::{error::ValidationError, validate::SchemaValidator, Schema}, + schema::{error::ValidationError, validate::SchemaValidator}, target::Target, }; @@ -57,9 +58,20 @@ pub use compile::{compile_policy_with_entrypoint, PolicyModule}; pub use compiled_policy::CompiledPolicy; pub use engine::Engine; pub use lexer::Source; +pub use lexer::Span; pub use policy_info::PolicyInfo; +pub use schema::Schema; +pub use type_checker::TypeChecker; pub use value::Value; +pub use ast::{ + Expr, ExprRef, Import, Literal, LiteralStmt, Module, Package, Query, Rule, RuleAssign, + RuleBody, RuleHead, WithModifier, +}; + +pub use utils::get_path_string; +pub use utils::path::normalize_rule_path; + 
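Taken together, these re-exports let applications drive the analysis without reaching into `unstable`. Below is a minimal sketch of that flow, assuming the `jsonschema` feature for schema parsing; the policy text, file name, and schema are invented for illustration:

```rust
use regorus::{Engine, Schema};

fn main() -> anyhow::Result<()> {
    let mut engine = Engine::new();
    engine.enable_type_checking();

    // Describe the shape of `input` so the analyzer has a schema-backed root.
    let schema = Schema::from_json_str(
        r#"{"type": "object", "properties": {"value": {"type": "integer"}}}"#,
    )
    .map_err(|e| anyhow::anyhow!("{e}"))?;
    if let Some(checker) = engine.get_type_checker_mut() {
        checker.set_input_schema(schema);
    }

    engine.add_policy(
        "example.rego".to_string(),
        "package example\nallow = input.value > 10".to_string(),
    )?;

    // Runs the analysis and returns the diagnostics that were recorded.
    let diagnostics = engine.type_check()?;
    println!("{} diagnostics", diagnostics.len());

    // Full per-rule and per-expression facts remain available on the checker.
    if let Some(result) = engine.get_type_checker().and_then(|c| c.get_result()) {
        println!("{} rules analysed", result.rules.by_path.len());
    }
    Ok(())
}
```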
#[cfg(feature = "arc")] pub use alloc::sync::Arc as Rc; @@ -465,6 +477,7 @@ pub mod unstable { pub use crate::ast::*; pub use crate::lexer::*; pub use crate::parser::*; + pub use crate::utils::get_path_string; } #[cfg(test)] diff --git a/src/lookup.rs b/src/lookup.rs index 7a8a234c..ebc94c62 100644 --- a/src/lookup.rs +++ b/src/lookup.rs @@ -95,4 +95,9 @@ impl Lookup { None } } + + /// Get read-only access to all module slots. + pub fn modules(&self) -> &[Vec>] { + &self.slots + } } diff --git a/src/parser.rs b/src/parser.rs index db1ecd7d..340dddb5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -21,6 +21,7 @@ pub struct Parser<'source> { end: u32, future_keywords: BTreeMap>, rego_v1: bool, + tolerant: bool, // The index of the last expression that was parsed. eidx: u32, @@ -28,6 +29,9 @@ pub struct Parser<'source> { sidx: u32, // The index of the last query that was parsed. qidx: u32, + // Position lookup table: (line, col, eidx) + // Stores 1-based line/col matching Span convention + expr_positions: alloc::vec::Vec<(u32, u32, u32)>, } const FUTURE_KEYWORDS: [&str; 4] = ["contains", "every", "if", "in"]; @@ -44,15 +48,23 @@ impl<'source> Parser<'source> { end: 0, future_keywords: BTreeMap::new(), rego_v1: false, + tolerant: false, eidx: 0, sidx: 0, qidx: 0, + expr_positions: alloc::vec::Vec::new(), }) } - fn next_eidx(&mut self) -> u32 { + pub fn enable_tolerant_parsing(&mut self) { + self.tolerant = true; + } + + fn next_eidx(&mut self, span: &Span) -> u32 { let eidx = self.eidx; self.eidx += 1; + // Record position for fast lookup (line, col are 1-based from Span) + self.expr_positions.push((span.line, span.col, eidx)); eidx } @@ -150,8 +162,11 @@ impl<'source> Parser<'source> { pub fn get_path_ref_components_into(refr: &Ref, comps: &mut Vec) -> Result<()> { match refr.as_ref() { Expr::RefDot { refr, field, .. } => { + let (field_span, _) = field + .as_ref() + .ok_or_else(|| refr.span().error("incomplete reference"))?; Self::get_path_ref_components_into(refr, comps)?; - comps.push(field.0.clone()); + comps.push(field_span.clone()); } Expr::RefBrack { refr, index, .. 
} => { Self::get_path_ref_components_into(refr, comps)?; @@ -293,9 +308,9 @@ impl<'source> Parser<'source> { fn read_number(&mut self, span: Span) -> Result { match Number::from_str(span.text()) { Ok(v) => Ok(Expr::Number { - span, + span: span.clone(), value: Value::Number(v), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }), Err(_) => bail!(span.error("could not parse number")), } @@ -303,6 +318,7 @@ impl<'source> Parser<'source> { fn parse_scalar_or_var(&mut self) -> Result { let span = self.tok.1.clone(); + let span_clone = span.clone(); let node = match &self.tok.0 { TokenKind::Number => self.read_number(span)?, TokenKind::String => { @@ -314,7 +330,7 @@ impl<'source> Parser<'source> { Expr::String { span, value: v, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span_clone), } } TokenKind::RawString => { @@ -322,24 +338,24 @@ impl<'source> Parser<'source> { Expr::RawString { span, value: v, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span_clone), } } TokenKind::Ident => match self.token_text() { "null" => Expr::Null { span, value: Value::Null, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span_clone), }, "true" => Expr::Bool { span, value: Value::from(true), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span_clone), }, "false" => Expr::Bool { span, value: Value::from(false), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span_clone), }, _ => { let ident = self.parse_var()?; @@ -347,7 +363,7 @@ impl<'source> Parser<'source> { return Ok(Expr::Var { span: ident, value, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span_clone), }); } }, @@ -406,10 +422,10 @@ impl<'source> Parser<'source> { Ok((term, query)) => { span.end = self.end; Ok(Expr::ArrayCompr { - span, + span: span.clone(), term: Ref::new(term), query: Ref::new(query), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }) } Err(_) if self.end == pos => { @@ -430,9 +446,9 @@ impl<'source> Parser<'source> { self.expect("]", "while parsing array")?; span.end = self.end; Ok(Expr::Array { - span, + span: span.clone(), items, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }) } Err(err) => Err(err), @@ -448,10 +464,10 @@ impl<'source> Parser<'source> { Ok((term, query)) => { span.end = self.end; return Ok(Expr::SetCompr { - span, + span: span.clone(), term: Ref::new(term), query: Ref::new(query), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }); } Err(err) if self.end != pos => { @@ -468,9 +484,9 @@ impl<'source> Parser<'source> { self.next_token()?; span.end = self.end; return Ok(Expr::Object { - span, + span: span.clone(), fields: vec![], - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }); } @@ -491,9 +507,9 @@ impl<'source> Parser<'source> { self.expect("}", "while parsing set")?; span.end = self.end; return Ok(Expr::Set { - span, + span: span.clone(), items, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }); } @@ -505,11 +521,11 @@ impl<'source> Parser<'source> { Ok((term, query)) => { span.end = self.end; return Ok(Expr::ObjectCompr { - span, + span: span.clone(), key: Ref::new(first), value: Ref::new(term), query: Ref::new(query), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }); } Err(err) if self.end != pos => { @@ -549,9 +565,9 @@ impl<'source> Parser<'source> { span.end = self.end; Ok(Expr::Object { - span, + span: span.clone(), fields: items, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }) } @@ -561,9 +577,9 @@ impl<'source> Parser<'source> { self.expect(")", "while parsing empty set")?; span.end = self.tok.1.end; 
Ok(Expr::Set { - span, + span: span.clone(), items: vec![], - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }) } @@ -581,9 +597,9 @@ impl<'source> Parser<'source> { let expr = self.parse_in_expr()?; span.end = self.end; Ok(Expr::UnaryExpr { - span, + span: span.clone(), expr: Ref::new(expr), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }) } @@ -636,8 +652,20 @@ impl<'source> Parser<'source> { ); } "." => { - // Read identifier. + let dot_span = self.tok.1.clone(); self.next_token()?; + + if self.tolerant && !matches!(self.tok.0, TokenKind::Ident) { + span.end = dot_span.end; + term = Expr::RefDot { + span: span.clone(), + refr: Ref::new(term), + field: None, + eidx: self.next_eidx(&span), + }; + continue; + } + let field = self.parse_var()?; span.end = self.end; @@ -654,10 +682,10 @@ impl<'source> Parser<'source> { } let fieldv = Value::from(field.text()); term = Expr::RefDot { - span, + span: span.clone(), refr: Ref::new(term), - field: (field, fieldv), - eidx: self.next_eidx(), + field: Some((field, fieldv)), + eidx: self.next_eidx(&span), }; } "[" => { @@ -671,10 +699,10 @@ impl<'source> Parser<'source> { span.end = self.end; term = Expr::RefBrack { - span, + span: span.clone(), refr: Ref::new(term), index: Ref::new(index), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } "(" if possible_fcn => { @@ -694,10 +722,10 @@ impl<'source> Parser<'source> { self.expect(")", "while parsing call expr")?; span.end = self.end; term = Expr::Call { - span, + span: span.clone(), fcn: Ref::new(term), params: args, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; // The expression can no longer be a function after the call. @@ -731,11 +759,11 @@ impl<'source> Parser<'source> { let right = self.parse_term()?; span.end = self.end; expr = Expr::ArithExpr { - span, + span: span.clone(), op, lhs: Ref::new(expr), rhs: Ref::new(right), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } } @@ -767,11 +795,11 @@ impl<'source> Parser<'source> { }; span.end = self.end; expr = Expr::ArithExpr { - span, + span: span.clone(), op, lhs: Ref::new(expr), rhs: Ref::new(right), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } } @@ -787,11 +815,11 @@ impl<'source> Parser<'source> { let right = self.parse_arith_expr()?; span.end = self.end; expr = Expr::BinExpr { - span, + span: span.clone(), op: BinOp::Intersection, lhs: Ref::new(expr), rhs: Ref::new(right), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } Ok(expr) @@ -808,11 +836,11 @@ impl<'source> Parser<'source> { let right = self.parse_set_intersection_expr()?; span.end = self.end; expr = Expr::BinExpr { - span, + span: span.clone(), op: BinOp::Union, lhs: Ref::new(expr), rhs: Ref::new(right), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } Ok(expr) @@ -837,11 +865,11 @@ impl<'source> Parser<'source> { let right = self.parse_set_union_expr()?; span.end = self.end; expr = Expr::BoolExpr { - span, + span: span.clone(), op, lhs: Ref::new(expr), rhs: Ref::new(right), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } Ok(expr) @@ -864,11 +892,11 @@ impl<'source> Parser<'source> { None => (None, Ref::new(expr1)), }; expr1 = Expr::Membership { - span, + span: span.clone(), key, value, collection: Ref::new(expr3), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; expr2 = None; @@ -909,10 +937,10 @@ impl<'source> Parser<'source> { self.next_token()?; let rhs = self.parse_membership_expr()?; expr = Expr::OrExpr { - span, + span: span.clone(), lhs: Ref::new(expr), 
rhs: Ref::new(rhs), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } Ok(expr) @@ -966,11 +994,11 @@ impl<'source> Parser<'source> { let right = self.parse_expr()?; span.end = self.end; Ok(Expr::AssignExpr { - span, + span: span.clone(), op, lhs: Ref::new(expr), rhs: Ref::new(right), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }) } @@ -984,7 +1012,7 @@ impl<'source> Parser<'source> { let r#as = self.parse_in_expr()?; span.end = self.end; modifiers.push(WithModifier { - span, + span: span.clone(), refr: Ref::new(refr), r#as: Ref::new(r#as), }); @@ -1254,9 +1282,9 @@ impl<'source> Parser<'source> { let (span, value) = Self::span_and_value(var); let mut refr = Expr::Var { - span, + span: span.clone(), value, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; loop { let mut span = self.tok.1.clone(); @@ -1291,10 +1319,10 @@ impl<'source> Parser<'source> { ); } refr = Expr::RefDot { - span, + span: span.clone(), refr: Ref::new(refr), - field: Self::span_and_value(field), - eidx: self.next_eidx(), + field: Some(Self::span_and_value(field)), + eidx: self.next_eidx(&span), }; } "[" => { @@ -1303,9 +1331,9 @@ impl<'source> Parser<'source> { TokenKind::String => { let (span, value) = Self::span_and_value(self.tok.1.clone()); Expr::String { - span, + span: span.clone(), value, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), } } _ => { @@ -1320,10 +1348,10 @@ impl<'source> Parser<'source> { self.expect("]", "while parsing bracketed reference")?; span.end = self.end; refr = Expr::RefBrack { - span, + span: span.clone(), refr: Ref::new(refr), index: Ref::new(index), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } _ => break, @@ -1349,9 +1377,9 @@ impl<'source> Parser<'source> { } let (span, value) = Self::span_and_value(v); Expr::Var { - span, + span: span.clone(), value, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), } } else { return Err(self.source.error( @@ -1394,10 +1422,10 @@ impl<'source> Parser<'source> { ); } term = Expr::RefDot { - span, + span: span.clone(), refr: Ref::new(term), - field: Self::span_and_value(field), - eidx: self.next_eidx(), + field: Some(Self::span_and_value(field)), + eidx: self.next_eidx(&span), }; } "[" => { @@ -1406,10 +1434,10 @@ impl<'source> Parser<'source> { span.end = self.end; self.expect("]", "while parsing bracketed reference")?; term = Expr::RefBrack { - span, + span: span.clone(), refr: Ref::new(term), index: Ref::new(index), - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }; } _ => break, @@ -1443,7 +1471,7 @@ impl<'source> Parser<'source> { span.end = self.end; Ok(RuleHead::Func { - span, + span: span.clone(), refr: rule_ref, args, assign, @@ -1714,9 +1742,9 @@ impl<'source> Parser<'source> { .map(|a| { let (span, value) = Self::span_and_value(a); Ref::new(Expr::Var { - span, + span: span.clone(), value, - eidx: self.next_eidx(), + eidx: self.next_eidx(&span), }) }) .collect(), @@ -1946,6 +1974,12 @@ impl<'source> Parser<'source> { } } + // Sort expr_positions by (line, col) for binary search + let mut expr_positions = self.expr_positions.clone(); + expr_positions.sort_unstable_by(|(line1, col1, _), (line2, col2, _)| { + line1.cmp(line2).then(col1.cmp(col2)) + }); + let m = Module { package, imports, @@ -1955,6 +1989,7 @@ impl<'source> Parser<'source> { num_expressions: self.eidx, num_statements: self.sidx, num_queries: self.qidx, + expr_positions, }; #[cfg(debug_assertions)] diff --git a/src/schema.rs b/src/schema.rs index bfa7886f..b5d604bb 100644 --- a/src/schema.rs +++ 
b/src/schema.rs @@ -214,6 +214,7 @@ pub mod validate; /// Schemas are typically created by deserializing from JSON Schema format: /// /// ```rust +/// use regorus::schema::Schema; /// use serde_json::json; /// /// // Create a schema from JSON @@ -253,12 +254,16 @@ pub mod validate; /// /// ## Simple String Schema /// ```rust +/// # use regorus::schema::Schema; +/// # use serde_json::json; /// let schema = json!({ "type": "string", "minLength": 1 }); /// let parsed: Schema = serde_json::from_value(schema).unwrap(); /// ``` /// /// ## Complex Object Schema /// ```rust +/// # use regorus::schema::Schema; +/// # use serde_json::json; /// let schema = json!({ /// "type": "object", /// "properties": { @@ -280,6 +285,8 @@ pub mod validate; /// /// ## Union Types with anyOf /// ```rust +/// # use regorus::schema::Schema; +/// # use serde_json::json; /// let schema = json!({ /// "anyOf": [ /// { "type": "string" }, @@ -323,8 +330,10 @@ impl Schema { /// Parse a JSON Schema document from a string into a `Schema` instance. /// Provides better error messages than `serde_json::from_str`. pub fn from_json_str(s: &str) -> Result> { + // Strip leading UTF-8 BOM if present to avoid confusing serde_json. + let schema_str = s.strip_prefix('\u{feff}').unwrap_or(s); let value: serde_json::Value = - serde_json::from_str(s).map_err(|e| format!("Failed to parse schema: {e}"))?; + serde_json::from_str(schema_str).map_err(|e| format!("Failed to parse schema: {e}"))?; Self::from_serde_json_value(value) } @@ -1057,7 +1066,7 @@ impl<'de> Deserialize<'de> for DiscriminatedSubobject { } } -#[cfg(test)] +#[cfg(all(test, feature = "jsonschema"))] mod tests { mod azure; mod suite; diff --git a/src/schema/meta.rs b/src/schema/meta.rs index 74435832..9cb0f0dc 100644 --- a/src/schema/meta.rs +++ b/src/schema/meta.rs @@ -3,10 +3,13 @@ #![allow(dead_code)] use crate::*; + +#[cfg(feature = "jsonschema")] use lazy_static::lazy_static; const META_SCHEMA: &str = include_str!("meta.schema.json"); +#[cfg(feature = "jsonschema")] lazy_static! { /// Lazy static JSON Schema validator for the Regorus meta-schema. /// This validator is initialized once and can be used to validate @@ -26,12 +29,14 @@ pub fn get_meta_schema() -> &'static str { /// Validates a schema definition against the Regorus meta-schema. /// Returns true if the schema is valid, false otherwise. +#[cfg(feature = "jsonschema")] pub fn validate_schema(schema: &serde_json::Value) -> bool { META_SCHEMA_VALIDATOR.is_valid(schema) } /// Validates a schema definition against the Regorus meta-schema. /// Returns Ok(()) if valid, or Err with validation errors if invalid. +#[cfg(feature = "jsonschema")] pub fn validate_schema_detailed(schema: &serde_json::Value) -> Result<(), Vec> { if let jsonschema::BasicOutput::Invalid(errors) = META_SCHEMA_VALIDATOR.apply(schema).basic() { let msgs: alloc::collections::BTreeSet = errors @@ -45,6 +50,16 @@ pub fn validate_schema_detailed(schema: &serde_json::Value) -> Result<(), Vec bool { + false +} + +#[cfg(not(feature = "jsonschema"))] +pub fn validate_schema_detailed(_schema: &serde_json::Value) -> Result<(), Vec> { + Err(vec!["jsonschema feature disabled".into()]) +} + /// Validates a schema definition from a JSON string. /// Returns true if the schema is valid, false otherwise. 
pub fn validate_schema_str(schema_str: &str) -> bool { @@ -54,5 +69,5 @@ pub fn validate_schema_str(schema_str: &str) -> bool { } } -#[cfg(test)] +#[cfg(all(test, feature = "jsonschema"))] mod tests; diff --git a/src/schema/tests/validate/resource.rs b/src/schema/tests/validate/resource.rs index 3f54e5d9..cab59e4e 100644 --- a/src/schema/tests/validate/resource.rs +++ b/src/schema/tests/validate/resource.rs @@ -1245,6 +1245,7 @@ fn test_edge_cases_and_boundary_conditions() { ); } +#[cfg(all(feature = "arc", feature = "regex", feature = "jsonschema"))] #[test] fn test_concurrent_schema_validation_stress() { use std::sync::Arc; diff --git a/src/schema/validate.rs b/src/schema/validate.rs index 1e56e8e7..883c2fce 100644 --- a/src/schema/validate.rs +++ b/src/schema/validate.rs @@ -8,6 +8,7 @@ use crate::{ *, }; use alloc::collections::BTreeMap; +#[cfg(feature = "regex")] use regex::Regex; type String = Rc; @@ -250,17 +251,29 @@ impl SchemaValidator { // Check pattern constraint if let Some(pattern_str) = pattern { - let regex = - Regex::new(pattern_str).map_err(|e| ValidationError::InvalidPattern { - pattern: pattern_str.as_ref().into(), - error: e.to_string().into(), + #[cfg(feature = "regex")] + { + let regex = Regex::new(pattern_str).map_err(|e| { + ValidationError::InvalidPattern { + pattern: pattern_str.as_ref().into(), + error: e.to_string().into(), + } })?; - if !regex.is_match(string_value) { - return Err(ValidationError::PatternMismatch { - value: string_value.to_string().into(), + if !regex.is_match(string_value) { + return Err(ValidationError::PatternMismatch { + value: string_value.to_string().into(), + pattern: pattern_str.clone(), + path: path.into(), + }); + } + } + + #[cfg(not(feature = "regex"))] + { + return Err(ValidationError::InvalidPattern { pattern: pattern_str.clone(), - path: path.into(), + error: "regex feature disabled".into(), }); } } diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 4adb12f6..4723cdb5 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -3,3 +3,5 @@ mod interpreter; mod scheduler; +#[cfg(feature = "jsonschema")] +mod type_analysis; diff --git a/src/tests/type_analysis/mod.rs b/src/tests/type_analysis/mod.rs new file mode 100644 index 00000000..4bb568b7 --- /dev/null +++ b/src/tests/type_analysis/mod.rs @@ -0,0 +1,1043 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
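The YAML fixtures under `tests/type_analysis` deserialize into the case structures defined just below. A small hypothetical case showing the expected shape; the module, rule paths, and expected types here are invented for illustration and are not taken from the shipped fixtures:

```yaml
cases:
  - note: boolean rule over a schema-typed input
    modules:
      - |
        package example

        allow := input.value > 10
    input_schema:
      type: object
      properties:
        value: { type: integer }
    rules:
      - rule: data.example.allow
        type: Boolean
    exprs:
      - expr: input.value
        type: Integer
        provenance: SchemaInput
```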
+ +use crate::ast::{Expr, ExprRef, Literal, Module, Query, Ref, Rule, RuleHead}; +use crate::lexer::Source; +use crate::parser::Parser; +use crate::scheduler::Analyzer as SchedulerAnalyzer; +use crate::schema::Schema; +use crate::type_analysis::{ + ConstantValue, StructuralType, TypeAnalysisOptions, TypeAnalysisResult, TypeAnalyzer, + TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::utils::get_path_string; +use crate::value::Value; + +use super::interpreter::process_value; + +use alloc::{borrow::ToOwned, boxed::Box, collections::BTreeMap, format, string::String, vec::Vec}; +use anyhow::{anyhow, bail, Context, Result}; +use core::mem::discriminant; +use serde::Deserialize; +use std::collections::{HashMap, HashSet}; +use test_generator::test_resources; + +#[derive(Debug, Deserialize)] +struct TypeYamlTest { + cases: Vec, +} + +#[derive(Debug, Deserialize)] +struct TypeCase { + note: String, + modules: Vec, + #[serde(default)] + input_schema: Option, + #[serde(default)] + data_schema: Option, + #[serde(default)] + rules: Vec, + #[serde(default)] + exprs: Vec, + #[serde(default)] + diagnostics: Vec, +} + +#[derive(Debug, Deserialize)] +struct RuleExpectation { + rule: String, + #[serde(rename = "type")] + r#type: TypeExpectation, + #[serde(default)] + constant: Option, + #[serde(default)] + provenance: Option, + #[serde(default)] + schema_backed: Option, + #[serde(default)] + line: Option, + #[serde(default)] + col: Option, +} + +#[derive(Debug, Deserialize)] +struct ExprExpectation { + expr: String, + #[serde(rename = "type")] + r#type: TypeExpectation, + #[serde(default)] + constant: Option, + #[serde(default)] + provenance: Option, + #[serde(default)] + schema_backed: Option, + #[serde(default)] + line: Option, + #[serde(default)] + col: Option, +} + +#[derive(Debug, Deserialize)] +struct DiagnosticExpectation { + #[serde(default)] + kind: Option, + #[serde(default)] + message: Option, + #[serde(default)] + line: Option, + #[serde(default)] + col: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum TypeExpectation { + Simple(String), + Detailed(DetailedTypeExpectation), +} + +#[derive(Debug, Deserialize)] +struct DetailedTypeExpectation { + kind: String, + #[serde(default)] + element: Option>, + #[serde(default)] + #[serde(alias = "fields")] + properties: Option>, + #[serde(default)] + variants: Option>, +} + +#[derive(Clone)] +struct ExprInfo { + module_idx: u32, + expr: ExprRef, + text: String, + line: u32, + col: u32, +} + +#[derive(Clone)] +struct RuleInfo { + fact: TypeFact, + line: u32, + col: u32, +} + +fn yaml_test_impl(path: &str) -> Result<()> { + let yaml = std::fs::read_to_string(path) + .with_context(|| format!("failed to read yaml test file {path}"))?; + let test: TypeYamlTest = serde_yaml::from_str(&yaml) + .with_context(|| format!("failed to parse yaml test file {path}"))?; + + for case in test.cases.iter() { + run_case(case).with_context(|| format!("case `{}`", case.note))?; + } + + Ok(()) +} + +fn run_case(case: &TypeCase) -> Result<()> { + let mut sources = Vec::with_capacity(case.modules.len()); + let mut modules = Vec::with_capacity(case.modules.len()); + + for (idx, module_src) in case.modules.iter().enumerate() { + let source = Source::from_contents(format!("module_{idx}.rego"), module_src.clone())?; + let mut parser = Parser::new(&source)?; + parser.enable_rego_v1()?; + let module = parser.parse()?; + modules.push(Ref::new(module)); + sources.push(source); + } + + let schedule = SchedulerAnalyzer::new().analyze(&modules)?; + + let options = 
TypeAnalysisOptions { + input_schema: parse_optional_schema(case.input_schema.clone())?, + data_schema: parse_optional_schema(case.data_schema.clone())?, + loop_lookup: None, + entrypoints: None, + disable_function_generic_pass: true, + }; + + let analyzer = TypeAnalyzer::new(&modules, Some(&schedule), options); + let result = analyzer.analyze_modules(); + + let rule_map = if case.rules.is_empty() { + BTreeMap::new() + } else { + collect_rule_facts(&modules, &result)? + }; + let exprs = collect_exprs(&modules); + check_rules(case, &rule_map)?; + check_exprs(case, &exprs, &result)?; + check_diagnostics(case, &result)?; + + Ok(()) +} + +fn parse_optional_schema(raw: Option) -> Result> { + match raw { + Some(value) => { + let schema = Schema::from_serde_json_value(value) + .map_err(|e| anyhow!("failed to parse schema: {e}"))?; + Ok(Some(schema)) + } + None => Ok(None), + } +} + +fn collect_rule_facts( + modules: &[Ref], + result: &TypeAnalysisResult, +) -> Result> { + let mut rules = BTreeMap::new(); + + for (module_idx, module) in modules.iter().enumerate() { + let module_idx = module_idx as u32; + let module_path = get_path_string(module.package.refr.as_ref(), Some("data"))?; + for rule in &module.policy { + match rule.as_ref() { + Rule::Spec { head, .. } => { + if let Some((refr, span)) = rule_head_expression(head) { + let mut path = get_path_string(refr.as_ref(), None)?; + if !path.starts_with("data.") { + path = format!("{module_path}.{path}"); + } + let fact = resolve_rule_fact(result, module_idx, refr.eidx(), &path) + .with_context(|| { + format!( + "missing type fact for rule `{path}` (module {module_idx}, expr {})", + refr.eidx() + ) + })?; + rules.insert( + path, + RuleInfo { + fact, + line: span.line, + col: span.col, + }, + ); + } + } + Rule::Default { refr, span, .. 
} => { + let mut path = get_path_string(refr.as_ref(), None)?; + if !path.starts_with("data.") { + path = format!("{module_path}.{path}"); + } + let fact = resolve_rule_fact(result, module_idx, refr.eidx(), &path) + .with_context(|| { + format!( + "missing type fact for default rule `{path}` (module {module_idx}, expr {})", + refr.eidx() + ) + })?; + rules.insert( + path, + RuleInfo { + fact, + line: span.line, + col: span.col, + }, + ); + } + } + } + } + + Ok(rules) +} + +fn resolve_rule_fact( + result: &TypeAnalysisResult, + module_idx: u32, + expr_idx: u32, + path: &str, +) -> Option { + let expr_fact = result + .expressions + .facts + .get_expr(module_idx, expr_idx) + .cloned(); + + let summary_fact = result.rules.by_path.get(path).and_then(|summary| { + summary.aggregated_head_fact.clone().or_else(|| { + summary.definitions.iter().find_map(|definition| { + definition + .head_fact + .clone() + .or_else(|| definition.aggregated_head_fact.clone()) + }) + }) + }); + + match (expr_fact, summary_fact) { + (Some(expr), Some(summary)) => { + let expr_informative = descriptor_is_informative(&expr.descriptor); + let summary_informative = descriptor_is_informative(&summary.descriptor); + + if summary_informative && !expr_informative { + Some(merge_fact_with_constant(summary, &expr)) + } else { + Some(expr) + } + } + (Some(expr), None) => Some(expr), + (None, Some(summary)) => Some(summary), + (None, None) => None, + } +} + +fn descriptor_is_informative(descriptor: &TypeDescriptor) -> bool { + match descriptor { + TypeDescriptor::Structural(ty) => !structural_contains_unknownish(ty), + TypeDescriptor::Schema(_) => true, + } +} + +fn structural_contains_unknownish(ty: &StructuralType) -> bool { + match ty { + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Union(variants) => variants.iter().any(structural_contains_unknownish), + _ => false, + } +} + +fn merge_fact_with_constant(mut primary: TypeFact, fallback: &TypeFact) -> TypeFact { + if matches!(primary.constant, ConstantValue::Unknown) { + if let ConstantValue::Known(value) = &fallback.constant { + primary = primary.with_constant(ConstantValue::known(value.clone())); + } + } + + if primary.origins.is_empty() && !fallback.origins.is_empty() { + primary = primary.with_origins(fallback.origins.clone()); + } + + primary +} + +fn rule_head_expression(head: &RuleHead) -> Option<(&ExprRef, &crate::lexer::Span)> { + match head { + RuleHead::Compr { refr, span, .. } + | RuleHead::Set { refr, span, .. } + | RuleHead::Func { refr, span, .. } => Some((refr, span)), + } +} + +fn collect_exprs(modules: &[Ref]) -> Vec { + let mut exprs = Vec::new(); + let mut visited: HashSet<(u32, u32)> = HashSet::new(); + + for (module_idx, module) in modules.iter().enumerate() { + let module_idx = module_idx as u32; + for rule in &module.policy { + match rule.as_ref() { + Rule::Spec { head, bodies, .. } => { + if let Some((refr, _)) = rule_head_expression(head) { + record_expr(&mut exprs, &mut visited, module_idx, refr); + } + + collect_rule_head(head, module_idx, &mut exprs, &mut visited); + + for body in bodies { + if let Some(assign) = &body.assign { + record_expr(&mut exprs, &mut visited, module_idx, &assign.value); + collect_expr( + assign.value.clone(), + module_idx, + &mut exprs, + &mut visited, + ); + } + collect_query(&body.query, module_idx, &mut exprs, &mut visited); + } + } + Rule::Default { + refr, value, span, .. 
+ } => { + record_expr(&mut exprs, &mut visited, module_idx, refr); + collect_expr(refr.clone(), module_idx, &mut exprs, &mut visited); + record_expr(&mut exprs, &mut visited, module_idx, value); + collect_expr(value.clone(), module_idx, &mut exprs, &mut visited); + exprs.push(ExprInfo { + module_idx, + expr: value.clone(), + text: span.text().trim().to_owned(), + line: span.line, + col: span.col, + }); + } + } + } + } + + exprs +} + +fn collect_rule_head( + head: &RuleHead, + module_idx: u32, + exprs: &mut Vec, + visited: &mut HashSet<(u32, u32)>, +) { + match head { + RuleHead::Compr { assign, .. } | RuleHead::Func { assign, .. } => { + if let Some(assign) = assign { + record_expr(exprs, visited, module_idx, &assign.value); + collect_expr(assign.value.clone(), module_idx, exprs, visited); + } + } + RuleHead::Set { key, .. } => { + if let Some(key) = key { + record_expr(exprs, visited, module_idx, key); + collect_expr(key.clone(), module_idx, exprs, visited); + } + } + } +} + +fn collect_query( + query: &Ref, + module_idx: u32, + exprs: &mut Vec, + visited: &mut HashSet<(u32, u32)>, +) { + for stmt in &query.stmts { + for with in &stmt.with_mods { + record_expr(exprs, visited, module_idx, &with.refr); + collect_expr(with.refr.clone(), module_idx, exprs, visited); + record_expr(exprs, visited, module_idx, &with.r#as); + collect_expr(with.r#as.clone(), module_idx, exprs, visited); + } + collect_literal(&stmt.literal, module_idx, exprs, visited); + } +} + +fn collect_literal( + literal: &Literal, + module_idx: u32, + exprs: &mut Vec, + visited: &mut HashSet<(u32, u32)>, +) { + match literal { + Literal::SomeVars { .. } => {} + Literal::SomeIn { + key, + value, + collection, + .. + } => { + if let Some(k) = key { + record_expr(exprs, visited, module_idx, k); + collect_expr(k.clone(), module_idx, exprs, visited); + } + record_expr(exprs, visited, module_idx, value); + collect_expr(value.clone(), module_idx, exprs, visited); + record_expr(exprs, visited, module_idx, collection); + collect_expr(collection.clone(), module_idx, exprs, visited); + } + Literal::Expr { expr, .. } | Literal::NotExpr { expr, .. } => { + record_expr(exprs, visited, module_idx, expr); + collect_expr(expr.clone(), module_idx, exprs, visited); + } + Literal::Every { domain, query, .. } => { + record_expr(exprs, visited, module_idx, domain); + collect_expr(domain.clone(), module_idx, exprs, visited); + collect_query(query, module_idx, exprs, visited); + } + } +} + +fn record_expr( + exprs: &mut Vec, + visited: &mut HashSet<(u32, u32)>, + module_idx: u32, + expr: &ExprRef, +) { + let key = (module_idx, expr.eidx()); + if !visited.insert(key) { + return; + } + + let span = expr.span(); + exprs.push(ExprInfo { + module_idx, + expr: expr.clone(), + text: span.text().trim().to_owned(), + line: span.line, + col: span.col, + }); +} + +fn collect_expr( + expr: ExprRef, + module_idx: u32, + exprs: &mut Vec, + visited: &mut HashSet<(u32, u32)>, +) { + match expr.as_ref() { + Expr::Array { items, .. } | Expr::Set { items, .. } => { + for item in items { + record_expr(exprs, visited, module_idx, item); + collect_expr(item.clone(), module_idx, exprs, visited); + } + } + Expr::Object { fields, .. } => { + for (_, key_expr, value_expr) in fields { + record_expr(exprs, visited, module_idx, key_expr); + collect_expr(key_expr.clone(), module_idx, exprs, visited); + record_expr(exprs, visited, module_idx, value_expr); + collect_expr(value_expr.clone(), module_idx, exprs, visited); + } + } + Expr::ArrayCompr { term, query, .. 
} | Expr::SetCompr { term, query, .. } => { + record_expr(exprs, visited, module_idx, term); + collect_expr(term.clone(), module_idx, exprs, visited); + collect_query(query, module_idx, exprs, visited); + } + Expr::ObjectCompr { + key, value, query, .. + } => { + record_expr(exprs, visited, module_idx, key); + collect_expr(key.clone(), module_idx, exprs, visited); + record_expr(exprs, visited, module_idx, value); + collect_expr(value.clone(), module_idx, exprs, visited); + collect_query(query, module_idx, exprs, visited); + } + Expr::Call { fcn, params, .. } => { + record_expr(exprs, visited, module_idx, fcn); + collect_expr(fcn.clone(), module_idx, exprs, visited); + for param in params { + record_expr(exprs, visited, module_idx, param); + collect_expr(param.clone(), module_idx, exprs, visited); + } + } + Expr::UnaryExpr { expr: inner, .. } => { + record_expr(exprs, visited, module_idx, inner); + collect_expr(inner.clone(), module_idx, exprs, visited); + } + Expr::RefDot { refr, .. } | Expr::RefBrack { refr, .. } => { + record_expr(exprs, visited, module_idx, refr); + collect_expr(refr.clone(), module_idx, exprs, visited); + if let Expr::RefBrack { index, .. } = expr.as_ref() { + record_expr(exprs, visited, module_idx, index); + collect_expr(index.clone(), module_idx, exprs, visited); + } + } + Expr::BinExpr { lhs, rhs, .. } + | Expr::BoolExpr { lhs, rhs, .. } + | Expr::ArithExpr { lhs, rhs, .. } => { + record_expr(exprs, visited, module_idx, lhs); + collect_expr(lhs.clone(), module_idx, exprs, visited); + record_expr(exprs, visited, module_idx, rhs); + collect_expr(rhs.clone(), module_idx, exprs, visited); + } + #[cfg(feature = "rego-extensions")] + Expr::OrExpr { lhs, rhs, .. } => { + record_expr(exprs, visited, module_idx, lhs); + collect_expr(lhs.clone(), module_idx, exprs, visited); + record_expr(exprs, visited, module_idx, rhs); + collect_expr(rhs.clone(), module_idx, exprs, visited); + } + Expr::AssignExpr { lhs, rhs, .. } => { + record_expr(exprs, visited, module_idx, lhs); + collect_expr(lhs.clone(), module_idx, exprs, visited); + record_expr(exprs, visited, module_idx, rhs); + collect_expr(rhs.clone(), module_idx, exprs, visited); + } + Expr::Membership { + key, + value, + collection, + .. + } => { + if let Some(key) = key { + record_expr(exprs, visited, module_idx, key); + collect_expr(key.clone(), module_idx, exprs, visited); + } + record_expr(exprs, visited, module_idx, value); + collect_expr(value.clone(), module_idx, exprs, visited); + record_expr(exprs, visited, module_idx, collection); + collect_expr(collection.clone(), module_idx, exprs, visited); + } + Expr::String { .. } + | Expr::RawString { .. } + | Expr::Number { .. } + | Expr::Bool { .. } + | Expr::Null { .. } + | Expr::Var { .. 
} => {} + } +} + +fn check_rules(case: &TypeCase, rules: &BTreeMap) -> Result<()> { + for expectation in &case.rules { + let info = rules.get(&expectation.rule).with_context(|| { + format!("rule `{}` not found in analysed modules", expectation.rule) + })?; + + if let Some(expected_line) = expectation.line { + if info.line != expected_line { + bail!( + "rule `{}` line mismatch: expected {expected_line}, found {}", + expectation.rule, + info.line + ); + } + } + + if let Some(expected_col) = expectation.col { + if info.col != expected_col { + bail!( + "rule `{}` column mismatch: expected {expected_col}, found {}", + expectation.rule, + info.col + ); + } + } + + check_fact( + &expectation.rule, + &expectation.r#type, + expectation, + &info.fact, + )?; + } + + Ok(()) +} + +fn check_exprs(case: &TypeCase, exprs: &[ExprInfo], result: &TypeAnalysisResult) -> Result<()> { + for expectation in &case.exprs { + let matches: Vec<&ExprInfo> = exprs + .iter() + .filter(|info| info.text == expectation.expr.trim()) + .collect(); + + let target = match (matches.len(), expectation.line, expectation.col) { + (0, _, _) => { + bail!("expression `{}` not found", expectation.expr.trim()); + } + (1, _, _) => matches[0], + (_, Some(line), col) => matches + .iter() + .find(|info| info.line == line && col.is_none_or(|c| info.col == c)) + .copied() + .with_context(|| { + format!( + "expression `{}` not found at line {line}{}", + expectation.expr.trim(), + col.map_or(String::new(), |c| format!(" col {c}")) + ) + })?, + (n, None, None) => { + bail!( + "expression `{}` matched {n} times; specify line/col to disambiguate", + expectation.expr.trim() + ); + } + _ => { + bail!( + "expression `{}` matched multiple times; specify both line and col", + expectation.expr.trim() + ); + } + }; + + let fact = result + .expressions + .facts + .get_expr(target.module_idx, target.expr.eidx()) + .with_context(|| { + format!( + "missing type fact for expression `{}` (module {}, expr {})", + expectation.expr.trim(), + target.module_idx, + target.expr.eidx() + ) + })?; + + check_fact( + &format!( + "expression `{}` (line {}, col {})", + expectation.expr.trim(), + target.line, + target.col + ), + &expectation.r#type, + expectation, + fact, + )?; + } + + Ok(()) +} + +fn check_diagnostics(case: &TypeCase, result: &TypeAnalysisResult) -> Result<()> { + if case.diagnostics.is_empty() { + if !result.diagnostics.is_empty() { + bail!( + "expected no diagnostics but found {} entries: {}", + result.diagnostics.len(), + summarize_diagnostics(&result.diagnostics) + ); + } + return Ok(()); + } + + if result.diagnostics.len() != case.diagnostics.len() { + bail!( + "diagnostic count mismatch: expected {}, found {} (actual: {})", + case.diagnostics.len(), + result.diagnostics.len(), + summarize_diagnostics(&result.diagnostics) + ); + } + + for (idx, (expected, actual)) in case + .diagnostics + .iter() + .zip(result.diagnostics.iter()) + .enumerate() + { + if let Some(kind) = &expected.kind { + let actual_kind = format_diag_kind(&actual.kind); + if actual_kind != kind.as_str() { + bail!( + "diagnostic #{idx} kind mismatch: expected `{}`, found `{actual_kind}`", + kind + ); + } + } + + if let Some(message) = &expected.message { + if !actual.message.contains(message) { + bail!( + "diagnostic #{idx} message mismatch: expected substring `{message}`, actual `{}`", + actual.message + ); + } + } + + if let Some(line) = expected.line { + if actual.line != line { + bail!( + "diagnostic #{idx} line mismatch: expected {line}, found {}", + actual.line + ); + } + } + + 
if let Some(col) = expected.col { + if actual.col != col { + bail!( + "diagnostic #{idx} column mismatch: expected {col}, found {}", + actual.col + ); + } + } + } + + Ok(()) +} + +fn summarize_diagnostics(diags: &[crate::type_analysis::TypeDiagnostic]) -> String { + diags + .iter() + .map(|diag| { + format!( + "{}:{} {}: {}", + diag.line, + diag.col, + format_diag_kind(&diag.kind), + diag.message + ) + }) + .collect::>() + .join("; ") +} + +fn check_fact( + label: &str, + expected_type: &TypeExpectation, + expectation: &T, + fact: &TypeFact, +) -> Result<()> +where + T: FactExpectation, +{ + expected_type + .matches(fact.descriptor()) + .with_context(|| format!("{label}: type mismatch"))?; + + if let Some(expected) = expectation.expected_constant()? { + match &fact.constant { + ConstantValue::Known(actual) => { + let processed = process_value(&expected)?; + if processed != *actual { + bail!( + "{label}: constant mismatch. expected {}, found {}", + serde_yaml::to_string(&processed)?, + serde_yaml::to_string(actual)? + ); + } + } + ConstantValue::Unknown => { + bail!("{label}: expected constant value but analysis returned unknown"); + } + } + } + + if let Some(expected_schema) = expectation.expect_schema_backed() { + let is_schema = matches!(fact.descriptor, TypeDescriptor::Schema(_)); + if is_schema != expected_schema { + bail!("{label}: schema-backed mismatch. expected {expected_schema}, found {is_schema}"); + } + } + + if let Some(expected_prov) = expectation.expected_provenance()? { + if discriminant(&fact.provenance) != discriminant(&expected_prov) { + bail!( + "{label}: provenance mismatch. expected {:?}, found {:?}", + expected_prov, + fact.provenance + ); + } + } + + Ok(()) +} + +trait FactExpectation { + fn expected_constant(&self) -> Result>; + fn expected_provenance(&self) -> Result>; + fn expect_schema_backed(&self) -> Option; +} + +impl FactExpectation for RuleExpectation { + fn expected_constant(&self) -> Result> { + Ok(self.constant.clone()) + } + + fn expected_provenance(&self) -> Result> { + parse_provenance(self.provenance.as_deref()) + } + + fn expect_schema_backed(&self) -> Option { + self.schema_backed + } +} + +impl FactExpectation for ExprExpectation { + fn expected_constant(&self) -> Result> { + Ok(self.constant.clone()) + } + + fn expected_provenance(&self) -> Result> { + parse_provenance(self.provenance.as_deref()) + } + + fn expect_schema_backed(&self) -> Option { + self.schema_backed + } +} + +fn parse_provenance(raw: Option<&str>) -> Result> { + let Some(raw) = raw else { + return Ok(None); + }; + + let prov = match raw { + "SchemaInput" => TypeProvenance::SchemaInput, + "SchemaData" => TypeProvenance::SchemaData, + "Literal" => TypeProvenance::Literal, + "Assignment" => TypeProvenance::Assignment, + "Propagated" => TypeProvenance::Propagated, + "Builtin" => TypeProvenance::Builtin, + "Rule" => TypeProvenance::Rule, + "Unknown" => TypeProvenance::Unknown, + other => bail!("unknown provenance `{other}`"), + }; + + Ok(Some(prov)) +} + +impl TypeExpectation { + fn matches(&self, descriptor: &TypeDescriptor) -> Result<()> { + match self { + TypeExpectation::Simple(name) => match_simple_type(name, descriptor), + TypeExpectation::Detailed(detail) => match_detailed_type(detail, descriptor), + } + } +} + +fn match_simple_type(expected: &str, descriptor: &TypeDescriptor) -> Result<()> { + let expected = expected.trim(); + let actual = descriptor_kind(descriptor); + if !equals_ignore_ascii_case(expected, actual) { + bail!("expected type `{expected}`, found `{actual}`"); + 
} + Ok(()) +} + +fn match_detailed_type( + detail: &DetailedTypeExpectation, + descriptor: &TypeDescriptor, +) -> Result<()> { + let actual = structural_view(descriptor); + let kind = detail.kind.trim(); + + match kind.to_ascii_lowercase().as_str() { + "any" => { + if !matches!(actual, StructuralType::Any) { + bail!("expected Any but found {}", structural_kind_name(&actual)); + } + } + "boolean" => require_kind("Boolean", &actual)?, + "number" => require_kind("Number", &actual)?, + "integer" => require_kind("Integer", &actual)?, + "string" => require_kind("String", &actual)?, + "null" => require_kind("Null", &actual)?, + "array" => { + let StructuralType::Array(inner) = actual else { + bail!("expected Array but found {}", structural_kind_name(&actual)); + }; + if let Some(expected_inner) = &detail.element { + expected_inner.matches(&TypeDescriptor::structural((*inner).clone()))?; + } + } + "set" => { + let StructuralType::Set(inner) = actual else { + bail!("expected Set but found {}", structural_kind_name(&actual)); + }; + if let Some(expected_inner) = &detail.element { + expected_inner.matches(&TypeDescriptor::structural((*inner).clone()))?; + } + } + "object" => { + let StructuralType::Object(shape) = actual else { + bail!( + "expected Object but found {}", + structural_kind_name(&actual) + ); + }; + if let Some(props) = &detail.properties { + for (name, expected_type) in props { + let Some(actual_field) = shape.fields.get(name) else { + bail!("expected object field `{name}` not found"); + }; + expected_type.matches(&TypeDescriptor::structural(actual_field.clone()))?; + } + } + } + "union" => { + let StructuralType::Union(actual_variants) = actual else { + bail!("expected Union but found {}", structural_kind_name(&actual)); + }; + + if let Some(expected_variants) = &detail.variants { + if actual_variants.len() != expected_variants.len() { + bail!( + "expected {} union variants but found {}", + expected_variants.len(), + actual_variants.len() + ); + } + + let mut remaining: Vec = actual_variants.clone(); + + for expected_variant in expected_variants { + let position = remaining.iter().position(|candidate| { + expected_variant + .matches(&TypeDescriptor::structural(candidate.clone())) + .is_ok() + }); + + if let Some(idx) = position { + remaining.remove(idx); + } else { + bail!( + "union variant {:?} not found in actual type", + expected_variant + ); + } + } + } + } + other => { + bail!("unknown detailed type kind `{other}`"); + } + } + + Ok(()) +} + +fn structural_view(descriptor: &TypeDescriptor) -> StructuralType { + match descriptor { + TypeDescriptor::Structural(ty) => ty.clone(), + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + } +} + +fn descriptor_kind(descriptor: &TypeDescriptor) -> &'static str { + match descriptor { + TypeDescriptor::Structural(ty) => structural_kind_name(ty), + TypeDescriptor::Schema(_) => "Schema", + } +} + +fn structural_kind_name(ty: &StructuralType) -> &'static str { + match ty { + StructuralType::Any => "Any", + StructuralType::Boolean => "Boolean", + StructuralType::Number => "Number", + StructuralType::Integer => "Integer", + StructuralType::String => "String", + StructuralType::Null => "Null", + StructuralType::Array(_) => "Array", + StructuralType::Set(_) => "Set", + StructuralType::Object(_) => "Object", + StructuralType::Union(_) => "Union", + StructuralType::Enum(_) => "Enum", + StructuralType::Unknown => "Unknown", + } +} + +fn require_kind(expected: &str, actual: &StructuralType) -> Result<()> { + if 
!equals_ignore_ascii_case(expected, structural_kind_name(actual)) { + bail!( + "expected {expected} but found {}", + structural_kind_name(actual) + ); + } + Ok(()) +} + +fn equals_ignore_ascii_case(a: &str, b: &str) -> bool { + a.eq_ignore_ascii_case(b) +} + +fn format_diag_kind(kind: &crate::type_analysis::TypeDiagnosticKind) -> &'static str { + match kind { + crate::type_analysis::TypeDiagnosticKind::SchemaViolation => "SchemaViolation", + crate::type_analysis::TypeDiagnosticKind::InternalError => "InternalError", + crate::type_analysis::TypeDiagnosticKind::TypeMismatch => "TypeMismatch", + crate::type_analysis::TypeDiagnosticKind::UnreachableStatement => "UnreachableStatement", + } +} + +trait DescriptorExt { + fn descriptor(&self) -> &TypeDescriptor; +} + +impl DescriptorExt for TypeFact { + fn descriptor(&self) -> &TypeDescriptor { + &self.descriptor + } +} + +#[test_resources("tests/type_analysis/**/*.yaml")] +fn run(path: &str) { + yaml_test_impl(path).unwrap(); +} diff --git a/src/type_analysis.rs b/src/type_analysis.rs new file mode 100644 index 00000000..6825499f --- /dev/null +++ b/src/type_analysis.rs @@ -0,0 +1,47 @@ +//! Static type analysis and constant propagation for Regorus queries. +//! +//! The type analysis module walks Rego queries following the execution +//! order computed by the scheduler. For every expression it records a +//! lightweight type descriptor together with constant information when the +//! value can be statically determined. The results are stored in +//! [`Lookup`](crate::lookup::Lookup) tables so that other subsystems (the +//! VSĀ Code plugin, validation passes, or even the interpreter itself in the +//! future) can reuse the inferred information without mutating the AST. +//! +//! The implementation purposefully reuses existing building blocks: +//! +//! * [`schema`](crate::schema) provides the authoritative types for +//! `input` and `data` roots. +//! * [`lookup`](crate::lookup::Lookup) stores per-expression facts so that +//! we never need to modify nodes inside `ast`. +//! * [`scheduler`](crate::scheduler::Schedule) dictates the order in which +//! statements are analysed, matching the interpreter's execution model. +//! +//! The module is split across a couple of dedicated files so the concerns +//! remain focused: +//! +//! * `model.rs` contains the type representations shared across the +//! analysis. +//! * `context.rs` keeps the lookup tables and scoped environments used +//! while walking queries. +//! * `constants.rs` exposes a tiny helper for tracking constant values. +//! * `propagation/` contains the analyser pipeline split across smaller +//! units so orchestration, expression handling, and helpers stay focused. 
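Driving the analyzer directly, rather than through `Engine`, is how the in-crate YAML harness exercises it. A condensed, crate-internal sketch of that pipeline; the module source and file name are illustrative, and the options mirror the ones the test harness constructs:

```rust
use crate::ast::Ref;
use crate::lexer::Source;
use crate::parser::Parser;
use crate::scheduler::Analyzer as SchedulerAnalyzer;
use crate::type_analysis::{TypeAnalysisOptions, TypeAnalyzer};
use anyhow::Result;

fn analyze_single_module(rego: &str) -> Result<()> {
    // Parse the module exactly like the YAML test harness does.
    let source = Source::from_contents("example.rego".to_string(), rego.to_string())?;
    let mut parser = Parser::new(&source)?;
    parser.enable_rego_v1()?;
    let modules = vec![Ref::new(parser.parse()?)];

    // The scheduler fixes the statement order that the analyzer walks.
    let schedule = SchedulerAnalyzer::new().analyze(&modules)?;

    let options = TypeAnalysisOptions {
        input_schema: None,
        data_schema: None,
        loop_lookup: None,
        entrypoints: None,
        disable_function_generic_pass: true,
    };

    // Facts end up in lookup tables keyed by (module, expression) indices;
    // rule summaries are keyed by paths such as "data.example.allow".
    let result = TypeAnalyzer::new(&modules, Some(&schedule), options).analyze_modules();
    println!("{} diagnostics", result.diagnostics.len());
    Ok(())
}
```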
+ +pub mod builtins; +pub mod constants; +pub mod context; +pub mod model; +pub mod propagation; +pub mod result; +pub(crate) mod value_utils; + +pub use constants::{ConstantFact, ConstantStore}; +pub use context::{LookupContext, ScopedBindings}; +pub use model::{ + ConstantValue, HybridType, HybridTypeKind, PathSegment, RuleAnalysis, RuleConstantState, + SourceOrigin, SourceRoot, StructuralObjectShape, StructuralType, TypeDescriptor, + TypeDiagnostic, TypeDiagnosticKind, TypeFact, TypeProvenance, +}; +pub use propagation::{AnalysisState, TypeAnalysisOptions, TypeAnalyzer}; +pub use result::TypeAnalysisResult; diff --git a/src/type_analysis/builtins/builtins.json b/src/type_analysis/builtins/builtins.json new file mode 100644 index 00000000..5f3f05d3 --- /dev/null +++ b/src/type_analysis/builtins/builtins.json @@ -0,0 +1,260 @@ +{ + "version": 1, + "groups": [ + { + "name": "core", + "builtins": [ + { "name": "abs", "return": { "kind": "number" }, "params": [ { "kind": "number" } ] }, + { "name": "ceil", "return": { "kind": "number" }, "params": [ { "kind": "number" } ] }, + { "name": "floor", "return": { "kind": "number" }, "params": [ { "kind": "number" } ] }, + { "name": "numbers.range", "return": { "kind": "array_any" }, "params": [ { "kind": "integer" }, { "kind": "integer" } ] }, + { "name": "numbers.range_step", "return": { "kind": "array_any" }, "params": [ { "kind": "integer" }, { "kind": "integer" }, { "kind": "integer" } ] }, + { "name": "round", "return": { "kind": "number" }, "params": [ { "kind": "number" } ] }, + { "name": "bits.and", "return": { "kind": "integer" }, "params": [ { "kind": "integer" }, { "kind": "integer" } ] }, + { "name": "bits.lsh", "return": { "kind": "integer" }, "params": [ { "kind": "integer" }, { "kind": "integer" } ] }, + { "name": "bits.negate", "return": { "kind": "integer" }, "params": [ { "kind": "integer" } ] }, + { "name": "bits.or", "return": { "kind": "integer" }, "params": [ { "kind": "integer" }, { "kind": "integer" } ] }, + { "name": "bits.rsh", "return": { "kind": "integer" }, "params": [ { "kind": "integer" }, { "kind": "integer" } ] }, + { "name": "bits.xor", "return": { "kind": "integer" }, "params": [ { "kind": "integer" }, { "kind": "integer" } ] }, + + { "name": "array.concat", "return": { "kind": "array_any" }, "params": [ { "kind": "array_any" }, { "kind": "array_any" } ] }, + { "name": "array.reverse", "return": { "kind": "array_any" }, "params": [ { "kind": "array_any" } ] }, + { "name": "array.slice", "return": { "kind": "array_any" }, "params": [ { "kind": "array_any" }, { "kind": "number" }, { "kind": "number" } ] }, + + { "name": "count", "return": { "kind": "integer" }, "params": [ { "kind": "any" } ] }, + { "name": "max", "return": { "kind": "collection_element", "index": 0 }, "params": [ { "kind": "any" } ] }, + { "name": "min", "return": { "kind": "collection_element", "index": 0 }, "params": [ { "kind": "any" } ] }, + { "name": "product", "return": { "kind": "number" }, "params": [ { "kind": "any" } ] }, + { "name": "sort", "return": { "kind": "array_any" }, "params": [ { "kind": "any" } ] }, + { "name": "sum", "return": { "kind": "number" }, "params": [ { "kind": "any" } ] }, + + { "name": "intersection", "return": { "kind": "set_any" }, "params": [ { "kind": "set_any" } ] }, + { "name": "union", "return": { "kind": "set_any" }, "params": [ { "kind": "set_any" } ] }, + + { "name": "concat", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "any" } ] }, + { "name": "contains", "return": { 
"kind": "boolean" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "endswith", "return": { "kind": "boolean" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "format_int", "return": { "kind": "string" }, "params": [ { "kind": "number" }, { "kind": "number" } ] }, + { "name": "indexof", "return": { "kind": "integer" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "indexof_n", "return": { "kind": "array_any" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "lower", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "replace", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "string" }, { "kind": "string" } ] }, + { "name": "split", "return": { "kind": "array_any" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "sprintf", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "array_any" } ] }, + { "name": "startswith", "return": { "kind": "boolean" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "strings.any_prefix_match", "return": { "kind": "boolean" }, "params": [ { "kind": "any" }, { "kind": "any" } ] }, + { "name": "strings.any_suffix_match", "return": { "kind": "boolean" }, "params": [ { "kind": "any" }, { "kind": "any" } ] }, + { "name": "strings.count", "return": { "kind": "integer" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "strings.replace_n", "return": { "kind": "string" }, "params": [ { "kind": "object_any" }, { "kind": "string" } ] }, + { "name": "strings.reverse", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "substring", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "number" }, { "kind": "number" } ] }, + { "name": "trim", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "trim_left", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "trim_prefix", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "trim_right", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "trim_space", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "trim_suffix", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "upper", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + + { "name": "json.filter", "return": { "kind": "same_as_argument", "index": 0 }, "params": [ { "kind": "any" }, { "kind": "any" } ] }, + { "name": "json.is_valid", "return": { "kind": "boolean" }, "params": [ { "kind": "string" } ] }, + { "name": "json.remove", "return": { "kind": "same_as_argument", "index": 0 }, "params": [ { "kind": "any" }, { "kind": "any" } ] }, + { "name": "json.marshal", "return": { "kind": "string" }, "params": [ { "kind": "any" } ] }, + { "name": "json.marshal_with_options", "return": { "kind": "string" }, "params": [ { "kind": "any" }, { "kind": "object_any" } ] }, + { "name": "json.unmarshal", "return": { "kind": "any" }, "params": [ { "kind": "string" } ] }, + { "name": "object.filter", "return": { "kind": "object_any" }, "params": [ { "kind": "object_any" }, { "kind": "any" } ] }, + { "name": "object.get", "return": { "kind": "any" }, "params": [ { "kind": 
"object_any" }, { "kind": "any" }, { "kind": "any" } ] }, + { "name": "object.keys", "return": { "kind": "array_any" }, "params": [ { "kind": "object_any" } ] }, + { "name": "object.remove", "return": { "kind": "object_any" }, "params": [ { "kind": "object_any" }, { "kind": "any" } ] }, + { "name": "object.subset", "return": { "kind": "boolean" }, "params": [ { "kind": "any" }, { "kind": "any" } ] }, + { "name": "object.union", "return": { "kind": "object_any" }, "params": [ { "kind": "object_any" }, { "kind": "object_any" } ] }, + { "name": "object.union_n", "return": { "kind": "object_any" }, "params": [ { "kind": "array_any" } ] }, + + { "name": "to_number", "return": { "kind": "number" }, "params": [ { "kind": "any" } ] }, + { "name": "trace", "purity": "impure", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "units.parse", "return": { "kind": "number" }, "params": [ { "kind": "string" } ] }, + { "name": "units.parse_bytes", "return": { "kind": "number" }, "params": [ { "kind": "string" } ] }, + + { "name": "is_array", "return": { "kind": "boolean" }, "params": [ { "kind": "any" } ] }, + { "name": "is_boolean", "return": { "kind": "boolean" }, "params": [ { "kind": "any" } ] }, + { "name": "is_null", "return": { "kind": "boolean" }, "params": [ { "kind": "any" } ] }, + { "name": "is_number", "return": { "kind": "boolean" }, "params": [ { "kind": "any" } ] }, + { "name": "is_object", "return": { "kind": "boolean" }, "params": [ { "kind": "any" } ] }, + { "name": "is_set", "return": { "kind": "boolean" }, "params": [ { "kind": "any" } ] }, + { "name": "is_string", "return": { "kind": "boolean" }, "params": [ { "kind": "any" } ] }, + { "name": "type_name", "return": { "kind": "string" }, "params": [ { "kind": "any" } ] } + ] + }, + { + "name": "std", + "requires": ["std"], + "builtins": [ + { + "name": "rand.intn", + "purity": "impure", + "cache": true, + "return": { "kind": "integer" }, + "params": [ { "kind": "string" }, { "kind": "number" } ] + } + ] + }, + { + "name": "base64", + "requires": ["base64"], + "builtins": [ + { "name": "base64.decode", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "base64.encode", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "base64.is_valid", "return": { "kind": "boolean" }, "params": [ { "kind": "string" } ] } + ] + }, + { + "name": "base64url", + "requires": ["base64url"], + "builtins": [ + { "name": "base64url.decode", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "base64url.encode", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "base64url.encode_no_pad", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] } + ] + }, + { + "name": "glob", + "requires": ["glob"], + "builtins": [ + { "name": "glob.match", "return": { "kind": "boolean" }, "params": [ { "kind": "string" }, { "kind": "any" }, { "kind": "string" } ] }, + { "name": "glob.quote_meta", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] } + ] + }, + { + "name": "graph", + "requires": ["graph"], + "builtins": [ + { "name": "graph.reachable", "return": { "kind": "set_any" }, "params": [ { "kind": "object_any" }, { "kind": "any" } ] }, + { "name": "graph.reachable_paths", "return": { "kind": "set_any" }, "params": [ { "kind": "object_any" }, { "kind": "any" } ] }, + { "name": "walk", "return": { "kind": "array_any" }, "params": [ { "kind": "any" } ] } + ] + }, + { + "name": "hex", + 
"requires": ["hex"], + "builtins": [ + { "name": "hex.decode", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "hex.encode", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] } + ] + }, + { + "name": "http", + "requires": ["http"], + "builtins": [ + { "name": "http.send", "purity": "impure", "return": { "kind": "any" }, "params": [ { "kind": "any" } ] } + ] + }, + { + "name": "net", + "requires": ["net"], + "builtins": [ + { "name": "net.cidr_is_valid", "return": { "kind": "boolean" }, "params": [ { "kind": "string" } ] }, + { "name": "net.cidr_contains", "return": { "kind": "boolean" }, "params": [ { "kind": "string" }, { "kind": "string" } ] } + ] + }, + { + "name": "opa-runtime", + "requires": ["opa-runtime"], + "builtins": [ + { + "name": "opa.runtime", + "purity": "impure", + "cache": true, + "return": { "kind": "object_any" }, + "params": [] + } + ] + }, + { + "name": "regex", + "requires": ["regex"], + "builtins": [ + { "name": "regex.find_all_string_submatch_n", "return": { "kind": "array_any" }, "params": [ { "kind": "string" }, { "kind": "string" }, { "kind": "number" } ] }, + { "name": "regex.find_n", "return": { "kind": "array_any" }, "params": [ { "kind": "string" }, { "kind": "string" }, { "kind": "number" } ] }, + { "name": "regex.is_valid", "return": { "kind": "boolean" }, "params": [ { "kind": "string" } ] }, + { "name": "regex.match", "return": { "kind": "boolean" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "regex.replace", "return": { "kind": "string" }, "params": [ { "kind": "string" }, { "kind": "string" }, { "kind": "string" } ] }, + { "name": "regex.split", "return": { "kind": "array_any" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "regex.template_match", "return": { "kind": "boolean" }, "params": [ { "kind": "string" }, { "kind": "string" }, { "kind": "string" }, { "kind": "string" } ] } + ] + }, + { + "name": "semver", + "requires": ["semver"], + "builtins": [ + { "name": "semver.compare", "return": { "kind": "integer" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "semver.is_valid", "return": { "kind": "boolean" }, "params": [ { "kind": "string" } ] } + ] + }, + { + "name": "time", + "requires": ["time"], + "builtins": [ + { "name": "time.add_date", "return": { "kind": "number" }, "params": [ { "kind": "any" }, { "kind": "number" }, { "kind": "number" }, { "kind": "number" } ] }, + { "name": "time.clock", "return": { "kind": "array_any" }, "params": [ { "kind": "any" } ] }, + { "name": "time.date", "return": { "kind": "array_any" }, "params": [ { "kind": "any" } ] }, + { "name": "time.diff", "return": { "kind": "array_any" }, "params": [ { "kind": "any" }, { "kind": "any" } ] }, + { "name": "time.format", "return": { "kind": "string" }, "params": [ { "kind": "any" } ] }, + { "name": "time.now_ns", "purity": "impure", "cache": true, "return": { "kind": "number" }, "params": [] }, + { "name": "time.parse_duration_ns", "return": { "kind": "number" }, "params": [ { "kind": "string" } ] }, + { "name": "time.parse_ns", "return": { "kind": "number" }, "params": [ { "kind": "string" }, { "kind": "string" } ] }, + { "name": "time.parse_rfc3339_ns", "return": { "kind": "number" }, "params": [ { "kind": "string" } ] }, + { "name": "time.weekday", "return": { "kind": "string" }, "params": [ { "kind": "any" } ] } + ] + }, + { + "name": "urlquery", + "requires": ["urlquery"], + "builtins": [ + { "name": "urlquery.decode", "return": { 
"kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "urlquery.decode_object", "return": { "kind": "object_any" }, "params": [ { "kind": "string" } ] }, + { "name": "urlquery.encode", "return": { "kind": "string" }, "params": [ { "kind": "string" } ] }, + { "name": "urlquery.encode_object", "return": { "kind": "string" }, "params": [ { "kind": "object_any" } ] } + ] + }, + { + "name": "uuid", + "requires": ["uuid"], + "builtins": [ + { "name": "uuid.parse", "return": { "kind": "object_any" }, "params": [ { "kind": "string" } ] }, + { "name": "uuid.rfc4122", "purity": "impure", "cache": true, "return": { "kind": "string" }, "params": [ { "kind": "string" } ] } + ] + }, + { + "name": "yaml", + "requires": ["yaml"], + "builtins": [ + { "name": "yaml.is_valid", "return": { "kind": "boolean" }, "params": [ { "kind": "string" } ] }, + { "name": "yaml.marshal", "return": { "kind": "string" }, "params": [ { "kind": "any" } ] }, + { "name": "yaml.unmarshal", "return": { "kind": "any" }, "params": [ { "kind": "string" } ] } + ] + }, + { + "name": "opa-testutil", + "requires": ["opa-testutil"], + "builtins": [ + { "name": "test.sleep", "purity": "impure", "return": { "kind": "null" }, "params": [ { "kind": "string" } ] } + ] + }, + { + "name": "jsonschema", + "requires": ["jsonschema"], + "builtins": [ + { + "name": "json.match_schema", + "return": { "kind": "array_any" }, + "params": [ { "kind": "any" }, { "kind": "any" } ] + }, + { + "name": "json.verify_schema", + "return": { "kind": "array_any" }, + "params": [ { "kind": "any" } ] + } + ] + } + ] +} diff --git a/src/type_analysis/builtins/catalog.rs b/src/type_analysis/builtins/catalog.rs new file mode 100644 index 00000000..e38da057 --- /dev/null +++ b/src/type_analysis/builtins/catalog.rs @@ -0,0 +1,129 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::borrow::ToOwned; +use alloc::string::String; +use alloc::vec::Vec; + +use serde::Deserialize; + +use super::spec::{BuiltinPurity, BuiltinTableError, BuiltinTypeTemplate}; + +#[derive(Debug, Deserialize, Default)] +pub(super) struct BuiltinCatalog { + #[serde(default)] + pub(super) groups: Vec, + #[serde(default)] + pub(super) builtins: Vec, +} + +#[derive(Debug, Deserialize, Default)] +pub(super) struct BuiltinGroupConfig { + #[allow(dead_code)] + pub(super) name: String, + #[serde(default)] + pub(super) requires: Vec, + #[serde(default)] + pub(super) builtins: Vec, +} + +impl BuiltinGroupConfig { + pub(super) fn is_enabled(&self) -> Result { + for feature in self.requires.iter() { + if !feature_active(feature)? 
{ + return Ok(false); + } + } + Ok(true) + } +} + +#[derive(Debug, Deserialize, Clone)] +pub(super) struct BuiltinConfig { + pub(super) name: String, + #[serde(default)] + pub(super) purity: Option, + #[serde(default)] + pub(super) cache: bool, + #[serde(default)] + pub(super) params: Vec, + #[serde(rename = "return")] + pub(super) return_template: TemplateConfig, +} + +#[derive(Debug, Deserialize, Clone, Copy)] +#[serde(rename_all = "snake_case")] +pub(super) enum PurityConfig { + Pure, + Impure, +} + +impl PurityConfig { + pub(super) fn into_purity(self) -> BuiltinPurity { + match self { + PurityConfig::Pure => BuiltinPurity::Pure, + PurityConfig::Impure => BuiltinPurity::Impure, + } + } +} + +#[derive(Debug, Deserialize, Clone, Copy)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub(super) enum TemplateConfig { + Any, + Boolean, + Number, + Integer, + String, + Null, + ArrayAny, + SetAny, + ObjectAny, + SameAsArgument { index: u8 }, + CollectionElement { index: u8 }, +} + +impl TemplateConfig { + pub(super) fn as_template(self) -> BuiltinTypeTemplate { + match self { + TemplateConfig::Any => BuiltinTypeTemplate::Any, + TemplateConfig::Boolean => BuiltinTypeTemplate::Boolean, + TemplateConfig::Number => BuiltinTypeTemplate::Number, + TemplateConfig::Integer => BuiltinTypeTemplate::Integer, + TemplateConfig::String => BuiltinTypeTemplate::String, + TemplateConfig::Null => BuiltinTypeTemplate::Null, + TemplateConfig::ArrayAny => BuiltinTypeTemplate::ArrayAny, + TemplateConfig::SetAny => BuiltinTypeTemplate::SetAny, + TemplateConfig::ObjectAny => BuiltinTypeTemplate::ObjectAny, + TemplateConfig::SameAsArgument { index } => BuiltinTypeTemplate::SameAsArgument(index), + TemplateConfig::CollectionElement { index } => { + BuiltinTypeTemplate::CollectionElement(index) + } + } + } +} + +pub(super) fn feature_active(feature: &str) -> Result { + match feature { + "" | "core" => Ok(true), + "azure_policy" => Ok(cfg!(feature = "azure_policy")), + "std" => Ok(cfg!(feature = "std")), + "jsonschema" => Ok(cfg!(feature = "jsonschema")), + "base64" => Ok(cfg!(feature = "base64")), + "base64url" => Ok(cfg!(feature = "base64url")), + "glob" => Ok(cfg!(feature = "glob")), + "graph" => Ok(cfg!(feature = "graph")), + "hex" => Ok(cfg!(feature = "hex")), + "http" => Ok(cfg!(feature = "http")), + "net" => Ok(cfg!(feature = "net")), + "opa-runtime" => Ok(cfg!(feature = "opa-runtime")), + "regex" => Ok(cfg!(feature = "regex")), + "semver" => Ok(cfg!(feature = "semver")), + "time" => Ok(cfg!(feature = "time")), + "urlquery" => Ok(cfg!(feature = "urlquery")), + "uuid" => Ok(cfg!(feature = "uuid")), + "yaml" => Ok(cfg!(feature = "yaml")), + "opa-testutil" => Ok(cfg!(feature = "opa-testutil")), + other => Err(BuiltinTableError::UnknownFeature(other.to_owned())), + } +} diff --git a/src/type_analysis/builtins/matching.rs b/src/type_analysis/builtins/matching.rs new file mode 100644 index 00000000..94d72b44 --- /dev/null +++ b/src/type_analysis/builtins/matching.rs @@ -0,0 +1,123 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
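+
+//! Checks whether inferred argument types satisfy the parameter templates
+//! declared for builtins (see `spec.rs` and `builtins.json`).
+//!
+//! A loose sketch of how a template check behaves (illustrative only; it uses
+//! the re-exports from `builtins/mod.rs` and `type_analysis`):
+//!
+//! ```ignore
+//! use regorus::type_analysis::builtins::{matches_template, BuiltinTypeTemplate};
+//! use regorus::type_analysis::{HybridType, StructuralType, TypeDescriptor, TypeFact, TypeProvenance};
+//!
+//! let arg = HybridType::from_fact(TypeFact::new(
+//!     TypeDescriptor::structural(StructuralType::String),
+//!     TypeProvenance::Literal,
+//! ));
+//! // A structural string satisfies the `String` template but not `Number`.
+//! assert!(matches_template(&arg, BuiltinTypeTemplate::String, &[]));
+//! assert!(!matches_template(&arg, BuiltinTypeTemplate::Number, &[]));
+//! ```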
+ +use alloc::vec::Vec; + +use crate::schema::Type as SchemaType; +use crate::type_analysis::model::{HybridType, SourceOrigin, StructuralType, TypeDescriptor}; + +use super::spec::BuiltinTypeTemplate; + +pub fn matches_template( + arg: &HybridType, + template: BuiltinTypeTemplate, + args: &[HybridType], +) -> bool { + match template { + BuiltinTypeTemplate::Any => true, + BuiltinTypeTemplate::Boolean => descriptor_matches(&arg.fact.descriptor, template), + BuiltinTypeTemplate::Number => descriptor_matches(&arg.fact.descriptor, template), + BuiltinTypeTemplate::Integer => descriptor_matches(&arg.fact.descriptor, template), + BuiltinTypeTemplate::String => descriptor_matches(&arg.fact.descriptor, template), + BuiltinTypeTemplate::Null => descriptor_matches(&arg.fact.descriptor, template), + BuiltinTypeTemplate::ArrayAny => descriptor_matches(&arg.fact.descriptor, template), + BuiltinTypeTemplate::SetAny => descriptor_matches(&arg.fact.descriptor, template), + BuiltinTypeTemplate::ObjectAny => descriptor_matches(&arg.fact.descriptor, template), + BuiltinTypeTemplate::SameAsArgument(idx) => args + .get(idx as usize) + .map(|other| descriptors_compatible(&arg.fact.descriptor, &other.fact.descriptor)) + .unwrap_or(true), + BuiltinTypeTemplate::CollectionElement(_) => true, + } +} + +fn descriptor_matches(descriptor: &TypeDescriptor, template: BuiltinTypeTemplate) -> bool { + match descriptor { + TypeDescriptor::Structural(structural) => structural_matches(structural, template), + TypeDescriptor::Schema(schema) => schema_matches(schema.as_type(), template), + } +} + +fn structural_matches(ty: &StructuralType, template: BuiltinTypeTemplate) -> bool { + match template { + BuiltinTypeTemplate::Any => true, + BuiltinTypeTemplate::Boolean => matches!(ty, StructuralType::Boolean | StructuralType::Any), + BuiltinTypeTemplate::Number => matches!( + ty, + StructuralType::Number | StructuralType::Integer | StructuralType::Any + ), + BuiltinTypeTemplate::Integer => matches!(ty, StructuralType::Integer | StructuralType::Any), + BuiltinTypeTemplate::String => matches!(ty, StructuralType::String | StructuralType::Any), + BuiltinTypeTemplate::Null => matches!(ty, StructuralType::Null | StructuralType::Any), + BuiltinTypeTemplate::ArrayAny => { + matches!(ty, StructuralType::Array(_) | StructuralType::Any) + } + BuiltinTypeTemplate::SetAny => matches!(ty, StructuralType::Set(_) | StructuralType::Any), + BuiltinTypeTemplate::ObjectAny => { + matches!(ty, StructuralType::Object(_) | StructuralType::Any) + } + BuiltinTypeTemplate::SameAsArgument(_) | BuiltinTypeTemplate::CollectionElement(_) => true, + } +} + +fn schema_matches(schema_type: &SchemaType, template: BuiltinTypeTemplate) -> bool { + match schema_type { + SchemaType::Any { .. } => true, + SchemaType::Boolean { .. } => matches!( + template, + BuiltinTypeTemplate::Any | BuiltinTypeTemplate::Boolean + ), + SchemaType::Integer { .. } => matches!( + template, + BuiltinTypeTemplate::Any | BuiltinTypeTemplate::Integer | BuiltinTypeTemplate::Number + ), + SchemaType::Number { .. } => matches!( + template, + BuiltinTypeTemplate::Any | BuiltinTypeTemplate::Number + ), + SchemaType::Null { .. } => matches!( + template, + BuiltinTypeTemplate::Any | BuiltinTypeTemplate::Null + ), + SchemaType::String { .. } => matches!( + template, + BuiltinTypeTemplate::Any | BuiltinTypeTemplate::String + ), + SchemaType::Array { items, .. 
} => { + matches!( + template, + BuiltinTypeTemplate::Any | BuiltinTypeTemplate::ArrayAny + ) || schema_matches(items.as_type(), template) + } + SchemaType::Set { items, .. } => { + matches!( + template, + BuiltinTypeTemplate::Any | BuiltinTypeTemplate::SetAny + ) || schema_matches(items.as_type(), template) + } + SchemaType::Object { .. } => matches!( + template, + BuiltinTypeTemplate::Any | BuiltinTypeTemplate::ObjectAny + ), + SchemaType::AnyOf(variants) => variants + .iter() + .any(|variant| schema_matches(variant.as_type(), template)), + SchemaType::Const { .. } | SchemaType::Enum { .. } => true, + } +} + +fn descriptors_compatible(lhs: &TypeDescriptor, rhs: &TypeDescriptor) -> bool { + matches!( + (lhs, rhs), + (TypeDescriptor::Structural(_), TypeDescriptor::Structural(_)) + | (TypeDescriptor::Schema(_), TypeDescriptor::Schema(_)) + ) +} + +pub fn combined_arg_origins(args: &[HybridType]) -> Vec { + let mut origins = Vec::new(); + for arg in args { + origins.extend(arg.fact.origins.iter().cloned()); + } + origins +} diff --git a/src/type_analysis/builtins/mod.rs b/src/type_analysis/builtins/mod.rs new file mode 100644 index 00000000..264f4c9d --- /dev/null +++ b/src/type_analysis/builtins/mod.rs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod catalog; +mod matching; +mod spec; +mod table; + +pub use matching::{combined_arg_origins, matches_template}; +pub use spec::return_descriptor; +pub use spec::{BuiltinPurity, BuiltinSpec, BuiltinTableError, BuiltinTypeTemplate}; +pub use table::{lookup, override_builtin_table, reset_builtin_table}; diff --git a/src/type_analysis/builtins/spec.rs b/src/type_analysis/builtins/spec.rs new file mode 100644 index 00000000..9ebcf30d --- /dev/null +++ b/src/type_analysis/builtins/spec.rs @@ -0,0 +1,260 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
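+
+//! Typed metadata (`BuiltinSpec`) describing builtin functions: purity, whether
+//! results must be cached, per-parameter templates, and a return-type template.
+//! The default entries are loaded from `builtins.json` via `catalog.rs` and
+//! `table.rs`.
+//!
+//! A small, illustrative sketch (the `lower` builtin is declared in
+//! `builtins.json` with a single string parameter and a string return):
+//!
+//! ```ignore
+//! use regorus::type_analysis::builtins::lookup;
+//!
+//! let spec = lookup("lower").expect("`lower` belongs to the always-enabled core group");
+//! assert_eq!(spec.param_count(), 1);
+//! assert!(spec.is_pure());
+//! ```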
+ +use alloc::borrow::ToOwned; +use alloc::boxed::Box; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; +use core::convert::TryFrom; +use core::fmt; + +use crate::schema::Type; +use crate::type_analysis::model::{ + HybridType, StructuralObjectShape, StructuralType, TypeDescriptor, +}; + +use super::catalog::{BuiltinConfig, PurityConfig, TemplateConfig}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BuiltinPurity { + Pure, + Impure, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BuiltinTypeTemplate { + Any, + Boolean, + Number, + Integer, + String, + Null, + ArrayAny, + SetAny, + ObjectAny, + SameAsArgument(u8), + CollectionElement(u8), +} + +fn descriptor_from_template(template: BuiltinTypeTemplate, args: &[HybridType]) -> TypeDescriptor { + match template { + BuiltinTypeTemplate::Any => TypeDescriptor::structural(StructuralType::Any), + BuiltinTypeTemplate::Boolean => TypeDescriptor::structural(StructuralType::Boolean), + BuiltinTypeTemplate::Number => TypeDescriptor::structural(StructuralType::Number), + BuiltinTypeTemplate::Integer => TypeDescriptor::structural(StructuralType::Integer), + BuiltinTypeTemplate::String => TypeDescriptor::structural(StructuralType::String), + BuiltinTypeTemplate::Null => TypeDescriptor::structural(StructuralType::Null), + BuiltinTypeTemplate::ArrayAny => { + TypeDescriptor::structural(StructuralType::Array(Box::new(StructuralType::Any))) + } + BuiltinTypeTemplate::SetAny => { + TypeDescriptor::structural(StructuralType::Set(Box::new(StructuralType::Any))) + } + BuiltinTypeTemplate::ObjectAny => { + TypeDescriptor::structural(StructuralType::Object(StructuralObjectShape::new())) + } + BuiltinTypeTemplate::SameAsArgument(idx) => args + .get(idx as usize) + .map(|arg| arg.fact.descriptor.clone()) + .unwrap_or_else(|| TypeDescriptor::structural(StructuralType::Any)), + BuiltinTypeTemplate::CollectionElement(idx) => args + .get(idx as usize) + .map(collection_element_descriptor) + .unwrap_or_else(|| TypeDescriptor::structural(StructuralType::Any)), + } +} + +fn collection_element_descriptor(arg: &HybridType) -> TypeDescriptor { + match &arg.fact.descriptor { + TypeDescriptor::Structural(structural) => match structural { + StructuralType::Array(inner) | StructuralType::Set(inner) => { + TypeDescriptor::structural((**inner).clone()) + } + _ => TypeDescriptor::structural(StructuralType::Any), + }, + TypeDescriptor::Schema(schema) => match schema.as_type() { + Type::Array { items, .. } | Type::Set { items, .. 
} => { +            TypeDescriptor::schema(items.clone()) +        } +        _ => TypeDescriptor::structural(StructuralType::Any), +        }, +    } +} + +#[derive(Clone, Debug)] +pub struct BuiltinSpec { +    purity: BuiltinPurity, +    return_template: BuiltinTypeTemplate, +    params: Option<Box<[BuiltinTypeTemplate]>>, +    param_count: u8, +    must_cache: bool, +} + +impl BuiltinSpec { +    fn from_parts( +        purity: BuiltinPurity, +        return_template: BuiltinTypeTemplate, +        params: Option<Box<[BuiltinTypeTemplate]>>, +        param_count: u8, +        must_cache: bool, +    ) -> Self { +        BuiltinSpec { +            purity, +            return_template, +            params, +            param_count, +            must_cache, +        } +    } + +    pub(super) fn from_config(name: &str, cfg: &BuiltinConfig) -> Result<Self, BuiltinTableError> { +        let params_vec: Vec<BuiltinTypeTemplate> = cfg +            .params +            .iter() +            .copied() +            .map(TemplateConfig::as_template) +            .collect(); + +        let param_len = params_vec.len(); +        let param_count = +            u8::try_from(param_len).map_err(|_| BuiltinTableError::TooManyParameters { +                builtin: name.to_owned(), +                count: param_len, +            })?; + +        let params = Some(params_vec.into_boxed_slice()); +        let purity = cfg.purity.unwrap_or(PurityConfig::Pure).into_purity(); + +        let spec = BuiltinSpec::from_parts( +            purity, +            cfg.return_template.as_template(), +            params, +            param_count, +            cfg.cache, +        ); + +        spec.validate_template_indices(name)?; +        Ok(spec) +    } + +    fn validate_template_indices(&self, name: &str) -> Result<(), BuiltinTableError> { +        let param_total = self.param_count as usize; + +        match self.return_template { +            BuiltinTypeTemplate::SameAsArgument(idx) +            | BuiltinTypeTemplate::CollectionElement(idx) +                if idx as usize >= param_total => +            { +                return Err(BuiltinTableError::InvalidTemplate { +                    builtin: name.to_owned(), +                    detail: format!( +                        "return template references argument {idx} but only {param_total} parameters defined", +                    ), +                }); +            } +            _ => {} +        } + +        if let Some(params) = &self.params { +            for (position, template) in params.iter().enumerate() { +                match template { +                    BuiltinTypeTemplate::SameAsArgument(idx) +                    | BuiltinTypeTemplate::CollectionElement(idx) +                        if *idx as usize >= param_total => +                    { +                        return Err(BuiltinTableError::InvalidTemplate { +                            builtin: name.to_owned(), +                            detail: format!( +                                "parameter template at index {position} references argument {idx} but only {param_total} parameters defined", +                            ), +                        }); +                    } +                    _ => {} +                } +            } +        } + +        Ok(()) +    } + +    pub const fn fallback(param_count: u8) -> Self { +        BuiltinSpec { +            purity: BuiltinPurity::Impure, +            return_template: BuiltinTypeTemplate::Any, +            params: None, +            param_count, +            must_cache: false, +        } +    } + +    pub fn return_descriptor(&self, args: &[HybridType]) -> TypeDescriptor { +        descriptor_from_template(self.return_template, args) +    } + +    pub fn params(&self) -> Option<&[BuiltinTypeTemplate]> { +        self.params.as_deref() +    } + +    pub fn param_count(&self) -> u8 { +        self.param_count +    } + +    pub fn is_pure(&self) -> bool { +        matches!(self.purity, BuiltinPurity::Pure) +    } + +    pub fn must_cache(&self) -> bool { +        self.must_cache +    } +} + +#[derive(Debug)] +pub enum BuiltinTableError { +    Parse(serde_json::Error), +    DuplicateBuiltin(String), +    UnknownFeature(String), +    TooManyParameters { builtin: String, count: usize }, +    InvalidTemplate { builtin: String, detail: String }, +} + +impl fmt::Display for BuiltinTableError { +    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +        match self { +            BuiltinTableError::Parse(err) => write!(f, "failed to parse builtin table: {err}"), +            BuiltinTableError::DuplicateBuiltin(name) => { +                write!(f, "duplicate builtin entry `{name}` in table") +            } +            BuiltinTableError::UnknownFeature(feature) => { +                write!(f, "unknown
feature `{feature}` referenced in builtin table") +            } +            BuiltinTableError::TooManyParameters { builtin, count } => write!( +                f, +                "builtin `{builtin}` declares {count} parameters which exceeds supported limit" +            ), +            BuiltinTableError::InvalidTemplate { builtin, detail } => { +                write!(f, "builtin `{builtin}` has invalid template: {detail}") +            } +        } +    } +} + +impl From<serde_json::Error> for BuiltinTableError { +    fn from(err: serde_json::Error) -> Self { +        BuiltinTableError::Parse(err) +    } +} + +#[cfg(feature = "std")] +impl std::error::Error for BuiltinTableError { +    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { +        match self { +            BuiltinTableError::Parse(err) => Some(err), +            _ => None, +        } +    } +} + +pub fn return_descriptor(template: BuiltinTypeTemplate, args: &[HybridType]) -> TypeDescriptor { +    descriptor_from_template(template, args) +} diff --git a/src/type_analysis/builtins/table.rs b/src/type_analysis/builtins/table.rs new file mode 100644 index 00000000..5fd1959f --- /dev/null +++ b/src/type_analysis/builtins/table.rs @@ -0,0 +1,112 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::borrow::ToOwned; +use alloc::boxed::Box; +use alloc::collections::BTreeMap; +use alloc::string::String; +use alloc::vec::Vec; +use core::ptr; +use core::sync::atomic::{AtomicPtr, Ordering}; + +use crate::builtins; + +use lazy_static::lazy_static; + +use super::catalog::{BuiltinCatalog, BuiltinGroupConfig}; +use super::spec::{BuiltinSpec, BuiltinTableError}; + +const DEFAULT_BUILTINS_JSON: &str = include_str!("./builtins.json"); + +#[derive(Default)] +pub(super) struct BuiltinTable { +    entries: BTreeMap<String, BuiltinSpec>, +} + +impl BuiltinTable { +    fn from_json(json: &str) -> Result<Self, BuiltinTableError> { +        let mut catalog: BuiltinCatalog = serde_json::from_str(json)?; + +        if !catalog.builtins.is_empty() { +            catalog.groups.push(BuiltinGroupConfig { +                name: "default".to_owned(), +                requires: Vec::new(), +                builtins: catalog.builtins, +            }); +            catalog.builtins = Vec::new(); +        } + +        let mut entries = BTreeMap::new(); + +        for group in catalog.groups.into_iter() { +            if !group.is_enabled()? { +                continue; +            } + +            for builtin in group.builtins.iter() { +                let spec = BuiltinSpec::from_config(&builtin.name, builtin)?; +                if entries.insert(builtin.name.clone(), spec).is_some() { +                    return Err(BuiltinTableError::DuplicateBuiltin(builtin.name.clone())); +                } +            } +        } + +        Ok(BuiltinTable { entries }) +    } + +    fn lookup(&self, name: &str) -> Option<BuiltinSpec> { +        self.entries.get(name).cloned() +    } +} + +lazy_static! { +    static ref DEFAULT_TABLE: BuiltinTable = BuiltinTable::from_json(DEFAULT_BUILTINS_JSON) +        .expect("failed to load default builtin specifications"); +} + +static CUSTOM_TABLE: AtomicPtr<BuiltinTable> = AtomicPtr::new(ptr::null_mut()); + +pub fn override_builtin_table(json: &str) -> Result<(), BuiltinTableError> { +    let table = Box::new(BuiltinTable::from_json(json)?); +    set_custom_table(Some(table)); +    Ok(()) +} + +pub fn reset_builtin_table() { +    set_custom_table(None); +} + +fn set_custom_table(table: Option<Box<BuiltinTable>>) { +    let new_ptr = match table { +        Some(table) => Box::into_raw(table), +        None => ptr::null_mut(), +    }; + +    let old_ptr = CUSTOM_TABLE.swap(new_ptr, Ordering::SeqCst); +    if !old_ptr.is_null() { +        // Safety: pointer was created with Box::into_raw.
+        unsafe { drop(Box::from_raw(old_ptr)) }; +    } +} + +fn active_table() -> &'static BuiltinTable { +    let custom_ptr = CUSTOM_TABLE.load(Ordering::SeqCst); +    if custom_ptr.is_null() { +        &DEFAULT_TABLE +    } else { +        // Safety: pointer originates from Box::into_raw and lives until reset/override. +        unsafe { &*custom_ptr } +    } +} + +pub fn lookup(name: &str) -> Option<BuiltinSpec> { +    if let Some(spec) = active_table().lookup(name) { +        return Some(spec); +    } + +    if let Some((_, nargs)) = builtins::BUILTINS.get(name) { +        return Some(BuiltinSpec::fallback(*nargs)); +    } + +    None +} diff --git a/src/type_analysis/constants.rs b/src/type_analysis/constants.rs new file mode 100644 index 00000000..2e808854 --- /dev/null +++ b/src/type_analysis/constants.rs @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +use crate::{lookup::Lookup, value::Value}; + +/// Constant facts are stored alongside type information so that other tooling +/// (for example the VS Code extension) can query them quickly without having +/// to recompute constants or evaluate expressions. +#[derive(Clone, Debug, Default, PartialEq)] +pub struct ConstantFact { +    pub value: Option<Value>, +} + +/// Lightweight wrapper around a [`Lookup`] to store constants per expression. +#[derive(Clone, Default, Debug)] +pub struct ConstantStore { +    table: Lookup<ConstantFact>, +} + +impl ConstantStore { +    pub fn new() -> Self { +        ConstantStore { +            table: Lookup::new(), +        } +    } + +    pub fn ensure_capacity(&mut self, module_idx: u32, expr_idx: u32) { +        self.table.ensure_capacity(module_idx, expr_idx); +    } + +    pub fn record(&mut self, module_idx: u32, expr_idx: u32, value: Option<Value>) { +        self.table.set(module_idx, expr_idx, ConstantFact { value }); +    } + +    pub fn get(&self, module_idx: u32, expr_idx: u32) -> Option<&ConstantFact> { +        self.table.get(module_idx, expr_idx) +    } + +    pub fn into_lookup(self) -> Lookup<ConstantFact> { +        self.table +    } +} diff --git a/src/type_analysis/context.rs b/src/type_analysis/context.rs new file mode 100644 index 00000000..0623b701 --- /dev/null +++ b/src/type_analysis/context.rs @@ -0,0 +1,213 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +use alloc::{borrow::ToOwned, collections::BTreeMap, string::String, vec::Vec}; + +use crate::lookup::Lookup; + +use super::model::{TypeFact, TypeProvenance}; + +/// Central storage for the types inferred for each expression.
+#[derive(Clone, Default, Debug)] +pub struct LookupContext { +    expr_types: Lookup<TypeFact>, +    expr_rule_references: Lookup<Vec<String>>, +    /// Current rule being analyzed (for dependency tracking) +    current_rule: Option<String>, +    /// Set of reachable rules (when entrypoint filtering is enabled) +    reachable_rules: BTreeMap<String, bool>, +    /// Dynamic reference patterns discovered during analysis +    dynamic_references: Vec<DynamicReferencePattern>, +} + +/// Pattern for a dynamic data/rule reference +#[derive(Clone, Debug)] +pub struct DynamicReferencePattern { +    /// Static prefix components (e.g., ["data", "pkg"]) +    pub static_prefix: Vec<String>, +    /// Full pattern with wildcards (e.g., "data.pkg.*.rule") +    pub pattern: String, +    /// Module and expression where this reference occurs +    pub location: (u32, u32), +} + +impl LookupContext { +    pub fn new() -> Self { +        LookupContext { +            expr_types: Lookup::new(), +            expr_rule_references: Lookup::new(), +            current_rule: None, +            reachable_rules: BTreeMap::new(), +            dynamic_references: Vec::new(), +        } +    } + +    pub fn ensure_expr_capacity(&mut self, module_idx: u32, max_expr_idx: u32) { +        // Ensure the module has at least an empty slot so cross-module lookups +        // cannot underflow, even when the module has zero expressions. +        if self.expr_types.module_len() <= module_idx as usize { +            self.expr_types.push_module(Vec::new()); +        } +        if self.expr_rule_references.module_len() <= module_idx as usize { +            self.expr_rule_references.push_module(Vec::new()); +        } + +        self.expr_types.ensure_capacity(module_idx, max_expr_idx); +        self.expr_rule_references +            .ensure_capacity(module_idx, max_expr_idx); +    } + +    pub fn record_expr(&mut self, module_idx: u32, expr_idx: u32, fact: TypeFact) { +        self.expr_types.set(module_idx, expr_idx, fact); +    } + +    pub fn get_expr(&self, module_idx: u32, expr_idx: u32) -> Option<&TypeFact> { +        self.expr_types.get_checked(module_idx, expr_idx) +    } + +    pub fn expr_types(&self) -> &Lookup<TypeFact> { +        &self.expr_types +    } + +    pub fn expr_types_mut(&mut self) -> &mut Lookup<TypeFact> { +        &mut self.expr_types +    } + +    pub fn record_rule_reference(&mut self, module_idx: u32, expr_idx: u32, rule_path: String) { +        self.expr_rule_references +            .ensure_capacity(module_idx, expr_idx); +        let slot = self.expr_rule_references.get_mut(module_idx, expr_idx); +        let entry = slot.get_or_insert_with(Vec::new); +        if !entry.contains(&rule_path) { +            entry.push(rule_path); +        } +    } + +    pub fn get_rule_references(&self, module_idx: u32, expr_idx: u32) -> Option<&[String]> { +        self.expr_rule_references +            .get_checked(module_idx, expr_idx) +            .map(|vec| vec.as_slice()) +    } + +    /// Set the current rule being analyzed +    pub fn set_current_rule(&mut self, rule_path: Option<String>) { +        self.current_rule = rule_path; +    } + +    pub fn current_rule(&self) -> Option<&str> { +        self.current_rule.as_deref() +    } + +    /// Mark a rule as reachable +    pub fn mark_reachable(&mut self, rule_path: String) { +        self.reachable_rules.insert(rule_path, true); +    } + +    /// Check if a rule is reachable +    pub fn is_reachable(&self, rule_path: &str) -> bool { +        self.reachable_rules +            .get(rule_path) +            .copied() +            .unwrap_or(false) +    } + +    /// Get all reachable rules +    pub fn reachable_rules(&self) -> impl Iterator<Item = &String> { +        self.reachable_rules.keys() +    } + +    /// Record a dynamic reference pattern +    pub fn record_dynamic_reference( +        &mut self, +        static_prefix: Vec<String>, +        pattern: String, +        module_idx: u32, +        expr_idx: u32, +    ) { +        self.dynamic_references.push(DynamicReferencePattern { +            static_prefix, +            pattern, +            location: (module_idx, expr_idx), +        }); +    } + +    /// Get all dynamic reference patterns +    pub
fn dynamic_references(&self) -> &[DynamicReferencePattern] { +        &self.dynamic_references +    } +} + +/// Scoped variable bindings collected while walking a query. +#[derive(Clone, Default, Debug)] +pub struct ScopedBindings { +    stack: Vec<BTreeMap<String, TypeFact>>, +} + +impl ScopedBindings { +    pub fn new() -> Self { +        ScopedBindings { stack: Vec::new() } +    } + +    pub fn push_scope(&mut self) { +        self.stack.push(BTreeMap::new()); +    } + +    pub fn pop_scope(&mut self) { +        self.stack.pop(); +    } + +    pub fn assign(&mut self, name: String, fact: TypeFact) { +        if let Some(scope) = self.stack.last_mut() { +            scope.insert(name, fact); +        } +    } + +    pub fn ensure_root_scope(&mut self) { +        if self.stack.is_empty() { +            self.push_scope(); +        } +    } + +    pub fn lookup(&self, name: &str) -> Option<&TypeFact> { +        for scope in self.stack.iter().rev() { +            if let Some(fact) = scope.get(name) { +                return Some(fact); +            } +        } +        None +    } + +    pub fn assign_if_absent(&mut self, name: String, fact: TypeFact) { +        self.ensure_root_scope(); +        if self.lookup(&name).is_none() { +            self.assign(name, fact); +        } +    } + +    pub fn assign_propagated(&mut self, name: &str, fact: &TypeFact) { +        self.ensure_root_scope(); +        if let Some(scope) = self.stack.last_mut() { +            scope.insert( +                name.to_owned(), +                TypeFact { +                    descriptor: fact.descriptor.clone(), +                    constant: fact.constant.clone(), +                    provenance: TypeProvenance::Propagated, +                    origins: fact.origins.clone(), +                    specialization_hits: fact.specialization_hits.clone(), +                }, +            ); +        } +    } + +    pub fn binding_names(&self) -> Vec<String> { +        let mut names = Vec::new(); +        for scope in &self.stack { +            for key in scope.keys() { +                if !names.contains(key) { +                    names.push(key.clone()); +                } +            } +        } +        names +    } +} diff --git a/src/type_analysis/model.rs b/src/type_analysis/model.rs new file mode 100644 index 00000000..bd79b7d8 --- /dev/null +++ b/src/type_analysis/model.rs @@ -0,0 +1,452 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +use alloc::borrow::ToOwned; +use alloc::{ +    boxed::Box, +    collections::BTreeMap, +    format, +    string::{String, ToString}, +    vec::Vec, +}; + +use crate::{schema::Schema, value::Value, Rc}; + +/// Structural type information that is lightweight to manipulate while +/// propagating types through expressions. +#[derive(Clone, Debug, PartialEq)] +pub enum StructuralType { +    Any, +    Boolean, +    Number, +    Integer, +    String, +    Null, +    Array(Box<StructuralType>), +    Set(Box<StructuralType>), +    Object(StructuralObjectShape), +    Union(Vec<StructuralType>), +    Enum(Vec<Value>), +    Unknown, +} + +impl StructuralType { +    pub fn any() -> Self { +        StructuralType::Any +    } + +    pub fn boolean() -> Self { +        StructuralType::Boolean +    } + +    pub fn from_schema(schema: &Schema) -> Self { +        use crate::schema::Type; +        match schema.as_type() { +            Type::Any { .. } => StructuralType::Any, +            Type::Boolean { .. } => StructuralType::Boolean, +            Type::Integer { .. } => StructuralType::Integer, +            Type::Number { .. } => StructuralType::Number, +            Type::Null { .. } => StructuralType::Null, +            Type::String { .. } => StructuralType::String, +            Type::Array { items, .. } => { +                StructuralType::Array(Box::new(StructuralType::from_schema(items))) +            } +            Type::Set { items, .. } => { +                StructuralType::Set(Box::new(StructuralType::from_schema(items))) +            } +            Type::Object { properties, .. } => { +                let mut shape = BTreeMap::new(); +                for (name, prop_schema) in properties.iter() { +                    shape.insert(name.to_string(), StructuralType::from_schema(prop_schema)); +                } +                StructuralType::Object(StructuralObjectShape { fields: shape }) +            } +            Type::Enum { values, ..
} => StructuralType::Enum((**values).clone()), +            Type::AnyOf(_) | Type::Const { .. } => StructuralType::Any, +        } +    } +} + +/// Additional information about a structural object, namely the shape of +/// known fields. The analyser purposely keeps this light-weight – we only +/// track the fields that have been observed so far. +#[derive(Clone, Debug, PartialEq)] +pub struct StructuralObjectShape { +    pub fields: BTreeMap<String, StructuralType>, +} + +impl Default for StructuralObjectShape { +    fn default() -> Self { +        Self::new() +    } +} + +impl StructuralObjectShape { +    pub fn new() -> Self { +        StructuralObjectShape { +            fields: BTreeMap::new(), +        } +    } +} + +/// The primary descriptor for a type fact associated with an expression. +#[derive(Clone, Debug)] +pub enum TypeDescriptor { +    Schema(Schema), +    Structural(StructuralType), +} + +impl TypeDescriptor { +    pub fn schema(schema: Schema) -> Self { +        TypeDescriptor::Schema(schema) +    } + +    pub fn structural(ty: StructuralType) -> Self { +        TypeDescriptor::Structural(ty) +    } + +    pub fn as_schema(&self) -> Option<&Schema> { +        match self { +            TypeDescriptor::Schema(s) => Some(s), +            _ => None, +        } +    } +} + +/// Where did a particular type fact originate from. +#[derive(Clone, Debug)] +pub enum TypeProvenance { +    SchemaInput, +    SchemaData, +    Literal, +    Assignment, +    Propagated, +    Builtin, +    Rule, +    Unknown, +} + +/// Root source for values propagated through type analysis. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum SourceRoot { +    Input, +    Data, +} + +/// Segment within an origin path. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum PathSegment { +    Field(String), +    Index(usize), +    Any, +} + +/// Captures provenance path information for a value. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SourceOrigin { +    pub root: SourceRoot, +    pub path: Vec<PathSegment>, +    pub derived: bool, +} + +impl SourceOrigin { +    pub fn new(root: SourceRoot) -> Self { +        SourceOrigin { +            root, +            path: Vec::new(), +            derived: false, +        } +    } + +    pub fn from_path(root: SourceRoot, path: Vec<PathSegment>, derived: bool) -> Self { +        SourceOrigin { +            root, +            path, +            derived, +        } +    } + +    pub fn mark_derived(mut self) -> Self { +        self.derived = true; +        self +    } + +    pub fn with_segment(mut self, segment: PathSegment) -> Self { +        self.path.push(segment); +        self +    } +} + +/// Constant information attached to a type fact. +#[derive(Clone, Debug)] +pub enum ConstantValue { +    Known(Value), +    Unknown, +} + +impl ConstantValue { +    pub fn known(value: Value) -> Self { +        ConstantValue::Known(value) +    } + +    pub fn unknown() -> Self { +        ConstantValue::Unknown +    } + +    pub fn as_value(&self) -> Option<&Value> { +        match self { +            ConstantValue::Known(v) => Some(v), +            ConstantValue::Unknown => None, +        } +    } +} + +/// Canonical representation of a rule specialization signature. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct RuleSpecializationSignature { +    pub module_idx: u32, +    pub rule_idx: usize, +    pub arguments: Vec<SpecializationArgument>, +} + +/// Canonicalized argument data used within a specialization signature. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct SpecializationArgument { +    pub descriptor_key: String, +    pub constant: Option<Value>, +} + +/// Metadata describing which specializations contributed to a type fact. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SpecializationHit { +    pub signature: RuleSpecializationSignature, +} + +/// Combined fact stored in lookup tables for each expression.
+#[derive(Clone, Debug)] +pub struct TypeFact { +    pub descriptor: TypeDescriptor, +    pub constant: ConstantValue, +    pub provenance: TypeProvenance, +    pub origins: Vec<SourceOrigin>, +    pub specialization_hits: Vec<SpecializationHit>, +} + +impl TypeFact { +    pub fn new(descriptor: TypeDescriptor, provenance: TypeProvenance) -> Self { +        TypeFact { +            descriptor, +            constant: ConstantValue::Unknown, +            provenance, +            origins: Vec::new(), +            specialization_hits: Vec::new(), +        } +    } + +    pub fn with_constant(mut self, constant: ConstantValue) -> Self { +        self.constant = constant; +        self +    } + +    pub fn with_origin(mut self, origin: SourceOrigin) -> Self { +        self.origins.push(origin); +        self +    } + +    pub fn with_origins(mut self, origins: Vec<SourceOrigin>) -> Self { +        self.origins = origins; +        self +    } + +    pub fn with_specialization_hit(mut self, hit: SpecializationHit) -> Self { +        self.specialization_hits.push(hit); +        self +    } + +    pub fn with_specialization_hits(mut self, hits: Vec<SpecializationHit>) -> Self { +        self.specialization_hits.extend(hits); +        self +    } +} + +impl RuleSpecializationSignature { +    pub fn from_facts(module_idx: u32, rule_idx: usize, arguments: &[TypeFact]) -> Self { +        let arguments = arguments +            .iter() +            .map(|fact| SpecializationArgument { +                descriptor_key: specialization_descriptor_key(&fact.descriptor), +                constant: fact.constant.as_value().cloned(), +            }) +            .collect(); + +        RuleSpecializationSignature { +            module_idx, +            rule_idx, +            arguments, +        } +    } +} + +fn specialization_descriptor_key(descriptor: &TypeDescriptor) -> String { +    match descriptor { +        TypeDescriptor::Schema(schema) => { +            structural_descriptor_key(&StructuralType::from_schema(schema)) +        } +        TypeDescriptor::Structural(ty) => structural_descriptor_key(ty), +    } +} + +fn structural_descriptor_key(ty: &StructuralType) -> String { +    match ty { +        StructuralType::Any => "any".to_owned(), +        StructuralType::Boolean => "boolean".to_owned(), +        StructuralType::Number => "number".to_owned(), +        StructuralType::Integer => "integer".to_owned(), +        StructuralType::String => "string".to_owned(), +        StructuralType::Null => "null".to_owned(), +        StructuralType::Array(inner) => { +            format!("array({})", structural_descriptor_key(inner)) +        } +        StructuralType::Set(inner) => format!("set({})", structural_descriptor_key(inner)), +        StructuralType::Object(shape) => { +            if shape.fields.is_empty() { +                return "object".to_owned(); +            } + +            let mut parts = Vec::with_capacity(shape.fields.len()); +            for (name, field_ty) in &shape.fields { +                parts.push(format!("{name}:{}", structural_descriptor_key(field_ty))); +            } +            format!("object{{{}}}", parts.join(",")) +        } +        StructuralType::Union(types) => { +            if types.is_empty() { +                return "union()".to_owned(); +            } + +            let mut parts: Vec<String> = types.iter().map(structural_descriptor_key).collect(); +            parts.sort(); +            format!("union({})", parts.join("|")) +        } +        StructuralType::Enum(values) => { +            if values.is_empty() { +                return "enum()".to_owned(); +            } + +            let mut parts: Vec<String> = values.iter().map(|value| value.to_string()).collect(); +            parts.sort(); +            format!("enum({})", parts.join("|")) +        } +        StructuralType::Unknown => "unknown".to_owned(), +    } +} + +/// Discriminates whether the descriptor is schema-backed or structural. +#[derive(Clone, Debug)] +pub enum HybridTypeKind { +    Schema, +    Structural, +} + +/// Wrapper returned by the analyser.
+#[derive(Clone, Debug)] +pub struct HybridType { +    pub fact: TypeFact, +    pub kind: HybridTypeKind, +} + +impl HybridType { +    pub fn from_fact(fact: TypeFact) -> Self { +        let kind = match fact.descriptor { +            TypeDescriptor::Schema(_) => HybridTypeKind::Schema, +            TypeDescriptor::Structural(_) => HybridTypeKind::Structural, +        }; +        HybridType { fact, kind } +    } +} + +/// Diagnostics emitted by the analyser. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TypeDiagnosticSeverity { +    Error, +    Warning, +} + +#[derive(Clone, Debug)] +pub struct TypeDiagnostic { +    pub message: String, +    pub kind: TypeDiagnosticKind, +    pub severity: TypeDiagnosticSeverity, +    pub file: Rc<str>, +    pub line: u32, +    pub col: u32, +    pub end_line: u32, +    pub end_col: u32, +} + +#[derive(Clone, Debug)] +pub enum TypeDiagnosticKind { +    SchemaViolation, +    InternalError, +    TypeMismatch, +    UnreachableStatement, +} + +/// State of constant evaluation for a rule. +#[derive(Clone, Debug, Default)] +pub enum RuleConstantState { +    /// Not yet attempted +    #[default] +    Unknown, +    /// Currently being evaluated (for recursion detection) +    InProgress, +    /// Successfully evaluated to a constant value +    Done(Value), +    /// Cannot be constant folded (needs input/data or has recursion) +    NeedsRuntime, +} + +/// Aggregated metadata for an analysed rule. +#[derive(Clone, Debug, Default)] +pub struct RuleAnalysis { +    pub input_dependencies: Vec<SourceOrigin>, +    pub rule_dependencies: Vec<String>, +    pub constant_state: RuleConstantState, +} + +impl RuleAnalysis { +    pub fn record_origins(&mut self, origins: &[SourceOrigin]) { +        for origin in origins { +            if let Some(existing) = self +                .input_dependencies +                .iter_mut() +                .find(|candidate| candidate.root == origin.root && candidate.path == origin.path) +            { +                existing.derived |= origin.derived; +            } else { +                self.input_dependencies.push(origin.clone()); +            } +        } +    } + +    pub fn record_rule_dependency<S: Into<String>>(&mut self, dependency: S) { +        let dep = dependency.into(); +        if !self +            .rule_dependencies +            .iter() +            .any(|existing| existing == &dep) +        { +            self.rule_dependencies.push(dep); +        } +    } + +    pub fn merge(&mut self, other: RuleAnalysis) { +        self.record_origins(&other.input_dependencies); +        for dep in other.rule_dependencies { +            self.record_rule_dependency(dep); +        } +        // Keep the first constant state if already set +        if matches!(self.constant_state, RuleConstantState::Unknown) { +            self.constant_state = other.constant_state; +        } +    } +} diff --git a/src/type_analysis/propagation/diagnostics/categories.rs b/src/type_analysis/propagation/diagnostics/categories.rs new file mode 100644 index 00000000..92d54b3b --- /dev/null +++ b/src/type_analysis/propagation/diagnostics/categories.rs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub(super) enum StructuralCategory { +    Boolean, +    Number, +    String, +    Null, +    Array, +    Set, +    Object, +    Undefined, +} diff --git a/src/type_analysis/propagation/diagnostics/checks.rs b/src/type_analysis/propagation/diagnostics/checks.rs new file mode 100644 index 00000000..3ca4d961 --- /dev/null +++ b/src/type_analysis/propagation/diagnostics/checks.rs @@ -0,0 +1,431 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License.
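+
+//! Diagnostic checks run during type propagation: equality and arithmetic
+//! operand compatibility, the set operators `|` and `&`, `in` membership,
+//! indexing, and builtin-call arity and parameter types. Schema violations are
+//! reported as errors; the remaining mismatches are reported as warnings.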
+ +use alloc::format; +use alloc::string::String; + +use crate::ast::{ArithOp, BinOp, BoolOp}; +use crate::lexer::Span; +use crate::type_analysis::model::{ + HybridType, TypeDiagnostic, TypeDiagnosticKind, TypeDiagnosticSeverity, +}; +use crate::value::Value; + +use super::super::facts::schema_allows_value; +use super::super::pipeline::{TypeAnalysisResult, TypeAnalyzer}; + +impl TypeAnalyzer { + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_equality_diagnostic( + &self, + module_idx: u32, + span: &Span, + op: &BoolOp, + lhs: &HybridType, + rhs: &HybridType, + result: &mut TypeAnalysisResult, + ) { + if !matches!(op, BoolOp::Eq) { + return; + } + + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + + let (schema_side, const_side) = match ( + lhs.fact.descriptor.as_schema(), + lhs.fact.constant.as_value(), + rhs.fact.descriptor.as_schema(), + rhs.fact.constant.as_value(), + ) { + (Some(schema), _, _, Some(constant)) => (Some(schema), Some(constant)), + (_, Some(constant), Some(schema), _) => (Some(schema), Some(constant)), + _ => (None, None), + }; + + if let (Some(schema), Some(constant)) = (schema_side, const_side) { + if !schema_allows_value(schema, constant) { + result.diagnostics.push(TypeDiagnostic { + message: format!("value {} is not allowed by schema enumeration", constant), + kind: TypeDiagnosticKind::SchemaViolation, + severity: TypeDiagnosticSeverity::Error, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + } + + let lhs_struct = Self::hybrid_structural_type(lhs); + let rhs_struct = Self::hybrid_structural_type(rhs); + + if Self::structural_types_certainly_disjoint(&lhs_struct, &rhs_struct) { + let lhs_label = Self::hybrid_type_display(lhs); + let rhs_label = Self::hybrid_type_display(rhs); + result.diagnostics.push(TypeDiagnostic { + message: format!( + "equality comparison between incompatible types: {} == {}", + lhs_label, rhs_label + ), + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_arithmetic_diagnostic( + &self, + module_idx: u32, + span: &Span, + op: &ArithOp, + lhs: &HybridType, + rhs: &HybridType, + result: &mut TypeAnalysisResult, + ) { + use ArithOp::*; + + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + + if matches!(lhs.fact.constant.as_value(), Some(Value::Undefined)) + || matches!(rhs.fact.constant.as_value(), Some(Value::Undefined)) + { + return; + } + + let lhs_can_be_numeric = Self::hybrid_can_be_numeric(lhs); + let rhs_can_be_numeric = Self::hybrid_can_be_numeric(rhs); + let lhs_can_be_set = Self::hybrid_can_be_set(lhs); + let rhs_can_be_set = Self::hybrid_can_be_set(rhs); + let lhs_can_be_integer = Self::hybrid_can_be_integer(lhs); + let rhs_can_be_integer = Self::hybrid_can_be_integer(rhs); + + let (should_warn, message) = match op { + Add | Mul | Div => { + if lhs_can_be_numeric && rhs_can_be_numeric { + (false, String::new()) + } else { + let lhs_label = Self::hybrid_type_display(lhs); + let rhs_label = Self::hybrid_type_display(rhs); + ( + true, + format!( + "operator {} expects numeric operands; got {} and {}", + Self::arithmetic_op_token(op), + lhs_label, + rhs_label + ), + ) + } + } + Mod => { + if lhs_can_be_integer && rhs_can_be_integer { + (false, String::new()) + } else { + let lhs_label = Self::hybrid_type_display(lhs); + let rhs_label 
= Self::hybrid_type_display(rhs); + ( + true, + format!( + "operator % expects integer operands; got {} and {}", + lhs_label, rhs_label + ), + ) + } + } + Sub => { + let numeric_possible = lhs_can_be_numeric && rhs_can_be_numeric; + let set_possible = lhs_can_be_set && rhs_can_be_set; + + if numeric_possible || set_possible { + (false, String::new()) + } else { + let lhs_label = Self::hybrid_type_display(lhs); + let rhs_label = Self::hybrid_type_display(rhs); + ( + true, + format!( + "operator - expects both operands to be numbers or both to be sets; got {} and {}", + lhs_label, rhs_label + ), + ) + } + } + }; + + if should_warn { + result.diagnostics.push(TypeDiagnostic { + message, + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_set_operation_diagnostic( + &self, + module_idx: u32, + span: &Span, + op: &BinOp, + lhs: &HybridType, + rhs: &HybridType, + result: &mut TypeAnalysisResult, + ) { + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + + let lhs_can_be_set = Self::hybrid_can_be_set(lhs); + let rhs_can_be_set = Self::hybrid_can_be_set(rhs); + + if lhs_can_be_set && rhs_can_be_set { + return; + } + + let op_label = match op { + BinOp::Union => "|", + BinOp::Intersection => "&", + }; + + let lhs_label = Self::hybrid_type_display(lhs); + let rhs_label = Self::hybrid_type_display(rhs); + + let message = match (lhs_can_be_set, rhs_can_be_set) { + (false, false) => format!( + "operator {} expects set operands; got {} and {}", + op_label, lhs_label, rhs_label + ), + (false, true) => format!( + "operator {} expects left operand to be a set; got {}", + op_label, lhs_label + ), + (true, false) => format!( + "operator {} expects right operand to be a set; got {}", + op_label, rhs_label + ), + (true, true) => return, + }; + + result.diagnostics.push(TypeDiagnostic { + message, + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_membership_diagnostic( + &self, + module_idx: u32, + span: &Span, + value: &HybridType, + collection: &HybridType, + result: &mut TypeAnalysisResult, + ) { + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + let collection_struct = Self::hybrid_structural_type(collection); + + if !Self::hybrid_can_be_collection(collection) { + let collection_label = Self::hybrid_type_display(collection); + result.diagnostics.push(TypeDiagnostic { + message: format!( + "'in' operator requires collection (array, set, or object); got {}", + collection_label + ), + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + return; + } + + // Check element type compatibility for arrays and sets + let value_struct = Self::hybrid_structural_type(value); + + let element_type = match &collection_struct { + crate::type_analysis::model::StructuralType::Array(elem) + | crate::type_analysis::model::StructuralType::Set(elem) => Some(elem.as_ref()), + _ => None, + }; + + if let Some(elem_ty) = element_type { + if Self::structural_types_certainly_disjoint(&value_struct, elem_ty) { + let value_label = Self::hybrid_type_display(value); + let 
elem_label = Self::structural_type_display(elem_ty); + result.diagnostics.push(TypeDiagnostic { + message: format!( + "'in' operator: element type {} is incompatible with collection element type {}", + value_label, elem_label + ), + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_indexing_diagnostic( + &self, + module_idx: u32, + span: &Span, + base: &HybridType, + result: &mut TypeAnalysisResult, + ) { + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + let base_struct = Self::hybrid_structural_type(base); + + let can_be_indexed = match &base_struct { + crate::type_analysis::model::StructuralType::Array(_) + | crate::type_analysis::model::StructuralType::Set(_) + | crate::type_analysis::model::StructuralType::Object(_) + | crate::type_analysis::model::StructuralType::String => true, + crate::type_analysis::model::StructuralType::Union(variants) => { + variants.iter().any(|v| { + matches!( + v, + crate::type_analysis::model::StructuralType::Array(_) + | crate::type_analysis::model::StructuralType::Set(_) + | crate::type_analysis::model::StructuralType::Object(_) + | crate::type_analysis::model::StructuralType::String + ) + }) + } + crate::type_analysis::model::StructuralType::Any + | crate::type_analysis::model::StructuralType::Unknown => true, + _ => false, + }; + + if !can_be_indexed { + let base_label = Self::hybrid_type_display(base); + result.diagnostics.push(TypeDiagnostic { + message: format!( + "cannot index into {}; indexing requires array, set, object, or string", + base_label + ), + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_builtin_call_diagnostic( + &self, + module_idx: u32, + span: &Span, + builtin_name: &str, + expected_params: usize, + actual_params: usize, + result: &mut TypeAnalysisResult, + ) { + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + if expected_params != actual_params { + result.diagnostics.push(TypeDiagnostic { + message: format!( + "builtin '{}' expects {} parameter(s), got {}", + builtin_name, expected_params, actual_params + ), + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_builtin_param_type_diagnostic( + &self, + module_idx: u32, + span: &Span, + builtin_name: &str, + param_idx: usize, + expected_type: &str, + actual: &HybridType, + result: &mut TypeAnalysisResult, + ) { + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + let actual_label = Self::hybrid_type_display(actual); + result.diagnostics.push(TypeDiagnostic { + message: format!( + "builtin '{}' parameter {} expects {}, got {}", + builtin_name, + param_idx + 1, + expected_type, + actual_label + ), + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_builtin_additional_rules( + &self, + module_idx: u32, + span: &Span, + 
builtin_name: &str, + args: &[HybridType], + result: &mut TypeAnalysisResult, + ) { + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + if builtin_name == "count" { + if let Some(arg) = args.first() { + if !Self::hybrid_can_be_collection(arg) { + let arg_label = Self::hybrid_type_display(arg); + result.diagnostics.push(TypeDiagnostic { + message: format!( + "builtin 'count' expects array, set, or object; got {}", + arg_label + ), + kind: TypeDiagnosticKind::TypeMismatch, + severity: TypeDiagnosticSeverity::Warning, + file: self.get_file_for_module(module_idx), + line, + col, + end_line, + end_col, + }); + } + } + } + } +} diff --git a/src/type_analysis/propagation/diagnostics/helpers.rs b/src/type_analysis/propagation/diagnostics/helpers.rs new file mode 100644 index 00000000..4982c613 --- /dev/null +++ b/src/type_analysis/propagation/diagnostics/helpers.rs @@ -0,0 +1,255 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::collections::BTreeSet; +use alloc::format; +use alloc::string::{String, ToString}; +use alloc::vec::Vec; + +use crate::type_analysis::model::{HybridType, StructuralType, TypeDescriptor}; +use crate::type_analysis::propagation::pipeline::TypeAnalyzer; +use crate::value::Value; + +use super::categories::StructuralCategory; + +impl TypeAnalyzer { + pub(super) fn get_file_for_module(&self, module_idx: u32) -> crate::Rc { + self.modules + .get(module_idx as usize) + .map(|m| m.as_ref().package.span.source.get_path().as_str()) + .unwrap_or("") + .into() + } + + pub(super) fn hybrid_structural_type(ty: &HybridType) -> StructuralType { + match &ty.fact.descriptor { + TypeDescriptor::Structural(struct_ty) => struct_ty.clone(), + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + } + } + + pub(super) fn structural_types_certainly_disjoint( + lhs: &StructuralType, + rhs: &StructuralType, + ) -> bool { + let mut lhs_categories = BTreeSet::new(); + if Self::collect_structural_categories(lhs, &mut lhs_categories) { + return false; + } + + let mut rhs_categories = BTreeSet::new(); + if Self::collect_structural_categories(rhs, &mut rhs_categories) { + return false; + } + + if lhs_categories.is_empty() || rhs_categories.is_empty() { + return false; + } + + !lhs_categories + .iter() + .any(|category| rhs_categories.contains(category)) + } + + pub(super) fn hybrid_can_be_numeric(ty: &HybridType) -> bool { + if let Some(value) = ty.fact.constant.as_value() { + return matches!(value, Value::Number(_)); + } + Self::structural_type_can_be_numeric(&Self::hybrid_structural_type(ty)) + } + + pub(crate) fn hybrid_can_be_integer(ty: &HybridType) -> bool { + if let Some(value) = ty.fact.constant.as_value() { + if let Ok(number) = value.as_number() { + return number.is_integer(); + } + return false; + } + Self::structural_type_can_be_integer(&Self::hybrid_structural_type(ty)) + } + + pub(super) fn hybrid_can_be_set(ty: &HybridType) -> bool { + if let Some(value) = ty.fact.constant.as_value() { + return matches!(value, Value::Set(_)); + } + Self::structural_type_can_be_set(&Self::hybrid_structural_type(ty)) + } + + pub(super) fn hybrid_can_be_collection(ty: &HybridType) -> bool { + if let Some(value) = ty.fact.constant.as_value() { + return matches!(value, Value::Array(_) | Value::Set(_) | Value::Object(_)); + } + Self::structural_type_can_be_collection(&Self::hybrid_structural_type(ty)) + } + + pub(super) fn hybrid_type_display(ty: &HybridType) -> String { + if let Some(value) = ty.fact.constant.as_value() { 
+ return format!("constant {}", value); + } + Self::diagnostic_structural_type_label(&Self::hybrid_structural_type(ty)) + } + + pub(super) fn arithmetic_op_token(op: &crate::ast::ArithOp) -> &'static str { + match op { + crate::ast::ArithOp::Add => "+", + crate::ast::ArithOp::Sub => "-", + crate::ast::ArithOp::Mul => "*", + crate::ast::ArithOp::Div => "/", + crate::ast::ArithOp::Mod => "%", + } + } + + fn structural_type_can_be_numeric(ty: &StructuralType) -> bool { + match ty { + StructuralType::Number | StructuralType::Integer => true, + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Union(variants) => { + variants.iter().any(Self::structural_type_can_be_numeric) + } + StructuralType::Enum(values) => { + values.iter().any(|value| matches!(value, Value::Number(_))) + } + _ => false, + } + } + + fn structural_type_can_be_integer(ty: &StructuralType) -> bool { + match ty { + StructuralType::Integer => true, + StructuralType::Number => true, + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Union(variants) => { + variants.iter().any(Self::structural_type_can_be_integer) + } + StructuralType::Enum(values) => values.iter().any(|value| { + if let Value::Number(num) = value { + num.is_integer() + } else { + false + } + }), + _ => false, + } + } + + fn structural_type_can_be_set(ty: &StructuralType) -> bool { + match ty { + StructuralType::Set(_) => true, + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Union(variants) => { + variants.iter().any(Self::structural_type_can_be_set) + } + _ => false, + } + } + + fn structural_type_can_be_collection(ty: &StructuralType) -> bool { + match ty { + StructuralType::Array(_) | StructuralType::Set(_) | StructuralType::Object(_) => true, + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Union(variants) => { + variants.iter().any(Self::structural_type_can_be_collection) + } + _ => false, + } + } + + fn diagnostic_structural_type_label(ty: &StructuralType) -> String { + match ty { + StructuralType::Any => "any".into(), + StructuralType::Unknown => "unknown".into(), + StructuralType::Boolean => "boolean".into(), + StructuralType::Number => "number".into(), + StructuralType::Integer => "integer".into(), + StructuralType::String => "string".into(), + StructuralType::Null => "null".into(), + StructuralType::Array(inner) => { + format!("array<{}>", Self::diagnostic_structural_type_label(inner)) + } + StructuralType::Set(inner) => { + format!("set<{}>", Self::diagnostic_structural_type_label(inner)) + } + StructuralType::Object(_) => "object".into(), + StructuralType::Union(variants) => { + let labels: Vec<_> = variants + .iter() + .map(Self::diagnostic_structural_type_label) + .collect(); + labels.join(" | ") + } + StructuralType::Enum(values) => { + let labels: Vec<_> = values.iter().map(|value| value.to_string()).collect(); + format!("enum {{ {} }}", labels.join(", ")) + } + } + } + + fn collect_structural_categories( + ty: &StructuralType, + output: &mut BTreeSet, + ) -> bool { + use StructuralCategory::*; + match ty { + StructuralType::Any => true, + StructuralType::Unknown => false, + StructuralType::Boolean => { + output.insert(Boolean); + false + } + StructuralType::Number | StructuralType::Integer => { + output.insert(Number); + false + } + StructuralType::String => { + output.insert(String); + false + } + StructuralType::Null => { + output.insert(Null); + false + } + StructuralType::Array(_) => { + output.insert(Array); + false + } + StructuralType::Set(_) 
=> { + output.insert(Set); + false + } + StructuralType::Object(_) => { + output.insert(Object); + false + } + StructuralType::Union(variants) => { + for variant in variants { + if Self::collect_structural_categories(variant, output) { + return true; + } + } + false + } + StructuralType::Enum(values) => { + for value in values { + if let Some(category) = Self::category_from_value(value) { + output.insert(category); + } + } + false + } + } + } + + fn category_from_value(value: &Value) -> Option { + use StructuralCategory::*; + match value { + Value::Null => Some(Null), + Value::Bool(_) => Some(Boolean), + Value::Number(_) => Some(Number), + Value::String(_) => Some(String), + Value::Array(_) => Some(Array), + Value::Set(_) => Some(Set), + Value::Object(_) => Some(Object), + Value::Undefined => Some(Undefined), + } + } +} diff --git a/src/type_analysis/propagation/diagnostics/mod.rs b/src/type_analysis/propagation/diagnostics/mod.rs new file mode 100644 index 00000000..3357238d --- /dev/null +++ b/src/type_analysis/propagation/diagnostics/mod.rs @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod categories; +mod checks; +mod helpers; diff --git a/src/type_analysis/propagation/expressions/comprehensions.rs b/src/type_analysis/propagation/expressions/comprehensions.rs new file mode 100644 index 00000000..f106b6ee --- /dev/null +++ b/src/type_analysis/propagation/expressions/comprehensions.rs @@ -0,0 +1,114 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Comprehension expression type inference (array, set, object comprehensions) + +use alloc::boxed::Box; +use alloc::collections::BTreeMap; + +use crate::ast::{Expr, Query, Ref}; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + RuleAnalysis, StructuralObjectShape, StructuralType, TypeDescriptor, TypeFact, +}; +use crate::type_analysis::propagation::facts::mark_origins_derived; + +use super::super::pipeline::AnalysisState; +use super::super::pipeline::TypeAnalyzer; + +impl TypeAnalyzer { + pub(crate) fn infer_array_comprehension( + &self, + module_idx: u32, + term: &Ref, + query: &Ref, + bindings: &mut ScopedBindings, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + bindings.push_scope(); + let _ = self.analyze_query(module_idx, query, bindings, result, rule_analysis, true); + let term_type = self.infer_expr(module_idx, term, bindings, result, rule_analysis); + bindings.pop_scope(); + + let element_structural = match term_type.fact.descriptor { + TypeDescriptor::Schema(ref schema) => StructuralType::from_schema(schema), + TypeDescriptor::Structural(ref st) => st.clone(), + }; + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Array(Box::new(element_structural))), + term_type.fact.provenance.clone(), + ); + + if !term_type.fact.origins.is_empty() { + fact = fact.with_origins(mark_origins_derived(&term_type.fact.origins)); + } + + fact + } + + pub(crate) fn infer_set_comprehension( + &self, + module_idx: u32, + term: &Ref, + query: &Ref, + bindings: &mut ScopedBindings, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + bindings.push_scope(); + let _ = self.analyze_query(module_idx, query, bindings, result, rule_analysis, true); + let term_type = self.infer_expr(module_idx, term, bindings, result, rule_analysis); + bindings.pop_scope(); + + let element_structural = match term_type.fact.descriptor { + 
TypeDescriptor::Schema(ref schema) => StructuralType::from_schema(schema), + TypeDescriptor::Structural(ref st) => st.clone(), + }; + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Set(Box::new(element_structural))), + term_type.fact.provenance.clone(), + ); + + if !term_type.fact.origins.is_empty() { + fact = fact.with_origins(mark_origins_derived(&term_type.fact.origins)); + } + + fact + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn infer_object_comprehension( + &self, + module_idx: u32, + key: &Ref, + value: &Ref, + query: &Ref, + bindings: &mut ScopedBindings, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + bindings.push_scope(); + let _ = self.analyze_query(module_idx, query, bindings, result, rule_analysis, true); + let _key_type = self.infer_expr(module_idx, key, bindings, result, rule_analysis); + let value_type = self.infer_expr(module_idx, value, bindings, result, rule_analysis); + bindings.pop_scope(); + + let shape = StructuralObjectShape { + fields: BTreeMap::new(), + }; + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Object(shape)), + value_type.fact.provenance.clone(), + ); + + if !value_type.fact.origins.is_empty() { + fact = fact.with_origins(mark_origins_derived(&value_type.fact.origins)); + } + + fact + } +} diff --git a/src/type_analysis/propagation/expressions/dispatch.rs b/src/type_analysis/propagation/expressions/dispatch.rs new file mode 100644 index 00000000..d9bfc5ba --- /dev/null +++ b/src/type_analysis/propagation/expressions/dispatch.rs @@ -0,0 +1,394 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::borrow::ToOwned; + +use crate::ast::{Expr, Ref}; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, HybridType, PathSegment, RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, + TypeProvenance, +}; +use crate::type_analysis::propagation::facts::{ + extend_origins_with_segment, extract_schema_constant, schema_additional_properties_schema, + schema_array_items, +}; +use crate::type_analysis::propagation::pipeline::{AnalysisState, TypeAnalyzer}; +use crate::value::Value; + +impl TypeAnalyzer { + pub(crate) fn infer_expr( + &self, + module_idx: u32, + expr: &Ref, + bindings: &mut ScopedBindings, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + ) -> HybridType { + let eidx = expr.eidx(); + let skip_cached_fact = matches!(expr.as_ref(), Expr::Var { .. }); + + if !skip_cached_fact { + if let Some(existing) = result.lookup.get_expr(module_idx, eidx) { + if !existing.origins.is_empty() { + rule_analysis.record_origins(&existing.origins); + } + return HybridType::from_fact(existing.clone()); + } + } + + self.ensure_expr_capacity(module_idx, eidx, result); + + let fact = match expr.as_ref() { + Expr::String { value, .. } | Expr::RawString { value, .. } => { + self.infer_string_literal(value) + } + Expr::Number { value, .. } => self.infer_number_literal(value), + Expr::Bool { value, .. } => self.infer_bool_literal(value), + Expr::Null { value, .. } => self.infer_null_literal(value), + Expr::Var { value, .. } => { + self.infer_var(module_idx, eidx, value, bindings, result, rule_analysis) + } + Expr::Array { items, .. } => { + self.infer_array_literal(module_idx, items, bindings, result, rule_analysis) + } + Expr::Set { items, .. } => { + self.infer_set_literal(module_idx, items, bindings, result, rule_analysis) + } + Expr::Object { fields, .. 
} => { + self.infer_object_literal(module_idx, fields, bindings, result, rule_analysis) + } + Expr::ArrayCompr { term, query, .. } => self.infer_array_comprehension( + module_idx, + term, + query, + bindings, + result, + rule_analysis, + ), + Expr::SetCompr { term, query, .. } => self.infer_set_comprehension( + module_idx, + term, + query, + bindings, + result, + rule_analysis, + ), + Expr::ObjectCompr { + key, value, query, .. + } => self.infer_object_comprehension( + module_idx, + key, + value, + query, + bindings, + result, + rule_analysis, + ), + Expr::AssignExpr { lhs, rhs, op, .. } => self.infer_assignment_expr( + module_idx, + lhs, + rhs, + op, + bindings, + result, + rule_analysis, + ), + Expr::BoolExpr { + span, op, lhs, rhs, .. + } => self.infer_bool_expr( + module_idx, + span, + op, + lhs, + rhs, + bindings, + result, + rule_analysis, + ), + Expr::ArithExpr { + span, op, lhs, rhs, .. + } => self.infer_arith_expr( + module_idx, + span, + op, + lhs, + rhs, + bindings, + result, + rule_analysis, + ), + Expr::BinExpr { + span, op, lhs, rhs, .. + } => self.infer_bin_expr( + module_idx, + span, + op, + lhs, + rhs, + bindings, + result, + rule_analysis, + ), + Expr::Membership { + span, + value, + collection, + .. + } => self.infer_membership_expr( + module_idx, + span, + value, + collection, + bindings, + result, + rule_analysis, + ), + Expr::UnaryExpr { expr, .. } => { + self.infer_unary_expr(module_idx, expr, bindings, result, rule_analysis) + } + Expr::Call { + span, fcn, params, .. + } => self.infer_call_expr( + module_idx, + eidx, + span, + fcn, + params, + bindings, + result, + rule_analysis, + ), + Expr::RefDot { refr, field, .. } => { + let base = self.infer_expr(module_idx, refr, bindings, result, rule_analysis); + if let Some((field_span, field_value)) = field.as_ref() { + let mut fact = if let Some(rule_fact) = self.try_resolve_rule_property( + module_idx, + eidx, + refr.as_ref(), + field_value, + result, + rule_analysis, + ) { + rule_fact + } else { + self.infer_property_access( + module_idx, + base.clone(), + field_value.clone(), + Some(field_span), + result, + ) + }; + + if let ConstantValue::Known(base_value) = &base.fact.constant { + match base_value { + Value::Object(obj) => { + if let Value::String(field_name) = field_value { + if let Some(field_const) = + obj.get(&Value::String(field_name.clone())) + { + fact = fact.with_constant(ConstantValue::known( + field_const.clone(), + )); + } else { + fact = fact + .with_constant(ConstantValue::known(Value::Undefined)); + } + } + } + Value::Undefined => { + fact = fact.with_constant(ConstantValue::known(Value::Undefined)); + } + _ => {} + } + } + + if matches!(fact.constant.as_value(), Some(Value::Undefined)) { + fact.descriptor = TypeDescriptor::Structural(StructuralType::Unknown); + } + + fact + } else { + base.fact.clone() + } + } + Expr::RefBrack { + span, refr, index, .. 
+ } => { + let index_fact = + self.infer_expr(module_idx, index, bindings, result, rule_analysis); + let base = self.infer_expr(module_idx, refr, bindings, result, rule_analysis); + + // Check for indexing diagnostic issues + self.check_indexing_diagnostic(module_idx, span, &base, result); + + let base_provenance = match base.fact.provenance { + TypeProvenance::SchemaInput => TypeProvenance::SchemaInput, + TypeProvenance::SchemaData => TypeProvenance::SchemaData, + _ => TypeProvenance::Propagated, + }; + let index_constant_string = index_fact + .fact + .constant + .as_value() + .and_then(|value| value.as_string().ok().map(|s| s.as_ref().to_owned())); + let mut fact = match &base.fact.descriptor { + TypeDescriptor::Schema(schema) => { + if let Some((item_schema, schema_constant)) = schema_array_items(schema) { + let mut fact = TypeFact::new( + TypeDescriptor::Schema(item_schema), + base_provenance.clone(), + ); + if let Some(constant) = schema_constant { + fact = fact.with_constant(ConstantValue::known(constant)); + } + fact + } else if let Some(field_name) = index_constant_string.as_ref() { + self.infer_property_access( + module_idx, + base.clone(), + Value::from(field_name.as_str()), + Some(index.span()), + result, + ) + } else if let Some(additional_schema) = + schema_additional_properties_schema(schema) + { + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::from_schema( + &additional_schema, + )), + base_provenance.clone(), + ); + if let Some(constant) = extract_schema_constant(&additional_schema) { + fact = fact.with_constant(ConstantValue::known(constant)); + } + fact + } else { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + base_provenance.clone(), + ) + } + } + TypeDescriptor::Structural(struct_ty) => { + if let Some(element_ty) = Self::structural_array_element(struct_ty) { + TypeFact::new( + TypeDescriptor::Structural(element_ty), + TypeProvenance::Propagated, + ) + } else { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + base_provenance.clone(), + ) + } + } + }; + + if !base.fact.origins.is_empty() { + let segment = if let Some(value) = index_fact.fact.constant.as_value() { + if let Ok(idx) = value.as_u32() { + PathSegment::Index(idx as usize) + } else if let Ok(field_name) = value.as_string() { + PathSegment::Field(field_name.as_ref().to_owned()) + } else { + PathSegment::Any + } + } else { + PathSegment::Any + }; + let origins = extend_origins_with_segment(&base.fact.origins, segment); + fact = fact.with_origins(origins); + } + + if let (ConstantValue::Known(base_value), ConstantValue::Known(index_value)) = + (&base.fact.constant, &index_fact.fact.constant) + { + if let Ok(arr) = base_value.as_array() { + if let Ok(idx) = index_value.as_u32() { + if let Some(element) = arr.get(idx as usize) { + fact = fact.with_constant(ConstantValue::known(element.clone())); + } else { + fact = fact.with_constant(ConstantValue::known(Value::Undefined)); + } + } + } else if let Ok(obj) = base_value.as_object() { + if let Some(field_value) = obj.get(index_value) { + fact = fact.with_constant(ConstantValue::known(field_value.clone())); + } else { + fact = fact.with_constant(ConstantValue::known(Value::Undefined)); + } + } else if let Ok(set) = base_value.as_set() { + let is_member = set.contains(index_value); + let origins = fact.origins.clone(); + fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Boolean), + TypeProvenance::Propagated, + ) + .with_constant(ConstantValue::known(Value::from(is_member))); + if 
!origins.is_empty() { + fact = fact.with_origins(origins); + } + } + } + + if matches!(fact.constant.as_value(), Some(Value::Undefined)) { + fact.descriptor = TypeDescriptor::Structural(StructuralType::Unknown); + } + + fact + } + #[cfg(feature = "rego-extensions")] + Expr::OrExpr { lhs, rhs, .. } => { + let _lhs = self.infer_expr(module_idx, lhs, bindings, result, rule_analysis); + let _rhs = self.infer_expr(module_idx, rhs, bindings, result, rule_analysis); + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Boolean), + TypeProvenance::Propagated, + ) + } + }; + + result.lookup.record_expr(module_idx, eidx, fact.clone()); + + if let ConstantValue::Known(value) = &fact.constant { + result + .constants + .record(module_idx, eidx, Some(value.clone())); + } + + if !fact.origins.is_empty() { + rule_analysis.record_origins(&fact.origins); + } + + if let Some(loop_lookup) = self.loop_lookup.as_ref() { + if let Some(expr_loops) = loop_lookup.get_expr_loops(module_idx, eidx) { + for loop_info in expr_loops { + self.process_hoisted_loop( + module_idx, + loop_info, + bindings, + result, + rule_analysis, + ); + } + } + + if let Some(plan) = loop_lookup.get_expr_binding_plan(module_idx, eidx) { + self.apply_binding_plan( + module_idx, + expr, + plan, + &fact, + bindings, + result, + rule_analysis, + ); + } + } + + HybridType::from_fact(fact) + } +} diff --git a/src/type_analysis/propagation/expressions/helpers.rs b/src/type_analysis/propagation/expressions/helpers.rs new file mode 100644 index 00000000..56f95d99 --- /dev/null +++ b/src/type_analysis/propagation/expressions/helpers.rs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use crate::type_analysis::model::StructuralType; +use crate::type_analysis::propagation::pipeline::TypeAnalyzer; + +impl TypeAnalyzer { + /// Check if a structural type is definitely numeric + pub(crate) fn is_numeric_type(ty: &StructuralType) -> bool { + matches!(ty, StructuralType::Number | StructuralType::Integer) + } +} diff --git a/src/type_analysis/propagation/expressions/literals.rs b/src/type_analysis/propagation/expressions/literals.rs new file mode 100644 index 00000000..7fcea7ec --- /dev/null +++ b/src/type_analysis/propagation/expressions/literals.rs @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! 
Literal expression type inference (strings, numbers, booleans, null) + +use crate::type_analysis::model::{ + ConstantValue, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::value::Value; + +use super::super::pipeline::TypeAnalyzer; + +impl TypeAnalyzer { + pub(crate) fn infer_string_literal(&self, value: &Value) -> TypeFact { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::String), + TypeProvenance::Literal, + ) + .with_constant(ConstantValue::known(value.clone())) + } + + pub(crate) fn infer_number_literal(&self, value: &Value) -> TypeFact { + match value { + Value::Number(number) if number.is_integer() => TypeFact::new( + TypeDescriptor::Structural(StructuralType::Integer), + TypeProvenance::Literal, + ) + .with_constant(ConstantValue::known(value.clone())), + _ => TypeFact::new( + TypeDescriptor::Structural(StructuralType::Number), + TypeProvenance::Literal, + ) + .with_constant(ConstantValue::known(value.clone())), + } + } + + pub(crate) fn infer_bool_literal(&self, value: &Value) -> TypeFact { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Boolean), + TypeProvenance::Literal, + ) + .with_constant(ConstantValue::known(value.clone())) + } + + pub(crate) fn infer_null_literal(&self, value: &Value) -> TypeFact { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Null), + TypeProvenance::Literal, + ) + .with_constant(ConstantValue::known(value.clone())) + } +} diff --git a/src/type_analysis/propagation/expressions/mod.rs b/src/type_analysis/propagation/expressions/mod.rs new file mode 100644 index 00000000..cbc7666e --- /dev/null +++ b/src/type_analysis/propagation/expressions/mod.rs @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod comprehensions; +mod dispatch; +mod helpers; +mod literals; +mod operators; +mod rules; +mod statements; +mod structures; + +pub(crate) use statements::StatementTruth; diff --git a/src/type_analysis/propagation/expressions/operators.rs b/src/type_analysis/propagation/expressions/operators.rs new file mode 100644 index 00000000..7070443c --- /dev/null +++ b/src/type_analysis/propagation/expressions/operators.rs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Operator expression type inference entry points. + +mod arithmetic; +mod assignment; +mod boolean; +mod sets; +mod unary; diff --git a/src/type_analysis/propagation/expressions/operators/arithmetic.rs b/src/type_analysis/propagation/expressions/operators/arithmetic.rs new file mode 100644 index 00000000..696c3a96 --- /dev/null +++ b/src/type_analysis/propagation/expressions/operators/arithmetic.rs @@ -0,0 +1,293 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use alloc::boxed::Box; +use alloc::collections::BTreeSet; +use alloc::vec; + +use crate::ast::{ArithOp, Expr, Ref}; +use crate::lexer::Span; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, HybridType, RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, + TypeProvenance, +}; +use crate::type_analysis::propagation::facts::{derived_from_pair, schema_array_items}; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; +use crate::value::Value; + +impl TypeAnalyzer { + #[allow(clippy::too_many_arguments)] + pub(crate) fn infer_arith_expr( + &self, + module_idx: u32, + span: &Span, + op: &ArithOp, + lhs: &Ref, + rhs: &Ref, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let lhs_fact = self.infer_expr(module_idx, lhs, bindings, result, rule_analysis); + let rhs_fact = self.infer_expr(module_idx, rhs, bindings, result, rule_analysis); + + self.check_arithmetic_diagnostic(module_idx, span, op, &lhs_fact, &rhs_fact, result); + + let lhs_def_integer = Self::hybrid_is_definitely_integer(&lhs_fact); + let rhs_def_integer = Self::hybrid_is_definitely_integer(&rhs_fact); + + let (lhs_element, lhs_provenance, lhs_is_set) = match &lhs_fact.fact.descriptor { + TypeDescriptor::Schema(schema) => { + if let Some((item_schema, _)) = schema_array_items(schema) { + ( + StructuralType::from_schema(&item_schema), + lhs_fact.fact.provenance.clone(), + true, + ) + } else { + (StructuralType::Any, TypeProvenance::Propagated, false) + } + } + TypeDescriptor::Structural(StructuralType::Set(element)) => ( + element.as_ref().clone(), + lhs_fact.fact.provenance.clone(), + true, + ), + _ => (StructuralType::Any, TypeProvenance::Propagated, false), + }; + + let (_rhs_element, rhs_is_set) = match &rhs_fact.fact.descriptor { + TypeDescriptor::Schema(schema) => { + if let Some((item_schema, _)) = schema_array_items(schema) { + (StructuralType::from_schema(&item_schema), true) + } else { + (StructuralType::Any, false) + } + } + TypeDescriptor::Structural(StructuralType::Set(element)) => { + (element.as_ref().clone(), true) + } + _ => (StructuralType::Any, false), + }; + + if matches!(op, ArithOp::Sub) { + let lhs_numeric = matches!(&lhs_fact.fact.descriptor, + TypeDescriptor::Structural(st) if Self::is_numeric_type(st)); + let rhs_numeric = matches!(&rhs_fact.fact.descriptor, + TypeDescriptor::Structural(st) if Self::is_numeric_type(st)); + + if lhs_is_set && rhs_is_set { + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Set(Box::new(lhs_element.clone()))), + lhs_provenance.clone(), + ); + + if !lhs_fact.fact.origins.is_empty() || !rhs_fact.fact.origins.is_empty() { + fact = fact.with_origins(derived_from_pair( + &lhs_fact.fact.origins, + &rhs_fact.fact.origins, + )); + } + + if let (ConstantValue::Known(lhs_val), ConstantValue::Known(rhs_val)) = + (&lhs_fact.fact.constant, &rhs_fact.fact.constant) + { + if lhs_val == &Value::Undefined || rhs_val == &Value::Undefined { + fact = fact.with_constant(ConstantValue::unknown()); + fact.descriptor = TypeDescriptor::Structural(StructuralType::Unknown); + } else if let (Ok(lhs_set), Ok(rhs_set)) = (lhs_val.as_set(), rhs_val.as_set()) + { + let diff_set: BTreeSet = lhs_set + .iter() + .filter(|item| !rhs_set.contains(item)) + .cloned() + .collect(); + fact = fact.with_constant(ConstantValue::known(Value::from(diff_set))); + } + } + + return fact; + } + + if lhs_numeric && rhs_numeric { + let 
result_numeric_type = if lhs_def_integer && rhs_def_integer { + StructuralType::Integer + } else { + StructuralType::Number + }; + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(result_numeric_type.clone()), + TypeProvenance::Propagated, + ); + + if !lhs_fact.fact.origins.is_empty() || !rhs_fact.fact.origins.is_empty() { + fact = fact.with_origins(derived_from_pair( + &lhs_fact.fact.origins, + &rhs_fact.fact.origins, + )); + } + + // Perform constant folding for numeric subtraction + if let (ConstantValue::Known(lhs_val), ConstantValue::Known(rhs_val)) = + (&lhs_fact.fact.constant, &rhs_fact.fact.constant) + { + if *lhs_val == Value::Undefined || *rhs_val == Value::Undefined { + fact = fact.with_constant(ConstantValue::unknown()); + fact.descriptor = TypeDescriptor::Structural(StructuralType::Unknown); + } else if let (Ok(lhs_num), Ok(rhs_num)) = + (lhs_val.as_number(), rhs_val.as_number()) + { + if let Ok(result) = lhs_num.sub(rhs_num) { + let structural_type = if result.is_integer() { + StructuralType::Integer + } else { + StructuralType::Number + }; + + fact = TypeFact::new( + TypeDescriptor::Structural(structural_type), + TypeProvenance::Propagated, + ) + .with_constant(ConstantValue::known(Value::from(result))); + + if !lhs_fact.fact.origins.is_empty() + || !rhs_fact.fact.origins.is_empty() + { + fact = fact.with_origins(derived_from_pair( + &lhs_fact.fact.origins, + &rhs_fact.fact.origins, + )); + } + } + } + } + + return fact; + } + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(Self::make_union(vec![ + StructuralType::Number, + StructuralType::Set(Box::new(StructuralType::Any)), + ])), + TypeProvenance::Propagated, + ); + + if !lhs_fact.fact.origins.is_empty() || !rhs_fact.fact.origins.is_empty() { + fact = fact.with_origins(derived_from_pair( + &lhs_fact.fact.origins, + &rhs_fact.fact.origins, + )); + } + + return fact; + } + + let result_numeric_type = if lhs_def_integer && rhs_def_integer { + StructuralType::Integer + } else { + StructuralType::Number + }; + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(result_numeric_type), + TypeProvenance::Propagated, + ); + + if !lhs_fact.fact.origins.is_empty() || !rhs_fact.fact.origins.is_empty() { + fact = fact.with_origins(derived_from_pair( + &lhs_fact.fact.origins, + &rhs_fact.fact.origins, + )); + } + + if let (ConstantValue::Known(lhs_val), ConstantValue::Known(rhs_val)) = + (&lhs_fact.fact.constant, &rhs_fact.fact.constant) + { + if *lhs_val == Value::Undefined || *rhs_val == Value::Undefined { + fact = fact.with_constant(ConstantValue::unknown()); + fact.descriptor = TypeDescriptor::Structural(StructuralType::Unknown); + } else if let (Ok(lhs_num), Ok(rhs_num)) = (lhs_val.as_number(), rhs_val.as_number()) { + if matches!(op, ArithOp::Mod) && (!lhs_num.is_integer() || !rhs_num.is_integer()) { + fact = fact.with_constant(ConstantValue::unknown()); + fact.descriptor = TypeDescriptor::Structural(StructuralType::Unknown); + } else { + let result_number = match op { + ArithOp::Add => lhs_num.add(rhs_num).ok(), + ArithOp::Sub => lhs_num.sub(rhs_num).ok(), + ArithOp::Mul => lhs_num.mul(rhs_num).ok(), + ArithOp::Div => lhs_num.clone().divide(rhs_num).ok(), + ArithOp::Mod => lhs_num.clone().modulo(rhs_num).ok(), + }; + + if let Some(num) = result_number { + let structural_type = if num.is_integer() { + StructuralType::Integer + } else { + StructuralType::Number + }; + + fact = TypeFact::new( + TypeDescriptor::Structural(structural_type), + TypeProvenance::Propagated, + ) + 
.with_constant(ConstantValue::known(Value::from(num))); + + if !lhs_fact.fact.origins.is_empty() || !rhs_fact.fact.origins.is_empty() { + fact = fact.with_origins(derived_from_pair( + &lhs_fact.fact.origins, + &rhs_fact.fact.origins, + )); + } + } + } + } + } + + fact + } +} + +impl TypeAnalyzer { + fn hybrid_is_definitely_integer(ty: &HybridType) -> bool { + if let Some(value) = ty.fact.constant.as_value() { + if let Ok(number) = value.as_number() { + return number.is_integer(); + } + return false; + } + + Self::descriptor_is_definitely_integer(&ty.fact.descriptor) + } + + fn descriptor_is_definitely_integer(descriptor: &TypeDescriptor) -> bool { + match descriptor { + TypeDescriptor::Structural(struct_ty) => { + Self::structural_is_definitely_integer(struct_ty) + } + TypeDescriptor::Schema(schema) => { + let structural = StructuralType::from_schema(schema); + Self::structural_is_definitely_integer(&structural) + } + } + } + + fn structural_is_definitely_integer(struct_ty: &StructuralType) -> bool { + match struct_ty { + StructuralType::Integer => true, + StructuralType::Union(variants) => { + !variants.is_empty() && variants.iter().all(Self::structural_is_definitely_integer) + } + StructuralType::Enum(values) => values.iter().all(|value| { + if let Value::Number(num) = value { + num.is_integer() + } else { + false + } + }), + _ => false, + } + } +} diff --git a/src/type_analysis/propagation/expressions/operators/assignment.rs b/src/type_analysis/propagation/expressions/operators/assignment.rs new file mode 100644 index 00000000..02e7a3de --- /dev/null +++ b/src/type_analysis/propagation/expressions/operators/assignment.rs @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::borrow::ToOwned; + +use crate::ast::{AssignOp, Expr, Ref}; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::facts::mark_origins_derived; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; + +impl TypeAnalyzer { + #[allow(clippy::too_many_arguments)] + pub(crate) fn infer_assignment_expr( + &self, + module_idx: u32, + lhs: &Ref, + rhs: &Ref, + _op: &AssignOp, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let rhs_fact = self.infer_expr(module_idx, rhs, bindings, result, rule_analysis); + + if let Expr::Var { value, .. } = lhs.as_ref() { + if let Ok(name) = value.as_string() { + bindings.ensure_root_scope(); + bindings.assign(name.as_ref().to_owned(), rhs_fact.fact.clone()); + } + } + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Boolean), + TypeProvenance::Assignment, + ); + + if !rhs_fact.fact.origins.is_empty() { + fact = fact.with_origins(mark_origins_derived(&rhs_fact.fact.origins)); + } + + fact + } +} diff --git a/src/type_analysis/propagation/expressions/operators/boolean.rs b/src/type_analysis/propagation/expressions/operators/boolean.rs new file mode 100644 index 00000000..0be60075 --- /dev/null +++ b/src/type_analysis/propagation/expressions/operators/boolean.rs @@ -0,0 +1,76 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use crate::ast::{BoolOp, Expr, Ref}; +use crate::lexer::Span; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::facts::derived_from_pair; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; +use crate::value::Value; + +impl TypeAnalyzer { + #[allow(clippy::too_many_arguments)] + pub(crate) fn infer_bool_expr( + &self, + module_idx: u32, + span: &Span, + op: &BoolOp, + lhs: &Ref, + rhs: &Ref, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let lhs_fact = self.infer_expr(module_idx, lhs, bindings, result, rule_analysis); + let rhs_fact = self.infer_expr(module_idx, rhs, bindings, result, rule_analysis); + + self.check_equality_diagnostic(module_idx, span, op, &lhs_fact, &rhs_fact, result); + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Boolean), + TypeProvenance::Propagated, + ); + + if let (Some(left), Some(right)) = ( + lhs_fact.fact.constant.as_value(), + rhs_fact.fact.constant.as_value(), + ) { + let result_value = if *left == Value::Undefined || *right == Value::Undefined { + Some(Value::Undefined) + } else { + match op { + BoolOp::Eq => Some(Value::from(left == right)), + BoolOp::Ne => Some(Value::from(left != right)), + BoolOp::Lt => left + .partial_cmp(right) + .map(|ord| Value::from(ord == core::cmp::Ordering::Less)), + BoolOp::Gt => left + .partial_cmp(right) + .map(|ord| Value::from(ord == core::cmp::Ordering::Greater)), + BoolOp::Le => left + .partial_cmp(right) + .map(|ord| Value::from(ord != core::cmp::Ordering::Greater)), + BoolOp::Ge => left + .partial_cmp(right) + .map(|ord| Value::from(ord != core::cmp::Ordering::Less)), + } + }; + + if let Some(value) = result_value { + fact = fact.with_constant(ConstantValue::known(value)); + } + } + + if !lhs_fact.fact.origins.is_empty() || !rhs_fact.fact.origins.is_empty() { + fact = fact.with_origins(derived_from_pair( + &lhs_fact.fact.origins, + &rhs_fact.fact.origins, + )); + } + + fact + } +} diff --git a/src/type_analysis/propagation/expressions/operators/sets.rs b/src/type_analysis/propagation/expressions/operators/sets.rs new file mode 100644 index 00000000..4561a657 --- /dev/null +++ b/src/type_analysis/propagation/expressions/operators/sets.rs @@ -0,0 +1,165 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use alloc::boxed::Box; + +use crate::ast::{BinOp, Expr, Ref}; +use crate::lexer::Span; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::facts::{ + derived_from_pair, mark_origins_derived, schema_array_items, +}; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; +use crate::value::Value; + +impl TypeAnalyzer { + #[allow(clippy::too_many_arguments)] + pub(crate) fn infer_bin_expr( + &self, + module_idx: u32, + span: &Span, + op: &BinOp, + lhs: &Ref, + rhs: &Ref, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let lhs_type = self.infer_expr(module_idx, lhs, bindings, result, rule_analysis); + let rhs_type = self.infer_expr(module_idx, rhs, bindings, result, rule_analysis); + + self.check_set_operation_diagnostic(module_idx, span, op, &lhs_type, &rhs_type, result); + + let (lhs_element, lhs_provenance) = match &lhs_type.fact.descriptor { + TypeDescriptor::Schema(schema) => { + if let Some((items, _)) = schema_array_items(schema) { + ( + StructuralType::from_schema(&items), + lhs_type.fact.provenance.clone(), + ) + } else { + (StructuralType::Any, lhs_type.fact.provenance.clone()) + } + } + TypeDescriptor::Structural(StructuralType::Set(elem)) => { + ((**elem).clone(), lhs_type.fact.provenance.clone()) + } + _ => (StructuralType::Any, TypeProvenance::Propagated), + }; + + let rhs_element = match &rhs_type.fact.descriptor { + TypeDescriptor::Schema(schema) => { + if let Some((items, _)) = schema_array_items(schema) { + StructuralType::from_schema(&items) + } else { + StructuralType::Any + } + } + TypeDescriptor::Structural(StructuralType::Set(elem)) => (**elem).clone(), + _ => StructuralType::Any, + }; + + let result_element = match op { + BinOp::Union => Self::join_structural_types(&[lhs_element, rhs_element]), + BinOp::Intersection => lhs_element, + }; + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Set(Box::new(result_element))), + lhs_provenance, + ); + + if !lhs_type.fact.origins.is_empty() || !rhs_type.fact.origins.is_empty() { + fact = fact.with_origins(derived_from_pair( + &lhs_type.fact.origins, + &rhs_type.fact.origins, + )); + } + + if let (ConstantValue::Known(lhs_val), ConstantValue::Known(rhs_val)) = + (&lhs_type.fact.constant, &rhs_type.fact.constant) + { + if *lhs_val == Value::Undefined || *rhs_val == Value::Undefined { + fact = fact.with_constant(ConstantValue::known(Value::Undefined)); + } else if let (Ok(lhs_set), Ok(rhs_set)) = (lhs_val.as_set(), rhs_val.as_set()) { + let result_set = match op { + BinOp::Union => { + let mut union_set = lhs_set.clone(); + for item in rhs_set.iter() { + union_set.insert(item.clone()); + } + union_set + } + BinOp::Intersection => lhs_set + .iter() + .filter(|item| rhs_set.contains(item)) + .cloned() + .collect(), + }; + + fact = fact.with_constant(ConstantValue::known(Value::from(result_set))); + } + } + + fact + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn infer_membership_expr( + &self, + module_idx: u32, + span: &Span, + value: &Ref, + collection: &Ref, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let value_type = self.infer_expr(module_idx, value, bindings, result, rule_analysis); + let collection_type = + self.infer_expr(module_idx, 
collection, bindings, result, rule_analysis); + + // Check for membership diagnostic issues + self.check_membership_diagnostic(module_idx, span, &value_type, &collection_type, result); + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Boolean), + TypeProvenance::Propagated, + ); + + if !collection_type.fact.origins.is_empty() { + fact = fact.with_origins(mark_origins_derived(&collection_type.fact.origins)); + } + + if let (ConstantValue::Known(val), ConstantValue::Known(coll)) = + (&value_type.fact.constant, &collection_type.fact.constant) + { + if *val == Value::Undefined || *coll == Value::Undefined { + fact = fact.with_constant(ConstantValue::known(Value::Undefined)); + } else { + let is_member = if let Ok(set) = coll.as_set() { + set.contains(val) + } else if let Ok(arr) = coll.as_array() { + arr.contains(val) + } else if let Ok(obj) = coll.as_object() { + obj.contains_key(val) + } else if let Ok(s) = coll.as_string() { + if let Ok(needle) = val.as_string() { + s.contains(needle.as_ref()) + } else { + false + } + } else { + false + }; + + fact = fact.with_constant(ConstantValue::known(Value::from(is_member))); + } + } + + fact + } +} diff --git a/src/type_analysis/propagation/expressions/operators/unary.rs b/src/type_analysis/propagation/expressions/operators/unary.rs new file mode 100644 index 00000000..4cafeb8a --- /dev/null +++ b/src/type_analysis/propagation/expressions/operators/unary.rs @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use crate::ast::{Expr, Ref}; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::facts::mark_origins_derived; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; +use crate::value::Value; + +impl TypeAnalyzer { + pub(crate) fn infer_unary_expr( + &self, + module_idx: u32, + expr: &Ref, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let operand = self.infer_expr(module_idx, expr, bindings, result, rule_analysis); + + if let Some(value) = operand.fact.constant.as_value() { + if let Ok(number) = value.as_number() { + if let Ok(negated_number) = crate::number::Number::from(0i64).sub(number) { + let structural_type = if negated_number.is_integer() { + StructuralType::Integer + } else { + StructuralType::Number + }; + + return TypeFact::new( + TypeDescriptor::Structural(structural_type), + TypeProvenance::Literal, + ) + .with_constant(ConstantValue::known(Value::from(negated_number))); + } + } + } + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Number), + TypeProvenance::Propagated, + ); + + if !operand.fact.origins.is_empty() { + fact = fact.with_origins(mark_origins_derived(&operand.fact.origins)); + } + + fact + } +} diff --git a/src/type_analysis/propagation/expressions/rules.rs b/src/type_analysis/propagation/expressions/rules.rs new file mode 100644 index 00000000..211e0a48 --- /dev/null +++ b/src/type_analysis/propagation/expressions/rules.rs @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+
+mod calls;
+mod helpers;
+mod lookup;
+mod variables;
diff --git a/src/type_analysis/propagation/expressions/rules/calls/effects.rs b/src/type_analysis/propagation/expressions/rules/calls/effects.rs
new file mode 100644
index 00000000..2dc61fc5
--- /dev/null
+++ b/src/type_analysis/propagation/expressions/rules/calls/effects.rs
@@ -0,0 +1,153 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use alloc::borrow::ToOwned;
+use alloc::collections::{BTreeMap, BTreeSet};
+use alloc::string::String;
+use alloc::vec::Vec;
+
+use crate::type_analysis::model::{
+    RuleAnalysis, RuleSpecializationSignature, TypeFact, TypeProvenance,
+};
+use crate::type_analysis::propagation::pipeline::{RuleHeadInfo, TypeAnalysisResult, TypeAnalyzer};
+use crate::type_analysis::result::RuleSpecializationRecord;
+use crate::type_analysis::value_utils;
+use crate::type_analysis::RuleConstantState;
+use crate::utils::path::normalize_rule_path;
+use crate::value::Value;
+
+pub(crate) fn apply_rule_call_effects(
+    analyzer: &TypeAnalyzer,
+    module_idx: u32,
+    expr_idx: u32,
+    filtered_targets: Vec<RuleHeadInfo>,
+    call_arg_facts: Vec<TypeFact>,
+    result: &mut TypeAnalysisResult,
+    rule_analysis: &mut RuleAnalysis,
+) -> Vec<TypeFact> {
+    let current_rule_info = result.lookup.current_rule().map(|path| {
+        let normalized = normalize_rule_path(path);
+        let base = normalized
+            .strip_suffix(".default")
+            .unwrap_or(&normalized)
+            .to_owned();
+        (normalized, base)
+    });
+
+    let mut collected_facts = Vec::new();
+    let mut seen_paths: BTreeSet<String> = BTreeSet::new();
+
+    for info in filtered_targets {
+        let normalized_path = normalize_rule_path(&info.path);
+        if !seen_paths.insert(normalized_path.clone()) {
+            continue;
+        }
+
+        let dependency_base = normalized_path
+            .strip_suffix(".default")
+            .unwrap_or(normalized_path.as_str());
+
+        if let Some((current_norm, current_base)) = current_rule_info.as_ref() {
+            if normalized_path == *current_norm || dependency_base == current_base.as_str() {
+                continue;
+            }
+        }
+
+        result
+            .lookup
+            .record_rule_reference(module_idx, expr_idx, normalized_path.clone());
+        rule_analysis.record_rule_dependency(normalized_path.clone());
+
+        let mut scratch_result = TypeAnalysisResult::new();
+        let previous_param_facts = analyzer.prepare_function_rule_specialization(
+            info.module_idx,
+            info.rule_idx,
+            &call_arg_facts,
+            &mut scratch_result,
+        );
+        analyzer.ensure_rule_analyzed(info.module_idx, info.rule_idx, &mut scratch_result);
+        analyzer.restore_function_rule_specialization(
+            info.module_idx,
+            info.rule_idx,
+            previous_param_facts,
+        );
+
+        if !scratch_result.diagnostics.is_empty() {
+            result.diagnostics.append(&mut scratch_result.diagnostics);
+        }
+
+        let head_fact = scratch_result
+            .lookup
+            .get_expr(info.module_idx, info.expr_idx)
+            .cloned();
+
+        let mut constant_fact: Option<TypeFact> = None;
+        let mut constant_value: Option<Value> = None;
+        if let Some(rule_entry) = scratch_result
+            .rule_info
+            .get(info.module_idx as usize)
+            .and_then(|rules| rules.get(info.rule_idx))
+        {
+            if let RuleConstantState::Done(value) = &rule_entry.constant_state {
+                let mut fact = value_utils::value_to_type_fact(value);
+                fact.provenance = TypeProvenance::Rule;
+                if let Some(existing) = head_fact.as_ref() {
+                    if !existing.origins.is_empty() {
+                        fact = fact.with_origins(existing.origins.clone());
+                    }
+                }
+                constant_value = Some(value.clone());
+                constant_fact = Some(fact);
+            }
+        }
+
+        let record_head_fact = constant_fact.clone().or(head_fact.clone());
+        let expr_facts = collect_specialization_expr_facts(&scratch_result);
+
+        let record = RuleSpecializationRecord {
+            signature: RuleSpecializationSignature::from_facts(
+                info.module_idx,
+                info.rule_idx,
+                &call_arg_facts,
+            ),
+            parameter_facts: call_arg_facts.clone(),
+            head_fact: record_head_fact.clone(),
+            constant_value: constant_value.clone(),
+            expr_facts,
+            trace: None,
+        };
+        result.record_function_specialization(record);
+
+        if let Some(fact) = constant_fact {
+            collected_facts.push(fact);
+            continue;
+        }
+
+        if let Some(fact) = head_fact {
+            collected_facts.push(fact);
+        }
+    }
+
+    collected_facts
+}
+
+pub(crate) fn collect_specialization_expr_facts(
+    scratch: &TypeAnalysisResult,
+) -> BTreeMap<u32, BTreeMap<u32, TypeFact>> {
+    let mut modules: BTreeMap<u32, BTreeMap<u32, TypeFact>> = BTreeMap::new();
+
+    for (module_idx, module_slots) in scratch.lookup.expr_types().modules().iter().enumerate() {
+        let mut expr_map: BTreeMap<u32, TypeFact> = BTreeMap::new();
+        for (expr_idx, slot) in module_slots.iter().enumerate() {
+            if let Some(fact) = slot {
+                expr_map.insert(expr_idx as u32, fact.clone());
+            }
+        }
+
+        if !expr_map.is_empty() {
+            modules.insert(module_idx as u32, expr_map);
+        }
+    }
+
+    modules
+}
diff --git a/src/type_analysis/propagation/expressions/rules/calls/mod.rs b/src/type_analysis/propagation/expressions/rules/calls/mod.rs
new file mode 100644
index 00000000..94a4e8ac
--- /dev/null
+++ b/src/type_analysis/propagation/expressions/rules/calls/mod.rs
@@ -0,0 +1,184 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use alloc::borrow::ToOwned;
+use alloc::format;
+use alloc::string::String;
+use alloc::vec::Vec;
+
+use crate::ast::{Expr, Ref};
+use crate::lexer::Span;
+use crate::type_analysis::builtins;
+use crate::type_analysis::builtins::BuiltinTypeTemplate;
+use crate::type_analysis::context::ScopedBindings;
+use crate::type_analysis::model::{
+    ConstantValue, RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, TypeProvenance,
+};
+use crate::type_analysis::propagation::pipeline::{RuleHeadInfo, TypeAnalysisResult, TypeAnalyzer};
+use crate::value::Value;
+
+mod effects;
+mod resolution;
+
+use effects::apply_rule_call_effects;
+use resolution::resolve_call_path;
+
+fn template_description(template: BuiltinTypeTemplate) -> String {
+    match template {
+        BuiltinTypeTemplate::Any => "any".to_owned(),
+        BuiltinTypeTemplate::Boolean => "boolean".to_owned(),
+        BuiltinTypeTemplate::Number => "number".to_owned(),
+        BuiltinTypeTemplate::Integer => "integer".to_owned(),
+        BuiltinTypeTemplate::String => "string".to_owned(),
+        BuiltinTypeTemplate::Null => "null".to_owned(),
+        BuiltinTypeTemplate::ArrayAny => "array".to_owned(),
+        BuiltinTypeTemplate::SetAny => "set".to_owned(),
+        BuiltinTypeTemplate::ObjectAny => "object".to_owned(),
+        BuiltinTypeTemplate::SameAsArgument(idx) => {
+            format!("same type as argument {}", idx + 1)
+        }
+        BuiltinTypeTemplate::CollectionElement(idx) => {
+            format!("element type of argument {}", idx + 1)
+        }
+    }
+}
+
+impl TypeAnalyzer {
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn infer_call_expr(
+        &self,
+        module_idx: u32,
+        expr_idx: u32,
+        span: &Span,
+        fcn: &Ref<Expr>,
+        params: &[Ref<Expr>],
+        bindings: &mut ScopedBindings,
+        result: &mut TypeAnalysisResult,
+        rule_analysis: &mut RuleAnalysis,
+    ) -> TypeFact {
+        let mut arg_types = Vec::with_capacity(params.len());
+        for param in params {
+            arg_types.push(self.infer_expr(module_idx, param, bindings, result, rule_analysis));
+        }
+
+        if let Some(call_name) = resolve_call_path(fcn.as_ref()) {
+            if let Some(spec) = builtins::lookup(&call_name) {
+                let expected = spec.param_count() as usize;
+                let mut matches = expected == arg_types.len();
+
+                if expected != arg_types.len() {
+                    self.check_builtin_call_diagnostic(
+                        module_idx,
+                        span,
+                        &call_name,
+                        expected,
+                        arg_types.len(),
+                        result,
+                    );
+                } else if let Some(templates) = spec.params() {
+                    for (idx, template) in templates.iter().enumerate() {
+                        if !builtins::matches_template(&arg_types[idx], *template, &arg_types) {
+                            matches = false;
+                            let expected_type = template_description(*template);
+                            self.check_builtin_param_type_diagnostic(
+                                module_idx,
+                                span,
+                                &call_name,
+                                idx,
+                                &expected_type,
+                                &arg_types[idx],
+                                result,
+                            );
+                        }
+                    }
+                }
+
+                self.check_builtin_additional_rules(
+                    module_idx, span, &call_name, &arg_types, result,
+                );
+
+                let descriptor = if matches {
+                    spec.return_descriptor(&arg_types)
+                } else {
+                    TypeDescriptor::structural(StructuralType::Any)
+                };
+
+                let mut fact = TypeFact::new(descriptor, TypeProvenance::Builtin);
+
+                if matches {
+                    let origins = builtins::combined_arg_origins(&arg_types);
+                    if !origins.is_empty() {
+                        fact = fact.with_origins(origins);
+                    }
+
+                    if spec.is_pure() {
+                        let all_constant = arg_types
+                            .iter()
+                            .all(|arg| arg.fact.constant.as_value().is_some());
+                        if all_constant {
+                            let arg_values: Vec<Value> = arg_types
+                                .iter()
+                                .filter_map(|arg| arg.fact.constant.as_value().cloned())
+                                .collect();
+
+                            if arg_values.len() == arg_types.len() {
+                                if let Some(builtin_fn) =
+                                    crate::builtins::BUILTINS.get(call_name.as_str())
+                                {
+                                    let dummy_span = params
+                                        .first()
+                                        .map(|p| p.span())
+                                        .unwrap_or_else(|| fcn.span());
+                                    if let Ok(result_value) =
+                                        builtin_fn.0(dummy_span, params, &arg_values, false)
+                                    {
+                                        if result_value != Value::Undefined {
+                                            fact = fact
+                                                .with_constant(ConstantValue::known(result_value));
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+
+                return fact;
+            }
+
+            let targets = self.resolve_rule_call_targets(module_idx, &call_name);
+            let filtered_targets: Vec<RuleHeadInfo> = targets
+                .into_iter()
+                .filter(|info| self.function_definition_may_match_call(info, &arg_types))
+                .collect();
+
+            if !filtered_targets.is_empty() {
+                let call_arg_facts: Vec<TypeFact> =
+                    arg_types.iter().map(|arg| arg.fact.clone()).collect();
+                let collected_facts = apply_rule_call_effects(
+                    self,
+                    module_idx,
+                    expr_idx,
+                    filtered_targets,
+                    call_arg_facts,
+                    result,
+                    rule_analysis,
+                );
+
+                if !collected_facts.is_empty() {
+                    return Self::merge_rule_facts(&collected_facts);
+                }
+            }
+
+            return TypeFact::new(
+                TypeDescriptor::structural(StructuralType::Any),
+                TypeProvenance::Rule,
+            );
+        }
+
+        TypeFact::new(
+            TypeDescriptor::structural(StructuralType::Any),
+            TypeProvenance::Unknown,
+        )
+    }
+}
diff --git a/src/type_analysis/propagation/expressions/rules/calls/resolution.rs b/src/type_analysis/propagation/expressions/rules/calls/resolution.rs
new file mode 100644
index 00000000..c23a7412
--- /dev/null
+++ b/src/type_analysis/propagation/expressions/rules/calls/resolution.rs
@@ -0,0 +1,170 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+ +use alloc::borrow::ToOwned; +use alloc::collections::BTreeSet; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; + +use crate::ast::{Expr, Rule, RuleHead}; +use crate::type_analysis::model::{HybridType, StructuralType, TypeDescriptor}; +use crate::type_analysis::propagation::pipeline::{RuleHeadInfo, TypeAnalyzer}; +use crate::utils::get_path_string; +use crate::utils::path::normalize_rule_path; +use crate::value::Value; + +pub(crate) fn resolve_call_path(expr: &Expr) -> Option { + let mut segments: Vec = Vec::new(); + let mut current = expr; + + loop { + match current { + Expr::RefDot { refr, field, .. } => { + let (_, field_value) = field.as_ref()?; + let segment = field_value.as_string().ok()?.as_ref().to_owned(); + segments.push(segment); + current = refr.as_ref(); + } + Expr::RefBrack { refr, index, .. } => { + if let Expr::String { value, .. } = index.as_ref() { + let segment = value.as_string().ok()?.as_ref().to_owned(); + segments.push(segment); + current = refr.as_ref(); + } else { + return None; + } + } + Expr::Var { value, .. } => { + let segment = value.as_string().ok()?.as_ref().to_owned(); + segments.push(segment); + break; + } + _ => return None, + } + } + + segments.reverse(); + Some(segments.join(".")) +} + +impl TypeAnalyzer { + pub(crate) fn resolve_rule_call_targets( + &self, + module_idx: u32, + call_name: &str, + ) -> Vec { + let mut acceptable_paths: BTreeSet = BTreeSet::new(); + + if call_name.starts_with("data.") { + acceptable_paths.insert(normalize_rule_path(call_name)); + } else if call_name.contains('.') { + acceptable_paths.insert(normalize_rule_path(&format!("data.{call_name}"))); + } + + let module_path = self + .modules + .get(module_idx as usize) + .and_then(|module| get_path_string(module.package.refr.as_ref(), Some("data")).ok()) + .unwrap_or_else(|| "data".to_owned()); + + acceptable_paths.insert(normalize_rule_path(&format!("{module_path}.{call_name}"))); + + let key = call_name.rsplit('.').next().unwrap_or(call_name); + let mut matches = Vec::new(); + let mut seen_indices: BTreeSet<(u32, u32)> = BTreeSet::new(); + + for info in self.rule_heads_for_name(module_idx, key) { + let normalized = normalize_rule_path(&info.path); + if acceptable_paths.contains(&normalized) + && seen_indices.insert((info.module_idx, info.expr_idx)) + { + matches.push(info.clone()); + } + } + + matches + } + + pub(crate) fn function_definition_may_match_call( + &self, + info: &RuleHeadInfo, + call_args: &[HybridType], + ) -> bool { + if let Some(module) = self.modules.get(info.module_idx as usize) { + if let Some(rule) = module.policy.get(info.rule_idx) { + if let Rule::Spec { + head: RuleHead::Func { args, .. }, + .. 
+ } = rule.as_ref() + { + if args.len() != call_args.len() { + return false; + } + + for (head_arg, call_arg) in args.iter().zip(call_args.iter()) { + if !function_head_arg_matches_call(head_arg.as_ref(), call_arg) { + return false; + } + } + + return true; + } + } + } + + true + } +} + +fn function_head_arg_matches_call(head_arg: &Expr, call_arg: &HybridType) -> bool { + if let Some(literal) = literal_value_from_expr(head_arg) { + if let Some(call_value) = call_arg.fact.constant.as_value() { + if call_value == &Value::Undefined { + return true; + } + + return call_value == &literal; + } + + let structural = match &call_arg.fact.descriptor { + TypeDescriptor::Structural(st) => st.clone(), + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + }; + + return structural_type_accepts_value(&structural, &literal); + } + + true +} + +fn literal_value_from_expr(expr: &Expr) -> Option { + match expr { + Expr::String { value, .. } + | Expr::RawString { value, .. } + | Expr::Number { value, .. } + | Expr::Bool { value, .. } + | Expr::Null { value, .. } => Some(value.clone()), + _ => None, + } +} + +fn structural_type_accepts_value(ty: &StructuralType, value: &Value) -> bool { + use crate::value::Value as Val; + + match ty { + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Boolean => matches!(value, Val::Bool(_)), + StructuralType::Number => matches!(value, Val::Number(_)), + StructuralType::Integer => matches!(value, Val::Number(num) if num.is_integer()), + StructuralType::String => matches!(value, Val::String(_)), + StructuralType::Null => matches!(value, Val::Null), + StructuralType::Array(_) => matches!(value, Val::Array(_)), + StructuralType::Set(_) => matches!(value, Val::Set(_)), + StructuralType::Object(_) => matches!(value, Val::Object(_)), + StructuralType::Union(variants) => variants + .iter() + .any(|variant| structural_type_accepts_value(variant, value)), + StructuralType::Enum(values) => values.iter().any(|variant| variant == value), + } +} diff --git a/src/type_analysis/propagation/expressions/rules/helpers.rs b/src/type_analysis/propagation/expressions/rules/helpers.rs new file mode 100644 index 00000000..ace06523 --- /dev/null +++ b/src/type_analysis/propagation/expressions/rules/helpers.rs @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
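+
+//! Helpers shared by the rule-related inference code.
+//!
+//! `merge_rule_facts` combines the facts produced by multiple definitions of the
+//! same rule: the structural types are joined into a single least-upper-bound
+//! type, the origins and specialization hits of the individual facts are carried
+//! over onto the merged fact, and the provenance of the first fact is kept.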
+ +use alloc::vec::Vec; + +use crate::type_analysis::model::{ + SourceOrigin, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::pipeline::TypeAnalyzer; + +impl TypeAnalyzer { + pub(crate) fn merge_rule_facts(facts: &[TypeFact]) -> TypeFact { + if facts.is_empty() { + return TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + TypeProvenance::Unknown, + ); + } + + if facts.len() == 1 { + return facts[0].clone(); + } + + let mut structural_types = Vec::with_capacity(facts.len()); + let mut origins: Vec = Vec::new(); + let mut specialization_hits = Vec::new(); + let provenance = facts[0].provenance.clone(); + + for fact in facts { + let structural = match &fact.descriptor { + TypeDescriptor::Structural(st) => st.clone(), + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + }; + structural_types.push(structural); + if !fact.origins.is_empty() { + origins.extend(fact.origins.clone()); + } + if !fact.specialization_hits.is_empty() { + specialization_hits.extend(fact.specialization_hits.clone()); + } + } + + let merged_type = Self::join_structural_types(&structural_types); + let mut merged_fact = TypeFact::new(TypeDescriptor::Structural(merged_type), provenance); + + if !origins.is_empty() { + merged_fact = merged_fact.with_origins(origins); + } + + if !specialization_hits.is_empty() { + merged_fact = merged_fact.with_specialization_hits(specialization_hits); + } + + merged_fact + } +} diff --git a/src/type_analysis/propagation/expressions/rules/lookup.rs b/src/type_analysis/propagation/expressions/rules/lookup.rs new file mode 100644 index 00000000..6cec1126 --- /dev/null +++ b/src/type_analysis/propagation/expressions/rules/lookup.rs @@ -0,0 +1,134 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
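+
+//! Lookup of rule values referenced through `data`.
+//!
+//! `try_resolve_rule_property` resolves a field access on a `data`-rooted
+//! reference against the rule heads known to the analyzer: it records the rule
+//! dependency, lazily analyzes the target rule when its head fact is not yet
+//! available, and merges the facts of all matching definitions.
+//! `build_reference_chain` reconstructs the static prefix of the base expression
+//! so that only `data`-rooted references are considered.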
+ +use alloc::borrow::ToOwned; +use alloc::collections::BTreeSet; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; + +use crate::ast::Expr; +use crate::type_analysis::model::{RuleAnalysis, TypeFact}; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; +use crate::utils::path::{normalize_rule_path, AccessComponent, ReferenceChain}; +use crate::value::Value; + +impl TypeAnalyzer { + pub(crate) fn try_resolve_rule_property( + &self, + module_idx: u32, + expr_idx: u32, + base_expr: &Expr, + field_value: &Value, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> Option { + let field_name = match field_value.as_string() { + Ok(name) => name, + Err(_) => return None, + }; + + let reference = build_reference_chain(base_expr)?; + let static_prefix = reference.static_prefix(); + if static_prefix.is_empty() || static_prefix[0] != "data" || static_prefix.len() < 2 { + return None; + } + + let base_path = static_prefix.join("."); + let candidate_path = normalize_rule_path(&format!("{base_path}.{}", field_name.as_ref())); + + let current_rule_info = result.lookup.current_rule().map(|path| { + let normalized = normalize_rule_path(path); + let base = normalized + .strip_suffix(".default") + .unwrap_or(&normalized) + .to_owned(); + (normalized, base) + }); + + let mut collected_facts = Vec::new(); + let mut processed_paths: BTreeSet = BTreeSet::new(); + for info in self.rule_heads_for_name(module_idx, field_name.as_ref()) { + let normalized_path = normalize_rule_path(&info.path); + if normalized_path != candidate_path { + continue; + } + + let dependency_base = normalized_path + .strip_suffix(".default") + .unwrap_or(normalized_path.as_str()); + if let Some((current_norm, current_base)) = current_rule_info.as_ref() { + if normalized_path == *current_norm || dependency_base == current_base.as_str() { + continue; + } + } + + let first_encounter = processed_paths.insert(normalized_path.clone()); + + if first_encounter { + result + .lookup + .record_rule_reference(module_idx, expr_idx, normalized_path.clone()); + rule_analysis.record_rule_dependency(normalized_path.clone()); + } + + let mut head_fact = result + .lookup + .get_expr(info.module_idx, info.expr_idx) + .cloned(); + + if head_fact.is_none() { + if let Some(module) = self.modules.get(info.module_idx as usize) { + for (rule_idx, rule) in module.policy.iter().enumerate() { + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + if refr.eidx() == info.expr_idx { + self.ensure_rule_analyzed(info.module_idx, rule_idx, result); + head_fact = result + .lookup + .get_expr(info.module_idx, info.expr_idx) + .cloned(); + break; + } + } + } + } + } + + if let Some(fact) = head_fact { + collected_facts.push(fact); + } + } + + if collected_facts.is_empty() { + None + } else { + Some(Self::merge_rule_facts(&collected_facts)) + } + } +} + +fn build_reference_chain(expr: &Expr) -> Option { + let mut components: Vec = Vec::new(); + let mut current = expr; + + loop { + match current { + Expr::RefDot { refr, field, .. } => { + let (_, field_value) = field.as_ref()?; + let field_name = field_value.as_string().ok()?.as_ref().to_owned(); + components.push(AccessComponent::Field(field_name)); + current = refr.as_ref(); + } + Expr::RefBrack { refr, .. } => { + components.push(AccessComponent::Dynamic); + current = refr.as_ref(); + } + Expr::Var { value, .. 
} => { + let root = value.as_string().ok()?.as_ref().to_owned(); + components.reverse(); + return Some(ReferenceChain { root, components }); + } + _ => return None, + } + } +} diff --git a/src/type_analysis/propagation/expressions/rules/variables.rs b/src/type_analysis/propagation/expressions/rules/variables.rs new file mode 100644 index 00000000..036741a8 --- /dev/null +++ b/src/type_analysis/propagation/expressions/rules/variables.rs @@ -0,0 +1,329 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::borrow::ToOwned; +use alloc::collections::{BTreeMap, BTreeSet}; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; + +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, RuleAnalysis, SourceOrigin, SourceRoot, StructuralObjectShape, StructuralType, + TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::facts::extract_schema_constant; +use crate::type_analysis::propagation::pipeline::{RuleHeadInfo, TypeAnalysisResult, TypeAnalyzer}; +use crate::utils::get_path_string; +use crate::utils::path::normalize_rule_path; +use crate::value::Value; + +impl TypeAnalyzer { + pub(crate) fn infer_var( + &self, + module_idx: u32, + expr_idx: u32, + value: &Value, + bindings: &ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let name = match value.as_string() { + Ok(name) => name.as_ref().to_owned(), + Err(_) => { + return TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + TypeProvenance::Unknown, + ) + } + }; + if name == "input" { + let mut fact = if let Some(schema) = &self.options.input_schema { + let mut fact = TypeFact::new( + TypeDescriptor::Schema(schema.clone()), + TypeProvenance::SchemaInput, + ); + if let Some(constant) = extract_schema_constant(schema) { + fact = fact.with_constant(ConstantValue::known(constant)); + } + fact + } else { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + TypeProvenance::SchemaInput, + ) + }; + fact.origins.push(SourceOrigin::new(SourceRoot::Input)); + return fact; + } else if name == "data" { + let mut fact = if let Some(schema) = &self.options.data_schema { + let mut fact = TypeFact::new( + TypeDescriptor::Schema(schema.clone()), + TypeProvenance::SchemaData, + ); + if let Some(constant) = extract_schema_constant(schema) { + fact = fact.with_constant(ConstantValue::known(constant)); + } + fact + } else { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + TypeProvenance::SchemaData, + ) + }; + fact.origins.push(SourceOrigin::new(SourceRoot::Data)); + return fact; + } + + if let Some(fact) = bindings.lookup(&name) { + return fact.clone(); + } + + let rule_infos = self.rule_heads_for_name(module_idx, &name); + let current_rule_info = result.lookup.current_rule().map(|path| { + let normalized = normalize_rule_path(path); + let base = normalized + .strip_suffix(".default") + .unwrap_or(&normalized) + .to_owned(); + (normalized, base) + }); + + if let Some(fact) = self.collect_rule_facts_from_infos( + module_idx, + expr_idx, + rule_infos, + current_rule_info.as_ref(), + result, + rule_analysis, + ) { + return fact; + } + + if let Some(fact) = self.collect_rule_object_fact( + module_idx, + expr_idx, + &name, + current_rule_info.as_ref(), + result, + rule_analysis, + ) { + return fact; + } + + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + TypeProvenance::Unknown, + ) + } + + fn 
collect_rule_object_fact( + &self, + module_idx: u32, + expr_idx: u32, + name: &str, + current_rule_info: Option<&(String, String)>, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> Option { + let mut prefixes: Vec = Vec::new(); + + if let Some(module) = self.modules.get(module_idx as usize) { + if let Ok(module_path) = get_path_string(module.package.refr.as_ref(), Some("data")) { + prefixes.push(format!("{module_path}.{name}")); + } + } + + prefixes.push(format!("data.{name}")); + prefixes.sort(); + prefixes.dedup(); + + let mut field_facts: BTreeMap> = BTreeMap::new(); + let mut processed_paths: BTreeSet = BTreeSet::new(); + + for prefix in prefixes { + let normalized_prefix = normalize_rule_path(&prefix); + + for info in self.rule_heads_with_prefix(module_idx, &prefix) { + let normalized_path = normalize_rule_path(&info.path); + let dependency_base = normalized_path + .strip_suffix(".default") + .unwrap_or(normalized_path.as_str()) + .to_owned(); + + if !dependency_base.starts_with(&normalized_prefix) { + continue; + } + + if let Some((current_norm, current_base)) = current_rule_info { + if normalized_path == *current_norm || dependency_base == *current_base { + continue; + } + } + + let remainder = dependency_base[normalized_prefix.len()..].strip_prefix('.'); + let Some(remainder) = remainder else { + continue; + }; + + let field_name = remainder.split('.').next().unwrap_or("").to_owned(); + if field_name.is_empty() { + continue; + } + + let first_encounter = processed_paths.insert(normalized_path.clone()); + + if first_encounter { + result.lookup.record_rule_reference( + module_idx, + expr_idx, + normalized_path.clone(), + ); + rule_analysis.record_rule_dependency(normalized_path.clone()); + } + + let mut head_fact = result + .lookup + .get_expr(info.module_idx, info.expr_idx) + .cloned(); + if head_fact.is_none() { + self.ensure_all_rule_definitions_analyzed(&info.path, result); + head_fact = result + .lookup + .get_expr(info.module_idx, info.expr_idx) + .cloned(); + } + + if let Some(fact) = head_fact { + field_facts + .entry(field_name.clone()) + .or_default() + .push(fact); + } + } + } + + if field_facts.is_empty() { + return None; + } + + let mut shape = StructuralObjectShape::new(); + let mut aggregated_origins: Vec = Vec::new(); + + for (field, facts) in field_facts { + if facts.is_empty() { + continue; + } + + let merged = if facts.len() == 1 { + facts.into_iter().next().unwrap() + } else { + Self::merge_rule_facts(&facts) + }; + + let structural = match &merged.descriptor { + TypeDescriptor::Structural(st) => st.clone(), + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + }; + + shape.fields.insert(field, structural); + + if !merged.origins.is_empty() { + for origin in merged.origins.iter() { + if !aggregated_origins.contains(origin) { + aggregated_origins.push(origin.clone()); + } + } + } + } + + if shape.fields.is_empty() { + return None; + } + + let mut object_fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Object(shape)), + TypeProvenance::Rule, + ); + + if !aggregated_origins.is_empty() { + object_fact = object_fact.with_origins(aggregated_origins); + } + + Some(object_fact) + } + + fn collect_rule_facts_from_infos( + &self, + module_idx: u32, + expr_idx: u32, + infos: Vec<&RuleHeadInfo>, + current_rule_info: Option<&(String, String)>, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> Option { + if infos.is_empty() { + return None; + } + + let mut facts_by_base: BTreeMap> = 
BTreeMap::new(); + let mut processed_paths: BTreeSet = BTreeSet::new(); + + for info in infos { + let normalized_path = normalize_rule_path(&info.path); + let dependency_base = normalized_path + .strip_suffix(".default") + .unwrap_or(normalized_path.as_str()) + .to_owned(); + + if let Some((current_norm, current_base)) = current_rule_info { + if normalized_path == *current_norm || dependency_base == *current_base { + continue; + } + } + + let first_encounter = processed_paths.insert(normalized_path.clone()); + + if first_encounter { + result + .lookup + .record_rule_reference(module_idx, expr_idx, normalized_path.clone()); + rule_analysis.record_rule_dependency(normalized_path.clone()); + } + + let mut head_fact = result + .lookup + .get_expr(info.module_idx, info.expr_idx) + .cloned(); + if head_fact.is_none() { + self.ensure_all_rule_definitions_analyzed(&info.path, result); + head_fact = result + .lookup + .get_expr(info.module_idx, info.expr_idx) + .cloned(); + } + + if let Some(fact) = head_fact { + facts_by_base + .entry(dependency_base.clone()) + .or_default() + .push(fact); + } + } + + if facts_by_base.is_empty() { + return None; + } + + let mut merged = Vec::new(); + for (_base, mut facts) in facts_by_base { + if facts.len() == 1 { + merged.push(facts.pop().unwrap()); + } else { + merged.push(Self::merge_rule_facts(&facts)); + } + } + + Some(Self::merge_rule_facts(&merged)) + } +} diff --git a/src/type_analysis/propagation/expressions/statements.rs b/src/type_analysis/propagation/expressions/statements.rs new file mode 100644 index 00000000..0e4226c5 --- /dev/null +++ b/src/type_analysis/propagation/expressions/statements.rs @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use crate::ast::{Expr, Literal, LiteralStmt}; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::RuleAnalysis; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; +use crate::value::Value; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum StatementTruth { + AlwaysTrue, + AlwaysFalse, + Unknown, +} + +impl TypeAnalyzer { + pub(crate) fn analyze_stmt( + &self, + module_idx: u32, + stmt: &LiteralStmt, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> StatementTruth { + match &stmt.literal { + Literal::SomeVars { .. } => { + // Hoisted loops manage these bindings via binding plans. + StatementTruth::Unknown + } + Literal::SomeIn { + key, + value, + collection, + .. + } => { + let coll_fact = + self.infer_expr(module_idx, collection, bindings, result, rule_analysis); + + let mut truth = StatementTruth::Unknown; + if let Some(const_value) = coll_fact.fact.constant.as_value() { + if collection_is_definitely_empty(const_value) { + truth = StatementTruth::AlwaysFalse; + } + } + + if let Some(const_value) = coll_fact.fact.constant.as_value() { + result.constants.record( + module_idx, + collection.eidx(), + Some(const_value.clone()), + ); + } + + if let Some(k) = key { + let _ = self.infer_expr(module_idx, k, bindings, result, rule_analysis); + } + let _ = self.infer_expr(module_idx, value, bindings, result, rule_analysis); + truth + } + Literal::Expr { expr, .. } | Literal::NotExpr { expr, .. } => { + let hybrid = self.infer_expr(module_idx, expr, bindings, result, rule_analysis); + if matches!(stmt.literal, Literal::Expr { .. 
}) { + truth_from_positive_literal(expr, hybrid.fact.constant.as_value()) + } else { + truth_from_negated_literal(hybrid.fact.constant.as_value()) + } + } + Literal::Every { domain, query, .. } => { + self.infer_expr(module_idx, domain, bindings, result, rule_analysis); + bindings.push_scope(); + let _ = + self.analyze_query(module_idx, query, bindings, result, rule_analysis, true); + bindings.pop_scope(); + StatementTruth::Unknown + } + } + } +} + +fn truth_from_positive_literal(expr: &Expr, constant: Option<&Value>) -> StatementTruth { + if matches!(expr, Expr::AssignExpr { .. }) { + // Unification statements can still succeed even when RHS is false. + return StatementTruth::Unknown; + } + + constant + .and_then(|value| value.as_bool().ok().copied()) + .map(|flag| { + if flag { + StatementTruth::AlwaysTrue + } else { + StatementTruth::AlwaysFalse + } + }) + .unwrap_or(StatementTruth::Unknown) +} + +fn truth_from_negated_literal(constant: Option<&Value>) -> StatementTruth { + constant + .and_then(|value| value.as_bool().ok().copied()) + .map(|flag| { + if flag { + StatementTruth::AlwaysFalse + } else { + StatementTruth::AlwaysTrue + } + }) + .unwrap_or(StatementTruth::Unknown) +} + +fn collection_is_definitely_empty(value: &Value) -> bool { + match value { + Value::Array(items) => items.is_empty(), + Value::Set(items) => items.is_empty(), + Value::Object(fields) => fields.is_empty(), + _ => false, + } +} diff --git a/src/type_analysis/propagation/expressions/structures.rs b/src/type_analysis/propagation/expressions/structures.rs new file mode 100644 index 00000000..b4fe8beb --- /dev/null +++ b/src/type_analysis/propagation/expressions/structures.rs @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod literals; +mod property; +mod utils; diff --git a/src/type_analysis/propagation/expressions/structures/literals.rs b/src/type_analysis/propagation/expressions/structures/literals.rs new file mode 100644 index 00000000..cbf06744 --- /dev/null +++ b/src/type_analysis/propagation/expressions/structures/literals.rs @@ -0,0 +1,206 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
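+
+//! Type inference for array, set, and object literals.
+//!
+//! Each element (or field value) is inferred first; the element types are then
+//! joined into a single structural type, source origins are propagated with the
+//! appropriate path segment (index, field name, or a derived marker), and a
+//! constant value is attached to the literal's fact only when every element is
+//! itself a known constant.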
+ +use alloc::borrow::ToOwned; +use alloc::boxed::Box; +use alloc::collections::{BTreeMap, BTreeSet}; +use alloc::vec::Vec; + +use crate::ast::{Expr, Ref}; +use crate::lexer::Span; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, PathSegment, RuleAnalysis, StructuralObjectShape, StructuralType, + TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::facts::{extend_origins_with_segment, mark_origins_derived}; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; +use crate::value::Value; + +impl TypeAnalyzer { + pub(crate) fn infer_array_literal( + &self, + module_idx: u32, + items: &[Ref], + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let mut element_types = Vec::new(); + let mut all_origins = Vec::new(); + let mut constant_elements = Vec::new(); + let mut all_constant = true; + + for (idx, item_expr) in items.iter().enumerate() { + let item_type = self.infer_expr(module_idx, item_expr, bindings, result, rule_analysis); + + element_types.push(match &item_type.fact.descriptor { + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + TypeDescriptor::Structural(st) => st.clone(), + }); + + if !item_type.fact.origins.is_empty() { + let item_origins = + extend_origins_with_segment(&item_type.fact.origins, PathSegment::Index(idx)); + all_origins.extend(item_origins); + } + + if let ConstantValue::Known(value) = &item_type.fact.constant { + constant_elements.push(value.clone()); + } else { + all_constant = false; + } + } + + let element_type = if element_types.is_empty() { + StructuralType::Any + } else { + Self::join_structural_types(&element_types) + }; + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Array(Box::new(element_type))), + TypeProvenance::Literal, + ); + + if !all_origins.is_empty() { + fact = fact.with_origins(all_origins); + } + + if all_constant { + fact = fact.with_constant(ConstantValue::known(Value::from(constant_elements))); + } + + fact + } + + pub(crate) fn infer_set_literal( + &self, + module_idx: u32, + items: &[Ref], + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let mut element_types = Vec::new(); + let mut all_origins = Vec::new(); + let mut constant_elements = BTreeSet::new(); + let mut all_constant = true; + + for item_expr in items { + let item_type = self.infer_expr(module_idx, item_expr, bindings, result, rule_analysis); + + element_types.push(match &item_type.fact.descriptor { + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + TypeDescriptor::Structural(st) => st.clone(), + }); + + if !item_type.fact.origins.is_empty() { + all_origins.extend(mark_origins_derived(&item_type.fact.origins)); + } + + if let ConstantValue::Known(value) = &item_type.fact.constant { + constant_elements.insert(value.clone()); + } else { + all_constant = false; + } + } + + let element_type = if element_types.is_empty() { + StructuralType::Any + } else { + Self::join_structural_types(&element_types) + }; + + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Set(Box::new(element_type))), + TypeProvenance::Literal, + ); + + if !all_origins.is_empty() { + fact = fact.with_origins(all_origins); + } + + if all_constant { + fact = fact.with_constant(ConstantValue::known(Value::from(constant_elements))); + } + + fact + } + + 
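+    /// Infers the type of an object literal.
+    ///
+    /// String keys contribute named fields to the resulting object shape; values
+    /// under non-literal keys only contribute derived origins. The fact carries a
+    /// constant value only when every key and value is a known constant.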
pub(crate) fn infer_object_literal( + &self, + module_idx: u32, + fields: &[(Span, Ref, Ref)], + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) -> TypeFact { + let mut field_types = BTreeMap::new(); + let mut all_origins = Vec::new(); + let mut constant_fields = BTreeMap::new(); + let mut all_constant = true; + + for (_key_span, key_expr, value_expr) in fields { + let (key_name, key_value) = if let Expr::String { value, .. } + | Expr::RawString { value, .. } = key_expr.as_ref() + { + ( + value.as_string().ok().map(|s| s.as_ref().to_owned()), + Some(value.clone()), + ) + } else { + let key_fact = + self.infer_expr(module_idx, key_expr, bindings, result, rule_analysis); + let key_const = key_fact.fact.constant.as_value().cloned(); + (None, key_const) + }; + + let value_type = + self.infer_expr(module_idx, value_expr, bindings, result, rule_analysis); + + let value_structural_type = match &value_type.fact.descriptor { + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + TypeDescriptor::Structural(st) => st.clone(), + }; + + if let Some(name) = key_name { + field_types.insert(name.clone(), value_structural_type); + + if !value_type.fact.origins.is_empty() { + let field_origins = extend_origins_with_segment( + &value_type.fact.origins, + PathSegment::Field(name), + ); + all_origins.extend(field_origins); + } + } else if !value_type.fact.origins.is_empty() { + all_origins.extend(mark_origins_derived(&value_type.fact.origins)); + } + + if let (Some(k), ConstantValue::Known(v)) = (key_value, &value_type.fact.constant) { + constant_fields.insert(k, v.clone()); + } else { + all_constant = false; + } + } + + let shape = StructuralObjectShape { + fields: field_types, + }; + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Object(shape)), + TypeProvenance::Literal, + ); + + if !all_origins.is_empty() { + fact = fact.with_origins(all_origins); + } + + if all_constant { + fact = fact.with_constant(ConstantValue::known(Value::from(constant_fields))); + } + + fact + } +} diff --git a/src/type_analysis/propagation/expressions/structures/property.rs b/src/type_analysis/propagation/expressions/structures/property.rs new file mode 100644 index 00000000..347317e0 --- /dev/null +++ b/src/type_analysis/propagation/expressions/structures/property.rs @@ -0,0 +1,152 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
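+
+//! Property access (`base.field`) inference.
+//!
+//! Schema-backed bases are resolved through their declared properties, falling
+//! back to `additionalProperties` and emitting a schema-violation diagnostic when
+//! the field is not declared (a warning when additional properties are allowed,
+//! an error otherwise). Structural object bases are resolved through their known
+//! fields, with an error diagnostic when the field is definitely missing. In all
+//! remaining cases the access degrades to `any` (schema bases) or `unknown`
+//! (structural bases) instead of failing the analysis.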
+ +use alloc::borrow::ToOwned; + +use crate::lexer::Span; +use crate::type_analysis::model::{ + ConstantValue, HybridType, PathSegment, StructuralType, TypeDescriptor, TypeDiagnostic, + TypeDiagnosticKind, TypeDiagnosticSeverity, TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::facts::{ + extend_origins_with_segment, extract_schema_constant, schema_additional_properties_schema, + schema_property, +}; +use crate::type_analysis::propagation::pipeline::{TypeAnalysisResult, TypeAnalyzer}; +use crate::type_analysis::value_utils; +use crate::value::Value; + +impl TypeAnalyzer { + pub(crate) fn infer_property_access( + &self, + _module_idx: u32, + base: HybridType, + field_value: Value, + field_span: Option<&Span>, + result: &mut TypeAnalysisResult, + ) -> TypeFact { + let base_provenance = match base.fact.provenance { + TypeProvenance::SchemaInput => TypeProvenance::SchemaInput, + TypeProvenance::SchemaData => TypeProvenance::SchemaData, + _ => TypeProvenance::Propagated, + }; + + if let Value::String(field) = &field_value { + if let Some(schema) = base.fact.descriptor.as_schema() { + let missing_prop_severity = Self::schema_missing_property_severity(schema); + let field_name = field.as_ref(); + + if Self::schema_has_named_property(schema, field_name) { + if let Some((prop_schema, schema_constant)) = + schema_property(schema, field_name) + { + let mut fact = TypeFact::new( + TypeDescriptor::Schema(prop_schema), + base_provenance.clone(), + ); + + if let Some(constant) = schema_constant { + fact = fact.with_constant(ConstantValue::known(constant.clone())); + let structural_descriptor = + value_utils::value_to_type_fact(&constant).descriptor; + fact.descriptor = structural_descriptor; + } + + if !base.fact.origins.is_empty() { + let origins = extend_origins_with_segment( + &base.fact.origins, + PathSegment::Field(field_name.to_owned()), + ); + fact = fact.with_origins(origins); + } + + return fact; + } + } else { + if let Some(span) = field_span { + if let Some(message) = + Self::schema_missing_property_message(schema, field_name) + { + let (line, col, end_line, end_col) = + Self::diagnostic_range_from_span(span); + result.diagnostics.push(TypeDiagnostic { + file: span.source.get_path().as_str().into(), + message, + kind: TypeDiagnosticKind::SchemaViolation, + severity: missing_prop_severity, + line, + col, + end_line, + end_col, + }); + } + } + + if let Some(additional_schema) = schema_additional_properties_schema(schema) { + let mut fact = TypeFact::new( + TypeDescriptor::Schema(additional_schema.clone()), + base_provenance.clone(), + ); + if let Some(constant) = extract_schema_constant(&additional_schema) { + fact = fact.with_constant(ConstantValue::known(constant.clone())); + let structural_descriptor = + value_utils::value_to_type_fact(&constant).descriptor; + fact.descriptor = structural_descriptor; + } + if !base.fact.origins.is_empty() { + let origins = extend_origins_with_segment( + &base.fact.origins, + PathSegment::Field(field_name.to_owned()), + ); + fact = fact.with_origins(origins); + } + + return fact; + } + } + } + + if let TypeDescriptor::Structural(struct_ty) = &base.fact.descriptor { + if let Some(field_ty) = Self::structural_field_type(struct_ty, field.as_ref()) { + let mut fact = TypeFact::new( + TypeDescriptor::Structural(field_ty), + TypeProvenance::Propagated, + ); + + if !base.fact.origins.is_empty() { + let origins = extend_origins_with_segment( + &base.fact.origins, + PathSegment::Field(field.as_ref().to_owned()), + ); + fact = 
fact.with_origins(origins); + } + + return fact; + } else if let Some(message) = + Self::structural_missing_property_message(struct_ty, field.as_ref()) + { + if let Some(span) = field_span { + let (line, col, end_line, end_col) = Self::diagnostic_range_from_span(span); + result.diagnostics.push(TypeDiagnostic { + file: span.source.get_path().as_str().into(), + message, + kind: TypeDiagnosticKind::SchemaViolation, + severity: TypeDiagnosticSeverity::Error, + line, + col, + end_line, + end_col, + }); + } + } + } + } + + let fallback_ty = match &base.fact.descriptor { + TypeDescriptor::Schema(_) => StructuralType::Any, + TypeDescriptor::Structural(_) => StructuralType::Unknown, + }; + + TypeFact::new(TypeDescriptor::Structural(fallback_ty), base_provenance) + } +} diff --git a/src/type_analysis/propagation/expressions/structures/utils.rs b/src/type_analysis/propagation/expressions/structures/utils.rs new file mode 100644 index 00000000..d24f1a2d --- /dev/null +++ b/src/type_analysis/propagation/expressions/structures/utils.rs @@ -0,0 +1,377 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::borrow::ToOwned; +use alloc::boxed::Box; +use alloc::collections::BTreeMap; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; + +use crate::schema::{Schema, Type}; +use crate::type_analysis::model::{StructuralObjectShape, StructuralType, TypeDiagnosticSeverity}; +use crate::type_analysis::propagation::pipeline::TypeAnalyzer; +use crate::value::Value; + +impl TypeAnalyzer { + /// Join multiple structural types to find their least upper bound (LUB) + pub(crate) fn join_structural_types(types: &[StructuralType]) -> StructuralType { + use StructuralType::*; + + if types.is_empty() { + return StructuralType::Any; + } + + if types.len() == 1 { + return Self::normalize_structural_type(types[0].clone()); + } + + let first = &types[0]; + if types.iter().all(|t| t == first) { + return Self::normalize_structural_type(first.clone()); + } + + let all_numbers = types.iter().all(|t| matches!(t, Number | Integer)); + if all_numbers { + if types.iter().all(|t| matches!(t, Integer)) { + return StructuralType::Integer; + } + return StructuralType::Number; + } + + let all_arrays: Option> = types + .iter() + .map(|t| { + if let Array(elem) = t { + Some((**elem).clone()) + } else { + None + } + }) + .collect(); + + if let Some(element_types) = all_arrays { + let joined_element = Self::join_structural_types(&element_types); + return Self::normalize_structural_type(StructuralType::Array(Box::new( + joined_element, + ))); + } + + let all_sets: Option> = types + .iter() + .map(|t| { + if let Set(elem) = t { + Some((**elem).clone()) + } else { + None + } + }) + .collect(); + + if let Some(element_types) = all_sets { + let joined_element = Self::join_structural_types(&element_types); + return Self::normalize_structural_type(StructuralType::Set(Box::new(joined_element))); + } + + let has_union = types.iter().any(|t| matches!(t, Union(_))); + if has_union { + let mut all_types = Vec::new(); + for ty in types { + if let Union(inner) = ty { + all_types.extend(inner.clone()); + } else { + all_types.push(ty.clone()); + } + } + return Self::make_union(all_types); + } + + Self::make_union(types.to_vec()) + } + + /// Create a Union type with deduplication and flattening + pub(crate) fn make_union(types: Vec) -> StructuralType { + let mut flattened = Vec::new(); + for ty in types { + if let StructuralType::Union(inner) = ty { + flattened.extend(inner); + } else { + 
flattened.push(ty); + } + } + + let normalized: Vec = flattened + .into_iter() + .map(Self::normalize_structural_type) + .collect(); + + Self::finalize_union(normalized) + } + + /// Extract the type of a field from a structural object (supports unions) + pub(crate) fn structural_field_type( + ty: &StructuralType, + field: &str, + ) -> Option { + match ty { + StructuralType::Object(shape) => shape.fields.get(field).cloned(), + StructuralType::Union(variants) => { + let mut collected = Vec::new(); + for variant in variants { + if let Some(field_ty) = Self::structural_field_type(variant, field) { + collected.push(field_ty); + } else { + return None; + } + } + if collected.is_empty() { + None + } else { + Some(Self::make_union(collected)) + } + } + _ => None, + } + } + + pub(crate) fn structural_definitely_missing_field(ty: &StructuralType, field: &str) -> bool { + match ty { + StructuralType::Object(shape) => !shape.fields.contains_key(field), + StructuralType::Union(variants) => variants + .iter() + .all(|variant| Self::structural_definitely_missing_field(variant, field)), + StructuralType::Any | StructuralType::Unknown => false, + _ => true, + } + } + + pub(crate) fn structural_type_display(ty: &StructuralType) -> String { + match ty { + StructuralType::Any => "any".to_owned(), + StructuralType::Boolean => "boolean".to_owned(), + StructuralType::Number => "number".to_owned(), + StructuralType::Integer => "integer".to_owned(), + StructuralType::String => "string".to_owned(), + StructuralType::Null => "null".to_owned(), + StructuralType::Array(elem) => { + format!("array[{}]", Self::structural_type_display(elem.as_ref())) + } + StructuralType::Set(elem) => { + format!("set[{}]", Self::structural_type_display(elem.as_ref())) + } + StructuralType::Object(shape) => { + if shape.fields.is_empty() { + "object".to_owned() + } else { + let fields: Vec = shape.fields.keys().cloned().collect(); + format!("object{{{}}}", fields.join(", ")) + } + } + StructuralType::Union(variants) => { + let parts: Vec = + variants.iter().map(Self::structural_type_display).collect(); + format!("union[{}]", parts.join(" | ")) + } + StructuralType::Enum(values) => { + let rendered: Vec = values + .iter() + .map(|value| value.to_json_str().unwrap_or_else(|_| "?".to_owned())) + .collect(); + format!("enum[{}]", rendered.join(" | ")) + } + StructuralType::Unknown => "unknown".to_owned(), + } + } + + pub(crate) fn structural_missing_property_message( + struct_ty: &StructuralType, + field: &str, + ) -> Option { + if !Self::structural_definitely_missing_field(struct_ty, field) { + return None; + } + + match struct_ty { + StructuralType::Object(shape) => { + let fields: Vec = shape.fields.keys().cloned().collect(); + if fields.is_empty() { + Some(format!( + "Property '{}' is not defined on this object", + field + )) + } else { + Some(format!( + "Property '{}' is not defined on this object; known fields: {}", + field, + fields.join(", ") + )) + } + } + StructuralType::Union(variants) => { + let parts: Vec = + variants.iter().map(Self::structural_type_display).collect(); + Some(format!( + "Property '{}' is not defined on any of the inferred union types ({})", + field, + parts.join(" | ") + )) + } + StructuralType::Any | StructuralType::Unknown => None, + other => Some(format!( + "Cannot access property '{}' on {} value", + field, + Self::structural_type_display(other) + )), + } + } + + /// Extract the element type from a structural array (supports unions) + pub(crate) fn structural_array_element(ty: &StructuralType) -> Option { + 
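+        // Arrays and sets expose their element type directly; a union yields an
+        // element type only if every variant does, otherwise the lookup fails.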
match ty { + StructuralType::Array(elem) | StructuralType::Set(elem) => Some((**elem).clone()), + StructuralType::Union(variants) => { + let mut collected = Vec::new(); + for variant in variants { + if let Some(elem_ty) = Self::structural_array_element(variant) { + collected.push(elem_ty); + } else { + return None; + } + } + if collected.is_empty() { + None + } else { + Some(Self::make_union(collected)) + } + } + _ => None, + } + } + + pub(crate) fn schema_has_named_property(schema: &Schema, field: &str) -> bool { + match schema.as_type() { + Type::Object { properties, .. } => properties.contains_key(field), + Type::AnyOf(variants) => variants + .iter() + .any(|variant| Self::schema_has_named_property(variant, field)), + _ => false, + } + } + + pub(crate) fn schema_missing_property_message(schema: &Schema, field: &str) -> Option { + match schema.as_type() { + Type::Object { properties, .. } => { + let fields: Vec = properties + .keys() + .map(|name| name.as_ref().to_owned()) + .collect(); + if fields.is_empty() { + Some(format!("Property '{}' is not defined by the schema", field)) + } else { + Some(format!( + "Property '{}' is not defined by the schema; known properties: {}", + field, + fields.join(", ") + )) + } + } + Type::AnyOf(variants) => { + let defined_somewhere = variants + .iter() + .any(|variant| Self::schema_has_named_property(variant, field)); + if defined_somewhere { + None + } else { + Some(format!( + "Property '{}' is not defined by any branch of this schema", + field + )) + } + } + Type::Any { .. } => None, + _ => Some(format!( + "Cannot access property '{}' on non-object schema type", + field + )), + } + } + + fn schema_allows_additional_properties(schema: &Schema) -> bool { + match schema.as_type() { + Type::Object { + additional_properties, + .. 
+ } => additional_properties.is_some(), + Type::AnyOf(variants) => variants + .iter() + .all(Self::schema_allows_additional_properties), + _ => false, + } + } + + pub(crate) fn schema_missing_property_severity(schema: &Schema) -> TypeDiagnosticSeverity { + if Self::schema_allows_additional_properties(schema) { + TypeDiagnosticSeverity::Warning + } else { + TypeDiagnosticSeverity::Error + } + } + + pub(crate) fn normalize_structural_type(ty: StructuralType) -> StructuralType { + use StructuralType::*; + + match ty { + Enum(values) if Self::enum_values_are_boolean(&values) => Boolean, + Enum(values) => Enum(values), + Array(elem) => Array(Box::new(Self::normalize_structural_type(*elem))), + Set(elem) => Set(Box::new(Self::normalize_structural_type(*elem))), + Object(shape) => Object(Self::normalize_object_shape(shape)), + Union(variants) => { + let normalized: Vec = variants + .into_iter() + .map(Self::normalize_structural_type) + .collect(); + Self::finalize_union(normalized) + } + other => other, + } + } + + fn normalize_object_shape(shape: StructuralObjectShape) -> StructuralObjectShape { + let mut fields = BTreeMap::new(); + for (name, ty) in shape.fields.into_iter() { + fields.insert(name, Self::normalize_structural_type(ty)); + } + StructuralObjectShape { fields } + } + + fn finalize_union(mut variants: Vec) -> StructuralType { + if variants.is_empty() { + return StructuralType::Unknown; + } + + variants.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b))); + variants.dedup(); + + let has_specific = variants + .iter() + .any(|ty| !matches!(ty, StructuralType::Any | StructuralType::Unknown)); + if has_specific { + variants.retain(|ty| !matches!(ty, StructuralType::Any | StructuralType::Unknown)); + } + + if variants.is_empty() { + return StructuralType::Unknown; + } + + if variants.len() == 1 { + return variants.into_iter().next().unwrap(); + } + + StructuralType::Union(variants) + } + + fn enum_values_are_boolean(values: &[Value]) -> bool { + values.iter().all(|value| value.as_bool().is_ok()) + } +} diff --git a/src/type_analysis/propagation/facts/iteration.rs b/src/type_analysis/propagation/facts/iteration.rs new file mode 100644 index 00000000..019ccb68 --- /dev/null +++ b/src/type_analysis/propagation/facts/iteration.rs @@ -0,0 +1,197 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::vec::Vec; + +use crate::schema::Type; +use crate::type_analysis::model::{ + PathSegment, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; + +use super::{extend_origins_with_segment, mark_origins_derived}; +use crate::type_analysis::propagation::pipeline::TypeAnalyzer; + +pub(crate) struct IterationFacts { + pub(crate) value_fact: TypeFact, + pub(crate) key_fact: Option, +} + +impl TypeAnalyzer { + pub(crate) fn build_iteration_facts(&self, collection_fact: &TypeFact) -> IterationFacts { + let provenance = match collection_fact.provenance { + TypeProvenance::SchemaInput => TypeProvenance::SchemaInput, + TypeProvenance::SchemaData => TypeProvenance::SchemaData, + _ => TypeProvenance::Propagated, + }; + + let derived_origins = if collection_fact.origins.is_empty() { + Vec::new() + } else { + mark_origins_derived(&extend_origins_with_segment( + &collection_fact.origins, + PathSegment::Any, + )) + }; + + match &collection_fact.descriptor { + TypeDescriptor::Schema(schema) => match schema.as_type() { + Type::Array { items, .. 
} => { + let mut value_fact = + TypeFact::new(TypeDescriptor::Schema(items.clone()), provenance.clone()); + if !derived_origins.is_empty() { + value_fact = value_fact.with_origins(derived_origins.clone()); + } + + let mut key_fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Integer), + provenance, + ); + if !value_fact.origins.is_empty() { + key_fact = key_fact.with_origins(value_fact.origins.clone()); + } + + IterationFacts { + value_fact, + key_fact: Some(key_fact), + } + } + Type::Set { items, .. } => { + let mut value_fact = + TypeFact::new(TypeDescriptor::Schema(items.clone()), provenance.clone()); + if !derived_origins.is_empty() { + value_fact = value_fact.with_origins(derived_origins.clone()); + } + + let key_fact = Some(value_fact.clone()); + + IterationFacts { + value_fact, + key_fact, + } + } + Type::Object { + properties, + additional_properties, + .. + } => { + let mut property_types: Vec = properties + .values() + .map(StructuralType::from_schema) + .collect(); + + if let Some(additional) = additional_properties { + property_types.push(StructuralType::from_schema(additional)); + } + + let value_descriptor = if property_types.is_empty() { + TypeDescriptor::Structural(StructuralType::Any) + } else { + let joined = Self::join_structural_types(&property_types); + TypeDescriptor::Structural(joined) + }; + + let mut value_fact = TypeFact::new(value_descriptor, provenance.clone()); + if !derived_origins.is_empty() { + value_fact = value_fact.with_origins(derived_origins.clone()); + } + + let mut key_fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::String), + provenance, + ); + if !derived_origins.is_empty() { + key_fact = key_fact.with_origins(derived_origins); + } + + IterationFacts { + value_fact, + key_fact: Some(key_fact), + } + } + _ => IterationFacts { + value_fact: TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + provenance, + ), + key_fact: None, + }, + }, + TypeDescriptor::Structural(struct_ty) => match struct_ty { + StructuralType::Array(element_ty) => { + let mut value_fact = TypeFact::new( + TypeDescriptor::Structural((**element_ty).clone()), + provenance.clone(), + ); + if !derived_origins.is_empty() { + value_fact = value_fact.with_origins(derived_origins.clone()); + } + + let mut key_fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Integer), + provenance, + ); + if !derived_origins.is_empty() { + key_fact = key_fact.with_origins(derived_origins); + } + + IterationFacts { + value_fact, + key_fact: Some(key_fact), + } + } + StructuralType::Set(element_ty) => { + let mut value_fact = TypeFact::new( + TypeDescriptor::Structural((**element_ty).clone()), + provenance.clone(), + ); + if !derived_origins.is_empty() { + value_fact = value_fact.with_origins(derived_origins.clone()); + } + + let key_fact = Some(value_fact.clone()); + + IterationFacts { + value_fact, + key_fact, + } + } + StructuralType::Object(shape) => { + let property_types: Vec = + shape.fields.values().cloned().collect(); + + let value_descriptor = if property_types.is_empty() { + TypeDescriptor::Structural(StructuralType::Any) + } else { + let joined = Self::join_structural_types(&property_types); + TypeDescriptor::Structural(joined) + }; + + let mut value_fact = TypeFact::new(value_descriptor, provenance.clone()); + if !derived_origins.is_empty() { + value_fact = value_fact.with_origins(derived_origins.clone()); + } + + let mut key_fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::String), + provenance, + ); + if 
!derived_origins.is_empty() { + key_fact = key_fact.with_origins(derived_origins); + } + + IterationFacts { + value_fact, + key_fact: Some(key_fact), + } + } + _ => IterationFacts { + value_fact: TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + provenance, + ), + key_fact: None, + }, + }, + } + } +} diff --git a/src/type_analysis/propagation/facts/mod.rs b/src/type_analysis/propagation/facts/mod.rs new file mode 100644 index 00000000..7cc1d57c --- /dev/null +++ b/src/type_analysis/propagation/facts/mod.rs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod iteration; +mod origins; +mod schema; + +pub(crate) use origins::{derived_from_pair, extend_origins_with_segment, mark_origins_derived}; +pub(crate) use schema::{ + extract_schema_constant, schema_additional_properties_schema, schema_allows_value, + schema_array_items, schema_property, +}; diff --git a/src/type_analysis/propagation/facts/origins.rs b/src/type_analysis/propagation/facts/origins.rs new file mode 100644 index 00000000..ac6e6c8a --- /dev/null +++ b/src/type_analysis/propagation/facts/origins.rs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::vec::Vec; + +use crate::type_analysis::model::{PathSegment, SourceOrigin}; + +pub(crate) fn extend_origins_with_segment( + origins: &[SourceOrigin], + segment: PathSegment, +) -> Vec { + origins + .iter() + .map(|origin| { + let mut updated = origin.clone(); + updated.path.push(segment.clone()); + updated + }) + .collect() +} + +pub(crate) fn mark_origins_derived(origins: &[SourceOrigin]) -> Vec { + origins + .iter() + .map(|origin| { + let mut updated = origin.clone(); + updated.derived = true; + updated + }) + .collect() +} + +pub(crate) fn derived_from_pair(lhs: &[SourceOrigin], rhs: &[SourceOrigin]) -> Vec { + lhs.iter() + .chain(rhs.iter()) + .map(|origin| { + let mut updated = origin.clone(); + updated.derived = true; + updated + }) + .collect() +} diff --git a/src/type_analysis/propagation/facts/schema.rs b/src/type_analysis/propagation/facts/schema.rs new file mode 100644 index 00000000..99d4ed39 --- /dev/null +++ b/src/type_analysis/propagation/facts/schema.rs @@ -0,0 +1,145 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::vec::Vec; + +use crate::schema::{Schema, Type}; +use crate::value::Value; + +pub(crate) fn schema_property(schema: &Schema, field: &str) -> Option<(Schema, Option)> { + match schema.as_type() { + Type::Object { + properties, + additional_properties, + .. 
+ } => { + let prop_schema = properties + .get(field) + .cloned() + .or_else(|| additional_properties.clone())?; + let constant = extract_schema_constant(&prop_schema); + Some((prop_schema, constant)) + } + Type::AnyOf(variants) => { + let results: Vec<_> = variants + .iter() + .filter_map(|variant| schema_property(variant, field)) + .collect(); + + if results.is_empty() { + return None; + } + + let prop_schema = results[0].0.clone(); + + let mut constant: Option = results[0].1.clone(); + for (_, var_const) in results.iter().skip(1) { + match (constant.as_ref(), var_const.as_ref()) { + (Some(c1), Some(c2)) if c1 != c2 => { + constant = None; + break; + } + (Some(_), None) | (None, Some(_)) => { + constant = None; + break; + } + _ => {} + } + } + + Some((prop_schema, constant)) + } + _ => None, + } +} + +pub(crate) fn schema_additional_properties_schema(schema: &Schema) -> Option { + match schema.as_type() { + Type::Object { + additional_properties, + .. + } => additional_properties.clone(), + _ => None, + } +} + +pub(crate) fn schema_array_items(schema: &Schema) -> Option<(Schema, Option)> { + match schema.as_type() { + Type::Array { items, .. } | Type::Set { items, .. } => { + let constant = extract_schema_constant(items); + Some((items.clone(), constant)) + } + Type::AnyOf(variants) => { + let results: Vec<_> = variants.iter().filter_map(schema_array_items).collect(); + + if results.is_empty() { + return None; + } + + let item_schema = results[0].0.clone(); + + let mut constant: Option = results[0].1.clone(); + for (_, var_const) in results.iter().skip(1) { + match (constant.as_ref(), var_const.as_ref()) { + (Some(c1), Some(c2)) if c1 != c2 => { + constant = None; + break; + } + (Some(_), None) | (None, Some(_)) => { + constant = None; + break; + } + _ => {} + } + } + + Some((item_schema, constant)) + } + _ => None, + } +} + +pub(crate) fn schema_allows_value(schema: &Schema, value: &Value) -> bool { + match schema.as_type() { + Type::Enum { values, .. } => values.iter().any(|v| v == value), + Type::Const { value: allowed, .. } => allowed == value, + Type::AnyOf(variants) => variants + .iter() + .any(|variant| schema_allows_value(variant, value)), + _ => true, + } +} + +pub(crate) fn extract_schema_constant(schema: &Schema) -> Option { + match schema.as_type() { + Type::Const { value, .. } => Some(value.clone()), + Type::Enum { values, .. } => { + if values.len() == 1 { + Some(values[0].clone()) + } else { + None + } + } + Type::AnyOf(variants) => { + let mut constant: Option = None; + for variant in variants.iter() { + match extract_schema_constant(variant) { + Some(val) => { + if let Some(ref existing) = constant { + if existing != &val { + return None; + } + } else { + constant = Some(val); + } + } + None => { + return None; + } + } + } + constant + } + _ => None, + } +} diff --git a/src/type_analysis/propagation/loops/binding.rs b/src/type_analysis/propagation/loops/binding.rs new file mode 100644 index 00000000..475370c1 --- /dev/null +++ b/src/type_analysis/propagation/loops/binding.rs @@ -0,0 +1,166 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
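+
+//! Application of hoisted-loop binding plans to the type state.
+//!
+//! `apply_binding_plan` dispatches on the plan produced by the destructuring
+//! planner: plain assignments, loop index bindings, function parameters, and
+//! `some ... in` bindings (whose key/value facts are derived from the collection
+//! via the iteration helpers). Assignment plans infer the source side first and
+//! then destructure the resulting fact into the scoped bindings; equality checks
+//! and wildcard matches simply infer both operands.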
+ +use crate::compiler::destructuring_planner::{AssignmentPlan, BindingPlan}; + +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{RuleAnalysis, TypeFact}; + +use super::super::pipeline::{AnalysisState, TypeAnalyzer}; + +impl TypeAnalyzer { + #[allow(clippy::too_many_arguments)] + pub(crate) fn apply_binding_plan( + &self, + module_idx: u32, + _expr: &crate::ast::Ref, + plan: &BindingPlan, + current_fact: &TypeFact, + bindings: &mut ScopedBindings, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + ) { + match plan { + BindingPlan::Assignment { plan } => { + self.apply_assignment_plan(module_idx, plan, bindings, result, rule_analysis); + } + BindingPlan::LoopIndex { + destructuring_plan, .. + } => { + self.apply_destructuring_plan( + module_idx, + destructuring_plan, + current_fact, + bindings, + result, + rule_analysis, + ); + } + BindingPlan::Parameter { + param_expr, + destructuring_plan, + } => { + let expr_idx = param_expr.eidx(); + self.ensure_expr_capacity(module_idx, expr_idx, result); + result + .lookup + .record_expr(module_idx, expr_idx, current_fact.clone()); + + self.apply_destructuring_plan( + module_idx, + destructuring_plan, + current_fact, + bindings, + result, + rule_analysis, + ); + } + BindingPlan::SomeIn { + key_plan, + value_plan, + .. + } => { + let iteration = self.build_iteration_facts(current_fact); + if let Some(key_plan) = key_plan { + if let Some(key_fact) = iteration.key_fact.as_ref() { + self.apply_destructuring_plan( + module_idx, + key_plan, + key_fact, + bindings, + result, + rule_analysis, + ); + } + } + self.apply_destructuring_plan( + module_idx, + value_plan, + &iteration.value_fact, + bindings, + result, + rule_analysis, + ); + } + } + } + + pub(super) fn apply_assignment_plan( + &self, + module_idx: u32, + plan: &AssignmentPlan, + bindings: &mut ScopedBindings, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + ) { + use AssignmentPlan::*; + + match plan { + ColonEquals { + lhs_expr, + rhs_expr, + lhs_plan, + } + | EqualsBindLeft { + lhs_plan, + lhs_expr, + rhs_expr, + } => { + let rhs_fact = self + .infer_expr(module_idx, rhs_expr, bindings, result, rule_analysis) + .fact; + self.apply_destructuring_plan( + module_idx, + lhs_plan, + &rhs_fact, + bindings, + result, + rule_analysis, + ); + let _ = self.infer_expr(module_idx, lhs_expr, bindings, result, rule_analysis); + } + EqualsBindRight { + rhs_plan, + lhs_expr, + rhs_expr, + } => { + let lhs_fact = self + .infer_expr(module_idx, lhs_expr, bindings, result, rule_analysis) + .fact; + self.apply_destructuring_plan( + module_idx, + rhs_plan, + &lhs_fact, + bindings, + result, + rule_analysis, + ); + let _ = self.infer_expr(module_idx, rhs_expr, bindings, result, rule_analysis); + } + EqualsBothSides { element_pairs, .. } => { + for (value_expr, element_plan) in element_pairs { + let value_fact = self + .infer_expr(module_idx, value_expr, bindings, result, rule_analysis) + .fact; + self.apply_destructuring_plan( + module_idx, + element_plan, + &value_fact, + bindings, + result, + rule_analysis, + ); + } + } + EqualityCheck { + lhs_expr, rhs_expr, .. + } + | WildcardMatch { + lhs_expr, rhs_expr, .. 
+ } => { + let _ = self.infer_expr(module_idx, lhs_expr, bindings, result, rule_analysis); + let _ = self.infer_expr(module_idx, rhs_expr, bindings, result, rule_analysis); + } + } + } +} diff --git a/src/type_analysis/propagation/loops/destructuring.rs b/src/type_analysis/propagation/loops/destructuring.rs new file mode 100644 index 00000000..b2bb1fda --- /dev/null +++ b/src/type_analysis/propagation/loops/destructuring.rs @@ -0,0 +1,195 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::borrow::ToOwned; + +use crate::compiler::destructuring_planner::DestructuringPlan; +use crate::value::Value; + +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, PathSegment, RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, +}; + +use super::super::facts::{ + extend_origins_with_segment, mark_origins_derived, schema_array_items, schema_property, +}; +use super::super::pipeline::{TypeAnalysisResult, TypeAnalyzer}; + +impl TypeAnalyzer { + #[allow(clippy::too_many_arguments)] + pub(super) fn apply_destructuring_plan( + &self, + module_idx: u32, + plan: &DestructuringPlan, + source_fact: &TypeFact, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) { + use DestructuringPlan::*; + + match plan { + Var(span) => { + let name = span.text(); + if name != "_" { + bindings.ensure_root_scope(); + bindings.assign(name.to_owned(), source_fact.clone()); + } + } + Ignore => {} + EqualityExpr(expr) => { + let _ = self.infer_expr(module_idx, expr, bindings, result, rule_analysis); + } + EqualityValue(_) => {} + Array { element_plans } => { + let element_fact = self.derive_array_element_fact(source_fact); + for element_plan in element_plans { + self.apply_destructuring_plan( + module_idx, + element_plan, + &element_fact, + bindings, + result, + rule_analysis, + ); + } + } + Object { + field_plans, + dynamic_fields, + } => { + for (key, field_plan) in field_plans { + let field_fact = self.derive_object_field_fact(source_fact, key); + self.apply_destructuring_plan( + module_idx, + field_plan, + &field_fact, + bindings, + result, + rule_analysis, + ); + } + + for (key_expr, field_plan) in dynamic_fields { + let dynamic_fact = self.derive_dynamic_field_fact(source_fact); + self.apply_destructuring_plan( + module_idx, + field_plan, + &dynamic_fact, + bindings, + result, + rule_analysis, + ); + let _ = self.infer_expr(module_idx, key_expr, bindings, result, rule_analysis); + } + } + } + } + + pub(super) fn derive_array_element_fact(&self, source_fact: &TypeFact) -> TypeFact { + match &source_fact.descriptor { + TypeDescriptor::Schema(schema) => { + if let Some((item_schema, schema_constant)) = schema_array_items(schema) { + let mut fact = TypeFact::new( + TypeDescriptor::Schema(item_schema), + source_fact.provenance.clone(), + ); + + if let Some(constant) = schema_constant { + fact = fact.with_constant(ConstantValue::known(constant)); + } + + if !source_fact.origins.is_empty() { + fact = fact.with_origins(mark_origins_derived( + &extend_origins_with_segment(&source_fact.origins, PathSegment::Any), + )); + } + return fact; + } + } + TypeDescriptor::Structural(StructuralType::Array(element_ty)) + | TypeDescriptor::Structural(StructuralType::Set(element_ty)) => { + let mut fact = TypeFact::new( + TypeDescriptor::Structural((**element_ty).clone()), + source_fact.provenance.clone(), + ); + if !source_fact.origins.is_empty() { + fact = 
fact.with_origins(mark_origins_derived(&extend_origins_with_segment( + &source_fact.origins, + PathSegment::Any, + ))); + } + return fact; + } + _ => {} + } + + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + source_fact.provenance.clone(), + ) + } + + pub(super) fn derive_object_field_fact(&self, source_fact: &TypeFact, key: &Value) -> TypeFact { + if let Ok(name) = key.as_string() { + if let TypeDescriptor::Schema(schema) = &source_fact.descriptor { + if let Some((prop_schema, schema_constant)) = schema_property(schema, name.as_ref()) + { + let mut fact = TypeFact::new( + TypeDescriptor::Schema(prop_schema), + source_fact.provenance.clone(), + ); + + if let Some(constant) = schema_constant { + fact = fact.with_constant(ConstantValue::known(constant)); + } + + if !source_fact.origins.is_empty() { + fact = + fact.with_origins(mark_origins_derived(&extend_origins_with_segment( + &source_fact.origins, + PathSegment::Field(name.as_ref().to_owned()), + ))); + } + return fact; + } + } + + if let TypeDescriptor::Structural(StructuralType::Object(shape)) = + &source_fact.descriptor + { + if let Some(field_ty) = shape.fields.get(name.as_ref()) { + let mut fact = TypeFact::new( + TypeDescriptor::Structural(field_ty.clone()), + source_fact.provenance.clone(), + ); + if !source_fact.origins.is_empty() { + fact = + fact.with_origins(mark_origins_derived(&extend_origins_with_segment( + &source_fact.origins, + PathSegment::Field(name.as_ref().to_owned()), + ))); + } + return fact; + } + } + } + + self.derive_dynamic_field_fact(source_fact) + } + + pub(super) fn derive_dynamic_field_fact(&self, source_fact: &TypeFact) -> TypeFact { + let mut fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + source_fact.provenance.clone(), + ); + if !source_fact.origins.is_empty() { + fact = fact.with_origins(mark_origins_derived(&extend_origins_with_segment( + &source_fact.origins, + PathSegment::Any, + ))); + } + fact + } +} diff --git a/src/type_analysis/propagation/loops/mod.rs b/src/type_analysis/propagation/loops/mod.rs new file mode 100644 index 00000000..1e4c3300 --- /dev/null +++ b/src/type_analysis/propagation/loops/mod.rs @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod binding; +mod destructuring; +mod seeding; diff --git a/src/type_analysis/propagation/loops/seeding.rs b/src/type_analysis/propagation/loops/seeding.rs new file mode 100644 index 00000000..6ed6ff5f --- /dev/null +++ b/src/type_analysis/propagation/loops/seeding.rs @@ -0,0 +1,165 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
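// Loop seeding, illustrative summary: before a statement is analyzed, loops hoisted
// out of it are processed so that loop variables carry facts derived from the
// iterated collection. For a hypothetical body like `input.servers[i].open`, an
// IndexIteration loop over `input.servers` seeds the key and value expressions with
// facts from `build_iteration_facts`; `walk(...)` iterations are seeded as an array
// of Any elements, since their element shape is not statically known.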
+ +use alloc::boxed::Box; + +use crate::ast::{Expr, LiteralStmt, Ref}; +use crate::compiler::hoist::{HoistedLoop, LoopType}; + +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; + +use super::super::pipeline::{TypeAnalysisResult, TypeAnalyzer}; + +impl TypeAnalyzer { + pub(crate) fn seed_statement_loops( + &self, + module_idx: u32, + stmt: &LiteralStmt, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) { + let Some(loop_lookup) = self.loop_lookup.as_ref() else { + return; + }; + + if let Some(loops) = loop_lookup.get_statement_loops(module_idx, stmt.sidx) { + for loop_info in loops { + self.process_hoisted_loop(module_idx, loop_info, bindings, result, rule_analysis); + } + } + } + + pub(crate) fn process_hoisted_loop( + &self, + module_idx: u32, + loop_info: &HoistedLoop, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) { + match loop_info.loop_type { + LoopType::IndexIteration => { + let collection_ty = self.infer_expr( + module_idx, + &loop_info.collection, + bindings, + result, + rule_analysis, + ); + + let iteration = self.build_iteration_facts(&collection_ty.fact); + + if let Some(key_expr) = &loop_info.key { + if let Some(key_fact) = iteration.key_fact.as_ref() { + self.seed_expr_with_fact( + module_idx, + key_expr, + key_fact.clone(), + bindings, + result, + rule_analysis, + ); + } + } + + self.seed_expr_with_fact( + module_idx, + &loop_info.value, + iteration.value_fact.clone(), + bindings, + result, + rule_analysis, + ); + + if let Some(loop_expr) = &loop_info.loop_expr { + if loop_expr.eidx() != loop_info.value.eidx() { + self.seed_expr_with_fact( + module_idx, + loop_expr, + iteration.value_fact.clone(), + bindings, + result, + rule_analysis, + ); + } + } + } + LoopType::Walk => { + let fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Array(Box::new( + StructuralType::Any, + ))), + TypeProvenance::Propagated, + ); + self.seed_expr_with_fact( + module_idx, + &loop_info.value, + fact, + bindings, + result, + rule_analysis, + ); + } + } + } + + pub(crate) fn seed_expr_with_fact( + &self, + module_idx: u32, + expr: &Ref, + fact: TypeFact, + bindings: &mut ScopedBindings, + result: &mut TypeAnalysisResult, + rule_analysis: &mut RuleAnalysis, + ) { + let eidx = expr.eidx(); + + if let Some(existing) = result.lookup.get_expr(module_idx, eidx).cloned() { + if let Some(loop_lookup) = self.loop_lookup.as_ref() { + if let Some(plan) = loop_lookup.get_expr_binding_plan(module_idx, eidx) { + self.apply_binding_plan( + module_idx, + expr, + plan, + &existing, + bindings, + result, + rule_analysis, + ); + } + } + return; + } + + self.ensure_expr_capacity(module_idx, eidx, result); + result.lookup.record_expr(module_idx, eidx, fact.clone()); + + if let ConstantValue::Known(value) = &fact.constant { + result + .constants + .record(module_idx, eidx, Some(value.clone())); + } + + if !fact.origins.is_empty() { + rule_analysis.record_origins(&fact.origins); + } + + if let Some(loop_lookup) = self.loop_lookup.as_ref() { + if let Some(plan) = loop_lookup.get_expr_binding_plan(module_idx, eidx) { + self.apply_binding_plan( + module_idx, + expr, + plan, + &fact, + bindings, + result, + rule_analysis, + ); + } + } + } +} diff --git a/src/type_analysis/propagation/mod.rs b/src/type_analysis/propagation/mod.rs new file mode 100644 index 
00000000..442d47ce --- /dev/null +++ b/src/type_analysis/propagation/mod.rs @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +mod diagnostics; +mod expressions; +mod facts; +mod loops; +mod pipeline; + +pub use pipeline::{AnalysisState, TypeAnalysisOptions, TypeAnalyzer}; diff --git a/src/type_analysis/propagation/pipeline.rs b/src/type_analysis/propagation/pipeline.rs new file mode 100644 index 00000000..252bed85 --- /dev/null +++ b/src/type_analysis/propagation/pipeline.rs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod analyzer; +mod options; +mod result; + +pub(crate) use analyzer::RuleHeadInfo; +pub use analyzer::TypeAnalyzer; +pub use options::TypeAnalysisOptions; +pub use result::AnalysisState; +pub(crate) use result::TypeAnalysisResult; diff --git a/src/type_analysis/propagation/pipeline/analyzer/entrypoints.rs b/src/type_analysis/propagation/pipeline/analyzer/entrypoints.rs new file mode 100644 index 00000000..5dbf4edb --- /dev/null +++ b/src/type_analysis/propagation/pipeline/analyzer/entrypoints.rs @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::{borrow::ToOwned, collections::BTreeSet, format, vec, vec::Vec}; + +use crate::ast::Expr; +use crate::utils::get_path_string; +use crate::utils::path::{matches_path_pattern, normalize_rule_path}; + +use super::TypeAnalyzer; + +impl TypeAnalyzer { + /// Resolve entrypoint patterns to concrete (module_idx, rule_idx) pairs. + /// Returns None if no filtering configured, otherwise Some(entrypoint_set). + /// Also tracks default rules that should be included. + pub(super) fn resolve_entrypoints(&self) -> Option> { + if !self.entrypoint_filtering { + return None; + } + + let entrypoints = self.options.entrypoints.as_ref()?; + let mut resolved = BTreeSet::new(); + + for pattern in entrypoints { + for (module_idx, module) in self.modules.iter().enumerate() { + for (rule_idx, rule) in module.policy.iter().enumerate() { + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + if let Expr::Var { .. } = refr.as_ref() { + let module_path = + get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + let var_path = get_path_string(refr.as_ref(), Some(&module_path)) + .unwrap_or_else(|_| "unknown".to_owned()); + + let normalized = normalize_rule_path(&var_path); + if matches_path_pattern(&normalized, pattern) { + resolved.insert((module_idx as u32, rule_idx)); + self.find_and_add_default_rule( + &normalized, + module_idx as u32, + &mut resolved, + ); + } + } + } + } + } + } + + Some(resolved) + } + + /// Find and include the default rule for a given rule path. + pub(super) fn find_and_add_default_rule( + &self, + rule_path: &str, + hint_module_idx: u32, + resolved: &mut BTreeSet<(u32, usize)>, + ) { + let default_path = format!("{rule_path}.default"); + + let search_modules: Vec = if (hint_module_idx as usize) < self.modules.len() { + let mut mods = vec![hint_module_idx as usize]; + mods.extend((0..self.modules.len()).filter(|&i| i != hint_module_idx as usize)); + mods + } else { + (0..self.modules.len()).collect() + }; + + for module_idx in search_modules { + let module = &self.modules[module_idx]; + for (rule_idx, rule) in module.policy.iter().enumerate() { + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + if let Expr::Var { .. 
} = refr.as_ref() { + let module_path = + get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + let var_path = get_path_string(refr.as_ref(), Some(&module_path)) + .unwrap_or_else(|_| "unknown".to_owned()); + + let normalized = normalize_rule_path(&var_path); + + if normalized == default_path { + resolved.insert((module_idx as u32, rule_idx)); + self.included_defaults + .borrow_mut() + .insert(default_path.clone()); + return; + } + } + } + } + } + } +} diff --git a/src/type_analysis/propagation/pipeline/analyzer/mod.rs b/src/type_analysis/propagation/pipeline/analyzer/mod.rs new file mode 100644 index 00000000..97d05dca --- /dev/null +++ b/src/type_analysis/propagation/pipeline/analyzer/mod.rs @@ -0,0 +1,259 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::{ + borrow::ToOwned, + collections::{BTreeMap, BTreeSet}, + string::String, + vec::Vec, +}; + +use crate::ast::{Expr, Module}; +use crate::compiler::hoist::HoistedLoopsLookup; +use crate::lexer::Span; +use crate::scheduler::Schedule; +use crate::Rc; + +use crate::utils::get_path_string; + +use super::{result::AnalysisState, TypeAnalysisOptions}; + +mod entrypoints; +mod rule_analysis; +mod rule_index; +mod validation; + +pub(crate) use rule_index::RuleHeadInfo; + +pub struct TypeAnalyzer { + pub(crate) modules: Rc>>, + pub(crate) schedule: Option>, + pub(crate) options: TypeAnalysisOptions, + pub(crate) loop_lookup: Option>, + module_rule_heads: Vec>>, + global_rule_heads: BTreeMap>, + analysis_stack: core::cell::RefCell>, + entrypoint_filtering: bool, + requested_entrypoints: Vec, + included_defaults: core::cell::RefCell>, + function_param_facts: + core::cell::RefCell>>, + constant_eval_engine: core::cell::OnceCell>, + disable_function_generic_pass: bool, +} + +impl TypeAnalyzer { + pub fn new( + modules: &[crate::ast::Ref], + schedule: Option<&Schedule>, + options: TypeAnalysisOptions, + ) -> Self { + let (module_rule_heads, global_rule_heads) = Self::build_rule_head_index(modules); + let entrypoint_filtering = options.is_entrypoint_filtered(); + let requested_entrypoints = options.entrypoints.clone().unwrap_or_default(); + let disable_function_generic_pass = options.disable_function_generic_pass; + + TypeAnalyzer { + modules: Rc::new(modules.to_vec()), + schedule: schedule.map(|sched| { + Rc::new(Schedule { + queries: sched.queries.clone(), + }) + }), + loop_lookup: options.loop_lookup.clone(), + options, + module_rule_heads, + global_rule_heads, + analysis_stack: core::cell::RefCell::new(Vec::new()), + entrypoint_filtering, + requested_entrypoints, + included_defaults: core::cell::RefCell::new(BTreeSet::new()), + function_param_facts: core::cell::RefCell::new(BTreeMap::new()), + constant_eval_engine: core::cell::OnceCell::new(), + disable_function_generic_pass, + } + } + + /// Create a TypeAnalyzer from an engine that has been prepared for evaluation. + /// The engine will be used for constant folding of rules. 
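/// Returns `None` when the engine cannot provide a type-analysis context
/// (modules, schedule, and hoisted-loop lookup); otherwise the analyzer shares
/// the engine's modules and keeps a clone of the engine for evaluating rule
/// constants on demand.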
+ pub fn from_engine( + engine: &mut crate::engine::Engine, + options: TypeAnalysisOptions, + ) -> Option { + let (modules, schedule, engine_loop_lookup, _compiled_policy) = + engine.get_type_analysis_context()?; + + let (module_rule_heads, global_rule_heads) = Self::build_rule_head_index(modules.as_ref()); + let entrypoint_filtering = options.is_entrypoint_filtered(); + let requested_entrypoints = options.entrypoints.clone().unwrap_or_default(); + let disable_function_generic_pass = options.disable_function_generic_pass; + + let analyzer = TypeAnalyzer { + modules, + schedule, + loop_lookup: options.loop_lookup.clone().or(engine_loop_lookup), + options, + module_rule_heads, + global_rule_heads, + analysis_stack: core::cell::RefCell::new(Vec::new()), + entrypoint_filtering, + requested_entrypoints, + included_defaults: core::cell::RefCell::new(BTreeSet::new()), + function_param_facts: core::cell::RefCell::new(BTreeMap::new()), + constant_eval_engine: core::cell::OnceCell::new(), + disable_function_generic_pass, + }; + + // Seed the engine for constant folding. + let _ = analyzer + .constant_eval_engine + .set(core::cell::RefCell::new(engine.clone())); + + Some(analyzer) + } + + pub(crate) fn diagnostic_range_from_span(span: &Span) -> (u32, u32, u32, u32) { + let (line, col) = span.source.offset_to_line_col(span.start); + let mut end_line = line; + let mut end_col = col; + let mut advanced = false; + + for ch in span.text().chars() { + advanced = true; + if ch == '\n' { + end_line += 1; + end_col = 1; + } else { + end_col += 1; + } + } + + if !advanced { + end_col += 1; + } + + (line, col, end_line, end_col) + } + + pub fn analyze_modules(&self) -> crate::type_analysis::TypeAnalysisResult { + let mut state = AnalysisState::new(); + + self.validate_rule_definitions(&mut state); + state.requested_entrypoints = self.requested_entrypoints.clone(); + + if let Some(entrypoints) = self.resolve_entrypoints() { + state.included_defaults = self.included_defaults.borrow().clone(); + + for (module_idx, rule_idx) in &entrypoints { + let module = &self.modules[*module_idx as usize]; + let rule = &module.policy[*rule_idx]; + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + if let Expr::Var { .. 
} = refr.as_ref() { + let module_path = + get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + let var_path = get_path_string(refr.as_ref(), Some(&module_path)) + .unwrap_or_else(|_| "unknown".to_owned()); + + state.lookup.mark_reachable(var_path); + } + } + } + + for (module_idx, module) in self.modules.iter().enumerate() { + let module_idx_u32 = module_idx as u32; + state + .lookup + .ensure_expr_capacity(module_idx_u32, module.num_expressions); + state.ensure_rule_capacity(module_idx_u32, module.policy.len()); + + for rule_idx in 0..module.policy.len() { + if entrypoints.contains(&(module_idx_u32, rule_idx)) { + self.ensure_rule_analyzed(module_idx_u32, rule_idx, &mut state); + } + } + } + } else { + for (module_idx, module) in self.modules.iter().enumerate() { + let module_idx = module_idx as u32; + state + .lookup + .ensure_expr_capacity(module_idx, module.num_expressions); + state.ensure_rule_capacity(module_idx, module.policy.len()); + self.analyze_module(module_idx, module, &mut state); + } + } + + // Convert AnalysisState to public TypeAnalysisResult + crate::type_analysis::TypeAnalysisResult::from_analysis_state(state, &self.modules) + } + + pub(crate) fn prepare_function_rule_specialization( + &self, + module_idx: u32, + rule_idx: usize, + facts: &[crate::type_analysis::model::TypeFact], + result: &mut AnalysisState, + ) -> Option> { + let mut previous = None; + if let Some(module) = self.modules.get(module_idx as usize) { + if let Some(rule) = module.policy.get(rule_idx) { + if matches!( + rule.as_ref(), + crate::ast::Rule::Spec { + head: crate::ast::RuleHead::Func { .. }, + .. + } + ) { + previous = self + .function_param_facts + .borrow_mut() + .insert((module_idx, rule_idx), facts.to_vec()); + + result.ensure_rule_capacity(module_idx, rule_idx + 1); + result.rule_info[module_idx as usize][rule_idx] = + crate::type_analysis::model::RuleAnalysis::default(); + + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + result + .lookup + .expr_types_mut() + .clear(module_idx, refr.eidx()); + } + } + } + } + + previous + } + + pub(crate) fn restore_function_rule_specialization( + &self, + module_idx: u32, + rule_idx: usize, + previous: Option>, + ) { + let mut store = self.function_param_facts.borrow_mut(); + if let Some(prev) = previous { + store.insert((module_idx, rule_idx), prev); + } else { + store.remove(&(module_idx, rule_idx)); + } + } + + pub(crate) fn try_evaluate_rule_constant( + &self, + _module_idx: u32, + rule_path: &str, + ) -> Option { + use crate::engine::Engine; + + let engine_cell = self.constant_eval_engine.get_or_init(|| { + core::cell::RefCell::new(Engine::new_with_modules(self.modules.clone())) + }); + + let mut engine = engine_cell.borrow_mut(); + engine.try_eval_rule_constant(rule_path) + } +} diff --git a/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/bodies.rs b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/bodies.rs new file mode 100644 index 00000000..e45ae6c7 --- /dev/null +++ b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/bodies.rs @@ -0,0 +1,379 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
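// Body analysis, illustrative summary: function parameters are bound first, using
// any specialized facts recorded for this (module, rule) pair at a call site, then
// the body query is analyzed in scheduled order. If the query is reachable, the
// head fact is recorded; for a hypothetical rule `f(x) := y if { y := x + 1 }` the
// fact inferred for `y` becomes the function's return fact, and bodies that are not
// provably always-true drop constant information from the recorded fact.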
+ +use alloc::{borrow::ToOwned, boxed::Box, vec}; + +use crate::ast::{Expr, RuleBody, RuleHead}; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + RuleAnalysis, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; +use crate::value::Value; + +use super::super::super::result::AnalysisState; +use super::super::TypeAnalyzer; + +impl TypeAnalyzer { + #[allow(clippy::too_many_arguments)] + pub(super) fn analyze_rule_body( + &self, + module_idx: u32, + rule_idx: usize, + body_idx: usize, + head: &RuleHead, + body: &RuleBody, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + allow_unreachable_diag: bool, + ) { + let mut bindings = ScopedBindings::new(); + bindings.ensure_root_scope(); + + let specialized_param_facts = match head { + RuleHead::Func { .. } => self + .function_param_facts + .borrow() + .get(&(module_idx, rule_idx)) + .cloned(), + _ => None, + }; + + if let RuleHead::Func { args, .. } = head { + for (arg_idx, arg) in args.iter().enumerate() { + let binding_plan = self + .loop_lookup + .as_ref() + .and_then(|lookup| lookup.get_expr_binding_plan(module_idx, arg.eidx())); + + let specialized_fact = specialized_param_facts + .as_ref() + .and_then(|facts| facts.get(arg_idx).cloned()); + + let binding_fact = specialized_fact.as_ref().cloned().unwrap_or_else(|| { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Any), + TypeProvenance::Unknown, + ) + }); + + if let Some(plan) = binding_plan { + self.apply_binding_plan( + module_idx, + arg, + plan, + &binding_fact, + &mut bindings, + result, + rule_analysis, + ); + } else if let Some(fact) = specialized_fact { + self.ensure_expr_capacity(module_idx, arg.eidx(), result); + result + .lookup + .record_expr(module_idx, arg.eidx(), fact.clone()); + if let Expr::Var { value, .. } = arg.as_ref() { + if let Ok(name) = value.as_string() { + bindings.assign(name.as_ref().to_owned(), fact); + } + } + } + } + } + + if let Some(assign) = &body.assign { + let fact = self + .infer_expr( + module_idx, + &assign.value, + &mut bindings, + result, + rule_analysis, + ) + .fact; + if let Expr::Var { value, .. } = assign.value.as_ref() { + if let Ok(name) = value.as_string() { + bindings.assign(name.as_ref().to_owned(), fact); + } + } + } + + let query_eval = self.analyze_query( + module_idx, + &body.query, + &mut bindings, + result, + rule_analysis, + allow_unreachable_diag, + ); + + result.record_body_query_truth( + module_idx, + rule_idx, + body_idx, + query_eval.reachable, + query_eval.always_true, + ); + + if !query_eval.reachable { + return; + } + + if let RuleHead::Func { args, .. } = head { + for arg in args { + if let Expr::Var { value, .. } = arg.as_ref() { + if let Ok(name) = value.as_string() { + if let Some(param_fact) = bindings.lookup(name.as_ref()) { + self.ensure_expr_capacity(module_idx, arg.eidx(), result); + result + .lookup + .record_expr(module_idx, arg.eidx(), param_fact.clone()); + + if let Some(const_value) = param_fact.constant.as_value() { + result.constants.record( + module_idx, + arg.eidx(), + Some(const_value.clone()), + ); + } + } + } + } + } + } + + let output_context = self + .loop_lookup + .as_ref() + .and_then(|lookup| lookup.get_query_context(module_idx, body.query.qidx)); + + match head { + RuleHead::Compr { refr, assign, .. 
} => { + self.ensure_expr_capacity(module_idx, refr.eidx(), result); + + if let Some(context) = output_context { + if let Some(key_expr) = &context.key_expr { + let _key_fact = self.infer_expr( + module_idx, + key_expr, + &mut bindings, + result, + rule_analysis, + ); + + if let Some(value_expr) = &context.value_expr { + let value_fact = self.infer_expr( + module_idx, + value_expr, + &mut bindings, + result, + rule_analysis, + ); + + let mut recorded_fact = value_fact.fact.clone(); + if !query_eval.always_true { + recorded_fact.constant = + crate::type_analysis::ConstantValue::unknown(); + } + Self::record_rule_head_fact( + result, + module_idx, + refr.eidx(), + recorded_fact, + ); + + if let Some(expr_idx) = context.value_expr.as_ref().map(|e| e.eidx()) { + result.lookup.record_expr( + module_idx, + expr_idx, + value_fact.fact.clone(), + ); + } + return; + } + } else if let Some(value_expr) = &context.value_expr { + let value_fact = self.infer_expr( + module_idx, + value_expr, + &mut bindings, + result, + rule_analysis, + ); + let mut recorded_fact = value_fact.fact.clone(); + if !query_eval.always_true { + recorded_fact.constant = crate::type_analysis::ConstantValue::unknown(); + } + Self::record_rule_head_fact(result, module_idx, refr.eidx(), recorded_fact); + return; + } + } + + let fact = self.infer_expr(module_idx, refr, &mut bindings, result, rule_analysis); + let mut fact_to_record = fact.fact.clone(); + if assign.is_none() { + fact_to_record.descriptor = TypeDescriptor::Structural(StructuralType::Boolean); + fact_to_record.constant = crate::type_analysis::ConstantValue::unknown(); + } else if !query_eval.always_true { + fact_to_record.constant = crate::type_analysis::ConstantValue::unknown(); + } + Self::record_rule_head_fact(result, module_idx, refr.eidx(), fact_to_record); + } + RuleHead::Set { refr, key, .. 
} => { + self.ensure_expr_capacity(module_idx, refr.eidx(), result); + + if let Some(context) = output_context { + if let Some(value_expr) = &context.value_expr { + let element_fact = self.infer_expr( + module_idx, + value_expr, + &mut bindings, + result, + rule_analysis, + ); + let element_type = match &element_fact.fact.descriptor { + TypeDescriptor::Structural(st) => st.clone(), + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + }; + let existing_fact = result.lookup.get_expr(module_idx, refr.eidx()); + let merged_element_type = + Self::merge_set_element_types(existing_fact, element_type); + + let set_fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Set(Box::new( + merged_element_type, + ))), + TypeProvenance::Propagated, + ); + + Self::record_rule_head_fact(result, module_idx, refr.eidx(), set_fact); + return; + } + } + + if let Some(key_expr) = key { + self.ensure_expr_capacity(module_idx, key_expr.eidx(), result); + let element_fact = + self.infer_expr(module_idx, key_expr, &mut bindings, result, rule_analysis); + let element_type = match &element_fact.fact.descriptor { + TypeDescriptor::Structural(st) => st.clone(), + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + }; + let existing_fact = result.lookup.get_expr(module_idx, refr.eidx()); + let merged_element_type = + Self::merge_set_element_types(existing_fact, element_type); + let set_fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Set(Box::new( + merged_element_type, + ))), + TypeProvenance::Propagated, + ); + Self::record_rule_head_fact(result, module_idx, refr.eidx(), set_fact); + } else { + let fact = + self.infer_expr(module_idx, refr, &mut bindings, result, rule_analysis); + Self::record_rule_head_fact(result, module_idx, refr.eidx(), fact.fact.clone()); + } + } + RuleHead::Func { refr, assign, .. 
} => { + self.ensure_expr_capacity(module_idx, refr.eidx(), result); + + let has_assign = assign.is_some(); + let mut assign_value_fact: Option<(TypeFact, bool)> = None; + if let Some(assign) = assign { + self.ensure_expr_capacity(module_idx, assign.value.eidx(), result); + let value_fact = self.infer_expr( + module_idx, + &assign.value, + &mut bindings, + result, + rule_analysis, + ); + let informative = fact_is_informative(&value_fact.fact); + if informative || !self.disable_function_generic_pass { + assign_value_fact = Some((value_fact.fact.clone(), informative)); + } + } + + let fact = self.infer_expr(module_idx, refr, &mut bindings, result, rule_analysis); + let mut fact_to_record = fact.fact.clone(); + if assign.is_none() { + fact_to_record.descriptor = TypeDescriptor::Structural(StructuralType::Boolean); + fact_to_record.constant = crate::type_analysis::ConstantValue::unknown(); + } + let fact_informative = fact_is_informative(&fact_to_record); + let assign_informative = assign_value_fact + .as_ref() + .map(|(_, informative)| *informative) + .unwrap_or(false); + if let Some((value_fact, _)) = assign_value_fact.as_ref() { + let mut recorded_fact = value_fact.clone(); + if !query_eval.always_true { + recorded_fact.constant = crate::type_analysis::ConstantValue::unknown(); + } + Self::record_rule_head_fact(result, module_idx, refr.eidx(), recorded_fact); + } + let skip_generic_fact = if assign_value_fact.is_some() { + self.disable_function_generic_pass || (assign_informative && !fact_informative) + } else if has_assign { + self.disable_function_generic_pass + } else { + false + }; + if assign.is_none() { + fact_to_record.constant = if query_eval.always_true { + crate::type_analysis::ConstantValue::Known(Value::Bool(true)) + } else { + crate::type_analysis::ConstantValue::unknown() + }; + } else if !query_eval.always_true { + fact_to_record.constant = crate::type_analysis::ConstantValue::unknown(); + } + + if !skip_generic_fact { + Self::record_rule_head_fact(result, module_idx, refr.eidx(), fact_to_record); + } + } + } + } + + pub(super) fn merge_set_element_types( + existing_fact: Option<&TypeFact>, + new_element: StructuralType, + ) -> StructuralType { + if let Some(existing_fact) = existing_fact { + match &existing_fact.descriptor { + TypeDescriptor::Structural(StructuralType::Set(existing_elem)) => { + let combined = vec![existing_elem.as_ref().clone(), new_element]; + Self::join_structural_types(&combined) + } + TypeDescriptor::Schema(schema) => { + if let StructuralType::Set(existing_elem) = StructuralType::from_schema(schema) + { + let combined = vec![(*existing_elem).clone(), new_element]; + Self::join_structural_types(&combined) + } else { + new_element + } + } + TypeDescriptor::Structural(StructuralType::Any) => new_element, + _ => new_element, + } + } else { + new_element + } + } +} + +fn fact_is_informative(fact: &TypeFact) -> bool { + match &fact.descriptor { + TypeDescriptor::Structural(ty) => !structural_contains_unknownish(ty), + _ => true, + } +} + +fn structural_contains_unknownish(ty: &StructuralType) -> bool { + match ty { + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Union(variants) => variants.iter().any(structural_contains_unknownish), + _ => false, + } +} diff --git a/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/heads.rs b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/heads.rs new file mode 100644 index 00000000..bb383b6b --- /dev/null +++ 
b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/heads.rs @@ -0,0 +1,317 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::{borrow::ToOwned, boxed::Box, vec::Vec}; + +use crate::ast::{Expr, RuleHead}; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + ConstantValue, RuleAnalysis, SourceOrigin, StructuralType, TypeDescriptor, TypeFact, + TypeProvenance, +}; + +use super::super::super::result::AnalysisState; +use super::super::TypeAnalyzer; + +impl TypeAnalyzer { + pub(super) fn analyze_rule_head_without_body( + &self, + module_idx: u32, + head: &RuleHead, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + ) { + let mut bindings = ScopedBindings::new(); + bindings.ensure_root_scope(); + + match head { + RuleHead::Compr { refr, assign, .. } => { + if let Some(assign) = assign { + self.ensure_expr_capacity(module_idx, assign.value.eidx(), result); + let value_fact = self.infer_expr( + module_idx, + &assign.value, + &mut bindings, + result, + rule_analysis, + ); + + if let Expr::Var { value, .. } = refr.as_ref() { + if let Ok(name) = value.as_string() { + bindings.assign(name.as_ref().to_owned(), value_fact.fact.clone()); + } + } + } + + self.ensure_expr_capacity(module_idx, refr.eidx(), result); + let fact = self.infer_expr(module_idx, refr, &mut bindings, result, rule_analysis); + Self::record_rule_head_fact(result, module_idx, refr.eidx(), fact.fact.clone()); + } + RuleHead::Set { refr, key, .. } => { + self.ensure_expr_capacity(module_idx, refr.eidx(), result); + + if let Some(key_expr) = key { + self.ensure_expr_capacity(module_idx, key_expr.eidx(), result); + let element_fact = + self.infer_expr(module_idx, key_expr, &mut bindings, result, rule_analysis); + + let element_type = match &element_fact.fact.descriptor { + TypeDescriptor::Structural(st) => st.clone(), + TypeDescriptor::Schema(schema) => StructuralType::from_schema(schema), + }; + + let set_fact = TypeFact::new( + TypeDescriptor::Structural(StructuralType::Set(Box::new(element_type))), + TypeProvenance::Propagated, + ); + + Self::record_rule_head_fact(result, module_idx, refr.eidx(), set_fact); + } else { + let fact = + self.infer_expr(module_idx, refr, &mut bindings, result, rule_analysis); + Self::record_rule_head_fact(result, module_idx, refr.eidx(), fact.fact.clone()); + } + } + RuleHead::Func { + refr, args, assign, .. + } => { + let has_assign = assign.is_some(); + let mut assign_value_fact: Option<(TypeFact, bool)> = None; + if let Some(assign) = assign { + self.ensure_expr_capacity(module_idx, assign.value.eidx(), result); + let value_fact = self.infer_expr( + module_idx, + &assign.value, + &mut bindings, + result, + rule_analysis, + ); + + let informative = fact_is_informative(&value_fact.fact); + if informative || !self.disable_function_generic_pass { + assign_value_fact = Some((value_fact.fact.clone(), informative)); + } + + if let Expr::Var { value, .. 
} = refr.as_ref() { + if let Ok(name) = value.as_string() { + bindings.assign(name.as_ref().to_owned(), value_fact.fact.clone()); + } + } + } + + for arg in args { + self.ensure_expr_capacity(module_idx, arg.eidx(), result); + self.infer_expr(module_idx, arg, &mut bindings, result, rule_analysis); + } + + self.ensure_expr_capacity(module_idx, refr.eidx(), result); + let fact = self.infer_expr(module_idx, refr, &mut bindings, result, rule_analysis); + if let Some((assign_fact, _)) = assign_value_fact.as_ref() { + Self::record_rule_head_fact( + result, + module_idx, + refr.eidx(), + assign_fact.clone(), + ); + } + + let fact_informative = fact_is_informative(&fact.fact); + let assign_informative = assign_value_fact + .as_ref() + .map(|(_, informative)| *informative) + .unwrap_or(false); + // Only suppress the generic fact when we're dealing with a function rule and the + // specialized return provided a more precise descriptor. Other rule kinds still + // rely on the generic pass for baseline facts. + let skip_generic_fact = if assign_value_fact.is_some() { + self.disable_function_generic_pass || (assign_informative && !fact_informative) + } else if has_assign { + self.disable_function_generic_pass + } else { + false + }; + + if !skip_generic_fact { + Self::record_rule_head_fact(result, module_idx, refr.eidx(), fact.fact.clone()); + } + } + } + } + + pub(super) fn record_rule_head_fact( + result: &mut AnalysisState, + module_idx: u32, + expr_idx: u32, + fact: TypeFact, + ) { + let TypeFact { + descriptor, + constant, + provenance, + origins, + specialization_hits, + } = fact; + + let provenance = match provenance { + TypeProvenance::Unknown => TypeProvenance::Rule, + other => other, + }; + + result.lookup.ensure_expr_capacity(module_idx, expr_idx); + + let slot = result.lookup.expr_types_mut().get_mut(module_idx, expr_idx); + + if let Some(existing) = slot { + // Use constant-aware merging to preserve enum values + Self::merge_rule_head_descriptor_with_constants( + &mut existing.descriptor, + &existing.constant, + descriptor, + &constant, + ); + existing.constant = Self::merge_rule_head_constant(&existing.constant, constant); + if matches!(existing.provenance, TypeProvenance::Unknown) { + existing.provenance = provenance; + } + Self::merge_rule_head_origins(&mut existing.origins, origins); + if !specialization_hits.is_empty() { + existing.specialization_hits.extend(specialization_hits); + } + } else { + let mut stored = TypeFact::new(descriptor, provenance); + stored.constant = constant; + Self::merge_rule_head_origins(&mut stored.origins, origins); + stored.specialization_hits = specialization_hits; + *slot = Some(stored); + } + } + + fn merge_rule_head_constant( + existing: &ConstantValue, + incoming: ConstantValue, + ) -> ConstantValue { + match (existing, incoming) { + (ConstantValue::Known(lhs), ConstantValue::Known(rhs)) => { + if lhs == &rhs { + ConstantValue::Known(lhs.clone()) + } else { + // Different constants - we lose constant tracking but keep type info + ConstantValue::Unknown + } + } + (ConstantValue::Known(lhs), ConstantValue::Unknown) => { + ConstantValue::Known(lhs.clone()) + } + (ConstantValue::Unknown, ConstantValue::Known(rhs)) => ConstantValue::Known(rhs), + (ConstantValue::Unknown, ConstantValue::Unknown) => ConstantValue::Unknown, + } + } + + fn merge_rule_head_descriptor_with_constants( + existing_descriptor: &mut TypeDescriptor, + existing_constant: &ConstantValue, + incoming_descriptor: TypeDescriptor, + incoming_constant: &ConstantValue, + ) { + // Only create an 
Enum if BOTH have known constants + let both_have_constants = matches!(existing_constant, ConstantValue::Known(_)) + && matches!(incoming_constant, ConstantValue::Known(_)); + + if both_have_constants { + // Collect known constant values + let mut enum_values = alloc::vec::Vec::new(); + + if let ConstantValue::Known(val) = existing_constant { + enum_values.push(val.clone()); + } + + if let ConstantValue::Known(val) = incoming_constant { + if !enum_values.contains(val) { + enum_values.push(val.clone()); + } + } + + // If we have multiple distinct constants, create an Enum type + if enum_values.len() > 1 { + *existing_descriptor = + TypeDescriptor::Structural(StructuralType::Enum(enum_values)); + return; + } + } + + if let TypeDescriptor::Structural(StructuralType::Enum(enum_values)) = existing_descriptor { + if let TypeDescriptor::Structural(StructuralType::Enum(incoming_values)) = + &incoming_descriptor + { + let mut added = false; + for value in incoming_values { + if !enum_values.contains(value) { + enum_values.push(value.clone()); + added = true; + } + } + if added { + return; + } + } + + if let ConstantValue::Known(value) = incoming_constant { + if !enum_values.contains(value) { + enum_values.push(value.clone()); + } + return; + } + } + + // Otherwise use standard merging + let (incoming_structural, incoming_was_schema) = match incoming_descriptor { + TypeDescriptor::Structural(st) => (st, false), + TypeDescriptor::Schema(schema) => (StructuralType::from_schema(&schema), true), + }; + + let (existing_structural, existing_was_schema) = match existing_descriptor { + TypeDescriptor::Structural(st) => (st.clone(), false), + TypeDescriptor::Schema(schema) => (StructuralType::from_schema(schema), true), + }; + + if existing_was_schema && incoming_was_schema && existing_structural == incoming_structural + { + return; + } + + let merged = Self::join_structural_types(&[existing_structural, incoming_structural]); + *existing_descriptor = TypeDescriptor::Structural(merged); + } + + fn merge_rule_head_origins(existing: &mut Vec, incoming: Vec) { + if incoming.is_empty() { + return; + } + + if existing.is_empty() { + *existing = incoming; + return; + } + + for origin in incoming { + if !existing.contains(&origin) { + existing.push(origin); + } + } + } +} + +fn fact_is_informative(fact: &TypeFact) -> bool { + match &fact.descriptor { + TypeDescriptor::Structural(ty) => !structural_contains_unknownish(ty), + _ => true, + } +} + +fn structural_contains_unknownish(ty: &StructuralType) -> bool { + match ty { + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Union(variants) => variants.iter().any(structural_contains_unknownish), + _ => false, + } +} diff --git a/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/mod.rs b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/mod.rs new file mode 100644 index 00000000..cbb83c03 --- /dev/null +++ b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/mod.rs @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod bodies; +mod heads; +mod orchestration; +mod queries; diff --git a/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/orchestration.rs b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/orchestration.rs new file mode 100644 index 00000000..aab49250 --- /dev/null +++ b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/orchestration.rs @@ -0,0 +1,308 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT License. + +use alloc::borrow::ToOwned; + +use crate::ast::{Expr, Module, Rule, RuleHead}; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{RuleAnalysis, StructuralType, TypeDescriptor, TypeProvenance}; +use crate::utils::get_path_string; + +use super::super::super::result::AnalysisState; +use super::super::TypeAnalyzer; + +impl TypeAnalyzer { + pub(crate) fn analyze_module( + &self, + module_idx: u32, + module: &crate::ast::Ref, + result: &mut AnalysisState, + ) { + for (rule_idx, _) in module.policy.iter().enumerate() { + self.ensure_rule_analyzed(module_idx, rule_idx, result); + } + } + + pub(super) fn analyze_rule( + &self, + module_idx: u32, + rule_idx: usize, + rule: &crate::ast::Ref, + result: &mut AnalysisState, + ) { + let mut aggregate = RuleAnalysis::default(); + + // Check if this is a function rule (has parameters) + let is_function_rule = matches!( + rule.as_ref(), + Rule::Spec { + head: RuleHead::Func { .. }, + .. + } + ); + + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + if let Expr::Var { .. } = refr.as_ref() { + let module = &self.modules[module_idx as usize]; + let module_path = get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + if let Ok(var_path) = get_path_string(refr.as_ref(), Some(&module_path)) { + result.ensure_rule_capacity(module_idx, rule_idx + 1); + let current_state = + &result.rule_info[module_idx as usize][rule_idx].constant_state; + + if matches!( + current_state, + crate::type_analysis::RuleConstantState::Unknown + ) { + result.rule_info[module_idx as usize][rule_idx].constant_state = + crate::type_analysis::RuleConstantState::InProgress; + + // Skip constant folding for function rules - they need arguments + if !is_function_rule { + if let Some(value) = + self.try_evaluate_rule_constant(module_idx, &var_path) + { + aggregate.constant_state = + crate::type_analysis::RuleConstantState::Done(value); + } else { + aggregate.constant_state = + crate::type_analysis::RuleConstantState::NeedsRuntime; + } + } else { + // Function rules always need runtime (arguments) + aggregate.constant_state = + crate::type_analysis::RuleConstantState::NeedsRuntime; + } + } + } + } + } + + match rule.as_ref() { + Rule::Spec { head, bodies, .. } => { + if bodies.is_empty() { + self.analyze_rule_head_without_body(module_idx, head, result, &mut aggregate); + } else { + let last_body_index = bodies.len() - 1; + for (body_idx, body) in bodies.iter().enumerate() { + let mut body_analysis = RuleAnalysis::default(); + self.analyze_rule_body( + module_idx, + rule_idx, + body_idx, + head, + body, + result, + &mut body_analysis, + body_idx == last_body_index, + ); + aggregate.merge(body_analysis); + } + } + } + Rule::Default { refr, value, .. 
} => { + self.ensure_expr_capacity(module_idx, value.eidx(), result); + let mut bindings = ScopedBindings::new(); + bindings.ensure_root_scope(); + let fact = + self.infer_expr(module_idx, value, &mut bindings, result, &mut aggregate); + aggregate.record_origins(&fact.fact.origins); + + self.ensure_expr_capacity(module_idx, refr.eidx(), result); + Self::record_rule_head_fact(result, module_idx, refr.eidx(), fact.fact.clone()); + } + } + + if let crate::type_analysis::RuleConstantState::Done(value) = &aggregate.constant_state { + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + let expr_idx = refr.eidx(); + self.ensure_expr_capacity(module_idx, expr_idx, result); + + let mut constant_fact = + crate::type_analysis::value_utils::value_to_type_fact(value); + constant_fact.provenance = TypeProvenance::Rule; + + let slot = result.lookup.expr_types_mut().get_mut(module_idx, expr_idx); + match slot { + Some(existing) => { + existing.constant = + crate::type_analysis::ConstantValue::Known(value.clone()); + + if should_replace_descriptor( + &existing.descriptor, + &constant_fact.descriptor, + ) { + existing.descriptor = constant_fact.descriptor.clone(); + } + + if matches!(existing.provenance, TypeProvenance::Unknown) { + existing.provenance = TypeProvenance::Rule; + } + } + None => { + *slot = Some(constant_fact); + } + } + + result + .constants + .record(module_idx, expr_idx, Some(value.clone())); + } + } + + result.record_rule_analysis(module_idx, rule_idx, aggregate); + } + + pub(crate) fn ensure_rule_analyzed( + &self, + module_idx: u32, + rule_idx: usize, + result: &mut AnalysisState, + ) { + let has_rule_info = result + .rule_info + .get(module_idx as usize) + .and_then(|rules| rules.get(rule_idx)) + .map(|analysis| { + !matches!( + analysis.constant_state, + crate::type_analysis::RuleConstantState::Unknown + ) + }) + .unwrap_or(false); + + let has_expr_type = if let Some(module) = self.modules.get(module_idx as usize) { + if let Some(rule) = module.policy.get(rule_idx) { + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + result.lookup.get_expr(module_idx, refr.eidx()).is_some() + } else { + false + } + } else { + false + } + } else { + false + }; + + if has_rule_info || has_expr_type { + return; + } + + if self.is_rule_on_stack(module_idx, rule_idx) { + result.ensure_rule_capacity(module_idx, rule_idx + 1); + result.rule_info[module_idx as usize][rule_idx].constant_state = + crate::type_analysis::RuleConstantState::NeedsRuntime; + return; + } + + if let Some(module) = self.modules.get(module_idx as usize) { + if let Some(rule) = module.policy.get(rule_idx) { + if Self::rule_requires_arguments(rule.as_ref()) + && !self + .function_param_facts + .borrow() + .contains_key(&(module_idx, rule_idx)) + { + result.ensure_rule_capacity(module_idx, rule_idx + 1); + let entry = &mut result.rule_info[module_idx as usize][rule_idx]; + if matches!( + entry.constant_state, + crate::type_analysis::RuleConstantState::Unknown + ) { + entry.constant_state = + crate::type_analysis::RuleConstantState::NeedsRuntime; + } + return; + } + + let rule_path = + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + if let Expr::Var { .. 
} = refr.as_ref() { + let module_path = + get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + let var_path = get_path_string(refr.as_ref(), Some(&module_path)) + .unwrap_or_else(|_| "unknown".to_owned()); + + Some(var_path) + } else { + None + } + } else { + None + }; + + if let Some(ref path) = rule_path { + if self.entrypoint_filtering { + result.lookup.mark_reachable(path.clone()); + } + result.lookup.set_current_rule(Some(path.clone())); + } + + self.push_rule_stack(module_idx, rule_idx); + self.analyze_rule(module_idx, rule_idx, rule, result); + self.pop_rule_stack(); + + result.lookup.set_current_rule(None); + } + } + } + + fn is_rule_on_stack(&self, module_idx: u32, rule_idx: usize) -> bool { + self.analysis_stack + .borrow() + .contains(&(module_idx, rule_idx)) + } + + fn push_rule_stack(&self, module_idx: u32, rule_idx: usize) { + self.analysis_stack + .borrow_mut() + .push((module_idx, rule_idx)); + } + + fn pop_rule_stack(&self) { + self.analysis_stack.borrow_mut().pop(); + } + + fn rule_requires_arguments(rule: &Rule) -> bool { + matches!( + rule, + Rule::Spec { + head: RuleHead::Func { .. }, + .. + } + ) + } +} + +fn should_replace_descriptor(existing: &TypeDescriptor, replacement: &TypeDescriptor) -> bool { + match (existing, replacement) { + (TypeDescriptor::Structural(StructuralType::Any), _) + | (TypeDescriptor::Structural(StructuralType::Unknown), _) => true, + ( + TypeDescriptor::Structural(StructuralType::Array(existing_inner)), + TypeDescriptor::Structural(StructuralType::Array(replacement_inner)), + ) => { + is_unknownish(existing_inner) + || (!is_unknownish(replacement_inner) + && existing_inner.as_ref() != replacement_inner.as_ref()) + } + ( + TypeDescriptor::Structural(StructuralType::Set(existing_inner)), + TypeDescriptor::Structural(StructuralType::Set(replacement_inner)), + ) => { + is_unknownish(existing_inner) + || (!is_unknownish(replacement_inner) + && existing_inner.as_ref() != replacement_inner.as_ref()) + } + _ => false, + } +} + +fn is_unknownish(ty: &StructuralType) -> bool { + matches!(ty, StructuralType::Any | StructuralType::Unknown) +} diff --git a/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/queries.rs b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/queries.rs new file mode 100644 index 00000000..73131a60 --- /dev/null +++ b/src/type_analysis/propagation/pipeline/analyzer/rule_analysis/queries.rs @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
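// Query analysis, illustrative summary: statements are visited in scheduler order,
// with hoisted loops seeded before each one. A statement proven always-false marks
// the rest of the query unreachable and stops analysis early; an
// UnreachableStatement warning is emitted only when `allow_unreachable_diag` is
// set, which orchestration enables for the final body of a rule.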
+ +use alloc::string::String; + +use crate::ast::Query; +use crate::type_analysis::context::ScopedBindings; +use crate::type_analysis::model::{ + RuleAnalysis, TypeDiagnostic, TypeDiagnosticKind, TypeDiagnosticSeverity, +}; +use crate::type_analysis::propagation::expressions::StatementTruth; + +use super::super::super::result::AnalysisState; +use super::super::TypeAnalyzer; + +#[derive(Clone, Copy, Debug, Default)] +pub(crate) struct QueryEvaluation { + pub reachable: bool, + pub always_true: bool, +} + +impl TypeAnalyzer { + pub(crate) fn analyze_query( + &self, + module_idx: u32, + query: &crate::ast::Ref, + bindings: &mut ScopedBindings, + result: &mut AnalysisState, + rule_analysis: &mut RuleAnalysis, + allow_unreachable_diag: bool, + ) -> QueryEvaluation { + let order = self + .schedule + .as_ref() + .and_then(|schedule| schedule.queries.get(module_idx, query.qidx)) + .map(|qs| qs.order.clone()) + .unwrap_or_else(|| (0..query.stmts.len() as u16).collect()); + + let mut eval = QueryEvaluation { + reachable: true, + always_true: true, + }; + for stmt_idx in order { + if let Some(stmt) = query.stmts.get(stmt_idx as usize) { + self.seed_statement_loops(module_idx, stmt, bindings, result, rule_analysis); + let truth = self.analyze_stmt(module_idx, stmt, bindings, result, rule_analysis); + if !matches!(truth, StatementTruth::AlwaysTrue) { + eval.always_true = false; + } + + if matches!(truth, StatementTruth::AlwaysFalse) { + if allow_unreachable_diag { + let (line, col, end_line, end_col) = + Self::diagnostic_range_from_span(&stmt.span); + result.diagnostics.push(TypeDiagnostic { + file: stmt.span.source.get_path().as_str().into(), + message: String::from( + "Statement always evaluates to false; later statements were skipped", + ), + kind: TypeDiagnosticKind::UnreachableStatement, + severity: TypeDiagnosticSeverity::Warning, + line, + col, + end_line, + end_col, + }); + } + eval.reachable = false; + break; + } + } + } + + eval + } + + pub(crate) fn ensure_expr_capacity( + &self, + module_idx: u32, + expr_idx: u32, + result: &mut AnalysisState, + ) { + result.lookup.ensure_expr_capacity(module_idx, expr_idx); + result.constants.ensure_capacity(module_idx, expr_idx); + } +} diff --git a/src/type_analysis/propagation/pipeline/analyzer/rule_index.rs b/src/type_analysis/propagation/pipeline/analyzer/rule_index.rs new file mode 100644 index 00000000..9cf6450b --- /dev/null +++ b/src/type_analysis/propagation/pipeline/analyzer/rule_index.rs @@ -0,0 +1,240 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
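// Rule-head index, illustrative summary: every rule head is registered both per
// module and globally, keyed by the last segment of its path (e.g. `allow` for a
// hypothetical `data.example.allow`), so references can be resolved to
// (module_idx, rule_idx, expr_idx) triples without rescanning the AST;
// `rule_heads_with_prefix` answers prefix queries such as "all rules under a
// given package path".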
+ +use alloc::{ + borrow::ToOwned, + collections::{BTreeMap, BTreeSet}, + format, + string::String, + vec::Vec, +}; + +use crate::ast::{Expr, Module, Rule, RuleHead}; +use crate::utils::{get_path_string, path::normalize_rule_path}; + +use super::super::result::AnalysisState; +use super::TypeAnalyzer; + +#[derive(Clone, Debug)] +pub(crate) struct RuleHeadInfo { + pub module_idx: u32, + pub rule_idx: usize, + pub expr_idx: u32, + pub path: String, +} + +pub(super) type RuleHeadIndex = ( + Vec>>, + BTreeMap>, +); + +impl TypeAnalyzer { + pub(super) fn build_rule_head_index(modules: &[crate::ast::Ref]) -> RuleHeadIndex { + let mut module_rule_heads: Vec>> = + Vec::with_capacity(modules.len()); + let mut global_rule_heads: BTreeMap> = BTreeMap::new(); + + for (module_idx, module) in modules.iter().enumerate() { + let module_idx = module_idx as u32; + let mut module_map: BTreeMap> = BTreeMap::new(); + let module_path = get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + for (rule_idx, rule) in module.policy.iter().enumerate() { + match rule.as_ref() { + Rule::Spec { head, .. } => { + if let Some(refr) = Self::rule_head_expression(head) { + Self::record_rule_head( + module_idx, + rule_idx, + refr.eidx(), + refr.as_ref(), + &module_path, + &mut module_map, + &mut global_rule_heads, + ); + } + } + Rule::Default { refr, .. } => { + Self::record_rule_head( + module_idx, + rule_idx, + refr.eidx(), + refr.as_ref(), + &module_path, + &mut module_map, + &mut global_rule_heads, + ); + } + } + } + + module_rule_heads.push(module_map); + } + + (module_rule_heads, global_rule_heads) + } + + pub(crate) fn rule_heads_for_name(&self, module_idx: u32, name: &str) -> Vec<&RuleHeadInfo> { + let mut matches: Vec<&RuleHeadInfo> = Vec::new(); + + if let Some(module_heads) = self.module_rule_heads.get(module_idx as usize) { + if let Some(local) = module_heads.get(name) { + for info in local { + if !matches.iter().any(|existing| { + existing.module_idx == info.module_idx && existing.expr_idx == info.expr_idx + }) { + matches.push(info); + } + } + } + } + + if let Some(global) = self.global_rule_heads.get(name) { + for info in global { + if !matches.iter().any(|existing| { + existing.module_idx == info.module_idx && existing.expr_idx == info.expr_idx + }) { + matches.push(info); + } + } + } + + matches + } + + pub(crate) fn rule_heads_with_prefix( + &self, + module_idx: u32, + prefix: &str, + ) -> Vec<&RuleHeadInfo> { + let normalized_prefix = normalize_rule_path(prefix); + let mut matches: Vec<&RuleHeadInfo> = Vec::new(); + let mut seen: BTreeSet<(u32, u32)> = BTreeSet::new(); + + if let Some(module_heads) = self.module_rule_heads.get(module_idx as usize) { + for infos in module_heads.values() { + for info in infos { + if Self::rule_path_has_prefix(&info.path, &normalized_prefix) + && seen.insert((info.module_idx, info.expr_idx)) + { + matches.push(info); + } + } + } + } + + for infos in self.global_rule_heads.values() { + for info in infos { + if Self::rule_path_has_prefix(&info.path, &normalized_prefix) + && seen.insert((info.module_idx, info.expr_idx)) + { + matches.push(info); + } + } + } + + matches + } + + fn rule_path_has_prefix(path: &str, normalized_prefix: &str) -> bool { + let normalized_path = normalize_rule_path(path); + if normalized_path.len() <= normalized_prefix.len() { + return false; + } + + normalized_path.starts_with(normalized_prefix) + && matches!( + normalized_path.chars().nth(normalized_prefix.len()), + Some('.') | Some('[') + ) + } + + 
pub(crate) fn ensure_all_rule_definitions_analyzed( + &self, + rule_path: &str, + result: &mut AnalysisState, + ) { + let normalized = normalize_rule_path(rule_path); + let base_path = normalized.strip_suffix(".default").unwrap_or(&normalized); + + for (module_idx, module) in self.modules.iter().enumerate() { + let module_path = get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + for (rule_idx, rule) in module.policy.iter().enumerate() { + if let Some(refr) = Self::rule_head_expression_from_rule(rule.as_ref()) { + if let Ok(var_path) = get_path_string(refr.as_ref(), Some(&module_path)) { + let rule_normalized = normalize_rule_path(&var_path); + let rule_base = rule_normalized + .strip_suffix(".default") + .unwrap_or(&rule_normalized); + + if rule_base == base_path { + self.ensure_rule_analyzed(module_idx as u32, rule_idx, result); + } + } + } + } + } + } + + fn record_rule_head( + module_idx: u32, + rule_idx: usize, + expr_idx: u32, + expr: &Expr, + module_path: &str, + module_map: &mut BTreeMap>, + global_map: &mut BTreeMap>, + ) { + if let Ok(mut path) = get_path_string(expr, None) { + if !path.starts_with("data.") { + path = format!("{module_path}.{path}"); + } + + let info = RuleHeadInfo { + module_idx, + rule_idx, + expr_idx, + path: path.clone(), + }; + let key = Self::rule_name_key(&path).to_owned(); + + Self::insert_rule_head(module_map, key.clone(), info.clone()); + Self::insert_rule_head(global_map, key, info); + } + } + + fn insert_rule_head( + map: &mut BTreeMap>, + key: String, + info: RuleHeadInfo, + ) { + let entry = map.entry(key).or_default(); + if !entry.iter().any(|existing| { + existing.module_idx == info.module_idx && existing.expr_idx == info.expr_idx + }) { + entry.push(info); + } + } + + fn rule_name_key(path: &str) -> &str { + path.rsplit('.').next().unwrap_or(path) + } + + pub(super) fn rule_head_expression(head: &RuleHead) -> Option<&crate::ast::Ref> { + match head { + RuleHead::Compr { refr, .. } + | RuleHead::Set { refr, .. } + | RuleHead::Func { refr, .. } => Some(refr), + } + } + + pub(crate) fn rule_head_expression_from_rule(rule: &Rule) -> Option<&crate::ast::Ref> { + match rule { + Rule::Spec { head, .. } => Self::rule_head_expression(head), + Rule::Default { refr, .. } => Some(refr), + } + } +} diff --git a/src/type_analysis/propagation/pipeline/analyzer/validation.rs b/src/type_analysis/propagation/pipeline/analyzer/validation.rs new file mode 100644 index 00000000..e716c784 --- /dev/null +++ b/src/type_analysis/propagation/pipeline/analyzer/validation.rs @@ -0,0 +1,164 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::{borrow::ToOwned, collections::BTreeMap, format, string::String}; + +use crate::ast::{Rule, RuleHead}; +use crate::lexer::Span; +use crate::type_analysis::model::{TypeDiagnostic, TypeDiagnosticKind, TypeDiagnosticSeverity}; +use crate::utils::get_path_string; + +use super::super::result::AnalysisState; +use super::TypeAnalyzer; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RuleKind { + Complete, + PartialSet, + PartialObject, + Function, +} + +impl TypeAnalyzer { + /// Validate rule definitions for semantic errors: + /// 1. Default rules cannot be used with partial set/object rules + /// 2. 
All definitions of a rule must have the same type (complete/set/function) + pub(super) fn validate_rule_definitions(&self, result: &mut AnalysisState) { + let mut rule_types: BTreeMap = BTreeMap::new(); + + for (module_idx, module) in self.modules.iter().enumerate() { + let module_idx = module_idx as u32; + let module_path = get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + for rule in &module.policy { + if let Rule::Spec { head, span, .. } = rule.as_ref() { + let span_ref = span; + if let Some(refr) = Self::rule_head_expression(head) { + if let Ok(mut rule_path) = get_path_string(refr.as_ref(), None) { + if !rule_path.starts_with("data.") { + rule_path = format!("{module_path}.{rule_path}"); + } + + let kind = Self::classify_rule_head(head); + + if let Some((existing_kind, _, first_span)) = rule_types.get(&rule_path) + { + if !Self::rule_kinds_compatible(existing_kind, &kind) { + let (line, col, end_line, end_col) = + Self::diagnostic_range_from_span(span_ref); + result.diagnostics.push(TypeDiagnostic { + file: span.source.get_path().as_str().into(), + message: format!( + "Rule '{}' has inconsistent types: {} and {}. All definitions of a rule must have the same type.", + rule_path.rsplit('.').next().unwrap_or(&rule_path), + Self::rule_kind_name(existing_kind), + Self::rule_kind_name(&kind) + ), + kind: TypeDiagnosticKind::SchemaViolation, + severity: TypeDiagnosticSeverity::Error, + line, + col, + end_line, + end_col, + }); + + let (first_line, first_col, first_end_line, first_end_col) = + Self::diagnostic_range_from_span(first_span); + result.diagnostics.push(TypeDiagnostic { + file: first_span.source.get_path().as_str().into(), + message: format!( + "First defined as {} here", + Self::rule_kind_name(existing_kind) + ), + kind: TypeDiagnosticKind::InternalError, + severity: TypeDiagnosticSeverity::Warning, + line: first_line, + col: first_col, + end_line: first_end_line, + end_col: first_end_col, + }); + } + } else { + rule_types.insert(rule_path, (kind, module_idx, span.clone())); + } + } + } + } + } + } + + for module in self.modules.iter() { + let module_path = get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + + for rule in &module.policy { + if let Rule::Default { refr, span, .. } = rule.as_ref() { + let span_ref = span; + if let Ok(mut rule_path) = get_path_string(refr.as_ref(), None) { + if !rule_path.starts_with("data.") { + rule_path = format!("{module_path}.{rule_path}"); + } + + let base_path = rule_path + .strip_suffix(".default") + .unwrap_or(&rule_path) + .to_owned(); + + if let Some((kind, _, _)) = rule_types.get(&base_path) { + if matches!(kind, RuleKind::PartialSet | RuleKind::PartialObject) { + let (line, col, end_line, end_col) = + Self::diagnostic_range_from_span(span_ref); + result.diagnostics.push(TypeDiagnostic { + file: span.source.get_path().as_str().into(), + message: format!( + "Default rule cannot be used with partial {} rules. Default rules are only allowed for complete rules.", + if matches!(kind, RuleKind::PartialSet) { + "set" + } else { + "object" + } + ), + kind: TypeDiagnosticKind::SchemaViolation, + severity: TypeDiagnosticSeverity::Error, + line, + col, + end_line, + end_col, + }); + } + } + } + } + } + } + } + + fn classify_rule_head(head: &RuleHead) -> RuleKind { + match head { + RuleHead::Compr { .. } => RuleKind::Complete, + RuleHead::Set { key: Some(_), .. } => RuleKind::PartialSet, + RuleHead::Set { key: None, .. 
} => RuleKind::PartialObject, + RuleHead::Func { .. } => RuleKind::Function, + } + } + + fn rule_kinds_compatible(a: &RuleKind, b: &RuleKind) -> bool { + matches!( + (a, b), + (RuleKind::Complete, RuleKind::Complete) + | (RuleKind::PartialSet, RuleKind::PartialSet) + | (RuleKind::PartialObject, RuleKind::PartialObject) + | (RuleKind::Function, RuleKind::Function) + ) + } + + fn rule_kind_name(kind: &RuleKind) -> &'static str { + match kind { + RuleKind::Complete => "complete rule", + RuleKind::PartialSet => "partial set rule", + RuleKind::PartialObject => "partial object rule", + RuleKind::Function => "function", + } + } +} diff --git a/src/type_analysis/propagation/pipeline/options.rs b/src/type_analysis/propagation/pipeline/options.rs new file mode 100644 index 00000000..dc0a5a62 --- /dev/null +++ b/src/type_analysis/propagation/pipeline/options.rs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::string::String; +use alloc::vec::Vec; + +use crate::compiler::hoist::HoistedLoopsLookup; +use crate::schema::Schema; +use crate::Rc; + +/// Configuration for the type analyser. +#[derive(Clone, Debug)] +pub struct TypeAnalysisOptions { + pub input_schema: Option, + pub data_schema: Option, + pub loop_lookup: Option>, + /// Optional entrypoint filtering - analyze only rules reachable from these paths + pub entrypoints: Option>, + /// Experimental: disable the generic pass for function rules. + pub disable_function_generic_pass: bool, +} + +impl Default for TypeAnalysisOptions { + fn default() -> Self { + Self { + input_schema: None, + data_schema: None, + loop_lookup: None, + entrypoints: None, + disable_function_generic_pass: true, + } + } +} + +impl TypeAnalysisOptions { + /// Check if entrypoint filtering is enabled + pub fn is_entrypoint_filtered(&self) -> bool { + self.entrypoints.is_some() + } + + /// Get the list of entrypoints (empty if not filtered) + pub fn get_entrypoints(&self) -> &[String] { + self.entrypoints.as_deref().unwrap_or(&[]) + } +} diff --git a/src/type_analysis/propagation/pipeline/result.rs b/src/type_analysis/propagation/pipeline/result.rs new file mode 100644 index 00000000..6d457957 --- /dev/null +++ b/src/type_analysis/propagation/pipeline/result.rs @@ -0,0 +1,118 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use alloc::collections::{BTreeMap, BTreeSet}; +use alloc::string::String; +use alloc::vec::Vec; + +use crate::type_analysis::constants::ConstantStore; +use crate::type_analysis::context::LookupContext; +use crate::type_analysis::model::{RuleAnalysis, RuleSpecializationSignature, TypeDiagnostic}; +use crate::type_analysis::result::RuleSpecializationRecord; + +#[derive(Clone, Debug, Default)] +pub struct BodyQueryTruth { + pub reachable: bool, + pub always_true: bool, +} + +/// Intermediate state produced by the propagation pipeline. 
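// Illustrative shape of the nested bookkeeping held by this state (hypothetical
// indices): after record_body_query_truth(0, 2, 1, true, false), the entry
// body_query_truths[0][2][1] holds BodyQueryTruth { reachable: true, always_true: false };
// the intermediate vectors are grown on demand, so modules, rules, and bodies that
// were never recorded simply read back as default values.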
+#[derive(Clone, Debug)] +pub struct AnalysisState { + pub lookup: LookupContext, + pub constants: ConstantStore, + pub diagnostics: Vec, + pub rule_info: Vec>, + pub body_query_truths: Vec>>, + pub function_rule_specializations: + BTreeMap<(u32, usize), BTreeMap>, + /// Requested entrypoint patterns (if filtering was enabled) + pub(crate) requested_entrypoints: Vec, + /// Default rules included due to entrypoint filtering + pub(crate) included_defaults: BTreeSet, +} + +impl Default for AnalysisState { + fn default() -> Self { + Self::new() + } +} + +impl AnalysisState { + pub fn new() -> Self { + AnalysisState { + lookup: LookupContext::new(), + constants: ConstantStore::new(), + diagnostics: Vec::new(), + rule_info: Vec::new(), + body_query_truths: Vec::new(), + function_rule_specializations: BTreeMap::new(), + requested_entrypoints: Vec::new(), + included_defaults: BTreeSet::new(), + } + } + + pub fn ensure_rule_capacity(&mut self, module_idx: u32, rule_count: usize) { + let module_idx = module_idx as usize; + if self.rule_info.len() <= module_idx { + self.rule_info.resize_with(module_idx + 1, Vec::new); + } + let module_rules = &mut self.rule_info[module_idx]; + if module_rules.len() < rule_count { + module_rules.resize(rule_count, RuleAnalysis::default()); + } + } + + pub fn record_rule_analysis( + &mut self, + module_idx: u32, + rule_idx: usize, + analysis: RuleAnalysis, + ) { + self.ensure_rule_capacity(module_idx, rule_idx + 1); + self.rule_info[module_idx as usize][rule_idx] = analysis; + } + + pub fn record_function_specialization(&mut self, record: RuleSpecializationRecord) { + let key = (record.signature.module_idx, record.signature.rule_idx); + let signature = record.signature.clone(); + self.function_rule_specializations + .entry(key) + .or_default() + .insert(signature, record); + } + + pub fn record_body_query_truth( + &mut self, + module_idx: u32, + rule_idx: usize, + body_idx: usize, + reachable: bool, + always_true: bool, + ) { + let module_idx_usize = module_idx as usize; + + if self.body_query_truths.len() <= module_idx_usize { + self.body_query_truths + .resize_with(module_idx_usize + 1, Vec::new); + } + + let module_rules = &mut self.body_query_truths[module_idx_usize]; + if module_rules.len() <= rule_idx { + module_rules.resize_with(rule_idx + 1, Vec::new); + } + + let rule_bodies = &mut module_rules[rule_idx]; + if rule_bodies.len() <= body_idx { + rule_bodies.resize(body_idx + 1, BodyQueryTruth::default()); + } + + rule_bodies[body_idx] = BodyQueryTruth { + reachable, + always_true, + }; + } +} + +/// Temporary alias to ease transition while call sites are updated. +pub(crate) type TypeAnalysisResult = AnalysisState; diff --git a/src/type_analysis/result/deps.rs b/src/type_analysis/result/deps.rs new file mode 100644 index 00000000..96c9ca3d --- /dev/null +++ b/src/type_analysis/result/deps.rs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Rule dependency graph structures. + +use alloc::collections::BTreeSet; +use alloc::string::String; +use alloc::vec::Vec; + +/// Compact representation of the rule dependency graph produced by analysis. +#[derive(Clone, Debug, Default)] +pub struct DependencyGraph { + pub edges: Vec, + pub sccs: Vec>, + pub recursive_rules: BTreeSet, +} + +/// Directed dependency between two rule paths. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct DependencyEdge { + pub source: String, + pub target: String, + pub kind: DependencyKind, +} + +/// Classification of rule dependencies. 
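// Illustrative edge (hypothetical rule paths): a body of "data.example.allow" that
// directly references "data.example.is_admin" yields
//     DependencyEdge { source: "data.example.allow".into(),
//                      target: "data.example.is_admin".into(),
//                      kind: DependencyKind::StaticCall }
// while the remaining variants cover lookups through computed keys (DynamicCall)
// and the link from a default declaration to its base rule (DefaultLink).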
+#[derive(Clone, Debug, PartialEq, Eq)] +pub enum DependencyKind { + StaticCall, + DynamicCall, + DefaultLink, +} diff --git a/src/type_analysis/result/entrypoints.rs b/src/type_analysis/result/entrypoints.rs new file mode 100644 index 00000000..637922b5 --- /dev/null +++ b/src/type_analysis/result/entrypoints.rs @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Entrypoint metadata captured during type analysis. + +use alloc::collections::BTreeSet; +use alloc::string::String; +use alloc::vec::Vec; + +/// Reachability information for requested entrypoints. +#[derive(Clone, Debug, Default)] +pub struct EntrypointSummary { + pub requested: Vec, + pub reachable: BTreeSet, + pub included_defaults: BTreeSet, + pub dynamic_refs: Vec, +} + +/// Pattern describing dynamic lookups discovered during analysis. +#[derive(Clone, Debug, Default)] +pub struct DynamicLookupPattern { + pub static_prefix: Vec, + pub pattern: String, +} diff --git a/src/type_analysis/result/expressions.rs b/src/type_analysis/result/expressions.rs new file mode 100644 index 00000000..4e3c7610 --- /dev/null +++ b/src/type_analysis/result/expressions.rs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Expression-level result structures captured by the analyser. + +use crate::type_analysis::constants::ConstantStore; +use crate::type_analysis::context::LookupContext; + +/// Combined set of per-expression facts and constant information. +#[derive(Clone, Debug, Default)] +pub struct ExpressionFacts { + pub facts: LookupContext, + pub constants: ConstantStore, +} diff --git a/src/type_analysis/result/mod.rs b/src/type_analysis/result/mod.rs new file mode 100644 index 00000000..7cac77d6 --- /dev/null +++ b/src/type_analysis/result/mod.rs @@ -0,0 +1,549 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Prototype result data model for the type analyser. +//! This module is not yet wired into the main pipeline; it exists so we can +//! iterate on the structures without disturbing the current implementation. + +mod deps; +mod entrypoints; +mod expressions; +mod rules; + +use alloc::{borrow::ToOwned, collections::BTreeMap, format, string::String, vec, vec::Vec}; + +use crate::ast::{Expr, Rule, RuleBody, RuleHead}; +use crate::type_analysis::model::{ + ConstantValue, RuleAnalysis, RuleConstantState, StructuralType, TypeDescriptor, TypeDiagnostic, + TypeFact, TypeProvenance, +}; +use crate::type_analysis::propagation::TypeAnalyzer; +use crate::utils::get_path_string; +use crate::value::Value; + +pub use deps::{DependencyEdge, DependencyGraph, DependencyKind}; +pub use entrypoints::{DynamicLookupPattern, EntrypointSummary}; +pub use expressions::ExpressionFacts; +pub use rules::{ + DefinitionSummary, ModuleSummary, RuleBodyKind, RuleBodySummary, RuleKind, + RuleSpecializationRecord, RuleSpecializationTrace, RuleSummary, RuleTable, RuleVerboseInfo, + TraceLocal, TraceStatement, +}; + +/// Top-level aggregate returned after analysing a module set. +#[derive(Clone, Debug, Default)] +pub struct TypeAnalysisResult { + pub expressions: ExpressionFacts, + pub rules: RuleTable, + pub dependencies: DependencyGraph, + pub entrypoints: EntrypointSummary, + pub diagnostics: Vec, +} + +fn rule_returns_boolean_by_default(head: &RuleHead) -> bool { + matches!( + head, + RuleHead::Func { assign: None, .. } + | RuleHead::Compr { assign: None, .. } + | RuleHead::Set { .. 
} + ) +} + +fn default_true_body_fact() -> TypeFact { + TypeFact::new( + TypeDescriptor::Structural(StructuralType::Boolean), + TypeProvenance::Propagated, + ) + .with_constant(ConstantValue::known(Value::from(true))) +} + +fn is_constant_body(value_fact: Option<&TypeFact>, body: &RuleBody) -> bool { + if !body.query.as_ref().stmts.is_empty() { + return false; + } + + matches!( + value_fact, + Some(fact) if matches!(fact.constant, ConstantValue::Known(_)) + ) +} + +impl TypeAnalysisResult { + /// Create TypeAnalysisResult from internal AnalysisState. + /// This converts the mutable pipeline state into an immutable public result. + pub(crate) fn from_analysis_state( + state: super::propagation::AnalysisState, + modules: &[crate::ast::Ref], + ) -> Self { + // Build ExpressionFacts from lookup and constants + let expressions = ExpressionFacts { + facts: state.lookup.clone(), + constants: state.constants.clone(), + }; + + // Collect reachable rules + let reachable = expressions.facts.reachable_rules().cloned().collect(); + + // Convert dynamic references + let dynamic_refs = expressions + .facts + .dynamic_references() + .iter() + .map(|dr| DynamicLookupPattern { + static_prefix: dr.static_prefix.clone(), + pattern: dr.pattern.clone(), + }) + .collect(); + + // Build RuleTable with rule metadata + let mut modules_summary = Vec::with_capacity(modules.len()); + let mut rules_by_path: BTreeMap = BTreeMap::new(); + + for (module_idx, module_ref) in modules.iter().enumerate() { + let module = module_ref.as_ref(); + let module_idx_u32 = module_idx as u32; + let module_path = get_path_string(module.package.refr.as_ref(), Some("data")) + .unwrap_or_else(|_| "data".to_owned()); + let source_name = module.package.span.source.get_path().clone(); + + let module_rules: &[RuleAnalysis] = state + .rule_info + .get(module_idx) + .map(Vec::as_slice) + .unwrap_or_default(); + + let mut rule_paths: Vec = Vec::new(); + let mut rule_summaries: Vec = Vec::new(); + + for (rule_idx, rule_ref) in module.policy.iter().enumerate() { + let rule = rule_ref.as_ref(); + let analysis = module_rules + .get(rule_idx) + .cloned() + .unwrap_or_else(RuleAnalysis::default); + + let head_expr = rule_head_expression(rule); + let head_expr_idx = head_expr.map(|expr| expr.eidx()); + let head_value_expr_idx = rule_head_value_expr_idx(rule); + let rule_path = head_expr + .and_then(|refr| get_path_string(refr.as_ref(), Some(&module_path)).ok()) + .unwrap_or_else(|| unknown_rule_path(&module_path, rule_idx)); + + let specializations: Vec = state + .function_rule_specializations + .get(&(module_idx_u32, rule_idx)) + .map(|specs| specs.values().cloned().collect()) + .unwrap_or_default(); + + let rule_dependencies = analysis + .rule_dependencies + .iter() + .map(|target| DependencyEdge { + source: rule_path.clone(), + target: target.clone(), + kind: DependencyKind::StaticCall, + }) + .collect(); + + let head_fact = head_expr_idx + .and_then(|idx| state.lookup.get_expr(module_idx_u32, idx)) + .cloned(); + + let constant_value = match &analysis.constant_state { + RuleConstantState::Done(value) => Some(value.clone()), + _ => None, + }; + + let aggregated_head_fact = aggregate_definition_head_fact( + module_idx_u32, + head_expr_idx, + head_value_expr_idx, + &head_fact, + &specializations, + ); + let aggregated_parameter_facts = + aggregate_definition_parameter_facts(&specializations); + + // Extract span information from rule + let head_span = head_expr.map(|expr| { + let span = expr.span(); + rules::SourceSpan { + file: 
span.source.get_path().clone(), + line: span.line, + col: span.col, + } + }); + + let definition_span = rule.span().clone(); + let def_span = Some(rules::SourceSpan { + file: definition_span.source.get_path().clone(), + line: definition_span.line, + col: definition_span.col, + }); + + let bodies_summary = match rule { + Rule::Spec { head, bodies, .. } => bodies + .iter() + .enumerate() + .map(|(body_idx, body)| { + let span = body.span.clone(); + let span = rules::SourceSpan { + file: span.source.get_path().clone(), + line: span.line, + col: span.col, + }; + + let value_expr_idx = + body.assign.as_ref().map(|assign| assign.value.eidx()).or( + if body_idx == 0 { + head_value_expr_idx + } else { + None + }, + ); + + let mut value_fact = value_expr_idx + .and_then(|idx| state.lookup.get_expr(module_idx_u32, idx)) + .cloned() + .or_else(|| { + value_expr_idx.and_then(|idx| { + specializations.iter().find_map(|spec| { + spec.expr_facts + .get(&module_idx_u32) + .and_then(|exprs| exprs.get(&idx).cloned()) + }) + }) + }); + + if value_fact.is_none() + && body_idx == 0 + && rule_returns_boolean_by_default(head) + { + value_fact = Some(default_true_body_fact()); + } + + let is_constant = is_constant_body(value_fact.as_ref(), body); + + rules::RuleBodySummary { + body_idx, + kind: if body_idx == 0 { + rules::RuleBodyKind::Primary + } else { + rules::RuleBodyKind::Else + }, + span: Some(span), + value_expr_idx, + value_fact, + is_constant, + } + }) + .collect(), + Rule::Default { .. } => Vec::new(), + }; + + let definition = DefinitionSummary { + definition_idx: rule_idx, + module_idx: module_idx_u32, + span: def_span, + analysis: analysis.clone(), + head_fact: head_fact.clone(), + aggregated_head_fact: aggregated_head_fact.clone(), + aggregated_parameter_facts: aggregated_parameter_facts.clone(), + bodies: bodies_summary, + constant_value: constant_value.clone(), + specializations: specializations.clone(), + ..DefinitionSummary::default() + }; + let definitions = vec![definition]; + let rule_aggregated_head_fact = aggregate_rule_head_fact(&definitions); + let rule_aggregated_parameter_facts = aggregate_rule_parameter_facts(&definitions); + + let mut rule_summary = RuleSummary { + id: rule_path.clone(), + module_idx: module_idx_u32, + head_span, + definitions, + kind: classify_rule(rule), + arity: rule_arity(rule), + head_expr: head_expr_idx, + constant_state: analysis.constant_state.clone(), + input_dependencies: analysis.input_dependencies.clone(), + rule_dependencies, + aggregated_head_fact: rule_aggregated_head_fact, + aggregated_parameter_facts: rule_aggregated_parameter_facts, + specializations, + ..RuleSummary::default() + }; + + rule_summary.trace = None; // traces populated elsewhere + + rule_paths.push(rule_path.clone()); + rules_by_path.insert(rule_path.clone(), rule_summary.clone()); + rule_summaries.push(rule_summary); + } + + modules_summary.push(ModuleSummary { + module_idx: module_idx_u32, + package_path: module_path, + source_name, + rule_paths, + rules: rule_summaries, + }); + } + + let rules = RuleTable { + by_path: rules_by_path, + modules: modules_summary, + }; + + // Build DependencyGraph (empty for now) + let dependencies = DependencyGraph::default(); + + // Build EntrypointSummary from state fields + let entrypoints = EntrypointSummary { + requested: state.requested_entrypoints, + reachable, + included_defaults: state.included_defaults, + dynamic_refs, + }; + + TypeAnalysisResult { + expressions, + rules, + dependencies, + entrypoints, + diagnostics: state.diagnostics, + } + } 
+} + +fn rule_head_expression(rule: &Rule) -> Option<&crate::ast::Ref> { + match rule { + Rule::Spec { head, .. } => match head { + RuleHead::Compr { refr, .. } + | RuleHead::Set { refr, .. } + | RuleHead::Func { refr, .. } => Some(refr), + }, + Rule::Default { refr, .. } => Some(refr), + } +} + +fn rule_head_value_expr_idx(rule: &Rule) -> Option { + match rule { + Rule::Spec { head, .. } => match head { + RuleHead::Compr { assign, .. } | RuleHead::Func { assign, .. } => { + assign.as_ref().map(|assign| assign.value.eidx()) + } + RuleHead::Set { .. } => None, + }, + Rule::Default { value, .. } => Some(value.eidx()), + } +} + +fn classify_rule(rule: &Rule) -> RuleKind { + match rule { + Rule::Spec { head, .. } => match head { + RuleHead::Func { .. } => RuleKind::Function, + RuleHead::Set { .. } => RuleKind::PartialSet, + RuleHead::Compr { .. } => RuleKind::PartialObject, + }, + Rule::Default { .. } => RuleKind::Complete, + } +} + +fn rule_arity(rule: &Rule) -> Option { + match rule { + Rule::Spec { + head: RuleHead::Func { args, .. }, + .. + } => Some(args.len()), + Rule::Default { args, .. } if !args.is_empty() => Some(args.len()), + _ => None, + } +} + +fn unknown_rule_path(module_path: &str, rule_idx: usize) -> String { + format!("{module_path}::") +} + +fn aggregate_definition_head_fact( + module_idx: u32, + head_expr_idx: Option, + head_value_expr_idx: Option, + head_fact: &Option, + specializations: &[RuleSpecializationRecord], +) -> Option { + let mut all_facts: Vec = Vec::new(); + + if let Some(fact) = head_fact { + all_facts.push(fact.clone()); + } + + for spec in specializations { + if let Some(fact) = &spec.head_fact { + all_facts.push(fact.clone()); + } + + if spec + .head_fact + .as_ref() + .map(fact_is_informative) + .unwrap_or(false) + { + continue; + } + + if let Some(module_map) = spec.expr_facts.get(&module_idx) { + if let Some(expr_idx) = head_value_expr_idx { + if let Some(expr_fact) = module_map.get(&expr_idx) { + all_facts.push(expr_fact.clone()); + continue; + } + } + + if let Some(expr_idx) = head_expr_idx { + if let Some(expr_fact) = module_map.get(&expr_idx) { + all_facts.push(expr_fact.clone()); + } + } + } + } + + if all_facts.is_empty() { + return None; + } + + let informative: Vec = all_facts + .iter() + .filter(|fact| fact_is_informative(fact)) + .cloned() + .collect(); + + if !informative.is_empty() { + Some(TypeAnalyzer::merge_rule_facts(&informative)) + } else { + Some(TypeAnalyzer::merge_rule_facts(&all_facts)) + } +} + +fn aggregate_definition_parameter_facts( + specializations: &[RuleSpecializationRecord], +) -> Vec> { + let max_params = specializations + .iter() + .map(|spec| spec.parameter_facts.len()) + .max() + .unwrap_or(0); + + let mut merged: Vec> = Vec::with_capacity(max_params); + + for index in 0..max_params { + let mut slot_facts = Vec::new(); + for spec in specializations { + if let Some(fact) = spec.parameter_facts.get(index) { + slot_facts.push(fact.clone()); + } + } + + if slot_facts.is_empty() { + merged.push(None); + continue; + } + + let informative: Vec = slot_facts + .iter() + .filter(|fact| fact_is_informative(fact)) + .cloned() + .collect(); + + if !informative.is_empty() { + merged.push(Some(TypeAnalyzer::merge_rule_facts(&informative))); + } else { + merged.push(Some(TypeAnalyzer::merge_rule_facts(&slot_facts))); + } + } + + merged +} + +fn aggregate_rule_head_fact(definitions: &[DefinitionSummary]) -> Option { + let mut all_facts: Vec = Vec::new(); + for definition in definitions { + if let Some(fact) = 
&definition.aggregated_head_fact { + all_facts.push(fact.clone()); + } + } + + if all_facts.is_empty() { + return None; + } + + let informative: Vec = all_facts + .iter() + .filter(|fact| fact_is_informative(fact)) + .cloned() + .collect(); + + if !informative.is_empty() { + Some(TypeAnalyzer::merge_rule_facts(&informative)) + } else { + Some(TypeAnalyzer::merge_rule_facts(&all_facts)) + } +} + +fn aggregate_rule_parameter_facts(definitions: &[DefinitionSummary]) -> Vec> { + let max_params = definitions + .iter() + .map(|definition| definition.aggregated_parameter_facts.len()) + .max() + .unwrap_or(0); + + let mut merged: Vec> = Vec::with_capacity(max_params); + + for index in 0..max_params { + let mut slot_facts = Vec::new(); + for definition in definitions { + if let Some(fact) = definition + .aggregated_parameter_facts + .get(index) + .and_then(|opt| opt.clone()) + { + slot_facts.push(fact); + } + } + + if slot_facts.is_empty() { + merged.push(None); + continue; + } + + let informative: Vec = slot_facts + .iter() + .filter(|fact| fact_is_informative(fact)) + .cloned() + .collect(); + + if !informative.is_empty() { + merged.push(Some(TypeAnalyzer::merge_rule_facts(&informative))); + } else { + merged.push(Some(TypeAnalyzer::merge_rule_facts(&slot_facts))); + } + } + + merged +} + +fn fact_is_informative(fact: &TypeFact) -> bool { + match &fact.descriptor { + TypeDescriptor::Structural(ty) => !structural_contains_unknownish(ty), + _ => true, + } +} + +fn structural_contains_unknownish(ty: &StructuralType) -> bool { + match ty { + StructuralType::Any | StructuralType::Unknown => true, + StructuralType::Union(variants) => variants.iter().any(structural_contains_unknownish), + _ => false, + } +} diff --git a/src/type_analysis/result/rules.rs b/src/type_analysis/result/rules.rs new file mode 100644 index 00000000..76f226ea --- /dev/null +++ b/src/type_analysis/result/rules.rs @@ -0,0 +1,151 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Rule-centric result structures. + +use alloc::collections::BTreeMap; +use alloc::format; +use alloc::string::String; +use alloc::vec::Vec; + +use crate::type_analysis::model::{ + RuleAnalysis, RuleConstantState, RuleSpecializationSignature, SourceOrigin, TypeFact, +}; +use crate::value::Value; + +use super::deps::DependencyEdge; + +/// Source location for a rule or definition +#[derive(Clone, Debug, Default)] +pub struct SourceSpan { + pub file: String, + pub line: u32, + pub col: u32, +} + +impl SourceSpan { + pub fn format(&self) -> String { + format!("{}:{}:{}", self.file, self.line, self.col) + } +} + +/// Top-level storage for all analysed rules. +#[derive(Clone, Debug, Default)] +pub struct RuleTable { + pub by_path: BTreeMap, + pub modules: Vec, +} + +/// Metadata about a source module. +#[derive(Clone, Debug, Default)] +pub struct ModuleSummary { + pub module_idx: u32, + pub package_path: String, + pub source_name: String, + pub rule_paths: Vec, + /// Full summaries for each rule in this module (aligned with rule_paths). + pub rules: Vec, +} + +/// Aggregated view of a logical rule head. 
+#[derive(Clone, Debug, Default)] +pub struct RuleSummary { + pub id: String, + pub module_idx: u32, + pub head_span: Option, + pub definitions: Vec, + pub kind: RuleKind, + pub arity: Option, + pub head_expr: Option, + pub constant_state: RuleConstantState, + pub input_dependencies: Vec, + pub rule_dependencies: Vec, + pub aggregated_head_fact: Option, + pub aggregated_parameter_facts: Vec>, + pub specializations: Vec, + pub trace: Option, +} + +/// Data captured for a concrete rule definition (individual body). +#[derive(Clone, Debug, Default)] +pub struct DefinitionSummary { + pub definition_idx: usize, + pub module_idx: u32, + pub span: Option, + pub analysis: RuleAnalysis, + pub head_fact: Option, + pub aggregated_head_fact: Option, + pub aggregated_parameter_facts: Vec>, + pub bodies: Vec, + pub constant_value: Option, + pub specializations: Vec, + pub trace: Option, +} + +/// Summary of a single rule body (primary or else clause). +#[derive(Clone, Debug, Default)] +pub struct RuleBodySummary { + pub body_idx: usize, + pub kind: RuleBodyKind, + pub span: Option, + pub value_expr_idx: Option, + pub value_fact: Option, + pub is_constant: bool, +} + +/// Distinguishes the main body from `else` bodies within a definition. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +pub enum RuleBodyKind { + #[default] + Primary, + Else, +} + +/// Classification for rule heads. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +pub enum RuleKind { + #[default] + Complete, + PartialSet, + PartialObject, + Function, +} + +/// Specialization record captured for function rules. +#[derive(Clone, Debug)] +pub struct RuleSpecializationRecord { + pub signature: RuleSpecializationSignature, + pub parameter_facts: Vec, + pub head_fact: Option, + pub constant_value: Option, + pub expr_facts: BTreeMap>, + pub trace: Option, +} + +/// Verbose trace for full rule or specialization evaluation. +#[derive(Clone, Debug, Default)] +pub struct RuleVerboseInfo { + pub locals: Vec, + pub statements: Vec, +} + +/// Captured locals for verbose output. +#[derive(Clone, Debug, Default)] +pub struct TraceLocal { + pub name: String, + pub fact: Option, +} + +/// Statement summary with associated fact lines. +#[derive(Clone, Debug, Default)] +pub struct TraceStatement { + pub summary: String, + pub fact_lines: Vec, +} + +/// Trace captured for an individual specialization run. +#[derive(Clone, Debug, Default)] +pub struct RuleSpecializationTrace { + pub locals: Vec, + pub statements: Vec, +} diff --git a/src/type_analysis/value_utils.rs b/src/type_analysis/value_utils.rs new file mode 100644 index 00000000..6f464752 --- /dev/null +++ b/src/type_analysis/value_utils.rs @@ -0,0 +1,212 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Utilities for converting between runtime Values and type analysis facts. + +use alloc::borrow::ToOwned; +use alloc::boxed::Box; +use alloc::collections::BTreeMap; +use alloc::vec::Vec; + +use crate::value::Value; + +use super::model::{ + ConstantValue, StructuralObjectShape, StructuralType, TypeDescriptor, TypeFact, TypeProvenance, +}; + +/// Converts a runtime Value into a TypeFact with constant information. +/// This is used when the interpreter evaluates a rule in constant-folding mode +/// and we need to bridge the result back to the type analyzer. 
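// Illustrative conversions (hypothetical values): value_to_type_fact(&Value::from(42))
// produces a fact whose descriptor is Structural(Integer) and whose constant is
// Known(42); an array value [1, 2, 3] becomes Structural(Array(Integer)) because the
// element types unify to a single Integer (see unify_types below), and an object with
// only string keys is captured field-by-field as a structural object shape.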
+pub(crate) fn value_to_type_fact(value: &Value) -> TypeFact { + let structural_type = infer_structural_type(value); + + TypeFact::new( + TypeDescriptor::Structural(structural_type), + TypeProvenance::Propagated, + ) + .with_constant(ConstantValue::Known(value.clone())) +} + +/// Infers the structural type from a runtime value. +fn infer_structural_type(value: &Value) -> StructuralType { + match value { + Value::Null => StructuralType::Null, + Value::Bool(_) => StructuralType::Boolean, + Value::Number(n) => { + if n.is_integer() { + StructuralType::Integer + } else { + StructuralType::Number + } + } + Value::String(_) => StructuralType::String, + Value::Array(items) => { + // Try to infer a common element type + let element_type = if items.is_empty() { + StructuralType::Any + } else { + // Find the most specific common type among all elements + let types: Vec<_> = items.iter().map(infer_structural_type).collect(); + unify_types(&types) + }; + StructuralType::Array(Box::new(element_type)) + } + Value::Set(items) => { + let element_type = if items.is_empty() { + StructuralType::Any + } else { + let types: Vec<_> = items.iter().map(infer_structural_type).collect(); + unify_types(&types) + }; + StructuralType::Set(Box::new(element_type)) + } + Value::Object(fields) => { + // Capture string-keyed fields; fall back to a generic object if we encounter + // non-string keys since we cannot statically represent them in a structural shape. + let mut shape_fields = BTreeMap::new(); + for (key, value) in fields.iter() { + let Value::String(name) = key else { + return StructuralType::Object(Default::default()); + }; + + let field_type = infer_structural_type(value); + shape_fields.insert(name.as_ref().to_owned(), field_type); + } + + StructuralType::Object(StructuralObjectShape { + fields: shape_fields, + }) + } + Value::Undefined => StructuralType::Unknown, + } +} + +/// Unifies multiple structural types into a single common type. +/// If all types are the same, returns that type. +/// Otherwise, returns a Union or Any as appropriate. 
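// Illustrative unification results (hypothetical inputs):
//   [Integer, Integer] -> Integer                      (all identical)
//   [Integer, Number]  -> Number                       (numeric widening)
//   [String, Boolean]  -> Union([String, Boolean])     (no common scalar type)
//   []                 -> Unknown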
+fn unify_types(types: &[StructuralType]) -> StructuralType { + if types.is_empty() { + return StructuralType::Unknown; + } + + // Check if all types are identical (treat Unknown as neutral when Any present) + let first = &types[0]; + if types.iter().all(|t| t == first) { + return first.clone(); + } + + // Check if all types are numeric (Number or Integer) + let all_numeric = types + .iter() + .all(|t| matches!(t, StructuralType::Number | StructuralType::Integer)); + if all_numeric { + // If any is Number, the union is Number; otherwise Integer + if types.iter().any(|t| matches!(t, StructuralType::Number)) { + return StructuralType::Number; + } + return StructuralType::Integer; + } + + // Otherwise, create a union + StructuralType::Union(types.to_vec()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::value::Value; + use alloc::collections::BTreeMap; + use alloc::vec; + + #[test] + fn test_value_to_type_fact_null() { + let value = Value::Null; + let fact = value_to_type_fact(&value); + assert!(matches!( + fact.descriptor, + TypeDescriptor::Structural(StructuralType::Null) + )); + assert!(matches!(fact.constant, ConstantValue::Known(_))); + } + + #[test] + fn test_value_to_type_fact_bool() { + let value = Value::Bool(true); + let fact = value_to_type_fact(&value); + assert!(matches!( + fact.descriptor, + TypeDescriptor::Structural(StructuralType::Boolean) + )); + } + + #[test] + fn test_value_to_type_fact_integer() { + let value = Value::from(42); + let fact = value_to_type_fact(&value); + assert!(matches!( + fact.descriptor, + TypeDescriptor::Structural(StructuralType::Integer) + )); + } + + #[test] + fn test_value_to_type_fact_string() { + let value = Value::String("test".into()); + let fact = value_to_type_fact(&value); + assert!(matches!( + fact.descriptor, + TypeDescriptor::Structural(StructuralType::String) + )); + } + + #[test] + fn test_value_to_type_fact_empty_array() { + let value = Value::from(vec![Value::Null; 0]); + let fact = value_to_type_fact(&value); + assert!(matches!( + fact.descriptor, + TypeDescriptor::Structural(StructuralType::Array(_)) + )); + } + + #[test] + fn test_value_to_type_fact_homogeneous_array() { + let value = Value::from(vec![Value::from(1), Value::from(2), Value::from(3)]); + let fact = value_to_type_fact(&value); + if let TypeDescriptor::Structural(StructuralType::Array(elem_type)) = &fact.descriptor { + assert!(matches!(**elem_type, StructuralType::Integer)); + } else { + panic!("Expected array type"); + } + } + + #[test] + fn test_value_to_type_fact_object_with_string_keys() { + let mut fields = BTreeMap::new(); + fields.insert(Value::from("value"), Value::from(1)); + + let value = Value::from(fields); + let fact = value_to_type_fact(&value); + + if let TypeDescriptor::Structural(StructuralType::Object(shape)) = &fact.descriptor { + let field = shape.fields.get("value").expect("missing field"); + assert!(matches!(field, StructuralType::Integer)); + } else { + panic!("Expected object type"); + } + } + + #[test] + fn test_unify_types_same() { + let types = vec![StructuralType::Integer, StructuralType::Integer]; + let unified = unify_types(&types); + assert!(matches!(unified, StructuralType::Integer)); + } + + #[test] + fn test_unify_types_numeric() { + let types = vec![StructuralType::Integer, StructuralType::Number]; + let unified = unify_types(&types); + assert!(matches!(unified, StructuralType::Number)); + } +} diff --git a/src/type_checker.rs b/src/type_checker.rs new file mode 100644 index 00000000..fa22d618 --- /dev/null +++ 
b/src/type_checker.rs @@ -0,0 +1,358 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! High-level type checker for Regorus policies. +//! +//! This module provides a convenient API for running type analysis on Rego policies +//! and querying the results. It's designed to be used both standalone and integrated +//! with the Engine. + +use crate::ast::{Module, Ref}; +use crate::compiler::hoist::HoistedLoopsLookup; +use crate::schema::Schema; +use crate::type_analysis::{TypeAnalysisOptions, TypeAnalysisResult, TypeAnalyzer}; +use crate::value::Value; +use crate::Rc; + +use alloc::string::String; +use alloc::vec::Vec; +use anyhow::Result; + +/// High-level type checker for Rego policies. +/// +/// The TypeChecker provides a convenient interface for performing type analysis +/// on Rego policies. It handles the necessary preparation steps (like loop hoisting) +/// automatically and caches results for efficiency. +/// +/// # Example +/// +/// ```no_run +/// # use regorus::*; +/// # fn main() -> anyhow::Result<()> { +/// let mut engine = Engine::new(); +/// engine.add_policy( +/// "policy.rego".to_string(), +/// r#" +/// package example +/// allow = input.user == "admin" +/// "#.to_string() +/// )?; +/// +/// let modules = engine.get_modules(); +/// let modules = Rc::new(modules.clone()); +/// let mut type_checker = TypeChecker::new(modules); +/// +/// // Optionally set input schema +/// #[cfg(feature = "jsonschema")] +/// { +/// let input_schema = Schema::from_json_str( +/// r#"{"type": "object", "properties": {"user": {"type": "string"}}}"# +/// ).map_err(|e| anyhow::anyhow!("{e}"))?; +/// type_checker.set_input_schema(input_schema); +/// } +/// +/// // Run type analysis +/// type_checker.check()?; +/// +/// // Query results +/// if let Some(result) = type_checker.get_result() { +/// println!("Found {} diagnostics", result.diagnostics.len()); +/// } +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug, Clone)] +pub struct TypeChecker { + modules: Rc>>, + input_schema: Option, + data_schema: Option, + loop_lookup: Option>, + entrypoints: Option>, + result: Option, +} + +impl TypeChecker { + /// Create a new TypeChecker for the given modules. + /// + /// # Arguments + /// + /// * `modules` - The parsed Rego policy modules to analyze + pub fn new(modules: Rc>>) -> Self { + Self { + modules, + input_schema: None, + data_schema: None, + loop_lookup: None, + entrypoints: None, + result: None, + } + } + + /// Set the input schema for type analysis. + /// + /// The input schema is used to provide more precise type information + /// for expressions that reference the `input` document. + /// + /// # Arguments + /// + /// * `schema` - JSON Schema describing the structure of the input document + pub fn set_input_schema(&mut self, schema: Schema) { + self.input_schema = Some(schema); + // Invalidate cached result since schema changed + self.result = None; + } + + /// Set the data schema for type analysis. + /// + /// The data schema is used to provide more precise type information + /// for expressions that reference the `data` document. + /// + /// # Arguments + /// + /// * `schema` - JSON Schema describing the structure of the data document + pub fn set_data_schema(&mut self, schema: Schema) { + self.data_schema = Some(schema); + // Invalidate cached result since schema changed + self.result = None; + } + + /// Get the input schema if one has been set. 
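    // Illustrative follow-up to the struct-level example (hypothetical rule path),
    // showing entrypoint filtering together with the accessors defined below:
    //
    //     type_checker.set_entrypoints(vec!["data.example.allow".to_string()]);
    //     type_checker.check()?;
    //     if type_checker.has_errors() == Some(true) {
    //         for diag in &type_checker.get_result().unwrap().diagnostics {
    //             // report `diag` to the user
    //         }
    //     }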
+ pub fn get_input_schema(&self) -> Option<&Schema> { + self.input_schema.as_ref() + } + + /// Get the data schema if one has been set. + pub fn get_data_schema(&self) -> Option<&Schema> { + self.data_schema.as_ref() + } + + /// Set entrypoints for filtered type analysis. + /// + /// When entrypoints are set, type analysis will only process rules + /// reachable from the specified paths. + /// + /// # Arguments + /// + /// * `entrypoints` - List of rule paths (e.g., "data.package.rule") + pub fn set_entrypoints(&mut self, entrypoints: Vec) { + self.entrypoints = Some(entrypoints); + // Invalidate cached result since entrypoints changed + self.result = None; + } + + /// Get the entrypoints if any have been set. + pub fn get_entrypoints(&self) -> Option<&[String]> { + self.entrypoints.as_deref() + } + + /// Run type checking/analysis on the policies. + /// + /// This method performs the following steps: + /// 1. Runs loop hoisting if not already done (to extract output expressions) + /// 2. Runs type analysis with the configured schemas + /// 3. Caches the results for subsequent queries + /// + /// Returns an error if hoisting or type analysis fails. + pub fn check(&mut self) -> Result<()> { + // Run hoister if not already done + if self.loop_lookup.is_none() { + let hoister = crate::compiler::hoist::LoopHoister::new(); + let lookup = hoister.populate(&self.modules)?; + self.loop_lookup = Some(Rc::new(lookup)); + } + + // Prepare type analysis options + let options = TypeAnalysisOptions { + input_schema: self.input_schema.clone(), + data_schema: self.data_schema.clone(), + loop_lookup: self.loop_lookup.clone(), + entrypoints: self.entrypoints.clone(), + disable_function_generic_pass: true, + }; + + // Run type analysis + let analyzer = TypeAnalyzer::new(&self.modules, None, options); + let result = analyzer.analyze_modules(); + + // Cache the result + self.result = Some(result); + + Ok(()) + } + + /// Get the type analysis result. + /// + /// Returns `None` if type checking hasn't been run yet via [`check()`](Self::check). + pub fn get_result(&self) -> Option<&TypeAnalysisResult> { + self.result.as_ref() + } + + /// Get the hoisted loops lookup. + /// + /// This is useful for advanced use cases that need to access the + /// output expressions and scope contexts from the hoister. + /// + /// Returns `None` if type checking hasn't been run yet. + pub fn get_loop_lookup(&self) -> Option<&Rc> { + self.loop_lookup.as_ref() + } + + /// Check if there are any type errors in the analysis result. + /// + /// Returns `None` if type checking hasn't been run yet. + pub fn has_errors(&self) -> Option { + self.result.as_ref().map(|r| !r.diagnostics.is_empty()) + } + + /// Get the number of diagnostics found. + /// + /// Returns `None` if type checking hasn't been run yet. + pub fn diagnostic_count(&self) -> Option { + self.result.as_ref().map(|r| r.diagnostics.len()) + } + + /// Get the type of a specific rule by its path. + /// + /// # Arguments + /// + /// * `module_idx` - The index of the module containing the rule + /// * `rule_name` - The name of the rule (e.g., "allow", "violation") + /// + /// Returns the type descriptor for the rule if found and type checking has been run. 
+ pub fn get_rule_type( + &self, + _module_idx: usize, + _rule_name: &str, + ) -> Option<&crate::type_analysis::TypeDescriptor> { + // This is a simplified implementation - a full implementation would + // need to look up the rule by name in the module and get its type from the result + self.result.as_ref()?; + // TODO: Implement rule name lookup + None + } + + /// Get a constant value for a rule if it was determined to be constant. + /// + /// # Arguments + /// + /// * `module_idx` - The index of the module containing the rule + /// * `rule_idx` - The index of the rule within the module + /// + /// Returns the constant value if the rule is constant and type checking has been run. + pub fn get_rule_constant(&self, module_idx: usize, rule_idx: usize) -> Option<&Value> { + let result = self.result.as_ref()?; + // Note: For now we return None since RuleTable is not yet populated. + // Once RuleTable is fully implemented, this will use: + // let module_summary = result.rules.modules.get(module_idx)?; + // let rule_path = module_summary.rule_paths.get(rule_idx)?; + // let rule_summary = result.rules.by_path.get(rule_path)?; + // match &rule_summary.constant_state { ... } + let _ = (result, module_idx, rule_idx); + None + } + + /// Clear all cached results and force re-analysis on next check. + pub fn invalidate(&mut self) { + self.result = None; + self.loop_lookup = None; + } + + /// Update the modules being analyzed. + /// + /// This invalidates all cached results. + pub fn set_modules(&mut self, modules: Rc>>) { + self.modules = modules; + self.invalidate(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::*; + + #[test] + fn test_type_checker_basic() -> Result<()> { + let mut engine = Engine::new(); + engine.add_policy( + "test.rego".to_string(), + r#" + package test + allow = true + "# + .to_string(), + )?; + + let modules = Rc::new(engine.get_modules().clone()); + let mut checker = TypeChecker::new(modules); + + checker.check()?; + + assert!(checker.get_result().is_some()); + assert_eq!(checker.has_errors(), Some(false)); + assert_eq!(checker.diagnostic_count(), Some(0)); + + Ok(()) + } + + #[cfg(feature = "jsonschema")] + #[test] + fn test_type_checker_with_schema() -> Result<()> { + let mut engine = Engine::new(); + engine.add_policy( + "test.rego".to_string(), + r#" + package test + allow = input.value > 10 + "# + .to_string(), + )?; + + let modules = Rc::new(engine.get_modules().clone()); + let mut checker = TypeChecker::new(modules); + + let schema = Schema::from_json_str( + r#"{"type": "object", "properties": {"value": {"type": "integer"}}}"#, + ) + .map_err(|e| anyhow::anyhow!("{}", e))?; + checker.set_input_schema(schema); + + checker.check()?; + + assert!(checker.get_result().is_some()); + + Ok(()) + } + + #[cfg(feature = "jsonschema")] + #[test] + fn test_type_checker_invalidation() -> Result<()> { + let mut engine = Engine::new(); + engine.add_policy( + "test.rego".to_string(), + r#" + package test + x = 1 + "# + .to_string(), + )?; + + let modules = Rc::new(engine.get_modules().clone()); + let mut checker = TypeChecker::new(modules); + + checker.check()?; + assert!(checker.get_result().is_some()); + + // Setting schema should invalidate + let schema = + Schema::from_json_str(r#"{"type": "object"}"#).map_err(|e| anyhow::anyhow!("{}", e))?; + checker.set_input_schema(schema); + assert!(checker.get_result().is_none()); + + // Check again to rebuild cache + checker.check()?; + assert!(checker.get_result().is_some()); + + Ok(()) + } +} diff --git a/src/utils.rs 
b/src/utils.rs index e0ca2d57..97980adf 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +pub mod path; + use crate::ast::*; use crate::builtins::*; use crate::lexer::*; @@ -15,7 +17,10 @@ pub fn get_path_string(refr: &Expr, document: Option<&str>) -> Result { while expr.is_some() { match expr { Some(Expr::RefDot { refr, field, .. }) => { - comps.push(field.0.text()); + let (field_span, _) = field + .as_ref() + .ok_or_else(|| refr.span().error("incomplete reference"))?; + comps.push(field_span.text()); expr = Some(refr); } Some(Expr::RefBrack { refr, index, .. }) => { diff --git a/src/utils/path.rs b/src/utils/path.rs new file mode 100644 index 00000000..b656e083 --- /dev/null +++ b/src/utils/path.rs @@ -0,0 +1,170 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Path manipulation utilities shared between analyzer and compiler + +use alloc::string::{String, ToString}; +use alloc::vec::Vec; + +/// Component of a reference chain +#[derive(Debug, Clone)] +pub enum AccessComponent { + /// Static field access (e.g., .field_name) + Field(String), + /// Dynamic access (e.g., [expr]) + Dynamic, +} + +/// Represents a chained reference like data.a.b[expr].c +#[derive(Debug, Clone)] +pub struct ReferenceChain { + /// Root variable (e.g., "data", "input", "local_var") + pub root: String, + /// Chain of field accesses + pub components: Vec, +} + +impl ReferenceChain { + /// Get the static prefix path (all literal components from the start) + pub fn static_prefix(&self) -> Vec<&str> { + let mut prefix = alloc::vec![self.root.as_str()]; + for component in &self.components { + match component { + AccessComponent::Field(field) => prefix.push(field.as_str()), + AccessComponent::Dynamic => break, + } + } + prefix + } + + /// Check if the chain is fully static (no dynamic components) + #[allow(dead_code)] + pub fn is_fully_static(&self) -> bool { + self.components + .iter() + .all(|c| matches!(c, AccessComponent::Field(_))) + } + + /// Get full path as string (for static chains) + #[allow(dead_code)] + pub fn to_path_string(&self) -> Option { + if !self.is_fully_static() { + return None; + } + let mut parts = alloc::vec![self.root.as_str()]; + for component in &self.components { + if let AccessComponent::Field(field) = component { + parts.push(field.as_str()); + } + } + Some(parts.join(".")) + } +} + +/// Parse a rule path string into components +#[allow(dead_code)] +pub fn parse_rule_path(path: &str) -> Vec { + path.split('.').map(|s| s.to_string()).collect() +} + +/// Check if a rule path matches a pattern with wildcards +/// e.g., "data.test.users.alice" matches "data.test.users.*" +pub fn matches_path_pattern(rule_path: &str, pattern: &str) -> bool { + if !pattern.contains('*') { + // Exact match or prefix + return rule_path == pattern || rule_path.starts_with(&alloc::format!("{}.", pattern)); + } + + let rule_parts: Vec<&str> = rule_path.split('.').collect(); + let pattern_parts: Vec<&str> = pattern.split('.').collect(); + + let match_length = rule_parts.len().min(pattern_parts.len()); + + for i in 0..match_length { + let rule_part = rule_parts[i]; + let pattern_part = pattern_parts[i]; + + if pattern_part == "*" { + if rule_part.is_empty() { + return false; + } + } else if rule_part != pattern_part { + return false; + } + } + + rule_parts.len() >= pattern_parts.len() + || pattern_parts[match_length..].iter().all(|&p| p == "*") +} + +/// Normalize a rule path (remove redundant 
"data." prefix if present twice) +pub fn normalize_rule_path(path: &str) -> String { + // Handle cases like "data.data.package.rule" + if path.starts_with("data.data.") { + path.strip_prefix("data.").unwrap().to_string() + } else { + path.to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_matches_path_pattern() { + assert!(matches_path_pattern( + "data.test.users.alice", + "data.test.users.*" + )); + assert!(matches_path_pattern( + "data.test.users.alice", + "data.*.*.alice" + )); + assert!(!matches_path_pattern( + "data.test.admin", + "data.test.users.*" + )); + assert!(matches_path_pattern("data.pkg.rule", "data.pkg.rule")); + } + + #[test] + fn test_static_prefix() { + let chain = ReferenceChain { + root: "data".to_string(), + components: alloc::vec![ + AccessComponent::Field("a".to_string()), + AccessComponent::Field("b".to_string()), + AccessComponent::Dynamic, + AccessComponent::Field("c".to_string()), + ], + }; + assert_eq!(chain.static_prefix(), alloc::vec!["data", "a", "b"]); + assert!(!chain.is_fully_static()); + } + + #[test] + fn test_fully_static_chain() { + let chain = ReferenceChain { + root: "input".to_string(), + components: alloc::vec![ + AccessComponent::Field("user".to_string()), + AccessComponent::Field("name".to_string()), + ], + }; + assert!(chain.is_fully_static()); + assert_eq!(chain.to_path_string(), Some("input.user.name".to_string())); + } + + #[test] + fn test_normalize_rule_path() { + assert_eq!( + normalize_rule_path("data.data.package.rule"), + "data.package.rule" + ); + assert_eq!( + normalize_rule_path("data.package.rule"), + "data.package.rule" + ); + } +} diff --git a/tests/aci/data.schema.json b/tests/aci/data.schema.json new file mode 100644 index 00000000..c8da384a --- /dev/null +++ b/tests/aci/data.schema.json @@ -0,0 +1,18 @@ +{ + "type": "object", + "properties": { + "metadata": { + "type": "object", + "properties": { + "devices": { + "type": "object", + "additionalProperties": { "type": "string" } + } + }, + "required": ["devices"], + "additionalProperties": false + } + }, + "required": ["metadata"], + "additionalProperties": false +} diff --git a/tests/aci/input.schema.json b/tests/aci/input.schema.json new file mode 100644 index 00000000..a2592509 --- /dev/null +++ b/tests/aci/input.schema.json @@ -0,0 +1,85 @@ +{ + "type": "object", + "properties": { + "deviceHash": { "type": "string" }, + "target": { "type": "string" }, + "containerID": { "type": "string" }, + "layerPaths": { + "type": "array", + "items": { "type": "string" } + }, + "encrypted": { "type": "boolean" }, + "argList": { + "type": "array", + "items": { "type": "string" } + }, + "capabilities": { + "type": "object", + "properties": { + "ambient": { "type": "array", "items": { "type": "string" } }, + "bounding": { "type": "array", "items": { "type": "string" } }, + "effective": { "type": "array", "items": { "type": "string" } }, + "inheritable": { "type": "array", "items": { "type": "string" } }, + "permitted": { "type": "array", "items": { "type": "string" } } + }, + "additionalProperties": false + }, + "envList": { + "type": "array", + "items": { "type": "string" } + }, + "groups": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { "type": "integer" }, + "name": { "type": "string" } + }, + "required": ["id", "name"], + "additionalProperties": false + } + }, + "hugePagesDir": { "type": "string" }, + "isInitProcess": { "type": "boolean" }, + "signal": { "type": "integer" }, + "rootPrefix": { "type": "string" }, + 
"mountPathPrefix": { "type": "string" }, + "rule": { "type": "string" }, + "name": { "type": "string" }, + "mounts": { + "type": "array", + "items": { + "type": "object", + "properties": { + "destination": { "type": "string" }, + "options": { "type": "array", "items": { "type": "string" } }, + "source": { "type": "string" }, + "type": { "type": "string" } + }, + "required": ["destination", "options", "source", "type"], + "additionalProperties": false + } + }, + "noNewPrivileges": { "type": "boolean" }, + "privileged": { "type": "boolean" }, + "seccompProfileSHA256": { "type": "string" }, + "sandboxDir": { "type": "string" }, + "umask": { "type": "string" }, + "user": { + "type": "object", + "properties": { + "id": { "type": "integer" }, + "name": { "type": "string" } + }, + "required": ["id", "name"], + "additionalProperties": false + }, + "workingDir": { "type": "string" }, + "unmountTarget": { "type": "string" }, + "feed": { "type": "string" }, + "issuer": { "type": "string" }, + "namespace": { "type": "string" } + }, + "additionalProperties": false +} diff --git a/tests/parser/mod.rs b/tests/parser/mod.rs index 00a26e63..397cf22d 100644 --- a/tests/parser/mod.rs +++ b/tests/parser/mod.rs @@ -150,7 +150,23 @@ fn match_expr_impl(e: &Expr, v: &Value) -> Result<()> { } => { match_span_opt(span, &v["refdot"]["span"])?; match_expr(refr, &v["refdot"]["refr"])?; - match_span(&field.0, &v["refdot"]["field"]) + match field { + Some((field_span, _)) => match_span(field_span, &v["refdot"]["field"]), + None => { + my_assert_eq!( + Value::Undefined, + v["refdot"]["field"].clone(), + "{}", + span.source.message( + span.line, + span.col, + "match-error", + "expected undefined field entry for dynamic access", + ), + ); + Ok(()) + } + } } Expr::RefBrack { span, refr, index, .. diff --git a/tests/type_analysis/aggregates.yaml b/tests/type_analysis/aggregates.yaml new file mode 100644 index 00000000..d8c996f9 --- /dev/null +++ b/tests/type_analysis/aggregates.yaml @@ -0,0 +1,77 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +cases: + - note: array comprehension + modules: + - | + package type_analysis.aggregates + + arr := [x | x := input.values[_]] + input_schema: + type: object + properties: + values: + type: array + items: + type: integer + required: + - values + rules: + - rule: data.type_analysis.aggregates.arr + type: + kind: Array + element: + kind: Integer + provenance: SchemaInput + exprs: + - expr: arr + line: 3 + col: 1 + type: + kind: Array + element: + kind: Integer + provenance: SchemaInput + - expr: "[x | x := input.values[_]]" + line: 3 + col: 8 + type: + kind: Array + element: + kind: Integer + provenance: SchemaInput + - expr: x + line: 3 + col: 9 + type: Schema + schema_backed: true + provenance: SchemaInput + - expr: "x := input.values[_]" + line: 3 + col: 15 + type: Boolean + provenance: Assignment + - expr: "input.values[_]" + line: 3 + col: 30 + type: Schema + schema_backed: true + provenance: SchemaInput + - expr: input.values + line: 3 + col: 23 + type: Schema + schema_backed: true + provenance: SchemaInput + - expr: input + line: 3 + col: 18 + type: Schema + schema_backed: true + provenance: SchemaInput + - expr: _ + line: 3 + col: 31 + type: Any + provenance: Unknown diff --git a/tests/type_analysis/builtins.yaml b/tests/type_analysis/builtins.yaml new file mode 100644 index 00000000..1b72f790 --- /dev/null +++ b/tests/type_analysis/builtins.yaml @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +cases: [] diff --git a/tests/type_analysis/collections.yaml b/tests/type_analysis/collections.yaml new file mode 100644 index 00000000..1b72f790 --- /dev/null +++ b/tests/type_analysis/collections.yaml @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +cases: [] diff --git a/tests/type_analysis/constants.yaml b/tests/type_analysis/constants.yaml new file mode 100644 index 00000000..0e1583b4 --- /dev/null +++ b/tests/type_analysis/constants.yaml @@ -0,0 +1,425 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +cases: + - note: literal propagation through assignments + modules: + - | + package type_analysis.constants + + value := "fox" + alias := value + combined := value + + default other := value + rules: + - rule: data.type_analysis.constants.value + type: String + constant: fox + provenance: Literal + - rule: data.type_analysis.constants.alias + type: String + constant: fox + provenance: Literal + - rule: data.type_analysis.constants.combined + type: String + constant: fox + provenance: Literal + - rule: data.type_analysis.constants.other + type: String + constant: fox + provenance: Literal + exprs: + - expr: "fox" + line: 3 + type: String + constant: fox + provenance: Literal + - expr: value + line: 3 + col: 1 + type: String + constant: fox + provenance: Literal + - expr: alias + line: 4 + col: 1 + type: String + constant: fox + provenance: Literal + - expr: combined + line: 5 + col: 1 + type: String + constant: fox + provenance: Literal + - expr: other + line: 7 + col: 9 + type: String + constant: fox + provenance: Literal + + - note: arithmetic constant folding + modules: + - | + package test.arithmetic + + sum := 10 + 5 + diff := 20 - 8 + product := 6 * 7 + quotient := 100 / 4 + remainder := 17 % 5 + negative := -42 + chain := 2 + 3 * 4 + rules: + - rule: data.test.arithmetic.sum + type: Integer + constant: 15 + - rule: data.test.arithmetic.diff + type: Integer + constant: 12 + - rule: data.test.arithmetic.product + type: Integer + constant: 42 + - rule: data.test.arithmetic.quotient + type: Integer + constant: 25 + - rule: data.test.arithmetic.remainder + type: Integer + constant: 2 + - rule: data.test.arithmetic.negative + type: Integer + constant: -42 + - rule: data.test.arithmetic.chain + type: Integer + constant: 14 + + - note: boolean constant folding + modules: + - | + package test.boolean + + and(a, b) if { + a + b + } else := false + + or_(a, b) if { + a + } else := true if { + b + } else := false + + not_(a) := true if { + not a + } else := false + + and_true := and(true, true) + and_false := and(true, false) + or_true := or_(true, false) + or_false := or_(false, false) + not_true := not_(false) + not_false := not_(true) + complex := or_(and(true, false), and(true, true)) + rules: + - rule: data.test.boolean.and_true + type: Boolean + constant: true + - rule: data.test.boolean.and_false + type: Boolean + constant: false + - rule: data.test.boolean.or_true + type: Boolean + constant: true + - rule: data.test.boolean.or_false + type: Boolean + constant: false + - rule: data.test.boolean.not_true + type: Boolean + constant: true + - rule: data.test.boolean.not_false + type: Boolean + constant: false + - rule: data.test.boolean.complex + type: Boolean + constant: true + + - note: comparison constant folding + modules: + - | + package test.comparison + + eq_true := 5 == 5 + eq_false := 5 == 6 + neq_true := 5 != 6 + neq_false := 5 != 5 + lt := 3 < 5 + lte := 5 <= 5 + gt := 10 > 5 + gte := 5 >= 5 + str_eq := 
"hello" == "hello" + str_neq := "hello" != "world" + rules: + - rule: data.test.comparison.eq_true + type: Boolean + constant: true + - rule: data.test.comparison.eq_false + type: Boolean + constant: false + - rule: data.test.comparison.neq_true + type: Boolean + constant: true + - rule: data.test.comparison.neq_false + type: Boolean + constant: false + - rule: data.test.comparison.lt + type: Boolean + constant: true + - rule: data.test.comparison.lte + type: Boolean + constant: true + - rule: data.test.comparison.gt + type: Boolean + constant: true + - rule: data.test.comparison.gte + type: Boolean + constant: true + - rule: data.test.comparison.str_eq + type: Boolean + constant: true + - rule: data.test.comparison.str_neq + type: Boolean + constant: true + + - note: array and object constant folding + modules: + - | + package test.collections + + arr := [1, 2, 3] + obj := {"a": 10, "b": 20} + nested := {"x": [1, 2], "y": {"z": 3}} + arr_index := [10, 20, 30][1] + obj_field := {"name": "alice"}["name"] + rules: + - rule: data.test.collections.arr + type: + kind: Array + element: + kind: Integer + constant: [1, 2, 3] + - rule: data.test.collections.obj + type: Object + constant: {"a": 10, "b": 20} + - rule: data.test.collections.nested + type: Object + - rule: data.test.collections.arr_index + type: Integer + constant: 20 + - rule: data.test.collections.obj_field + type: String + constant: alice + + - note: set constant folding + modules: + - | + package test.sets + + simple_set := {1, 2, 3} + union_set := {1, 2} | {2, 3} + intersect_set := {1, 2, 3} & {2, 3, 4} + diff_set := {1, 2, 3} - {2} + membership := 2 in {1, 2, 3} + not_in := 5 in {1, 2, 3} + rules: + - rule: data.test.sets.simple_set + type: + kind: Set + element: + kind: Integer + constant: + set!: + - 1 + - 2 + - 3 + - rule: data.test.sets.union_set + type: + kind: Set + element: + kind: Integer + constant: + set!: + - 1 + - 2 + - 3 + - rule: data.test.sets.intersect_set + type: + kind: Set + element: + kind: Integer + constant: + set!: + - 2 + - 3 + - rule: data.test.sets.diff_set + type: + kind: Set + element: + kind: Integer + constant: + set!: + - 1 + - 3 + - rule: data.test.sets.membership + type: Boolean + constant: true + - rule: data.test.sets.not_in + type: Boolean + constant: false + + - note: builtin function constant folding + modules: + - | + package test.builtins + + concat_result := concat("/", ["a", "b", "c"]) + upper_result := upper("hello") + lower_result := lower("WORLD") + trim_result := trim(" space ", " ") + split_result := split("a,b,c", ",") + contains_result := contains("hello world", "world") + startswith_result := startswith("hello", "hel") + endswith_result := endswith("world", "ld") + rules: + - rule: data.test.builtins.concat_result + type: String + constant: "a/b/c" + - rule: data.test.builtins.upper_result + type: String + constant: HELLO + - rule: data.test.builtins.lower_result + type: String + constant: world + - rule: data.test.builtins.trim_result + type: String + constant: space + - rule: data.test.builtins.split_result + type: + kind: Array + element: + kind: String + constant: ["a", "b", "c"] + - rule: data.test.builtins.contains_result + type: Boolean + constant: true + - rule: data.test.builtins.startswith_result + type: Boolean + constant: true + - rule: data.test.builtins.endswith_result + type: Boolean + constant: true + + - note: undefined propagation + modules: + - | + package test.undefined + + obj := {"a": 10} + missing := obj.b + chained := obj.b.c + array := [1, 2, 3] + 
out_of_bounds := array[10] + math_with_undef := missing + 5 + rules: + - rule: data.test.undefined.obj + type: Object + constant: {"a": 10} + - rule: data.test.undefined.missing + type: Unknown + - rule: data.test.undefined.chained + type: Unknown + - rule: data.test.undefined.array + type: + kind: Array + element: + kind: Integer + constant: [1, 2, 3] + - rule: data.test.undefined.out_of_bounds + type: Unknown + - rule: data.test.undefined.math_with_undef + type: Unknown + diagnostics: + - kind: SchemaViolation + message: "Property 'b' is not defined on this object" + line: 4 + col: 16 + - kind: SchemaViolation + message: "Property 'b' is not defined on this object" + line: 5 + col: 16 + + - note: schema constant extraction + modules: + - | + package test.schema + + value := input.fixed_value + enum_val := input.status + input_schema: + type: object + properties: + fixed_value: + const: 42 + status: + enum: ["active"] + choice: + anyOf: + - const: "option1" + - const: "option1" + rules: + - rule: data.test.schema.value + type: Integer + constant: 42 + provenance: SchemaInput + - rule: data.test.schema.enum_val + type: String + constant: active + provenance: SchemaInput + + - note: constant folding with control flow + modules: + - | + package test.controlflow + + result := 10 if true else := 20 + result2 := 30 if false else := 40 + ternary_chain := 1 if false else := 2 if true else := 3 + rules: + - rule: data.test.controlflow.result + type: Integer + constant: 10 + - rule: data.test.controlflow.result2 + type: Integer + constant: 40 + - rule: data.test.controlflow.ternary_chain + type: Integer + constant: 2 + + - note: mixed constant and variable operations + modules: + - | + package test.mixed + + const_part := 10 + 5 + needs_input := input.value + 5 + combined := const_part + 100 + rules: + - rule: data.test.mixed.const_part + type: Integer + constant: 15 + - rule: data.test.mixed.needs_input + type: Number + - rule: data.test.mixed.combined + type: Integer + constant: 115 + diff --git a/tests/type_analysis/control.yaml b/tests/type_analysis/control.yaml new file mode 100644 index 00000000..1b72f790 --- /dev/null +++ b/tests/type_analysis/control.yaml @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +cases: [] diff --git a/tests/type_analysis/core.yaml b/tests/type_analysis/core.yaml new file mode 100644 index 00000000..dffe11de --- /dev/null +++ b/tests/type_analysis/core.yaml @@ -0,0 +1,464 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
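+#
+# Each case below uses the shared fixture layout seen throughout these files:
+# `note` names the case, `modules` holds the Rego sources, optional
+# `input_schema`/`data_schema` supply JSON-Schema shapes, and
+# `rules`/`exprs`/`diagnostics` list the expected type facts
+# (type, constant, schema_backed, provenance) and messages, keyed by rule
+# path or by expression text, line, and column.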
+ +cases: + - note: bool literal + modules: + - | + package test + import rego.v1 + allow := true + rules: + - rule: data.test.allow + type: Boolean + constant: true + provenance: Literal + + - note: integer literal + modules: + - | + package test + import rego.v1 + count := 42 + rules: + - rule: data.test.count + type: Integer + constant: 42 + provenance: Literal + + - note: float literal + modules: + - | + package test + import rego.v1 + ratio := 3.14 + rules: + - rule: data.test.ratio + type: Number + constant: 3.14 + provenance: Literal + + - note: null literal + modules: + - | + package test + import rego.v1 + nothing := null + rules: + - rule: data.test.nothing + type: "Null" + constant: null + provenance: Literal + + - note: string literal + modules: + - | + package test + import rego.v1 + message := "hello" + rules: + - rule: data.test.message + type: String + constant: "hello" + provenance: Literal + + - note: empty array + modules: + - | + package test + import rego.v1 + items := [] + rules: + - rule: data.test.items + type: + kind: Array + element: + kind: Any + provenance: Literal + + - note: empty object + modules: + - | + package test + import rego.v1 + config := {} + rules: + - rule: data.test.config + type: + kind: Object + fields: {} + provenance: Literal + + - note: homogeneous array + modules: + - | + package test + import rego.v1 + numbers := [1, 2, 3] + rules: + - rule: data.test.numbers + type: + kind: Array + element: + kind: Integer + provenance: Literal + + - note: heterogeneous array + modules: + - | + package test + import rego.v1 + mixed := [1, "two", true] + rules: + - rule: data.test.mixed + type: + kind: Array + element: + kind: Union + variants: + - kind: Integer + - kind: String + - kind: Boolean + provenance: Literal + + - note: object with fields + modules: + - | + package test + import rego.v1 + person := {"name": "Alice", "age": 30} + rules: + - rule: data.test.person + type: + kind: Object + fields: + name: + kind: String + age: + kind: Integer + provenance: Literal + + - note: set literal + modules: + - | + package test + import rego.v1 + flags := {true, false} + rules: + - rule: data.test.flags + type: + kind: Set + element: + kind: Boolean + provenance: Literal + + - note: unary negation constant + modules: + - | + package test + import rego.v1 + neg := -42 + rules: + - rule: data.test.neg + type: Integer + constant: -42 + provenance: Literal + + - note: unary negation propagated + modules: + - | + package test + import rego.v1 + value := input.amount + neg_value := -value + input_schema: + type: object + properties: + amount: + type: number + required: + - amount + rules: + - rule: data.test.value + type: Schema + schema_backed: true + provenance: SchemaInput + - rule: data.test.neg_value + type: Number + provenance: Propagated + + - note: input property propagation + modules: + - | + package test + import rego.v1 + allow := input.flag + input_schema: + type: object + properties: + flag: + type: boolean + required: + - flag + rules: + - rule: data.test.allow + type: Schema + schema_backed: true + provenance: SchemaInput + + - note: data property propagation + modules: + - | + package test + import rego.v1 + mode := data.config.mode + data_schema: + type: object + properties: + config: + type: object + properties: + mode: + type: string + required: + - mode + required: + - config + rules: + - rule: data.test.mode + type: Schema + schema_backed: true + provenance: SchemaData + + - note: array indexing + modules: + - | + package test + import rego.v1 
+ first := input.items[0] + input_schema: + type: object + properties: + items: + type: array + items: + type: boolean + required: + - items + rules: + - rule: data.test.first + type: Schema + schema_backed: true + provenance: SchemaInput + + - note: builtin function + modules: + - | + package test + import rego.v1 + result := abs(-5) + rules: + - rule: data.test.result + type: Number + provenance: Builtin + + - note: equality constant folding + modules: + - | + package test + import rego.v1 + check := 2 == 2 + rules: + - rule: data.test.check + type: Boolean + constant: true + provenance: Propagated + + - note: membership test + modules: + - | + package test + import rego.v1 + present := 1 in [1, 2, 3] + rules: + - rule: data.test.present + type: Boolean + provenance: Propagated + + - note: literal propagation chain + modules: + - | + package type_analysis.chain.literal + import rego.v1 + + value := "snow" + stage1 := value + stage2 := stage1 + rules: + - rule: data.type_analysis.chain.literal.value + type: String + constant: snow + provenance: Literal + - rule: data.type_analysis.chain.literal.stage1 + type: String + constant: snow + provenance: Literal + - rule: data.type_analysis.chain.literal.stage2 + type: String + constant: snow + provenance: Literal + exprs: + - expr: value + line: 4 + col: 1 + type: String + constant: snow + provenance: Literal + - expr: stage1 + line: 5 + col: 1 + type: String + constant: snow + provenance: Literal + - expr: stage2 + line: 6 + col: 1 + type: String + constant: snow + provenance: Literal + + - note: schema propagation chain + modules: + - | + package type_analysis.chain.schema + import rego.v1 + + value := input.profile.name + stage1 := value + final := stage1 + input_schema: + type: object + properties: + profile: + type: object + properties: + name: + type: string + required: + - name + required: + - profile + rules: + - rule: data.type_analysis.chain.schema.value + type: Schema + schema_backed: true + provenance: SchemaInput + - rule: data.type_analysis.chain.schema.stage1 + type: Schema + schema_backed: true + provenance: SchemaInput + - rule: data.type_analysis.chain.schema.final + type: Schema + schema_backed: true + provenance: SchemaInput + exprs: + - expr: input.profile + line: 4 + col: 10 + type: Schema + schema_backed: true + provenance: SchemaInput + - expr: input.profile.name + line: 4 + col: 10 + type: Schema + schema_backed: true + provenance: SchemaInput + - expr: value + line: 4 + col: 1 + type: Schema + schema_backed: true + provenance: SchemaInput + - expr: stage1 + line: 5 + col: 1 + type: Schema + schema_backed: true + provenance: SchemaInput + - expr: final + line: 6 + col: 1 + type: Schema + schema_backed: true + provenance: SchemaInput + + - note: missing schema property + modules: + - | + package test + import rego.v1 + maybe := input.unknown + input_schema: + type: object + properties: + known: + type: string + required: + - known + rules: + - rule: data.test.maybe + type: Schema + schema_backed: true + provenance: SchemaInput + diagnostics: + - kind: SchemaViolation + message: Property 'unknown' + + - note: missing schema property with additional schema + modules: + - | + package test.additional + import rego.v1 + + dynamic := input.extra + input_schema: + type: object + properties: + required_field: + type: string + additionalProperties: + type: integer + required: + - required_field + rules: + - rule: data.test.additional.dynamic + type: Schema + schema_backed: true + provenance: SchemaInput + diagnostics: + - kind: 
SchemaViolation + message: "Property 'extra' is not defined by the schema; known properties: required_field" + + - note: schema violation diagnostic + modules: + - | + package test + import rego.v1 + check := input.mode == "invalid" + input_schema: + type: object + properties: + mode: + enum: + - allow + - deny + required: + - mode + rules: + - rule: data.test.check + type: Boolean + provenance: Propagated + diagnostics: + - kind: SchemaViolation + message: not allowed by schema enumeration diff --git a/tests/type_analysis/diagnostics.yaml b/tests/type_analysis/diagnostics.yaml new file mode 100644 index 00000000..61844d7a --- /dev/null +++ b/tests/type_analysis/diagnostics.yaml @@ -0,0 +1,49 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +cases: + - note: structural missing property diagnostic + modules: + - | + package type_analysis.diagnostics.structural + import rego.v1 + + base := {"inner": {"value": 1}} + missing := base.inner.missing + rules: + - rule: data.type_analysis.diagnostics.structural.missing + type: Unknown + provenance: Propagated + diagnostics: + - kind: SchemaViolation + message: "Property 'missing' is not defined on this object; known fields: value" + + - note: property access on non-object diagnostic + modules: + - | + package type_analysis.diagnostics.non_object + import rego.v1 + + text := "hello" + result := text.foo + rules: + - rule: data.type_analysis.diagnostics.non_object.result + type: Unknown + provenance: Propagated + diagnostics: + - kind: SchemaViolation + message: "Cannot access property 'foo' on string value" + + - note: constant false statement short-circuits analysis + modules: + - | + package type_analysis.diagnostics.constant_false + import rego.v1 + + allow if { + false + input.done == true + } + diagnostics: + - kind: UnreachableStatement + message: "Statement always evaluates to false; later statements were skipped" diff --git a/tests/type_analysis/loops.yaml b/tests/type_analysis/loops.yaml new file mode 100644 index 00000000..1b72f790 --- /dev/null +++ b/tests/type_analysis/loops.yaml @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +cases: [] diff --git a/tests/type_analysis/membership.yaml b/tests/type_analysis/membership.yaml new file mode 100644 index 00000000..550bd1a6 --- /dev/null +++ b/tests/type_analysis/membership.yaml @@ -0,0 +1,30 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +cases: + - note: membership on arrays and sets + modules: + - | + package type_analysis.membership + import rego.v1 + + arr := [1, 2, 3] + present := 2 in arr + absent := 5 in arr + + numbers := {1, 2, 3} + set_present := 2 in numbers + set_absent := 5 in numbers + rules: + - rule: data.type_analysis.membership.present + type: Boolean + constant: true + - rule: data.type_analysis.membership.absent + type: Boolean + constant: false + - rule: data.type_analysis.membership.set_present + type: Boolean + constant: true + - rule: data.type_analysis.membership.set_absent + type: Boolean + constant: false diff --git a/tests/type_analysis/operators.yaml b/tests/type_analysis/operators.yaml new file mode 100644 index 00000000..43f20c16 --- /dev/null +++ b/tests/type_analysis/operators.yaml @@ -0,0 +1,124 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +cases: + - note: arithmetic operations + modules: + - | + package type_analysis.operators.arith + import rego.v1 + + sum := 1 + 2 + difference := 5 - 3 + product := 2 * 4 + quotient := 8 / 2 + remainder := 9 % 4 + mix := input.amount + 5 + input_schema: + type: object + properties: + amount: + type: number + required: + - amount + rules: + - rule: data.type_analysis.operators.arith.sum + type: Integer + provenance: Propagated + - rule: data.type_analysis.operators.arith.difference + type: Integer + provenance: Propagated + - rule: data.type_analysis.operators.arith.product + type: Integer + provenance: Propagated + - rule: data.type_analysis.operators.arith.quotient + type: Integer + provenance: Propagated + - rule: data.type_analysis.operators.arith.remainder + type: Integer + provenance: Propagated + - rule: data.type_analysis.operators.arith.mix + type: Number + provenance: Propagated + + - note: comparison operations + modules: + - | + package type_analysis.operators.compare + import rego.v1 + + lt := 3 < 4 + le := input.score <= 100 + gt := 5 > 1 + ge := input.score >= input.threshold + ne := 1 != 2 + input_schema: + type: object + properties: + score: + type: number + threshold: + type: number + required: + - score + - threshold + rules: + - rule: data.type_analysis.operators.compare.lt + type: Boolean + provenance: Propagated + - rule: data.type_analysis.operators.compare.le + type: Boolean + provenance: Propagated + - rule: data.type_analysis.operators.compare.gt + type: Boolean + provenance: Propagated + - rule: data.type_analysis.operators.compare.ge + type: Boolean + provenance: Propagated + - rule: data.type_analysis.operators.compare.ne + type: Boolean + constant: true + provenance: Propagated + + - note: set union and intersection + modules: + - | + package type_analysis.operators.sets + import rego.v1 + + left := {1, 2} + right := {2, 3} + union := left | right + intersection := left & right + difference := left - right + rules: + - rule: data.type_analysis.operators.sets.left + type: + kind: Set + element: + kind: Integer + provenance: Literal + - rule: data.type_analysis.operators.sets.right + type: + kind: Set + element: + kind: Integer + provenance: Literal + - rule: data.type_analysis.operators.sets.union + type: + kind: Set + element: + kind: Integer + provenance: Literal + - rule: data.type_analysis.operators.sets.intersection + type: + kind: Set + element: + kind: Integer + provenance: Literal + - rule: data.type_analysis.operators.sets.difference + type: + kind: Set + element: + kind: Integer + provenance: Literal diff --git a/tests/type_analysis/references.yaml b/tests/type_analysis/references.yaml new file mode 100644 index 00000000..2b403609 --- /dev/null +++ b/tests/type_analysis/references.yaml @@ -0,0 +1,270 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +cases: + - note: input schema property access + modules: + - | + package type_analysis.references.schema + import rego.v1 + + user := input.user + user_name := user.name + user_age := input.user.age + input_schema: + type: object + properties: + user: + type: object + properties: + name: + type: string + age: + type: integer + required: + - name + - age + required: + - user + rules: + - rule: data.type_analysis.references.schema.user + type: + kind: Object + properties: + name: + kind: String + age: + kind: Integer + schema_backed: true + provenance: SchemaInput + - rule: data.type_analysis.references.schema.user_name + type: + kind: String + schema_backed: true + provenance: SchemaInput + - rule: data.type_analysis.references.schema.user_age + type: + kind: Integer + schema_backed: true + provenance: SchemaInput + + - note: data schema property access + modules: + - | + package type_analysis.references.data + import rego.v1 + + catalog := data.catalog + first_price := data.catalog.items[0].price + data_schema: + type: object + properties: + catalog: + type: object + properties: + items: + type: array + items: + type: object + properties: + price: + type: number + required: + - price + required: + - items + required: + - catalog + rules: + - rule: data.type_analysis.references.data.catalog + type: + kind: Object + properties: + items: + kind: Array + element: + kind: Object + properties: + price: + kind: Number + schema_backed: true + provenance: SchemaData + - rule: data.type_analysis.references.data.first_price + type: + kind: Number + schema_backed: true + provenance: SchemaData + + - note: structural object property fallback + modules: + - | + package type_analysis.references.structural + import rego.v1 + + person := {"name": "Alice", "age": 30} + person_name := person.name + rules: + - rule: data.type_analysis.references.structural.person + type: + kind: Object + fields: + name: + kind: String + age: + kind: Integer + provenance: Literal + - rule: data.type_analysis.references.structural.person_name + type: String + provenance: Propagated + + - note: schema array element access + modules: + - | + package type_analysis.references.array + import rego.v1 + + first_order := input.orders[0] + first_price := input.orders[0].price + any_order := input.orders[input.pick] + any_price := any_order.price + input_schema: + type: object + properties: + pick: + type: integer + orders: + type: array + items: + type: object + properties: + price: + type: number + required: + - price + required: + - orders + rules: + - rule: data.type_analysis.references.array.first_order + type: + kind: Object + properties: + price: + kind: Number + schema_backed: true + provenance: SchemaInput + - rule: data.type_analysis.references.array.first_price + type: + kind: Number + schema_backed: true + provenance: SchemaInput + - rule: data.type_analysis.references.array.any_order + type: + kind: Object + properties: + price: + kind: Number + schema_backed: true + provenance: SchemaInput + - rule: data.type_analysis.references.array.any_price + type: + kind: Number + schema_backed: true + provenance: SchemaInput + + - note: schema property missing -> structural any + modules: + - | + package type_analysis.references.unknown + import rego.v1 + + user := input.user + nickname := user.nickname + input_schema: + type: object + properties: + user: + type: object + properties: + name: + type: string + additionalProperties: false + required: + - name + required: + - user + rules: + - rule: 
data.type_analysis.references.unknown.user + type: + kind: Object + properties: + name: + kind: String + schema_backed: true + provenance: SchemaInput + - rule: data.type_analysis.references.unknown.nickname + type: Any + provenance: SchemaInput + diagnostics: + - kind: SchemaViolation + message: "Property 'nickname' is not defined by the schema; known properties: name" + + - note: dynamic key access on schema object + modules: + - | + package type_analysis.references.dynamic_key + import rego.v1 + + selected := input.selected + value := input.user[selected] + input_schema: + type: object + properties: + selected: + type: string + user: + type: object + properties: + name: + type: string + age: + type: integer + required: + - name + - age + required: + - selected + - user + rules: + - rule: data.type_analysis.references.dynamic_key.value + type: Any + provenance: SchemaInput + + - note: structural array indexing fallback + modules: + - | + package type_analysis.references.structural_array + import rego.v1 + + items := [{"value": 1}, {"value": 2}] + first := items[0] + first_value := first.value + rules: + - rule: data.type_analysis.references.structural_array.items + type: + kind: Array + element: + kind: Object + fields: + value: + kind: Integer + provenance: Literal + - rule: data.type_analysis.references.structural_array.first + type: + kind: Object + fields: + value: + kind: Integer + provenance: Propagated + - rule: data.type_analysis.references.structural_array.first_value + type: Integer + provenance: Propagated
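The fixtures above assert facts such as `data.test.allow` being a schema-backed Boolean with provenance `SchemaInput`. As an illustration only (not part of this patch), the sketch below shows how one such fact could be reproduced programmatically, assuming the `Engine::enable_type_checking`, `get_type_checker_mut`, and `type_check` APIs introduced elsewhere in this change:

```rust
// Illustrative sketch only; assumes the type-analysis APIs added in this change.
use anyhow::{anyhow, Result};
use regorus::{Engine, Schema};

fn main() -> Result<()> {
    let mut engine = Engine::new();

    // Same module as the "input property propagation" case in core.yaml.
    engine.add_policy(
        "allow.rego".to_string(),
        "package test\nimport rego.v1\nallow := input.flag\n".to_string(),
    )?;

    // Opt in to structural type analysis and attach the input schema.
    engine.enable_type_checking();
    let schema = Schema::from_json_str(
        r#"{"type":"object","properties":{"flag":{"type":"boolean"}},"required":["flag"]}"#,
    )
    .map_err(|e| anyhow!("failed to parse schema: {e:?}"))?;
    if let Some(checker) = engine.get_type_checker_mut() {
        checker.set_input_schema(schema);
    }

    // Run the analysis; diagnostics correspond to the `diagnostics:` entries
    // in the YAML fixtures.
    let _diagnostics = engine.type_check()?;

    // The inferred facts correspond to the `rules:` / `exprs:` entries; the
    // fixture expects `data.test.allow` to be schema-backed (SchemaInput).
    let facts = engine
        .get_type_checker()
        .and_then(|checker| checker.get_result());
    assert!(facts.is_some(), "type analysis results should be available");
    Ok(())
}
```

The YAML cases remain the source of truth for expected facts and diagnostics; this sketch is just one way to exercise a single fact outside the fixture harness.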