Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

76 changes: 76 additions & 0 deletions src/cli/issue-deduplication.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import { RawIssue } from './types';
import { Severity } from '../evaluators/types';

// Numeric rank for each severity; a larger number means a more severe issue
// (ERROR outranks WARNING). Used by the deduplication heuristic below only as
// a tie-breaker, after the suggestion and summary-length comparisons.
const SEVERITY_RANK: Record<Severity, number> = {
[Severity.ERROR]: 3,
[Severity.WARNING]: 2,
};

/**
 * Choose which of two issues in the same (file, line, match) group to keep.
 *
 * Preference order:
 * 1. The issue that carries a `suggestion` (makes the report actionable).
 * 2. The issue with the longer `summary` (more context).
 * 3. The issue with the higher severity (`ERROR` > `WARNING`).
 * 4. Otherwise `prev`, i.e. the one evaluated first.
 */
function pickPreferredIssue(prev: RawIssue, curr: RawIssue): RawIssue {
  // 1. Prefer an issue that has a suggestion.
  const prevHasSuggestion = !!prev.suggestion;
  const currHasSuggestion = !!curr.suggestion;
  if (currHasSuggestion !== prevHasSuggestion) {
    return currHasSuggestion ? curr : prev;
  }

  // 2. Tie breaker 1: longest summary.
  const prevLen = prev.summary?.length ?? 0;
  const currLen = curr.summary?.length ?? 0;
  if (currLen !== prevLen) return currLen > prevLen ? curr : prev;

  // 3. Tie breaker 2: higher severity. Unknown severities rank lowest (0).
  const prevRank = SEVERITY_RANK[prev.severity] ?? 0;
  const currRank = SEVERITY_RANK[curr.severity] ?? 0;
  if (currRank !== prevRank) return currRank > prevRank ? curr : prev;

  // 4. Tie breaker 3: first evaluated (`prev` is always earlier in the array).
  return prev;
}

/**
 * Filter and deduplicate overlapping issues to reduce noise.
 *
 * Issues are grouped by the exact triple (file, line, match); `column` is not
 * part of the key. Each group with more than one member is collapsed to a
 * single "best" issue.
 *
 * Deduplication heuristic:
 * 1. Prefer rules with a `suggestion` (to make the error actionable).
 * 2. Tie breaker 1: pick the one with the longest `summary` (maximum context).
 * 3. Tie breaker 2: prefer higher severity (`ERROR` > `WARNING`).
 * 4. Tie breaker 3: default to the first one evaluated.
 *
 * Note: issues with an empty or missing `match` are always preserved and never
 * deduplicated against each other, because their exact overlap cannot be
 * verified.
 *
 * The input array is not modified. Results are emitted group by group, in the
 * order in which each group's first issue appears in `issues`.
 *
 * @param issues - All issues collected for a run, in evaluation order.
 * @returns A new array containing the surviving issues.
 */
export function filterDuplicateIssues(issues: RawIssue[]): RawIssue[] {
  const grouped = new Map<string, RawIssue[]>();

  for (const issue of issues) {
    // JSON-encode the tuple rather than joining with ':' so the key is
    // unambiguous: both `file` and `match` may themselves contain ':', and a
    // delimiter-joined key would let distinct triples collide, e.g.
    // ("a:1", 2, "x") and ("a", 1, "2:x").
    const key = JSON.stringify([issue.file, issue.line, issue.match ?? '']);

    const group = grouped.get(key);
    if (group) {
      group.push(issue);
    } else {
      grouped.set(key, [issue]);
    }
  }

  const filtered: RawIssue[] = [];

  for (const group of grouped.values()) {
    const first = group[0];
    if (!first) continue;

    // Singletons have nothing to deduplicate; groups without match text are
    // kept whole because their overlap cannot be verified.
    if (group.length === 1 || !first.match) {
      filtered.push(...group);
      continue;
    }

    filtered.push(group.reduce((prev, curr) => pickPreferredIssue(prev, curr)));
  }

  return filtered;
}
51 changes: 43 additions & 8 deletions src/cli/orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@ import type {
ReportIssueParams, ProcessViolationsParams,
ProcessCriterionParams, ProcessCriterionResult, ValidationParams, ProcessPromptResultParams,
RunPromptEvaluationParams, RunPromptEvaluationResult, EvaluateFileParams, EvaluateFileResult,
RunPromptEvaluationResultSuccess
RunPromptEvaluationResultSuccess,
RawIssue
} from './types';
import {
calculateCost,
TokenUsageStats
} from '../providers/token-usage';
import { locateQuotedText } from "../output/location";
import { filterDuplicateIssues } from './issue-deduplication';


/*
Expand Down Expand Up @@ -150,6 +152,7 @@ function reportIssue(params: ReportIssueParams): void {
*/
function locateAndReportViolations(params: ProcessViolationsParams): {
hadOperationalErrors: boolean;
issues: RawIssue[];
} {
const {
violations,
Expand Down Expand Up @@ -227,6 +230,8 @@ function locateAndReportViolations(params: ProcessViolationsParams): {
}
}

const issues: RawIssue[] = [];

// Report only verified, unique violations
for (const {
v,
Expand All @@ -235,7 +240,7 @@ function locateAndReportViolations(params: ProcessViolationsParams): {
matchedText,
rowSummary,
} of verifiedViolations) {
reportIssue({
issues.push({
file: relFile,
line,
column,
Expand All @@ -251,7 +256,7 @@ function locateAndReportViolations(params: ProcessViolationsParams): {
});
}

return { hadOperationalErrors };
return { hadOperationalErrors, issues };
}

/*
Expand All @@ -276,6 +281,7 @@ function extractAndReportCriterion(
} = params;
let hadOperationalErrors = false;
let hadSeverityErrors = false;
const issues: RawIssue[] = [];

const nameKey = String(exp.name || exp.id || "");
const criterionId = exp.id
Expand Down Expand Up @@ -307,7 +313,7 @@ function extractAndReportCriterion(
expTargetSpec?.suggestion ||
metaTargetSpec?.suggestion ||
"Add the required target section.";
reportIssue({
issues.push({
file: relFile,
line: 1,
column: 1,
Expand All @@ -327,6 +333,7 @@ function extractAndReportCriterion(
maxScore,
hadOperationalErrors,
hadSeverityErrors,
issues,
scoreEntry: { id: ruleName, scoreText: "0.0/10", score: 0.0 },
scoreComponent: {
criterion: nameKey,
Expand All @@ -351,6 +358,7 @@ function extractAndReportCriterion(
maxScore,
hadOperationalErrors,
hadSeverityErrors,
issues: [],
scoreEntry: { id: ruleName, scoreText: "-", score: 0.0 },
scoreComponent: {
criterion: nameKey,
Expand Down Expand Up @@ -419,6 +427,7 @@ function extractAndReportCriterion(
});
hadOperationalErrors =
hadOperationalErrors || violationResult.hadOperationalErrors;
issues.push(...violationResult.issues);
} else if (score <= 2) {
// No violations but low score - report with summary
severity = score <= 1 ? Severity.ERROR : Severity.WARNING;
Expand All @@ -433,7 +442,7 @@ function extractAndReportCriterion(
const words = sum.split(/\s+/).filter(Boolean);
const limited = words.slice(0, 15).join(" ");
const summaryText = limited || "No findings";
reportIssue({
issues.push({
file: relFile,
line: 1,
column: 1,
Expand Down Expand Up @@ -464,6 +473,7 @@ function extractAndReportCriterion(
normalizedScore: normalizedScore,
normalizedMaxScore: 10,
},
issues,
};
}

Expand Down Expand Up @@ -584,6 +594,7 @@ function routePromptResult(
// Report violations grouped by criterion
let totalErrors = 0;
let totalWarnings = 0;
const issues: RawIssue[] = [];

for (const [criterionName, violations] of violationsByCriterion) {
// Find criterion ID from meta
Expand All @@ -608,6 +619,7 @@ function routePromptResult(
verbose: !!verbose,
});
hadOperationalErrors = hadOperationalErrors || violationResult.hadOperationalErrors;
issues.push(...violationResult.issues);

if (severity === Severity.ERROR) {
totalErrors += violations.length;
Expand All @@ -620,7 +632,7 @@ function routePromptResult(
// If no violations but we have a message (JSON output), report it
if (violationCount === 0 && (outputFormat === OutputFormat.Json || outputFormat === OutputFormat.ValeJson) && result.message) {
const ruleName = buildRuleName(promptFile.pack, promptId, undefined);
reportIssue({
issues.push({
file: relFile,
line: 1,
column: 1,
Expand All @@ -646,6 +658,7 @@ function routePromptResult(
hadOperationalErrors,
hadSeverityErrors: severity === Severity.ERROR,
scoreEntries: [scoreEntry],
issues,
};
}

Expand All @@ -661,6 +674,7 @@ function routePromptResult(
promptWarnings = 0;
const criterionScores: EvaluationSummary[] = [];
const scoreComponents: ScoreComponent[] = [];
const issues: RawIssue[] = [];

// Iterate through each criterion
for (const exp of meta.criteria || []) {
Expand Down Expand Up @@ -688,6 +702,9 @@ function routePromptResult(
if (criterionResult.scoreComponent) {
scoreComponents.push(criterionResult.scoreComponent);
}
if (criterionResult.issues) {
issues.push(...criterionResult.issues);
}
Comment on lines +705 to +707
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Find type definitions for ProcessCriterionResult and ErrorTrackingResult
echo "=== Searching for ProcessCriterionResult and ErrorTrackingResult type definitions ==="
rg -n "type ProcessCriterionResult|interface ProcessCriterionResult|type ErrorTrackingResult|interface ErrorTrackingResult" src/cli --type ts -A 10

echo ""
echo "=== Searching for RawIssue type definition ==="
rg -n "type RawIssue|interface RawIssue" src/cli --type ts -A 5

Repository: TRocket-Labs/vectorlint

Length of output: 1484


🏁 Script executed:

#!/bin/bash
# Find implementations of extractAndReportCriterion and routePromptResult
echo "=== Searching for extractAndReportCriterion implementation ==="
rg -n "function extractAndReportCriterion|const extractAndReportCriterion" src/cli --type ts -A 40 | head -80

echo ""
echo "=== Searching for routePromptResult implementation ==="
rg -n "function routePromptResult|const routePromptResult" src/cli --type ts -A 40 | head -80

Repository: TRocket-Labs/vectorlint

Length of output: 4702


🏁 Script executed:

#!/bin/bash
# Get the full return statements for extractAndReportCriterion
echo "=== extractAndReportCriterion return statement (around line 361 and end of function) ==="
sed -n '267,450p' src/cli/orchestrator.ts | tail -40

echo ""
echo "=== routePromptResult return statement (around line 559-700) ==="
sed -n '690,750p' src/cli/orchestrator.ts

Repository: TRocket-Labs/vectorlint

Length of output: 3079


🏁 Script executed:

#!/bin/bash
# Check for all return statements in extractAndReportCriterion
echo "=== All return statements in extractAndReportCriterion (lines 267-450) ==="
sed -n '267,450p' src/cli/orchestrator.ts | grep -n "return {"

echo ""
echo "=== Full view of extractAndReportCriterion function end (around line 440-460) ==="
sed -n '420,460p' src/cli/orchestrator.ts

Repository: TRocket-Labs/vectorlint

Length of output: 1240


🏁 Script executed:

#!/bin/bash
# Get the complete final return statement of extractAndReportCriterion
echo "=== Final return statement of extractAndReportCriterion (lines 456-475) ==="
sed -n '456,475p' src/cli/orchestrator.ts

echo ""
echo "=== Check line numbers - extract full function signature and locate all returns ==="
rg -n "^function extractAndReportCriterion" src/cli/orchestrator.ts -A 200 | grep -E "return \{|^[0-9]+:function" | head -20

Repository: TRocket-Labs/vectorlint

Length of output: 749


🏁 Script executed:

#!/bin/bash
# Check the early return statements to see if they include issues
echo "=== Early return 1 (around line 329) ==="
sed -n '325,340p' src/cli/orchestrator.ts

echo ""
echo "=== Early return 2 (around line 354) ==="
sed -n '350,365p' src/cli/orchestrator.ts

echo ""
echo "=== Check if issues field is included in early returns ==="
sed -n '325,365p' src/cli/orchestrator.ts | grep -A 12 "return {"

Repository: TRocket-Labs/vectorlint

Length of output: 1609


🏁 Script executed:

#!/bin/bash
# Get the complete final return statement to verify issues is included
echo "=== Complete final return of extractAndReportCriterion (lines 456-480) ==="
sed -n '456,480p' src/cli/orchestrator.ts

Repository: TRocket-Labs/vectorlint

Length of output: 597


Make issues field required on ProcessCriterionResult and ErrorTrackingResult types.

`issues` is currently declared optional (`issues?: RawIssue[]`) on `ErrorTrackingResult` (and inherited by `ProcessCriterionResult`), but both `extractAndReportCriterion` and `routePromptResult` always initialize and return the field in every code path—even the early returns include it. Making `issues` required in the type definitions would eliminate the need for the guard at lines 705-707 and accurately reflect the implementation invariant.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/cli/orchestrator.ts` around lines 705 - 707, Change the types so `issues`
is non-optional on ErrorTrackingResult and ProcessCriterionResult (e.g.,
`issues: RawIssue[]`), update any places that construct those results (notably
`extractAndReportCriterion` and `routePromptResult`) to keep returning an
initialized array, and remove the defensive guard around `issues` in
orchestrator.ts (the `if (criterionResult.issues) {
issues.push(...criterionResult.issues); }` block) so you can directly push
`criterionResult.issues`; also scan for other callers that assumed optional and
adjust them to expect an array.

}

if (outputFormat === OutputFormat.Json && scoreComponents.length > 0) {
Expand All @@ -706,6 +723,7 @@ function routePromptResult(
hadOperationalErrors,
hadSeverityErrors,
scoreEntries: criterionScores,
issues,
};
}

Expand Down Expand Up @@ -781,6 +799,8 @@ async function evaluateFile(
let totalInputTokens = 0;
let totalOutputTokens = 0;

const allIssues: RawIssue[] = [];

const allScores = new Map<string, EvaluationSummary[]>();

const content = readFileSync(file, "utf-8");
Expand Down Expand Up @@ -897,8 +917,6 @@ async function evaluateFile(
jsonFormatter,
verbose,
});
totalErrors += promptResult.errors;
totalWarnings += promptResult.warnings;
hadOperationalErrors =
hadOperationalErrors || promptResult.hadOperationalErrors;
hadSeverityErrors = hadSeverityErrors || promptResult.hadSeverityErrors;
Expand All @@ -907,6 +925,23 @@ async function evaluateFile(
const ruleName = (p.meta.id || p.filename).toString();
allScores.set(ruleName, promptResult.scoreEntries);
}
if (promptResult.issues) {
allIssues.push(...promptResult.issues);
}
}

// Deduplicate issues
const deduplicatedIssues = filterDuplicateIssues(allIssues);

// Recompute counts from deduplicated issues so they match what is reported
for (const issue of deduplicatedIssues) {
if (issue.severity === Severity.ERROR) totalErrors += 1;
else totalWarnings += 1;
}

// Group and format output appropriately
for (const issue of deduplicatedIssues) {
reportIssue(issue);
}

const tokenUsageStats: TokenUsageStats = {
Expand Down
8 changes: 7 additions & 1 deletion src/cli/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ export interface ErrorTrackingResult {
hadOperationalErrors: boolean;
hadSeverityErrors: boolean;
scoreEntries?: EvaluationSummary[];
issues?: RawIssue[];
}

export interface EvaluationContext {
Expand All @@ -57,7 +58,7 @@ export interface EvaluationContext {
verbose?: boolean;
}

export interface ReportIssueParams {
export interface RawIssue {
file: string;
line: number;
column: number;
Expand All @@ -72,6 +73,11 @@ export interface ReportIssueParams {
match?: string;
}

/**
* @deprecated Use `RawIssue` instead. This type alias is retained for backward compatibility.
*/
export type ReportIssueParams = RawIssue;

export interface ProcessViolationsParams extends EvaluationContext {
violations: Array<{
line?: number;
Expand Down
Loading