diff --git a/src/vuln_analysis/data_models/output.py b/src/vuln_analysis/data_models/output.py
index ba262f32..0be5bc36 100644
--- a/src/vuln_analysis/data_models/output.py
+++ b/src/vuln_analysis/data_models/output.py
@@ -15,12 +15,107 @@
import typing
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
from pydantic import model_validator
from .input import AgentMorpheusEngineInput
+# =============================================================================
+# STRUCTURED OUTPUT MODELS FOR LLM STAGES
+# =============================================================================
+
+class IntelScoringCriteria(BaseModel):
+ """
+ Individual scoring criteria for intel quality assessment.
+
+ Every criterion is an integer bounded by ``ge``/``le``. The upper bounds
+ (20 + 10 + 15 + 15 + 15 + 10 + 10 + 5) add up to 100, so the sum of all
+ fields is a score in the range 0-100.
+ """
+ technical_specificity: int = Field(ge=0, le=20, description="Technical detail precision (max 20)")
+ clarity: int = Field(ge=0, le=10, description="Text clarity and structure (max 10)")
+ component_impact: int = Field(ge=0, le=15, description="Clarity of affected components (max 15)")
+ reproducibility: int = Field(ge=0, le=15, description="Exploit reproducibility details (max 15)")
+ vulnerable_function: int = Field(ge=0, le=15, description="Specific function identification (max 15)")
+ mitigation: int = Field(ge=0, le=10, description="Mitigation guidance (max 10)")
+ environment: int = Field(ge=0, le=10, description="Environment context (max 10)")
+ configuration: int = Field(ge=0, le=5, description="Configuration details (max 5)")
+
+
+class IntelScoringJustifications(BaseModel):
+ """
+ Brief justifications for each scoring criterion.
+
+ Field names mirror the criteria of ``IntelScoringCriteria``. Each
+ justification is a non-empty string of at most 200 characters.
+ """
+ technical_specificity: str = Field(min_length=1, max_length=200, description="Why this score")
+ clarity: str = Field(min_length=1, max_length=200, description="Why this score")
+ component_impact: str = Field(min_length=1, max_length=200, description="Why this score")
+ reproducibility: str = Field(min_length=1, max_length=200, description="Why this score")
+ vulnerable_function: str = Field(min_length=1, max_length=200, description="Why this score")
+ mitigation: str = Field(min_length=1, max_length=200, description="Why this score")
+ environment: str = Field(min_length=1, max_length=200, description="Why this score")
+ configuration: str = Field(min_length=1, max_length=200, description="Why this score")
+
+
+class IntelScoringOutput(BaseModel):
+    """Structured intel-quality scoring result: per-criterion scores and their justifications."""
+    scores: IntelScoringCriteria
+    justifications: IntelScoringJustifications
+
+    def calculate_total(self) -> int:
+        """Return the sum of the eight individual criterion scores."""
+        criteria = self.scores
+        total = criteria.technical_specificity + criteria.clarity
+        total += criteria.component_impact + criteria.reproducibility
+        total += criteria.vulnerable_function + criteria.mitigation
+        total += criteria.environment + criteria.configuration
+        return total
+
+
+class ChecklistGenerationOutput(BaseModel):
+ """Structured output for checklist generation: a list of investigation questions."""
+ # min_length / max_length on a list field bound the number of items (3 to 5),
+ # matching the wording of the description below.
+ checklist_items: list[str] = Field(
+ min_length=3,
+ max_length=5,
+ description="List of 3-5 investigation questions for CVE exploitability assessment"
+ )
+
+
+class JustificationStructuredOutput(BaseModel):
+ """
+ Structured justification output with validated category.
+
+ ``category`` is restricted to the twelve literal labels listed below, so any
+ other value fails validation. ``reasoning`` must be at least 20 characters long.
+ """
+ category: typing.Literal[
+ "false_positive",
+ "code_not_present",
+ "code_not_reachable",
+ "requires_configuration",
+ "requires_dependency",
+ "requires_environment",
+ "compiler_protected",
+ "runtime_protected",
+ "perimeter_protected",
+ "mitigating_control_protected",
+ "uncertain",
+ "vulnerable"
+ ] = Field(description="Exploitability classification category (exact match required)")
+ reasoning: str = Field(
+ min_length=20,
+ description="Brief explanation citing specific evidence from investigation"
+ )
+
+
+class CVSSMetricStructuredOutput(BaseModel):
+    """
+    Structured output for a single CVSS v3 base metric evaluation.
+
+    Besides the per-field constraints, the model checks that ``value_abbreviation``
+    is one of the value codes CVSS v3 defines for ``metric_abbreviation``, so that
+    ``to_cvss_component()`` can never produce a component such as ``AV:Z``.
+    """
+    metric_abbreviation: typing.Literal["AV", "AC", "PR", "UI", "S", "C", "I", "A"]
+    value_abbreviation: str = Field(pattern=r"^[A-Z]$", description="Single letter value code")
+    selected_value: str = Field(description="Full name of selected value (e.g., 'Network', 'Low')")
+    definition_matched: str = Field(description="Definition text that was matched")
+    evidence: str = Field(description="Supporting evidence quotes or 'None'")
+
+    @model_validator(mode="after")
+    def check_value_allowed_for_metric(self) -> "CVSSMetricStructuredOutput":
+        """
+        Reject value codes that CVSS v3 does not define for the selected metric.
+
+        Raises:
+            ValueError: if the value code is not valid for the metric (surfaced by
+                pydantic as a validation error, like the other field constraints).
+        """
+        # Value codes per base metric, as defined by the CVSS v3.x specification
+        allowed_by_metric = {
+            "AV": {"N", "A", "L", "P"},
+            "AC": {"L", "H"},
+            "PR": {"N", "L", "H"},
+            "UI": {"N", "R"},
+            "S": {"U", "C"},
+            "C": {"H", "L", "N"},
+            "I": {"H", "L", "N"},
+            "A": {"H", "L", "N"},
+        }
+        allowed = allowed_by_metric[self.metric_abbreviation]
+        if self.value_abbreviation not in allowed:
+            raise ValueError(
+                f"'{self.value_abbreviation}' is not a valid value for CVSS metric "
+                f"'{self.metric_abbreviation}'; expected one of {sorted(allowed)}"
+            )
+        return self
+
+    def to_cvss_component(self) -> str:
+        """Format as CVSS component string (e.g., 'AV:N')"""
+        return f"{self.metric_abbreviation}:{self.value_abbreviation}"
+
+
+# =============================================================================
+# EXISTING OUTPUT MODELS
+# =============================================================================
+
class AgentIntermediateStep(BaseModel):
"""
Represents info for an intermediate step taken by an agent.
diff --git a/src/vuln_analysis/functions/cve_agent.py b/src/vuln_analysis/functions/cve_agent.py
index f1434aa0..f36f303a 100644
--- a/src/vuln_analysis/functions/cve_agent.py
+++ b/src/vuln_analysis/functions/cve_agent.py
@@ -26,7 +26,7 @@
from langchain.agents import AgentExecutor
from langchain.agents import create_react_agent
from langchain.agents.agent import RunnableAgent
-from langchain.agents.mrkl.output_parser import MRKLOutputParser
from langchain_core.exceptions import OutputParserException
from langchain_core.prompts import PromptTemplate
from pydantic import Field
@@ -34,6 +34,7 @@
from vuln_analysis.tools.tool_names import ToolNames
from vuln_analysis.utils.error_handling_decorator import ToolRaisedException
from vuln_analysis.utils.prompting import get_agent_prompt
+from vuln_analysis.utils.thinking_aware_parser import ThinkingAwareMRKLParser, create_thinking_aware_error_handler
from vuln_analysis.logging.loggers_factory import LoggingFactory, trace_id
logger = LoggingFactory.get_agent_logger(__name__)
@@ -105,17 +106,23 @@ async def _create_agent(config: CVEAgentExecutorToolConfig, builder: Builder,
}
)
+ # Use thinking-aware parser for robust handling of Llama 3.3 thinking mode
+ output_parser = ThinkingAwareMRKLParser()
+
+ # Create thinking-aware error handler
+ error_handler = create_thinking_aware_error_handler(enabled_tool_names)
+
agent = create_react_agent(llm=llm,
tools=tools,
prompt=prompt,
- output_parser=MRKLOutputParser(),
+ output_parser=output_parser,
stop_sequence=["\nObservation:", "\n\tObservation:"])
agent_executor = AgentExecutor(
agent=agent,
tools=tools,
early_stopping_method="force",
- handle_parsing_errors="Check your output and make sure it conforms, use the Action/Action Input syntax",
+ handle_parsing_errors=error_handler,
max_iterations=config.max_iterations,
return_intermediate_steps=config.return_intermediate_steps,
verbose=config.verbose)
diff --git a/src/vuln_analysis/functions/cve_generate_cvss.py b/src/vuln_analysis/functions/cve_generate_cvss.py
index 424c24d2..e5946ea1 100644
--- a/src/vuln_analysis/functions/cve_generate_cvss.py
+++ b/src/vuln_analysis/functions/cve_generate_cvss.py
@@ -32,11 +32,12 @@
from langchain_core.prompts import PromptTemplate
from pydantic import Field
from cvss import CVSS3
-from langchain.agents.mrkl.output_parser import MRKLOutputParser
from vuln_analysis.data_models.state import AgentMorpheusEngineState
from vuln_analysis.tools.tool_names import ToolNames
from vuln_analysis.utils.prompting import get_cvss_prompt
+from vuln_analysis.utils.thinking_aware_parser import ThinkingAwareMRKLParser, create_thinking_aware_error_handler
logger = logging.getLogger(__name__)
@@ -196,17 +197,22 @@ async def _create_agent(config: CVEGenerateCvssToolConfig, builder: Builder,
(tool.name == ToolNames.CODE_KEYWORD_SEARCH and state.code_index_path is None))
]
+ # Get tool names for error handler
+ enabled_tool_names = [tool.name for tool in tools]
+
# Get prompt (examples now embedded in template)
prompt = PromptTemplate.from_template(
get_cvss_prompt(config.prompt, config.prompt_examples)
)
- error_handler = _make_parse_error_handler(is_openai)
+ # Use thinking-aware parser and error handler
+ output_parser = ThinkingAwareMRKLParser()
+ error_handler = create_thinking_aware_error_handler(enabled_tool_names)
agent = create_react_agent(llm=llm,
tools=tools,
prompt=prompt,
- output_parser=MRKLOutputParser())
+ output_parser=output_parser)
agent_executor = AgentExecutor(
agent=agent,
diff --git a/src/vuln_analysis/functions/cve_justify.py b/src/vuln_analysis/functions/cve_justify.py
index e7f50828..5d1bf5d8 100644
--- a/src/vuln_analysis/functions/cve_justify.py
+++ b/src/vuln_analysis/functions/cve_justify.py
@@ -38,33 +38,47 @@ class CVEJustifyToolConfig(FunctionBaseConfig, name="cve_justify"):
@register_function(config_type=CVEJustifyToolConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
async def cve_justify(config: CVEJustifyToolConfig, builder: Builder):
- from langchain_core.prompts import PromptTemplate
-
from vuln_analysis.data_models.state import AgentMorpheusEngineState
from vuln_analysis.utils.justification_parser import JustificationParser
+ from vuln_analysis.utils.prompting import get_justification_chat_prompt
+ from vuln_analysis.data_models.output import JustificationStructuredOutput
jp = JustificationParser()
llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
- prompt = PromptTemplate(input_variables=["summary"], template=jp.JUSTIFICATION_PROMPT)
- chain = prompt | llm
+ # Bind structured output schema to LLM
+ structured_llm = llm.with_structured_output(JustificationStructuredOutput)
+
+ prompt = get_justification_chat_prompt()
+ chain = prompt | structured_llm
async def justify_cve(summary):
- justification_text = await chain.ainvoke({"summary": summary})
- return justification_text.content
+        """Run the structured-output chain on one summary and return what the chain produces.
+
+        The chain is bound to ``JustificationStructuredOutput``, so the result is
+        expected to be an instance of that model.
+        """
+        return await chain.ainvoke({"summary": summary})
async def _arun(state: AgentMorpheusEngineState) -> AgentMorpheusEngineState:
+        """
+        Classify every summarized CVE and store the outcome on ``state.justifications``.
+
+        One structured-output call is issued per entry of ``state.final_summaries``
+        (run concurrently). Each result is stored under the vulnerability id of the
+        summary it was generated from, as a dict holding the final justification
+        label, the reasoning text and the affected status.
+        """
trace_id.set(state.original_input.input.scan.id)
- results = await asyncio.gather(*(justify_cve(summary) for summary in state.final_summaries.values()))
- parsed_justification = await asyncio.gather(jp._parse_justification(results))
+        # Take ids and summaries from the same mapping so a result can never be
+        # attributed to an id that belongs to a different summary.
+        vuln_ids = list(state.final_summaries.keys())
+        results = await asyncio.gather(
+            *(justify_cve(state.final_summaries[vuln_id]) for vuln_id in vuln_ids)
+        )
- # format justification output
+        # Convert structured output to expected format
justifications = {}
- for i, vuln_id in enumerate(state.checklist_results.keys()):
- justifications[vuln_id] = {}
- for key in parsed_justification[0]:
- justifications[vuln_id][key] = parsed_justification[0][key][i]
+        for vuln_id, result in zip(vuln_ids, results):
+            # Map to final justification label (unknown categories pass through unchanged)
+            final_label = jp.RAW_TO_FINAL_JUSTIFICATION_MAP.get(result.category, result.category)
+
+            # Map to affected status
+            affected_status = jp.JUSTIFICATION_TO_AFFECTED_STATUS_MAP.get(final_label, "UNKNOWN")
+
+            justifications[vuln_id] = {
+                jp.JUSTIFICATION_LABEL_COL_NAME: final_label,
+                jp.JUSTIFICATION_REASON_COL_NAME: result.reasoning,
+                jp.AFFECTED_STATUS_COL_NAME: affected_status,
+            }
state.justifications = justifications
return state
diff --git a/src/vuln_analysis/functions/cve_summarize.py b/src/vuln_analysis/functions/cve_summarize.py
index af5161a3..c835c5ad 100644
--- a/src/vuln_analysis/functions/cve_summarize.py
+++ b/src/vuln_analysis/functions/cve_summarize.py
@@ -40,13 +40,11 @@ class CVESummarizeToolConfig(FunctionBaseConfig, name="cve_summarize"):
@register_function(config_type=CVESummarizeToolConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN])
async def cve_summarize(config: CVESummarizeToolConfig, builder: Builder):
- from langchain_core.prompts import PromptTemplate
-
from vuln_analysis.data_models.state import AgentMorpheusEngineState
- from vuln_analysis.utils.prompting import SUMMARY_PROMPT
+ from vuln_analysis.utils.prompting import get_summary_chat_prompt
llm = await builder.get_llm(llm_name=config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
- prompt = PromptTemplate(input_variables=["response"], template=SUMMARY_PROMPT)
+ prompt = get_summary_chat_prompt()
chain = prompt | llm
async def summarize_cve(results):
diff --git a/src/vuln_analysis/utils/checklist_prompt_generator.py b/src/vuln_analysis/utils/checklist_prompt_generator.py
index 1822f3c5..e1babf49 100644
--- a/src/vuln_analysis/utils/checklist_prompt_generator.py
+++ b/src/vuln_analysis/utils/checklist_prompt_generator.py
@@ -20,7 +20,7 @@
from jinja2 import Template
from langchain_core.language_models.base import BaseLanguageModel
-from vuln_analysis.utils.prompting import MOD_FEW_SHOT
+from vuln_analysis.utils.prompting import CHECKLIST_USER_PROMPT
from vuln_analysis.utils.prompting import additional_intel_prompting
from vuln_analysis.utils.prompting import get_mod_examples
from vuln_analysis.utils.string_utils import attempt_fix_list_string
@@ -29,10 +29,10 @@
logger = LoggingFactory.get_agent_logger(__name__)
-# Format MOD_FEW_SHOT with examples, preserving {tool_descriptions} for Jinja2 rendering
+# Format CHECKLIST_USER_PROMPT with examples, preserving {tool_descriptions} for Jinja2 rendering
# Use double braces for tool_descriptions to escape it during format()
-_MOD_FEW_SHOT_ESCAPED = MOD_FEW_SHOT.replace('{tool_descriptions}', '{{tool_descriptions}}')
-DEFAULT_CHECKLIST_PROMPT = _MOD_FEW_SHOT_ESCAPED.format(examples=get_mod_examples())
+_CHECKLIST_USER_PROMPT_ESCAPED = CHECKLIST_USER_PROMPT.replace('{tool_descriptions}', '{{tool_descriptions}}')
+DEFAULT_CHECKLIST_PROMPT = _CHECKLIST_USER_PROMPT_ESCAPED.format(examples=get_mod_examples())
cve_prompt2 = """Parse the following numbered checklist into a python list in the format ["x", "y", "z"], a comma separated list surrounded by square braces: {{template}}"""
@@ -118,7 +118,9 @@ async def generate_checklist(prompt: str | None,
tool_names: list[str] | None = None,
enable_llm_list_parsing: bool = False) -> str:
- from vuln_analysis.utils.prompting import build_tool_descriptions
+ from langchain_core.prompts import ChatPromptTemplate
+ from vuln_analysis.utils.prompting import build_tool_descriptions, CHECKLIST_SYS_PROMPT
+ from vuln_analysis.data_models.output import ChecklistGenerationOutput
if not prompt:
prompt = DEFAULT_CHECKLIST_PROMPT
@@ -135,7 +137,6 @@ async def generate_checklist(prompt: str | None,
tool_descriptions = "Analysis tools will be used to investigate these questions."
# Add tool_descriptions to input_dict for Jinja2 rendering
- # This treats it as a Jinja2 variable, consistent with all CVE fields
input_dict_with_tools = {
**input_dict,
'tool_descriptions': tool_descriptions
@@ -162,15 +163,25 @@ async def generate_checklist(prompt: str | None,
# Jinja2 renders {tool_descriptions} along with all CVE fields
format_cve_intel = await format_jinja_prompt(cve_prompt1, input_dict_with_tools)
- gen_checklist = await llm.ainvoke(format_cve_intel)
+ # Bind structured output schema to LLM
+ structured_llm = llm.with_structured_output(ChecklistGenerationOutput)
- if enable_llm_list_parsing:
- parsing_checklist_template = await format_jinja_prompt(cve_prompt2, {"template": gen_checklist.content})
- parsed_checklist = await llm.ainvoke(parsing_checklist_template)
- return parsed_checklist.content
+ # Use ChatPromptTemplate with variable placeholder for rendered message
+ chat_prompt = ChatPromptTemplate.from_messages([
+ ("system", CHECKLIST_SYS_PROMPT),
+ ("user", "{rendered_message}")
+ ])
+
+ # Chain returns validated Pydantic object
+ chain = chat_prompt | structured_llm
+ # Pass the rendered message to the LLM as a dictionary
+ result = await chain.ainvoke({"rendered_message": format_cve_intel}) # type: ignore[assignment]
+
+ # Return the checklist items as JSON-formatted list string for backward compatibility
+ # This matches the expected format: '["item1", "item2", ...]'
+ import json
+ return json.dumps(result.checklist_items)
except Exception as e:
logging.error(f" Error in generating checklist : {e}")
raise
-
- return gen_checklist.content
diff --git a/src/vuln_analysis/utils/intel_source_score.py b/src/vuln_analysis/utils/intel_source_score.py
index 8a040a36..265fb8d0 100644
--- a/src/vuln_analysis/utils/intel_source_score.py
+++ b/src/vuln_analysis/utils/intel_source_score.py
@@ -18,7 +18,7 @@
from ..data_models.cve_intel import CveIntel
from ..functions.cve_calculate_intel_score import CVECalculateIntelScoreConfig
from ..utils import data_utils
-from ..utils.prompting import additional_intel_prompting
+from ..utils.prompting import additional_intel_prompting, INTEL_SCORING_USER_PROMPT
from aiq.builder.framework_enum import LLMFrameworkEnum
logger = logging.getLogger(__name__)
@@ -31,250 +31,39 @@ def __init__(self,
self._builder = builder
async def calculate_intel_score(self, intel: CveIntel) -> CveIntel:
+        """
+        Score the quality of ``intel`` with the configured LLM and store it on ``intel.intel_score``.
+
+        The LLM is bound to the ``IntelScoringOutput`` schema; the total is computed in
+        code from the validated per-criterion scores. If the LLM response cannot be
+        parsed or validated, the score falls back to 0 and the failure is logged.
+
+        Args:
+            intel: CVE intel whose fields are rendered into the scoring prompt.
+
+        Returns:
+            The same ``intel`` object with ``intel_score`` set.
+        """
+        from langchain_core.exceptions import OutputParserException
+        from pydantic import ValidationError
+
+        from ..utils.prompting import get_intel_scoring_chat_prompt
+        from ..data_models.output import IntelScoringOutput
+
llm = await self._builder.get_llm(llm_name=self._config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
assert isinstance(llm, BaseLanguageModel)
- response = await llm.ainvoke(self.__get_calculate_score_prompt(intel))
+        # Bind structured output schema to LLM
+        structured_llm = llm.with_structured_output(IntelScoringOutput)
+
+        # Get ChatPromptTemplate
+        chat_prompt = get_intel_scoring_chat_prompt()
+
+        # Render CVE data into user message
+        cve_data = self.__render_template(additional_intel_prompting, intel)
+
+        # Chain returns validated Pydantic object
+        chain = chat_prompt | structured_llm
+        try:
+            result = await chain.ainvoke({"cve_data": cve_data})
+        except (ValidationError, OutputParserException) as e:
+            # A malformed LLM response must not abort scoring; fall back to the default low score
+            logger.error("Failed to parse scoring response: %s", e)
+            result = None
+
if os.environ.get("EXTENDED_VERBOSE_DEBUG", False):
- logger.debug("\nresponse: %s", str(response.content))
+            logger.debug("\nStructured output: %s",
+                         result.model_dump_json(indent=2) if result is not None else None)
- score = self.__extract_score(response.content)
+        # Calculate total score from validated criteria; 0 when no valid structured output was produced
+        score = result.calculate_total() if result is not None else 0
if os.environ.get("EXTENDED_VERBOSE_DEBUG", False):
- logger.debug("\nScore: %d", score)
+            logger.debug("\nCalculated total score: %d", score)
intel.intel_score = score
return intel
- def __get_calculate_score_prompt(self, intel: CveIntel) -> str:
- return """
-Evaluate CVE intelligence quality by scoring each criterion independently.
-
-
-
-Provide individual scores for each criterion based on the CVE data below.
-
-1. technical_specificity (max 20 points)
- - How precise and in-depth are the technical details?
- - Are specific vulnerable functions, methods, or code paths identified?
-
-2. clarity (max 10 points)
- - Is the text well-structured and grammatically correct?
- - Is the description clear and easy to understand?
-
-3. component_impact (max 15 points)
- - Does it clearly state what is affected?
- - Are the consequences explicitly described?
-
-4. reproducibility (max 15 points)
- - Could an attacker understand how to exploit this from the description?
- - Are attack vectors and preconditions described?
-
-5. vulnerable_function (max 15 points)
- - Is a specific function, method, or code snippet named?
- - Are vulnerable code locations identifiable?
-
-6. mitigation (max 10 points)
- - Are patches, workarounds, or mitigations described?
- - Is remediation guidance provided?
-
-7. environment (max 10 points)
- - Is there context about the affected environment (OS, version, configuration)?
- - Are deployment scenarios mentioned?
-
-8. configuration (max 5 points)
- - Are relevant configuration settings or misconfigurations described?
-
-
-
-Return JSON only (no markdown, no code blocks):
-{
- "scores": {
- "technical_specificity": <0-20>,
- "clarity": <0-10>,
- "component_impact": <0-15>,
- "reproducibility": <0-15>,
- "vulnerable_function": <0-15>,
- "mitigation": <0-10>,
- "environment": <0-10>,
- "configuration": <0-5>
- },
- "justifications": {
- "technical_specificity": "brief reason for score",
- "clarity": "brief reason for score",
- "component_impact": "brief reason for score",
- "reproducibility": "brief reason for score",
- "vulnerable_function": "brief reason for score",
- "mitigation": "brief reason for score",
- "environment": "brief reason for score",
- "configuration": "brief reason for score"
- }
-}
-
-Do NOT calculate or include a total_score. Only provide the individual criterion scores.
-
-
-
-
-Example Input:
-CVE ID: CVE-2025-30204
-CVE Description: golang-jwt is a Go implementation of JSON Web Tokens. Starting in version 3.2.0 and prior to versions 5.2.2 and 4.5.2, the function parse.ParseUnverified splits (via a call to strings.Split) its argument (which is untrusted data) on periods. As a result, in the face of a malicious request whose Authorization header consists of Bearer followed by many period characters, a call to that function incurs allocations to the tune of O(n) bytes (where n stands for the length of the function's argument), with a constant factor of about 16. This issue is fixed in 5.2.2 and 4.5.2.
-CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
-CWE Name: CWE-405: Asymmetric Resource Consumption (Amplification)
-Notable Vendors: Hashicorp
-
-Example Output (High Quality ~80):
-{
- "scores": {
- "technical_specificity": 18,
- "clarity": 9,
- "component_impact": 14,
- "reproducibility": 14,
- "vulnerable_function": 15,
- "mitigation": 9,
- "environment": 9,
- "configuration": 4
- },
- "justifications": {
- "technical_specificity": "Function parse.ParseUnverified identified with O(n) allocation details",
- "clarity": "Well-structured with clear impact",
- "component_impact": "golang-jwt and DoS impact explicitly stated",
- "reproducibility": "Clear attack: malicious Authorization header with periods",
- "vulnerable_function": "parse.ParseUnverified explicitly named",
- "mitigation": "Patches 5.2.2 and 4.5.2 specified",
- "environment": "Versions 3.2.0-5.2.2 listed",
- "configuration": "Limited config details"
- }
-}
-
-Example Input:
-CVE ID: CVE-2022-29810
-CVE Description: The Hashicorp go-getter library before 1.5.11 does not redact an SSH key from a URL query parameter.
-CVSS Vector: CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:N/A:N
-CWE Name: CWE-532: Insertion of Sensitive Information into Log File
-Notable Vendors: Hashicorp
-
-Example Output (Medium Quality ~62):
-{
- "scores": {
- "technical_specificity": 12,
- "clarity": 8,
- "component_impact": 12,
- "reproducibility": 10,
- "vulnerable_function": 8,
- "mitigation": 8,
- "environment": 8,
- "configuration": 3
- },
- "justifications": {
- "technical_specificity": "Moderate detail about SSH key issue",
- "clarity": "Clear but brief",
- "component_impact": "go-getter and credential exposure stated",
- "reproducibility": "Attack path somewhat clear, lacks details",
- "vulnerable_function": "General functionality, no specific function",
- "mitigation": "Version 1.5.11 patches",
- "environment": "Affected versions specified",
- "configuration": "Minimal context"
- }
-}
-
-Example Input:
-CVE ID: CVE-2022-2385
-CVE Description: A security issue was discovered in aws-iam-authenticator where an allow-listed IAM identity may be able to modify their username and escalate privileges.
-CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
-CWE Name: CWE-20: Improper Input Validation
-Notable Vendors: Kubernetes
-
-Example Output (Low Quality ~20):
-{
- "scores": {
- "technical_specificity": 3,
- "clarity": 5,
- "component_impact": 5,
- "reproducibility": 2,
- "vulnerable_function": 0,
- "mitigation": 3,
- "environment": 4,
- "configuration": 0
- },
- "justifications": {
- "technical_specificity": "Very vague, no detail",
- "clarity": "Brief but understandable",
- "component_impact": "General privilege escalation mention",
- "reproducibility": "No exploit details",
- "vulnerable_function": "No functions identified",
- "mitigation": "Patch mentioned",
- "environment": "Minimal version info",
- "configuration": "None"
- }
-}
-
-
-
-
-""" + self.__render_template(additional_intel_prompting, intel) + "\n\n\nProvide your scoring JSON:"
-
- def __extract_score(self, text: str) -> int:
- """
- Extract individual scores from LLM response and calculate total.
- This prevents LLM arithmetic hallucinations by:
- 1. Validating each score against its maximum
- 2. Calculating total in code (not LLM)
- """
- text = text.replace("```", "").replace("json", "").strip()
- if os.environ.get("EXTENDED_VERBOSE_DEBUG", False):
- logger.debug("\nRaw LLM response: %s", str(text))
-
- try:
- data = json.loads(text)
-
- # Extract individual scores
- scores = data.get('scores', {})
-
- # Define maximum values for each criterion
- max_values = {
- 'technical_specificity': 20,
- 'clarity': 10,
- 'component_impact': 15,
- 'reproducibility': 15,
- 'vulnerable_function': 15,
- 'mitigation': 10,
- 'environment': 10,
- 'configuration': 5
- }
-
- # Validate and cap each score at its maximum
- validated_scores = {}
- for criterion, max_val in max_values.items():
- score = scores.get(criterion, 0)
- # Ensure score is within valid range [0, max]
- validated_scores[criterion] = max(0, min(score, max_val))
-
- if score > max_val:
- logger.warning(
- "Score for '%s' (%d) exceeds maximum (%d), capping to max",
- criterion, score, max_val
- )
-
- # Calculate total using code (not LLM)
- total_score = sum(validated_scores.values())
-
- # Final safety check: total should not exceed 100
- total_score = min(total_score, 100)
-
- if os.environ.get("EXTENDED_VERBOSE_DEBUG", False):
- logger.debug("\nValidated scores: %s", validated_scores)
- logger.debug("\nCalculated total: %d", total_score)
-
- except (json.JSONDecodeError, KeyError, TypeError) as e:
- logger.error("Failed to parse scoring response: %s", e)
- logger.debug("Problematic text: %s", text)
- # Return a default low score on parse failure
- return 0
- return total_score
def __render_template(self, template_str: str, intel: CveIntel) -> str:
from jinja2 import Template
diff --git a/src/vuln_analysis/utils/justification_parser.py b/src/vuln_analysis/utils/justification_parser.py
index 78fe2602..d54caeac 100644
--- a/src/vuln_analysis/utils/justification_parser.py
+++ b/src/vuln_analysis/utils/justification_parser.py
@@ -25,86 +25,6 @@ class JustificationParser:
JUSTIFICATION_REASON_COL_NAME = "justification"
AFFECTED_STATUS_COL_NAME = "affected_status"
- JUSTIFICATION_PROMPT = dedent("""
-
-Review the CVE exploitability investigation summary and perform two tasks:
-1. Classify the exploitability status into one of 12 predefined categories
-2. Provide reasoning for your classification, citing specific evidence from the investigation summary
-
-
-
-Select the single most appropriate category based on the investigation findings.
-
-CLASSIFICATION CATEGORIES (in logical precedence order):
-
-1. false_positive - CVE-to-package association is incorrect (wrong package or mismatched CVE)
-
-2. code_not_present - Vulnerable code/library is absent from the container
- (If code is not present, subsequent factors are irrelevant)
-
-3. code_not_reachable - Vulnerable code exists but is never executed at runtime
- (Only applicable if code IS present but execution path analysis shows no calls)
-
-4. requires_configuration - Exploitation requires specific configuration that is disabled
- (Configuration prevents exploitation)
-
-5. requires_dependency - Exploitation requires a dependency that is missing
-
-6. requires_environment - Exploitation requires specific environment that is absent
-
-7. compiler_protected - Compiler flags prevent exploitation
-
-8. runtime_protected - Runtime mechanisms (ASLR, DEP, sandboxing) prevent exploitation
-
-9. perimeter_protected - Network/physical/logical perimeter defenses block exploitation
-
-10. mitigating_control_protected - Other security controls reduce risk
-
-11. uncertain - Insufficient information to determine exploitability
-
-12. vulnerable - Package is actually vulnerable and needs patching
-
-EXPLOITATION CONDITIONS:
-For a CVE to be classified as "vulnerable", ALL of these must be true:
-- Vulnerable code is PRESENT in the container
-- Vulnerable code is USED/CALLED by the application
-- Vulnerable code is REACHABLE from attack surfaces (user input, network, file processing)
-- No effective mitigations or protections are in place
-
-IF EXPLOITATION CONDITIONS ARE NOT MET:
-Select the PRIMARY reason for non-exploitability following the logical precedence
-order above. For example:
-- If code is not present: "code_not_present" (even if other factors would also prevent it)
-- If code is not reachable: "code_not_reachable" (not "requires_environment")
-- If missing dependency prevents it: "requires_dependency"
-
-The categories are ordered by logical precedence. Work through the list from top
-to bottom and select the first category that applies to the situation.
-
-Use "uncertain" only as a final fallback when the investigation truly lacks
-sufficient information to make any determination.
-
-
-
-Provide exactly two lines:
-
-Line 1: category_name (exact category name from the list above)
-Line 2: reasoning (brief explanation citing specific evidence from the summary)
-
-Do not include labels like "Category:" or "Reasoning:". Just the values on separate lines.
-
-
-
-code_not_reachable
-The vulnerable PIL.ImageMath.eval function exists in the installed Pillow library, but call chain analysis confirmed it is never invoked from application code. The application only uses PIL.Image.open() and PIL.Image.thumbnail() functions, which do not call ImageMath.eval.
-
-
-
-{summary}
-
-
-Provide your justification classification and reasoning on two separate lines:""").strip("\n")
-
RAW_TO_FINAL_JUSTIFICATION_MAP = {
"false_positive": "false_positive",
"code_not_present": "code_not_present",
diff --git a/src/vuln_analysis/utils/prompting.py b/src/vuln_analysis/utils/prompting.py
index ce36986c..078c7f8e 100644
--- a/src/vuln_analysis/utils/prompting.py
+++ b/src/vuln_analysis/utils/prompting.py
@@ -13,7 +13,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Some examples to be optionally passed along in the agent prompt.
+"""
+Prompts for the vulnerability analysis pipeline.
+
+This module contains all LLM prompts organized in the order of pipeline execution:
+1. Preprocessing Stage: Intel Scoring (ChatPromptTemplate)
+2. Core LLM Engine Stage:
+ - Checklist (ChatPromptTemplate)
+ - Agent (PromptTemplate - ReAct)
+ - CVSS (PromptTemplate - ReAct)
+ - Summary (ChatPromptTemplate)
+ - Justification (ChatPromptTemplate)
+"""
+
from abc import ABC
from abc import abstractmethod
from textwrap import dedent
@@ -21,6 +33,10 @@
# pylint: disable=line-too-long
+# =============================================================================
+# SHARED UTILITIES
+# =============================================================================
+
def build_tool_descriptions(tool_names: list[str]) -> list[str]:
"""
Build tool descriptions based on enabled tools.
@@ -81,43 +97,544 @@ def build_tool_descriptions(tool_names: list[str]) -> list[str]:
return descriptions
+class PromptBuilder(ABC):
+    """Interface for objects that render one fragment of a prompt template."""
+
+    @abstractmethod
+    def build_prompt(self) -> str:
+        """Return the prompt fragment produced by this builder."""
+
+class IfPromptBuilder(PromptBuilder):
-SUMMARY_PROMPT = """
-Summarize CVE exploitability investigation results into a clear, evidence-based
-paragraph. The investigation results consist of checklist items (questions) and
-their corresponding conclusions from the security analysis.
+    """Render a conditional bullet line for a single template variable.
+
+    The fragment produced by ``build_prompt`` uses Jinja-style syntax::
+
+        {% if <prop_name> %}
+        - <description>{{<prop_name> | string() | truncate(1024)}}
+        {% endif %}
+    """
+
+    def __init__(self, prop_name: str, description: str) -> None:
+        """Store the variable name and the label printed in front of its value.
+
+        Args:
+            prop_name: Name of the template variable that is tested and printed.
+            description: Text placed between the ``- `` bullet and the value.
+        """
+        self.prop_name = prop_name
+        self.description = description
+
+    def build_prompt(self) -> str:
+        """Return the template fragment for this field.
+
+        The lines are joined directly rather than passing an interpolated
+        f-string through ``dedent``: ``dedent`` would run after interpolation,
+        so a multi-line ``description`` could change the common margin and
+        leave stray indentation in the fragment. For single-line inputs the
+        output is unchanged.
+        """
+        # Doubled braces in the f-string emit the literal ``{{ ... }}`` delimiters.
+        value_expr = f"{{{{{self.prop_name} | string() | truncate(1024)}}}}"
+        return "\n".join((
+            f"{{% if {self.prop_name} %}}",
+            f"- {self.description}{value_expr}",
+            "{% endif %}",
+        ))
+
+
+class IfElsePromptBuilder(PromptBuilder):
+    """Render a conditional bullet line with a fallback template variable.
+
+    The fragment produced by ``build_prompt`` uses Jinja-style syntax::
+
+        {% if <first_prop_name> %}
+        - <description>{{<first_prop_name> | string() | truncate(1024)}}
+        {% elif <second_prop_name> %}
+        - <description>{{<second_prop_name> | string() | truncate(1024)}}
+        {% endif %}
+    """
+
+    def __init__(self, first_prop_name: str, second_prop_name: str, description: str) -> None:
+        """Store both variable names and the label shared by the two branches.
+
+        Args:
+            first_prop_name: Template variable tested first.
+            second_prop_name: Template variable tested in the ``elif`` branch.
+            description: Text placed between the ``- `` bullet and the value.
+        """
+        self.first_prop_name = first_prop_name
+        self.second_prop_name = second_prop_name
+        self.description = description
+
+    def build_prompt(self) -> str:
+        """Return the template fragment for this pair of fields.
+
+        The lines are joined directly rather than passing an interpolated
+        f-string through ``dedent``: ``dedent`` would run after interpolation,
+        so a multi-line ``description`` could change the common margin and
+        leave stray indentation in the fragment. For single-line inputs the
+        output is unchanged.
+        """
+
+        def bullet(prop_name: str) -> str:
+            # Doubled braces in the f-string emit the literal ``{{ ... }}`` delimiters.
+            return f"- {self.description}{{{{{prop_name} | string() | truncate(1024)}}}}"
+
+        return "\n".join((
+            f"{{% if {self.first_prop_name} %}}",
+            bullet(self.first_prop_name),
+            f"{{% elif {self.second_prop_name} %}}",
+            bullet(self.second_prop_name),
+            "{% endif %}",
+        ))
+
+
+# Intel fields offered to the checklist prompt. To add one, append a builder with the
+# template variable name(s) and the label printed before the value.
+additional_intel_fields: list[PromptBuilder] = [
+    # Keep these at the top
+    IfPromptBuilder('cve_id', 'CVE ID: '),
+    IfElsePromptBuilder('nvd_cve_description', 'ghsa_description', 'CVE Description: '),
+
+    # Sort these alphabetically
+    IfElsePromptBuilder('nvd_cvss_vector', 'ghsa_cvss_vector_string', 'CVSS Vector: '),
+    IfElsePromptBuilder('nvd_cwe_name', 'ghsa_cwes', 'CWE Name: '),
+    IfPromptBuilder('ghsa_vulnerabilities', 'GHSA Details: '),
+    IfPromptBuilder('nvd_configurations', 'Known Affected Software: '),
+    IfPromptBuilder('nvd_cwe_description', 'CWE Description: '),
+    # Empty label: the extended description is printed as a bare bullet.
+    IfPromptBuilder('nvd_cwe_extended_description', ''),
+    IfPromptBuilder('nvd_vendor_names', 'Notable Vulnerable Software Vendors: '),
+    IfPromptBuilder('rhsa_bugzilla_description', 'RHSA Description: '),
+    IfPromptBuilder('rhsa_details', 'RHSA Details: '),
+    IfPromptBuilder('rhsa_package_state', 'RHSA Affected Packages: '),
+    IfPromptBuilder('rhsa_statement', 'RHSA Statement: '),
+    # IfPromptBuilder('ubuntu_notices', 'Ubuntu Priority Reason: '),  # Disabled for now since it's very long
+    IfPromptBuilder('ubuntu_ubuntu_description', 'Ubuntu Security Note: '),
+    IfPromptBuilder('vulnerable_dependencies', 'Identified Vulnerable Dependencies: '),
+    IfPromptBuilder('plugin_data', 'Extra information: '),
+]
+
+# One template fragment per field, newline-separated, in the order listed above.
+additional_intel_prompting = '\n'.join(builder.build_prompt() for builder in additional_intel_fields)
+
+
+def get_mod_examples(type='questions', choices=(0, 1)):
+    """Return the selected examples as a single newline-joined string.
+
+    Args:
+        type: ``'questions'`` selects from ``ex_questions``; any other value
+            selects from ``ex_statements``. The name shadows the builtin but is
+            kept so existing keyword callers continue to work.
+        choices: Indices of the examples to include. Only membership matters:
+            examples are emitted in list order (not in the order given here)
+            and indices that match no example are ignored.
+
+    Returns:
+        The chosen examples joined with ``'\\n'``, with every ``{idx}``
+        placeholder replaced by the example's 1-based position in the output.
+        An empty selection yields an empty string.
+    """
+    # The default for ``choices`` is a tuple: a list default would be one shared
+    # mutable object reused across calls.
+    source = ex_questions if type == 'questions' else ex_statements
+    selected = [example for position, example in enumerate(source) if position in choices]
+
+    # Renumber from 1 so the output reads "Example 1", "Example 2", ... regardless
+    # of which source indices were picked.
+    return '\n'.join(example.format(idx=number) for number, example in enumerate(selected, start=1))
+
+
+# Example prompts whose checklist items are phrased as questions.
+# Each entry pairs a "CVE Details" section with an example "Checklist" section.
+# `{idx}` is the only str.format placeholder in these strings; get_mod_examples()
+# fills it with the example's 1-based position, so the text must not contain other
+# literal braces (the GHSA details in the last entry use <...> rather than braces).
+# NOTE(review): presumably consumed as few-shot examples by the checklist prompt - confirm against the caller.
+# NOTE(review): the last checklist item of the CVE-2023-5363 entry contains the typo
+# "implemenations"; it is left untouched here because it is runtime prompt text.
+ex_questions = [
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2023-24329
+- CVE description: An issue in the urllib.parse component of Python before 3.11.4 allows attackers to bypass blocklisting methods by supplying a URL that starts with blank characters.
+- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H/A:N
+- CWE Name: CWE-20: Improper Input Validation (4.14)
+- CWE Description: The product receives input or data, but it does not validate or incorrectly validates that the input has the properties that are required to process the data safely and correctly.
+Input validation is a frequently-used technique for checking potentially dangerous inputs in order to ensure that the inputs are safe for processing within the code, or when communicating with other components. When software does not validate input properly, an attacker is able to craft the input in a form that is not expected by the rest of the application. This will lead to parts of the system receiving unintended input, which may result in altered control flow, arbitrary control of a resource, or arbitrary code execution. Input validation is not the only technique for processing input, however. Other techniques attempt to transform potentially-dangerous input into something safe, such as filtering (CWE-790) - which attempts to remove dangerous inputs - or encoding/escaping (CWE-116), which attempts to ensure that the input is not misinterpreted when it is included in output to another component. Other techniques exist as well (see CWE-138 for more examples.) Input validation can be applied to: raw data - strings, numbers, parameters, file contents, etc. metadata - information about the raw data, such as headers or size Data can be simple or structured. Structured data can be composed of many nested layers, composed of combinations of metadata and raw data, with other simple or structured data. Many properties of raw data or metadata may need to be validated upon entry into the code, such as: specified quantities such as size, length, frequency, price, rate, number of operations, time, etc. implied or derived quantities, such as the actual size of a file instead of a specified size indexes, offsets, or positions into more complex data structures symbolic keys or other elements into hash tables, associative arrays, etc. well-formedness, i.e. 
syntactic correctness - compliance with expected syntax lexical token correctness - compliance with rules for what is treated as a token specified or derived type - the actual type of the input (or what the input appears to be) consistency - between individual data elements, between raw data and metadata, between references, etc. conformance to domain-specific rules, e.g. business logic equivalence - ensuring that equivalent inputs are treated the same authenticity, ownership, or other attestations about the input, e.g. a cryptographic signature to prove the source of the data Implied or derived properties of data must often be calculated or inferred by the code itself. Errors in deriving properties may be considered a contributing factor to improper input validation. Note that "input validation" has very different meanings to different people, or within different classification schemes. Caution must be used when referencing this CWE entry or mapping to it. For example, some weaknesses might involve inadvertently giving control to an attacker over an input when they should not be able to provide an input at all, but sometimes this is referred to as input validation. Finally, it is important to emphasize that the distinctions between input validation and output escaping are often blurred, and developers must be careful to understand the difference, including how input validation is not always sufficient to prevent vulnerabilities, especially when less stringent data types must be supported, such as free-form text. Consider a SQL injection scenario in which a person's last name is inserted into a query. The name "O'Reilly" would likely pass the validation step since it is a common last name in the English language. However, this valid name cannot be directly inserted into the database because it contains the "'" apostrophe character, which would need to be escaped or otherwise transformed. 
In this case, removing the apostrophe might reduce the risk of SQL injection, but it would produce incorrect behavior because the wrong name would be recorded.
+- Notable Vulnerable Software Vendors: ['Fedoraproject', 'Netapp', 'Python']
+
+Example {idx}: Checklist:
+[
+ "Review Python Usage: Is the Python installation actively used by applications within the container? Check for scripts or applications that rely on Python, particularly those that might parse URLs using `urllib.parse`.",
+ "Inspect URL Parsing Logic: Does the codebase contain any usage of `urllib.parse` or similar URL parsing mechanisms? Focus on how URLs are handled and whether there is a reliance on blocklisting methods to filter out potentially harmful URLs. This is crucial since the CVE describes a bypass of blocklisting methods via URLs starting with blank characters.",
+ "Evaluate Input Validation Practices: Assess the robustness of input validation practices within the application. Since the CVE involves improper input validation, are inputs, especially URLs, properly sanitized and validated against unexpected or malicious data?"
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2023-36632
+- CVE description: The legacy email.utils.parseaddr function in Python through 3.11.4 allows attackers to trigger "RecursionError: maximum recursion depth exceeded while calling a Python object" via a crafted argument. This argument is plausibly an untrusted value from an application's input data that was supposed to contain a name and an e-mail address. NOTE: email.utils.parseaddr is categorized as a Legacy API in the documentation of the Python email package. Applications should instead use the email.parser.BytesParser or email.parser.Parser class. NOTE: the vendor's perspective is that this is neither a vulnerability nor a bug. The email package is intended to have size limits and to throw an exception when limits are exceeded; they were exceeded by the example demonstration code.
+- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
+- CWE Name: CWE-674: Uncontrolled Recursion (4.14)
+- CWE Description: The product does not properly control the amount of recursion that takes place, consuming excessive resources, such as allocated memory or the program stack.
+- Notable Vulnerable Software Vendors: ['Python']
+
+Example {idx}: Checklist:
+[
+ "Identify Usage of `email.utils.parseaddr`: Review the application code within the container image to check if the `email.utils.parseaddr` function is being used. This function is the specific target of the vulnerability. Does the codebase contain the string 'email.utils.parseaddr'?",
+ "Assess Input Data Handling: Does the application using `email.utils.parseaddr` process potentially untrusted input data that could include crafted arguments designed to exploit this vulnerability? Evaluate how the application handles exceptions like `RecursionError`, as the exploit triggers this specific error.",
+ "Review Alternative Implementations: Has the application already migrated to recommended alternatives such as `email.parser.BytesParser` or `email.parser.Parser` in the code base? If not, recommend transitioning away from the legacy `email.utils.parseaddr` to these safer alternatives."
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2023-50447
+- CVE description: Pillow through 10.1.0 allows PIL.ImageMath.eval Arbitrary Code Execution via the environment parameter, a different vulnerability than CVE-2022-22817 (which was about the expression parameter).
+- CVSS Vector: CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:H/I:H/A:H
+- CWE Name: CWE-94: Improper Control of Generation of Code ('Code Injection') (4.14)
+- CWE Description: The product constructs all or part of a code segment using externally-influenced input from an upstream component, but it does not neutralize or incorrectly neutralizes special elements that could modify the syntax or behavior of the intended code segment.
+When a product allows a user's input to contain code syntax, it might be possible for an attacker to craft the code in such a way that it will alter the intended control flow of the product. Such an alteration could lead to arbitrary code execution. Injection problems encompass a wide variety of issues -- all mitigated in very different ways. For this reason, the most effective way to discuss these weaknesses is to note the distinct features which classify them as injection weaknesses. The most important issue to note is that all injection problems share one thing in common -- i.e., they allow for the injection of control plane data into the user-controlled data plane. This means that the execution of the process may be altered by sending code in through legitimate data channels, using no other mechanism. While buffer overflows, and many other flaws, involve the use of some further issue to gain execution, injection problems need only for the data to be parsed. The most classic instantiations of this category of weakness are SQL injection and format string vulnerabilities.
+- Notable Vulnerable Software Vendors: ['Debian', 'Python']
+
+Example {idx}: Checklist:
+[
+ "Evaluate Use of PIL.ImageMath.eval: Does any application within the container use the PIL.ImageMath.eval function with user-supplied input in the environment parameter? This is the method and parameter where the vulnerability exists.",
+ "Assess Data Input Sources: Do the applications using Pillow receive input directly from untrusted sources (e.g., user uploads, external APIs)? This will help in understanding the risk exposure and potential for exploitation.",
+ "Security Controls and Sanitization: Review the application's input validation and sanitization measures. Since the vulnerability allows for arbitrary code execution via code injection, is the input properly sanitized before being processed?"
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2023-5363
+- CVE description: Issue summary: A bug has been identified in the processing of key and initialisation vector (IV) lengths. This can lead to potential truncation or overruns during the initialisation of some symmetric ciphers. Impact summary: A truncation in the IV can result in non-uniqueness, which could result in loss of confidentiality for some cipher modes. When calling EVP_EncryptInit_ex2(), EVP_DecryptInit_ex2() or EVP_CipherInit_ex2() the provided OSSL_PARAM array is processed after the key and IV have been established. Any alterations to the key length, via the "keylen" parameter or the IV length, via the "ivlen" parameter, within the OSSL_PARAM array will not take effect as intended, potentially causing truncation or overreading of these values. The following ciphers and cipher modes are impacted: RC2, RC4, RC5, CCM, GCM and OCB. For the CCM, GCM and OCB cipher modes, truncation of the IV can result in loss of confidentiality. For example, when following NIST's SP 800-38D section 8.2.1 guidance for constructing a deterministic IV for AES in GCM mode, truncation of the counter portion could lead to IV reuse. Both truncations and overruns of the key and overruns of the IV will produce incorrect results and could, in some cases, trigger a memory exception. However, these issues are not currently assessed as security critical.
+Changing the key and/or IV lengths is not considered to be a common operation and the vulnerable API was recently introduced. Furthermore it is likely that application developers will have spotted this problem during testing since decryption would fail unless both peers in the communication were similarly vulnerable. For these reasons we expect the probability of an application being vulnerable to this to be quite low. However if an application is vulnerable then this issue is considered very serious. For these reasons we have assessed this issue as Moderate severity overall.
+The OpenSSL SSL/TLS implementation is not affected by this issue. The OpenSSL 3.0 and 3.1 FIPS providers are not affected by this because the issue lies outside of the FIPS provider boundary. OpenSSL 3.1 and 3.0 are vulnerable to this issue.
+- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N
+- Notable Vulnerable Software Vendors: ['Debian', 'Netapp', 'Openssl']
+
+Example {idx}: Checklist:
+[
+ "Check OpenSSL Version: What version of OpenSSL is running in the container image? The vulnerability specifically affects OpenSSL versions 3.0 and 3.1. Is the version running in the container within the vulnerability range? If the container is running these versions, it may be vulnerable.",
+ "Identify Affected Cipher Modes: Does the application within the container image use any of the affected cipher modes: RC2, RC4, RC5, CCM, GCM, or OCB? Special attention should be given to applications using CCM, GCM, and OCB modes as these are particularly noted for potential loss of confidentiality due to IV truncation.",
+ "Review Cryptographic Operations: Does the code or configuration of applications using OpenSSL have any instances where `EVP_EncryptInit_ex2()`, `EVP_DecryptInit_ex2()`, or `EVP_CipherInit_ex2()` are called? Are there any modifications to the `keylen` or `ivlen` parameters after initialization which might not be taking effect as intended?",
+ "Check for Custom Cryptographic Implementations: Since changing the key and/or IV lengths is not a common operation and the issue is in a recently introduced API, it's crucial to identify if any custom cryptographic implementations might be performing such operations. This is less likely but should be checked especially in bespoke or highly customized applications. Are there any custom cryptographic implemenations changing the key and/or IV lengths?"
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2024-2961
+- CVE description: The iconv() function in the GNU C Library versions 2.39 and older may overflow the output buffer passed to it by up to 4 bytes when converting strings to the ISO-2022-CN-EXT character set, which may be used to crash an application or overwrite a neighbouring variable.
+- CWE Name: CWE-787: Out-of-bounds Write (4.14)
+- CWE Description: The product writes data past the end, or before the beginning, of the intended buffer.
+Typically, this can result in corruption of data, a crash, or code execution. The product may modify an index or perform pointer arithmetic that references a memory location that is outside of the boundaries of the buffer. A subsequent write operation then produces undefined or unexpected results.
+- Notable Vulnerable Software Vendors: ['GNU']
+
+Example {idx}: Checklist:
+[
+ "Identify Usage of `iconv()` Function: Review the application code or dependencies. Is the `iconv()` function used? Look particularly for conversions involving the ISO-2022-CN-EXT character set. This function is the specific target of the vulnerability.",
+ "Assess Data Handling and Boundary Conditions: Since the vulnerability involves an out-of-bounds write, it's crucial to analyze how data boundaries are handled in the code. Are there any custom implementations or patches that might mitigate boundary issues around buffer sizes?",
+ "Review Application's Character Encoding Needs: Does the application specifically need to handle the ISO-2022-CN-EXT character set? If not, consider disabling this character set or using alternative safe functions or libraries for character set conversions.",
+ "Evaluate Network Exposure and Attack Surface: Are the affected services exposed to the network? If so, this could increase the risk of exploitation. Additionally, if the application using the `iconv()` function is accessible externally, the risk is higher."
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: GHSA-8ghj-p4vj-mr35
+- CVE description: An issue was discovered in Pillow before 10.0.0. It is a Denial of Service that uncontrollably allocates memory to process a given task, potentially causing a service to crash by having it run out of memory. This occurs for truetype in ImageFont when textlength in an ImageDraw instance operates on a long text argument.
+- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
+- CWE Name: CWE-770: Allocation of Resources Without Limits or Throttling (4.14)
+- CWE Description: The product allocates a reusable resource or group of resources on behalf of an actor without imposing any restrictions on the size or number of resources that can be allocated, in violation of the intended security policy for that actor.
+- Code frequently has to work with limited resources, so programmers must be careful to ensure that resources are not consumed too quickly, or too easily. Without use of quotas, resource limits, or other protection mechanisms, it can be easy for an attacker to consume many resources by rapidly making many requests, or causing larger resources to be used than is needed. When too many resources are allocated, or if a single resource is too large, then it can prevent the code from working correctly, possibly leading to a denial of service.
+- Notable Vulnerable Software Vendors: ['Fedoraproject', 'Python']
+- GHSA Summary: Pillow Denial of Service vulnerability
+- GHSA Details: [<'first_patched_version': '10.0.0', 'package': <'ecosystem': 'pip', 'name': 'pillow'>, 'vulnerable_functions': ['PIL.ImageFont'], 'vulnerable_version_range': '>= 0, < 10.0.0'>]
+- GHSA CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
+
+Example {idx}: Checklist:
+[
+ "Assess Usage of Vulnerable Functions: Specifically, the vulnerability is related to the `PIL.ImageFont` module when processing long text arguments. Does the application code or dependencies use this module and functionality? If your applications use this module to process user-supplied or uncontrolled text inputs, they are likely at risk.",
+ "Evaluate Resource Limits: The vulnerability leads to a denial of service through memory exhaustion. Are there any resource limits set at the container level (e.g., using Docker or Kubernetes settings) that might mitigate the impact of such an attack? Consider setting or reviewing memory limits to prevent a single container from consuming all available system resources."
+]""",
+]
+
+ex_statements = [
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2023-24329
+- CVE description: An issue in the urllib.parse component of Python before 3.11.4 allows attackers to bypass blocklisting methods by supplying a URL that starts with blank characters.
+- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H/A:N
+- CWE Name: CWE-20: Improper Input Validation (4.14)
+- CWE Description: The product receives input or data, but it does not validate or incorrectly validates that the input has the properties that are required to process the data safely and correctly.
+Input validation is a frequently-used technique for checking potentially dangerous inputs in order to ensure that the inputs are safe for processing within the code, or when communicating with other components. When software does not validate input properly, an attacker is able to craft the input in a form that is not expected by the rest of the application. This will lead to parts of the system receiving unintended input, which may result in altered control flow, arbitrary control of a resource, or arbitrary code execution. Input validation is not the only technique for processing input, however. Other techniques attempt to transform potentially-dangerous input into something safe, such as filtering (CWE-790) - which attempts to remove dangerous inputs - or encoding/escaping (CWE-116), which attempts to ensure that the input is not misinterpreted when it is included in output to another component. Other techniques exist as well (see CWE-138 for more examples.) Input validation can be applied to: raw data - strings, numbers, parameters, file contents, etc. metadata - information about the raw data, such as headers or size Data can be simple or structured. Structured data can be composed of many nested layers, composed of combinations of metadata and raw data, with other simple or structured data. Many properties of raw data or metadata may need to be validated upon entry into the code, such as: specified quantities such as size, length, frequency, price, rate, number of operations, time, etc. implied or derived quantities, such as the actual size of a file instead of a specified size indexes, offsets, or positions into more complex data structures symbolic keys or other elements into hash tables, associative arrays, etc. well-formedness, i.e. 
syntactic correctness - compliance with expected syntax lexical token correctness - compliance with rules for what is treated as a token specified or derived type - the actual type of the input (or what the input appears to be) consistency - between individual data elements, between raw data and metadata, between references, etc. conformance to domain-specific rules, e.g. business logic equivalence - ensuring that equivalent inputs are treated the same authenticity, ownership, or other attestations about the input, e.g. a cryptographic signature to prove the source of the data Implied or derived properties of data must often be calculated or inferred by the code itself. Errors in deriving properties may be considered a contributing factor to improper input validation. Note that "input validation" has very different meanings to different people, or within different classification schemes. Caution must be used when referencing this CWE entry or mapping to it. For example, some weaknesses might involve inadvertently giving control to an attacker over an input when they should not be able to provide an input at all, but sometimes this is referred to as input validation. Finally, it is important to emphasize that the distinctions between input validation and output escaping are often blurred, and developers must be careful to understand the difference, including how input validation is not always sufficient to prevent vulnerabilities, especially when less stringent data types must be supported, such as free-form text. Consider a SQL injection scenario in which a person's last name is inserted into a query. The name "O'Reilly" would likely pass the validation step since it is a common last name in the English language. However, this valid name cannot be directly inserted into the database because it contains the "'" apostrophe character, which would need to be escaped or otherwise transformed. 
In this case, removing the apostrophe might reduce the risk of SQL injection, but it would produce incorrect behavior because the wrong name would be recorded.
+- Notable Vulnerable Software Vendors: ['Fedoraproject', 'Netapp', 'Python']
+
+Example {idx}: Checklist:
+[
+ "Check Python Version: Determine the version of Python installed in the container image. The vulnerability affects Python versions before 3.11.4. Use commands like `python --version` or `python3 --version` to check the installed version. If the version is below 3.11.4, the container may be vulnerable.",
+ "Review Python Usage: Identify if the Python installation is actively used by applications within the container. Check for scripts or applications that rely on Python, particularly those that might parse URLs using `urllib.parse`.",
+ "Inspect URL Parsing Logic: Examine the codebase for any usage of `urllib.parse` or similar URL parsing mechanisms. Focus on how URLs are handled and whether there is a reliance on blocklisting methods to filter out potentially harmful URLs. This is crucial since the CVE describes a bypass of blocklisting methods via URLs starting with blank characters.",
+ "Evaluate Input Validation Practices: Assess the robustness of input validation practices within the application. Since the CVE involves improper input validation, ensure that inputs, especially URLs, are properly sanitized and validated against unexpected or malicious data."
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2023-36632
+- CVE description: The legacy email.utils.parseaddr function in Python through 3.11.4 allows attackers to trigger "RecursionError: maximum recursion depth exceeded while calling a Python object" via a crafted argument. This argument is plausibly an untrusted value from an application's input data that was supposed to contain a name and an e-mail address. NOTE: email.utils.parseaddr is categorized as a Legacy API in the documentation of the Python email package. Applications should instead use the email.parser.BytesParser or email.parser.Parser class. NOTE: the vendor's perspective is that this is neither a vulnerability nor a bug. The email package is intended to have size limits and to throw an exception when limits are exceeded; they were exceeded by the example demonstration code.
+- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
+- CWE Name: CWE-674: Uncontrolled Recursion (4.14)
+- CWE Description: The product does not properly control the amount of recursion that takes place, consuming excessive resources, such as allocated memory or the program stack.
+- Notable Vulnerable Software Vendors: ['Python']
+
+Example {idx}: Checklist:
+[
+ "Identify Usage of `email.utils.parseaddr`: Review the application code within the container image to check if the `email.utils.parseaddr` function is being used. This function is the specific target of the vulnerability. Consider searching the codebase for the string 'email.utils.parseaddr' to find direct usages.",
+ "Assess Input Data Handling: Determine if the application using `email.utils.parseaddr` processes potentially untrusted input data that could include crafted arguments designed to exploit this vulnerability. Evaluate how the application handles exceptions like `RecursionError`, as the exploit triggers this specific error.",
+ "Review Alternative Implementations: Check if the application has already migrated to recommended alternatives such as `email.parser.BytesParser` or `email.parser.Parser`. If not, recommend transitioning away from the legacy `email.utils.parseaddr` to these safer alternatives."
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2023-50447
+- CVE description: Pillow through 10.1.0 allows PIL.ImageMath.eval Arbitrary Code Execution via the environment parameter, a different vulnerability than CVE-2022-22817 (which was about the expression parameter).
+- CVSS Vector: CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:H/I:H/A:H
+- CWE Name: CWE-94: Improper Control of Generation of Code ('Code Injection') (4.14)
+- CWE Description: The product constructs all or part of a code segment using externally-influenced input from an upstream component, but it does not neutralize or incorrectly neutralizes special elements that could modify the syntax or behavior of the intended code segment.
+When a product allows a user's input to contain code syntax, it might be possible for an attacker to craft the code in such a way that it will alter the intended control flow of the product. Such an alteration could lead to arbitrary code execution. Injection problems encompass a wide variety of issues -- all mitigated in very different ways. For this reason, the most effective way to discuss these weaknesses is to note the distinct features which classify them as injection weaknesses. The most important issue to note is that all injection problems share one thing in common -- i.e., they allow for the injection of control plane data into the user-controlled data plane. This means that the execution of the process may be altered by sending code in through legitimate data channels, using no other mechanism. While buffer overflows, and many other flaws, involve the use of some further issue to gain execution, injection problems need only for the data to be parsed. The most classic instantiations of this category of weakness are SQL injection and format string vulnerabilities.
+- Notable Vulnerable Software Vendors: ['Debian', 'Python']
+
+Example {idx}: Checklist:
+[
+ "Evaluate Use of PIL.ImageMath.eval: Specifically investigate whether any application within the container uses the PIL.ImageMath.eval function with user-supplied input in the environment parameter. This is the method and parameter where the vulnerability exists.",
+ "Assess Data Input Sources: Determine if the applications using Pillow are receiving input directly from untrusted sources (e.g., user uploads, external APIs). This will help in understanding the risk exposure and potential for exploitation.",
+ "Security Controls and Sanitization: Review the application's input validation and sanitization measures. Since the vulnerability allows for arbitrary code execution via code injection, ensuring that input is properly sanitized before being processed can mitigate potential exploitation."
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2023-5363
+- CVE description: Issue summary: A bug has been identified in the processing of key and initialisation vector (IV) lengths. This can lead to potential truncation or overruns during the initialisation of some symmetric ciphers. Impact summary: A truncation in the IV can result in non-uniqueness, which could result in loss of confidentiality for some cipher modes. When calling EVP_EncryptInit_ex2(), EVP_DecryptInit_ex2() or EVP_CipherInit_ex2() the provided OSSL_PARAM array is processed after the key and IV have been established. Any alterations to the key length, via the "keylen" parameter or the IV length, via the "ivlen" parameter, within the OSSL_PARAM array will not take effect as intended, potentially causing truncation or overreading of these values. The following ciphers and cipher modes are impacted: RC2, RC4, RC5, CCM, GCM and OCB. For the CCM, GCM and OCB cipher modes, truncation of the IV can result in loss of confidentiality. For example, when following NIST's SP 800-38D section 8.2.1 guidance for constructing a deterministic IV for AES in GCM mode, truncation of the counter portion could lead to IV reuse. Both truncations and overruns of the key and overruns of the IV will produce incorrect results and could, in some cases, trigger a memory exception. However, these issues are not currently assessed as security critical.
+Changing the key and/or IV lengths is not considered to be a common operation and the vulnerable API was recently introduced. Furthermore it is likely that application developers will have spotted this problem during testing since decryption would fail unless both peers in the communication were similarly vulnerable. For these reasons we expect the probability of an application being vulnerable to this to be quite low. However if an application is vulnerable then this issue is considered very serious. For these reasons we have assessed this issue as Moderate severity overall.
+The OpenSSL SSL/TLS implementation is not affected by this issue. The OpenSSL 3.0 and 3.1 FIPS providers are not affected by this because the issue lies outside of the FIPS provider boundary. OpenSSL 3.1 and 3.0 are vulnerable to this issue.
+- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N
+- Notable Vulnerable Software Vendors: ['Debian', 'Netapp', 'Openssl']
+
+Example {idx}: Checklist:
+[
+ "Identify Affected Cipher Modes: Determine if the application within the container image uses any of the affected cipher modes: RC2, RC4, RC5, CCM, GCM, or OCB. Special attention should be given to applications using CCM, GCM, and OCB modes as these are particularly noted for potential loss of confidentiality due to IV truncation.",
+ "Review Cryptographic Operations: Examine the code or configuration of applications using OpenSSL for any instances where `EVP_EncryptInit_ex2()`, `EVP_DecryptInit_ex2()`, or `EVP_CipherInit_ex2()` are called. Check if there are any modifications to the `keylen` or `ivlen` parameters after initialization which might not be taking effect as intended.",
+ "Check for Custom Cryptographic Implementations: Since changing the key and/or IV lengths is not a common operation and the issue is in a recently introduced API, it's crucial to identify if any custom cryptographic implementations might be performing such operations. This is less likely but should be checked especially in bespoke or highly customized applications."
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: CVE-2024-2961
+- CVE description: The iconv() function in the GNU C Library versions 2.39 and older may overflow the output buffer passed to it by up to 4 bytes when converting strings to the ISO-2022-CN-EXT character set, which may be used to crash an application or overwrite a neighbouring variable.
+- CWE Name: CWE-787: Out-of-bounds Write (4.14)
+- CWE Description: The product writes data past the end, or before the beginning, of the intended buffer.
+Typically, this can result in corruption of data, a crash, or code execution. The product may modify an index or perform pointer arithmetic that references a memory location that is outside of the boundaries of the buffer. A subsequent write operation then produces undefined or unexpected results.
+- Notable Vulnerable Software Vendors: ['GNU']
+
+Example {idx}: Checklist:
+[
+ "Identify Usage of `iconv()` Function: Review the application code or dependencies to check if the `iconv()` function is used, particularly for conversions involving the ISO-2022-CN-EXT character set. This function is the specific target of the vulnerability.",
+ "Assess Data Handling and Boundary Conditions: Since the vulnerability involves an out-of-bounds write, it's crucial to analyze how data boundaries are handled in the code. Look for any custom implementations or patches that might mitigate boundary issues around buffer sizes.",
+ "Review Application's Character Encoding Needs: Determine if the application specifically needs to handle the ISO-2022-CN-EXT character set. If not, consider disabling this character set or using alternative safe functions or libraries for character set conversions.",
+ "Evaluate Network Exposure and Attack Surface: Consider whether the affected services are exposed to the network, which could increase the risk of exploitation. If the application using the `iconv()` function is accessible externally, the risk is higher."
+]""",
+
+ """Example {idx}: CVE Details:
+- CVE ID: GHSA-8ghj-p4vj-mr35
+- CVE description: An issue was discovered in Pillow before 10.0.0. It is a Denial of Service that uncontrollably allocates memory to process a given task, potentially causing a service to crash by having it run out of memory. This occurs for truetype in ImageFont when textlength in an ImageDraw instance operates on a long text argument.
+- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
+- CWE Name: CWE-770: Allocation of Resources Without Limits or Throttling (4.14)
+- CWE Description: The product allocates a reusable resource or group of resources on behalf of an actor without imposing any restrictions on the size or number of resources that can be allocated, in violation of the intended security policy for that actor.
+- Code frequently has to work with limited resources, so programmers must be careful to ensure that resources are not consumed too quickly, or too easily. Without use of quotas, resource limits, or other protection mechanisms, it can be easy for an attacker to consume many resources by rapidly making many requests, or causing larger resources to be used than is needed. When too many resources are allocated, or if a single resource is too large, then it can prevent the code from working correctly, possibly leading to a denial of service.
+- Notable Vulnerable Software Vendors: ['Fedoraproject', 'Python']
+- GHSA Summary: Pillow Denial of Service vulnerability
+- GHSA Details: [<'first_patched_version': '10.0.0', 'package': <'ecosystem': 'pip', 'name': 'pillow'>, 'vulnerable_functions': ['PIL.ImageFont'], 'vulnerable_version_range': '>= 0, < 10.0.0'>]
+- GHSA CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
+
+Example {idx}: Checklist:
+[
+ "Assess Usage of Vulnerable Functions: Specifically, the vulnerability is related to the `PIL.ImageFont` module when processing long text arguments. Review the application code or dependencies to see if this module and functionality are used. If your applications use this module to process user-supplied or uncontrolled text inputs, they are likely at risk.",
+ "Evaluate Resource Limits: Since the vulnerability leads to a denial of service through memory exhaustion, check if there are any resource limits set at the container level (e.g., using Docker or Kubernetes settings) that might mitigate the impact of such an attack. Consider setting or reviewing memory limits to prevent a single container from consuming all available system resources."
+]""",
+]
+
+
+# =============================================================================
+# PREPROCESSING STAGE: INTEL SCORING (ChatPromptTemplate)
+# =============================================================================
+
+INTEL_SCORING_SYS_PROMPT = "/no_think\n\nYou are evaluating CVE intelligence quality by scoring criteria."
+# User turn: scoring rubric, JSON-only output contract, three scored examples, then the {cve_data} slot.
+INTEL_SCORING_USER_PROMPT = """
+Evaluate CVE intelligence quality by scoring each criterion independently.
+
+
+
+Provide individual scores for each criterion based on the CVE data below.
+
+1. technical_specificity (max 20 points)
+ - How precise and in-depth are the technical details?
+ - Are specific vulnerable functions, methods, or code paths identified?
+
+2. clarity (max 10 points)
+ - Is the text well-structured and grammatically correct?
+ - Is the description clear and easy to understand?
+
+3. component_impact (max 15 points)
+ - Does it clearly state what is affected?
+ - Are the consequences explicitly described?
+
+4. reproducibility (max 15 points)
+ - Could an attacker understand how to exploit this from the description?
+ - Are attack vectors and preconditions described?
+
+5. vulnerable_function (max 15 points)
+ - Is a specific function, method, or code snippet named?
+ - Are vulnerable code locations identifiable?
+
+6. mitigation (max 10 points)
+ - Are patches, workarounds, or mitigations described?
+ - Is remediation guidance provided?
+
+7. environment (max 10 points)
+ - Is there context about the affected environment (OS, version, configuration)?
+ - Are deployment scenarios mentioned?
+
+8. configuration (max 5 points)
+ - Are relevant configuration settings or misconfigurations described?
+
+
+
+Return JSON only (no markdown, no code blocks):
+{{
+ "scores": {{
+ "technical_specificity": <0-20>,
+ "clarity": <0-10>,
+ "component_impact": <0-15>,
+ "reproducibility": <0-15>,
+ "vulnerable_function": <0-15>,
+ "mitigation": <0-10>,
+ "environment": <0-10>,
+ "configuration": <0-5>
+ }},
+ "justifications": {{
+ "technical_specificity": "brief reason for score",
+ "clarity": "brief reason for score",
+ "component_impact": "brief reason for score",
+ "reproducibility": "brief reason for score",
+ "vulnerable_function": "brief reason for score",
+ "mitigation": "brief reason for score",
+ "environment": "brief reason for score",
+ "configuration": "brief reason for score"
+ }}
+}}
+
+Do NOT calculate or include a total_score. Only provide the individual criterion scores.
+
+
+
+
+Example Input:
+CVE ID: CVE-2025-30204
+CVE Description: golang-jwt is a Go implementation of JSON Web Tokens. Starting in version 3.2.0 and prior to versions 5.2.2 and 4.5.2, the function parse.ParseUnverified splits (via a call to strings.Split) its argument (which is untrusted data) on periods. As a result, in the face of a malicious request whose Authorization header consists of Bearer followed by many period characters, a call to that function incurs allocations to the tune of O(n) bytes (where n stands for the length of the function's argument), with a constant factor of about 16. This issue is fixed in 5.2.2 and 4.5.2.
+CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
+CWE Name: CWE-405: Asymmetric Resource Consumption (Amplification)
+Notable Vendors: Hashicorp
+
+Example Output (High Quality ~92):
+{{
+ "scores": {{
+ "technical_specificity": 18,
+ "clarity": 9,
+ "component_impact": 14,
+ "reproducibility": 14,
+ "vulnerable_function": 15,
+ "mitigation": 9,
+ "environment": 9,
+ "configuration": 4
+ }},
+ "justifications": {{
+ "technical_specificity": "Function parse.ParseUnverified identified with O(n) allocation details",
+ "clarity": "Well-structured with clear impact",
+ "component_impact": "golang-jwt and DoS impact explicitly stated",
+ "reproducibility": "Clear attack: malicious Authorization header with periods",
+ "vulnerable_function": "parse.ParseUnverified explicitly named",
+ "mitigation": "Patches 5.2.2 and 4.5.2 specified",
+ "environment": "Versions 3.2.0-5.2.2 listed",
+ "configuration": "Limited config details"
+ }}
+}}
+
+Example Input:
+CVE ID: CVE-2022-29810
+CVE Description: The Hashicorp go-getter library before 1.5.11 does not redact an SSH key from a URL query parameter.
+CVSS Vector: CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:N/A:N
+CWE Name: CWE-532: Insertion of Sensitive Information into Log File
+Notable Vendors: Hashicorp
+
+Example Output (Medium Quality ~69):
+{{
+ "scores": {{
+ "technical_specificity": 12,
+ "clarity": 8,
+ "component_impact": 12,
+ "reproducibility": 10,
+ "vulnerable_function": 8,
+ "mitigation": 8,
+ "environment": 8,
+ "configuration": 3
+ }},
+ "justifications": {{
+ "technical_specificity": "Moderate detail about SSH key issue",
+ "clarity": "Clear but brief",
+ "component_impact": "go-getter and credential exposure stated",
+ "reproducibility": "Attack path somewhat clear, lacks details",
+ "vulnerable_function": "General functionality, no specific function",
+ "mitigation": "Version 1.5.11 patches",
+ "environment": "Affected versions specified",
+ "configuration": "Minimal context"
+ }}
+}}
+
+Example Input:
+CVE ID: CVE-2022-2385
+CVE Description: A security issue was discovered in aws-iam-authenticator where an allow-listed IAM identity may be able to modify their username and escalate privileges.
+CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
+CWE Name: CWE-20: Improper Input Validation
+Notable Vendors: Kubernetes
+
+Example Output (Low Quality ~22):
+{{
+ "scores": {{
+ "technical_specificity": 3,
+ "clarity": 5,
+ "component_impact": 5,
+ "reproducibility": 2,
+ "vulnerable_function": 0,
+ "mitigation": 3,
+ "environment": 4,
+ "configuration": 0
+ }},
+ "justifications": {{
+ "technical_specificity": "Very vague, no detail",
+ "clarity": "Brief but understandable",
+ "component_impact": "General privilege escalation mention",
+ "reproducibility": "No exploit details",
+ "vulnerable_function": "No functions identified",
+ "mitigation": "Patch mentioned",
+ "environment": "Minimal version info",
+ "configuration": "None"
+ }}
+}}
+
+
+
+
+{cve_data}
+
+
+Provide your scoring JSON:"""  # each "~N" example label equals the sum of that example's scores
+
+
+def get_intel_scoring_chat_prompt():
+    """Build the two-message chat prompt for the intel scoring stage.
+
+    Returns:
+        ChatPromptTemplate pairing INTEL_SCORING_SYS_PROMPT (system role, starts
+        with "/no_think") with INTEL_SCORING_USER_PROMPT (user role).
+    """
+    # Function-scope import, as in the original implementation.
+    from langchain_core.prompts import ChatPromptTemplate
+    system_message = ("system", INTEL_SCORING_SYS_PROMPT)
+    user_message = ("user", INTEL_SCORING_USER_PROMPT)
+    return ChatPromptTemplate.from_messages([system_message, user_message])
+
+
+# =============================================================================
+# CORE LLM ENGINE STAGE: CHECKLIST GENERATION (ChatPromptTemplate)
+# =============================================================================
+
+CHECKLIST_SYS_PROMPT = "/no_think\n\nYou generate investigation checklists for CVE exploitability assessment."
+# User turn: output format, numbered requirements, then the {tool_descriptions} and {examples} slots.
+CHECKLIST_USER_PROMPT = """
+Generate an investigation checklist for assessing CVE exploitability in a
+containerized environment. Your output must be a comma-separated list enclosed
+in square brackets, with each item enclosed in quotes.
-Write a 3-5 sentence paragraph following this structure:
+Create 3-5 checklist items that meet these requirements:
-1. VERDICT (sentence 1): Begin with explicit statement
- - "The CVE is exploitable" / "The CVE is not exploitable" / "Exploitability is uncertain"
+1. STRUCTURE: Each item must be a clear, answerable question
+ - Start with interrogative words: Is/Does/Are/Can/Has/etc.
+ - Be specific and actionable
+ - Include relevant context from the CVE
-2. EVIDENCE (sentences 2-4): Support with specific findings
- - Cite concrete results: functions found/absent, reachability status, configuration states
- - Use technical details: function names, file paths, components
- - Connect findings to exploitability conditions
+2. CONTENT PRIORITIES:
+ - If the CVE mentions a specific vulnerable function or method, the FIRST
+ checklist item must verify whether that function is called or imported
+ in the codebase
+ - Focus on exploitability factors (version presence is already confirmed)
+ - Include specific technical names from the CVE (functions, libraries,
+ configurations, cipher modes, etc.)
+ - Consider the attack vector (network exposure, user input, file processing, etc.)
+ - Address relevant security controls or mitigations
-3. FOCUS: Use only definitive checklist results; ignore inconclusive items
+3. INVESTIGATION TOOLS AVAILABLE:
+ {tool_descriptions}
+
+ Design questions that can be answered using these analysis capabilities.
+
+4. COMPLETENESS:
+ - Cover the vulnerability chain: presence → usage → exploitability
+ - Each item should independently contribute to understanding exploit risk
-
-The CVE is not exploitable in this container. Investigation confirmed that while
-Python 3.10.0 is installed (vulnerable version), the urllib.parse module is never
-imported or called in the application codebase (verified via code search). Additionally,
-code analysis revealed that the application does not accept URL inputs from untrusted
-sources; all URL handling occurs only with internally generated URLs from configuration
-files. The combination of no urllib.parse usage and lack of external URL input eliminates
-the attack vector described in the CVE.
-
+
+{examples}
+
-
-{response}
-
+
+"""
+
+
+def get_checklist_chat_prompt():
+    """Build the two-message chat prompt for the checklist generation stage.
+
+    Returns:
+        ChatPromptTemplate pairing CHECKLIST_SYS_PROMPT (system role, starts
+        with "/no_think") with CHECKLIST_USER_PROMPT (user role).
+    """
+    # Function-scope import, as in the original implementation.
+    from langchain_core.prompts import ChatPromptTemplate
+    system_message = ("system", CHECKLIST_SYS_PROMPT)
+    user_message = ("user", CHECKLIST_USER_PROMPT)
+    return ChatPromptTemplate.from_messages([system_message, user_message])
-Write your summary paragraph:"""
+
+# =============================================================================
+# CORE LLM ENGINE STAGE: TASK AGENT INVESTIGATION (PromptTemplate)
+# =============================================================================
AGENT_SYS_PROMPT = (
"You are an expert security analyst investigating Common Vulnerabilities and "
@@ -140,6 +657,26 @@ def build_tool_descriptions(tool_names: list[str]) -> list[str]:
{tool_selection_strategy}
+
+Your output must follow strict formatting for tool execution to work correctly.
+
+FORMAT REQUIREMENTS:
+1. Thought: [Put ALL reasoning, thinking, and analysis here - be as verbose as needed]
+2. Action: [ONLY the exact tool name from the list - no extra text, explanations, or thinking]
+3. Action Input: [ONLY the input value - no prefixes like "search for" or explanations]
+
+
+CORRECT FORMAT EXAMPLE:
+Thought: I need to find urllib.parse usage in the codebase. I'm considering semantic search vs keyword search. Keyword search is better here because I need exact matches for import statements and function calls. I'll search for the module name to catch all import patterns including "from urllib.parse import" and "urllib.parse.urlparse()".
+Action: Code Keyword Search
+Action Input: urllib.parse
+
+INCORRECT FORMAT (WILL FAIL):
+Thought: I should search
+Action: Code Keyword Search (using this because it's more precise) ← WRONG: Extra text breaks parser
+Action Input: urllib.parse
+
+
Follow this format exactly (start each line with one of the specified prefixes):
@@ -211,57 +748,38 @@ def build_tool_descriptions(tool_names: list[str]) -> list[str]:
"""
-AGENT_EXAMPLES_FOR_PROMPT_2 = """
-
-Example 4: Understanding application architecture (semantic searches)
-
-Question: Does the application process images from untrusted sources?
-Thought: I need to understand how the application handles images and where they come from. Let me start by checking the documentation for image handling architecture.
-Action: Docs Semantic Search
-Action Input: How does the application handle image uploads and image processing?
-Observation: The application includes an image processing service that accepts image uploads from users via REST API endpoints at /api/upload. Images are validated for file type and size, then resized and stored in cloud storage.
-Thought: The documentation indicates user uploads are accepted. Let me examine the code to understand what validation occurs before image processing.
-Action: Code Semantic Search
-Action Input: image upload validation and PIL image processing
-Observation: Found image_processor.py which validates file extensions (.jpg, .png, .gif) and MIME types before passing images to PIL (Pillow library) for resizing. The validate_image() function checks basic properties but processes the image using PIL.Image.open() and PIL.Image.thumbnail().
-Thought: Images come from users via API endpoints and are processed with PIL after basic validation. I now have sufficient information.
-Final Answer: Yes, the application processes images from untrusted sources. User-uploaded images are accepted via the /api/upload REST endpoint and processed using the Pillow (PIL) library. While basic validation (file extension and MIME type) is performed, the images originate from untrusted users and are processed before storage, creating potential exploit vectors.
-
-Example 5: Specific coding pattern detection (keyword + semantic)
-
-Question: Does the code modify the keylen or ivlen parameters in OSSL_PARAM arrays after calling EVP initialization functions?
-Thought: This is a very specific coding pattern in C code. Let me first search for OSSL_PARAM usage in the codebase.
-Action: Code Keyword Search
-Action Input: OSSL_PARAM
-Observation: Found 2 matches:
-- src/crypto/cipher_utils.c:45: OSSL_PARAM params[3];
-- src/crypto/cipher_utils.c:78: OSSL_PARAM params[5];
-Thought: Found OSSL_PARAM usage. Now I need to understand if the code modifies keylen or ivlen after EVP initialization. Let me search for this pattern semantically.
-Action: Code Semantic Search
-Action Input: OSSL_PARAM array modification of keylen or ivlen after EVP_EncryptInit_ex2 EVP_DecryptInit_ex2 EVP_CipherInit_ex2 calls
-Observation: The code in cipher_utils.c creates OSSL_PARAM arrays with keylen and ivlen values set during initialization. The arrays are passed to EVP_EncryptInit_ex2() and EVP_DecryptInit_ex2() but are not modified afterward. The parameter values are set once during array creation using OSSL_PARAM_construct_*() functions.
-Thought: I have examined the OSSL_PARAM usage pattern. The vulnerable pattern (modifying keylen/ivlen after initialization) is not present.
-Final Answer: No, the codebase does not modify keylen or ivlen parameters in OSSL_PARAM arrays after calling EVP initialization functions. The code in cipher_utils.c creates OSSL_PARAM arrays with keylen and ivlen values set during initialization, passes them to EVP initialization functions, and does not subsequently modify these parameter values. The vulnerable coding pattern described in the CVE is not present.
-Example 6: Configuration-specific check (keyword search for config patterns)
+def get_agent_prompt(sys_prompt: str | None = None,
+ prompt_examples: bool = False) -> str:
+ """
+ Build the agent prompt text: system prompt, a blank line, then the template.
+
+ Tool selection strategy is injected via PromptTemplate partial_variables (per the original note).
+
+ Args:
+ sys_prompt: Override for AGENT_SYS_PROMPT; None or "" falls back to it
+ prompt_examples: If True, splice AGENT_EXAMPLES_FOR_PROMPT into the template
+
+ Returns:
+ Prompt text as a plain str; per the original note it still carries {tool_selection_strategy}
+ """
+ sys_prompt = sys_prompt or AGENT_SYS_PROMPT
+ # NOTE(review): the search string passed to replace() below is empty, and str.replace("", x)
+ # inserts x between every character and at both ends; a marker is presumably intended - confirm.
+ if prompt_examples:
+ prompt_template = AGENT_PROMPT_TEMPLATE.replace(
+ "",
+ "\n" + AGENT_EXAMPLES_FOR_PROMPT
+ )
+ else:
+ prompt_template = AGENT_PROMPT_TEMPLATE
+
+ return f'{sys_prompt}\n\n{prompt_template}'
-Question: Does the application use any of the affected cipher modes: CCM, GCM, OCB, RC2, RC4, or RC5?
-Thought: I need to search for usage of these specific cipher modes in the codebase. Let me start with GCM.
-Action: Code Keyword Search
-Action Input: EVP_aes_.*_gcm
-Observation: Found 1 match: src/crypto/cipher_utils.c:120: cipher = EVP_aes_256_gcm();
-Thought: Found GCM mode usage. Let me check for the other cipher modes mentioned.
-Action: Code Keyword Search
-Action Input: EVP_aes_.*_ccm
-Observation: No matches found
-Thought: No CCM. Let me check for OCB, RC2, RC4, RC5 together.
-Action: Code Keyword Search
-Action Input: RC2|RC4|RC5|OCB
-Observation: No matches found
-Thought: I found one of the affected cipher modes (GCM) but not the others. I have the answer.
-Final Answer: Yes, the application uses GCM (Galois/Counter Mode), which is one of the affected cipher modes. The code uses EVP_aes_256_gcm() in cipher_utils.c at line 120. The other affected cipher modes (CCM, OCB, RC2, RC4, RC5) are not used in the codebase.
-"""
+# =============================================================================
+# CORE LLM ENGINE STAGE: CVSS GENERATION (PromptTemplate)
+# =============================================================================
CVSS_SYS_PROMPT = (
"You are a cybersecurity expert specializing in Common Vulnerabilities and Exposures (CVE) analysis and CVSS (Common Vulnerability Scoring System) metric evaluation."
@@ -379,33 +897,6 @@ def build_tool_descriptions(tool_names: list[str]) -> list[str]:
{agent_scratchpad}"""
-def get_agent_prompt(sys_prompt: str | None = None,
- prompt_examples: bool = False) -> str:
- """
- Get the agent prompt template.
-
- Tool selection strategy is injected via PromptTemplate partial_variables.
-
- Args:
- sys_prompt: Optional system prompt override
- prompt_examples: Whether to include few-shot examples
-
- Returns:
- Complete agent prompt template with {tool_selection_strategy} variable
- """
- sys_prompt = sys_prompt or AGENT_SYS_PROMPT
-
- # Select template with or without examples
- if prompt_examples:
- prompt_template = AGENT_PROMPT_TEMPLATE.replace(
- "",
- "\n" + AGENT_EXAMPLES_FOR_PROMPT
- # AGENT_EXAMPLES_FOR_PROMPT_2
- )
- else:
- prompt_template = AGENT_PROMPT_TEMPLATE
-
- return f'{sys_prompt}\n\n{prompt_template}'
def get_cvss_prompt(sys_prompt: str | None = None,
prompt_examples: bool = True) -> str:
@@ -427,222 +918,159 @@ def get_cvss_prompt(sys_prompt: str | None = None,
return f'{sys_prompt}\n\n{prompt_template}'
-class PromptBuilder(ABC):
- @abstractmethod
- def build_prompt(self) -> str:
- pass
+# =============================================================================
+# CORE LLM ENGINE STAGE: SUMMARIZATION (ChatPromptTemplate)
+# =============================================================================
+SUMMARY_SYS_PROMPT = "/no_think\n\nYou summarize CVE exploitability investigation results concisely."
-class IfPromptBuilder(PromptBuilder):
+SUMMARY_USER_PROMPT = """
+Summarize CVE exploitability investigation results into a clear, evidence-based
+paragraph. The investigation results consist of checklist items (questions) and
+their corresponding conclusions from the security analysis.
+
- def __init__(self, prop_name: str, description: str) -> None:
+
+Write a 3-5 sentence paragraph following this structure:
- self.prop_name = prop_name
- self.description = description
+1. VERDICT (sentence 1): Begin with explicit statement
+ - "The CVE is exploitable" / "The CVE is not exploitable" / "Exploitability is uncertain"
- def build_prompt(self) -> str:
+2. EVIDENCE (sentences 2-4): Support with specific findings
+ - Cite concrete results: functions found/absent, reachability status, configuration states
+ - Use technical details: function names, file paths, components
+ - Connect findings to exploitability conditions
- f_string = dedent(f"""
- {{% if {self.prop_name} %}}
- - {self.description}{{{{{self.prop_name} | string() | truncate(1024)}}}}
- {{% endif %}}
- """).strip()
+3. FOCUS: Use only definitive checklist results; ignore inconclusive items
+
- return f_string
+
+The CVE is not exploitable in this container. Investigation confirmed that while
+Python 3.10.0 is installed (vulnerable version), the urllib.parse module is never
+imported or called in the application codebase (verified via code search). Additionally,
+code analysis revealed that the application does not accept URL inputs from untrusted
+sources; all URL handling occurs only with internally generated URLs from configuration
+files. The combination of no urllib.parse usage and lack of external URL input eliminates
+the attack vector described in the CVE.
+
+
+{response}
+
-class IfElsePromptBuilder(PromptBuilder):
+Write your summary paragraph:"""  # {response} is the only placeholder in this template
- def __init__(self, first_prop_name: str, second_prop_name: str, description: str) -> None:
- self.first_prop_name = first_prop_name
- self.second_prop_name = second_prop_name
- self.description = description
+def get_summary_chat_prompt():
+    """Build the two-message chat prompt for the summarization stage.
+
+    Returns:
+        ChatPromptTemplate pairing SUMMARY_SYS_PROMPT (system role, starts
+        with "/no_think") with SUMMARY_USER_PROMPT (user role).
+    """
+    # Function-scope import, as in the original implementation.
+    from langchain_core.prompts import ChatPromptTemplate
+    system_message = ("system", SUMMARY_SYS_PROMPT)
+    user_message = ("user", SUMMARY_USER_PROMPT)
+    return ChatPromptTemplate.from_messages([system_message, user_message])
- def build_prompt(self) -> str:
- f_string = dedent(f"""
- {{% if {self.first_prop_name} %}}
- - {self.description}{{{{{self.first_prop_name} | string() | truncate(1024)}}}}
- {{% elif {self.second_prop_name} %}}
- - {self.description}{{{{{self.second_prop_name} | string() | truncate(1024)}}}}
- {{% endif %}}
- """).strip()
+# =============================================================================
+# CORE LLM ENGINE STAGE: JUSTIFICATION ASSIGNMENT (ChatPromptTemplate)
+# =============================================================================
- return f_string
+JUSTIFICATION_SYS_PROMPT = "/no_think\n\nYou classify CVE exploitability status based on evidence."
+JUSTIFICATION_USER_PROMPT = """
+Review the CVE exploitability investigation summary and perform two tasks:
+1. Classify the exploitability status into one of 12 predefined categories
+2. Provide reasoning for your classification, citing specific evidence from the investigation summary
+
-# Add additional intel fields for the checklist prompt here (field name, description)
-additional_intel_fields: list[PromptBuilder] = [
- # Keep these at the top
- IfPromptBuilder('cve_id', 'CVE ID: '),
- IfElsePromptBuilder('nvd_cve_description', "ghsa_description", 'CVE Description: '),
+
+Select the single most appropriate category based on the investigation findings.
- # Sort these alphabetically
- IfElsePromptBuilder('nvd_cvss_vector', "ghsa_cvss_vector_string", 'CVSS Vector: '),
- IfElsePromptBuilder('nvd_cwe_name', "ghsa_cwes", 'CWE Name: '),
- IfPromptBuilder('ghsa_vulnerabilities', 'GHSA Details: '),
- IfPromptBuilder('nvd_configurations', 'Known Affected Software: '),
- IfPromptBuilder('nvd_cwe_description', 'CWE Description: '),
- IfPromptBuilder('nvd_cwe_extended_description', ''),
- IfPromptBuilder('nvd_vendor_names', 'Notable Vulnerable Software Vendors: '),
- IfPromptBuilder('rhsa_bugzilla_description', 'RHSA Description: '),
- IfPromptBuilder('rhsa_details', 'RHSA Details: '),
- IfPromptBuilder('rhsa_package_state', 'RHSA Affected Packages: '),
- IfPromptBuilder('rhsa_statement', 'RHSA Statement: '),
- # IfPromptBuilder('ubuntu_notices', 'Ubuntu Priority Reason: '), # Disabling for now since its very long
- IfPromptBuilder('ubuntu_ubuntu_description', 'Ubuntu Security Note: '),
- IfPromptBuilder('vulnerable_dependencies', 'Identified Vulnerable Dependencies: '),
- IfPromptBuilder('plugin_data', 'Extra information: '),
-]
+CLASSIFICATION CATEGORIES (in logical precedence order):
-additional_intel_prompting = '\n'.join([pb.build_prompt() for pb in additional_intel_fields])
+1. false_positive - CVE-to-package association is incorrect (wrong package or mismatched CVE)
-ex_questions = [
- 'Example {idx}: CVE Details:\n- CVE ID: CVE-2023-24329\n- CVE description: An issue in the urllib.parse component of Python before 3.11.4 allows attackers to bypass blocklisting methods by supplying a URL that starts with blank characters.\n- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H/A:N\n- CWE Name: CWE-20: Improper Input Validation (4.14)\n- CWE Description: The product receives input or data, but it does not validate or incorrectly validates that the input has the properties that are required to process the data safely and correctly.\nInput validation is a frequently-used technique for checking potentially dangerous inputs in order to ensure that the inputs are safe for processing within the code, or when communicating with other components. When software does not validate input properly, an attacker is able to craft the input in a form that is not expected by the rest of the application. This will lead to parts of the system receiving unintended input, which may result in altered control flow, arbitrary control of a resource, or arbitrary code execution. Input validation is not the only technique for processing input, however. Other techniques attempt to transform potentially-dangerous input into something safe, such as filtering (CWE-790) - which attempts to remove dangerous inputs - or encoding/escaping (CWE-116), which attempts to ensure that the input is not misinterpreted when it is included in output to another component. Other techniques exist as well (see CWE-138 for more examples.) Input validation can be applied to: raw data - strings, numbers, parameters, file contents, etc. metadata - information about the raw data, such as headers or size Data can be simple or structured. Structured data can be composed of many nested layers, composed of combinations of metadata and raw data, with other simple or structured data. 
Many properties of raw data or metadata may need to be validated upon entry into the code, such as: specified quantities such as size, length, frequency, price, rate, number of operations, time, etc. implied or derived quantities, such as the actual size of a file instead of a specified size indexes, offsets, or positions into more complex data structures symbolic keys or other elements into hash tables, associative arrays, etc. well-formedness, i.e. syntactic correctness - compliance with expected syntax lexical token correctness - compliance with rules for what is treated as a token specified or derived type - the actual type of the input (or what the input appears to be) consistency - between individual data elements, between raw data and metadata, between references, etc. conformance to domain-specific rules, e.g. business logic equivalence - ensuring that equivalent inputs are treated the same authenticity, ownership, or other attestations about the input, e.g. a cryptographic signature to prove the source of the data Implied or derived properties of data must often be calculated or inferred by the code itself. Errors in deriving properties may be considered a contributing factor to improper input validation. Note that "input validation" has very different meanings to different people, or within different classification schemes. Caution must be used when referencing this CWE entry or mapping to it. For example, some weaknesses might involve inadvertently giving control to an attacker over an input when they should not be able to provide an input at all, but sometimes this is referred to as input validation. Finally, it is important to emphasize that the distinctions between input validation and output escaping are often blurred, and developers must be careful to understand the difference, including how input validation is not always sufficient to prevent vulnerabilities, especially when less stringent data types must be supported, such as free-form text. 
Consider a SQL injection scenario in which a person\'s last name is inserted into a query. The name "O\'Reilly" would likely pass the validation step since it is a common last name in the English language. However, this valid name cannot be directly inserted into the database because it contains the "\'" apostrophe character, which would need to be escaped or otherwise transformed. In this case, removing the apostrophe might reduce the risk of SQL injection, but it would produce incorrect behavior because the wrong name would be recorded.\n- Notable Vulnerable Software Vendors: [\'Fedoraproject\', \'Netapp\', \'Python\']\n\nExample {idx}: Checklist:\n[\n\t"Review Python Usage: Is the Python installation actively used by applications within the container? Check for scripts or applications that rely on Python, particularly those that might parse URLs using `urllib.parse`.",\n\t"Inspect URL Parsing Logic: Does the codebase contain any usage of `urllib.parse` or similar URL parsing mechanisms? Focus on how URLs are handled and whether there is a reliance on blocklisting methods to filter out potentially harmful URLs. This is crucial since the CVE describes a bypass of blocklisting methods via URLs starting with blank characters.",\n\t"Evaluate Input Validation Practices: Assess the robustness of input validation practices within the application. Since the CVE involves improper input validation, are inputs, especially URLs, properly sanitized and validated against unexpected or malicious data?"\n]',
- 'Example {idx}: CVE Details:\n- CVE ID: CVE-2023-36632\n- CVE description: The legacy email.utils.parseaddr function in Python through 3.11.4 allows attackers to trigger "RecursionError: maximum recursion depth exceeded while calling a Python object" via a crafted argument. This argument is plausibly an untrusted value from an application\'s input data that was supposed to contain a name and an e-mail address. NOTE: email.utils.parseaddr is categorized as a Legacy API in the documentation of the Python email package. Applications should instead use the email.parser.BytesParser or email.parser.Parser class. NOTE: the vendor\'s perspective is that this is neither a vulnerability nor a bug. The email package is intended to have size limits and to throw an exception when limits are exceeded; they were exceeded by the example demonstration code.\n- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H\n- CWE Name: CWE-674: Uncontrolled Recursion (4.14)\n- CWE Description: The product does not properly control the amount of recursion that takes place, consuming excessive resources, such as allocated memory or the program stack.\n- Notable Vulnerable Software Vendors: [\'Python\']\n\nExample {idx}: Checklist:\n[\n\t"Identify Usage of `email.utils.parseaddr`: Review the application code within the container image to check if the `email.utils.parseaddr` function is being used. This function is the specific target of the vulnerability. Does the codebase contain the string \'email.utils.parseaddr\'?",\n\t"Assess Input Data Handling: Does the application using `email.utils.parseaddr` process potentially untrusted input data that could include crafted arguments designed to exploit this vulnerability? 
Evaluate how the application handles exceptions like `RecursionError`, as the exploit triggers this specific error.",\n\t"Review Alternative Implementations: Has the application already migrated to recommended alternatives such as `email.parser.BytesParser` or `email.parser.Parser` in the code base? If not, recommend transitioning away from the legacy `email.utils.parseaddr` to these safer alternatives."\n]',
- "Example {idx}: CVE Details:\n- CVE ID: CVE-2023-50447\n- CVE description: Pillow through 10.1.0 allows PIL.ImageMath.eval Arbitrary Code Execution via the environment parameter, a different vulnerability than CVE-2022-22817 (which was about the expression parameter).\n- CVSS Vector: CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:H/I:H/A:H\n- CWE Name: CWE-94: Improper Control of Generation of Code ('Code Injection') (4.14)\n- CWE Description: The product constructs all or part of a code segment using externally-influenced input from an upstream component, but it does not neutralize or incorrectly neutralizes special elements that could modify the syntax or behavior of the intended code segment.\nWhen a product allows a user's input to contain code syntax, it might be possible for an attacker to craft the code in such a way that it will alter the intended control flow of the product. Such an alteration could lead to arbitrary code execution. Injection problems encompass a wide variety of issues -- all mitigated in very different ways. For this reason, the most effective way to discuss these weaknesses is to note the distinct features which classify them as injection weaknesses. The most important issue to note is that all injection problems share one thing in common -- i.e., they allow for the injection of control plane data into the user-controlled data plane. This means that the execution of the process may be altered by sending code in through legitimate data channels, using no other mechanism. While buffer overflows, and many other flaws, involve the use of some further issue to gain execution, injection problems need only for the data to be parsed. 
The most classic instantiations of this category of weakness are SQL injection and format string vulnerabilities.\n- Notable Vulnerable Software Vendors: ['Debian', 'Python']\n\nExample {idx}: Checklist:\n[\n\t\"Evaluate Use of PIL.ImageMath.eval: Does any application within the container use the PIL.ImageMath.eval function with user-supplied input in the environment parameter? This is the method and parameter where the vulnerability exists.\",\n\t\"Assess Data Input Sources: Do the applications using Pillow receive input directly from untrusted sources (e.g., user uploads, external APIs)? This will help in understanding the risk exposure and potential for exploitation.\",\n\t\"Security Controls and Sanitization: Review the application's input validation and sanitization measures. Since the vulnerability allows for arbitrary code execution via code injection, is the input properly sanitized before being processed?\"\n]",
- 'Example {idx}: CVE Details:\n- CVE ID: CVE-2023-5363\n- CVE description: Issue summary: A bug has been identified in the processing of key and initialisation vector (IV) lengths. This can lead to potential truncation or overruns during the initialisation of some symmetric ciphers. Impact summary: A truncation in the IV can result in non-uniqueness, which could result in loss of confidentiality for some cipher modes. When calling EVP_EncryptInit_ex2(), EVP_DecryptInit_ex2() or EVP_CipherInit_ex2() the provided OSSL_PARAM array is processed after the key and IV have been established. Any alterations to the key length, via the "keylen" parameter or the IV length, via the "ivlen" parameter, within the OSSL_PARAM array will not take effect as intended, potentially causing truncation or overreading of these values. The following ciphers and cipher modes are impacted: RC2, RC4, RC5, CCM, GCM and OCB. For the CCM, GCM and OCB cipher modes, truncation of the IV can result in loss of confidentiality. For example, when following NIST\'s SP 800-38D section 8.2.1 guidance for constructing a deterministic IV for AES in GCM mode, truncation of the counter portion could lead to IV reuse. Both truncations and overruns of the key and overruns of the IV will produce incorrect results and could, in some cases, trigger a memory exception. However, these issues are not currently assessed as security critical.\nChanging the key and/or IV lengths is not considered to be a common operation and the vulnerable API was recently introduced. Furthermore it is likely that application developers will have spotted this problem during testing since decryption would fail unless both peers in the communication were similarly vulnerable. For these reasons we expect the probability of an application being vulnerable to this to be quite low. However if an application is vulnerable then this issue is considered very serious. 
For these reasons we have assessed this issue as Moderate severity overall.\nThe OpenSSL SSL/TLS implementation is not affected by this issue. The OpenSSL 3.0 and 3.1 FIPS providers are not affected by this because the issue lies outside of the FIPS provider boundary. OpenSSL 3.1 and 3.0 are vulnerable to this issue.\n- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N\n- Notable Vulnerable Software Vendors: [\'Debian\', \'Netapp\', \'Openssl\']\n\nExample {idx}: Checklist:\n[\n\t"Check OpenSSL Version: What version of OpenSSL is running in the container image? The vulnerability specifically affects OpenSSL versions 3.0 and 3.1. Is the version running in the container within the vulnerability range? If the container is running these versions, it may be vulnerable.",\n\t"Identify Affected Cipher Modes: Does the application within the container image use any of the affected cipher modes: RC2, RC4, RC5, CCM, GCM, or OCB? Special attention should be given to applications using CCM, GCM, and OCB modes as these are particularly noted for potential loss of confidentiality due to IV truncation.",\n\t"Review Cryptographic Operations: Does the code or configuration of applications using OpenSSL have any instances where `EVP_EncryptInit_ex2()`, `EVP_DecryptInit_ex2()`, or `EVP_CipherInit_ex2()` are called? Are there any modifications to the `keylen` or `ivlen` parameters after initialization which might not be taking effect as intended?",\n\t"Check for Custom Cryptographic Implementations: Since changing the key and/or IV lengths is not a common operation and the issue is in a recently introduced API, it\'s crucial to identify if any custom cryptographic implementations might be performing such operations. This is less likely but should be checked especially in bespoke or highly customized applications. Are there any custom cryptographic implemenations changing the key and/or IV lengths?"\n]',
- 'Example {idx}: CVE Details:\n- CVE ID: CVE-2024-2961\n- CVE description: The iconv() function in the GNU C Library versions 2.39 and older may overflow the output buffer passed to it by up to 4 bytes when converting strings to the ISO-2022-CN-EXT character set, which may be used to crash an application or overwrite a neighbouring variable.\n- CWE Name: CWE-787: Out-of-bounds Write (4.14)\n- CWE Description: The product writes data past the end, or before the beginning, of the intended buffer.\nTypically, this can result in corruption of data, a crash, or code execution. The product may modify an index or perform pointer arithmetic that references a memory location that is outside of the boundaries of the buffer. A subsequent write operation then produces undefined or unexpected results.\n- Notable Vulnerable Software Vendors: [\'GNU\']\n\nExample {idx}: Checklist:\n[\n\t"Identify Usage of `iconv()` Function: Review the application code or dependencies. Is the `iconv()` function used? Look particularly for conversions involving the ISO-2022-CN-EXT character set. This function is the specific target of the vulnerability.",\n\t"Assess Data Handling and Boundary Conditions: Since the vulnerability involves an out-of-bounds write, it\'s crucial to analyze how data boundaries are handled in the code. Are there any custom implementations or patches that might mitigate boundary issues around buffer sizes?",\n\t"Review Application\'s Character Encoding Needs: Does the application specifically need to handle the ISO-2022-CN-EXT character set? If not, consider disabling this character set or using alternative safe functions or libraries for character set conversions.",\n\t"Evaluate Network Exposure and Attack Surface: Are the affected services exposed to the network? If so, this could increase the risk of exploitation. Additionally, if the application using the `iconv()` function is accessible externally, the risk is higher."\n]',
- "Example {idx}: CVE Details:\n- CVE ID: GHSA-8ghj-p4vj-mr35\n- CVE description: An issue was discovered in Pillow before 10.0.0. It is a Denial of Service that uncontrollably allocates memory to process a given task, potentially causing a service to crash by having it run out of memory. This occurs for truetype in ImageFont when textlength in an ImageDraw instance operates on a long text argument.\n- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H\n- CWE Name: CWE-770: Allocation of Resources Without Limits or Throttling (4.14)\n- CWE Description: The product allocates a reusable resource or group of resources on behalf of an actor without imposing any restrictions on the size or number of resources that can be allocated, in violation of the intended security policy for that actor.\n- Code frequently has to work with limited resources, so programmers must be careful to ensure that resources are not consumed too quickly, or too easily. Without use of quotas, resource limits, or other protection mechanisms, it can be easy for an attacker to consume many resources by rapidly making many requests, or causing larger resources to be used than is needed. When too many resources are allocated, or if a single resource is too large, then it can prevent the code from working correctly, possibly leading to a denial of service.\n- Notable Vulnerable Software Vendors: ['Fedoraproject', 'Python']\n- GHSA Summary: Pillow Denial of Service vulnerability\n- GHSA Details: [<'first_patched_version': '10.0.0', 'package': <'ecosystem': 'pip', 'name': 'pillow'>, 'vulnerable_functions': ['PIL.ImageFont'], 'vulnerable_version_range': '>= 0, < 10.0.0'>]\n- GHSA CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H\n\nExample {idx}: Checklist:\n[\n\t\"Assess Usage of Vulnerable Functions: Specifically, the vulnerability is related to the `PIL.ImageFont` module when processing long text arguments. Does the application code or dependencies use this module and functionality? 
If your applications use this module to process user-supplied or uncontrolled text inputs, they are likely at risk.\",\n\t\"Evaluate Resource Limits: The vulnerability leads to a denial of service through memory exhaustion. Are there any resource limits set at the container level (e.g., using Docker or Kubernetes settings) that might mitigate the impact of such an attack? Consider setting or reviewing memory limits to prevent a single container from consuming all available system resources.\"\n]",
-]
+2. code_not_present - Vulnerable code/library is absent from the container
+ (If code is not present, subsequent factors are irrelevant)
-ex_statements = [
- 'Example {idx}: CVE Details:\n- CVE ID: CVE-2023-24329\n- CVE description: An issue in the urllib.parse component of Python before 3.11.4 allows attackers to bypass blocklisting methods by supplying a URL that starts with blank characters.\n- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H/A:N\n- CWE Name: CWE-20: Improper Input Validation (4.14)\n- CWE Description: The product receives input or data, but it does not validate or incorrectly validates that the input has the properties that are required to process the data safely and correctly.\nInput validation is a frequently-used technique for checking potentially dangerous inputs in order to ensure that the inputs are safe for processing within the code, or when communicating with other components. When software does not validate input properly, an attacker is able to craft the input in a form that is not expected by the rest of the application. This will lead to parts of the system receiving unintended input, which may result in altered control flow, arbitrary control of a resource, or arbitrary code execution. Input validation is not the only technique for processing input, however. Other techniques attempt to transform potentially-dangerous input into something safe, such as filtering (CWE-790) - which attempts to remove dangerous inputs - or encoding/escaping (CWE-116), which attempts to ensure that the input is not misinterpreted when it is included in output to another component. Other techniques exist as well (see CWE-138 for more examples.) Input validation can be applied to: raw data - strings, numbers, parameters, file contents, etc. metadata - information about the raw data, such as headers or size Data can be simple or structured. Structured data can be composed of many nested layers, composed of combinations of metadata and raw data, with other simple or structured data. 
Many properties of raw data or metadata may need to be validated upon entry into the code, such as: specified quantities such as size, length, frequency, price, rate, number of operations, time, etc. implied or derived quantities, such as the actual size of a file instead of a specified size indexes, offsets, or positions into more complex data structures symbolic keys or other elements into hash tables, associative arrays, etc. well-formedness, i.e. syntactic correctness - compliance with expected syntax lexical token correctness - compliance with rules for what is treated as a token specified or derived type - the actual type of the input (or what the input appears to be) consistency - between individual data elements, between raw data and metadata, between references, etc. conformance to domain-specific rules, e.g. business logic equivalence - ensuring that equivalent inputs are treated the same authenticity, ownership, or other attestations about the input, e.g. a cryptographic signature to prove the source of the data Implied or derived properties of data must often be calculated or inferred by the code itself. Errors in deriving properties may be considered a contributing factor to improper input validation. Note that "input validation" has very different meanings to different people, or within different classification schemes. Caution must be used when referencing this CWE entry or mapping to it. For example, some weaknesses might involve inadvertently giving control to an attacker over an input when they should not be able to provide an input at all, but sometimes this is referred to as input validation. Finally, it is important to emphasize that the distinctions between input validation and output escaping are often blurred, and developers must be careful to understand the difference, including how input validation is not always sufficient to prevent vulnerabilities, especially when less stringent data types must be supported, such as free-form text. 
Consider a SQL injection scenario in which a person\'s last name is inserted into a query. The name "O\'Reilly" would likely pass the validation step since it is a common last name in the English language. However, this valid name cannot be directly inserted into the database because it contains the "\'" apostrophe character, which would need to be escaped or otherwise transformed. In this case, removing the apostrophe might reduce the risk of SQL injection, but it would produce incorrect behavior because the wrong name would be recorded.\n- Notable Vulnerable Software Vendors: [\'Fedoraproject\', \'Netapp\', \'Python\']\n\nExample {idx}: Checklist:\n[\n\t"Check Python Version: Determine the version of Python installed in the container image. The vulnerability affects Python versions before 3.11.4. Use commands like `python --version` or `python3 --version` to check the installed version. If the version is below 3.11.4, the container may be vulnerable.",\n\t"Review Python Usage: Identify if the Python installation is actively used by applications within the container. Check for scripts or applications that rely on Python, particularly those that might parse URLs using `urllib.parse`.",\n\t"Inspect URL Parsing Logic: Examine the codebase for any usage of `urllib.parse` or similar URL parsing mechanisms. Focus on how URLs are handled and whether there is a reliance on blocklisting methods to filter out potentially harmful URLs. This is crucial since the CVE describes a bypass of blocklisting methods via URLs starting with blank characters.",\n\t"Evaluate Input Validation Practices: Assess the robustness of input validation practices within the application. Since the CVE involves improper input validation, ensure that inputs, especially URLs, are properly sanitized and validated against unexpected or malicious data."\n]',
- 'Example {idx}: CVE Details:\n- CVE ID: CVE-2023-36632\n- CVE description: The legacy email.utils.parseaddr function in Python through 3.11.4 allows attackers to trigger "RecursionError: maximum recursion depth exceeded while calling a Python object" via a crafted argument. This argument is plausibly an untrusted value from an application\'s input data that was supposed to contain a name and an e-mail address. NOTE: email.utils.parseaddr is categorized as a Legacy API in the documentation of the Python email package. Applications should instead use the email.parser.BytesParser or email.parser.Parser class. NOTE: the vendor\'s perspective is that this is neither a vulnerability nor a bug. The email package is intended to have size limits and to throw an exception when limits are exceeded; they were exceeded by the example demonstration code.\n- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H\n- CWE Name: CWE-674: Uncontrolled Recursion (4.14)\n- CWE Description: The product does not properly control the amount of recursion that takes place, consuming excessive resources, such as allocated memory or the program stack.\n- Notable Vulnerable Software Vendors: [\'Python\']\n\nExample {idx}: Checklist:\n[\n\t"Identify Usage of `email.utils.parseaddr`: Review the application code within the container image to check if the `email.utils.parseaddr` function is being used. This function is the specific target of the vulnerability. Consider searching the codebase for the string \'email.utils.parseaddr\' to find direct usages.",\n\t"Assess Input Data Handling: Determine if the application using `email.utils.parseaddr` processes potentially untrusted input data that could include crafted arguments designed to exploit this vulnerability. 
Evaluate how the application handles exceptions like `RecursionError`, as the exploit triggers this specific error.",\n\t"Review Alternative Implementations: Check if the application has already migrated to recommended alternatives such as `email.parser.BytesParser` or `email.parser.Parser`. If not, recommend transitioning away from the legacy `email.utils.parseaddr` to these safer alternatives."\n]',
- "Example {idx}: CVE Details:\n- CVE ID: CVE-2023-50447\n- CVE description: Pillow through 10.1.0 allows PIL.ImageMath.eval Arbitrary Code Execution via the environment parameter, a different vulnerability than CVE-2022-22817 (which was about the expression parameter).\n- CVSS Vector: CVSS:3.1/AV:N/AC:H/PR:N/UI:N/S:U/C:H/I:H/A:H\n- CWE Name: CWE-94: Improper Control of Generation of Code ('Code Injection') (4.14)\n- CWE Description: The product constructs all or part of a code segment using externally-influenced input from an upstream component, but it does not neutralize or incorrectly neutralizes special elements that could modify the syntax or behavior of the intended code segment.\nWhen a product allows a user's input to contain code syntax, it might be possible for an attacker to craft the code in such a way that it will alter the intended control flow of the product. Such an alteration could lead to arbitrary code execution. Injection problems encompass a wide variety of issues -- all mitigated in very different ways. For this reason, the most effective way to discuss these weaknesses is to note the distinct features which classify them as injection weaknesses. The most important issue to note is that all injection problems share one thing in common -- i.e., they allow for the injection of control plane data into the user-controlled data plane. This means that the execution of the process may be altered by sending code in through legitimate data channels, using no other mechanism. While buffer overflows, and many other flaws, involve the use of some further issue to gain execution, injection problems need only for the data to be parsed. 
The most classic instantiations of this category of weakness are SQL injection and format string vulnerabilities.\n- Notable Vulnerable Software Vendors: ['Debian', 'Python']\n\nExample {idx}: Checklist:\n[\n\t\"Evaluate Use of PIL.ImageMath.eval: Specifically investigate whether any application within the container uses the PIL.ImageMath.eval function with user-supplied input in the environment parameter. This is the method and parameter where the vulnerability exists.\",\n\t\"Assess Data Input Sources: Determine if the applications using Pillow are receiving input directly from untrusted sources (e.g., user uploads, external APIs). This will help in understanding the risk exposure and potential for exploitation.\",\n\t\"Security Controls and Sanitization: Review the application's input validation and sanitization measures. Since the vulnerability allows for arbitrary code execution via code injection, ensuring that input is properly sanitized before being processed can mitigate potential exploitation.\"\n]",
- 'Example {idx}: CVE Details:\n- CVE ID: CVE-2023-5363\n- CVE description: Issue summary: A bug has been identified in the processing of key and initialisation vector (IV) lengths. This can lead to potential truncation or overruns during the initialisation of some symmetric ciphers. Impact summary: A truncation in the IV can result in non-uniqueness, which could result in loss of confidentiality for some cipher modes. When calling EVP_EncryptInit_ex2(), EVP_DecryptInit_ex2() or EVP_CipherInit_ex2() the provided OSSL_PARAM array is processed after the key and IV have been established. Any alterations to the key length, via the "keylen" parameter or the IV length, via the "ivlen" parameter, within the OSSL_PARAM array will not take effect as intended, potentially causing truncation or overreading of these values. The following ciphers and cipher modes are impacted: RC2, RC4, RC5, CCM, GCM and OCB. For the CCM, GCM and OCB cipher modes, truncation of the IV can result in loss of confidentiality. For example, when following NIST\'s SP 800-38D section 8.2.1 guidance for constructing a deterministic IV for AES in GCM mode, truncation of the counter portion could lead to IV reuse. Both truncations and overruns of the key and overruns of the IV will produce incorrect results and could, in some cases, trigger a memory exception. However, these issues are not currently assessed as security critical.\nChanging the key and/or IV lengths is not considered to be a common operation and the vulnerable API was recently introduced. Furthermore it is likely that application developers will have spotted this problem during testing since decryption would fail unless both peers in the communication were similarly vulnerable. For these reasons we expect the probability of an application being vulnerable to this to be quite low. However if an application is vulnerable then this issue is considered very serious. 
For these reasons we have assessed this issue as Moderate severity overall.\nThe OpenSSL SSL/TLS implementation is not affected by this issue. The OpenSSL 3.0 and 3.1 FIPS providers are not affected by this because the issue lies outside of the FIPS provider boundary. OpenSSL 3.1 and 3.0 are vulnerable to this issue.\n- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N\n- Notable Vulnerable Software Vendors: [\'Debian\', \'Netapp\', \'Openssl\']\n\nExample {idx}: Checklist:\n[\n\t"Identify Affected Cipher Modes: Determine if the application within the container image uses any of the affected cipher modes: RC2, RC4, RC5, CCM, GCM, or OCB. Special attention should be given to applications using CCM, GCM, and OCB modes as these are particularly noted for potential loss of confidentiality due to IV truncation.",\n\t"Review Cryptographic Operations: Examine the code or configuration of applications using OpenSSL for any instances where `EVP_EncryptInit_ex2()`, `EVP_DecryptInit_ex2()`, or `EVP_CipherInit_ex2()` are called. Check if there are any modifications to the `keylen` or `ivlen` parameters after initialization which might not be taking effect as intended.",\n\t"Check for Custom Cryptographic Implementations: Since changing the key and/or IV lengths is not a common operation and the issue is in a recently introduced API, it\'s crucial to identify if any custom cryptographic implementations might be performing such operations. This is less likely but should be checked especially in bespoke or highly customized applications."\n]',
- 'Example {idx}: CVE Details:\n- CVE ID: CVE-2024-2961\n- CVE description: The iconv() function in the GNU C Library versions 2.39 and older may overflow the output buffer passed to it by up to 4 bytes when converting strings to the ISO-2022-CN-EXT character set, which may be used to crash an application or overwrite a neighbouring variable.\n- CWE Name: CWE-787: Out-of-bounds Write (4.14)\n- CWE Description: The product writes data past the end, or before the beginning, of the intended buffer.\nTypically, this can result in corruption of data, a crash, or code execution. The product may modify an index or perform pointer arithmetic that references a memory location that is outside of the boundaries of the buffer. A subsequent write operation then produces undefined or unexpected results.\n- Notable Vulnerable Software Vendors: [\'GNU\']\n\nExample {idx}: Checklist:\n[\n\t"Identify Usage of `iconv()` Function: Review the application code or dependencies to check if the `iconv()` function is used, particularly for conversions involving the ISO-2022-CN-EXT character set. This function is the specific target of the vulnerability.",\n\t"Assess Data Handling and Boundary Conditions: Since the vulnerability involves an out-of-bounds write, it\'s crucial to analyze how data boundaries are handled in the code. Look for any custom implementations or patches that might mitigate boundary issues around buffer sizes.",\n\t"Review Application\'s Character Encoding Needs: Determine if the application specifically needs to handle the ISO-2022-CN-EXT character set. If not, consider disabling this character set or using alternative safe functions or libraries for character set conversions.",\n\t"Evaluate Network Exposure and Attack Surface: Consider whether the affected services are exposed to the network, which could increase the risk of exploitation. If the application using the `iconv()` function is accessible externally, the risk is higher."\n]',
- "Example {idx}: CVE Details:\n- CVE ID: GHSA-8ghj-p4vj-mr35\n- CVE description: An issue was discovered in Pillow before 10.0.0. It is a Denial of Service that uncontrollably allocates memory to process a given task, potentially causing a service to crash by having it run out of memory. This occurs for truetype in ImageFont when textlength in an ImageDraw instance operates on a long text argument.\n- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H\n- CWE Name: CWE-770: Allocation of Resources Without Limits or Throttling (4.14)\n- CWE Description: The product allocates a reusable resource or group of resources on behalf of an actor without imposing any restrictions on the size or number of resources that can be allocated, in violation of the intended security policy for that actor.\n- Code frequently has to work with limited resources, so programmers must be careful to ensure that resources are not consumed too quickly, or too easily. Without use of quotas, resource limits, or other protection mechanisms, it can be easy for an attacker to consume many resources by rapidly making many requests, or causing larger resources to be used than is needed. When too many resources are allocated, or if a single resource is too large, then it can prevent the code from working correctly, possibly leading to a denial of service.\n- Notable Vulnerable Software Vendors: ['Fedoraproject', 'Python']\n- GHSA Summary: Pillow Denial of Service vulnerability\n- GHSA Details: [<'first_patched_version': '10.0.0', 'package': <'ecosystem': 'pip', 'name': 'pillow'>, 'vulnerable_functions': ['PIL.ImageFont'], 'vulnerable_version_range': '>= 0, < 10.0.0'>]\n- GHSA CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H\n\nExample {idx}: Checklist:\n[\n\t\"Assess Usage of Vulnerable Functions: Specifically, the vulnerability is related to the `PIL.ImageFont` module when processing long text arguments. 
Review the application code or dependencies to see if this module and functionality are used. If your applications use this module to process user-supplied or uncontrolled text inputs, they are likely at risk.\",\n\t\"Evaluate Resource Limits: Since the vulnerability leads to a denial of service through memory exhaustion, check if there are any resource limits set at the container level (e.g., using Docker or Kubernetes settings) that might mitigate the impact of such an attack. Consider setting or reviewing memory limits to prevent a single container from consuming all available system resources.\"\n]",
-]
+3. code_not_reachable - Vulnerable code exists but is never executed at runtime
+ (Only applicable if code IS present but execution path analysis shows no calls)
-FEW_SHOT = """Generate a checklist for a security analyst to use when assessing the exploitability of a specific CVE within a containerized environment. Use the provided examples as a guide to understand how to construct a checklist from a given set of CVE details, then apply this understanding to create a specific checklist for the CVE details provided below. All output should be a comma separated list enclosed in square brackets with each list item enclosed in quotes.
+4. requires_configuration - Exploitation requires specific configuration that is disabled
+ (Configuration prevents exploitation)
-Example 1: CVE Details:
-- CVE ID: CVE-2024-23334
-- CVE Description: aiohttp is an asynchronous HTTP client/server framework for asyncio and Python. When using aiohttp as a web server and configuring static routes, it is necessary to specify the root path for static files. Additionally, the option 'follow_symlinks' can be used to determine whether to follow symbolic links outside the static root directory. When 'follow_symlinks' is set to True, there is no validation to check if reading a file is within the root directory. This can lead to directory traversal vulnerabilities, resulting in unauthorized access to arbitrary files on the system, even when symlinks are not present. Disabling follow_symlinks and using a reverse proxy are encouraged mitigations. Version 3.9.2 fixes this issue.
-- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N
-- CWE Name: CWE-22: Improper Limitation of a Pathname to a Restricted Directory ('Path Traversal') (4.14)
-- CWE Description: The product uses external input to construct a pathname that is intended to identify a file or directory that is located underneath a restricted parent directory, but the product does not properly neutralize special elements within the pathname that can cause the pathname to resolve to a location that is outside of the restricted directory.
-Many file operations are intended to take place within a restricted directory. By using special elements such as ".." and "/" separators, attackers can escape outside of the restricted location to access files or directories that are elsewhere on the system. One of the most common special elements is the "../" sequence, which in most modern operating systems is interpreted as the parent directory of the current location. This is referred to as relative path traversal. Path traversal also covers the use of absolute pathnames such as "/usr/local/bin", which may also be useful in accessing unexpected files. This is referred to as absolute path traversal. In many programming languages, the injection of a null byte (the 0 or NUL) may allow an attacker to truncate a generated filename to widen the scope of attack. For example, the product may add ".txt" to any pathname, thus limiting the attacker to text files, but a null injection may effectively remove this restriction.
-- Notable Vulnerable Software Vendors: ['Aiohttp', 'Fedoraproject']
+5. requires_dependency - Exploitation requires a dependency that is missing
-Example 1: Checklist:
-[
- "Vulnerable package check. Does the project use the aiohttp library, which is the affected package? If aiohttp is not a dependency in your project, then your code is not vulnerable to this CVE.",
- "Vulnerable version check. Is the version of aiohttp that the project depends on vulnerable? According to the vulnerability details, versions before 3.9.2 are vulnerable.,
- "Review code to check for vulnerability mitigation. Is the 'follow_symlinks' option set to False to mitigate the risk of directory traversal vulnerabilities?"
-]
+6. requires_environment - Exploitation requires specific environment that is absent
-Example 2: CVE Details:
-- CVE ID: CVE-2022-2309
-- CVE description: NULL Pointer Dereference allows attackers to cause a denial of service (or application crash). This only applies when lxml (version 4.9.0 and earlier) is used together with libxml2 2.9.10 through 2.9.14. libxml2 2.9.9 and earlier are not affected. It allows triggering crashes through forged input data, given a vulnerable code sequence in the application. The vulnerability is caused by the iterwalk function (also used by the canonicalize function). Such code shouldn't be in wide-spread use, given that parsing + iterwalk would usually be replaced with the more efficient iterparse function. However, an XML converter that serialises to C14N would also be vulnerable, for example, and there are legitimate use cases for this code sequence. If untrusted input is received (also remotely) and processed via iterwalk function, a crash can be triggered.
-- CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
-- CWE Name: CWE-476: NULL Pointer Dereference (4.14)
-- CWE Description: A NULL pointer dereference occurs when the application dereferences a pointer that it expects to be valid, but is NULL, typically causing a crash or exit.
-NULL pointer dereference issues can occur through a number of flaws, including race conditions, and simple programming omissions.
-- Notable Vulnerable Software Vendors: ['Fedoraproject', 'Lxml', 'Xmlsoft']
+7. compiler_protected - Compiler flags prevent exploitation
-Example 2: Checklist:
-[
- “Vulnerable package check. Does the project use the lxml library, which is the affected package? If lxml is not a dependency in your project, then your code is not vulnerable to this CVE.”,
- “Vulnerable version check. Is the version of lxml that the project depends on vulnerable? According to the vulnerability details, versions 4.9.0 and earlier are vulnerable.”,
- “Vulnerable version check of connected dependency. Is the version of libxml, the connected dependency, that the project depends on vulnerable? The package is only vulnerable if libxml 2.9.10 through 2.9.14 is also present.”,
- “Review code for vulnerable functionality. The library is vulnerable through its `iterwalk` function, which is also utilized by the `canonicalize` function. Are either of these functions used in your code base?”
-]
+8. runtime_protected - Runtime mechanisms (ASLR, DEP, sandboxing) prevent exploitation
-Given CVE Details:
-""" + additional_intel_prompting
+9. perimeter_protected - Network/physical/logical perimeter defenses block exploitation
-MOD_FEW_SHOT = """
-Generate an investigation checklist for assessing CVE exploitability in a
-containerized environment. Your output must be a comma-separated list enclosed
-in square brackets, with each item enclosed in quotes.
-
+10. mitigating_control_protected - Other security controls reduce risk
-
-Create 3-5 checklist items that meet these requirements:
+11. uncertain - Insufficient information to determine exploitability
-1. STRUCTURE: Each item must be a clear, answerable question
- - Start with interrogative words: Is/Does/Are/Can/Has/etc.
- - Be specific and actionable
- - Include relevant context from the CVE
+12. vulnerable - Package is actually vulnerable and needs patching
-2. CONTENT PRIORITIES:
- - If the CVE mentions a specific vulnerable function or method, the FIRST
- checklist item must verify whether that function is called or imported
- in the codebase
- - Focus on exploitability factors (version presence is already confirmed)
- - Include specific technical names from the CVE (functions, libraries,
- configurations, cipher modes, etc.)
- - Consider the attack vector (network exposure, user input, file processing, etc.)
- - Address relevant security controls or mitigations
+EXPLOITATION CONDITIONS:
+For a CVE to be classified as "vulnerable", ALL of these must be true:
+- Vulnerable code is PRESENT in the container
+- Vulnerable code is USED/CALLED by the application
+- Vulnerable code is REACHABLE from attack surfaces (user input, network, file processing)
+- No effective mitigations or protections are in place
-3. INVESTIGATION TOOLS AVAILABLE:
- {tool_descriptions}
-
- Design questions that can be answered using these analysis capabilities.
-
-4. COMPLETENESS:
- - Cover the vulnerability chain: presence → usage → exploitability
- - Each item should independently contribute to understanding exploit risk
+IF EXPLOITATION CONDITIONS ARE NOT MET:
+Select the PRIMARY reason for non-exploitability following the logical precedence
+order above. For example:
+- If code is not present: "code_not_present" (even if other factors would also prevent it)
+- If code is not reachable: "code_not_reachable" (not "requires_environment")
+- If missing dependency prevents it: "requires_dependency"
+
+The categories are ordered by logical precedence. Work through the list from top
+to bottom and select the first category that applies to the situation.
+
+Use "uncertain" only as a final fallback when the investigation truly lacks
+sufficient information to make any determination.
-
-{examples}
-
+
+Provide exactly two lines:
-
-"""
+Line 1: category_name (exact category name from the list above)
+Line 2: reasoning (brief explanation citing specific evidence from the summary)
-investigation_guideline = """1. Is the flagged component in the product? Determine if the container image includes the flagged package version. Verify the presence of the library or software in question by checking the container's software bill of materials (SBOM).
-2. Is the vulnerable code of the flagged component in the product? Check if an application or any dependency within the container image uses a function or a component of the library that contains the vulnerability. Check whether a patch has been applied or if the vulnerable code has been removed.
-3. Is the vulnerable code of the flagged component executed by the product? Analyze the application's execution paths to confirm whether the vulnerable code is executed during normal operation. Examine the application's dependencies to ensure there are no indirect execution paths that could trigger the vulnerable code.
-4. If the vulnerable code is executed, is it exploitable? Investigate whether the exploitability of the issue depends on a specific configuration option and if this configuration is enabled.
-5. If the vulnerable code is executed, is it exploitable? Determine if the exploitability relies on a library dependency and verify the existence of this dependency in the product.
-6. If the vulnerable code is executed, is it exploitable? Research if the exploitability of the issue depends on a specific environment and verify whether this environment is absent or present in the container.
-7. Does other protection exist? Compiler Flags: Assess whether the exploitability hinges on the setting or unsetting of compiler flags.
-8. Does other protection exist? Runtime Protections: Investigate the presence of mechanisms that prevent exploits during runtime.
-9. Does other protection exist? Perimeter Defenses: Explore protective measures that block attacks at the physical, logical, or network perimeters.
-10. Does other protection exist? Mitigating Controls: Identify any mitigating controls in place to prevent exploitability.
-"""
+Do not include labels like "Category:" or "Reasoning:". Just the values on separate lines.
+
+
+
+code_not_reachable
+The vulnerable PIL.ImageMath.eval function exists in the installed Pillow library, but call chain analysis confirmed it is never invoked from application code. The application only uses PIL.Image.open() and PIL.Image.thumbnail() functions, which do not call ImageMath.eval.
+
-ZERO_SHOT = f"""Your task is to create a checklist for a security analyst to help determine if a given CVE is
-exploitable in a containerized environment. Use the information provided below, structured within XML tags for both CVE
-details and investigation guidelines. The checklist should convert the steps from the investigation guidelines into
-actionable steps specific to the CVE. Each step should be self-contained, including details specific to the CVE, and
-start with an action verb to clearly state the step the analyst needs to take. Avoid references to the CVE ID, focusing
-on providing detailed investigative directions within the steps themselves.
-All output should be a comma separated list enclosed in square brackets with each list item enclosed in quotes.\n
-{additional_intel_prompting}\n{investigation_guideline}"""
-
-_ONE_SHOT = """This is an example of (1) CVE information, and (2) a checklist produced to determine if a given CVE is exploitable in a containerized environment:
-(1) CVE Information:
-CVE Description: DISPUTED: In Apache Batik 1.x before 1.10, when deserializing subclass of `AbstractDocument`, the class takes a string from the inputStream as the class name which then use it to call the no-arg constructor of the class. Fix was to check the class type before calling newInstance in deserialization.
-CVSS Vector: CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
-CWE Name: CWE-502: Deserialization of Untrusted Data (4.11)
-CWE Description: The product deserializes untrusted data without sufficiently verifying that the resulting data will be valid. It is often convenient to serialize objects for communication or to save them for later use. However, deserialized data or code can often be modified without using the provided accessor functions if it does not use cryptography to protect itself. Furthermore, any cryptography would still be client-side security -- which is a dangerous security assumption. Data that is untrusted can not be trusted to be well-formed. When developers place no restrictions on gadget chains, or series of instances and method invocations that can self-execute during the deserialization process (i.e., before the object is returned to the caller), it is sometimes possible for attackers to leverage them to perform unauthorized actions, like generating a shell.
-Notable Vulnerable Software Vendors: ['Oracle', 'Apache', 'Canonical', 'Debian']
-(2) Checklist:
-Based on the information available, CVE-2018-8013 is a critical vulnerability that involves the deserialization of untrusted data in Apache Batik before version 1.10. The Common Vulnerability Scoring System (CVSS) 3.x score is 9.8, indicating a critical severity.
-All output should be a comma separated list enclosed in square brackets with each list item enclosed in quotes.
-Here's an example of preliminary checklist similiar to what you should create to determine if a container image is vulnerable to this exploit:
-["1. Check the Apache Batik version: The vulnerability affects Apache Batik versions 1.0 to 1.9.1. If your container image is running Apache Batik, verify the version. If it's between 1.0 and 1.9.1, it's vulnerable and you should update to version 1.10 or later.",
-"2. Check for any software using vulnerable Apache Batik versions: It's important to note that other applications might use Apache Batik and thus be vulnerable. Check all applications within your container image for dependencies on vulnerable versions of Apache Batik. Some notable software includes certain versions of Ubuntu Linux, Debian Linux, and multiple Oracle applications.",
-"3. Evaluate the deserialization risk: Since the vulnerability involves deserialization of untrusted data, you should consider whether your applications are performing such operations. If they are, consider if these deserialization operations are using untrusted, user-supplied data. If so, this is a potential attack vector.",
-"4. Network exposure: The vulnerability has a network attack vector. Consider whether your container image has network exposure, especially if the Apache Batik service or any service using it is exposed to the internet.",
-"5. Disputed status: CVE-2018-8013 is disputed and may not be a vulnerability."]
-
-Given the following information about {{cve_id}}, make preliminary checklist for a security analyst to follow to determine whether a container image is vulnerable to this exploit.
-""" + additional_intel_prompting
+
+{summary}
+
+Provide your justification classification and reasoning on two separate lines:"""
-def get_mod_examples(type='questions', choices=[0, 1]):
- if type == 'questions':
- ex_list = [q for idx, q in enumerate(ex_questions) if idx in choices]
- else:
- ex_list = [s for idx, s in enumerate(ex_statements) if idx in choices]
- examples = '\n'.join(q.format(idx=idx + 1) for idx, q in enumerate(ex_list))
- return examples
+def get_justification_chat_prompt():
+    """
+    Build the ChatPromptTemplate for the justification stage (with /no_think).
+
+    Returns:
+        ChatPromptTemplate made of the system prompt (includes /no_think) and the user prompt
+    """
+    from langchain_core.prompts import ChatPromptTemplate
+
+    system_message = ("system", JUSTIFICATION_SYS_PROMPT)
+    user_message = ("user", JUSTIFICATION_USER_PROMPT)
+    return ChatPromptTemplate.from_messages([system_message, user_message])
diff --git a/src/vuln_analysis/utils/thinking_aware_parser.py b/src/vuln_analysis/utils/thinking_aware_parser.py
new file mode 100644
index 00000000..c08f19ec
--- /dev/null
+++ b/src/vuln_analysis/utils/thinking_aware_parser.py
@@ -0,0 +1,214 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Custom ReAct output parser that handles verbose thinking modes."""
+
+import re
+import logging
+from typing import Union
+
+from langchain.agents.mrkl.output_parser import MRKLOutputParser
+from langchain_core.agents import AgentAction, AgentFinish
+from langchain_core.exceptions import OutputParserException
+
+logger = logging.getLogger(__name__)
+
+
+class ThinkingAwareMRKLParser(MRKLOutputParser):
+    """
+    Enhanced MRKL parser that handles thinking mode output from verbose thinking mode models.
+
+    This parser strips thinking tags and cleans up verbose output while preserving
+    the core ReAct format (Action: / Action Input:) required by the agent executor.
+
+    Verbose thinking mode models can inject think/thinking tag pairs or verbose explanations that
+    break the strict Action/Action Input format expected by MRKLOutputParser.
+    This parser cleans those artifacts before delegating to the parent parser.
+    """
+
+    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
+        """
+        Parse ReAct format output, handling thinking mode tags and verbosity.
+
+        Args:
+            text: Raw LLM output potentially containing thinking tags
+
+        Returns:
+            AgentAction or AgentFinish
+
+        Raises:
+            OutputParserException: If format cannot be parsed after cleanup
+        """
+        original_text = text
+        cleaned = False
+        # Paired think/thinking elements; the backreference keeps opening and closing names matched
+        think_block = re.compile(r'<(think(?:ing)?)>.*?</\1>', re.DOTALL | re.IGNORECASE)
+
+        # Step 1: Remove thinking tags and their content
+        if '<think' in text.lower():
+            text = think_block.sub('', text)
+            cleaned = True
+
+        # Step 2: Remove explicit "Thinking:" sections that might appear
+        if re.search(r'Thinking:', text, re.IGNORECASE):
+            text = re.sub(
+                r'Thinking:.*?(?=\n(Thought|Action|Final Answer|Observation|$))',
+                '',
+                text,
+                flags=re.DOTALL | re.IGNORECASE
+            )
+            cleaned = True
+
+        # Step 3: Clean Action line - ensure it contains ONLY the tool name
+        action_match = re.search(r'^Action:\s*(.+?)$', text, re.MULTILINE)
+        if action_match:
+            action_line_full = action_match.group(1).strip()
+
+            # Extract just the tool name (first line only), removing explanations
+            tool_name = action_line_full.split('\n')[0].strip()
+
+            # Remove parenthetical explanations: "Tool (because X)" → "Tool"
+            tool_name = re.sub(r'\s*\([^)]*\)', '', tool_name)
+
+            # Remove explanations after comma/dash/colon
+            tool_name = tool_name.split(',')[0].split('--')[0].split(':')[0].strip()
+
+            # Remove any thinking tags within the action line
+            tool_name = think_block.sub('', tool_name).strip()
+
+            # If we cleaned the action line, replace it (callable repl inserts the value literally)
+            if tool_name != action_line_full:
+                text = re.sub(
+                    r'^Action:\s*.+?$',
+                    lambda _match: f'Action: {tool_name}',
+                    text,
+                    count=1,
+                    flags=re.MULTILINE
+                )
+                cleaned = True
+
+        # Step 4: Clean Action Input line - ensure it contains ONLY the input value
+        input_match = re.search(r'^Action Input:\s*(.+?)$', text, re.MULTILINE)
+        if input_match:
+            input_line_full = input_match.group(1).strip()
+
+            # Take first line only
+            action_input = input_line_full.split('\n')[0].strip()
+
+            # Remove leading explanatory text like "search for X" → "X"
+            # But keep the actual input intact
+            if action_input.lower().startswith(('search for ', 'look for ', 'find ')):
+                action_input = re.sub(r'^(search for|look for|find)\s+', '', action_input, flags=re.IGNORECASE)
+
+            # Remove thinking tags
+            action_input = think_block.sub('', action_input).strip()
+
+            # If we cleaned the input line, replace it (callable repl keeps backslashes in the input literal)
+            if action_input != input_line_full:
+                text = re.sub(
+                    r'^Action Input:\s*.+?$',
+                    lambda _match: f'Action Input: {action_input}',
+                    text,
+                    count=1,
+                    flags=re.MULTILINE
+                )
+                cleaned = True
+
+        # Step 5: Clean up multiple consecutive blank lines
+        text = re.sub(r'\n\s*\n+', '\n\n', text)
+
+        # Log if we cleaned anything
+        if cleaned:
+            logger.debug(
+                "Cleaned thinking mode artifacts from ReAct output. "
+                "Original length: %d, Cleaned length: %d",
+                len(original_text),
+                len(text)
+            )
+            logger.debug("Cleaned output preview: %s...", text[:300])
+
+        # Step 6: Delegate to parent MRKL parser
+        try:
+            return super().parse(text)
+        except OutputParserException as e:
+            # Enhanced error logging
+            logger.error(
+                "Failed to parse ReAct output even after thinking mode cleanup. "
+                "Error: %s\nCleaned text: %s\nOriginal text: %s",
+                str(e),
+                text[:500],
+                original_text[:500]
+            )
+            raise
+
+
+def create_thinking_aware_error_handler(tool_names: list[str]):
+    """
+    Create an enhanced error handler that provides specific guidance for thinking mode issues.
+
+    Args:
+        tool_names: List of available tool names for validation hints
+
+    Returns:
+        Error handler function for AgentExecutor
+    """
+    tool_names_str = ", ".join(tool_names)
+
+    def handle_parse_error(exception: OutputParserException) -> str:
+        """Handle parsing errors with thinking-mode-aware guidance"""
+        message = str(exception).lower()  # lowercase once; every check below is case-insensitive
+
+        # Detect thinking mode interference ('<think' also covers a longer "thinking" tag name)
+        if ('<think' in message or
+                'thinking:' in message):
+            return (
+                "FORMAT ERROR: Thinking content interfered with Action format.\n\n"
+                "CORRECT FORMAT:\n"
+                "Thought: [All your reasoning here - be as verbose as needed]\n"
+                "Action: [Exact tool name only]\n"
+                "Action Input: [Input value only]\n\n"
+                "Put ALL thinking and explanations in the 'Thought:' section.\n"
+                "The 'Action:' and 'Action Input:' lines must be clean.\n\n"
+                f"Available tools: {tool_names_str}"
+            )
+
+        # Detect action + final answer confusion
+        if "both a final answer and a parse-able action" in message:
+            return (
+                "FORMAT ERROR: Cannot output both Action and Final Answer.\n\n"
+                "If ready to answer:\n"
+                "Thought: I now know the final answer\n"
+                "Final Answer: [your detailed answer with evidence]"
+            )
+
+        # Detect missing Action/Action Input
+        if "could not parse" in message:
+            return (
+                "FORMAT ERROR: Could not find valid Action and Action Input.\n\n"
+                "Required format:\n"
+                "Action: [exact tool name]\n"
+                "Action Input: [input value]\n\n"
+                f"Available tools: {tool_names_str}\n\n"
+                "Ensure your tool name exactly matches one from the list above."
+            )
+
+        # Generic fallback
+        return (
+            "Format error. Each line must start with: Thought:, Action:, Action Input:, or Final Answer:\n"
+            f"Available tools: {tool_names_str}"
+        )
+
+    return handle_parse_error
+