Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/GitHub-Issue-Debug-Guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ After execution, the script outputs results in JSON format, including the follow
"prometheus_result": {
"patch": "Generated code patch",
"passed_reproducing_test": true,
"passed_build": true,
"passed_existing_test": false,
"passed_regression_test": true,
"passed_reproduction_test": true,
Expand Down
17 changes: 13 additions & 4 deletions prometheus/app/api/routes/issue.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,18 @@ async def answer_issue(issue: IssueRequest, request: Request) -> Response[IssueR
code=400,
message="workdir must be provided for user defined environment",
)

# Validate build and test commands if required
if issue.run_build and not issue.build_commands:
raise ServerException(
code=400, message="No build commands available, please provide build commands"
)

if issue.run_existing_test and not issue.test_commands:
raise ServerException(
code=400, message="No test commands available, please provide test commands"
)

# Ensure the repository is not currently being used
if repository.is_working:
raise ServerException(
Expand All @@ -83,7 +95,6 @@ async def answer_issue(issue: IssueRequest, request: Request) -> Response[IssueR
(
patch,
passed_reproducing_test,
passed_build,
passed_regression_test,
passed_existing_test,
issue_response,
Expand Down Expand Up @@ -115,12 +126,11 @@ async def answer_issue(issue: IssueRequest, request: Request) -> Response[IssueR
if (
patch,
passed_reproducing_test,
passed_build,
passed_regression_test,
passed_existing_test,
issue_response,
issue_type,
) == (None, False, False, False, False, None, None):
) == (None, False, False, False, None, None):
raise ServerException(
code=500,
message="Failed to process the issue. Please try again later.",
Expand All @@ -135,7 +145,6 @@ async def answer_issue(issue: IssueRequest, request: Request) -> Response[IssueR
data=IssueResponse(
patch=patch,
passed_reproducing_test=passed_reproducing_test,
passed_build=passed_build,
passed_regression_test=passed_regression_test,
passed_existing_test=passed_existing_test,
issue_response=issue_response,
Expand Down
1 change: 0 additions & 1 deletion prometheus/app/models/response/issue.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
class IssueResponse(BaseModel):
patch: str | None = None
passed_reproducing_test: bool
passed_build: bool
passed_regression_test: bool
passed_existing_test: bool
issue_response: str | None = None
Expand Down
12 changes: 4 additions & 8 deletions prometheus/app/services/issue_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,7 @@ def answer_issue(
dockerfile_content: Optional[str] = None,
image_name: Optional[str] = None,
workdir: Optional[str] = None,
) -> (
tuple[None, bool, bool, bool, bool, None, None]
| tuple[str, bool, bool, bool, bool, str, IssueType]
):
) -> tuple[None, bool, bool, bool, None, None] | tuple[str, bool, bool, bool, str, IssueType]:
"""
Processes an issue, generates patches if needed, runs optional builds and tests, and returns the results.

Expand All @@ -76,9 +73,10 @@ def answer_issue(
Tuple containing:
- edit_patch (str): The generated patch for the issue.
- passed_reproducing_test (bool): Whether the reproducing test passed.
- passed_build (bool): Whether the build passed.
- passed_regression_test (bool): Whether the regression tests passed.
- passed_existing_test (bool): Whether the existing tests passed.
- issue_response (str): Response generated for the issue.
- issue_type (IssueType): The type of the issue (BUG or QUESTION).
"""

# Set up a dedicated logger for this thread
Expand Down Expand Up @@ -111,7 +109,6 @@ def answer_issue(
kg=knowledge_graph,
git_repo=repository,
container=container,
build_commands=build_commands,
test_commands=test_commands,
)

Expand All @@ -131,15 +128,14 @@ def answer_issue(
return (
output_state["edit_patch"],
output_state["passed_reproducing_test"],
output_state["passed_build"],
output_state["passed_regression_test"],
output_state["passed_existing_test"],
output_state["issue_response"],
output_state["issue_type"],
)
except Exception as e:
logger.error(f"Error in answer_issue: {str(e)}\n{traceback.format_exc()}")
return None, False, False, False, False, None, None
return None, False, False, False, None, None
finally:
logger.removeHandler(file_handler)
file_handler.close()
22 changes: 11 additions & 11 deletions prometheus/docker/base_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ def update_files(
Creates a tar archive of the new files and copies them into the workdir of the container.

Args:
new_project_path: Path to the directory containing new files.
project_root_path: Path to the project root directory.
updated_files: List of file paths (relative to project_root_path) to update in the container.
removed_files: List of file paths (relative to project_root_path) to remove from the container.
"""
if not project_root_path.is_absolute():
raise ValueError("project_root_path {project_root_path} must be a absolute path")
Expand Down Expand Up @@ -157,10 +159,10 @@ def execute_command(self, command: str) -> str:
{command} timeout after {self.timeout} seconds
*******************************************************************************
"""
timeout_command = f"timeout -k 5 {self.timeout}s {command}"
command = f'/bin/bash -l -c "{timeout_command}"'
bash_cmd = ["/bin/bash", "-lc", command]
full_cmd = ["timeout", "-k", "5", f"{self.timeout}s", *bash_cmd]
self._logger.debug(f"Running command in container: {command}")
exec_result = self.container.exec_run(command, workdir=self.workdir)
exec_result = self.container.exec_run(full_cmd, workdir=self.workdir)
exec_result_str = exec_result.output.decode("utf-8")

if exec_result.exit_code in (124, 137):
Expand All @@ -169,13 +171,11 @@ def execute_command(self, command: str) -> str:
self._logger.debug(f"Command output:\n{exec_result_str}")
return exec_result_str

def restart_container(self):
    """Stop and remove the current container, if any, then call start_container().

    NOTE(review): indentation was lost in this view; start_container() is
    assumed to run unconditionally after the optional teardown -- confirm.
    """
    self._logger.info("Restarting the container")
    if self.container:
        # Ask the container to stop (timeout=10), then force-remove it.
        self.container.stop(timeout=10)
        self.container.remove(force=True)

    self.start_container()
def reset_repository(self):
    """Return the container's git working tree to a clean state.

    Issues ``git reset --hard`` followed by ``git clean -fd`` through
    ``execute_command``; the command output is discarded.
    """
    self._logger.info("Resetting git repository in the container")
    # Order matters: reset tracked files first, then clean untracked ones.
    for git_command in ("git reset --hard", "git clean -fd"):
        self.execute_command(git_command)

def cleanup(self):
"""Clean up container resources and temporary files.
Expand Down
2 changes: 0 additions & 2 deletions prometheus/lang_graph/graphs/issue_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def __init__(
kg: KnowledgeGraph,
git_repo: GitRepository,
container: BaseContainer,
build_commands: Optional[Sequence[str]] = None,
test_commands: Optional[Sequence[str]] = None,
):
self.git_repo = git_repo
Expand All @@ -52,7 +51,6 @@ def __init__(
container=container,
kg=kg,
git_repo=git_repo,
build_commands=build_commands,
test_commands=test_commands,
)

Expand Down
1 change: 0 additions & 1 deletion prometheus/lang_graph/graphs/issue_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class IssueState(TypedDict):

passed_regression_test: bool
passed_reproducing_test: bool
passed_build: bool
passed_existing_test: bool

issue_response: str
37 changes: 25 additions & 12 deletions prometheus/lang_graph/nodes/context_extraction_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,16 @@

HUMAN_MESSAGE = """\
This is the original user query:

--- BEGIN ORIGINAL QUERY ---
{original_query}
--- END ORIGINAL QUERY ---

The context or file content that you have seen so far (Some of the context may be IRRELEVANT to the query!!!):

--- BEGIN CONTEXT ---
{context}
--- END CONTEXT ---

REMEMBER: Your task is to summarize the relevant contexts to a given query and return it in the specified format!
"""
Expand Down Expand Up @@ -112,16 +118,6 @@ def __init__(self, model: BaseChatModel, root_path: str):
f"thread-{threading.get_ident()}.prometheus.lang_graph.nodes.context_extraction_node"
)

def get_human_message(self, state: ContextRetrievalState) -> str:
    """Render HUMAN_MESSAGE from the query and the most recent tool output.

    Args:
        state: Retrieval state; reads ``context_provider_messages`` and ``query``.

    Returns:
        The HUMAN_MESSAGE template filled with the original query and the
        stringified last tool messages.
    """
    latest_tool_messages = extract_last_tool_messages(state["context_provider_messages"])
    context_text = transform_tool_messages_to_str(latest_tool_messages)
    return HUMAN_MESSAGE.format(original_query=state["query"], context=context_text)

def __call__(self, state: ContextRetrievalState):
"""
Extract relevant code contexts from the codebase based on the user query and existing context.
Expand All @@ -130,9 +126,26 @@ def __call__(self, state: ContextRetrievalState):
self._logger.info("Starting context extraction process")
# Get Context List with existing context
final_context = state.get("context", [])
# Get a human message
human_message = self.get_human_message(state)

# Transform the tool messages to a single string
full_context_str = transform_tool_messages_to_str(
extract_last_tool_messages(state["context_provider_messages"])
)

# return existing context if no new context is available
if not full_context_str:
self._logger.debug(
"No context available from tool messages, returning existing context"
)
return {"context": final_context}

# Format the human message
human_message = HUMAN_MESSAGE.format(
original_query=state["query"],
context=full_context_str,
)
self._logger.debug(human_message)

# Summarize the context based on the last messages and system prompt
response = self.model.invoke({"human_prompt": human_message})
self._logger.debug(f"Model response: {response}")
Expand Down
11 changes: 8 additions & 3 deletions prometheus/lang_graph/nodes/context_retrieval_subgraph_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Dict, Sequence

from langchain_core.language_models.chat_models import BaseChatModel
from langgraph.errors import GraphRecursionError

from prometheus.graph.knowledge_graph import KnowledgeGraph
from prometheus.lang_graph.subgraphs.context_retrieval_subgraph import ContextRetrievalSubgraph
Expand Down Expand Up @@ -31,8 +32,12 @@ def __init__(

def __call__(self, state: Dict) -> Dict[str, Sequence[Context]]:
self._logger.info("Enter context retrieval subgraph")
output_state = self.context_retrieval_subgraph.invoke(
state[self.query_key_name], state["max_refined_query_loop"]
)
try:
output_state = self.context_retrieval_subgraph.invoke(
state[self.query_key_name], state["max_refined_query_loop"]
)
except GraphRecursionError as e:
self._logger.debug("Graph recursion limit reached, returning empty context")
raise e
self._logger.info(f"Context retrieved: {output_state['context']}")
return {self.context_key_name: output_state["context"]}
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,6 @@ def format_human_message(self, state: Dict):
)
elif "reproducing_test_fail_log" in state and state["reproducing_test_fail_log"]:
edit_error = f"The patch failed to pass the bug exposing test cases:\n{state['reproducing_test_fail_log']}"
elif "build_fail_log" in state and state["build_fail_log"]:
edit_error = f"The patch failed to pass the build:\n{state['build_fail_log']}"
elif "existing_test_fail_log" in state and state["existing_test_fail_log"]:
edit_error = (
f"The patch failed to existing test cases:\n{state['existing_test_fail_log']}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class IssueBugReproductionContextMessageNode:
BUG_REPRODUCING_QUERY = """\
{issue_info}

OBJECTIVE: Find three relevant existing test cases that demonstrates similar functionality to the reported bug,
OBJECTIVE: Find 5 relevant existing test cases that demonstrates similar functionality to the reported bug,
including ALL necessary imports, test setup, mocking, assertions, and any test method used in the test case.

<reasoning>
Expand All @@ -36,7 +36,7 @@ class IssueBugReproductionContextMessageNode:
</reasoning>

REQUIREMENTS:
- Return THREE complete, self-contained test cases most similar to bug scenario
- Return 5 complete, self-contained test cases most similar to bug scenario
- Must include ALL necessary imports at the start of each test file
- Must include full test method implementation
- Must include ALL mock/fixture setup
Expand Down Expand Up @@ -105,7 +105,7 @@ def test_file_permission_denied(self, mock_open, mock_access):
3. Tests with comparable mocking patterns
4. Tests demonstrating similar assertions

Find the THREE most relevant test cases with complete context, ensuring ALL necessary imports are included at the start of each test file.
Find the 5 most relevant test cases with complete context, ensuring ALL necessary imports are included at the start of each test file.
"""

def __init__(self):
Expand Down
10 changes: 5 additions & 5 deletions prometheus/lang_graph/nodes/issue_bug_responder_node.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
import threading
from typing import Dict

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import HumanMessage, SystemMessage

from prometheus.lang_graph.subgraphs.issue_bug_state import IssueBugState
from prometheus.utils.issue_util import format_issue_info


Expand Down Expand Up @@ -53,15 +53,15 @@ def __init__(self, model: BaseChatModel):
f"thread-{threading.get_ident()}.prometheus.lang_graph.nodes.issue_bug_responder_node"
)

def format_human_message(self, state: Dict) -> HumanMessage:
def format_human_message(self, state: IssueBugState) -> HumanMessage:
verification_messages = []

# We only report successful verifications that were performed
if state["passed_reproducing_test"]:
verification_messages.append("✓ The bug reproducing test passed")

if state["passed_build"]:
verification_messages.append("✓ Build passes successfully")
if state["passed_regression_test"]:
verification_messages.append("✓ All selected regression tests passes successfully")

if state["passed_existing_test"]:
verification_messages.append("✓ All existing tests pass successfully")
Expand All @@ -78,7 +78,7 @@ def format_human_message(self, state: Dict) -> HumanMessage:

return HumanMessage(content=formatted_message)

def __call__(self, state: Dict):
def __call__(self, state: IssueBugState):
messages = [
self.system_prompt,
self.format_human_message(state),
Expand Down
Loading