diff --git a/pyproject.toml b/pyproject.toml
index 0733f20..a0a9cec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ packages = ["src/copra"]
[project]
name = "copra-theorem-prover"
-version = "1.5.0"
+version = "1.6.0"
authors = [
{ name="Amitayush Thakur", email="amitayush@utexas.edu" },
]
diff --git a/src/copra/agent/dfs_policy_prompter.py b/src/copra/agent/dfs_policy_prompter.py
index d97cc5c..4e96301 100644
--- a/src/copra/agent/dfs_policy_prompter.py
+++ b/src/copra/agent/dfs_policy_prompter.py
@@ -10,7 +10,6 @@
from copra.agent.rate_limiter import InvalidActionException
from copra.agent.simple_policy_prompter import SimplePolicyPrompter
from copra.agent.gpt_guided_tree_search_policy import PromptSummary, ProofQInfo, TreeSearchAction, TreeSearchActionType
-from copra.gpts.llama_access import ServiceDownError
from copra.retrieval.coq_bm25_reranker import CoqBM25TrainingDataRetriever
from copra.prompt_generator.gpt_request_grammar import CoqGPTRequestGrammar, CoqGptRequest, CoqGptRequestActions
from copra.prompt_generator.dfs_agent_grammar import DfsAgentGrammar
@@ -327,10 +326,6 @@ def run_prompt(self, request: CoqGptResponse) -> list:
# don't change temperature for now
self._num_api_calls += 1
- except ServiceDownError as e:
- self.logger.info("Got a service down error. Will giveup until the docker container is restarted.")
- self.logger.exception(e)
- raise
except Exception as e:
self.logger.info("Got an unknown exception. Retrying.")
self.logger.exception(e)
diff --git a/src/copra/agent/simple_policy_prompter.py b/src/copra/agent/simple_policy_prompter.py
index a7490c6..6fdc386 100644
--- a/src/copra/agent/simple_policy_prompter.py
+++ b/src/copra/agent/simple_policy_prompter.py
@@ -12,7 +12,6 @@
import logging
from copra.agent.rate_limiter import RateLimiter
from copra.gpts.gpt_access import GptAccess
-from copra.gpts.llama_access import LlamaAccess, ServiceDownError
from copra.prompt_generator.prompter import PolicyPrompter
from copra.tools.misc import model_supports_openai_api, is_vllm_model
@@ -74,10 +73,7 @@ def __init__(
# Initialize LLM access (GptAccess or LlamaAccess)
# Note: vLLM models (with "vllm:" prefix) are handled by GptAccess
- if not model_supports_openai_api(gpt_model_name):
- self._gpt_access = LlamaAccess(gpt_model_name)
- else:
- self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name)
+ self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name)
# Get model configuration
# For vLLM models, use the generic "vllm" key in model_info
@@ -104,14 +100,11 @@ def __init__(
def __enter__(self):
"""Context manager entry - initialize LLM service if needed."""
- if isinstance(self._gpt_access, LlamaAccess):
- self._gpt_access.__enter__()
return self
def __exit__(self, exc_type, exc_value, traceback):
"""Context manager exit - cleanup LLM service if needed."""
- if isinstance(self._gpt_access, LlamaAccess):
- self._gpt_access.__exit__(exc_type, exc_value, traceback)
+ pass
def add_to_history(self, message: typing.Dict[str, str]):
"""
diff --git a/src/copra/baselines/gpt4/few_shot_policy_prompter.py b/src/copra/baselines/gpt4/few_shot_policy_prompter.py
index cc57324..4023b01 100644
--- a/src/copra/baselines/gpt4/few_shot_policy_prompter.py
+++ b/src/copra/baselines/gpt4/few_shot_policy_prompter.py
@@ -8,12 +8,10 @@
from copra.agent.rate_limiter import RateLimiter, InvalidActionException
from copra.agent.gpt_guided_tree_search_policy import TreeSearchAction
from copra.gpts.gpt_access import GptAccess
-from copra.gpts.llama_access import LlamaAccess
from itp_interface.rl.proof_action import ProofAction
from copra.prompt_generator.prompter import PolicyPrompter
from copra.prompt_generator.dfs_agent_grammar import DfsAgentGrammar
from copra.baselines.gpt4.few_shot_grammar import FewShotGptRequest, FewShotGptRequestGrammar, FewShotGptResponse, FewShotGptResponseGrammar
-from copra.tools.misc import model_supports_openai_api
class FewShotGptPolicyPrompter(PolicyPrompter):
_cache: typing.Dict[str, typing.Any] = {}
@@ -43,10 +41,7 @@ def __init__(self,
conv_messages = self.agent_grammar.get_openai_conv_messages(example_conv_prompt_path, "system")
main_message = self.agent_grammar.get_openai_main_message(main_sys_prompt_path, "system")
self.system_messages = [main_message] + conv_messages
- if not model_supports_openai_api(gpt_model_name):
- self._gpt_access = LlamaAccess(gpt_model_name)
- else:
- self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name)
+ self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name)
# For vLLM models, use the generic "vllm" key for model info
model_info_key = "vllm" if gpt_model_name.startswith("vllm:") else gpt_model_name
@@ -83,12 +78,10 @@ def __init__(self,
pass
def __enter__(self):
- if isinstance(self._gpt_access, LlamaAccess):
- self._gpt_access.__enter__()
+ pass
def __exit__(self, exc_type, exc_value, traceback):
- if isinstance(self._gpt_access, LlamaAccess):
- self._gpt_access.__exit__(exc_type, exc_value, traceback)
+ pass
def _init_retriever(self):
if FewShotGptPolicyPrompter._cache.get(self._training_data_path, None) is not None:
diff --git a/src/copra/baselines/gpt4/hammer_policy_prompter.py b/src/copra/baselines/gpt4/hammer_policy_prompter.py
index 679be23..1ae9ee1 100644
--- a/src/copra/baselines/gpt4/hammer_policy_prompter.py
+++ b/src/copra/baselines/gpt4/hammer_policy_prompter.py
@@ -6,10 +6,7 @@
from copra.retrieval.coq_bm25_reranker import CoqBM25TrainingDataRetriever
from copra.prompt_generator.agent_grammar import CoqGPTResponseGrammar
from copra.prompt_generator.gpt_request_grammar import CoqGPTRequestGrammar, CoqGptRequestActions
-from copra.agent.rate_limiter import RateLimiter
from copra.agent.gpt_guided_tree_search_policy import TreeSearchAction, TreeSearchActionType
-from copra.gpts.gpt_access import GptAccess
-from copra.gpts.llama_access import LlamaAccess
from itp_interface.rl.proof_action import ProofAction
from copra.prompt_generator.prompter import PolicyPrompter
from copra.prompt_generator.dfs_agent_grammar import DfsAgentGrammar
diff --git a/src/copra/baselines/gpt4/informal_few_shot_policy_prompter.py b/src/copra/baselines/gpt4/informal_few_shot_policy_prompter.py
index 4f5b90f..1059783 100644
--- a/src/copra/baselines/gpt4/informal_few_shot_policy_prompter.py
+++ b/src/copra/baselines/gpt4/informal_few_shot_policy_prompter.py
@@ -8,7 +8,6 @@
from copra.agent.rate_limiter import RateLimiter, InvalidActionException
from copra.agent.gpt_guided_tree_search_policy import TreeSearchAction
from copra.gpts.gpt_access import GptAccess
-from copra.gpts.llama_access import LlamaAccess
from itp_interface.rl.proof_action import ProofAction
from copra.prompt_generator.prompter import PolicyPrompter
from copra.prompt_generator.dfs_agent_grammar import DfsAgentGrammar
@@ -44,10 +43,7 @@ def __init__(self,
conv_messages = self.agent_grammar.get_openai_conv_messages(example_conv_prompt_path, "system")
main_message = self.agent_grammar.get_openai_main_message(main_sys_prompt_path, "system")
self.system_messages = [main_message] + conv_messages
- if not model_supports_openai_api(gpt_model_name):
- self._gpt_access = LlamaAccess(gpt_model_name)
- else:
- self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name)
+ self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name)
self._token_limit_per_min = GptAccess.gpt_model_info[gpt_model_name]["token_limit_per_min"]
self._request_limit_per_min = GptAccess.gpt_model_info[gpt_model_name]["request_limit_per_min"]
self._max_token_per_prompt = GptAccess.gpt_model_info[gpt_model_name]["max_token_per_prompt"]
@@ -81,12 +77,10 @@ def __init__(self,
pass
def __enter__(self):
- if isinstance(self._gpt_access, LlamaAccess):
- self._gpt_access.__enter__()
+ pass
def __exit__(self, exc_type, exc_value, traceback):
- if isinstance(self._gpt_access, LlamaAccess):
- self._gpt_access.__exit__(exc_type, exc_value, traceback)
+ pass
def _init_retriever(self):
if InformalFewShotGptPolicyPrompter._cache.get(self._training_data_path, None) is not None:
diff --git a/src/copra/gpts/llama2_chat_format.py b/src/copra/gpts/llama2_chat_format.py
deleted file mode 100644
index 0ed3d1c..0000000
--- a/src/copra/gpts/llama2_chat_format.py
+++ /dev/null
@@ -1,227 +0,0 @@
-#!/usr/bin/env python3
-
-class Llama2FormatChat(object):
- """
- prompt = f"<>\\n{system}\\n<>\\n\\n{user_1}"
- prompt += f"[INST] {prompt.strip()} [/INST] {answer_1.strip()} "
- prompt += f"[INST] {user_2.strip()} [/INST] {answer_2.strip()} "
- prompt += f"[INST] {user_3.strip()} [/INST]"
- """
- start_token = ""
- end_token = ""
- sys_start = "<>"
- sys_end = "<>"
- inst_start = "[INST]"
- inst_end = "[/INST]"
- def __init__(self):
- pass
-
- def __call__(self, messages) -> str:
- """
- messages are of the form
- messages = [
- {
- "role": "system",
- "content": "....",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "....",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "....",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "....",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "....",
- },
- {
- "role": "user",
- "content": "",
- }
- ]
- """
- # Collect all the system messages
- system_messages = []
- user_messages = []
- assistant_messages = []
- role_names = set()
- for message in messages:
- if message["role"] == "system":
- system_messages.append(message)
- if "name" in message:
- role_names.add(message["name"])
- elif message["role"] == "user":
- user_messages.append(message)
- if "name" in message:
- role_names.add(message["name"])
- elif message["role"] == "assistant":
- assistant_messages.append(message)
- if "name" in message:
- role_names.add(message["name"])
- else:
- raise ValueError(f"Unknown role: {message['role']}")
- sys_prompt = self._format_system_messages(system_messages)
- user_prompt = self._format_user_assistant_messages(user_messages, assistant_messages)
- prompt = \
-f"""{sys_prompt}
-
-{user_prompt}"""
- return prompt, role_names
-
- def _format_system_messages(self, system_message):
- """
- system_message is of the form
- """
- main_system_messages = [sys_msg["content"] for sys_msg in system_message if "name" not in sys_msg]
- main_system_message = "\n".join(main_system_messages)
- example_messages = [(sys_msg["name"], sys_msg["content"]) for sys_msg in system_message if "name" in sys_msg]
- example_messages = [f"`{name}`:\n{msg}" for name, msg in example_messages]
- if len(example_messages) > 0:
- example_message = "\n".join(example_messages)
- else:
- example_message = ""
- if len(example_messages) > 0:
- system_message = \
-f"""{main_system_message}
-
-An example of user and assistant interaction is as follows:
-{example_message}"""
- else:
- system_message = \
-f"""{main_system_message}"""
- system_prompt = \
-f"""{Llama2FormatChat.start_token}{Llama2FormatChat.inst_start} {Llama2FormatChat.sys_start}
-{system_message}
-{Llama2FormatChat.sys_end}"""
- return system_prompt
-
- def _format_user_assistant_messages(self, user_messages, assistant_messages):
- """
- user_messages is of the form
- """
- user_messages = [user_msg["content"] for user_msg in user_messages]
- user_messages = [f"{user_msg} {Llama2FormatChat.inst_end} " for user_msg in user_messages]
- assistant_messages = [assistant_msg["content"] for assistant_msg in assistant_messages]
- assistant_messages = [f"{assistant_msg} {Llama2FormatChat.end_token}{Llama2FormatChat.start_token}{Llama2FormatChat.inst_start} " for assistant_msg in assistant_messages]
- # Combine the messages one after the other
- messages = []
- idx = 0
- while idx < len(user_messages) or idx < len(assistant_messages):
- if idx < len(user_messages):
- messages.append(user_messages[idx])
- if idx < len(assistant_messages):
- messages.append(assistant_messages[idx])
- idx += 1
- message = "".join(messages)
- return message
-
-if __name__ == "__main__":
- messages = [
- {
- "role": "system",
- "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "New synergies will help drive top-line growth.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Things working well together will increase revenue.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Let's talk later when we're less busy about how to do better.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "This late pivot means we don't have time to boil the ocean for the client deliverable.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Our idea seems to be scooped, don't know how to change direction now."
- },
- {
- "role": "user",
- "content": "We changed the direction of the project, but we don't have time to do it.",
- },
- {
- "role": "assistant",
- "content": "Too many changes do not have time to do it.",
- },
- {
- "role": "user",
- "content": "The pot is boiling, probably the water will spill.",
- }
- ]
- llama2_format_chat = Llama2FormatChat()
- prompt, role_names = llama2_format_chat(messages)
- print(prompt)
- print('='*50)
- print(role_names)
- print('-'*100)
- messages = [
- {
- "role": "system",
- "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "New synergies will help drive top-line growth.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Things working well together will increase revenue.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Let's talk later when we're less busy about how to do better.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "This late pivot means we don't have time to boil the ocean for the client deliverable.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Our idea seems to be scooped, don't know how to change direction now."
- },
- {
- "role": "user",
- "content": "We changed the direction of the project, but we don't have time to do it.",
- }
- ]
- llama2_format_chat = Llama2FormatChat()
- prompt, role_names = llama2_format_chat(messages)
- print(prompt)
- print('='*50)
- print(role_names)
\ No newline at end of file
diff --git a/src/copra/gpts/llama_access.py b/src/copra/gpts/llama_access.py
deleted file mode 100644
index 24311d3..0000000
--- a/src/copra/gpts/llama_access.py
+++ /dev/null
@@ -1,329 +0,0 @@
-#!/usr/bin/env python3
-
-import time
-import typing
-import os
-import random
-import logging
-import threading
-# from litellm import token_counter
-from subprocess import Popen, PIPE, STDOUT
-from copra.gpts.gpt_access import GptAccess
-from copra.gpts.llama2_chat_format import Llama2FormatChat
-from huggingface_hub import InferenceClient
-
-class ServiceDownError(Exception):
- pass
-
-class LlamaAccess(GptAccess):
- # Use this https://huggingface.co/blog/codellama#conversational-instructions for formatting instructions
- """
- This is not thread safe"""
- process = None
- model_name = None
- debug = False
- random_suffix = random.randint(0, 10**16)
- models_supported_name = ['codellama/CodeLlama-7b-Instruct-hf', 'EleutherAI/llemma_7b', 'morph-labs/morph-prover-v0-7b']
- logger : logging.Logger = None
- port = 8080
- docker_exit_signal = False
- litellm_exit_signal = False
- docker_logging_thread = None
- litellm_logging_thread = None
- def __init__(self, model_name: str | None = None, temperature = 0.0) -> None:
- assert model_name == LlamaAccess.model_name or model_name is None, "Model name must be initialized before use"
- assert LlamaAccess.process is not None, "LlamaAccess class must be initialized before use"
- self.secret_filepath = None
- self.model_name = model_name if model_name is not None else LlamaAccess.models_supported_name[0]
- self.temperature = temperature
- self.usage = {
- "prompt_tokens": 0,
- "completion_tokens": 0,
- "total_tokens": 0
- }
- self.is_open_ai_model = False
- self._llama2_format_chat = Llama2FormatChat()
-
- def __enter__(self):
- self.model_name = f"huggingface/{self.model_name}"
- self.interface = InferenceClient(model=f"http://localhost:{LlamaAccess.port}")
- return self
-
- def __exit__(self, exc_type, exc_value, traceback):
- # self.kill() # only kill the service if this is the last object
- pass
-
- def _get_docker_container_name(model_name: str) -> str:
- return model_name.replace("/","-") + f"-{LlamaAccess.random_suffix}"
-
- def _get_docker_container_id(model_name: str) -> str:
- return os.popen(f"docker ps -q --filter='NAME={LlamaAccess._get_docker_container_name(model_name)}'").read().strip()
-
- def _check_if_docker_running() -> bool:
- try:
- return LlamaAccess._get_docker_container_id(LlamaAccess.model_name) != ""
- except:
- return False
-
- def class_init(model_name: str = None, temperature = 0.0, port = 8080, debug = False, logger: logging.Logger = None):
- if model_name is None:
- model_name = LlamaAccess.models_supported_name[0]
- elif model_name is not None:
- assert model_name in LlamaAccess.models_supported_name, f"Model name {model_name} not supported"
- # Check if docker is running
- if LlamaAccess.model_name is None:
- LlamaAccess.model_name = model_name
- LlamaAccess.debug = debug
- LlamaAccess.logger = logger if logger is not None else logging.getLogger(__name__)
- LlamaAccess.docker_exit_signal = False
- LlamaAccess.litellm_exit_signal = False
- LlamaAccess.docker_logging_thread = None
- LlamaAccess.port = port
- if not LlamaAccess._check_if_docker_running():
- LlamaAccess._start_service(model_name, temperature, port, debug)
- pass
-
- def class_kill():
- LlamaAccess.kill()
-
- def _docker_service_logs():
- try:
- while not LlamaAccess.docker_exit_signal:
- line = LlamaAccess.process.stdout.readline().strip()
- if line:
- LlamaAccess.logger.info(f'Docker:\n {line}')
- else:
- # sleep for a bit to avoid busy waiting
- time.sleep(0.02)
- except:
- pass
-
- def _start_service(model_name: str, temperature = 0.0, port = 8080, debug = False) -> None:
- # Change the openai.api_key to the llama api key
- # Start the docker container for llama TGI
- docker_container_name = LlamaAccess._get_docker_container_name(model_name)
- cuda_visible_devices = os.popen('echo $CUDA_VISIBLE_DEVICES').read().strip()
- if cuda_visible_devices == '':
- cuda_visible_devices = '0'
- cmd = f'sh src/gpts/start_llama.sh {docker_container_name} {model_name} {port}'
- LlamaAccess.process = Popen(
- cmd,
- shell = True,
- stdin = PIPE,
- stdout = PIPE,
- stderr = STDOUT,
- cwd = root_dir,
- bufsize = 1,
- universal_newlines = True)
- exit_wait = False
- start_time = time.time()
- retry = 3
- while not exit_wait and retry > 0:
- line = LlamaAccess.process.stdout.readline().strip()
- if line:
- LlamaAccess.logger.info(f'Docker:\n {line}')
- if "Error" in line or "error" in line:
- LlamaAccess.process.kill()
- raise Exception(f'Failed to start docker container {docker_container_name}, because of error: \n{line}')
- if line.endswith('Connected'):
- time.sleep(1)
- exit_wait = True
- else:
- # sleep for a bit to avoid busy waiting
- time.sleep(0.02)
- end_time = time.time()
- if end_time - start_time > 400:
- LlamaAccess.process.kill()
- LlamaAccess.kill()
- LlamaAccess.process = Popen(
- cmd,
- shell = True,
- stdin = PIPE,
- stdout = PIPE,
- stderr = STDOUT,
- cwd = root_dir,
- bufsize = 1,
- universal_newlines = True)
- exit_wait = False
- start_time = time.time()
- retry -= 1
-
- # Start the docker logging thread
- LlamaAccess.docker_exit_signal = False
- LlamaAccess.docker_logging_thread = threading.Thread(target=LlamaAccess._docker_service_logs)
- LlamaAccess.docker_logging_thread.start()
-
- def token_counter(self, model_name: str, text: typing.List[str]) -> int:
- tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
- text = " ".join([message["content"] for message in messages])
- enc = tokenizer.encode(text)
- num_tokens = len(enc.ids)
- return num_tokens
-
- def num_tokens_from_messages(self, messages, model=None):
- model = model if model is not None else self.model_name
- num_tokens = self.token_counter(model, messages=messages)
- return num_tokens
-
- def complete_chat(self,
- messages: typing.List[str],
- model: typing.Optional[str] = None,
- n: int = 1,
- max_tokens: int = 5,
- temperature: float = 0.25,
- top_p: float = 1.0,
- frequency_penalty: float = 0.0,
- presence_penalty: float = 0.0,
- stop: list = ["\n"]) -> typing.Tuple[list, dict]:
- temperature = None if temperature == 0.0 else temperature
- top_p = None if top_p == 1.0 else top_p
- try:
- outputs = []
- prompt_tokens = self.num_tokens_from_messages(messages)
- completion_tokens = 0
- prompt, role_names = self._llama2_format_chat(messages)
- # LlamaAccess.logger.debug(f"Prompt Received:\n{prompt}")
- for i in range(n):
- output = self.interface.text_generation(
- prompt=prompt,
- details=True,
- max_new_tokens=max_tokens,
- temperature=temperature,
- top_p=top_p,
- stop_sequences=stop,
- do_sample=i>0,
- )
- generated_text = output.generated_text
- finish_reason = output.details.finish_reason
- # LlamaAccess.logger.debug(f"Generated Text:\n{generated_text}")
- if finish_reason.value == "stop_sequence":
- finish_reason = "stop"
- else:
- finish_reason = finish_reason.value
- if finish_reason == "stop":
- # Remove the stop token
- for stop_token in stop:
- if generated_text.endswith(stop_token):
- generated_text = generated_text[:generated_text.rfind(stop_token)]
- break
- generated_text = generated_text.strip()
- for role_name in role_names:
- if generated_text.startswith(role_name):
- generated_text = generated_text[len(role_name):].strip()
- break
- elif generated_text.startswith(f"`{role_name}`"):
- generated_text = generated_text[len(f"`{role_name}`:"):].strip()
- break
- outputs.append({'role': 'assistant', 'content': generated_text, 'finish_reason': finish_reason})
- completion_tokens += output.details.generated_tokens
- total_tokens = prompt_tokens + completion_tokens
- usage = {
- "prompt_tokens": prompt_tokens,
- "completion_tokens": completion_tokens,
- "total_tokens": total_tokens,
- "reason": outputs[-1]["finish_reason"] if len(outputs) > 0 else "stop"
- }
- self.usage['prompt_tokens'] += prompt_tokens
- self.usage['completion_tokens'] += completion_tokens
- self.usage['total_tokens'] += total_tokens
- return outputs, usage
- except:
- if not LlamaAccess._check_if_docker_running():
- raise ServiceDownError("Docker is shut down, restart the service")
- else:
- raise
-
- def kill():
- LlamaAccess.logger.info("Killing the docker processes")
- docker_container_name = LlamaAccess._get_docker_container_name(LlamaAccess.model_name)
- if LlamaAccess._check_if_docker_running():
- docker_name = os.popen(f"docker stop {docker_container_name}").read().strip()
- assert docker_name == docker_container_name, f"Failed to stop container {docker_container_name}"
- time.sleep(2)
- LlamaAccess.logger.info(f"Docker Container {docker_container_name} stopped")
- else:
- LlamaAccess.logger.info(f"Docker Container {docker_container_name} already stopped")
- # Remove the docker container
- docker_name = os.popen(f"docker rm {docker_container_name}").read().strip()
- time.sleep(2)
- if docker_name == '':
- LlamaAccess.logger.info(f"Docker Container {docker_container_name} already removed")
- else:
- assert docker_name == docker_container_name, f"Failed to remove container {LlamaAccess._get_docker_container_name(LlamaAccess.model_name)}"
- LlamaAccess.logger.info(f"Docker Container {docker_container_name} removed")
- try:
- LlamaAccess.process.kill()
- except:
- pass
- time.sleep(2)
- # Stop logging threads
- LlamaAccess.docker_exit_signal = True
- LlamaAccess.litellm_exit_signal = True
- LlamaAccess.docker_logging_thread.join()
- LlamaAccess.logger.info("Docker processes killed and logging threads stopped")
- try:
- LlamaAccess.process.stdin.close()
- except:
- pass
- time.sleep(2)
- try:
- LlamaAccess.process.stdout.close()
- except:
- pass
- time.sleep(2)
- LlamaAccess.logger.info("Docker stdin and stdout closed")
-
-if __name__ == '__main__':
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
- logger.log(logging.INFO, "Testing LlamaAccess")
- LlamaAccess.class_init(port=10005, logger=logger)
- try:
- with LlamaAccess() as llama:
- messages = [
- {
- "role": "system",
- "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "New synergies will help drive top-line growth.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Things working well together will increase revenue.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Let's talk later when we're less busy about how to do better.",
- },
- {
- "role": "system",
- "name": "example_user",
- "content": "This late pivot means we don't have time to boil the ocean for the client deliverable.",
- },
- {
- "role": "system",
- "name": "example_assistant",
- "content": "Our idea seems to be scooped, don't know how to change direction now."
- },
- {
- "role": "user",
- "content": "We changed the direction of the project, but we don't have time to do it.",
- }
- ]
- messages = [messages[0]] + [messages[1+(i%len(messages[1:-1]))] for i in range(300)] + [messages[-1]]
- print(llama.num_tokens_from_messages(messages))
- print("Will call complete_chat soon")
- #time.sleep(30)
- print(llama.complete_chat(messages, max_tokens=50, n=2, temperature=0.0, stop=['.']))
- finally:
- LlamaAccess.class_kill()
\ No newline at end of file
diff --git a/src/copra/gpts/start_llama.sh b/src/copra/gpts/start_llama.sh
deleted file mode 100644
index 3ae2412..0000000
--- a/src/copra/gpts/start_llama.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-name=codellama_tgi
-model=codellama/CodeLlama-7b-Instruct-hf #tiiuae/falcon-7b-instruct
-port=8080
-cuda="0"
-volume=$PWD/.log/tgi/models
-# Change container name by container name argument
-if [ $# -eq 1 ]; then
- name=$1
-fi
-# Change model by model name argument
-if [ $# -eq 2 ]; then
- name=$1
- model=$2
-fi
-# Change port by port argument
-if [ $# -eq 3 ]; then
- name=$1
- model=$2
- port=$3
-fi
-# Change cuda by cuda argument
-if [ $# -eq 4 ]; then
- name=$1
- model=$2
- port=$3
- cuda=$4
-fi
-# Change volume by volume argument
-if [ $# -eq 5 ]; then
- name=$1
- model=$2
- port=$3
- cuda=$4
- volume=$5
-fi
-# Check if volume exists
-if [ ! -d $volume ]; then
- # Raise error if volume does not exist
- echo "Volume $volume does not exist"
- exit 1
-fi
-echo "Starting $name with model $model and volume $volume"
-docker run --env CUDA_VISIBLE_DEVICES=$cuda --name $name --gpus all --shm-size 1g -p $port:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model --max-input-length 14000 --max-total-tokens 16384 --max-batch-prefill-tokens 14000
\ No newline at end of file
diff --git a/src/copra/main/eval_benchmark.py b/src/copra/main/eval_benchmark.py
index 7c00f24..07a785d 100644
--- a/src/copra/main/eval_benchmark.py
+++ b/src/copra/main/eval_benchmark.py
@@ -17,7 +17,6 @@
import math
import typing
-from copra.gpts.llama_access import LlamaAccess, ServiceDownError
from copra.main.config import (
EnvSettings, EvalBenchmark, EvalDataset, EvalProofResults, EvalSettings,
Experiments, EvalRunCheckpointInfo, PromptSettings, parse_config
@@ -118,19 +117,6 @@ def _initialize_services(
logger.error(f"Failed to start vLLM server: {e}")
raise
- # Initialize Llama service if using non-OpenAI model (deprecated, prefer vLLM)
- elif eval_settings.gpt_model_name is not None and \
- len(eval_settings.gpt_model_name) != 0 and \
- not model_supports_openai_api(eval_settings.gpt_model_name):
- llama_logger = setup_logger(
- __name__ + "_llama",
- os.path.join(eval_checkpoint_info.logging_dirs[-1], "llama.log"),
- logging.INFO,
- '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
- )
- LlamaAccess.class_init(eval_settings.gpt_model_name, eval_settings.temperature,
- debug=False, logger=llama_logger)
-
# Initialize Isabelle service if needed
if eval_benchmark.language == ProofAction.Language.ISABELLE:
isabelle_logger = setup_logger(
@@ -174,12 +160,6 @@ def _shutdown_services(
_vllm_server_process = None
logging.getLogger(__name__).info("vLLM server stopped")
- # Shutdown Llama service if it was initialized
- elif eval_settings.gpt_model_name is not None and \
- len(eval_settings.gpt_model_name) != 0 and \
- not model_supports_openai_api(eval_settings.gpt_model_name):
- LlamaAccess.class_kill()
-
# Shutdown Isabelle service if it was initialized
if eval_benchmark.language == ProofAction.Language.ISABELLE:
IsabelleExecutor.stop_server()
@@ -511,18 +491,6 @@ def _process_lemma(
_handle_proof_timeout(lemma_name, path, elapsed_time, no_proof_res, checkpoint_manager, attempt_idx)
logger.info(f"Dumping proof search result:\nProof FAILED for lemma: {lemma_name}\n")
should_retry = False
- elif return_dict.get("service_down", False):
- # Retry for service down
- should_retry = True
- logger.info("Killing the llama process")
- LlamaAccess.class_kill()
- logger.info("Killed the llama process")
- logger.info("Restarting the llama process")
- # Get llama logger from logs
- llama_logger = logging.getLogger(__name__ + "_llama")
- LlamaAccess.class_init(eval_settings.gpt_model_name,
- eval_settings.temperature, debug=False, logger=llama_logger)
- logger.info("Restarted the llama process")
else:
# Success case
diff --git a/src/copra/main/proof_execution.py b/src/copra/main/proof_execution.py
index 306eb6a..c0456ad 100644
--- a/src/copra/main/proof_execution.py
+++ b/src/copra/main/proof_execution.py
@@ -13,7 +13,6 @@
from typing import Dict, Any
from copra.main.parallel_execution import get_executor
-from copra.gpts.llama_access import ServiceDownError
from copra.agent.dfs_tree_search_with_stack import DFSTreeSearch
from copra.agent.gpt_guided_tree_search_policy import GptGuidedTreeSearchPolicy
from copra.agent.simple_proof_agent import ProofAgent
@@ -178,10 +177,6 @@ def _run_prover_wrapper(
ret_dict["proof_res"] = proof_res
ret_dict["attempted_success"] = True
ret_dict["service_down"] = False
- except ServiceDownError:
- subprocess_logger.exception(f"ServiceDownError occurred while proving lemma: {lemma_name} in file {path}")
- ret_dict["attempted_success"] = False
- ret_dict["service_down"] = True
except Exception:
subprocess_logger.exception(f"Exception occurred while proving lemma: {lemma_name} in file {path}")
ret_dict["attempted_success"] = False