diff --git a/pyproject.toml b/pyproject.toml index 0733f20..a0a9cec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ packages = ["src/copra"] [project] name = "copra-theorem-prover" -version = "1.5.0" +version = "1.6.0" authors = [ { name="Amitayush Thakur", email="amitayush@utexas.edu" }, ] diff --git a/src/copra/agent/dfs_policy_prompter.py b/src/copra/agent/dfs_policy_prompter.py index d97cc5c..4e96301 100644 --- a/src/copra/agent/dfs_policy_prompter.py +++ b/src/copra/agent/dfs_policy_prompter.py @@ -10,7 +10,6 @@ from copra.agent.rate_limiter import InvalidActionException from copra.agent.simple_policy_prompter import SimplePolicyPrompter from copra.agent.gpt_guided_tree_search_policy import PromptSummary, ProofQInfo, TreeSearchAction, TreeSearchActionType -from copra.gpts.llama_access import ServiceDownError from copra.retrieval.coq_bm25_reranker import CoqBM25TrainingDataRetriever from copra.prompt_generator.gpt_request_grammar import CoqGPTRequestGrammar, CoqGptRequest, CoqGptRequestActions from copra.prompt_generator.dfs_agent_grammar import DfsAgentGrammar @@ -327,10 +326,6 @@ def run_prompt(self, request: CoqGptResponse) -> list: # don't change temperature for now self._num_api_calls += 1 - except ServiceDownError as e: - self.logger.info("Got a service down error. Will giveup until the docker container is restarted.") - self.logger.exception(e) - raise except Exception as e: self.logger.info("Got an unknown exception. Retrying.") self.logger.exception(e) diff --git a/src/copra/agent/simple_policy_prompter.py b/src/copra/agent/simple_policy_prompter.py index a7490c6..6fdc386 100644 --- a/src/copra/agent/simple_policy_prompter.py +++ b/src/copra/agent/simple_policy_prompter.py @@ -12,7 +12,6 @@ import logging from copra.agent.rate_limiter import RateLimiter from copra.gpts.gpt_access import GptAccess -from copra.gpts.llama_access import LlamaAccess, ServiceDownError from copra.prompt_generator.prompter import PolicyPrompter from copra.tools.misc import model_supports_openai_api, is_vllm_model @@ -74,10 +73,7 @@ def __init__( # Initialize LLM access (GptAccess or LlamaAccess) # Note: vLLM models (with "vllm:" prefix) are handled by GptAccess - if not model_supports_openai_api(gpt_model_name): - self._gpt_access = LlamaAccess(gpt_model_name) - else: - self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name) + self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name) # Get model configuration # For vLLM models, use the generic "vllm" key in model_info @@ -104,14 +100,11 @@ def __init__( def __enter__(self): """Context manager entry - initialize LLM service if needed.""" - if isinstance(self._gpt_access, LlamaAccess): - self._gpt_access.__enter__() return self def __exit__(self, exc_type, exc_value, traceback): """Context manager exit - cleanup LLM service if needed.""" - if isinstance(self._gpt_access, LlamaAccess): - self._gpt_access.__exit__(exc_type, exc_value, traceback) + pass def add_to_history(self, message: typing.Dict[str, str]): """ diff --git a/src/copra/baselines/gpt4/few_shot_policy_prompter.py b/src/copra/baselines/gpt4/few_shot_policy_prompter.py index cc57324..4023b01 100644 --- a/src/copra/baselines/gpt4/few_shot_policy_prompter.py +++ b/src/copra/baselines/gpt4/few_shot_policy_prompter.py @@ -8,12 +8,10 @@ from copra.agent.rate_limiter import RateLimiter, InvalidActionException from copra.agent.gpt_guided_tree_search_policy import TreeSearchAction from copra.gpts.gpt_access import GptAccess -from copra.gpts.llama_access import LlamaAccess from itp_interface.rl.proof_action import ProofAction from copra.prompt_generator.prompter import PolicyPrompter from copra.prompt_generator.dfs_agent_grammar import DfsAgentGrammar from copra.baselines.gpt4.few_shot_grammar import FewShotGptRequest, FewShotGptRequestGrammar, FewShotGptResponse, FewShotGptResponseGrammar -from copra.tools.misc import model_supports_openai_api class FewShotGptPolicyPrompter(PolicyPrompter): _cache: typing.Dict[str, typing.Any] = {} @@ -43,10 +41,7 @@ def __init__(self, conv_messages = self.agent_grammar.get_openai_conv_messages(example_conv_prompt_path, "system") main_message = self.agent_grammar.get_openai_main_message(main_sys_prompt_path, "system") self.system_messages = [main_message] + conv_messages - if not model_supports_openai_api(gpt_model_name): - self._gpt_access = LlamaAccess(gpt_model_name) - else: - self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name) + self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name) # For vLLM models, use the generic "vllm" key for model info model_info_key = "vllm" if gpt_model_name.startswith("vllm:") else gpt_model_name @@ -83,12 +78,10 @@ def __init__(self, pass def __enter__(self): - if isinstance(self._gpt_access, LlamaAccess): - self._gpt_access.__enter__() + pass def __exit__(self, exc_type, exc_value, traceback): - if isinstance(self._gpt_access, LlamaAccess): - self._gpt_access.__exit__(exc_type, exc_value, traceback) + pass def _init_retriever(self): if FewShotGptPolicyPrompter._cache.get(self._training_data_path, None) is not None: diff --git a/src/copra/baselines/gpt4/hammer_policy_prompter.py b/src/copra/baselines/gpt4/hammer_policy_prompter.py index 679be23..1ae9ee1 100644 --- a/src/copra/baselines/gpt4/hammer_policy_prompter.py +++ b/src/copra/baselines/gpt4/hammer_policy_prompter.py @@ -6,10 +6,7 @@ from copra.retrieval.coq_bm25_reranker import CoqBM25TrainingDataRetriever from copra.prompt_generator.agent_grammar import CoqGPTResponseGrammar from copra.prompt_generator.gpt_request_grammar import CoqGPTRequestGrammar, CoqGptRequestActions -from copra.agent.rate_limiter import RateLimiter from copra.agent.gpt_guided_tree_search_policy import TreeSearchAction, TreeSearchActionType -from copra.gpts.gpt_access import GptAccess -from copra.gpts.llama_access import LlamaAccess from itp_interface.rl.proof_action import ProofAction from copra.prompt_generator.prompter import PolicyPrompter from copra.prompt_generator.dfs_agent_grammar import DfsAgentGrammar diff --git a/src/copra/baselines/gpt4/informal_few_shot_policy_prompter.py b/src/copra/baselines/gpt4/informal_few_shot_policy_prompter.py index 4f5b90f..1059783 100644 --- a/src/copra/baselines/gpt4/informal_few_shot_policy_prompter.py +++ b/src/copra/baselines/gpt4/informal_few_shot_policy_prompter.py @@ -8,7 +8,6 @@ from copra.agent.rate_limiter import RateLimiter, InvalidActionException from copra.agent.gpt_guided_tree_search_policy import TreeSearchAction from copra.gpts.gpt_access import GptAccess -from copra.gpts.llama_access import LlamaAccess from itp_interface.rl.proof_action import ProofAction from copra.prompt_generator.prompter import PolicyPrompter from copra.prompt_generator.dfs_agent_grammar import DfsAgentGrammar @@ -44,10 +43,7 @@ def __init__(self, conv_messages = self.agent_grammar.get_openai_conv_messages(example_conv_prompt_path, "system") main_message = self.agent_grammar.get_openai_main_message(main_sys_prompt_path, "system") self.system_messages = [main_message] + conv_messages - if not model_supports_openai_api(gpt_model_name): - self._gpt_access = LlamaAccess(gpt_model_name) - else: - self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name) + self._gpt_access = GptAccess(secret_filepath=secret_filepath, model_name=gpt_model_name) self._token_limit_per_min = GptAccess.gpt_model_info[gpt_model_name]["token_limit_per_min"] self._request_limit_per_min = GptAccess.gpt_model_info[gpt_model_name]["request_limit_per_min"] self._max_token_per_prompt = GptAccess.gpt_model_info[gpt_model_name]["max_token_per_prompt"] @@ -81,12 +77,10 @@ def __init__(self, pass def __enter__(self): - if isinstance(self._gpt_access, LlamaAccess): - self._gpt_access.__enter__() + pass def __exit__(self, exc_type, exc_value, traceback): - if isinstance(self._gpt_access, LlamaAccess): - self._gpt_access.__exit__(exc_type, exc_value, traceback) + pass def _init_retriever(self): if InformalFewShotGptPolicyPrompter._cache.get(self._training_data_path, None) is not None: diff --git a/src/copra/gpts/llama2_chat_format.py b/src/copra/gpts/llama2_chat_format.py deleted file mode 100644 index 0ed3d1c..0000000 --- a/src/copra/gpts/llama2_chat_format.py +++ /dev/null @@ -1,227 +0,0 @@ -#!/usr/bin/env python3 - -class Llama2FormatChat(object): - """ - prompt = f"<>\\n{system}\\n<>\\n\\n{user_1}" - prompt += f"[INST] {prompt.strip()} [/INST] {answer_1.strip()} " - prompt += f"[INST] {user_2.strip()} [/INST] {answer_2.strip()} " - prompt += f"[INST] {user_3.strip()} [/INST]" - """ - start_token = "" - end_token = "" - sys_start = "<>" - sys_end = "<>" - inst_start = "[INST]" - inst_end = "[/INST]" - def __init__(self): - pass - - def __call__(self, messages) -> str: - """ - messages are of the form - messages = [ - { - "role": "system", - "content": "....", - }, - { - "role": "system", - "name": "example_user", - "content": "....", - }, - { - "role": "system", - "name": "example_assistant", - "content": "....", - }, - { - "role": "system", - "name": "example_user", - "content": "....", - }, - { - "role": "system", - "name": "example_assistant", - "content": "....", - }, - { - "role": "user", - "content": "", - } - ] - """ - # Collect all the system messages - system_messages = [] - user_messages = [] - assistant_messages = [] - role_names = set() - for message in messages: - if message["role"] == "system": - system_messages.append(message) - if "name" in message: - role_names.add(message["name"]) - elif message["role"] == "user": - user_messages.append(message) - if "name" in message: - role_names.add(message["name"]) - elif message["role"] == "assistant": - assistant_messages.append(message) - if "name" in message: - role_names.add(message["name"]) - else: - raise ValueError(f"Unknown role: {message['role']}") - sys_prompt = self._format_system_messages(system_messages) - user_prompt = self._format_user_assistant_messages(user_messages, assistant_messages) - prompt = \ -f"""{sys_prompt} - -{user_prompt}""" - return prompt, role_names - - def _format_system_messages(self, system_message): - """ - system_message is of the form - """ - main_system_messages = [sys_msg["content"] for sys_msg in system_message if "name" not in sys_msg] - main_system_message = "\n".join(main_system_messages) - example_messages = [(sys_msg["name"], sys_msg["content"]) for sys_msg in system_message if "name" in sys_msg] - example_messages = [f"`{name}`:\n{msg}" for name, msg in example_messages] - if len(example_messages) > 0: - example_message = "\n".join(example_messages) - else: - example_message = "" - if len(example_messages) > 0: - system_message = \ -f"""{main_system_message} - -An example of user and assistant interaction is as follows: -{example_message}""" - else: - system_message = \ -f"""{main_system_message}""" - system_prompt = \ -f"""{Llama2FormatChat.start_token}{Llama2FormatChat.inst_start} {Llama2FormatChat.sys_start} -{system_message} -{Llama2FormatChat.sys_end}""" - return system_prompt - - def _format_user_assistant_messages(self, user_messages, assistant_messages): - """ - user_messages is of the form - """ - user_messages = [user_msg["content"] for user_msg in user_messages] - user_messages = [f"{user_msg} {Llama2FormatChat.inst_end} " for user_msg in user_messages] - assistant_messages = [assistant_msg["content"] for assistant_msg in assistant_messages] - assistant_messages = [f"{assistant_msg} {Llama2FormatChat.end_token}{Llama2FormatChat.start_token}{Llama2FormatChat.inst_start} " for assistant_msg in assistant_messages] - # Combine the messages one after the other - messages = [] - idx = 0 - while idx < len(user_messages) or idx < len(assistant_messages): - if idx < len(user_messages): - messages.append(user_messages[idx]) - if idx < len(assistant_messages): - messages.append(assistant_messages[idx]) - idx += 1 - message = "".join(messages) - return message - -if __name__ == "__main__": - messages = [ - { - "role": "system", - "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.", - }, - { - "role": "system", - "name": "example_user", - "content": "New synergies will help drive top-line growth.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Things working well together will increase revenue.", - }, - { - "role": "system", - "name": "example_user", - "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Let's talk later when we're less busy about how to do better.", - }, - { - "role": "system", - "name": "example_user", - "content": "This late pivot means we don't have time to boil the ocean for the client deliverable.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Our idea seems to be scooped, don't know how to change direction now." - }, - { - "role": "user", - "content": "We changed the direction of the project, but we don't have time to do it.", - }, - { - "role": "assistant", - "content": "Too many changes do not have time to do it.", - }, - { - "role": "user", - "content": "The pot is boiling, probably the water will spill.", - } - ] - llama2_format_chat = Llama2FormatChat() - prompt, role_names = llama2_format_chat(messages) - print(prompt) - print('='*50) - print(role_names) - print('-'*100) - messages = [ - { - "role": "system", - "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.", - }, - { - "role": "system", - "name": "example_user", - "content": "New synergies will help drive top-line growth.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Things working well together will increase revenue.", - }, - { - "role": "system", - "name": "example_user", - "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Let's talk later when we're less busy about how to do better.", - }, - { - "role": "system", - "name": "example_user", - "content": "This late pivot means we don't have time to boil the ocean for the client deliverable.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Our idea seems to be scooped, don't know how to change direction now." - }, - { - "role": "user", - "content": "We changed the direction of the project, but we don't have time to do it.", - } - ] - llama2_format_chat = Llama2FormatChat() - prompt, role_names = llama2_format_chat(messages) - print(prompt) - print('='*50) - print(role_names) \ No newline at end of file diff --git a/src/copra/gpts/llama_access.py b/src/copra/gpts/llama_access.py deleted file mode 100644 index 24311d3..0000000 --- a/src/copra/gpts/llama_access.py +++ /dev/null @@ -1,329 +0,0 @@ -#!/usr/bin/env python3 - -import time -import typing -import os -import random -import logging -import threading -# from litellm import token_counter -from subprocess import Popen, PIPE, STDOUT -from copra.gpts.gpt_access import GptAccess -from copra.gpts.llama2_chat_format import Llama2FormatChat -from huggingface_hub import InferenceClient - -class ServiceDownError(Exception): - pass - -class LlamaAccess(GptAccess): - # Use this https://huggingface.co/blog/codellama#conversational-instructions for formatting instructions - """ - This is not thread safe""" - process = None - model_name = None - debug = False - random_suffix = random.randint(0, 10**16) - models_supported_name = ['codellama/CodeLlama-7b-Instruct-hf', 'EleutherAI/llemma_7b', 'morph-labs/morph-prover-v0-7b'] - logger : logging.Logger = None - port = 8080 - docker_exit_signal = False - litellm_exit_signal = False - docker_logging_thread = None - litellm_logging_thread = None - def __init__(self, model_name: str | None = None, temperature = 0.0) -> None: - assert model_name == LlamaAccess.model_name or model_name is None, "Model name must be initialized before use" - assert LlamaAccess.process is not None, "LlamaAccess class must be initialized before use" - self.secret_filepath = None - self.model_name = model_name if model_name is not None else LlamaAccess.models_supported_name[0] - self.temperature = temperature - self.usage = { - "prompt_tokens": 0, - "completion_tokens": 0, - "total_tokens": 0 - } - self.is_open_ai_model = False - self._llama2_format_chat = Llama2FormatChat() - - def __enter__(self): - self.model_name = f"huggingface/{self.model_name}" - self.interface = InferenceClient(model=f"http://localhost:{LlamaAccess.port}") - return self - - def __exit__(self, exc_type, exc_value, traceback): - # self.kill() # only kill the service if this is the last object - pass - - def _get_docker_container_name(model_name: str) -> str: - return model_name.replace("/","-") + f"-{LlamaAccess.random_suffix}" - - def _get_docker_container_id(model_name: str) -> str: - return os.popen(f"docker ps -q --filter='NAME={LlamaAccess._get_docker_container_name(model_name)}'").read().strip() - - def _check_if_docker_running() -> bool: - try: - return LlamaAccess._get_docker_container_id(LlamaAccess.model_name) != "" - except: - return False - - def class_init(model_name: str = None, temperature = 0.0, port = 8080, debug = False, logger: logging.Logger = None): - if model_name is None: - model_name = LlamaAccess.models_supported_name[0] - elif model_name is not None: - assert model_name in LlamaAccess.models_supported_name, f"Model name {model_name} not supported" - # Check if docker is running - if LlamaAccess.model_name is None: - LlamaAccess.model_name = model_name - LlamaAccess.debug = debug - LlamaAccess.logger = logger if logger is not None else logging.getLogger(__name__) - LlamaAccess.docker_exit_signal = False - LlamaAccess.litellm_exit_signal = False - LlamaAccess.docker_logging_thread = None - LlamaAccess.port = port - if not LlamaAccess._check_if_docker_running(): - LlamaAccess._start_service(model_name, temperature, port, debug) - pass - - def class_kill(): - LlamaAccess.kill() - - def _docker_service_logs(): - try: - while not LlamaAccess.docker_exit_signal: - line = LlamaAccess.process.stdout.readline().strip() - if line: - LlamaAccess.logger.info(f'Docker:\n {line}') - else: - # sleep for a bit to avoid busy waiting - time.sleep(0.02) - except: - pass - - def _start_service(model_name: str, temperature = 0.0, port = 8080, debug = False) -> None: - # Change the openai.api_key to the llama api key - # Start the docker container for llama TGI - docker_container_name = LlamaAccess._get_docker_container_name(model_name) - cuda_visible_devices = os.popen('echo $CUDA_VISIBLE_DEVICES').read().strip() - if cuda_visible_devices == '': - cuda_visible_devices = '0' - cmd = f'sh src/gpts/start_llama.sh {docker_container_name} {model_name} {port}' - LlamaAccess.process = Popen( - cmd, - shell = True, - stdin = PIPE, - stdout = PIPE, - stderr = STDOUT, - cwd = root_dir, - bufsize = 1, - universal_newlines = True) - exit_wait = False - start_time = time.time() - retry = 3 - while not exit_wait and retry > 0: - line = LlamaAccess.process.stdout.readline().strip() - if line: - LlamaAccess.logger.info(f'Docker:\n {line}') - if "Error" in line or "error" in line: - LlamaAccess.process.kill() - raise Exception(f'Failed to start docker container {docker_container_name}, because of error: \n{line}') - if line.endswith('Connected'): - time.sleep(1) - exit_wait = True - else: - # sleep for a bit to avoid busy waiting - time.sleep(0.02) - end_time = time.time() - if end_time - start_time > 400: - LlamaAccess.process.kill() - LlamaAccess.kill() - LlamaAccess.process = Popen( - cmd, - shell = True, - stdin = PIPE, - stdout = PIPE, - stderr = STDOUT, - cwd = root_dir, - bufsize = 1, - universal_newlines = True) - exit_wait = False - start_time = time.time() - retry -= 1 - - # Start the docker logging thread - LlamaAccess.docker_exit_signal = False - LlamaAccess.docker_logging_thread = threading.Thread(target=LlamaAccess._docker_service_logs) - LlamaAccess.docker_logging_thread.start() - - def token_counter(self, model_name: str, text: typing.List[str]) -> int: - tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer") - text = " ".join([message["content"] for message in messages]) - enc = tokenizer.encode(text) - num_tokens = len(enc.ids) - return num_tokens - - def num_tokens_from_messages(self, messages, model=None): - model = model if model is not None else self.model_name - num_tokens = self.token_counter(model, messages=messages) - return num_tokens - - def complete_chat(self, - messages: typing.List[str], - model: typing.Optional[str] = None, - n: int = 1, - max_tokens: int = 5, - temperature: float = 0.25, - top_p: float = 1.0, - frequency_penalty: float = 0.0, - presence_penalty: float = 0.0, - stop: list = ["\n"]) -> typing.Tuple[list, dict]: - temperature = None if temperature == 0.0 else temperature - top_p = None if top_p == 1.0 else top_p - try: - outputs = [] - prompt_tokens = self.num_tokens_from_messages(messages) - completion_tokens = 0 - prompt, role_names = self._llama2_format_chat(messages) - # LlamaAccess.logger.debug(f"Prompt Received:\n{prompt}") - for i in range(n): - output = self.interface.text_generation( - prompt=prompt, - details=True, - max_new_tokens=max_tokens, - temperature=temperature, - top_p=top_p, - stop_sequences=stop, - do_sample=i>0, - ) - generated_text = output.generated_text - finish_reason = output.details.finish_reason - # LlamaAccess.logger.debug(f"Generated Text:\n{generated_text}") - if finish_reason.value == "stop_sequence": - finish_reason = "stop" - else: - finish_reason = finish_reason.value - if finish_reason == "stop": - # Remove the stop token - for stop_token in stop: - if generated_text.endswith(stop_token): - generated_text = generated_text[:generated_text.rfind(stop_token)] - break - generated_text = generated_text.strip() - for role_name in role_names: - if generated_text.startswith(role_name): - generated_text = generated_text[len(role_name):].strip() - break - elif generated_text.startswith(f"`{role_name}`"): - generated_text = generated_text[len(f"`{role_name}`:"):].strip() - break - outputs.append({'role': 'assistant', 'content': generated_text, 'finish_reason': finish_reason}) - completion_tokens += output.details.generated_tokens - total_tokens = prompt_tokens + completion_tokens - usage = { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": total_tokens, - "reason": outputs[-1]["finish_reason"] if len(outputs) > 0 else "stop" - } - self.usage['prompt_tokens'] += prompt_tokens - self.usage['completion_tokens'] += completion_tokens - self.usage['total_tokens'] += total_tokens - return outputs, usage - except: - if not LlamaAccess._check_if_docker_running(): - raise ServiceDownError("Docker is shut down, restart the service") - else: - raise - - def kill(): - LlamaAccess.logger.info("Killing the docker processes") - docker_container_name = LlamaAccess._get_docker_container_name(LlamaAccess.model_name) - if LlamaAccess._check_if_docker_running(): - docker_name = os.popen(f"docker stop {docker_container_name}").read().strip() - assert docker_name == docker_container_name, f"Failed to stop container {docker_container_name}" - time.sleep(2) - LlamaAccess.logger.info(f"Docker Container {docker_container_name} stopped") - else: - LlamaAccess.logger.info(f"Docker Container {docker_container_name} already stopped") - # Remove the docker container - docker_name = os.popen(f"docker rm {docker_container_name}").read().strip() - time.sleep(2) - if docker_name == '': - LlamaAccess.logger.info(f"Docker Container {docker_container_name} already removed") - else: - assert docker_name == docker_container_name, f"Failed to remove container {LlamaAccess._get_docker_container_name(LlamaAccess.model_name)}" - LlamaAccess.logger.info(f"Docker Container {docker_container_name} removed") - try: - LlamaAccess.process.kill() - except: - pass - time.sleep(2) - # Stop logging threads - LlamaAccess.docker_exit_signal = True - LlamaAccess.litellm_exit_signal = True - LlamaAccess.docker_logging_thread.join() - LlamaAccess.logger.info("Docker processes killed and logging threads stopped") - try: - LlamaAccess.process.stdin.close() - except: - pass - time.sleep(2) - try: - LlamaAccess.process.stdout.close() - except: - pass - time.sleep(2) - LlamaAccess.logger.info("Docker stdin and stdout closed") - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - logger = logging.getLogger(__name__) - logger.log(logging.INFO, "Testing LlamaAccess") - LlamaAccess.class_init(port=10005, logger=logger) - try: - with LlamaAccess() as llama: - messages = [ - { - "role": "system", - "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.", - }, - { - "role": "system", - "name": "example_user", - "content": "New synergies will help drive top-line growth.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Things working well together will increase revenue.", - }, - { - "role": "system", - "name": "example_user", - "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Let's talk later when we're less busy about how to do better.", - }, - { - "role": "system", - "name": "example_user", - "content": "This late pivot means we don't have time to boil the ocean for the client deliverable.", - }, - { - "role": "system", - "name": "example_assistant", - "content": "Our idea seems to be scooped, don't know how to change direction now." - }, - { - "role": "user", - "content": "We changed the direction of the project, but we don't have time to do it.", - } - ] - messages = [messages[0]] + [messages[1+(i%len(messages[1:-1]))] for i in range(300)] + [messages[-1]] - print(llama.num_tokens_from_messages(messages)) - print("Will call complete_chat soon") - #time.sleep(30) - print(llama.complete_chat(messages, max_tokens=50, n=2, temperature=0.0, stop=['.'])) - finally: - LlamaAccess.class_kill() \ No newline at end of file diff --git a/src/copra/gpts/start_llama.sh b/src/copra/gpts/start_llama.sh deleted file mode 100644 index 3ae2412..0000000 --- a/src/copra/gpts/start_llama.sh +++ /dev/null @@ -1,43 +0,0 @@ -name=codellama_tgi -model=codellama/CodeLlama-7b-Instruct-hf #tiiuae/falcon-7b-instruct -port=8080 -cuda="0" -volume=$PWD/.log/tgi/models -# Change container name by container name argument -if [ $# -eq 1 ]; then - name=$1 -fi -# Change model by model name argument -if [ $# -eq 2 ]; then - name=$1 - model=$2 -fi -# Change port by port argument -if [ $# -eq 3 ]; then - name=$1 - model=$2 - port=$3 -fi -# Change cuda by cuda argument -if [ $# -eq 4 ]; then - name=$1 - model=$2 - port=$3 - cuda=$4 -fi -# Change volume by volume argument -if [ $# -eq 5 ]; then - name=$1 - model=$2 - port=$3 - cuda=$4 - volume=$5 -fi -# Check if volume exists -if [ ! -d $volume ]; then - # Raise error if volume does not exist - echo "Volume $volume does not exist" - exit 1 -fi -echo "Starting $name with model $model and volume $volume" -docker run --env CUDA_VISIBLE_DEVICES=$cuda --name $name --gpus all --shm-size 1g -p $port:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model --max-input-length 14000 --max-total-tokens 16384 --max-batch-prefill-tokens 14000 \ No newline at end of file diff --git a/src/copra/main/eval_benchmark.py b/src/copra/main/eval_benchmark.py index 7c00f24..07a785d 100644 --- a/src/copra/main/eval_benchmark.py +++ b/src/copra/main/eval_benchmark.py @@ -17,7 +17,6 @@ import math import typing -from copra.gpts.llama_access import LlamaAccess, ServiceDownError from copra.main.config import ( EnvSettings, EvalBenchmark, EvalDataset, EvalProofResults, EvalSettings, Experiments, EvalRunCheckpointInfo, PromptSettings, parse_config @@ -118,19 +117,6 @@ def _initialize_services( logger.error(f"Failed to start vLLM server: {e}") raise - # Initialize Llama service if using non-OpenAI model (deprecated, prefer vLLM) - elif eval_settings.gpt_model_name is not None and \ - len(eval_settings.gpt_model_name) != 0 and \ - not model_supports_openai_api(eval_settings.gpt_model_name): - llama_logger = setup_logger( - __name__ + "_llama", - os.path.join(eval_checkpoint_info.logging_dirs[-1], "llama.log"), - logging.INFO, - '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - ) - LlamaAccess.class_init(eval_settings.gpt_model_name, eval_settings.temperature, - debug=False, logger=llama_logger) - # Initialize Isabelle service if needed if eval_benchmark.language == ProofAction.Language.ISABELLE: isabelle_logger = setup_logger( @@ -174,12 +160,6 @@ def _shutdown_services( _vllm_server_process = None logging.getLogger(__name__).info("vLLM server stopped") - # Shutdown Llama service if it was initialized - elif eval_settings.gpt_model_name is not None and \ - len(eval_settings.gpt_model_name) != 0 and \ - not model_supports_openai_api(eval_settings.gpt_model_name): - LlamaAccess.class_kill() - # Shutdown Isabelle service if it was initialized if eval_benchmark.language == ProofAction.Language.ISABELLE: IsabelleExecutor.stop_server() @@ -511,18 +491,6 @@ def _process_lemma( _handle_proof_timeout(lemma_name, path, elapsed_time, no_proof_res, checkpoint_manager, attempt_idx) logger.info(f"Dumping proof search result:\nProof FAILED for lemma: {lemma_name}\n") should_retry = False - elif return_dict.get("service_down", False): - # Retry for service down - should_retry = True - logger.info("Killing the llama process") - LlamaAccess.class_kill() - logger.info("Killed the llama process") - logger.info("Restarting the llama process") - # Get llama logger from logs - llama_logger = logging.getLogger(__name__ + "_llama") - LlamaAccess.class_init(eval_settings.gpt_model_name, - eval_settings.temperature, debug=False, logger=llama_logger) - logger.info("Restarted the llama process") else: # Success case diff --git a/src/copra/main/proof_execution.py b/src/copra/main/proof_execution.py index 306eb6a..c0456ad 100644 --- a/src/copra/main/proof_execution.py +++ b/src/copra/main/proof_execution.py @@ -13,7 +13,6 @@ from typing import Dict, Any from copra.main.parallel_execution import get_executor -from copra.gpts.llama_access import ServiceDownError from copra.agent.dfs_tree_search_with_stack import DFSTreeSearch from copra.agent.gpt_guided_tree_search_policy import GptGuidedTreeSearchPolicy from copra.agent.simple_proof_agent import ProofAgent @@ -178,10 +177,6 @@ def _run_prover_wrapper( ret_dict["proof_res"] = proof_res ret_dict["attempted_success"] = True ret_dict["service_down"] = False - except ServiceDownError: - subprocess_logger.exception(f"ServiceDownError occurred while proving lemma: {lemma_name} in file {path}") - ret_dict["attempted_success"] = False - ret_dict["service_down"] = True except Exception: subprocess_logger.exception(f"Exception occurred while proving lemma: {lemma_name} in file {path}") ret_dict["attempted_success"] = False