diff --git a/README.md b/README.md
index 1c438c0c..09dcecc7 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ conda create -n ai_scientist python=3.11
 conda activate ai_scientist
 
 # LLM APIs
-pip install anthropic aider-chat backoff openai
+pip install anthropic aider-chat backoff openai groq
 # Viz
 pip install matplotlib pypdf pymupdf4llm
 # Install pdflatex
@@ -55,7 +55,7 @@ pip install torch numpy transformers datasets tiktoken wandb tqdm
 
 We use the following environment variables for the different API providers for different models:
 
-`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `DEEPSEEK_API_KEY`, `OPENROUTER_API_KEY`
+`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `DEEPSEEK_API_KEY`, `OPENROUTER_API_KEY`, `GROQ_API_KEY`
 
 Our code can also optionally use a Semantic Scholar API Key (`S2_API_KEY`) for higher throughput [if you have one](https://www.semanticscholar.org/product/api), though in principle it should work without it.
 
@@ -115,6 +115,7 @@ conda activate ai_scientist
 # Run the paper generation.
 python launch_scientist.py --model "gpt-4o-2024-05-13" --experiment nanoGPT_lite --num-ideas 2
 python launch_scientist.py --model "claude-3-5-sonnet-20240620" --experiment nanoGPT_lite --num-ideas 2
+python launch_scientist.py --model "llama3-70b-8192" --experiment nanoGPT_lite --num-ideas 2
 ```
 
 ## Getting an LLM Generated Paper Review
diff --git a/ai_scientist/generate_ideas.py b/ai_scientist/generate_ideas.py
index 260d1ecb..db7055a2 100644
--- a/ai_scientist/generate_ideas.py
+++ b/ai_scientist/generate_ideas.py
@@ -468,6 +468,7 @@ def check_idea_novelty(
             "gpt-4o-2024-05-13",
             "deepseek-coder-v2-0724",
             "llama3.1-405b",
+            "llama3-70b-8192",
         ],
         help="Model to use for AI Scientist.",
     )
@@ -496,6 +497,11 @@ def check_idea_novelty(
         print(f"Using OpenAI API with model {args.model}.")
         client_model = "gpt-4o-2024-05-13"
         client = openai.OpenAI()
+    elif args.model == "llama3-70b-8192":
+        from groq import Groq
+        print(f"Using Groq API with {args.model}.")
+        client_model = "llama3-70b-8192"
+        client = Groq(api_key=os.environ["GROQ_API_KEY"])
     elif args.model == "deepseek-coder-v2-0724":
         import openai
 
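For context on why a `Groq` client can be swapped in for the existing `openai.OpenAI` clients: the Groq Python SDK exposes the same chat-completions call shape. A minimal sketch (not part of the diff) of the call pattern the rest of this PR relies on, assuming `GROQ_API_KEY` is set:

```python
# Minimal sketch of the Groq call pattern assumed throughout this PR.
import os

from groq import Groq

client = Groq(api_key=os.environ["GROQ_API_KEY"])
response = client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Reply with a single word."},
    ],
    temperature=0.75,
    max_tokens=32,
)
print(response.choices[0].message.content)
```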
diff --git a/ai_scientist/llm.py b/ai_scientist/llm.py
index 1045db9d..2b085180 100644
--- a/ai_scientist/llm.py
+++ b/ai_scientist/llm.py
@@ -1,7 +1,8 @@
 import backoff
 import openai
 import json
-
+import os
+from groq import Groq
 
 # Get N responses from a single message, used for ensembling.
 @backoff.on_exception(backoff.expo, (openai.RateLimitError, openai.APITimeoutError))
@@ -57,23 +58,29 @@ def get_batch_responses_from_llm(
         new_msg_history = [
             new_msg_history + [{"role": "assistant", "content": c}] for c in content
         ]
-    elif model == "llama-3-1-405b-instruct":
+    elif model in ["llama-3-1-405b-instruct", "llama3-70b-8192"]:
         new_msg_history = msg_history + [{"role": "user", "content": msg}]
-        response = client.chat.completions.create(
-            model="meta-llama/llama-3.1-405b-instruct",
-            messages=[
-                {"role": "system", "content": system_message},
-                *new_msg_history,
-            ],
-            temperature=temperature,
-            max_tokens=3000,
-            n=n_responses,
-            stop=None,
-        )
-        content = [r.message.content for r in response.choices]
-        new_msg_history = [
-            new_msg_history + [{"role": "assistant", "content": c}] for c in content
-        ]
+        if model == "llama-3-1-405b-instruct":
+            model_name = "meta-llama/llama-3.1-405b-instruct"
+        else:
+            model_name = "llama3-70b-8192"
+            client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+        content = []
+        all_histories = []
+        for _ in range(n_responses):
+            response = client.chat.completions.create(
+                model=model_name,
+                messages=[
+                    {"role": "system", "content": system_message},
+                    *new_msg_history,
+                ],
+                temperature=temperature,
+                max_tokens=3000,
+                stop=None,
+            )
+            content.append(response.choices[0].message.content)
+            all_histories.append(new_msg_history + [{"role": "assistant", "content": content[-1]}])
+        new_msg_history = all_histories
     elif model == "claude-3-5-sonnet-20240620":
         content, new_msg_history = [], []
         for _ in range(n_responses):
@@ -89,7 +96,6 @@ def get_batch_responses_from_llm(
             content.append(c)
             new_msg_history.append(hist)
     else:
-        # TODO: This is only supported for GPT-4 in our reviewer pipeline.
        raise ValueError(f"Model {model} not supported.")
 
     if print_debug:
@@ -184,10 +190,16 @@ def get_response_from_llm(
         )
         content = response.choices[0].message.content
         new_msg_history = new_msg_history + [{"role": "assistant", "content": content}]
-    elif model in ["meta-llama/llama-3.1-405b-instruct", "llama-3-1-405b-instruct"]:
+    elif model in ["meta-llama/llama-3.1-405b-instruct", "llama-3-1-405b-instruct", "llama3-70b-8192"]:
         new_msg_history = msg_history + [{"role": "user", "content": msg}]
+        if model in ["meta-llama/llama-3.1-405b-instruct", "llama-3-1-405b-instruct"]:
+            model_name = "meta-llama/llama-3.1-405b-instruct"
+        else:
+            model_name = "llama3-70b-8192"
+            client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
         response = client.chat.completions.create(
-            model="meta-llama/llama-3.1-405b-instruct",
+            model=model_name,
             messages=[
                 {"role": "system", "content": system_message},
                 *new_msg_history,
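With the dispatch above in place, callers elsewhere in the repo only need to pass the new model string together with a `Groq` client. A hypothetical usage sketch (keyword names follow the helper as it appears in this diff; the exact call sites in the repo may differ):

```python
# Hypothetical call into the updated helper; assumes GROQ_API_KEY is set.
import os

from groq import Groq
from ai_scientist.llm import get_response_from_llm

client = Groq(api_key=os.environ["GROQ_API_KEY"])
text, msg_history = get_response_from_llm(
    msg="Suggest one small ablation for a nanoGPT baseline.",
    client=client,
    model="llama3-70b-8192",
    system_message="You are an ambitious AI researcher.",
)
print(text)
```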
"llama3.1-405b": main_model = Model("openrouter/meta-llama/llama-3.1-405b-instruct") + elif args.model == "llama3-70b-8192": + main_model = Model("groq/llama3-70b-8192") else: main_model = Model(model) coder = Coder.create( diff --git a/experimental/launch_oe_scientist.py b/experimental/launch_oe_scientist.py index 69e6e4f4..a6a2c200 100644 --- a/experimental/launch_oe_scientist.py +++ b/experimental/launch_oe_scientist.py @@ -42,6 +42,7 @@ def parse_arguments(): "gpt-4o-2024-05-13", "deepseek-coder-v2-0724", "llama3.1-405b", + "llama3-70b-8192", ], help="Model to use for AI Scientist.", ) @@ -189,6 +190,8 @@ def do_idea( main_model = Model("deepseek/deepseek-coder") elif model == "llama3.1-405b": main_model = Model("openrouter/meta-llama/llama-3.1-405b-instruct") + elif model == "llama3-70b-8192": + main_model = Model("groq/llama3-70b-8192") else: main_model = Model(model) coder = Coder.create( @@ -225,6 +228,8 @@ def do_idea( main_model = Model("deepseek/deepseek-coder") elif model == "llama3.1-405b": main_model = Model("openrouter/meta-llama/llama-3.1-405b-instruct") + elif model == "llama3-70b-8192": + main_model = Model("llama3-70b-8192") else: main_model = Model(model) coder = Coder.create( @@ -348,6 +353,11 @@ def do_idea( api_key=os.environ["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1", ) + elif args.model == "llama3-70b-8192": + from groq import Groq + print(f"Using Groq API with {args.model}.") + client_model = "llama3-70b-8192" + client = Groq(api_key=os.environ["GROQ_API_KEY"]) else: raise ValueError(f"Model {args.model} not supported.") diff --git a/launch_scientist.py b/launch_scientist.py index 779cdf15..5ddedb28 100644 --- a/launch_scientist.py +++ b/launch_scientist.py @@ -25,7 +25,8 @@ def print_time(): def parse_arguments(): - parser = argparse.ArgumentParser(description="Run AI scientist experiments") + parser = argparse.ArgumentParser( + description="Run AI scientist experiments") parser.add_argument( "--skip-idea-generation", action="store_true", @@ -47,7 +48,10 @@ def parse_arguments(): "--model", type=str, default="claude-3-5-sonnet-20240620", - choices=["claude-3-5-sonnet-20240620", "gpt-4o-2024-05-13", "deepseek-coder-v2-0724", "llama3.1-405b"], + choices=[ + "claude-3-5-sonnet-20240620", "gpt-4o-2024-05-13", + "deepseek-coder-v2-0724", "llama3.1-405b", "llama3-70b-8192" + ], help="Model to use for AI Scientist.", ) parser.add_argument( @@ -72,7 +76,8 @@ def parse_arguments(): "--gpus", type=str, default=None, - help="Comma-separated list of GPU IDs to use (e.g., '0,1,2'). If not specified, all available GPUs will be used.", + help= + "Comma-separated list of GPU IDs to use (e.g., '0,1,2'). 
diff --git a/launch_scientist.py b/launch_scientist.py
index 779cdf15..5ddedb28 100644
--- a/launch_scientist.py
+++ b/launch_scientist.py
@@ -25,7 +25,8 @@ def print_time():
 
 
 def parse_arguments():
-    parser = argparse.ArgumentParser(description="Run AI scientist experiments")
+    parser = argparse.ArgumentParser(
+        description="Run AI scientist experiments")
     parser.add_argument(
         "--skip-idea-generation",
         action="store_true",
@@ -47,7 +48,10 @@ def parse_arguments():
         "--model",
         type=str,
         default="claude-3-5-sonnet-20240620",
-        choices=["claude-3-5-sonnet-20240620", "gpt-4o-2024-05-13", "deepseek-coder-v2-0724", "llama3.1-405b"],
+        choices=[
+            "claude-3-5-sonnet-20240620", "gpt-4o-2024-05-13",
+            "deepseek-coder-v2-0724", "llama3.1-405b", "llama3-70b-8192"
+        ],
         help="Model to use for AI Scientist.",
     )
     parser.add_argument(
@@ -72,7 +76,8 @@ def parse_arguments():
         "--gpus",
         type=str,
         default=None,
-        help="Comma-separated list of GPU IDs to use (e.g., '0,1,2'). If not specified, all available GPUs will be used.",
+        help=
+        "Comma-separated list of GPU IDs to use (e.g., '0,1,2'). If not specified, all available GPUs will be used.",
     )
     parser.add_argument(
         "--num-ideas",
@@ -89,23 +94,36 @@ def get_available_gpus(gpu_ids=None):
     return list(range(torch.cuda.device_count()))
 
 
-def worker(queue, base_dir, results_dir, model, client, client_model, writeup, improvement, gpu_id):
+def worker(queue, base_dir, results_dir, model, client, client_model, writeup,
+           improvement, gpu_id):
     os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
     print(f"Worker {gpu_id} started.")
     while True:
         idea = queue.get()
         if idea is None:
             break
-        success = do_idea(
-            base_dir, results_dir, idea, model, client, client_model, writeup, improvement, log_file=True
-        )
+        success = do_idea(base_dir,
+                          results_dir,
+                          idea,
+                          model,
+                          client,
+                          client_model,
+                          writeup,
+                          improvement,
+                          log_file=True)
         print(f"Completed idea: {idea['Name']}, Success: {success}")
     print(f"Worker {gpu_id} finished.")
 
 
-def do_idea(
-    base_dir, results_dir, idea, model, client, client_model, writeup, improvement, log_file=False
-):
+def do_idea(base_dir,
+            results_dir,
+            idea,
+            model,
+            client,
+            client_model,
+            writeup,
+            improvement,
+            log_file=False):
     ## CREATE PROJECT FOLDER
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     idea_name = f"{timestamp}_{idea['Name']}"
@@ -137,23 +155,30 @@ def do_idea(
         print(f"*Starting idea: {idea_name}*")
         ## PERFORM EXPERIMENTS
         fnames = [exp_file, vis_file, notes]
-        io = InputOutput(yes=True, chat_history_file=f"{folder_name}/{idea_name}_aider.txt")
+        io = InputOutput(
+            yes=True, chat_history_file=f"{folder_name}/{idea_name}_aider.txt")
        if model == "hybrid":
             main_model = Model("claude-3-5-sonnet-20240620")
         elif model == "deepseek-coder-v2-0724":
             main_model = Model("deepseek/deepseek-coder")
         elif model == "llama3.1-405b":
             main_model = Model("openrouter/meta-llama/llama-3.1-405b-instruct")
+        elif model == "llama3-70b-8192":
+            main_model = Model("groq/llama3-70b-8192")
         else:
             main_model = Model(model)
-        coder = Coder.create(
-            main_model=main_model, fnames=fnames, io=io, stream=False, use_git=False, edit_format="diff"
-        )
+        coder = Coder.create(main_model=main_model,
+                             fnames=fnames,
+                             io=io,
+                             stream=False,
+                             use_git=False,
+                             edit_format="diff")
         print_time()
         print(f"*Starting Experiments*")
         try:
-            success = perform_experiments(idea, folder_name, coder, baseline_results)
+            success = perform_experiments(idea, folder_name, coder,
+                                          baseline_results)
         except Exception as e:
             print(f"Error during experiments: {e}")
             print(f"Experiments failed for idea {idea_name}")
@@ -174,12 +199,18 @@ def do_idea(
         elif model == "deepseek-coder-v2-0724":
             main_model = Model("deepseek/deepseek-coder")
         elif model == "llama3.1-405b":
-            main_model = Model("openrouter/meta-llama/llama-3.1-405b-instruct")
+            main_model = Model(
+                "openrouter/meta-llama/llama-3.1-405b-instruct")
+        elif model == "llama3-70b-8192":
+            main_model = Model("groq/llama3-70b-8192")
         else:
             main_model = Model(model)
-        coder = Coder.create(
-            main_model=main_model, fnames=fnames, io=io, stream=False, use_git=False, edit_format="diff"
-        )
+        coder = Coder.create(main_model=main_model,
+                             fnames=fnames,
+                             io=io,
+                             stream=False,
+                             use_git=False,
+                             edit_format="diff")
         try:
             perform_writeup(idea, folder_name, coder, client, client_model)
         except Exception as e:
@@ -217,8 +248,10 @@ def do_idea(
             print(f"*Starting Improvement*")
             try:
                 perform_improvement(review, coder)
-                generate_latex(coder, folder_name, f"{folder_name}/{idea['Name']}_improved.pdf")
-                paper_text = load_paper(f"{folder_name}/{idea['Name']}_improved.pdf")
+                generate_latex(coder, folder_name,
f"{folder_name}/{idea['Name']}_improved.pdf") + paper_text = load_paper( + f"{folder_name}/{idea['Name']}_improved.pdf") review = perform_review( paper_text, model="gpt-4o-2024-05-13", @@ -229,7 +262,8 @@ def do_idea( temperature=0.1, ) # Store the review in separate review.txt file - with open(osp.join(folder_name, "review_improved.txt"), "w") as f: + with open(osp.join(folder_name, "review_improved.txt"), + "w") as f: f.write(json.dumps(review)) except Exception as e: print(f"Failed to perform improvement: {e}") @@ -277,19 +311,20 @@ def do_idea( print(f"Using OpenAI API with {args.model}.") client_model = "deepseek-coder-v2-0724" - client = openai.OpenAI( - api_key=os.environ["DEEPSEEK_API_KEY"], - base_url="https://api.deepseek.com" - ) + client = openai.OpenAI(api_key=os.environ["DEEPSEEK_API_KEY"], + base_url="https://api.deepseek.com") elif args.model == "llama3.1-405b": import openai print(f"Using OpenAI API with {args.model}.") client_model = "meta-llama/llama-3.1-405b-instruct" - client = openai.OpenAI( - api_key=os.environ["OPENROUTER_API_KEY"], - base_url="https://openrouter.ai/api/v1" - ) + client = openai.OpenAI(api_key=os.environ["OPENROUTER_API_KEY"], + base_url="https://openrouter.ai/api/v1") + elif args.model == "llama3-70b-8192": + from groq import Groq + print(f"Using Groq API with {args.model}.") + client_model = "llama3-70b-8192" + client = Groq(api_key=os.environ["GROQ_API_KEY"]) else: raise ValueError(f"Model {args.model} not supported.") @@ -325,20 +360,18 @@ def do_idea( processes = [] for i in range(args.parallel): gpu_id = available_gpus[i % len(available_gpus)] - p = multiprocessing.Process( - target=worker, - args=( - queue, - base_dir, - results_dir, - args.model, - client, - client_model, - args.writeup, - args.improvement, - gpu_id, - ) - ) + p = multiprocessing.Process(target=worker, + args=( + queue, + base_dir, + results_dir, + args.model, + client, + client_model, + args.writeup, + args.improvement, + gpu_id, + )) p.start() time.sleep(150) processes.append(p)