diff --git a/agbenchmark/__init__.py b/agbenchmark/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/agbenchmark/benchmarks.py b/agbenchmark/benchmarks.py
new file mode 100644
index 000000000..a4e6f4c93
--- /dev/null
+++ b/agbenchmark/benchmarks.py
@@ -0,0 +1,44 @@
+import os
+import glob
+import subprocess
+import sys
+from typing import Tuple
+
+
+def run_specific_agent(task: str) -> None:
+    """Run ``main_no_modal.py`` with ``task`` as its only argument.
+
+    The child process inherits this process's stdio; its exit status is not
+    inspected.
+    """
+    # Use the running interpreter so the child shares this environment.
+    command = [sys.executable, "main_no_modal.py", task]
+    subprocess.run(command, text=True)
+
+
+def execute_generated_files() -> None:
+    """Execute the code held in each ``.txt`` file found by ``../*.txt``.
+
+    The working directory is switched to ``generated`` first, so the glob
+    pattern is resolved relative to that directory.
+    """
+    os.chdir("generated")
+
+    for file_name in glob.glob("../*.txt"):
+        with open(file_name, "r") as file:
+            python_code = file.read()
+        # Strip Markdown code fences so only the code itself remains.
+        python_code = python_code.replace("```python", "")
+        python_code = python_code.replace("```", "")
+        # SECURITY: exec() runs the file contents with full privileges;
+        # only use this on output you trust.
+        exec(python_code)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python script.py <task>")
+        sys.exit(1)
+    task = sys.argv[1]
+    run_specific_agent(task)
+    execute_generated_files()
diff --git a/agbenchmark/config.json b/agbenchmark/config.json
new file mode 100644
index 000000000..5fd051ecc
--- /dev/null
+++ b/agbenchmark/config.json
@@ -0,0 +1,4 @@
+{
+  "workspace": "generated",
+  "entry_path": "agbenchmark.benchmarks"
+}
diff --git a/agbenchmark/regression_tests.json b/agbenchmark/regression_tests.json
new file mode 100644
index 000000000..b66a16cf3
--- /dev/null
+++ b/agbenchmark/regression_tests.json
@@ -0,0 +1,10 @@
+{
+  "TestWriteFile": {
+    "difficulty": "interface",
+    "dependencies": [],
+    "data_path": "agbenchmark/challenges/interface/write_file"
+  },
+  "TestBasicCodeGeneration": {
+    "data_path": "agbenchmark/challenges/code/d4"
+  }
+}
diff --git a/agbenchmark/reports/1.json b/agbenchmark/reports/1.json
new file mode 100644
index 000000000..48a631c5b
--- /dev/null
+++ b/agbenchmark/reports/1.json
@@ -0,0 +1,18 @@
+{
+  "command": "agbenchmark start --test TestWriteFile --mock",
+  "completion_time": "2023-07-10-21:19",
+  "time_elapsed": "8.34 seconds",
+  "tests": {
+    "TestWriteFile": {
+      "difficulty": "basic",
+      "dependencies": [],
+      "test": "agbenchmark/challenges/interface/write_file",
+      "success": true
+    }
+  },
+  "config": {
+    "workspace": "generated",
+    "entry_path": "agbenchmark/benchmarks.py",
+    "cutoff": 60
+  }
+}
\ No newline at end of file
diff --git a/generated/.gitkeep b/generated/.gitkeep
deleted file mode 100644
index 4eb9b6ffa..000000000
--- a/generated/.gitkeep
+++ /dev/null
@@ -1,3 +0,0 @@
-# generated folder
-
-by default, `main.py` will generate the app in this folder (you can customize with the `--directory=newFolderHere` flag).
\ No newline at end of file