Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,54 @@ uv run ai-migrate merge-branches
uv run ai-migrate migrate --help
```

### S3 Upload Support

The tool can automatically upload migration results and logs to S3. Files are organized by:
- Project
- Timestamp
- Result type (pass/fail)
- Logs

To enable S3 uploads:

1. Configure AWS credentials using any standard method:
```bash
# Environment variables
export AWS_ACCESS_KEY_ID="your_access_key"
export AWS_SECRET_ACCESS_KEY="your_secret_key"
export AWS_DEFAULT_REGION="us-west-2"

# Or use AWS credentials file (~/.aws/credentials)
# Or use IAM roles if running on AWS infrastructure
```

2. Specify the S3 bucket in one of two ways:
```bash
# Command line argument
uv run ai-migrate migrate --s3-bucket my-bucket path/to/files

# Or environment variable
export AI_MIGRATE_S3_BUCKET="my-bucket"
uv run ai-migrate migrate path/to/files
```

The files will be uploaded with this structure:
```
bucket/
project-name/
attempt-20250326-184018/
manifest.json
pass/
file1.java
file2.java
fail/
file3.java
```

#### Note
If you set the bucket name to `localhost`, the system will skip S3 and save the results locally to `~/ai-migration-results`.


### Project Selection

The interactive CLI now provides an easy way to select which migration project to use:
Expand Down Expand Up @@ -150,6 +198,7 @@ Here's how commands from the main branch map to the new interactive CLI:
| `uv run ai-migrate projects run --rerun-passed` | `uv run ai-migrate migrate --rerun-passed` | Re-run migrations that have already passed |
| `uv run ai-migrate projects run --max-workers=<num>` | `uv run ai-migrate migrate --max-workers=<num>` | Set maximum number of parallel workers |
| `uv run ai-migrate projects run --local-worktrees` | `uv run ai-migrate migrate --local-worktrees` | Create worktrees alongside the git repo |
| `uv run ai-migrate projects run --s3-bucket=<bucket>` | `uv run ai-migrate migrate --s3-bucket=<bucket>` | Upload results to S3 bucket |
| `uv run ai-migrate projects checkout <file>` | `uv run ai-migrate checkout <file>` | Check out the branch for a failed migration |
| `uv run ai-migrate projects merge-branches` | `uv run ai-migrate merge-branches` | Merge changes from migrator branches |

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"rich>=13.0.0",
"prompt_toolkit>=3.0.0",
"tiktoken>=0.9.0",
"boto3>=1.34.0",
]

[project.scripts]
Expand Down
7 changes: 7 additions & 0 deletions src/ai_migrate/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,11 @@ def project_dir_validate(ctx, param, project_dir):
is_flag=True,
help="Don't automatically create evaluations after successful migrations",
)
@click.option(
"--s3-bucket",
help="S3 bucket name for storing migration results (can also use AI_MIGRATE_S3_BUCKET env var). Pass 'localhost' to save locally to '~/ai-migration-results'",
envvar="AI_MIGRATE_S3_BUCKET",
)
def migrate(
file_paths,
project_dir,
Expand All @@ -726,6 +731,7 @@ def migrate(
local_worktrees,
llm_fakes,
dont_create_evals,
s3_bucket,
):
"""Migrate one or more files or manage project resources.

Expand Down Expand Up @@ -788,6 +794,7 @@ def migrate(
resume=True,
llm_fakes=llm_fakes,
dont_create_evals=dont_create_evals,
s3_bucket=s3_bucket,
)
)

Expand Down
20 changes: 17 additions & 3 deletions src/ai_migrate/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ async def run(
resume: bool = True,
llm_fakes=None,
dont_create_evals: bool = False,
s3_bucket: str | None = None,
) -> list[FileGroup]:
"""Run an AI migration project."""
if manifest_file:
Expand Down Expand Up @@ -179,14 +180,14 @@ async def process_one_with_sem(index, files: FileGroup, task_name: str):
for fn in file.files:
print(fn)

ts = datetime.now().strftime("%Y%m%d-%H%M%S")
results_file = f"manifest-{ts}.json"
timestamp = datetime.now()
results_file = f"manifest-{timestamp.strftime('%Y%m%d-%H%M%S')}.json"
with open(results_file, "w") as f:
result_manifest = manifest.model_copy(
update={
"files": results,
"eval_target_repo_ref": target_sha,
"time": datetime.now(),
"time": timestamp,
}
)
f.write(
Expand All @@ -196,6 +197,19 @@ async def process_one_with_sem(index, files: FileGroup, task_name: str):
)

print(f"Results saved to {results_file}")

if s3_bucket:
from .s3_uploader import S3Uploader

uploader = S3Uploader(s3_bucket)
print("Results upload started")
await uploader.upload_results(
project=Path(project_dir).name,
results=results,
results_file=Path(results_file),
timestamp=timestamp,
)

return results


Expand Down
62 changes: 62 additions & 0 deletions src/ai_migrate/s3_uploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from datetime import datetime
from pathlib import Path
import boto3
import asyncio
import shutil
import logging

logger = logging.getLogger("ai_migrate.s3_uploader")


class S3Uploader:
def __init__(self, bucket_name: str):
self.s3_client = boto3.client("s3") if bucket_name != "localhost" else None
self.bucket = bucket_name

async def upload(
self, project: str, paths: list[Path], result_type: str, timestamp: datetime
) -> None:
dest = Path(f"{project}/attempt-{timestamp.strftime('%Y%m%d-%H%M%S')}")
if result_type:
dest = dest / result_type

if self.bucket == "localhost":
dest = Path("~/ai-migration-results").expanduser() / dest
dest.mkdir(parents=True, exist_ok=True)
for path in paths:
try:
shutil.copy2(path, dest / path.name)
except OSError as e:
logger.error(f"Failed to save {path.name}: {e}")
return

semaphore = asyncio.Semaphore(5)

async def upload_one(path: Path):
key = str(dest / path.name)
try:
async with semaphore:
await asyncio.to_thread(
self.s3_client.put_object,
Bucket=self.bucket,
Key=key,
Body=path.read_bytes(),
)
except Exception as e:
logger.error(f"Failed to upload {path.name} to {self.bucket}: {e}")

await asyncio.gather(*[upload_one(path) for path in paths])

async def upload_results(
self,
project: str,
results: list,
results_file: Path,
timestamp: datetime = None,
) -> None:
timestamp = timestamp or datetime.now()
await self.upload(project, [results_file], "manifest", timestamp)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if I want to put the manifest into a folder?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we basically want to create the same structure we have in evals inside of the project directories, then we can point run_eval at these

for result in results:
await self.upload(
project, [Path(f) for f in result.files], result.result, timestamp
)