Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,54 @@ uv run ai-migrate merge-branches
uv run ai-migrate migrate --help
```

### S3 Upload Support

The tool can automatically upload migration results and logs to S3. Files are organized by:
- Project
- Timestamp
- Result type (pass/fail)
- Logs

To enable S3 uploads:

1. Configure AWS credentials using any standard method:
```bash
# Environment variables
export AWS_ACCESS_KEY_ID="your_access_key"
export AWS_SECRET_ACCESS_KEY="your_secret_key"
export AWS_DEFAULT_REGION="us-west-2"

# Or use AWS credentials file (~/.aws/credentials)
# Or use IAM roles if running on AWS infrastructure
```

2. Specify the S3 bucket in one of two ways:
```bash
# Command line argument
uv run ai-migrate migrate --s3-bucket my-bucket path/to/files

# Or environment variable
export AI_MIGRATE_S3_BUCKET="my-bucket"
uv run ai-migrate migrate path/to/files
```

The files will be uploaded with this structure:
```
bucket/
project-name/
attempt-20250326-184018/
manifest.json
pass/
file1.java
file2.java
fail/
file3.java
```

#### Note
If you set the bucket name to `localhost`, the system will skip S3 and save the results locally to `~/ai-migration-results`.


### Project Selection

The interactive CLI now provides an easy way to select which migration project to use:
Expand Down Expand Up @@ -150,6 +198,7 @@ Here's how commands from the main branch map to the new interactive CLI:
| `uv run ai-migrate projects run --rerun-passed` | `uv run ai-migrate migrate --rerun-passed` | Re-run migrations that have already passed |
| `uv run ai-migrate projects run --max-workers=<num>` | `uv run ai-migrate migrate --max-workers=<num>` | Set maximum number of parallel workers |
| `uv run ai-migrate projects run --local-worktrees` | `uv run ai-migrate migrate --local-worktrees` | Create worktrees alongside the git repo |
| `uv run ai-migrate projects run --s3-bucket=<bucket>` | `uv run ai-migrate migrate --s3-bucket=<bucket>` | Upload results to S3 bucket |
| `uv run ai-migrate projects checkout <file>` | `uv run ai-migrate checkout <file>` | Check out the branch for a failed migration |
| `uv run ai-migrate projects merge-branches` | `uv run ai-migrate merge-branches` | Merge changes from migrator branches |

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"rich>=13.0.0",
"prompt_toolkit>=3.0.0",
"tiktoken>=0.9.0",
"boto3>=1.34.0",
]

[project.scripts]
Expand Down
7 changes: 7 additions & 0 deletions src/ai_migrate/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,11 @@ def project_dir_validate(ctx, param, project_dir):
is_flag=True,
help="Don't automatically create evaluations after successful migrations",
)
@click.option(
"--s3-bucket",
help="S3 bucket name for storing migration results (can also use AI_MIGRATE_S3_BUCKET env var). Pass 'localhost' to save locally to '~/ai-migration-results'",
envvar="AI_MIGRATE_S3_BUCKET",
)
def migrate(
file_paths,
project_dir,
Expand All @@ -726,6 +731,7 @@ def migrate(
local_worktrees,
llm_fakes,
dont_create_evals,
s3_bucket,
):
"""Migrate one or more files or manage project resources.

Expand Down Expand Up @@ -788,6 +794,7 @@ def migrate(
resume=True,
llm_fakes=llm_fakes,
dont_create_evals=dont_create_evals,
s3_bucket=s3_bucket,
)
)

Expand Down
20 changes: 17 additions & 3 deletions src/ai_migrate/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ async def run(
resume: bool = True,
llm_fakes=None,
dont_create_evals: bool = False,
s3_bucket: str | None = None,
) -> list[FileGroup]:
"""Run an AI migration project."""
if manifest_file:
Expand Down Expand Up @@ -179,14 +180,14 @@ async def process_one_with_sem(index, files: FileGroup, task_name: str):
for fn in file.files:
print(fn)

ts = datetime.now().strftime("%Y%m%d-%H%M%S")
results_file = f"manifest-{ts}.json"
timestamp = datetime.now()
results_file = f"manifest-{timestamp.strftime('%Y%m%d-%H%M%S')}.json"
with open(results_file, "w") as f:
result_manifest = manifest.model_copy(
update={
"files": results,
"eval_target_repo_ref": target_sha,
"time": datetime.now(),
"time": timestamp,
}
)
f.write(
Expand All @@ -196,6 +197,19 @@ async def process_one_with_sem(index, files: FileGroup, task_name: str):
)

print(f"Results saved to {results_file}")

if s3_bucket:
from .s3_uploader import S3Uploader

uploader = S3Uploader(s3_bucket)
print("Results upload started")
await uploader.upload_results(
project=Path(project_dir).name,
results=results,
results_file=Path(results_file),
timestamp=timestamp,
)

return results


Expand Down
62 changes: 62 additions & 0 deletions src/ai_migrate/s3_uploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from datetime import datetime
from pathlib import Path
import boto3
import asyncio
import shutil
import logging

logger = logging.getLogger("ai_migrate.s3_uploader")


class S3Uploader:
def __init__(self, bucket_name: str):
self.s3_client = boto3.client("s3") if bucket_name != "localhost" else None
self.bucket = bucket_name

async def upload(
self, project: str, paths: list[Path], result_type: str, timestamp: datetime
) -> None:
dest = Path(f"{project}/attempt-{timestamp.strftime('%Y%m%d-%H%M%S')}")
if result_type:
dest = dest / result_type

if self.bucket == "localhost":
dest = Path("~/ai-migration-results").expanduser() / dest
dest.mkdir(parents=True, exist_ok=True)
for path in paths:
try:
shutil.copy2(path, dest / path.name)
except OSError as e:
logger.error(f"Failed to save {path.name}: {e}")
return

semaphore = asyncio.Semaphore(5)

async def upload_one(path: Path):
key = str(dest / path.name)
try:
async with semaphore:
await asyncio.to_thread(
self.s3_client.put_object,
Bucket=self.bucket,
Key=key,
Body=path.read_bytes(),
)
except Exception as e:
logger.error(f"Failed to upload {path.name} to {self.bucket}: {e}")

await asyncio.gather(*[upload_one(path) for path in paths])

async def upload_results(
self,
project: str,
results: list,
results_file: Path,
timestamp: datetime = None,
) -> None:
timestamp = timestamp or datetime.now()
await self.upload(project, [results_file], "manifest", timestamp)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if I want to put the manifest into a folder?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we basically want to create the same structure we have in evals inside of the project directories, then we can point run_eval at these

for result in results:
await self.upload(
project, [Path(f) for f in result.files], result.result, timestamp
)