-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathjustfile
More file actions
40 lines (32 loc) · 1.32 KB
/
justfile
File metadata and controls
40 lines (32 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# KernelBench ↔ Tinker Integration Commands
# Training configuration file handed to the trainer through `--config`.
config := "src/kernelbench_tinker/config/rl_kernelbench.yaml"
# Directory under which each run's log directory and nohup log are placed.
runs_dir := "./runs"
# Default run name: "run_" followed by the timestamp printed by `date`.
run_name := "run_" + `date +%Y%m%d_%H%M%S`
# Show the list of available recipes.
@default:
    just --list
# === Training ===
# Notes on the liveness check at the end of this recipe:
#  * just runs every recipe line through `sh -cu '<line>'`, so the checking
#    shell's own command line contains the search pattern. Writing the pattern
#    as `[l]og_path=...` still matches the trainer's `log_path=...` argument
#    but no longer matches the shell that runs the check.
#  * `$` reaches the shell unchanged in a justfile (only `{{...}}` is
#    interpolated), so command substitution is spelled `$(...)`, not `$$(...)`.
#  * All matching PIDs are collected once and printed on a single line.
# The recipe exits 0 even when the check reports "Failed to start".
# Start a detached training run; `run` defaults to a timestamped name.
train run=run_name:
    @mkdir -p {{runs_dir}}
    @echo "Starting training: {{run}}"
    nohup uv run python -m kernelbench_tinker.scripts.train_kernel_rl \
        --config {{config}} \
        log_path={{runs_dir}}/{{run}} \
        > {{runs_dir}}/{{run}}_nohup.log 2>&1 &
    @sleep 2
    @pids=$(pgrep -f "[l]og_path={{runs_dir}}/{{run}}" | tr '\n' ' '); \
        if [ -n "$pids" ]; then echo "Training started (PID: ${pids% })"; else echo "Failed to start"; fi
    @echo "Logs: {{runs_dir}}/{{run}}/logs.log"
# Notes:
#  * The nohup log is opened with `>>` so the output of the run being resumed
#    is kept instead of being truncated by the relaunch.
#  * just runs every recipe line through `sh -cu '<line>'`, so the checking
#    shell's own command line contains the search pattern. Writing the pattern
#    as `[l]og_path=...` still matches the trainer's `log_path=...` argument
#    but no longer matches the shell that runs the check.
# The recipe exits 0 even when the check reports "Failed to start".
# Relaunch training for an existing run, loading its checkpoint from the run directory.
resume run:
    @echo "Resuming training: {{run}}"
    nohup uv run python -m kernelbench_tinker.scripts.train_kernel_rl \
        --config {{config}} \
        log_path={{runs_dir}}/{{run}} \
        load_checkpoint_path={{runs_dir}}/{{run}} \
        >> {{runs_dir}}/{{run}}_nohup.log 2>&1 &
    @sleep 2
    @pgrep -f "[l]og_path={{runs_dir}}/{{run}}" > /dev/null && echo "Training resumed" || echo "Failed to start"
# === Monitoring ===
# Follow the logs.log file of the given run.
@logs run:
    tail -f {{runs_dir}}/{{run}}/logs.log
# Notes:
#  * just runs every recipe line through `sh -cu '<line>'`, so a plain
#    `pgrep -f "train_kernel_rl"` also matches the shell executing this very
#    line. The `[t]rain_kernel_rl` form matches real trainer processes only,
#    which lets the "No training jobs running" fallback fire.
#  * pgrep never reports itself, so no `grep -v grep` filter is needed.
# List running training processes (PID and full command line).
status:
    @echo "=== Running Training Jobs ==="
    @pgrep -fa "[t]rain_kernel_rl" || echo "No training jobs running"