|
1 | | -import rawRows from "$data/website_data_lite.csv"; |
2 | | -import codesData from "$data/website_data_codes.json"; |
| 1 | +import fs from "node:fs"; |
| 2 | +import path from "node:path"; |
| 3 | +// The explorer ships a curated slice pinned to commit 8907cb99, where the |
| 4 | +// CSV's agent_recording paths line up with the .cast files actually present |
| 5 | +// under src/data/recordings/. The site-wide CSV (website_data_lite.csv) was |
| 6 | +// later swapped to a different experiment whose recordings were never |
| 7 | +// uploaded, which is why we load a separate copy here. |
| 8 | +import rawRows from "$data/website_data_lite.explorer.csv"; |
| 9 | +import codesData from "$data/website_data_codes.explorer.json"; |
3 | 10 | import { buildWorkloads, summarizeFacets } from "$utils/explorer.js"; |
| 11 | +import { AGENT_IDS } from "$utils/agents.js"; |
| 12 | + |
// Absolute path of the on-disk recordings tree. path.resolve() resolves a
// relative argument against the process working directory, so this assumes
// the build is launched from the project root — confirm if that ever changes.
const RECORDINGS_DIR = path.resolve("src/data/recordings");

// Per the historical CSV at 8907cb99 (which was committed alongside these
// .cast files), the on-disk agent-N suffixes map as follows. The table is
// frozen so no consumer can accidentally mutate the shared lookup.
const AGENT_NUM_TO_ID = Object.freeze({
  "2": AGENT_IDS.HUMAN,
  "3": AGENT_IDS.CLAUDE,
  "4": AGENT_IDS.GPT5
});
| 22 | + |
/**
 * List the names of the immediate sub-directories of `dir`.
 *
 * Names are sorted lexicographically because fs.readdirSync() makes no
 * ordering guarantee; sorting keeps every scan reproducible across platforms.
 * Entries that no longer resolve (dangling symlinks, files removed mid-scan)
 * are skipped instead of aborting the scan.
 *
 * @param {string} dir - Directory to list.
 * @returns {string[]} Sorted sub-directory names.
 */
function listSubdirectories(dir) {
  return fs
    .readdirSync(dir)
    .filter((name) =>
      fs
        .statSync(path.join(dir, name), { throwIfNoEntry: false })
        ?.isDirectory()
    )
    .sort();
}

/**
 * Scan `<root>/<ts>/<taskId>/<runDir>/sessions/agent.cast` and build a lookup
 * of recording URLs keyed by task id, then by agent id.
 *
 * A run directory contributes only when it contains `sessions/agent.cast`,
 * its name matches `.agent-<N>-`, and `<N>` has an entry in AGENT_NUM_TO_ID.
 * Directories are visited in sorted order, so when several runs cover the
 * same task and agent the lexicographically last `<ts>` (then `<runDir>`)
 * wins deterministically.
 * NOTE(review): assumes `<ts>` names sort chronologically — confirm.
 *
 * @param {string} root - Directory holding the recordings tree.
 * @returns {Object<string, Object<string, string>>} taskId → agentId →
 *   site-relative `/recordings/...` URL. Empty when `root` does not exist.
 */
function buildRecordingManifest(root) {
  const manifest = {};
  if (!fs.existsSync(root)) return manifest;

  for (const ts of listSubdirectories(root)) {
    const tsPath = path.join(root, ts);
    for (const taskId of listSubdirectories(tsPath)) {
      const taskPath = path.join(tsPath, taskId);
      for (const runDir of listSubdirectories(taskPath)) {
        const castPath = path.join(taskPath, runDir, "sessions", "agent.cast");
        if (!fs.existsSync(castPath)) continue;

        // The numeric agent suffix is embedded in the run directory name.
        const m = runDir.match(/\.agent-(\d+)-/);
        if (!m) continue;
        const agentId = AGENT_NUM_TO_ID[m[1]];
        if (!agentId) continue;

        if (!manifest[taskId]) manifest[taskId] = {};
        manifest[taskId][agentId] =
          `/recordings/${ts}/${taskId}/${runDir}/sessions/agent.cast`;
      }
    }
  }
  return manifest;
}
| 49 | + |
/**
 * Derive the repository name from a task id by dropping the trailing
 * `_<number>` task index, e.g. "pandas_dev-pandas_12" → "pandas_dev-pandas".
 *
 * @param {string} taskId - Task identifier.
 * @returns {string} Everything before the final `_<digits>` suffix, or the
 *   task id unchanged when it has no such suffix.
 */
function repoFromTaskId(taskId) {
  const parts = taskId.match(/^(.*)_\d+$/);
  if (parts === null) {
    return taskId;
  }
  const [, repoName] = parts;
  return repoName;
}
| 56 | + |
/**
 * Create placeholder rows for tasks that have recordings on disk but no row
 * in the CSV. Each placeholder carries the task id, the repo name derived
 * from it, and the per-agent recording links; speedup and recording fields
 * are left null/empty.
 *
 * @param {Object<string, Object<string, string>>} manifest - taskId →
 *   agentId → recording URL.
 * @param {Set<string>} csvTaskIds - Task ids already present in the CSV.
 * @returns {Array<Object>} One stub per orphaned task, in manifest order,
 *   with string ids counting up from "1000000".
 */
function synthesizeOrphans(manifest, csvTaskIds) {
  const firstSynthId = 1_000_000;
  return Object.entries(manifest)
    .filter(([taskId]) => !csvTaskIds.has(taskId))
    .map(([taskId, recordings], offset) => ({
      id: String(firstSynthId + offset),
      task_id: taskId,
      level: "",
      benchmark_name: taskId,
      repo_name: repoFromTaskId(taskId),
      agent_id: "",
      "agent/nop": null,
      "oracle/nop": null,
      agent_recording: null,
      __orphanRecordings: recordings
    }));
}
4 | 80 |
|
5 | 81 | export async function load() { |
6 | | - const workloads = buildWorkloads(rawRows, codesData); |
| 82 | + const recordings = buildRecordingManifest(RECORDINGS_DIR); |
| 83 | + const csvTaskIds = new Set(rawRows.map((r) => r.task_id)); |
| 84 | + const orphanWorkloads = synthesizeOrphans(recordings, csvTaskIds); |
| 85 | + const workloads = [ |
| 86 | + ...buildWorkloads(rawRows, codesData), |
| 87 | + ...orphanWorkloads.map((o) => ({ |
| 88 | + key: `${o.task_id}::::`, |
| 89 | + task_id: o.task_id, |
| 90 | + level: o.level, |
| 91 | + benchmark_name: o.benchmark_name, |
| 92 | + repo_name: o.repo_name, |
| 93 | + id: o.id, |
| 94 | + oracle: null, |
| 95 | + agents: {}, |
| 96 | + recordings: o.__orphanRecordings, |
| 97 | + codeText: null, |
| 98 | + codeFqName: null, |
| 99 | + bestAgentId: null, |
| 100 | + bestAgentSpeedup: null, |
| 101 | + beatsOracle: false |
| 102 | + })) |
| 103 | + ]; |
7 | 104 | const facets = summarizeFacets(workloads); |
8 | 105 | return { |
9 | 106 | workloads, |
|
0 commit comments