pin explorer to curated csv slice with on-disk recording manifest

atharvas · atharvas · commit d56ea2083fc4 · 2026-05-12T13:51:45.000-05:00
diff --git a/src/components/pages/ExplorerPage.svelte b/src/components/pages/ExplorerPage.svelte
@@ -121,10 +121,8 @@
 				Browse {facets.stats.total.toLocaleString()} performance workloads
 			</h1>
 			<p class="page-desc">
-				Every workload in this preview slice of FormulaCode, indexed by
-				repository, optimization level, and task. Click a card to see the
-				benchmark code, per-agent speedups, and a link to the agent's
-				terminal session recording.
+				This is a preview slice of the FormulaCode workload dataset, while we work
+				on <a href="https://api.formulacode.org/"><code>api.formulacode.org</code></a>. Every workload is indexed by repository, optimization level, and task. Click a card to see the benchmark code, per-agent speedups, and a link to the agent's terminal session recording.
 			</p>
 
 			<div class="stats-strip mini">
diff --git a/src/data/website_data_codes.explorer.json b/src/data/website_data_codes.explorer.json
diff --git a/src/data/website_data_lite.explorer.csv b/src/data/website_data_lite.explorer.csv
diff --git a/src/routes/explorer/+page.server.js b/src/routes/explorer/+page.server.js
@@ -1,9 +1,106 @@
-import rawRows from "$data/website_data_lite.csv";
-import codesData from "$data/website_data_codes.json";
+import fs from "node:fs";
+import path from "node:path";
+// The explorer ships a curated slice pinned to commit 8907cb99, where the
+// CSV's agent_recording paths line up with the .cast files actually present
+// under src/data/recordings/. The site-wide CSV (website_data_lite.csv) was
+// later swapped to a different experiment whose recordings were never
+// uploaded, which is why we load a separate copy here.
+import rawRows from "$data/website_data_lite.explorer.csv";
+import codesData from "$data/website_data_codes.explorer.json";
 import { buildWorkloads, summarizeFacets } from "$utils/explorer.js";
+import { AGENT_IDS } from "$utils/agents.js";
+
+const RECORDINGS_DIR = path.resolve("src/data/recordings"); // relative path: resolved against the process's current working directory
+
+// Per the historical CSV at 8907cb99 (which was committed alongside these
+// .cast files), the agent-N number in on-disk run directory names maps as follows:
+const AGENT_NUM_TO_ID = { // keyed by the digit string captured from the run directory name
+	"2": AGENT_IDS.HUMAN,
+	"3": AGENT_IDS.CLAUDE,
+	"4": AGENT_IDS.GPT5
+};
+
+function buildRecordingManifest(root) { // scans root/<ts>/<taskId>/<runDir>/sessions/agent.cast → returns {taskId: {agentId: url}}
+	const manifest = {};
+	if (!fs.existsSync(root)) return manifest; // no recordings directory: return an empty manifest
+	for (const ts of fs.readdirSync(root)) {
+		const tsPath = path.join(root, ts);
+		if (!fs.statSync(tsPath).isDirectory()) continue; // only directories are descended into
+		for (const taskId of fs.readdirSync(tsPath)) {
+			const taskPath = path.join(tsPath, taskId);
+			if (!fs.statSync(taskPath).isDirectory()) continue;
+			for (const runDir of fs.readdirSync(taskPath)) {
+				const runPath = path.join(taskPath, runDir);
+				if (!fs.statSync(runPath).isDirectory()) continue;
+				if (!fs.existsSync(path.join(runPath, "sessions", "agent.cast"))) // run dir without a recording file is skipped
+					continue;
+				const m = runDir.match(/\.agent-(\d+)-/); // the run directory name carries the agent number
+				if (!m) continue;
+				const agentId = AGENT_NUM_TO_ID[m[1]];
+				if (!agentId) continue; // agent number with no entry in AGENT_NUM_TO_ID
+				if (!manifest[taskId]) manifest[taskId] = {}; // a later match for the same task/agent overwrites the earlier URL
+				manifest[taskId][agentId] =
+					`/recordings/${ts}/${taskId}/${runDir}/sessions/agent.cast`;
+			}
+		}
+	}
+	return manifest;
+}
+
+// task_id like "pandas_dev-pandas_12" → repo_name "pandas_dev-pandas": the last
+// `_<number>` segment is the task index. Ids without that suffix are returned unchanged.
+function repoFromTaskId(taskId) {
+	const m = taskId.match(/^(.*)_\d+$/);
+	return m ? m[1] : taskId;
+}
+
+// Build stub rows for every task_id in `manifest` ({task_id: {agent_id: url}})
+// that is absent from the `csvTaskIds` Set. They land in the explorer as bare cards:
+// task_id, repo, and per-agent recording links — no speedups or benchmark code.
+function synthesizeOrphans(manifest, csvTaskIds) {
+	const out = [];
+	let synthId = 1_000_000; // NOTE(review): presumably chosen to stay clear of real CSV ids — confirm
+	for (const [taskId, recordings] of Object.entries(manifest)) {
+		if (csvTaskIds.has(taskId)) continue; // task already has a CSV row, so it is not an orphan
+		out.push({
+			id: String(synthId++),
+			task_id: taskId,
+			level: "",
+			benchmark_name: taskId, // the task id stands in for the benchmark name
+			repo_name: repoFromTaskId(taskId),
+			agent_id: "",
+			"agent/nop": null,
+			"oracle/nop": null,
+			agent_recording: null,
+			__orphanRecordings: recordings // the manifest entry for this task, passed through as-is
+		});
+	}
+	return out;
+}
 
 export async function load() {
-	const workloads = buildWorkloads(rawRows, codesData);
+	const recordings = buildRecordingManifest(RECORDINGS_DIR);
+	const csvTaskIds = new Set(rawRows.map((r) => r.task_id));
+	const orphanWorkloads = synthesizeOrphans(recordings, csvTaskIds);
+	const workloads = [
+		...buildWorkloads(rawRows, codesData),
+		...orphanWorkloads.map((o) => ({
+			key: `${o.task_id}::::`,
+			task_id: o.task_id,
+			level: o.level,
+			benchmark_name: o.benchmark_name,
+			repo_name: o.repo_name,
+			id: o.id,
+			oracle: null,
+			agents: {},
+			recordings: o.__orphanRecordings,
+			codeText: null,
+			codeFqName: null,
+			bestAgentId: null,
+			bestAgentSpeedup: null,
+			beatsOracle: false
+		}))
+	];
 	const facets = summarizeFacets(workloads);
 	return {
 		workloads,
diff --git a/src/routes/player/[...recordingPath]/+page.js b/src/routes/player/[...recordingPath]/+page.js
@@ -1,8 +1,14 @@
-import allBenchmarkData from "$data/website_data_lite.csv";
+import mainBenchmarkData from "$data/website_data_lite.csv";
+import explorerBenchmarkData from "$data/website_data_lite.explorer.csv";
 
 export const ssr = false;
 export const prerender = false;
 
+// Search both the main scrollytelling CSV and the explorer's curated slice.
+// The two cover different experiment runs; the explorer CSV is what carries
+// the agent_recording paths that actually resolve to files on disk.
+const allBenchmarkData = [...mainBenchmarkData, ...explorerBenchmarkData];
+
 export async function load({ params, url }) {
 	const rawRecordingPath = params.recordingPath || "";
 	// Trim any trailing slash that SvelteKit's catch-all may include so the
@@ -23,12 +29,16 @@ export async function load({ params, url }) {
 		);
 	}
 
-	// Lazy load the heavy code data
+	// Lazy load the heavy code data. Look the id up in the main map first and
+	// fall back to the explorer map, since the benchmark may have come from either CSV.
 	if (benchmark) {
 		try {
-			const module = await import("$data/website_data_codes.json");
-			const heavyData = module.default;
-			const extra = heavyData[benchmark.id];
+			const [mainMod, explorerMod] = await Promise.all([
+				import("$data/website_data_codes.json"),
+				import("$data/website_data_codes.explorer.json")
+			]);
+			const extra =
+				mainMod.default[benchmark.id] ?? explorerMod.default[benchmark.id];
 			if (extra) {
 				benchmark = { ...benchmark, ...extra };
 			}
diff --git a/src/utils/explorer.js b/src/utils/explorer.js
@@ -50,10 +50,15 @@ export function levelShort(level) {
  * (task_id, level, benchmark_name) with per-agent stats.
  *
  * The CSV structure has each benchmark replicated across the three agents
- * (oracle, claude, gpt-5). Oracle's `agent/nop` ≡ `oracle/nop`. The agent
- * rows additionally carry a recording path; oracle rows do not.
+ * (oracle, claude, gpt-5). Oracle's `agent/nop` ≡ `oracle/nop`.
+ *
+ * `recordingManifest` is an optional `{task_id: {agent_id: url}}` map built
+ * from what's actually on disk. When provided, workloads whose task_id has
+ * no on-disk recording are dropped entirely, and the per-agent recording
+ * URLs come from the manifest rather than the CSV (the CSV's paths point
+ * to a now-stale filename scheme).
  */
-export function buildWorkloads(rows, codeMap) {
+export function buildWorkloads(rows, codeMap, recordingManifest = null) {
 	const groups = new Map();
 
 	for (const r of rows) {
@@ -62,6 +67,8 @@ export function buildWorkloads(rows, codeMap) {
 		const name = r.benchmark_name;
 		if (!taskId || !level || !name) continue;
 
+		if (recordingManifest && !recordingManifest[taskId]) continue;
+
 		const key = `${taskId}::${level}::${name}`;
 		let wl = groups.get(key);
 		if (!wl) {
@@ -74,7 +81,9 @@ export function buildWorkloads(rows, codeMap) {
 				id: r.id,
 				oracle: null,
 				agents: {},
-				recordings: {}
+				recordings: recordingManifest
+					? { ...(recordingManifest[taskId] ?? {}) }
+					: {}
 			};
 			groups.set(key, wl);
 		}
@@ -88,7 +97,7 @@ export function buildWorkloads(rows, codeMap) {
 		if (agentId && agentNop !== null) {
 			wl.agents[agentId] = agentNop;
 		}
-		if (agentId && r.agent_recording) {
+		if (!recordingManifest && agentId && r.agent_recording) {
 			wl.recordings[agentId] = r.agent_recording;
 		}
 	}
diff --git a/static/recordings b/static/recordings
@@ -0,0 +1 @@
+../src/data/recordings