Skip to content

Commit 3a69395

Browse files
committed
substitute scrollscatter scalars from live data
1 parent 83a24ac commit 3a69395

2 files changed

Lines changed: 135 additions & 42 deletions

File tree

src/components/charts/ChartScroll.svelte

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,19 @@
33
import {
44
agentSelected,
55
agentCopyKey,
6-
chartScrollTrigger
6+
chartScrollTrigger,
7+
bigScatterData
78
} from "$stores/misc.js";
89
import Scrolly from "$components/helpers/ChartScrolly.svelte";
910
import ScrollScatter from "$components/charts/ScrollScatter.svelte";
1011
import Filters from "$components/ui/Filters.svelte";
1112
import Icon from "$components/helpers/Icon.svelte";
1213
import inView from "$actions/inView.js";
1314
import MathJax from "$components/helpers/MathJax.svelte";
15+
import {
16+
computeAdvantageScalar,
17+
substituteCopyTokens
18+
} from "$utils/benchmarkData.js";
1419
1520
const copy = getContext("copy");
1621
@@ -24,6 +29,26 @@
2429
let scrollyIndex; // Raw index from Scrolly component
2530
// No offset needed since chartScrollSteps is already isolated
2631
$: chartScrollIndex = scrollyIndex !== undefined ? scrollyIndex : 0;
32+
33+
// Compute the narrative scalars from the live dataset so step copy that
34+
// uses `{claudeAdv}` / `{gpt5Adv}` / `{claudeModuleAdv}` / `{gpt5ModuleAdv}`
35+
// tokens always reflects the shipped CSV instead of stale hardcoded
36+
// numbers. Until the gdoc is updated to reference these tokens, the
37+
// substitution is a no-op on the existing copy.
38+
$: narrativeTokens = {
39+
claudeAdv: computeAdvantageScalar($bigScatterData, {
40+
agentNeedle: "claude"
41+
}),
42+
gpt5Adv: computeAdvantageScalar($bigScatterData, { agentNeedle: "gpt" }),
43+
claudeModuleAdv: computeAdvantageScalar($bigScatterData, {
44+
agentNeedle: "claude",
45+
level: "module-level"
46+
}),
47+
gpt5ModuleAdv: computeAdvantageScalar($bigScatterData, {
48+
agentNeedle: "gpt",
49+
level: "module-level"
50+
})
51+
};
2752
</script>
2853

2954
<section
@@ -51,7 +76,7 @@
5176
{#if block?.type === "math"}
5277
<MathJax expression={block.value} />
5378
{:else if block?.type === "text"}
54-
<p>{@html block.value}</p>
79+
<p>{@html substituteCopyTokens(block.value, narrativeTokens)}</p>
5580
{/if}
5681
{/each}
5782
{/if}

src/utils/benchmarkData.js

Lines changed: 108 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export function parseBenchmarkCodes(codesString) {
1212
try {
1313
return JSON.parse(codesString);
1414
} catch (e) {
15-
console.warn('Failed to parse benchmark_codes:', e);
15+
console.warn("Failed to parse benchmark_codes:", e);
1616
return {};
1717
}
1818
}
@@ -25,7 +25,7 @@ export function parseBenchmarkCodes(codesString) {
2525
export function getMainCode(codesString) {
2626
const codes = parseBenchmarkCodes(codesString);
2727
const entries = Object.entries(codes);
28-
return entries.length > 0 ? entries[0][1] : '';
28+
return entries.length > 0 ? entries[0][1] : "";
2929
}
3030

3131
/**
@@ -44,8 +44,8 @@ export function getCodeNames(codesString) {
4444
* @returns {string} Agent name
4545
*/
4646
export function extractAgentName(agentId) {
47-
if (!agentId) return '';
48-
const parts = agentId.split(',');
47+
if (!agentId) return "";
48+
const parts = agentId.split(",");
4949
return parts.length > 1 ? parts[1].trim() : agentId;
5050
}
5151

@@ -58,45 +58,45 @@ export function extractAgentName(agentId) {
5858
* @returns {string} Formatted display name
5959
*/
6060
export function formatAgentDisplayName(agentId) {
61-
if (!agentId) return '';
61+
if (!agentId) return "";
6262

63-
const parts = agentId.split(',').map(p => p.trim());
63+
const parts = agentId.split(",").map((p) => p.trim());
6464

6565
// Format the agent part (e.g., "terminus-2" -> "Terminus 2")
6666
const agentPart = parts[0]
67-
.split('-')
68-
.map(word => word.charAt(0).toUpperCase() + word.slice(1))
69-
.join(' ');
67+
.split("-")
68+
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
69+
.join(" ");
7070

7171
// Format the model part
72-
const modelPart = parts[1] || '';
73-
let formattedModel = '';
72+
const modelPart = parts[1] || "";
73+
let formattedModel = "";
7474

7575
switch (modelPart.toLowerCase()) {
76-
case 'oracle':
77-
formattedModel = 'Expert Human';
76+
case "oracle":
77+
formattedModel = "Expert Human";
7878
break;
79-
case 'gpt-5':
80-
case 'gpt5':
81-
formattedModel = 'GPT-5';
79+
case "gpt-5":
80+
case "gpt5":
81+
formattedModel = "GPT-5";
8282
break;
83-
case 'gpt-4':
84-
case 'gpt4':
85-
formattedModel = 'GPT-4';
83+
case "gpt-4":
84+
case "gpt4":
85+
formattedModel = "GPT-4";
8686
break;
87-
case 'claude':
88-
formattedModel = 'Claude Sonnet 4.0';
87+
case "claude":
88+
formattedModel = "Claude Sonnet 4.0";
8989
break;
90-
case 'claude-3.5':
91-
case 'claude-3-5':
92-
formattedModel = 'Claude Sonnet 3.5';
90+
case "claude-3.5":
91+
case "claude-3-5":
92+
formattedModel = "Claude Sonnet 3.5";
9393
break;
9494
default:
9595
// Capitalize first letter of each word
9696
formattedModel = modelPart
97-
.split('-')
98-
.map(word => word.charAt(0).toUpperCase() + word.slice(1))
99-
.join(' ');
97+
.split("-")
98+
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
99+
.join(" ");
100100
}
101101

102102
return `${agentPart} - ${formattedModel}`;
@@ -108,7 +108,7 @@ export function formatAgentDisplayName(agentId) {
108108
* @returns {string} Formatted breadcrumb string (module > class > function > params)
109109
*/
110110
export function formatBenchmarkBreadcrumb(decomposition) {
111-
if (!decomposition) return '';
111+
if (!decomposition) return "";
112112
// The decomposition might be a single string or need to be parsed
113113
// For now, return as-is
114114
return decomposition;
@@ -126,7 +126,7 @@ export function calculateLevel(benchmark) {
126126
if (benchmark.level) return benchmark.level;
127127

128128
// Could also derive from benchmark_decoposed if needed
129-
return 'unknown';
129+
return "unknown";
130130
}
131131

132132
/**
@@ -136,7 +136,7 @@ export function calculateLevel(benchmark) {
136136
*/
137137
export function getUniqueAgents(data) {
138138
const agents = new Set();
139-
data.forEach(d => {
139+
data.forEach((d) => {
140140
if (d.agent_id) agents.add(d.agent_id);
141141
});
142142
return Array.from(agents).sort();
@@ -149,7 +149,7 @@ export function getUniqueAgents(data) {
149149
*/
150150
export function getUniqueLevels(data) {
151151
const levels = new Set();
152-
data.forEach(d => {
152+
data.forEach((d) => {
153153
if (d.level) levels.add(d.level);
154154
});
155155
return Array.from(levels).sort();
@@ -162,7 +162,7 @@ export function getUniqueLevels(data) {
162162
*/
163163
export function getUniqueTypes(data) {
164164
const types = new Set();
165-
data.forEach(d => {
165+
data.forEach((d) => {
166166
if (d.benchmark_type) types.add(d.benchmark_type);
167167
});
168168
return Array.from(types).sort();
@@ -175,7 +175,7 @@ export function getUniqueTypes(data) {
175175
* @returns {Object} Statistics object with median, count, etc.
176176
*/
177177
export function calculateAgentStats(data, agentId) {
178-
const agentData = data.filter(d => d.agent_id === agentId);
178+
const agentData = data.filter((d) => d.agent_id === agentId);
179179

180180
if (agentData.length === 0) {
181181
return {
@@ -185,13 +185,13 @@ export function calculateAgentStats(data, agentId) {
185185
};
186186
}
187187

188-
const toNumber = value => {
189-
if (typeof value === 'number') return value;
188+
const toNumber = (value) => {
189+
if (typeof value === "number") return value;
190190
const parsed = parseFloat(value);
191191
return Number.isFinite(parsed) ? parsed : undefined;
192192
};
193193

194-
const getMedian = values => {
194+
const getMedian = (values) => {
195195
if (values.length === 0) return 0;
196196
const mid = Math.floor(values.length / 2);
197197
return values.length % 2 !== 0
@@ -201,12 +201,12 @@ export function calculateAgentStats(data, agentId) {
201201

202202
// Calculate medians
203203
const agentNops = agentData
204-
.map(d => toNumber(d['agent/nop']))
205-
.filter(v => v !== undefined)
204+
.map((d) => toNumber(d["agent/nop"]))
205+
.filter((v) => v !== undefined)
206206
.sort((a, b) => a - b);
207207
const oracleNops = agentData
208-
.map(d => toNumber(d['oracle/nop']))
209-
.filter(v => v !== undefined)
208+
.map((d) => toNumber(d["oracle/nop"]))
209+
.filter((v) => v !== undefined)
210210
.sort((a, b) => a - b);
211211

212212
const medianAgentNop = getMedian(agentNops);
@@ -218,3 +218,71 @@ export function calculateAgentStats(data, agentId) {
218218
medianOracleNop
219219
};
220220
}
221+
222+
/**
 * Compute the mean advantage scalar for one (agentNeedle, level) slice of the
 * dataset. "Advantage" is `oracle/nop − agent/nop`, averaged over every
 * matching row in which both values are (or parse to) finite numbers. Used to
 * dynamically substitute scalars that were previously hardcoded in copy.json
 * (ScrollScatter step annotations).
 *
 * @param {Array} data - Array of benchmark rows. The fields read are
 *   `agent_id`, `level`, `agent/nop` and `oracle/nop`.
 * @param {Object} [opts] - Slice selector. `null`/`undefined` is tolerated
 *   and yields `null` (no needle to match on) instead of throwing.
 * @param {string} opts.agentNeedle - Substring matched case-insensitively
 *   against `agent_id`. Every agent whose id contains the needle is included,
 *   so a short needle (e.g. "gpt") spans all ids that contain it.
 * @param {string|null} [opts.level=null] - Optional level filter, compared
 *   with strict equality against `row.level`, so it must be spelled exactly
 *   as in the dataset. Any falsy value disables the filter.
 * @returns {number|null} Mean advantage, or null when `data` is not a
 *   non-empty array, no needle is given, or no row contributes a value.
 */
export function computeAdvantageScalar(data, opts = {}) {
  // A parameter default only covers `undefined`; destructuring an explicit
  // `null` would throw, so fall back to an empty selector here.
  const { agentNeedle, level = null } = opts || {};
  if (!Array.isArray(data) || data.length === 0 || !agentNeedle) return null;

  const needle = String(agentNeedle).toLowerCase();

  // CSV-sourced rows may carry numbers as strings; anything that does not
  // parse to a finite number is reported as null so the row can be skipped.
  const toNum = (v) => {
    const parsed = typeof v === "number" ? v : parseFloat(v);
    return Number.isFinite(parsed) ? parsed : null;
  };

  let sum = 0;
  let count = 0;
  for (const row of data) {
    if (!row || !row.agent_id) continue;
    if (!String(row.agent_id).toLowerCase().includes(needle)) continue;
    if (level && row.level !== level) continue;

    const agentValue = toNum(row["agent/nop"]);
    const oracleValue = toNum(row["oracle/nop"]);
    // Rows missing either side cannot contribute a difference.
    if (agentValue === null || oracleValue === null) continue;

    sum += oracleValue - agentValue;
    count += 1;
  }

  return count > 0 ? sum / count : null;
}
258+
259+
/**
 * Apply {token} substitution to copy text. Designed for narrative strings
 * that quote dataset-derived scalars (e.g. ScrollScatter step annotations).
 * Pass a `tokens` object whose own keys correspond to `{tokenName}`
 * placeholders in the text.
 *
 * Replacement rules:
 *  - Finite numbers are formatted to 4 decimals with a leading sign. A value
 *    that rounds to zero is always rendered as "+0.0000" (never "-0.0000").
 *  - Other non-null values are passed through `String(...)`.
 *  - Placeholders with no usable value are left as the literal `{token}` so
 *    missing data is visible in dev. That covers: names that are not own
 *    properties of `tokens` (inherited names such as `constructor` are never
 *    substituted), `null`/`undefined` values, and non-finite numbers.
 *
 * Example:
 *   substituteCopyTokens(
 *     "Claude's advantage is {claudeAdv}.",
 *     { claudeAdv: 0.0749 }
 *   ) → "Claude's advantage is +0.0749."
 *
 * Tokens used by the ScrollScatter narrative (see src/data/copy.json
 * "chartScroll"): claudeAdv, gpt5Adv, claudeModuleAdv, gpt5ModuleAdv. These
 * should map to the corresponding `computeAdvantageScalar` outputs and are
 * passed in by the component that owns the data context.
 *
 * @param {*} text - Copy text. Non-strings and strings without "{" are
 *   returned unchanged.
 * @param {Object|null} [tokens={}] - Map of token name → value. A null or
 *   non-object argument is treated as an empty map.
 * @returns {*} The text with known placeholders substituted.
 */
export function substituteCopyTokens(text, tokens = {}) {
  if (typeof text !== "string" || !text.includes("{")) return text;

  // The `= {}` default does not cover an explicit null; normalise here so a
  // placeholder lookup can never throw.
  const lookup = tokens !== null && typeof tokens === "object" ? tokens : {};

  return text.replace(/\{(\w+)\}/g, (whole, name) => {
    // Own properties only: `{constructor}` / `{toString}` in copy must not
    // resolve to Object.prototype members.
    if (!Object.prototype.hasOwnProperty.call(lookup, name)) return whole;

    const value = lookup[name];
    if (value === undefined || value === null) return whole; // leave placeholder visible

    if (typeof value === "number") {
      // NaN / ±Infinity mean the scalar could not be computed; keep the
      // placeholder rather than printing "NaN" into the narrative.
      if (!Number.isFinite(value)) return whole;

      const fixed = value.toFixed(4);
      const hasMinus = fixed.startsWith("-");
      const magnitude = hasMinus ? fixed.slice(1) : fixed;
      // A tiny negative rounds to "-0.0000"; present that as +0.0000.
      const isNegative = hasMinus && Number(magnitude) !== 0;
      return `${isNegative ? "-" : "+"}${magnitude}`;
    }

    return String(value);
  });
}

0 commit comments

Comments
 (0)