Skip to content

Commit 9201f13

Browse files
committed
tidy landing layout and docs copy
1 parent 96d6d8d commit 9201f13

3 files changed

Lines changed: 17 additions & 78 deletions

File tree

src/components/pages/DocsPage.svelte

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
{
44
name: "fc-eval",
55
description:
6-
"Run frontier LLM agents against the FormulaCode benchmark. Spins up reproducible Docker environments, replays the unit-test suite, and computes per-workload speedup, advantage, and stratified scores. Bring your own Terminus or OpenHands agent — `fceval run -d formulacode -a <your-agent>` is all it takes.",
6+
"Run frontier LLM agents against the FormulaCode benchmark. Spins up reproducible Docker environments, verifies correctness against the unit-test suite, and computes per-workload speedup, advantage, and stratified scores.",
77
docsUrl: "/docs/eval/",
88
repoUrl: "https://github.com/formula-code/fc-eval",
99
image: "/assets/og/fc-eval.png",
@@ -12,7 +12,7 @@
1212
{
1313
name: "datasmith",
1414
description:
15-
"The four-stage pipeline that mines FormulaCode's tasks from real GitHub repositories: scraping high-quality performance PRs, attribute-filtering them with LLM judges, synthesising reproducible build environments, and running the statistical-significance tests that admit a candidate into the benchmark.",
15+
"The pipeline for curating FormulaCode's tasks from real GitHub repositories. The code for scraping, filtering, building, and verifying high-quality performance PRs is maintained here.",
1616
docsUrl: "/docs/data/",
1717
repoUrl: "https://github.com/formula-code/datasmith",
1818
image: "/assets/og/datasmith.png",
@@ -53,9 +53,9 @@
5353
</div>
5454
<h1 class="page-title">Build, evaluate, and explore FormulaCode</h1>
5555
<p class="page-desc">
56-
Three open-source repositories power the benchmark. Pick the docs you
57-
need — run agents against the benchmark, see how tasks are mined, or
58-
query the live data behind the leaderboard.
56+
FormulaCode consists of two parts: a pipeline to construct performance
57+
optimization tasks, and an execution harness that connects a language
58+
model to our terminal sandbox.
5959
</p>
6060
</div>
6161
</header>
@@ -99,17 +99,22 @@
9999
<div class="endpoints-head">
100100
<h2 class="section-title">Live data endpoints</h2>
101101
<p class="section-subtitle">
102-
Two subdomains expose the live task and run database. <strong>Uptime
103-
is not guaranteed</strong> — these are research endpoints, sometimes
104-
rebuilt mid-week. For reproducible evaluation, prefer the static
105-
CSV that ships with this site.
102+
Two subdomains expose the live task and run database. <strong
103+
>Uptime is not guaranteed</strong
104+
> — these are research endpoints, sometimes rebuilt mid-week. For reproducible
105+
evaluation, prefer the static CSV that ships with this site.
106106
</p>
107107
</div>
108108

109109
<ul class="endpoints-list">
110110
{#each liveEndpoints as e}
111111
<li class="endpoint">
112-
<a class="endpoint-host" href={e.href} target="_blank" rel="noopener noreferrer">
112+
<a
113+
class="endpoint-host"
114+
href={e.href}
115+
target="_blank"
116+
rel="noopener noreferrer"
117+
>
113118
{e.host} ↗
114119
</a>
115120
<span class="endpoint-label">{e.label}</span>

src/components/pages/OverviewPage.svelte

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@
1313
<div class="overview-page">
1414
<OverviewHeader />
1515

16-
<DatasetPipelineViz />
1716
<Abstract />
17+
<DatasetPipelineViz />
1818
<DatasetStatistics />
1919
<KeyFindingsScroll />
2020

2121
<ResultsHighlights
2222
title="Leaderboard at a glance"
23-
subtitle="The six findings above roll up into the per-agent advantage scores below. Positive bars beat the human expert; negative bars trail."
23+
subtitle="The findings above roll up into the per-agent advantage scores below. Positive bars beat the human expert; negative bars trail."
2424
stratified={leaderboardData.stratified}
2525
/>
2626

src/components/sections/ResultsHighlights.svelte

Lines changed: 0 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
1717
const TABS = [
1818
{ key: "advantage", label: "Overall", desc: "Overall" },
19-
{ key: "level1", label: "L1 — Params", desc: "Parameter level" },
2019
{ key: "level2", label: "L2 — Function", desc: "Function level" },
2120
{ key: "level3", label: "L3 — Class", desc: "Class level" },
2221
{ key: "level4", label: "L4 — Module", desc: "Module level" }
@@ -43,25 +42,6 @@
4342
return { abs: Math.max(Math.abs(max), Math.abs(min), 0.02) };
4443
})();
4544
46-
$: leader = sortedRows[0];
47-
$: laggard = sortedRows[sortedRows.length - 1];
48-
49-
$: takeaway = (() => {
50-
if (!leader || !laggard) return "";
51-
const tab = TABS.find((t) => t.key === activeTab);
52-
const ctx = tab?.desc ?? "Overall";
53-
const ln = `${leader.agent} · ${leader.model}`;
54-
const lln = `${laggard.agent} · ${laggard.model}`;
55-
const fmt = (v) => `${v >= 0 ? "+" : ""}${v.toFixed(4)}`;
56-
if (leader.value > 0 && laggard.value < 0) {
57-
return `${ctx}: <strong>${ln}</strong> leads at <code>${fmt(leader.value)}</code>, slightly above the human expert. <strong>${lln}</strong> trails at <code>${fmt(laggard.value)}</code>.`;
58-
}
59-
if (leader.value > 0) {
60-
return `${ctx}: <strong>${ln}</strong> edges out the expert at <code>${fmt(leader.value)}</code>; the rest sit at or below the human baseline.`;
61-
}
62-
return `${ctx}: every agent underperforms the expert. <strong>${ln}</strong> is closest at <code>${fmt(leader.value)}</code>; <strong>${lln}</strong> is furthest behind at <code>${fmt(laggard.value)}</code>.`;
63-
})();
64-
6545
function widthFor(v) {
6646
if (!Number.isFinite(v) || scale.abs === 0) return 0;
6747
return (Math.abs(v) / scale.abs) * 50;
@@ -101,13 +81,6 @@
10181
</div>
10282

10383
<div class="results-panel">
104-
<div class="results-panel-title">
105-
{TABS.find((t) => t.key === activeTab)?.desc} advantage
106-
</div>
107-
<div class="results-kicker">
108-
Ranked by Σ(oracle speedup − agent speedup) / N. Bars symmetric around 0.
109-
</div>
110-
11184
<div class="bar-chart" class:revealed>
11285
{#each sortedRows as row, i (row.agent + row.model + activeTab)}
11386
<div class="bar-row" style="--i: {i}">
@@ -139,8 +112,6 @@
139112
</div>
140113
{/each}
141114
</div>
142-
143-
<p class="results-caption">{@html takeaway}</p>
144115
</div>
145116
</div>
146117
</section>
@@ -200,20 +171,6 @@
200171
box-shadow: var(--shadow);
201172
}
202173
203-
.results-panel-title {
204-
font-family: var(--sans);
205-
font-size: 0.875rem;
206-
font-weight: 700;
207-
color: var(--text-primary);
208-
margin-bottom: var(--space-xs);
209-
}
210-
211-
.results-kicker {
212-
font-size: 0.75rem;
213-
color: var(--text-muted);
214-
margin-bottom: var(--space-md);
215-
}
216-
217174
.bar-chart {
218175
display: grid;
219176
gap: 10px;
@@ -322,29 +279,6 @@
322279
color: var(--score-bad);
323280
}
324281
325-
.results-caption {
326-
margin-top: var(--space-md);
327-
padding-top: var(--space-sm);
328-
border-top: 1px dashed var(--border-primary);
329-
font-size: 0.8125rem;
330-
line-height: 1.65;
331-
color: var(--text-muted);
332-
}
333-
334-
.results-caption :global(strong) {
335-
color: var(--text-primary);
336-
font-weight: 600;
337-
}
338-
339-
.results-caption :global(code) {
340-
font-family: var(--mono);
341-
font-size: 0.85em;
342-
padding: 1px 5px;
343-
background: var(--bg-secondary);
344-
border-radius: 3px;
345-
color: var(--text-primary);
346-
}
347-
348282
@media (max-width: 720px) {
349283
.bar-row {
350284
grid-template-columns: 1fr;

0 commit comments

Comments
 (0)