Skip to content

Commit b87b06a

Browse files
authored
Create evals from UI (#597)
1 parent 82948a7 commit b87b06a

File tree

16 files changed

+2426
-3149
lines changed

16 files changed

+2426
-3149
lines changed

client/src/components/EvalsTab.tsx

Lines changed: 52 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,26 @@ import { useAuth } from "@workos-inc/authkit-react";
33
import { useConvexAuth, useQuery } from "convex/react";
44
import { FlaskConical } from "lucide-react";
55
import { EmptyState } from "@/components/ui/empty-state";
6+
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
67
import type { EvalCase, EvalIteration, EvalSuite } from "./evals/types";
78
import { aggregateSuite } from "./evals/helpers";
89
import { SuitesOverview } from "./evals/suites-overview";
910
import { SuiteIterationsView } from "./evals/suite-iterations-view";
11+
import { EvalRunner } from "./evals/eval-runner";
12+
import { useChat } from "@/hooks/use-chat";
1013

1114
export function EvalsTab() {
1215
const { isAuthenticated, isLoading } = useConvexAuth();
1316
const { user } = useAuth();
1417

1518
const [selectedSuiteId, setSelectedSuiteId] = useState<string | null>(null);
1619

20+
const { availableModels } = useChat({
21+
systemPrompt: "",
22+
temperature: 1,
23+
selectedServers: [],
24+
});
25+
1726
// Fetch overview data for authenticated user - only suites with metadata
1827
const enableOverviewQuery = isAuthenticated && !!user;
1928
const overviewData = useQuery(
@@ -106,40 +115,53 @@ export function EvalsTab() {
106115
return (
107116
<div className="p-6 space-y-6">
108117
<div className="flex items-center justify-between">
109-
<div className="flex items-center space-x-2">
110-
<h1 className="text-2xl font-bold">Evals</h1>
111-
{overviewData?.metadata && (
112-
<div className="text-sm text-muted-foreground">
113-
{overviewData.metadata.iterationsPassed} passed ·{" "}
114-
{overviewData.metadata.iterationsFailed} failed
118+
<h1 className="text-2xl font-bold">Evals</h1>
119+
{overviewData?.metadata && (
120+
<div className="text-sm text-muted-foreground">
121+
{overviewData.metadata.iterationsPassed} passed ·{" "}
122+
{overviewData.metadata.iterationsFailed} failed
123+
</div>
124+
)}
125+
</div>
126+
127+
<Tabs defaultValue="results" className="w-full">
128+
<TabsList>
129+
<TabsTrigger value="results">Results</TabsTrigger>
130+
<TabsTrigger value="create">Create Run</TabsTrigger>
131+
</TabsList>
132+
133+
<TabsContent value="results" className="mt-6">
134+
{!selectedSuite ? (
135+
<SuitesOverview
136+
suites={suites || []}
137+
onSelectSuite={setSelectedSuiteId}
138+
/>
139+
) : isSuiteDetailsLoading ? (
140+
<div className="flex items-center justify-center h-64">
141+
<div className="text-center">
142+
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto" />
143+
<p className="mt-4 text-muted-foreground">
144+
Loading suite details...
145+
</p>
146+
</div>
115147
</div>
148+
) : (
149+
<SuiteIterationsView
150+
suite={selectedSuite}
151+
cases={suiteDetails?.testCases || []}
152+
iterations={iterationsForSelectedSuite}
153+
aggregate={suiteAggregate}
154+
onBack={() => setSelectedSuiteId(null)}
155+
/>
116156
)}
117-
</div>
118-
</div>
157+
</TabsContent>
119158

120-
{!selectedSuite ? (
121-
<SuitesOverview
122-
suites={suites || []}
123-
onSelectSuite={setSelectedSuiteId}
124-
/>
125-
) : isSuiteDetailsLoading ? (
126-
<div className="flex items-center justify-center h-64">
127-
<div className="text-center">
128-
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto" />
129-
<p className="mt-4 text-muted-foreground">
130-
Loading suite details...
131-
</p>
159+
<TabsContent value="create" className="mt-6">
160+
<div className="max-w-4xl">
161+
<EvalRunner availableModels={availableModels} inline={true} />
132162
</div>
133-
</div>
134-
) : (
135-
<SuiteIterationsView
136-
suite={selectedSuite}
137-
cases={suiteDetails?.testCases || []}
138-
iterations={iterationsForSelectedSuite}
139-
aggregate={suiteAggregate}
140-
onBack={() => setSelectedSuiteId(null)}
141-
/>
142-
)}
163+
</TabsContent>
164+
</Tabs>
143165
</div>
144166
);
145167
}

client/src/components/chat/model-selector.tsx

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ interface ModelSelectorProps {
2020
onModelChange: (model: ModelDefinition) => void;
2121
disabled?: boolean;
2222
isLoading?: boolean;
23+
hideProvidedModels?: boolean;
2324
}
2425

2526
// Helper function to group models by provider
@@ -62,6 +63,7 @@ export function ModelSelector({
6263
onModelChange,
6364
disabled,
6465
isLoading,
66+
hideProvidedModels = false,
6567
}: ModelSelectorProps) {
6668
const [isModelSelectorOpen, setIsModelSelectorOpen] = useState(false);
6769
const currentModelData = currentModel;
@@ -73,9 +75,9 @@ export function ModelSelector({
7375
// Get sorted provider keys for consistent ordering
7476
const sortedProviders = Array.from(groupedModels.keys()).sort();
7577
const MCPJAM_PROVIDERS: ModelProvider[] = ["meta"];
76-
const mcpjamProviders = sortedProviders.filter((p) =>
77-
MCPJAM_PROVIDERS.includes(p),
78-
);
78+
const mcpjamProviders = hideProvidedModels
79+
? []
80+
: sortedProviders.filter((p) => MCPJAM_PROVIDERS.includes(p));
7981
const otherProviders = sortedProviders.filter(
8082
(p) => !MCPJAM_PROVIDERS.includes(p),
8183
);

client/src/components/evals/SuiteRow.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ export function SuiteRow({ suite, onSelectSuite }: SuiteRowProps) {
9999
passed={aggregate.totals.passed}
100100
failed={aggregate.totals.failed}
101101
cancelled={aggregate.totals.cancelled}
102-
pending={0}
102+
pending={aggregate.totals.pending}
103103
/>
104104
) : (
105105
<span className="text-xs text-muted-foreground">Loading...</span>

0 commit comments

Comments
 (0)