mattpocock · cantemizyurek · Feb 5, 2026 · Feb 5, 2026
diff --git a/.changeset/0000-multiple-file-filters.md b/.changeset/0000-multiple-file-filters.md
@@ -0,0 +1,5 @@
+---
+"evalite": minor
+---
+
+Support multiple file path filters as positional arguments in the CLI. You can now run specific eval files by passing multiple paths: `evalite ./src/system-prompt.eval.ts ./src/tools/search.eval.ts` or `evalite watch ./src/system-prompt.eval.ts ./src/tools/search.eval.ts`.
diff --git a/packages/evalite-tests/tests/paths.test.ts b/packages/evalite-tests/tests/paths.test.ts
@@ -6,7 +6,7 @@ it("Should allow you to pass a specific filename to run", async () => {
 
   await fixture.run({
     mode: "run-once-and-exit",
-    path: "should-run.eval.ts",
+    paths: ["should-run.eval.ts"],
   });
 
   const evals = await getSuitesAsRecordViaStorage(fixture.storage);
@@ -15,16 +15,44 @@ it("Should allow you to pass a specific filename to run", async () => {
   expect(evals["Should Not Run"]).not.toBeDefined();
 });
 
+it("Should allow you to pass multiple paths to run", async () => {
+  await using fixture = await loadFixture("paths");
+
+  await fixture.run({
+    mode: "run-once-and-exit",
+    paths: ["should-run.eval.ts", "should-not-run.eval.ts"],
+  });
+
+  const evals = await getSuitesAsRecordViaStorage(fixture.storage);
+
+  expect(evals["Should Run"]).toHaveLength(1);
+  expect(evals["Should Not Run"]).toHaveLength(1);
+});
+
 it("Should allow you to pass a filename filter", async () => {
   await using fixture = await loadFixture("paths");
 
   await fixture.run({
     mode: "run-once-and-exit",
-    path: "should-run",
+    paths: ["should-run"],
   });
 
   const evals = await getSuitesAsRecordViaStorage(fixture.storage);
 
   expect(evals["Should Run"]).toHaveLength(1);
   expect(evals["Should Not Run"]).not.toBeDefined();
 });
+
+it("Should allow you to pass multiple filters to run", async () => {
+  await using fixture = await loadFixture("paths");
+
+  await fixture.run({
+    mode: "run-once-and-exit",
+    paths: ["should-run", "should-not-run"],
+  });
+
+  const evals = await getSuitesAsRecordViaStorage(fixture.storage);
+
+  expect(evals["Should Run"]).toHaveLength(1);
+  expect(evals["Should Not Run"]).toHaveLength(1);
+});
diff --git a/packages/evalite-tests/tests/test-utils.ts b/packages/evalite-tests/tests/test-utils.ts
@@ -47,7 +47,7 @@ export const loadFixture = async (
       });
     },
     run: async (opts: {
-      path?: string | undefined;
+      paths?: string[];
       mode:
         | "watch-for-file-changes"
         | "run-once-and-exit"

diff --git a/packages/evalite/src/command.test.ts b/packages/evalite/src/command.test.ts
@@ -21,7 +21,11 @@ describe("createCommand", () => {
 
     expect(runOnceAtPath).toHaveBeenCalled();
     expect(runOnceAtPath).toHaveBeenCalledWith({
-      path: undefined,
+      paths: [],
+      threshold: undefined,
+      outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
   });
 
@@ -41,7 +45,35 @@ describe("createCommand", () => {
 
     expect(watch).not.toHaveBeenCalled();
     expect(runOnceAtPath).toHaveBeenCalledWith({
-      path: "./src",
+      paths: ["./src"],
+      threshold: undefined,
+      outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
+    });
+  });
+
+  it("evalite with multiple paths", async () => {
+    const watch = vitest.fn();
+    const runOnceAtPath = vitest.fn();
+    const exportFn = vitest.fn();
+    const serveFn = vitest.fn();
+    const program = createProgram({
+      watch,
+      runOnceAtPath,
+      export: exportFn,
+      serve: serveFn,
+    });
+
+    await run(program, ["./src/a.eval.ts", "./src/b.eval.ts"], { process });
+
+    expect(watch).not.toHaveBeenCalled();
+    expect(runOnceAtPath).toHaveBeenCalledWith({
+      paths: ["./src/a.eval.ts", "./src/b.eval.ts"],
+      threshold: undefined,
+      outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
   });
 
@@ -60,7 +92,11 @@ describe("createCommand", () => {
     await run(program, ["watch"], { process });
 
     expect(watch).toHaveBeenCalledWith({
-      path: undefined,
+      paths: [],
+      threshold: undefined,
+      outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
     expect(runOnceAtPath).not.toHaveBeenCalled();
   });
@@ -80,7 +116,37 @@ describe("createCommand", () => {
     await run(program, ["watch", "./src"], { process });
 
     expect(watch).toHaveBeenCalledWith({
-      path: "./src",
+      paths: ["./src"],
+      threshold: undefined,
+      outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
+    });
+    expect(runOnceAtPath).not.toHaveBeenCalled();
+  });
+
+  it("evalite watch with multiple paths", async () => {
+    const watch = vitest.fn();
+    const runOnceAtPath = vitest.fn();
+    const exportFn = vitest.fn();
+    const serveFn = vitest.fn();
+    const program = createProgram({
+      watch,
+      runOnceAtPath,
+      export: exportFn,
+      serve: serveFn,
+    });
+
+    await run(program, ["watch", "./src/a.eval.ts", "./src/b.eval.ts"], {
+      process,
+    });
+
+    expect(watch).toHaveBeenCalledWith({
+      paths: ["./src/a.eval.ts", "./src/b.eval.ts"],
+      threshold: undefined,
+      outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
     expect(runOnceAtPath).not.toHaveBeenCalled();
   });
@@ -101,8 +167,11 @@ describe("createCommand", () => {
 
     expect(watch).not.toHaveBeenCalled();
     expect(runOnceAtPath).toHaveBeenCalledWith({
-      path: undefined,
+      paths: [],
       threshold: 50,
+      outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
   });
 
@@ -121,8 +190,11 @@ describe("createCommand", () => {
     await run(program, ["watch", "--threshold=50"], { process });
 
     expect(watch).toHaveBeenCalledWith({
-      path: undefined,
+      paths: [],
       threshold: 50,
+      outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
     expect(runOnceAtPath).not.toHaveBeenCalled();
   });
@@ -163,8 +235,11 @@ describe("createCommand", () => {
 
     expect(watch).not.toHaveBeenCalled();
     expect(runOnceAtPath).toHaveBeenCalledWith({
-      path: undefined,
+      paths: [],
       outputPath: "results.json",
+      threshold: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
   });
 
@@ -183,9 +258,11 @@ describe("createCommand", () => {
     await run(program, ["serve"], { process });
 
     expect(serveFn).toHaveBeenCalledWith({
-      path: undefined,
+      paths: [],
       threshold: undefined,
       outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
     expect(watch).not.toHaveBeenCalled();
     expect(runOnceAtPath).not.toHaveBeenCalled();
@@ -206,9 +283,11 @@ describe("createCommand", () => {
     await run(program, ["serve", "./src"], { process });
 
     expect(serveFn).toHaveBeenCalledWith({
-      path: "./src",
+      paths: ["./src"],
       threshold: undefined,
       outputPath: undefined,
+      hideTable: undefined,
+      noCache: undefined,
     });
     expect(watch).not.toHaveBeenCalled();
     expect(runOnceAtPath).not.toHaveBeenCalled();

diff --git a/packages/evalite/src/command.ts b/packages/evalite/src/command.ts
@@ -14,7 +14,7 @@ const packageJson = createRequire(import.meta.url)(
 ) as typeof import("../package.json");
 
 type ProgramOpts = {
-  path: string | undefined;
+  paths: string[];
   threshold: number | undefined;
   outputPath: string | undefined;
   hideTable: boolean | undefined;
@@ -23,8 +23,9 @@ type ProgramOpts = {
 
 const commonParameters = {
   positional: {
-    kind: "tuple",
-    parameters: [{ parse: String, brief: "path", optional: true }],
+    kind: "array",
+    parameter: { parse: String, brief: "paths" },
+    minimum: 0,
   },
   flags: {
     threshold: {
@@ -73,9 +74,9 @@ export const createProgram = (commands: {
 }) => {
   const runOnce = buildCommand({
     parameters: commonParameters,
-    func: async (flags: Flags, path: string | undefined) => {
+    func: async (flags: Flags, ...paths: string[]) => {
       return commands.runOnceAtPath({
-        path,
+        paths,
         threshold: flags.threshold,
         outputPath: flags.outputPath,
         hideTable: flags.hideTable,
@@ -89,9 +90,9 @@ export const createProgram = (commands: {
 
   const serve = buildCommand({
     parameters: commonParameters,
-    func: (flags: Flags, path: string | undefined) => {
+    func: (flags: Flags, ...paths: string[]) => {
       return commands.serve({
-        path,
+        paths,
         threshold: flags.threshold,
         outputPath: flags.outputPath,
         hideTable: flags.hideTable,
@@ -105,14 +106,14 @@ export const createProgram = (commands: {
 
   const watch = buildCommand({
     parameters: commonParameters,
-    func: (flags: Flags, path: string | undefined) => {
+    func: (flags: Flags, ...paths: string[]) => {
       if (flags.outputPath) {
         throw new Error(
           "--outputPath is not supported in watch mode. Use 'evalite --outputPath <path>' instead."
         );
       }
       return commands.watch({
-        path,
+        paths,
         threshold: flags.threshold,
         outputPath: flags.outputPath,
         hideTable: flags.hideTable,
@@ -195,35 +196,35 @@ export const createProgram = (commands: {
 };
 
 export const program = createProgram({
-  watch: (path) => {
+  watch: (opts) => {
     return runEvalite({
-      path: path.path,
-      scoreThreshold: path.threshold,
+      paths: opts.paths,
+      scoreThreshold: opts.threshold,
       cwd: undefined,
       mode: "watch-for-file-changes",
-      outputPath: path.outputPath,
-      hideTable: path.hideTable,
-      cacheEnabled: path.noCache ? false : undefined,
+      outputPath: opts.outputPath,
+      hideTable: opts.hideTable,
+      cacheEnabled: opts.noCache ? false : undefined,
     });
   },
-  runOnceAtPath: (path) => {
+  runOnceAtPath: (opts) => {
     return runEvalite({
-      path: path.path,
-      scoreThreshold: path.threshold,
+      paths: opts.paths,
+      scoreThreshold: opts.threshold,
       cwd: undefined,
       mode: "run-once-and-exit",
-      outputPath: path.outputPath,
-      cacheEnabled: path.noCache ? false : undefined,
+      outputPath: opts.outputPath,
+      cacheEnabled: opts.noCache ? false : undefined,
     });
   },
-  serve: (path) => {
+  serve: (opts) => {
     return runEvalite({
-      path: path.path,
-      scoreThreshold: path.threshold,
+      paths: opts.paths,
+      scoreThreshold: opts.threshold,
       cwd: undefined,
       mode: "run-once-and-serve",
-      outputPath: path.outputPath,
-      cacheEnabled: path.noCache ? false : undefined,
+      outputPath: opts.outputPath,
+      cacheEnabled: opts.noCache ? false : undefined,
     });
   },
   export: async (opts) => {

diff --git a/packages/evalite/src/run-evalite.ts b/packages/evalite/src/run-evalite.ts
@@ -168,7 +168,7 @@ const exportResultsToJSON = async (opts: {
  * watch mode, score thresholds, and result exporting.
  *
  * @param opts - Configuration options for running evaluations
- * @param opts.path - Optional path filter to run specific eval files (defaults to undefined, which runs all evals)
+ * @param opts.paths - Optional array of path filters to run specific eval files (when omitted or empty, no filter is applied and all evals run)
  * @param opts.cwd - Working directory (defaults to process.cwd())
  * @param opts.testOutputWritable - Optional writable stream for test output
  * @param opts.mode - Execution mode: "watch-for-file-changes", "run-once-and-exit", "run-once-and-serve", or "run-once"
@@ -198,16 +198,16 @@ const exportResultsToJSON = async (opts: {
  *   forceRerunTriggers: ["src/**\/*.ts", "prompts/**\/*"]
  * });
  *
- * // Run specific eval file with custom working directory
+ * // Run specific eval files with custom working directory
  * await runEvalite({
- *   path: "tests/my-eval.eval.ts",
+ *   paths: ["tests/my-eval.eval.ts", "tests/other.eval.ts"],
  *   cwd: "/path/to/project",
  *   mode: "run-once-and-exit"
  * });
  * ```
  */
 export const runEvalite = async (opts: {
-  path?: string | undefined;
+  paths?: string[];
   cwd?: string | undefined;
   testOutputWritable?: Writable;
   mode: Evalite.RunMode;
@@ -278,7 +278,7 @@ export const runEvalite = async (opts: {
       ? opts.forceRerunTriggers
       : config?.forceRerunTriggers) ?? configDefaults.forceRerunTriggers;
 
-  const filters = opts.path ? [opts.path] : undefined;
+  const filters = opts.paths?.length ? opts.paths : undefined;
   process.env.EVALITE_REPORT_TRACES = "true";
 
   let server: ReturnType<typeof createServer> | undefined = undefined;