diff --git a/packages/core/examples/observe_variables_login.ts b/packages/core/examples/observe_variables_login.ts new file mode 100644 index 000000000..faf0faecb --- /dev/null +++ b/packages/core/examples/observe_variables_login.ts @@ -0,0 +1,115 @@ +/** + * This example shows how to use observe({ variables }) to plan a sensitive + * login flow, validate the returned placeholder actions, and then execute them + * with act(). + * + * observe() returns %variableName% placeholders in action arguments. That lets + * you review the planned actions before any real secret values are used. + */ +import { Action, Stagehand } from "../lib/v3/index.js"; +import chalk from "chalk"; + +const variables = { + username: "test@browserbase.com", + password: "stagehand=goated", +}; + +const loginInstruction = [ + "Fill the login form using the available variables.", + "Use %username% for the email field.", + "Use %password% for the password field.", + "Include the field name in each action description.", +].join(" "); + +function findValidatedAction( + observed: Action[], + placeholder: string, + keywords: string[], +): Action { + const matches = observed.filter((action) => { + const description = action.description.toLowerCase(); + return ( + action.arguments?.includes(placeholder) === true && + keywords.some((keyword) => description.includes(keyword)) + ); + }); + + if (matches.length !== 1) { + throw new Error( + `Expected exactly one safe action for ${placeholder}, found ${matches.length}`, + ); + } + + return matches[0]; +} + +async function observeVariablesLogin() { + const stagehand = new Stagehand({ + env: "BROWSERBASE", + verbose: 1, + }); + + await stagehand.init(); + + try { + const page = stagehand.context.pages()[0]; + + await page.goto("https://v0-modern-login-flow.vercel.app/", { + waitUntil: "networkidle", + timeoutMs: 30000, + }); + + const observed = await stagehand.observe(loginInstruction, { + variables, + }); + + console.log( + `${chalk.green("Observe:")} 
Placeholder actions found:\n${observed + .map( + (action) => + `${chalk.yellow(action.description)} -> ${chalk.blue(action.arguments?.join(", ") || "no arguments")}`, + ) + .join("\n")}`, + ); + + const emailAction = findValidatedAction(observed, "%username%", ["email"]); + const passwordAction = findValidatedAction(observed, "%password%", [ + "password", + ]); + + console.log( + `\n${chalk.green("Validated:")} Safe actions to execute:\n${[ + emailAction, + passwordAction, + ] + .map( + (action) => + `${chalk.yellow(action.description)} -> ${chalk.blue(action.arguments?.[0] || "no value")}`, + ) + .join("\n")}`, + ); + + await stagehand.act(emailAction, { variables }); + await stagehand.act(passwordAction, { variables }); + + const [submitButton] = await stagehand.observe("find the sign in button"); + + if (!submitButton) { + throw new Error("Could not find the sign in button"); + } + + await stagehand.act(submitButton); + console.log( + chalk.green( + "\nSubmitted login form. Waiting 10 seconds before closing...", + ), + ); + await page.waitForTimeout(10000); + } finally { + await stagehand.close(); + } +} + +(async () => { + await observeVariablesLogin(); +})(); diff --git a/packages/core/lib/inference.ts b/packages/core/lib/inference.ts index 0bda03aea..b95bc952f 100644 --- a/packages/core/lib/inference.ts +++ b/packages/core/lib/inference.ts @@ -17,6 +17,7 @@ import type { StagehandZodObject, } from "./v3/zodCompat.js"; import { SupportedUnderstudyAction } from "./v3/types/private/handlers.js"; +import type { Variables } from "./v3/types/public/agent.js"; // Re-export for backward compatibility export type { LLMParsedResponse, LLMUsage } from "./v3/llm/LLMClient.js"; @@ -245,6 +246,7 @@ export async function observe({ logger, logInferenceToFile = false, supportedActions, + variables, }: { instruction: string; domElements: string; @@ -253,6 +255,7 @@ export async function observe({ logger: (message: LogLine) => void; logInferenceToFile?: boolean; 
supportedActions?: string[]; + variables?: Variables; }) { const isGPT5 = llmClient.modelName.includes("gpt-5"); // TODO: remove this as we update support for gpt-5 configuration options @@ -297,7 +300,11 @@ export async function observe({ type ObserveResponse = z.infer; const messages: ChatMessage[] = [ - buildObserveSystemPrompt(userProvidedInstructions, supportedActions), + buildObserveSystemPrompt( + userProvidedInstructions, + supportedActions, + variables, + ), buildObserveUserMessage(instruction, domElements), ]; diff --git a/packages/core/lib/prompt.ts b/packages/core/lib/prompt.ts index 10437b66d..9ebc601ca 100644 --- a/packages/core/lib/prompt.ts +++ b/packages/core/lib/prompt.ts @@ -1,5 +1,6 @@ import { ChatMessage } from "./v3/llm/LLMClient.js"; import type { Variables } from "./v3/types/public/agent.js"; +import { getVariablePromptEntries } from "./v3/agent/utils/variables.js"; export function buildUserInstructionsString( userProvidedInstructions?: string, @@ -112,10 +113,21 @@ Extracted content: ${JSON.stringify(extractionResponse, null, 2)}`, export function buildObserveSystemPrompt( userProvidedInstructions?: string, supportedActions?: string[], + variables?: Variables, ): ChatMessage { const actionsString = supportedActions?.length ? `\n\nSupported actions: ${supportedActions.join(", ")}` : ""; + const variableEntries = getVariablePromptEntries(variables); + const variablesString = variableEntries.length + ? `\n\nAvailable variables: ${variableEntries + .map(({ name, description }) => { + return description ? `%${name}% (${description})` : `%${name}%`; + }) + .join( + ", ", + )}. When an action needs a dynamic or sensitive value, return the matching %variableName% placeholder in the action arguments instead of a literal value` + : ""; const observeSystemPrompt = ` You are helping the user automate the browser by finding elements based on what the user wants to observe in the page. @@ -125,7 +137,7 @@ You will be given: 2. 
a hierarchical accessibility tree showing the semantic structure of the page. The tree is a hybrid of the DOM and the accessibility tree. Return an array of elements that match the instruction if they exist, otherwise return an empty array. -When returning elements, include the appropriate method from the supported actions list.${actionsString}. When choosing non-left click actions, provide right or middle as the argument.`; +When returning elements, include the appropriate method from the supported actions list.${actionsString}${variablesString}. When choosing non-left click actions, provide right or middle as the argument.`; const content = observeSystemPrompt.replace(/\s+/g, " "); return { diff --git a/packages/core/lib/v3/agent/prompts/agentSystemPrompt.ts b/packages/core/lib/v3/agent/prompts/agentSystemPrompt.ts index a6db243a2..d5b6637e8 100644 --- a/packages/core/lib/v3/agent/prompts/agentSystemPrompt.ts +++ b/packages/core/lib/v3/agent/prompts/agentSystemPrompt.ts @@ -1,5 +1,6 @@ import type { AgentToolMode, Variables } from "../../types/public/agent.js"; import { CAPTCHA_SYSTEM_PROMPT_NOTE } from "../utils/captchaSolver.js"; +import { getVariablePromptEntries } from "../utils/variables.js"; export interface AgentSystemPromptOptions { url: string; @@ -214,17 +215,14 @@ export function buildAgentSystemPrompt( const variableToolsNote = isHybridMode ? "Use %variableName% syntax in the type, fillFormVision, or act tool's value/text/action fields." : "Use %variableName% syntax in the act or fillForm tool's action fields."; + const variableEntries = getVariablePromptEntries(variables); const variablesSection = hasVariables ? ` You have access to the following variables. Use %variableName% syntax to substitute variable values. This is especially important for sensitive data like passwords. 
${variableToolsNote} To type a password, use: type %password% into the password field - ${Object.entries(variables) - .map(([name, v]) => { - const description = - typeof v === "object" && v !== null && "value" in v - ? v.description - : undefined; + ${variableEntries + .map(({ name, description }) => { return description ? `${description}` : ``; diff --git a/packages/core/lib/v3/agent/tools/fillform.ts b/packages/core/lib/v3/agent/tools/fillform.ts index 852adb654..4502d5c45 100644 --- a/packages/core/lib/v3/agent/tools/fillform.ts +++ b/packages/core/lib/v3/agent/tools/fillform.ts @@ -46,8 +46,8 @@ export const fillFormTool = ( .join(", ")}`; const observeOptions = executionModel - ? { model: executionModel, timeout: toolTimeout } - : { timeout: toolTimeout }; + ? { model: executionModel, variables, timeout: toolTimeout } + : { variables, timeout: toolTimeout }; const observeResults = await v3.observe(instruction, observeOptions); const completed = [] as unknown[]; diff --git a/packages/core/lib/v3/agent/utils/variables.ts b/packages/core/lib/v3/agent/utils/variables.ts index 9f46b1951..2694060b2 100644 --- a/packages/core/lib/v3/agent/utils/variables.ts +++ b/packages/core/lib/v3/agent/utils/variables.ts @@ -22,6 +22,21 @@ export function getVariableDescription(v: VariableValue): string | undefined { return undefined; } +export interface VariablePromptEntry { + name: string; + description?: string; +} + +export function getVariablePromptEntries( + variables?: Variables, +): VariablePromptEntry[] { + if (!variables) return []; + return Object.entries(variables).map(([name, value]) => ({ + name, + description: getVariableDescription(value), + })); +} + /** * Substitutes %variableName% tokens in text with resolved variable values. * Works with both simple and rich variable formats. 
diff --git a/packages/core/lib/v3/handlers/observeHandler.ts b/packages/core/lib/v3/handlers/observeHandler.ts index d7a5c0c29..03d817504 100644 --- a/packages/core/lib/v3/handlers/observeHandler.ts +++ b/packages/core/lib/v3/handlers/observeHandler.ts @@ -64,7 +64,7 @@ export class ObserveHandler { } async observe(params: ObserveHandlerParams): Promise { - const { instruction, page, timeout, selector, model } = params; + const { instruction, page, timeout, selector, model, variables } = params; const llmClient = this.resolveLlmClient(model); @@ -116,6 +116,7 @@ export class ObserveHandler { logger: v3Logger, logInferenceToFile: this.logInferenceToFile, supportedActions: Object.values(SupportedUnderstudyAction), + variables, }); const { diff --git a/packages/core/lib/v3/types/private/handlers.ts b/packages/core/lib/v3/types/private/handlers.ts index ab30ff6ac..6ef48de34 100644 --- a/packages/core/lib/v3/types/private/handlers.ts +++ b/packages/core/lib/v3/types/private/handlers.ts @@ -23,6 +23,7 @@ export interface ExtractHandlerParams { export interface ObserveHandlerParams { instruction?: string; model?: ModelConfiguration; + variables?: Variables; timeout?: number; selector?: string; page: Page; diff --git a/packages/core/lib/v3/types/public/api.ts b/packages/core/lib/v3/types/public/api.ts index 2b9647df5..34f48896d 100644 --- a/packages/core/lib/v3/types/public/api.ts +++ b/packages/core/lib/v3/types/public/api.ts @@ -10,6 +10,12 @@ */ import { z } from "zod/v4"; import type Browserbase from "@browserbasehq/sdk"; +import { VariablesSchema } from "./variables.js"; +export { + VariablePrimitiveSchema, + VariableValueSchema, + VariablesSchema, +} from "./variables.js"; // ============================================================================= // Shared Components @@ -405,13 +411,17 @@ export const ActOptionsSchema = z description: "Model configuration object or model name string (e.g., 'openai/gpt-5-nano')", }), - variables: z - .record(z.string(), 
z.string()) - .optional() - .meta({ - description: "Variables to substitute in the action instruction", - example: { username: "john_doe" }, - }), + variables: VariablesSchema.optional().meta({ + description: + "Variables to substitute in the action instruction. Accepts flat primitives or { value, description? } objects.", + example: { + username: "john_doe", + password: { + value: "secret123", + description: "The login password", + }, + }, + }), timeout: z.number().optional().meta({ description: "Timeout in ms for the action", example: 30000, @@ -540,6 +550,17 @@ export const ObserveOptionsSchema = z description: "Model configuration object or model name string (e.g., 'openai/gpt-5-nano')", }), + variables: VariablesSchema.optional().meta({ + description: + "Variables whose names are exposed to the model so observe() returns %variableName% placeholders in suggested action arguments instead of literal values. Accepts flat primitives or { value, description? } objects.", + example: { + username: { + value: "john@example.com", + description: "The login email", + }, + rememberMe: true, + }, + }), timeout: z.number().optional().meta({ description: "Timeout in ms for the observation", example: 30000, diff --git a/packages/core/lib/v3/types/public/methods.ts b/packages/core/lib/v3/types/public/methods.ts index ded9c0267..77b1b0eaf 100644 --- a/packages/core/lib/v3/types/public/methods.ts +++ b/packages/core/lib/v3/types/public/methods.ts @@ -73,6 +73,7 @@ export const pageTextSchema = z.object({ export interface ObserveOptions { model?: ModelConfiguration; + variables?: Variables; timeout?: number; selector?: string; page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page; diff --git a/packages/core/lib/v3/types/public/variables.ts b/packages/core/lib/v3/types/public/variables.ts new file mode 100644 index 000000000..fd7286ef0 --- /dev/null +++ b/packages/core/lib/v3/types/public/variables.ts @@ -0,0 +1,24 @@ +import { z } from "zod/v4"; +import type { 
VariableValue, Variables } from "./agent.js"; + +type VariablePrimitive = string | number | boolean; + +export const VariablePrimitiveSchema: z.ZodType = z + .union([z.string(), z.number(), z.boolean()]) + .meta({ id: "VariablePrimitive" }); + +export const VariableValueSchema: z.ZodType = z + .union([ + VariablePrimitiveSchema, + z + .object({ + value: VariablePrimitiveSchema, + description: z.string().optional(), + }) + .strict(), + ]) + .meta({ id: "VariableValue" }); + +export const VariablesSchema: z.ZodType = z + .record(z.string(), VariableValueSchema) + .meta({ id: "Variables" }); diff --git a/packages/core/lib/v3/v3.ts b/packages/core/lib/v3/v3.ts index 7129d3b27..a3bd06039 100644 --- a/packages/core/lib/v3/v3.ts +++ b/packages/core/lib/v3/v3.ts @@ -1378,6 +1378,7 @@ export class V3 { const handlerParams: ObserveHandlerParams = { instruction, model: options?.model, + variables: options?.variables, timeout: options?.timeout, selector: options?.selector, page: page!, @@ -1400,6 +1401,7 @@ export class V3 { "observe", { instruction, + variables: options?.variables, timeout: options?.timeout, }, results, diff --git a/packages/core/tests/unit/agent-execution-model.test.ts b/packages/core/tests/unit/agent-execution-model.test.ts index 82e3158fd..b1e4dfd65 100644 --- a/packages/core/tests/unit/agent-execution-model.test.ts +++ b/packages/core/tests/unit/agent-execution-model.test.ts @@ -6,10 +6,10 @@ import type { V3 } from "../../lib/v3/v3.js"; /** * Minimal mock of V3 that captures how tools pass `model` options - * into v3.act(), v3.extract(), and v3.observe(). + * into v3.act(), v3.extract(), and v3.observe(), plus observe variables. 
*/ function createMockV3() { - const calls: { method: string; model: unknown }[] = []; + const calls: { method: string; model: unknown; variables?: unknown }[] = []; const mock = { logger: vi.fn(), @@ -34,8 +34,15 @@ function createMockV3() { }, ), observe: vi.fn( - async (_instruction: unknown, options?: { model?: unknown }) => { - calls.push({ method: "observe", model: options?.model }); + async ( + _instruction: unknown, + options?: { model?: unknown; variables?: unknown }, + ) => { + calls.push({ + method: "observe", + model: options?.model, + variables: options?.variables, + }); return []; }, ), @@ -103,6 +110,29 @@ describe("agent tools pass full executionModel config to v3 methods", () => { expect(v3.calls[0].model).toBe(modelConfig); }); + it("fillFormTool passes variables through to v3.observe()", async () => { + const v3 = createMockV3(); + const variables = { + username: { + value: "john@example.com", + description: "The login email", + }, + }; + const tool = fillFormTool(v3, undefined, variables); + await tool.execute!( + { fields: [{ action: "type %username% into the email field" }] }, + { + toolCallId: "t3-variables", + messages: [], + abortSignal: new AbortController().signal, + }, + ); + + expect(v3.calls).toHaveLength(1); + expect(v3.calls[0].method).toBe("observe"); + expect(v3.calls[0].variables).toBe(variables); + }); + it("actTool passes undefined when no executionModel is set", async () => { const v3 = createMockV3(); const tool = actTool(v3, undefined); diff --git a/packages/core/tests/unit/agent-system-prompt-variables.test.ts b/packages/core/tests/unit/agent-system-prompt-variables.test.ts new file mode 100644 index 000000000..bf556d1c6 --- /dev/null +++ b/packages/core/tests/unit/agent-system-prompt-variables.test.ts @@ -0,0 +1,24 @@ +import { describe, expect, it } from "vitest"; +import { buildAgentSystemPrompt } from "../../lib/v3/agent/prompts/agentSystemPrompt.js"; + +describe("buildAgentSystemPrompt variables", () => { + it("includes 
variable descriptions when present", () => { + const prompt = buildAgentSystemPrompt({ + url: "https://example.com", + executionInstruction: "Fill the form", + mode: "dom", + variables: { + username: { + value: "john@example.com", + description: "The login email", + }, + password: "secret123", + }, + }); + + expect(prompt).toContain( + 'The login email', + ); + expect(prompt).toContain(''); + }); +}); diff --git a/packages/core/tests/unit/api-client-observe-variables.test.ts b/packages/core/tests/unit/api-client-observe-variables.test.ts new file mode 100644 index 000000000..ee306642a --- /dev/null +++ b/packages/core/tests/unit/api-client-observe-variables.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it, vi } from "vitest"; +import { StagehandAPIClient } from "../../lib/v3/api.js"; + +describe("StagehandAPIClient variable serialization", () => { + it("preserves rich variables when sending the act request", async () => { + const client = new StagehandAPIClient({ + apiKey: "bb-test", + logger: vi.fn(), + }); + const executeMock = vi.fn().mockResolvedValue({ + success: true, + message: "ok", + actionDescription: "typed", + actions: [], + }); + + ( + client as unknown as { + execute: typeof executeMock; + } + ).execute = executeMock; + + await client.act({ + input: "type %username% into the email field", + options: { + variables: { + username: { + value: "john@example.com", + description: "The login email", + }, + password: "secret", + }, + }, + }); + + expect(executeMock).toHaveBeenCalledWith({ + method: "act", + args: { + input: "type %username% into the email field", + options: { + variables: { + username: { + value: "john@example.com", + description: "The login email", + }, + password: "secret", + }, + }, + frameId: undefined, + }, + serverCache: undefined, + }); + }); + + it("preserves rich variables when sending the observe request", async () => { + const client = new StagehandAPIClient({ + apiKey: "bb-test", + logger: vi.fn(), + }); + const executeMock 
= vi.fn().mockResolvedValue([]); + + ( + client as unknown as { + execute: typeof executeMock; + } + ).execute = executeMock; + + await client.observe({ + instruction: "find the field where %username% should be entered", + options: { + variables: { + username: { + value: "john@example.com", + description: "The login email", + }, + password: "secret", + }, + }, + }); + + expect(executeMock).toHaveBeenCalledWith({ + method: "observe", + args: { + instruction: "find the field where %username% should be entered", + options: { + variables: { + username: { + value: "john@example.com", + description: "The login email", + }, + password: "secret", + }, + }, + frameId: undefined, + }, + serverCache: undefined, + }); + }); +}); diff --git a/packages/core/tests/unit/api-variables-schema.test.ts b/packages/core/tests/unit/api-variables-schema.test.ts new file mode 100644 index 000000000..cda10c19c --- /dev/null +++ b/packages/core/tests/unit/api-variables-schema.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from "vitest"; +import { Api } from "../../lib/v3/types/public/index.js"; + +describe("API variable schemas", () => { + it("accepts rich variables for act requests", () => { + const result = Api.ActRequestSchema.safeParse({ + input: "type %username% into the email field", + options: { + variables: { + username: { + value: "john@example.com", + description: "The login email", + }, + rememberMe: true, + }, + }, + }); + + expect(result.success).toBe(true); + }); + + it("accepts rich variables for observe requests", () => { + const result = Api.ObserveRequestSchema.safeParse({ + instruction: "find the field where %username% should be entered", + options: { + variables: { + username: { + value: "john@example.com", + description: "The login email", + }, + rememberMe: true, + }, + }, + }); + + expect(result.success).toBe(true); + }); +}); diff --git a/packages/core/tests/unit/prompt-observe-variables.test.ts b/packages/core/tests/unit/prompt-observe-variables.test.ts 
new file mode 100644 index 000000000..886623260 --- /dev/null +++ b/packages/core/tests/unit/prompt-observe-variables.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from "vitest"; +import { buildObserveSystemPrompt } from "../../lib/prompt.js"; + +describe("buildObserveSystemPrompt", () => { + it("includes variable descriptions when present", () => { + const prompt = buildObserveSystemPrompt(undefined, ["click", "fill"], { + username: { + value: "john@example.com", + description: "The login email", + }, + password: "secret123", + }); + + expect(prompt.content).toContain("Supported actions: click, fill"); + expect(prompt.content).toContain( + "Available variables: %username% (The login email), %password%", + ); + expect(prompt.content).toContain( + "return the matching %variableName% placeholder", + ); + }); +}); diff --git a/packages/core/tests/unit/public-api/public-types.test.ts b/packages/core/tests/unit/public-api/public-types.test.ts index 0cd8fc055..203cca159 100644 --- a/packages/core/tests/unit/public-api/public-types.test.ts +++ b/packages/core/tests/unit/public-api/public-types.test.ts @@ -164,6 +164,7 @@ describe("Stagehand public API types", () => { describe("ObserveOptions", () => { type ExpectedObserveOptions = { model?: Stagehand.ModelConfiguration; + variables?: Stagehand.Variables; timeout?: number; selector?: string; page?: Stagehand.AnyPage; diff --git a/packages/core/tests/unit/timeout-handlers.test.ts b/packages/core/tests/unit/timeout-handlers.test.ts index d30e09682..fad085834 100644 --- a/packages/core/tests/unit/timeout-handlers.test.ts +++ b/packages/core/tests/unit/timeout-handlers.test.ts @@ -898,6 +898,66 @@ describe("No-timeout success paths", () => { ); }); + it("observe() forwards variables to inference and preserves placeholders", async () => { + const captureHybridSnapshotMock = vi.mocked(captureHybridSnapshot); + captureHybridSnapshotMock.mockResolvedValue({ + combinedTree: "tree content", + combinedXpathMap: { "1-0": 
"/html/body/input" }, + combinedUrlMap: {}, + }); + + const observeInferenceMock = vi.mocked(observeInference); + observeInferenceMock.mockResolvedValue({ + elements: [ + { + elementId: "1-0", + description: "Email field", + method: "fill", + arguments: ["%username%"], + }, + ], + prompt_tokens: 150, + completion_tokens: 75, + reasoning_tokens: 15, + cached_input_tokens: 8, + inference_time_ms: 600, + } as ReturnType extends Promise + ? T + : never); + + vi.mocked(createTimeoutGuard).mockImplementation(() => { + return vi.fn(() => { + // No-op - never throws + }); + }); + + const handler = buildObserveHandler(); + const fakePage = { + mainFrame: vi.fn().mockReturnValue({}), + } as unknown as Page; + const variables = { + username: { + value: "john@example.com", + description: "The login email", + }, + }; + + const result = await handler.observe({ + instruction: "find the field where %username% should be entered", + variables, + page: fakePage, + }); + + expect(observeInferenceMock).toHaveBeenCalledWith( + expect.objectContaining({ + variables, + }), + ); + expect(result).toHaveLength(1); + expect(result[0]).toHaveProperty("arguments"); + expect(result[0]?.arguments).toEqual(["%username%"]); + }); + it("act() with zero timeout behaves as no timeout", async () => { const waitForDomNetworkQuietMock = vi.mocked(waitForDomNetworkQuiet); waitForDomNetworkQuietMock.mockResolvedValue(undefined); diff --git a/packages/docs/v3/basics/observe.mdx b/packages/docs/v3/basics/observe.mdx index 21cdd8640..80b4ebd03 100644 --- a/packages/docs/v3/basics/observe.mdx +++ b/packages/docs/v3/basics/observe.mdx @@ -101,7 +101,7 @@ await stagehand.observe("what is the page title?"); ## Advanced Configuration -You can pass additional options to configure the model, timeout, and selector scope: +You can pass additional options to configure the model, timeout, selector scope, and placeholder variables: ```typescript // Custom model configuration @@ -112,6 +112,32 @@ const actions = await 
stagehand.observe("find navigation links", { }); ``` +### Validate Then Act with Variables + +For login and other safety-sensitive flows, use `observe()` to discover candidate actions, validate them, and then execute them with `act()`. When you pass `variables`, `observe()` returns `%variableName%` placeholders in the suggested action arguments instead of raw secret values. + +```typescript +const actions = await stagehand.observe("find the login form fields", { + variables: { + username: { value: "user@example.com", description: "The login email" }, + password: { value: process.env.USER_PASSWORD, description: "The login password" }, + } +}); + +const emailField = actions.find((action) => action.arguments?.includes("%username%")); +const passwordField = actions.find((action) => action.arguments?.includes("%password%")); + +if (emailField && passwordField) { + await stagehand.act(emailField, { + variables: { username: "user@example.com" } + }); + + await stagehand.act(passwordField, { + variables: { password: process.env.USER_PASSWORD } + }); +} +``` + ### Server-side Caching diff --git a/packages/docs/v3/references/observe.mdx b/packages/docs/v3/references/observe.mdx index 11e0fd733..9addc444b 100644 --- a/packages/docs/v3/references/observe.mdx +++ b/packages/docs/v3/references/observe.mdx @@ -31,12 +31,19 @@ await stagehand.observe(instruction: string, options: ObserveOptions): Promise; timeout?: number; selector?: string; page?: PlaywrightPage | PuppeteerPage | PatchrightPage | Page; serverCache?: boolean; } +type VariableValue = + | string + | number + | boolean + | { value: string | number | boolean; description?: string }; + // ModelConfiguration can be either a string or an object type ModelConfiguration = | string // Format: "provider/model" (e.g., "openai/gpt-4o", "anthropic/claude-sonnet-4-6") @@ -76,6 +83,12 @@ type ModelConfiguration = + + Key-value pairs for placeholder generation using `%variableName%` syntax in returned action arguments. 
`observe()` exposes only the placeholder names and any descriptions you provide to the model (never the values) and returns `Action[]` that still contain `%variableName%` tokens so you can validate them before executing with `act()`. + + Values can be simple primitives (`string`, `number`, `boolean`) or rich objects with an optional description (`{ value, description? }`). + + Maximum time in milliseconds to wait for the observation to complete. Default varies by configuration. @@ -215,6 +228,33 @@ const tableActions = await stagehand.observe("find all table rows", { }); ``` + + + +```typescript +const [emailField, passwordField, submitButton] = await stagehand.observe( + "Find the login fields and submit button", + { + variables: { + username: { value: "user@example.com", description: "The login email" }, + password: { value: process.env.USER_PASSWORD, description: "The login password" }, + }, + } +); + +// Returned actions can reference placeholders like %username% and %password% +console.log(emailField.arguments); + +// Validate the suggested actions before executing them +await stagehand.act(emailField, { + variables: { username: "user@example.com" }, +}); +await stagehand.act(passwordField, { + variables: { password: process.env.USER_PASSWORD }, +}); +await stagehand.act(submitButton); +``` + diff --git a/packages/server-v3/openapi.v3.yaml b/packages/server-v3/openapi.v3.yaml index 616d8a93b..445221b35 100644 --- a/packages/server-v3/openapi.v3.yaml +++ b/packages/server-v3/openapi.v3.yaml @@ -226,6 +226,29 @@ components: required: - selector - description + VariablePrimitive: + anyOf: + - type: string + - type: number + - type: boolean + VariableValue: + anyOf: + - $ref: "#/components/schemas/VariablePrimitive" + - type: object + properties: + value: + $ref: "#/components/schemas/VariablePrimitive" + description: + type: string + required: + - value + additionalProperties: false + Variables: + type: object + propertyNames: + type: string + additionalProperties: + $ref: "#/components/schemas/VariableValue" 
BrowserConfig: type: object properties: @@ -478,14 +501,14 @@ components: - $ref: "#/components/schemas/ModelConfig" - type: string variables: - description: Variables to substitute in the action instruction + description: Variables to substitute in the action instruction. Accepts flat + primitives or { value, description? } objects. example: username: john_doe - type: object - propertyNames: - type: string - additionalProperties: - type: string + password: + value: secret123 + description: The login password + $ref: "#/components/schemas/Variables" timeout: description: Timeout in ms for the action example: 30000 @@ -608,6 +631,17 @@ components: anyOf: - $ref: "#/components/schemas/ModelConfig" - type: string + variables: + description: Variables whose names are exposed to the model so observe() returns + %variableName% placeholders in suggested action arguments instead of + literal values. Accepts flat primitives or { value, description? } + objects. + example: + username: + value: john@example.com + description: The login email + rememberMe: true + $ref: "#/components/schemas/Variables" timeout: description: Timeout in ms for the observation example: 30000 @@ -1446,14 +1480,14 @@ components: - $ref: "#/components/schemas/ModelConfigOutput" - type: string variables: - description: Variables to substitute in the action instruction + description: Variables to substitute in the action instruction. Accepts flat + primitives or { value, description? } objects. 
example: username: john_doe - type: object - propertyNames: - type: string - additionalProperties: - type: string + password: + value: secret123 + description: The login password + $ref: "#/components/schemas/Variables" timeout: description: Timeout in ms for the action example: 30000 @@ -1535,6 +1569,17 @@ components: anyOf: - $ref: "#/components/schemas/ModelConfigOutput" - type: string + variables: + description: Variables whose names are exposed to the model so observe() returns + %variableName% placeholders in suggested action arguments instead of + literal values. Accepts flat primitives or { value, description? } + objects. + example: + username: + value: john@example.com + description: The login email + rememberMe: true + $ref: "#/components/schemas/Variables" timeout: description: Timeout in ms for the observation example: 30000 diff --git a/packages/server-v3/scripts/gen-openapi.ts b/packages/server-v3/scripts/gen-openapi.ts index c9e421187..5bcfbe427 100644 --- a/packages/server-v3/scripts/gen-openapi.ts +++ b/packages/server-v3/scripts/gen-openapi.ts @@ -45,6 +45,9 @@ async function main() { ModelConfigObject: Api.ModelConfigObjectSchema, ModelConfig: Api.ModelConfigSchema, Action: Api.ActionSchema, + VariablePrimitive: Api.VariablePrimitiveSchema, + VariableValue: Api.VariableValueSchema, + Variables: Api.VariablesSchema, SessionIdParams: Api.SessionIdParamsSchema, BrowserConfig: Api.BrowserConfigSchema, SessionHeaders: Api.SessionHeadersSchema, diff --git a/packages/server-v3/src/routes/v1/sessions/_id/observe.ts b/packages/server-v3/src/routes/v1/sessions/_id/observe.ts index 84fd0a3ec..65892bac8 100644 --- a/packages/server-v3/src/routes/v1/sessions/_id/observe.ts +++ b/packages/server-v3/src/routes/v1/sessions/_id/observe.ts @@ -1,6 +1,10 @@ import type { RouteHandlerMethod, RouteOptions } from "fastify"; import { StatusCodes } from "http-status-codes"; -import type { Action } from "@browserbasehq/stagehand"; +import type { + Action, + 
ObserveOptions, + Variables, +} from "@browserbasehq/stagehand"; import type { FastifyZodOpenApiSchema } from "fastify-zod-openapi"; import { Api } from "@browserbasehq/stagehand"; @@ -51,8 +55,9 @@ const observeRouteHandler: RouteHandlerMethod = withErrorHandling( ); } - const safeOptions = { + const safeOptions: ObserveOptions = { ...data.options, + variables: data.options?.variables as Variables | undefined, model: typeof data.options?.model === "string" ? { modelName: data.options.model } diff --git a/packages/server-v3/test/integration/v3/observe.test.ts b/packages/server-v3/test/integration/v3/observe.test.ts index 1e0651060..efc993059 100644 --- a/packages/server-v3/test/integration/v3/observe.test.ts +++ b/packages/server-v3/test/integration/v3/observe.test.ts @@ -156,6 +156,91 @@ describe("POST /v1/sessions/:id/observe (V3)", () => { ); }); + it("should observe with variables option", async () => { + const url = getBaseUrl(); + + interface ObserveResponse { + success: boolean; + data?: { result: unknown[]; actionId?: string }; + } + + const ctx = await fetchWithContext( + `${url}/v1/sessions/${sessionId}/observe`, + { + method: "POST", + headers: getHeaders("3.0.0"), + body: JSON.stringify({ + instruction: "Find any link on the page", + options: { + variables: { + username: "john@example.com", + }, + }, + }), + }, + ); + + assertFetchStatus(ctx, HTTP_OK, "Observe with variables should succeed"); + assertFetchOk(ctx.body !== null, "Response body should be parseable", ctx); + assertFetchOk(ctx.body.success, "Response should indicate success", ctx); + assertFetchOk( + ctx.body.data !== undefined, + "Response should have data", + ctx, + ); + assertFetchOk( + Array.isArray(ctx.body.data.result), + "Result should be an array of observed elements", + ctx, + ); + }); + + it("should observe with rich variables option", async () => { + const url = getBaseUrl(); + + interface ObserveResponse { + success: boolean; + data?: { result: unknown[]; actionId?: string }; + } 
+ + const ctx = await fetchWithContext( + `${url}/v1/sessions/${sessionId}/observe`, + { + method: "POST", + headers: getHeaders("3.0.0"), + body: JSON.stringify({ + instruction: "Find any link on the page", + options: { + variables: { + username: { + value: "john@example.com", + description: "The login email", + }, + }, + }, + }), + }, + ); + + assertFetchStatus( + ctx, + HTTP_OK, + "Observe with rich variables should succeed", + ); + assertFetchOk(ctx.body !== null, "Response body should be parseable", ctx); + assertFetchOk(ctx.body.success, "Response should indicate success", ctx); + assertFetchOk( + ctx.body.data !== undefined, + "Response should have data", + ctx, + ); + assertFetchOk( + Array.isArray(ctx.body.data.result), + "Result should be an array of observed elements", + ctx, + ); + }); + it("should observe without instruction (observe all)", async () => { const url = getBaseUrl();