From 4e3b974968ba72c661334aa19eaf9d12cef957c0 Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Sun, 22 Mar 2026 12:50:33 +0800 Subject: [PATCH 01/12] feat: add cloud browser support --- packages/browser/README.md | 191 +++ packages/browser/package.json | 39 + packages/browser/src/browser.ts | 172 +++ packages/browser/src/cdp.ts | 264 ++++ packages/browser/src/command.ts | 1127 +++++++++++++++++ packages/browser/src/index.ts | 41 + packages/browser/src/page.ts | 848 +++++++++++++ packages/browser/src/providers/browser-use.ts | 119 ++ packages/browser/src/providers/browserbase.ts | 92 ++ packages/browser/src/providers/index.ts | 7 + packages/browser/src/providers/types.ts | 17 + packages/browser/src/snapshot.ts | 665 ++++++++++ packages/browser/tsconfig.json | 20 + packages/core/src/chat/browser-viewer.svelte | 173 +++ packages/core/src/chat/chat-interface.svelte | 2 + packages/core/src/chat/settings-panel.svelte | 72 ++ packages/core/src/chat/tool-call-block.svelte | 14 +- packages/sdk/package.json | 1 + packages/sdk/src/index.ts | 6 + packages/sdk/src/tools/bash.ts | 4 +- packages/sdk/src/vfs/custom-commands.ts | 38 + packages/sdk/src/web/config.ts | 3 + packages/sdk/tests/tools-bash.test.ts | 37 +- pnpm-lock.yaml | 17 + 24 files changed, 3935 insertions(+), 34 deletions(-) create mode 100644 packages/browser/README.md create mode 100644 packages/browser/package.json create mode 100644 packages/browser/src/browser.ts create mode 100644 packages/browser/src/cdp.ts create mode 100644 packages/browser/src/command.ts create mode 100644 packages/browser/src/index.ts create mode 100644 packages/browser/src/page.ts create mode 100644 packages/browser/src/providers/browser-use.ts create mode 100644 packages/browser/src/providers/browserbase.ts create mode 100644 packages/browser/src/providers/index.ts create mode 100644 packages/browser/src/providers/types.ts create mode 100644 packages/browser/src/snapshot.ts create mode 100644 
packages/browser/tsconfig.json create mode 100644 packages/core/src/chat/browser-viewer.svelte diff --git a/packages/browser/README.md b/packages/browser/README.md new file mode 100644 index 0000000..9987f0e --- /dev/null +++ b/packages/browser/README.md @@ -0,0 +1,191 @@ +# @office-agents/browser + +Browser-native CDP (Chrome DevTools Protocol) client for cloud browser automation. **Zero Node.js dependencies** — works in any browser context including Office Add-in taskpanes. + +## Architecture + +The core primitive is a **CDP WebSocket URL**. Any provider that gives you one works: + +``` +Your browser (Office taskpane, web app, etc.) + → WebSocket to wss://connect.browserbase.com/?signingKey=... + → CDP JSON-RPC messages + → Cloud Chrome instance +``` + +## Providers + +The `BrowserProvider` interface abstracts session creation. The CDP URL is the universal handoff point — any cloud browser provider that exposes CDP works. + +| Provider | Status | Notes | +|----------|--------|-------| +| [Browserbase](https://browserbase.com) | ✅ Built-in | Anti-bot stealth, CAPTCHA solving, residential proxies | +| [Browser Use](https://browser-use.com) | ✅ Built-in | Cloud browser with proxy support, session profiles | +| Any CDP URL | ✅ `Browser.connect()` | Direct WebSocket connection | +| Custom | ✅ Implement `BrowserProvider` | Just return a `cdpUrl` from `createSession()` | + +## Usage + +### With Browserbase + +```typescript +import { Browser, BrowserbaseProvider } from "@office-agents/browser"; + +const provider = new BrowserbaseProvider({ + apiKey: "bb-api-...", + projectId: "proj-...", +}); + +const browser = await Browser.launch({ provider }); + +// Navigate +await browser.page.goto("https://example.com"); + +// Get accessibility tree with element refs +const snapshot = await browser.page.snapshot(); +console.log(snapshot.tree); +// [0-1] document: Example Domain +// [0-5] heading: Example Domain +// [0-8] paragraph: This domain is for use in... 
+// [0-12] link: More information... + +// Click by ref from snapshot +await browser.page.clickRef("0-12"); + +// Screenshot +const { base64 } = await browser.page.screenshot(); + +// Type text +await browser.page.type("hello world"); + +// Press keys +await browser.page.pressKey("Enter"); +await browser.page.pressKey("Cmd+A"); + +// Evaluate JavaScript +const title = await browser.page.evaluate("document.title"); + +// Clean up +await browser.close(); +``` + +### With Browser Use + +```typescript +import { Browser, BrowserUseProvider } from "@office-agents/browser"; + +const provider = new BrowserUseProvider({ + apiKey: "bu-api-...", +}); + +const browser = await Browser.launch({ provider }); +await browser.page.goto("https://example.com"); +// ... same API as Browserbase +await browser.close(); +``` + +### With any CDP URL + +```typescript +import { Browser } from "@office-agents/browser"; + +const browser = await Browser.connect({ + cdpUrl: "wss://some-provider.com/cdp?token=...", +}); + +await browser.page.goto("https://example.com"); +await browser.close(); +``` + +### Low-level CDP access + +```typescript +import { CdpClient } from "@office-agents/browser"; + +const cdp = await CdpClient.connect("wss://..."); + +// Send any CDP command +await cdp.send("Page.navigate", { url: "https://example.com" }); + +// Listen for events +cdp.on("Page.loadEventFired", (params) => { + console.log("Page loaded"); +}); + +// Screenshot +const { data } = await cdp.send("Page.captureScreenshot", { format: "png" }); + +await cdp.close(); +``` + +## API + +### Browser + +- `Browser.launch({ provider, sessionOptions? })` — Create a cloud browser session via provider +- `Browser.connect({ cdpUrl })` — Connect directly to any CDP WebSocket URL +- `browser.page` — The active `Page` instance +- `browser.close()` — Close browser and release session + +### Page + +**Navigation:** +- `page.goto(url, { waitUntil?, timeoutMs? 
})` — Navigate to URL +- `page.reload()` — Reload page +- `page.goBack()` / `page.goForward()` — History navigation + +**State:** +- `page.snapshot()` — Accessibility tree with element refs (preferred for agents) +- `page.screenshot({ fullPage?, format?, quality? })` — Visual screenshot as base64 +- `page.getUrl()` / `page.getTitle()` / `page.getInfo()` +- `page.getText(selector?)` / `page.getHtml(selector?)` + +**Interaction:** +- `page.clickRef(ref)` — Click element by ref from snapshot (e.g. `"0-5"`, `"@0-5"`) +- `page.click(x, y, { button?, clickCount? })` — Click at coordinates +- `page.type(text, { delay? })` — Type text +- `page.pressKey(key)` — Press key or combo (`"Enter"`, `"Cmd+A"`, `"Ctrl+C"`) +- `page.fill(selector, value, { pressEnter? })` — Fill input field +- `page.hover(x, y)` — Hover at coordinates +- `page.scroll(x, y, deltaX, deltaY)` — Scroll + +**Other:** +- `page.evaluate(expression)` — Execute JavaScript in page +- `page.setViewport(width, height, { deviceScaleFactor? })` +- `page.waitForSelector(selector, timeoutMs?)` +- `page.waitForTimeout(ms)` + +### BrowserProvider + +Implement this interface to add a new cloud browser provider: + +```typescript +interface BrowserProvider { + name: string; + createSession(options?: CreateSessionOptions): Promise<BrowserSession>; + closeSession(sessionId: string): Promise<void>; +} + +interface BrowserSession { + cdpUrl: string; // The WebSocket URL — the universal primitive + sessionId: string; + metadata?: Record<string, unknown>; +} +``` + +## How it works + +This package is a direct port of the command set from [`@browserbasehq/browse-cli`](https://github.com/browserbase/stagehand/tree/main/packages/cli), rewritten to use **browser-native `WebSocket`** instead of the Node.js `ws` library. 
+ +Every command maps to CDP protocol calls: + +| Command | CDP Methods | +|---------|------------| +| `goto` | `Page.navigate` | +| `click` | `Input.dispatchMouseEvent` | +| `type` | `Input.dispatchKeyEvent` | +| `screenshot` | `Page.captureScreenshot` | +| `snapshot` | `Accessibility.getFullAXTree` + `DOM.getDocument` | +| `evaluate` | `Runtime.evaluate` | +| `viewport` | `Emulation.setDeviceMetricsOverride` | +| `back/forward` | `Page.getNavigationHistory` + `Page.navigateToHistoryEntry` | diff --git a/packages/browser/package.json b/packages/browser/package.json new file mode 100644 index 0000000..ca54e0d --- /dev/null +++ b/packages/browser/package.json @@ -0,0 +1,39 @@ +{ + "name": "@office-agents/browser", + "version": "0.0.1", + "type": "module", + "description": "Browser-native CDP client for cloud browser automation. Works in any browser context — no Node.js required.", + "license": "MIT", + "main": "src/index.ts", + "types": "src/index.ts", + "exports": { + ".": "./src/index.ts" + }, + "publishConfig": { + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + } + }, + "repository": { + "type": "git", + "url": "https://github.com/hewliyang/office-agents.git", + "directory": "packages/browser" + }, + "files": [ + "dist", + "README.md" + ], + "scripts": { + "typecheck": "tsc --noEmit", + "build": "tsc" + }, + "devDependencies": { + "devtools-protocol": "^0.0.1602427", + "typescript": "^5.8.0" + } +} diff --git a/packages/browser/src/browser.ts b/packages/browser/src/browser.ts new file mode 100644 index 0000000..c32458a --- /dev/null +++ b/packages/browser/src/browser.ts @@ -0,0 +1,172 @@ +import type { Protocol } from "devtools-protocol/types/protocol.js"; +import { CdpClient } from "./cdp.js"; +import { Page } from "./page.js"; +import type { + BrowserProvider, + BrowserSession, + CreateSessionOptions, +} from "./providers/types.js"; + +export interface 
/**
 * Handle on a remote Chrome instance driven over CDP.
 *
 * A `Browser` owns one {@link CdpClient} connection and tracks a single
 * "current" {@link Page}. Instances are created through {@link Browser.launch}
 * (provider-managed session) or {@link Browser.connect} (raw CDP URL); the
 * constructor is private.
 */
export class Browser {
  private cdp: CdpClient | null = null;
  private provider: BrowserProvider | null = null;
  private session: BrowserSession | null = null;
  private _page: Page | null = null;
  private currentTargetId: string | null = null;

  private constructor() {}

  /**
   * Creates a session through `options.provider`, connects to its CDP URL and
   * attaches to the first page.
   *
   * @throws The original connection/attachment error. The provider session
   *   and socket are released first, and a failure during that cleanup never
   *   replaces the original error.
   */
  static async launch(options: BrowserOptions): Promise<Browser> {
    const browser = new Browser();
    browser.provider = options.provider;
    browser.session = await options.provider.createSession(
      options.sessionOptions,
    );
    try {
      await browser.openConnection(browser.session.cdpUrl);
    } catch (err) {
      // Best-effort cleanup: the caller needs to see `err`, not a close error.
      await browser.close().catch(() => {});
      throw err;
    }
    return browser;
  }

  /**
   * Connects directly to a CDP WebSocket URL and attaches to the first page.
   *
   * @throws The connection/attachment error. If the socket was already open
   *   when attachment failed, it is closed before rethrowing so it does not
   *   leak.
   */
  static async connect(options: ConnectOptions): Promise<Browser> {
    const browser = new Browser();
    try {
      await browser.openConnection(options.cdpUrl);
    } catch (err) {
      await browser.close().catch(() => {});
      throw err;
    }
    return browser;
  }

  /** The currently attached page. Throws when the browser is not connected. */
  get page(): Page {
    if (!this._page) throw new Error("Browser not connected");
    return this._page;
  }

  /** Provider session id, or `undefined` for direct `connect()` browsers. */
  get sessionId(): string | undefined {
    return this.session?.sessionId;
  }

  /** Provider-supplied session metadata, if any. */
  get sessionMetadata(): Record<string, unknown> | undefined {
    return this.session?.metadata;
  }

  /** CDP URL of the provider session, or `undefined` when there is none. */
  get cdpUrl(): string | undefined {
    return this.session?.cdpUrl;
  }

  private get cdpClient(): CdpClient {
    if (!this.cdp) throw new Error("Browser not connected");
    return this.cdp;
  }

  /** Opens the CDP socket and attaches to the first page target. */
  private async openConnection(cdpUrl: string): Promise<void> {
    // Assign `this.cdp` before attaching so `close()` can tear the socket
    // down if attachment fails.
    this.cdp = await CdpClient.connect(cdpUrl);
    this._page = await Page.attachToFirstPage(this.cdp);
    this.currentTargetId = this._page.targetId ?? null;
  }

  /**
   * Makes `targetId` the current page: detaches the previous page session
   * (best effort), attaches to the new target and asks the browser to
   * activate it (best effort).
   */
  private async attachToTarget(targetId: string): Promise<Page> {
    if (this._page?.sessionId) {
      await this.cdpClient
        .send("Target.detachFromTarget", {
          sessionId: this._page.sessionId,
        })
        .catch(() => {});
    }
    const page = await Page.attachToTarget(this.cdpClient, targetId);
    this._page = page;
    this.currentTargetId = targetId;
    await this.cdpClient
      .send("Target.activateTarget", { targetId })
      .catch(() => {});
    return page;
  }

  /**
   * Lists targets of type `"page"`. `index` is the position within that
   * filtered list and `active` marks the target this instance is attached to.
   */
  async listTabs(): Promise<BrowserTab[]> {
    const { targetInfos } = await this.cdpClient.send("Target.getTargets");
    return targetInfos
      .filter((target) => target.type === "page")
      .map((target, index) => ({
        index,
        targetId: target.targetId,
        url: target.url,
        title: target.title,
        active: target.targetId === this.currentTargetId,
      }));
  }

  /** Opens a new tab at `url`, makes it current and returns the tab list. */
  async newTab(url = "about:blank"): Promise<BrowserTab[]> {
    const { targetId } = await this.cdpClient.send("Target.createTarget", {
      url,
    });
    await this.attachToTarget(targetId);
    return this.listTabs();
  }

  /**
   * Makes the tab at `index` (as reported by {@link listTabs}) current.
   *
   * @throws Error when no tab exists at `index`.
   */
  async switchTab(index: number): Promise<BrowserTab[]> {
    const tabs = await this.listTabs();
    const tab = tabs[index];
    if (!tab) {
      throw new Error(`No tab at index ${index}`);
    }
    await this.attachToTarget(tab.targetId);
    return this.listTabs();
  }

  /**
   * Closes the tab at `index`, or the active tab when `index` is omitted.
   * If no page target remains afterwards, a blank tab is created so there is
   * always a current page; otherwise the tab now at the closed position (or
   * the last tab) becomes current.
   *
   * @throws Error when the requested tab does not exist.
   */
  async closeTab(index?: number): Promise<BrowserTab[]> {
    const tabs = await this.listTabs();
    if (!tabs.length) return tabs;
    const targetTab =
      index === undefined ? tabs.find((tab) => tab.active) : tabs[index];
    if (!targetTab) {
      throw new Error(
        index === undefined ? "No active tab" : `No tab at index ${index}`,
      );
    }

    await this.cdpClient.send("Target.closeTarget", {
      targetId: targetTab.targetId,
    });

    const remaining = await this.listTabs();
    if (!remaining.length) {
      const { targetId } = await this.cdpClient.send("Target.createTarget", {
        url: "about:blank",
      });
      await this.attachToTarget(targetId);
      return this.listTabs();
    }

    const next = remaining[Math.min(targetTab.index, remaining.length - 1)];
    await this.attachToTarget(next.targetId);
    return this.listTabs();
  }

  /**
   * Closes the CDP connection and releases the provider session.
   *
   * Safe to call more than once. The provider session is released even when
   * closing the socket throws, so a socket failure cannot leave a cloud
   * session running; errors from `closeSession` itself are ignored.
   */
  async close(): Promise<void> {
    const cdp = this.cdp;
    // Clear local state first so a repeated or concurrent close() is a no-op.
    this.cdp = null;
    this._page = null;
    this.currentTargetId = null;
    try {
      if (cdp) await cdp.close();
    } finally {
      if (this.provider && this.session) {
        const { sessionId } = this.session;
        this.session = null;
        await this.provider.closeSession(sessionId).catch(() => {});
      }
    }
  }
}
/**
 * Root CDP connection over a browser-native `WebSocket`.
 *
 * Commands are JSON messages carrying a numeric `id`; replies are matched to
 * the pending promise by that id. Messages that carry a `sessionId` are
 * routed to the matching {@link CdpSession}, which keeps its own id space and
 * inflight table.
 */
export class CdpClient {
  private ws: WebSocket;
  private nextId = 1;
  private inflight = new Map<number, InflightRequest>();
  private eventHandlers = new Map<string, Set<(params: unknown) => void>>();
  private sessions = new Map<string, CdpSession>();
  private closeHandlers = new Set<(reason: string) => void>();

  private constructor(ws: WebSocket) {
    this.ws = ws;
    ws.onmessage = (event) => this.onMessage(event);
    ws.onclose = (event) => {
      const reason = `close code=${event.code} reason=${event.reason || ""}`;
      this.rejectAllInflight(reason);
      for (const handler of this.closeHandlers) {
        try {
          handler(reason);
        } catch {
          // A faulty listener must not prevent the others from running.
        }
      }
    };
    ws.onerror = () => {
      this.rejectAllInflight("websocket error");
    };
  }

  /**
   * Opens a WebSocket to `wsUrl` and resolves once it is open.
   *
   * The rejection message deliberately omits both the URL (it may embed
   * credentials) and the error event (it stringifies to `[object Event]`).
   */
  static connect(wsUrl: string): Promise<CdpClient> {
    return new Promise((resolve, reject) => {
      const ws = new WebSocket(wsUrl);
      ws.onopen = () => resolve(new CdpClient(ws));
      ws.onerror = () => reject(new Error("CDP WebSocket connection failed"));
      // Covers a socket that closes before opening without an error event.
      // After a successful open the constructor replaces this handler.
      ws.onclose = (event) =>
        reject(
          new Error(
            `CDP WebSocket closed before opening (code=${event.code})`,
          ),
        );
    });
  }

  /**
   * Sends a browser-level CDP command and resolves with its result.
   *
   * Rejects when the socket is not open, when the browser answers with an
   * error, or when the connection drops while the command is pending.
   */
  send<M extends CommandMethod>(
    method: M,
    ...args: SendArgs<M>
  ): Promise<CommandReturn<M>> {
    const id = this.nextId++;
    const params = args[0];
    const payload: Record<string, unknown> = { id, method };
    if (params !== undefined) payload.params = params;

    return new Promise<CommandReturn<M>>((resolve, reject) => {
      this.inflight.set(id, {
        resolve: resolve as (v: unknown) => void,
        reject,
        method,
      });
      try {
        this.transmit(payload);
      } catch (err) {
        // Nothing was sent, so no reply will ever arrive for this id.
        this.inflight.delete(id);
        const detail = err instanceof Error ? err.message : String(err);
        reject(new Error(`CDP ${method}: ${detail}`));
      }
    });
  }

  /**
   * Serialises `payload` and writes it to the socket.
   *
   * A WebSocket silently discards data sent while CLOSING/CLOSED, which would
   * leave the caller's promise pending forever, so this throws unless the
   * socket is OPEN.
   *
   * @internal Used by {@link CdpSession}; not part of the public API.
   */
  transmit(payload: Record<string, unknown>): void {
    if (this.ws.readyState !== WebSocket.OPEN) {
      throw new Error(
        `connection is not open (readyState=${this.ws.readyState})`,
      );
    }
    this.ws.send(JSON.stringify(payload));
  }

  /** Subscribes `handler` to a CDP event, regardless of which session emitted it. */
  on<E extends EventMethod>(event: E, handler: CdpEventHandler<E>): void {
    const set = this.eventHandlers.get(event) ?? new Set();
    set.add(handler as (params: unknown) => void);
    this.eventHandlers.set(event, set);
  }

  /** Removes a handler previously registered with {@link on}. */
  off<E extends EventMethod>(event: E, handler: CdpEventHandler<E>): void {
    this.eventHandlers.get(event)?.delete(handler as (params: unknown) => void);
  }

  /** Registers a listener invoked with a reason string when the socket closes. */
  onClose(handler: (reason: string) => void): void {
    this.closeHandlers.add(handler);
  }

  /** Removes a listener previously registered with {@link onClose}. */
  offClose(handler: (reason: string) => void): void {
    this.closeHandlers.delete(handler);
  }

  /** Returns the (cached) session wrapper for `sessionId`, creating it on first use. */
  session(sessionId: string): CdpSession {
    let s = this.sessions.get(sessionId);
    if (!s) {
      s = new CdpSession(this, sessionId);
      this.sessions.set(sessionId, s);
    }
    return s;
  }

  /** Starts closing the socket; pending commands are rejected by the close handler. */
  async close(): Promise<void> {
    this.ws.close();
  }

  /** The underlying socket's `readyState`. */
  get readyState(): number {
    return this.ws.readyState;
  }

  private onMessage(event: MessageEvent): void {
    let data: (CdpResponseMessage & CdpEventMessage) | null;
    try {
      // Non-string frames are parsed as "" so they fail and are ignored.
      data = JSON.parse(typeof event.data === "string" ? event.data : "");
    } catch {
      return;
    }
    // Valid JSON that is not an object (e.g. `null`) carries nothing to route.
    if (data === null || typeof data !== "object") return;

    // Response to a command
    if (typeof data.id === "number") {
      const { sessionId } = data;
      const inflight = sessionId
        ? this.sessions.get(sessionId)?.consumeInflight(data.id)
        : this.inflight.get(data.id);

      if (inflight) {
        // Session requests were already removed by consumeInflight().
        if (!sessionId) this.inflight.delete(data.id);
        if (data.error) {
          inflight.reject(
            new Error(`CDP ${inflight.method}: ${data.error.message}`),
          );
        } else {
          inflight.resolve(data.result ?? {});
        }
      }
      return;
    }

    // Event
    if (data.method) {
      const { sessionId } = data;
      const params = data.params ?? {};

      if (sessionId) {
        this.sessions.get(sessionId)?.dispatchEvent(data.method, params);
      }

      // Root-level handlers see every event, session-scoped or not.
      const handlers = this.eventHandlers.get(data.method);
      if (handlers) {
        for (const h of handlers) {
          try {
            h(params);
          } catch {
            // One failing handler must not stop delivery to the rest.
          }
        }
      }
    }
  }

  private rejectAllInflight(reason: string): void {
    for (const req of this.inflight.values()) {
      req.reject(
        new Error(`CDP connection closed (${reason}), pending: ${req.method}`),
      );
    }
    this.inflight.clear();
    for (const session of this.sessions.values()) {
      session.rejectAll(reason);
    }
  }
}

/**
 * A CDP session multiplexed over the root connection. Every outgoing message
 * is tagged with this session's id, and the root client routes replies and
 * events back through {@link consumeInflight} and {@link dispatchEvent}.
 */
export class CdpSession {
  private nextId = 1;
  private inflight = new Map<number, InflightRequest>();
  private eventHandlers = new Map<string, Set<(params: unknown) => void>>();

  constructor(
    private root: CdpClient,
    readonly id: string,
  ) {}

  /**
   * Sends a session-scoped CDP command and resolves with its result.
   * Rejects immediately when the root socket is not open.
   */
  send<M extends CommandMethod>(
    method: M,
    ...args: SendArgs<M>
  ): Promise<CommandReturn<M>> {
    const id = this.nextId++;
    const params = args[0];
    const payload: Record<string, unknown> = {
      id,
      method,
      sessionId: this.id,
    };
    if (params !== undefined) payload.params = params;

    return new Promise<CommandReturn<M>>((resolve, reject) => {
      this.inflight.set(id, {
        resolve: resolve as (v: unknown) => void,
        reject,
        method,
      });
      try {
        this.root.transmit(payload);
      } catch (err) {
        // Nothing was sent, so no reply will ever arrive for this id.
        this.inflight.delete(id);
        const detail = err instanceof Error ? err.message : String(err);
        reject(new Error(`CDP ${method}: ${detail}`));
      }
    });
  }

  /** Subscribes `handler` to events emitted within this session only. */
  on<E extends EventMethod>(event: E, handler: CdpEventHandler<E>): void {
    const set = this.eventHandlers.get(event) ?? new Set();
    set.add(handler as (params: unknown) => void);
    this.eventHandlers.set(event, set);
  }

  /** Removes a handler previously registered with {@link on}. */
  off<E extends EventMethod>(event: E, handler: CdpEventHandler<E>): void {
    this.eventHandlers.get(event)?.delete(handler as (params: unknown) => void);
  }

  /** Removes and returns the pending request for `id` (called by the root client). */
  consumeInflight(id: number): InflightRequest | undefined {
    const req = this.inflight.get(id);
    if (req) this.inflight.delete(id);
    return req;
  }

  /** Delivers an event to this session's handlers (called by the root client). */
  dispatchEvent(method: string, params: Record<string, unknown>): void {
    const handlers = this.eventHandlers.get(method);
    if (handlers) {
      for (const h of handlers) {
        try {
          h(params);
        } catch {
          // One failing handler must not stop delivery to the rest.
        }
      }
    }
  }

  /** Rejects every pending command, e.g. when the root connection drops. */
  rejectAll(reason: string): void {
    for (const req of this.inflight.values()) {
      req.reject(
        new Error(`CDP session closed (${reason}), pending: ${req.method}`),
      );
    }
    this.inflight.clear();
  }
}
/**
 * Host-supplied hooks for the `browse` command.
 *
 * `getProvider` yields the browser provider to launch sessions with, or
 * `null` when none is set up. `writeFile`, when present, persists binary
 * data at the given path.
 */
export interface BrowseCommandConfig {
  getProvider: () => BrowserProvider | null;
  writeFile?: (path: string, data: Uint8Array) => Promise<void>;
}

// Module-wide configuration; stays null until configureBrowseCommand() runs.
let config: BrowseCommandConfig | null = null;

/** Installs the hooks used by later `browse` invocations, replacing any previous ones. */
export function configureBrowseCommand(cfg: BrowseCommandConfig): void {
  config = cfg;
}

/**
 * Resolves the configured provider.
 *
 * @throws Error when nothing has been configured or the hook yields no provider.
 */
function getProvider(): BrowserProvider {
  const resolved = config ? config.getProvider() : null;
  if (resolved) return resolved;
  throw new Error(
    "No browser provider configured. Set a browser provider in settings.",
  );
}
// Value-less switches recorded as "true". They are ALSO kept in `positional`
// because commands such as `snapshot` re-scan their own argument list.
const BOOLEAN_FLAG_ARGS: ReadonlySet<string> = new Set([
  "--full-page",
  "--no-enter",
  "--interactive",
  "--compact",
  "-i",
  "-c",
  "--httpOnly",
  "--secure",
]);

/**
 * Splits a raw `browse` argument list into named flags and positionals.
 *
 * - `--json` and `--help`/`-h` are consumed: recorded in `flags` only.
 * - Boolean switches, `--depth N`/`-d N` and `--name=value` options are
 *   recorded in `flags` and also left in `positional`, in their original order.
 * - Everything else is positional.
 */
function parseArgs(args: string[]): {
  flags: Record<string, string>;
  positional: string[];
} {
  const flags: Record<string, string> = {};
  const positional: string[] = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === "--json") {
      flags.json = "true";
      continue;
    }
    if (arg === "--help" || arg === "-h") {
      flags.help = "true";
      continue;
    }
    if (BOOLEAN_FLAG_ARGS.has(arg)) {
      // "--full-page" -> "full-page", "-i" -> "i"
      flags[arg.replace(/^--?/, "")] = "true";
      positional.push(arg);
      continue;
    }

    const next = args[i + 1];
    if ((arg === "--depth" || arg === "-d") && next) {
      flags.depth = next;
      positional.push(arg, next);
      i += 1; // the value has been consumed as well
      continue;
    }

    const eqIdx = arg.indexOf("=");
    if (arg.startsWith("--") && eqIdx !== -1) {
      flags[arg.slice(2, eqIdx)] = arg.slice(eqIdx + 1);
    }
    positional.push(arg);
  }

  return { flags, positional };
}

/**
 * Renders a command result for stdout: strings pass through unchanged unless
 * JSON output was requested; everything else is pretty-printed JSON.
 */
function output(data: unknown, json: boolean): string {
  if (!json && typeof data === "string") return data;
  return JSON.stringify(data, null, 2);
}

/**
 * Extracts `snapshot` options from its argument list.
 *
 * Recognises `-i`/`--interactive`, `-c`/`--compact`, `-d N`/`--depth N` and
 * `--depth=N`. A depth that does not parse as an integer is ignored, and in
 * the two-token form its value token is then not consumed.
 */
function parseSnapshotOptions(args: string[]): {
  interactive?: boolean;
  compact?: boolean;
  depth?: number;
} {
  const options: { interactive?: boolean; compact?: boolean; depth?: number } =
    {};
  const depthPrefix = "--depth=";

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    switch (arg) {
      case "-i":
      case "--interactive":
        options.interactive = true;
        break;
      case "-c":
      case "--compact":
        options.compact = true;
        break;
      case "-d":
      case "--depth": {
        const value = args[i + 1];
        const depth = value ? parseInt(value, 10) : NaN;
        if (!Number.isNaN(depth)) {
          options.depth = depth;
          i += 1;
        }
        break;
      }
      default:
        if (arg.startsWith(depthPrefix)) {
          const depth = parseInt(arg.slice(depthPrefix.length), 10);
          if (!Number.isNaN(depth)) options.depth = depth;
        }
    }
  }

  return options;
}

/**
 * Reports whether `value` is a snapshot element ref rather than a CSS
 * selector. Accepted forms: `e12`, `@e12`, `0-5` and `@0-5`. The `@` prefix
 * is optional on both forms; the README documents `"@0-5"` as a valid ref.
 */
function looksLikeSnapshotRef(value: string): boolean {
  return /^@?e\d+$/.test(value) || /^@?\d+-\d+$/.test(value);
}

/**
 * Parses `text` as a JSON object and returns it with every value converted
 * to a string.
 *
 * Primitives use `String(value)`; nested objects and arrays are re-serialised
 * as JSON instead of collapsing to `"[object Object]"`.
 *
 * @throws SyntaxError when `text` is not valid JSON.
 * @throws Error when the parsed value is not a plain (non-array) object.
 */
function parseJsonObject(text: string): Record<string, string> {
  const parsed: unknown = JSON.parse(text);
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new Error("Expected a JSON object");
  }
  return Object.fromEntries(
    Object.entries(parsed).map(([key, value]) => [
      key,
      value !== null && typeof value === "object"
        ? JSON.stringify(value)
        : String(value),
    ]),
  );
}
0 : 1 }; + } + + const command = positional[0]; + const cmdArgs = positional.slice(1); + const json = flags.json === "true"; + + try { + switch (command) { + case "open": { + const url = cmdArgs[0]; + if (!url) { + return { + stdout: "", + stderr: "Usage: browse open ", + exitCode: 1, + }; + } + if (activeBrowser) { + await activeBrowser.close().catch(() => {}); + activeBrowser = null; + } + activeBrowser = await Browser.launch({ provider: getProvider() }); + emitSessionChange(); + await activeBrowser.page.goto(url, { + waitUntil: flags.wait ?? "load", + timeoutMs: flags.timeout ? parseInt(flags.timeout, 10) : undefined, + }); + const result: Record = { + url: await activeBrowser.page.getUrl(), + }; + const liveUrl = activeBrowser.sessionMetadata?.liveUrl; + if (liveUrl) result.liveUrl = liveUrl; + return { stdout: output(result, json), stderr: "", exitCode: 0 }; + } + + case "status": { + if (!activeBrowser) { + return { + stdout: output({ status: "disconnected" }, json), + stderr: "", + exitCode: 0, + }; + } + const url = await activeBrowser.page.getUrl(); + const title = await activeBrowser.page.getTitle(); + return { + stdout: output( + { + status: "connected", + sessionId: activeBrowser.sessionId, + liveUrl: activeBrowser.sessionMetadata?.liveUrl, + url, + title, + }, + json, + ), + stderr: "", + exitCode: 0, + }; + } + + case "stop": { + if (activeBrowser) { + await activeBrowser.close(); + activeBrowser = null; + emitSessionChange(); + } + return { + stdout: output({ stopped: true }, json), + stderr: "", + exitCode: 0, + }; + } + + case "reload": { + const browser = requireBrowser(); + const result = await browser.page.reload(); + return { stdout: output(result, json), stderr: "", exitCode: 0 }; + } + + case "back": { + const browser = requireBrowser(); + const result = await browser.page.goBack(); + return { stdout: output(result, json), stderr: "", exitCode: 0 }; + } + + case "forward": { + const browser = requireBrowser(); + const result = await 
browser.page.goForward(); + return { stdout: output(result, json), stderr: "", exitCode: 0 }; + } + + case "snapshot": { + const browser = requireBrowser(); + const snap = await browser.page.snapshot(parseSnapshotOptions(cmdArgs)); + return { + stdout: output( + json + ? { tree: snap.tree, refCount: Object.keys(snap.refs).length } + : snap.tree, + json, + ), + stderr: "", + exitCode: 0, + }; + } + + case "screenshot": { + const browser = requireBrowser(); + const outFile = cmdArgs[0]; + const result = await browser.page.screenshot({ + format: (flags.format as "png" | "jpeg") ?? "png", + quality: flags.quality ? parseInt(flags.quality, 10) : undefined, + fullPage: flags["full-page"] === "true", + }); + + if (outFile) { + if (!config?.writeFile) { + return { + stdout: "", + stderr: "File writing not available", + exitCode: 1, + }; + } + const binary = Uint8Array.from(atob(result.base64), (c) => + c.charCodeAt(0), + ); + await config.writeFile(outFile, binary); + return { + stdout: `Saved ${result.format ?? 
"screenshot"} to ${outFile} (${binary.length} bytes)`, + stderr: "", + exitCode: 0, + }; + } + + return { stdout: output(result, true), stderr: "", exitCode: 0 }; + } + + case "pdf": { + const browser = requireBrowser(); + const result = await browser.page.pdf(); + return { stdout: output(result, true), stderr: "", exitCode: 0 }; + } + + case "download": { + const browser = requireBrowser(); + const url = cmdArgs[0]; + const outFile = cmdArgs[1]; + if (!url || !outFile) { + return { + stdout: "", + stderr: "Usage: browse download ", + exitCode: 1, + }; + } + if (!config?.writeFile) { + return { + stdout: "", + stderr: "File writing not available", + exitCode: 1, + }; + } + + const b64 = await browser.page.evaluate(` + fetch(${JSON.stringify(url)}) + .then(r => { + if (!r.ok) throw new Error('HTTP ' + r.status); + return r.arrayBuffer(); + }) + .then(buf => { + const bytes = new Uint8Array(buf); + let binary = ''; + for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]); + return btoa(binary); + }) + `); + + if (typeof b64 !== "string") { + return { + stdout: "", + stderr: `Download failed: ${JSON.stringify(b64)}`, + exitCode: 1, + }; + } + + const binary = Uint8Array.from(atob(b64), (c) => c.charCodeAt(0)); + await config.writeFile(outFile, binary); + + return { + stdout: `Downloaded ${url} to ${outFile} (${binary.length} bytes)`, + stderr: "", + exitCode: 0, + }; + } + + case "get": { + const browser = requireBrowser(); + const what = cmdArgs[0]; + if (!what) { + return { + stdout: "", + stderr: + "Usage: browse get ", + exitCode: 1, + }; + } + switch (what) { + case "url": { + const value = await browser.page.getUrl(); + return { + stdout: output(json ? { url: value } : value, json), + stderr: "", + exitCode: 0, + }; + } + case "title": { + const value = await browser.page.getTitle(); + return { + stdout: output(json ? 
{ title: value } : value, json), + stderr: "", + exitCode: 0, + }; + } + case "text": { + const value = await browser.page.getText(cmdArgs[1]); + return { + stdout: output(json ? { text: value } : value, json), + stderr: "", + exitCode: 0, + }; + } + case "html": { + const value = await browser.page.getHtml(cmdArgs[1]); + return { + stdout: output(json ? { html: value } : value, json), + stderr: "", + exitCode: 0, + }; + } + case "value": { + const target = cmdArgs[1]; + if (!target) + return { + stdout: "", + stderr: "Usage: browse get value ", + exitCode: 1, + }; + const value = await browser.page.getValue(target); + return { + stdout: output(json ? { value } : value, json), + stderr: "", + exitCode: 0, + }; + } + case "attr": { + const target = cmdArgs[1]; + const attr = cmdArgs[2]; + if (!target || !attr) { + return { + stdout: "", + stderr: "Usage: browse get attr ", + exitCode: 1, + }; + } + const value = await browser.page.getAttribute(target, attr); + return { + stdout: output(json ? { value } : value, json), + stderr: "", + exitCode: 0, + }; + } + case "count": { + const target = cmdArgs[1]; + if (!target) + return { + stdout: "", + stderr: "Usage: browse get count ", + exitCode: 1, + }; + const count = await browser.page.getCount(target); + return { + stdout: output(json ? { count } : String(count), json), + stderr: "", + exitCode: 0, + }; + } + case "cdp-url": { + const value = browser.cdpUrl ?? ""; + return { + stdout: output(json ? 
{ cdpUrl: value } : value, json), + stderr: "", + exitCode: 0, + }; + } + default: + return { + stdout: "", + stderr: `Unknown get target: ${what}`, + exitCode: 1, + }; + } + } + + case "is": { + const browser = requireBrowser(); + const what = cmdArgs[0]; + const target = cmdArgs[1]; + if (!what || !target) { + return { + stdout: "", + stderr: "Usage: browse is ", + exitCode: 1, + }; + } + let value: boolean; + switch (what) { + case "visible": + value = await browser.page.isVisible(target); + break; + case "enabled": + value = await browser.page.isEnabled(target); + break; + case "checked": + value = await browser.page.isChecked(target); + break; + default: + return { + stdout: "", + stderr: `Unknown state check: ${what}`, + exitCode: 1, + }; + } + return { + stdout: output(json ? { value } : String(value), json), + stderr: "", + exitCode: 0, + }; + } + + case "click": { + const browser = requireBrowser(); + const target = cmdArgs[0]; + if (!target) + return { + stdout: "", + stderr: "Usage: browse click ", + exitCode: 1, + }; + const result = looksLikeSnapshotRef(target) + ? await browser.page.clickRef(target) + : await browser.page.clickSelector(target); + return { stdout: output(result, json), stderr: "", exitCode: 0 }; + } + + case "dblclick": { + const browser = requireBrowser(); + const target = cmdArgs[0]; + if (!target) + return { + stdout: "", + stderr: "Usage: browse dblclick ", + exitCode: 1, + }; + const result = await browser.page.dblclick(target); + return { stdout: output(result, json), stderr: "", exitCode: 0 }; + } + + case "click-xy": { + const browser = requireBrowser(); + const x = parseFloat(cmdArgs[0]); + const y = parseFloat(cmdArgs[1]); + if (isNaN(x) || isNaN(y)) { + return { + stdout: "", + stderr: "Usage: browse click-xy ", + exitCode: 1, + }; + } + const result = await browser.page.click(x, y, { + button: flags.button, + clickCount: flags.count ? 
parseInt(flags.count, 10) : undefined, + }); + return { stdout: output(result, json), stderr: "", exitCode: 0 }; + } + + case "type": { + const browser = requireBrowser(); + const text = cmdArgs.join(" "); + if (!text) + return { + stdout: "", + stderr: "Usage: browse type ", + exitCode: 1, + }; + const result = await browser.page.type(text, { + delay: flags.delay ? parseInt(flags.delay, 10) : undefined, + }); + return { stdout: output(result, json), stderr: "", exitCode: 0 }; + } + + case "press": { + const browser = requireBrowser(); + const key = cmdArgs[0]; + if (!key) + return { + stdout: "", + stderr: "Usage: browse press ", + exitCode: 1, + }; + await browser.page.pressKey(key); + return { + stdout: output({ pressed: key }, json), + stderr: "", + exitCode: 0, + }; + } + + case "fill": { + const browser = requireBrowser(); + const target = cmdArgs[0]; + const value = cmdArgs.slice(1).join(" "); + if (!target || !value) { + return { + stdout: "", + stderr: "Usage: browse fill ", + exitCode: 1, + }; + } + await browser.page.fill(target, value, { + pressEnter: flags["no-enter"] !== "true", + }); + return { + stdout: output({ filled: true }, json), + stderr: "", + exitCode: 0, + }; + } + + case "hover": { + const browser = requireBrowser(); + const x = parseFloat(cmdArgs[0]); + const y = parseFloat(cmdArgs[1]); + if (!isNaN(x) && !isNaN(y) && cmdArgs.length >= 2) { + await browser.page.hover(x, y); + } else { + const target = cmdArgs[0]; + if (!target) + return { + stdout: "", + stderr: "Usage: browse hover | hover ", + exitCode: 1, + }; + await browser.page.hoverTarget(target); + } + return { + stdout: output({ hovered: true }, json), + stderr: "", + exitCode: 0, + }; + } + + case "focus": { + const browser = requireBrowser(); + const target = cmdArgs[0]; + if (!target) + return { + stdout: "", + stderr: "Usage: browse focus ", + exitCode: 1, + }; + await browser.page.focus(target); + return { + stdout: output({ focused: true }, json), + stderr: "", + exitCode: 
0, + }; + } + + case "check": { + const browser = requireBrowser(); + const target = cmdArgs[0]; + if (!target) + return { + stdout: "", + stderr: "Usage: browse check ", + exitCode: 1, + }; + await browser.page.check(target, true); + return { + stdout: output({ checked: true }, json), + stderr: "", + exitCode: 0, + }; + } + + case "uncheck": { + const browser = requireBrowser(); + const target = cmdArgs[0]; + if (!target) + return { + stdout: "", + stderr: "Usage: browse uncheck ", + exitCode: 1, + }; + await browser.page.check(target, false); + return { + stdout: output({ checked: false }, json), + stderr: "", + exitCode: 0, + }; + } + + case "select": { + const browser = requireBrowser(); + const target = cmdArgs[0]; + const values = cmdArgs.slice(1); + if (!target || values.length === 0) { + return { + stdout: "", + stderr: "Usage: browse select ", + exitCode: 1, + }; + } + await browser.page.select(target, values); + return { + stdout: output({ selected: values }, json), + stderr: "", + exitCode: 0, + }; + } + + case "scroll": { + const browser = requireBrowser(); + const [sx, sy, dx, dy] = cmdArgs.map(parseFloat); + if ([sx, sy, dx, dy].some(isNaN)) { + return { + stdout: "", + stderr: "Usage: browse scroll ", + exitCode: 1, + }; + } + await browser.page.scroll(sx, sy, dx, dy); + return { + stdout: output({ scrolled: true }, json), + stderr: "", + exitCode: 0, + }; + } + + case "eval": { + const browser = requireBrowser(); + const expr = cmdArgs.join(" "); + if (!expr) + return { + stdout: "", + stderr: "Usage: browse eval ", + exitCode: 1, + }; + const result = await browser.page.evaluate(expr); + return { stdout: output({ result }, json), stderr: "", exitCode: 0 }; + } + + case "viewport": { + const browser = requireBrowser(); + const w = parseInt(cmdArgs[0], 10); + const h = parseInt(cmdArgs[1], 10); + if (isNaN(w) || isNaN(h)) { + return { + stdout: "", + stderr: "Usage: browse viewport ", + exitCode: 1, + }; + } + await browser.page.setViewport(w, h, { + 
deviceScaleFactor: flags.scale ? parseFloat(flags.scale) : undefined, + }); + return { + stdout: output({ viewport: { width: w, height: h } }, json), + stderr: "", + exitCode: 0, + }; + } + + case "set": { + const browser = requireBrowser(); + const sub = cmdArgs[0]; + switch (sub) { + case "viewport": { + const w = parseInt(cmdArgs[1], 10); + const h = parseInt(cmdArgs[2], 10); + const scale = cmdArgs[3] ? parseFloat(cmdArgs[3]) : undefined; + if (isNaN(w) || isNaN(h)) { + return { + stdout: "", + stderr: "Usage: browse set viewport [scale]", + exitCode: 1, + }; + } + await browser.page.setViewport(w, h, { deviceScaleFactor: scale }); + return { + stdout: output( + { viewport: { width: w, height: h, scale } }, + json, + ), + stderr: "", + exitCode: 0, + }; + } + case "headers": { + const raw = cmdArgs.slice(1).join(" "); + if (!raw) + return { + stdout: "", + stderr: "Usage: browse set headers ", + exitCode: 1, + }; + await browser.page.setHeaders(parseJsonObject(raw)); + return { + stdout: output({ headers: true }, json), + stderr: "", + exitCode: 0, + }; + } + case "offline": { + const mode = cmdArgs[1] ?? "on"; + await browser.page.setOffline(mode !== "off"); + return { + stdout: output({ offline: mode !== "off" }, json), + stderr: "", + exitCode: 0, + }; + } + case "media": { + const scheme = (cmdArgs[1] ?? 
"light") as + | "dark" + | "light" + | "no-preference"; + await browser.page.setMedia(scheme); + return { + stdout: output({ media: scheme }, json), + stderr: "", + exitCode: 0, + }; + } + case "geo": { + const lat = parseFloat(cmdArgs[1]); + const lng = parseFloat(cmdArgs[2]); + if (isNaN(lat) || isNaN(lng)) { + return { + stdout: "", + stderr: "Usage: browse set geo ", + exitCode: 1, + }; + } + await browser.page.setGeolocation(lat, lng); + return { + stdout: output({ latitude: lat, longitude: lng }, json), + stderr: "", + exitCode: 0, + }; + } + default: + return { + stdout: "", + stderr: `Unknown set command: ${sub}`, + exitCode: 1, + }; + } + } + + case "wait": { + const browser = requireBrowser(); + const timeout = flags.timeout ? parseInt(flags.timeout, 10) : 30000; + + if (cmdArgs[0] === "--text") { + const text = cmdArgs[1]; + if (!text) + return { + stdout: "", + stderr: "Usage: browse wait --text ", + exitCode: 1, + }; + await browser.page.waitForText(text, timeout); + } else if (cmdArgs[0] === "--url") { + const pattern = cmdArgs[1]; + if (!pattern) + return { + stdout: "", + stderr: "Usage: browse wait --url ", + exitCode: 1, + }; + await browser.page.waitForUrl(pattern, timeout); + } else if (cmdArgs[0] === "--load") { + const state = cmdArgs[1] ?? "load"; + await browser.page.waitForLoad(state, timeout); + } else if (cmdArgs[0] === "--fn") { + const expression = cmdArgs.slice(1).join(" "); + if (!expression) + return { + stdout: "", + stderr: "Usage: browse wait --fn ", + exitCode: 1, + }; + await browser.page.waitForFunction(expression, timeout); + } else if (cmdArgs[0] === "selector") { + const sel = cmdArgs[1]; + if (!sel) + return { + stdout: "", + stderr: "Usage: browse wait selector ", + exitCode: 1, + }; + const state = + (flags.state as "visible" | "hidden" | "attached" | undefined) ?? + "visible"; + await browser.page.waitForSelector(sel, timeout, state); + } else if (cmdArgs[0] === "load") { + await browser.page.waitForLoad(cmdArgs[1] ?? 
"load", timeout); + } else if (cmdArgs[0] === "timeout") { + const ms = parseInt(cmdArgs[1], 10); + if (isNaN(ms)) + return { + stdout: "", + stderr: "Usage: browse wait timeout ", + exitCode: 1, + }; + await browser.page.waitForTimeout(ms); + } else if (cmdArgs[0] && /^\d+$/.test(cmdArgs[0])) { + await browser.page.waitForTimeout(parseInt(cmdArgs[0], 10)); + } else if (cmdArgs[0]) { + const state = + (flags.state as "visible" | "hidden" | "attached" | undefined) ?? + "visible"; + await browser.page.waitForSelector(cmdArgs[0], timeout, state); + } else { + return { + stdout: "", + stderr: "Usage: browse wait ", + exitCode: 1, + }; + } + + return { + stdout: output({ waited: true }, json), + stderr: "", + exitCode: 0, + }; + } + + case "cookies": { + const browser = requireBrowser(); + const sub = cmdArgs[0] ?? "get"; + if (sub === "clear") { + await browser.page.clearCookies(); + return { + stdout: output({ cleared: true }, json), + stderr: "", + exitCode: 0, + }; + } + if (sub === "set") { + const name = cmdArgs[1]; + const value = cmdArgs[2]; + if (!name || value === undefined) { + return { + stdout: "", + stderr: "Usage: browse cookies set ", + exitCode: 1, + }; + } + const success = await browser.page.setCookie({ + name, + value, + url: flags.url, + domain: flags.domain, + path: flags.path, + httpOnly: flags.httpOnly === "true", + secure: flags.secure === "true", + sameSite: flags.sameSite as "Strict" | "Lax" | "None" | undefined, + expires: flags.expires ? 
parseInt(flags.expires, 10) : undefined, + }); + return { stdout: output({ success }, json), stderr: "", exitCode: 0 }; + } + const cookies = await browser.page.getCookies(); + return { stdout: output(cookies, true), stderr: "", exitCode: 0 }; + } + + case "storage": { + const browser = requireBrowser(); + const kind = cmdArgs[0]; + if (kind !== "local" && kind !== "session") { + return { + stdout: "", + stderr: "Usage: browse storage [key|set|clear]", + exitCode: 1, + }; + } + const op = cmdArgs[1]; + if (op === "set") { + const key = cmdArgs[2]; + const value = cmdArgs.slice(3).join(" "); + if (!key || value === "") { + return { + stdout: "", + stderr: `Usage: browse storage ${kind} set `, + exitCode: 1, + }; + } + await browser.page.setStorage(kind, key, value); + return { + stdout: output({ stored: true }, json), + stderr: "", + exitCode: 0, + }; + } + if (op === "clear") { + await browser.page.clearStorage(kind); + return { + stdout: output({ cleared: true }, json), + stderr: "", + exitCode: 0, + }; + } + const value = await browser.page.getStorage(kind, op); + return { stdout: output(value, true), stderr: "", exitCode: 0 }; + } + + case "tab": { + const browser = requireBrowser(); + const sub = cmdArgs[0]; + if (!sub || sub === "list") { + const tabs = await browser.listTabs(); + return { stdout: output(tabs, true), stderr: "", exitCode: 0 }; + } + if (sub === "new") { + const tabs = await browser.newTab(cmdArgs[1] ?? "about:blank"); + return { stdout: output(tabs, true), stderr: "", exitCode: 0 }; + } + if (sub === "close") { + const index = + cmdArgs[1] !== undefined ? 
parseInt(cmdArgs[1], 10) : undefined; + if (cmdArgs[1] !== undefined && isNaN(index!)) { + return { + stdout: "", + stderr: "Usage: browse tab close [index]", + exitCode: 1, + }; + } + const tabs = await browser.closeTab(index); + return { stdout: output(tabs, true), stderr: "", exitCode: 0 }; + } + const index = parseInt(sub, 10); + if (isNaN(index)) { + return { + stdout: "", + stderr: "Usage: browse tab [list|new|close|]", + exitCode: 1, + }; + } + const tabs = await browser.switchTab(index); + return { stdout: output(tabs, true), stderr: "", exitCode: 0 }; + } + + default: + return { + stdout: "", + stderr: `Unknown command: ${command}\n\nRun 'browse --help' for usage.`, + exitCode: 1, + }; + } + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } +} + +export function getActiveBrowser(): Browser | null { + return activeBrowser; +} diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts new file mode 100644 index 0000000..b8d441c --- /dev/null +++ b/packages/browser/src/index.ts @@ -0,0 +1,41 @@ +export { CdpClient, CdpSession, type CdpEventHandler } from "./cdp.js"; +export { + Page, + type NavigateResult, + type ClickResult, + type TypeResult, + type ScreenshotResult, + type PdfResult, + type PageInfo, + type CookieInput, +} from "./page.js"; +export { + Browser, + type BrowserOptions, + type ConnectOptions, + type BrowserTab, +} from "./browser.js"; +export { + captureSnapshot, + type Snapshot, + type SnapshotRef, + type SnapshotOptions, +} from "./snapshot.js"; +export { + type BrowserProvider, + type BrowserSession, + type CreateSessionOptions, + BrowserbaseProvider, + type BrowserbaseConfig, + BrowserUseProvider, + type BrowserUseConfig, +} from "./providers/index.js"; +export { + executeBrowseCommand, + configureBrowseCommand, + getActiveBrowser, + onBrowseSessionChange, + getBrowseSessionState, + type BrowseCommandConfig, + type 
BrowseSessionEvent, +} from "./command.js"; diff --git a/packages/browser/src/page.ts b/packages/browser/src/page.ts new file mode 100644 index 0000000..4dbbbb8 --- /dev/null +++ b/packages/browser/src/page.ts @@ -0,0 +1,848 @@ +import type { Protocol } from "devtools-protocol/types/protocol.js"; +import { CdpSession, type CdpClient } from "./cdp.js"; +import { + captureSnapshot, + type Snapshot, + type SnapshotOptions, +} from "./snapshot.js"; + +export interface NavigateResult { + url: string; +} + +export interface ClickResult { + clicked: boolean; +} + +export interface TypeResult { + typed: boolean; +} + +export interface ScreenshotResult { + base64: string; + format: "png" | "jpeg"; +} + +export interface PdfResult { + base64: string; +} + +export interface PageInfo { + url: string; + title: string; +} + +export interface CookieInput { + name: string; + value: string; + url?: string; + domain?: string; + path?: string; + secure?: boolean; + httpOnly?: boolean; + sameSite?: "Strict" | "Lax" | "None"; + expires?: number; +} + +type CdpSender = CdpClient | CdpSession; + +type WaitSelectorState = "visible" | "hidden" | "attached"; + +const KEY_MAP: Record = + { + Enter: { key: "Enter", code: "Enter", keyCode: 13 }, + Tab: { key: "Tab", code: "Tab", keyCode: 9 }, + Escape: { key: "Escape", code: "Escape", keyCode: 27 }, + Backspace: { key: "Backspace", code: "Backspace", keyCode: 8 }, + Delete: { key: "Delete", code: "Delete", keyCode: 46 }, + ArrowUp: { key: "ArrowUp", code: "ArrowUp", keyCode: 38 }, + ArrowDown: { key: "ArrowDown", code: "ArrowDown", keyCode: 40 }, + ArrowLeft: { key: "ArrowLeft", code: "ArrowLeft", keyCode: 37 }, + ArrowRight: { key: "ArrowRight", code: "ArrowRight", keyCode: 39 }, + Home: { key: "Home", code: "Home", keyCode: 36 }, + End: { key: "End", code: "End", keyCode: 35 }, + PageUp: { key: "PageUp", code: "PageUp", keyCode: 33 }, + PageDown: { key: "PageDown", code: "PageDown", keyCode: 34 }, + Space: { key: " ", code: "Space", keyCode: 
32 }, + }; + +const MODIFIER_MAP: Record< + string, + { key: string; code: string; keyCode: number; bit: number } +> = { + Shift: { key: "Shift", code: "ShiftLeft", keyCode: 16, bit: 8 }, + Control: { key: "Control", code: "ControlLeft", keyCode: 17, bit: 4 }, + Ctrl: { key: "Control", code: "ControlLeft", keyCode: 17, bit: 4 }, + Alt: { key: "Alt", code: "AltLeft", keyCode: 18, bit: 1 }, + Meta: { key: "Meta", code: "MetaLeft", keyCode: 91, bit: 2 }, + Cmd: { key: "Meta", code: "MetaLeft", keyCode: 91, bit: 2 }, + Command: { key: "Meta", code: "MetaLeft", keyCode: 91, bit: 2 }, +}; + +function globToRegExp(glob: string): RegExp { + const escaped = glob.replace(/[.+^${}()|[\]\\]/g, "\\$&"); + const regex = `^${escaped + .replace(/\*\*/g, "::DOUBLE_STAR::") + .replace(/\*/g, "[^/]*") + .replace(/::DOUBLE_STAR::/g, ".*")}$`; + return new RegExp(regex); +} + +export class Page { + private session: CdpSender; + private currentUrl = ""; + private snapshotData: Snapshot | null = null; + + constructor( + session: CdpSender, + readonly targetId?: string, + ) { + this.session = session; + } + + get sessionId(): string | undefined { + return this.session instanceof CdpSession ? 
this.session.id : undefined; + } + + async enableDomains(): Promise { + await Promise.all([ + this.session.send("Page.enable"), + this.session.send("Page.setLifecycleEventsEnabled", { enabled: true }), + this.session.send("DOM.enable"), + this.session.send("Runtime.enable"), + this.session.send("Network.enable"), + ]); + } + + static async attachToFirstPage(cdp: CdpClient): Promise { + const { targetInfos } = await cdp.send("Target.getTargets"); + let pageTarget = targetInfos.find( + (t) => t.type === "page" && t.attached !== true, + ); + + if (!pageTarget) { + const { targetId } = await cdp.send("Target.createTarget", { + url: "about:blank", + }); + const { targetInfos: updated } = await cdp.send("Target.getTargets"); + pageTarget = updated.find((t) => t.targetId === targetId); + } + + if (!pageTarget) { + throw new Error("Could not find or create a page target"); + } + + return this.attachToTarget(cdp, pageTarget.targetId); + } + + static async attachToTarget(cdp: CdpClient, targetId: string): Promise { + const { sessionId } = await cdp.send("Target.attachToTarget", { + targetId, + flatten: true, + }); + + const session = cdp.session(sessionId); + const page = new Page(session, targetId); + await page.enableDomains(); + return page; + } + + private ensureLookupTarget(selectorOrRef: string): string { + if (!selectorOrRef) { + throw new Error("Missing selector or ref"); + } + return selectorOrRef; + } + + resolveRef(ref: string): string | null { + if (!this.snapshotData) return null; + + let normalized = ref.trim(); + if (normalized.startsWith("@")) normalized = normalized.slice(1); + if (normalized.startsWith("[") && normalized.endsWith("]")) { + normalized = normalized.slice(1, -1); + } + if (normalized.startsWith("ref=")) normalized = normalized.slice(4); + + return ( + this.snapshotData.xpathMap[normalized] ?? + this.snapshotData.legacyXPathMap[normalized] ?? 
+ null + ); + } + + private elementLookupExpression(selectorOrRef: string): string { + const target = this.ensureLookupTarget(selectorOrRef); + const xpath = this.resolveRef(target); + if (xpath) { + return `document.evaluate(${JSON.stringify( + xpath, + )}, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue`; + } + if (target.startsWith("xpath=")) { + const rawXpath = target.slice("xpath=".length); + return `document.evaluate(${JSON.stringify( + rawXpath, + )}, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue`; + } + return `document.querySelector(${JSON.stringify(target)})`; + } + + private async evaluateInPage(expression: string): Promise { + const result = await this.session.send("Runtime.evaluate", { + expression, + returnByValue: true, + awaitPromise: true, + }); + if (result.exceptionDetails) { + throw new Error( + `Evaluation failed: ${result.exceptionDetails.text ?? result.exceptionDetails.exception?.description ?? "unknown error"}`, + ); + } + return result.result?.value as T; + } + + private async getElementCenter(selectorOrRef: string): Promise<{ + x: number; + y: number; + } | null> { + return this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!node || !(node instanceof Element)) return null; + const rect = node.getBoundingClientRect(); + return { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 }; + })()`); + } + + async goto( + url: string, + options?: { waitUntil?: string; timeoutMs?: number }, + ): Promise { + const timeout = options?.timeoutMs ?? 30000; + await this.session.send("Page.navigate", { url }); + this.currentUrl = url; + await this.waitForLoad(options?.waitUntil ?? 
"load", timeout); + return { url: await this.getUrl() }; + } + + async reload(): Promise { + await this.session.send("Page.reload"); + await this.waitForLoad("load", 30000); + return { url: await this.getUrl() }; + } + + async goBack(): Promise { + const { currentIndex, entries } = await this.session.send( + "Page.getNavigationHistory", + ); + if (currentIndex > 0) { + await this.session.send("Page.navigateToHistoryEntry", { + entryId: entries[currentIndex - 1].id, + }); + await this.waitForLoad("load", 30000); + } + return { url: await this.getUrl() }; + } + + async goForward(): Promise { + const { currentIndex, entries } = await this.session.send( + "Page.getNavigationHistory", + ); + if (currentIndex < entries.length - 1) { + await this.session.send("Page.navigateToHistoryEntry", { + entryId: entries[currentIndex + 1].id, + }); + await this.waitForLoad("load", 30000); + } + return { url: await this.getUrl() }; + } + + async getUrl(): Promise { + const result = await this.session.send("Runtime.evaluate", { + expression: "window.location.href", + returnByValue: true, + }); + this.currentUrl = result.result?.value ?? this.currentUrl; + return this.currentUrl; + } + + async getTitle(): Promise { + const result = await this.session.send("Runtime.evaluate", { + expression: "document.title", + returnByValue: true, + }); + return result.result?.value ?? ""; + } + + async getInfo(): Promise { + return { url: await this.getUrl(), title: await this.getTitle() }; + } + + async getText(selectorOrRef?: string): Promise { + return this.evaluateInPage( + selectorOrRef && selectorOrRef !== "body" + ? `(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + return node?.innerText ?? node?.textContent ?? ""; + })()` + : "document.body.innerText", + ); + } + + async getHtml(selectorOrRef?: string): Promise { + return this.evaluateInPage( + selectorOrRef + ? 
`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!node) return ""; + return node instanceof Element ? node.innerHTML : ""; + })()` + : "document.documentElement.outerHTML", + ); + } + + async getValue(selectorOrRef: string): Promise { + return this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!node) return ""; + return "value" in node ? String(node.value ?? "") : ""; + })()`); + } + + async getAttribute( + selectorOrRef: string, + attribute: string, + ): Promise { + return this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!(node instanceof Element)) return null; + return node.getAttribute(${JSON.stringify(attribute)}); + })()`); + } + + async getCount(selectorOrRef: string): Promise { + const xpath = this.resolveRef(selectorOrRef); + if (xpath) return 1; + if (selectorOrRef.startsWith("xpath=")) { + return this.evaluateInPage( + `document.evaluate(${JSON.stringify( + selectorOrRef.slice("xpath=".length), + )}, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null).snapshotLength`, + ); + } + return this.evaluateInPage( + `document.querySelectorAll(${JSON.stringify(selectorOrRef)}).length`, + ); + } + + async isVisible(selectorOrRef: string): Promise { + return this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!(node instanceof Element)) return false; + const style = getComputedStyle(node); + const rect = node.getBoundingClientRect(); + return style.visibility !== "hidden" && style.display !== "none" && Number(style.opacity || "1") !== 0 && rect.width > 0 && rect.height > 0; + })()`); + } + + async isEnabled(selectorOrRef: string): Promise { + return this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!node || !(node instanceof Element)) return false; + return !("disabled" in node && !!node.disabled) && node.getAttribute("aria-disabled") 
!== "true"; + })()`); + } + + async isChecked(selectorOrRef: string): Promise { + return this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!node || !(node instanceof Element)) return false; + if ("checked" in node) return !!node.checked; + return node.getAttribute("aria-checked") === "true"; + })()`); + } + + async snapshot(options?: SnapshotOptions): Promise { + this.snapshotData = await captureSnapshot( + this.session, + 0, + undefined, + options, + ); + return this.snapshotData; + } + + get lastSnapshot(): Snapshot | null { + return this.snapshotData; + } + + async click( + x: number, + y: number, + options?: { button?: string; clickCount?: number }, + ): Promise { + const button = (options?.button ?? "left") as Protocol.Input.MouseButton; + const clickCount = options?.clickCount ?? 1; + + await this.session.send("Input.dispatchMouseEvent", { + type: "mouseMoved", + x, + y, + button: "none", + }); + + for (let i = 1; i <= clickCount; i++) { + await this.session.send("Input.dispatchMouseEvent", { + type: "mousePressed", + x, + y, + button, + clickCount: i, + }); + await this.session.send("Input.dispatchMouseEvent", { + type: "mouseReleased", + x, + y, + button, + clickCount: i, + }); + } + + return { clicked: true }; + } + + async clickRef(ref: string): Promise { + const xpath = this.resolveRef(ref); + if (!xpath) { + throw new Error( + `Unknown ref "${ref}" — run snapshot first to populate refs`, + ); + } + const coords = await this.getElementCenter(ref); + if (!coords) { + throw new Error( + `Could not locate element for ref "${ref}" (xpath: ${xpath})`, + ); + } + return this.click(coords.x, coords.y); + } + + async clickSelector(selector: string): Promise { + const coords = await this.getElementCenter(selector); + if (!coords) { + throw new Error(`Could not locate element for selector "${selector}"`); + } + return this.click(coords.x, coords.y); + } + + async dblclick(selectorOrRef: string): Promise { + const 
coords = await this.getElementCenter(selectorOrRef); + if (!coords) { + throw new Error(`Could not locate element: ${selectorOrRef}`); + } + return this.click(coords.x, coords.y, { clickCount: 2 }); + } + + async hover(x: number, y: number): Promise { + await this.session.send("Input.dispatchMouseEvent", { + type: "mouseMoved", + x, + y, + button: "none", + }); + } + + async hoverTarget(selectorOrRef: string): Promise { + const coords = await this.getElementCenter(selectorOrRef); + if (!coords) throw new Error(`Could not locate element: ${selectorOrRef}`); + await this.hover(coords.x, coords.y); + } + + async scroll( + x: number, + y: number, + deltaX: number, + deltaY: number, + ): Promise { + await this.session.send("Input.dispatchMouseEvent", { + type: "mouseWheel", + x, + y, + deltaX, + deltaY, + }); + } + + async focus(selectorOrRef: string): Promise { + await this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (node instanceof HTMLElement || node instanceof SVGElement) node.focus(); + return true; + })()`); + } + + async check(selectorOrRef: string, checked = true): Promise { + await this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!node || !(node instanceof Element)) throw new Error("Element not found"); + if ("checked" in node) { + node.checked = ${checked ? "true" : "false"}; + node.dispatchEvent(new Event("input", { bubbles: true })); + node.dispatchEvent(new Event("change", { bubbles: true })); + return true; + } + node.setAttribute("aria-checked", ${JSON.stringify(checked ? 
"true" : "false")}); + return true; + })()`); + } + + async select(selectorOrRef: string, values: string[]): Promise { + await this.evaluateInPage(`(() => { + const node = ${this.elementLookupExpression(selectorOrRef)}; + if (!(node instanceof HTMLSelectElement)) { + throw new Error("Target is not a { browserbaseProjectId = (e.currentTarget as HTMLInputElement).value; updateWebSettings({ browserbaseProjectId }); }} + placeholder="proj-..." + class="w-full bg-(--chat-input-bg) text-(--chat-text-primary) text-sm px-3 py-2 border border-(--chat-border) placeholder:text-(--chat-text-muted) focus:outline-none focus:border-(--chat-border-active)" + style={inputStyle} + /> + +

+ Get credentials at browserbase.com/settings +

+ + + +
+ + + +
+ {@render apiKeyField("API Key", browserUseApiKey, (v) => { browserUseApiKey = v; updateWebSettings({ browserUseApiKey }); }, "bu-api-...")} +

+ Get an API key at cloud.browser-use.com +

+
+
+ + +
{#if isConfigured} diff --git a/packages/core/src/chat/tool-call-block.svelte b/packages/core/src/chat/tool-call-block.svelte index 7e68325..dda00ea 100644 --- a/packages/core/src/chat/tool-call-block.svelte +++ b/packages/core/src/chat/tool-call-block.svelte @@ -40,7 +40,7 @@ return { codeBlocks, rest }; } - function cleanResult(raw: string): string { + function cleanResult(raw: string): { text: string; lang: string } { try { const parsed = JSON.parse(raw); if (typeof parsed === "object" && parsed !== null) { @@ -50,11 +50,11 @@ delete cleaned[key]; } } - return JSON.stringify(cleaned, null, 2); + return { text: JSON.stringify(cleaned, null, 2), lang: "json" }; } - return JSON.stringify(parsed, null, 2); + return { text: JSON.stringify(parsed, null, 2), lang: "json" }; } catch { - return raw; + return { text: raw, lang: "text" }; } } @@ -64,7 +64,7 @@ const ToolExtras = $derived(chat.adapter.ToolExtras); const split = $derived(splitArgs(part.args)); const hasRestArgs = $derived(Object.keys(split.rest).length > 0); - const resultText = $derived(part.result ? cleanResult(part.result) : undefined); + const resultData = $derived(part.result ? cleanResult(part.result) : undefined); const isStreaming = $derived( part.status === "pending" || part.status === "running", ); @@ -153,14 +153,14 @@
{/if} - {#if resultText} + {#if resultData}
{part.status === "error" ? "error" : "result"}
diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 8975f32..1f0927d 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -40,6 +40,7 @@ "@mozilla/readability": "^0.6.0", "@sinclair/typebox": "^0.34.48", "@types/turndown": "^5.0.6", + "@office-agents/browser": "workspace:*", "idb": "^8.0.3", "just-bash": "^2.7.0", "mammoth": "^1.11.0", diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts index 37dda5c..247369d 100644 --- a/packages/sdk/src/index.ts +++ b/packages/sdk/src/index.ts @@ -96,6 +96,12 @@ export { truncateHead, truncateTail, } from "./truncate"; +// Browser +export { + onBrowseSessionChange, + getBrowseSessionState, + type BrowseSessionEvent, +} from "@office-agents/browser"; // VFS export { type CustomCommandsResult, diff --git a/packages/sdk/src/tools/bash.ts b/packages/sdk/src/tools/bash.ts index e51f026..b6e28a7 100644 --- a/packages/sdk/src/tools/bash.ts +++ b/packages/sdk/src/tools/bash.ts @@ -6,7 +6,7 @@ import { truncateTail, } from "../truncate"; import { getBash } from "../vfs"; -import { defineTool, toolError, toolSuccess } from "./types"; +import { defineTool, toolError, toolText } from "./types"; export const bashTool = defineTool({ name: "bash", @@ -72,7 +72,7 @@ export const bashTool = defineTool({ } } - return toolSuccess({ output: outputText, exitCode: result.exitCode }); + return toolText(outputText); } catch (error) { const message = error instanceof Error diff --git a/packages/sdk/src/vfs/custom-commands.ts b/packages/sdk/src/vfs/custom-commands.ts index 1c20f72..615b544 100644 --- a/packages/sdk/src/vfs/custom-commands.ts +++ b/packages/sdk/src/vfs/custom-commands.ts @@ -6,6 +6,12 @@ import { loadWebConfig } from "../web/config"; import { fetchWeb } from "../web/fetch"; import { searchImages, searchWeb } from "../web/search"; import { parseFlags, parsePageRanges } from "./command-utils"; +import { + executeBrowseCommand, + configureBrowseCommand, + BrowserbaseProvider, + 
BrowserUseProvider, +} from "@office-agents/browser"; interface CommandFs { mkdir(path: string, options: { recursive: boolean }): Promise; @@ -583,6 +589,37 @@ function collect(described: DescribedCommand[]): CustomCommandsResult { }; } +function getBrowserProvider(): BrowserbaseProvider | BrowserUseProvider | null { + const webConfig = loadWebConfig(); + + const browserUseApiKey = webConfig.apiKeys?.browserUse; + if (browserUseApiKey) { + return new BrowserUseProvider({ apiKey: browserUseApiKey }); + } + + const browserbaseApiKey = webConfig.apiKeys?.browserbase; + const browserbaseProjectId = webConfig.apiKeys?.browserbaseProjectId; + if (browserbaseApiKey && browserbaseProjectId) { + return new BrowserbaseProvider({ + apiKey: browserbaseApiKey, + projectId: browserbaseProjectId, + }); + } + + return null; +} + +const browseCmd: Command = defineCommand("browse", async (args, ctx) => { + configureBrowseCommand({ + getProvider: () => getBrowserProvider(), + writeFile: ctx + ? (path, data) => writeVfsOutput(ctx, path, data).then(() => {}) + : undefined, + }); + + return executeBrowseCommand(args); +}); + export function getSharedCustomCommands( options: SharedCustomCommandOptions = {}, ): CustomCommandsResult { @@ -593,6 +630,7 @@ export function getSharedCustomCommands( xlsxToCsv, webSearchCmd, webFetchCmd, + browseCmd, ]; if (options.includeImageSearch) { diff --git a/packages/sdk/src/web/config.ts b/packages/sdk/src/web/config.ts index 2d7a778..bbccb9f 100644 --- a/packages/sdk/src/web/config.ts +++ b/packages/sdk/src/web/config.ts @@ -6,6 +6,9 @@ export interface WebConfig { exa?: string; brave?: string; serper?: string; + browserbase?: string; + browserbaseProjectId?: string; + browserUse?: string; }; } diff --git a/packages/sdk/tests/tools-bash.test.ts b/packages/sdk/tests/tools-bash.test.ts index 7bb3de4..510c44e 100644 --- a/packages/sdk/tests/tools-bash.test.ts +++ b/packages/sdk/tests/tools-bash.test.ts @@ -24,39 +24,31 @@ describe("bashTool", () => { 
it("executes a simple echo command", async () => { const result = await execute("tc_1", { command: "echo hello" }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toBe("hello"); - expect(parsed.exitCode).toBe(0); + expect(getText(result)).toBe("hello"); }); it("supports piping", async () => { const result = await execute("tc_2", { command: 'echo "banana\napple\ncherry" | sort', }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toBe("apple\nbanana\ncherry"); + expect(getText(result)).toBe("apple\nbanana\ncherry"); }); it("captures stderr", async () => { const result = await execute("tc_3", { command: "echo error >&2", }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toContain("stderr: error"); + expect(getText(result)).toContain("stderr: error"); }); it("reports non-zero exit codes", async () => { const result = await execute("tc_4", { command: "exit 42" }); - const parsed = JSON.parse(getText(result)); - expect(parsed.exitCode).toBe(42); - expect(parsed.output).toContain("[exit code: 42]"); + expect(getText(result)).toContain("[exit code: 42]"); }); it("reports [no output] for silent commands", async () => { const result = await execute("tc_5", { command: "true" }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toBe("[no output]"); - expect(parsed.exitCode).toBe(0); + expect(getText(result)).toBe("[no output]"); }); it("can read and write files in the VFS", async () => { @@ -66,33 +58,29 @@ describe("bashTool", () => { const result = await execute("tc_7", { command: "cat /home/user/uploads/test.txt", }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toBe("data"); + expect(getText(result)).toBe("data"); }); it("supports command chaining with &&", async () => { const result = await execute("tc_8", { command: 'echo first && echo second', }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toBe("first\nsecond"); + 
expect(getText(result)).toBe("first\nsecond"); }); it("supports variables", async () => { const result = await execute("tc_9", { command: 'X=world; echo "hello $X"', }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toBe("hello world"); + expect(getText(result)).toBe("hello world"); }); it("truncates very large output", async () => { const result = await execute("tc_10", { command: 'for i in $(seq 1 5000); do echo "line $i"; done', }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toContain("[Showing last"); - expect(parsed.output).toContain("Output truncated.]"); + expect(getText(result)).toContain("[Showing last"); + expect(getText(result)).toContain("Output truncated.]"); }); it("supports text processing commands", async () => { @@ -103,8 +91,7 @@ describe("bashTool", () => { const result = await execute("r", { command: "cat /home/user/uploads/data.csv | grep alice", }); - const parsed = JSON.parse(getText(result)); - expect(parsed.output).toContain("alice,90"); - expect(parsed.output).toContain("alice,95"); + expect(getText(result)).toContain("alice,90"); + expect(getText(result)).toContain("alice,95"); }); }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 59116c6..8642ea4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -48,6 +48,15 @@ importers: specifier: ^8.18.3 version: 8.19.0 + packages/browser: + devDependencies: + devtools-protocol: + specifier: ^0.0.1602427 + version: 0.0.1602427 + typescript: + specifier: ^5.8.0 + version: 5.9.3 + packages/core: dependencies: '@mariozechner/pi-agent-core': @@ -266,6 +275,9 @@ importers: '@mozilla/readability': specifier: ^0.6.0 version: 0.6.0 + '@office-agents/browser': + specifier: workspace:* + version: link:../browser '@sinclair/typebox': specifier: ^0.34.48 version: 0.34.48 @@ -2858,6 +2870,9 @@ packages: devlop@1.1.0: resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==} + 
devtools-protocol@0.0.1602427: + resolution: {integrity: sha512-njq3w5kagNNrtv6xVsmI9b/Kvbz99GIlZ6qoDAQH8iWLWr77CuCQf9r6tnRgBmfBgeOO8meJJADXrh5zXFLh1A==} + diagnostic-channel-publishers@0.4.4: resolution: {integrity: sha512-l126t01d2ZS9EreskvEtZPrcgstuvH3rbKy82oUhUrVmBaGx4hO9wECdl3cvZbKDYjMF3QJDB5z5dL9yWAjvZQ==} peerDependencies: @@ -8032,6 +8047,8 @@ snapshots: dependencies: dequal: 2.0.3 + devtools-protocol@0.0.1602427: {} + diagnostic-channel-publishers@0.4.4(diagnostic-channel@0.3.1): dependencies: diagnostic-channel: 0.3.1 From 0dbdad7d6385860b67d29ec56acc768dd3e24170 Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Sun, 22 Mar 2026 13:21:58 +0800 Subject: [PATCH 02/12] chore: fix lint ordering --- packages/sdk/src/index.ts | 12 +- packages/sdk/src/vfs/custom-commands.ts | 864 +++++++++++------------- 2 files changed, 411 insertions(+), 465 deletions(-) diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts index 247369d..c7c5376 100644 --- a/packages/sdk/src/index.ts +++ b/packages/sdk/src/index.ts @@ -1,5 +1,11 @@ // Runtime +// Browser +export { + type BrowseSessionEvent, + getBrowseSessionState, + onBrowseSessionChange, +} from "@office-agents/browser"; export type { ImageResizeOptions, ResizedImage } from "./image-resize"; export { resizeImage } from "./image-resize"; // Lockdown @@ -96,12 +102,6 @@ export { truncateHead, truncateTail, } from "./truncate"; -// Browser -export { - onBrowseSessionChange, - getBrowseSessionState, - type BrowseSessionEvent, -} from "@office-agents/browser"; // VFS export { type CustomCommandsResult, diff --git a/packages/sdk/src/vfs/custom-commands.ts b/packages/sdk/src/vfs/custom-commands.ts index 615b544..5d2f2c9 100644 --- a/packages/sdk/src/vfs/custom-commands.ts +++ b/packages/sdk/src/vfs/custom-commands.ts @@ -1,4 +1,10 @@ -import type { CustomCommand } from "just-bash/browser"; +import { + BrowserbaseProvider, + BrowserUseProvider, + configureBrowseCommand, + 
executeBrowseCommand, +} from "@office-agents/browser"; +import type { Command, CustomCommand } from "just-bash/browser"; import { defineCommand } from "just-bash/browser"; import { loadPdfDocument } from "../pdf"; import { loadSavedConfig } from "../provider-config"; @@ -6,12 +12,6 @@ import { loadWebConfig } from "../web/config"; import { fetchWeb } from "../web/fetch"; import { searchImages, searchWeb } from "../web/search"; import { parseFlags, parsePageRanges } from "./command-utils"; -import { - executeBrowseCommand, - configureBrowseCommand, - BrowserbaseProvider, - BrowserUseProvider, -} from "@office-agents/browser"; interface CommandFs { mkdir(path: string, options: { recursive: boolean }): Promise; @@ -24,12 +24,6 @@ interface CommandContext { fs: CommandFs; } -export interface DescribedCommand { - command: CustomCommand; - promptSnippet: string; - isAvailable?: () => boolean; -} - export interface SharedCustomCommandOptions { includeImageSearch?: boolean; } @@ -68,526 +62,478 @@ function getProxyUrl(): string | undefined { return config?.useProxy && config?.proxyUrl ? config.proxyUrl : undefined; } -const pdfToText: DescribedCommand = { - promptSnippet: - "- pdf-to-text — Extract text from PDF to file. 
Use head/grep/tail to read selectively.", - command: { - name: "pdf-to-text", - load: async () => - defineCommand("pdf-to-text", async (args, ctx) => { - if (args.length < 2) { - return { - stdout: "", - stderr: - "Usage: pdf-to-text \n file - Path to PDF file in VFS\n outfile - Output text file\n", - exitCode: 1, - }; +const pdfToText: CustomCommand = { + name: "pdf-to-text", + load: async () => + defineCommand("pdf-to-text", async (args, ctx) => { + if (args.length < 2) { + return { + stdout: "", + stderr: + "Usage: pdf-to-text \n file - Path to PDF file in VFS\n outfile - Output text file\n", + exitCode: 1, + }; + } + + const [filePath, outFile] = args; + + try { + const data = await resolveVfsPath(ctx, filePath); + const doc = await loadPdfDocument(data); + const pages: string[] = []; + + for (let i = 1; i <= doc.numPages; i++) { + const page = await doc.getPage(i); + const content = await page.getTextContent(); + const text = content.items + .filter((item) => "str" in item) + .map((item) => (item as { str: string }).str) + .join(" "); + if (text.trim()) pages.push(text); } - const [filePath, outFile] = args; + const fullText = pages.join("\n\n"); + await writeVfsOutput(ctx, outFile, fullText); - try { - const data = await resolveVfsPath(ctx, filePath); - const doc = await loadPdfDocument(data); - const pages: string[] = []; - - for (let i = 1; i <= doc.numPages; i++) { - const page = await doc.getPage(i); - const content = await page.getTextContent(); - const text = content.items - .filter((item) => "str" in item) - .map((item) => (item as { str: string }).str) - .join(" "); - if (text.trim()) pages.push(text); - } + return { + stdout: `Extracted text from ${doc.numPages} page(s) to ${outFile} (${fullText.length} chars)`, + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } + }), +}; - const fullText = pages.join("\n\n"); - await writeVfsOutput(ctx, outFile, fullText); +const pdfToImages: CustomCommand = { + name: "pdf-to-images", + load: async () => + defineCommand("pdf-to-images", async (args, ctx) => { + const positional = args.filter((arg) => !arg.startsWith("--")); + const scaleArg = args.find((arg) => arg.startsWith("--scale=")); + const pagesArg = args.find((arg) => arg.startsWith("--pages=")); - return { - stdout: `Extracted text from ${doc.numPages} page(s) to ${outFile} (${fullText.length} chars)`, - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } - }), - }, -}; + if (positional.length < 2) { + return { + stdout: "", + stderr: + "Usage: pdf-to-images [--scale=N] [--pages=1,3,5-8]\n file - Path to PDF file in VFS\n outdir - Output directory for PNG images\n --scale - Render scale factor (default: 2)\n --pages - Page selection (e.g. 1,3,5-8). Default: all\n", + exitCode: 1, + }; + } -const pdfToImages: DescribedCommand = { - promptSnippet: - "- pdf-to-images [--scale=N] [--pages=1,3,5-8] — Render PDF pages to PNG images. Use for scanned PDFs where text extraction won't work. Then use read to visually inspect the images.", - command: { - name: "pdf-to-images", - load: async () => - defineCommand("pdf-to-images", async (args, ctx) => { - const positional = args.filter((arg) => !arg.startsWith("--")); - const scaleArg = args.find((arg) => arg.startsWith("--scale=")); - const pagesArg = args.find((arg) => arg.startsWith("--pages=")); - - if (positional.length < 2) { - return { - stdout: "", - stderr: - "Usage: pdf-to-images [--scale=N] [--pages=1,3,5-8]\n file - Path to PDF file in VFS\n outdir - Output directory for PNG images\n --scale - Render scale factor (default: 2)\n --pages - Page selection (e.g. 1,3,5-8). 
Default: all\n", - exitCode: 1, - }; - } + const [filePath, outDir] = positional; + const scale = scaleArg ? Number.parseFloat(scaleArg.split("=")[1]) : 2; - const [filePath, outDir] = positional; - const scale = scaleArg ? Number.parseFloat(scaleArg.split("=")[1]) : 2; + if (Number.isNaN(scale) || scale <= 0 || scale > 5) { + return { + stdout: "", + stderr: "Scale must be between 0 and 5", + exitCode: 1, + }; + } - if (Number.isNaN(scale) || scale <= 0 || scale > 5) { + try { + const data = await resolveVfsPath(ctx, filePath); + const doc = await loadPdfDocument(data); + + const selectedPages = pagesArg + ? parsePageRanges(pagesArg.split("=")[1], doc.numPages) + : new Set(Array.from({ length: doc.numPages }, (_, i) => i + 1)); + + if (selectedPages.size === 0) { return { stdout: "", - stderr: "Scale must be between 0 and 5", + stderr: "No valid pages in selection", exitCode: 1, }; } + const resolvedDir = resolvePath(ctx.cwd, outDir); try { - const data = await resolveVfsPath(ctx, filePath); - const doc = await loadPdfDocument(data); - - const selectedPages = pagesArg - ? 
parsePageRanges(pagesArg.split("=")[1], doc.numPages) - : new Set(Array.from({ length: doc.numPages }, (_, i) => i + 1)); - - if (selectedPages.size === 0) { - return { - stdout: "", - stderr: "No valid pages in selection", - exitCode: 1, - }; - } - - const resolvedDir = resolvePath(ctx.cwd, outDir); - try { - await ctx.fs.mkdir(resolvedDir, { recursive: true }); - } catch { - // directory may already exist - } + await ctx.fs.mkdir(resolvedDir, { recursive: true }); + } catch { + // directory may already exist + } - const outputs: string[] = []; - const sortedPages = [...selectedPages].sort((a, b) => a - b); - - for (const pageNum of sortedPages) { - const page = await doc.getPage(pageNum); - const viewport = page.getViewport({ scale }); - - const canvas = document.createElement("canvas"); - canvas.width = Math.floor(viewport.width); - canvas.height = Math.floor(viewport.height); - const canvasCtx = canvas.getContext("2d"); - if (!canvasCtx) - throw new Error("Failed to create canvas 2D context"); - - await page.render({ canvasContext: canvasCtx, canvas, viewport }) - .promise; - - const pngData = await new Promise((resolve, reject) => { - canvas.toBlob((blob) => { - if (!blob) return reject(new Error("Canvas toBlob failed")); - blob.arrayBuffer().then((buf) => resolve(new Uint8Array(buf))); - }, "image/png"); - }); - - const pagePath = `${resolvedDir}/page-${pageNum}.png`; - await ctx.fs.writeFile(pagePath, pngData); - outputs.push( - `page-${pageNum}.png (${Math.round(pngData.length / 1024)}KB, ${canvas.width}×${canvas.height})`, - ); - - canvas.width = 0; - canvas.height = 0; - } + const outputs: string[] = []; + const sortedPages = [...selectedPages].sort((a, b) => a - b); + + for (const pageNum of sortedPages) { + const page = await doc.getPage(pageNum); + const viewport = page.getViewport({ scale }); + + const canvas = document.createElement("canvas"); + canvas.width = Math.floor(viewport.width); + canvas.height = Math.floor(viewport.height); + const 
canvasCtx = canvas.getContext("2d"); + if (!canvasCtx) throw new Error("Failed to create canvas 2D context"); + + await page.render({ canvasContext: canvasCtx, canvas, viewport }) + .promise; + + const pngData = await new Promise((resolve, reject) => { + canvas.toBlob((blob) => { + if (!blob) return reject(new Error("Canvas toBlob failed")); + blob.arrayBuffer().then((buf) => resolve(new Uint8Array(buf))); + }, "image/png"); + }); + + const pagePath = `${resolvedDir}/page-${pageNum}.png`; + await ctx.fs.writeFile(pagePath, pngData); + outputs.push( + `page-${pageNum}.png (${Math.round(pngData.length / 1024)}KB, ${canvas.width}×${canvas.height})`, + ); - return { - stdout: `Converted ${outputs.length} page(s) from ${doc.numPages} total to ${outDir}/:\n${outputs.map((o) => ` ${o}`).join("\n")}`, - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; + canvas.width = 0; + canvas.height = 0; } - }), - }, -}; -const docxToText: DescribedCommand = { - promptSnippet: - "- docx-to-text — Extract text from DOCX to file.", - command: { - name: "docx-to-text", - load: async () => - defineCommand("docx-to-text", async (args, ctx) => { - if (args.length < 2) { - return { - stdout: "", - stderr: - "Usage: docx-to-text \n file - Path to DOCX file in VFS\n outfile - Output text file\n", - exitCode: 1, - }; - } + return { + stdout: `Converted ${outputs.length} page(s) from ${doc.numPages} total to ${outDir}/:\n${outputs.map((o) => ` ${o}`).join("\n")}`, + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } + }), +}; - const [filePath, outFile] = args; +const docxToText: CustomCommand = { + name: "docx-to-text", + load: async () => + defineCommand("docx-to-text", async (args, ctx) => { + if (args.length < 2) { + return { + stdout: "", + stderr: + "Usage: docx-to-text \n file - Path to DOCX file in VFS\n outfile - Output text file\n", + exitCode: 1, + }; + } - try { - const data = await resolveVfsPath(ctx, filePath); - const mammoth = await import("mammoth"); - const ab = data.buffer.slice( - data.byteOffset, - data.byteOffset + data.byteLength, - ); - const bufferCtor = ( - globalThis as typeof globalThis & { - Buffer?: { from(input: ArrayBuffer): unknown }; - } - ).Buffer; - const options: Record = { - arrayBuffer: ab, - }; - if (bufferCtor) { - options.buffer = bufferCtor.from(ab); + const [filePath, outFile] = args; + + try { + const data = await resolveVfsPath(ctx, filePath); + const mammoth = await import("mammoth"); + const ab = data.buffer.slice( + data.byteOffset, + data.byteOffset + data.byteLength, + ); + const bufferCtor = ( + globalThis as typeof globalThis & { + Buffer?: { from(input: ArrayBuffer): unknown }; } - const result = await mammoth.extractRawText( - options as unknown as Parameters[0], - ); + ).Buffer; + const options: Record = { + arrayBuffer: ab, + }; + if (bufferCtor) { + options.buffer = bufferCtor.from(ab); + } + const result = await mammoth.extractRawText( + options as unknown as Parameters[0], + ); - await writeVfsOutput(ctx, outFile, result.value); + await writeVfsOutput(ctx, outFile, result.value); - return { - stdout: `Extracted text from DOCX to ${outFile} (${result.value.length} chars)`, - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? 
error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } - }), - }, + return { + stdout: `Extracted text from DOCX to ${outFile} (${result.value.length} chars)`, + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } + }), }; -const xlsxToCsv: DescribedCommand = { - promptSnippet: - "- xlsx-to-csv [sheet] — Convert XLSX/XLS/ODS sheet to CSV. Sheet by name or 0-based index.", - command: { - name: "xlsx-to-csv", - load: async () => - defineCommand("xlsx-to-csv", async (args, ctx) => { - if (args.length < 2) { - return { - stdout: "", - stderr: - "Usage: xlsx-to-csv [sheet]\n file - Path to XLSX/XLS/ODS file in VFS\n outfile - Output CSV file (for multiple sheets: ..csv)\n sheet - Sheet name or 0-based index (optional, exports all sheets if omitted)\n", - exitCode: 1, - }; - } - - const [filePath, outFile, sheetArg] = args; +const xlsxToCsv: CustomCommand = { + name: "xlsx-to-csv", + load: async () => + defineCommand("xlsx-to-csv", async (args, ctx) => { + if (args.length < 2) { + return { + stdout: "", + stderr: + "Usage: xlsx-to-csv [sheet]\n file - Path to XLSX/XLS/ODS file in VFS\n outfile - Output CSV file (for multiple sheets: ..csv)\n sheet - Sheet name or 0-based index (optional, exports all sheets if omitted)\n", + exitCode: 1, + }; + } - try { - const data = await resolveVfsPath(ctx, filePath); - const XLSX = await import("xlsx"); - const workbook = XLSX.read(data, { type: "array" }); - - if (sheetArg) { - let sheetName: string; - if (workbook.SheetNames.includes(sheetArg)) { - sheetName = sheetArg; + const [filePath, outFile, sheetArg] = args; + + try { + const data = await resolveVfsPath(ctx, filePath); + const XLSX = await import("xlsx"); + const workbook = XLSX.read(data, { type: "array" }); + + if (sheetArg) { + let sheetName: string; + if (workbook.SheetNames.includes(sheetArg)) { + sheetName = 
sheetArg; + } else { + const idx = Number.parseInt(sheetArg, 10); + if ( + !Number.isNaN(idx) && + idx >= 0 && + idx < workbook.SheetNames.length + ) { + sheetName = workbook.SheetNames[idx]; } else { - const idx = Number.parseInt(sheetArg, 10); - if ( - !Number.isNaN(idx) && - idx >= 0 && - idx < workbook.SheetNames.length - ) { - sheetName = workbook.SheetNames[idx]; - } else { - return { - stdout: "", - stderr: `Sheet not found: ${sheetArg}. Available: ${workbook.SheetNames.join(", ")}`, - exitCode: 1, - }; - } - } - - const sheet = workbook.Sheets[sheetName]; - if (!sheet) { return { stdout: "", - stderr: `Sheet "${sheetName}" not found`, + stderr: `Sheet not found: ${sheetArg}. Available: ${workbook.SheetNames.join(", ")}`, exitCode: 1, }; } - - const csv = XLSX.utils.sheet_to_csv(sheet); - await writeVfsOutput(ctx, outFile, csv); - - return { - stdout: `Converted sheet "${sheetName}" → ${outFile}`, - stderr: "", - exitCode: 0, - }; } - const names = workbook.SheetNames; - - if (names.length === 1) { - const csv = XLSX.utils.sheet_to_csv(workbook.Sheets[names[0]]); - await writeVfsOutput(ctx, outFile, csv); + const sheet = workbook.Sheets[sheetName]; + if (!sheet) { return { - stdout: `Converted sheet "${names[0]}" → ${outFile}`, - stderr: "", - exitCode: 0, + stdout: "", + stderr: `Sheet "${sheetName}" not found`, + exitCode: 1, }; } - const dotIdx = outFile.lastIndexOf("."); - const base = dotIdx > 0 ? outFile.substring(0, dotIdx) : outFile; - const ext = dotIdx > 0 ? 
outFile.substring(dotIdx) : ".csv"; - const outputs: string[] = []; - - for (const name of names) { - const sheet = workbook.Sheets[name]; - if (!sheet) continue; - const csv = XLSX.utils.sheet_to_csv(sheet); - const safeName = name.replace(/[/\\?*[\]]/g, "_"); - const path = `${base}.${safeName}${ext}`; - await writeVfsOutput(ctx, path, csv); - outputs.push(` "${name}" → ${path}`); - } + const csv = XLSX.utils.sheet_to_csv(sheet); + await writeVfsOutput(ctx, outFile, csv); return { - stdout: `Converted ${names.length} sheets:\n${outputs.join("\n")}`, + stdout: `Converted sheet "${sheetName}" → ${outFile}`, stderr: "", exitCode: 0, }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; } - }), - }, -}; -const webSearchCmd: DescribedCommand = { - promptSnippet: - "- web-search [--max=N] [--region=REGION] [--time=d|w|m|y] [--page=N] [--json] — Search the web. Returns title, URL, and snippet for each result.", - command: defineCommand("web-search", async (args) => { - const { flags, positional } = parseFlags(args); - const query = positional.join(" "); + const names = workbook.SheetNames; - if (!query) { - return { - stdout: "", - stderr: - "Usage: web-search [--max=N] [--region=REGION] [--time=d|w|m|y] [--page=N] [--json]\n query - Search query\n --max - Max results (default: 10)\n --region - Region code, e.g. us-en, uk-en (default: us-en)\n --time - Time filter: d(ay), w(eek), m(onth), y(ear)\n --page - Page number (default: 1)\n --json - Output as JSON\n", - exitCode: 1, - }; - } + if (names.length === 1) { + const csv = XLSX.utils.sheet_to_csv(workbook.Sheets[names[0]]); + await writeVfsOutput(ctx, outFile, csv); + return { + stdout: `Converted sheet "${names[0]}" → ${outFile}`, + stderr: "", + exitCode: 0, + }; + } - try { - const webConfig = loadWebConfig(); - const results = await searchWeb( - query, - { - maxResults: flags.max ? 
Number.parseInt(flags.max, 10) : 10, - region: flags.region, - timelimit: flags.time as "d" | "w" | "m" | "y" | undefined, - page: flags.page ? Number.parseInt(flags.page, 10) : undefined, - }, - { - proxyUrl: getProxyUrl(), - apiKeys: webConfig.apiKeys, - }, - webConfig.searchProvider, - ); - - if (results.length === 0) { - return { stdout: "No results found.", stderr: "", exitCode: 0 }; - } + const dotIdx = outFile.lastIndexOf("."); + const base = dotIdx > 0 ? outFile.substring(0, dotIdx) : outFile; + const ext = dotIdx > 0 ? outFile.substring(dotIdx) : ".csv"; + const outputs: string[] = []; + + for (const name of names) { + const sheet = workbook.Sheets[name]; + if (!sheet) continue; + const csv = XLSX.utils.sheet_to_csv(sheet); + const safeName = name.replace(/[/\\?*[\]]/g, "_"); + const path = `${base}.${safeName}${ext}`; + await writeVfsOutput(ctx, path, csv); + outputs.push(` "${name}" → ${path}`); + } - if (flags.json === "true") { return { - stdout: JSON.stringify(results, null, 2), + stdout: `Converted ${names.length} sheets:\n${outputs.join("\n")}`, stderr: "", exitCode: 0, }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; } - - const lines = results.map( - (result, index) => - `${index + 1}. ${result.title}\n ${result.href}\n ${result.body}`, - ); - return { - stdout: lines.join("\n\n"), - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } - }), + }), }; -const webFetchCmd: DescribedCommand = { - promptSnippet: - "- web-fetch — Fetch a web page and extract its readable content to a file. 
Use head/grep/tail to read selectively.", - command: defineCommand("web-fetch", async (args, ctx) => { - const url = args[0]; - const outFile = args[1]; +const webSearchCmd: Command = defineCommand("web-search", async (args) => { + const { flags, positional } = parseFlags(args); + const query = positional.join(" "); + + if (!query) { + return { + stdout: "", + stderr: + "Usage: web-search [--max=N] [--region=REGION] [--time=d|w|m|y] [--page=N] [--json]\n query - Search query\n --max - Max results (default: 10)\n --region - Region code, e.g. us-en, uk-en (default: us-en)\n --time - Time filter: d(ay), w(eek), m(onth), y(ear)\n --page - Page number (default: 1)\n --json - Output as JSON\n", + exitCode: 1, + }; + } - if (!url || !outFile) { - return { - stdout: "", - stderr: - "Usage: web-fetch \n url - URL to fetch\n outfile - Output file path\n\nFetches a URL and saves to a file.\n - HTML pages: extracts readable content (Markdown)\n - Binary files (PDF, DOCX, XLSX, etc.): downloads raw file\n - Text/JSON/XML: saves as-is\n", - exitCode: 1, - }; + try { + const webConfig = loadWebConfig(); + const results = await searchWeb( + query, + { + maxResults: flags.max ? Number.parseInt(flags.max, 10) : 10, + region: flags.region, + timelimit: flags.time as "d" | "w" | "m" | "y" | undefined, + page: flags.page ? Number.parseInt(flags.page, 10) : undefined, + }, + { + proxyUrl: getProxyUrl(), + apiKeys: webConfig.apiKeys, + }, + webConfig.searchProvider, + ); + + if (results.length === 0) { + return { stdout: "No results found.", stderr: "", exitCode: 0 }; } - try { - const webConfig = loadWebConfig(); - const result = await fetchWeb( - url, - { - proxyUrl: getProxyUrl(), - apiKeys: webConfig.apiKeys, - }, - webConfig.fetchProvider, - ); - - if (result.kind === "text") { - const header = [ - result.title ? 
`Title: ${result.title}` : "", - ...Object.entries(result.metadata || {}).map( - ([key, value]) => `${key}: ${value}`, - ), - ] - .filter(Boolean) - .join("\n"); - const output = header ? `${header}\n\n${result.text}` : result.text; - - await writeVfsOutput(ctx, outFile, output); - return { - stdout: `Fetched text → ${outFile} (${result.text.length} chars, ${result.contentType})`, - stderr: "", - exitCode: 0, - }; - } - - await writeVfsOutput(ctx, outFile, result.data); - - const size = - result.data.length >= 1024 - ? `${Math.round(result.data.length / 1024)}KB` - : `${result.data.length}B`; - + if (flags.json === "true") { return { - stdout: `Downloaded → ${outFile} (${size}, ${result.contentType || "unknown type"})`, + stdout: JSON.stringify(results, null, 2), stderr: "", exitCode: 0, }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; } - }), -}; -const imageSearchCmd: DescribedCommand = { - promptSnippet: - "- image-search [--num=N] [--page=N] [--gl=COUNTRY] [--hl=LANG] [--json] — Search for images. Returns image URLs, dimensions, source, and page link.", - isAvailable: () => { - const webConfig = loadWebConfig(); - return !!webConfig.apiKeys?.serper; - }, - command: defineCommand("image-search", async (args) => { - const { flags, positional } = parseFlags(args); - const query = positional.join(" "); + const lines = results.map( + (result, index) => + `${index + 1}. ${result.title}\n ${result.href}\n ${result.body}`, + ); + return { + stdout: lines.join("\n\n"), + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } +}); + +const webFetchCmd: Command = defineCommand("web-fetch", async (args, ctx) => { + const url = args[0]; + const outFile = args[1]; + + if (!url || !outFile) { + return { + stdout: "", + stderr: + "Usage: web-fetch \n url - URL to fetch\n outfile - Output file path\n\nFetches a URL and saves to a file.\n - HTML pages: extracts readable content (Markdown)\n - Binary files (PDF, DOCX, XLSX, etc.): downloads raw file\n - Text/JSON/XML: saves as-is\n", + exitCode: 1, + }; + } - if (!query) { + try { + const webConfig = loadWebConfig(); + const result = await fetchWeb( + url, + { + proxyUrl: getProxyUrl(), + apiKeys: webConfig.apiKeys, + }, + webConfig.fetchProvider, + ); + + if (result.kind === "text") { + const header = [ + result.title ? `Title: ${result.title}` : "", + ...Object.entries(result.metadata || {}).map( + ([key, value]) => `${key}: ${value}`, + ), + ] + .filter(Boolean) + .join("\n"); + const output = header ? `${header}\n\n${result.text}` : result.text; + + await writeVfsOutput(ctx, outFile, output); return { - stdout: "", - stderr: - "Usage: image-search [--num=N] [--page=N] [--gl=COUNTRY] [--hl=LANG] [--json]\n" + - " query - Image search query\n" + - " --num - Number of results (default: 10)\n" + - " --page - Page number (default: 1)\n" + - " --gl - Country code, e.g. us, uk (default: us)\n" + - " --hl - Language code, e.g. en, fr (default: en)\n" + - " --json - Output as JSON\n" + - "\nRequires a Serper API key configured in Settings > Web > API Keys.\n", - exitCode: 1, + stdout: `Fetched text → ${outFile} (${result.text.length} chars, ${result.contentType})`, + stderr: "", + exitCode: 0, }; } - try { - const webConfig = loadWebConfig(); - const results = await searchImages( - query, - { - num: flags.num ? Number.parseInt(flags.num, 10) : undefined, - page: flags.page ? 
Number.parseInt(flags.page, 10) : undefined, - gl: flags.gl, - hl: flags.hl, - }, - { - proxyUrl: getProxyUrl(), - apiKeys: webConfig.apiKeys, - }, - webConfig.imageSearchProvider, - ); - - if (results.length === 0) { - return { stdout: "No images found.", stderr: "", exitCode: 0 }; - } + await writeVfsOutput(ctx, outFile, result.data); + + const size = + result.data.length >= 1024 + ? `${Math.round(result.data.length / 1024)}KB` + : `${result.data.length}B`; + + return { + stdout: `Downloaded → ${outFile} (${size}, ${result.contentType || "unknown type"})`, + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } +}); - if (flags.json === "true") { - return { - stdout: JSON.stringify(results, null, 2), - stderr: "", - exitCode: 0, - }; - } +const imageSearchCmd: Command = defineCommand("image-search", async (args) => { + const { flags, positional } = parseFlags(args); + const query = positional.join(" "); + + if (!query) { + return { + stdout: "", + stderr: + "Usage: image-search [--num=N] [--page=N] [--gl=COUNTRY] [--hl=LANG] [--json]\n" + + " query - Image search query\n" + + " --num - Number of results (default: 10)\n" + + " --page - Page number (default: 1)\n" + + " --gl - Country code, e.g. us, uk (default: us)\n" + + " --hl - Language code, e.g. en, fr (default: en)\n" + + " --json - Output as JSON\n" + + "\nRequires a Serper API key configured in Settings > Web > API Keys.\n", + exitCode: 1, + }; + } + + try { + const webConfig = loadWebConfig(); + const results = await searchImages( + query, + { + num: flags.num ? Number.parseInt(flags.num, 10) : undefined, + page: flags.page ? 
Number.parseInt(flags.page, 10) : undefined, + gl: flags.gl, + hl: flags.hl, + }, + { + proxyUrl: getProxyUrl(), + apiKeys: webConfig.apiKeys, + }, + webConfig.imageSearchProvider, + ); + + if (results.length === 0) { + return { stdout: "No images found.", stderr: "", exitCode: 0 }; + } - const lines = results.map( - (result, index) => - `${index + 1}. ${result.title}\n Image: ${result.imageUrl} (${result.imageWidth}×${result.imageHeight})\n Source: ${result.source} (${result.domain})\n Page: ${result.link}`, - ); + if (flags.json === "true") { return { - stdout: lines.join("\n\n"), + stdout: JSON.stringify(results, null, 2), stderr: "", exitCode: 0, }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; } - }), -}; -export interface CustomCommandsResult { - commands: CustomCommand[]; - promptSnippets: string[]; -} - -function collect(described: DescribedCommand[]): CustomCommandsResult { - const availableSnippets = described.filter( - (d) => !d.isAvailable || d.isAvailable(), - ); - return { - commands: described.map((d) => d.command), - promptSnippets: availableSnippets.map((d) => d.promptSnippet), - }; -} + const lines = results.map( + (result, index) => + `${index + 1}. ${result.title}\n Image: ${result.imageUrl} (${result.imageWidth}×${result.imageHeight})\n Source: ${result.source} (${result.domain})\n Page: ${result.link}`, + ); + return { + stdout: lines.join("\n\n"), + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } +}); function getBrowserProvider(): BrowserbaseProvider | BrowserUseProvider | null { const webConfig = loadWebConfig(); @@ -622,8 +568,8 @@ const browseCmd: Command = defineCommand("browse", async (args, ctx) => { export function getSharedCustomCommands( options: SharedCustomCommandOptions = {}, -): CustomCommandsResult { - const all: DescribedCommand[] = [ +): CustomCommand[] { + const commands: CustomCommand[] = [ pdfToText, pdfToImages, docxToText, @@ -634,8 +580,8 @@ export function getSharedCustomCommands( ]; if (options.includeImageSearch) { - all.push(imageSearchCmd); + commands.push(imageSearchCmd); } - return collect(all); + return commands; } From 69a8c904070591223c7e2aa64bc98d1798543c1c Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Sun, 22 Mar 2026 13:51:02 +0800 Subject: [PATCH 03/12] wip: cleanup --- packages/sdk/src/vfs/custom-commands.ts | 877 +++++++++++++----------- 1 file changed, 468 insertions(+), 409 deletions(-) diff --git a/packages/sdk/src/vfs/custom-commands.ts b/packages/sdk/src/vfs/custom-commands.ts index 5d2f2c9..9d79db6 100644 --- a/packages/sdk/src/vfs/custom-commands.ts +++ b/packages/sdk/src/vfs/custom-commands.ts @@ -4,7 +4,7 @@ import { configureBrowseCommand, executeBrowseCommand, } from "@office-agents/browser"; -import type { Command, CustomCommand } from "just-bash/browser"; +import type { CustomCommand } from "just-bash/browser"; import { defineCommand } from "just-bash/browser"; import { loadPdfDocument } from "../pdf"; import { loadSavedConfig } from "../provider-config"; @@ -24,6 +24,17 @@ interface CommandContext { fs: CommandFs; } +export interface DescribedCommand { + command: CustomCommand; + promptSnippet: string; + isAvailable?: () => boolean; +} + +export interface CustomCommandsResult { + commands: CustomCommand[]; + promptSnippets: string[]; +} + export interface 
SharedCustomCommandOptions { includeImageSearch?: boolean; } @@ -62,478 +73,511 @@ function getProxyUrl(): string | undefined { return config?.useProxy && config?.proxyUrl ? config.proxyUrl : undefined; } -const pdfToText: CustomCommand = { - name: "pdf-to-text", - load: async () => - defineCommand("pdf-to-text", async (args, ctx) => { - if (args.length < 2) { - return { - stdout: "", - stderr: - "Usage: pdf-to-text \n file - Path to PDF file in VFS\n outfile - Output text file\n", - exitCode: 1, - }; - } - - const [filePath, outFile] = args; - - try { - const data = await resolveVfsPath(ctx, filePath); - const doc = await loadPdfDocument(data); - const pages: string[] = []; - - for (let i = 1; i <= doc.numPages; i++) { - const page = await doc.getPage(i); - const content = await page.getTextContent(); - const text = content.items - .filter((item) => "str" in item) - .map((item) => (item as { str: string }).str) - .join(" "); - if (text.trim()) pages.push(text); +const pdfToText: DescribedCommand = { + promptSnippet: + "- pdf-to-text — Extract text from PDF to file. Use head/grep/tail to read selectively.", + command: { + name: "pdf-to-text", + load: async () => + defineCommand("pdf-to-text", async (args, ctx) => { + if (args.length < 2) { + return { + stdout: "", + stderr: + "Usage: pdf-to-text \n file - Path to PDF file in VFS\n outfile - Output text file\n", + exitCode: 1, + }; } - const fullText = pages.join("\n\n"); - await writeVfsOutput(ctx, outFile, fullText); - - return { - stdout: `Extracted text from ${doc.numPages} page(s) to ${outFile} (${fullText.length} chars)`, - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? 
error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } - }), -}; - -const pdfToImages: CustomCommand = { - name: "pdf-to-images", - load: async () => - defineCommand("pdf-to-images", async (args, ctx) => { - const positional = args.filter((arg) => !arg.startsWith("--")); - const scaleArg = args.find((arg) => arg.startsWith("--scale=")); - const pagesArg = args.find((arg) => arg.startsWith("--pages=")); + const [filePath, outFile] = args; - if (positional.length < 2) { - return { - stdout: "", - stderr: - "Usage: pdf-to-images [--scale=N] [--pages=1,3,5-8]\n file - Path to PDF file in VFS\n outdir - Output directory for PNG images\n --scale - Render scale factor (default: 2)\n --pages - Page selection (e.g. 1,3,5-8). Default: all\n", - exitCode: 1, - }; - } + try { + const data = await resolveVfsPath(ctx, filePath); + const doc = await loadPdfDocument(data); + const pages: string[] = []; + + for (let i = 1; i <= doc.numPages; i++) { + const page = await doc.getPage(i); + const content = await page.getTextContent(); + const text = content.items + .filter((item) => "str" in item) + .map((item) => (item as { str: string }).str) + .join(" "); + if (text.trim()) pages.push(text); + } - const [filePath, outDir] = positional; - const scale = scaleArg ? Number.parseFloat(scaleArg.split("=")[1]) : 2; + const fullText = pages.join("\n\n"); + await writeVfsOutput(ctx, outFile, fullText); - if (Number.isNaN(scale) || scale <= 0 || scale > 5) { - return { - stdout: "", - stderr: "Scale must be between 0 and 5", - exitCode: 1, - }; - } + return { + stdout: `Extracted text from ${doc.numPages} page(s) to ${outFile} (${fullText.length} chars)`, + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } + }), + }, +}; - try { - const data = await resolveVfsPath(ctx, filePath); - const doc = await loadPdfDocument(data); +const pdfToImages: DescribedCommand = { + promptSnippet: + "- pdf-to-images [--scale=N] [--pages=1,3,5-8] — Render PDF pages to PNG images. Use for scanned PDFs where text extraction won't work. Then use read to visually inspect the images.", + command: { + name: "pdf-to-images", + load: async () => + defineCommand("pdf-to-images", async (args, ctx) => { + const positional = args.filter((arg) => !arg.startsWith("--")); + const scaleArg = args.find((arg) => arg.startsWith("--scale=")); + const pagesArg = args.find((arg) => arg.startsWith("--pages=")); + + if (positional.length < 2) { + return { + stdout: "", + stderr: + "Usage: pdf-to-images [--scale=N] [--pages=1,3,5-8]\n file - Path to PDF file in VFS\n outdir - Output directory for PNG images\n --scale - Render scale factor (default: 2)\n --pages - Page selection (e.g. 1,3,5-8). Default: all\n", + exitCode: 1, + }; + } - const selectedPages = pagesArg - ? parsePageRanges(pagesArg.split("=")[1], doc.numPages) - : new Set(Array.from({ length: doc.numPages }, (_, i) => i + 1)); + const [filePath, outDir] = positional; + const scale = scaleArg ? 
Number.parseFloat(scaleArg.split("=")[1]) : 2; - if (selectedPages.size === 0) { + if (Number.isNaN(scale) || scale <= 0 || scale > 5) { return { stdout: "", - stderr: "No valid pages in selection", + stderr: "Scale must be between 0 and 5", exitCode: 1, }; } - const resolvedDir = resolvePath(ctx.cwd, outDir); try { - await ctx.fs.mkdir(resolvedDir, { recursive: true }); - } catch { - // directory may already exist - } + const data = await resolveVfsPath(ctx, filePath); + const doc = await loadPdfDocument(data); - const outputs: string[] = []; - const sortedPages = [...selectedPages].sort((a, b) => a - b); - - for (const pageNum of sortedPages) { - const page = await doc.getPage(pageNum); - const viewport = page.getViewport({ scale }); - - const canvas = document.createElement("canvas"); - canvas.width = Math.floor(viewport.width); - canvas.height = Math.floor(viewport.height); - const canvasCtx = canvas.getContext("2d"); - if (!canvasCtx) throw new Error("Failed to create canvas 2D context"); - - await page.render({ canvasContext: canvasCtx, canvas, viewport }) - .promise; - - const pngData = await new Promise((resolve, reject) => { - canvas.toBlob((blob) => { - if (!blob) return reject(new Error("Canvas toBlob failed")); - blob.arrayBuffer().then((buf) => resolve(new Uint8Array(buf))); - }, "image/png"); - }); - - const pagePath = `${resolvedDir}/page-${pageNum}.png`; - await ctx.fs.writeFile(pagePath, pngData); - outputs.push( - `page-${pageNum}.png (${Math.round(pngData.length / 1024)}KB, ${canvas.width}×${canvas.height})`, - ); + const selectedPages = pagesArg + ? 
parsePageRanges(pagesArg.split("=")[1], doc.numPages) + : new Set(Array.from({ length: doc.numPages }, (_, i) => i + 1)); - canvas.width = 0; - canvas.height = 0; - } + if (selectedPages.size === 0) { + return { + stdout: "", + stderr: "No valid pages in selection", + exitCode: 1, + }; + } - return { - stdout: `Converted ${outputs.length} page(s) from ${doc.numPages} total to ${outDir}/:\n${outputs.map((o) => ` ${o}`).join("\n")}`, - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } - }), + const resolvedDir = resolvePath(ctx.cwd, outDir); + try { + await ctx.fs.mkdir(resolvedDir, { recursive: true }); + } catch { + // directory may already exist + } + + const outputs: string[] = []; + const sortedPages = [...selectedPages].sort((a, b) => a - b); + + for (const pageNum of sortedPages) { + const page = await doc.getPage(pageNum); + const viewport = page.getViewport({ scale }); + + const canvas = document.createElement("canvas"); + canvas.width = Math.floor(viewport.width); + canvas.height = Math.floor(viewport.height); + const canvasCtx = canvas.getContext("2d"); + if (!canvasCtx) + throw new Error("Failed to create canvas 2D context"); + + await page.render({ canvasContext: canvasCtx, canvas, viewport }) + .promise; + + const pngData = await new Promise((resolve, reject) => { + canvas.toBlob((blob) => { + if (!blob) return reject(new Error("Canvas toBlob failed")); + blob.arrayBuffer().then((buf) => resolve(new Uint8Array(buf))); + }, "image/png"); + }); + + const pagePath = `${resolvedDir}/page-${pageNum}.png`; + await ctx.fs.writeFile(pagePath, pngData); + outputs.push( + `page-${pageNum}.png (${Math.round(pngData.length / 1024)}KB, ${canvas.width}×${canvas.height})`, + ); + + canvas.width = 0; + canvas.height = 0; + } + + return { + stdout: `Converted ${outputs.length} page(s) from ${doc.numPages} total to ${outDir}/:\n${outputs.map((o) 
=> ` ${o}`).join("\n")}`, + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } + }), + }, }; -const docxToText: CustomCommand = { - name: "docx-to-text", - load: async () => - defineCommand("docx-to-text", async (args, ctx) => { - if (args.length < 2) { - return { - stdout: "", - stderr: - "Usage: docx-to-text \n file - Path to DOCX file in VFS\n outfile - Output text file\n", - exitCode: 1, - }; - } +const docxToText: DescribedCommand = { + promptSnippet: + "- docx-to-text — Extract text from DOCX to file.", + command: { + name: "docx-to-text", + load: async () => + defineCommand("docx-to-text", async (args, ctx) => { + if (args.length < 2) { + return { + stdout: "", + stderr: + "Usage: docx-to-text \n file - Path to DOCX file in VFS\n outfile - Output text file\n", + exitCode: 1, + }; + } + + const [filePath, outFile] = args; - const [filePath, outFile] = args; - - try { - const data = await resolveVfsPath(ctx, filePath); - const mammoth = await import("mammoth"); - const ab = data.buffer.slice( - data.byteOffset, - data.byteOffset + data.byteLength, - ); - const bufferCtor = ( - globalThis as typeof globalThis & { - Buffer?: { from(input: ArrayBuffer): unknown }; + try { + const data = await resolveVfsPath(ctx, filePath); + const mammoth = await import("mammoth"); + const ab = data.buffer.slice( + data.byteOffset, + data.byteOffset + data.byteLength, + ); + const bufferCtor = ( + globalThis as typeof globalThis & { + Buffer?: { from(input: ArrayBuffer): unknown }; + } + ).Buffer; + const options: Record = { + arrayBuffer: ab, + }; + if (bufferCtor) { + options.buffer = bufferCtor.from(ab); } - ).Buffer; - const options: Record = { - arrayBuffer: ab, - }; - if (bufferCtor) { - options.buffer = bufferCtor.from(ab); - } - const result = await mammoth.extractRawText( - options as unknown as Parameters[0], - ); + const result = await 
mammoth.extractRawText( + options as unknown as Parameters[0], + ); - await writeVfsOutput(ctx, outFile, result.value); + await writeVfsOutput(ctx, outFile, result.value); - return { - stdout: `Extracted text from DOCX to ${outFile} (${result.value.length} chars)`, - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } - }), + return { + stdout: `Extracted text from DOCX to ${outFile} (${result.value.length} chars)`, + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } + }), + }, }; -const xlsxToCsv: CustomCommand = { - name: "xlsx-to-csv", - load: async () => - defineCommand("xlsx-to-csv", async (args, ctx) => { - if (args.length < 2) { - return { - stdout: "", - stderr: - "Usage: xlsx-to-csv [sheet]\n file - Path to XLSX/XLS/ODS file in VFS\n outfile - Output CSV file (for multiple sheets: ..csv)\n sheet - Sheet name or 0-based index (optional, exports all sheets if omitted)\n", - exitCode: 1, - }; - } +const xlsxToCsv: DescribedCommand = { + promptSnippet: + "- xlsx-to-csv [sheet] — Convert XLSX/XLS/ODS sheet to CSV. 
Sheet by name or 0-based index.", + command: { + name: "xlsx-to-csv", + load: async () => + defineCommand("xlsx-to-csv", async (args, ctx) => { + if (args.length < 2) { + return { + stdout: "", + stderr: + "Usage: xlsx-to-csv [sheet]\n file - Path to XLSX/XLS/ODS file in VFS\n outfile - Output CSV file (for multiple sheets: ..csv)\n sheet - Sheet name or 0-based index (optional, exports all sheets if omitted)\n", + exitCode: 1, + }; + } + + const [filePath, outFile, sheetArg] = args; - const [filePath, outFile, sheetArg] = args; - - try { - const data = await resolveVfsPath(ctx, filePath); - const XLSX = await import("xlsx"); - const workbook = XLSX.read(data, { type: "array" }); - - if (sheetArg) { - let sheetName: string; - if (workbook.SheetNames.includes(sheetArg)) { - sheetName = sheetArg; - } else { - const idx = Number.parseInt(sheetArg, 10); - if ( - !Number.isNaN(idx) && - idx >= 0 && - idx < workbook.SheetNames.length - ) { - sheetName = workbook.SheetNames[idx]; + try { + const data = await resolveVfsPath(ctx, filePath); + const XLSX = await import("xlsx"); + const workbook = XLSX.read(data, { type: "array" }); + + if (sheetArg) { + let sheetName: string; + if (workbook.SheetNames.includes(sheetArg)) { + sheetName = sheetArg; } else { + const idx = Number.parseInt(sheetArg, 10); + if ( + !Number.isNaN(idx) && + idx >= 0 && + idx < workbook.SheetNames.length + ) { + sheetName = workbook.SheetNames[idx]; + } else { + return { + stdout: "", + stderr: `Sheet not found: ${sheetArg}. Available: ${workbook.SheetNames.join(", ")}`, + exitCode: 1, + }; + } + } + + const sheet = workbook.Sheets[sheetName]; + if (!sheet) { return { stdout: "", - stderr: `Sheet not found: ${sheetArg}. 
Available: ${workbook.SheetNames.join(", ")}`, + stderr: `Sheet "${sheetName}" not found`, exitCode: 1, }; } + + const csv = XLSX.utils.sheet_to_csv(sheet); + await writeVfsOutput(ctx, outFile, csv); + + return { + stdout: `Converted sheet "${sheetName}" → ${outFile}`, + stderr: "", + exitCode: 0, + }; } - const sheet = workbook.Sheets[sheetName]; - if (!sheet) { + const names = workbook.SheetNames; + + if (names.length === 1) { + const csv = XLSX.utils.sheet_to_csv(workbook.Sheets[names[0]]); + await writeVfsOutput(ctx, outFile, csv); return { - stdout: "", - stderr: `Sheet "${sheetName}" not found`, - exitCode: 1, + stdout: `Converted sheet "${names[0]}" → ${outFile}`, + stderr: "", + exitCode: 0, }; } - const csv = XLSX.utils.sheet_to_csv(sheet); - await writeVfsOutput(ctx, outFile, csv); + const dotIdx = outFile.lastIndexOf("."); + const base = dotIdx > 0 ? outFile.substring(0, dotIdx) : outFile; + const ext = dotIdx > 0 ? outFile.substring(dotIdx) : ".csv"; + const outputs: string[] = []; + + for (const name of names) { + const sheet = workbook.Sheets[name]; + if (!sheet) continue; + const csv = XLSX.utils.sheet_to_csv(sheet); + const safeName = name.replace(/[/\\?*[\]]/g, "_"); + const path = `${base}.${safeName}${ext}`; + await writeVfsOutput(ctx, path, csv); + outputs.push(` "${name}" → ${path}`); + } return { - stdout: `Converted sheet "${sheetName}" → ${outFile}`, + stdout: `Converted ${names.length} sheets:\n${outputs.join("\n")}`, stderr: "", exitCode: 0, }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; } + }), + }, +}; - const names = workbook.SheetNames; +const webSearchCmd: DescribedCommand = { + promptSnippet: + "- web-search [--max=N] [--region=REGION] [--time=d|w|m|y] [--page=N] [--json] — Search the web. 
Returns title, URL, and snippet for each result.", + command: defineCommand("web-search", async (args) => { + const { flags, positional } = parseFlags(args); + const query = positional.join(" "); - if (names.length === 1) { - const csv = XLSX.utils.sheet_to_csv(workbook.Sheets[names[0]]); - await writeVfsOutput(ctx, outFile, csv); - return { - stdout: `Converted sheet "${names[0]}" → ${outFile}`, - stderr: "", - exitCode: 0, - }; - } + if (!query) { + return { + stdout: "", + stderr: + "Usage: web-search [--max=N] [--region=REGION] [--time=d|w|m|y] [--page=N] [--json]\n query - Search query\n --max - Max results (default: 10)\n --region - Region code, e.g. us-en, uk-en (default: us-en)\n --time - Time filter: d(ay), w(eek), m(onth), y(ear)\n --page - Page number (default: 1)\n --json - Output as JSON\n", + exitCode: 1, + }; + } - const dotIdx = outFile.lastIndexOf("."); - const base = dotIdx > 0 ? outFile.substring(0, dotIdx) : outFile; - const ext = dotIdx > 0 ? outFile.substring(dotIdx) : ".csv"; - const outputs: string[] = []; - - for (const name of names) { - const sheet = workbook.Sheets[name]; - if (!sheet) continue; - const csv = XLSX.utils.sheet_to_csv(sheet); - const safeName = name.replace(/[/\\?*[\]]/g, "_"); - const path = `${base}.${safeName}${ext}`; - await writeVfsOutput(ctx, path, csv); - outputs.push(` "${name}" → ${path}`); - } + try { + const webConfig = loadWebConfig(); + const results = await searchWeb( + query, + { + maxResults: flags.max ? Number.parseInt(flags.max, 10) : 10, + region: flags.region, + timelimit: flags.time as "d" | "w" | "m" | "y" | undefined, + page: flags.page ? 
Number.parseInt(flags.page, 10) : undefined, + }, + { + proxyUrl: getProxyUrl(), + apiKeys: webConfig.apiKeys, + }, + webConfig.searchProvider, + ); + + if (results.length === 0) { + return { stdout: "No results found.", stderr: "", exitCode: 0 }; + } + if (flags.json === "true") { return { - stdout: `Converted ${names.length} sheets:\n${outputs.join("\n")}`, + stdout: JSON.stringify(results, null, 2), stderr: "", exitCode: 0, }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; } - }), + + const lines = results.map( + (result, index) => + `${index + 1}. ${result.title}\n ${result.href}\n ${result.body}`, + ); + return { + stdout: lines.join("\n\n"), + stderr: "", + exitCode: 0, + }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; + } + }), }; -const webSearchCmd: Command = defineCommand("web-search", async (args) => { - const { flags, positional } = parseFlags(args); - const query = positional.join(" "); - - if (!query) { - return { - stdout: "", - stderr: - "Usage: web-search [--max=N] [--region=REGION] [--time=d|w|m|y] [--page=N] [--json]\n query - Search query\n --max - Max results (default: 10)\n --region - Region code, e.g. us-en, uk-en (default: us-en)\n --time - Time filter: d(ay), w(eek), m(onth), y(ear)\n --page - Page number (default: 1)\n --json - Output as JSON\n", - exitCode: 1, - }; - } +const webFetchCmd: DescribedCommand = { + promptSnippet: + "- web-fetch — Fetch a web page and extract its readable content to a file. Use head/grep/tail to read selectively.", + command: defineCommand("web-fetch", async (args, ctx) => { + const url = args[0]; + const outFile = args[1]; - try { - const webConfig = loadWebConfig(); - const results = await searchWeb( - query, - { - maxResults: flags.max ? 
Number.parseInt(flags.max, 10) : 10, - region: flags.region, - timelimit: flags.time as "d" | "w" | "m" | "y" | undefined, - page: flags.page ? Number.parseInt(flags.page, 10) : undefined, - }, - { - proxyUrl: getProxyUrl(), - apiKeys: webConfig.apiKeys, - }, - webConfig.searchProvider, - ); - - if (results.length === 0) { - return { stdout: "No results found.", stderr: "", exitCode: 0 }; + if (!url || !outFile) { + return { + stdout: "", + stderr: + "Usage: web-fetch \n url - URL to fetch\n outfile - Output file path\n\nFetches a URL and saves to a file.\n - HTML pages: extracts readable content (Markdown)\n - Binary files (PDF, DOCX, XLSX, etc.): downloads raw file\n - Text/JSON/XML: saves as-is\n", + exitCode: 1, + }; } - if (flags.json === "true") { + try { + const webConfig = loadWebConfig(); + const result = await fetchWeb( + url, + { + proxyUrl: getProxyUrl(), + apiKeys: webConfig.apiKeys, + }, + webConfig.fetchProvider, + ); + + if (result.kind === "text") { + const header = [ + result.title ? `Title: ${result.title}` : "", + ...Object.entries(result.metadata || {}).map( + ([key, value]) => `${key}: ${value}`, + ), + ] + .filter(Boolean) + .join("\n"); + const output = header ? `${header}\n\n${result.text}` : result.text; + + await writeVfsOutput(ctx, outFile, output); + return { + stdout: `Fetched text → ${outFile} (${result.text.length} chars, ${result.contentType})`, + stderr: "", + exitCode: 0, + }; + } + + await writeVfsOutput(ctx, outFile, result.data); + + const size = + result.data.length >= 1024 + ? `${Math.round(result.data.length / 1024)}KB` + : `${result.data.length}B`; + return { - stdout: JSON.stringify(results, null, 2), + stdout: `Downloaded → ${outFile} (${size}, ${result.contentType || "unknown type"})`, stderr: "", exitCode: 0, }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; } + }), +}; - const lines = results.map( - (result, index) => - `${index + 1}. ${result.title}\n ${result.href}\n ${result.body}`, - ); - return { - stdout: lines.join("\n\n"), - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } -}); - -const webFetchCmd: Command = defineCommand("web-fetch", async (args, ctx) => { - const url = args[0]; - const outFile = args[1]; - - if (!url || !outFile) { - return { - stdout: "", - stderr: - "Usage: web-fetch \n url - URL to fetch\n outfile - Output file path\n\nFetches a URL and saves to a file.\n - HTML pages: extracts readable content (Markdown)\n - Binary files (PDF, DOCX, XLSX, etc.): downloads raw file\n - Text/JSON/XML: saves as-is\n", - exitCode: 1, - }; - } - - try { +const imageSearchCmd: DescribedCommand = { + promptSnippet: + "- image-search [--num=N] [--page=N] [--gl=COUNTRY] [--hl=LANG] [--json] — Search for images. Returns image URLs, dimensions, source, and page link.", + isAvailable: () => { const webConfig = loadWebConfig(); - const result = await fetchWeb( - url, - { - proxyUrl: getProxyUrl(), - apiKeys: webConfig.apiKeys, - }, - webConfig.fetchProvider, - ); - - if (result.kind === "text") { - const header = [ - result.title ? `Title: ${result.title}` : "", - ...Object.entries(result.metadata || {}).map( - ([key, value]) => `${key}: ${value}`, - ), - ] - .filter(Boolean) - .join("\n"); - const output = header ? 
`${header}\n\n${result.text}` : result.text; - - await writeVfsOutput(ctx, outFile, output); + return !!webConfig.apiKeys?.serper; + }, + command: defineCommand("image-search", async (args) => { + const { flags, positional } = parseFlags(args); + const query = positional.join(" "); + + if (!query) { return { - stdout: `Fetched text → ${outFile} (${result.text.length} chars, ${result.contentType})`, - stderr: "", - exitCode: 0, + stdout: "", + stderr: + "Usage: image-search [--num=N] [--page=N] [--gl=COUNTRY] [--hl=LANG] [--json]\n" + + " query - Image search query\n" + + " --num - Number of results (default: 10)\n" + + " --page - Page number (default: 1)\n" + + " --gl - Country code, e.g. us, uk (default: us)\n" + + " --hl - Language code, e.g. en, fr (default: en)\n" + + " --json - Output as JSON\n" + + "\nRequires a Serper API key configured in Settings > Web > API Keys.\n", + exitCode: 1, }; } - await writeVfsOutput(ctx, outFile, result.data); - - const size = - result.data.length >= 1024 - ? `${Math.round(result.data.length / 1024)}KB` - : `${result.data.length}B`; - - return { - stdout: `Downloaded → ${outFile} (${size}, ${result.contentType || "unknown type"})`, - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } -}); - -const imageSearchCmd: Command = defineCommand("image-search", async (args) => { - const { flags, positional } = parseFlags(args); - const query = positional.join(" "); - - if (!query) { - return { - stdout: "", - stderr: - "Usage: image-search [--num=N] [--page=N] [--gl=COUNTRY] [--hl=LANG] [--json]\n" + - " query - Image search query\n" + - " --num - Number of results (default: 10)\n" + - " --page - Page number (default: 1)\n" + - " --gl - Country code, e.g. us, uk (default: us)\n" + - " --hl - Language code, e.g. 
en, fr (default: en)\n" + - " --json - Output as JSON\n" + - "\nRequires a Serper API key configured in Settings > Web > API Keys.\n", - exitCode: 1, - }; - } + try { + const webConfig = loadWebConfig(); + const results = await searchImages( + query, + { + num: flags.num ? Number.parseInt(flags.num, 10) : undefined, + page: flags.page ? Number.parseInt(flags.page, 10) : undefined, + gl: flags.gl, + hl: flags.hl, + }, + { + proxyUrl: getProxyUrl(), + apiKeys: webConfig.apiKeys, + }, + webConfig.imageSearchProvider, + ); + + if (results.length === 0) { + return { stdout: "No images found.", stderr: "", exitCode: 0 }; + } - try { - const webConfig = loadWebConfig(); - const results = await searchImages( - query, - { - num: flags.num ? Number.parseInt(flags.num, 10) : undefined, - page: flags.page ? Number.parseInt(flags.page, 10) : undefined, - gl: flags.gl, - hl: flags.hl, - }, - { - proxyUrl: getProxyUrl(), - apiKeys: webConfig.apiKeys, - }, - webConfig.imageSearchProvider, - ); - - if (results.length === 0) { - return { stdout: "No images found.", stderr: "", exitCode: 0 }; - } + if (flags.json === "true") { + return { + stdout: JSON.stringify(results, null, 2), + stderr: "", + exitCode: 0, + }; + } - if (flags.json === "true") { + const lines = results.map( + (result, index) => + `${index + 1}. ${result.title}\n Image: ${result.imageUrl} (${result.imageWidth}×${result.imageHeight})\n Source: ${result.source} (${result.domain})\n Page: ${result.link}`, + ); return { - stdout: JSON.stringify(results, null, 2), + stdout: lines.join("\n\n"), stderr: "", exitCode: 0, }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + return { stdout: "", stderr: msg, exitCode: 1 }; } - - const lines = results.map( - (result, index) => - `${index + 1}. 
${result.title}\n Image: ${result.imageUrl} (${result.imageWidth}×${result.imageHeight})\n Source: ${result.source} (${result.domain})\n Page: ${result.link}`, - ); - return { - stdout: lines.join("\n\n"), - stderr: "", - exitCode: 0, - }; - } catch (error) { - const msg = error instanceof Error ? error.message : String(error); - return { stdout: "", stderr: msg, exitCode: 1 }; - } -}); + }), +}; function getBrowserProvider(): BrowserbaseProvider | BrowserUseProvider | null { const webConfig = loadWebConfig(); @@ -555,21 +599,36 @@ function getBrowserProvider(): BrowserbaseProvider | BrowserUseProvider | null { return null; } -const browseCmd: Command = defineCommand("browse", async (args, ctx) => { - configureBrowseCommand({ - getProvider: () => getBrowserProvider(), - writeFile: ctx - ? (path, data) => writeVfsOutput(ctx, path, data).then(() => {}) - : undefined, - }); +const browseCmd: DescribedCommand = { + promptSnippet: + "- browse [options] — Open a cloud browser session to interact with a web page. Supports navigation, screenshots, clicks, form filling, and data extraction.", + isAvailable: () => getBrowserProvider() !== null, + command: defineCommand("browse", async (args, ctx) => { + configureBrowseCommand({ + getProvider: () => getBrowserProvider(), + writeFile: ctx + ? 
(path, data) => writeVfsOutput(ctx, path, data).then(() => {}) + : undefined, + }); - return executeBrowseCommand(args); -}); + return executeBrowseCommand(args); + }), +}; + +function collect(described: DescribedCommand[]): CustomCommandsResult { + const availableSnippets = described.filter( + (d) => !d.isAvailable || d.isAvailable(), + ); + return { + commands: described.map((d) => d.command), + promptSnippets: availableSnippets.map((d) => d.promptSnippet), + }; +} export function getSharedCustomCommands( options: SharedCustomCommandOptions = {}, -): CustomCommand[] { - const commands: CustomCommand[] = [ +): CustomCommandsResult { + const all: DescribedCommand[] = [ pdfToText, pdfToImages, docxToText, @@ -580,8 +639,8 @@ export function getSharedCustomCommands( ]; if (options.includeImageSearch) { - commands.push(imageSearchCmd); + all.push(imageSearchCmd); } - return commands; + return collect(all); } From 092891b15065b3b6ff5ea5906ee39b0a7040613d Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Sun, 22 Mar 2026 14:16:26 +0800 Subject: [PATCH 04/12] chore: tw --- packages/core/src/chat/browser-viewer.svelte | 135 ++++--------------- 1 file changed, 29 insertions(+), 106 deletions(-) diff --git a/packages/core/src/chat/browser-viewer.svelte b/packages/core/src/chat/browser-viewer.svelte index a86bb29..d5e03bd 100644 --- a/packages/core/src/chat/browser-viewer.svelte +++ b/packages/core/src/chat/browser-viewer.svelte @@ -36,31 +36,46 @@ {#if visible && hasLiveUrl} -
-
-
+
+
+
Live Browser
-
- -
{#if expanded} -
+
{/if} @@ -68,106 +83,14 @@ {/if} {#if hasLiveUrl && !visible} - {/if} - - From 5e30f959e94642a04998d2d5a037536a70d288d3 Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Sun, 22 Mar 2026 15:30:55 +0800 Subject: [PATCH 05/12] chore: add stop browser button --- packages/browser/README.md | 97 +++-- packages/browser/src/browser.ts | 18 +- packages/browser/src/cdp.ts | 402 ++++++++++++++---- packages/browser/src/command.ts | 44 +- packages/browser/src/index.ts | 10 +- packages/browser/src/page.ts | 4 + packages/browser/src/providers/browser-use.ts | 4 +- packages/browser/src/providers/browserbase.ts | 3 + packages/core/src/chat/browser-viewer.svelte | 29 +- packages/core/src/chat/chat-interface.svelte | 2 +- packages/sdk/src/index.ts | 2 + packages/sdk/src/runtime.ts | 2 + 12 files changed, 486 insertions(+), 131 deletions(-) diff --git a/packages/browser/README.md b/packages/browser/README.md index 9987f0e..741a8b0 100644 --- a/packages/browser/README.md +++ b/packages/browser/README.md @@ -8,21 +8,21 @@ The core primitive is a **CDP WebSocket URL**. Any provider that gives you one w ``` Your browser (Office taskpane, web app, etc.) - → WebSocket to wss://connect.browserbase.com/?signingKey=... + → WebSocket to wss:// → CDP JSON-RPC messages - → Cloud Chrome instance + → Cloud browser instance ``` ## Providers The `BrowserProvider` interface abstracts session creation. The CDP URL is the universal handoff point — any cloud browser provider that exposes CDP works. 
-| Provider | Status | Notes | -|----------|--------|-------| -| [Browserbase](https://browserbase.com) | ✅ Built-in | Anti-bot stealth, CAPTCHA solving, residential proxies | -| [Browser Use](https://browser-use.com) | ✅ Built-in | Cloud browser with proxy support, session profiles | -| Any CDP URL | ✅ `Browser.connect()` | Direct WebSocket connection | -| Custom | ✅ Implement `BrowserProvider` | Just return a `cdpUrl` from `createSession()` | +| Provider | Status | +|----------|--------| +| [Browserbase](https://browserbase.com) | ✅ Built-in | +| [Browser Use](https://browser-use.com) | ✅ Built-in | +| Any CDP URL | ✅ `Browser.connect()` | +| Custom | ✅ Implement `BrowserProvider` | ## Usage @@ -41,16 +41,19 @@ const browser = await Browser.launch({ provider }); // Navigate await browser.page.goto("https://example.com"); -// Get accessibility tree with element refs +// Get the simplified page snapshot with stable element refs const snapshot = await browser.page.snapshot(); console.log(snapshot.tree); -// [0-1] document: Example Domain -// [0-5] heading: Example Domain -// [0-8] paragraph: This domain is for use in... -// [0-12] link: More information... +// Example output from https://example.com on 2026-03-22: +// - div +// - heading "Example Domain" [ref=e1] +// - paragraph +// - StaticText "This domain is for use in documentation examples without needing permission. Avoid use in operations." +// - paragraph +// - link "Learn more" [ref=e2] // Click by ref from snapshot -await browser.page.clickRef("0-12"); +await browser.page.clickRef("e2"); // Screenshot const { base64 } = await browser.page.screenshot(); @@ -78,9 +81,14 @@ const provider = new BrowserUseProvider({ apiKey: "bu-api-...", }); -const browser = await Browser.launch({ provider }); +const browser = await Browser.launch({ + provider, + cdpOptions: { requestTimeoutMs: 10_000 }, +}); + await browser.page.goto("https://example.com"); -// ... 
same API as Browserbase +const snapshot = await browser.page.snapshot(); +console.log(snapshot.tree); await browser.close(); ``` @@ -102,18 +110,31 @@ await browser.close(); ```typescript import { CdpClient } from "@office-agents/browser"; -const cdp = await CdpClient.connect("wss://..."); +const cdp = await CdpClient.connect("wss://...", { + requestTimeoutMs: 10_000, +}); + +// Root/browser-scoped commands work directly on the client +const { targetInfos } = await cdp.api.Target.getTargets(); +console.log(targetInfos.length); + +// Attach to a page target for Page.* / Runtime.* domains +const page = await cdp.attachToFirstPage(); +const session = page.cdpSession!; -// Send any CDP command -await cdp.send("Page.navigate", { url: "https://example.com" }); +await session.api.Page.navigate({ url: "https://example.com" }); -// Listen for events -cdp.on("Page.loadEventFired", (params) => { +session.api.Page.on("loadEventFired", () => { console.log("Page loaded"); }); -// Screenshot -const { data } = await cdp.send("Page.captureScreenshot", { format: "png" }); +const evalResult = await session.api.Runtime.evaluate({ + expression: "document.title", + returnByValue: true, +}); +console.log(evalResult.result?.value); + +const { data } = await session.api.Page.captureScreenshot({ format: "png" }); await cdp.close(); ``` @@ -122,11 +143,19 @@ await cdp.close(); ### Browser -- `Browser.launch({ provider, sessionOptions? })` — Create a cloud browser session via provider -- `Browser.connect({ cdpUrl })` — Connect directly to any CDP WebSocket URL +- `Browser.launch({ provider, sessionOptions?, cdpOptions? })` — Create a cloud browser session via provider and auto-attach to the first page target +- `Browser.connect({ cdpUrl, cdpOptions? 
})` — Connect directly to any CDP WebSocket URL and auto-attach to the first page target - `browser.page` — The active `Page` instance - `browser.close()` — Close browser and release session +### CdpClient + +- `CdpClient.connect(wsUrl, { requestTimeoutMs? })` — Connect to a CDP WebSocket +- `cdp.send("Target.getTargets")` — Send typed root/browser-scoped CDP commands using method strings from `devtools-protocol` +- `cdp.api.Target.getTargets()` — Generated domain proxy API typed from `devtools-protocol/types/protocol-proxy-api` +- `cdp.attachToFirstPage()` / `cdp.attachToTarget(targetId)` — Ergonomic helpers that attach to a page target and return a `Page` +- `cdp.releaseSession(sessionId)` — Drop a detached target session from the local session cache + ### Page **Navigation:** @@ -135,13 +164,14 @@ await cdp.close(); - `page.goBack()` / `page.goForward()` — History navigation **State:** -- `page.snapshot()` — Accessibility tree with element refs (preferred for agents) +- `page.snapshot()` — Simplified accessibility/DOM snapshot with refs like `e1`, `e2` - `page.screenshot({ fullPage?, format?, quality? })` — Visual screenshot as base64 - `page.getUrl()` / `page.getTitle()` / `page.getInfo()` - `page.getText(selector?)` / `page.getHtml(selector?)` **Interaction:** -- `page.clickRef(ref)` — Click element by ref from snapshot (e.g. `"0-5"`, `"@0-5"`) +- `page.clickRef(ref)` — Click element by ref from snapshot +- `page.cdpSession` — The attached target session when the page came from `Browser` or `cdp.attachTo...()` - `page.click(x, y, { button?, clickCount? })` — Click at coordinates - `page.type(text, { delay? 
})` — Type text - `page.pressKey(key)` — Press key or combo (`"Enter"`, `"Cmd+A"`, `"Ctrl+C"`) @@ -175,7 +205,18 @@ interface BrowserSession { ## How it works -This package is a direct port of the command set from [`@browserbasehq/browse-cli`](https://github.com/browserbase/stagehand/tree/main/packages/cli), rewritten to use **browser-native `WebSocket`** instead of the Node.js `ws` library. +The high-level `browse` command shape and CLI ergonomics are adapted from [`vercel-labs/agent-browser`](https://github.com/vercel-labs/agent-browser), while the implementation here is rewritten around direct CDP calls and **browser-native `WebSocket`** transport instead of a Node-specific client. + +A quick live sanity check on 2026-03-22 against Browser Use + `https://example.com` produced this snapshot: + +```text +- div + - heading "Example Domain" [ref=e1] + - paragraph + - StaticText "This domain is for use in documentation examples without needing permission. Avoid use in operations." + - paragraph + - link "Learn more" [ref=e2] +``` Every command maps to CDP protocol calls: diff --git a/packages/browser/src/browser.ts b/packages/browser/src/browser.ts index c32458a..b7ac1b6 100644 --- a/packages/browser/src/browser.ts +++ b/packages/browser/src/browser.ts @@ -1,5 +1,4 @@ -import type { Protocol } from "devtools-protocol/types/protocol.js"; -import { CdpClient } from "./cdp.js"; +import { CdpClient, type CdpClientOptions } from "./cdp.js"; import { Page } from "./page.js"; import type { BrowserProvider, @@ -10,10 +9,12 @@ import type { export interface BrowserOptions { provider: BrowserProvider; sessionOptions?: CreateSessionOptions; + cdpOptions?: CdpClientOptions; } export interface ConnectOptions { cdpUrl: string; + cdpOptions?: CdpClientOptions; } export interface BrowserTab { @@ -40,7 +41,10 @@ export class Browser { options.sessionOptions, ); try { - browser.cdp = await CdpClient.connect(browser.session.cdpUrl); + browser.cdp = await CdpClient.connect( + 
browser.session.cdpUrl, + options.cdpOptions, + ); browser._page = await Page.attachToFirstPage(browser.cdp); browser.currentTargetId = browser._page.targetId ?? null; } catch (err) { @@ -52,7 +56,7 @@ export class Browser { static async connect(options: ConnectOptions): Promise { const browser = new Browser(); - browser.cdp = await CdpClient.connect(options.cdpUrl); + browser.cdp = await CdpClient.connect(options.cdpUrl, options.cdpOptions); browser._page = await Page.attachToFirstPage(browser.cdp); browser.currentTargetId = browser._page.targetId ?? null; return browser; @@ -81,12 +85,14 @@ export class Browser { } private async attachToTarget(targetId: string): Promise { - if (this._page?.sessionId) { + const previousSessionId = this._page?.sessionId; + if (previousSessionId) { await this.cdpClient .send("Target.detachFromTarget", { - sessionId: this._page.sessionId, + sessionId: previousSessionId, }) .catch(() => {}); + this.cdpClient.releaseSession(previousSessionId, "detached by client"); } const page = await Page.attachToTarget(this.cdpClient, targetId); this._page = page; diff --git a/packages/browser/src/cdp.ts b/packages/browser/src/cdp.ts index 493b9e5..39f5b29 100644 --- a/packages/browser/src/cdp.ts +++ b/packages/browser/src/cdp.ts @@ -1,7 +1,14 @@ import type { ProtocolMapping } from "devtools-protocol/types/protocol-mapping.js"; +import type { ProtocolProxyApi } from "devtools-protocol/types/protocol-proxy-api.js"; +import { Page } from "./page.js"; export type CdpCommands = ProtocolMapping.Commands; export type CdpEvents = ProtocolMapping.Events; +export type CdpProtocolApi = ProtocolProxyApi.ProtocolApi; + +export interface CdpClientOptions { + requestTimeoutMs?: number; +} type CommandMethod = keyof CdpCommands; type EventMethod = keyof CdpEvents; @@ -30,13 +37,13 @@ export type CdpEventHandler = ( interface CdpResponseMessage { id: number; sessionId?: string; - result?: Record; + result?: unknown; error?: { code: number; message: string; data?: 
string }; } interface CdpEventMessage { method: string; - params?: Record; + params?: unknown; sessionId?: string; } @@ -46,6 +53,156 @@ interface InflightRequest { method: string; } +interface CdpCommandTarget { + send( + method: M, + ...args: SendArgs + ): Promise>; + on(event: E, handler: CdpEventHandler): void; + off(event: E, handler: CdpEventHandler): void; +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function normalizeError(error: unknown, prefix: string): Error { + if (error instanceof Error) { + return new Error(`${prefix}: ${error.message}`); + } + return new Error(`${prefix}: ${String(error)}`); +} + +function formatCdpError( + method: string, + error: { code: number; message: string; data?: string }, +): Error { + const suffix = error.data ? ` (${error.data})` : ""; + return new Error( + `CDP ${method} failed (${error.code}): ${error.message}${suffix}`, + ); +} + +function sendDynamic( + target: CdpCommandTarget, + method: string, + params?: unknown, +): Promise { + if (params === undefined) { + return target.send(method as CommandMethod) as Promise; + } + return target.send( + method as CommandMethod, + params as SendArgs[0], + ) as Promise; +} + +function createProtocolApi(target: CdpCommandTarget): CdpProtocolApi { + const domainCache = new Map(); + + return new Proxy({} as CdpProtocolApi, { + get(_protocol, domainName) { + if (typeof domainName !== "string") return undefined; + + let domain = domainCache.get(domainName); + if (!domain) { + domain = new Proxy( + {}, + { + get(_domain, memberName) { + if (typeof memberName !== "string") return undefined; + if (memberName === "on") { + return (event: string, listener: (params: unknown) => void) => { + target.on( + `${domainName}.${event}` as EventMethod, + listener as CdpEventHandler, + ); + }; + } + if (memberName === "off") { + return (event: string, listener: (params: unknown) => void) => { + 
target.off( + `${domainName}.${event}` as EventMethod, + listener as CdpEventHandler, + ); + }; + } + return (params?: unknown) => + sendDynamic(target, `${domainName}.${memberName}`, params); + }, + }, + ); + domainCache.set(domainName, domain); + } + + return domain; + }, + }); +} + +function sendRequest(options: { + id: number; + method: M; + params: SendArgs[0] | undefined; + sessionId?: string; + store: Map; + timeoutMs?: number; + sendRaw: (payload: Record) => void; +}): Promise> { + const payload: Record = { + id: options.id, + method: options.method, + }; + if (options.params !== undefined) payload.params = options.params; + if (options.sessionId) payload.sessionId = options.sessionId; + + return new Promise((resolve, reject) => { + let timeoutHandle: ReturnType | undefined; + + const cleanup = () => { + if (timeoutHandle !== undefined) { + clearTimeout(timeoutHandle); + timeoutHandle = undefined; + } + }; + + const inflight: InflightRequest = { + method: options.method, + resolve: (result) => { + cleanup(); + resolve(result as CommandReturn); + }, + reject: (error) => { + cleanup(); + reject(error); + }, + }; + + options.store.set(options.id, inflight); + + if (options.timeoutMs && options.timeoutMs > 0) { + timeoutHandle = setTimeout(() => { + if (options.store.get(options.id) !== inflight) return; + options.store.delete(options.id); + inflight.reject( + new Error( + `CDP ${options.method} timed out after ${options.timeoutMs}ms`, + ), + ); + }, options.timeoutMs); + } + + try { + options.sendRaw(payload); + } catch (error) { + options.store.delete(options.id); + inflight.reject( + normalizeError(error, `Failed to send CDP ${options.method}`), + ); + } + }); +} + export class CdpClient { private ws: WebSocket; private nextId = 1; @@ -53,9 +210,15 @@ export class CdpClient { private eventHandlers = new Map>>(); private sessions = new Map(); private closeHandlers = new Set<(reason: string) => void>(); + private requestTimeoutMs?: number; + + readonly api: 
CdpProtocolApi; - private constructor(ws: WebSocket) { + private constructor(ws: WebSocket, options?: CdpClientOptions) { this.ws = ws; + this.requestTimeoutMs = options?.requestTimeoutMs; + this.api = createProtocolApi(this); + ws.onmessage = (event) => this.onMessage(event); ws.onclose = (event) => { const reason = `close code=${event.code} reason=${event.reason || ""}`; @@ -71,12 +234,16 @@ export class CdpClient { }; } - static connect(wsUrl: string): Promise { + static connect( + wsUrl: string, + options?: CdpClientOptions, + ): Promise { return new Promise((resolve, reject) => { const ws = new WebSocket(wsUrl); - ws.onopen = () => resolve(new CdpClient(ws)); - ws.onerror = (e) => - reject(new Error(`CDP WebSocket connection failed: ${e}`)); + ws.onopen = () => resolve(new CdpClient(ws, options)); + ws.onerror = (event) => { + reject(normalizeError(event, "CDP WebSocket connection failed")); + }; }); } @@ -85,17 +252,13 @@ export class CdpClient { ...args: SendArgs ): Promise> { const id = this.nextId++; - const params = args[0]; - const payload: Record = { id, method }; - if (params !== undefined) payload.params = params; - - return new Promise((resolve, reject) => { - this.inflight.set(id, { - resolve: resolve as (v: unknown) => void, - reject, - method, - }); - this.ws.send(JSON.stringify(payload)); + return sendRequest({ + id, + method, + params: args[0], + store: this.inflight, + timeoutMs: this.requestTimeoutMs, + sendRaw: (payload) => this.sendRaw(payload), }); } @@ -106,7 +269,12 @@ export class CdpClient { } off(event: E, handler: CdpEventHandler): void { - this.eventHandlers.get(event)?.delete(handler as CdpEventHandler); + const set = this.eventHandlers.get(event); + if (!set) return; + set.delete(handler as CdpEventHandler); + if (set.size === 0) { + this.eventHandlers.delete(event); + } } onClose(handler: (reason: string) => void): void { @@ -118,15 +286,41 @@ export class CdpClient { } session(sessionId: string): CdpSession { - let s = 
this.sessions.get(sessionId); - if (!s) { - s = new CdpSession(this, sessionId); - this.sessions.set(sessionId, s); + let session = this.sessions.get(sessionId); + if (!session) { + session = new CdpSession(this, sessionId); + this.sessions.set(sessionId, session); } - return s; + return session; + } + + async attachToFirstPage(): Promise { + return Page.attachToFirstPage(this); + } + + async attachToTarget(targetId: string): Promise { + return Page.attachToTarget(this, targetId); + } + + releaseSession(sessionId: string, reason = "session released"): void { + this.detachSession(sessionId, reason); + } + + setRequestTimeout(timeoutMs?: number): void { + this.requestTimeoutMs = timeoutMs; + } + + get defaultRequestTimeoutMs(): number | undefined { + return this.requestTimeoutMs; } async close(): Promise { + if ( + this.ws.readyState === WebSocket.CLOSING || + this.ws.readyState === WebSocket.CLOSED + ) { + return; + } this.ws.close(); } @@ -134,63 +328,87 @@ export class CdpClient { return this.ws.readyState; } + sendRaw(payload: Record): void { + if (this.ws.readyState !== WebSocket.OPEN) { + throw new Error("CDP WebSocket is not open"); + } + this.ws.send(JSON.stringify(payload)); + } + private onMessage(event: MessageEvent): void { - let data: CdpResponseMessage & CdpEventMessage; + let data: Partial; try { data = JSON.parse(typeof event.data === "string" ? event.data : ""); } catch { return; } - // Response to a command if (typeof data.id === "number") { - const { sessionId } = data; - const inflight = sessionId - ? this.sessions.get(sessionId)?.consumeInflight(data.id) + const inflight = data.sessionId + ? 
this.sessions.get(data.sessionId)?.consumeInflight(data.id) : this.inflight.get(data.id); if (inflight) { - if (!sessionId) this.inflight.delete(data.id); + if (!data.sessionId) this.inflight.delete(data.id); if (data.error) { - inflight.reject( - new Error(`CDP ${inflight.method}: ${data.error.message}`), - ); + inflight.reject(formatCdpError(inflight.method, data.error)); } else { - inflight.resolve(data.result ?? {}); + inflight.resolve(data.result); } } return; } - // Event - if (data.method) { - const { sessionId } = data; - const params = data.params ?? {}; + if (typeof data.method !== "string") { + return; + } + if (data.method === "Target.detachedFromTarget") { + const sessionId = + isRecord(data.params) && typeof data.params.sessionId === "string" + ? data.params.sessionId + : undefined; if (sessionId) { - this.sessions.get(sessionId)?.dispatchEvent(data.method, params); + this.detachSession(sessionId, "target detached"); } + } - const handlers = this.eventHandlers.get(data.method); - if (handlers) { - for (const h of handlers) { - try { - (h as (p: unknown) => void)(params); - } catch {} - } + if (data.sessionId) { + this.sessions + .get(data.sessionId) + ?.dispatchEvent(data.method, data.params); + } + + const handlers = this.eventHandlers.get(data.method); + if (handlers) { + for (const handler of handlers) { + try { + (handler as (params: unknown) => void)(data.params); + } catch {} } } } + private detachSession(sessionId: string, reason: string): void { + const session = this.sessions.get(sessionId); + if (!session) return; + this.sessions.delete(sessionId); + session.markDetached(reason); + } + private rejectAllInflight(reason: string): void { - for (const [_, req] of this.inflight) { - req.reject( - new Error(`CDP connection closed (${reason}), pending: ${req.method}`), + for (const request of this.inflight.values()) { + request.reject( + new Error( + `CDP connection closed (${reason}), pending: ${request.method}`, + ), ); } this.inflight.clear(); - 
for (const session of this.sessions.values()) { - session.rejectAll(reason); + + for (const [sessionId, session] of this.sessions) { + session.markDetached(reason); + this.sessions.delete(sessionId); } } } @@ -199,30 +417,42 @@ export class CdpSession { private nextId = 1; private inflight = new Map(); private eventHandlers = new Map>>(); + private detachedReason: string | null = null; + + readonly api: CdpProtocolApi; constructor( private root: CdpClient, readonly id: string, - ) {} + ) { + this.api = createProtocolApi(this); + } + + get isDetached(): boolean { + return this.detachedReason !== null; + } send( method: M, ...args: SendArgs ): Promise> { - const id = this.nextId++; - const params = args[0]; - const payload: Record = { id, method, sessionId: this.id }; - if (params !== undefined) payload.params = params; + if (this.detachedReason) { + return Promise.reject( + new Error( + `CDP session ${this.id} is detached (${this.detachedReason})`, + ), + ); + } - return new Promise((resolve, reject) => { - this.inflight.set(id, { - resolve: resolve as (v: unknown) => void, - reject, - method, - }); - // Access private ws via the root client - const ws = (this.root as unknown as { ws: WebSocket }).ws; - ws.send(JSON.stringify(payload)); + const id = this.nextId++; + return sendRequest({ + id, + method, + params: args[0], + sessionId: this.id, + store: this.inflight, + timeoutMs: this.root.defaultRequestTimeoutMs, + sendRaw: (payload) => this.root.sendRaw(payload), }); } @@ -233,30 +463,44 @@ export class CdpSession { } off(event: E, handler: CdpEventHandler): void { - this.eventHandlers.get(event)?.delete(handler as CdpEventHandler); + const set = this.eventHandlers.get(event); + if (!set) return; + set.delete(handler as CdpEventHandler); + if (set.size === 0) { + this.eventHandlers.delete(event); + } } consumeInflight(id: number): InflightRequest | undefined { - const req = this.inflight.get(id); - if (req) this.inflight.delete(id); - return req; + const request = 
this.inflight.get(id); + if (request) { + this.inflight.delete(id); + } + return request; } - dispatchEvent(method: string, params: Record): void { + dispatchEvent(method: string, params: unknown): void { const handlers = this.eventHandlers.get(method); - if (handlers) { - for (const h of handlers) { - try { - (h as (p: unknown) => void)(params); - } catch {} - } + if (!handlers) return; + + for (const handler of handlers) { + try { + (handler as (payload: unknown) => void)(params); + } catch {} } } + markDetached(reason: string): void { + if (this.detachedReason) return; + this.detachedReason = reason; + this.rejectAll(reason); + this.eventHandlers.clear(); + } + rejectAll(reason: string): void { - for (const [_, req] of this.inflight) { - req.reject( - new Error(`CDP session closed (${reason}), pending: ${req.method}`), + for (const request of this.inflight.values()) { + request.reject( + new Error(`CDP session closed (${reason}), pending: ${request.method}`), ); } this.inflight.clear(); diff --git a/packages/browser/src/command.ts b/packages/browser/src/command.ts index f39f281..9c76656 100644 --- a/packages/browser/src/command.ts +++ b/packages/browser/src/command.ts @@ -45,9 +45,33 @@ export interface BrowseCommandConfig { } let config: BrowseCommandConfig | null = null; +let lifecycleCleanupInstalled = false; + +async function closeAndClearActiveBrowser(): Promise { + const browser = activeBrowser; + if (!browser) return; + activeBrowser = null; + emitSessionChange(); + await browser.close().catch(() => {}); +} + +function installLifecycleCleanup(): void { + if (lifecycleCleanupInstalled || typeof window === "undefined") { + return; + } + lifecycleCleanupInstalled = true; + + const cleanup = () => { + void closeAndClearActiveBrowser(); + }; + + window.addEventListener("pagehide", cleanup); + window.addEventListener("beforeunload", cleanup); +} export function configureBrowseCommand(cfg: BrowseCommandConfig): void { config = cfg; + installLifecycleCleanup(); } 
function getProvider(): BrowserProvider { @@ -263,10 +287,7 @@ export async function executeBrowseCommand( exitCode: 1, }; } - if (activeBrowser) { - await activeBrowser.close().catch(() => {}); - activeBrowser = null; - } + await closeAndClearActiveBrowser(); activeBrowser = await Browser.launch({ provider: getProvider() }); emitSessionChange(); await activeBrowser.page.goto(url, { @@ -308,11 +329,7 @@ export async function executeBrowseCommand( } case "stop": { - if (activeBrowser) { - await activeBrowser.close(); - activeBrowser = null; - emitSessionChange(); - } + await closeAndClearActiveBrowser(); return { stdout: output({ stopped: true }, json), stderr: "", @@ -1122,6 +1139,15 @@ export async function executeBrowseCommand( } } +export async function closeActiveBrowser(): Promise { + await closeAndClearActiveBrowser(); +} + +export function disposeBrowseCommand(): void { + config = null; + void closeAndClearActiveBrowser(); +} + export function getActiveBrowser(): Browser | null { return activeBrowser; } diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts index b8d441c..9752beb 100644 --- a/packages/browser/src/index.ts +++ b/packages/browser/src/index.ts @@ -1,4 +1,10 @@ -export { CdpClient, CdpSession, type CdpEventHandler } from "./cdp.js"; +export { + CdpClient, + CdpSession, + type CdpEventHandler, + type CdpClientOptions, + type CdpProtocolApi, +} from "./cdp.js"; export { Page, type NavigateResult, @@ -33,6 +39,8 @@ export { export { executeBrowseCommand, configureBrowseCommand, + closeActiveBrowser, + disposeBrowseCommand, getActiveBrowser, onBrowseSessionChange, getBrowseSessionState, diff --git a/packages/browser/src/page.ts b/packages/browser/src/page.ts index 4dbbbb8..54de236 100644 --- a/packages/browser/src/page.ts +++ b/packages/browser/src/page.ts @@ -104,6 +104,10 @@ export class Page { return this.session instanceof CdpSession ? 
this.session.id : undefined; } + get cdpSession(): CdpSession | undefined { + return this.session instanceof CdpSession ? this.session : undefined; + } + async enableDomains(): Promise { await Promise.all([ this.session.send("Page.enable"), diff --git a/packages/browser/src/providers/browser-use.ts b/packages/browser/src/providers/browser-use.ts index dc12c4a..66fd6d9 100644 --- a/packages/browser/src/providers/browser-use.ts +++ b/packages/browser/src/providers/browser-use.ts @@ -49,9 +49,7 @@ export class BrowserUseProvider implements BrowserProvider { if (this.config.proxyCountryCode) { body.proxy_country_code = this.config.proxyCountryCode; } - if (this.config.timeoutMinutes) { - body.timeout = this.config.timeoutMinutes; - } + body.timeout = this.config.timeoutMinutes ?? 5; const response = await fetch(`${baseUrl}/api/v2/browsers`, { method: "POST", diff --git a/packages/browser/src/providers/browserbase.ts b/packages/browser/src/providers/browserbase.ts index 2553d10..da759c1 100644 --- a/packages/browser/src/providers/browserbase.ts +++ b/packages/browser/src/providers/browserbase.ts @@ -10,6 +10,7 @@ export interface BrowserbaseConfig { apiKey: string; projectId: string; baseUrl?: string; + timeoutSeconds?: number; } interface BrowserbaseSessionBody { @@ -19,6 +20,7 @@ interface BrowserbaseSessionBody { context?: { id: string }; }; proxies?: boolean; + timeout?: number; } interface BrowserbaseSessionResponse { @@ -41,6 +43,7 @@ export class BrowserbaseProvider implements BrowserProvider { browserSettings: { viewport: options?.viewport ?? { width: 1288, height: 711 }, }, + timeout: this.config.timeoutSeconds ?? 
300, }; if (options?.contextId) { diff --git a/packages/core/src/chat/browser-viewer.svelte b/packages/core/src/chat/browser-viewer.svelte index d5e03bd..9d47cae 100644 --- a/packages/core/src/chat/browser-viewer.svelte +++ b/packages/core/src/chat/browser-viewer.svelte @@ -1,16 +1,18 @@ + +`, + ), + + "/destination": (res) => + html( + res, + ` + +Destination Page + +

Destination Page

+

Arrived successfully.

+ +`, + ), + + "/form": (res) => + html( + res, + ` + +Form Page + +

Form Page

+
+ + + + + + + + +
+
+ + +`, + ), + + "/visibility": (res) => + html( + res, + ` + +Visibility Test + +
Visible
+ + +
Hidden opacity
+
Zero
+ + + + + +`, + ), + + "/scroll": (res) => + html( + res, + ` + +Scroll Test + +
Top of page
+
Middle of page
+
Bottom of page
+
Hover me
+
+ + +`, + ), + + "/waiting": (res) => + html( + res, + ` + +Waiting Test + + +
+
Not ready
+ + +`, + ), + + "/storage": (res) => + html( + res, + ` + +Storage Test + +

Storage Test

+ + +`, + ), + + "/evaluate": (res) => + html( + res, + ` + +Evaluate Test + +
Content
+ + +`, + ), + + "/empty": (res) => + html( + res, + ` + +Empty Page + +`, + ), + + "/history-a": (res) => + html( + res, + ` + +History A +

Page A

Go to B +`, + ), + + "/history-b": (res) => + html( + res, + ` + +History B +

Page B

Go to C +`, + ), + + "/history-c": (res) => + html( + res, + ` + +History C +

Page C

+`, + ), +}; diff --git a/packages/browser/tests/integration/providers.test.ts b/packages/browser/tests/integration/providers.test.ts new file mode 100644 index 0000000..af12f8a --- /dev/null +++ b/packages/browser/tests/integration/providers.test.ts @@ -0,0 +1,101 @@ +import { afterAll, beforeAll, describe, expect, it } from "vitest"; +import { Browser, BrowserUseProvider } from "../../src/index.js"; +import { + type FixtureServer, + startFixtureServer, + stopServer, +} from "./helpers.js"; + +const apiKey = process.env.BROWSER_USE_API_KEY; +const suite = apiKey ? describe : describe.skip; + +suite("BrowserUseProvider", () => { + let fixture: FixtureServer; + + beforeAll(async () => { + fixture = await startFixtureServer(); + }); + + afterAll(async () => { + await stopServer(fixture.server); + }); + + it("creates a session and returns a valid CDP url", async () => { + const provider = new BrowserUseProvider({ + apiKey: apiKey!, + timeoutMinutes: 1, + }); + + const session = await provider.createSession(); + try { + expect(session.sessionId).toBeTruthy(); + expect(session.cdpUrl).toMatch(/^wss?:\/\//); + } finally { + await provider.closeSession(session.sessionId); + } + }, 30000); + + it("connects, navigates, and reads page content", async () => { + const provider = new BrowserUseProvider({ + apiKey: apiKey!, + timeoutMinutes: 1, + }); + + const browser = await Browser.launch({ provider }); + try { + await browser.page.goto("https://example.com"); + const title = await browser.page.getTitle(); + expect(title).toContain("Example Domain"); + + const text = await browser.page.getText("h1"); + expect(text).toContain("Example Domain"); + } finally { + await browser.close(); + } + }, 60000); + + it("takes a screenshot", async () => { + const provider = new BrowserUseProvider({ + apiKey: apiKey!, + timeoutMinutes: 1, + }); + + const browser = await Browser.launch({ provider }); + try { + await browser.page.goto("https://example.com"); + const screenshot = await 
browser.page.screenshot(); + expect(screenshot.format).toBe("png"); + expect(screenshot.base64.length).toBeGreaterThan(1000); + } finally { + await browser.close(); + } + }, 60000); + + it("captures an accessibility snapshot", async () => { + const provider = new BrowserUseProvider({ + apiKey: apiKey!, + timeoutMinutes: 1, + }); + + const browser = await Browser.launch({ provider }); + try { + await browser.page.goto("https://example.com"); + const snap = await browser.page.snapshot({ interactive: true }); + expect(snap.tree).toBeTruthy(); + expect(snap.tree).toContain("Example Domain"); + } finally { + await browser.close(); + } + }, 60000); + + it("rejects with auth error for invalid key", async () => { + const provider = new BrowserUseProvider({ + apiKey: "bu__invalid_key_for_testing", + timeoutMinutes: 1, + }); + + await expect(provider.createSession()).rejects.toThrow( + /authentication failed|401|403/i, + ); + }, 30000); +}); diff --git a/packages/browser/tests/providers.test.ts b/packages/browser/tests/providers.test.ts new file mode 100644 index 0000000..4e99281 --- /dev/null +++ b/packages/browser/tests/providers.test.ts @@ -0,0 +1,306 @@ +import { once } from "node:events"; +import http from "node:http"; +import { afterEach, describe, expect, it } from "vitest"; +import { BrowserUseProvider } from "../src/providers/browser-use.js"; +import { BrowserbaseProvider } from "../src/providers/browserbase.js"; + +interface CapturedRequest { + method: string; + path: string; + headers: http.IncomingHttpHeaders; + body: string; +} + +interface ApiServer { + baseUrl: string; + requests: CapturedRequest[]; + close: () => Promise; +} + +async function startApiServer( + handler: ( + request: CapturedRequest, + baseUrl: string, + ) => { + status?: number; + headers?: Record; + body?: string; + }, +): Promise { + const requests: CapturedRequest[] = []; + const server = http.createServer(async (req, res) => { + const bodyChunks: Buffer[] = []; + for await (const chunk of 
req) { + bodyChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + } + + const request: CapturedRequest = { + method: req.method ?? "GET", + path: req.url ?? "/", + headers: req.headers, + body: Buffer.concat(bodyChunks).toString("utf8"), + }; + requests.push(request); + + const address = server.address(); + if (!address || typeof address === "string") { + res.writeHead(500, { "Content-Type": "text/plain; charset=utf-8" }); + res.end("missing server address"); + return; + } + + const baseUrl = `http://127.0.0.1:${address.port}`; + const response = handler(request, baseUrl); + res.writeHead(response.status ?? 200, { + "Content-Type": "text/plain; charset=utf-8", + ...response.headers, + }); + res.end(response.body ?? ""); + }); + + server.listen(0, "127.0.0.1"); + await once(server, "listening"); + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("Failed to determine test server address"); + } + + return { + baseUrl: `http://127.0.0.1:${address.port}`, + requests, + close: async () => { + server.close(); + await once(server, "close"); + }, + }; +} + +async function getUnusedBaseUrl(): Promise { + const server = http.createServer(); + server.listen(0, "127.0.0.1"); + await once(server, "listening"); + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("Failed to allocate port"); + } + server.close(); + await once(server, "close"); + return `http://127.0.0.1:${address.port}`; +} + +describe("BrowserbaseProvider", () => { + let apiServer: ApiServer | null = null; + + afterEach(async () => { + if (apiServer) { + await apiServer.close(); + apiServer = null; + } + }); + + it("creates a session with viewport, context, and proxy options", async () => { + apiServer = await startApiServer((request) => { + expect(request.method).toBe("POST"); + expect(request.path).toBe("/v1/sessions"); + return { + headers: { "Content-Type": "application/json" }, + body: 
JSON.stringify({ + id: "sess-1", + connectUrl: "wss://browserbase.example/session", + liveUrl: "https://browserbase.example/live/sess-1", + }), + }; + }); + + const provider = new BrowserbaseProvider({ + apiKey: "bb-key", + projectId: "proj-123", + baseUrl: apiServer.baseUrl, + timeoutSeconds: 123, + }); + + const session = await provider.createSession({ + viewport: { width: 1440, height: 900 }, + contextId: "ctx-1", + proxy: true, + }); + + expect(apiServer.requests).toHaveLength(1); + expect(apiServer.requests[0]).toMatchObject({ + method: "POST", + path: "/v1/sessions", + headers: { + "content-type": "application/json", + "x-bb-api-key": "bb-key", + }, + }); + expect(JSON.parse(apiServer.requests[0]!.body)).toEqual({ + projectId: "proj-123", + browserSettings: { + viewport: { width: 1440, height: 900 }, + context: { id: "ctx-1" }, + }, + proxies: true, + timeout: 123, + }); + + expect(session).toEqual({ + cdpUrl: "wss://browserbase.example/session", + sessionId: "sess-1", + metadata: { + id: "sess-1", + connectUrl: "wss://browserbase.example/session", + liveUrl: "https://browserbase.example/live/sess-1", + }, + }); + }); + + it("throws a descriptive error when session creation fails", async () => { + apiServer = await startApiServer(() => ({ + status: 400, + body: "bad request", + })); + + const provider = new BrowserbaseProvider({ + apiKey: "bb-key", + projectId: "proj-123", + baseUrl: apiServer.baseUrl, + }); + + await expect(provider.createSession()).rejects.toThrow( + "Browserbase session creation failed (400): bad request", + ); + }); + + it("closes a session by requesting release", async () => { + apiServer = await startApiServer((request) => { + expect(request.method).toBe("POST"); + expect(request.path).toBe("/v1/sessions/sess-1"); + return { status: 200 }; + }); + + const provider = new BrowserbaseProvider({ + apiKey: "bb-key", + projectId: "proj-123", + baseUrl: apiServer.baseUrl, + }); + + await provider.closeSession("sess-1"); + + 
expect(apiServer.requests).toHaveLength(1); + expect(apiServer.requests[0]).toMatchObject({ + method: "POST", + path: "/v1/sessions/sess-1", + headers: { + "content-type": "application/json", + "x-bb-api-key": "bb-key", + }, + }); + expect(JSON.parse(apiServer.requests[0]!.body)).toEqual({ + status: "REQUEST_RELEASE", + }); + }); +}); + +describe("BrowserUseProvider", () => { + let apiServer: ApiServer | null = null; + + afterEach(async () => { + if (apiServer) { + await apiServer.close(); + apiServer = null; + } + }); + + it("creates a session and resolves the websocket debugger URL", async () => { + apiServer = await startApiServer((request, baseUrl) => { + if (request.path === "/api/v2/browsers") { + expect(request.method).toBe("POST"); + return { + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + id: "browser-1", + cdpUrl: `${baseUrl}/devtools/browser-1`, + }), + }; + } + + if (request.path === "/devtools/browser-1/json/version") { + expect(request.method).toBe("GET"); + return { + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + webSocketDebuggerUrl: + "wss://browser-use.example/devtools/browser-1/ws", + }), + }; + } + + return { status: 404, body: "not found" }; + }); + + const provider = new BrowserUseProvider({ + apiKey: "bu-key", + baseUrl: apiServer.baseUrl, + profileId: "profile-1", + proxyCountryCode: "us", + timeoutMinutes: 12, + }); + + const session = await provider.createSession(); + + expect(apiServer.requests).toHaveLength(2); + expect(apiServer.requests[0]).toMatchObject({ + method: "POST", + path: "/api/v2/browsers", + headers: { + "content-type": "application/json", + "x-browser-use-api-key": "bu-key", + }, + }); + expect(JSON.parse(apiServer.requests[0]!.body)).toEqual({ + profile_id: "profile-1", + proxy_country_code: "us", + timeout: 12, + }); + expect(apiServer.requests[1]).toMatchObject({ + method: "GET", + path: "/devtools/browser-1/json/version", + }); + + expect(session).toEqual({ + 
cdpUrl: "wss://browser-use.example/devtools/browser-1/ws", + sessionId: "browser-1", + metadata: { + id: "browser-1", + cdpUrl: `${apiServer.baseUrl}/devtools/browser-1`, + }, + }); + }); + + it("throws a clear authentication error for 401 and 403 responses", async () => { + apiServer = await startApiServer(() => ({ + status: 401, + body: "unauthorized", + })); + + const provider = new BrowserUseProvider({ + apiKey: "bad-key", + baseUrl: apiServer.baseUrl, + }); + + await expect(provider.createSession()).rejects.toThrow( + "Browser Use authentication failed (401). Check your BROWSER_USE_API_KEY.", + ); + }); + + it("swallows close errors", async () => { + const baseUrl = await getUnusedBaseUrl(); + const provider = new BrowserUseProvider({ + apiKey: "bu-key", + baseUrl, + }); + + await expect(provider.closeSession("browser-1")).resolves.toBeUndefined(); + }); +}); diff --git a/packages/browser/vitest.config.ts b/packages/browser/vitest.config.ts new file mode 100644 index 0000000..4370917 --- /dev/null +++ b/packages/browser/vitest.config.ts @@ -0,0 +1,27 @@ +import { readFileSync } from "node:fs"; +import { defineConfig } from "vitest/config"; + +function loadDotEnv(): Record { + try { + const content = readFileSync(".env", "utf8"); + const env: Record = {}; + for (const line of content.split("\n")) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + const eqIdx = trimmed.indexOf("="); + if (eqIdx < 0) continue; + env[trimmed.slice(0, eqIdx)] = trimmed.slice(eqIdx + 1); + } + return env; + } catch { + return {}; + } +} + +export default defineConfig({ + test: { + environment: "node", + include: ["tests/**/*.test.ts"], + env: loadDotEnv(), + }, +}); diff --git a/packages/sdk/src/runtime.ts b/packages/sdk/src/runtime.ts index d73b386..a3fc828 100644 --- a/packages/sdk/src/runtime.ts +++ b/packages/sdk/src/runtime.ts @@ -1,4 +1,3 @@ -import { disposeBrowseCommand } from "@office-agents/browser"; import { Agent, type AgentEvent, @@ 
-14,6 +13,7 @@ import { type Model, streamSimple, } from "@mariozechner/pi-ai"; +import { disposeBrowseCommand } from "@office-agents/browser"; import { agentMessagesToChatMessages, type ChatMessage, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8642ea4..55ef49a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -53,6 +53,9 @@ importers: devtools-protocol: specifier: ^0.0.1602427 version: 0.0.1602427 + playwright: + specifier: 1.57.0 + version: 1.57.0 typescript: specifier: ^5.8.0 version: 5.9.3 @@ -3183,6 +3186,11 @@ packages: fs.realpath@1.0.0: resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} + fsevents@2.3.2: + resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + fsevents@2.3.3: resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} @@ -4200,6 +4208,16 @@ packages: resolution: {integrity: sha512-NPE8TDbzl/3YQYY7CSS228s3g2ollTFnc+Qi3tqmqJp9Vg2ovUpixcJEo2HJScN2Ez+kEaal6y70c0ehqJBJeA==} engines: {node: '>=10'} + playwright-core@1.57.0: + resolution: {integrity: sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==} + engines: {node: '>=18'} + hasBin: true + + playwright@1.57.0: + resolution: {integrity: sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==} + engines: {node: '>=18'} + hasBin: true + possible-typed-array-names@1.1.0: resolution: {integrity: sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg==} engines: {node: '>= 0.4'} @@ -8431,6 +8449,9 @@ snapshots: fs.realpath@1.0.0: {} + fsevents@2.3.2: + optional: true + fsevents@2.3.3: optional: true @@ -9597,6 +9618,14 @@ snapshots: dependencies: find-up: 5.0.0 + 
playwright-core@1.57.0: {} + + playwright@1.57.0: + dependencies: + playwright-core: 1.57.0 + optionalDependencies: + fsevents: 2.3.2 + possible-typed-array-names@1.1.0: {} postcss-value-parser@4.2.0: {} From b36f416895154d0ad8d252d6c40934684498e205 Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Mon, 23 Mar 2026 15:13:03 +0800 Subject: [PATCH 07/12] fix: add --no-sandbox flags for Chromium in CI, guard afterAll cleanup --- packages/browser/tests/integration.test.ts | 2 +- packages/browser/tests/integration/helpers.ts | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/browser/tests/integration.test.ts b/packages/browser/tests/integration.test.ts index 90a7bac..378c6cd 100644 --- a/packages/browser/tests/integration.test.ts +++ b/packages/browser/tests/integration.test.ts @@ -27,7 +27,7 @@ suite("integration", () => { afterAll(async () => { await browser?.close(); await chrome?.close(); - await stopServer(fixture.server); + if (fixture) await stopServer(fixture.server); }); // --------------------------------------------------------------------------- diff --git a/packages/browser/tests/integration/helpers.ts b/packages/browser/tests/integration/helpers.ts index bd30a82..a52c694 100644 --- a/packages/browser/tests/integration/helpers.ts +++ b/packages/browser/tests/integration/helpers.ts @@ -82,6 +82,8 @@ export async function launchChromium(): Promise { "--disable-background-networking", "--no-first-run", "--no-default-browser-check", + "--no-sandbox", + "--disable-setuid-sandbox", "--remote-debugging-port=0", `--user-data-dir=${userDataDir}`, "about:blank", From fecf3ad67b8ae36b682234aede21f7c254de19ff Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Mon, 23 Mar 2026 15:18:00 +0800 Subject: [PATCH 08/12] ci: bump all workflows to Node 24 --- .github/workflows/ci.yml | 4 ++-- .github/workflows/release-excel.yml | 2 +- .github/workflows/release-ppt.yml | 2 
+- .github/workflows/release-word.yml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6038241..98c57b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: - uses: actions/setup-node@v6 with: - node-version: 20 + node-version: 24 cache: pnpm - name: Install dependencies @@ -54,7 +54,7 @@ jobs: - uses: actions/setup-node@v6 with: - node-version: 20 + node-version: 24 cache: pnpm - name: Install dependencies diff --git a/.github/workflows/release-excel.yml b/.github/workflows/release-excel.yml index 2a92ac9..fa8fca4 100644 --- a/.github/workflows/release-excel.yml +++ b/.github/workflows/release-excel.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/setup-node@v6 with: - node-version: 20 + node-version: 24 cache: pnpm - name: Install dependencies diff --git a/.github/workflows/release-ppt.yml b/.github/workflows/release-ppt.yml index c67af38..a243f38 100644 --- a/.github/workflows/release-ppt.yml +++ b/.github/workflows/release-ppt.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/setup-node@v6 with: - node-version: 20 + node-version: 24 cache: pnpm - name: Install dependencies diff --git a/.github/workflows/release-word.yml b/.github/workflows/release-word.yml index f5f52f4..c6fb8d9 100644 --- a/.github/workflows/release-word.yml +++ b/.github/workflows/release-word.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/setup-node@v6 with: - node-version: 20 + node-version: 24 cache: pnpm - name: Install dependencies From fc41aac93d4f1d3d4b81eba1e4cf53268ebee76e Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Mon, 23 Mar 2026 19:22:49 +0800 Subject: [PATCH 09/12] fix: polyfill Uint8Array.prototype.toHex in test setup for Node.js pdfjs-dist v5 uses Uint8Array.prototype.toHex() (a TC39 proposal) in its modern build. This exists in modern browsers but not in Node.js. 
Rather than switching to the legacy build (which would bloat the browser bundle with unnecessary polyfills), add the polyfill only in the vitest setup. --- packages/sdk/tests/custom-commands-integration.test.ts | 1 + packages/sdk/tests/setup.ts | 9 +++++++++ packages/sdk/vitest.config.ts | 1 + 3 files changed, 11 insertions(+) create mode 100644 packages/sdk/tests/setup.ts diff --git a/packages/sdk/tests/custom-commands-integration.test.ts b/packages/sdk/tests/custom-commands-integration.test.ts index d14e7cc..3187f61 100644 --- a/packages/sdk/tests/custom-commands-integration.test.ts +++ b/packages/sdk/tests/custom-commands-integration.test.ts @@ -127,6 +127,7 @@ describe("shared custom commands (integration)", () => { const result = await run( "pdf-to-text /home/user/uploads/test.pdf /home/user/uploads/out.txt", ); + expect(result.stderr, "pdf-to-text stderr").toBe(""); expect(result.exitCode).toBe(0); expect(result.out).toContain("Extracted text from"); expect(result.out).toContain("page"); diff --git a/packages/sdk/tests/setup.ts b/packages/sdk/tests/setup.ts new file mode 100644 index 0000000..bc35e79 --- /dev/null +++ b/packages/sdk/tests/setup.ts @@ -0,0 +1,9 @@ +// Polyfill Uint8Array.prototype.toHex for Node.js environments. +// pdfjs-dist v5 uses this (a TC39 proposal) in its modern build, +// but it's not available in Node. Browsers that support it natively +// use the native version; this only applies to the test runner. 
+if (typeof Uint8Array.prototype.toHex !== "function") { + Uint8Array.prototype.toHex = function () { + return Array.from(this, (b) => b.toString(16).padStart(2, "0")).join(""); + }; +} diff --git a/packages/sdk/vitest.config.ts b/packages/sdk/vitest.config.ts index bf62dc9..cd21ab3 100644 --- a/packages/sdk/vitest.config.ts +++ b/packages/sdk/vitest.config.ts @@ -4,6 +4,7 @@ export default defineConfig({ test: { environment: "node", include: ["tests/**/*.test.ts"], + setupFiles: ["tests/setup.ts"], server: { deps: { inline: ["pdfjs-dist"], From 50359ea2d2dbf1409d88afccfe556f9706425a5e Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Mon, 23 Mar 2026 20:21:06 +0800 Subject: [PATCH 10/12] wip: remove browserbase for now --- packages/browser/src/index.ts | 2 - packages/browser/src/providers/browserbase.ts | 95 --------------- packages/browser/src/providers/index.ts | 1 - packages/browser/tests/providers.test.ts | 115 ------------------ packages/core/src/chat/settings-panel.svelte | 38 ------ packages/sdk/src/vfs/custom-commands.ts | 12 +- packages/sdk/src/web/config.ts | 2 - 7 files changed, 1 insertion(+), 264 deletions(-) delete mode 100644 packages/browser/src/providers/browserbase.ts diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts index 21fd3b8..46e2a80 100644 --- a/packages/browser/src/index.ts +++ b/packages/browser/src/index.ts @@ -34,8 +34,6 @@ export { type TypeResult, } from "./page.js"; export { - type BrowserbaseConfig, - BrowserbaseProvider, type BrowserProvider, type BrowserSession, type BrowserUseConfig, diff --git a/packages/browser/src/providers/browserbase.ts b/packages/browser/src/providers/browserbase.ts deleted file mode 100644 index da759c1..0000000 --- a/packages/browser/src/providers/browserbase.ts +++ /dev/null @@ -1,95 +0,0 @@ -import type { - BrowserProvider, - BrowserSession, - CreateSessionOptions, -} from "./types.js"; - -const BROWSERBASE_API = 
"https://api.browserbase.com"; - -export interface BrowserbaseConfig { - apiKey: string; - projectId: string; - baseUrl?: string; - timeoutSeconds?: number; -} - -interface BrowserbaseSessionBody { - projectId: string; - browserSettings: { - viewport: { width: number; height: number }; - context?: { id: string }; - }; - proxies?: boolean; - timeout?: number; -} - -interface BrowserbaseSessionResponse { - id: string; - connectUrl: string; -} - -export class BrowserbaseProvider implements BrowserProvider { - readonly name = "browserbase"; - private config: BrowserbaseConfig; - - constructor(config: BrowserbaseConfig) { - this.config = config; - } - - async createSession(options?: CreateSessionOptions): Promise { - const baseUrl = this.config.baseUrl ?? BROWSERBASE_API; - const body: BrowserbaseSessionBody = { - projectId: this.config.projectId, - browserSettings: { - viewport: options?.viewport ?? { width: 1288, height: 711 }, - }, - timeout: this.config.timeoutSeconds ?? 300, - }; - - if (options?.contextId) { - body.browserSettings.context = { id: options.contextId }; - } - if (options?.proxy) { - body.proxies = true; - } - - const response = await fetch(`${baseUrl}/v1/sessions`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "x-bb-api-key": this.config.apiKey, - }, - body: JSON.stringify(body), - }); - - if (!response.ok) { - const text = await response.text(); - throw new Error( - `Browserbase session creation failed (${response.status}): ${text}`, - ); - } - - const data: BrowserbaseSessionResponse = await response.json(); - if (!data.connectUrl || !data.id) { - throw new Error("Browserbase session response missing connectUrl or id"); - } - - return { - cdpUrl: data.connectUrl, - sessionId: data.id, - metadata: { ...data }, - }; - } - - async closeSession(sessionId: string): Promise { - const baseUrl = this.config.baseUrl ?? 
BROWSERBASE_API; - await fetch(`${baseUrl}/v1/sessions/${sessionId}`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "x-bb-api-key": this.config.apiKey, - }, - body: JSON.stringify({ status: "REQUEST_RELEASE" }), - }); - } -} diff --git a/packages/browser/src/providers/index.ts b/packages/browser/src/providers/index.ts index 5502bf5..eb76380 100644 --- a/packages/browser/src/providers/index.ts +++ b/packages/browser/src/providers/index.ts @@ -1,5 +1,4 @@ export { type BrowserUseConfig, BrowserUseProvider } from "./browser-use.js"; -export { type BrowserbaseConfig, BrowserbaseProvider } from "./browserbase.js"; export type { BrowserProvider, BrowserSession, diff --git a/packages/browser/tests/providers.test.ts b/packages/browser/tests/providers.test.ts index 4e99281..6a24102 100644 --- a/packages/browser/tests/providers.test.ts +++ b/packages/browser/tests/providers.test.ts @@ -2,7 +2,6 @@ import { once } from "node:events"; import http from "node:http"; import { afterEach, describe, expect, it } from "vitest"; import { BrowserUseProvider } from "../src/providers/browser-use.js"; -import { BrowserbaseProvider } from "../src/providers/browserbase.js"; interface CapturedRequest { method: string; @@ -88,120 +87,6 @@ async function getUnusedBaseUrl(): Promise { return `http://127.0.0.1:${address.port}`; } -describe("BrowserbaseProvider", () => { - let apiServer: ApiServer | null = null; - - afterEach(async () => { - if (apiServer) { - await apiServer.close(); - apiServer = null; - } - }); - - it("creates a session with viewport, context, and proxy options", async () => { - apiServer = await startApiServer((request) => { - expect(request.method).toBe("POST"); - expect(request.path).toBe("/v1/sessions"); - return { - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - id: "sess-1", - connectUrl: "wss://browserbase.example/session", - liveUrl: "https://browserbase.example/live/sess-1", - }), - }; - }); - - const provider = 
new BrowserbaseProvider({ - apiKey: "bb-key", - projectId: "proj-123", - baseUrl: apiServer.baseUrl, - timeoutSeconds: 123, - }); - - const session = await provider.createSession({ - viewport: { width: 1440, height: 900 }, - contextId: "ctx-1", - proxy: true, - }); - - expect(apiServer.requests).toHaveLength(1); - expect(apiServer.requests[0]).toMatchObject({ - method: "POST", - path: "/v1/sessions", - headers: { - "content-type": "application/json", - "x-bb-api-key": "bb-key", - }, - }); - expect(JSON.parse(apiServer.requests[0]!.body)).toEqual({ - projectId: "proj-123", - browserSettings: { - viewport: { width: 1440, height: 900 }, - context: { id: "ctx-1" }, - }, - proxies: true, - timeout: 123, - }); - - expect(session).toEqual({ - cdpUrl: "wss://browserbase.example/session", - sessionId: "sess-1", - metadata: { - id: "sess-1", - connectUrl: "wss://browserbase.example/session", - liveUrl: "https://browserbase.example/live/sess-1", - }, - }); - }); - - it("throws a descriptive error when session creation fails", async () => { - apiServer = await startApiServer(() => ({ - status: 400, - body: "bad request", - })); - - const provider = new BrowserbaseProvider({ - apiKey: "bb-key", - projectId: "proj-123", - baseUrl: apiServer.baseUrl, - }); - - await expect(provider.createSession()).rejects.toThrow( - "Browserbase session creation failed (400): bad request", - ); - }); - - it("closes a session by requesting release", async () => { - apiServer = await startApiServer((request) => { - expect(request.method).toBe("POST"); - expect(request.path).toBe("/v1/sessions/sess-1"); - return { status: 200 }; - }); - - const provider = new BrowserbaseProvider({ - apiKey: "bb-key", - projectId: "proj-123", - baseUrl: apiServer.baseUrl, - }); - - await provider.closeSession("sess-1"); - - expect(apiServer.requests).toHaveLength(1); - expect(apiServer.requests[0]).toMatchObject({ - method: "POST", - path: "/v1/sessions/sess-1", - headers: { - "content-type": "application/json", - 
"x-bb-api-key": "bb-key", - }, - }); - expect(JSON.parse(apiServer.requests[0]!.body)).toEqual({ - status: "REQUEST_RELEASE", - }); - }); -}); - describe("BrowserUseProvider", () => { let apiServer: ApiServer | null = null; diff --git a/packages/core/src/chat/settings-panel.svelte b/packages/core/src/chat/settings-panel.svelte index 92d44ef..82c46a7 100644 --- a/packages/core/src/chat/settings-panel.svelte +++ b/packages/core/src/chat/settings-panel.svelte @@ -60,8 +60,6 @@ let braveApiKey = $state(savedWeb.apiKeys.brave || ""); let serperApiKey = $state(savedWeb.apiKeys.serper || ""); let exaApiKey = $state(savedWeb.apiKeys.exa || ""); - let browserbaseApiKey = $state(savedWeb.apiKeys.browserbase || ""); - let browserbaseProjectId = $state(savedWeb.apiKeys.browserbaseProjectId || ""); let browserUseApiKey = $state(savedWeb.apiKeys.browserUse || ""); let showAdvancedWebKeys = $state(false); @@ -172,8 +170,6 @@ braveApiKey: string; serperApiKey: string; exaApiKey: string; - browserbaseApiKey: string; - browserbaseProjectId: string; browserUseApiKey: string; }>, ) { @@ -184,8 +180,6 @@ braveApiKey = updates.braveApiKey ?? braveApiKey; serperApiKey = updates.serperApiKey ?? serperApiKey; exaApiKey = updates.exaApiKey ?? exaApiKey; - browserbaseApiKey = updates.browserbaseApiKey ?? browserbaseApiKey; - browserbaseProjectId = updates.browserbaseProjectId ?? browserbaseProjectId; browserUseApiKey = updates.browserUseApiKey ?? browserUseApiKey; saveWebConfig({ @@ -196,8 +190,6 @@ brave: braveApiKey, serper: serperApiKey, exa: exaApiKey, - browserbase: browserbaseApiKey, - browserbaseProjectId, browserUse: browserUseApiKey, }, }); @@ -801,36 +793,6 @@ Configure one of the providers below.

-
- - - -
- {@render apiKeyField("API Key", browserbaseApiKey, (v) => { browserbaseApiKey = v; updateWebSettings({ browserbaseApiKey }); }, "bb-api-...")} - -

- Get credentials at browserbase.com/settings -

-
-
-
diff --git a/packages/sdk/src/vfs/custom-commands.ts b/packages/sdk/src/vfs/custom-commands.ts index 9d79db6..e2f6958 100644 --- a/packages/sdk/src/vfs/custom-commands.ts +++ b/packages/sdk/src/vfs/custom-commands.ts @@ -1,5 +1,4 @@ import { - BrowserbaseProvider, BrowserUseProvider, configureBrowseCommand, executeBrowseCommand, @@ -579,7 +578,7 @@ const imageSearchCmd: DescribedCommand = { }), }; -function getBrowserProvider(): BrowserbaseProvider | BrowserUseProvider | null { +function getBrowserProvider(): BrowserUseProvider | null { const webConfig = loadWebConfig(); const browserUseApiKey = webConfig.apiKeys?.browserUse; @@ -587,15 +586,6 @@ function getBrowserProvider(): BrowserbaseProvider | BrowserUseProvider | null { return new BrowserUseProvider({ apiKey: browserUseApiKey }); } - const browserbaseApiKey = webConfig.apiKeys?.browserbase; - const browserbaseProjectId = webConfig.apiKeys?.browserbaseProjectId; - if (browserbaseApiKey && browserbaseProjectId) { - return new BrowserbaseProvider({ - apiKey: browserbaseApiKey, - projectId: browserbaseProjectId, - }); - } - return null; } diff --git a/packages/sdk/src/web/config.ts b/packages/sdk/src/web/config.ts index bbccb9f..75de01e 100644 --- a/packages/sdk/src/web/config.ts +++ b/packages/sdk/src/web/config.ts @@ -6,8 +6,6 @@ export interface WebConfig { exa?: string; brave?: string; serper?: string; - browserbase?: string; - browserbaseProjectId?: string; browserUse?: string; }; } From 7ed17f2d9055472d7a073cefe2d27b27a6855d88 Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Sun, 29 Mar 2026 16:07:04 +0800 Subject: [PATCH 11/12] chore: remove live previewer --- packages/browser/src/browser.ts | 4 - packages/browser/src/command.ts | 7 +- packages/browser/src/index.ts | 2 + packages/browser/src/providers/browser-use.ts | 1 - packages/browser/src/providers/browserbase.ts | 137 ++++++++++++++ packages/browser/src/providers/index.ts | 4 + 
packages/browser/src/providers/types.ts | 1 - packages/browser/tests/browser.test.ts | 5 +- .../tests/integration/providers.test.ts | 105 ++++++++++- packages/browser/tests/providers.test.ts | 167 +++++++++++++++++- packages/core/src/chat/browser-viewer.svelte | 112 +++--------- packages/core/src/chat/settings-panel.svelte | 26 +++ packages/sdk/src/vfs/custom-commands.ts | 14 +- packages/sdk/src/web/config.ts | 1 + 14 files changed, 474 insertions(+), 112 deletions(-) create mode 100644 packages/browser/src/providers/browserbase.ts diff --git a/packages/browser/src/browser.ts b/packages/browser/src/browser.ts index 994d997..168196c 100644 --- a/packages/browser/src/browser.ts +++ b/packages/browser/src/browser.ts @@ -91,10 +91,6 @@ export class Browser { return this.session?.sessionId; } - get sessionMetadata(): Record | undefined { - return this.session?.metadata; - } - get cdpUrl(): string | undefined { return this.session?.cdpUrl; } diff --git a/packages/browser/src/command.ts b/packages/browser/src/command.ts index d4fc8e9..eed43a2 100644 --- a/packages/browser/src/command.ts +++ b/packages/browser/src/command.ts @@ -5,7 +5,6 @@ let activeBrowser: Browser | null = null; export interface BrowseSessionEvent { active: boolean; - liveUrl?: string; sessionId?: string; } @@ -22,10 +21,8 @@ export function onBrowseSessionChange( export function getBrowseSessionState(): BrowseSessionEvent { if (!activeBrowser) return { active: false }; - const metadata = activeBrowser.sessionMetadata; return { active: true, - liveUrl: metadata?.liveUrl as string | undefined, sessionId: activeBrowser.sessionId, }; } @@ -296,11 +293,10 @@ export async function executeBrowseCommand( waitUntil: flags.wait ?? "load", timeoutMs: flags.timeout ? 
parseInt(flags.timeout, 10) : undefined, }); + emitSessionChange(); const result: Record = { url: await activeBrowser.page.getUrl(), }; - const liveUrl = activeBrowser.sessionMetadata?.liveUrl; - if (liveUrl) result.liveUrl = liveUrl; return { stdout: output(result, json), stderr: "", exitCode: 0 }; } @@ -319,7 +315,6 @@ export async function executeBrowseCommand( { status: "connected", sessionId: activeBrowser.sessionId, - liveUrl: activeBrowser.sessionMetadata?.liveUrl, url, title, }, diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts index 46e2a80..21fd3b8 100644 --- a/packages/browser/src/index.ts +++ b/packages/browser/src/index.ts @@ -34,6 +34,8 @@ export { type TypeResult, } from "./page.js"; export { + type BrowserbaseConfig, + BrowserbaseProvider, type BrowserProvider, type BrowserSession, type BrowserUseConfig, diff --git a/packages/browser/src/providers/browser-use.ts b/packages/browser/src/providers/browser-use.ts index 66fd6d9..baf5a70 100644 --- a/packages/browser/src/providers/browser-use.ts +++ b/packages/browser/src/providers/browser-use.ts @@ -84,7 +84,6 @@ export class BrowserUseProvider implements BrowserProvider { return { cdpUrl, sessionId: data.id, - metadata: { ...data }, }; } diff --git a/packages/browser/src/providers/browserbase.ts b/packages/browser/src/providers/browserbase.ts new file mode 100644 index 0000000..14cc4e3 --- /dev/null +++ b/packages/browser/src/providers/browserbase.ts @@ -0,0 +1,137 @@ +import type { + BrowserProvider, + BrowserSession, + CreateSessionOptions, +} from "./types.js"; + +const BROWSERBASE_API = "https://api.browserbase.com"; + +export interface BrowserbaseConfig { + apiKey: string; + projectId?: string; + baseUrl?: string; + corsProxyUrl?: string; + region?: "us-west-2" | "us-east-1" | "eu-central-1" | "ap-southeast-1"; + keepAlive?: boolean; + timeoutSeconds?: number; + proxy?: boolean; +} + +interface BrowserbaseCreateBody { + projectId?: string; + region?: string; + keepAlive?: 
boolean; + timeout?: number; + proxies?: boolean; + browserSettings?: { + viewport?: { width: number; height: number }; + context?: { id: string }; + }; +} + +interface BrowserbaseCreateResponse { + id: string; + connectUrl: string; + signingKey: string; + seleniumRemoteUrl: string; + createdAt: string; + expiresAt: string; + projectId: string; + status: string; + region: string; +} + +export class BrowserbaseProvider implements BrowserProvider { + readonly name = "browserbase"; + private config: BrowserbaseConfig; + + constructor(config: BrowserbaseConfig) { + this.config = config; + } + + private proxyFetch(url: string, init: RequestInit): Promise { + if (this.config.corsProxyUrl) { + return fetch( + `${this.config.corsProxyUrl}/?url=${encodeURIComponent(url)}`, + init, + ); + } + return fetch(url, init); + } + + async createSession(options?: CreateSessionOptions): Promise { + const baseUrl = this.config.baseUrl ?? BROWSERBASE_API; + const body: BrowserbaseCreateBody = {}; + + if (this.config.projectId) { + body.projectId = this.config.projectId; + } + if (this.config.region) { + body.region = this.config.region; + } + if (this.config.keepAlive !== undefined) { + body.keepAlive = this.config.keepAlive; + } + if (this.config.timeoutSeconds !== undefined) { + body.timeout = this.config.timeoutSeconds; + } + if (this.config.proxy ?? 
options?.proxy) { + body.proxies = true; + } + + const browserSettings: BrowserbaseCreateBody["browserSettings"] = {}; + if (options?.viewport) { + browserSettings.viewport = options.viewport; + } + if (options?.contextId) { + browserSettings.context = { id: options.contextId }; + } + if (Object.keys(browserSettings).length > 0) { + body.browserSettings = browserSettings; + } + + const response = await this.proxyFetch(`${baseUrl}/v1/sessions`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-BB-API-Key": this.config.apiKey, + }, + body: JSON.stringify(body), + }); + + if (response.status === 401 || response.status === 403) { + throw new Error( + `Browserbase authentication failed (${response.status}). Check your BROWSERBASE_API_KEY.`, + ); + } + + if (!response.ok) { + const text = await response.text(); + throw new Error( + `Browserbase session creation failed (${response.status}): ${text}`, + ); + } + + const data: BrowserbaseCreateResponse = await response.json(); + if (!data.connectUrl || !data.id) { + throw new Error("Browserbase response missing connectUrl or id"); + } + + return { + cdpUrl: data.connectUrl, + sessionId: data.id, + }; + } + + async closeSession(sessionId: string): Promise { + const baseUrl = this.config.baseUrl ?? 
BROWSERBASE_API; + await this.proxyFetch(`${baseUrl}/v1/sessions/${sessionId}`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-BB-API-Key": this.config.apiKey, + }, + body: JSON.stringify({ status: "REQUEST_RELEASE" }), + }).catch(() => {}); + } +} diff --git a/packages/browser/src/providers/index.ts b/packages/browser/src/providers/index.ts index eb76380..4b1fd4f 100644 --- a/packages/browser/src/providers/index.ts +++ b/packages/browser/src/providers/index.ts @@ -1,3 +1,7 @@ +export { + type BrowserbaseConfig, + BrowserbaseProvider, +} from "./browserbase.js"; export { type BrowserUseConfig, BrowserUseProvider } from "./browser-use.js"; export type { BrowserProvider, diff --git a/packages/browser/src/providers/types.ts b/packages/browser/src/providers/types.ts index bfbdf38..0b65dbd 100644 --- a/packages/browser/src/providers/types.ts +++ b/packages/browser/src/providers/types.ts @@ -1,7 +1,6 @@ export interface BrowserSession { cdpUrl: string; sessionId: string; - metadata?: Record; } export interface BrowserProvider { diff --git a/packages/browser/tests/browser.test.ts b/packages/browser/tests/browser.test.ts index 1059b50..73a1f3a 100644 --- a/packages/browser/tests/browser.test.ts +++ b/packages/browser/tests/browser.test.ts @@ -43,7 +43,7 @@ function createProvider(session?: Partial): FakeProvider { const resolvedSession: BrowserSession = { cdpUrl: "wss://cdp.example/session", sessionId: "session-1", - metadata: { liveUrl: "https://live.example/session-1" }, + ...session, }; @@ -196,9 +196,6 @@ describe("Browser", () => { ]); expect(browser.sessionId).toBe("session-1"); expect(browser.cdpUrl).toBe("wss://cdp.example/session"); - expect(browser.sessionMetadata).toEqual({ - liveUrl: "https://live.example/session-1", - }); await browser.close(); diff --git a/packages/browser/tests/integration/providers.test.ts b/packages/browser/tests/integration/providers.test.ts index af12f8a..36ef099 100644 --- 
a/packages/browser/tests/integration/providers.test.ts +++ b/packages/browser/tests/integration/providers.test.ts @@ -1,5 +1,9 @@ import { afterAll, beforeAll, describe, expect, it } from "vitest"; -import { Browser, BrowserUseProvider } from "../../src/index.js"; +import { + Browser, + BrowserbaseProvider, + BrowserUseProvider, +} from "../../src/index.js"; import { type FixtureServer, startFixtureServer, @@ -99,3 +103,102 @@ suite("BrowserUseProvider", () => { ); }, 30000); }); + +const bbApiKey = process.env.BROWSERBASE_API_KEY; +const bbProjectId = process.env.BROWSERBASE_PROJECT_ID; +const bbSuite = bbApiKey ? describe : describe.skip; + +bbSuite("BrowserbaseProvider", () => { + let fixture: FixtureServer; + + beforeAll(async () => { + fixture = await startFixtureServer(); + }); + + afterAll(async () => { + await stopServer(fixture.server); + }); + + it("creates a session and returns a valid CDP url", async () => { + const provider = new BrowserbaseProvider({ + apiKey: bbApiKey!, + projectId: bbProjectId, + timeoutSeconds: 60, + }); + + const session = await provider.createSession(); + try { + expect(session.sessionId).toBeTruthy(); + expect(session.cdpUrl).toMatch(/^wss?:\/\//); + } finally { + await provider.closeSession(session.sessionId); + } + }, 30000); + + it("connects, navigates, and reads page content", async () => { + const provider = new BrowserbaseProvider({ + apiKey: bbApiKey!, + projectId: bbProjectId, + timeoutSeconds: 60, + }); + + const browser = await Browser.launch({ provider }); + try { + await browser.page.goto("https://example.com"); + const title = await browser.page.getTitle(); + expect(title).toContain("Example Domain"); + + const text = await browser.page.getText("h1"); + expect(text).toContain("Example Domain"); + } finally { + await browser.close(); + } + }, 60000); + + it("takes a screenshot", async () => { + const provider = new BrowserbaseProvider({ + apiKey: bbApiKey!, + projectId: bbProjectId, + timeoutSeconds: 60, + }); + + 
const browser = await Browser.launch({ provider }); + try { + await browser.page.goto("https://example.com"); + const screenshot = await browser.page.screenshot(); + expect(screenshot.format).toBe("png"); + expect(screenshot.base64.length).toBeGreaterThan(1000); + } finally { + await browser.close(); + } + }, 60000); + + it("captures an accessibility snapshot", async () => { + const provider = new BrowserbaseProvider({ + apiKey: bbApiKey!, + projectId: bbProjectId, + timeoutSeconds: 60, + }); + + const browser = await Browser.launch({ provider }); + try { + await browser.page.goto("https://example.com"); + const snap = await browser.page.snapshot({ interactive: true }); + expect(snap.tree).toBeTruthy(); + expect(snap.tree).toContain("Example Domain"); + } finally { + await browser.close(); + } + }, 60000); + + it("rejects with auth error for invalid key", async () => { + const provider = new BrowserbaseProvider({ + apiKey: "bb__invalid_key_for_testing", + timeoutSeconds: 60, + }); + + await expect(provider.createSession()).rejects.toThrow( + /authentication failed|401|403/i, + ); + }, 30000); +}); diff --git a/packages/browser/tests/providers.test.ts b/packages/browser/tests/providers.test.ts index 6a24102..9c4c124 100644 --- a/packages/browser/tests/providers.test.ts +++ b/packages/browser/tests/providers.test.ts @@ -1,6 +1,7 @@ import { once } from "node:events"; import http from "node:http"; import { afterEach, describe, expect, it } from "vitest"; +import { BrowserbaseProvider } from "../src/providers/browserbase.js"; import { BrowserUseProvider } from "../src/providers/browser-use.js"; interface CapturedRequest { @@ -156,10 +157,6 @@ describe("BrowserUseProvider", () => { expect(session).toEqual({ cdpUrl: "wss://browser-use.example/devtools/browser-1/ws", sessionId: "browser-1", - metadata: { - id: "browser-1", - cdpUrl: `${apiServer.baseUrl}/devtools/browser-1`, - }, }); }); @@ -189,3 +186,165 @@ describe("BrowserUseProvider", () => { await 
expect(provider.closeSession("browser-1")).resolves.toBeUndefined(); }); }); + +describe("BrowserbaseProvider", () => { + let apiServer: ApiServer | null = null; + + afterEach(async () => { + if (apiServer) { + await apiServer.close(); + apiServer = null; + } + }); + + it("creates a session and returns the connectUrl as cdpUrl", async () => { + apiServer = await startApiServer((request) => { + if (request.path === "/v1/sessions") { + expect(request.method).toBe("POST"); + return { + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + id: "session-abc", + connectUrl: "wss://connect.browserbase.com/session-abc", + signingKey: "sk-123", + seleniumRemoteUrl: "https://connect.browserbase.com/webdriver", + createdAt: "2025-01-01T00:00:00Z", + expiresAt: "2025-01-01T00:05:00Z", + projectId: "proj-1", + status: "RUNNING", + region: "us-west-2", + }), + }; + } + + return { status: 404, body: "not found" }; + }); + + const provider = new BrowserbaseProvider({ + apiKey: "bb-key", + baseUrl: apiServer.baseUrl, + projectId: "proj-1", + region: "us-west-2", + timeoutSeconds: 300, + proxy: true, + }); + + const session = await provider.createSession(); + + expect(apiServer.requests).toHaveLength(1); + expect(apiServer.requests[0]).toMatchObject({ + method: "POST", + path: "/v1/sessions", + headers: { + "content-type": "application/json", + "x-bb-api-key": "bb-key", + }, + }); + expect(JSON.parse(apiServer.requests[0]!.body)).toEqual({ + projectId: "proj-1", + region: "us-west-2", + timeout: 300, + proxies: true, + }); + + expect(session).toEqual({ + cdpUrl: "wss://connect.browserbase.com/session-abc", + sessionId: "session-abc", + }); + }); + + it("passes viewport and contextId through browserSettings", async () => { + apiServer = await startApiServer((request) => { + if (request.path === "/v1/sessions") { + return { + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + id: "session-xyz", + connectUrl: 
"wss://connect.browserbase.com/session-xyz", + signingKey: "sk-456", + seleniumRemoteUrl: "https://connect.browserbase.com/webdriver", + createdAt: "2025-01-01T00:00:00Z", + expiresAt: "2025-01-01T00:05:00Z", + projectId: "proj-1", + status: "RUNNING", + region: "us-east-1", + }), + }; + } + return { status: 404, body: "not found" }; + }); + + const provider = new BrowserbaseProvider({ + apiKey: "bb-key", + baseUrl: apiServer.baseUrl, + }); + + await provider.createSession({ + viewport: { width: 1920, height: 1080 }, + contextId: "ctx-1", + }); + + const body = JSON.parse(apiServer.requests[0]!.body); + expect(body.browserSettings).toEqual({ + viewport: { width: 1920, height: 1080 }, + context: { id: "ctx-1" }, + }); + }); + + it("throws a clear authentication error for 401 and 403 responses", async () => { + apiServer = await startApiServer(() => ({ + status: 401, + body: "unauthorized", + })); + + const provider = new BrowserbaseProvider({ + apiKey: "bad-key", + baseUrl: apiServer.baseUrl, + }); + + await expect(provider.createSession()).rejects.toThrow( + "Browserbase authentication failed (401). 
Check your BROWSERBASE_API_KEY.", + ); + }); + + it("closes a session by sending REQUEST_RELEASE", async () => { + apiServer = await startApiServer((request) => { + if ( + request.path === "/v1/sessions/session-abc" && + request.method === "POST" + ) { + return { + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ id: "session-abc", status: "COMPLETED" }), + }; + } + return { status: 404, body: "not found" }; + }); + + const provider = new BrowserbaseProvider({ + apiKey: "bb-key", + baseUrl: apiServer.baseUrl, + }); + + await provider.closeSession("session-abc"); + + expect(apiServer.requests).toHaveLength(1); + expect(apiServer.requests[0]).toMatchObject({ + method: "POST", + path: "/v1/sessions/session-abc", + }); + expect(JSON.parse(apiServer.requests[0]!.body)).toEqual({ + status: "REQUEST_RELEASE", + }); + }); + + it("swallows close errors", async () => { + const baseUrl = await getUnusedBaseUrl(); + const provider = new BrowserbaseProvider({ + apiKey: "bb-key", + baseUrl, + }); + + await expect(provider.closeSession("session-abc")).resolves.toBeUndefined(); + }); +}); diff --git a/packages/core/src/chat/browser-viewer.svelte b/packages/core/src/chat/browser-viewer.svelte index 9d47cae..9b25cf8 100644 --- a/packages/core/src/chat/browser-viewer.svelte +++ b/packages/core/src/chat/browser-viewer.svelte @@ -3,115 +3,49 @@ closeActiveBrowser, onBrowseSessionChange, getBrowseSessionState, - type BrowseSessionEvent, } from "@office-agents/sdk"; - import { Globe, X, Minimize2, Maximize2, Square } from "lucide-svelte"; + import { Globe, Square } from "lucide-svelte"; import { onDestroy } from "svelte"; - const initialState = getBrowseSessionState(); - let session = $state(initialState); - let visible = $state(initialState.active && !!initialState.liveUrl); - let expanded = $state(true); + let active = $state(getBrowseSessionState().active); let stopping = $state(false); const unsub = onBrowseSessionChange((event) => { - session = event; - if 
(event.active && event.liveUrl) { - visible = true; - expanded = true; - } else { - visible = false; - stopping = false; - } + active = event.active; + if (!event.active) stopping = false; }); onDestroy(unsub); - function hidePreview() { - visible = false; - } - async function stopBrowser() { stopping = true; await closeActiveBrowser(); } - - function toggleExpand() { - expanded = !expanded; - } - - const hasLiveUrl = $derived(session.active && !!session.liveUrl); -{#if visible && hasLiveUrl} +{#if active}
-
- - Live Browser -
-
- - - -
+ + Browser session active
- {#if expanded} -
- -
- {/if} +
{/if} - -{#if hasLiveUrl && !visible} - -{/if} diff --git a/packages/core/src/chat/settings-panel.svelte b/packages/core/src/chat/settings-panel.svelte index 82c46a7..6e2db4d 100644 --- a/packages/core/src/chat/settings-panel.svelte +++ b/packages/core/src/chat/settings-panel.svelte @@ -61,6 +61,7 @@ let serperApiKey = $state(savedWeb.apiKeys.serper || ""); let exaApiKey = $state(savedWeb.apiKeys.exa || ""); let browserUseApiKey = $state(savedWeb.apiKeys.browserUse || ""); + let browserbaseApiKey = $state(savedWeb.apiKeys.browserbase || ""); let showAdvancedWebKeys = $state(false); let oauthFlow = $state( @@ -171,6 +172,7 @@ serperApiKey: string; exaApiKey: string; browserUseApiKey: string; + browserbaseApiKey: string; }>, ) { webSearchProvider = updates.searchProvider ?? webSearchProvider; @@ -181,6 +183,7 @@ serperApiKey = updates.serperApiKey ?? serperApiKey; exaApiKey = updates.exaApiKey ?? exaApiKey; browserUseApiKey = updates.browserUseApiKey ?? browserUseApiKey; + browserbaseApiKey = updates.browserbaseApiKey ?? browserbaseApiKey; saveWebConfig({ searchProvider: webSearchProvider, @@ -191,6 +194,7 @@ serper: serperApiKey, exa: exaApiKey, browserUse: browserUseApiKey, + browserbase: browserbaseApiKey, }, }); } @@ -810,6 +814,28 @@
+
+ + + +
+ {@render apiKeyField("API Key", browserbaseApiKey, (v) => { browserbaseApiKey = v; updateWebSettings({ browserbaseApiKey }); }, "bb_live_...")} +

+ Get an API key at browserbase.com/settings +

+ {#if browserbaseApiKey && browserUseApiKey} +

+ Both providers configured — Browserbase will be used. +

+ {/if} +
+
+
diff --git a/packages/sdk/src/vfs/custom-commands.ts b/packages/sdk/src/vfs/custom-commands.ts index e2f6958..71e488a 100644 --- a/packages/sdk/src/vfs/custom-commands.ts +++ b/packages/sdk/src/vfs/custom-commands.ts @@ -1,8 +1,10 @@ import { + BrowserbaseProvider, BrowserUseProvider, configureBrowseCommand, executeBrowseCommand, } from "@office-agents/browser"; +import type { BrowserProvider } from "@office-agents/browser"; import type { CustomCommand } from "just-bash/browser"; import { defineCommand } from "just-bash/browser"; import { loadPdfDocument } from "../pdf"; @@ -578,9 +580,17 @@ const imageSearchCmd: DescribedCommand = { }), }; -function getBrowserProvider(): BrowserUseProvider | null { +function getBrowserProvider(): BrowserProvider | null { const webConfig = loadWebConfig(); + const browserbaseApiKey = webConfig.apiKeys?.browserbase; + if (browserbaseApiKey) { + return new BrowserbaseProvider({ + apiKey: browserbaseApiKey, + corsProxyUrl: getProxyUrl(), + }); + } + const browserUseApiKey = webConfig.apiKeys?.browserUse; if (browserUseApiKey) { return new BrowserUseProvider({ apiKey: browserUseApiKey }); @@ -591,7 +601,7 @@ function getBrowserProvider(): BrowserUseProvider | null { const browseCmd: DescribedCommand = { promptSnippet: - "- browse [options] — Open a cloud browser session to interact with a web page. Supports navigation, screenshots, clicks, form filling, and data extraction.", + "- browse — Cloud browser. 
Run `browse --help` first to see all commands.", isAvailable: () => getBrowserProvider() !== null, command: defineCommand("browse", async (args, ctx) => { configureBrowseCommand({ diff --git a/packages/sdk/src/web/config.ts b/packages/sdk/src/web/config.ts index 75de01e..35e3acc 100644 --- a/packages/sdk/src/web/config.ts +++ b/packages/sdk/src/web/config.ts @@ -7,6 +7,7 @@ export interface WebConfig { brave?: string; serper?: string; browserUse?: string; + browserbase?: string; }; } From 4b4aefd258edce931610f9be676cc6cca05d7924 Mon Sep 17 00:00:00 2001 From: Li Yang <76434265+hewliyang@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:24:47 +0800 Subject: [PATCH 12/12] Stuff --- .gitignore | 3 + packages/browser/PARITY_CHECKLIST.md | 138 ++ packages/browser/package.json | 6 + packages/browser/src/browser.ts | 491 +++++- packages/browser/src/command.ts | 1211 +++++++++++++-- packages/browser/src/index.ts | 18 +- packages/browser/src/markdown.ts | 75 + packages/browser/src/page.ts | 636 +++++++- packages/browser/src/providers/index.ts | 2 +- packages/browser/tests/browser.test.ts | 112 +- packages/browser/tests/command.test.ts | 435 ++++++ packages/browser/tests/integration.test.ts | 134 ++ packages/browser/tests/integration/helpers.ts | 57 +- packages/browser/tests/providers.test.ts | 2 +- packages/core/src/chat/browser-viewer.svelte | 154 +- packages/core/src/chat/chat-controller.ts | 22 + packages/core/src/chat/settings-panel.svelte | 1314 ++++++++++------- packages/core/src/index.ts | 11 + packages/sdk/src/browse-manager.ts | 74 + packages/sdk/src/index.ts | 26 +- packages/sdk/src/provider-keys.ts | 151 ++ packages/sdk/src/runtime.ts | 4 +- packages/sdk/src/vfs/custom-commands.ts | 31 +- packages/sdk/src/web/config.ts | 18 + packages/sdk/src/web/fetch.ts | 39 +- packages/word/DESIGN.md | 304 ++++ pnpm-lock.yaml | 14 + 27 files changed, 4613 insertions(+), 869 deletions(-) create mode 100644 packages/browser/PARITY_CHECKLIST.md create mode 100644 
packages/browser/src/markdown.ts create mode 100644 packages/browser/tests/command.test.ts create mode 100644 packages/sdk/src/browse-manager.ts create mode 100644 packages/sdk/src/provider-keys.ts create mode 100644 packages/word/DESIGN.md diff --git a/.gitignore b/.gitignore index 4eaacd8..3df5645 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,6 @@ npm-debug.log* *.key *.pem headless-test/ + +# Wrangler local state +.wrangler/ diff --git a/packages/browser/PARITY_CHECKLIST.md b/packages/browser/PARITY_CHECKLIST.md new file mode 100644 index 0000000..c7e94ad --- /dev/null +++ b/packages/browser/PARITY_CHECKLIST.md @@ -0,0 +1,138 @@ +# browse ↔ agent-browser parity checklist + +Scope: browser automation and agent-facing CLI ergonomics for `@office-agents/browser`'s `browse` command. + +Out of scope for now: native install/upgrade flows, daemon/dashboard/session registry, iOS/device-management, local DevTools proxying, video/trace/profiler, auth vault, and other features that depend on the Rust daemon architecture from `agent-browser`. + +## Target + +Reach ~90% parity for the browser-automation subset by closing the biggest command-surface gaps and adding command-contract tests. 
+ +## Command parity + +### Navigation / session +- [x] `open` +- [x] `back` +- [x] `forward` +- [x] `reload` +- [x] `status` +- [x] `stop` +- [x] `connect ` +- [x] `close | quit | exit` +- [x] `close --all` (local alias for single in-process session) + +### Core actions +- [x] `click ` +- [x] `click --new-tab` +- [x] `dblclick ` +- [x] `type` (`type ` plus focused-element fallback) +- [x] `fill ` +- [x] `press ` +- [x] `key` alias for `press` +- [x] `keydown ` +- [x] `keyup ` +- [x] `keyboard type ` +- [x] `keyboard inserttext ` +- [x] `hover` +- [x] `focus` +- [x] `check` +- [x] `uncheck` +- [x] `select` +- [x] `drag ` +- [x] `upload ` + +### Scrolling / waiting +- [x] `scroll` (directional mode + low-level XY deltas) +- [x] `scrollintoview | scrollinto` +- [x] `wait ` +- [x] `wait ` +- [x] `wait --text` +- [x] `wait --url` +- [x] `wait --load` +- [x] `wait --fn` +- [x] wait state parity (`visible|hidden|attached|detached`) + +### Artifacts +- [x] `snapshot` +- [ ] `snapshot --selector` +- [x] `screenshot` (selector/path parity) +- [x] `pdf` (base64 + file output parity) +- [x] `download` (URL or selector-driven path parity) + +### Get / state +- [x] `get text` +- [x] `get html` +- [x] `get value` +- [x] `get attr` +- [x] `get title` +- [x] `get url` +- [x] `get count` +- [x] `get box` +- [x] `get styles` +- [x] `get cdp-url` +- [x] `is visible` +- [x] `is enabled` +- [x] `is checked` + +### Semantic locators +- [x] `find role` +- [x] `find text` +- [x] `find label` +- [x] `find placeholder` +- [x] `find alt` +- [x] `find title` +- [x] `find testid` +- [x] `find first` +- [x] `find last` +- [x] `find nth` + +### Mouse / keyboard namespaces +- [x] `mouse move` +- [x] `mouse down` +- [x] `mouse up` +- [x] `mouse wheel` + +### Browser settings +- [x] `set viewport` +- [x] `set device` +- [x] `set geo` +- [x] `set offline` +- [x] `set headers` +- [x] `set credentials | set auth` +- [x] `set media` (color scheme + reduced-motion parity) + +### Cookies / storage / tabs 
+- [x] `cookies` +- [x] `cookies set` +- [x] `cookies clear` +- [x] `storage local` +- [x] `storage session` +- [x] `tab` +- [x] `tab new` +- [x] `tab close` + +### Possible later +- [ ] `clipboard` +- [ ] `batch` +- [ ] `network ...` +- [ ] `inspect` +- [ ] `console` +- [ ] `errors` +- [ ] `highlight` + +## Test expansion plan + +### Phase 1 — command contract tests +- [x] add `tests/command.test.ts` +- [x] cover existing commands with fake `Browser` / `Page` +- [x] add tests for new aliases and error cases + +### Phase 2 — page/integration coverage +- [x] drag + upload fixtures +- [x] semantic locator integration tests +- [x] device / screenshot selector coverage +- [x] scroll direction + scroll-into-view coverage + +### Phase 3 — docs +- [ ] update `packages/browser/README.md` +- [ ] update `browse --help` output diff --git a/packages/browser/package.json b/packages/browser/package.json index b4eac7c..a7dc03e 100644 --- a/packages/browser/package.json +++ b/packages/browser/package.json @@ -35,6 +35,12 @@ "test:integration": "vitest run tests/integration.test.ts tests/integration/", "test:install-browsers": "playwright install chromium" }, + "dependencies": { + "@mozilla/readability": "^0.6.0", + "@types/turndown": "^5.0.6", + "linkedom": "^0.18.12", + "turndown": "^7.2.2" + }, "devDependencies": { "devtools-protocol": "^0.0.1602427", "playwright": "1.57.0", diff --git a/packages/browser/src/browser.ts b/packages/browser/src/browser.ts index 168196c..c03709d 100644 --- a/packages/browser/src/browser.ts +++ b/packages/browser/src/browser.ts @@ -33,19 +33,65 @@ export interface BrowserTab { active: boolean; } +export interface BrowserPreviewState { + connected: boolean; + live: boolean; + frameBase64: string | null; + tabs: BrowserTab[]; + url: string; + title: string; +} + +interface TargetInfoLike { + targetId: string; + type: string; + url: string; + title: string; +} + +type PreviewListener = (state: BrowserPreviewState) => void; + const 
defaultBrowserDependencies: BrowserDependencies = { connectCdp: (wsUrl, options) => CdpClient.connect(wsUrl, options), attachToFirstPage: (cdp) => Page.attachToFirstPage(cdp), attachToTarget: (cdp, targetId) => Page.attachToTarget(cdp, targetId), }; +function isTrackableTarget(target: { type?: string }): boolean { + return target.type === "page"; +} + export class Browser { private cdp: CdpClient | null = null; private provider: BrowserProvider | null = null; private session: BrowserSession | null = null; + private directCdpUrl: string | null = null; private _page: Page | null = null; private currentTargetId: string | null = null; private deps: BrowserDependencies = defaultBrowserDependencies; + private trackedTargets = new Map(); + private trackedTargetOrder: string[] = []; + private previewListeners = new Set(); + private previewState: BrowserPreviewState = { + connected: false, + live: false, + frameBase64: null, + tabs: [], + url: "", + title: "", + }; + private previewCleanup: (() => Promise) | null = null; + private previewGeneration = 0; + private attachQueue: Promise = Promise.resolve(); + private readonly onTargetCreatedBound = (params: unknown) => { + void this.handleTargetCreated(params); + }; + private readonly onTargetInfoChangedBound = (params: unknown) => { + void this.handleTargetInfoChanged(params); + }; + private readonly onTargetDestroyedBound = (params: unknown) => { + void this.handleTargetDestroyed(params); + }; private constructor() {} @@ -63,6 +109,7 @@ export class Browser { ); browser._page = await browser.deps.attachToFirstPage(browser.cdp); browser.currentTargetId = browser._page.targetId ?? 
null; + await browser.initialize(); } catch (err) { await browser.close(); throw err; @@ -73,12 +120,14 @@ export class Browser { static async connect(options: ConnectOptions): Promise { const browser = new Browser(); browser.deps = { ...defaultBrowserDependencies, ...options.deps }; + browser.directCdpUrl = options.cdpUrl; browser.cdp = await browser.deps.connectCdp( options.cdpUrl, options.cdpOptions, ); browser._page = await browser.deps.attachToFirstPage(browser.cdp); browser.currentTargetId = browser._page.targetId ?? null; + await browser.initialize(); return browser; } @@ -92,7 +141,7 @@ export class Browser { } get cdpUrl(): string | undefined { - return this.session?.cdpUrl; + return this.session?.cdpUrl ?? this.directCdpUrl ?? undefined; } private get cdpClient(): CdpClient { @@ -100,58 +149,407 @@ export class Browser { return this.cdp; } - private async attachToTarget(targetId: string): Promise { - const previousSessionId = this._page?.sessionId; - if (previousSessionId) { - await this.cdpClient - .send("Target.detachFromTarget", { - sessionId: previousSessionId, - }) - .catch(() => {}); - this.cdpClient.releaseSession(previousSessionId, "detached by client"); - } - const page = await this.deps.attachToTarget(this.cdpClient, targetId); - this._page = page; - this.currentTargetId = targetId; + private async initialize(): Promise { + await this.enableTargetDiscovery(); + await this.refreshTargets(); + this.installTargetListeners(); + await this.syncCurrentPageInfo(); + this.previewState.connected = true; + this.emitPreview(); + } + + private async enableTargetDiscovery(): Promise { + await this.cdpClient + .send("Target.setDiscoverTargets", { discover: true }) + .catch(() => {}); + } + + private installTargetListeners(): void { + this.cdpClient.on("Target.targetCreated", this.onTargetCreatedBound); + this.cdpClient.on( + "Target.targetInfoChanged", + this.onTargetInfoChangedBound, + ); + this.cdpClient.on("Target.targetDestroyed", 
this.onTargetDestroyedBound); + } + + private removeTargetListeners(): void { + if (!this.cdp) return; + this.cdp.off("Target.targetCreated", this.onTargetCreatedBound); + this.cdp.off("Target.targetInfoChanged", this.onTargetInfoChangedBound); + this.cdp.off("Target.targetDestroyed", this.onTargetDestroyedBound); + } + + private async refreshTargets(): Promise { + const currentTargetId = this.currentTargetId; + try { + const { targetInfos } = await this.cdpClient.send("Target.getTargets"); + const nextOrder: string[] = []; + const nextTargets = new Map(); + + for (const target of targetInfos) { + if (!isTrackableTarget(target)) continue; + nextOrder.push(target.targetId); + nextTargets.set(target.targetId, { + url: target.url, + title: target.title, + }); + } + + if (currentTargetId && !nextTargets.has(currentTargetId)) { + nextOrder.push(currentTargetId); + nextTargets.set(currentTargetId, { + url: this.previewState.url, + title: this.previewState.title, + }); + } + + this.trackedTargetOrder = nextOrder; + this.trackedTargets = nextTargets; + } catch { + if (currentTargetId) { + this.upsertTrackedTarget(currentTargetId, { + url: this.previewState.url, + title: this.previewState.title, + }); + } + } + this.emitPreview(); + } + + private upsertTrackedTarget( + targetId: string, + data: { url: string; title: string }, + ): void { + this.trackedTargets.set(targetId, data); + if (!this.trackedTargetOrder.includes(targetId)) { + this.trackedTargetOrder.push(targetId); + } + this.emitPreview(); + } + + private removeTrackedTarget(targetId: string): void { + this.trackedTargets.delete(targetId); + this.trackedTargetOrder = this.trackedTargetOrder.filter( + (id) => id !== targetId, + ); + this.emitPreview(); + } + + getTabsSnapshot(): BrowserTab[] { + return this.trackedTargetOrder + .filter((targetId) => this.trackedTargets.has(targetId)) + .map((targetId, index) => { + const target = this.trackedTargets.get(targetId)!; + return { + index, + targetId, + url: target.url, 
+ title: target.title, + active: targetId === this.currentTargetId, + }; + }); + } + + getPreviewStateSnapshot(): BrowserPreviewState { + return { + ...this.previewState, + connected: this.previewState.connected && !!this.cdp, + tabs: this.getTabsSnapshot(), + url: + this.currentTargetId && this.trackedTargets.has(this.currentTargetId) + ? (this.trackedTargets.get(this.currentTargetId)?.url ?? "") + : this.previewState.url, + title: + this.currentTargetId && this.trackedTargets.has(this.currentTargetId) + ? (this.trackedTargets.get(this.currentTargetId)?.title ?? "") + : this.previewState.title, + }; + } + + subscribePreview(listener: PreviewListener): () => void { + this.previewListeners.add(listener); + listener(this.getPreviewStateSnapshot()); + if (this.previewListeners.size === 1) { + void this.restartPreview(); + } + return () => { + this.previewListeners.delete(listener); + if (this.previewListeners.size === 0) { + void this.stopPreview(); + } + }; + } + + private emitPreview(): void { + const snapshot = this.getPreviewStateSnapshot(); + for (const listener of this.previewListeners) { + try { + listener(snapshot); + } catch {} + } + } + + private async syncCurrentPageInfo(): Promise { + if (!this._page || !this.currentTargetId) return; + try { + const info = await this._page.getInfo(); + this.previewState.url = info.url; + this.previewState.title = info.title; + this.upsertTrackedTarget(this.currentTargetId, info); + return; + } catch {} + + if (!this.trackedTargets.has(this.currentTargetId)) { + this.upsertTrackedTarget(this.currentTargetId, { + url: this.previewState.url, + title: this.previewState.title, + }); + } + } + + private async activateTarget(targetId: string): Promise { await this.cdpClient .send("Target.activateTarget", { targetId }) .catch(() => {}); - return page; + } + + private async runExclusive(task: () => Promise): Promise { + const previous = this.attachQueue; + let release!: () => void; + this.attachQueue = new Promise((resolve) => { + 
release = resolve; + }); + await previous; + try { + return await task(); + } finally { + release(); + } + } + + private async attachToTarget(targetId: string): Promise { + return this.runExclusive(async () => { + const currentPage = this._page; + if ( + currentPage && + this.currentTargetId === targetId && + currentPage.cdpSession && + !currentPage.cdpSession.isDetached + ) { + await this.activateTarget(targetId); + await this.syncCurrentPageInfo(); + await this.restartPreview(); + return currentPage; + } + + const previousSessionId = currentPage?.sessionId; + if (previousSessionId) { + await this.cdpClient + .send("Target.detachFromTarget", { + sessionId: previousSessionId, + }) + .catch(() => {}); + this.cdpClient.releaseSession(previousSessionId, "detached by client"); + } + + const page = await this.deps.attachToTarget(this.cdpClient, targetId); + this._page = page; + this.currentTargetId = targetId; + await this.activateTarget(targetId); + await this.syncCurrentPageInfo(); + await this.restartPreview(); + this.emitPreview(); + return page; + }); + } + + private async followTarget(targetId: string): Promise { + if (!this.cdp || targetId === this.currentTargetId) return; + await this.attachToTarget(targetId).catch(() => {}); + } + + private async handleTargetCreated(params: unknown): Promise { + const target = + params && typeof params === "object" && "targetInfo" in params + ? (params.targetInfo as TargetInfoLike) + : null; + if (!target || !isTrackableTarget(target)) return; + + const alreadyTracked = this.trackedTargets.has(target.targetId); + this.upsertTrackedTarget(target.targetId, { + url: target.url, + title: target.title, + }); + + if (!alreadyTracked && target.targetId !== this.currentTargetId) { + await this.followTarget(target.targetId); + } + } + + private async handleTargetInfoChanged(params: unknown): Promise { + const target = + params && typeof params === "object" && "targetInfo" in params + ? 
(params.targetInfo as TargetInfoLike) + : null; + if (!target || !isTrackableTarget(target)) return; + + const alreadyTracked = this.trackedTargets.has(target.targetId); + this.upsertTrackedTarget(target.targetId, { + url: target.url, + title: target.title, + }); + + if (!alreadyTracked && target.targetId !== this.currentTargetId) { + await this.followTarget(target.targetId); + } + } + + private async handleTargetDestroyed(params: unknown): Promise { + const targetId = + params && typeof params === "object" && "targetId" in params + ? String(params.targetId) + : ""; + if (!targetId) return; + + const wasActive = targetId === this.currentTargetId; + this.removeTrackedTarget(targetId); + + if (!wasActive) return; + + const nextTargetId = this.trackedTargetOrder[0] ?? null; + if (!nextTargetId) { + this.currentTargetId = null; + this.previewState.url = ""; + this.previewState.title = ""; + this.previewState.frameBase64 = null; + this.previewState.live = false; + await this.stopPreview(); + this.emitPreview(); + return; + } + + await this.followTarget(nextTargetId); + } + + private async stopPreview(): Promise { + this.previewGeneration += 1; + const cleanup = this.previewCleanup; + this.previewCleanup = null; + if (cleanup) { + await cleanup().catch(() => {}); + } + this.previewState.live = false; + this.emitPreview(); + } + + private async restartPreview(): Promise { + if (this.previewListeners.size === 0) return; + + const generation = this.previewGeneration + 1; + this.previewGeneration = generation; + + const cleanup = this.previewCleanup; + this.previewCleanup = null; + if (cleanup) { + await cleanup().catch(() => {}); + } + + const session = this._page?.cdpSession; + if (!session || session.isDetached) { + this.previewState.live = false; + this.previewState.frameBase64 = null; + this.emitPreview(); + return; + } + + const onFrameNavigated = (params: unknown) => { + if (generation !== this.previewGeneration) return; + const frame = + params && typeof params === 
"object" && "frame" in params + ? (params.frame as { url?: string; parentId?: string }) + : null; + if (!frame || frame.parentId) return; + const url = frame.url ?? ""; + this.previewState.url = url; + if (this.currentTargetId) { + const current = this.trackedTargets.get(this.currentTargetId); + this.upsertTrackedTarget(this.currentTargetId, { + url, + title: current?.title ?? this.previewState.title, + }); + } + this.emitPreview(); + }; + + const onScreencastFrame = (params: unknown) => { + if (generation !== this.previewGeneration) return; + const payload = params as { data?: string; sessionId?: number } | null; + if (!payload?.data) return; + this.previewState.frameBase64 = payload.data; + this.previewState.live = true; + this.emitPreview(); + if (typeof payload.sessionId === "number") { + void session + .send("Page.screencastFrameAck", { sessionId: payload.sessionId }) + .catch(() => {}); + } + }; + + session.on("Page.frameNavigated", onFrameNavigated); + session.on("Page.screencastFrame", onScreencastFrame); + + this.previewCleanup = async () => { + session.off("Page.frameNavigated", onFrameNavigated); + session.off("Page.screencastFrame", onScreencastFrame); + if (!session.isDetached) { + await session.send("Page.stopScreencast").catch(() => {}); + } + }; + + try { + await session.send("Page.startScreencast", { + format: "jpeg", + quality: 80, + maxWidth: 1280, + maxHeight: 720, + everyNthFrame: 1, + }); + this.previewState.live = true; + } catch { + this.previewState.live = false; + } + this.emitPreview(); } async listTabs(): Promise { - const { targetInfos } = await this.cdpClient.send("Target.getTargets"); - return targetInfos - .filter((target) => target.type === "page") - .map((target, index) => ({ - index, - targetId: target.targetId, - url: target.url, - title: target.title, - active: target.targetId === this.currentTargetId, - })); + await this.refreshTargets(); + return this.getTabsSnapshot(); } async newTab(url = "about:blank"): Promise { const { 
targetId } = await this.cdpClient.send("Target.createTarget", { url, }); + this.upsertTrackedTarget(targetId, { url, title: "" }); await this.attachToTarget(targetId); - return this.listTabs(); + return this.getTabsSnapshot(); } async switchTab(index: number): Promise { - const tabs = await this.listTabs(); + await this.refreshTargets(); + const tabs = this.getTabsSnapshot(); const tab = tabs[index]; if (!tab) { throw new Error(`No tab at index ${index}`); } await this.attachToTarget(tab.targetId); - return this.listTabs(); + return this.getTabsSnapshot(); } async closeTab(index?: number): Promise { - const tabs = await this.listTabs(); + await this.refreshTargets(); + const tabs = this.getTabsSnapshot(); if (!tabs.length) return tabs; const targetTab = index === undefined ? tabs.find((tab) => tab.active) : tabs[index]; @@ -165,21 +563,34 @@ export class Browser { targetId: targetTab.targetId, }); - const remaining = await this.listTabs(); + this.removeTrackedTarget(targetTab.targetId); + if (targetTab.targetId === this.currentTargetId) { + this.currentTargetId = null; + } + + await this.refreshTargets(); + let remaining = this.getTabsSnapshot(); if (!remaining.length) { const { targetId } = await this.cdpClient.send("Target.createTarget", { url: "about:blank", }); + this.upsertTrackedTarget(targetId, { url: "about:blank", title: "" }); await this.attachToTarget(targetId); - return this.listTabs(); + return this.getTabsSnapshot(); + } + + if (!remaining.some((tab) => tab.active)) { + const next = remaining[Math.min(targetTab.index, remaining.length - 1)]; + await this.attachToTarget(next.targetId); + remaining = this.getTabsSnapshot(); } - const next = remaining[Math.min(targetTab.index, remaining.length - 1)]; - await this.attachToTarget(next.targetId); - return this.listTabs(); + return remaining; } async close(): Promise { + await this.stopPreview(); + this.removeTargetListeners(); if (this.cdp) { await this.cdp.close(); this.cdp = null; @@ -188,7 +599,19 @@ 
export class Browser { await this.provider.closeSession(this.session.sessionId).catch(() => {}); this.session = null; } + this.directCdpUrl = null; this.currentTargetId = null; this._page = null; + this.trackedTargets.clear(); + this.trackedTargetOrder = []; + this.previewState = { + connected: false, + live: false, + frameBase64: null, + tabs: [], + url: "", + title: "", + }; + this.emitPreview(); } } diff --git a/packages/browser/src/command.ts b/packages/browser/src/command.ts index eed43a2..362f3ed 100644 --- a/packages/browser/src/command.ts +++ b/packages/browser/src/command.ts @@ -1,102 +1,287 @@ -import { Browser, type BrowserOptions } from "./browser.js"; +import { + Browser, + type BrowserOptions, + type BrowserPreviewState, +} from "./browser.js"; import type { BrowserProvider } from "./providers/types.js"; -let activeBrowser: Browser | null = null; - export interface BrowseSessionEvent { active: boolean; sessionId?: string; } -type BrowseSessionListener = (event: BrowseSessionEvent) => void; - -const sessionListeners = new Set(); - -export function onBrowseSessionChange( - listener: BrowseSessionListener, -): () => void { - sessionListeners.add(listener); - return () => sessionListeners.delete(listener); -} - -export function getBrowseSessionState(): BrowseSessionEvent { - if (!activeBrowser) return { active: false }; - return { - active: true, - sessionId: activeBrowser.sessionId, - }; +export interface BrowsePreviewEvent extends BrowserPreviewState { + active: boolean; + sessionId?: string; } -function emitSessionChange(): void { - const event = getBrowseSessionState(); - for (const listener of sessionListeners) { - try { - listener(event); - } catch {} - } -} +type BrowseSessionListener = (event: BrowseSessionEvent) => void; +type BrowsePreviewListener = (event: BrowsePreviewEvent) => void; export interface BrowseCommandConfig { getProvider: () => BrowserProvider | null; writeFile?: (path: string, data: Uint8Array) => Promise; + readFile?: (path: 
string) => Promise; launchBrowser?: (options: BrowserOptions) => Promise; + connectBrowser?: (options: { cdpUrl: string }) => Promise; } -let config: BrowseCommandConfig | null = null; -let lifecycleCleanupInstalled = false; +export class BrowseCli { + private activeBrowser: Browser | null = null; + private readonly sessionListeners = new Set(); + private readonly previewListeners = new Set(); + private previewBridgeCleanup: (() => void) | null = null; + private previewState: BrowsePreviewEvent = { + active: false, + connected: false, + live: false, + frameBase64: null, + tabs: [], + url: "", + title: "", + }; + private lifecycleCleanupInstalled = false; + private config: BrowseCommandConfig | null = null; -async function closeAndClearActiveBrowser(): Promise { - const browser = activeBrowser; - if (!browser) return; - activeBrowser = null; - emitSessionChange(); - await browser.close().catch(() => {}); -} + constructor(config?: BrowseCommandConfig) { + if (config) this.configure(config); + } -function installLifecycleCleanup(): void { - if (lifecycleCleanupInstalled || typeof window === "undefined") { - return; + onSessionChange(listener: BrowseSessionListener): () => void { + this.sessionListeners.add(listener); + return () => this.sessionListeners.delete(listener); } - lifecycleCleanupInstalled = true; - const cleanup = () => { - void closeAndClearActiveBrowser(); - }; + onPreviewChange(listener: BrowsePreviewListener): () => void { + this.previewListeners.add(listener); + if (this.previewListeners.size === 1 && this.activeBrowser) { + this.attachPreviewBridge(); + } else { + listener(this.getPreviewState()); + } + return () => { + this.previewListeners.delete(listener); + if (this.previewListeners.size === 0) { + this.detachPreviewBridge(); + this.previewState = this.getCurrentPreviewEvent(); + } + }; + } - window.addEventListener("pagehide", cleanup); - window.addEventListener("beforeunload", cleanup); -} + getSessionState(): BrowseSessionEvent { + if 
(!this.activeBrowser) return { active: false }; + return { + active: true, + sessionId: this.activeBrowser.sessionId, + }; + } -export function configureBrowseCommand(cfg: BrowseCommandConfig): void { - config = cfg; - installLifecycleCleanup(); -} + getPreviewState(): BrowsePreviewEvent { + if (!this.previewBridgeCleanup) { + return this.getCurrentPreviewEvent(); + } + return this.previewState; + } + + getActiveBrowser(): Browser | null { + return this.activeBrowser; + } + + getProvider(): BrowserProvider { + const provider = this.config?.getProvider(); + if (!provider) { + throw new Error( + "No browser provider configured. Set a browser provider in settings.", + ); + } + return provider; + } + + getBrowserOrThrow(): Browser { + if (!this.activeBrowser) { + throw new Error("No browser session. Run 'browse open ' first."); + } + return this.activeBrowser; + } + + setBrowser(browser: Browser | null): void { + this.setActiveBrowser(browser); + } -function getProvider(): BrowserProvider { - const provider = config?.getProvider(); - if (!provider) { - throw new Error( - "No browser provider configured. Set a browser provider in settings.", + getLaunchBrowser(): (options: BrowserOptions) => Promise { + return this.config?.launchBrowser ?? Browser.launch; + } + + getConnectBrowser(): (options: { cdpUrl: string }) => Promise { + return ( + this.config?.connectBrowser ?? ((options) => Browser.connect(options)) ); } - return provider; + + getReadFile(): ((path: string) => Promise) | undefined { + return this.config?.readFile; + } + + getWriteFile(): + | ((path: string, data: Uint8Array) => Promise) + | undefined { + return this.config?.writeFile; + } + + configure(config: BrowseCommandConfig): void { + this.config = config; + this.installLifecycleCleanup(); + } + + async switchTab(index: number): Promise { + if (!this.activeBrowser) { + throw new Error("No browser session. 
Run 'browse open ' first."); + } + await this.activeBrowser.switchTab(index); + } + + async closeActiveBrowser(): Promise { + const browser = this.activeBrowser; + if (!browser) return; + this.setActiveBrowser(null); + await browser.close().catch(() => {}); + } + + async dispose(): Promise { + this.config = null; + await this.closeActiveBrowser(); + } + + private emitSessionChange(): void { + const event = this.getSessionState(); + for (const listener of this.sessionListeners) { + try { + listener(event); + } catch {} + } + } + + private getCurrentPreviewEvent( + browser: Browser | null = this.activeBrowser, + ): BrowsePreviewEvent { + if (!browser) { + return { + active: false, + connected: false, + live: false, + frameBase64: null, + tabs: [], + url: "", + title: "", + }; + } + + return { + active: true, + sessionId: browser.sessionId, + ...browser.getPreviewStateSnapshot(), + }; + } + + private emitPreviewChange(event = this.getPreviewState()): void { + this.previewState = event; + for (const listener of this.previewListeners) { + try { + listener(event); + } catch {} + } + } + + private attachPreviewBridge(): void { + if ( + this.previewBridgeCleanup || + this.previewListeners.size === 0 || + !this.activeBrowser + ) { + return; + } + + this.previewBridgeCleanup = this.activeBrowser.subscribePreview((state) => { + this.emitPreviewChange({ + active: true, + sessionId: this.activeBrowser?.sessionId, + ...state, + }); + }); + } + + private detachPreviewBridge(): void { + if (!this.previewBridgeCleanup) return; + this.previewBridgeCleanup(); + this.previewBridgeCleanup = null; + } + + private setActiveBrowser(browser: Browser | null): void { + this.detachPreviewBridge(); + this.activeBrowser = browser; + this.emitSessionChange(); + if (this.activeBrowser && this.previewListeners.size > 0) { + this.attachPreviewBridge(); + } else { + this.emitPreviewChange(this.getCurrentPreviewEvent(browser)); + } + } + + private installLifecycleCleanup(): void { + if 
(this.lifecycleCleanupInstalled || typeof window === "undefined") { + return; + } + this.lifecycleCleanupInstalled = true; + + const cleanup = () => { + void this.closeActiveBrowser(); + }; + + window.addEventListener("pagehide", cleanup); + window.addEventListener("beforeunload", cleanup); + } + + async executeCommand( + args: string[], + ): Promise<{ stdout: string; stderr: string; exitCode: number }> { + return executeBrowseCommandWith(this, args); + } } const HELP = `Usage: browse [options] +Tips: + The browse session stays alive across commands in the same shell/runtime. + Chain commands in one bash call when possible, e.g.: + browse open hewliyang.com && browse markdown hewliyang.md + For harvesting multiple URLs, prefer tabs in one shell call, e.g.: + browse open site1.com && browse tab new site2.com && browse tab 0 && browse markdown site1.md && browse tab 1 && browse markdown site2.md + Do not chain snapshot refs blindly; read the snapshot output first, then use refs like @e2. + Core: open [--wait=load|domcontentloaded|networkidle] [--timeout=ms] + goto + navigate + connect snapshot [-i|--interactive] [-c|--compact] [-d N|--depth=N] - click + click [--new-tab] dblclick + focus + type + type Type into the currently focused element fill [--no-enter] - type [--delay=ms] press + key + keydown + keyup + keyboard type + keyboard inserttext hover | hover - focus check uncheck select + drag + upload eval Get: @@ -107,6 +292,8 @@ Get: get value get attr get count + get box + get styles get cdp-url State: @@ -114,6 +301,18 @@ State: is enabled is checked +Find: + find role [value] [--name ] [--exact] + find text + find label