diff --git a/.gitignore b/.gitignore
index c4432f8..e8752ce 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,7 @@ mcp-comet.config.json
.claude/settings.local.json
.claude/plans
-tools/
+/tools/
# Internal Documentation & Plans (local only)
docs/claude-code-guide/
docs/plans/
@@ -28,4 +28,5 @@ coverage/
video/
.playwright-mcp/
# worktrees
-.worktrees/*
\ No newline at end of file
+.worktrees/*
+.claude/worktrees/
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
index 9d7544b..f100c87 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,6 +1,6 @@
# AGENTS.md — MCP Comet
-MCP Comet is a TypeScript MCP (Model Context Protocol) server that automates the Perplexity Comet browser via Chrome DevTools Protocol (CDP). It exposes 13 tools over stdio for prompting, polling, screenshots, tab management, source extraction, and mode switching.
+MCP Comet is a TypeScript MCP (Model Context Protocol) server that automates the Perplexity Comet browser via Chrome DevTools Protocol (CDP). It exposes 14 tools over stdio for prompting, polling, screenshots, tab management, source extraction, and mode switching.
## Commands
@@ -27,7 +27,7 @@ Four layers, top to bottom:
MCP Tools (server.ts) → UI Automation (src/ui/) → CDP Transport (src/cdp/) → Comet Browser
```
-- **server.ts** — Single file defining all 13 tools via `McpServer.tool()`. Contains `startServer()`, tool definitions, Zod schemas, and all handler logic. This is the main file to edit when adding/modifying tools.
+- **server.ts** — Single file defining all 14 tools via `McpServer.tool()`. Contains `startServer()`, tool definitions, Zod schemas, and all handler logic. This is the main file to edit when adding/modifying tools.
- **src/ui/** — Functions that return JavaScript strings (evaluated in the browser via `Runtime.evaluate`). Each `build*Script()` function returns a self-contained IIFE string. **Do not** pass complex objects — everything must serialize to a JS expression.
- **src/cdp/client.ts** — `CDPClient` singleton (`CDPClient.getInstance()`) managing WebSocket connections, auto-reconnect with exponential backoff, and an operation queue (`enqueue()`) to serialize concurrent CDP calls.
- **src/selectors/** — Version-keyed CSS selector sets (`SelectorSet`). `v145.ts` is the current set. New Comet/Chrome versions get a new `v{version}.ts` file registered in `index.ts`. Unknown versions fall back to the latest known set.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7cba7c6..75e3924 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,41 @@
# Changelog
+## [1.2.0] - 2026-04-14
+
+### Added
+
+- `comet_approve_action` tool: click primary or cancel buttons on Comet permission/confirmation prompts (14th tool)
+- `awaiting_action` agent status: detects when Comet is waiting for user confirmation before executing an action
+- `actionPrompt` and `actionButtons` fields in poll/wait responses when a permission prompt is active
+- `COMET_OVERRIDE_VIEWPORT` configuration option (default: `false`) to control whether the browser viewport is overridden on connect
+- Dynamic viewport detection via `Page.getLayoutMetrics()` for accurate screenshots regardless of window size
+- Response truncation marker at 8000 chars guiding users to `comet_get_page_content` for full text
+- `ACTION_BANNER` selector for detecting Comet's `@container/banner` permission prompt containers
+
+### Fixed
+
+- **Permission prompt detection**: `comet_wait` and `comet_poll` now correctly report `awaiting_action` status instead of `completed` when Comet shows a permission prompt
+- **Browser window resize**: `Emulation.setDeviceMetricsOverride` no longer resizes the browser window by default — viewport override is now opt-in via `COMET_OVERRIDE_VIEWPORT=true`
+- **Race condition — concurrent connects**: Multiple simultaneous `ensureConnected()` calls are now deduplicated via a shared promise guard
+- **Race condition — concurrent asks**: `comet_ask` now uses a mutex to prevent concurrent prompt submissions from corrupting each other
+- **Mode read navigation**: `comet_mode` (read) no longer navigates away from the current page when already on the Perplexity home page
+- **Mode switch reliability**: `comet_mode` (switch) now invokes React's `onMouseDown` handler directly via fiber props instead of `item.click()`, which silently failed because Comet's typeahead menu items use `onMouseDown`, not `onClick`
+- **Editor clearing**: Mode switching now clears residual Lexical editor text (select-all + delete + backspace safety net) instead of relying on page reload, which did not clear Lexical state
+- **Mode read editor clearing**: `comet_mode` (read) now also clears existing editor text before typing `/` and presses Escape + Backspace between retries to prevent `/` character accumulation
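+- **Prose filter over-trimming**: The short-question filter now only drops text under 20 chars (previously under 100) that ends in `?`, so responses of 20–99 chars ending in a question mark are no longer excluded from response text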
+- **Submit verification**: `buildSubmitPromptScript` now focuses the input element before submitting and verifies the input was cleared
+- **Gitignore scope**: `tools/` pattern changed to `/tools/` (root-level only) to unblock staging test files in `tests/unit/tools/`
+- **Status parsing hardening**: `parseAgentStatus` now validates all fields with defaults instead of raw casting — prevents `TypeError` crashes when the browser returns incomplete status data
+- **Empty prompt rejection**: `comet_ask` now requires non-empty prompts via `z.string().min(1)` validation
+- **Switch tab validation**: `comet_switch_tab` returns a clear error when called with no `tabId` or `title` instead of showing "undefined"
+- **Boundary value guards**: `comet_wait` and `comet_get_page_content` now handle `timeout=0` and `maxLength=0` gracefully instead of producing confusing behavior
+
+### Changed
+
+- 14 tools total (was 13)
+- Removed unused `AgentState` enum and dead `AgentStatus` interface — replaced with canonical `AgentStatus` type with `AgentStatusValue` union (`'idle' | 'working' | 'completed' | 'awaiting_action'`)
+- Removed dead `timeout` parameter from `comet_ask` schema (was accepted but never used)
+
## [1.1.2] - 2026-04-11
### Fixed
diff --git a/README.md b/README.md
index 96abac2..7de2c18 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
- 7 modes · 13 tools · zero-friction setup · full browser control
+ 7 modes · 14 tools · zero-friction setup · full browser control
Quick Start ·
Tool Reference ·
@@ -145,6 +145,7 @@ mcp-comet call comet_get_sources
- `comet_poll`: returns live status and partial progress.
- `comet_wait`: waits for completion and returns the full response.
- `comet_stop`: stops a running task.
+- `comet_approve_action`: approves or cancels Comet permission prompts.
### Query
@@ -182,6 +183,7 @@ Full reference: [docs/tools.md](docs/tools.md)
| Multi-perspective debate | `comet_mode(model-council)` -> `comet_ask` -> `comet_wait` |
| Visual evidence capture | `comet_screenshot` -> pass image into your vision-capable model |
| Resume old investigations | `comet_list_conversations` -> `comet_open_conversation` -> `comet_get_page_content` |
+| Action with permission prompt | `comet_ask` -> `comet_wait` -> `comet_approve_action` |
---
diff --git a/docs/architecture.md b/docs/architecture.md
index a50512f..3e658a0 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -3,7 +3,7 @@
MCP Comet uses a four-layer architecture:
```
-MCP Tools (13 tools)
+MCP Tools (14 tools)
|
UI Automation (selectors, input, status, extraction, navigation)
|
@@ -14,7 +14,7 @@ Perplexity Comet Browser (Chromium)
## MCP Layer
-MCP Comet exposes 13 MCP tools grouped by function:
+MCP Comet exposes 14 MCP tools grouped by function:
**Session**
@@ -31,6 +31,7 @@ MCP Comet exposes 13 MCP tools grouped by function:
|------|---------|
| `comet_ask` | Send a prompt and wait for the response |
| `comet_mode` | Switch Comet focus mode |
+| `comet_approve_action` | Approve or cancel permission prompts |
**Content**
@@ -57,7 +58,7 @@ Selectors are ordered arrays of CSS selectors. Each strategy tries selectors in
### Typeahead Mode Detection
-When switching modes via `comet_mode`, MCP Comet reads the SVG icon `href` from typeahead menu items with the `.bg-subtle` class. Icon IDs map to mode names:
+When switching modes via `comet_mode`, MCP Comet opens the typeahead menu by typing `/` into the Lexical editor via `document.execCommand('insertText')`. It then reads the SVG icon `href` from typeahead menu items with the `.bg-subtle` class to detect the active mode. Icon IDs map to mode names:
| Icon ID | Mode |
|---------|------|
@@ -70,6 +71,8 @@ When switching modes via `comet_mode`, MCP Comet reads the SVG icon `href` from
If icon detection fails, the system falls back to URL-based mode detection.
+For mode switching, the target menu item's React `onMouseDown` prop is invoked directly via fiber props (`__reactProps$`). This is necessary because Comet's typeahead items handle selection in React's `onMouseDown` handler rather than `onClick`, so calling the standard DOM `click()` method on an item silently does nothing. Before opening the typeahead, the editor is cleared of any existing text using select-all + delete + backspace, since Lexical editor state persists across page navigations.
+
### Collapsed Citation Expansion
Sources with collapsed citation text (matching the pattern `^\w+\+\d+$`, such as "arXiv+3") do not expose a URL directly. MCP Comet clicks these elements to reveal the full source URL, then re-extracts sources in a second pass. This two-pass strategy ensures complete source collection.
diff --git a/docs/configuration.md b/docs/configuration.md
index 53c64a2..79c9dba 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -27,6 +27,7 @@ The three most commonly used variables:
| `COMET_USER_DATA_DIR` | null | Path to a custom Chrome user data directory. Use this to persist cookies, local storage, and other browser profile data across sessions (for example, `~/.config/mcp-comet/chrome-profile`). When unset, Comet uses a temporary profile each launch. |
| `COMET_WINDOW_WIDTH` | 1440 | Browser window width in pixels at launch. Controls the initial viewport dimensions of the Comet browser window. |
| `COMET_WINDOW_HEIGHT` | 900 | Browser window height in pixels at launch. Controls the initial viewport dimensions of the Comet browser window. |
+| `COMET_OVERRIDE_VIEWPORT` | false | Override the browser viewport via CDP `setDeviceMetricsOverride` on connect. **Warning:** enabling this physically resizes the browser window. When disabled (default), screenshots use the actual viewport dimensions. |
## Priority
@@ -66,6 +67,9 @@ Create `mcp-comet.config.json` in your project root. Keys use camelCase (not the
"windowWidth": 1440,
"windowHeight": 900,
+ // Override browser viewport via CDP on connect (resizes the window)
+ "overrideViewport": false,
+
// Reconnection behavior
"maxReconnectAttempts": 5,
"maxReconnectDelay": 5000,
diff --git a/docs/contributing.md b/docs/contributing.md
index d690047..fb71d17 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -35,7 +35,7 @@ Run `npm run lint` before committing to catch style issues early.
```
src/
cli.ts -- CLI entry point (start, call, detect commands)
- server.ts -- MCP server with 13 tool handlers
+ server.ts -- MCP server with 14 tool handlers
config.ts -- Configuration loading + validation
errors.ts -- 9 error subclasses with codes
index.ts -- Library entry point (exports startServer)
diff --git a/docs/integration.md b/docs/integration.md
index cb51a5a..636458e 100644
--- a/docs/integration.md
+++ b/docs/integration.md
@@ -51,7 +51,7 @@ Claude Code reads MCP server configuration from `~/.claude/claude_desktop_config
}
```
-After updating the configuration file, restart Claude Code. MCP Comet will appear as an MCP server exposing 13 tools.
+After updating the configuration file, restart Claude Code. MCP Comet will appear as an MCP server exposing 14 tools.
**Verify:** Ask Claude "What MCP tools do you have available?" The list should include `comet_connect`, `comet_ask`, `comet_wait`, and the other tools documented in [tools.md](tools.md).
diff --git a/docs/tools.md b/docs/tools.md
index 11de2fa..440aa84 100644
--- a/docs/tools.md
+++ b/docs/tools.md
@@ -1,6 +1,6 @@
# Tool Reference
-This document provides a complete reference for all 13 MCP tools exposed by the MCP Comet server. Each tool entry includes a description, parameter table, response format, CLI example, and implementation notes.
+This document provides a complete reference for all 14 MCP tools exposed by the MCP Comet server. Each tool entry includes a description, parameter table, response format, CLI example, and implementation notes.
Tools are consumed via the Model Context Protocol (MCP) stdio transport. All tools automatically connect to or launch the Comet browser if no active session exists (see [Connection Lifecycle](#connection-lifecycle)).
@@ -21,9 +21,10 @@ Tools are consumed via the Model Context Protocol (MCP) stdio transport. All too
11. [comet_list_conversations](#11-comet_list_conversations)
12. [comet_open_conversation](#12-comet_open_conversation)
13. [comet_get_page_content](#13-comet_get_page_content)
-14. [Common Patterns](#common-patterns)
-15. [Error Responses](#error-responses)
-16. [Connection Lifecycle](#connection-lifecycle)
+14. [comet_approve_action](#14-comet_approve_action)
+15. [Common Patterns](#common-patterns)
+16. [Error Responses](#error-responses)
+17. [Connection Lifecycle](#connection-lifecycle)
---
@@ -70,41 +71,30 @@ Connected to Comet on port {port} (Chrome/{version}), target {targetId}
## 2. comet_ask
-Send a prompt to Perplexity Comet and poll until the agent responds or times out.
+Send a prompt to Perplexity Comet and return immediately.
-This is the primary interaction tool. It types the prompt into the Comet input field, submits it, and polls for the response using a non-blocking loop with stall detection. The response includes any agent steps and the final answer text.
+This is the primary interaction tool. It types the prompt into the Comet input field, submits it, and returns immediately. Use `comet_poll` or `comet_wait` to retrieve the response.
### Parameters
-| Parameter | Type | Required | Default | Description |
-|-----------|---------|----------|----------------------------------|--------------------------------------------------|
-| `prompt` | string | Yes | | The question or instruction to send |
-| `newChat` | boolean | No | `false` | Start a fresh chat before sending the prompt |
-| `timeout` | number | No | `180000` (`COMET_RESPONSE_TIMEOUT`) | Maximum wait time in ms for the agent response |
+| Parameter | Type | Required | Default | Description |
+|-----------|---------|----------|---------|----------------------------------------------|
+| `prompt` | string | Yes | | The question or instruction to send |
+| `newChat` | boolean | No | `false` | Start a fresh chat before sending the prompt |
### Response
-**Completed (within timeout):**
+**Success:**
```
-{response text}
-
-Steps:
- - {step 1}
- - {step 2}
+Prompt submitted successfully. Use comet_poll to track status or comet_wait to block until completion.
```
-**Timeout (agent still working):**
+**Concurrent ask blocked:**
```
-Agent is still working. Use comet_poll to check status.
-
-Steps so far:
- - {step 1}
-
-Partial response:
-{partial text}
+Another prompt is currently being submitted. Please wait and try again.
```
-**Error codes:** `CDP_CONNECTION_FAILED`, `EVALUATION_FAILED`, `TIMEOUT`
+**Error codes:** `CDP_CONNECTION_FAILED`, `EVALUATION_FAILED`
### CLI Example
@@ -115,16 +105,14 @@ Partial response:
```json
{
"prompt": "Compare GPT-4 and Claude 3.5 on reasoning benchmarks",
- "newChat": true,
- "timeout": 300000
+ "newChat": true
}
```
### Notes
-- **Stall detection:** If the response length does not grow for 10 consecutive polls, the tool breaks out of the polling loop and returns whatever has been collected so far.
-- **Response stabilization:** After the agent transitions to `idle` or `completed`, the tool performs up to 5 additional settle polls (1 second apart) to ensure the response text has finished rendering.
-- **Pre-send state capture:** Before typing, the tool captures the current prose count and last prose text to accurately detect new responses versus pre-existing content.
+- **Immediate return:** The tool submits the prompt and returns right away — it does not wait for the response. Use `comet_poll` to check progress or `comet_wait` to block until completion.
+- **Concurrency guard:** Only one `comet_ask` can run at a time. If a second call is made while the first is still submitting, it returns immediately with a "try again" message.
- **newChat behavior:** When `true`, closes all extra tabs, disconnects, reconnects, and navigates to the Perplexity home page before sending the prompt.
- If `newChat` is `false` and the main tab differs from the current target, the tool automatically switches to the main tab.
@@ -134,7 +122,7 @@ Partial response:
Poll the current agent status, steps, and response content.
-Returns a snapshot of the Comet agent state. Use this to check progress after `comet_ask` times out, or to implement custom polling logic in your own agent loop.
+Returns a snapshot of the Comet agent state. Use this to check progress after `comet_ask`, or to implement custom polling logic in your own agent loop.
### Parameters
@@ -145,25 +133,29 @@ None.
**Success:**
```json
{
- "status": "working" | "idle" | "completed",
+ "status": "working" | "idle" | "completed" | "awaiting_action",
"steps": ["step 1", "step 2"],
"currentStep": "current step text",
"response": "agent response text so far",
"hasStopButton": true,
"hasLoadingSpinner": true,
- "proseCount": 3
+ "proseCount": 3,
+ "actionPrompt": "Create a new issue on GitHub?",
+ "actionButtons": ["Create Issue", "Cancel"]
}
```
| Field | Type | Description |
|---------------------|----------|------------------------------------------------------------|
-| `status` | string | Agent status: `"working"`, `"idle"`, or `"completed"` |
+| `status` | string | Agent status: `"working"`, `"idle"`, `"completed"`, or `"awaiting_action"` |
| `steps` | string[] | List of completed step descriptions |
| `currentStep` | string | Currently executing step (may be empty) |
-| `response` | string | Response text extracted so far (may be partial) |
+| `response`          | string   | Response text extracted so far (may be partial). Truncated at 8000 chars; when truncated, a marker points to `comet_get_page_content` for the full text. |
| `hasStopButton` | boolean | Whether the stop/cancel button is visible |
| `hasLoadingSpinner` | boolean | Whether a loading spinner is visible |
| `proseCount` | number | Number of prose elements detected on the page |
+| `actionPrompt` | string | Permission prompt text (only when `status` is `"awaiting_action"`) |
+| `actionButtons` | string[] | Available action button labels (only when `status` is `"awaiting_action"`) |
### CLI Example
@@ -183,7 +175,7 @@ None.
Poll until the current agent finishes responding and return the full response.
-Designed to be used after `comet_ask` times out. It continues polling the agent status until the response completes, another timeout is reached, or stall detection triggers.
+Designed to be used after `comet_ask` to wait for the full response. It continues polling the agent status until the response completes, a timeout is reached, or stall detection triggers.
### Parameters
@@ -213,6 +205,17 @@ Partial response:
{partial text}
```
+**Awaiting action (permission prompt):**
+```
+⚠️ Comet is awaiting your permission.
+
+Prompt: Create a new issue on GitHub?
+
+Available actions: Create Issue, Cancel
+
+Use comet_approve_action to approve or cancel the action.
+```
+
### CLI Example
```json
@@ -227,8 +230,9 @@ Partial response:
- **Stall detection:** Breaks out of the polling loop if the response length does not grow for 10 consecutive polls.
- **Response stabilization:** After the agent transitions to `idle` or `completed`, performs up to 5 settle polls (1 second apart) to ensure the response has finished rendering.
-- Default timeout is 120 seconds (2 minutes), shorter than `comet_ask` default of 180 seconds.
+- Default timeout is 120 seconds (2 minutes).
- Returns `"Agent completed with no visible response."` if the agent finishes but no response text was found.
+- Response text is truncated at 8000 characters. If the response is truncated, a marker will indicate this — use `comet_get_page_content` to retrieve the full text.
---
@@ -378,6 +382,7 @@ Switch to computer use:
- **Mode switching** only works on the home page or a new chat. The tool automatically navigates to `https://www.perplexity.ai` before attempting a switch.
- The tool opens the slash-command typeahead menu by typing `/` in the input field, then selects the desired mode from the dropdown.
- Up to 10 retry attempts for mode switching in case the typeahead menu does not appear immediately.
+- When querying the current mode, the tool uses up to 5 attempts (separate from the 10 used for switching).
- When querying, the tool first checks the URL for the `computer` mode indicator, then falls back to opening the typeahead menu to read the active mode.
---
@@ -446,6 +451,11 @@ At least one of `tabId` or `title` must be provided. If both are given, `tabId`
### Response
+**No criteria provided:**
+```
+Provide at least one of tabId or title.
+```
+
**Success:**
```
Switched to tab [{id}] {title} — {url}
@@ -645,11 +655,71 @@ Title: {page title}
---
+## 14. comet_approve_action
+
+Click an action button on a Comet permission/confirmation prompt.
+
+When Comet asks for permission to execute an action (e.g., creating a GitHub issue, sending an email), the agent enters the `awaiting_action` state. Use this tool to approve or cancel the pending action.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|--------|----------|------------|------------------------------------------------------------------------------------------------|
+| `action` | enum | No | `"primary"` | Which button to click: `"primary"` (approve/confirm) or `"cancel"` (dismiss the prompt). |
+
+### Response
+
+**Approved:**
+```
+Action approved: clicked "Create Issue" button.
+```
+
+**Cancelled:**
+```
+Action cancelled: clicked "Cancel" button.
+```
+
+**No banner found:**
+```
+No action banner found. The agent may not be awaiting an action.
+```
+
+**Fallback (no bg-button-bg found):**
+```
+Action approved: clicked "Create Issue" button. (fallback: no bg-button-bg found, clicked first non-cancel button)
+```
+
+### CLI Example
+
+Approve the action (`action` defaults to `"primary"`, so the two forms below are equivalent):
+```json
+{}
+```
+
+```json
+{ "action": "primary" }
+```
+
+Cancel the action:
+```json
+{ "action": "cancel" }
+```
+
+### Notes
+
+- This tool should be called after `comet_wait` or `comet_poll` returns `status: "awaiting_action"`.
+- The tool detects action banners via the `[class*="@container/banner"]` selector, which matches the `@container/banner` class used by Comet's UI.
+- Primary buttons are identified by the `bg-button-bg` CSS class (filled/action style).
+- Cancel buttons are identified by the `border-subtle` CSS class or the text "Cancel".
+- If no primary button with `bg-button-bg` is found, the tool falls back to clicking the first non-cancel button.
+
+---
+
## Common Patterns
### 1. Ask and Wait (Simple)
-Use `comet_ask` with the default timeout (180 seconds). This handles most queries in a single call.
+Use `comet_ask` to submit the prompt, then `comet_wait` to block until the response is ready. This handles most queries in two calls.
```json
{ "prompt": "What is the current state of fusion energy research?" }
@@ -657,28 +727,28 @@ Use `comet_ask` with the default timeout (180 seconds). This handles most querie
### 2. Ask + Poll (Custom Loop)
-Use `comet_ask` with a short timeout, then loop `comet_poll` in your agent to implement custom progress reporting or conditional logic.
+Use `comet_ask` to submit the prompt, then loop `comet_poll` in your agent to implement custom progress reporting or conditional logic.
```json
-{ "prompt": "Deep research on AI safety", "timeout": 30000 }
+{ "prompt": "Deep research on AI safety" }
```
Then poll:
```json
{}
```
-(call `comet_poll` repeatedly until `status` is `"completed"` or `"idle"`)
+(call `comet_poll` repeatedly until `status` is `"completed"`, `"idle"`, or `"awaiting_action"`)
### 3. Ask + Wait (Two-Phase)
-Use `comet_ask` (which may timeout for long-running queries), then `comet_wait` to collect the full result. This is useful when you want to start a query and check back later.
+Use `comet_ask` to submit the prompt, then `comet_wait` to collect the full result. This is useful when you want to start a query and check back later.
Step 1:
```json
{ "prompt": "Write a comprehensive analysis of global semiconductor supply chains" }
```
-Step 2 (if Step 1 times out):
+Step 2:
```json
{ "timeout": 300000 }
```
@@ -707,6 +777,27 @@ Use `comet_ask` to get a response, then `comet_get_sources` to extract the cited
{}
```
+### 6. Action Approval (Permission Prompts)
+
+When Comet asks for permission to perform an action (e.g., "Create a new issue on GitHub"), use the approval flow:
+
+Step 1 — ask a task that triggers a permission prompt:
+```json
+{ "prompt": "Create a new issue on GitHub about the build failure" }
+```
+
+Step 2 — `comet_wait` returns `awaiting_action` status with prompt text and available buttons.
+
+Step 3 — approve or cancel:
+```json
+{ "action": "primary" }
+```
+
+Or cancel:
+```json
+{ "action": "cancel" }
+```
+
---
## Error Responses
@@ -773,12 +864,16 @@ All tools call `ensureConnected()` before executing, which auto-connects if no a
Connection behavior is controlled by these configuration values (see [Configuration](configuration.md)):
-| Setting | Default | Description |
-|--------------------------|----------|----------------------------------------------|
-| `COMET_PORT` | `9222` | CDP debug port |
-| `COMET_PATH` | auto | Path to Comet executable |
-| `COMET_TIMEOUT` | `30000` | Comet launch timeout in ms |
-| `COMET_POLL_INTERVAL` | `1000` | Status poll interval in ms |
-| `COMET_MAX_RECONNECT` | `5` | Maximum reconnection attempts |
-| `COMET_RECONNECT_DELAY`| `5000` | Maximum reconnection backoff delay in ms |
+| Setting | Default | Description |
+|----------------------------|----------|----------------------------------------------|
+| `COMET_PORT` | `9222` | CDP debug port |
+| `COMET_PATH` | auto | Path to Comet executable |
+| `COMET_TIMEOUT` | `30000` | Comet launch timeout in ms |
+| `COMET_POLL_INTERVAL` | `1000` | Status poll interval in ms |
+| `COMET_MAX_RECONNECT` | `5` | Maximum reconnection attempts |
+| `COMET_RECONNECT_DELAY` | `5000` | Maximum reconnection backoff delay in ms |
+| `COMET_WINDOW_WIDTH` | `1440` | Browser window width in pixels |
+| `COMET_WINDOW_HEIGHT` | `900` | Browser window height in pixels |
+| `COMET_OVERRIDE_VIEWPORT` | `false` | Override viewport via CDP (resizes window) |
+| `COMET_USER_DATA_DIR` | null | Custom Chrome user data directory |
diff --git a/src/cdp/client.ts b/src/cdp/client.ts
index a10069a..c82bfae 100644
--- a/src/cdp/client.ts
+++ b/src/cdp/client.ts
@@ -89,16 +89,18 @@ export class CDPClient {
await this.criClient.Page.enable()
await this.criClient.Runtime.enable()
- // Set fixed viewport for consistent selectors and screenshots
- try {
- await this.criClient.Emulation.setDeviceMetricsOverride({
- width: this.config.windowWidth,
- height: this.config.windowHeight,
- deviceScaleFactor: 1,
- mobile: false,
- })
- } catch {
- this.logger.debug('Could not set viewport metrics')
+ // Only override viewport when explicitly enabled (resizes the browser window)
+ if (this.config.overrideViewport) {
+ try {
+ await this.criClient.Emulation.setDeviceMetricsOverride({
+ width: this.config.windowWidth,
+ height: this.config.windowHeight,
+ deviceScaleFactor: 1,
+ mobile: false,
+ })
+ } catch {
+ this.logger.debug('Could not set viewport metrics')
+ }
}
this.state.connected = true
@@ -159,20 +161,28 @@ export class CDPClient {
})
}
+  /** Hard-reload the current page (bypassing the cache) and resolve once its load event fires. Per the 1.2.0 changelog, a reload does not clear Lexical editor text. */
+  async reload(): Promise<void> {
+    return this.enqueue(async () => {
+      await this.withAutoReconnect(async () => {
+        if (!this.criClient) throw new CDPConnectionError('Not connected')
+        await this.criClient.Page.enable()
+        // Subscribe to the load event before issuing the reload so a fast load cannot fire unobserved.
+        await Promise.all([this.criClient.Page.loadEventFired(), this.criClient.Page.reload({ ignoreCache: true })])
+      })
+    })
+  }
+
async screenshot(format: 'png' | 'jpeg' = 'png'): Promise {
return this.enqueue(async () => {
await this.ensureHealthyConnection()
return await this.withAutoReconnect(async () => {
if (!this.criClient) throw new CDPConnectionError('Not connected')
await this.criClient.Page.enable()
- // Use explicit clip to avoid 0-width viewport issues (Chrome 145+)
- const clip = {
- x: 0,
- y: 0,
- width: this.config.windowWidth,
- height: this.config.windowHeight,
- scale: 1,
- }
+
+ // Get actual viewport dimensions instead of assuming config values
+ const clip = await this.getViewportClip()
+
const { data } = await Promise.race([
this.criClient.Page.captureScreenshot({ format, clip }),
new Promise((_, reject) =>
@@ -184,6 +194,22 @@ export class CDPClient {
})
}
+  /** Build the screenshot clip from the page's actual layout viewport (`cssLayoutViewport` from `Page.getLayoutMetrics`); falls back to the configured window size when disconnected, when the call fails, or when the reported size is not positive. */
+  private async getViewportClip(): Promise<{ x: number; y: number; width: number; height: number; scale: number }> {
+    const fallback = { x: 0, y: 0, width: this.config.windowWidth, height: this.config.windowHeight, scale: 1 }
+    if (!this.criClient) return fallback
+    try {
+      const metrics = await this.criClient.Page.getLayoutMetrics()
+      const viewport = (metrics as unknown as Record<string, Record<string, number>>).cssLayoutViewport
+      if (viewport && viewport.clientWidth > 0 && viewport.clientHeight > 0) {
+        return { x: 0, y: 0, width: viewport.clientWidth, height: viewport.clientHeight, scale: 1 }
+      }
+    } catch {
+      this.logger.debug('Could not get layout metrics, using config dimensions')
+    }
+    return fallback
+  }
+
async evaluate(expression: string): Promise {
if (!this.criClient) throw new CDPConnectionError('Not connected')
return this.criClient.Runtime.evaluate({
diff --git a/src/config.ts b/src/config.ts
index 9d34b76..bd810f5 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -12,6 +12,7 @@ const DEFAULTS: CometConfig = {
screenshotQuality: 80,
windowWidth: 1440,
windowHeight: 900,
+ overrideViewport: false,
maxReconnectAttempts: 5,
maxReconnectDelay: 5000,
pollInterval: 1000,
@@ -143,6 +144,10 @@ export function loadConfig(overrides?: Partial): CometConfig {
const userDataDirEnv = env('COMET_USER_DATA_DIR')
if (userDataDirEnv !== undefined) envConfig.userDataDir = userDataDirEnv
+ const overrideViewportEnv = env('COMET_OVERRIDE_VIEWPORT')
+ if (overrideViewportEnv !== undefined)
+ envConfig.overrideViewport = overrideViewportEnv === 'true' || overrideViewportEnv === '1'
+
return validatedConfig({
...DEFAULTS,
...fileConfig,
diff --git a/src/prose-filter.ts b/src/prose-filter.ts
index ad6dfc5..73a0e35 100644
--- a/src/prose-filter.ts
+++ b/src/prose-filter.ts
@@ -47,7 +47,7 @@ export function buildFindProseJS(): string {
var isUI = false;
for (var u = 0; u < uiTexts.length; u++) { if (text.indexOf(uiTexts[u]) === 0) { isUI = true; break; } }
if (isUI) continue;
- if (text.length < 100 && text.indexOf('?') === text.length - 1) continue;
+ if (text.length < 20 && text.indexOf('?') === text.length - 1) continue;
results.push(text);
}
results`
diff --git a/src/selectors/types.ts b/src/selectors/types.ts
index f2895b6..28877c7 100644
--- a/src/selectors/types.ts
+++ b/src/selectors/types.ts
@@ -6,4 +6,5 @@ export interface SelectorSet {
LOADING: readonly string[]
TYPEAHEAD_MENU: readonly string[]
MENU_ITEM: readonly string[]
+ ACTION_BANNER: readonly string[]
}
diff --git a/src/selectors/v145.ts b/src/selectors/v145.ts
index ebda748..7647257 100644
--- a/src/selectors/v145.ts
+++ b/src/selectors/v145.ts
@@ -19,4 +19,5 @@ export const v145Selectors: SelectorSet = {
LOADING: ['[class*="animate-spin"]', '[class*="animate-pulse"]'],
TYPEAHEAD_MENU: ['[role="listbox"][aria-label="Typeahead menu"]', '[role="listbox"]'],
MENU_ITEM: ['[role="menuitem"].group\\/item', '[role="menuitem"]'],
+ ACTION_BANNER: ['[class*="@container/banner"]'],
}
diff --git a/src/server.ts b/src/server.ts
index be4cc3a..716112f 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -7,7 +7,7 @@ import { EvaluationError, toMcpError } from './errors.js'
import { createLogger } from './logger.js'
import { buildPreSendStateScript } from './prose-filter.js'
import type { SelectorSet } from './selectors/types.js'
-import type { CategorizedTabs, TabInfo } from './types.js'
+import type { AgentStatus, CategorizedTabs, TabInfo } from './types.js'
import { buildListConversationsScript } from './ui/conversations.js'
import {
buildExpandCollapsedCitationsScript,
@@ -22,6 +22,7 @@ import {
} from './ui/navigation.js'
import { SELECTORS } from './ui/selectors.js'
import { buildGetAgentStatusScript } from './ui/status.js'
+import { buildClickActionButtonScript } from './ui/action.js'
import { buildStopAgentScript } from './ui/stop.js'
import { isPerplexityDomain } from './utils.js'
import { detectCometVersion } from './version.js'
@@ -37,21 +38,42 @@ const client = CDPClient.getInstance(config)
/** Active selector set — updated after each comet_connect to match Comet's Chrome version. */
let activeSelectors: SelectorSet = SELECTORS
+/**
+ * In-flight auto-connect attempt. Concurrent ensureConnected() callers await this one
+ * promise instead of each starting their own connection sequence.
+ */
+let connectPromise: Promise<void> | null = null
+
/** Ensure the client is connected before using tools. Auto-connects if needed. */
async function ensureConnected(): Promise {
if (client.state.targetId) return
+  if (connectPromise) return connectPromise
logger.info('Auto-connecting to Comet...')
- await client.launchOrConnect()
- await client.closeExtraTabs()
- try {
- const { chromeMajor, selectors } = await detectCometVersion(config.port)
- activeSelectors = selectors
- logger.info(`Auto-connected to Comet Chrome/${chromeMajor}`)
- } catch {
- // Version detection failure is non-fatal
- }
+  const attempt = (async (): Promise<void> => {
+    await client.launchOrConnect()
+    await client.closeExtraTabs()
+    try {
+      const { chromeMajor, selectors } = await detectCometVersion(config.port)
+      activeSelectors = selectors
+      logger.info(`Auto-connected to Comet Chrome/${chromeMajor}`)
+    } catch {
+      // Version detection failure is non-fatal
+    }
+  })()
+  connectPromise = attempt
+  // Clear the guard from a settlement callback rather than a finally inside the attempt:
+  // if launchOrConnect() throws before the first suspension, an inner finally would run
+  // before the assignment above and leave a rejected promise cached forever.
+  const release = (): void => {
+    if (connectPromise === attempt) connectPromise = null
+  }
+  void attempt.then(release, release)
+  return attempt
}
+/** Guard: prevent concurrent comet_ask calls from corrupting each other. */
+let askInProgress = false
+
// ---------------------------------------------------------------------------
// Tool definitions (exported for testing)
// ---------------------------------------------------------------------------
@@ -120,9 +135,8 @@ function buildInputSchema(shape: Record): ToolDef['inputSc
// Zod raw shapes for tool parameters
const connectShape = { port: z.number().optional() }
const askShape = {
- prompt: z.string().describe('The question or instruction to send to Perplexity Comet'),
+ prompt: z.string().min(1).describe('The question or instruction to send to Perplexity Comet'),
newChat: z.boolean().optional().describe('Start a fresh chat before sending the prompt'),
- timeout: z.number().optional().describe('Maximum wait time in ms for the agent response'),
}
const screenshotShape = {
format: z.enum(['png', 'jpeg']).optional().describe('Image format (default: png)'),
@@ -147,6 +161,14 @@ const getPageContentShape = {
const waitShape = {
timeout: z.number().optional().describe('Maximum wait time in ms (default: 120000)'),
}
+const approveActionShape = {
+ action: z
+ .enum(['primary', 'cancel'])
+ .optional()
+ .describe(
+ 'Which button to click: "primary" (approve/confirm the action, default) or "cancel" (dismiss the prompt).',
+ ),
+}
export const toolDefinitions: ToolDef[] = [
{
@@ -219,6 +241,12 @@ export const toolDefinitions: ToolDef[] = [
'Poll until the current agent finishes responding and return the full response. Use after comet_ask times out.',
inputSchema: buildInputSchema(waitShape),
},
+ {
+ name: 'comet_approve_action',
+ description:
+ 'Click an action button on a Comet permission/confirmation prompt. Use after comet_wait or comet_poll returns status "awaiting_action".',
+ inputSchema: buildInputSchema(approveActionShape),
+ },
]
// ---------------------------------------------------------------------------
@@ -240,33 +268,59 @@ function extractValue(result: {
return result.result?.value
}
-/** Runtime shape returned by buildGetAgentStatusScript(). */
-interface RawAgentStatus {
- status: string
- steps: string[]
- currentStep: string
- response: string
- hasStopButton: boolean
- hasLoadingSpinner?: boolean
- proseCount?: number
+/** Baseline status used when the status script output is missing or malformed. */
+const DEFAULT_STATUS: AgentStatus = {
+  status: 'idle',
+  steps: [],
+  currentStep: '',
+  response: '',
+  hasStopButton: false,
+  proseCount: 0,
}
+/** Fresh copy of DEFAULT_STATUS with its own steps array, so results never share state. */
+function freshDefaultStatus(): AgentStatus {
+  return { ...DEFAULT_STATUS, steps: [] }
+}
+
+/** String entries of value when it is an array; undefined for any non-array input. */
+function stringEntries(value: unknown): string[] | undefined {
+  if (!Array.isArray(value)) return undefined
+  return value.filter((entry): entry is string => typeof entry === 'string')
+}
+
-function parseAgentStatus(raw: unknown): RawAgentStatus {
+/**
+ * Normalize the value produced by buildGetAgentStatusScript() into an AgentStatus.
+ *
+ * @param raw - The evaluated script value: a JSON string or an already-parsed object.
+ * @returns A status whose fields each fall back to a default when absent or of the wrong
+ *   type; the idle default when raw is not an object or is unparseable JSON.
+ */
+function parseAgentStatus(raw: unknown): AgentStatus {
+  let parsed: unknown
if (typeof raw === 'string') {
try {
- return JSON.parse(raw) as RawAgentStatus
+      parsed = JSON.parse(raw)
} catch {
- return {
- status: 'idle',
- steps: [],
- currentStep: '',
- response: '',
- hasStopButton: false,
- proseCount: 0,
- }
+      return freshDefaultStatus()
}
+  } else {
+    parsed = raw
+  }
+  if (!parsed || typeof parsed !== 'object') return freshDefaultStatus()
+  const obj = parsed as Record<string, unknown>
+  return {
+    // status is passed through as reported; it is not checked against the AgentStatus union.
+    status: (typeof obj.status === 'string' ? obj.status : 'idle') as AgentStatus['status'],
+    steps: stringEntries(obj.steps) ?? [],
+    currentStep: typeof obj.currentStep === 'string' ? obj.currentStep : '',
+    response: typeof obj.response === 'string' ? obj.response : '',
+    hasStopButton: typeof obj.hasStopButton === 'boolean' ? obj.hasStopButton : false,
+    hasLoadingSpinner: typeof obj.hasLoadingSpinner === 'boolean' ? obj.hasLoadingSpinner : undefined,
+    proseCount: typeof obj.proseCount === 'number' ? obj.proseCount : undefined,
+    actionPrompt: typeof obj.actionPrompt === 'string' ? obj.actionPrompt : undefined,
+    actionButtons: stringEntries(obj.actionButtons),
}
- return raw as RawAgentStatus
}
function sleep(ms: number): Promise {
@@ -354,6 +388,9 @@ export async function startServer(): Promise {
askShape,
async ({ prompt, newChat }) => {
try {
+ if (askInProgress) return textResult('Another prompt is currently being submitted. Please wait and try again.')
+ askInProgress = true
+ try {
await ensureConnected()
const normalizedPrompt = client.normalizePrompt(prompt)
// Handle newChat or tab management
@@ -394,6 +431,9 @@ export async function startServer(): Promise {
return textResult(
'Prompt submitted successfully. Use comet_poll to track status or comet_wait to block until completion.',
)
+ } finally {
+ askInProgress = false
+ }
} catch (err) {
return toMcpError(err)
}
@@ -477,21 +517,50 @@ export async function startServer(): Promise {
}
// 2. Open typeahead to read active mode from menu
- await client.navigate('https://www.perplexity.ai')
- await sleep(2000)
+ // Only navigate if not already on the home page (avoid losing active conversation)
+ const urlCheck = await client.safeEvaluate(`window.location.pathname`)
+ const currentPath = extractValue(urlCheck)
+ if (currentPath && currentPath !== '/') {
+ await client.navigate('https://www.perplexity.ai')
+ await sleep(2000)
+ }
+
+ // Clear any existing text in the Lexical editor (state persists across navigations)
+ const readLenRaw = await client.safeEvaluate(`(function() {
+ var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
+ return input ? (input.textContent || input.innerText || '').length : 0;
+ })()`)
+ const readInputLen = Number(extractValue(readLenRaw)) || 0
+ if (readInputLen > 0) {
+ await client.safeEvaluate(`(function() {
+ var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
+ if (input) input.focus();
+ })()`)
+ await sleep(100)
+ await client.safeEvaluate(`(function() {
+ var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
+ if (!input) return;
+ var range = document.createRange();
+ range.selectNodeContents(input);
+ var sel = window.getSelection();
+ sel.removeAllRanges();
+ sel.addRange(range);
+ })()`)
+ await sleep(50)
+ await client.safeEvaluate(`document.execCommand('delete', false, null)`)
+ await sleep(200)
+ }
let currentMode: unknown = 'standard'
for (let attempt = 0; attempt < 5; attempt++) {
- // Focus input, clear, type /
+ // Focus input and type / via execCommand
await client.safeEvaluate(`(function() {
var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
if (input) input.focus();
})()`)
- await client.pressKeyWithModifier('a', 4)
- await client.pressKey('Backspace')
- await sleep(100)
- await client.typeChar('/')
- await sleep(500)
+ await sleep(200)
+ await client.safeEvaluate(`document.execCommand("insertText", false, "/")`)
+ await sleep(800)
const raw = await client.safeEvaluate(buildReadActiveModeScript())
const result = extractValue(raw)
@@ -501,6 +570,9 @@ export async function startServer(): Promise {
await client.pressKey('Escape')
break
}
+ // Close typeahead and clear the / character before retrying
+ await client.pressKey('Escape')
+ await client.pressKey('Backspace')
await sleep(300)
}
@@ -511,26 +583,60 @@ export async function startServer(): Promise {
// Navigate to home page for clean input (mode typeahead only works on new chat page)
await client.navigate('https://www.perplexity.ai')
await sleep(2000)
+
+ // Clear any existing text in the Lexical editor (state persists across navigations)
+ const inputLenRaw = await client.safeEvaluate(`(function() {
+ var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
+ return input ? (input.textContent || input.innerText || '').length : 0;
+ })()`)
+ const inputLen = Number(extractValue(inputLenRaw)) || 0
+ if (inputLen > 0) {
+ // Focus editor
+ await client.safeEvaluate(`(function() {
+ var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
+ if (input) input.focus();
+ })()`)
+ await sleep(100)
+ // Select all content and delete via execCommand
+ await client.safeEvaluate(`(function() {
+ var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
+ if (!input) return;
+ var range = document.createRange();
+ range.selectNodeContents(input);
+ var sel = window.getSelection();
+ sel.removeAllRanges();
+ sel.addRange(range);
+ })()`)
+ await sleep(50)
+ await client.safeEvaluate(`document.execCommand('delete', false, null)`)
+ await sleep(200)
+ // Safety: press Backspace a few times in case selectAll missed something
+ for (let i = 0; i < 5; i++) {
+ await client.pressKey('Backspace')
+ await sleep(20)
+ }
+ }
+
const MAX_MODE_RETRIES = 10
for (let attempt = 0; attempt < MAX_MODE_RETRIES; attempt++) {
- // Focus input, clear via Cmd+A+Backspace, then type / via CDP
+ // Focus input and type / via execCommand (most reliable for Lexical)
await client.safeEvaluate(`(function() {
var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
if (input) input.focus();
})()`)
- // Select all (Meta/Cmd = modifier 4) and delete
- await client.pressKeyWithModifier('a', 4)
- await client.pressKey('Backspace')
- await sleep(100)
- // Type / via char event (inserts into Lexical editor)
- await client.typeChar('/')
- await sleep(500)
+ await sleep(200)
+ await client.safeEvaluate(`document.execCommand("insertText", false, "/")`)
+ await sleep(800)
const raw = await client.safeEvaluate(buildModeSwitchScript(mode))
const result = extractValue(raw)
if (result !== 'no_listbox_found' && result !== 'no_input_found') {
+ // Close typeahead if still open
+ await client.pressKey('Escape')
return textResult(`Mode switch result: ${result}`)
}
+ // Close typeahead and retry
+ await client.pressKey('Escape')
await sleep(300)
}
return textResult('Mode switch failed: typeahead menu did not appear after retries')
@@ -563,6 +669,9 @@ export async function startServer(): Promise {
switchTabShape,
async ({ tabId, title }) => {
try {
+ if (!tabId && !title) {
+ return textResult('Provide at least one of tabId or title.')
+ }
await ensureConnected()
const targets = await client.listTargets()
let target: TabInfo | undefined
@@ -703,7 +812,7 @@ export async function startServer(): Promise {
async ({ maxLength }) => {
try {
await ensureConnected()
- const len = maxLength ?? 10000
+ const len = (maxLength && maxLength > 0) ? maxLength : 10000
const raw = await client.safeEvaluate(buildExtractPageContentScript(len))
const parsed = JSON.parse(String(extractValue(raw))) as { title: string; text: string }
return textResult(`Title: ${parsed.title}\n\n${parsed.text}`)
@@ -721,7 +830,7 @@ export async function startServer(): Promise {
async ({ timeout }) => {
try {
await ensureConnected()
- const effectiveTimeout = timeout ?? 120000
+ const effectiveTimeout = (timeout && timeout > 0) ? timeout : 120000
const startTime = Date.now()
let lastResponse = ''
let stallCount = 0
@@ -746,6 +855,20 @@ export async function startServer(): Promise {
if (stallCount >= MAX_STALL_POLLS && lastResponse) break
+ // Handle permission/action prompts — break and report
+ if (status.status === 'awaiting_action') {
+ const parts: string[] = ['⚠️ Comet is awaiting your permission.']
+ if (status.actionPrompt) parts.push(`\nPrompt: ${status.actionPrompt}`)
+ if (status.actionButtons && status.actionButtons.length > 0) {
+ parts.push(`\nAvailable actions: ${status.actionButtons.join(', ')}`)
+ }
+ parts.push('\nUse comet_approve_action to approve or cancel the action.')
+ if (collectedSteps.length > 0) {
+ parts.push(`\n\nSteps:\n${collectedSteps.map((s) => ` - ${s}`).join('\n')}`)
+ }
+ return textResult(parts.join(''))
+ }
+
if ((status.status === 'completed' || status.status === 'idle') && lastResponse) {
// Wait for response to stabilize
let settledResponse = lastResponse
@@ -755,6 +878,21 @@ export async function startServer(): Promise {
buildGetAgentStatusScript(activeSelectors),
)
const settledStatus = parseAgentStatus(extractValue(settledRaw))
+
+ // Re-check for awaiting_action after stabilization
+ if (settledStatus.status === 'awaiting_action') {
+ const parts: string[] = ['⚠️ Comet is awaiting your permission.']
+ if (settledStatus.actionPrompt) parts.push(`\nPrompt: ${settledStatus.actionPrompt}`)
+ if (settledStatus.actionButtons && settledStatus.actionButtons.length > 0) {
+ parts.push(`\nAvailable actions: ${settledStatus.actionButtons.join(', ')}`)
+ }
+ parts.push('\nUse comet_approve_action to approve or cancel the action.')
+ if (collectedSteps.length > 0) {
+ parts.push(`\n\nSteps:\n${collectedSteps.map((s) => ` - ${s}`).join('\n')}`)
+ }
+ return textResult(parts.join(''))
+ }
+
const candidate = settledStatus.response || settledResponse
if (candidate.length <= settledResponse.length) break
settledResponse = candidate
@@ -782,6 +920,41 @@ export async function startServer(): Promise {
},
)
+  // 14. comet_approve_action
+  server.tool(
+    'comet_approve_action',
+    'Click an action button on a Comet permission/confirmation prompt. Use after comet_wait or comet_poll returns status "awaiting_action".',
+    approveActionShape,
+    async ({ action }) => {
+      try {
+        await ensureConnected()
+        const effectiveAction = action ?? 'primary'
+        const raw = await client.safeEvaluate(buildClickActionButtonScript(effectiveAction))
+        // The page script replies with a JSON string describing what, if anything, was clicked.
+        const result = JSON.parse(String(extractValue(raw))) as {
+          clicked: boolean
+          buttonText?: string
+          action?: string
+          error?: string
+          fallback?: boolean
+        }
+
+        if (result.clicked) {
+          const verb = effectiveAction === 'primary' ? 'approved' : 'cancelled'
+          const label = result.buttonText ?? '(unlabelled)'
+          const note = result.fallback ? ' (fallback: no bg-button-bg found, clicked first non-cancel button)' : ''
+          return textResult(`Action ${verb}: clicked "${label}" button.${note}`)
+        }
+        // State the reason once: the script's own error when it gave one, else the generic one.
+        return textResult(
+          `${result.error || 'No action banner found'}. The agent may not be awaiting an action.`,
+        )
+      } catch (err) {
+        return toMcpError(err)
+      }
+    },
+  )
+
// Connect via stdio
const transport = new StdioServerTransport()
await server.connect(transport)
diff --git a/src/types.ts b/src/types.ts
index c09b18a..b352bd3 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1,10 +1,17 @@
-export enum AgentState {
- Idle = 'idle',
- Thinking = 'thinking',
- Searching = 'searching',
- Responding = 'responding',
- Completed = 'completed',
- Error = 'error',
+/** Agent status values produced by buildGetAgentStatusScript(). */
+export type AgentStatusValue = 'idle' | 'working' | 'completed' | 'awaiting_action'
+
+/** Canonical agent status returned by status detection scripts. */
+export interface AgentStatus {
+ status: AgentStatusValue
+ steps: string[]
+ currentStep: string
+ response: string
+ hasStopButton: boolean
+ hasLoadingSpinner?: boolean
+ proseCount?: number
+ actionPrompt?: string
+ actionButtons?: string[]
}
export enum TabCategory {
@@ -24,15 +31,6 @@ export interface TabInfo {
category?: TabCategory
}
-export interface AgentStatus {
- state: AgentState
- steps: string[]
- currentStep: string
- response: string
- hasStopButton: boolean
- agentBrowsingUrl: string
-}
-
export interface EvaluateResult {
result: { type: string; value?: unknown; description?: string; objectId?: string }
exceptionDetails?: unknown
@@ -56,6 +54,7 @@ export interface CometConfig {
screenshotQuality: number
windowWidth: number
windowHeight: number
+ overrideViewport: boolean
maxReconnectAttempts: number
maxReconnectDelay: number
pollInterval: number
diff --git a/src/ui/action.ts b/src/ui/action.ts
new file mode 100644
index 0000000..e60be9a
--- /dev/null
+++ b/src/ui/action.ts
@@ -0,0 +1,74 @@
+/**
+ * Build script to click an action button on a Comet permission/confirmation prompt.
+ *
+ * The returned string is a self-contained IIFE for evaluation in the page. It evaluates to a
+ * JSON string: { clicked: true, buttonText, action, fallback? } when a button was clicked,
+ * otherwise { clicked: false, error }.
+ *
+ * @param action - Which button to click: "primary" (the main action, e.g., "Create Issue")
+ *   or "cancel" (the cancel/dismiss button). Defaults to "primary".
+ * @param bannerSelectors - CSS selectors tried in order to locate the prompt banner.
+ * @returns JavaScript source of the click script.
+ */
+export function buildClickActionButtonScript(
+  action: 'primary' | 'cancel' = 'primary',
+  bannerSelectors: readonly string[] = ['[class*="@container/banner"]'],
+): string {
+  return `(function() {
+    var wanted = ${JSON.stringify(action)};
+    var bannerSelectors = ${JSON.stringify([...bannerSelectors])};
+    var bannerSeen = false;
+
+    function labelOf(btn) { return (btn.textContent || '').trim(); }
+    function press(btn, viaFallback) {
+      btn.click();
+      var outcome = { clicked: true, buttonText: labelOf(btn), action: wanted };
+      if (viaFallback) outcome.fallback = true;
+      return JSON.stringify(outcome);
+    }
+
+    for (var bs = 0; bs < bannerSelectors.length; bs++) {
+      var banner = document.querySelector(bannerSelectors[bs]);
+      if (!banner) continue;
+      bannerSeen = true;
+
+      var candidates = [];
+      var primaryBtn = null;
+      var cancelBtn = null;
+      var bannerBtns = banner.querySelectorAll('button');
+      for (var i = 0; i < bannerBtns.length; i++) {
+        var btn = bannerBtns[i];
+        var text = labelOf(btn);
+        // Skip unlabelled or single-character buttons and the "Show more" toggle
+        if (!text || text.length <= 1 || text === 'Show more') continue;
+        candidates.push(btn);
+
+        var classes = btn.className || '';
+        // Primary action buttons have bg-button-bg (filled), cancel has border-subtle (outlined)
+        if (classes.indexOf('bg-button-bg') !== -1) {
+          primaryBtn = btn;
+        } else if (classes.indexOf('border-subtle') !== -1 || text.toLowerCase() === 'cancel') {
+          cancelBtn = btn;
+        }
+      }
+
+      var target = wanted === 'primary' ? primaryBtn : cancelBtn;
+      if (target) return press(target, false);
+
+      // Fallback for primary only: with no bg-button-bg button, click the first labelled
+      // button that was not classified as cancel. Cancel has no fallback: it is clicked
+      // only when positively identified, so a cancel request can never approve by accident.
+      if (wanted === 'primary') {
+        for (var j = 0; j < candidates.length; j++) {
+          var candidate = candidates[j];
+          if (candidate === cancelBtn || labelOf(candidate).toLowerCase() === 'cancel') continue;
+          return press(candidate, true);
+        }
+      }
+    }
+    return JSON.stringify({
+      clicked: false,
+      error: bannerSeen ? 'No ' + wanted + ' button found in the action banner' : 'No action banner found'
+    });
+  })()`
+}
diff --git a/src/ui/navigation.ts b/src/ui/navigation.ts
index a36532f..1c57820 100644
--- a/src/ui/navigation.ts
+++ b/src/ui/navigation.ts
@@ -1,11 +1,18 @@
export function buildSubmitPromptScript(): string {
return `(function() {
+ // Find the input element directly instead of relying on focus
+ var input = document.querySelector('#ask-input') || document.querySelector('[contenteditable="true"]');
+ if (input) input.focus();
var active = document.activeElement;
if (active) {
active.dispatchEvent(new KeyboardEvent('keydown', {key: 'Enter', code: 'Enter', bubbles: true}));
active.dispatchEvent(new KeyboardEvent('keyup', {key: 'Enter', code: 'Enter', bubbles: true}));
}
- return 'submitted';
+ // Verify: check if input was cleared (successful submit clears the field)
+ var afterText = '';
+ if (input && input.innerText) afterText = input.innerText.trim();
+ if (afterText.length === 0) return 'submitted';
+ return 'submitted_input_not_cleared';
})()`
}
@@ -24,6 +31,9 @@ const MODE_ICONS: Record = {
* Build script to click a mode item in the typeahead menu.
* The caller MUST inject '/' via CDP Input API before running this script.
* Matching is done by SVG icon href — locale-independent.
+ *
+ * IMPORTANT: Comet uses React's onMouseDown (not onClick) for mode selection.
+ * We invoke the React prop directly for reliability.
*/
export function buildModeSwitchScript(mode: string): string {
const iconHref = MODE_ICONS[mode] ?? (mode ? `#pplx-icon-${mode}` : '')
@@ -45,8 +55,15 @@ export function buildModeSwitchScript(mode: string): string {
for (var u = 0; u < useEls.length; u++) {
var href = useEls[u].getAttribute('xlink:href') || useEls[u].getAttribute('href') || '';
if (href === iconHref) {
- item.click();
- return 'clicked:' + iconHref;
+ // React uses onMouseDown, not onClick — invoke it directly
+ var propsKey = Object.keys(item).find(function(k) { return k.startsWith('__reactProps'); });
+ if (propsKey && item[propsKey] && typeof item[propsKey].onMouseDown === 'function') {
+ item[propsKey].onMouseDown({ preventDefault: function() {} });
+ return 'clicked:' + iconHref;
+ }
+ // Fallback: dispatch mousedown event
+ item.dispatchEvent(new MouseEvent('mousedown', { bubbles: true, cancelable: true }));
+ return 'clicked_fallback:' + iconHref;
}
}
}
diff --git a/src/ui/selectors.ts b/src/ui/selectors.ts
index c46f9b0..2026d65 100644
--- a/src/ui/selectors.ts
+++ b/src/ui/selectors.ts
@@ -19,4 +19,6 @@ export const SELECTORS = {
TYPEAHEAD_MENU: ['[role="listbox"][aria-label="Typeahead menu"]', '[role="listbox"]'] as const,
MENU_ITEM: ['[role="menuitem"].group\\/item', '[role="menuitem"]'] as const,
LOADING: ['[class*="animate-spin"]', '[class*="animate-pulse"]'] as const,
+ // Action/permission prompt banner (Comet shows these before executing actions)
+ ACTION_BANNER: ['[class*="@container/banner"]'] as const,
} as const
diff --git a/src/ui/status.ts b/src/ui/status.ts
index 756d35e..d011747 100644
--- a/src/ui/status.ts
+++ b/src/ui/status.ts
@@ -4,6 +4,7 @@ import { SELECTORS } from './selectors.js'
export function buildGetAgentStatusScript(selectors?: SelectorSet): string {
const loadingSelectors = selectors?.LOADING ?? SELECTORS.LOADING
+ const actionBannerSelectors = selectors?.ACTION_BANNER ?? SELECTORS.ACTION_BANNER
const findProseBody = buildFindProseJS()
return `(function() {
var status = "idle";
@@ -12,6 +13,8 @@ export function buildGetAgentStatusScript(selectors?: SelectorSet): string {
var response = "";
var hasStopButton = false;
var hasLoadingSpinner = false;
+ var actionPrompt = "";
+ var actionButtons = [];
var buttons = document.querySelectorAll('button');
for (var i = 0; i < buttons.length; i++) {
@@ -45,13 +48,39 @@ export function buildGetAgentStatusScript(selectors?: SelectorSet): string {
if (results.length > 0) {
response = results[results.length - 1];
response = response.replace(/View All/g, '').replace(/Show more/g, '').replace(/Ask a follow-up/g, '').replace(/\\d+ sources/g, '');
- if (response.length > 8000) response = response.substring(0, 8000);
+ if (response.length > 8000) response = response.substring(0, 8000) + '\\n\\n[Response truncated. Use comet_get_page_content for the full text.]';
}
if (hasStopButton || hasLoadingSpinner) status = "working";
else if (hasWorkingText) status = "working";
else if (results.length > 0) status = "completed";
- return JSON.stringify({ status: status, steps: steps, currentStep: currentStep, response: response, hasStopButton: hasStopButton, hasLoadingSpinner: hasLoadingSpinner, proseCount: results.length });
+ // Detect action/permission prompts (Comet asks user to confirm before executing actions)
+ var bannerSelectors = ${JSON.stringify([...actionBannerSelectors])};
+ for (var bs = 0; bs < bannerSelectors.length; bs++) {
+ var banner = document.querySelector(bannerSelectors[bs]);
+ if (banner) {
+ // Extract the prompt text (text inside the banner card, excluding button text)
+ var bannerCard = banner.querySelector('[class*="bg-subtle"]');
+ if (bannerCard) {
+ var bannerText = (bannerCard.textContent || '').trim();
+ // Collect action buttons (buttons with visible text, not UI buttons)
+ var bannerBtns = banner.querySelectorAll('button');
+ for (var bb = 0; bb < bannerBtns.length; bb++) {
+ var btnText = (bannerBtns[bb].textContent || '').trim();
+ if (btnText && btnText.length > 1 && btnText !== 'Show more') {
+ actionButtons.push(btnText);
+ // Remove button text from prompt text
+ bannerText = bannerText.replace(btnText, '');
+ }
+ }
+ actionPrompt = bannerText.replace(/\\s+/g, ' ').trim();
+ }
+ status = "awaiting_action";
+ break;
+ }
+ }
+
+ return JSON.stringify({ status: status, steps: steps, currentStep: currentStep, response: response, hasStopButton: hasStopButton, hasLoadingSpinner: hasLoadingSpinner, proseCount: results.length, actionPrompt: actionPrompt, actionButtons: actionButtons });
})()`
}
diff --git a/tests/integration/tools/harness.ts b/tests/integration/tools/harness.ts
index ee627cf..b7aec22 100644
--- a/tests/integration/tools/harness.ts
+++ b/tests/integration/tools/harness.ts
@@ -21,6 +21,7 @@ export const mocks = {
disconnect: vi.fn<() => Promise>().mockResolvedValue(undefined),
connect: vi.fn<(id?: string) => Promise>().mockResolvedValue('target-1'),
navigate: vi.fn<(url: string) => Promise>().mockResolvedValue(undefined),
+ reload: vi.fn<() => Promise>().mockResolvedValue(undefined),
screenshot: vi.fn<(fmt?: string) => Promise>().mockResolvedValue('base64data'),
safeEvaluate: vi
.fn<(expr: string) => Promise<{ result?: { value?: unknown }; exceptionDetails?: unknown }>>()
@@ -72,6 +73,7 @@ export function resetHarness(): void {
mocks.disconnect.mockResolvedValue(undefined)
mocks.connect.mockResolvedValue('target-1')
mocks.navigate.mockResolvedValue(undefined)
+ mocks.reload.mockResolvedValue(undefined)
mocks.screenshot.mockResolvedValue('base64data')
mocks.safeEvaluate.mockResolvedValue({ result: { value: '{}' } })
mocks.listTargets.mockResolvedValue([
diff --git a/tests/integration/tools/ui-tools.test.ts b/tests/integration/tools/ui-tools.test.ts
index 3c3439d..77edc17 100644
--- a/tests/integration/tools/ui-tools.test.ts
+++ b/tests/integration/tools/ui-tools.test.ts
@@ -73,28 +73,29 @@ describe('UI control tool handlers', () => {
expect(result.content[0].type).toBe('text')
expect(result.content[0].text).toContain('Mode switch result')
expect(result.content[0].text).toContain('clicked:#pplx-icon-telescope')
- })
+ }, 15000)
it('retries mode switch when listbox not immediately available', async () => {
mocks.safeEvaluate.mockReset()
mocks.pressKey.mockClear()
- // Each retry attempt calls safeEvaluate twice (focus + mode switch)
- // Attempt 1: focus(default) + mode(no_listbox_found)
- // Attempt 2: focus(default) + mode(no_listbox_found)
- // Attempt 3: focus(default) + mode(clicked)
+ // Input length check (returns 0 → skip clearing) + 3 retry attempts × 3 calls each
mocks.safeEvaluate
- .mockResolvedValueOnce({ result: { value: undefined } })
- .mockResolvedValueOnce({ result: { value: 'no_listbox_found' } })
- .mockResolvedValueOnce({ result: { value: undefined } })
- .mockResolvedValueOnce({ result: { value: 'no_listbox_found' } })
- .mockResolvedValueOnce({ result: { value: undefined } })
- .mockResolvedValueOnce({ result: { value: 'clicked:#pplx-icon-telescope' } })
+ .mockResolvedValueOnce({ result: { value: 0 } }) // input length check
+ .mockResolvedValueOnce({ result: { value: undefined } }) // attempt 1: focus
+ .mockResolvedValueOnce({ result: { value: undefined } }) // attempt 1: insertText
+ .mockResolvedValueOnce({ result: { value: 'no_listbox_found' } }) // attempt 1: mode switch
+ .mockResolvedValueOnce({ result: { value: undefined } }) // attempt 2: focus
+ .mockResolvedValueOnce({ result: { value: undefined } }) // attempt 2: insertText
+ .mockResolvedValueOnce({ result: { value: 'no_listbox_found' } }) // attempt 2: mode switch
+ .mockResolvedValueOnce({ result: { value: undefined } }) // attempt 3: focus
+ .mockResolvedValueOnce({ result: { value: undefined } }) // attempt 3: insertText
+ .mockResolvedValueOnce({ result: { value: 'clicked:#pplx-icon-telescope' } }) // attempt 3: mode switch
const handler = getHandler('comet_mode')
const result = await handler({ mode: 'deep-research' })
expect(result.content[0].text).toContain('clicked:#pplx-icon-telescope')
- expect(mocks.safeEvaluate).toHaveBeenCalledTimes(6)
- }, 10000)
+ expect(mocks.safeEvaluate).toHaveBeenCalledTimes(10)
+ }, 20000)
it('returns failure after max retries when listbox never appears', async () => {
mocks.safeEvaluate.mockReset()
@@ -103,9 +104,9 @@ describe('UI control tool handlers', () => {
const result = await handler({ mode: 'deep-research' })
expect(result.content[0].text).toContain('Mode switch failed')
- // 10 retries × 2 safeEvaluate calls each = 20
- expect(mocks.safeEvaluate).toHaveBeenCalledTimes(20)
- }, 15000)
+ // 1 input length check + 10 retries × 3 safeEvaluate calls each (focus + insertText + mode switch) = 31
+ expect(mocks.safeEvaluate).toHaveBeenCalledTimes(31)
+ }, 30000)
it('returns error response when safeEvaluate fails', async () => {
mocks.safeEvaluate.mockRejectedValue(new Error('Evaluate failed'))
@@ -237,15 +238,12 @@ describe('UI control tool handlers', () => {
expect(result.content[0].text).toContain('Tab not found')
})
- it('returns tab not found when no criteria provided', async () => {
- mocks.listTargets.mockResolvedValue([
- { id: 'target-1', url: 'https://www.perplexity.ai', type: 'page', title: 'Perplexity' },
- ])
+ it('returns early error when no criteria provided', async () => {
const handler = getHandler('comet_switch_tab')
const result = await handler({})
expect(result.content[0].type).toBe('text')
- expect(result.content[0].text).toContain('Tab not found')
+ expect(result.content[0].text).toContain('Provide at least one')
})
it('returns error response when connect fails', async () => {
@@ -283,4 +281,68 @@ describe('UI control tool handlers', () => {
expect(result.content[0].text).toContain('Main')
})
})
+
+ describe('comet_approve_action', () => {
+ it('approves primary action by default', async () => {
+ mocks.safeEvaluate.mockResolvedValue({
+ result: {
+ value: JSON.stringify({ clicked: true, buttonText: 'Create Issue', action: 'primary' }),
+ },
+ })
+ const handler = getHandler('comet_approve_action')
+ const result = await handler({})
+
+ expect(result.content[0].type).toBe('text')
+ expect(result.content[0].text).toContain('approved')
+ expect(result.content[0].text).toContain('Create Issue')
+ })
+
+ it('cancels when action is cancel', async () => {
+ mocks.safeEvaluate.mockResolvedValue({
+ result: {
+ value: JSON.stringify({ clicked: true, buttonText: 'Cancel', action: 'cancel' }),
+ },
+ })
+ const handler = getHandler('comet_approve_action')
+ const result = await handler({ action: 'cancel' })
+
+ expect(result.content[0].type).toBe('text')
+ expect(result.content[0].text).toContain('cancelled')
+ expect(result.content[0].text).toContain('Cancel')
+ })
+
+ it('reports fallback when bg-button-bg not found', async () => {
+ mocks.safeEvaluate.mockResolvedValue({
+ result: {
+ value: JSON.stringify({ clicked: true, buttonText: 'Confirm', action: 'primary', fallback: true }),
+ },
+ })
+ const handler = getHandler('comet_approve_action')
+ const result = await handler({})
+
+ expect(result.content[0].text).toContain('fallback')
+ })
+
+ it('reports no action banner when not found', async () => {
+ mocks.safeEvaluate.mockResolvedValue({
+ result: {
+ value: JSON.stringify({ clicked: false, error: 'No action banner found' }),
+ },
+ })
+ const handler = getHandler('comet_approve_action')
+ const result = await handler({})
+
+ expect(result.content[0].type).toBe('text')
+ expect(result.content[0].text).toContain('No action banner found')
+ })
+
+ it('returns error response when safeEvaluate fails', async () => {
+ mocks.safeEvaluate.mockRejectedValue(new Error('Evaluate failed'))
+ const handler = getHandler('comet_approve_action')
+ const result = await handler({})
+
+ expect(result.isError).toBe(true)
+ expect(result.content[0].text).toContain('Evaluate failed')
+ })
+ })
})
diff --git a/tests/unit/selectors/index.test.ts b/tests/unit/selectors/index.test.ts
index eb4a745..cc63fc8 100644
--- a/tests/unit/selectors/index.test.ts
+++ b/tests/unit/selectors/index.test.ts
@@ -28,6 +28,7 @@ describe('getSelectorsForVersion', () => {
expect(selectors.LOADING).toBeDefined()
expect(selectors.TYPEAHEAD_MENU).toBeDefined()
expect(selectors.MENU_ITEM).toBeDefined()
+ expect(selectors.ACTION_BANNER).toBeDefined()
})
it('returns fallback (v145) selectors for unknown version 999', async () => {
diff --git a/tests/unit/selectors/v145.test.ts b/tests/unit/selectors/v145.test.ts
index 45b7730..963ff8f 100644
--- a/tests/unit/selectors/v145.test.ts
+++ b/tests/unit/selectors/v145.test.ts
@@ -10,6 +10,7 @@ describe('v145Selectors', () => {
expect(v145Selectors.LOADING).toBeDefined()
expect(v145Selectors.TYPEAHEAD_MENU).toBeDefined()
expect(v145Selectors.MENU_ITEM).toBeDefined()
+ expect(v145Selectors.ACTION_BANNER).toBeDefined()
})
it('each category is a non-empty array', async () => {
@@ -22,6 +23,7 @@ describe('v145Selectors', () => {
'LOADING',
'TYPEAHEAD_MENU',
'MENU_ITEM',
+ 'ACTION_BANNER',
] as const
for (const category of categories) {
@@ -40,6 +42,7 @@ describe('v145Selectors', () => {
'LOADING',
'TYPEAHEAD_MENU',
'MENU_ITEM',
+ 'ACTION_BANNER',
] as const
for (const category of categories) {
diff --git a/tests/unit/tools/handlers.test.ts b/tests/unit/tools/handlers.test.ts
index 3f5af43..1e6d422 100644
--- a/tests/unit/tools/handlers.test.ts
+++ b/tests/unit/tools/handlers.test.ts
@@ -17,9 +17,9 @@ vi.mock('../../../src/logger.js', () => ({
}))
describe('toolDefinitions', () => {
- it('has 13 tools with correct names', async () => {
+ it('has 14 tools with correct names', async () => {
const { toolDefinitions } = await import('../../../src/server.js')
- expect(toolDefinitions).toHaveLength(13)
+ expect(toolDefinitions).toHaveLength(14)
const names = toolDefinitions.map((t) => t.name)
expect(names).toContain('comet_connect')
expect(names).toContain('comet_ask')
diff --git a/tests/unit/tools/registry.test.ts b/tests/unit/tools/registry.test.ts
index 0090ed3..0a2ec77 100644
--- a/tests/unit/tools/registry.test.ts
+++ b/tests/unit/tools/registry.test.ts
@@ -2,8 +2,8 @@ import { describe, expect, it } from 'vitest'
import { toolDefinitions } from '../../../src/server.js'
describe('toolDefinitions', () => {
- it('has 13 tools', () => {
- expect(toolDefinitions).toHaveLength(13)
+ it('has 14 tools', () => {
+ expect(toolDefinitions).toHaveLength(14)
})
it('has all expected names', () => {
@@ -21,6 +21,7 @@ describe('toolDefinitions', () => {
expect(names).toContain('comet_open_conversation')
expect(names).toContain('comet_get_page_content')
expect(names).toContain('comet_wait')
+ expect(names).toContain('comet_approve_action')
})
it('each tool has name, description, inputSchema', () => {
diff --git a/tests/unit/types.test.ts b/tests/unit/types.test.ts
index b97014f..c5076ee 100644
--- a/tests/unit/types.test.ts
+++ b/tests/unit/types.test.ts
@@ -1,20 +1,45 @@
import { describe, expect, it } from 'vitest'
import {
- AgentState,
type AgentStatus,
+ type AgentStatusValue,
type CometConfig,
TabCategory,
type TabInfo,
} from '../../src/types.js'
-describe('AgentState', () => {
- it('has all expected states', () => {
- expect(AgentState.Idle).toBe('idle')
- expect(AgentState.Thinking).toBe('thinking')
- expect(AgentState.Searching).toBe('searching')
- expect(AgentState.Responding).toBe('responding')
- expect(AgentState.Completed).toBe('completed')
- expect(AgentState.Error).toBe('error')
+describe('AgentStatusValue', () => {
+ it('accepts all valid status values', () => {
+ const values: AgentStatusValue[] = ['idle', 'working', 'completed', 'awaiting_action']
+ expect(values).toHaveLength(4)
+ })
+})
+
+describe('AgentStatus', () => {
+ it('accepts valid shape with required fields', () => {
+ const status: AgentStatus = {
+ status: 'working',
+ steps: ['step 1'],
+ currentStep: 'step 2',
+ response: 'text',
+ hasStopButton: true,
+ }
+ expect(status.status).toBe('working')
+ })
+
+ it('accepts optional fields', () => {
+ const status: AgentStatus = {
+ status: 'awaiting_action',
+ steps: [],
+ currentStep: '',
+ response: '',
+ hasStopButton: false,
+ hasLoadingSpinner: true,
+ proseCount: 3,
+ actionPrompt: 'Create issue?',
+ actionButtons: ['Create', 'Cancel'],
+ }
+ expect(status.actionPrompt).toBe('Create issue?')
+ expect(status.actionButtons).toEqual(['Create', 'Cancel'])
})
})
@@ -41,20 +66,6 @@ describe('TabInfo', () => {
})
})
-describe('AgentStatus', () => {
- it('accepts valid shape', () => {
- const status: AgentStatus = {
- state: AgentState.Idle,
- steps: [],
- currentStep: '',
- response: '',
- hasStopButton: false,
- agentBrowsingUrl: '',
- }
- expect(status.state).toBe('idle')
- })
-})
-
describe('CometConfig', () => {
it('accepts valid shape', () => {
const config: CometConfig = {
@@ -67,10 +78,13 @@ describe('CometConfig', () => {
screenshotQuality: 80,
windowWidth: 1440,
windowHeight: 900,
+ overrideViewport: false,
maxReconnectAttempts: 5,
maxReconnectDelay: 5000,
pollInterval: 1000,
+ userDataDir: null,
}
expect(config.port).toBe(9222)
+ expect(config.overrideViewport).toBe(false)
})
})
diff --git a/tests/unit/ui/action.test.ts b/tests/unit/ui/action.test.ts
new file mode 100644
index 0000000..a9b62bd
--- /dev/null
+++ b/tests/unit/ui/action.test.ts
@@ -0,0 +1,68 @@
+import { describe, expect, it } from 'vitest'
+import { buildClickActionButtonScript } from '../../../src/ui/action.js'
+
+describe('buildClickActionButtonScript', () => {
+ describe('primary action', () => {
+ it('wraps in IIFE', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toMatch(/\(function\(\)/)
+ expect(s).toMatch(/\}\)\(\)/)
+ })
+
+ it('looks for action banner containers', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toContain('@container/banner')
+ })
+
+ it('detects primary buttons by bg-button-bg class', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toContain('bg-button-bg')
+ })
+
+ it('detects cancel buttons by border-subtle class', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toContain('border-subtle')
+ })
+
+ it('clicks the target button', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toContain('.click()')
+ })
+
+ it('returns JSON result with clicked status', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toContain('clicked:')
+ expect(s).toContain('buttonText:')
+ })
+
+ it('skips Show more buttons', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toContain('Show more')
+ })
+
+ it('has fallback for buttons without bg-button-bg', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toContain('fallback')
+ })
+
+ it('returns clicked: false when no banner found', () => {
+ const s = buildClickActionButtonScript('primary')
+ expect(s).toContain('No action banner found')
+ expect(s).toContain('clicked: false')
+ })
+ })
+
+ describe('cancel action', () => {
+ it('targets cancelBtn when action is cancel', () => {
+ const s = buildClickActionButtonScript('cancel')
+ expect(s).toContain('cancelBtn')
+ expect(s).toContain('cancel')
+ })
+
+ it('detects cancel by border-subtle class or Cancel text', () => {
+ const s = buildClickActionButtonScript('cancel')
+ expect(s).toContain('border-subtle')
+ expect(s).toContain("'cancel'")
+ })
+ })
+})
diff --git a/tests/unit/ui/navigation.test.ts b/tests/unit/ui/navigation.test.ts
index 745094b..29149d1 100644
--- a/tests/unit/ui/navigation.test.ts
+++ b/tests/unit/ui/navigation.test.ts
@@ -63,6 +63,19 @@ describe('buildModeSwitchScript', () => {
expect(s).toMatch(/^\(function\(\)\s*\{[\s\S]*\}\)\(\)$/)
})
+ it('uses React onMouseDown prop instead of click()', () => {
+ const s = buildModeSwitchScript('deep-research')
+ expect(s).toContain('__reactProps')
+ expect(s).toContain('onMouseDown')
+ expect(s).not.toContain('item.click()')
+ })
+
+ it('dispatches mousedown as fallback', () => {
+ const s = buildModeSwitchScript('deep-research')
+ expect(s).toContain('MouseEvent')
+ expect(s).toContain('mousedown')
+ })
+
it('does not use setTimeout', () => {
const s = buildModeSwitchScript('deep-research')
expect(s).not.toContain('setTimeout')
diff --git a/tests/unit/ui/selectors.test.ts b/tests/unit/ui/selectors.test.ts
index 92753af..4517449 100644
--- a/tests/unit/ui/selectors.test.ts
+++ b/tests/unit/ui/selectors.test.ts
@@ -24,4 +24,8 @@ describe('SELECTORS', () => {
expect(SELECTORS.MENU_ITEM[0]).toBe('[role="menuitem"].group\\/item')
expect(SELECTORS.MENU_ITEM.length).toBeGreaterThanOrEqual(1)
})
+ it('ACTION_BANNER targets the permission prompt container', () => {
+ expect(SELECTORS.ACTION_BANNER.length).toBeGreaterThanOrEqual(1)
+ expect(SELECTORS.ACTION_BANNER[0]).toContain('banner')
+ })
})
diff --git a/tests/unit/ui/status.test.ts b/tests/unit/ui/status.test.ts
index 1e32883..ce8616c 100644
--- a/tests/unit/ui/status.test.ts
+++ b/tests/unit/ui/status.test.ts
@@ -62,4 +62,55 @@ describe('buildGetAgentStatusScript', () => {
expect(s).toContain('Searching')
expect(s).toContain('Navigating to')
})
+
+ // Action/permission prompt detection
+ describe('action prompt detection', () => {
+ it('includes action banner selectors', () => {
+ const s = buildGetAgentStatusScript()
+ expect(s).toContain('bannerSelectors')
+ expect(s).toContain('@container/banner')
+ })
+
+ it('returns actionPrompt field', () => {
+ const s = buildGetAgentStatusScript()
+ expect(s).toContain('actionPrompt')
+ })
+
+ it('returns actionButtons field', () => {
+ const s = buildGetAgentStatusScript()
+ expect(s).toContain('actionButtons')
+ })
+
+ it('sets status to awaiting_action when banner found', () => {
+ const s = buildGetAgentStatusScript()
+ expect(s).toContain('awaiting_action')
+ })
+
+ it('extracts prompt text from bg-subtle banner card', () => {
+ const s = buildGetAgentStatusScript()
+ expect(s).toContain('bg-subtle')
+ })
+
+ it('skips Show more buttons when collecting action buttons', () => {
+ const s = buildGetAgentStatusScript()
+ // The script should filter out 'Show more' from action buttons
+ expect(s).toContain('Show more')
+ })
+
+ it('includes actionPrompt and actionButtons in JSON output', () => {
+ const s = buildGetAgentStatusScript()
+ expect(s).toContain('actionPrompt: actionPrompt')
+ expect(s).toContain('actionButtons: actionButtons')
+ })
+
+ it('accepts custom ACTION_BANNER selectors', () => {
+ const customSelectors = {
+ ...SELECTORS,
+ ACTION_BANNER: ['.custom-banner', '[data-action-prompt]'],
+ }
+ const s = buildGetAgentStatusScript(customSelectors)
+ expect(s).toContain('.custom-banner')
+ expect(s).toContain('[data-action-prompt]')
+ })
+ })
})