Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions packages/core/lib/v3/handlers/handlerUtils/actHandlerUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ export async function performUnderstudyMethod(
args: Array.from(args),
});

// Arm the watcher before the action so we do not miss very fast URL changes,
// but start the timeout window only after the click returns.
const navWatcher = shouldWaitForPostClickSettle(method)
? watchMainFrameUrlChangeStart(page, page.url())
: null;

try {
const handler = METHOD_HANDLER_MAP[method] ?? null;

Expand Down Expand Up @@ -106,6 +112,11 @@ export async function performUnderstudyMethod(
);
}
}
if (navWatcher && (await navWatcher.wait(400))) {
await waitForDomNetworkQuiet(page.mainFrame(), domSettleTimeoutMs).catch(
() => {},
);
}
Comment on lines +115 to +119
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unconditional 400ms latency on every click/doubleClick

navWatcher.wait(400) always blocks for up to 400ms after every click or doubleClick action, even when the click does not trigger any navigation. The timer in wait() only resolves early if a Page.frameNavigated or Page.navigatedWithinDocument event fires within the window — if neither fires, the 400ms timeout runs to completion before returning false.

In agentic workflows that perform many clicks (selecting options, toggling checkboxes, closing modals, etc.) this adds up to 400ms of dead wait per non-navigating click. A task with 20 clicks would incur up to 8 seconds of pure idle time.

Consider whether the window should start from when the click handler is dispatched rather than when wait() is called, or whether a shorter window (e.g. 150–200ms) would be enough to catch the typical "JS deferred with setTimeout" pattern while reducing the worst-case overhead on non-navigating clicks.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ALTERNATIVE IDEA (which I prefer) would be add an arg to act({..., expectNavigation: true | false}) and only wait for nav if true is passed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a much better approach. The explicit expectNavigation flag avoids the entire detection problem — no 400ms timeout penalty on non-navigating clicks, and the caller already knows whether they expect navigation (e.g., clicking a submit button vs toggling a checkbox).

The automatic detection forces a trade-off between latency (shorter window = faster but might miss slow navigations) and reliability (longer window = catches more cases but adds overhead everywhere). An explicit flag sidesteps this entirely and makes the behavior predictable.

The only downside is slightly more verbose call sites when navigation is expected, but that's a small price for eliminating 8+ seconds of idle time across a multi-click task.

} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
const stack = e instanceof Error ? e.stack : undefined;
Expand All @@ -126,10 +137,111 @@ export async function performUnderstudyMethod(
}
throw new UnderstudyCommandException(msg, e);
} finally {
navWatcher?.dispose();
SessionFileLogger.logUnderstudyActionCompleted();
}
}

function shouldWaitForPostClickSettle(method: string): boolean {
return method === "click" || method === "doubleClick";
}

function watchMainFrameUrlChangeStart(
page: Page,
initialUrl: string,
): {
wait: (timeoutMs: number) => Promise<boolean>;
dispose: () => void;
} {
const session = page.mainFrame().session;
const initialMainFrameId = page.mainFrameId();
let matched = false;
let disposed = false;
let waiter:
| {
resolve: (value: boolean) => void;
timer: ReturnType<typeof setTimeout>;
}
| undefined;

const finish = (value: boolean) => {
if (disposed) return;
// Latch a successful match so wait() can return immediately when a very
// fast navigation starts before the post-click timeout window begins.
if (value) matched = true;
if (waiter) {
clearTimeout(waiter.timer);
const current = waiter;
waiter = undefined;
current.resolve(value);
}
};

const dispose = () => {
// dispose() is called from cleanup paths that can overlap, so it must be
// safe to run more than once.
if (disposed) return;
disposed = true;
session.off("Page.frameNavigated", onFrameNavigated);
session.off("Page.navigatedWithinDocument", onNavigatedWithinDocument);
if (waiter) {
clearTimeout(waiter.timer);
waiter.resolve(false);
waiter = undefined;
}
};

// Main-frame identity can shift during navigation. Treat both the original
// root frame id and the current page.mainFrameId() as authoritative.
const isMainFrame = (frameId: string): boolean =>
frameId === initialMainFrameId || frameId === page.mainFrameId();

// We only treat main-frame events with an actual URL change as navigation
// start. Real pages can emit same-URL history events that should not trigger
// a post-click settle wait.
const onFrameNavigated = (event: Protocol.Page.FrameNavigatedEvent) => {
if (
isMainFrame(event.frame.id) &&
event.frame.url &&
event.frame.url !== initialUrl
) {
finish(true);
}
};

const onNavigatedWithinDocument = (
event: Protocol.Page.NavigatedWithinDocumentEvent,
) => {
if (isMainFrame(event.frameId) && event.url !== initialUrl) {
finish(true);
}
};

session.on("Page.frameNavigated", onFrameNavigated);
session.on("Page.navigatedWithinDocument", onNavigatedWithinDocument);

return {
wait: async (timeoutMs: number) => {
// If navigation started while the click was still in flight, surface that
// immediately instead of burning the full timeout budget after the click.
if (matched) return true;
if (disposed) return false;
return await new Promise<boolean>((resolve) => {
waiter = {
resolve,
timer: setTimeout(() => {
// Clear waiter before resolving so a late navigation event cannot
// race in and resolve the same wait() call twice.
waiter = undefined;
resolve(false);
}, timeoutMs),
};
});
},
dispose,
};
}

/* ===================== Handlers & Map ===================== */

const METHOD_HANDLER_MAP: Record<
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import { expect, test } from "@playwright/test";
import { V3 } from "../../lib/v3/v3.js";
import { performUnderstudyMethod } from "../../lib/v3/handlers/handlerUtils/actHandlerUtils.js";
import { closeV3 } from "./testUtils.js";
import { v3DynamicTestConfig } from "./v3.dynamic.config.js";

async function loadDelayedDocumentNavigationFixture(
v3: V3,
delayMs: number,
): Promise<{
page: ReturnType<V3["context"]["pages"]>[number];
sourceUrl: string;
targetUrl: string;
}> {
const page = v3.context.pages()[0];
const sourceUrl = "https://example.com/";
const targetUrl = `https://example.com/?nav-race=${delayMs}`;

await page.goto(sourceUrl, { waitUntil: "load" });

await page.mainFrame().evaluate(
({ delay, nextUrl }) => {
document.open();
document.write(`<!DOCTYPE html>
<html>
<body>
<button id="go" type="button">Go</button>
</body>
</html>`);
document.close();

(window as typeof window & { __navScheduled?: boolean }).__navScheduled =
false;
document.getElementById("go")?.addEventListener("click", () => {
(
window as typeof window & { __navScheduled?: boolean }
).__navScheduled = true;
setTimeout(() => {
window.location.assign(nextUrl);
}, delay);
});
},
{ delay: delayMs, nextUrl: targetUrl },
);

return {
page,
sourceUrl,
targetUrl,
};
}

test.describe("performUnderstudyMethod navigation race", () => {
let v3: V3;

test.beforeEach(async () => {
v3 = new V3(v3DynamicTestConfig);
await v3.init();
});

test.afterEach(async () => {
await closeV3(v3);
});

test("waits for navigation that starts within 400ms of click", async () => {
const { page, targetUrl } = await loadDelayedDocumentNavigationFixture(
v3,
250,
);

await performUnderstudyMethod(
page,
page.mainFrame(),
"click",
"xpath=/html/body/button",
[],
3_000,
);

expect(page.url()).toBe(targetUrl);
});

test("does not wait for navigation that starts after 400ms of click", async () => {
const { page, sourceUrl, targetUrl } =
await loadDelayedDocumentNavigationFixture(v3, 900);

await performUnderstudyMethod(
page,
page.mainFrame(),
"click",
"xpath=/html/body/button",
[],
3_000,
);

expect(page.url()).toBe(sourceUrl);
expect(
await page.evaluate(() => {
return (window as typeof window & { __navScheduled?: boolean })
.__navScheduled;
}),
).toBe(true);

await expect
.poll(() => page.url(), {
timeout: 3_000,
})
.toBe(targetUrl);
});
});