diff --git a/.changeset/swift-roses-design.md b/.changeset/swift-roses-design.md new file mode 100644 index 000000000..ed3aaa685 --- /dev/null +++ b/.changeset/swift-roses-design.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +store mapping of CDP frame ID -> page diff --git a/evals/deterministic/tests/BrowserContext/multiPage.test.ts b/evals/deterministic/tests/BrowserContext/multiPage.test.ts index 041cf1c3f..87c5cfc8c 100644 --- a/evals/deterministic/tests/BrowserContext/multiPage.test.ts +++ b/evals/deterministic/tests/BrowserContext/multiPage.test.ts @@ -1,7 +1,6 @@ import { test, expect } from "@playwright/test"; import { Stagehand } from "@browserbasehq/stagehand"; import StagehandConfig from "@/evals/deterministic/stagehand.config"; -import { Page } from "@browserbasehq/stagehand"; import http from "http"; import express from "express"; @@ -128,26 +127,24 @@ test.describe("StagehandContext - Multi-page Support", () => { * Test popup handling */ test("should handle popups with enhanced capabilities", async () => { - const mainPage = stagehand.page; - let popupPage: Page | null = null; + await stagehand.page.goto(`http://localhost:${serverPort}/page1`); + await stagehand.page.click("#popupBtn"); - mainPage.on("popup", (page: Page) => { - popupPage = page; - }); + await expect.poll(() => stagehand.context.pages().length).toBe(2); - await mainPage.goto(`http://localhost:${serverPort}/page1`); - await mainPage.click("#popupBtn"); + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const [_original, popupPage] = stagehand.context.pages(); - // Verify popup has enhanced capabilities - expect(popupPage).not.toBeNull(); - expect(typeof popupPage.act).toBe("function"); - expect(typeof popupPage.extract).toBe("function"); - expect(typeof popupPage.observe).toBe("function"); + await popupPage.waitForLoadState(); - if (popupPage) { - await popupPage.waitForLoadState(); - expect(await popupPage.title()).toBe("Page 2"); - } + const get = (k: string) => + (popupPage as unknown as Record)[k]; + + expect(typeof get("act")).toBe("function"); + expect(typeof get("extract")).toBe("function"); + expect(typeof get("observe")).toBe("function"); + + expect(await popupPage.title()).toBe("Page 2"); }); /** diff --git a/lib/StagehandContext.ts b/lib/StagehandContext.ts index 41b6fe1fd..f93186f1e 100644 --- a/lib/StagehandContext.ts +++ b/lib/StagehandContext.ts @@ -1,17 +1,20 @@ import type { BrowserContext as PlaywrightContext, + CDPSession, Page as PlaywrightPage, } from "playwright"; import { Stagehand } from "./index"; import { StagehandPage } from "./StagehandPage"; import { Page } from "../types/page"; import { EnhancedContext } from "../types/context"; +import { Protocol } from "devtools-protocol"; export class StagehandContext { private readonly stagehand: Stagehand; private readonly intContext: EnhancedContext; private pageMap: WeakMap; private activeStagehandPage: StagehandPage | null = null; + private readonly frameIdMap: Map = new Map(); private constructor(context: PlaywrightContext, stagehand: Stagehand) { this.stagehand = stagehand; @@ -83,6 +86,7 @@ export class StagehandContext { const existingPages = context.pages(); for (const page of existingPages) { const stagehandPage = await instance.createStagehandPage(page); + await instance.attachFrameNavigatedListener(page); // Set the first page as active if (!instance.activeStagehandPage) { instance.setActivePage(stagehandPage); @@ -90,17 +94,43 @@ export class StagehandContext { } context.on("page", (pwPage) => { - instance.handleNewPlaywrightPage(pwPage).catch((err) => - stagehand.logger({ - category: "context", - message: `Failed to initialise new page: ${err}`, - level: 0, - }), - ); + instance + .attachFrameNavigatedListener(pwPage) + .catch((err) => + stagehand.logger({ + category: "cdp", + message: `Failed to attach frameNavigated listener: ${err}`, + level: 0, + }), + ) + .finally(() => + instance.handleNewPlaywrightPage(pwPage).catch((err) => + stagehand.logger({ + category: "context", + message: `Failed to initialise new page: ${err}`, + level: 0, + }), + ), + ); }); return instance; } + public get frameIdLookup(): ReadonlyMap { + return this.frameIdMap; + } + + public registerFrameId(frameId: string, page: StagehandPage): void { + this.frameIdMap.set(frameId, page); + } + + public unregisterFrameId(frameId: string): void { + this.frameIdMap.delete(frameId); + } + + public getStagehandPageByFrameId(frameId: string): StagehandPage | undefined { + return this.frameIdMap.get(frameId); + } public get context(): EnhancedContext { return this.intContext; @@ -140,4 +170,33 @@ export class StagehandContext { } this.setActivePage(stagehandPage); } + + private async attachFrameNavigatedListener( + pwPage: PlaywrightPage, + ): Promise { + const shPage = this.pageMap.get(pwPage); + if (!shPage) return; + const session: CDPSession = await this.intContext.newCDPSession(pwPage); + await session.send("Page.enable"); + + pwPage.once("close", () => { + this.unregisterFrameId(shPage.frameId); + }); + + session.on( + "Page.frameNavigated", + (evt: Protocol.Page.FrameNavigatedEvent): void => { + const { frame } = evt; + + if (!frame.parentId) { + const oldId = shPage.frameId; + if (frame.id !== oldId) { + if (oldId) this.unregisterFrameId(oldId); + this.registerFrameId(frame.id, shPage); + shPage.updateRootFrameId(frame.id); + } + } + }, + ); + } } diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 353631963..d4cce467b 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -56,6 +56,16 @@ export class StagehandPage { [undefined, 0], ]); + private rootFrameId!: string; + + public get frameId(): string { + return this.rootFrameId; + } + + public updateRootFrameId(newId: string): void { + this.rootFrameId = newId; + } + constructor( page: PlaywrightPage, stagehand: Stagehand, @@ -354,9 +364,11 @@ ${scriptContent} \ const rawGoto: typeof target.goto = Object.getPrototypeOf(target).goto.bind(target); return async (url: string, options: GotoOptions) => { - this.intContext.setActivePage(this); const result = this.api - ? await this.api.goto(url, options) + ? await this.api.goto(url, { + ...options, + frameId: this.frameId, + }) : await rawGoto(url, options); this.stagehand.addToHistory("navigate", { url, options }, result); @@ -629,7 +641,10 @@ ${scriptContent} \ const observeResult = actionOrOptions as ObserveResult; if (this.api) { - const result = await this.api.act(observeResult); + const result = await this.api.act({ + ...observeResult, + frameId: this.frameId, + }); await this._refreshPageFromAPI(); this.stagehand.addToHistory("act", observeResult, result); return result; @@ -661,7 +676,8 @@ ${scriptContent} \ const { action, modelName, modelClientOptions } = actionOrOptions; if (this.api) { - const result = await this.api.act(actionOrOptions); + const opts = { ...actionOrOptions, frameId: this.frameId }; + const result = await this.api.act(opts); await this._refreshPageFromAPI(); this.stagehand.addToHistory("act", actionOrOptions, result); return result; @@ -722,7 +738,7 @@ ${scriptContent} \ if (!instructionOrOptions) { let result: ExtractResult; if (this.api) { - result = await this.api.extract({}); + result = await this.api.extract({ frameId: this.frameId }); } else { result = await this.extractHandler.extract(); } @@ -759,7 +775,8 @@ ${scriptContent} \ } if (this.api) { - const result = await this.api.extract(options); + const opts = { ...options, frameId: this.frameId }; + const result = await this.api.extract(opts); this.stagehand.addToHistory("extract", instructionOrOptions, result); return result; } @@ -866,7 +883,8 @@ ${scriptContent} \ } if (this.api) { - const result = await this.api.observe(options); + const opts = { ...options, frameId: this.frameId }; + const result = await this.api.observe(opts); this.stagehand.addToHistory("observe", instructionOrOptions, result); return result; } diff --git a/lib/api.ts b/lib/api.ts index 6689f4633..de60d2b8d 100644 --- a/lib/api.ts +++ b/lib/api.ts @@ -123,7 +123,7 @@ export class StagehandAPI { if (!options.schema) { return this.execute>({ method: "extract", - args: {}, + args: { ...options }, }); } const parsedSchema = zodToJsonSchema(options.schema); diff --git a/types/playwright.ts b/types/playwright.ts index bb623576b..8ec437cf7 100644 --- a/types/playwright.ts +++ b/types/playwright.ts @@ -16,4 +16,5 @@ export interface GotoOptions { timeout?: number; waitUntil?: "load" | "domcontentloaded" | "networkidle" | "commit"; referer?: string; + frameId?: string; } diff --git a/types/stagehand.ts b/types/stagehand.ts index d6ed42d85..706a45a3b 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -116,6 +116,7 @@ export interface ActOptions { domSettleTimeoutMs?: number; timeoutMs?: number; iframes?: boolean; + frameId?: string; } export interface ActResult { @@ -136,6 +137,7 @@ export interface ExtractOptions { useTextExtract?: boolean; selector?: string; iframes?: boolean; + frameId?: string; } export type ExtractResult = z.infer; @@ -152,6 +154,7 @@ export interface ObserveOptions { onlyVisible?: boolean; drawOverlay?: boolean; iframes?: boolean; + frameId?: string; } export interface ObserveResult {