Skip to content

Commit 64c1072

Browse files
don't automatically close tabs (#844)
# why

- when a new tab is opened, we call `page.goto()` with the URL of the new tab, and immediately close the old tab
- this is problematic, because some websites require you to keep the old tab open in order to continue browsing in the new tab
- this PR removes that behaviour: we no longer close tabs implicitly

# what changed

- added a live page proxy in `index.ts` which always points to the `focused` page/tab
- added a listener in `StagehandContext.ts` which listens for new pages, and initializes them as Stagehand pages
  - this means that you can call act/extract/observe on all open pages, even if they are not the focused page
- preserved the expected behaviour for users who define `const page = stagehand.page` and then use `page` throughout their code: they expect `page` to always point to the "focused" page

# test plan

- added two new `act` evals that open & close tabs
- run `act`, `regression` and `combination` evals

---------

Co-authored-by: miguel <[email protected]>
Co-authored-by: Miguel <[email protected]>
1 parent 890ffcc commit 64c1072

File tree

12 files changed

+333
-107
lines changed

12 files changed

+333
-107
lines changed

.changeset/little-hornets-tan.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
don't automatically close tabs

evals/deterministic/tests/BrowserContext/multiPage.test.ts

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -189,21 +189,14 @@ test.describe("StagehandContext - Multi-page Support", () => {
189189
* Test active page tracking
190190
*/
191191
test("should update stagehand.page when creating new pages", async () => {
192-
const initialPage = stagehand.page;
192+
const initialTitle = await stagehand.page.title(); // "about:blank" → ""
193193

194-
// Create a new page and verify it becomes active
194+
// Create a new page
195195
const newPage = await stagehand.context.newPage();
196-
expect(stagehand.page).toBe(newPage);
197-
expect(stagehand.page).not.toBe(initialPage);
198-
199-
// Navigate and verify it's still the active page
200196
await newPage.goto(`http://localhost:${serverPort}/page1`);
201-
expect(stagehand.page).toBe(newPage);
202-
expect(await stagehand.page.title()).toBe("Page 1");
203197

204-
// Create another page and verify it becomes active
205-
const anotherPage = await stagehand.context.newPage();
206-
expect(stagehand.page).toBe(anotherPage);
207-
expect(stagehand.page).not.toBe(newPage);
198+
// The proxy should now forward to the new page:
199+
expect(await stagehand.page.title()).toBe("Page 1");
200+
expect(await stagehand.page.title()).not.toBe(initialTitle);
208201
});
209202
});

evals/evals.config.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,18 @@
367367
"name": "no_js_click",
368368
"categories": ["act", "regression"]
369369
},
370+
{
371+
"name": "tab_handling",
372+
"categories": ["act"]
373+
},
374+
{
375+
"name": "agent/kayak",
376+
"categories": ["agent"]
377+
},
378+
{
379+
"name": "multi_tab",
380+
"categories": ["act"]
381+
},
370382
{
371383
"name": "shadow_dom",
372384
"categories": ["act"]

evals/evaluator.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ export class Evaluator {
3434
modelClientOptions?: ClientOptions,
3535
) {
3636
this.stagehand = stagehand;
37-
this.modelName = modelName || "gemini-2.0-flash";
37+
this.modelName = modelName || "google/gemini-2.0-flash";
3838
this.modelClientOptions = modelClientOptions || {
39-
apiKey: process.env.GOOGLE_API_KEY || "",
39+
apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY || "",
4040
};
4141
}
4242

evals/tasks/agent/kayak.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import { EvalFunction } from "@/types/evals";
2+
import { Evaluator } from "@/evals/evaluator";
3+
4+
export const kayak: EvalFunction = async ({
5+
debugUrl,
6+
sessionUrl,
7+
stagehand,
8+
logger,
9+
}) => {
10+
try {
11+
const evaluator = new Evaluator(stagehand);
12+
await stagehand.page.goto("https://www.kayak.com");
13+
const agent = stagehand.agent({
14+
provider: "openai",
15+
model: "computer-use-preview",
16+
instructions: `You are a helpful assistant that can help me find flights. DON'T ASK FOLLOW UP QUESTIONS UNTIL YOU HAVE FULFILLED THE USER'S REQUEST. Today is ${new Date().toLocaleDateString()}.`,
17+
options: {
18+
apiKey: process.env.OPENAI_API_KEY,
19+
},
20+
});
21+
await agent.execute({
22+
instruction: "Find flights from San Francisco to Tokyo next week",
23+
maxSteps: 15,
24+
});
25+
await agent.execute({
26+
instruction: "Sort the flights by price",
27+
maxSteps: 5,
28+
});
29+
30+
if (stagehand.context.pages().length !== 2) {
31+
return {
32+
_success: false,
33+
message: "No new pages were opened",
34+
debugUrl,
35+
sessionUrl,
36+
logs: logger.getLogs(),
37+
};
38+
}
39+
const { evaluation, reasoning } = await evaluator.evaluate({
40+
question: "Are the flights shown sorted by price?",
41+
});
42+
43+
const success = evaluation === "YES";
44+
if (!success) {
45+
return {
46+
_success: false,
47+
message: reasoning,
48+
debugUrl,
49+
sessionUrl,
50+
logs: logger.getLogs(),
51+
};
52+
}
53+
return {
54+
_success: true,
55+
debugUrl,
56+
sessionUrl,
57+
logs: logger.getLogs(),
58+
};
59+
} catch (error) {
60+
return {
61+
_success: false,
62+
message: error.message,
63+
debugUrl,
64+
sessionUrl,
65+
logs: logger.getLogs(),
66+
};
67+
} finally {
68+
stagehand.close();
69+
}
70+
};

evals/tasks/multi_tab.ts

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
export const multi_tab: EvalFunction = async ({
4+
debugUrl,
5+
sessionUrl,
6+
stagehand,
7+
logger,
8+
}) => {
9+
try {
10+
const stagehandPage = stagehand.page;
11+
await stagehandPage.goto(
12+
"https://browserbase.github.io/stagehand-eval-sites/sites/five-tab/",
13+
);
14+
15+
await stagehandPage.act({
16+
action: "click the button to open the other page",
17+
});
18+
await stagehandPage.act({
19+
action: "click the button to open the other page",
20+
});
21+
await stagehandPage.act({
22+
action: "click the button to open the other page",
23+
});
24+
await stagehandPage.act({
25+
action: "click the button to open the other page",
26+
});
27+
28+
let currentPageUrl = stagehandPage.url();
29+
let expectedUrl =
30+
"https://browserbase.github.io/stagehand-eval-sites/sites/five-tab/page5.html";
31+
32+
if (currentPageUrl !== expectedUrl) {
33+
return {
34+
_success: false,
35+
message: "expected URL does not match current URL",
36+
debugUrl,
37+
sessionUrl,
38+
logs: logger.getLogs(),
39+
};
40+
}
41+
42+
// try acting on the first page again
43+
const pages = stagehand.context.pages();
44+
const page1 = pages[0];
45+
await page1.act({
46+
action: "click the button to open the other page",
47+
});
48+
49+
// stagehandPage.url() should point to the URL of the active page
50+
currentPageUrl = stagehandPage.url();
51+
expectedUrl =
52+
"https://browserbase.github.io/stagehand-eval-sites/sites/five-tab/page2.html";
53+
if (currentPageUrl !== expectedUrl) {
54+
return {
55+
_success: false,
56+
message: "expected URL does not match current URL",
57+
debugUrl,
58+
sessionUrl,
59+
logs: logger.getLogs(),
60+
};
61+
}
62+
63+
const page2text = await stagehandPage.extract();
64+
const expectedPage2text = "You've made it to page 2";
65+
66+
if (page2text.page_text.includes(expectedPage2text)) {
67+
return {
68+
_success: true,
69+
debugUrl,
70+
sessionUrl,
71+
logs: logger.getLogs(),
72+
};
73+
}
74+
return {
75+
_success: false,
76+
message: `extracted page text: ${page2text.page_text} does not match expected page text: ${expectedPage2text}`,
77+
debugUrl,
78+
sessionUrl,
79+
logs: logger.getLogs(),
80+
};
81+
} catch (error) {
82+
return {
83+
_success: false,
84+
message: error.message,
85+
debugUrl,
86+
sessionUrl,
87+
logs: logger.getLogs(),
88+
};
89+
} finally {
90+
await stagehand.close();
91+
}
92+
};

evals/tasks/tab_handling.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
export const tab_handling: EvalFunction = async ({
4+
debugUrl,
5+
sessionUrl,
6+
stagehand,
7+
logger,
8+
}) => {
9+
try {
10+
await stagehand.page.goto(
11+
"https://browserbase.github.io/stagehand-eval-sites/sites/new-tab/",
12+
);
13+
14+
await stagehand.page.act({
15+
action: "click the button to open the other page",
16+
});
17+
18+
const pages = stagehand.context.pages();
19+
const page1 = pages[0];
20+
const page2 = pages[1];
21+
22+
// extract all the text from the first page
23+
const extraction1 = await page1.extract();
24+
// extract all the text from the second page
25+
const extraction2 = await page2.extract();
26+
27+
const extraction1Success = extraction1.page_text.includes("Welcome!");
28+
const extraction2Success = extraction2.page_text.includes(
29+
"You’re on the other page",
30+
);
31+
32+
return {
33+
_success: extraction1Success && extraction2Success,
34+
debugUrl,
35+
sessionUrl,
36+
logs: logger.getLogs(),
37+
};
38+
} catch (error) {
39+
return {
40+
_success: false,
41+
message: error.message,
42+
debugUrl,
43+
sessionUrl,
44+
logs: logger.getLogs(),
45+
};
46+
} finally {
47+
await stagehand.close();
48+
}
49+
};

lib/StagehandContext.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,16 @@ export class StagehandContext {
8989
}
9090
}
9191

92+
context.on("page", (pwPage) => {
93+
instance.handleNewPlaywrightPage(pwPage).catch((err) =>
94+
stagehand.logger({
95+
category: "context",
96+
message: `Failed to initialise new page: ${err}`,
97+
level: 0,
98+
}),
99+
);
100+
});
101+
92102
return instance;
93103
}
94104

@@ -122,4 +132,12 @@ export class StagehandContext {
122132
public getActivePage(): StagehandPage | null {
123133
return this.activeStagehandPage;
124134
}
135+
136+
private async handleNewPlaywrightPage(pwPage: PlaywrightPage): Promise<void> {
137+
let stagehandPage = this.pageMap.get(pwPage);
138+
if (!stagehandPage) {
139+
stagehandPage = await this.createStagehandPage(pwPage);
140+
}
141+
this.setActivePage(stagehandPage);
142+
}
125143
}

lib/StagehandPage.ts

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,7 @@ export class StagehandPage {
8888
const value = target[prop];
8989
// If the property is a function (other than `on`), wrap it so it is always invoked with the underlying target as `this`
9090
if (typeof value === "function" && prop !== "on") {
91-
return (...args: unknown[]) => {
92-
// Update active page before executing the method
93-
this.intContext.setActivePage(this);
94-
return value.apply(target, args);
95-
};
91+
return (...args: unknown[]) => value.apply(target, args);
9692
}
9793
return value;
9894
},
@@ -287,7 +283,6 @@ ${scriptContent} \
287283
prop === "$$eval"
288284
) {
289285
return async (...args: unknown[]) => {
290-
this.intContext.setActivePage(this);
291286
// Make sure helpers exist
292287
await this.ensureStagehandScript();
293288
return (value as (...a: unknown[]) => unknown).apply(
@@ -316,10 +311,7 @@ ${scriptContent} \
316311
>;
317312

318313
const method = this[prop as keyof StagehandPage] as EnhancedMethod;
319-
return async (options: unknown) => {
320-
this.intContext.setActivePage(this);
321-
return method.call(this, options);
322-
};
314+
return (options: unknown) => method.call(this, options);
323315
}
324316

325317
// Handle screenshots with CDP
@@ -425,10 +417,7 @@ ${scriptContent} \
425417

426418
// For all other method calls, forward the call to the underlying target unchanged
427419
if (typeof value === "function") {
428-
return (...args: unknown[]) => {
429-
this.intContext.setActivePage(this);
430-
return value.apply(target, args);
431-
};
420+
return (...args: unknown[]) => value.apply(target, args);
432421
}
433422

434423
return value;

0 commit comments

Comments
 (0)