Skip to content

Commit 8bcb5d7

Browse files
add native selectOptionFromDropdown (#842)
# why
- improved dropdown handling for OS-level dropdowns

# what changed
- added a native `selectOptionFromDropdown` function in `actHandlerUtils.ts`
- updated prompting so that the LLM only chooses `selectOptionFromDropdown` in specific cases (when the dropdown is OS-level)

# test plan
- `act` evals

# fast follow PRs
- add documentation explaining the various types of dropdowns, and how to prompt stagehand to handle each of them.

# to do
- [x] better prompting
- [x] add a timeout for locator
- [x] add multiple evals

---------

Co-authored-by: Miguel <[email protected]>
1 parent b077d3f commit 8bcb5d7

File tree

9 files changed

+225
-1
lines changed

9 files changed

+225
-1
lines changed

.changeset/swift-jokes-write.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
improved handling for OS level dropdowns

evals/evals.config.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,18 @@
382382
{
383383
"name": "shadow_dom",
384384
"categories": ["act"]
385+
},
386+
{
387+
"name": "os_dropdown",
388+
"categories": ["act"]
389+
},
390+
{
391+
"name": "custom_dropdown",
392+
"categories": ["act"]
393+
},
394+
{
395+
"name": "hidden_input_dropdown",
396+
"categories": ["act"]
385397
}
386398
]
387399
}

evals/tasks/custom_dropdown.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
/**
 * Eval: a custom (non-`<select>`) dropdown must be expanded by clicking it.
 *
 * This eval is meant to test that we do not incorrectly attempt the
 * selectOptionFromDropdown method (defined in actHandlerUtils.ts) on a
 * 'dropdown' that is not a <select> element. This kind of dropdown must be
 * clicked to be expanded before being interacted with.
 *
 * @returns `_success: true` when the text extracted after the act call
 *   contains "Canada"; otherwise `_success: false` with a `message`. Every
 *   result carries `debugUrl`, `sessionUrl`, and the collected logs.
 */
export const custom_dropdown: EvalFunction = async ({
  debugUrl,
  sessionUrl,
  stagehand,
  logger,
}) => {
  try {
    const page = stagehand.page;
    await page.goto(
      "https://browserbase.github.io/stagehand-eval-sites/sites/expand-dropdown/",
    );

    await page.act("click the 'Select a Country' dropdown");

    // We expect stagehand to click the dropdown to expand it, so the
    // available options should now be part of the extracted page text.
    // To test, grab the full page text and make sure it contains 'Canada'.
    const extraction = await page.extract();
    const fullTree = extraction.page_text;

    if (fullTree.includes("Canada")) {
      return {
        _success: true,
        debugUrl,
        sessionUrl,
        logs: logger.getLogs(),
      };
    }

    return {
      _success: false,
      message: "unable to expand the dropdown",
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; narrow before reading
    // `.message` so non-Error throws are still reported meaningfully.
    const reason = error instanceof Error ? error.message : String(error);
    return {
      _success: false,
      // This eval only expands the dropdown; it never selects an option.
      message: `error attempting to click to expand the dropdown: ${reason}`,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } finally {
    // Always release the browser session, whatever the outcome.
    await stagehand.close();
  }
};

evals/tasks/hidden_input_dropdown.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
/**
 * Eval: a hidden-input 'dropdown' must be expanded by clicking it.
 *
 * This eval is meant to test that we do not incorrectly attempt the
 * selectOptionFromDropdown method (defined in actHandlerUtils.ts) on a
 * hidden input 'dropdown'. This kind of dropdown must be clicked to be
 * expanded before being interacted with.
 *
 * @returns `_success: true` when the text extracted after the act call
 *   contains "Green"; otherwise `_success: false` with a `message`. Every
 *   result carries `debugUrl`, `sessionUrl`, and the collected logs.
 */
export const hidden_input_dropdown: EvalFunction = async ({
  debugUrl,
  sessionUrl,
  stagehand,
  logger,
}) => {
  try {
    const page = stagehand.page;
    await page.goto(
      "https://browserbase.github.io/stagehand-eval-sites/sites/hidden-input-dropdown/",
    );

    await page.act("click to expand the 'Favourite Colour' dropdown");

    // We expect stagehand to click the dropdown to expand it, so the
    // available options should now be part of the extracted page text.
    // To test, grab the full page text and make sure it contains 'Green'.
    const extraction = await page.extract();
    const fullTree = extraction.page_text;

    if (fullTree.includes("Green")) {
      return {
        _success: true,
        debugUrl,
        sessionUrl,
        logs: logger.getLogs(),
      };
    }

    return {
      _success: false,
      message: "unable to expand the dropdown",
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; narrow before reading
    // `.message` so non-Error throws are still reported meaningfully.
    const reason = error instanceof Error ? error.message : String(error);
    return {
      _success: false,
      message: `error attempting click to expand the dropdown: ${reason}`,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } finally {
    // Always release the browser session, whatever the outcome.
    await stagehand.close();
  }
};

evals/tasks/os_dropdown.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
/**
 * Eval: select an option from an OS level dropdown.
 *
 * This eval is meant to test whether we can correctly select an element
 * from an OS level dropdown.
 *
 * @returns `_success: true` when the checked option of the targeted
 *   `<select>` has the exact text "Smog Check Technician"; otherwise
 *   `_success: false` with a `message`. Every result carries `debugUrl`,
 *   `sessionUrl`, and the collected logs.
 */
export const os_dropdown: EvalFunction = async ({
  debugUrl,
  sessionUrl,
  stagehand,
  logger,
}) => {
  try {
    const page = stagehand.page;
    await page.goto(
      "https://browserbase.github.io/stagehand-eval-sites/sites/nested-dropdown/",
    );

    await page.act(
      "choose 'Smog Check Technician' from the 'License Type' dropdown",
    );

    // Read back the text of whichever <option> is currently checked inside
    // the target <select>, to verify the act call changed the selection.
    const selectedOption = await page
      .locator(
        "xpath=/html/body/form/div[1]/div[3]/article/div[2]/div[1]/select[2] >> option:checked",
      )
      .textContent();

    if (selectedOption === "Smog Check Technician") {
      return {
        _success: true,
        debugUrl,
        sessionUrl,
        logs: logger.getLogs(),
      };
    }

    return {
      _success: false,
      message: "incorrect option selected from the dropdown",
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; narrow before reading
    // `.message` so non-Error throws are still reported meaningfully.
    const reason = error instanceof Error ? error.message : String(error);
    return {
      _success: false,
      message: `error attempting to select an option from the dropdown: ${reason}`,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } finally {
    // Always release the browser session, whatever the outcome.
    await stagehand.close();
  }
};

lib/a11y/utils.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,14 @@ async function cleanStructuralNodes(
303303
if (tagName) node.role = tagName;
304304
}
305305

306+
if (
307+
node.role === "combobox" &&
308+
node.encodedId !== undefined &&
309+
tagNameMap[node.encodedId] === "select"
310+
) {
311+
node.role = "select";
312+
}
313+
306314
// 5. drop redundant StaticText children
307315
const pruned = removeRedundantStaticTextChildren(node, cleanedChildren);
308316
if (!pruned.length && (node.role === "generic" || node.role === "none")) {

lib/handlers/handlerUtils/actHandlerUtils.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ export const methodHandlerMap: Record<
5959
click: clickElement,
6060
nextChunk: scrollToNextChunk,
6161
prevChunk: scrollToPreviousChunk,
62+
selectOptionFromDropdown: selectOption,
6263
};
6364

6465
export async function scrollToNextChunk(ctx: MethodHandlerContext) {
@@ -349,6 +350,26 @@ export async function pressKey(ctx: MethodHandlerContext) {
349350
}
350351
}
351352

353+
/**
 * Selects an option on the element targeted by `ctx.locator` by calling
 * Playwright's `locator.selectOption`.
 *
 * The option to select is taken from `ctx.args[0]`, converted to a string
 * (an absent or empty first argument becomes `""`). The call is bounded by a
 * 5000 ms timeout.
 *
 * @param ctx - Handler context; this function reads `locator`, `xpath`,
 *   `args`, and `logger` from it.
 * @throws PlaywrightCommandException when `locator.selectOption` fails. The
 *   failure is logged first, together with the xpath of the target element.
 */
export async function selectOption(ctx: MethodHandlerContext) {
  const { locator, xpath, args, logger } = ctx;
  try {
    const text = args[0]?.toString() || "";
    await locator.selectOption(text, { timeout: 5000 });
  } catch (e) {
    // A thrown value is not guaranteed to be an Error; narrow before reading
    // `.message` / `.stack` so the log and the rethrown exception never
    // carry `undefined`.
    const isError = e instanceof Error;
    const message = isError ? e.message : String(e);
    const trace = isError ? (e.stack ?? "") : "";

    logger({
      category: "action",
      message: "error selecting option",
      level: 0,
      auxiliary: {
        error: { value: message, type: "string" },
        trace: { value: trace, type: "string" },
        xpath: { value: xpath, type: "string" },
      },
    });
    throw new PlaywrightCommandException(message);
  }
}
372+
352373
export async function clickElement(ctx: MethodHandlerContext) {
353374
const {
354375
locator,

lib/prompt.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,9 @@ export function buildActObservePrompt(
159159
ONLY return one action. If multiple actions are relevant, return the most relevant one.
160160
If the user is asking to scroll to a position on the page, e.g., 'halfway' or 0.75, etc, you must return the argument formatted as the correct percentage, e.g., '50%' or '75%', etc.
161161
If the user is asking to scroll to the next chunk/previous chunk, choose the nextChunk/prevChunk method. No arguments are required here.
162-
If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument — e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys.`;
162+
If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument — e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys.
163+
If the action implies choosing an option from a dropdown, AND the corresponding element is a 'select' element, choose the selectOptionFromDropdown method. The argument should be the text of the option to select.
164+
If the action implies choosing an option from a dropdown, and the corresponding element is NOT a 'select' element, choose the click method.`;
163165

164166
// Add variable names (not values) to the instruction if any
165167
if (variables && Object.keys(variables).length > 0) {

types/act.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export enum SupportedPlaywrightAction {
3939
SCROLL = "scrollTo",
4040
NEXT_CHUNK = "nextChunk",
4141
PREV_CHUNK = "prevChunk",
42+
SELECT_OPTION_FROM_DROPDOWN = "selectOptionFromDropdown",
4243
}
4344

4445
/**

0 commit comments

Comments
 (0)