Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/odd-snakes-film.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

Add the list of supported Understudy actions (`SupportedUnderstudyAction`) to the observe system prompt
3 changes: 2 additions & 1 deletion packages/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ This directory contains the Stagehand monorepo packages:

- **core** - The main Stagehand package
- **evals** - Evals CLI
- **docs** - [Docs](https://docs.stagehand.dev)
- **docs** - [Docs](https://docs.stagehand.dev)
- **server** - Fastify server wrapping the core package for different language clients
4 changes: 3 additions & 1 deletion packages/core/lib/inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,15 @@ export async function observe({
userProvidedInstructions,
logger,
logInferenceToFile = false,
supportedActions,
}: {
instruction: string;
domElements: string;
llmClient: LLMClient;
userProvidedInstructions?: string;
logger: (message: LogLine) => void;
logInferenceToFile?: boolean;
supportedActions?: string[];
}) {
const isGPT5 = llmClient.modelName.includes("gpt-5"); // TODO: remove this as we update support for gpt-5 configuration options

Expand Down Expand Up @@ -271,7 +273,7 @@ export async function observe({
type ObserveResponse = z.infer<typeof observeSchema>;

const messages: ChatMessage[] = [
buildObserveSystemPrompt(userProvidedInstructions),
buildObserveSystemPrompt(userProvidedInstructions, supportedActions),
buildObserveUserMessage(instruction, domElements),
];

Expand Down
8 changes: 7 additions & 1 deletion packages/core/lib/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,21 @@ Extracted content: ${JSON.stringify(extractionResponse, null, 2)}`,
// observe
export function buildObserveSystemPrompt(
userProvidedInstructions?: string,
supportedActions?: string[],
): ChatMessage {
const actionsString = supportedActions?.length
? `\n\nSupported actions: ${supportedActions.join(", ")}`
: "";

const observeSystemPrompt = `
You are helping the user automate the browser by finding elements based on what the user wants to observe in the page.

You will be given:
1. a instruction of elements to observe
2. a hierarchical accessibility tree showing the semantic structure of the page. The tree is a hybrid of the DOM and the accessibility tree.

Return an array of elements that match the instruction if they exist, otherwise return an empty array.`;
Return an array of elements that match the instruction if they exist, otherwise return an empty array.
When returning elements, include the appropriate method from the supported actions list.${actionsString}`;
const content = observeSystemPrompt.replace(/\s+/g, " ");

return {
Expand Down
53 changes: 45 additions & 8 deletions packages/core/lib/v3/handlers/actHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
diffCombinedTrees,
} from "../understudy/a11y/snapshot";
import { LLMClient } from "../llm/LLMClient";
import { SupportedPlaywrightAction } from "../types/private";
import { SupportedUnderstudyAction } from "../types/private";
import { EncodedId } from "../types/private/internal";
import {
AvailableModel,
Expand Down Expand Up @@ -157,7 +157,7 @@ export class ActHandler {

const actInstruction = buildActPrompt(
instruction,
Object.values(SupportedPlaywrightAction),
Object.values(SupportedUnderstudyAction),
variables,
);

Expand Down Expand Up @@ -218,13 +218,13 @@ export class ActHandler {
const stepTwoInstructions = buildStepTwoPrompt(
instruction,
previousAction,
Object.values(SupportedPlaywrightAction).filter(
Object.values(SupportedUnderstudyAction).filter(
(
action,
): action is Exclude<
SupportedPlaywrightAction,
SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN
> => action !== SupportedPlaywrightAction.SELECT_OPTION_FROM_DROPDOWN,
SupportedUnderstudyAction,
SupportedUnderstudyAction.SELECT_OPTION_FROM_DROPDOWN
> => action !== SupportedUnderstudyAction.SELECT_OPTION_FROM_DROPDOWN,
),
variables,
);
Expand Down Expand Up @@ -364,7 +364,7 @@ export class ActHandler {

const instruction = buildActPrompt(
actCommand,
Object.values(SupportedPlaywrightAction),
Object.values(SupportedUnderstudyAction),
{},
);

Expand Down Expand Up @@ -471,10 +471,47 @@ function normalizeActInferenceElement(
return undefined;
}

// For dragAndDrop, convert element ID in arguments to xpath (target element)
let resolvedArgs = hasArgs ? args : undefined;
if (method === "dragAndDrop" && hasArgs && args.length > 0) {
const targetArg = args[0];
// Check if argument looks like an element ID (e.g., "1-67")
if (typeof targetArg === "string" && /^\d+-\d+$/.test(targetArg)) {
const argXpath = xpathMap[targetArg as EncodedId];
const trimmedArgXpath = trimTrailingTextNode(argXpath);
if (trimmedArgXpath) {
resolvedArgs = [`xpath=${trimmedArgXpath}`, ...args.slice(1)];
} else {
// Target element lookup failed, filter out this action
v3Logger({
category: "action",
message: "dragAndDrop target element lookup failed",
level: 1,
auxiliary: {
targetElementId: { value: targetArg, type: "string" },
sourceElementId: { value: elementId, type: "string" },
},
});
return undefined;
}
} else {
v3Logger({
category: "action",
message: "dragAndDrop target element invalid ID format",
level: 0,
auxiliary: {
targetElementId: { value: String(targetArg), type: "string" },
sourceElementId: { value: elementId, type: "string" },
},
});
return undefined;
}
}

return {
description,
method,
arguments: hasArgs ? args : undefined,
arguments: resolvedArgs,
selector: `xpath=${trimmed}`,
} as Action;
}
Expand Down
54 changes: 53 additions & 1 deletion packages/core/lib/v3/handlers/observeHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ import { v3Logger } from "../logger";
import { V3FunctionName } from "../types/public/methods";
import { captureHybridSnapshot } from "../understudy/a11y/snapshot";
import { LLMClient } from "../llm/LLMClient";
import { ObserveHandlerParams } from "../types/private/handlers";
import {
ObserveHandlerParams,
SupportedUnderstudyAction,
} from "../types/private/handlers";
import { EncodedId } from "../types/private/internal";
import { Action } from "../types/public/methods";
import {
Expand Down Expand Up @@ -114,6 +117,7 @@ export class ObserveHandler {
userProvidedInstructions: this.systemPrompt,
logger: v3Logger,
logInferenceToFile: this.logInferenceToFile,
supportedActions: Object.values(SupportedUnderstudyAction),
});

const {
Expand Down Expand Up @@ -145,8 +149,56 @@ export class ObserveHandler {
const trimmedXpath = trimTrailingTextNode(xpath);
if (!trimmedXpath) return undefined;

// For dragAndDrop, convert element ID in arguments to xpath (target element)
let resolvedArgs = rest.arguments;
if (
rest.method === "dragAndDrop" &&
Array.isArray(rest.arguments) &&
rest.arguments.length > 0
) {
const targetArg = rest.arguments[0];
// Check if argument looks like an element ID (e.g., "1-67")
if (
typeof targetArg === "string" &&
/^\d+-\d+$/.test(targetArg)
) {
const argXpath = combinedXpathMap[targetArg as EncodedId];
const trimmedArgXpath = trimTrailingTextNode(argXpath);
if (trimmedArgXpath) {
resolvedArgs = [
`xpath=${trimmedArgXpath}`,
...rest.arguments.slice(1),
];
} else {
// Target element lookup failed, filter out this action
v3Logger({
category: "observation",
message: "dragAndDrop target element lookup failed",
level: 0,
auxiliary: {
targetElementId: { value: targetArg, type: "string" },
sourceElementId: { value: elementId, type: "string" },
},
});
return undefined;
}
} else {
v3Logger({
category: "observation",
message: "dragAndDrop target element invalid ID format",
level: 0,
auxiliary: {
targetElementId: { value: targetArg, type: "string" },
sourceElementId: { value: elementId, type: "string" },
},
});
return undefined;
}
}

return {
...rest,
arguments: resolvedArgs,
selector: `xpath=${trimmedXpath}`,
} as {
description: string;
Expand Down
6 changes: 4 additions & 2 deletions packages/core/lib/v3/types/private/handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ export interface ObserveHandlerParams {
page: Page;
}

// We can use this enum to list the actions supported in performPlaywrightMethod
export enum SupportedPlaywrightAction {
// Enumerates the actions supported in performUnderstudyMethod
export enum SupportedUnderstudyAction {
CLICK = "click",
FILL = "fill",
TYPE = "type",
Expand All @@ -38,4 +38,6 @@ export enum SupportedPlaywrightAction {
PREV_CHUNK = "prevChunk",
SELECT_OPTION_FROM_DROPDOWN = "selectOptionFromDropdown",
HOVER = "hover",
DOUBLE_CLICK = "doubleClick",
DRAG_AND_DROP = "dragAndDrop",
}
Loading