diff --git a/.changeset/fifty-gifts-win.md b/.changeset/fifty-gifts-win.md
new file mode 100644
index 000000000..f8846dc51
--- /dev/null
+++ b/.changeset/fifty-gifts-win.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+fix issue where screenshot masks were not applied to elements inside open dialogs or popovers (top-layer elements)
diff --git a/packages/core/lib/v3/dom/screenshotScripts/resolveMaskRect.ts b/packages/core/lib/v3/dom/screenshotScripts/resolveMaskRect.ts
index 293ba6ccb..805074ffc 100644
--- a/packages/core/lib/v3/dom/screenshotScripts/resolveMaskRect.ts
+++ b/packages/core/lib/v3/dom/screenshotScripts/resolveMaskRect.ts
@@ -3,9 +3,39 @@ export type MaskRect = {
y: number;
width: number;
height: number;
+ rootToken?: string | null;
};
-export function resolveMaskRect(this: Element | null): MaskRect | null {
+export function resolveMaskRect(
+ this: Element | null,
+ maskToken?: string,
+): MaskRect | null {
+ function safeClosest(el: Element | null, selector: string): Element | null {
+ try {
+ return el && typeof el.closest === "function"
+ ? el.closest(selector)
+ : null;
+ } catch {
+ return null;
+ }
+ }
+
+ function safeMatches(el: Element | null, selector: string): boolean {
+ try {
+ return !!el && typeof el.matches === "function" && el.matches(selector);
+ } catch {
+ return false;
+ }
+ }
+
+ function findTopLayerRoot(el: Element | null): Element | null {
+ const dialog = safeClosest(el, "dialog[open]");
+ if (dialog) return dialog;
+ const popover = safeClosest(el, "[popover]");
+ if (popover && safeMatches(popover, ":popover-open")) return popover;
+ return null;
+ }
+
if (!this || typeof this.getBoundingClientRect !== "function") return null;
const rect = this.getBoundingClientRect();
if (!rect) return null;
@@ -13,10 +43,45 @@ export function resolveMaskRect(this: Element | null): MaskRect | null {
if (!style) return null;
if (style.visibility === "hidden" || style.display === "none") return null;
if (rect.width <= 0 || rect.height <= 0) return null;
+
+ const root = findTopLayerRoot(this);
+ if (root) {
+ const rootRect = root.getBoundingClientRect();
+ if (!rootRect) return null;
+ let rootToken: string | null = null;
+ if (maskToken) {
+ try {
+ const existing = root.getAttribute("data-stagehand-mask-root");
+ if (existing && existing.startsWith(maskToken)) {
+ rootToken = existing;
+ } else {
+ rootToken =
+ maskToken + "_root_" + Math.random().toString(36).slice(2);
+ root.setAttribute("data-stagehand-mask-root", rootToken);
+ }
+ } catch {
+ rootToken = null;
+ }
+ }
+ return {
+ x:
+ rect.left -
+ rootRect.left -
+ (root.clientLeft || 0) +
+ (root.scrollLeft || 0),
+ y:
+ rect.top - rootRect.top - (root.clientTop || 0) + (root.scrollTop || 0),
+ width: rect.width,
+ height: rect.height,
+ rootToken,
+ };
+ }
+
return {
x: rect.left + window.scrollX,
y: rect.top + window.scrollY,
width: rect.width,
height: rect.height,
+ rootToken: null,
};
}
diff --git a/packages/core/lib/v3/tests/page-screenshot.spec.ts b/packages/core/lib/v3/tests/page-screenshot.spec.ts
index e5894f971..ee56a34a8 100644
--- a/packages/core/lib/v3/tests/page-screenshot.spec.ts
+++ b/packages/core/lib/v3/tests/page-screenshot.spec.ts
@@ -263,4 +263,68 @@ test.describe("Page.screenshot options", () => {
await fs.unlink(tempPath).catch(() => {});
}
});
+
+ test("masks elements inside dialog top layer", async () => {
+ const page = v3.context.pages()[0];
+
+ const html = `
+
+
+
+
+
+
+
+
+
+
+
+ `;
+
+ await page.goto("data:text/html," + encodeURIComponent(html));
+
+ const targetId = page.targetId();
+ const originalScreenshot = Frame.prototype.screenshot;
+ let dialogMaskCount = 0;
+
+ Frame.prototype.screenshot = async function screenshotSpy(options) {
+ const frame = this as Frame;
+ if (frame.pageId === targetId) {
+ dialogMaskCount = await frame.evaluate(() => {
+ const dialog = document.querySelector("dialog[open]");
+ if (!dialog) return 0;
+ return dialog.querySelectorAll("[data-stagehand-mask]").length;
+ });
+ return Buffer.from("stub-image");
+ }
+ return originalScreenshot.call(this, options);
+ };
+
+ try {
+ await page.screenshot({
+ mask: [page.locator("#dialog-input")],
+ });
+ expect(dialogMaskCount).toBeGreaterThan(0);
+ } finally {
+ Frame.prototype.screenshot = originalScreenshot;
+ }
+ });
});
diff --git a/packages/core/lib/v3/understudy/screenshotUtils.ts b/packages/core/lib/v3/understudy/screenshotUtils.ts
index c37fab11f..791a35f7f 100644
--- a/packages/core/lib/v3/understudy/screenshotUtils.ts
+++ b/packages/core/lib/v3/understudy/screenshotUtils.ts
@@ -206,15 +206,27 @@ export async function applyMaskOverlays(
locators: Locator[],
color: string,
): Promise {
- const rectsByFrame = new Map();
+ type MaskRectSpec = ScreenshotClip & { rootToken?: string | null };
+ const rectsByFrame = new Map<
+ Frame,
+ { rects: MaskRectSpec[]; rootTokens: Set }
+ >();
+
+ const token = `__v3_mask_${Date.now()}_${Math.random().toString(36).slice(2)}`;
for (const locator of locators) {
try {
- const info = await resolveMaskRects(locator);
+ const info = await resolveMaskRects(locator, token);
if (!info) continue;
- const list = rectsByFrame.get(info.frame) ?? [];
- list.push(...info.rects);
- rectsByFrame.set(info.frame, list);
+ const entry = rectsByFrame.get(info.frame) ?? {
+ rects: [],
+ rootTokens: new Set(),
+ };
+ entry.rects.push(...info.rects);
+ for (const rect of info.rects) {
+ if (rect.rootToken) entry.rootTokens.add(rect.rootToken);
+ }
+ rectsByFrame.set(info.frame, entry);
} catch {
// ignore individual locator failures
}
@@ -224,19 +236,42 @@ export async function applyMaskOverlays(
return async () => {};
}
- const token = `__v3_mask_${Date.now()}_${Math.random().toString(36).slice(2)}`;
-
await Promise.all(
- Array.from(rectsByFrame.entries()).map(([frame, rects]) =>
+ Array.from(rectsByFrame.entries()).map(([frame, { rects }]) =>
frame
.evaluate(
({ rects, color, token }) => {
try {
const doc = document;
if (!doc) return;
- const root = doc.documentElement || doc.body;
- if (!root) return;
for (const rect of rects) {
+ const defaultRoot = doc.documentElement || doc.body;
+ if (!defaultRoot) return;
+ const root = rect.rootToken
+ ? doc.querySelector(
+ `[data-stagehand-mask-root="${rect.rootToken}"]`,
+ ) || defaultRoot
+ : defaultRoot;
+ if (!root) continue;
+ if (rect.rootToken) {
+ try {
+ const style = window.getComputedStyle(root as Element);
+ if (style && style.position === "static") {
+ const rootEl = root as HTMLElement;
+ if (
+ !rootEl.hasAttribute("data-stagehand-mask-root-pos")
+ ) {
+ rootEl.setAttribute(
+ "data-stagehand-mask-root-pos",
+ rootEl.style.position || "",
+ );
+ }
+ rootEl.style.position = "relative";
+ }
+ } catch {
+ // ignore
+ }
+ }
const el = doc.createElement("div");
el.setAttribute("data-stagehand-mask", token);
el.style.position = "absolute";
@@ -249,7 +284,7 @@ export async function applyMaskOverlays(
el.style.zIndex = "2147483647";
el.style.opacity = "1";
el.style.mixBlendMode = "normal";
- root.appendChild(el);
+ (root as Element).appendChild(el);
}
} catch {
// ignore
@@ -263,20 +298,37 @@ export async function applyMaskOverlays(
return async () => {
await Promise.all(
- Array.from(rectsByFrame.keys()).map((frame) =>
+ Array.from(rectsByFrame.entries()).map(([frame, { rootTokens }]) =>
frame
- .evaluate((token) => {
- try {
- const doc = document;
- if (!doc) return;
- const nodes = doc.querySelectorAll(
- `[data-stagehand-mask="${token}"]`,
- );
- nodes.forEach((node) => node.remove());
- } catch {
- // ignore
- }
- }, token)
+ .evaluate(
+ ({ token, rootTokens }) => {
+ try {
+ const doc = document;
+ if (!doc) return;
+ const nodes = doc.querySelectorAll(
+ `[data-stagehand-mask="${token}"]`,
+ );
+ nodes.forEach((node) => node.remove());
+ for (const rootToken of rootTokens) {
+ const root = doc.querySelector(
+ `[data-stagehand-mask-root="${rootToken}"]`,
+ ) as HTMLElement | null;
+ if (!root) continue;
+ const prev = root.getAttribute(
+ "data-stagehand-mask-root-pos",
+ );
+ if (prev !== null) {
+ root.style.position = prev;
+ root.removeAttribute("data-stagehand-mask-root-pos");
+ }
+ root.removeAttribute("data-stagehand-mask-root");
+ }
+ } catch {
+ // ignore
+ }
+ },
+ { token, rootTokens: Array.from(rootTokens) },
+ )
.catch(() => {}),
),
);
@@ -285,7 +337,11 @@ export async function applyMaskOverlays(
async function resolveMaskRects(
locator: Locator,
-): Promise<{ frame: Frame; rects: ScreenshotClip[] } | null> {
+ maskToken: string,
+): Promise<{
+ frame: Frame;
+ rects: Array;
+} | null> {
const frame = locator.getFrame();
const session = frame.session;
let resolved: Array<{
@@ -295,11 +351,15 @@ async function resolveMaskRects(
try {
resolved = await locator.resolveNodesForMask();
- const rects: ScreenshotClip[] = [];
+ const rects: Array = [];
for (const { objectId } of resolved) {
try {
- const rect = await resolveMaskRectForObject(session, objectId);
+ const rect = await resolveMaskRectForObject(
+ session,
+ objectId,
+ maskToken,
+ );
if (rect) rects.push(rect);
} catch {
// ignore individual element failures
@@ -321,12 +381,14 @@ async function resolveMaskRects(
async function resolveMaskRectForObject(
session: CDPSessionLike,
objectId: Protocol.Runtime.RemoteObjectId,
-): Promise {
+ maskToken: string,
+): Promise<(ScreenshotClip & { rootToken?: string | null }) | null> {
const result = await session.send(
"Runtime.callFunctionOn",
{
objectId,
functionDeclaration: screenshotScriptSources.resolveMaskRect,
+ arguments: [{ value: maskToken }],
returnByValue: true,
},
);
@@ -335,7 +397,9 @@ async function resolveMaskRectForObject(
return null;
}
- const rect = result.result.value as ScreenshotClip | null;
+ const rect = result.result.value as
+ | (ScreenshotClip & { rootToken?: string | null })
+ | null;
if (!rect) return null;
const { x, y, width, height } = rect;
@@ -350,7 +414,16 @@ async function resolveMaskRectForObject(
return null;
}
- return { x, y, width, height };
+ return {
+ x,
+ y,
+ width,
+ height,
+ rootToken:
+ rect.rootToken && typeof rect.rootToken === "string"
+ ? rect.rootToken
+ : undefined,
+ };
}
export async function runScreenshotCleanups(