From ff146d5a75549fe7673723fc1d0d35e7dc3280a3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 10 May 2025 13:00:43 +0700 Subject: [PATCH 1/4] Refactor LocalBrowserProvider to support wsEndpoint in options and improve browser connection handling --- src/browser-providers/local.ts | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/browser-providers/local.ts b/src/browser-providers/local.ts index 976c772..890438a 100644 --- a/src/browser-providers/local.ts +++ b/src/browser-providers/local.ts @@ -1,15 +1,23 @@ -import { chromium, Browser, LaunchOptions } from "playwright"; +import { chromium, Browser, LaunchOptions, ConnectOptions } from "playwright"; import BrowserProvider from "@/types/browser-providers/types"; export class LocalBrowserProvider extends BrowserProvider { - options: Omit, "channel"> | undefined; + options: (Omit, "channel"> & { wsEndpoint?: string,args? : []}) | ConnectOptions & { wsEndpoint?: string,args? : []} | undefined; session: Browser | undefined; - constructor(options?: Omit, "channel">) { + constructor(options?: Omit, "channel"> & { wsEndpoint?: string,args? : []}) { super(); this.options = options; } async start(): Promise { - const launchArgs = this.options?.args ?? []; + + if (this.options && 'wsEndpoint' in this.options && this.options.wsEndpoint) { + const browser = await chromium.connect(this.options.wsEndpoint,{ + ...this.options + }); + this.session = browser; + return this.session; + } + const launchArgs = this.options?.args || []; const browser = await chromium.launch({ ...(this.options ?? 
{}), channel: "chrome", From 53406b4b1d756b92e64b0fe7fb47fe0b398b943b Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 10 May 2025 14:24:30 +0700 Subject: [PATCH 2/4] Add wsEndpoint configuration to README and update LocalBrowserProvider to use connectOverCDP --- README.md | 35 ++++++++++++++++++++++++++++++++++ src/browser-providers/local.ts | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 463e364..0ac48c2 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,41 @@ await agent.closeAgent(); ## Usage Guide +### Config Browser with playwright + +You can configure the browser with custom options when initializing HyperAgent. The `options` parameter allows you to set various browser launch options, excluding `headless` and `channel`. Additionally, you can specify a `wsEndpoint` for connecting to a remote browser and provide custom `args`. + +```typescript +import { HyperAgent } from "@hyperbrowser/agent"; + +const agent = new HyperAgent({ + options: { + wsEndpoint: "ws://localhost:3000", // Connect to a remote browser + args: ["--no-sandbox", "--disable-setuid-sandbox"], // Custom browser arguments + slowMo: 50, // Slow down operations for debugging + }, +}); + +// Use the agent as usual +const result = await agent.executeTask( + "Navigate to example.com and extract the page title" +); +console.log(result.output); + +// Clean up +await agent.closeAgent(); +``` + +### Available Options + +- `wsEndpoint` (optional): WebSocket endpoint for connecting to a remote browser. +- `args` (optional): Array of Chromium command-line switches. +- Other options from `LaunchOptions` (excluding `headless` and `channel`). + +Refer to the [Playwright documentation](https://playwright.dev/docs/api/class-browsertype#browser-type-launch) for more details on available options. 
+``` + + ### Multi-Page Management ```typescript diff --git a/src/browser-providers/local.ts b/src/browser-providers/local.ts index 890438a..01d6ed3 100644 --- a/src/browser-providers/local.ts +++ b/src/browser-providers/local.ts @@ -11,7 +11,7 @@ export class LocalBrowserProvider extends BrowserProvider { async start(): Promise { if (this.options && 'wsEndpoint' in this.options && this.options.wsEndpoint) { - const browser = await chromium.connect(this.options.wsEndpoint,{ + const browser = await chromium.connectOverCDP(this.options.wsEndpoint,{ ...this.options }); this.session = browser; From e486b01ecaf10ac07b397f74c646ebe8793cf463 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 10 May 2025 14:53:12 +0700 Subject: [PATCH 3/4] Update README and LocalBrowserProvider to use endpointURL instead of wsEndpoint for browser configuration --- README.md | 21 +++++++-------------- src/browser-providers/local.ts | 8 ++++---- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 0ac48c2..e100109 100644 --- a/README.md +++ b/README.md @@ -130,17 +130,20 @@ await agent.closeAgent(); ## Usage Guide -### Config Browser with playwright +### Configuring the Browser with a Playwright `endpointURL` -You can configure the browser with custom options when initializing HyperAgent. The `options` parameter allows you to set various browser launch options, excluding `headless` and `channel`. Additionally, you can specify a `wsEndpoint` for connecting to a remote browser and provide custom `args`. +- **`endpointURL`**: A CDP WebSocket endpoint or HTTP URL to connect to. For example, `http://localhost:9222/` or `ws://127.0.0.1:9222/devtools/browser/387adf4c-243f-4051-a181-46798f4a46f4`. +- **`slowMo`**: Slows down operations by the specified number of milliseconds for debugging purposes. +- **`args`**: An array of custom arguments to pass to the browser instance. 
+ +These options provide flexibility for connecting to remote browsers or customizing the browser's behavior during automation. ```typescript import { HyperAgent } from "@hyperbrowser/agent"; const agent = new HyperAgent({ options: { - wsEndpoint: "ws://localhost:3000", // Connect to a remote browser - args: ["--no-sandbox", "--disable-setuid-sandbox"], // Custom browser arguments + endpointURL: "ws://localhost:3000", // Connect to a remote browser (an HTTP URL such as http://localhost:9222 also works) slowMo: 50, // Slow down operations for debugging }, }); @@ -155,16 +158,6 @@ console.log(result.output); await agent.closeAgent(); ``` -### Available Options - -- `wsEndpoint` (optional): WebSocket endpoint for connecting to a remote browser. -- `args` (optional): Array of Chromium command-line switches. -- Other options from `LaunchOptions` (excluding `headless` and `channel`). - -Refer to the [Playwright documentation](https://playwright.dev/docs/api/class-browsertype#browser-type-launch) for more details on available options. -``` - - ### Multi-Page Management ```typescript diff --git a/src/browser-providers/local.ts b/src/browser-providers/local.ts index 01d6ed3..51b3798 100644 --- a/src/browser-providers/local.ts +++ b/src/browser-providers/local.ts @@ -2,16 +2,16 @@ import { chromium, Browser, LaunchOptions, ConnectOptions } from "playwright"; import BrowserProvider from "@/types/browser-providers/types"; export class LocalBrowserProvider extends BrowserProvider { - options: (Omit, "channel"> & { wsEndpoint?: string,args? : []}) | ConnectOptions & { wsEndpoint?: string,args? : []} | undefined; + options: (Omit, "channel"> & { endpointURL?: string,args? : []}) | ConnectOptions & { endpointURL?: string,args? : []} | undefined; session: Browser | undefined; - constructor(options?: Omit, "channel"> & { wsEndpoint?: string,args? : []}) { + constructor(options?: Omit, "channel"> & { endpointURL?: string,args? 
: []}) { super(); this.options = options; } async start(): Promise { - if (this.options && 'wsEndpoint' in this.options && this.options.wsEndpoint) { - const browser = await chromium.connectOverCDP(this.options.wsEndpoint,{ + if (this.options && 'endpointURL' in this.options && this.options.endpointURL) { + const browser = await chromium.connectOverCDP(this.options.endpointURL,{ ...this.options }); this.session = browser; From 9f865cc766f3ed11bb3410398497c10e25b9b4a2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 10 May 2025 22:32:20 +0700 Subject: [PATCH 4/4] Fix initBrowser to use existing context if available --- src/agent/index.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/agent/index.ts b/src/agent/index.ts index 6e22aa9..702376d 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -101,7 +101,8 @@ export class HyperAgent { public async initBrowser(): Promise { if (!this.browser) { this.browser = await this.browserProvider.start(); - this.context = await this.browser.newContext({ + const defaltContext = this.browser.contexts().at(0) + this.context = defaltContext || await this.browser.newContext({ viewport: null, });