Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions examples/page-actions/scan.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**
* # Page Scan Example
*
* This example demonstrates how to use HyperAgent to scan a page to get all (or at least most)
* actions that can be performed on that page.
*
* ## Prerequisites
*
* 1. Node.js environment
* 2. OpenAI API key set in your .env file (OPENAI_API_KEY)
*
* ## Running the Example
*
* ```bash
* yarn ts-node -r tsconfig-paths/register examples/page-actions/scan.ts <url>
* ```
*/

import "dotenv/config";
import { HyperAgent } from "@hyperbrowser/agent";

import chalk from "chalk";
import { ChatOpenAI } from "@langchain/openai";

/**
 * Returns a promise that settles after `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 */
const sleep = (ms: number) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });

/**
 * Opens `url` in a new HyperAgent page, scans it, and prints the scan result.
 *
 * The agent is closed in a `finally` block, so it is shut down even when
 * page creation, navigation, or the scan itself throws.
 *
 * @param url - Address of the page to scan; must be a non-empty string.
 * @returns The value `page.scan()` resolves to.
 * @throws Error if `url` is empty. Failures from navigation or scanning
 *   propagate to the caller after the agent has been closed.
 */
async function runEval(url: string) {
  if (!url) {
    throw new Error("Please provide a URL as a command line argument");
  }

  const llm = new ChatOpenAI({
    apiKey: process.env.OPENAI_API_KEY,
    model: "gpt-4o",
  });

  const agent = new HyperAgent({
    llm: llm,
    debug: true,
  });

  // All page work lives in one helper so the agent is closed exactly once,
  // on both the success and the failure path, before results are printed.
  const scanPage = async () => {
    try {
      const page = await agent.newPage();
      await page.goto(url);
      // Fixed 5 s pause after navigation — presumably lets the page settle
      // before scanning; TODO confirm whether a load-state wait would do.
      await sleep(5_000);
      console.log("Done with page.");
      return await page.scan();
    } finally {
      await agent.closeAgent();
    }
  };

  const result = await scanPage();
  console.log(chalk.green.bold("\nResult:"));
  console.log(chalk.white(JSON.stringify(result, null, 2)));
  return result;
}

// Script entry point: takes the URL from the last command-line argument,
// runs the scan, and exits with status 1 if anything fails.
(async () => {
  // Drop argv[0] (node binary) and argv[1] (script path). Without this, a
  // run with no arguments would treat the script path as the URL and bypass
  // the "Please provide a URL" check in runEval.
  const cliArgs = process.argv.slice(2);
  const url = cliArgs[cliArgs.length - 1] ?? "";
  await runEval(url);
})().catch((error) => {
  console.error(chalk.red("Error:"), error);
  process.exit(1);
});
1 change: 1 addition & 0 deletions src/agent/actions/complete-validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export type CompleteValidateActionType = z.infer<
export const CompletionValidateActionDefinition: AgentActionDefinition = {
type: "taskCompleteValidation",
actionParams: CompletionValidateAction,
shouldIgnoreActionForScan: true,
run: async (
ctx: ActionContext,
action: CompleteValidateActionType
Expand Down
1 change: 1 addition & 0 deletions src/agent/actions/complete-with-output-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export const generateCompleteActionWithOutputDefinition = (
return {
type: "complete" as const,
actionParams: actionParamsSchema,
shouldIgnoreActionForScan: true,
run: async (
ctx: ActionContext,
actionParams: CompeleteActionWithOutputSchema
Expand Down
5 changes: 4 additions & 1 deletion src/agent/actions/complete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@ export const CompleteAction = z

export type CompleteActionType = z.infer<typeof CompleteAction>;

export const CompleteActionDefinition: AgentActionDefinition = {
export const CompleteActionDefinition: AgentActionDefinition<
typeof CompleteAction
> = {
type: "complete" as const,
shouldIgnoreActionForScan: true,
actionParams: CompleteAction,
run: async (): Promise<ActionOutput> => {
return { success: true, message: "Task Complete" };
Expand Down
3 changes: 2 additions & 1 deletion src/agent/actions/go-to-url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ export type GoToUrlActionType = z.infer<typeof GoToUrlAction>;

export const GoToURLActionDefinition: AgentActionDefinition = {
type: "goToUrl" as const,
shouldIgnoreActionForScan: true,
actionParams: GoToUrlAction,
run: async (ctx: ActionContext, action: GoToUrlActionType) => {
const { url } = action;
await ctx.page.goto(url);
return { success: true, message: `Navigated to ${url}` };
},
pprintAction: function(params: GoToUrlActionType): string {
pprintAction: function (params: GoToUrlActionType): string {
return `Navigate to URL: ${params.url}`;
},
};
3 changes: 2 additions & 1 deletion src/agent/actions/input-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ export const InputTextActionDefinition: AgentActionDefinition<InputTextActionTyp
type: "inputText" as const,
actionParams: InputTextAction,
run: async (ctx: ActionContext, action) => {
let { index, text } = action;
const { index } = action;
let { text } = action;
const locator = getLocator(ctx, index);
for (const variable of ctx.variables) {
text = text.replace(`<<${variable.key}>>`, variable.value);
Expand Down
6 changes: 4 additions & 2 deletions src/agent/actions/key-press.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ export const KeyPressAction = z

export type KeyPressActionType = z.infer<typeof KeyPressAction>;

export const KeyPressActionDefinition: AgentActionDefinition = {
export const KeyPressActionDefinition: AgentActionDefinition<
typeof KeyPressAction
> = {
type: "keyPress" as const,
actionParams: KeyPressAction,
run: async (ctx: ActionContext, action: KeyPressActionType) => {
Expand Down Expand Up @@ -128,7 +130,7 @@ export const KeyPressActionDefinition: AgentActionDefinition = {
message: `Pressed key "${text}"`,
};
},
pprintAction: function(params: KeyPressActionType): string {
pprintAction: function (params: KeyPressActionType): string {
return `Press key "${params.text}"`;
},
};
7 changes: 5 additions & 2 deletions src/agent/actions/page-back.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@ export const PageBackAction = z

export type PageBackActionType = z.infer<typeof PageBackAction>;

export const PageBackActionDefinition: AgentActionDefinition = {
export const PageBackActionDefinition: AgentActionDefinition<
typeof PageBackAction
> = {
type: "pageBack" as const,
shouldIgnoreActionForScan: true,
actionParams: PageBackAction,
run: async (ctx: ActionContext) => {
await ctx.page.goBack();
return { success: true, message: "Navigated back to the previous page" };
},
pprintAction: function(): string {
pprintAction: function (): string {
return "Navigate back to previous page";
},
};
7 changes: 5 additions & 2 deletions src/agent/actions/page-forward.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@ export const PageForwardAction = z

export type PageForwardActionType = z.infer<typeof PageForwardAction>;

export const PageForwardActionDefinition: AgentActionDefinition = {
export const PageForwardActionDefinition: AgentActionDefinition<
typeof PageForwardAction
> = {
type: "pageForward" as const,
shouldIgnoreActionForScan: true,
actionParams: PageForwardAction,
run: async (ctx: ActionContext) => {
await ctx.page.goForward();
return { success: true, message: "Navigated forward to the next page" };
},
pprintAction: function(): string {
pprintAction: function (): string {
return "Navigate forward to next page";
},
};
2 changes: 1 addition & 1 deletion src/agent/actions/pdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export const PDFAction = z

export type PDFActionType = z.infer<typeof PDFAction>;

export const PDFActionDefinition: AgentActionDefinition = {
export const PDFActionDefinition: AgentActionDefinition<typeof PDFAction> = {
type: "analyzePdf" as const,
actionParams: PDFAction,
run: async (ctx: ActionContext, action: PDFActionType) => {
Expand Down
7 changes: 5 additions & 2 deletions src/agent/actions/refresh-page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@ export const RefreshPageAction = z

export type RefreshPageActionType = z.infer<typeof RefreshPageAction>;

export const RefreshPageActionDefinition: AgentActionDefinition = {
export const RefreshPageActionDefinition: AgentActionDefinition<
typeof RefreshPageAction
> = {
type: "refreshPage" as const,
shouldIgnoreActionForScan: true,
actionParams: RefreshPageAction,
run: async (ctx: ActionContext) => {
await ctx.page.reload();
return { success: true, message: "Succesfully refreshed a page." };
},
pprintAction: function(): string {
pprintAction: function (): string {
return "Refresh current page";
},
};
7 changes: 5 additions & 2 deletions src/agent/actions/scroll.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ export const ScrollAction = z

export type ScrollActionType = z.infer<typeof ScrollAction>;

export const ScrollActionDefinition: AgentActionDefinition = {
export const ScrollActionDefinition: AgentActionDefinition<
typeof ScrollAction
> = {
type: "scroll" as const,
shouldIgnoreActionForScan: true,
actionParams: ScrollAction,
run: async (ctx: ActionContext, action: ScrollActionType) => {
const { direction } = action;
Expand All @@ -32,7 +35,7 @@ export const ScrollActionDefinition: AgentActionDefinition = {
}
return { success: true, message: `Scrolled ${direction}` };
},
pprintAction: function(params: ScrollActionType): string {
pprintAction: function (params: ScrollActionType): string {
return `Scroll ${params.direction}`;
},
};
2 changes: 1 addition & 1 deletion src/agent/actions/select-option.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export const SelectOptionAction = z
.describe("The numeric index of the element to select an option."),
text: z.string().describe("The text of the option to select."),
})
.describe("Select an option from a dropdown element");
.describe("Select an option from a dropdown element. Use this only when interacting with a dropdown, select element or something which acts similarly.");

export type SelectOptionActionType = z.infer<typeof SelectOptionAction>;

Expand Down
3 changes: 2 additions & 1 deletion src/agent/actions/thinking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export type ThinkingActionType = z.infer<typeof ThinkingAction>;

export const ThinkingActionDefinition: AgentActionDefinition = {
type: "thinkAction" as const,
shouldIgnoreActionForScan: true,
actionParams: ThinkingAction,
run: async (ctx: ActionContext, action: ThinkingActionType) => {
const { thought } = action;
Expand All @@ -25,7 +26,7 @@ export const ThinkingActionDefinition: AgentActionDefinition = {
message: `A simple thought process about your next steps. You thought about: ${thought}`,
};
},
pprintAction: function(params: ThinkingActionType): string {
pprintAction: function (params: ThinkingActionType): string {
return `Think about: "${params.thought}"`;
},
};
8 changes: 8 additions & 0 deletions src/agent/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import { runAgentTask } from "./tools/agent";
import { HyperPage, HyperVariable } from "@/types/agent/types";
import { z } from "zod";
import { ErrorEmitter } from "@/utils";
import { PageScanFn } from "./tools/page-actions/scan";

export class HyperAgent<T extends BrowserProviders = "Local"> {
private llm: BaseChatModel;
Expand Down Expand Up @@ -592,6 +593,13 @@ export class HyperAgent<T extends BrowserProviders = "Local"> {
return JSON.parse(res.output as string);
}
};
hyperPage.scan = () =>
PageScanFn({
page,
llm: this.llm,
tokenLimit: this.tokenLimit,
actions: this.getActions(),
});
return hyperPage;
}
}
Loading