Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions scripts/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const agent = new HyperAgent({
debug: true,
browserProvider: "Hyperbrowser",
tokenLimit: 50000,
generateScript: true,
hyperbrowserConfig: {
sessionConfig: {
useProxy: true,
Expand All @@ -18,10 +19,7 @@ const agent = new HyperAgent({

(async () => {
const result = await agent.executeTask(
`Go to https://hiveword.com/location-name-generator and use extract action to get the top two countries;
get the capitals of these two countries.
Then go to https://flights.google.com and find the cheapest flight from the first capital to the second capital,
departing on a month from today, and returning on 45 days from today.`,
`Find the price and return policy for the Dyson Airwrap on Amazon`,
{
debugOnAgentOutput: (agentOutput: AgentOutput) => {
console.log("\n" + chalk.cyan.bold("===== AGENT OUTPUT ====="));
Expand Down
1 change: 1 addition & 0 deletions src/agent/actions/scroll.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export const ScrollActionDefinition: AgentActionDefinition = {
const scrollByUpDown = ${direction === "up" ? "-window.innerHeight" : direction === "down" ? "window.innerHeight" : "0"};
const scrollByLeftRight = ${direction === "left" ? "-window.innerWidth" : direction === "right" ? "window.innerWidth" : "0"};
window.scrollBy(scrollByLeftRight, scrollByUpDown);
console.log(\`Scrolled \${direction}\`);
});
`;
},
Expand Down
25 changes: 25 additions & 0 deletions src/agent/messages/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { getScrollInfo } from "./utils";
import { retry } from "@/utils/retry";
import { DOMState } from "@/context-providers/dom/types";
import { HyperVariable } from "@/types/agent/types";
import { UserFeedback } from "@/utils/action";

export const buildAgentStepMessages = async (
baseMessages: BaseMessageLike[],
Expand All @@ -14,6 +15,7 @@ export const buildAgentStepMessages = async (
domState: DOMState,
screenshot: string,
variables: HyperVariable[],
userFeedback?: UserFeedback,
): Promise<BaseMessageLike[]> => {
const messages = [...baseMessages];

Expand Down Expand Up @@ -79,6 +81,29 @@ export const buildAgentStepMessages = async (
}
}

// Add user feedback if provided
if (userFeedback && !userFeedback.approved && userFeedback.message) {
let feedbackContent = `=== User Feedback ===
IMPORTANT: The user has provided feedback about your planned actions:
${userFeedback.message}`;

// Include the rejected actions if available
if (userFeedback.lastPlannedActions) {
feedbackContent += `\n\nThe actions you planned that the user is providing feedback on were:
${JSON.stringify(userFeedback.lastPlannedActions.actions, null, 2)}

Your reasoning was: "${userFeedback.lastPlannedActions.thoughts}"
Your next goal was: "${userFeedback.lastPlannedActions.nextGoal}"`;
}

feedbackContent += `\n\nPlease carefully consider this feedback and adjust your approach accordingly. The user is helping you correct mistakes or improve your strategy.`;

messages.push({
role: "user",
content: feedbackContent,
});
}

// Add elements section with DOM tree
messages.push({
role: "user",
Expand Down
14 changes: 14 additions & 0 deletions src/agent/messages/system-prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ ${OUTPUT_FORMAT}
## Common action examples:
${EXAMPLE_ACTIONS}

# User Feedback (CRITICAL - READ CAREFULLY)
When you see a "=== User Feedback ===" section in the input:
- This means a human has reviewed your previous planned actions and provided guidance to redo the actions
- You MUST:
1. Carefully read and understand the user's feedback
2. Acknowledge the feedback in your thoughts (e.g., "I see the user wants me to...")
3. Adjust your strategy based on the feedback
4. Explain in your thoughts how you're incorporating the feedback
- The feedback section includes:
* The user's specific corrections or suggestions
* Your previously planned actions that were rejected
* Your previous reasoning that needs adjustment
- NEVER ignore user feedback - it represents direct human intervention to help you succeed

# Rules
1. FINAL GOAL COMPLETION:
- Only use the "complete" action when you have fully accomplished everything specified in the task
Expand Down
82 changes: 52 additions & 30 deletions src/agent/tools/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ import {
AgentActionDefinition,
} from "@/types";
import { getDom } from "@/context-providers/dom";
import { initActionScript, wrapUpActionScript } from "@/utils/action";
import {
initActionScript,
wrapUpActionScript,
getUserFeedback,
UserFeedback,
} from "@/utils/action";
import { retry } from "@/utils/retry";
import { sleep } from "@/utils/sleep";

Expand Down Expand Up @@ -266,40 +271,57 @@ export const runAgentTask = async (
}
}

// Build Agent Step Messages
const msgs = await buildAgentStepMessages(
baseMsgs,
taskState.steps,
taskState.task,
page,
domState,
trimmedScreenshot as string,
Object.values(ctx.variables),
);
// Invoke LLM and get user's feedback if generateScript is enabled
let agentOutput;
let userFeedback: UserFeedback | undefined;

// Store Agent Step Messages for Debugging
if (ctx.debug) {
fs.writeFileSync(
`${debugStepDir}/msgs.json`,
JSON.stringify(msgs, null, 2),
do {
// Build Agent Step Messages
const msgs = await buildAgentStepMessages(
baseMsgs,
taskState.steps,
taskState.task,
page,
domState,
trimmedScreenshot as string,
Object.values(ctx.variables),
userFeedback,
);
}

// Invoke LLM
const agentOutput = await retry({
func: () => llmStructured.invoke(msgs),
});
// Store Agent Step Messages for Debugging
if (ctx.debug) {
fs.writeFileSync(
`${debugStepDir}/msgs.json`,
JSON.stringify(msgs, null, 2),
);
}

params?.debugOnAgentOutput?.(agentOutput);
// Invoke LLM
agentOutput = await retry({
func: () => llmStructured.invoke(msgs),
});

// Status Checks
if ((taskState.status as TaskStatus) == TaskStatus.PAUSED) {
await sleep(100);
continue;
}
if (endTaskStatuses.has(taskState.status)) {
break;
}
params?.debugOnAgentOutput?.(agentOutput);

// Status Checks
if ((taskState.status as TaskStatus) == TaskStatus.PAUSED) {
await sleep(100);
continue;
}
if (endTaskStatuses.has(taskState.status)) {
break;
}

// Only check user feedback if generateScript is enabled
if (ctx.generateScript) {
userFeedback = await getUserFeedback();
// If user didn't approve, add the rejected actions to feedback
if (!userFeedback.approved) {
userFeedback.lastPlannedActions = agentOutput;
}
console.log(userFeedback);
}
} while (ctx.generateScript && userFeedback && !userFeedback.approved);

// Run Actions
const agentStepActions = agentOutput.actions;
Expand Down
43 changes: 42 additions & 1 deletion src/utils/action.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import * as inquirer from "@inquirer/prompts";
import fs from "fs";
import prettier from "prettier";

import { HyperAgentConfig } from "@/types";

import { ActionContext } from "@/types";
/**
 * Result of asking the human operator to review a planned agent step.
 */
export type UserFeedback = {
// True when the step was accepted (getUserFeedback also reports true when its prompt times out).
approved: boolean;
// Human-readable summary of the decision; on rejection it embeds the user's free-text feedback.
message?: string;
// Not set by getUserFeedback itself; the caller attaches the rejected agent output here.
// NOTE(review): typed as `any`; consumers read `.actions`, `.thoughts` and `.nextGoal` from it — consider a precise type.
lastPlannedActions?: any; // The agent output that was rejected
};

export function initActionScript(
actionLogFile: string,
Expand Down Expand Up @@ -83,9 +89,44 @@ export async function wrapUpActionScript(actionLogFile: string) {
`,
);
fs.appendFileSync(actionLogFile, `})();`);

await formatActionScript(actionLogFile);
}

/**
 * Rewrites the generated action script in place after running it through Prettier.
 *
 * @param actionLogFile Path of the script file to format. It is also handed to
 *   Prettier as `filepath`.
 */
export async function formatActionScript(actionLogFile: string) {
  const rawScript = fs.readFileSync(actionLogFile, "utf-8");
  const prettierOptions = { filepath: actionLogFile };

  const prettyScript = await prettier.format(rawScript, prettierOptions);

  fs.writeFileSync(actionLogFile, prettyScript);
}

/**
 * Asks the operator whether the agent's planned step should be executed.
 *
 * A yes/no prompt is shown. If nothing is selected within `timeoutDuration`
 * the step is treated as approved so unattended runs keep moving. On "no" a
 * second prompt collects free-text feedback, which is returned in `message`.
 *
 * @param timeoutDuration Milliseconds to wait for the yes/no answer before
 *   auto-approving. Defaults to 10000.
 * @returns `{ approved: true, ... }` on "yes" or timeout, otherwise
 *   `{ approved: false, message }` carrying the user's feedback.
 */
export async function getUserFeedback(
  timeoutDuration: number = 10000,
): Promise<UserFeedback> {
  // Lets us tear the prompt down if the timeout wins the race; otherwise the
  // abandoned prompt keeps listening on stdin and clashes with later prompts.
  const promptAbort = new AbortController();
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;

  const autoApprove = new Promise<string>((resolve) => {
    timeoutHandle = setTimeout(() => resolve("yes"), timeoutDuration);
  });

  let userApproval: string;
  try {
    userApproval = await Promise.race([
      inquirer.select(
        {
          message: "Do you like this planned step?",
          choices: [
            { value: "yes", description: "Looks good, let's execute it" },
            { value: "no", description: "Provide feedback for improvement" },
          ],
        },
        { signal: promptAbort.signal },
      ),
      autoApprove,
    ]);
  } finally {
    // Always release the timer so it cannot keep the event loop alive after a
    // quick answer, and close the prompt if it is still pending. Aborting an
    // already-settled prompt is a no-op; a late rejection is absorbed by the
    // handlers Promise.race attached above.
    clearTimeout(timeoutHandle);
    promptAbort.abort();
  }

  if (userApproval === "yes") {
    return { approved: true, message: "User indicated the step went well" };
  }

  const improvementFeedback = await inquirer.input({
    message: "Please tell me what you want to improve:",
    required: true,
  });
  return {
    approved: false,
    message: `User requested improvement with feedback: "${improvementFeedback}"`,
  };
}