From d92bd1c64efa58a654bca11a7a469c95bc3b9298 Mon Sep 17 00:00:00 2001 From: QRex-v0 <217984643+QRex-v0@users.noreply.github.com> Date: Thu, 24 Jul 2025 14:43:14 -0700 Subject: [PATCH 1/3] Take UserFeedback for each step; msg not feed into LLM so far --- scripts/test.ts | 5 +-- src/agent/tools/agent.ts | 77 ++++++++++++++++++++++++---------------- src/utils/action.ts | 42 +++++++++++++++++++++- 3 files changed, 89 insertions(+), 35 deletions(-) diff --git a/scripts/test.ts b/scripts/test.ts index 30dc180..84b008b 100644 --- a/scripts/test.ts +++ b/scripts/test.ts @@ -18,10 +18,7 @@ const agent = new HyperAgent({ (async () => { const result = await agent.executeTask( - `Go to https://hiveword.com/location-name-generator and use extract action to get the top two countries; - get the capitals of these two countries. - Then go to https://flights.google.com and find the cheapest flight from the first capital to the second capital, - departing on a month from today, and returning on 45 days from today.`, + `Find the price and return policy for the Dyson Airwrap on Amazon`, { debugOnAgentOutput: (agentOutput: AgentOutput) => { console.log("\n" + chalk.cyan.bold("===== AGENT OUTPUT =====")); diff --git a/src/agent/tools/agent.ts b/src/agent/tools/agent.ts index 4792722..06af6f0 100644 --- a/src/agent/tools/agent.ts +++ b/src/agent/tools/agent.ts @@ -9,7 +9,12 @@ import { AgentActionDefinition, } from "@/types"; import { getDom } from "@/context-providers/dom"; -import { initActionScript, wrapUpActionScript } from "@/utils/action"; +import { + initActionScript, + wrapUpActionScript, + getUserFeedback, + UserFeedback, +} from "@/utils/action"; import { retry } from "@/utils/retry"; import { sleep } from "@/utils/sleep"; @@ -266,40 +271,52 @@ export const runAgentTask = async ( } } - // Build Agent Step Messages - const msgs = await buildAgentStepMessages( - baseMsgs, - taskState.steps, - taskState.task, - page, - domState, - trimmedScreenshot as string, - Object.values(ctx.variables), - ); + // Invoke LLM and get user's feedback if generateScript is enabled + let agentOutput; + let userFeedback: UserFeedback | undefined; - // Store Agent Step Messages for Debugging - if (ctx.debug) { - fs.writeFileSync( - `${debugStepDir}/msgs.json`, - JSON.stringify(msgs, null, 2), + do { + // Build Agent Step Messages + const msgs = await buildAgentStepMessages( + baseMsgs, + taskState.steps, + taskState.task, + page, + domState, + trimmedScreenshot as string, + Object.values(ctx.variables), ); - } - // Invoke LLM - const agentOutput = await retry({ - func: () => llmStructured.invoke(msgs), - }); + // Store Agent Step Messages for Debugging + if (ctx.debug) { + fs.writeFileSync( + `${debugStepDir}/msgs.json`, + JSON.stringify(msgs, null, 2), + ); + } - params?.debugOnAgentOutput?.(agentOutput); + // Invoke LLM + agentOutput = await retry({ + func: () => llmStructured.invoke(msgs), + }); - // Status Checks - if ((taskState.status as TaskStatus) == TaskStatus.PAUSED) { - await sleep(100); - continue; - } - if (endTaskStatuses.has(taskState.status)) { - break; - } + params?.debugOnAgentOutput?.(agentOutput); + + // Status Checks + if ((taskState.status as TaskStatus) == TaskStatus.PAUSED) { + await sleep(100); + continue; + } + if (endTaskStatuses.has(taskState.status)) { + break; + } + + // Only check user feedback if generateScript is enabled + if (ctx.generateScript) { + userFeedback = await getUserFeedback(); + console.log(userFeedback); + } + } while (ctx.generateScript && userFeedback && !userFeedback.approved); // Run Actions const agentStepActions = agentOutput.actions; diff --git a/src/utils/action.ts b/src/utils/action.ts index 75da0bf..9ca058b 100644 --- a/src/utils/action.ts +++ b/src/utils/action.ts @@ -1,8 +1,13 @@ +import * as inquirer from "@inquirer/prompts"; import fs from "fs"; import prettier from "prettier"; + import { HyperAgentConfig } from "@/types"; -import { ActionContext } from "@/types"; +export type UserFeedback = { + approved: boolean; + message?: string; +}; export function initActionScript( actionLogFile: string, @@ -83,9 +88,44 @@ export async function wrapUpActionScript(actionLogFile: string) { `, ); fs.appendFileSync(actionLogFile, `})();`); + + formatActionScript(actionLogFile); +} + +export async function formatActionScript(actionLogFile: string) { const formatted = await prettier.format( fs.readFileSync(actionLogFile, "utf-8"), { filepath: actionLogFile }, ); fs.writeFileSync(actionLogFile, formatted); } + +export async function getUserFeedback( + timeoutDuration: number = 10000, +): Promise { + const userApproval = await Promise.race([ + inquirer.select({ + message: "Do you like this planned step?", + choices: [ + { value: "yes", description: "Looks good, let's execute it" }, + { value: "no", description: "Provide feedback for improvement" }, + ], + }), + new Promise((resolve) => + setTimeout(() => resolve("yes"), timeoutDuration), + ), + ]); + + if (userApproval === "yes") { + return { approved: true, message: "User indicated the step went well" }; + } + + const improvementFeedback = await inquirer.input({ + message: "Please tell me what you want to improve:", + required: true, + }); + return { + approved: false, + message: `User requested improvement with feedback: "${improvementFeedback}"`, + }; +} From cb4aacb744c1bef3b46ff819ae5ddddd4f91f1cb Mon Sep 17 00:00:00 2001 From: QRex-v0 <217984643+QRex-v0@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:00:54 -0700 Subject: [PATCH 2/3] Finish the basic logic --- scripts/test.ts | 1 + src/agent/messages/builder.ts | 25 +++++++++++++++++++++++++ src/agent/messages/system-prompt.ts | 14 ++++++++++++++ src/agent/tools/agent.ts | 5 +++++ src/utils/action.ts | 1 + 5 files changed, 46 insertions(+) diff --git a/scripts/test.ts b/scripts/test.ts index 84b008b..60c4979 100644 --- a/scripts/test.ts +++ b/scripts/test.ts @@ -9,6 +9,7 @@ const agent = new HyperAgent({ debug: true, browserProvider: "Hyperbrowser", tokenLimit: 50000, + generateScript: true, hyperbrowserConfig: { sessionConfig: { useProxy: true, diff --git a/src/agent/messages/builder.ts b/src/agent/messages/builder.ts index ff21bf0..3c44317 100644 --- a/src/agent/messages/builder.ts +++ b/src/agent/messages/builder.ts @@ -5,6 +5,7 @@ import { getScrollInfo } from "./utils"; import { retry } from "@/utils/retry"; import { DOMState } from "@/context-providers/dom/types"; import { HyperVariable } from "@/types/agent/types"; +import { UserFeedback } from "@/utils/action"; export const buildAgentStepMessages = async ( baseMessages: BaseMessageLike[], @@ -14,6 +15,7 @@ export const buildAgentStepMessages = async ( domState: DOMState, screenshot: string, variables: HyperVariable[], + userFeedback?: UserFeedback, ): Promise => { const messages = [...baseMessages]; @@ -79,6 +81,29 @@ export const buildAgentStepMessages = async ( } } + // Add user feedback if provided + if (userFeedback && !userFeedback.approved && userFeedback.message) { + let feedbackContent = `=== User Feedback === +IMPORTANT: The user has provided feedback about your planned actions: +${userFeedback.message}`; + + // Include the rejected actions if available + if (userFeedback.lastPlannedActions) { + feedbackContent += `\n\nThe actions you planned that the user is providing feedback on were: +${JSON.stringify(userFeedback.lastPlannedActions.actions, null, 2)} + + Your reasoning was: "${userFeedback.lastPlannedActions.thoughts}" + Your next goal was: "${userFeedback.lastPlannedActions.nextGoal}"`; + } + + feedbackContent += `\n\nPlease carefully consider this feedback and adjust your approach accordingly. The user is helping you correct mistakes or improve your strategy.`; + + messages.push({ + role: "user", + content: feedbackContent, + }); + } + // Add elements section with DOM tree messages.push({ role: "user", diff --git a/src/agent/messages/system-prompt.ts b/src/agent/messages/system-prompt.ts index be15b97..7492809 100644 --- a/src/agent/messages/system-prompt.ts +++ b/src/agent/messages/system-prompt.ts @@ -47,6 +47,20 @@ ${OUTPUT_FORMAT} ## Common action examples: ${EXAMPLE_ACTIONS} +# User Feedback (CRITICAL - READ CAREFULLY) +When you see a "=== User Feedback ===" section in the input: +- This means a human has reviewed your previous planned actions and provided guidance to redo the actions +- You MUST: + 1. Carefully read and understand the user's feedback + 2. Acknowledge the feedback in your thoughts (e.g., "I see the user wants me to...") + 3. Adjust your strategy based on the feedback + 4. Explain in your thoughts how you're incorporating the feedback +- The feedback section includes: + * The user's specific corrections or suggestions + * Your previously planned actions that were rejected + * Your previous reasoning that needs adjustment +- NEVER ignore user feedback - it represents direct human intervention to help you succeed + # Rules 1. FINAL GOAL COMPLETION: - Only use the "complete" action when you have fully accomplished everything specified in the task diff --git a/src/agent/tools/agent.ts b/src/agent/tools/agent.ts index 06af6f0..a66c7fb 100644 --- a/src/agent/tools/agent.ts +++ b/src/agent/tools/agent.ts @@ -285,6 +285,7 @@ export const runAgentTask = async ( domState, trimmedScreenshot as string, Object.values(ctx.variables), + userFeedback, ); // Store Agent Step Messages for Debugging @@ -314,6 +315,10 @@ export const runAgentTask = async ( // Only check user feedback if generateScript is enabled if (ctx.generateScript) { userFeedback = await getUserFeedback(); + // If user didn't approve, add the rejected actions to feedback + if (!userFeedback.approved) { + userFeedback.lastPlannedActions = agentOutput; + } console.log(userFeedback); } } while (ctx.generateScript && userFeedback && !userFeedback.approved); diff --git a/src/utils/action.ts b/src/utils/action.ts index 9ca058b..5a00974 100644 --- a/src/utils/action.ts +++ b/src/utils/action.ts @@ -7,6 +7,7 @@ import { HyperAgentConfig } from "@/types"; export type UserFeedback = { approved: boolean; message?: string; + lastPlannedActions?: any; // The agent output that was rejected }; export function initActionScript( From e3aa22d13b3a5ab366a19e1b32b0585337dd0f47 Mon Sep 17 00:00:00 2001 From: QRex-v0 <217984643+QRex-v0@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:07:44 -0700 Subject: [PATCH 3/3] minor: (1) add console print for scroll action (2) async function missing await, added it back --- src/agent/actions/scroll.ts | 1 + src/utils/action.ts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/agent/actions/scroll.ts b/src/agent/actions/scroll.ts index f0d482e..7f34731 100644 --- a/src/agent/actions/scroll.ts +++ b/src/agent/actions/scroll.ts @@ -42,6 +42,7 @@ export const ScrollActionDefinition: AgentActionDefinition = { const scrollByUpDown = ${direction === "up" ? "-window.innerHeight" : direction === "down" ? "window.innerHeight" : "0"}; const scrollByLeftRight = ${direction === "left" ? "-window.innerWidth" : direction === "right" ? "window.innerWidth" : "0"}; window.scrollBy(scrollByLeftRight, scrollByUpDown); + console.log(\`Scrolled \${direction}\`); }); `; }, diff --git a/src/utils/action.ts b/src/utils/action.ts index 5a00974..b431afe 100644 --- a/src/utils/action.ts +++ b/src/utils/action.ts @@ -90,7 +90,7 @@ export async function wrapUpActionScript(actionLogFile: string) { ); fs.appendFileSync(actionLogFile, `})();`); - formatActionScript(actionLogFile); + await formatActionScript(actionLogFile); } export async function formatActionScript(actionLogFile: string) {