From d425bddf3fb37fcfc6651351eba7ce4ab2c1b6b0 Mon Sep 17 00:00:00 2001
From: Shri Sukhani
Date: Thu, 15 Jan 2026 16:15:32 -0800
Subject: [PATCH] Add batch fetch web API

---
 src/services/web/index.ts    | 204 ++++++++++++++++++++++++++++++++++-
 src/types/index.ts           |   8 ++
 src/types/web/batch-fetch.ts |  50 +++++++++
 3 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 src/types/web/batch-fetch.ts

diff --git a/src/services/web/index.ts b/src/services/web/index.ts
index 6e18b5c..bf05bfa 100644
--- a/src/services/web/index.ts
+++ b/src/services/web/index.ts
@@ -4,10 +4,212 @@ import { BaseService } from "../base";
 import { HyperbrowserError } from "../../client";
 import { FetchParams, FetchResponse } from "../../types/web/fetch";
 import { WebSearchParams, WebSearchResponse } from "../../types/web/search";
+import {
+  BatchFetchJobResponse,
+  BatchFetchJobStatusResponse,
+  GetBatchFetchJobParams,
+  StartBatchFetchJobParams,
+  StartBatchFetchJobResponse,
+} from "../../types/web/batch-fetch";
 import { FetchOutputJson } from "../../types/web/common";
-import { isZodSchema } from "../../utils";
+import { isZodSchema, sleep } from "../../utils";
+import { POLLING_ATTEMPTS } from "../../types/constants";
+
+/**
+ * Client for the batch fetch endpoints under `/web/batch-fetch`.
+ */
+export class BatchFetchService extends BaseService {
+  /**
+   * Start a new batch fetch job
+   * @param params The parameters for the batch fetch job
+   * @returns The response containing the ID of the started job
+   * @throws HyperbrowserError if the request fails
+   */
+  async start(params: StartBatchFetchJobParams): Promise<StartBatchFetchJobResponse> {
+    try {
+      // Handle JSON schema serialization if needed (similar to Python SDK).
+      // Note: Zod schemas are replaced in place on the caller's params object.
+      if (params.outputs?.formats) {
+        for (const output of params.outputs.formats) {
+          if (typeof output === "object" && "type" in output && output.type === "json") {
+            const jsonOutput = output as FetchOutputJson;
+            if (jsonOutput.schema) {
+              if (isZodSchema(jsonOutput.schema)) {
+                try {
+                  output.schema = toJSONSchema(jsonOutput.schema);
+                } catch {
+                  // Fall back to the alternative converter if toJSONSchema throws
+                  output.schema = zodToJsonSchema(jsonOutput.schema as any);
+                }
+              }
+            }
+          }
+        }
+      }
+
+      return await this.request<StartBatchFetchJobResponse>("/web/batch-fetch", {
+        method: "POST",
+        body: JSON.stringify(params),
+      });
+    } catch (error) {
+      if (error instanceof HyperbrowserError) {
+        throw error;
+      }
+      throw new HyperbrowserError("Failed to start batch fetch job", undefined);
+    }
+  }
+
+  /**
+   * Get the status of a batch fetch job
+   * @param id The ID of the batch fetch job to get
+   * @returns The current status of the job
+   * @throws HyperbrowserError if the request fails
+   */
+  async getStatus(id: string): Promise<BatchFetchJobStatusResponse> {
+    try {
+      return await this.request<BatchFetchJobStatusResponse>(`/web/batch-fetch/${id}/status`);
+    } catch (error) {
+      if (error instanceof HyperbrowserError) {
+        throw error;
+      }
+      throw new HyperbrowserError(`Failed to get batch fetch job ${id} status`, undefined);
+    }
+  }
+
+  /**
+   * Get the details of a batch fetch job
+   * @param id The ID of the batch fetch job to get
+   * @param params Optional parameters to filter the batch fetch job
+   * @returns The job details and the requested page batch of results
+   * @throws HyperbrowserError if the request fails
+   */
+  async get(id: string, params?: GetBatchFetchJobParams): Promise<BatchFetchJobResponse> {
+    try {
+      return await this.request<BatchFetchJobResponse>(`/web/batch-fetch/${id}`, undefined, {
+        page: params?.page,
+        batchSize: params?.batchSize,
+      });
+    } catch (error) {
+      if (error instanceof HyperbrowserError) {
+        throw error;
+      }
+      throw new HyperbrowserError(`Failed to get batch fetch job ${id}`, undefined);
+    }
+  }
+
+  /**
+   * Start a batch fetch job and wait for it to complete
+   * @param params The parameters for the batch fetch job
+   * @param returnAllPages Whether to return all pages in the batch fetch job response
+   * @returns The finished job, with every page batch merged when returnAllPages is true
+   * @throws HyperbrowserError if the job cannot be started, or if POLLING_ATTEMPTS
+   *   consecutive requests fail while polling or retrieving results
+   */
+  async startAndWait(
+    params: StartBatchFetchJobParams,
+    returnAllPages: boolean = true
+  ): Promise<BatchFetchJobResponse> {
+    const job = await this.start(params);
+    const jobId = job.jobId;
+    if (!jobId) {
+      throw new HyperbrowserError("Failed to start batch fetch job", undefined);
+    }
+
+    // Poll every 2s until the job is reported as completed or failed.
+    let failures = 0;
+    let jobStatus: BatchFetchJobResponse["status"] = "pending";
+    while (true) {
+      try {
+        const { status } = await this.getStatus(jobId);
+        if (status === "completed" || status === "failed") {
+          jobStatus = status;
+          break;
+        }
+        // Count consecutive failures only, matching the page loop below, so
+        // isolated transient errors do not add up over a long-running job.
+        failures = 0;
+      } catch (error) {
+        failures++;
+        if (failures >= POLLING_ATTEMPTS) {
+          throw new HyperbrowserError(
+            `Failed to poll batch fetch job ${jobId} after ${POLLING_ATTEMPTS} attempts: ${error}`
+          );
+        }
+      }
+      await sleep(2000);
+    }
+
+    failures = 0;
+    if (!returnAllPages) {
+      // Fetch once without pagination params, retrying transient failures.
+      while (true) {
+        try {
+          return await this.get(jobId);
+        } catch (error) {
+          failures++;
+          if (failures >= POLLING_ATTEMPTS) {
+            throw new HyperbrowserError(
+              `Failed to get batch fetch job ${jobId} after ${POLLING_ATTEMPTS} attempts: ${error}`
+            );
+          }
+        }
+        await sleep(500);
+      }
+    }
+
+    failures = 0;
+
+    const jobResponse: BatchFetchJobResponse = {
+      jobId,
+      status: jobStatus,
+      data: [],
+      currentPageBatch: 0,
+      totalPageBatches: 0,
+      totalPages: 0,
+      batchSize: 100,
+    };
+    let firstCheck = true;
+
+    // totalPageBatches is unknown until the first page arrives, hence firstCheck.
+    while (firstCheck || jobResponse.currentPageBatch < jobResponse.totalPageBatches) {
+      const nextPage = jobResponse.currentPageBatch + 1;
+      try {
+        const tmpJobResponse = await this.get(jobId, {
+          page: nextPage,
+          batchSize: 100,
+        });
+        if (tmpJobResponse.data) {
+          jobResponse.data?.push(...tmpJobResponse.data);
+        }
+        jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch;
+        jobResponse.totalPages = tmpJobResponse.totalPages;
+        jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches;
+        jobResponse.batchSize = tmpJobResponse.batchSize;
+        jobResponse.error = tmpJobResponse.error;
+        failures = 0;
+        firstCheck = false;
+      } catch (error) {
+        failures++;
+        if (failures >= POLLING_ATTEMPTS) {
+          // Report the page that was requested, not the last page received.
+          throw new HyperbrowserError(
+            `Failed to get batch page ${nextPage} for job ${jobId} after ${POLLING_ATTEMPTS} attempts: ${error}`
+          );
+        }
+      }
+      await sleep(500);
+    }
+    return jobResponse;
+  }
+}
 
 export class WebService extends BaseService {
+  public readonly batchFetch: BatchFetchService;
+
+  constructor(apiKey: string, baseUrl: string, timeout: number) {
+    super(apiKey, baseUrl, timeout);
+    this.batchFetch = new BatchFetchService(apiKey, baseUrl, timeout);
+  }
   /**
    * Fetch a URL and extract content
    * @param params The parameters for the fetch request
diff --git a/src/types/index.ts b/src/types/index.ts
index 4dd055a..bbddfbb 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -180,6 +180,14 @@ export {
   ComputerActionResponseDataClipboardText,
 } from "./computer-action";
 export { FetchParams, FetchResponse, FetchResponseData, FetchStatus } from "./web/fetch";
+export {
+  StartBatchFetchJobParams,
+  StartBatchFetchJobResponse,
+  GetBatchFetchJobParams,
+  BatchFetchJobStatusResponse,
+  BatchFetchJobResponse,
+  BatchFetchJobStatus,
+} from "./web/batch-fetch";
 export {
   WebSearchParams,
   WebSearchResponse,
diff --git a/src/types/web/batch-fetch.ts b/src/types/web/batch-fetch.ts
new file mode 100644
index 0000000..845fc64
--- /dev/null
+++ b/src/types/web/batch-fetch.ts
@@ -0,0 +1,50 @@
+import {
+  FetchStealthMode,
+  FetchOutputOptions,
+  FetchBrowserOptions,
+  FetchNavigationOptions,
+  FetchCacheOptions,
+  PageData,
+} from "./common";
+import { FetchStatus } from "./fetch";
+
+/** Status of a batch fetch job; an alias of the single-fetch FetchStatus. */
+export type BatchFetchJobStatus = FetchStatus;
+
+/** Request body for starting a batch fetch job. */
+export interface StartBatchFetchJobParams {
+  urls: string[];
+  stealth?: FetchStealthMode;
+  outputs?: FetchOutputOptions;
+  browser?: FetchBrowserOptions;
+  navigation?: FetchNavigationOptions;
+  cache?: FetchCacheOptions;
+}
+
+/** Pagination options for retrieving a batch fetch job. */
+export interface GetBatchFetchJobParams {
+  page?: number;
+  batchSize?: number;
+}
+
+/** Response returned when a batch fetch job is started. */
+export interface StartBatchFetchJobResponse {
+  jobId: string;
+}
+
+/** Response of the batch fetch job status endpoint. */
+export interface BatchFetchJobStatusResponse {
+  status: BatchFetchJobStatus;
+}
+
+/** A batch fetch job together with one page batch of its results. */
+export interface BatchFetchJobResponse {
+  jobId: string;
+  status: BatchFetchJobStatus;
+  error?: string;
+  data?: PageData[];
+  totalPages: number;
+  totalPageBatches: number;
+  currentPageBatch: number;
+  batchSize: number;
+}