Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 181 additions & 1 deletion src/services/web/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,190 @@ import { BaseService } from "../base";
import { HyperbrowserError } from "../../client";
import { FetchParams, FetchResponse } from "../../types/web/fetch";
import { WebSearchParams, WebSearchResponse } from "../../types/web/search";
import {
BatchFetchJobResponse,
BatchFetchJobStatusResponse,
GetBatchFetchJobParams,
StartBatchFetchJobParams,
StartBatchFetchJobResponse,
} from "../../types/web/batch-fetch";
import { FetchOutputJson } from "../../types/web/common";
import { isZodSchema } from "../../utils";
import { isZodSchema, sleep } from "../../utils";
import { POLLING_ATTEMPTS } from "../../types/constants";

/**
 * Client for the `/web/batch-fetch` endpoints: start a job, query its status,
 * read its (paginated) results, or do all three with {@link startAndWait}.
 */
export class BatchFetchService extends BaseService {
  /**
   * Start a new batch fetch job.
   *
   * Zod schemas attached to `json` output formats are converted to JSON Schema
   * before the request body is serialized. The conversion works on a copy, so
   * the caller's `params` object is never modified.
   *
   * @param params The parameters for the batch fetch job
   * @returns The start response, which carries the new job's `jobId`
   * @throws HyperbrowserError if the request (or schema conversion) fails
   */
  async start(params: StartBatchFetchJobParams): Promise<StartBatchFetchJobResponse> {
    try {
      return await this.request<StartBatchFetchJobResponse>("/web/batch-fetch", {
        method: "POST",
        body: JSON.stringify(this.serializeParams(params)),
      });
    } catch (error) {
      if (error instanceof HyperbrowserError) {
        throw error;
      }
      throw new HyperbrowserError("Failed to start batch fetch job", undefined);
    }
  }

  /**
   * Get the status of a batch fetch job.
   *
   * @param id The ID of the batch fetch job to get
   * @returns The status response for the job
   * @throws HyperbrowserError if the request fails
   */
  async getStatus(id: string): Promise<BatchFetchJobStatusResponse> {
    try {
      return await this.request<BatchFetchJobStatusResponse>(`/web/batch-fetch/${id}/status`);
    } catch (error) {
      if (error instanceof HyperbrowserError) {
        throw error;
      }
      throw new HyperbrowserError(`Failed to get batch fetch job ${id} status`, undefined);
    }
  }

  /**
   * Get the details of a batch fetch job.
   *
   * @param id The ID of the batch fetch job to get
   * @param params Optional `page` / `batchSize` selection, forwarded as the
   *   third argument of `this.request` (presumably query parameters — confirm
   *   against `BaseService.request`)
   * @returns The job details for the requested page batch
   * @throws HyperbrowserError if the request fails
   */
  async get(id: string, params?: GetBatchFetchJobParams): Promise<BatchFetchJobResponse> {
    try {
      return await this.request<BatchFetchJobResponse>(`/web/batch-fetch/${id}`, undefined, {
        page: params?.page,
        batchSize: params?.batchSize,
      });
    } catch (error) {
      if (error instanceof HyperbrowserError) {
        throw error;
      }
      throw new HyperbrowserError(`Failed to get batch fetch job ${id}`, undefined);
    }
  }

  /**
   * Start a batch fetch job and wait for it to complete.
   *
   * The job is polled until its status is `"completed"` or `"failed"`; the
   * results are then fetched either as a single default `get` call or, when
   * `returnAllPages` is true, by walking every page batch and merging the data.
   *
   * @param params The parameters for the batch fetch job
   * @param returnAllPages Whether to return all pages in the batch fetch job response
   * @returns The job response (with the data of every page batch when
   *   `returnAllPages` is true)
   * @throws HyperbrowserError if the job cannot be started, or if polling or
   *   fetching fails `POLLING_ATTEMPTS` times in a row
   */
  async startAndWait(
    params: StartBatchFetchJobParams,
    returnAllPages: boolean = true
  ): Promise<BatchFetchJobResponse> {
    const job = await this.start(params);
    const jobId = job.jobId;
    if (!jobId) {
      throw new HyperbrowserError("Failed to start batch fetch job", undefined);
    }

    const jobStatus = await this.waitForTerminalStatus(jobId);
    if (!returnAllPages) {
      return this.getWithRetry(jobId);
    }
    return this.collectAllPages(jobId, jobStatus);
  }

  /**
   * Build the request payload for `start`: a copy of `params` in which every
   * Zod schema on a `json` output format is replaced by its JSON Schema form.
   * Returns `params` itself when there are no output formats to inspect.
   */
  private serializeParams(params: StartBatchFetchJobParams): StartBatchFetchJobParams {
    const outputs = params.outputs;
    if (!outputs?.formats) {
      return params;
    }

    const formats = outputs.formats.map((output) => {
      if (typeof output === "object" && "type" in output && output.type === "json") {
        const jsonOutput = output as FetchOutputJson;
        if (jsonOutput.schema && isZodSchema(jsonOutput.schema)) {
          let schema: FetchOutputJson["schema"];
          try {
            schema = toJSONSchema(jsonOutput.schema);
          } catch {
            // Fall back to the second converter when the first one rejects the schema.
            schema = zodToJsonSchema(jsonOutput.schema as any);
          }
          // New object instead of assigning to `output.schema`, so the
          // caller's Zod schema stays in place.
          return { ...jsonOutput, schema };
        }
      }
      return output;
    });

    return { ...params, outputs: { ...outputs, formats } };
  }

  /**
   * Poll the job status until it is `"completed"` or `"failed"` and return it.
   * Gives up after `POLLING_ATTEMPTS` consecutive failed status requests.
   */
  private async waitForTerminalStatus(jobId: string): Promise<BatchFetchJobResponse["status"]> {
    let failures = 0;
    while (true) {
      try {
        const { status } = await this.getStatus(jobId);
        if (status === "completed" || status === "failed") {
          return status;
        }
        // A successful poll ends the failure streak; without this reset,
        // isolated transient errors would add up over a long-running job.
        failures = 0;
      } catch (error) {
        failures++;
        if (failures >= POLLING_ATTEMPTS) {
          throw new HyperbrowserError(
            `Failed to poll batch fetch job ${jobId} after ${POLLING_ATTEMPTS} attempts: ${error}`
          );
        }
      }
      await sleep(2000);
    }
  }

  /**
   * Fetch the job with default paging, retrying until it succeeds or
   * `POLLING_ATTEMPTS` requests have failed.
   */
  private async getWithRetry(jobId: string): Promise<BatchFetchJobResponse> {
    let failures = 0;
    while (true) {
      try {
        return await this.get(jobId);
      } catch (error) {
        failures++;
        if (failures >= POLLING_ATTEMPTS) {
          throw new HyperbrowserError(
            `Failed to get batch fetch job ${jobId} after ${POLLING_ATTEMPTS} attempts: ${error}`
          );
        }
      }
      await sleep(500);
    }
  }

  /**
   * Fetch every page batch of the job (100 entries per request) and merge the
   * data into a single response. A page is retried until it succeeds or
   * `POLLING_ATTEMPTS` consecutive requests have failed.
   */
  private async collectAllPages(
    jobId: string,
    jobStatus: BatchFetchJobResponse["status"]
  ): Promise<BatchFetchJobResponse> {
    const jobResponse: BatchFetchJobResponse = {
      jobId,
      status: jobStatus,
      data: [],
      currentPageBatch: 0,
      totalPageBatches: 0,
      totalPages: 0,
      batchSize: 100,
    };
    let failures = 0;
    // The total number of page batches is unknown until the first response
    // arrives, so the first request is always made.
    let firstCheck = true;

    while (firstCheck || jobResponse.currentPageBatch < jobResponse.totalPageBatches) {
      const nextPage = jobResponse.currentPageBatch + 1;
      try {
        const tmpJobResponse = await this.get(jobId, {
          page: nextPage,
          batchSize: 100,
        });
        if (tmpJobResponse.data) {
          jobResponse.data?.push(...tmpJobResponse.data);
        }
        jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch;
        jobResponse.totalPages = tmpJobResponse.totalPages;
        jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches;
        jobResponse.batchSize = tmpJobResponse.batchSize;
        jobResponse.error = tmpJobResponse.error;
        failures = 0;
        firstCheck = false;
      } catch (error) {
        failures++;
        if (failures >= POLLING_ATTEMPTS) {
          // Report the page that was requested, not the last one that succeeded.
          throw new HyperbrowserError(
            `Failed to get batch page ${nextPage} for job ${jobId} after ${POLLING_ATTEMPTS} attempts: ${error}`
          );
        }
      }
      await sleep(500);
    }
    return jobResponse;
  }
}

export class WebService extends BaseService {
public readonly batchFetch: BatchFetchService;

constructor(apiKey: string, baseUrl: string, timeout: number) {
super(apiKey, baseUrl, timeout);
this.batchFetch = new BatchFetchService(apiKey, baseUrl, timeout);
}
/**
* Fetch a URL and extract content
* @param params The parameters for the fetch request
Expand Down
8 changes: 8 additions & 0 deletions src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,14 @@ export {
ComputerActionResponseDataClipboardText,
} from "./computer-action";
export { FetchParams, FetchResponse, FetchResponseData, FetchStatus } from "./web/fetch";
export {
StartBatchFetchJobParams,
StartBatchFetchJobResponse,
GetBatchFetchJobParams,
BatchFetchJobStatusResponse,
BatchFetchJobResponse,
BatchFetchJobStatus,
} from "./web/batch-fetch";
export {
WebSearchParams,
WebSearchResponse,
Expand Down
44 changes: 44 additions & 0 deletions src/types/web/batch-fetch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import {
FetchStealthMode,
FetchOutputOptions,
FetchBrowserOptions,
FetchNavigationOptions,
FetchCacheOptions,
PageData,
} from "./common";
import { FetchStatus } from "./fetch";

/** Status of a batch fetch job; an alias of the `FetchStatus` type imported from `./fetch`. */
export type BatchFetchJobStatus = FetchStatus;

/**
 * Request parameters for starting a batch fetch job.
 *
 * Only `urls` is required; the remaining option groups are optional and use
 * the option types declared in `./common`.
 */
export interface StartBatchFetchJobParams {
  /** The URLs the job should fetch. */
  urls: string[];
  /** Output options for the fetched pages (see `FetchOutputOptions`). */
  outputs?: FetchOutputOptions;
  /** Browser options (see `FetchBrowserOptions`). */
  browser?: FetchBrowserOptions;
  /** Navigation options (see `FetchNavigationOptions`). */
  navigation?: FetchNavigationOptions;
  /** Cache options (see `FetchCacheOptions`). */
  cache?: FetchCacheOptions;
  /** Stealth mode selection (see `FetchStealthMode`). */
  stealth?: FetchStealthMode;
}

/**
 * Optional paging selection used when reading a batch fetch job.
 */
export interface GetBatchFetchJobParams {
  /** Number of entries per page batch. */
  batchSize?: number;
  /** Which page batch to read (presumably 1-based — confirm against the API). */
  page?: number;
}

/**
 * Response to a request that starts a batch fetch job.
 */
export interface StartBatchFetchJobResponse {
  /** Identifier of the job. */
  jobId: string;
}

/**
 * Response shape that carries only the status of a batch fetch job.
 */
export interface BatchFetchJobStatusResponse {
  /** Status of the job. */
  status: BatchFetchJobStatus;
}

/**
 * Details of a batch fetch job: identity, status, paging counters and the
 * optional page data / error message.
 */
export interface BatchFetchJobResponse {
  /** Identifier of the job. */
  jobId: string;
  /** Status of the job. */
  status: BatchFetchJobStatus;
  /** Page data carried by this response, if any. */
  data?: PageData[];
  /** Error message, if any. */
  error?: string;
  /** Index of the page batch this response refers to. */
  currentPageBatch: number;
  /** Total number of page batches. */
  totalPageBatches: number;
  /** Total number of pages. */
  totalPages: number;
  /** Number of entries per page batch. */
  batchSize: number;
}