diff --git a/package.json b/package.json index 529d483..cd6b353 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scrapegraph-js", - "version": "2.0.1", + "version": "2.1.0", "description": "Official JavaScript/TypeScript SDK for the ScrapeGraph AI API — smart web scraping powered by AI", "type": "module", "main": "dist/index.js", diff --git a/src/index.ts b/src/index.ts index b43f67e..aa69918 100644 --- a/src/index.ts +++ b/src/index.ts @@ -13,55 +13,112 @@ export { } from "./scrapegraphai.js"; export type { - ApiFetchConfig, - ApiFetchContentType, - ApiHtmlMode, - ApiScrapeFormatEntry, - ApiScrapeRequest, - ApiScrapeResponse, - ApiScrapeFormat, - ApiScrapeResultMap, - ApiExtractRequest, - ApiExtractResponse, - ApiSearchRequest, - ApiSearchResponse, - ApiSearchResult, - ApiCrawlRequest, - ApiCrawlResponse, - ApiCrawlResult, - ApiCrawlPage, - ApiCrawlStatus, - ApiCrawlPageStatus, - ApiMonitorCreateInput, - ApiMonitorUpdateInput, - ApiMonitorResponse, - ApiMonitorResult, - ApiMonitorDiffs, - ApiMonitorActivityParams, - ApiMonitorActivityResponse, - ApiMonitorTickEntry, - ApiMonitorTickStatus, - ApiHistoryFilter, - ApiHistoryEntry, - ApiHistoryPage, - ApiHistoryService, - ApiHistoryStatus, - ApiCreditsResponse, - ApiHealthResponse, ApiResult, - ApiTokenUsage, - ApiChunkerMetadata, - ApiBranding, + Branding, + BrandingColors, + BrandingFontEntry, + BrandingImages, + BrandingMetadata, + BrandingPersonality, + BrandingTypography, + ChunkerMetadata, + ContentPageMetadata, + CrawlHistoryEntry, + CrawlPage, + CrawlPageStatus, + CrawlRequest, + CrawlResponse, + CrawlResult, + CrawlStatus, + CreditsJobs, + CreditsResponse, + ExtractHistoryEntry, + ExtractRequest, + ExtractResponse, + FetchConfig, + FetchContentType, + FetchMode, + FetchWarning, + FetchWarningReason, + FormatConfig, + FormatError, + FormatMetadataMap, + FormatResponseMap, + FormatType, + HealthResponse, + HistoryEntry, + HistoryFilter, + HistoryPage, + HistoryPagination, + HistoryStatus, + HtmlMode, + ImageChange, + ImageContentType, + JobsStatus, + JsonChange, + MarkdownFormatConfig, + HtmlFormatConfig, + ScreenshotFormatConfig, + JsonFormatConfig, + LinksFormatConfig, + ImagesFormatConfig, + SummaryFormatConfig, + BrandingFormatConfig, + MockConfig, + MonitorActivityRequest, + MonitorActivityResponse, + MonitorCreateRequest, + MonitorDiffs, + MonitorHistoryEntry, + MonitorRefs, + MonitorResponse, + MonitorResult, + MonitorTickEntry, + MonitorTickStatus, + MonitorUpdateRequest, + PageResponse, + ScrapeHistoryEntry, + ScrapeMetadata, + ScrapeRequest, + ScrapeResponse, + ScrapeResultMap, + ScreenshotData, + SearchHistoryEntry, + SearchMetadata, + SearchRequest, + SearchResponse, + SearchResult, + Service, + SetChange, + TextChange, + TimeRange, + TokenUsage, + WebhookStatus, } from "./types.js"; export { - apiScrapeRequestSchema, - apiExtractRequestBaseSchema, - apiSearchRequestSchema, - apiCrawlRequestSchema, - apiMonitorCreateSchema, - apiMonitorUpdateSchema, - apiHistoryFilterSchema, - apiFetchConfigSchema, - apiScrapeFormatEntrySchema, + brandingFormatConfigSchema, + crawlRequestSchema, + extractRequestSchema, + fetchConfigSchema, + fetchContentTypeSchema, + fetchModeSchema, + formatConfigSchema, + historyFilterSchema, + htmlFormatConfigSchema, + htmlModeSchema, + imagesFormatConfigSchema, + jsonFormatConfigSchema, + linksFormatConfigSchema, + markdownFormatConfigSchema, + mockConfigSchema, + monitorActivityRequestSchema, + monitorCreateRequestSchema, + monitorUpdateRequestSchema, + scrapeRequestSchema, + screenshotFormatConfigSchema, + searchRequestSchema, + serviceSchema, + summaryFormatConfigSchema, + timeRangeSchema, } from "./schemas.js"; diff --git a/src/schemas.ts b/src/schemas.ts index dd8e2ab..13f9ae3 100644 --- a/src/schemas.ts +++ b/src/schemas.ts @@ -1,12 +1,24 @@ import { z } from "zod"; -export const apiServiceEnumSchema = z.enum(["scrape", "extract", "search", "monitor", "crawl"]); +export const serviceSchema = z.enum(["scrape", "extract", "search", "monitor", "crawl"]); -export const apiStatusEnumSchema = z.enum(["completed", "failed"]); +export const htmlModeSchema = z.enum(["normal", "reader", "prune"]); -export const apiHtmlModeSchema = z.enum(["normal", "reader", "prune"]); +export const fetchModeSchema = z.enum(["auto", "fast", "js"]); -export const apiFetchContentTypeSchema = z.enum([ +export const timeRangeSchema = z.enum([ + "past_hour", + "past_24_hours", + "past_week", + "past_month", + "past_year", +]); + +export const crawlStatusSchema = z.enum(["running", "completed", "failed", "paused", "deleted"]); + +export const crawlPageStatusSchema = z.enum(["completed", "failed", "skipped"]); + +export const fetchContentTypeSchema = z.enum([ "text/html", "application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -28,21 +40,19 @@ export const apiFetchContentTypeSchema = z.enum([ "application/x-latex", ]); -export const apiUserPromptSchema = z.string().min(1).max(10_000); +export const userPromptSchema = z.string().min(1).max(10_000); -export const apiUrlSchema = z.string().url(); +export const urlSchema = z.string().url(); -export const apiPaginationSchema = z.object({ +export const paginationSchema = z.object({ page: z.coerce.number().int().positive().default(1), limit: z.coerce.number().int().positive().max(100).default(20), }); -export const apiUuidParamSchema = z.object({ +export const uuidParamSchema = z.object({ id: z.string().regex(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i), }); -export const apiFetchModeSchema = z.enum(["auto", "fast", "js"]); - export const FETCH_CONFIG_DEFAULTS = { mode: "auto", stealth: false, @@ -51,8 +61,16 @@ export const FETCH_CONFIG_DEFAULTS = { scrolls: 0, } as const; -export const apiFetchConfigSchema = z.object({ - mode: apiFetchModeSchema.default(FETCH_CONFIG_DEFAULTS.mode), +export const mockConfigSchema = z.object({ + minKb: z.number().int().min(1).max(1000).default(1), + maxKb: z.number().int().min(1).max(1000).default(5), + minSleep: z.number().int().min(0).max(30000).default(5), + maxSleep: z.number().int().min(0).max(30000).default(15), + writeToBucket: z.boolean().default(false), +}); + +export const fetchConfigSchema = z.object({ + mode: fetchModeSchema.default(FETCH_CONFIG_DEFAULTS.mode), stealth: z.boolean().default(FETCH_CONFIG_DEFAULTS.stealth), timeout: z.number().int().min(1000).max(60000).default(FETCH_CONFIG_DEFAULTS.timeout), wait: z.number().int().min(0).max(30000).default(FETCH_CONFIG_DEFAULTS.wait), @@ -64,115 +82,73 @@ export const apiFetchConfigSchema = z.object({ .transform((v) => v.toLowerCase()) .optional(), scrolls: z.number().int().min(0).max(100).default(FETCH_CONFIG_DEFAULTS.scrolls), - mock: z - .union([ - z.boolean(), - z.object({ - minKb: z.number().int().min(1).max(1000).default(1), - maxKb: z.number().int().min(1).max(1000).default(5), - minSleep: z.number().int().min(0).max(30000).default(5), - maxSleep: z.number().int().min(0).max(30000).default(15), - writeToBucket: z.boolean().default(false), - }), - ]) - .default(false), + mock: z.union([z.boolean(), mockConfigSchema]).default(false), }); -export const apiHistoryFilterSchema = z.object({ +export const historyFilterSchema = z.object({ page: z.coerce.number().int().positive().default(1), limit: z.coerce.number().int().min(1).max(100).default(20), - service: apiServiceEnumSchema.optional(), + service: serviceSchema.optional(), }); -export const apiScrapeContentFormatSchema = z.enum([ - "markdown", - "html", - "links", - "images", - "summary", - "json", - "branding", -]); - -export const apiScrapeCaptureFormatSchema = z.enum(["screenshot"]); - -export const apiScrapeFormatSchema = z.enum([ - ...apiScrapeContentFormatSchema.options, - ...apiScrapeCaptureFormatSchema.options, -]); - -export const apiMarkdownConfigSchema = z.object({ - mode: apiHtmlModeSchema.default("normal"), +export const markdownFormatConfigSchema = z.object({ + type: z.literal("markdown"), + mode: htmlModeSchema.default("normal"), }); -export const apiHtmlConfigSchema = z.object({ - mode: apiHtmlModeSchema.default("normal"), +export const htmlFormatConfigSchema = z.object({ + type: z.literal("html"), + mode: htmlModeSchema.default("normal"), }); -export const apiScreenshotConfigSchema = z.object({ +export const screenshotFormatConfigSchema = z.object({ + type: z.literal("screenshot"), fullPage: z.boolean().default(false), width: z.number().int().min(320).max(3840).default(1440), height: z.number().int().min(200).max(2160).default(900), quality: z.number().int().min(1).max(100).default(80), }); -export const apiScrapeJsonConfigSchema = z.object({ - prompt: apiUserPromptSchema, - schema: z.record(z.string(), z.unknown()).optional(), - mode: apiHtmlModeSchema.default("normal"), -}); - -export const apiScrapeSummaryConfigSchema = z.object({}); - -export const apiScrapeMarkdownFormatSchema = apiMarkdownConfigSchema.extend({ - type: z.literal("markdown"), -}); - -export const apiScrapeHtmlFormatSchema = apiHtmlConfigSchema.extend({ - type: z.literal("html"), -}); - -export const apiScrapeScreenshotFormatSchema = apiScreenshotConfigSchema.extend({ - type: z.literal("screenshot"), -}); - -export const apiScrapeJsonFormatSchema = apiScrapeJsonConfigSchema.extend({ +export const jsonFormatConfigSchema = z.object({ type: z.literal("json"), + prompt: userPromptSchema, + schema: z.record(z.string(), z.unknown()).optional(), + mode: htmlModeSchema.default("normal"), }); -export const apiScrapeLinksFormatSchema = z.object({ +export const linksFormatConfigSchema = z.object({ type: z.literal("links"), }); -export const apiScrapeImagesFormatSchema = z.object({ +export const imagesFormatConfigSchema = z.object({ type: z.literal("images"), }); -export const apiScrapeSummaryFormatSchema = apiScrapeSummaryConfigSchema.extend({ +export const summaryFormatConfigSchema = z.object({ type: z.literal("summary"), }); -export const apiScrapeBrandingFormatSchema = z.object({ +export const brandingFormatConfigSchema = z.object({ type: z.literal("branding"), }); -export const apiScrapeFormatEntrySchema = z.discriminatedUnion("type", [ - apiScrapeMarkdownFormatSchema, - apiScrapeHtmlFormatSchema, - apiScrapeScreenshotFormatSchema, - apiScrapeJsonFormatSchema, - apiScrapeLinksFormatSchema, - apiScrapeImagesFormatSchema, - apiScrapeSummaryFormatSchema, - apiScrapeBrandingFormatSchema, +export const formatConfigSchema = z.discriminatedUnion("type", [ + markdownFormatConfigSchema, + htmlFormatConfigSchema, + screenshotFormatConfigSchema, + jsonFormatConfigSchema, + linksFormatConfigSchema, + imagesFormatConfigSchema, + summaryFormatConfigSchema, + brandingFormatConfigSchema, ]); -export const apiScrapeRequestSchema = z.object({ - url: apiUrlSchema, - contentType: apiFetchContentTypeSchema.optional(), - fetchConfig: apiFetchConfigSchema.optional(), +export const scrapeRequestSchema = z.object({ + url: urlSchema, + contentType: fetchContentTypeSchema.optional(), + fetchConfig: fetchConfigSchema.optional(), formats: z - .array(apiScrapeFormatEntrySchema) + .array(formatConfigSchema) .min(1) .refine((formats) => new Set(formats.map((format) => format.type)).size === formats.length, { message: "duplicate format types not allowed", @@ -180,78 +156,77 @@ export const apiScrapeRequestSchema = z.object({ .default([{ type: "markdown", mode: "normal" }]), }); -export const apiExtractRequestBaseSchema = z +export const extractRequestSchema = z .object({ - url: apiUrlSchema.optional(), + url: urlSchema.optional(), html: z.string().optional(), markdown: z.string().optional(), - mode: apiHtmlModeSchema.default("normal"), - prompt: apiUserPromptSchema, + mode: htmlModeSchema.default("normal"), + prompt: userPromptSchema, schema: z.record(z.string(), z.unknown()).optional(), - contentType: apiFetchContentTypeSchema.optional(), - fetchConfig: apiFetchConfigSchema.optional(), + contentType: fetchContentTypeSchema.optional(), + fetchConfig: fetchConfigSchema.optional(), }) .refine((d) => d.url || d.html || d.markdown, { message: "Either url, html, or markdown is required", }); -export const apiSearchRequestSchema = z +export const searchRequestSchema = z .object({ query: z.string().min(1).max(500), numResults: z.number().int().min(1).max(20).default(3), format: z.enum(["html", "markdown"]).default("markdown"), - mode: apiHtmlModeSchema.default("prune"), - fetchConfig: apiFetchConfigSchema.optional(), - prompt: apiUserPromptSchema.optional(), + mode: htmlModeSchema.default("prune"), + fetchConfig: fetchConfigSchema.optional(), + prompt: userPromptSchema.optional(), schema: z.record(z.string(), z.unknown()).optional(), locationGeoCode: z.string().max(10).optional(), - timeRange: z - .enum(["past_hour", "past_24_hours", "past_week", "past_month", "past_year"]) - .optional(), + timeRange: timeRangeSchema.optional(), }) .refine((d) => !d.schema || d.prompt, { message: "schema requires prompt", }); -export const apiMonitorCreateSchema = z.object({ - url: apiUrlSchema, +export const monitorCreateRequestSchema = z.object({ + url: urlSchema, name: z.string().max(200).optional(), formats: z - .array(apiScrapeFormatEntrySchema) + .array(formatConfigSchema) .min(1) .refine((formats) => new Set(formats.map((f) => f.type)).size === formats.length, { message: "duplicate format types not allowed", }) .default([{ type: "markdown", mode: "normal" }]), - webhookUrl: apiUrlSchema.optional(), + webhookUrl: urlSchema.optional(), interval: z.string().min(1).max(100), - fetchConfig: apiFetchConfigSchema.optional(), + fetchConfig: fetchConfigSchema.optional(), }); -export const apiMonitorUpdateSchema = z +export const monitorUpdateRequestSchema = z .object({ name: z.string().max(200).optional(), formats: z - .array(apiScrapeFormatEntrySchema) + .array(formatConfigSchema) .min(1) .refine((formats) => new Set(formats.map((f) => f.type)).size === formats.length, { message: "duplicate format types not allowed", }) .optional(), - webhookUrl: apiUrlSchema.nullable().optional(), + webhookUrl: urlSchema.nullable().optional(), interval: z.string().min(1).max(100).optional(), - fetchConfig: apiFetchConfigSchema.optional(), + fetchConfig: fetchConfigSchema.optional(), }) .partial(); -export const apiCrawlStatusSchema = z.enum(["running", "completed", "failed", "paused", "deleted"]); - -export const apiCrawlPageStatusSchema = z.enum(["completed", "failed", "skipped"]); +export const monitorActivityRequestSchema = z.object({ + limit: z.coerce.number().int().min(1).max(100).default(20), + cursor: z.string().optional(), +}); -export const apiCrawlRequestSchema = z.object({ - url: apiUrlSchema, +export const crawlRequestSchema = z.object({ + url: urlSchema, formats: z - .array(apiScrapeFormatEntrySchema) + .array(formatConfigSchema) .min(1) .refine((formats) => new Set(formats.map((f) => f.type)).size === formats.length, { message: "duplicate format types not allowed", @@ -263,6 +238,6 @@ export const apiCrawlRequestSchema = z.object({ allowExternal: z.boolean().default(false), includePatterns: z.array(z.string()).optional(), excludePatterns: z.array(z.string()).optional(), - contentTypes: z.array(apiFetchContentTypeSchema).optional(), - fetchConfig: apiFetchConfigSchema.optional(), + contentTypes: z.array(fetchContentTypeSchema).optional(), + fetchConfig: fetchConfigSchema.optional(), }); diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index c636275..b022e98 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -1,24 +1,24 @@ import { env } from "./env.js"; import type { - ApiCrawlRequest, - ApiCrawlResponse, - ApiCreditsResponse, - ApiExtractRequest, - ApiExtractResponse, - ApiHealthResponse, - ApiHistoryEntry, - ApiHistoryFilter, - ApiHistoryPage, - ApiMonitorActivityParams, - ApiMonitorActivityResponse, - ApiMonitorCreateInput, - ApiMonitorResponse, - ApiMonitorUpdateInput, ApiResult, - ApiScrapeRequest, - ApiScrapeResponse, - ApiSearchRequest, - ApiSearchResponse, + CrawlRequest, + CrawlResponse, + CreditsResponse, + ExtractRequest, + ExtractResponse, + HealthResponse, + HistoryEntry, + HistoryFilter, + HistoryPage, + MonitorActivityRequest, + MonitorActivityResponse, + MonitorCreateRequest, + MonitorResponse, + MonitorUpdateRequest, + ScrapeRequest, + ScrapeResponse, + SearchRequest, + SearchResponse, } from "./types.js"; const BASE_URL = process.env.SGAI_API_URL || "https://v2-api.scrapegraphai.com/api"; @@ -110,10 +110,10 @@ async function request( export async function scrape( apiKey: string, - params: ApiScrapeRequest, -): Promise> { + params: ScrapeRequest, +): Promise> { try { - const { data, elapsedMs } = await request("POST", "/scrape", apiKey, params); + const { data, elapsedMs } = await request("POST", "/scrape", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -122,15 +122,10 @@ export async function scrape( export async function extract( apiKey: string, - params: ApiExtractRequest, -): Promise> { + params: ExtractRequest, +): Promise> { try { - const { data, elapsedMs } = await request( - "POST", - "/extract", - apiKey, - params, - ); + const { data, elapsedMs } = await request("POST", "/extract", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -139,28 +134,28 @@ export async function extract( export async function search( apiKey: string, - params: ApiSearchRequest, -): Promise> { + params: SearchRequest, +): Promise> { try { - const { data, elapsedMs } = await request("POST", "/search", apiKey, params); + const { data, elapsedMs } = await request("POST", "/search", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function getCredits(apiKey: string): Promise> { +export async function getCredits(apiKey: string): Promise> { try { - const { data, elapsedMs } = await request("GET", "/credits", apiKey); + const { data, elapsedMs } = await request("GET", "/credits", apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function checkHealth(apiKey: string): Promise> { +export async function checkHealth(apiKey: string): Promise> { try { - const { data, elapsedMs } = await request("GET", "/health", apiKey); + const { data, elapsedMs } = await request("GET", "/health", apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -168,7 +163,7 @@ export async function checkHealth(apiKey: string): Promise> { + async list(apiKey: string, params?: HistoryFilter): Promise> { try { const qs = new URLSearchParams(); if (params?.page) qs.set("page", String(params.page)); @@ -176,16 +171,16 @@ export const history = { if (params?.service) qs.set("service", params.service); const query = qs.toString(); const path = query ? `/history?${query}` : "/history"; - const { data, elapsedMs } = await request("GET", path, apiKey); + const { data, elapsedMs } = await request("GET", path, apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } }, - async get(apiKey: string, id: string): Promise> { + async get(apiKey: string, id: string): Promise> { try { - const { data, elapsedMs } = await request("GET", `/history/${id}`, apiKey); + const { data, elapsedMs } = await request("GET", `/history/${id}`, apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -194,18 +189,18 @@ export const history = { }; export const crawl = { - async start(apiKey: string, params: ApiCrawlRequest): Promise> { + async start(apiKey: string, params: CrawlRequest): Promise> { try { - const { data, elapsedMs } = await request("POST", "/crawl", apiKey, params); + const { data, elapsedMs } = await request("POST", "/crawl", apiKey, params); return ok(data, elapsedMs); } catch (err) { return fail(err); } }, - async get(apiKey: string, id: string): Promise> { + async get(apiKey: string, id: string): Promise> { try { - const { data, elapsedMs } = await request("GET", `/crawl/${id}`, apiKey); + const { data, elapsedMs } = await request("GET", `/crawl/${id}`, apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -249,12 +244,9 @@ export const crawl = { }; export const monitor = { - async create( - apiKey: string, - params: ApiMonitorCreateInput, - ): Promise> { + async create(apiKey: string, params: MonitorCreateRequest): Promise> { try { - const { data, elapsedMs } = await request( + const { data, elapsedMs } = await request( "POST", "/monitor", apiKey, @@ -266,22 +258,18 @@ export const monitor = { } }, - async list(apiKey: string): Promise> { + async list(apiKey: string): Promise> { try { - const { data, elapsedMs } = await request("GET", "/monitor", apiKey); + const { data, elapsedMs } = await request("GET", "/monitor", apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } }, - async get(apiKey: string, id: string): Promise> { + async get(apiKey: string, id: string): Promise> { try { - const { data, elapsedMs } = await request( - "GET", - `/monitor/${id}`, - apiKey, - ); + const { data, elapsedMs } = await request("GET", `/monitor/${id}`, apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -291,10 +279,10 @@ export const monitor = { async update( apiKey: string, id: string, - params: ApiMonitorUpdateInput, - ): Promise> { + params: MonitorUpdateRequest, + ): Promise> { try { - const { data, elapsedMs } = await request( + const { data, elapsedMs } = await request( "PATCH", `/monitor/${id}`, apiKey, @@ -319,9 +307,9 @@ export const monitor = { } }, - async pause(apiKey: string, id: string): Promise> { + async pause(apiKey: string, id: string): Promise> { try { - const { data, elapsedMs } = await request( + const { data, elapsedMs } = await request( "POST", `/monitor/${id}/pause`, apiKey, @@ -332,9 +320,9 @@ export const monitor = { } }, - async resume(apiKey: string, id: string): Promise> { + async resume(apiKey: string, id: string): Promise> { try { - const { data, elapsedMs } = await request( + const { data, elapsedMs } = await request( "POST", `/monitor/${id}/resume`, apiKey, @@ -348,15 +336,15 @@ export const monitor = { async activity( apiKey: string, id: string, - params?: ApiMonitorActivityParams, - ): Promise> { + params?: MonitorActivityRequest, + ): Promise> { try { const qs = new URLSearchParams(); if (params?.limit) qs.set("limit", String(params.limit)); if (params?.cursor) qs.set("cursor", params.cursor); const query = qs.toString(); const path = query ? `/monitor/${id}/activity?${query}` : `/monitor/${id}/activity`; - const { data, elapsedMs } = await request("GET", path, apiKey); + const { data, elapsedMs } = await request("GET", path, apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); @@ -377,32 +365,31 @@ function resolveApiKey(opts?: ScrapeGraphAIInput): string { export function ScrapeGraphAI(opts?: ScrapeGraphAIInput) { const key = resolveApiKey(opts); return { - scrape: (params: ApiScrapeRequest) => scrape(key, params), - extract: (params: ApiExtractRequest) => extract(key, params), - search: (params: ApiSearchRequest) => search(key, params), + scrape: (params: ScrapeRequest) => scrape(key, params), + extract: (params: ExtractRequest) => extract(key, params), + search: (params: SearchRequest) => search(key, params), credits: () => getCredits(key), healthy: () => checkHealth(key), history: { - list: (params?: ApiHistoryFilter) => history.list(key, params), + list: (params?: HistoryFilter) => history.list(key, params), get: (id: string) => history.get(key, id), }, crawl: { - start: (params: ApiCrawlRequest) => crawl.start(key, params), + start: (params: CrawlRequest) => crawl.start(key, params), get: (id: string) => crawl.get(key, id), stop: (id: string) => crawl.stop(key, id), resume: (id: string) => crawl.resume(key, id), delete: (id: string) => crawl.delete(key, id), }, monitor: { - create: (params: ApiMonitorCreateInput) => monitor.create(key, params), + create: (params: MonitorCreateRequest) => monitor.create(key, params), list: () => monitor.list(key), get: (id: string) => monitor.get(key, id), - update: (id: string, params: ApiMonitorUpdateInput) => monitor.update(key, id, params), + update: (id: string, params: MonitorUpdateRequest) => monitor.update(key, id, params), delete: (id: string) => monitor.delete(key, id), pause: (id: string) => monitor.pause(key, id), resume: (id: string) => monitor.resume(key, id), - activity: (id: string, params?: ApiMonitorActivityParams) => - monitor.activity(key, id, params), + activity: (id: string, params?: MonitorActivityRequest) => monitor.activity(key, id, params), }, }; } diff --git a/src/types.ts b/src/types.ts index d27747e..5f9111e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,32 +1,46 @@ import type { z } from "zod"; import type { - apiCrawlRequestSchema, - apiExtractRequestBaseSchema, - apiFetchConfigSchema, - apiFetchContentTypeSchema, - apiHistoryFilterSchema, - apiHtmlModeSchema, - apiMonitorCreateSchema, - apiMonitorUpdateSchema, - apiScrapeFormatEntrySchema, - apiScrapeRequestSchema, - apiSearchRequestSchema, + crawlRequestSchema, + extractRequestSchema, + fetchConfigSchema, + fetchContentTypeSchema, + fetchModeSchema, + formatConfigSchema, + historyFilterSchema, + htmlModeSchema, + mockConfigSchema, + monitorActivityRequestSchema, + monitorCreateRequestSchema, + monitorUpdateRequestSchema, + scrapeRequestSchema, + searchRequestSchema, + timeRangeSchema, } from "./schemas.js"; -export type ApiFetchConfig = z.input; -export type ApiFetchContentType = z.infer; -export type ApiHtmlMode = z.infer; -export type ApiScrapeFormatEntry = z.input; - -export type ApiScrapeRequest = z.input; -export type ApiExtractRequest = z.input; -export type ApiSearchRequest = z.input; -export type ApiCrawlRequest = z.input; -export type ApiMonitorCreateInput = z.input; -export type ApiMonitorUpdateInput = z.input; -export type ApiHistoryFilter = z.input; - -export type ApiScrapeFormat = +export type Service = "scrape" | "extract" | "search" | "monitor" | "crawl"; +export type HtmlMode = z.infer; +export type FetchMode = z.infer; +export type TimeRange = z.infer; +export type CrawlStatus = "running" | "completed" | "failed" | "paused" | "deleted"; +export type CrawlPageStatus = "completed" | "failed" | "skipped"; +export type HistoryStatus = "completed" | "failed" | "running" | "paused" | "deleted"; +export type MonitorTickStatus = "completed" | "failed" | "paused" | "running"; +export type FetchContentType = z.infer; + +export type MockConfig = z.input; +export type FetchConfig = z.input; + +export type MarkdownFormatConfig = z.input & { type: "markdown" }; +export type HtmlFormatConfig = z.input & { type: "html" }; +export type ScreenshotFormatConfig = z.input & { type: "screenshot" }; +export type JsonFormatConfig = z.input & { type: "json" }; +export type LinksFormatConfig = z.input & { type: "links" }; +export type ImagesFormatConfig = z.input & { type: "images" }; +export type SummaryFormatConfig = z.input & { type: "summary" }; +export type BrandingFormatConfig = z.input & { type: "branding" }; +export type FormatConfig = z.input; + +export type FormatType = | "markdown" | "html" | "links" @@ -36,30 +50,29 @@ export type ApiScrapeFormat = | "branding" | "screenshot"; -export interface ApiTokenUsage { +export type ScrapeRequest = z.input; +export type ExtractRequest = z.input; +export type SearchRequest = z.input; +export type CrawlRequest = z.input; +export type MonitorCreateRequest = z.input; +export type MonitorUpdateRequest = z.input; +export type MonitorActivityRequest = z.input; +export type HistoryFilter = z.input; + +export interface TokenUsage { promptTokens: number; completionTokens: number; } -export interface ApiChunkerMetadata { +export interface ChunkerMetadata { chunks: { size: number }[]; } -export interface ApiFetchWarning { - reason: "too_short" | "empty" | "bot_blocked" | "spa_shell" | "soft_404"; - provider?: string; -} +export type FetchWarningReason = "too_short" | "empty" | "bot_blocked" | "spa_shell" | "soft_404"; -export interface ScrapeMetadata { +export interface FetchWarning { + reason: FetchWarningReason; provider?: string; - contentType: string; - elapsedMs?: number; - warnings?: ApiFetchWarning[]; - ocr?: { - model: string; - pagesProcessed: number; - pages: ContentPageMetadata[]; - }; } export interface ContentPageMetadata { @@ -76,7 +89,19 @@ export interface ContentPageMetadata { dimensions: { dpi: number; height: number; width: number }; } -export interface ApiBrandingColors { +export interface ScrapeMetadata { + provider?: string; + contentType: string; + elapsedMs?: number; + warnings?: FetchWarning[]; + ocr?: { + model: string; + pagesProcessed: number; + pages: ContentPageMetadata[]; + }; +} + +export interface BrandingColors { primary: string; accent: string; background: string; @@ -84,42 +109,42 @@ export interface ApiBrandingColors { link: string; } -export interface ApiBrandingFontEntry { +export interface BrandingFontEntry { family: string; fallback: string; } -export interface ApiBrandingTypography { - primary: ApiBrandingFontEntry; - heading: ApiBrandingFontEntry; - mono: ApiBrandingFontEntry; +export interface BrandingTypography { + primary: BrandingFontEntry; + heading: BrandingFontEntry; + mono: BrandingFontEntry; sizes: { h1: string; h2: string; body: string }; } -export interface ApiBrandingImages { +export interface BrandingImages { logo: string; favicon: string; ogImage: string; } -export interface ApiBrandingPersonality { +export interface BrandingPersonality { tone: string; energy: "high" | "medium" | "low"; targetAudience: string; } -export interface ApiBranding { +export interface Branding { colorScheme: "light" | "dark"; - colors: ApiBrandingColors; - typography: ApiBrandingTypography; - images: ApiBrandingImages; + colors: BrandingColors; + typography: BrandingTypography; + images: BrandingImages; spacing: { baseUnit: number; borderRadius: string }; frameworkHints: string[]; - personality: ApiBrandingPersonality; + personality: BrandingPersonality; confidence: number; } -export interface ApiBrandingMetadata { +export interface BrandingMetadata { title: string; description: string; favicon: string; @@ -131,91 +156,88 @@ export interface ApiBrandingMetadata { ogUrl: string; } -export interface ApiScrapeScreenshotData { +export interface ScreenshotData { url: string; width: number; height: number; } -export interface ApiScrapeFormatError { +export interface FormatError { code: string; error: string; } -export interface ApiScrapeFormatResponseMap { +export interface FormatResponseMap { markdown: string[]; html: string[]; links: string[]; images: string[]; summary: string; json: Record; - branding: ApiBranding; - screenshot: ApiScrapeScreenshotData; + branding: Branding; + screenshot: ScreenshotData; } -export type ApiImageContentType = Extract; +export type ImageContentType = Extract; -export interface ApiScrapeFormatMetadataMap { +export interface FormatMetadataMap { markdown: Record; html: Record; links: { count: number }; images: { count: number }; - summary: { chunker?: ApiChunkerMetadata }; - json: { chunker: ApiChunkerMetadata; raw?: string | null }; - branding: { branding: ApiBrandingMetadata }; - screenshot: { contentType: ApiImageContentType; provider?: string }; + summary: { chunker?: ChunkerMetadata }; + json: { chunker: ChunkerMetadata; raw?: string | null }; + branding: { branding: BrandingMetadata }; + screenshot: { contentType: ImageContentType; provider?: string }; } -export type ApiScrapeResultMap = Partial<{ - [K in ApiScrapeFormat]: { - data: ApiScrapeFormatResponseMap[K]; - metadata?: ApiScrapeFormatMetadataMap[K]; +export type ScrapeResultMap = Partial<{ + [K in FormatType]: { + data: FormatResponseMap[K]; + metadata?: FormatMetadataMap[K]; }; }>; -export interface ApiScrapeResponse { - results: ApiScrapeResultMap; +export interface ScrapeResponse { + results: ScrapeResultMap; metadata: ScrapeMetadata; - errors?: Partial<{ [K in ApiScrapeFormat]: ApiScrapeFormatError }>; + errors?: Partial<{ [K in FormatType]: FormatError }>; } -export interface ApiExtractResponse { +export interface ExtractResponse { raw: string | null; json: Record | null; - usage: ApiTokenUsage; + usage: TokenUsage; metadata: { - chunker: ApiChunkerMetadata; + chunker: ChunkerMetadata; fetch?: { provider?: string }; }; } -export interface ApiSearchResult { +export interface SearchResult { url: string; title: string; content: string; provider?: string; } -export interface ApiSearchMetadata { +export interface SearchMetadata { search: { provider?: string }; pages: { requested: number; scraped: number }; - chunker?: ApiChunkerMetadata; + chunker?: ChunkerMetadata; } -export interface ApiSearchResponse { - results: ApiSearchResult[]; +export interface SearchResponse { + results: SearchResult[]; json?: Record | null; raw?: string | null; - usage?: ApiTokenUsage; - metadata: ApiSearchMetadata; + usage?: TokenUsage; + metadata: SearchMetadata; } -export type ApiCrawlStatus = "running" | "completed" | "failed" | "paused" | "deleted"; -export type ApiCrawlPageStatus = "completed" | "failed" | "skipped"; - -export interface ApiCrawlPage { +export interface CrawlPage { url: string; - status: ApiCrawlPageStatus; + status: CrawlPageStatus; depth: number; parentUrl: string | null; links: string[]; @@ -227,15 +249,15 @@ export interface ApiCrawlPage { error?: string; } -export interface ApiCrawlResult { - status: ApiCrawlStatus; +export interface CrawlResult { + status: CrawlStatus; reason?: string; total: number; finished: number; - pages: ApiCrawlPage[]; + pages: CrawlPage[]; } -export interface ApiCrawlResponse extends ApiCrawlResult { +export interface CrawlResponse extends CrawlResult { id: string; } @@ -262,7 +284,7 @@ export interface ImageChange { mask?: string; } -export interface ApiMonitorDiffs { +export interface MonitorDiffs { markdown?: TextChange[]; html?: TextChange[]; json?: JsonChange[]; @@ -273,135 +295,125 @@ export interface ApiMonitorDiffs { branding?: JsonChange[]; } -export type ApiMonitorRefs = Partial>; +export type MonitorRefs = Partial>; -export interface ApiWebhookStatus { +export interface WebhookStatus { sentAt: string; statusCode: number | null; error?: string; } -export interface ApiMonitorResult { +export interface MonitorResult { changed: boolean; - diffs: ApiMonitorDiffs; - refs: ApiMonitorRefs; - webhookStatus?: ApiWebhookStatus; + diffs: MonitorDiffs; + refs: MonitorRefs; + webhookStatus?: WebhookStatus; } -export interface ApiMonitorResponse { +export interface MonitorResponse { cronId: string; scheduleId: string; interval: string; status: "active" | "paused"; - config: ApiMonitorCreateInput; + config: MonitorCreateRequest; createdAt: string; updatedAt: string; } -export type ApiMonitorTickStatus = "completed" | "failed" | "paused" | "running"; - -export interface ApiMonitorTickEntry { +export interface MonitorTickEntry { id: string; - status: ApiMonitorTickStatus; + status: MonitorTickStatus; createdAt: string; elapsedMs: number; changed: boolean; - diffs: ApiMonitorDiffs; + diffs: MonitorDiffs; error?: string; } -export interface ApiMonitorActivityResponse { - ticks: ApiMonitorTickEntry[]; +export interface MonitorActivityResponse { + ticks: MonitorTickEntry[]; nextCursor: string | null; } -export interface ApiMonitorActivityParams { - limit?: number; - cursor?: string; -} - -export type ApiHistoryService = "scrape" | "extract" | "search" | "monitor" | "crawl"; -export type ApiHistoryStatus = "completed" | "failed" | "running" | "paused" | "deleted"; - -interface ApiHistoryBase { +interface HistoryBase { id: string; - status: ApiHistoryStatus; + status: HistoryStatus; error: unknown; elapsedMs: number; createdAt: string; requestParentId: string | null; } -export interface ApiScrapeHistoryEntry extends ApiHistoryBase { +export interface ScrapeHistoryEntry extends HistoryBase { service: "scrape"; - params: ApiScrapeRequest; - result: ApiScrapeResponse; + params: ScrapeRequest; + result: ScrapeResponse; } -export interface ApiExtractHistoryEntry extends ApiHistoryBase { +export interface ExtractHistoryEntry extends HistoryBase { service: "extract"; - params: ApiExtractRequest; - result: ApiExtractResponse; + params: ExtractRequest; + result: ExtractResponse; } -export interface ApiSearchHistoryEntry extends ApiHistoryBase { +export interface SearchHistoryEntry extends HistoryBase { service: "search"; - params: ApiSearchRequest; - result: ApiSearchResponse; + params: SearchRequest; + result: SearchResponse; } -export interface ApiMonitorHistoryEntry extends ApiHistoryBase { +export interface MonitorHistoryEntry extends HistoryBase { service: "monitor"; params: { cronId: string; url: string }; - result: ApiMonitorResult; + result: MonitorResult; } -export interface ApiCrawlHistoryEntry extends ApiHistoryBase { +export interface CrawlHistoryEntry extends HistoryBase { service: "crawl"; params: { url: string; maxPages: number }; - result: ApiCrawlResult; + result: CrawlResult; } -export type ApiHistoryEntry = - | ApiScrapeHistoryEntry - | ApiExtractHistoryEntry - | ApiSearchHistoryEntry - | ApiMonitorHistoryEntry - | ApiCrawlHistoryEntry; +export type HistoryEntry = + | ScrapeHistoryEntry + | ExtractHistoryEntry + | SearchHistoryEntry + | MonitorHistoryEntry + | CrawlHistoryEntry; -export interface ApiPageResponse { +export interface HistoryPagination { + page: number; + limit: number; + total: number; +} + +export interface PageResponse { data: T[]; - pagination: { - page: number; - limit: number; - total: number; - }; + pagination: HistoryPagination; } -export type ApiHistoryPage = ApiPageResponse; +export type HistoryPage = PageResponse; -export interface ApiJobsStatus { +export interface JobsStatus { used: number; limit: number; } -export interface ApiCreditsResponse { +export interface CreditsJobs { + crawl: JobsStatus; + monitor: JobsStatus; +} + +export interface CreditsResponse { remaining: number; used: number; plan: string; - jobs: { - crawl: ApiJobsStatus; - monitor: ApiJobsStatus; - }; + jobs: CreditsJobs; } -export interface ApiHealthResponse { - status: string; +export interface HealthResponse { + status: "ok" | "degraded"; uptime: number; - services?: { - redis: "ok" | "down"; - db: "ok" | "down"; - }; } export interface ApiResult {