diff --git a/apps/translation-worker/package.json b/apps/translation-worker/package.json index 1d6ba51fe..284152860 100644 --- a/apps/translation-worker/package.json +++ b/apps/translation-worker/package.json @@ -4,6 +4,7 @@ "type": "module", "scripts": { "check": "tsc --noEmit", + "test": "bun test test", "dev": "wrangler dev -c wrangler.jsonc -c ../web/wrangler.jsonc -c ../docs/wrangler.jsonc", "deploy": "wrangler deploy" } diff --git a/apps/translation-worker/src/index.ts b/apps/translation-worker/src/index.ts index 01d5ff6f9..206e5a948 100644 --- a/apps/translation-worker/src/index.ts +++ b/apps/translation-worker/src/index.ts @@ -121,6 +121,7 @@ const MAX_BATCH_CHARS = 1_500 const MAX_BATCH_ITEMS = 32 const TRANSLATION_BATCHES_PER_QUEUE_JOB = 1 const TRANSLATION_MODEL_ATTEMPTS = 3 +const TRANSLATION_SINGLE_TEXT_ATTEMPTS = 2 const TRANSLATION_QUEUE_RETRY_DELAY_SECONDS = 60 const AI_OUTPUT_PREVIEW_CHARS = 240 @@ -902,6 +903,41 @@ function aiPayloadPreview(value: unknown): string { return text.replace(/\s+/g, ' ').trim().slice(0, AI_OUTPUT_PREVIEW_CHARS) } +function unquotePlainTranslation(value: string): string { + const trimmed = stripJsonFence(value).trim() + if (trimmed.startsWith('"') && trimmed.endsWith('"')) { + const parsed = parseJsonValue(trimmed) + if (typeof parsed === 'string') return parsed.trim() + } + return trimmed +} + +function plainTranslationFromUnknown(value: unknown, depth = 0): string | null { + if (depth > 3) return null + if (typeof value === 'string') { + const trimmed = stripJsonFence(value).trim() + const parsed = parseJsonValue(trimmed) + if (parsed !== null && typeof parsed !== 'string') { + const nested = plainTranslationFromUnknown(parsed, depth + 1) + if (nested) return nested + } + return unquotePlainTranslation(trimmed) + } + + const array = stringArrayFromUnknown(value) + if (array?.length === 1) return unquotePlainTranslation(array[0]) + + const record = recordOf(value) + if (!record) return null + + for (const key of ['translation', 'translated', 'translatedText', 'text', 'response', 'result', 'output', 'data']) { + const nested = plainTranslationFromUnknown(record[key], depth + 1) + if (nested) return nested + } + + return null +} + function normalizedTranslationValue(value: string): string { return value.trim().replace(/\s+/g, ' ') } @@ -989,7 +1025,69 @@ async function translateBatch(env: Env, targetLanguage: string, batch: string[]) }) } - throw new Error(`Translation model failed after ${TRANSLATION_MODEL_ATTEMPTS} attempts for ${targetLanguage}: ${lastError?.message ?? 'unknown error'}`) + console.warn('Translation batch JSON failed; falling back to single-text translation', { + targetLanguage, + batchSize: batch.length, + error: lastError?.message ?? 'unknown error', + }) + return await translateBatchIndividually(env, targetLanguage, batch) +} + +async function translateSingleText(env: Env, targetLanguage: string, text: string): Promise { + const model = env.TRANSLATION_MODEL || DEFAULT_MODEL + let lastError: Error | null = null + + for (let attempt = 1; attempt <= TRANSLATION_SINGLE_TEXT_ATTEMPTS; attempt += 1) { + let payload: unknown = '' + try { + const result = await env.AI.run(model, { + temperature: 0, + max_tokens: Math.min(2048, Math.max(256, text.length * 3 + 128)), + messages: [ + { + role: 'system', + content: [ + 'You translate one Capgo website string for the target locale.', + 'Translate naturally for the user cultural context; do not translate word for word.', + 'Preserve brand names, product names, developer terms, URLs, code identifiers, file paths, package names, language codes, numbers, punctuation, and whitespace meaning.', + 'Preserve terms like Capgo, Capacitor, code, API, SDK, CLI, npm, bun, GitHub, and Cloudflare when they are names or technical terms.', + 'Return only the translated text. Do not return JSON, Markdown, labels, explanations, quotes around the whole answer, or extra lines.', + ].join(' '), + }, + { + role: 'user', + content: JSON.stringify({ targetLanguage, text }), + }, + ], + }) + + payload = extractAiPayload(result) + const translated = plainTranslationFromUnknown(payload) + if (translated) return translated + lastError = new Error(`Translation model returned empty text for ${targetLanguage}`) + } catch (error) { + lastError = error instanceof Error ? error : new Error(errorMessage(error)) + } + + console.warn('Single-text translation response rejected', { + targetLanguage, + attempt, + maxAttempts: TRANSLATION_SINGLE_TEXT_ATTEMPTS, + error: lastError.message, + outputPreview: aiPayloadPreview(payload), + }) + } + + throw new Error(`Single-text translation failed for ${targetLanguage}: ${lastError?.message ?? 'unknown error'}`) +} + +async function translateBatchIndividually(env: Env, targetLanguage: string, batch: string[]): Promise { + const translated: string[] = [] + for (const text of batch) { + translated.push(await translateSingleText(env, targetLanguage, text)) + } + assertTranslatedBatch(targetLanguage, batch, translated) + return translated } function logPathnameFromUrl(value: string): string { @@ -1686,6 +1784,10 @@ async function serveTranslated(request: Request, env: Env, requestUrl: URL, loca return temporaryEnglishRedirectResponse(requestUrl, isHead) } +export const __translationWorkerTest = { + TRANSLATION_CACHE_VERSION, +} + export default { async fetch(request: Request, env: Env): Promise { const requestUrl = new URL(request.url) diff --git a/apps/translation-worker/test/index.test.ts b/apps/translation-worker/test/index.test.ts new file mode 100644 index 000000000..b37674ad6 --- /dev/null +++ b/apps/translation-worker/test/index.test.ts @@ -0,0 +1,174 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' +import worker, { __translationWorkerTest } from '../src/index' + +type AiInput = { + messages: { role: 'system' | 'user'; content: string }[] + temperature?: number + max_tokens?: number +} + +type AiHandler = (model: string, input: AiInput) => Promise | unknown + +class MemoryCache { + readonly store = new Map() + + key(input: RequestInfo | URL): string { + const request = input instanceof Request ? input : new Request(input) + return `${request.method}:${request.url}` + } + + async match(input: RequestInfo | URL): Promise { + return this.store.get(this.key(input))?.clone() + } + + async put(input: RequestInfo | URL, response: Response): Promise { + this.store.set(this.key(input), response.clone()) + } + + async delete(input: RequestInfo | URL): Promise { + return this.store.delete(this.key(input)) + } +} + +class MemoryR2 { + readonly store = new Map() + + async get(key: string): Promise<{ text(): Promise } | null> { + const value = this.store.get(key) + return value === undefined ? null : { text: async () => value } + } + + async put(key: string, value: string): Promise { + this.store.set(key, value) + } + + async delete(key: string): Promise { + this.store.delete(key) + } +} + +function lastUserPayload(input: AiInput): Record { + return JSON.parse(input.messages[input.messages.length - 1]?.content ?? '{}') as Record +} + +function createEnv(aiHandler: AiHandler, html = '

Ship updates instantly

Pricing') { + const calls: AiInput[] = [] + const queueMessages: unknown[] = [] + const r2 = new MemoryR2() + const origin = { + fetch: async () => + new Response(html, { + headers: { + 'Content-Type': 'text/html; charset=utf-8', + }, + }), + } + + return { + calls, + r2, + queueMessages, + env: { + TRANSLATION_MODEL: 'test-model', + AI: { + run: async (model: string, input: AiInput) => { + calls.push(input) + return await aiHandler(model, input) + }, + }, + WEB: origin, + DOCS: origin, + TRANSLATION_QUEUE: { + send: async (message: unknown) => { + queueMessages.push(message) + }, + }, + TRANSLATION_STORE: r2, + }, + } +} + +const originalWarn = console.warn +const originalError = console.error +const originalCaches = (globalThis as typeof globalThis & { caches?: unknown }).caches + +beforeEach(() => { + ;(globalThis as typeof globalThis & { caches: { default: MemoryCache } }).caches = { default: new MemoryCache() } + console.warn = () => {} + console.error = () => {} +}) + +afterEach(() => { + ;(globalThis as typeof globalThis & { caches?: unknown }).caches = originalCaches + console.warn = originalWarn + console.error = originalError +}) + +describe('translation worker queue', () => { + test('falls back to single-text translation when the model never returns batch JSON', async () => { + const { calls, env } = createEnv((_model, input) => { + const payload = lastUserPayload(input) + if (Array.isArray(payload.texts)) return 'this is not json' + return `JA:${payload.text}` + }) + + await worker.queue( + { + messages: [ + { + body: { + url: 'https://capgo.app/ja/', + locale: 'ja', + cacheVersion: __translationWorkerTest.TRANSLATION_CACHE_VERSION, + reason: 'miss', + }, + }, + ], + }, + env, + ) + + expect(calls.some((input) => Array.isArray(lastUserPayload(input).texts))).toBe(true) + expect(calls.some((input) => typeof lastUserPayload(input).text === 'string')).toBe(true) + + const response = await worker.fetch(new Request('https://capgo.app/ja/'), env) + expect(response.status).toBe(200) + expect(response.headers.get('X-Capgo-Translation-Cache')).toBe('HIT') + + const body = await response.text() + expect(body).toContain('lang="ja"') + expect(body).toContain('JA:Ship updates instantly') + expect(body).toContain('JA:Pricing') + expect(body).not.toContain('this is not json') + }) + + test('uses queue retry without throwing when both batch and single-text translation fail', async () => { + const { env } = createEnv(() => { + throw new Error('AI unavailable') + }) + const retryCalls: unknown[] = [] + + await worker.queue( + { + messages: [ + { + id: 'message-1', + attempts: 1, + body: { + url: 'https://capgo.app/ko/?session_id=secret', + locale: 'ko', + cacheVersion: __translationWorkerTest.TRANSLATION_CACHE_VERSION, + reason: 'miss', + }, + retry: (options?: unknown) => { + retryCalls.push(options) + }, + }, + ], + }, + env, + ) + + expect(retryCalls).toEqual([{ delaySeconds: 60 }]) + }) +}) diff --git a/package.json b/package.json index eda0bdc98..c38a02028 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,7 @@ "build:after": "bun run repair_sitemap && bun run seo:check", "ci:verify:web": "cd apps/web && bun run check && cd ../.. && bun run clean:build-cache && bun run build:web && bun run build:after", "ci:verify:docs": "bun run check:docs-mirror && cd apps/docs && bun run check && cd ../.. && bun run build:docs", - "ci:verify:translation": "cd apps/translation-worker && bun run check", + "ci:verify:translation": "cd apps/translation-worker && bun run check && bun run test", "check:docs-mirror": "bun run scripts/check-plugin-doc-mirrors.ts", "check": "bun run check:docs-mirror && cd apps/web && bun run check && cd ../docs && bun run check && cd ../translation-worker && bun run check", "seo:check": "seo-checker --output github",