Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/translation-worker/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"type": "module",
"scripts": {
"check": "tsc --noEmit",
"test": "bun test test",
"dev": "wrangler dev -c wrangler.jsonc -c ../web/wrangler.jsonc -c ../docs/wrangler.jsonc",
"deploy": "wrangler deploy"
}
Expand Down
104 changes: 103 additions & 1 deletion apps/translation-worker/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ const MAX_BATCH_CHARS = 1_500
const MAX_BATCH_ITEMS = 32
const TRANSLATION_BATCHES_PER_QUEUE_JOB = 1
const TRANSLATION_MODEL_ATTEMPTS = 3
const TRANSLATION_SINGLE_TEXT_ATTEMPTS = 2
const TRANSLATION_QUEUE_RETRY_DELAY_SECONDS = 60
const AI_OUTPUT_PREVIEW_CHARS = 240

Expand Down Expand Up @@ -902,6 +903,41 @@ function aiPayloadPreview(value: unknown): string {
return text.replace(/\s+/g, ' ').trim().slice(0, AI_OUTPUT_PREVIEW_CHARS)
}

/**
 * Cleans up a plain-text model answer.
 *
 * The value is passed through `stripJsonFence` and trimmed. When the result is
 * wrapped in double quotes and `parseJsonValue` yields a string for it, that
 * decoded string (trimmed) is returned; otherwise the fence-stripped, trimmed
 * text is returned unchanged.
 *
 * @param value Raw text produced by the model.
 * @returns The unquoted translation text.
 */
function unquotePlainTranslation(value: string): string {
  const candidate = stripJsonFence(value).trim()

  const isQuoted = candidate.startsWith('"') && candidate.endsWith('"')
  if (!isQuoted) return candidate

  // Quoted answers are only unwrapped when they decode to a string.
  const decoded = parseJsonValue(candidate)
  return typeof decoded === 'string' ? decoded.trim() : candidate
}

/**
 * Extracts a single translated string from an arbitrarily shaped model payload.
 *
 * - Strings are fence-stripped and parsed; if they parse to something that is
 *   neither `null` nor a string, that parsed value is searched first, and the
 *   unquoted text itself is used when the search finds nothing truthy.
 * - A value that `stringArrayFromUnknown` turns into exactly one string yields
 *   that string, unquoted.
 * - Records are searched through a fixed list of well-known keys, in order.
 *
 * @param value Payload (or nested fragment of it) to inspect.
 * @param depth Current recursion depth; anything deeper than 3 returns `null`.
 * @returns The extracted text, or `null` when none can be found. The string
 *   path can return an empty string, so callers should test for truthiness.
 */
function plainTranslationFromUnknown(value: unknown, depth = 0): string | null {
  if (depth > 3) return null

  if (typeof value !== 'string') {
    const items = stringArrayFromUnknown(value)
    if (items?.length === 1) return unquotePlainTranslation(items[0])

    const fields = recordOf(value)
    if (!fields) return null

    // Keys are probed in this exact order; the first truthy hit wins.
    const candidateKeys = ['translation', 'translated', 'translatedText', 'text', 'response', 'result', 'output', 'data']
    for (const key of candidateKeys) {
      const found = plainTranslationFromUnknown(fields[key], depth + 1)
      if (found) return found
    }

    return null
  }

  const body = stripJsonFence(value).trim()
  const decoded = parseJsonValue(body)
  const isStructured = decoded !== null && typeof decoded !== 'string'
  if (isStructured) {
    const inner = plainTranslationFromUnknown(decoded, depth + 1)
    if (inner) return inner
  }
  return unquotePlainTranslation(body)
}

/**
 * Normalizes text for comparison: surrounding whitespace is removed and every
 * internal run of whitespace is collapsed to a single space.
 *
 * @param value Text to normalize.
 * @returns The normalized text.
 */
function normalizedTranslationValue(value: string): string {
  const words = value.trim().split(/\s+/)
  return words.join(' ')
}
Expand Down Expand Up @@ -989,7 +1025,69 @@ async function translateBatch(env: Env, targetLanguage: string, batch: string[])
})
}

throw new Error(`Translation model failed after ${TRANSLATION_MODEL_ATTEMPTS} attempts for ${targetLanguage}: ${lastError?.message ?? 'unknown error'}`)
console.warn('Translation batch JSON failed; falling back to single-text translation', {
targetLanguage,
batchSize: batch.length,
error: lastError?.message ?? 'unknown error',
})
return await translateBatchIndividually(env, targetLanguage, batch)
}

/**
 * Translates one string with a plain-text (non-JSON) prompt.
 *
 * The model is called up to `TRANSLATION_SINGLE_TEXT_ATTEMPTS` times. An
 * attempt succeeds when `plainTranslationFromUnknown` extracts a non-empty
 * string from the response; every rejected attempt is logged with a preview
 * of the payload.
 *
 * @param env Worker environment providing the `AI` binding and optional `TRANSLATION_MODEL`.
 * @param targetLanguage Locale the text should be translated into.
 * @param text Source text to translate.
 * @returns The translated text.
 * @throws Error when no attempt produced usable text.
 */
async function translateSingleText(env: Env, targetLanguage: string, text: string): Promise<string> {
  const model = env.TRANSLATION_MODEL || DEFAULT_MODEL

  // These values do not change between attempts, so compute them once.
  const systemPrompt = [
    'You translate one Capgo website string for the target locale.',
    'Translate naturally for the user cultural context; do not translate word for word.',
    'Preserve brand names, product names, developer terms, URLs, code identifiers, file paths, package names, language codes, numbers, punctuation, and whitespace meaning.',
    'Preserve terms like Capgo, Capacitor, code, API, SDK, CLI, npm, bun, GitHub, and Cloudflare when they are names or technical terms.',
    'Return only the translated text. Do not return JSON, Markdown, labels, explanations, quotes around the whole answer, or extra lines.',
  ].join(' ')
  const userPrompt = JSON.stringify({ targetLanguage, text })
  // Token budget scales with input length, clamped to the range [256, 2048].
  const maxTokens = Math.min(2048, Math.max(256, text.length * 3 + 128))

  let lastError: Error | null = null
  let attempt = 0

  while (attempt < TRANSLATION_SINGLE_TEXT_ATTEMPTS) {
    attempt += 1
    // Stays '' when the model call itself throws, so the log preview is empty.
    let payload: unknown = ''

    try {
      const result = await env.AI.run(model, {
        temperature: 0,
        max_tokens: maxTokens,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: userPrompt },
        ],
      })

      payload = extractAiPayload(result)
      const translated = plainTranslationFromUnknown(payload)
      if (translated) return translated
      lastError = new Error(`Translation model returned empty text for ${targetLanguage}`)
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(errorMessage(error))
    }

    console.warn('Single-text translation response rejected', {
      targetLanguage,
      attempt,
      maxAttempts: TRANSLATION_SINGLE_TEXT_ATTEMPTS,
      error: lastError.message,
      outputPreview: aiPayloadPreview(payload),
    })
  }

  throw new Error(`Single-text translation failed for ${targetLanguage}: ${lastError?.message ?? 'unknown error'}`)
}

/**
 * Translates a batch one string at a time via `translateSingleText`.
 *
 * Items are processed strictly in sequence, and the combined result is
 * checked with `assertTranslatedBatch` before it is returned.
 *
 * @param env Worker environment forwarded to `translateSingleText`.
 * @param targetLanguage Locale the texts should be translated into.
 * @param batch Source texts, in order.
 * @returns Translations in the same order as `batch`.
 */
async function translateBatchIndividually(env: Env, targetLanguage: string, batch: string[]): Promise<string[]> {
  const results: string[] = []

  for (const [position, sourceText] of batch.entries()) {
    // Await inside the loop on purpose: one model call at a time.
    results[position] = await translateSingleText(env, targetLanguage, sourceText)
  }

  assertTranslatedBatch(targetLanguage, batch, results)
  return results
}

function logPathnameFromUrl(value: string): string {
Expand Down Expand Up @@ -1686,6 +1784,10 @@ async function serveTranslated(request: Request, env: Env, requestUrl: URL, loca
return temporaryEnglishRedirectResponse(requestUrl, isHead)
}

/**
 * Internals exposed for the test suite only. Tests read
 * `TRANSLATION_CACHE_VERSION` from here to build queue messages.
 */
export const __translationWorkerTest = {
  TRANSLATION_CACHE_VERSION: TRANSLATION_CACHE_VERSION,
}

export default {
async fetch(request: Request, env: Env): Promise<Response> {
const requestUrl = new URL(request.url)
Expand Down
174 changes: 174 additions & 0 deletions apps/translation-worker/test/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
import worker, { __translationWorkerTest } from '../src/index'

/** Request object the tests expect the worker to pass to `env.AI.run`. */
type AiInput = {
  messages: { role: 'system' | 'user'; content: string }[]
  temperature?: number
  max_tokens?: number
}

/**
 * Fake model implementation supplied by a test.
 *
 * It may answer synchronously, return a promise, or throw. The return type is
 * plain `unknown` because `unknown` already absorbs every other member of a
 * union, so `Promise<unknown> | unknown` was the same type written redundantly.
 */
type AiHandler = (model: string, input: AiInput) => unknown

Check warning on line 10 in apps/translation-worker/test/index.test.ts

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

'unknown' overrides all other types in this union type.

See more on https://sonarcloud.io/project/issues?id=Cap-go_website&issues=AZ3lIaJYoylcDvyM1OLn&open=AZ3lIaJYoylcDvyM1OLn&pullRequest=618

/**
 * In-memory stand-in for the cache installed as `caches.default` in tests.
 * Entries are keyed by "<METHOD>:<URL>"; responses are cloned on both write
 * and read so stored bodies can be consumed repeatedly.
 */
class MemoryCache {
  readonly store = new Map<string, Response>()

  /** Builds the storage key for a request, URL, or URL string. */
  key(input: RequestInfo | URL): string {
    const { method, url } = input instanceof Request ? input : new Request(input)
    return [method, url].join(':')
  }

  /** Returns a clone of the stored response, or `undefined` on a miss. */
  async match(input: RequestInfo | URL): Promise<Response | undefined> {
    const cached = this.store.get(this.key(input))
    if (cached === undefined) return undefined
    return cached.clone()
  }

  /** Stores a clone of `response` under the key derived from `input`. */
  async put(input: RequestInfo | URL, response: Response): Promise<void> {
    const cacheKey = this.key(input)
    const snapshot = response.clone()
    this.store.set(cacheKey, snapshot)
  }

  /** Removes the entry for `input`; resolves to whether one existed. */
  async delete(input: RequestInfo | URL): Promise<boolean> {
    const cacheKey = this.key(input)
    return this.store.delete(cacheKey)
  }
}

/**
 * In-memory stand-in for the `TRANSLATION_STORE` binding used by the tests.
 * Only string values are supported, exposed through `get`, `put` and `delete`.
 */
class MemoryR2 {
  readonly store = new Map<string, string>()

  /** Resolves to an object whose `text()` yields the stored value, or `null` when the key is absent. */
  async get(key: string): Promise<{ text(): Promise<string> } | null> {
    const stored = this.store.get(key)
    if (stored === undefined) return null
    return { text: () => Promise.resolve(stored) }
  }

  /** Stores `value` under `key`, replacing any previous value. */
  async put(key: string, value: string): Promise<void> {
    this.store.set(key, value)
  }

  /** Removes `key` if present. */
  async delete(key: string): Promise<void> {
    this.store.delete(key)
  }
}

function lastUserPayload(input: AiInput): Record<string, unknown> {
return JSON.parse(input.messages[input.messages.length - 1]?.content ?? '{}') as Record<string, unknown>

Check warning on line 51 in apps/translation-worker/test/index.test.ts

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Prefer `.at(…)` over `[….length - index]`.

See more on https://sonarcloud.io/project/issues?id=Cap-go_website&issues=AZ3lIaJYoylcDvyM1OLo&open=AZ3lIaJYoylcDvyM1OLo&pullRequest=618
}

/**
 * Builds a fake worker environment for a test.
 *
 * @param aiHandler Fake model; invoked for every `AI.run` call after the input has been recorded.
 * @param html Page body served by both the `WEB` and `DOCS` origins.
 * @returns The fake `env` plus the recorders behind it: `calls` (every AI
 *   input), `queueMessages` (everything sent to `TRANSLATION_QUEUE`), and
 *   `r2` (the in-memory translation store).
 */
function createEnv(aiHandler: AiHandler, html = '<html lang="en"><body><h1>Ship updates instantly</h1><a href="/pricing">Pricing</a></body></html>') {
  const calls: AiInput[] = []
  const queueMessages: unknown[] = []
  const r2 = new MemoryR2()

  // Shared by WEB and DOCS: every fetch yields a fresh HTML response.
  const origin = {
    fetch: async () => {
      const headers = { 'Content-Type': 'text/html; charset=utf-8' }
      return new Response(html, { headers })
    },
  }

  const ai = {
    run: async (model: string, input: AiInput) => {
      // Record the input before delegating so it is captured even when the handler throws.
      calls.push(input)
      const reply = await aiHandler(model, input)
      return reply
    },
  }

  const queue = {
    send: async (message: unknown) => {
      queueMessages.push(message)
    },
  }

  const env = {
    TRANSLATION_MODEL: 'test-model',
    AI: ai,
    WEB: origin,
    DOCS: origin,
    TRANSLATION_QUEUE: queue,
    TRANSLATION_STORE: r2,
  }

  return { calls, r2, queueMessages, env }
}

// Originals captured once at load time so each test can restore them afterwards.
const originalWarn = console.warn
const originalError = console.error
const originalCaches: unknown = Reflect.get(globalThis, 'caches')

// Every test starts with a fresh in-memory `caches.default` and silenced warn/error output.
beforeEach(() => {
  Object.assign(globalThis, { caches: { default: new MemoryCache() } })
  Object.assign(console, { warn: () => {}, error: () => {} })
})

// Put back whatever was on the global object and console before the test ran.
afterEach(() => {
  Object.assign(globalThis, { caches: originalCaches })
  Object.assign(console, { warn: originalWarn, error: originalError })
})
Comment thread
coderabbitai[bot] marked this conversation as resolved.

describe('translation worker queue', () => {
  test('falls back to single-text translation when the model never returns batch JSON', async () => {
    // Batch prompts (payload carries `texts`) always get an unparsable reply;
    // single-text prompts get "JA:<source>".
    const { calls, env } = createEnv((_model, input) => {
      const { texts, text } = lastUserPayload(input)
      return Array.isArray(texts) ? 'this is not json' : `JA:${text}`
    })
    const pageUrl = 'https://capgo.app/ja/'

    await worker.queue(
      {
        messages: [
          {
            body: {
              url: pageUrl,
              locale: 'ja',
              cacheVersion: __translationWorkerTest.TRANSLATION_CACHE_VERSION,
              reason: 'miss',
            },
          },
        ],
      },
      env,
    )

    // Both prompt shapes must have been sent: the batch one first, then the fallback.
    const sentPayloads = calls.map((input) => lastUserPayload(input))
    expect(sentPayloads.some((payload) => Array.isArray(payload.texts))).toBe(true)
    expect(sentPayloads.some((payload) => typeof payload.text === 'string')).toBe(true)

    // The queue job should have stored the page, so the next request is a cache hit.
    const response = await worker.fetch(new Request(pageUrl), env)
    expect(response.status).toBe(200)
    expect(response.headers.get('X-Capgo-Translation-Cache')).toBe('HIT')

    const body = await response.text()
    for (const fragment of ['lang="ja"', 'JA:Ship updates instantly', 'JA:Pricing']) {
      expect(body).toContain(fragment)
    }
    expect(body).not.toContain('this is not json')
  })

  test('uses queue retry without throwing when both batch and single-text translation fail', async () => {
    const retryCalls: unknown[] = []
    const retry = (options?: unknown) => {
      retryCalls.push(options)
    }
    // Every model call fails, so neither the batch nor the single-text path can succeed.
    const { env } = createEnv(() => {
      throw new Error('AI unavailable')
    })

    await worker.queue(
      {
        messages: [
          {
            id: 'message-1',
            attempts: 1,
            body: {
              url: 'https://capgo.app/ko/?session_id=secret',
              locale: 'ko',
              cacheVersion: __translationWorkerTest.TRANSLATION_CACHE_VERSION,
              reason: 'miss',
            },
            retry,
          },
        ],
      },
      env,
    )

    // Exactly one retry, scheduled with the 60-second delay.
    expect(retryCalls).toEqual([{ delaySeconds: 60 }])
  })
})
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"build:after": "bun run repair_sitemap && bun run seo:check",
"ci:verify:web": "cd apps/web && bun run check && cd ../.. && bun run clean:build-cache && bun run build:web && bun run build:after",
"ci:verify:docs": "bun run check:docs-mirror && cd apps/docs && bun run check && cd ../.. && bun run build:docs",
"ci:verify:translation": "cd apps/translation-worker && bun run check",
"ci:verify:translation": "cd apps/translation-worker && bun run check && bun run test",
"check:docs-mirror": "bun run scripts/check-plugin-doc-mirrors.ts",
"check": "bun run check:docs-mirror && cd apps/web && bun run check && cd ../docs && bun run check && cd ../translation-worker && bun run check",
"seo:check": "seo-checker --output github",
Expand Down
Loading