From 33ce2ce7808dbd7483895cf964635091b11849b7 Mon Sep 17 00:00:00 2001 From: Martin Donadieu Date: Fri, 1 May 2026 21:21:55 +0200 Subject: [PATCH 1/2] Harden translation model retries --- apps/translation-worker/src/index.ts | 165 +++++++++++++++++++++------ 1 file changed, 127 insertions(+), 38 deletions(-) diff --git a/apps/translation-worker/src/index.ts b/apps/translation-worker/src/index.ts index dd4c9dec5..6f6ac1210 100644 --- a/apps/translation-worker/src/index.ts +++ b/apps/translation-worker/src/index.ts @@ -29,12 +29,18 @@ type QueueBinding = { send(message: T): Promise } +type QueueRetryOptions = { + delaySeconds?: number +} + type QueueMessage = { - body: T + readonly body: T + readonly attempts?: number + retry?: (options?: QueueRetryOptions) => void } type MessageBatch = { - messages: QueueMessage[] + readonly messages: readonly QueueMessage[] } type R2ObjectBody = { @@ -113,6 +119,9 @@ const MAX_HTML_BYTES = 1_500_000 const MAX_BATCH_CHARS = 1_500 const MAX_BATCH_ITEMS = 32 const TRANSLATION_BATCHES_PER_QUEUE_JOB = 1 +const TRANSLATION_MODEL_ATTEMPTS = 3 +const TRANSLATION_QUEUE_RETRY_DELAY_SECONDS = 60 +const AI_OUTPUT_PREVIEW_CHARS = 240 const LANGUAGE_NAMES: Record = { de: 'German', @@ -783,14 +792,19 @@ function extractChoicesText(choices: unknown): string { return '' } -function extractAiText(result: unknown): string { - if (typeof result === 'string') return result +function extractAiPayload(result: unknown): unknown { + if (typeof result === 'string' || Array.isArray(result)) return result const record = recordOf(result) if (!record) return '' - for (const key of ['response', 'text', 'result']) { - if (typeof record[key] === 'string') return record[key] as string + for (const key of ['response', 'text', 'result', 'output']) { + const value = record[key] + if (Array.isArray(value)) { + const text = extractContentText(value) + return text || value + } + if (typeof value === 'string' || Array.isArray(value) || recordOf(value)) return value } return extractChoicesText(record.choices) @@ -833,21 +847,58 @@ function extractJsonArray(value: string): string | null { return value.slice(start, end + 1) } -function parseTranslationArray(rawText: string): string[] | null { - const trimmed = stripJsonFence(rawText) +function parseJsonValue(value: string): unknown { try { - const parsed = JSON.parse(trimmed) - return Array.isArray(parsed) && parsed.every((item) => typeof item === 'string') ? parsed : null + return JSON.parse(value) } catch { - const jsonArray = extractJsonArray(trimmed) - if (!jsonArray) return null + return null + } +} + +function stringArrayFromUnknown(value: unknown): string[] | null { + return Array.isArray(value) && value.every((item) => typeof item === 'string') ? value : null +} + +function translationArrayFromUnknown(value: unknown): string[] | null { + const directArray = stringArrayFromUnknown(value) + if (directArray) return directArray + + const record = recordOf(value) + if (!record) return null + + for (const key of ['translations', 'translated', 'translatedTexts', 'texts', 'items', 'result', 'response', 'data']) { + const nestedArray = stringArrayFromUnknown(record[key]) + if (nestedArray) return nestedArray + } + + return null +} + +function parseTranslationArray(rawValue: unknown): string[] | null { + const directArray = translationArrayFromUnknown(rawValue) + if (directArray) return directArray + if (typeof rawValue !== 'string') return null + + const trimmed = stripJsonFence(rawValue) + const parsed = translationArrayFromUnknown(parseJsonValue(trimmed)) + if (parsed) return parsed + + const jsonArray = extractJsonArray(trimmed) + return jsonArray ? translationArrayFromUnknown(parseJsonValue(jsonArray)) : null +} + +function aiPayloadPreview(value: unknown): string { + let text = '' + if (typeof value === 'string') { + text = value + } else { try { - const parsed = JSON.parse(jsonArray) - return Array.isArray(parsed) && parsed.every((item) => typeof item === 'string') ? parsed : null + text = JSON.stringify(value) ?? '' } catch { - return null + text = String(value) } } + return text.replace(/\s+/g, ' ').trim().slice(0, AI_OUTPUT_PREVIEW_CHARS) } function normalizedTranslationValue(value: string): string { @@ -884,30 +935,59 @@ function assertTranslatedBatch(targetLanguage: string, batch: string[], translat async function translateBatch(env: Env, targetLanguage: string, batch: string[]): Promise { const model = env.TRANSLATION_MODEL || DEFAULT_MODEL - const result = await env.AI.run(model, { - temperature: 0, - max_tokens: 8192, - messages: [ - { - role: 'system', - content: - 'You translate website copy. Translate every human-readable label, heading, sentence, and paragraph into the target language, including short navigation labels. Return only a JSON array of strings with the same length and order as the input. Preserve only brand names, product names, URLs, code identifiers, file paths, language codes, numbers, punctuation, and whitespace meaning. Do not leave English copy unchanged unless it is one of those preserved terms. Do not add explanations.', - }, - { - role: 'user', - content: JSON.stringify({ targetLanguage, texts: batch }), - }, - ], - }) + let lastError: Error | null = null + + for (let attempt = 1; attempt <= TRANSLATION_MODEL_ATTEMPTS; attempt += 1) { + const result = await env.AI.run(model, { + temperature: 0, + max_tokens: 8192, + messages: [ + { + role: 'system', + content: [ + 'You translate Capgo website copy for the target locale.', + 'Translate naturally for the user cultural context; do not translate word for word.', + 'Translate every human-readable label, heading, sentence, and paragraph into the target language, including short navigation labels.', + 'Preserve brand names, product names, developer terms, URLs, code identifiers, file paths, package names, language codes, numbers, punctuation, and whitespace meaning. Preserve terms like Capgo, Capacitor, code, API, SDK, CLI, npm, bun, GitHub, and Cloudflare when they are names or technical terms.', + `Return only valid JSON: one JSON array of exactly ${batch.length} strings, in the same order as the input. Do not return Markdown, an object, keys, comments, or explanations. Escape quotes and newlines inside JSON strings.`, + attempt > 1 ? 'Your previous response was rejected. Fix the format and return only the JSON array.' : '', + ] + .filter(Boolean) + .join(' '), + }, + { + role: 'user', + content: JSON.stringify({ targetLanguage, texts: batch }), + }, + ], + }) + + const payload = extractAiPayload(result) + const translated = parseTranslationArray(payload) + if (!translated) { + lastError = new Error(`Translation model returned invalid JSON for ${targetLanguage}`) + } else if (translated.length !== batch.length) { + lastError = new Error(`Translation model returned ${translated.length} strings for ${batch.length} ${targetLanguage} strings`) + } else { + try { + assertTranslatedBatch(targetLanguage, batch, translated) + return translated + } catch (error) { + lastError = error instanceof Error ? error : new Error(errorMessage(error)) + } + } - const translated = parseTranslationArray(extractAiText(result)) - if (!translated) throw new Error(`Translation model returned invalid JSON for ${targetLanguage}`) - if (translated.length !== batch.length) { - throw new Error(`Translation model returned ${translated.length} strings for ${batch.length} ${targetLanguage} strings`) + console.warn('Translation model response rejected', { + targetLanguage, + attempt, + maxAttempts: TRANSLATION_MODEL_ATTEMPTS, + batchSize: batch.length, + error: lastError.message, + outputPreview: aiPayloadPreview(payload), + }) } - assertTranslatedBatch(targetLanguage, batch, translated) - return translated + throw new Error(`Translation model failed after ${TRANSLATION_MODEL_ATTEMPTS} attempts for ${targetLanguage}: ${lastError?.message ?? 'unknown error'}`) } function renderTranslatedHtml(parts: HtmlPart[], segments: Segment[], translations: string[]): string { @@ -1623,8 +1703,17 @@ export default { try { await processTranslationJob(message.body, env) } catch (error) { - console.error('Failed to process translated page queue job', { job: message.body, error: errorMessage(error) }) - throw error + console.error('Failed to process translated page queue job; retrying message', { + job: message.body, + attempts: message.attempts, + retryDelaySeconds: TRANSLATION_QUEUE_RETRY_DELAY_SECONDS, + error: errorMessage(error), + }) + if (typeof message.retry === 'function') { + message.retry({ delaySeconds: TRANSLATION_QUEUE_RETRY_DELAY_SECONDS }) + } else { + throw error + } } } }, From 28ddebc30e1a6a9b5e44689bca6fdc79739d8c26 Mon Sep 17 00:00:00 2001 From: Martin Donadieu Date: Fri, 1 May 2026 21:28:23 +0200 Subject: [PATCH 2/2] Address translation retry review --- apps/translation-worker/src/index.ts | 86 ++++++++++++++++------------ 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/apps/translation-worker/src/index.ts b/apps/translation-worker/src/index.ts index 6f6ac1210..01d5ff6f9 100644 --- a/apps/translation-worker/src/index.ts +++ b/apps/translation-worker/src/index.ts @@ -34,6 +34,7 @@ type QueueRetryOptions = { } type QueueMessage = { + readonly id?: string readonly body: T readonly attempts?: number retry?: (options?: QueueRetryOptions) => void @@ -866,7 +867,7 @@ function translationArrayFromUnknown(value: unknown): string[] | null { const record = recordOf(value) if (!record) return null - for (const key of ['translations', 'translated', 'translatedTexts', 'texts', 'items', 'result', 'response', 'data']) { + for (const key of ['translations', 'translated', 'translatedTexts', 'items', 'result', 'response', 'data']) { const nestedArray = stringArrayFromUnknown(record[key]) if (nestedArray) return nestedArray } @@ -938,43 +939,44 @@ async function translateBatch(env: Env, targetLanguage: string, batch: string[]) let lastError: Error | null = null for (let attempt = 1; attempt <= TRANSLATION_MODEL_ATTEMPTS; attempt += 1) { - const result = await env.AI.run(model, { - temperature: 0, - max_tokens: 8192, - messages: [ - { - role: 'system', - content: [ - 'You translate Capgo website copy for the target locale.', - 'Translate naturally for the user cultural context; do not translate word for word.', - 'Translate every human-readable label, heading, sentence, and paragraph into the target language, including short navigation labels.', - 'Preserve brand names, product names, developer terms, URLs, code identifiers, file paths, package names, language codes, numbers, punctuation, and whitespace meaning. Preserve terms like Capgo, Capacitor, code, API, SDK, CLI, npm, bun, GitHub, and Cloudflare when they are names or technical terms.', - `Return only valid JSON: one JSON array of exactly ${batch.length} strings, in the same order as the input. Do not return Markdown, an object, keys, comments, or explanations. Escape quotes and newlines inside JSON strings.`, - attempt > 1 ? 'Your previous response was rejected. Fix the format and return only the JSON array.' : '', - ] - .filter(Boolean) - .join(' '), - }, - { - role: 'user', - content: JSON.stringify({ targetLanguage, texts: batch }), - }, - ], - }) - - const payload = extractAiPayload(result) - const translated = parseTranslationArray(payload) - if (!translated) { - lastError = new Error(`Translation model returned invalid JSON for ${targetLanguage}`) - } else if (translated.length !== batch.length) { - lastError = new Error(`Translation model returned ${translated.length} strings for ${batch.length} ${targetLanguage} strings`) - } else { - try { + let payload: unknown = '' + try { + const result = await env.AI.run(model, { + temperature: 0, + max_tokens: 8192, + messages: [ + { + role: 'system', + content: [ + 'You translate Capgo website copy for the target locale.', + 'Translate naturally for the user cultural context; do not translate word for word.', + 'Translate every human-readable label, heading, sentence, and paragraph into the target language, including short navigation labels.', + 'Preserve brand names, product names, developer terms, URLs, code identifiers, file paths, package names, language codes, numbers, punctuation, and whitespace meaning. Preserve terms like Capgo, Capacitor, code, API, SDK, CLI, npm, bun, GitHub, and Cloudflare when they are names or technical terms.', + `Return only valid JSON: one JSON array of exactly ${batch.length} strings, in the same order as the input. Do not return Markdown, an object, keys, comments, or explanations. Escape quotes and newlines inside JSON strings.`, + attempt > 1 ? 'Your previous response was rejected. Fix the format and return only the JSON array.' : '', + ] + .filter(Boolean) + .join(' '), + }, + { + role: 'user', + content: JSON.stringify({ targetLanguage, texts: batch }), + }, + ], + }) + + payload = extractAiPayload(result) + const translated = parseTranslationArray(payload) + if (!translated) { + lastError = new Error(`Translation model returned invalid JSON for ${targetLanguage}`) + } else if (translated.length !== batch.length) { + lastError = new Error(`Translation model returned ${translated.length} strings for ${batch.length} ${targetLanguage} strings`) + } else { assertTranslatedBatch(targetLanguage, batch, translated) return translated - } catch (error) { - lastError = error instanceof Error ? error : new Error(errorMessage(error)) } + } catch (error) { + lastError = error instanceof Error ? error : new Error(errorMessage(error)) } console.warn('Translation model response rejected', { @@ -990,6 +992,14 @@ async function translateBatch(env: Env, targetLanguage: string, batch: string[]) throw new Error(`Translation model failed after ${TRANSLATION_MODEL_ATTEMPTS} attempts for ${targetLanguage}: ${lastError?.message ?? 'unknown error'}`) } +function logPathnameFromUrl(value: string): string { + try { + return new URL(value).pathname + } catch { + return '' + } +} + function renderTranslatedHtml(parts: HtmlPart[], segments: Segment[], translations: string[]): string { return parts .map((part) => { @@ -1704,7 +1714,11 @@ export default { await processTranslationJob(message.body, env) } catch (error) { console.error('Failed to process translated page queue job; retrying message', { - job: message.body, + messageId: message.id, + pathname: logPathnameFromUrl(message.body.url), + locale: message.body.locale, + reason: message.body.reason, + cacheVersion: message.body.cacheVersion, attempts: message.attempts, retryDelaySeconds: TRANSLATION_QUEUE_RETRY_DELAY_SECONDS, error: errorMessage(error),