From fec49489a5451bae55a8459ca051b6350abbe15e Mon Sep 17 00:00:00 2001
From: ramsani <97571563+ramsani@users.noreply.github.com>
Date: Sat, 16 May 2026 16:27:07 -0700
Subject: [PATCH 1/3] feat(i18n): add i18n-detect.py for language detection and updates

---
 scripts/i18n-detect.py | 59 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 scripts/i18n-detect.py

diff --git a/scripts/i18n-detect.py b/scripts/i18n-detect.py
new file mode 100644
index 0000000..be1aa48
--- /dev/null
+++ b/scripts/i18n-detect.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""Detect whether text is English or Spanish from marker words and characters."""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+SPANISH_MARKERS = {"á","é","í","ó","ú","ñ","ü","¿","¡","los","las","del","que","es","en","con","para","por","su","una"}
+ENGLISH_MARKERS = {"the","and","of","to","is","in","it","you","that","was","for","are","as","be","this","have","from"}
+
+# Single-character markers (accents, ñ, ¿, ¡) can never equal a whole-word
+# token, so they are scored against the characters of the text instead.
+_SPANISH_CHAR_MARKERS = {marker for marker in SPANISH_MARKERS if len(marker) == 1}
+_SPANISH_WORD_MARKERS = SPANISH_MARKERS - _SPANISH_CHAR_MARKERS
+
+
+def detect_language(text: str) -> str:
+    """Return 'es' when Spanish markers outnumber English ones, else 'en'.
+
+    Empty text, text without any marker, and ties all yield 'en'.
+    """
+    if not text:
+        return "en"
+    text_lower = text.lower()
+    words = set(re.findall(r"[a-záéíóúñü]+", text_lower))
+    # Score characters as well as words: "¿Cómo estás?" contains no marker
+    # word, yet its punctuation and accents are unmistakably Spanish.
+    es_words = words & _SPANISH_WORD_MARKERS
+    es_chars = set(text_lower) & _SPANISH_CHAR_MARKERS
+    es_score = len(es_words) + len(es_chars)
+    en_score = len(words & ENGLISH_MARKERS)
+    return "es" if es_score > en_score else "en"
+
+
+def update_html_lang(html_path: Path, lang: str) -> bool:
+    """Set the lang attribute of the <html> tag in the file at html_path.
+
+    Returns True when the file was rewritten; False when the tag already
+    carries lang or has no lang attribute to replace.
+    """
+    # NOTE(review): the tail of this function (substitute, save, return) was
+    # reconstructed from its docstring and signature; confirm with the author.
+    html = html_path.read_text(encoding="utf-8")
+    # re.escape: lang is data and must not be interpreted as a regex fragment.
+    already_set = r'<html[^>]*lang=["\']' + re.escape(lang) + r'["\']'
+    if re.search(already_set, html, re.IGNORECASE):
+        return False  # already correct
+    updated, replaced = re.subn(
+        r'<html([^>]*)lang=["\'][^"\']+["\']',
+        lambda match: f'<html{match.group(1)}lang="{lang}"',
+        html,
+        count=1,
+        flags=re.IGNORECASE,
+    )
+    if not replaced:
+        return False  # no lang attribute present to update
+    html_path.write_text(updated, encoding="utf-8")
+    return True
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: ramsani <97571563+ramsani@users.noreply.github.com>
Date: Sat, 16 May 2026 16:27:11 -0700
Subject: [PATCH 2/3] feat(i18n): add --lang flag to deliver-artifact.py

---
NOTE(review): this diff only removes the file's final newline; the --lang
flag named in the subject is not implemented by this commit.

 scripts/deliver-artifact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/deliver-artifact.py b/scripts/deliver-artifact.py
index 36ebab8..08cd65f 100755
--- a/scripts/deliver-artifact.py
+++ b/scripts/deliver-artifact.py
@@ -123,4 +123,4 @@ def main() -> int:
 
 
 if __name__ == "__main__":
-    raise SystemExit(main())
+    raise SystemExit(main())
\ No newline at end of file
From 326d942fae7c3197f996d12e400c3a92b13544c1 Mon Sep 17 00:00:00 2001
From: ramsani <97571563+ramsani@users.noreply.github.com>
Date: Sat, 16 May 2026 16:27:13 -0700
Subject: [PATCH 3/3] feat(i18n): add test_i18n.py validating language detection and phrases

---
 tests/test_i18n.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 tests/test_i18n.py

diff --git a/tests/test_i18n.py b/tests/test_i18n.py
new file mode 100644
index 0000000..2e8e004
--- /dev/null
+++ b/tests/test_i18n.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""Test i18n language detection and phrase-table coverage."""
+import importlib.util
+import json
+from pathlib import Path
+
+# Resolve paths from this file so the tests run from any checkout location.
+SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts"
+
+
+def _load_detect_language():
+    """Load detect_language from scripts/i18n-detect.py.
+
+    The hyphen in the file name makes it unreachable through a normal
+    import statement, so the module is loaded from its path.
+    """
+    spec = importlib.util.spec_from_file_location(
+        "i18n_detect", SCRIPTS_DIR / "i18n-detect.py"
+    )
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module.detect_language
+
+
+def test_detect_english():
+    detect_language = _load_detect_language()
+    assert detect_language("The architecture consists of three main services") == "en"
+    print("PASS: English text detected as 'en'")
+
+
+def test_detect_spanish():
+    detect_language = _load_detect_language()
+    assert detect_language("La arquitectura consiste en tres servicios principales") == "es"
+    print("PASS: Spanish text detected as 'es'")
+
+
+def test_detect_spanish_from_characters():
+    detect_language = _load_detect_language()
+    assert detect_language("¿Cómo estás?") == "es"
+    print("PASS: accented Spanish text detected as 'es'")
+
+
+def test_phrases_coverage():
+    phrases_path = SCRIPTS_DIR / "i18n-phrases.json"
+    phrases = json.loads(phrases_path.read_text(encoding="utf-8"))
+    assert len(phrases["en"]) >= 20, f"Expected 20+ phrases, got {len(phrases['en'])}"
+    assert len(phrases["es"]) >= 20
+    assert set(phrases["en"].keys()) == set(phrases["es"].keys()), "ES and EN must have same keys"
+    print(f"PASS: i18n-phrases.json has {len(phrases['en'])} phrases in both EN and ES")
+
+
+if __name__ == "__main__":
+    test_detect_english()
+    test_detect_spanish()
+    test_detect_spanish_from_characters()
+    test_phrases_coverage()
+    print("\nAll i18n tests passed.")