import re

# Spanish marker vocabulary. It deliberately mixes two kinds of entries:
# single characters specific to Spanish orthography (accents, ñ, ü, ¿, ¡)
# and common Spanish function words.
SPANISH_MARKERS = {"á","é","í","ó","ú","ñ","ü","¿","¡","los","las","del","que","es","en","con","para","por","su","una"}
# Common English function words.
ENGLISH_MARKERS = {"the","and","of","to","is","in","it","you","that","was","for","are","as","be","this","have","from"}

# The two kinds of Spanish markers must be scored differently: characters
# are looked up in the raw text, words are looked up among the word tokens.
# Intersecting the character markers with word tokens (as was done before)
# never matches, because "¿"/"¡" are not word characters and an accented
# letter is only a token when it stands completely alone.
_SPANISH_CHAR_MARKERS = frozenset(m for m in SPANISH_MARKERS if len(m) == 1)
_SPANISH_WORD_MARKERS = frozenset(SPANISH_MARKERS) - _SPANISH_CHAR_MARKERS

# Word tokens: runs of ASCII letters plus the Spanish accented letters.
_WORD_RE = re.compile(r"[a-záéíóúñü]+")


def detect_language(text: str) -> str:
    """Guess whether *text* is English or Spanish.

    The text is lower-cased, then scored against both marker vocabularies:

    * Spanish score = number of distinct Spanish marker words present
      + number of distinct Spanish marker characters present anywhere
      in the text.
    * English score = number of distinct English marker words present.

    Args:
        text: The text to classify. Empty (or otherwise falsy) input is
            accepted.

    Returns:
        ``"es"`` when the Spanish score is strictly greater than the
        English score, otherwise ``"en"``. Ties, empty input and input
        without any markers therefore all yield ``"en"``.
    """
    if not text:
        return "en"

    lowered = text.lower()
    tokens = set(_WORD_RE.findall(lowered))

    # Distinct hits only, so repeating a marker does not inflate a score.
    spanish_char_hits = sum(1 for ch in _SPANISH_CHAR_MARKERS if ch in lowered)
    es_score = len(tokens & _SPANISH_WORD_MARKERS) + spanish_char_hits
    en_score = len(tokens & ENGLISH_MARKERS)

    return "es" if es_score > en_score else "en"