diff --git a/.github/workflows/l10n.yml b/.github/workflows/l10n.yml
index f5f1871f7e2..1d244c6fba5 100644
--- a/.github/workflows/l10n.yml
+++ b/.github/workflows/l10n.yml
@@ -1245,10 +1245,139 @@ jobs:
             exit 1
           fi
 
+  l10n_locale_embedding_cargo_install:
+    name: L10n/Locale Embedding - Cargo Install
+    runs-on: ubuntu-latest
+    env:
+      SCCACHE_GHA_ENABLED: "true"
+      RUSTC_WRAPPER: "sccache"
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          persist-credentials: false
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+        with:
+          key: cargo-install-locale-embedding
+      - name: Run sccache-cache
+        uses: mozilla-actions/sccache-action@v0.0.9
+      - name: Install prerequisites
+        run: |
+          sudo apt-get -y update
+          sudo apt-get -y install libselinux1-dev locales
+          # Generate French locale for testing
+          sudo locale-gen --keep-existing fr_FR.UTF-8
+          locale -a | grep -i fr || exit 1
+
+      - name: Test English locale embedding (default)
+        run: |
+          export LANG=en_US.UTF-8
+          export LC_ALL=en_US.UTF-8
+
+          echo "Building uu_yes with LANG=$LANG"
+          cargo build --package uu_yes --release
+
+          # Find the generated embedded_locales.rs
+          locale_file=$(find target/release/build -path "*/uu_yes-*/out/embedded_locales.rs" -o -path "*/uucore-*/out/embedded_locales.rs" | head -1)
+          if [ -z "$locale_file" ]; then
+            echo "ERROR: Could not find embedded_locales.rs"
+            exit 1
+          fi
+
+          echo "Found embedded_locales.rs at: $locale_file"
+          echo "Checking embedded locales..."
+
+          # Should contain en-US
+          if grep -q 'yes/en-US\.ftl' "$locale_file" || grep -q 'uucore/en-US\.ftl' "$locale_file"; then
+            echo "✓ Found en-US locale (fallback)"
+          else
+            echo "✗ ERROR: en-US locale not found"
+            exit 1
+          fi
+
+          # Should NOT contain fr-FR when building with en_US.UTF-8
+          if grep -q 'yes/fr-FR\.ftl' "$locale_file" || grep -q 'uucore/fr-FR\.ftl' "$locale_file"; then
+            echo "✗ ERROR: Unexpectedly found fr-FR locale when LANG=en_US.UTF-8"
+            exit 1
+          else
+            echo "✓ Correctly omitted fr-FR locale"
+          fi
+
+          echo "✓ SUCCESS: English locale embedding working correctly"
+
+      - name: Test French locale embedding (system locale)
+        run: |
+          export LANG=fr_FR.UTF-8
+          export LC_ALL=fr_FR.UTF-8
+
+          # Clean previous build to ensure fresh compile
+          cargo clean -p uu_yes
+          cargo clean -p uucore
+
+          echo "Building uu_yes with LANG=$LANG"
+          cargo build --package uu_yes --release
+
+          # Find the generated embedded_locales.rs
+          locale_file=$(find target/release/build -path "*/uu_yes-*/out/embedded_locales.rs" -o -path "*/uucore-*/out/embedded_locales.rs" | head -1)
+          if [ -z "$locale_file" ]; then
+            echo "ERROR: Could not find embedded_locales.rs"
+            exit 1
+          fi
+
+          echo "Found embedded_locales.rs at: $locale_file"
+          echo "Checking embedded locales..."
+
+          # Should contain en-US (fallback)
+          if grep -q 'yes/en-US\.ftl' "$locale_file" || grep -q 'uucore/en-US\.ftl' "$locale_file"; then
+            echo "✓ Found en-US locale (fallback)"
+          else
+            echo "✗ ERROR: en-US locale not found"
+            exit 1
+          fi
+
+          # Should contain fr-FR when building with fr_FR.UTF-8
+          if grep -q 'yes/fr-FR\.ftl' "$locale_file" || grep -q 'uucore/fr-FR\.ftl' "$locale_file"; then
+            echo "✓ Found fr-FR locale (system locale from LANG)"
+          else
+            echo "Note: fr-FR locale not found - this is expected if French translation doesn't exist yet"
+            echo "::notice::French locale for 'yes' utility may not be available"
+          fi
+
+          echo "✓ SUCCESS: System locale detection working correctly"
+
+      - name: Test locale count is reasonable
+        run: |
+          export LANG=fr_FR.UTF-8
+          cargo clean -p uu_yes
+          cargo build --package uu_yes --release
+
+          locale_file=$(find target/release/build -path "*/uucore-*/out/embedded_locales.rs" | head -1)
+          if [ -z "$locale_file" ]; then
+            echo "ERROR: Could not find uucore embedded_locales.rs"
+            exit 1
+          fi
+
+          # Count embedded locales (should be en-US + system locale, not all locales)
+          # grep -c already prints 0 when nothing matches (but exits 1), so only mask the exit status
+          locale_count=$(grep -c '/en-US\.ftl\|/fr-FR\.ftl' "$locale_file" || true)
+          echo "uu_yes has $locale_count embedded locale entries for yes utility"
+
+          # For a single utility build, should have minimal locales (en-US + optionally system locale)
+          # Not the full multicall set
+          total_match_count=$(grep -c '=> Some(r###' "$locale_file" || true)
+          echo "Total embedded entries: $total_match_count"
+
+          if [ "$total_match_count" -le 10 ]; then
+            echo "✓ SUCCESS: Locale embedding is targeted ($total_match_count entries)"
+          else
+            echo "::warning::More locales than expected ($total_match_count entries)"
+            echo "This might be expected for utility + uucore locales"
+          fi
+
   l10n_locale_embedding_regression_test:
     name: L10n/Locale Embedding Regression Test
     runs-on: ubuntu-latest
-    needs: [l10n_locale_embedding_cat, l10n_locale_embedding_ls, l10n_locale_embedding_multicall]
+    needs: [l10n_locale_embedding_cat, l10n_locale_embedding_ls, l10n_locale_embedding_multicall, l10n_locale_embedding_cargo_install]
     steps:
       - name: All locale embedding tests passed
         run: echo "✓ All locale embedding tests passed successfully"
diff --git a/src/uucore/build.rs b/src/uucore/build.rs
index d5637ef3f55..f79b3922b7b 100644
--- a/src/uucore/build.rs
+++ b/src/uucore/build.rs
@@ -31,15 +31,21 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Try to detect if we're building for a specific utility by checking build configuration
     // This attempts to identify individual utility builds vs multicall binary builds
     let target_utility = detect_target_utility();
+    let locales_to_embed = get_locales_to_embed();
 
     match target_utility {
         Some(util_name) => {
             // Embed only the specific utility's locale (cat.ftl for cat for example)
-            embed_single_utility_locale(&mut embedded_file, &project_root()?, &util_name)?;
+            embed_single_utility_locale(
+                &mut embedded_file,
+                &project_root()?,
+                &util_name,
+                &locales_to_embed,
+            )?;
         }
         None => {
             // Embed all utility locales (multicall binary or fallback)
-            embed_all_utility_locales(&mut embedded_file, &project_root()?)?;
+            embed_all_utility_locales(&mut embedded_file, &project_root()?, &locales_to_embed)?;
         }
     }
 
@@ -118,38 +124,20 @@ fn embed_single_utility_locale(
     embedded_file: &mut std::fs::File,
     project_root: &Path,
     util_name: &str,
+    locales_to_embed: &(String, Option<String>),
 ) -> Result<(), Box<dyn std::error::Error>> {
-    use std::fs;
-
-    // Embed the specific utility's locale
-    let locale_path = project_root
-        .join("src/uu")
-        .join(util_name)
-        .join("locales/en-US.ftl");
-
-    if locale_path.exists() {
-        let content = fs::read_to_string(&locale_path)?;
-        writeln!(embedded_file, "        // Locale for {util_name}")?;
-        writeln!(
-            embedded_file,
-            "        \"{util_name}/en-US.ftl\" => Some(r###\"{content}\"###),"
-        )?;
-
-        // Tell Cargo to rerun if this file changes
-        println!("cargo:rerun-if-changed={}", locale_path.display());
-    }
+    // Embed utility-specific locales
+    embed_component_locales(embedded_file, locales_to_embed, util_name, |locale| {
+        project_root
+            .join("src/uu")
+            .join(util_name)
+            .join(format!("locales/{locale}.ftl"))
+    })?;
 
     // Always embed uucore locale file if it exists
-    let uucore_locale_path = project_root.join("src/uucore/locales/en-US.ftl");
-    if uucore_locale_path.exists() {
-        let content = fs::read_to_string(&uucore_locale_path)?;
-        writeln!(embedded_file, "        // Common uucore locale")?;
-        writeln!(
-            embedded_file,
-            "        \"uucore/en-US.ftl\" => Some(r###\"{content}\"###),"
-        )?;
-        println!("cargo:rerun-if-changed={}", uucore_locale_path.display());
-    }
+    embed_component_locales(embedded_file, locales_to_embed, "uucore", |locale| {
+        project_root.join(format!("src/uucore/locales/{locale}.ftl"))
+    })?;
 
     Ok(())
 }
@@ -158,6 +146,7 @@ fn embed_single_utility_locale(
 fn embed_all_utility_locales(
     embedded_file: &mut std::fs::File,
     project_root: &Path,
+    locales_to_embed: &(String, Option<String>),
 ) -> Result<(), Box<dyn std::error::Error>> {
     use std::fs;
 
@@ -166,7 +155,7 @@ fn embed_all_utility_locales(
     if !src_uu_dir.exists() {
         // When src/uu doesn't exist (e.g., standalone uucore from crates.io),
         // embed a static list of utility locales that are commonly used
-        embed_static_utility_locales(embedded_file)?;
+        embed_static_utility_locales(embedded_file, locales_to_embed)?;
         return Ok(());
     }
 
@@ -183,31 +172,17 @@ fn embed_all_utility_locales(
 
     // Embed locale files for each utility
     for util_name in &util_dirs {
-        let locale_path = src_uu_dir.join(util_name).join("locales/en-US.ftl");
-        if locale_path.exists() {
-            let content = fs::read_to_string(&locale_path)?;
-            writeln!(embedded_file, "        // Locale for {util_name}")?;
-            writeln!(
-                embedded_file,
-                "        \"{util_name}/en-US.ftl\" => Some(r###\"{content}\"###),"
-            )?;
-
-            // Tell Cargo to rerun if this file changes
-            println!("cargo:rerun-if-changed={}", locale_path.display());
-        }
+        embed_component_locales(embedded_file, locales_to_embed, util_name, |locale| {
+            src_uu_dir
+                .join(util_name)
+                .join(format!("locales/{locale}.ftl"))
+        })?;
     }
 
     // Also embed uucore locale file if it exists
-    let uucore_locale_path = project_root.join("src/uucore/locales/en-US.ftl");
-    if uucore_locale_path.exists() {
-        let content = fs::read_to_string(&uucore_locale_path)?;
-        writeln!(embedded_file, "        // Common uucore locale")?;
-        writeln!(
-            embedded_file,
-            "        \"uucore/en-US.ftl\" => Some(r###\"{content}\"###),"
-        )?;
-        println!("cargo:rerun-if-changed={}", uucore_locale_path.display());
-    }
+    embed_component_locales(embedded_file, locales_to_embed, "uucore", |locale| {
+        project_root.join(format!("src/uucore/locales/{locale}.ftl"))
+    })?;
 
     embedded_file.flush()?;
     Ok(())
@@ -215,6 +190,7 @@ fn embed_all_utility_locales(
 
 fn embed_static_utility_locales(
     embedded_file: &mut std::fs::File,
+    locales_to_embed: &(String, Option<String>),
 ) -> Result<(), Box<dyn std::error::Error>> {
     use std::env;
 
@@ -229,15 +205,9 @@ fn embed_static_utility_locales(
     };
 
     // First, try to embed uucore locales - critical for common translations like "Usage:"
-    let uucore_locale_file = Path::new(&manifest_dir).join("locales/en-US.ftl");
-    if uucore_locale_file.is_file() {
-        let content = std::fs::read_to_string(&uucore_locale_file)?;
-        writeln!(embedded_file, "        // Common uucore locale")?;
-        writeln!(
-            embedded_file,
-            "        \"uucore/en-US.ftl\" => Some(r###\"{content}\"###),"
-        )?;
-    }
+    embed_component_locales(embedded_file, locales_to_embed, "uucore", |locale| {
+        Path::new(&manifest_dir).join(format!("locales/{locale}.ftl"))
+    })?;
 
     // Collect and sort for deterministic builds
     let mut entries: Vec<_> = std::fs::read_dir(registry_dir)?
@@ -251,15 +221,12 @@ fn embed_static_utility_locales(
             // Match uu_<util>-<version>
             if let Some((util_part, _)) = dir_name.split_once('-') {
                 if let Some(util_name) = util_part.strip_prefix("uu_") {
-                    let locale_file = entry.path().join("locales/en-US.ftl");
-                    if locale_file.is_file() {
-                        let content = std::fs::read_to_string(&locale_file)?;
-                        writeln!(embedded_file, "        // Locale for {util_name}")?;
-                        writeln!(
-                            embedded_file,
-                            "        \"{util_name}/en-US.ftl\" => Some(r###\"{content}\"###),"
-                        )?;
-                    }
+                    embed_component_locales(
+                        embedded_file,
+                        locales_to_embed,
+                        util_name,
+                        |locale| entry.path().join(format!("locales/{locale}.ftl")),
+                    )?;
                 }
             }
         }
@@ -267,3 +234,253 @@ fn embed_static_utility_locales(
 
     Ok(())
 }
+
+/// Determines which locales to embed into the binary.
+///
+/// To support localized messages in installed binaries (e.g., via `cargo install`),
+/// this function identifies the user's current locale from the `LANG` environment
+/// variable.
+///
+/// It always includes "en-US" to ensure that a fallback is available if the
+/// system locale's translation file is missing or if `LANG` is not set.
+fn get_locales_to_embed() -> (String, Option<String>) {
+    // The embedded set depends on LANG, so ask Cargo to rerun this build script when it changes
+    println!("cargo:rerun-if-env-changed=LANG");
+    let system_locale = env::var("LANG").ok().and_then(|lang| {
+        let locale = lang.split('.').next()?.replace('_', "-");
+        if locale != "en-US" && !locale.is_empty() {
+            Some(locale)
+        } else {
+            None
+        }
+    });
+    ("en-US".to_string(), system_locale)
+}
+
+/// Helper function to iterate over the locales to embed.
+fn for_each_locale<F>(
+    locales: &(String, Option<String>),
+    mut f: F,
+) -> Result<(), Box<dyn std::error::Error>>
+where
+    F: FnMut(&str) -> Result<(), Box<dyn std::error::Error>>,
+{
+    f(&locales.0)?;
+    if let Some(ref system_locale) = locales.1 {
+        f(system_locale)?;
+    }
+    Ok(())
+}
+
+/// Helper function to embed a single locale file.
+fn embed_locale_file(
+    embedded_file: &mut std::fs::File,
+    locale_path: &Path,
+    locale_key: &str,
+    locale: &str,
+    component: &str,
+) -> Result<(), Box<dyn std::error::Error>> {
+    use std::fs;
+
+    if locale_path.is_file() {
+        let content = fs::read_to_string(locale_path)?;
+        writeln!(
+            embedded_file,
+            "        // Locale for {component} ({locale})"
+        )?;
+        writeln!(
+            embedded_file,
+            "        \"{locale_key}\" => Some(r###\"{content}\"###),"
+        )?;
+
+        // Tell Cargo to rerun if this file changes
+        println!("cargo:rerun-if-changed={}", locale_path.display());
+    }
+    Ok(())
+}
+
+/// Higher-level helper to embed locale files for a component with a path pattern.
+/// This eliminates the repetitive for_each_locale + embed_locale_file pattern.
+fn embed_component_locales<F>(
+    embedded_file: &mut std::fs::File,
+    locales: &(String, Option<String>),
+    component_name: &str,
+    path_builder: F,
+) -> Result<(), Box<dyn std::error::Error>>
+where
+    F: Fn(&str) -> std::path::PathBuf,
+{
+    for_each_locale(locales, |locale| {
+        let locale_path = path_builder(locale);
+        embed_locale_file(
+            embedded_file,
+            &locale_path,
+            &format!("{component_name}/{locale}.ftl"),
+            locale,
+            component_name,
+        )
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn get_locales_to_embed_no_lang() {
+        unsafe {
+            env::remove_var("LANG");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, None);
+
+        unsafe {
+            env::set_var("LANG", "");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, None);
+        unsafe {
+            env::remove_var("LANG");
+        }
+
+        unsafe {
+            env::set_var("LANG", "en_US.UTF-8");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, None);
+        unsafe {
+            env::remove_var("LANG");
+        }
+    }
+
+    #[test]
+    fn get_locales_to_embed_with_lang() {
+        unsafe {
+            env::set_var("LANG", "fr_FR.UTF-8");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, Some("fr-FR".to_string()));
+        unsafe {
+            env::remove_var("LANG");
+        }
+
+        unsafe {
+            env::set_var("LANG", "zh_CN.UTF-8");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, Some("zh-CN".to_string()));
+        unsafe {
+            env::remove_var("LANG");
+        }
+
+        unsafe {
+            env::set_var("LANG", "de");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, Some("de".to_string()));
+        unsafe {
+            env::remove_var("LANG");
+        }
+    }
+
+    #[test]
+    fn get_locales_to_embed_invalid_lang() {
+        // invalid locale format
+        unsafe {
+            env::set_var("LANG", "invalid");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, Some("invalid".to_string()));
+        unsafe {
+            env::remove_var("LANG");
+        }
+
+        // numeric values
+        unsafe {
+            env::set_var("LANG", "123");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, Some("123".to_string()));
+        unsafe {
+            env::remove_var("LANG");
+        }
+
+        // special characters
+        unsafe {
+            env::set_var("LANG", "@@@@");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, Some("@@@@".to_string()));
+        unsafe {
+            env::remove_var("LANG");
+        }
+
+        // malformed locale (no country code but with encoding)
+        unsafe {
+            env::set_var("LANG", "en.UTF-8");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, Some("en".to_string()));
+        unsafe {
+            env::remove_var("LANG");
+        }
+
+        // valid format but unusual locale
+        unsafe {
+            env::set_var("LANG", "XX_YY.UTF-8");
+        }
+        let (en_locale, system_locale) = get_locales_to_embed();
+        assert_eq!(en_locale, "en-US");
+        assert_eq!(system_locale, Some("XX-YY".to_string()));
+        unsafe {
+            env::remove_var("LANG");
+        }
+    }
+
+    #[test]
+    fn for_each_locale_basic() {
+        let locales = ("en-US".to_string(), Some("fr-FR".to_string()));
+        let mut collected = Vec::new();
+
+        for_each_locale(&locales, |locale| {
+            collected.push(locale.to_string());
+            Ok(())
+        })
+        .unwrap();
+
+        assert_eq!(collected, vec!["en-US", "fr-FR"]);
+    }
+
+    #[test]
+    fn for_each_locale_no_system_locale() {
+        let locales = ("en-US".to_string(), None);
+        let mut collected = Vec::new();
+
+        for_each_locale(&locales, |locale| {
+            collected.push(locale.to_string());
+            Ok(())
+        })
+        .unwrap();
+
+        assert_eq!(collected, vec!["en-US"]);
+    }
+
+    #[test]
+    fn for_each_locale_error_handling() {
+        let locales = ("en-US".to_string(), Some("fr-FR".to_string()));
+
+        let result = for_each_locale(&locales, |_locale| Err("test error".into()));
+
+        assert!(result.is_err());
+    }
+}