From 6aa5827b3215e69866218a9aaca17a0141dbda2f Mon Sep 17 00:00:00 2001 From: danielmeppiel Date: Tue, 24 Mar 2026 04:28:19 +0100 Subject: [PATCH 1/2] fix: centralize YAML I/O with UTF-8 encoding for cross-platform safety Add yaml_io.py module (load_yaml, dump_yaml, yaml_to_str) that guarantees UTF-8 encoding and allow_unicode=True on all YAML file operations. This prevents silent mojibake on Windows where the default file encoding is cp1252. Migrated 20 YAML I/O sites across 15 source files to use the new helpers. Added CI guard that catches any yaml.dump/safe_dump writing to a file handle outside yaml_io.py. Addresses the root cause behind #387: non-ASCII characters in apm.yml (e.g. accented author names) are now preserved as real UTF-8 on all platforms instead of being escaped as \xNN sequences. Based on the investigation in #388 by @alopezsanchez. Co-authored-by: Alejandro Lopez Sanchez Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/ci.yml | 15 ++ CHANGELOG.md | 1 + src/apm_cli/bundle/lockfile_enrichment.py | 14 +- src/apm_cli/bundle/plugin_exporter.py | 3 +- src/apm_cli/commands/_helpers.py | 9 +- src/apm_cli/commands/install.py | 8 +- src/apm_cli/commands/uninstall/cli.py | 8 +- src/apm_cli/commands/uninstall/engine.py | 4 +- src/apm_cli/compilation/agents_compiler.py | 4 +- src/apm_cli/core/script_runner.py | 15 +- src/apm_cli/deps/aggregator.py | 4 +- src/apm_cli/deps/github_downloader.py | 16 +-- src/apm_cli/deps/lockfile.py | 5 +- src/apm_cli/deps/plugin_parser.py | 6 +- src/apm_cli/deps/verifier.py | 4 +- src/apm_cli/integration/mcp_integrator.py | 4 +- src/apm_cli/models/apm_package.py | 4 +- src/apm_cli/utils/yaml_io.py | 55 +++++++ tests/unit/test_deps.py | 4 +- tests/unit/test_yaml_io.py | 158 +++++++++++++++++++++ 20 files changed, 280 insertions(+), 61 deletions(-) create mode 100644 src/apm_cli/utils/yaml_io.py create mode 100644 tests/unit/test_yaml_io.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 05b2e203..67fb8181 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,21 @@ jobs: - name: Install dependencies run: uv sync --extra dev --extra build + - name: Check YAML encoding safety + run: | + # Ensure YAML file I/O goes through yaml_io helpers. + # Catches yaml.dump/safe_dump writing to a file handle outside yaml_io.py. + VIOLATIONS=$(grep -rn --include='*.py' \ + -E 'yaml\.(safe_)?dump\(.+,\s*[a-zA-Z_]+\b' src/apm_cli/ \ + | grep -v 'utils/yaml_io.py' \ + | grep -v '# yaml-io-exempt' \ + || true) + if [ -n "$VIOLATIONS" ]; then + echo "::error::Direct yaml.dump() to file handle detected. Use yaml_io.dump_yaml() instead:" + echo "$VIOLATIONS" + exit 1 + fi + - name: Run tests run: uv run pytest tests/unit tests/test_console.py -n auto --dist worksteal diff --git a/CHANGELOG.md b/CHANGELOG.md index 9729befa..271b511f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Cross-platform YAML encoding: all `apm.yml` read/write operations now use explicit UTF-8 encoding via centralized `yaml_io` helpers, preventing silent mojibake on Windows cp1252 terminals; non-ASCII characters (e.g. accented author names) are preserved as real UTF-8 instead of `\xNN` escape sequences (#387, based on #388 by @alopezsanchez) - Virtual package types (files, collections, subdirectories) now respect `ARTIFACTORY_ONLY=1`, matching the primary zip-archive proxy-only behavior (#418) - `apm pack --target claude` no longer produces an empty bundle when skills/agents are installed under `.github/` -- cross-target path mapping remaps `skills/` and `agents/` to the pack target prefix (#420) diff --git a/src/apm_cli/bundle/lockfile_enrichment.py b/src/apm_cli/bundle/lockfile_enrichment.py index ed0452de..34febfe4 100644 --- a/src/apm_cli/bundle/lockfile_enrichment.py +++ b/src/apm_cli/bundle/lockfile_enrichment.py @@ -146,13 +146,9 @@ def enrich_lockfile_for_pack( break pack_meta["mapped_from"] = sorted(used_src_prefixes) - pack_section = yaml.dump( - {"pack": pack_meta}, - default_flow_style=False, - sort_keys=False, - ) - - lockfile_yaml = yaml.dump( - data, default_flow_style=False, sort_keys=False, allow_unicode=True - ) + from ..utils.yaml_io import yaml_to_str + + pack_section = yaml_to_str({"pack": pack_meta}) + + lockfile_yaml = yaml_to_str(data) return pack_section + lockfile_yaml diff --git a/src/apm_cli/bundle/plugin_exporter.py b/src/apm_cli/bundle/plugin_exporter.py index 45f6214e..096840ff 100644 --- a/src/apm_cli/bundle/plugin_exporter.py +++ b/src/apm_cli/bundle/plugin_exporter.py @@ -285,7 +285,8 @@ def _get_dev_dependency_urls(apm_yml_path: Path) -> Set[Tuple[str, str]]: ``github/awesome-copilot``). """ try: - data = yaml.safe_load(apm_yml_path.read_text(encoding="utf-8")) + from ..utils.yaml_io import load_yaml + data = load_yaml(apm_yml_path) except (yaml.YAMLError, OSError, ValueError): return set() if not isinstance(data, dict): diff --git a/src/apm_cli/commands/_helpers.py b/src/apm_cli/commands/_helpers.py index 09158a5d..d1fdd8db 100644 --- a/src/apm_cli/commands/_helpers.py +++ b/src/apm_cli/commands/_helpers.py @@ -331,9 +331,8 @@ def _update_gitignore_for_apm_modules(logger=None): def _load_apm_config(): """Load configuration from apm.yml.""" if Path(APM_YML_FILENAME).exists(): - with open(APM_YML_FILENAME, "r") as f: - yaml = _lazy_yaml() - return yaml.safe_load(f) + from ..utils.yaml_io import load_yaml + return load_yaml(APM_YML_FILENAME) return None @@ -457,5 +456,5 @@ def _create_minimal_apm_yml(config, plugin=False): apm_yml_data["scripts"] = {} # Write apm.yml - with open(APM_YML_FILENAME, "w") as f: - yaml.safe_dump(apm_yml_data, f, default_flow_style=False, sort_keys=False) + from ..utils.yaml_io import dump_yaml + dump_yaml(apm_yml_data, APM_YML_FILENAME) diff --git a/src/apm_cli/commands/install.py b/src/apm_cli/commands/install.py index e7a0ca73..4775085d 100644 --- a/src/apm_cli/commands/install.py +++ b/src/apm_cli/commands/install.py @@ -82,8 +82,8 @@ def _validate_and_add_packages_to_apm_yml(packages, dry_run=False, dev=False, lo # Read current apm.yml try: - with open(apm_yml_path, "r") as f: - data = yaml.safe_load(f) or {} + from ..utils.yaml_io import load_yaml + data = load_yaml(apm_yml_path) or {} except Exception as e: if logger: logger.error(f"Failed to read {APM_YML_FILENAME}: {e}") @@ -200,8 +200,8 @@ def _validate_and_add_packages_to_apm_yml(packages, dry_run=False, dev=False, lo # Write back to apm.yml try: - with open(apm_yml_path, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False) + from ..utils.yaml_io import dump_yaml + dump_yaml(data, apm_yml_path) if logger: logger.success(f"Updated {APM_YML_FILENAME} with {len(validated_packages)} new package(s)") except Exception as e: diff --git a/src/apm_cli/commands/uninstall/cli.py b/src/apm_cli/commands/uninstall/cli.py index 0cca0523..b85b5d01 100644 --- a/src/apm_cli/commands/uninstall/cli.py +++ b/src/apm_cli/commands/uninstall/cli.py @@ -54,12 +54,11 @@ def uninstall(ctx, packages, dry_run, verbose): logger.start(f"Uninstalling {len(packages)} package(s)...") # Read current apm.yml - import yaml + from ...utils.yaml_io import load_yaml, dump_yaml apm_yml_path = Path(APM_YML_FILENAME) try: - with open(apm_yml_path, "r") as f: - data = yaml.safe_load(f) or {} + data = load_yaml(apm_yml_path) or {} except Exception as e: logger.error(f"Failed to read {APM_YML_FILENAME}: {e}") sys.exit(1) @@ -88,8 +87,7 @@ def uninstall(ctx, packages, dry_run, verbose): logger.progress(f"Removed {package} from apm.yml") data["dependencies"]["apm"] = current_deps try: - with open(apm_yml_path, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False) + dump_yaml(data, apm_yml_path) logger.success(f"Updated {APM_YML_FILENAME} (removed {len(packages_to_remove)} package(s))") except Exception as e: logger.error(f"Failed to write {APM_YML_FILENAME}: {e}") diff --git a/src/apm_cli/commands/uninstall/engine.py b/src/apm_cli/commands/uninstall/engine.py index a12aed3c..b768e3ce 100644 --- a/src/apm_cli/commands/uninstall/engine.py +++ b/src/apm_cli/commands/uninstall/engine.py @@ -179,8 +179,8 @@ def _cleanup_transitive_orphans(lockfile, packages_to_remove, apm_modules_dir, a # Determine remaining deps to avoid removing still-needed packages remaining_deps = builtins.set() try: - with open(apm_yml_path, "r") as f: - updated_data = yaml.safe_load(f) or {} + from ...utils.yaml_io import load_yaml + updated_data = load_yaml(apm_yml_path) or {} for dep_str in updated_data.get("dependencies", {}).get("apm", []) or []: try: ref = _parse_dependency_entry(dep_str) diff --git a/src/apm_cli/compilation/agents_compiler.py b/src/apm_cli/compilation/agents_compiler.py index 0df0ec41..1df5b4d2 100644 --- a/src/apm_cli/compilation/agents_compiler.py +++ b/src/apm_cli/compilation/agents_compiler.py @@ -73,8 +73,8 @@ def from_apm_yml(cls, **overrides) -> 'CompilationConfig': import yaml if Path('apm.yml').exists(): - with open('apm.yml', 'r') as f: - apm_config = yaml.safe_load(f) or {} + from ..utils.yaml_io import load_yaml + apm_config = load_yaml('apm.yml') or {} # Look for compilation section compilation_config = apm_config.get('compilation', {}) diff --git a/src/apm_cli/core/script_runner.py b/src/apm_cli/core/script_runner.py index da8c05e2..6c79ae2b 100644 --- a/src/apm_cli/core/script_runner.py +++ b/src/apm_cli/core/script_runner.py @@ -229,8 +229,8 @@ def _load_config(self) -> Optional[Dict]: if not config_path.exists(): return None - with open(config_path, "r") as f: - return yaml.safe_load(f) + from ..utils.yaml_io import load_yaml + return load_yaml(config_path) def _auto_compile_prompts( self, command: str, params: Dict[str, str] @@ -808,8 +808,8 @@ def _add_dependency_to_config(self, package_ref: str) -> None: return # Load current config - with open(config_path, "r") as f: - config = yaml.safe_load(f) or {} + from ..utils.yaml_io import load_yaml, dump_yaml + config = load_yaml(config_path) or {} # Ensure dependencies.apm section exists if "dependencies" not in config: @@ -822,8 +822,7 @@ def _add_dependency_to_config(self, package_ref: str) -> None: config["dependencies"]["apm"].append(package_ref) # Write back to file - with open(config_path, "w") as f: - yaml.dump(config, f, default_flow_style=False, sort_keys=False) + dump_yaml(config, config_path) print(f" [i] Added {package_ref} to apm.yml dependencies") @@ -838,8 +837,8 @@ def _create_minimal_config(self) -> None: "description": "Auto-generated for zero-config virtual package execution", } - with open("apm.yml", "w") as f: - yaml.dump(minimal_config, f, default_flow_style=False, sort_keys=False) + from ..utils.yaml_io import dump_yaml + dump_yaml(minimal_config, "apm.yml") print(f" [i] Created minimal apm.yml for zero-config execution") diff --git a/src/apm_cli/deps/aggregator.py b/src/apm_cli/deps/aggregator.py index 4090b923..a6a6976d 100644 --- a/src/apm_cli/deps/aggregator.py +++ b/src/apm_cli/deps/aggregator.py @@ -59,8 +59,8 @@ def sync_workflow_dependencies(output_file="apm.yml"): try: # Create the file - with open(output_file, 'w', encoding='utf-8') as f: - yaml.dump(apm_config, f, default_flow_style=False) + from ..utils.yaml_io import dump_yaml + dump_yaml(apm_config, output_file) return True, apm_config['servers'] except Exception as e: print(f"Error writing to {output_file}: {e}") diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 0d141d27..b69eaa11 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -1678,12 +1678,10 @@ def download_subdirectory_package(self, dep_ref: DependencyReference, target_pat package.version = short_sha apm_yml_path = target_path / "apm.yml" if apm_yml_path.exists(): - import yaml as _yaml - with open(apm_yml_path, "r", encoding="utf-8") as _f: - _data = _yaml.safe_load(_f) or {} + from ..utils.yaml_io import load_yaml, dump_yaml + _data = load_yaml(apm_yml_path) or {} _data["version"] = short_sha - with open(apm_yml_path, "w", encoding="utf-8") as _f: - _yaml.dump(_data, _f, default_flow_style=False, sort_keys=False) + dump_yaml(_data, apm_yml_path) # Update progress - complete if progress_obj and progress_task_id is not None: @@ -1986,12 +1984,10 @@ def download_package( # Keep the synthesized apm.yml in sync apm_yml_path = target_path / "apm.yml" if apm_yml_path.exists(): - import yaml as _yaml - with open(apm_yml_path, "r", encoding="utf-8") as _f: - _data = _yaml.safe_load(_f) or {} + from ..utils.yaml_io import load_yaml, dump_yaml + _data = load_yaml(apm_yml_path) or {} _data["version"] = short_sha - with open(apm_yml_path, "w", encoding="utf-8") as _f: - _yaml.dump(_data, _f, default_flow_style=False, sort_keys=False) + dump_yaml(_data, apm_yml_path) # Create and return PackageInfo return PackageInfo( diff --git a/src/apm_cli/deps/lockfile.py b/src/apm_cli/deps/lockfile.py index 01046bca..521a842e 100644 --- a/src/apm_cli/deps/lockfile.py +++ b/src/apm_cli/deps/lockfile.py @@ -181,9 +181,8 @@ def to_yaml(self) -> str: data["mcp_servers"] = sorted(self.mcp_servers) if self.mcp_configs: data["mcp_configs"] = dict(sorted(self.mcp_configs.items())) - return yaml.dump( - data, default_flow_style=False, sort_keys=False, allow_unicode=True - ) + from ..utils.yaml_io import yaml_to_str + return yaml_to_str(data) @classmethod def from_yaml(cls, yaml_str: str) -> "LockFile": diff --git a/src/apm_cli/deps/plugin_parser.py b/src/apm_cli/deps/plugin_parser.py index 5cc2fbca..b0e70bd8 100644 --- a/src/apm_cli/deps/plugin_parser.py +++ b/src/apm_cli/deps/plugin_parser.py @@ -497,7 +497,8 @@ def _generate_apm_yml(manifest: Dict[str, Any]) -> str: # field. Default to hybrid so the standard pipeline handles all components. apm_package['type'] = 'hybrid' - return yaml.dump(apm_package, default_flow_style=False, sort_keys=False) + from ..utils.yaml_io import yaml_to_str + return yaml_to_str(apm_package) def synthesize_plugin_json_from_apm_yml(apm_yml_path: Path) -> dict: @@ -521,7 +522,8 @@ def synthesize_plugin_json_from_apm_yml(apm_yml_path: Path) -> dict: raise FileNotFoundError(f"apm.yml not found: {apm_yml_path}") try: - data = yaml.safe_load(apm_yml_path.read_text(encoding="utf-8")) + from ..utils.yaml_io import load_yaml + data = load_yaml(apm_yml_path) except yaml.YAMLError as exc: raise ValueError(f"Invalid YAML in {apm_yml_path}: {exc}") from exc diff --git a/src/apm_cli/deps/verifier.py b/src/apm_cli/deps/verifier.py index bffae576..c11e1866 100644 --- a/src/apm_cli/deps/verifier.py +++ b/src/apm_cli/deps/verifier.py @@ -21,8 +21,8 @@ def load_apm_config(config_file="apm.yml"): print(f"Configuration file {config_file} not found.") return None - with open(config_path, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) + from ..utils.yaml_io import load_yaml + config = load_yaml(config_path) return config except Exception as e: diff --git a/src/apm_cli/integration/mcp_integrator.py b/src/apm_cli/integration/mcp_integrator.py index 3ea5e067..54819b17 100644 --- a/src/apm_cli/integration/mcp_integrator.py +++ b/src/apm_cli/integration/mcp_integrator.py @@ -895,8 +895,8 @@ def install( apm_yml = Path("apm.yml") if apm_yml.exists(): - with open(apm_yml, "r", encoding="utf-8") as f: - apm_config = yaml.safe_load(f) + from apm_cli.utils.yaml_io import load_yaml + apm_config = load_yaml(apm_yml) except Exception: apm_config = None diff --git a/src/apm_cli/models/apm_package.py b/src/apm_cli/models/apm_package.py index 8058b34a..367177eb 100644 --- a/src/apm_cli/models/apm_package.py +++ b/src/apm_cli/models/apm_package.py @@ -99,8 +99,8 @@ def from_apm_yml(cls, apm_yml_path: Path) -> "APMPackage": return cached try: - with open(apm_yml_path, 'r', encoding='utf-8') as f: - data = yaml.safe_load(f) + from ..utils.yaml_io import load_yaml + data = load_yaml(apm_yml_path) except yaml.YAMLError as e: raise ValueError(f"Invalid YAML format in {apm_yml_path}: {e}") diff --git a/src/apm_cli/utils/yaml_io.py b/src/apm_cli/utils/yaml_io.py new file mode 100644 index 00000000..13912993 --- /dev/null +++ b/src/apm_cli/utils/yaml_io.py @@ -0,0 +1,55 @@ +"""Cross-platform YAML I/O with guaranteed UTF-8 encoding. + +All YAML file operations in apm_cli should use these helpers to ensure +consistent encoding (UTF-8) and formatting (unicode, block style, key +order preserved). This prevents silent mojibake on Windows where the +default file encoding is cp1252, not UTF-8. + +Public API:: + + load_yaml(path) -- read a .yml/.yaml file -> dict | None + dump_yaml(data, path) -- write dict -> .yml/.yaml file + yaml_to_str(data) -- serialize dict -> YAML string +""" + +from pathlib import Path +from typing import Any, Dict, Optional, Union + +import yaml + +# Shared defaults matching existing codebase convention. +_DUMP_DEFAULTS: Dict[str, Any] = dict( + default_flow_style=False, + sort_keys=False, + allow_unicode=True, +) + + +def load_yaml(path: Union[str, Path]) -> Optional[Dict[str, Any]]: + """Load a YAML file with explicit UTF-8 encoding. + + Returns parsed data or ``None`` for empty files. + Raises ``FileNotFoundError`` or ``yaml.YAMLError`` on failure. + """ + with open(path, "r", encoding="utf-8") as fh: + return yaml.safe_load(fh) + + +def dump_yaml( + data: Any, + path: Union[str, Path], + *, + sort_keys: bool = False, +) -> None: + """Write data to a YAML file with UTF-8 encoding and unicode support.""" + with open(path, "w", encoding="utf-8") as fh: + yaml.safe_dump(data, fh, **{**_DUMP_DEFAULTS, "sort_keys": sort_keys}) + + +def yaml_to_str(data: Any, *, sort_keys: bool = False) -> str: + """Serialize data to a YAML string with unicode support. + + Use instead of bare ``yaml.dump()`` when building YAML content + for later file writes or string returns. + """ + return yaml.safe_dump(data, **{**_DUMP_DEFAULTS, "sort_keys": sort_keys}) diff --git a/tests/unit/test_deps.py b/tests/unit/test_deps.py index a4faa06e..7cbcee40 100644 --- a/tests/unit/test_deps.py +++ b/tests/unit/test_deps.py @@ -59,8 +59,8 @@ def test_scan_workflows_for_dependencies( self.assertEqual(mock_frontmatter_load.call_count, 2) @patch("apm_cli.deps.aggregator.scan_workflows_for_dependencies") - @patch("builtins.open", new_callable=mock_open) - @patch("yaml.dump") + @patch("apm_cli.utils.yaml_io.open", new_callable=mock_open) + @patch("apm_cli.utils.yaml_io.yaml.safe_dump") def test_sync_workflow_dependencies(self, mock_yaml_dump, mock_file, mock_scan): """Test syncing workflow dependencies to apm.yml.""" # Mock scan_workflows_for_dependencies to return a set of servers diff --git a/tests/unit/test_yaml_io.py b/tests/unit/test_yaml_io.py new file mode 100644 index 00000000..8044adb8 --- /dev/null +++ b/tests/unit/test_yaml_io.py @@ -0,0 +1,158 @@ +"""Tests for apm_cli.utils.yaml_io -- cross-platform UTF-8 YAML I/O.""" + +import pytest +import yaml + +from apm_cli.utils.yaml_io import dump_yaml, load_yaml, yaml_to_str + + +class TestLoadYaml: + """Tests for load_yaml().""" + + def test_load_utf8_content(self, tmp_path): + """Non-ASCII content is read correctly.""" + p = tmp_path / "test.yml" + p.write_text("author: \"Lopez\"\n", encoding="utf-8") + data = load_yaml(p) + assert data["author"] == "Lopez" + + def test_load_unicode_author(self, tmp_path): + """Unicode characters (accented, CJK) are preserved.""" + p = tmp_path / "test.yml" + # YAML \xF3 escape is decoded by the parser into the real char + p.write_text("author: \"L\\xF3pez\"\n", encoding="utf-8") + data = load_yaml(p) + assert data["author"] == "L\u00f3pez" + + def test_load_real_utf8_bytes(self, tmp_path): + """Real UTF-8 encoded non-ASCII round-trips correctly.""" + p = tmp_path / "test.yml" + # Write raw UTF-8 bytes (as allow_unicode=True would produce) + content = "author: L\u00f3pez\norg: \u7530\u4e2d\u592a\u90ce\n" + p.write_text(content, encoding="utf-8") + data = load_yaml(p) + assert data["author"] == "L\u00f3pez" + assert data["org"] == "\u7530\u4e2d\u592a\u90ce" + + def test_load_empty_file(self, tmp_path): + """Empty YAML file returns None.""" + p = tmp_path / "empty.yml" + p.write_text("", encoding="utf-8") + assert load_yaml(p) is None + + def test_load_file_not_found(self): + """Missing file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError): + load_yaml("/nonexistent/path.yml") + + def test_load_invalid_yaml(self, tmp_path): + """Malformed YAML raises yaml.YAMLError.""" + p = tmp_path / "bad.yml" + p.write_text(":\n - :\n bad: [unmatched", encoding="utf-8") + with pytest.raises(yaml.YAMLError): + load_yaml(p) + + +class TestDumpYaml: + """Tests for dump_yaml().""" + + def test_dump_utf8_roundtrip(self, tmp_path): + """Non-ASCII data survives write -> read cycle.""" + p = tmp_path / "test.yml" + dump_yaml({"author": "L\u00f3pez"}, p) + assert load_yaml(p)["author"] == "L\u00f3pez" + + def test_dump_unicode_not_escaped(self, tmp_path): + """File contains real UTF-8, not \\xNN escape sequences.""" + p = tmp_path / "test.yml" + dump_yaml({"author": "L\u00f3pez"}, p) + raw = p.read_bytes() + assert b"\\xf3" not in raw + assert b"\\xF3" not in raw + assert "L\u00f3pez".encode("utf-8") in raw + + def test_dump_cjk_characters(self, tmp_path): + """CJK characters are written as real UTF-8.""" + p = tmp_path / "test.yml" + dump_yaml({"author": "\u7530\u4e2d\u592a\u90ce"}, p) + raw = p.read_text(encoding="utf-8") + assert "\u7530\u4e2d\u592a\u90ce" in raw + assert "\\u" not in raw + + def test_dump_preserves_key_order(self, tmp_path): + """Keys stay in insertion order (sort_keys=False default).""" + p = tmp_path / "test.yml" + dump_yaml({"z": 1, "a": 2, "m": 3}, p) + lines = p.read_text(encoding="utf-8").strip().split("\n") + keys = [line.split(":")[0] for line in lines] + assert keys == ["z", "a", "m"] + + def test_dump_sort_keys_option(self, tmp_path): + """sort_keys=True sorts alphabetically.""" + p = tmp_path / "test.yml" + dump_yaml({"z": 1, "a": 2, "m": 3}, p, sort_keys=True) + lines = p.read_text(encoding="utf-8").strip().split("\n") + keys = [line.split(":")[0] for line in lines] + assert keys == ["a", "m", "z"] + + def test_dump_block_style(self, tmp_path): + """Output uses block style (not flow/inline).""" + p = tmp_path / "test.yml" + dump_yaml({"items": ["a", "b", "c"]}, p) + raw = p.read_text(encoding="utf-8") + assert "- a" in raw + assert "{" not in raw + + +class TestYamlToStr: + """Tests for yaml_to_str().""" + + def test_unicode_preserved(self): + """String serialization preserves unicode characters.""" + result = yaml_to_str({"author": "\u7530\u4e2d\u592a\u90ce"}) + assert "\u7530\u4e2d\u592a\u90ce" in result + assert "\\u" not in result + + def test_latin_unicode(self): + """Latin extended characters preserved.""" + result = yaml_to_str({"name": "L\u00f3pez S\u00e1nchez"}) + assert "L\u00f3pez" in result + assert "\\x" not in result + + def test_preserves_key_order(self): + """Keys stay in insertion order by default.""" + result = yaml_to_str({"z": 1, "a": 2}) + assert result.index("z") < result.index("a") + + def test_returns_string(self): + """Return type is str, not bytes.""" + result = yaml_to_str({"key": "value"}) + assert isinstance(result, str) + + +class TestCrossPlatformSafety: + """Simulate the Windows cp1252 mismatch scenario.""" + + def test_utf8_written_reads_back_correctly(self, tmp_path): + """Verify that dump_yaml output reads back identically via load_yaml. + + This is the core regression test: on Windows without explicit + encoding, the read would produce mojibake. + """ + p = tmp_path / "test.yml" + original = { + "name": "my-project", + "author": "Alejandro L\u00f3pez S\u00e1nchez", + "description": "A project by \u7530\u4e2d\u592a\u90ce", + } + dump_yaml(original, p) + loaded = load_yaml(p) + assert loaded == original + + def test_raw_bytes_are_utf8(self, tmp_path): + """The file on disk is valid UTF-8 (not cp1252 or latin-1).""" + p = tmp_path / "test.yml" + dump_yaml({"author": "L\u00f3pez"}, p) + raw_bytes = p.read_bytes() + decoded = raw_bytes.decode("utf-8") + assert "L\u00f3pez" in decoded From 3b4b9e350645effc2997c2602aa4ce299d3014a7 Mon Sep 17 00:00:00 2001 From: danielmeppiel Date: Tue, 24 Mar 2026 04:58:05 +0100 Subject: [PATCH 2/2] fix: remove dead yaml imports and fix CI guard regex Address PR review comments: - Remove unused 'import yaml' from 5 files after yaml_io migration - Switch CI guard from grep -E (POSIX ERE) to grep -P (PCRE) for reliable \s and \b matching on Ubuntu runners - Add PR number to CHANGELOG entry Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/ci.yml | 4 ++-- CHANGELOG.md | 2 +- src/apm_cli/commands/_helpers.py | 2 -- src/apm_cli/commands/install.py | 2 -- src/apm_cli/commands/uninstall/engine.py | 1 - src/apm_cli/compilation/agents_compiler.py | 1 - src/apm_cli/integration/mcp_integrator.py | 2 -- 7 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 67fb8181..25a1d436 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,8 +43,8 @@ jobs: run: | # Ensure YAML file I/O goes through yaml_io helpers. # Catches yaml.dump/safe_dump writing to a file handle outside yaml_io.py. - VIOLATIONS=$(grep -rn --include='*.py' \ - -E 'yaml\.(safe_)?dump\(.+,\s*[a-zA-Z_]+\b' src/apm_cli/ \ + VIOLATIONS=$(grep -rn --include='*.py' -P \ + 'yaml\.(safe_)?dump\(.+,\s*[a-zA-Z_]\w*\b' src/apm_cli/ \ | grep -v 'utils/yaml_io.py' \ | grep -v '# yaml-io-exempt' \ || true) diff --git a/CHANGELOG.md b/CHANGELOG.md index 271b511f..d6f19334 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- Cross-platform YAML encoding: all `apm.yml` read/write operations now use explicit UTF-8 encoding via centralized `yaml_io` helpers, preventing silent mojibake on Windows cp1252 terminals; non-ASCII characters (e.g. accented author names) are preserved as real UTF-8 instead of `\xNN` escape sequences (#387, based on #388 by @alopezsanchez) +- Cross-platform YAML encoding: all `apm.yml` read/write operations now use explicit UTF-8 encoding via centralized `yaml_io` helpers, preventing silent mojibake on Windows cp1252 terminals; non-ASCII characters (e.g. accented author names) are preserved as real UTF-8 instead of `\xNN` escape sequences (#387, #433) -- based on #388 by @alopezsanchez - Virtual package types (files, collections, subdirectories) now respect `ARTIFACTORY_ONLY=1`, matching the primary zip-archive proxy-only behavior (#418) - `apm pack --target claude` no longer produces an empty bundle when skills/agents are installed under `.github/` -- cross-target path mapping remaps `skills/` and `agents/` to the pack target prefix (#420) diff --git a/src/apm_cli/commands/_helpers.py b/src/apm_cli/commands/_helpers.py index d1fdd8db..284256e7 100644 --- a/src/apm_cli/commands/_helpers.py +++ b/src/apm_cli/commands/_helpers.py @@ -439,8 +439,6 @@ def _create_minimal_apm_yml(config, plugin=False): config: dict with name, version, description, author keys. plugin: if True, include a devDependencies section. """ - yaml = _lazy_yaml() - # Create minimal apm.yml structure apm_yml_data = { "name": config["name"], diff --git a/src/apm_cli/commands/install.py b/src/apm_cli/commands/install.py index 4775085d..dcbf772a 100644 --- a/src/apm_cli/commands/install.py +++ b/src/apm_cli/commands/install.py @@ -76,8 +76,6 @@ def _validate_and_add_packages_to_apm_yml(packages, dry_run=False, dev=False, lo import tempfile from pathlib import Path - import yaml - apm_yml_path = Path(APM_YML_FILENAME) # Read current apm.yml diff --git a/src/apm_cli/commands/uninstall/engine.py b/src/apm_cli/commands/uninstall/engine.py index b768e3ce..134f4a79 100644 --- a/src/apm_cli/commands/uninstall/engine.py +++ b/src/apm_cli/commands/uninstall/engine.py @@ -147,7 +147,6 @@ def _remove_packages_from_disk(packages_to_remove, apm_modules_dir, logger): def _cleanup_transitive_orphans(lockfile, packages_to_remove, apm_modules_dir, apm_yml_path, logger): """Remove orphaned transitive deps and return (removed_count, actual_orphan_keys).""" - import yaml if not lockfile or not apm_modules_dir.exists(): return 0, builtins.set() diff --git a/src/apm_cli/compilation/agents_compiler.py b/src/apm_cli/compilation/agents_compiler.py index 1df5b4d2..00ae1828 100644 --- a/src/apm_cli/compilation/agents_compiler.py +++ b/src/apm_cli/compilation/agents_compiler.py @@ -70,7 +70,6 @@ def from_apm_yml(cls, **overrides) -> 'CompilationConfig': # Try to load from apm.yml try: from pathlib import Path - import yaml if Path('apm.yml').exists(): from ..utils.yaml_io import load_yaml diff --git a/src/apm_cli/integration/mcp_integrator.py b/src/apm_cli/integration/mcp_integrator.py index 54819b17..80c3e604 100644 --- a/src/apm_cli/integration/mcp_integrator.py +++ b/src/apm_cli/integration/mcp_integrator.py @@ -891,8 +891,6 @@ def install( if apm_config is None: # Lazy load -- only when the caller doesn't provide it try: - import yaml - apm_yml = Path("apm.yml") if apm_yml.exists(): from apm_cli.utils.yaml_io import load_yaml