-
Notifications
You must be signed in to change notification settings - Fork 156
fix: extend explicit UTF-8 encoding to 5 remaining open() call sites #1068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -92,3 +92,45 @@ def test_corrupted_file_returns_empty(self, tmp_path): | |
|
|
||
| registry_mod._invalidate_cache() | ||
| assert registry_mod.get_registered_marketplaces() == [] | ||
|
|
||
|
|
||
| class TestRegistryUtf8RoundTrip: | ||
| """Registry persistence preserves non-ASCII content (Windows cp1252/cp950 guard).""" | ||
|
|
||
| def test_add_and_read_non_ascii_marketplace(self): | ||
| # Note: name/owner/repo are typically ASCII per the marketplace spec, | ||
| # but the registry file itself must still be UTF-8 to handle any | ||
| # non-ASCII content that may flow through future fields. We use a | ||
| # description-bearing source by writing a custom entry directly. | ||
| src = MarketplaceSource(name="cafe-mkt", owner="cafe-org", repo="plugins-\u958b\u59cb") | ||
| registry_mod.add_marketplace(src) | ||
|
Comment on lines
+104
to
+106
|
||
|
|
||
| # Force re-load from disk by clearing the cache. | ||
| registry_mod._invalidate_cache() | ||
| fetched = registry_mod.get_marketplace_by_name("cafe-mkt") | ||
| assert fetched is not None | ||
| assert fetched.repo == "plugins-\u958b\u59cb" | ||
|
|
||
| def test_registry_file_is_readable_with_utf8_external_writes(self, tmp_path): | ||
| """A registry file written externally with raw UTF-8 (ensure_ascii=False) | ||
| must still load cleanly. This is the regression case for cp1252/cp950 | ||
| Windows locales where the default open() would fail to decode.""" | ||
| import json as _json | ||
|
|
||
| path = registry_mod._marketplaces_path() | ||
| # Ensure parent dir exists. | ||
| registry_mod._ensure_file() | ||
| payload = { | ||
| "marketplaces": [ | ||
| {"name": "cafe-mkt", "owner": "o", "repo": "repo-\u4e2d\u6587"}, | ||
| ] | ||
| } | ||
| # Write raw UTF-8 (no \uXXXX escaping) to mimic what a non-Python | ||
| # tool or a future writer with ensure_ascii=False would produce. | ||
| with open(path, "w", encoding="utf-8") as f: | ||
| _json.dump(payload, f, ensure_ascii=False) | ||
|
|
||
| registry_mod._invalidate_cache() | ||
| fetched = registry_mod.get_marketplace_by_name("cafe-mkt") | ||
| assert fetched is not None | ||
| assert fetched.repo == "repo-\u4e2d\u6587" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| """Tests for apm_cli.config module-level config file I/O. | ||
|
|
||
| These tests exercise the round-trip of non-ASCII content through the global | ||
| config file to guard against the cp1252/cp950 UnicodeDecodeError class of | ||
| bugs on Windows when ``open()`` is called without an explicit encoding. | ||
| """ | ||
|
|
||
| import json | ||
|
|
||
| import pytest | ||
|
|
||
| from apm_cli import config as config_mod | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def isolated_config(tmp_path, monkeypatch): | ||
| """Point CONFIG_DIR / CONFIG_FILE to a temp directory and clear cache.""" | ||
| config_dir = tmp_path / ".apm" | ||
| config_file = config_dir / "config.json" | ||
| monkeypatch.setattr(config_mod, "CONFIG_DIR", str(config_dir)) | ||
| monkeypatch.setattr(config_mod, "CONFIG_FILE", str(config_file)) | ||
| monkeypatch.setattr(config_mod, "_config_cache", None) | ||
| return config_file | ||
|
|
||
|
|
||
| class TestConfigUtf8RoundTrip: | ||
| """Round-trip non-ASCII content through the config file.""" | ||
|
|
||
| def test_update_config_preserves_non_ascii(self, isolated_config): | ||
| non_ascii_value = "/Users/cafe/projets/\u958b\u59cb" | ||
| config_mod.update_config({"copilot_cowork_skills_dir": non_ascii_value}) | ||
|
|
||
|
Comment on lines
+29
to
+32
|
||
| # Force re-read from disk by invalidating the cache. | ||
| config_mod._invalidate_config_cache() | ||
| loaded = config_mod.get_config() | ||
|
|
||
| assert loaded["copilot_cowork_skills_dir"] == non_ascii_value | ||
|
|
||
| def test_config_file_is_utf8_on_disk(self, isolated_config): | ||
| non_ascii_value = "# \u958b\u59cb -- cafe" | ||
| config_mod.update_config({"note": non_ascii_value}) | ||
|
|
||
| # Read raw bytes and decode as UTF-8 to assert the on-disk encoding. | ||
| raw = isolated_config.read_bytes() | ||
| decoded = json.loads(raw.decode("utf-8")) | ||
| assert decoded["note"] == non_ascii_value | ||
|
|
||
| def test_ensure_config_exists_uses_utf8(self, isolated_config, monkeypatch): | ||
| # Force ensure_config_exists() to create the file. | ||
| config_mod.ensure_config_exists() | ||
| assert isolated_config.exists() | ||
| # File must be readable as UTF-8 JSON. | ||
| json.loads(isolated_config.read_bytes().decode("utf-8")) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -154,3 +154,30 @@ def test_str_representation(self): | |
| str_repr = str(runtime) | ||
| assert "CopilotRuntime" in str_repr | ||
| assert "test-model" in str_repr | ||
|
|
||
|
|
||
| class TestMcpConfigUtf8RoundTrip: | ||
| """Reading MCP config preserves non-ASCII content (Windows cp1252/cp950 guard).""" | ||
|
|
||
| def test_get_mcp_servers_reads_non_ascii(self, tmp_path): | ||
| import json as _json | ||
|
|
||
| mcp_path = tmp_path / "mcp-config.json" | ||
| servers = { | ||
| "servers": { | ||
| "demo-cafe": { | ||
| "command": "node", | ||
| "args": ["server.js"], | ||
| "description": "\u4e2d\u6587 description -- cafe", | ||
| } | ||
|
Comment on lines
+169
to
+172
|
||
| } | ||
| } | ||
| mcp_path.write_bytes(_json.dumps(servers).encode("utf-8")) | ||
|
|
||
| with patch.object(CopilotRuntime, "is_available", return_value=True): | ||
| runtime = CopilotRuntime() | ||
| with patch.object(CopilotRuntime, "get_mcp_config_path", return_value=mcp_path): | ||
| got = runtime.get_mcp_servers() | ||
|
|
||
| assert "demo-cafe" in got | ||
| assert got["demo-cafe"]["description"] == "\u4e2d\u6587 description -- cafe" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| """Tests for apm_cli.models.plugin module I/O. | ||
|
|
||
| Round-trips non-ASCII content through Plugin.from_path to guard against | ||
| cp1252/cp950 UnicodeDecodeError on Windows when reading plugin.json. | ||
| """ | ||
|
|
||
| import json | ||
|
|
||
| from apm_cli.models.plugin import Plugin | ||
|
|
||
|
|
||
| class TestPluginUtf8RoundTrip: | ||
| """Round-trip non-ASCII content through plugin.json reads.""" | ||
|
|
||
| def test_from_path_reads_non_ascii_metadata(self, tmp_path): | ||
| metadata = { | ||
| "id": "demo-plugin", | ||
| "name": "Demo plugin -- cafe", | ||
| "version": "1.0.0", | ||
| "description": "Plugin de demo with \u4e2d\u6587 description", | ||
| "author": "Cafe Author", | ||
|
Comment on lines
+18
to
+21
|
||
| } | ||
| plugin_json = tmp_path / "plugin.json" | ||
| plugin_json.write_bytes(json.dumps(metadata).encode("utf-8")) | ||
|
|
||
| plugin = Plugin.from_path(tmp_path) | ||
|
|
||
| assert plugin.metadata.id == "demo-plugin" | ||
| assert plugin.metadata.name == "Demo plugin -- cafe" | ||
| assert plugin.metadata.description == "Plugin de demo with \u4e2d\u6587 description" | ||
| assert plugin.metadata.author == "Cafe Author" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These new tests include non-ASCII literals in the Python source (e.g. in
description/ plugin names). The repo requires Python source files to remain printable ASCII; please use ASCII-only escape sequences ("\u....") or construct UTF-8 bytes via escapes so runtime values remain non-ASCII without non-ASCII source text.This issue also appears on line 442 of the same file.