From be5614286e121879917d854d7b60c6be30f2d3a7 Mon Sep 17 00:00:00 2001 From: Carlos Nihelton Date: Tue, 25 Nov 2025 13:39:43 -0300 Subject: [PATCH 1/4] Subprocess cmd.exe with /U to output UTF-16LE Peasant comment fix: /init is now open source (as part of WSL2). Fixes: #6716 --- cloudinit/sources/DataSourceWSL.py | 9 +++++++-- tools/ds-identify | 8 +++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cloudinit/sources/DataSourceWSL.py b/cloudinit/sources/DataSourceWSL.py index 222ec2a0f63..d8fa0864926 100644 --- a/cloudinit/sources/DataSourceWSL.py +++ b/cloudinit/sources/DataSourceWSL.py @@ -97,8 +97,13 @@ def find_home() -> PurePath: # But we know that `/init` is the interpreter, so we can run it directly. # See /proc/sys/fs/binfmt_misc/WSLInterop[-late] # inside any WSL instance for more details. - home, _ = subp.subp(["/init", cmd.as_posix(), "/C", "echo %USERPROFILE%"]) - home = home.rstrip() + # Invoking with "/U" makes it output UTF-16LE, which is more predictable + # than ANSI Code Pages for anything above the ASCII range. + home, _ = subp.subp( + ["/init", cmd.as_posix(), "/U", "/C", "echo.%USERPROFILE%"], + decode=False, + ) + home = home.decode("utf-16-le").rstrip() if not home: raise subp.ProcessExecutionError( "No output from cmd.exe to show the user profile dir." diff --git a/tools/ds-identify b/tools/ds-identify index 02c70a62c2a..c2a6d69eabe 100755 --- a/tools/ds-identify +++ b/tools/ds-identify @@ -1708,7 +1708,9 @@ WSL_path() { WSL_run_cmd() { local val="" exepath="$1" shift - _RET=$(/init "$exepath" /c "$@" 2>/dev/null) + # Using the '/u' flag to enforce Unicode (UTF-16 LE), thus we need to decode it afterwards. + # It's more reliable than the default ANSI Code Pages for anything above the ASCII range. 
+ _RET=$(/init "$exepath" /u /c "$@" 2>/dev/null | iconv -f UTF-16LE -t UTF-8) } WSL_profile_dir() { @@ -1719,10 +1721,10 @@ WSL_profile_dir() { for m in $@; do cmdexe="$m/Windows/System32/cmd.exe" if command -v "$cmdexe" > /dev/null 2>&1; then - # Here WSL's proprietary `/init` is used to start the Windows cmd.exe + # Here WSL's `/init` is used to start the Windows cmd.exe # to output the Windows user profile directory path, which is # held by the environment variable %USERPROFILE%. - WSL_run_cmd "$cmdexe" "echo %USERPROFILE%" + WSL_run_cmd "$cmdexe" "echo.%USERPROFILE%" profiledir="${_RET%%[[:cntrl:]]}" if [ -n "$profiledir" ]; then # wslpath is a program supplied by WSL itself that translates Windows and Linux paths, From afffef46941fa54768f581c634a3c181ac5a60f9 Mon Sep 17 00:00:00 2001 From: Carlos Nihelton Date: Thu, 5 Feb 2026 10:48:08 -0300 Subject: [PATCH 2/4] Takes into consideration exceptions from find_home() That function can now throw UnicodeDecodeError, which inherits from ValueError, so we should catch ValueError as before. 
--- cloudinit/sources/DataSourceWSL.py | 3 ++- tests/unittests/sources/test_wsl.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cloudinit/sources/DataSourceWSL.py b/cloudinit/sources/DataSourceWSL.py index d8fa0864926..607f8686368 100644 --- a/cloudinit/sources/DataSourceWSL.py +++ b/cloudinit/sources/DataSourceWSL.py @@ -90,6 +90,7 @@ def find_home() -> PurePath: raises: IOError when no mountpoint with cmd.exe is found ProcessExecutionError when either cmd.exe is unable to retrieve the user's home directory + UnicodeDecodeError when cmd.exe /U outputs invalid UTF-16LE """ cmd = cmd_executable() @@ -448,7 +449,7 @@ def _get_data(self) -> bool: try: user_home = find_home() - except IOError as e: + except (IOError, ValueError) as e: LOG.debug("Unable to detect WSL datasource: %s", e) return False diff --git a/tests/unittests/sources/test_wsl.py b/tests/unittests/sources/test_wsl.py index a5300ff8af0..75c7fb5b524 100644 --- a/tests/unittests/sources/test_wsl.py +++ b/tests/unittests/sources/test_wsl.py @@ -144,6 +144,19 @@ def test_cmd_exe_no_win_mounts(self, m_mounts, m_os_access): with pytest.raises(IOError): wsl.cmd_executable() + @mock.patch("cloudinit.sources.DataSourceWSL.cmd_executable") + @mock.patch("cloudinit.util.subp.subp") + def test_find_home_raises(self, m_subp, m_cmd): + # The value really doesn't matter. + m_cmd.return_value = PurePath("/mnt/c/cmd.exe") + m_subp.return_value = util.subp.SubpResult( + "I am UTF-8 🦄 !".encode("utf-8"), "\r\n".encode("utf-8") + ) + # Checking for ValueError instead of UnicodeDecodeError because + # that's what we catch at the call sites. + with pytest.raises(ValueError): + wsl.find_home() + @pytest.mark.parametrize( "linux_distro_value,files", ( From 1a5be453ae6f98b8016cbac4c77aaaceb5a7f555 Mon Sep 17 00:00:00 2001 From: Carlos Nihelton Date: Thu, 19 Feb 2026 16:49:50 -0300 Subject: [PATCH 3/4] Links to MS documentation about WSL interop How /init is used to launch Windows binaries. 
--- tools/ds-identify | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ds-identify b/tools/ds-identify index c2a6d69eabe..da8d8c98717 100755 --- a/tools/ds-identify +++ b/tools/ds-identify @@ -1724,6 +1724,8 @@ WSL_profile_dir() { # Here WSL's `/init` is used to start the Windows cmd.exe # to output the Windows user profile directory path, which is # held by the environment variable %USERPROFILE%. + # See https://wsl.dev/technical-documentation/interop/ for more information on how /init + # is used to launch Windows binaries. WSL_run_cmd "$cmdexe" "echo.%USERPROFILE%" profiledir="${_RET%%[[:cntrl:]]}" if [ -n "$profiledir" ]; then From 706a5a0ffd5970e134699ff15b1c1bb8df308057 Mon Sep 17 00:00:00 2001 From: Carlos Nihelton Date: Fri, 20 Feb 2026 08:46:14 -0300 Subject: [PATCH 4/4] Pass long CLI flags to iconv for readability --- tools/ds-identify | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ds-identify b/tools/ds-identify index da8d8c98717..d0117e83c47 100755 --- a/tools/ds-identify +++ b/tools/ds-identify @@ -1710,7 +1710,7 @@ WSL_run_cmd() { shift # Using the '/u' flag to enforce Unicode (UTF-16 LE), thus we need to decode it afterwards. # It's more reliable than the default ANSI Code Pages for anything above the ASCII range. - _RET=$(/init "$exepath" /u /c "$@" 2>/dev/null | iconv -f UTF-16LE -t UTF-8) + _RET=$(/init "$exepath" /u /c "$@" 2>/dev/null | iconv --from-code UTF-16LE --to-code UTF-8) } WSL_profile_dir() {