diff --git a/cloudinit/sources/DataSourceWSL.py b/cloudinit/sources/DataSourceWSL.py index 222ec2a0f63..607f8686368 100644 --- a/cloudinit/sources/DataSourceWSL.py +++ b/cloudinit/sources/DataSourceWSL.py @@ -90,6 +90,7 @@ def find_home() -> PurePath: raises: IOError when no mountpoint with cmd.exe is found ProcessExecutionError when either cmd.exe is unable to retrieve the user's home directory + UnicodeDecodeError when cmd.exe /U outputs invalid UTF16LE """ cmd = cmd_executable() @@ -97,8 +98,13 @@ def find_home() -> PurePath: # But we know that `/init` is the interpreter, so we can run it directly. # See /proc/sys/fs/binfmt_misc/WSLInterop[-late] # inside any WSL instance for more details. - home, _ = subp.subp(["/init", cmd.as_posix(), "/C", "echo %USERPROFILE%"]) - home = home.rstrip() + # Invoking with "/U" makes it output UTF-16LE, which is more predictable + # than ANSI Code Pages for anything above the ASCII range. + home, _ = subp.subp( + ["/init", cmd.as_posix(), "/U", "/C", "echo.%USERPROFILE%"], + decode=False, + ) + home = home.decode("utf-16-le").rstrip() if not home: raise subp.ProcessExecutionError( "No output from cmd.exe to show the user profile dir." @@ -443,7 +449,7 @@ def _get_data(self) -> bool: try: user_home = find_home() - except IOError as e: + except (IOError, ValueError) as e: LOG.debug("Unable to detect WSL datasource: %s", e) return False diff --git a/tests/unittests/sources/test_wsl.py b/tests/unittests/sources/test_wsl.py index a5300ff8af0..75c7fb5b524 100644 --- a/tests/unittests/sources/test_wsl.py +++ b/tests/unittests/sources/test_wsl.py @@ -144,6 +144,19 @@ def test_cmd_exe_no_win_mounts(self, m_mounts, m_os_access): with pytest.raises(IOError): wsl.cmd_executable() + @mock.patch("cloudinit.sources.DataSourceWSL.cmd_executable") + @mock.patch("cloudinit.util.subp.subp") + def test_find_home_raises(self, m_subp, m_cmd): + # The value really doesn't matter. + m_cmd.return_value = PurePath("/mnt/c/cmd.exe") + m_subp.return_value = util.subp.SubpResult( + "I am UTF-8 🦄 !".encode("utf-8"), "\r\n".encode("utf-8") + ) + # Checking for ValueError instead of UnicodeDecodeError because + # that's what we catch at the call sites. + with pytest.raises(ValueError): + wsl.find_home() + @pytest.mark.parametrize( "linux_distro_value,files", ( diff --git a/tools/ds-identify b/tools/ds-identify index 02c70a62c2a..d0117e83c47 100755 --- a/tools/ds-identify +++ b/tools/ds-identify @@ -1708,7 +1708,9 @@ WSL_path() { WSL_run_cmd() { local val="" exepath="$1" shift - _RET=$(/init "$exepath" /c "$@" 2>/dev/null) + # Using the '/u' flag to enforce Unicode (UTF-16 LE), thus we need to decode it afterwards. + # It's more reliable than the default ANSI Code Pages for anything above the ASCII range. + _RET=$(/init "$exepath" /u /c "$@" 2>/dev/null | iconv --from-code UTF-16LE --to-code UTF-8) } WSL_profile_dir() { @@ -1719,10 +1721,12 @@ WSL_profile_dir() { for m in $@; do cmdexe="$m/Windows/System32/cmd.exe" if command -v "$cmdexe" > /dev/null 2>&1; then - # Here WSL's proprietary `/init` is used to start the Windows cmd.exe + # Here WSL's `/init` is used to start the Windows cmd.exe # to output the Windows user profile directory path, which is # held by the environment variable %USERPROFILE%. - WSL_run_cmd "$cmdexe" "echo %USERPROFILE%" + # See https://wsl.dev/technical-documentation/interop/ for more information on how /init + # is used to launch Windows binaries. + WSL_run_cmd "$cmdexe" "echo.%USERPROFILE%" profiledir="${_RET%%[[:cntrl:]]}" if [ -n "$profiledir" ]; then # wslpath is a program supplied by WSL itself that translates Windows and Linux paths,