From 1f52702367776921fab98899d13aff12a6d7e981 Mon Sep 17 00:00:00 2001 From: Ed Morley <501702+edmorley@users.noreply.github.com> Date: Fri, 14 Nov 2025 12:47:40 +0000 Subject: [PATCH] Further improve `.python-version` and `runtime.txt` error handling Some further improvements on top of those made in #1958, based on build error message scenarios seen in Honeycomb. Now, if `.python-version` contains a single ESC control code (which gets categorised as "very short file"), or contains any of the ASCII control codes that result in the file being categorised as "data" (such as NUL), then the "invalid python version" error message variant is shown instead of the "invalid text encoding" variant. In addition, any NULs in the file are substituted with a placeholder to avoid this Bash warning: ``` /tmp/buildpack/lib/python_version.sh: line 104: warning: command substitution: ignored null byte in input ``` See: https://manpages.ubuntu.com/manpages/noble/en/man1/file.1.html GUS-W-20220514. --- CHANGELOG.md | 2 ++ lib/python_version.sh | 27 ++++++++++-------- .../runtime_txt_invalid_version/runtime.txt | Bin 144 -> 146 bytes spec/hatchet/python_version_spec.rb | 4 +-- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e5f4b7af..b3417b0e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ ## [Unreleased] - Updated uv from 0.9.7 to 0.9.9. ([#1961](https://github.com/heroku/heroku-buildpack-python/pull/1961)) +- Improved the error message shown for `.python-version` files that contain unexpected ASCII control code characters. ([#1962](https://github.com/heroku/heroku-buildpack-python/pull/1962)) +- Fixed Bash command substitution warnings from being shown if `runtime.txt` contains null byte characters. 
([#1962](https://github.com/heroku/heroku-buildpack-python/pull/1962)) ## [v318] - 2025-11-12 diff --git a/lib/python_version.sh b/lib/python_version.sh index 85bdafd57..a55c883f6 100644 --- a/lib/python_version.sh +++ b/lib/python_version.sh @@ -152,28 +152,28 @@ function python_version::parse_python_version_file() { continue fi - # If we didn't find a valid Python version string, we check the file encoding so that we + # If we didn't find a valid Python version string, we check the text encoding so that we # can display a more helpful error message if it turns out that the version was valid but - # that the file was just saved in the wrong encoding. + # that the file was just saved in the wrong encoding (such as UTF-8 with BOM or UTF-16). # - # Example valid values: + # Example values `file` can return: # `ASCII text` # `ASCII text, with CRLF line terminators` # `ASCII text, with no line terminators` # `Unicode text, UTF-8 text` - # - # Example invalid values: # `Unicode text, UTF-8 (with BOM) text` # `Unicode text, UTF-16, little-endian text, with CRLF line terminators` - # `data` (for example when NUL or CTRL characters found) + # `data` (such as when the file contains a NUL or other control code characters) + # `very short file (no magic)` (such as when the file contains a single ESC character) # - # Note: File can also return `very short file (no magic)` (eg a file that contains just a newline) - # and `empty`, but we won't see those here since we're iterating over trimmed lines. + # Note: File can also return `empty` but in that case we wouldn't be iterating over found lines. local file_encoding - file_encoding="$(file --brief --dereference "${python_version_file_path}")" + # We exclude some file type tests to avoid false positives, since we only need the encoding. 
+ file_encoding="$(file --brief --dereference --exclude json --exclude soft "${python_version_file_path}")" case "${file_encoding}" in - *"ASCII text"* | *"UTF-8 text"*) + # Cases where the text encoding isn't the issue, and so the version itself must be invalid. + *"ASCII text"* | *"UTF-8 text"* | *"very short file"* | "data") # Replace everything but printable ASCII, spaces and tabs with the Unicode replacement # character, so any invisible unwanted characters (such as ASCII control codes or the # Unicode zero width space character) are visible in the error message. @@ -211,19 +211,20 @@ function python_version::parse_python_version_file() { build_data::set_string "failure_detail" "${version:0:100}" exit 1 ;; + # Unsupported text encodings such as UTF-8 with BOM or UTF-16. *) output::error <<-EOF Error: Unable to read .python-version. Your .python-version file couldn't be read because it's using - an unsupported file encoding: + an unsupported text encoding: ${file_encoding} Configure your editor to save files as UTF-8, without a BOM, then delete and recreate the file using the correct encoding. If that doesn't work, make sure you don't have a .gitattributes - file that's overriding the file encoding. + file that's overriding the text encoding. Note: On Windows, if you pipe or redirect output to a file it can result in the file being encoded in UTF-16 LE when @@ -290,12 +291,14 @@ function python_version::parse_python_version_file() { # Outputs all populated (non-empty and not commented with '#') lines from the passed file, # with leading/trailing whitespace (including Unicode whitespace) trimmed from each line. +# We replace any NUL characters with a placeholder since Bash variables can't store them. 
function python_version::read_trimmed_version_lines() { local file="${1}" LC_ALL=C.UTF-8 sed \ --regexp-extended \ --expression 's/^[[:space:]]+//' \ --expression 's/[[:space:]]+$//' \ + --expression 's/\x0/␀/' \ --expression '/^(#|$)/d' \ "${file}" } diff --git a/spec/fixtures/runtime_txt_invalid_version/runtime.txt b/spec/fixtures/runtime_txt_invalid_version/runtime.txt index f4ae593e64d2b5f2b89d0a1a65a4e1fbef91fe17..14025c7f8d862c84b9f1ad10faf9056b4a186093 100644 GIT binary patch delta 9 QcmbQhIEit>1SSS901ib0Q2+n{ delta 6 NcmbQlIDv7(1ONwW0!07- diff --git a/spec/hatchet/python_version_spec.rb b/spec/hatchet/python_version_spec.rb index 663e54d90..13cd9c51c 100644 --- a/spec/hatchet/python_version_spec.rb +++ b/spec/hatchet/python_version_spec.rb @@ -365,14 +365,14 @@ remote: ! Error: Unable to read .python-version. remote: ! remote: ! Your .python-version file couldn't be read because it's using - remote: ! an unsupported file encoding: + remote: ! an unsupported text encoding: remote: ! Unicode text, UTF-8 (with BOM) text, with CRLF line terminators remote: ! remote: ! Configure your editor to save files as UTF-8, without a BOM, remote: ! then delete and recreate the file using the correct encoding. remote: ! remote: ! If that doesn't work, make sure you don't have a .gitattributes - remote: ! file that's overriding the file encoding. + remote: ! file that's overriding the text encoding. remote: ! remote: ! Note: On Windows, if you pipe or redirect output to a file remote: ! it can result in the file being encoded in UTF-16 LE when