From f09db3bbe1446824b038c888aa937493126249cd Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Mon, 1 Dec 2025 17:20:12 +0000 Subject: [PATCH 01/14] Implement run-time Python interpreter searching There is currently an assumption that the Python interpreter used while building .pex files (i.e. the value of `DefaultInterpreter`) is also the Python interpreter that runs them. This can be overridden with the `shebang` parameter on individual `python_binary` and `python_test` targets, which sets the shebang in the generated .pex file to a specific value. The functionality this provides is limited: - If additional interpreter arguments are specified by the target, the generated .pex file will depend on a shell at `/bin/sh` in the run-time environment (which isn't guaranteed to exist, e.g. in distroless containers), as well as `/usr/bin/env` (also not guaranteed to exist). - It is possible to build the .pex file using a Python interpreter defined as a build target in the repo, but it isn't possible to run the .pex file using that interpreter because the .pex file's shebang is (by definition) static, but the interpreter's path relative to the .pex file's path varies depending on the context (e.g. whether the .pex file exists inside a build environment vs outside of one). This was discussed in #247. Provide more flexibility in how .pex files are run by prepending a native-code preamble that implements dynamic searching of Python interpreters. The behaviour of this preamble is controlled at run time by a special zip member within the .pex file, `.bootstrap/PLZ_PREAMBLE_CONFIG`, which contains a list of candidate paths for Python interpreters under which the .pex file should be executed and a list of arguments to pass to the interpreter when attempting to execute it. The lists can be customised per `python_binary` or `python_test` target via the `runtime_interpreters` and `runtime_interpreter_args` parameters respectively; new plugin configuration options, `DefaultRuntimeInterpreters` and `DefaultRuntimeInterpreterArgs`, provide default values. If both are unspecified, the plugin's current behaviour is maintained (i.e. the same interpreter is used to both build and run the code). The flexibility offered by the new .pex preamble is sufficient to be able to remove `python_binary` and `python_test`'s `shebang` parameter (and the `DefaultShebang` plugin configuration option) altogether. The new .pex preamble is compact enough to prepend to all .pex files in a repo without collectively making them prohibitively large: ~98KB statically linked with musl on Linux, ~83KB dynamically linked on Darwin (which doesn't have a stable kernel ABI, and strongly discourages static linking). Among other possibilities, this allows for the creation of .pex files that are truly portable between run-time environments of the same operating system and architecture - e.g., the preamble can be configured to attempt to execute an interpreter that only exists within a production environment (such as a distroless container with its interpreter at `/python/bin/python3`), falling back on an interpreter defined as a build target in the repo if the .pex file is still located within Please's development/testing environment. --- .github/workflows/plugin_test.yaml | 26 +- .plzconfig | 56 +++- build_defs/python.build_defs | 122 ++++++-- plugins/BUILD | 2 +- test/BUILD | 2 +- test/interpreter_options_repo/BUILD_FILE | 8 +- test/strip_source_test.py | 2 +- third_party/cc/cjson/BUILD | 7 + third_party/cc/cjson/cjson.build | 18 ++ third_party/cc/kuba_zip/BUILD | 7 + third_party/cc/kuba_zip/kuba_zip.build | 24 ++ third_party/cc/log_c/BUILD | 7 + third_party/cc/log_c/log_c.build | 11 + third_party/cc/openbsd/BUILD | 25 ++ third_party/go/BUILD | 2 +- tools/please_pex/BUILD | 1 + tools/please_pex/pex/BUILD | 14 +- tools/please_pex/pex/pex.go | 68 +++-- tools/please_pex/pex_main.go | 46 +-- tools/please_pex/preamble/BUILD | 33 ++ tools/please_pex/preamble/path.c | 234 +++++++++++++++ tools/please_pex/preamble/path.h | 9 + tools/please_pex/preamble/pexerror.c | 98 ++++++ tools/please_pex/preamble/pexerror.h | 42 +++ tools/please_pex/preamble/preamble.c | 366 +++++++++++++++++++++++ tools/please_pex/preamble/preamble.go | 67 +++++ tools/please_pex/preamble/util.h | 47 +++ tools/please_pex/zip/writer.go | 19 +- 28 files changed, 1252 insertions(+), 111 deletions(-) create mode 100644 third_party/cc/cjson/BUILD create mode 100644 third_party/cc/cjson/cjson.build create mode 100644 third_party/cc/kuba_zip/BUILD create mode 100644 third_party/cc/kuba_zip/kuba_zip.build create mode 100644 third_party/cc/log_c/BUILD create mode 100644 third_party/cc/log_c/log_c.build create mode 100644 third_party/cc/openbsd/BUILD create mode 100644 tools/please_pex/preamble/BUILD create mode 100644 tools/please_pex/preamble/path.c create mode 100644 tools/please_pex/preamble/path.h create mode 100644 tools/please_pex/preamble/pexerror.c create mode 100644 tools/please_pex/preamble/pexerror.h create mode 100644 tools/please_pex/preamble/preamble.c create mode 100644 tools/please_pex/preamble/preamble.go create mode 100644 tools/please_pex/preamble/util.h diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 4d1b1418..d4877e10 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -28,6 +28,8 @@ jobs: runs-on: ${{ (inputs.platform == 'freebsd_amd64' || inputs.platform == 'linux_amd64') && 'ubuntu-24.04' || (inputs.platform == 'linux_arm64' && 'ubuntu-24.04-arm' || (inputs.platform == 'darwin_amd64' && 'macos-15-intel' || (inputs.platform == 'darwin_arm64' && 'macos-15' || 'unknown'))) }} steps: - name: Install Python in CI environment + # actions/setup-python doesn't support FreeBSD - instead, interpreters for the supported Python versions are + # pre-installed in the FreeBSD runner image. if: ${{ inputs.platform != 'freebsd_amd64' }} id: python uses: actions/setup-python@v6 @@ -36,14 +38,11 @@ jobs: update-environment: false - name: Check out code uses: actions/checkout@v5 - - name: Configure plugin's default Python interpreter - run: | - _python_path="${{ inputs.platform == 'freebsd_amd64' && format('python{0}', inputs.python) || steps.python.outputs.python-path }}" - echo "PLZ_ARGS=-o plugin.python.defaultinterpreter:$_python_path" >> $GITHUB_ENV - name: Configure plugin to use please_pex tool built from source if: inputs.please_pex_from_repo - run: | - echo "PLZ_ARGS=${PLZ_ARGS:+$PLZ_ARGS }-o plugin.python.pextool://tools/please_pex" >> $GITHUB_ENV + # PexTool can't be set to //tools/please_pex for e2e tests because it can't be built inside the e2e test + # environment - skip the e2e tests if we're using //tools/please_pex as the pex tool. + run: echo "PLZ_ARGS=${PLZ_ARGS:+$PLZ_ARGS }-o plugin.python.pextool://tools/please_pex -e plz_e2e_test" >> $GITHUB_ENV - name: Run tests (in nested runner) if: ${{ inputs.platform == 'freebsd_amd64' }} uses: cross-platform-actions/action@v0.30.0 @@ -55,11 +54,22 @@ jobs: shell: bash environment_variables: PLZ_ARGS shutdown_vm: true - run: ./pleasew test --keep_going --log_file plz-out/log/test.log ${{ inputs.test_targets }} + run: | + _external_interpreter="python${{ inputs.python }}" + _in_repo_interpreter="//third_party/cc/cpython:cpython_${{ inputs.python }}|python" + echo "*** Running tests using external Python interpreter: $_external_interpreter" + ./pleasew -o "plugin.python.defaultinterpreter:$_external_interpreter" test --rerun --keep_going --log_file plz-out/log/test-external_interpreter.log ${{ inputs.test_targets }} + echo "*** Running tests using in-repo Python interpreter: $_in_repo_interpreter" + ./pleasew -o "plugin.python.defaultinterpreter:$_in_repo_interpreter" test --rerun --keep_going --log_file plz-out/log/test-in_repo_interpreter.log ${{ inputs.test_targets }} - name: Run tests (on host) if: ${{ inputs.platform != 'freebsd_amd64' }} run: | - ./pleasew test --keep_going --log_file plz-out/log/test.log ${{ inputs.test_targets }} + _external_interpreter="${{ steps.python.outputs.python-path }}" + _in_repo_interpreter="//third_party/cc/cpython:cpython_${{ inputs.python }}|python" + echo "*** Running tests using external Python interpreter: $_external_interpreter" + ./pleasew -o "plugin.python.defaultinterpreter:$_external_interpreter" test --rerun --keep_going --log_file plz-out/log/test-external_interpreter.log ${{ inputs.test_targets }} + echo "*** Running tests using in-repo Python interpreter: $_in_repo_interpreter" + ./pleasew -o "plugin.python.defaultinterpreter:$_in_repo_interpreter" test --rerun --keep_going --log_file plz-out/log/test-in_repo_interpreter.log ${{ inputs.test_targets }} - name: Archive logs uses: actions/upload-artifact@v4 with: diff --git a/.plzconfig b/.plzconfig index 4d24c805..b12da7e7 100644 --- a/.plzconfig +++ b/.plzconfig @@ -1,11 +1,31 @@ [Please] -Version = >=17.21.0 +Version = >=17.25.1 [Build] hashcheckers = sha256 [Plugin "cc"] Target = //plugins:cc +LdGarbageCollection = true + +DefaultOptCFlags = -std=c99 +DefaultOptCFlags = -Os +DefaultOptCFlags = -DNDEBUG +DefaultOptCFlags = -Wall +DefaultOptCFlags = -Wextra +DefaultOptCFlags = -Werror +DefaultOptCFlags = -flto +DefaultOptCFlags = -D_LARGEFILE64_SOURCE +DefaultOptCFlags = -D_FILE_OFFSET_BITS=64 + +DefaultDbgCFlags = -std=c99 +DefaultDbgCFlags = -g3 +DefaultDbgCFlags = -DDEBUG +DefaultDbgCFlags = -Wall +DefaultDbgCFlags = -Wextra +DefaultDbgCFlags = -Werror +DefaultDbgCFlags = -D_LARGEFILE64_SOURCE +DefaultDbgCFlags = -D_FILE_OFFSET_BITS=64 [Plugin "go"] Target = //plugins:go @@ -21,6 +41,7 @@ Target = //plugins:shell [Plugin "e2e"] Target = //plugins:e2e +PleaseVersion = 17.25.1 DefaultPlugin = //test:python-rules [PluginDefinition] @@ -30,12 +51,27 @@ name = python ConfigKey = DefaultInterpreter DefaultValue = python3 Inherit = true -Help = The Python interpreter to use for any case where one is not explicitly specified +Help = The default Python interpreter to use when building .pex files. -[PluginConfig "default_shebang"] +[PluginConfig "interpreter_options"] +ConfigKey = InterpreterOptions +Optional = true +Repeatable = true +Help = A list of additional arguments to pass to the Python interpreter when building .pex files. + +[PluginConfig "default_runtime_interpreters"] +ConfigKey = DefaultRuntimeInterpreters +Optional = true +Repeatable = true Inherit = true +Help = A list of possible paths to Python interpreters with which to run .pex files. + +[PluginConfig "default_runtime_interpreter_args"] +ConfigKey = DefaultRuntimeInterpreterArgs Optional = true -Help = The shebang to set on binaries / tests where one is not explicitly specified +Repeatable = true +Inherit = true +Help = A list of additional arguments to pass to the Python interpreter when running .pex files. [PluginConfig "pex_tool"] ConfigKey = PexTool @@ -43,12 +79,6 @@ DefaultValue = //tools:please_pex Inherit = true Help = A path or build label for the pex tool, which is used to create .pex files from python sources -[PluginConfig "interpreter_options"] -ConfigKey = InterpreterOptions -Optional = true -Repeatable = true -Help = Additional command line arguments to pass to the Python interpreter. - [PluginConfig "test_runner"] ConfigKey = TestRunner DefaultValue = unittest @@ -146,6 +176,12 @@ DefaultValue = "info" Help = Passes verbosity level to wheel_tool, for example: wheel_tool --verbosity info Optional = true +[PluginConfig "default_preamble_verbosity"] +ConfigKey = DefaultPreambleVerbosity +DefaultValue = error +Inherit = true +Help = The default logging level for the .pex preamble. Must be one of trace, debug, info, warn, error or fatal. + [PluginConfig "feature_flags"] DefaultValue = "" Repeatable = true diff --git a/build_defs/python.build_defs b/build_defs/python.build_defs index e41da18e..a394f9be 100644 --- a/build_defs/python.build_defs +++ b/build_defs/python.build_defs @@ -94,8 +94,8 @@ def python_library(name:str, srcs:list=[], resources:list=[], deps:list=[], visi def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=None, deps:list=[], data:list=None, visibility:list=None, test_only:bool=False, zip_safe:bool=None, site:bool=False, strip:bool=False, interpreter:str=CONFIG.PYTHON.DEFAULT_INTERPRETER, - shebang:str=CONFIG.PYTHON.DEFAULT_SHEBANG, module_dir:str=CONFIG.PYTHON.MODULE_DIR, - labels:list&features&tags=[]): + runtime_interpreters:list=None, runtime_interpreter_args:list=None, + module_dir:str=CONFIG.PYTHON.MODULE_DIR, labels:list&features&tags=[]): """Generates a Python binary target. This compiles all source files together into a single .pex file which can @@ -123,11 +123,19 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N strip (bool): Strips source code from the output .pex file, leaving just bytecode. site (bool): Allows the Python interpreter to import site; conversely if False, it will be started with the -S flag to avoid importing site. - interpreter (str): The Python interpreter to use. Defaults to the value of the - Python.DefaultInterpreter setting. - shebang (str): Exact shebang to apply to the generated file. Defaults to the value - of the python.defaultshebang setting, if not set we will - determine something appropriate for the given interpreter. + interpreter (str): The path to the Python interpreter with which to compile the Python source + files. Defaults to the value of the DefaultInterpreter plugin configuration + option. + runtime_interpreters (list): A list of possible paths to Python interpreters with which to run + the generated .pex file. At run time, the leftmost element containing + a path to an executable file will be chosen. If unspecified, the + value of the DefaultRuntimeInterpreters plugin configuration option + will be used; if that is unspecified, the value of the interpreter + parameter will be used (meaning that the interpreter used to compile + the sources must exist at the same path in the run-time environment). + runtime_interpreter_args (list): A list of additional arguments to pass to the Python interpreter + when running the generated .pex file. Defaults to the value of + the DefaultRuntimeInterpreterArgs plugin configuration option. module_dir (str): The path to the third party python directory in python import path format. Defaults to value of python.moduledir setting. labels (list): Labels to apply to this rule. @@ -145,11 +153,14 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N test_only=test_only, ) - shebang = shebang or _interpreter_cmd(interpreter) + interpreter_opts, interpreter_deps = _runtime_interpreter(interpreter, runtime_interpreters, runtime_interpreter_args) zipsafe_flag = '' if zip_safe is False else '--zip_safe' - interpreter_opts = _interpreter_opts(CONFIG.PYTHON.INTERPRETER_OPTIONS) - cmd = '$TOOLS_PEX -s "%s" -m "%s" %s %s' % ( - shebang, module_dir, zipsafe_flag, interpreter_opts) + cmd = '$TOOLS_PEX --preamble_verbosity="%s" -m "%s" %s %s' % ( + CONFIG.PYTHON.DEFAULT_PREAMBLE_VERBOSITY, + module_dir, + zipsafe_flag, + interpreter_opts, + ) # If content hashing feature flag is enabled, we use the hash of the built # dependencies instead of the RULE_HASH as the base of the pex extraction @@ -180,13 +191,14 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N output_is_complete=True, tools={ 'interpreter': [interpreter], + 'runtime_interpreters': interpreter_deps, 'pex': [CONFIG.PYTHON.PEX_TOOL], }, test_only=test_only, ) # This rule concatenates the .pex with all the other precompiled zip files from dependent rules. - cmd = '$TOOL z -i . -s .pex.zip -s .whl --preamble_from="$SRC" --include_other --add_init_py --strict' + cmd = '$TOOL z --include "$SRC" -i . -s .pex.zip -s .whl --preamble_from "$SRC" --include_other --add_init_py --strict' if strip: cmd += ' --strip_py' @@ -195,6 +207,7 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N return build_rule( name=name, srcs=[pex_rule], + runtime_deps=interpreter_deps, deps=[lib_rule], outs=[out or (name + '.pex')], data=data, @@ -205,7 +218,7 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N output_is_complete=True, building_description="Creating pex...", visibility=visibility, - requires=['py', interpreter], + requires=['py'], tools=[CONFIG.ARCAT_TOOL], # This makes the python_library rule the dependency for other python_library or # python_test rules that try to import it. Does mean that they cannot collect a .pex @@ -223,8 +236,8 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: labels:list&features&tags=[], size:str=None, flags:str='', visibility:list=None, sandbox:bool=None, timeout:int=0, flaky:bool|int=0, env:dict=None, test_outputs:list=None, zip_safe:bool=None, interpreter:str=CONFIG.PYTHON.DEFAULT_INTERPRETER, - site:bool=False, test_runner:str=None, shebang:str=CONFIG.PYTHON.DEFAULT_SHEBANG, - module_dir:str=CONFIG.PYTHON.MODULE_DIR): + runtime_interpreters:list=None, runtime_interpreter_args:list=None, + site:bool=False, test_runner:str=None, module_dir:str=CONFIG.PYTHON.MODULE_DIR): """Generates a Python test target. This works very similarly to python_binary; it is also a single .pex file @@ -257,11 +270,19 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: If set to explicitly True or False, the output will be marked appropriately; by default it will be safe unless any of the transitive dependencies are themselves marked as not zip-safe. - interpreter (str): The Python interpreter to use. Defaults to the value of the - Python.DefaultInterpreter setting. - shebang (str): Exact shebang to apply to the generated file. Defaults to the value - of the python.defaultshebang setting, if not set we will - determine something appropriate for the given interpreter. + interpreter (str): The path to the Python interpreter with which to compile the Python source + files. Defaults to the value of the DefaultInterpreter plugin configuration + option. + runtime_interpreters (list): A list of possible paths to Python interpreters with which to run + the generated .pex file. At run time, the leftmost element containing + a path to an executable file will be chosen. If unspecified, the + value of the DefaultRuntimeInterpreters plugin configuration option + will be used; if that is unspecified, the value of the interpreter + parameter will be used (meaning that the interpreter used to compile + the sources must exist at the same path in the run-time environment). + runtime_interpreter_args (list): A list of additional arguments to pass to the Python interpreter + when running the generated .pex file. Defaults to the value of + the DefaultRuntimeInterpreterArgs plugin configuration option. module_dir (str): The path to the third party python directory in python import path format. Defaults to value of python.moduledir setting. site (bool): Allows the Python interpreter to import site; conversely if False, it will be @@ -270,9 +291,13 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: `unittest`, `pytest`, or a custom test runner entry point. """ test_runner = test_runner or CONFIG.PYTHON.TEST_RUNNER - shebang = shebang or _interpreter_cmd(interpreter) - interpreter_opts = _interpreter_opts(CONFIG.PYTHON.INTERPRETER_OPTIONS) - cmd = f'$TOOLS_PEX -t -s "{shebang}" -m "{module_dir}" -r "{test_runner}" --zip_safe --add_test_runner_deps {interpreter_opts} --stamp="$RULE_HASH"' + interpreter_opts, interpreter_deps = _runtime_interpreter(interpreter, runtime_interpreters, runtime_interpreter_args) + cmd = '$TOOLS_PEX --preamble_verbosity="%s" -t -m "%s" -r "%s" --zip_safe --add_test_runner_deps %s --stamp="$RULE_HASH"' % ( + CONFIG.PYTHON.DEFAULT_PREAMBLE_VERBOSITY, + module_dir, + test_runner, + interpreter_opts, + ) if site: cmd += ' -S' @@ -306,6 +331,7 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: deps=deps, tools={ 'interpreter': [interpreter], + 'runtime_interpreters': interpreter_deps, 'pex': [CONFIG.PYTHON.PEX_TOOL], }, labels = labels, @@ -326,7 +352,7 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: deps = [pex_rule, lib_rule] - test_cmd = f'$TOOL $TEST {flags}' if site else f'$TOOL -S $TEST {flags}' + test_cmd = f"$TEST {flags}" worker_cmd = f'$(worker {worker})' if worker else "" if worker_cmd: test_cmd = f'{worker_cmd} && {test_cmd} ' @@ -349,11 +375,12 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: return build_rule( name=name, srcs=[pex_rule], + runtime_deps=interpreter_deps, deps=deps, data=data, outs=[f'{name}.pex'], labels=labels + ['test_results_dir'], - cmd='$TOOL z -i . -s .pex.zip -s .whl --preamble_from="$SRC" --include_other --add_init_py --strict', + cmd='$TOOL z --include "$SRC" -i . -s .pex.zip -s .whl --preamble_from "$SRC" --include_other --add_init_py --strict', test_cmd=test_cmd, debug_cmd=debug_cmd, needs_transitive_deps=True, @@ -368,9 +395,8 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: flaky=flaky, env=env, test_outputs=test_outputs, - requires=['py', 'test', interpreter], + requires=['py', 'test'], tools=[CONFIG.ARCAT_TOOL], - test_tools = [interpreter], ) @@ -744,12 +770,44 @@ def _wheel_entrypoint_binary(name:str, entrypoint:str, lib_rule, visibility, tes visibility = visibility, ) -def _interpreter_cmd(interpreter:str): - return f'$(out {interpreter})' if looks_like_build_label(interpreter) else interpreter - -def _interpreter_opts(opts:list): - return " ".join([f'--interpreter_options="{o}"' for o in opts]) +def _runtime_interpreter(build_interp:str, run_interps:list, run_args:list): + """Returns a string of please_pex options defining the run-time Python interpreter paths and + arguments to pass to the interpreter during invocation (falling back to default values where + necessary), and a list containing the names of the subset of those interpreters that are defined + as build targets. + """ + opts = [] + targets = [] + for i in (run_interps or CONFIG.PYTHON.DEFAULT_RUNTIME_INTERPRETERS or [build_interp]): + if looks_like_build_label(i): + i_target = i.rpartition("|")[0] if "|" in i else i + # The location of a Python interpreter defined as a build target within the repo + # relative to the .pex output of a python_binary/python_test target that lists it as a + # run-time dependency varies depending on the context. If the target is running within a + # build environment (e.g. under `plz build` or `plz test`, either directly or as a + # dependency of some other target), Please will also copy the interpreter into the build + # directory if it is listed as a run-time dependency of the target. If the target is + # running outside a build environment (e.g. under `plz run`), Please will build the + # interpreter target, outputting it to plz-out/bin/. Relative to each of these paths, + # the interpreter will exist at the path given by passing its target name to the + # $(location) built-in function. However, this context is a run-time property of the + # .pex file; by prepending the interpreter path with "$PLZ_BIN_PATH/", the preamble will + # detect the context and substitute an appropriate path in place of $PLZ_BIN_PATH. + opts += [f"--interpreters='$PLZ_BIN_PATH/$(location {i})'"] + # If the target name ends with an entry point name, strip it - runtime_deps won't accept + # target names containing entry point names (which isn't really a problem for us, + # because the entire target will have to be built for the entry point to be provided + # anyway). + if canonicalise(i_target) != i: + targets += [i_target] + else: + opts += [f"--interpreters='{i}'"] + opts += [ + f"--interpreter_args='{a}'" + for a in (run_args or CONFIG.PYTHON.DEFAULT_RUNTIME_INTERPRETER_ARGS or []) + ] + return " ".join(opts), targets def _patch_cmd(patch): diff --git a/plugins/BUILD b/plugins/BUILD index a0c714c2..5f173fe9 100644 --- a/plugins/BUILD +++ b/plugins/BUILD @@ -1,6 +1,6 @@ plugin_repo( name = "cc", - revision = "v0.7.0", + revision = "v0.7.1", ) plugin_repo( diff --git a/test/BUILD b/test/BUILD index 56de60d2..dfdc610f 100644 --- a/test/BUILD +++ b/test/BUILD @@ -23,8 +23,8 @@ e2e_test_plugin( "//build_defs:archs", "//build_defs:python", "//build_defs:version", - "//tools:please_pex", "//:plzconfig", + CONFIG.PYTHON.PEX_TOOL, ], visibility = [ "//test/...", diff --git a/test/interpreter_options_repo/BUILD_FILE b/test/interpreter_options_repo/BUILD_FILE index 94913ca1..611bf3e3 100644 --- a/test/interpreter_options_repo/BUILD_FILE +++ b/test/interpreter_options_repo/BUILD_FILE @@ -1,11 +1,11 @@ subinclude("///python//build_defs:python") package( - # If InterpreterOptions is being honoured, the Python interpreter should execute plzmodule.py - # as the __main__ module instead of main.py, causing "Executed plzmodule.py" to be printed on - # stdout rather than "Executed binary.py". + # If DefaultRuntimeInterpreterArgs is being honoured, the Python interpreter should execute + # plzmodule.py as the __main__ module instead of main.py, causing "Executed plzmodule.py" to be + # printed on stdout rather than "Executed binary.py". python = { - "interpreter_options": ["-m", "plzmodule"], + "default_runtime_interpreter_args": ["-m", "plzmodule"], }, ) diff --git a/test/strip_source_test.py b/test/strip_source_test.py index c6df1d99..7b67229e 100644 --- a/test/strip_source_test.py +++ b/test/strip_source_test.py @@ -13,7 +13,7 @@ def setUp(self): def test_can_run_binary(self): """Test that the dependent binary can be run successfully.""" - subprocess.check_call([os.environ["TOOLS"], self.filename]) + subprocess.check_call([self.filename]) def test_does_not_have_py_file(self): """Test that the binary doesn't have the source in it.""" diff --git a/third_party/cc/cjson/BUILD b/third_party/cc/cjson/BUILD new file mode 100644 index 00000000..8205d91d --- /dev/null +++ b/third_party/cc/cjson/BUILD @@ -0,0 +1,7 @@ +github_repo( + name = "cjson", + build_file = "cjson.build", + hashes = ["83fb7750db0601dca735868b8fb1da1318da2d2a1331a9a7da923cb891d26ea9"], + repo = "DaveGamble/cJSON", + revision = "v1.7.19", +) diff --git a/third_party/cc/cjson/cjson.build b/third_party/cc/cjson/cjson.build new file mode 100644 index 00000000..1505c429 --- /dev/null +++ b/third_party/cc/cjson/cjson.build @@ -0,0 +1,18 @@ +subinclude("///cc//build_defs:c") + +package( + cc = { + "default_dbg_cflags": CONFIG.CC.DEFAULT_DBG_CFLAGS + ["-std=c89"], + "default_opt_cflags": CONFIG.CC.DEFAULT_OPT_CFLAGS + ["-std=c89"], + }, +) + +c_library( + name = "cjson", + srcs = ["cJSON.c"], + hdrs = ["cJSON.h"], + includes = ["."], + visibility = [ + "@//tools/please_pex/preamble/...", + ], +) diff --git a/third_party/cc/kuba_zip/BUILD b/third_party/cc/kuba_zip/BUILD new file mode 100644 index 00000000..ae99e0e7 --- /dev/null +++ b/third_party/cc/kuba_zip/BUILD @@ -0,0 +1,7 @@ +github_repo( + name = "kuba_zip", + build_file = "kuba_zip.build", + hashes = ["39961a0598a2f62272eb237df8923077a0508e6ec89fdb82bd500706b579daf3"], + repo = "kuba--/zip", + revision = "v0.3.5", +) diff --git a/third_party/cc/kuba_zip/kuba_zip.build b/third_party/cc/kuba_zip/kuba_zip.build new file mode 100644 index 00000000..f7ff97cc --- /dev/null +++ b/third_party/cc/kuba_zip/kuba_zip.build @@ -0,0 +1,24 @@ +subinclude("///cc//build_defs:c") + +package( + cc = { + "default_dbg_cflags": CONFIG.CC.DEFAULT_DBG_CFLAGS + ["-std=gnu99"], + "default_opt_cflags": CONFIG.CC.DEFAULT_OPT_CFLAGS + ["-std=gnu99"], + }, +) + +c_library( + name = "kuba_zip", + srcs = ["src/zip.c"], + hdrs = ["src/zip.h"], + defines = [ + # Disable some unnecessary miniz features to save space. + "MINIZ_NO_TIME", + "MINIZ_NO_ZLIB_COMPATIBLE_NAMES", + ], + includes = ["src"], + private_hdrs = ["src/miniz.h"], + visibility = [ + "@//tools/please_pex/preamble/...", + ], +) diff --git a/third_party/cc/log_c/BUILD b/third_party/cc/log_c/BUILD new file mode 100644 index 00000000..cb64f1c3 --- /dev/null +++ b/third_party/cc/log_c/BUILD @@ -0,0 +1,7 @@ +github_repo( + name = "log_c", + build_file = "log_c.build", + hashes = ["a1972da6886cfa3abf30b1bc8dbf2f67e4d219dbadcea91b96e1a979dab6267d"], + repo = "chrisnovakovic/log.c", + revision = "20d9411165ed1072229f982e2259b515e0301814", +) diff --git a/third_party/cc/log_c/log_c.build b/third_party/cc/log_c/log_c.build new file mode 100644 index 00000000..35166d6c --- /dev/null +++ b/third_party/cc/log_c/log_c.build @@ -0,0 +1,11 @@ +subinclude("///cc//build_defs:c") + +c_library( + name = "log_c", + srcs = ["src/log.c"], + hdrs = ["src/log.h"], + includes = ["src"], + visibility = [ + "@//tools/please_pex/preamble/...", + ], +) diff --git a/third_party/cc/openbsd/BUILD b/third_party/cc/openbsd/BUILD new file mode 100644 index 00000000..ec5e3e2c --- /dev/null +++ b/third_party/cc/openbsd/BUILD @@ -0,0 +1,25 @@ +subinclude("///cc//build_defs:c") + +remote_file( + name = "queue_h_upstream", + out = "queue_upstream.h", + url = "https://raw.githubusercontent.com/openbsd/src/f9d861d10df22eff555a4387e0151a3215209fd1/sys/sys/queue.h", +) + +genrule( + name = "queue_h_patched", + srcs = [":queue_h_upstream"], + outs = ["queue.h"], + # On OpenBSD, sys/_null.h defines NULL if it isn't already defined. NULL is widely defined on the platforms we + # support, so strip out this include to save ourselves another dependency. + cmd = "grep -vFx '#include ' $SRCS > $OUTS", +) + +c_library( + name = "queue", + hdrs = [":queue_h_patched"], + includes = ["."], + visibility = [ + "//tools/please_pex/preamble/...", + ], +) diff --git a/third_party/go/BUILD b/third_party/go/BUILD index 7e8c7d34..0be24b3b 100644 --- a/third_party/go/BUILD +++ b/third_party/go/BUILD @@ -59,7 +59,7 @@ go_repo( go_repo( module = "golang.org/x/sys", - version = "v0.20.0", + version = "v0.26.0", ) go_repo( diff --git a/tools/please_pex/BUILD b/tools/please_pex/BUILD index d641609b..14831086 100644 --- a/tools/please_pex/BUILD +++ b/tools/please_pex/BUILD @@ -14,6 +14,7 @@ go_binary( "///third_party/go/github.com_peterebden_go-cli-init_v5//logging", "///third_party/go/gopkg.in_op_go-logging.v1//:go-logging.v1", "//tools/please_pex/pex", + "//tools/please_pex/preamble:preamble_go", ], ) diff --git a/tools/please_pex/pex/BUILD b/tools/please_pex/pex/BUILD index 2eb95343..bdc776e4 100644 --- a/tools/please_pex/pex/BUILD +++ b/tools/please_pex/pex/BUILD @@ -3,9 +3,19 @@ subinclude("///go//build_defs:go") go_library( name = "pex", srcs = ["pex.go"], - resources = glob(["**/*.py"]), - visibility = ["//tools/please_pex:all"], + resources = glob(["**/*.py"]) + [":preamble"], + visibility = [ + "//tools/please_pex:all", + ], deps = [ + "//tools/please_pex/preamble:preamble_go", "//tools/please_pex/zip", ], ) + +genrule( + name = "preamble", + srcs = ["//tools/please_pex/preamble"], + outs = ["preamble"], + cmd = "cp $SRCS $OUTS", +) diff --git a/tools/please_pex/pex/pex.go b/tools/please_pex/pex/pex.go index 1bb8e4d4..2dfc63db 100644 --- a/tools/please_pex/pex/pex.go +++ b/tools/please_pex/pex/pex.go @@ -7,13 +7,16 @@ package pex import ( "bytes" "embed" + "encoding/json" "fmt" + "io/fs" "log" "os" "path" "path/filepath" "strings" + "github.com/please-build/python-rules/tools/please_pex/preamble" "github.com/please-build/python-rules/tools/please_pex/zip" ) @@ -30,13 +33,13 @@ const ( //go:embed *.py //go:embed test_runners/*.py //go:embed debuggers/*.py +//go:embed preamble var files embed.FS // A Writer implements writing a .pex file in various steps. type Writer struct { + preambleConfig *preamble.Config zipSafe bool - noSite bool - shebang string realEntryPoint string pexStamp string testSrcs []string @@ -47,36 +50,28 @@ type Writer struct { } // NewWriter constructs a new Writer. -func NewWriter(entryPoint, interpreter string, options []string, stamp string, zipSafe, noSite bool) *Writer { +func NewWriter(entryPoint string, interpreters []string, interpreterArgs []string, stamp string, zipSafe, noSite bool) *Writer { + if interpreterArgs == nil { + interpreterArgs = make([]string, 0) + } pw := &Writer{ + preambleConfig: &preamble.Config{ + Interpreters: interpreters, + InterpreterArgs: interpreterArgs, + }, zipSafe: zipSafe, - noSite: noSite, realEntryPoint: toPythonPath(entryPoint), pexStamp: stamp, } - pw.SetShebang(interpreter, options) + if noSite { + pw.preambleConfig.InterpreterArgs = append(pw.preambleConfig.InterpreterArgs, "-S") + } return pw } -// SetShebang sets the leading shebang that will be written to the file. -func (pw *Writer) SetShebang(shebang string, options []string) { - shebang = strings.TrimSpace(fmt.Sprintf("%s %s", shebang, strings.Join(options, " "))) - if !path.IsAbs(shebang) { - shebang = "/usr/bin/env " + shebang - } - if pw.noSite { - // In many environments shebangs cannot have more than one argument; we can work around - // that by treating it as a shell script. - if strings.Contains(shebang, " ") { - shebang = "#!/bin/sh\nexec " + shebang + ` -S $0 "$@"` - } else { - shebang += " -S" - } - } - if !strings.HasPrefix(shebang, "#") { - shebang = "#!" + shebang - } - pw.shebang = shebang + "\n" +// SetPreambleVerbosity sets the preamble's default minimum logging level. +func (pw *Writer) SetPreambleVerbosity(verbosity preamble.Verbosity) { + pw.preambleConfig.Verbosity = verbosity } // SetTest sets this Writer to write tests using the given sources. @@ -162,10 +157,22 @@ func (pw *Writer) Write(out, moduleDir string) error { f := zip.NewFile(out, true) defer f.Close() - // Write preamble (i.e. the shebang that makes it executable) - if err := f.WritePreamble([]byte(pw.shebang)); err != nil { + // Write preamble (i.e. the binary that makes the .pex executable) + nf := mustOpen("preamble") + defer nf.Close() + if err := f.WritePreambleFile(nf); err != nil { return err } + + // Write preamble configuration file + preambleConfig, err := json.Marshal(&pw.preambleConfig) + if err != nil { + return fmt.Errorf("marshal preamble configuration: %w", err) + } + if err := f.WriteFile(preamble.ConfigPath, preambleConfig, 0644); err != nil { + return fmt.Errorf("write preamble configuration: %w", err) + } + // Non-zip-safe pexes need portalocker if !pw.zipSafe { pw.includeLibs = append(pw.includeLibs, ".bootstrap/portalocker") @@ -229,6 +236,15 @@ func toPythonPath(p string) string { return strings.ReplaceAll(p[:len(p)-len(ext)], "/", ".") } +// mustOpen opens the given file from the embedded set. It dies on error. +func mustOpen(filename string) fs.File { + f, err := files.Open(filename) + if err != nil { + panic(err) + } + return f +} + // mustRead reads the given file from the embedded set. It dies on error. func mustRead(filename string) []byte { b, err := files.ReadFile(filename) diff --git a/tools/please_pex/pex_main.go b/tools/please_pex/pex_main.go index f5f86594..a2f49088 100644 --- a/tools/please_pex/pex_main.go +++ b/tools/please_pex/pex_main.go @@ -7,27 +7,28 @@ import ( cli "github.com/peterebden/go-cli-init/v5/flags" l "github.com/peterebden/go-cli-init/v5/logging" "github.com/please-build/python-rules/tools/please_pex/pex" + "github.com/please-build/python-rules/tools/please_pex/preamble" ) var log = logging.MustGetLogger("please_pex") var opts = struct { - Usage string - Verbosity l.Verbosity `short:"v" long:"verbosity" default:"warning" description:"Verbosity of output (higher number = more output)"` - Out string `short:"o" long:"out" env:"OUT" description:"Output file"` - EntryPoint string `short:"e" long:"entry_point" env:"SRC" description:"Entry point to pex file"` - ModuleDir string `short:"m" long:"module_dir" description:"Python module dir to implicitly load modules from"` - TestSrcs []string `long:"test_srcs" env:"SRCS" env-delim:" " description:"Test source files"` - Interpreter string `short:"i" long:"interpreter" env:"TOOLS_INTERPRETER" description:"Python interpreter to use"` - TestRunner string `short:"r" long:"test_runner" default:"unittest" description:"Test runner to use"` - Shebang string `short:"s" long:"shebang" description:"Explicitly set shebang to this"` - Stamp string `long:"stamp" description:"Unique value used to derive cache directory for pex"` - InterpreterOptions []string `long:"interpreter_options" description:"Additional command line arguments to pass to the Python interpreter"` - Test bool `short:"t" long:"test" description:"True if we're to build a test"` - Debug pex.Debugger `short:"d" long:"debug" optional:"true" optional-value:"pdb" choice:"pdb" choice:"debugpy" description:"Debugger to generate a debugging pex"` - Site bool `short:"S" long:"site" description:"Allow the pex to import site at startup"` - ZipSafe bool `long:"zip_safe" description:"Marks this pex as zip-safe"` - AddTestRunnerDeps bool `long:"add_test_runner_deps" description:"True if test-runner dependencies should be baked into test binaries"` + Usage string + Verbosity l.Verbosity `short:"v" long:"verbosity" default:"warning" description:"Verbosity of output (higher number = more output)"` + Out string `short:"o" long:"out" env:"OUT" description:"Output file"` + EntryPoint string `short:"e" long:"entry_point" env:"SRC" description:"Entry point to pex file"` + ModuleDir string `short:"m" long:"module_dir" description:"Python module dir to implicitly load modules from"` + TestSrcs []string `long:"test_srcs" env:"SRCS" env-delim:" " description:"Test source files"` + PreambleVerbosity preamble.Verbosity `long:"preamble_verbosity" optional:"true" optional-value:"level" choice:"trace" choice:"debug" choice:"info" choice:"warn" choice:"error" choice:"fatal" description:"The preamble's default minimum logging level"` + Interpreters []string `short:"i" long:"interpreters" required:"true" description:"Python interpreters to attempt to invoke at run-time"` + InterpreterArgs []string `long:"interpreter_args" description:"Additional command line arguments to pass to the Python interpreter"` + TestRunner string `short:"r" long:"test_runner" default:"unittest" description:"Test runner to use"` + Stamp string `long:"stamp" description:"Unique value used to derive cache directory for pex"` + Test bool `short:"t" long:"test" description:"True if we're to build a test"` + Debug pex.Debugger `short:"d" long:"debug" optional:"true" optional-value:"pdb" choice:"pdb" choice:"debugpy" description:"Debugger to generate a debugging pex"` + Site bool `short:"S" long:"site" description:"Allow the pex to import site at startup"` + ZipSafe bool `long:"zip_safe" description:"Marks this pex as zip-safe"` + AddTestRunnerDeps bool `long:"add_test_runner_deps" description:"True if test-runner dependencies should be baked into test binaries"` }{ Usage: ` please_pex is a tool to create .pex files for Python. @@ -42,10 +43,15 @@ func main() { cli.ParseFlagsOrDie("please_pex", &opts, nil) l.InitLogging(opts.Verbosity) w := pex.NewWriter( - opts.EntryPoint, opts.Interpreter, opts.InterpreterOptions, opts.Stamp, - opts.ZipSafe, !opts.Site) - if opts.Shebang != "" { - w.SetShebang(opts.Shebang, opts.InterpreterOptions) + opts.EntryPoint, + opts.Interpreters, + opts.InterpreterArgs, + opts.Stamp, + opts.ZipSafe, + !opts.Site, + ) + if opts.PreambleVerbosity != "" { + w.SetPreambleVerbosity(opts.PreambleVerbosity) } if opts.Test { w.SetTest(opts.TestSrcs, opts.TestRunner, opts.AddTestRunnerDeps) diff --git a/tools/please_pex/preamble/BUILD b/tools/please_pex/preamble/BUILD new file mode 100644 index 00000000..7541045e --- /dev/null +++ b/tools/please_pex/preamble/BUILD @@ -0,0 +1,33 @@ +subinclude( + "///cc//build_defs:c", + "///go//build_defs:go", +) + +c_binary( + name = "preamble", + srcs = glob(["*.c"]), + hdrs = glob(["*.h"]), + compiler_flags = ["-std=c11"], + defines = [ + "_XOPEN_SOURCE=600", + ], + static = True, + visibility = [ + "//tools/please_pex/pex:preamble", + ], + deps = [ + "///third_party/cc/cjson/cjson//:cjson", + "///third_party/cc/kuba_zip/kuba_zip//:kuba_zip", + "///third_party/cc/log_c/log_c//:log_c", + "//third_party/cc/openbsd:queue", + ], +) + +go_library( + name = "preamble_go", + srcs = glob(["*.go"]), + visibility = [ + "//tools/please_pex/pex", + "//tools/please_pex:pex_main", + ], +) diff --git a/tools/please_pex/preamble/path.c b/tools/please_pex/preamble/path.c new file mode 100644 index 00000000..9d04d1a2 --- /dev/null +++ b/tools/please_pex/preamble/path.c @@ -0,0 +1,234 @@ +#include +#include + +#if defined(__linux__) +#include +#include +#elif defined(__APPLE__) +#include +#elif defined(__FreeBSD__) +#include +#include +#endif + +#include "log.h" +#include "path.h" +#include "pexerror.h" +#include "util.h" + +#ifdef __linux__ + +/* + * get_path_max returns the maximum length of a relative path name when the given path is the + * current working directory (although the path need not actually be a directory, or even exist). + */ +static int get_path_max(char *path) { + int path_max = pathconf(path, _PC_PATH_MAX); + + if (path_max <= 0) { + // PATH_MAX may be defined in , but this is not a POSIX requirement. If it isn't + // defined, fall back to 4096 (as recommended by Linux's realpath(3) man page). +#ifdef PATH_MAX + path_max = PATH_MAX; +#else + path_max = 4096; +#endif + } + + return path_max; +} + +/* + * get_link_path resolves the symbolic link at the path lpath and stores the link's destination path + * in rpath. It returns NULL on success and an error on failure. + */ +static err_t *get_link_path(char *lpath, char **rpath) { + struct stat sb = { 0 }; + int slen = 0; + int rlen = 0; + err_t *err = NULL; + + // Get the length of lpath's destination path so we can allocate a buffer of that length for + // readlink to write to (plus one byte, so we can determine whether readlink has truncated the + // path it writes, and also for the trailing null we'll append to the path afterwards). If lpath + // is a magic symlink (in which case sb.st_size is 0), assume the destination path is PATH_MAX + // bytes long - it's an overestimate, but at least the buffer will be large enough for readlink + // to safely write to. + if (lstat(lpath, &sb) == -1) { + err = err_from_errno("lstat"); + goto end; + } + slen = (sb.st_size == 0 ? get_path_max(lpath) : sb.st_size) + 1; + + MALLOC(*rpath, char *, slen); + + if ((rlen = readlink(lpath, (*rpath), slen)) == -1) { + err = err_from_errno("readlink"); + goto end; + } + // If readlink filled the buffer, it truncated the destination path it wrote. In this case, the + // value is untrustworthy, so we're better off not using it. + if (slen == rlen) { + err = err_from_str("readlink truncated destination path"); + goto end; + } + + // Otherwise, add the trailing null to the destination path that readlink omitted. + (*rpath)[rlen] = 0; + +end: + if (err != NULL) { + FREE(*rpath); + } + + return err; +} + +#endif /* __linux__ */ + +/* + * get_pex_dir stores the path to the .pex file in pex_dir. It returns NULL on success and an error + * on failure. + */ +err_t *get_pex_dir(char **pex_dir) { + char *exe_path = NULL; + char *exe_dir = NULL; + err_t *err = NULL; + +#if defined(__linux__) + if ((err = get_link_path("/proc/self/exe", &exe_path)) != NULL) { + err = err_wrap("get_link_path", err); + goto end; + } +#elif defined(__APPLE__) + uint32_t len = 0; + + // Call _NSGetExecutablePath once to find out how long the executable path is, then again after + // we've allocated that much memory to store it. + _NSGetExecutablePath(NULL, &len); + MALLOC(exe_path, char, len); + if (_NSGetExecutablePath(exe_path, &len) != 0) { + err = err_from_str("_NSGetExecutablePath failure"); + goto end; + } +#elif defined(__FreeBSD__) + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; + size_t len = 0; + + // Call sysctl once to find out how long the executable path is, then again after we've + // allocated that much memory to store it. + if (sysctl(mib, NELEMS(mib), NULL, &len, NULL, 0) == -1) { + err = err_from_errno("sysctl lookup"); + goto end; + } + MALLOC(exe_path, char, len); + if (sysctl(mib, NELEMS(mib), exe_path, &len, NULL, 0) == -1) { + err = err_from_errno("sysctl write"); + goto end; + } +#else +#error "Unsupported operating system" +#endif + + exe_dir = dirname(exe_path); + if (((*pex_dir) = strdup(exe_dir)) == NULL) { + err = err_from_errno("strdup"); + goto end; + } + +end: +#ifdef __APPLE__ + FREE(exe_path); +#endif + + return err; +} + +/* + * get_plz_bin_path stores the path to the directory containing Please's binary outputs in path. It + * returns NULL on success and an error on failure. + * + * If the .pex file is running from within a Please build environment (which is assumed if PLZ_ENV + * is defined in the environment), get_plz_bin_path returns the path to the build environment's root + * directory (which is assumed to be the value of TMP_DIR in the environment, as created by Please). + * Otherwise, get_plz_bin_path returns the path to the bin/ subdirectory within the plz-out/ directory + * to which this .pex file belongs. If the .pex file does not exist within a plz-out/ directory + * tree, get_plz_bin_path fails. + */ +err_t *get_plz_bin_path(char **path) { + char *pex_dir = NULL; + char *tmp_dir = NULL; + char *pex_realpath = NULL; + char *tmp_realpath = NULL; + size_t pex_len = 0; + size_t tmp_len = 0; + err_t *err = NULL; + + if ((err = get_pex_dir(&pex_dir)) != NULL) { + err = err_wrap("get_pex_dir", err); + goto end; + } + + if (getenv("PLZ_ENV") != NULL) { + if ((tmp_dir = getenv("TMP_DIR")) == NULL) { + // There are two circumstances under which this might happen: either if this is a Please + // build environment but TMP_DIR was unset before the .pex file was executed, or if this + // isn't a Please build environment and PLZ_ENV just happened to be set for some other + // reason. The latter is more likely than the former (many build definitions rely on + // TMP_DIR being defined, which would lead to much more breakage besides this function + // failing) - therefore, optimise for the latter case. + log_warn("PLZ_ENV is defined but TMP_DIR is not; assuming this is not a Please build environment"); + goto no_plz_env; + } + + // Identify whether the .pex file exists inside the build environment. + if ((pex_realpath = realpath(pex_dir, NULL)) == NULL) { + err = err_from_errno("realpath pex_dir"); + goto end; + } + if ((tmp_realpath = realpath(tmp_dir, NULL)) == NULL) { + err = err_from_errno("realpath tmp_dir"); + goto end; + } + + pex_len = strlen(pex_realpath); + tmp_len = strlen(tmp_realpath); + + if ( + strncmp(tmp_realpath, pex_realpath, tmp_len) == 0 && + ( + (pex_len == tmp_len) || + (pex_len - tmp_len >= 1 && pex_realpath[tmp_len] == '/') + ) + ) { + if (((*path) = strdup(tmp_realpath)) == NULL) { + err = err_from_errno("strdup"); + } + goto end; + } + } + +no_plz_env: + // Try searching the .pex file path component-wise for the last instance of "plz-out". If one is + // found, assume the binary outputs are in the bin/ subdirectory within that directory. There's + // no need to check now whether bin/ actually exists - if it doesn't, the call to execvp() later + // will fail anyway. + while (!STREQ(basename(pex_dir), "plz-out") && !STREQ(pex_dir, "/") && !STREQ(pex_dir, ".")) { + pex_dir = dirname(pex_dir); + } + if (STREQ(pex_dir, "/") || STREQ(pex_dir, ".")) { + // If we reached the left-most component in the path, the .pex file doesn't exist within a + // plz-out/ directory tree. + goto end; + } + MALLOC(*path, char, strlen(pex_dir) + strlen("/bin") + 1); + if (sprintf(*path, "%s/bin", pex_dir) < 0) { + err = err_from_errno("sprintf path"); + } + +end: + FREE(pex_realpath); + FREE(tmp_realpath); + + return err; +} diff --git a/tools/please_pex/preamble/path.h b/tools/please_pex/preamble/path.h new file mode 100644 index 00000000..560b6af9 --- /dev/null +++ b/tools/please_pex/preamble/path.h @@ -0,0 +1,9 @@ +#ifndef __PATH_H__ + +#define __PATH_H__ + +#include "pexerror.h" + +err_t *get_plz_bin_path(char **path); + +#endif /* !__PATH_H__ */ diff --git a/tools/please_pex/preamble/pexerror.c b/tools/please_pex/preamble/pexerror.c new file mode 100644 index 00000000..31157c52 --- /dev/null +++ b/tools/please_pex/preamble/pexerror.c @@ -0,0 +1,98 @@ +/* + * pexerror.c implements a simple error reporting mechanism. + * + * Errors are singly-linked lists (of type err_t) whose nodes (of type errmsg_t) are strings + * containing diagnostic messages. This allows for the formation of a Go-like error chain, with + * diagnostic messages becoming progressively more specific as the tail of the list is approached. + */ + +#include +#include +#include +#include + +#include "pexerror.h" +#include "util.h" + +/* + * err_from_str returns an error from the given diagnostic message. + */ +err_t *err_from_str(char *msg) { + err_t *err = NULL; + + MALLOC(err, err_t, 1); + *err = (err_t){ 0 }; + SLIST_INIT(err); + + return err_wrap(msg, err); +} + +/* + * err_from_errno returns an error from the given diagnostic message that wraps another error + * whose diagnotic message is derived from the current value of errno. + * + * This function is used to report the failure of a system call or library function. Because errno + * doesn't indicate which system call or library function encountered the error, the diagnostic + * message passed in msg typically includes the name of the system call or library function. + * + * Note that err_from_errno doesn't check that the value of errno is significant; it is therefore + * the responsibility of the caller to ensure that a system call or library function that sets + * errno has actually returned a value indicating failure before calling err_from_errno. + */ +err_t *err_from_errno(char *msg) { + err_t *err = err_from_str(strerror(errno)); + + return err_wrap(msg, err); +} + +/* + * err_wrap creates an error from the given diagnostic message, using it to wrap the given error, + * and then returns the newly-created error. + */ +err_t *err_wrap(char *msg, err_t *wrapped) { + errmsg_t *first = SLIST_FIRST(wrapped); + errmsg_t *errmsg = NULL; + size_t len = strlen(msg); + + if (wrapped == NULL) { + fprintf(stderr, "err_wrap called with null value for wrapped"); + exit(EX_SOFTWARE); + } + + MALLOC(errmsg, errmsg_t, 1); + errmsg->msg = msg; + errmsg->len = len; + errmsg->flen = len + (first == NULL ? 0 : 2 + first->flen); // 2 extra bytes for ": " separator + + SLIST_INSERT_HEAD(wrapped, errmsg, wrapped); + + return wrapped; +} + +/* + * err_str extracts the diagnostic messages from the given error chain and formats them as a + * null-terminated string. Messages are included in the returned string in the order in which they + * appear in the error chain, with successive errors delimited by ": ". + */ +char *err_str(err_t *err) { + errmsg_t *errmsg = NULL; + char *catmsg = NULL; + size_t i = 0; + + errmsg = SLIST_FIRST(err); + MALLOC(catmsg, char, errmsg->flen + 1); // 1 extra byte for trailing null + + SLIST_FOREACH(errmsg, err, wrapped) { + strncpy(catmsg + i, errmsg->msg, errmsg->len); + i += errmsg->len; + // Don't append ": " to the final error in the list. + if (errmsg->len != errmsg->flen) { + strcpy(catmsg + i, ": "); + i += 2; + } + } + + catmsg[i] = 0; + + return catmsg; +} diff --git a/tools/please_pex/preamble/pexerror.h b/tools/please_pex/preamble/pexerror.h new file mode 100644 index 00000000..ddcc9af1 --- /dev/null +++ b/tools/please_pex/preamble/pexerror.h @@ -0,0 +1,42 @@ +#ifndef __PEXERROR_H__ + +#define __PEXERROR_H__ + +#include "queue.h" + +typedef struct errmsg_t { + /* + * len is the length of the diagnostic message stored in msg. It excludes the trailing null. + */ + size_t len; + + /* + * flen is the expected length of the error chain with this error at its head when it is + * formatted by err_str. It is the length of the diagnostic message in this error plus the + * lengths of the diagnostic messages stored in each of the errors wrapped by this error, with + * an additional two bytes per wrapped error to account for the ": " separator that err_str + * inserts between diagnostic messages. + */ + size_t flen; + + /* + * msg is a diagnostic message describing this error. + */ + char *msg; + + /* + * wrapped is an error that is wrapped by this error. + */ + SLIST_ENTRY(errmsg_t) wrapped; +} errmsg_t; + +SLIST_HEAD(err_t, errmsg_t); + +typedef struct err_t err_t; + +err_t *err_from_str(char *msg); +err_t *err_from_errno(char *msg); +err_t *err_wrap(char *msg, err_t *wrapped); +char *err_str(err_t *err); + +#endif /* __PEXERROR_H__ */ diff --git a/tools/please_pex/preamble/preamble.c b/tools/please_pex/preamble/preamble.c new file mode 100644 index 00000000..548091cb --- /dev/null +++ b/tools/please_pex/preamble/preamble.c @@ -0,0 +1,366 @@ +#include +#include + +#include "cJSON.h" +#include "log.h" +#include "path.h" +#include "pexerror.h" +#include "queue.h" +#include "util.h" +#include "zip.h" + +#define JSON_STRING(x) (cJSON_IsString(x) && (x)->valuestring != NULL) +#define JSON_STRLEN(x) strlen((x)->valuestring) + +#define PREAMBLE_CONFIG_PATH ".bootstrap/PLZ_PREAMBLE_CONFIG" + +/* + * init_log configures log.c to log initially at error level and to include the current date and + * time (but not source location) in log messages. + */ +static void init_log() { + // In the early stages of the preamble, default to LOG_ERROR (although everything will be fatal + // until we're in a position to call set_log_verbosity anyway, so this is more of a formality). + log_set_level(LOG_ERROR); + + log_set_time_format("%Y-%m-%d %H:%M:%S"); +} + +/* + * parse_log_level returns the log.c logging level represented by the given string, or -1 if the + * given string does not represent a known logging level. It is the inverse of the string_log_level + * function. + */ +static int parse_log_level(char *level) { + if (level == NULL) { + return -1; + } else if (STREQ(level, "trace")) { + return LOG_TRACE; + } else if (STREQ(level, "debug")) { + return LOG_DEBUG; + } else if (STREQ(level, "info")) { + return LOG_INFO; + } else if (STREQ(level, "warn")) { + return LOG_WARN; + } else if (STREQ(level, "error")) { + return LOG_ERROR; + } else if (STREQ(level, "fatal")) { + return LOG_FATAL; + } else { + return -1; + } +} + +/* + * string_log_level returns a stringified representation of the given log.c logging level. It is + * equivalent to log.c's log_level_string function, except it returns lower-case text. + */ +static char *string_log_level(int level) { + if (level == LOG_TRACE) { + return "trace"; + } else if (level == LOG_DEBUG) { + return "debug"; + } else if (level == LOG_INFO) { + return "info"; + } else if (level == LOG_WARN) { + return "warn"; + } else if (level == LOG_ERROR) { + return "error"; + } else if (level == LOG_FATAL) { + return "fatal"; + } + return ""; +} + +/* + * set_log_verbosity sets log.c's logging level based on the value of the PLZ_PEX_PREAMBLE_VERBOSITY + * environment variable, falling back to the default logging level set in the .pex preamble + * configuration if PLZ_PEX_PREAMBLE_VERBOSITY does not have a recognised value (and falling back + * further to "error" if the .pex preamble configuration also does not contain a recognised value). + */ +static void set_log_verbosity(const cJSON *config) { + char *env_verbosity = getenv("PLZ_PEX_PREAMBLE_VERBOSITY"); + int level = parse_log_level(env_verbosity); + const cJSON *json_verbosity = NULL; + + if (level == -1) { + json_verbosity = cJSON_GetObjectItemCaseSensitive(config, "verbosity"); + if (!JSON_STRING(json_verbosity) || (level = parse_log_level(json_verbosity->valuestring)) == -1) { + level = LOG_ERROR; + } + + if (env_verbosity != NULL) { + log_error("Unknown logging level '%s'; defaulting to '%s'", env_verbosity, string_log_level(level)); + } + } + + log_set_level(level); +} + +/* + * get_config reads and parses the .pex preamble configuration, a JSON-encoded file at + * .bootstrap/PLZ_PREAMBLE_CONFIG in the zip archive, and returns it as a cJSON struct. It returns + * NULL on failure. + */ +static const cJSON *get_config(char *pex_path) { + struct zip_t *zip = NULL; + int ziperr = 0; + char *configbuf = NULL; + size_t configsize = 0; + const cJSON *config = NULL; + + zip = zip_openwitherror(pex_path, 0, 'r', &ziperr); + if (ziperr < 0) { + log_fatal("Failed to open .pex as zip file: %s", zip_strerror(ziperr)); + goto end; + } + + if ((ziperr = zip_entry_open(zip, PREAMBLE_CONFIG_PATH)) < 0) { + log_fatal("Failed to open .pex preamble configuration: %s", zip_strerror(ziperr)); + goto end; + } + + if ((ziperr = (int)zip_entry_read(zip, (void **)&configbuf, &configsize)) < 0) { + log_fatal("Failed to read .pex preamble configuration: %s", zip_strerror(ziperr)); + goto end; + } + + zip_entry_close(zip); + + zip_close(zip); + + if ((config = cJSON_ParseWithLength(configbuf, configsize)) == NULL) { + log_fatal("Failed to parse .pex preamble configuration: JSON syntax error near '%s'", cJSON_GetErrorPtr()); + goto end; + } + +end: + FREE(configbuf); + + return config; +} + +/* + * get_interpreter_args extracts the array of interpreter command line arguments from the value of + * "interpreter_args" in the given .pex preamble configuration and stores them in the same order in + * the tail queue given by args; it also stores the length of this queue in len. It returns NULL on + * success and an error on failure. + */ +static err_t *get_interpreter_args(const cJSON *config, int *len, strlist_t **args) { + const cJSON *json_args = NULL; + const cJSON *json_arg = NULL; + strlist_elem_t *arg = NULL; + err_t *err = NULL; + + STAILQ_NEW(*args, strlist_t); + + json_args = cJSON_GetObjectItemCaseSensitive(config, "interpreter_args"); + if (json_args == NULL || cJSON_IsNull(json_args)) { + log_debug("interpreter_args is null; no arguments added"); + goto end; + } else if (!cJSON_IsArray(json_args)) { + err = err_from_str("interpreter_args must be an array or null"); + goto end; + } + + *len = cJSON_GetArraySize(json_args); + if (*len == 0) { + log_debug("interpreter_args is empty; no arguments added"); + goto end; + } + + cJSON_ArrayForEach(json_arg, json_args) { + if (!JSON_STRING(json_arg)) { + err = err_from_str("elements in interpreter_args must be strings"); + goto end; + } + + STAILQ_ENTRY_NEW(arg, strlist_elem_t); + MALLOC(arg->str, char, JSON_STRLEN(json_arg) + 1); + strcpy(arg->str, json_arg->valuestring); + STAILQ_INSERT_TAIL(*args, arg, next); + log_debug("Added argument from interpreter_args: %s", arg->str); + } + +end: + if (err != NULL) { + if (arg != NULL) { + FREE(arg->str); + FREE(arg); + } + + if (*args != NULL) { + while (!STAILQ_EMPTY(*args)) { + arg = STAILQ_FIRST(*args); + STAILQ_REMOVE_HEAD(*args, next); + FREE(arg->str); + FREE(arg); + } + } + } + + return err; +} + +/* + * get_interpreters extracts the array of interpreter paths from the value of "interpreters" in the + * given .pex preamble configuration and stores them in the same order in the tail queue given by + * interps. It returns NULL on success and an error on failure. + */ +static err_t *get_interpreters(const cJSON *config, strlist_t **interps) { + strlist_elem_t *interp = NULL; + char *plz_bin_path = NULL; + const cJSON *json_interps = NULL; + const cJSON *json_interp = NULL; + err_t *err = NULL; + + json_interps = cJSON_GetObjectItemCaseSensitive(config, "interpreters"); + if (!cJSON_IsArray(json_interps)) { + err = err_from_str("interpreters must be an array"); + goto end; + } + + if (cJSON_GetArraySize(json_interps) == 0) { + err = err_from_str("interpreters must not be empty"); + goto end; + } + + STAILQ_NEW(*interps, strlist_t); + + cJSON_ArrayForEach(json_interp, json_interps) { + if (!JSON_STRING(json_interp)) { + err = err_from_str("elements in interpreters must be strings"); + goto end; + } + + STAILQ_ENTRY_NEW(interp, strlist_elem_t); + + if (STRPREFIX(json_interp->valuestring, "$PLZ_BIN_PATH/")) { + if (plz_bin_path == NULL) { + if ((err = get_plz_bin_path(&plz_bin_path)) != NULL) { + err = err_wrap("PLZ_BIN_PATH resolution failure", err); + goto end; + } + log_debug("Resolved $PLZ_BIN_PATH to %s", plz_bin_path); + } + + // Replace "$PLZ_BIN_PATH" with the path to Please's binary outputs directory at the + // start of the interpreter path. + MALLOC(interp->str, char, strlen(plz_bin_path) + JSON_STRLEN(json_interp) - strlen("$PLZ_BIN_PATH") + 1); // 1 extra byte for trailing null + strncpy(interp->str, plz_bin_path, strlen(plz_bin_path)); + strcpy(interp->str + strlen(plz_bin_path), json_interp->valuestring + strlen("$PLZ_BIN_PATH")); + } else { + MALLOC(interp->str, char, JSON_STRLEN(json_interp) + 1); // 1 extra byte for trailing null + strcpy(interp->str, json_interp->valuestring); + } + + STAILQ_INSERT_TAIL(*interps, interp, next); + + log_debug("Added interpreter to search path: %s", interp->str); + } + +end: + FREE(plz_bin_path); + + if (err != NULL) { + if (interp != NULL) { + FREE(interp->str); + FREE(interp); + } + + if (*interps != NULL) { + while (!STAILQ_EMPTY(*interps)) { + interp = STAILQ_FIRST(*interps); + STAILQ_REMOVE_HEAD(*interps, next); + FREE(interp->str); + FREE(interp); + } + } + } + + return err; +} + +int main(int argc, char **argv) { + err_t *err = NULL; + const cJSON *config = NULL; + int config_args_len = 0; + strlist_t *config_args = NULL; + strlist_elem_t *config_arg = NULL; + strlist_t *interps = NULL; + strlist_elem_t *interp = NULL; + char **interp_argv = NULL; + int i = 0; + + init_log(); + + if (argc == 0) { + log_fatal("Failed to execute .pex preamble: argv is empty"); + return 1; + } + + if ((config = get_config(argv[0])) == NULL) { + return 1; + } + + set_log_verbosity(config); + + if ((err = get_interpreter_args(config, &config_args_len, &config_args)) != NULL) { + log_fatal("Failed to get interpreter arguments from .pex preamble configuration: %s", err_str(err)); + return 1; + } + + if ((err = get_interpreters(config, &interps)) != NULL) { + log_fatal("Failed to get interpreters from .pex preamble configuration: %s", err_str(err)); + return 1; + } + + // interp_argv is the array of arguments that will be passed to execvp(): + // + // { + // interpreter, + // config_args[0], + // # ... + // config_args[n], + // argv[0], + // # ... + // argv[n], + // NULL + // } + MALLOC(interp_argv, char *, 1 + config_args_len + argc + 1); + + // The first element is the Python interpreter path, which is substituted in during each + // execvp() invocation. The remaining elements are static for each execvp() invocation: + // - the interpreter arguments from the .pex preamble configuration (if any); + STAILQ_FOREACH(config_arg, config_args, next) { + interp_argv[1 + i] = config_arg->str; + i++; + } + // - the name of the currently-executing program (which is also the .pex file that the + // Python interpreter needs to execute) and the command line arguments given to this + // process (if any); + for (i = 0; i < argc; i++) { + interp_argv[1 + config_args_len + i] = argv[i]; + } + // - NULL (the terminator for execvp()). + interp_argv[1 + config_args_len + argc] = NULL; + + STAILQ_FOREACH(interp, interps, next) { + interp_argv[0] = interp->str; + + log_debug("Attempting to execute interpreter %s", interp_argv[0]); + + execvp(interp_argv[0], interp_argv); + if (errno == ENOENT) { + // ENOENT isn't necessarily an error case (there are legitimate reasons for any given + // interpreter not to exist), hence logging this at info level rather than error. + log_info("%s does not exist or could not be executed", interp_argv[0]); + } else { + log_error("Failed to execute %s: %s", interp_argv[0], strerror(errno)); + } + } + + log_fatal("Failed to execute any interpreters in the interpreter search path"); + + return 1; +} diff --git a/tools/please_pex/preamble/preamble.go b/tools/please_pex/preamble/preamble.go new file mode 100644 index 00000000..846cb306 --- /dev/null +++ b/tools/please_pex/preamble/preamble.go @@ -0,0 +1,67 @@ +// Package preamble is the Go interface to the .pex preamble format. +package preamble + +import ( + "fmt" + "strings" +) + +// ConfigPath is the zip file member within the .pex archive containing the .pex preamble +// configuration. It is typically the first member of the archive (for performance reasons), but +// does not necessarily have to be. +const ConfigPath = ".bootstrap/PLZ_PREAMBLE_CONFIG" + +// Verbosity represents a logging level supported by the .pex preamble. +type Verbosity string + +const ( + LogTrace Verbosity = "trace" + LogDebug = "debug" + LogInfo = "info" + LogWarn = "warn" + LogError = "error" + LogFatal = "fatal" +) + +// UnmarshalFlag parses a string representation of a logging level and returns its corresponding +// constant. +func (v *Verbosity) UnmarshalFlag(in string) error { + switch strings.ToLower(in) { + case "trace": + *v = LogTrace + return nil + case "debug": + *v = LogDebug + return nil + case "info": + *v = LogInfo + return nil + case "warn": + *v = LogWarn + return nil + case "error": + *v = LogError + return nil + case "fatal": + *v = LogFatal + return nil + } + return fmt.Errorf("invalid preamble logging level '%s'", in) +} + +// Config represents a .pex preamble configuration. +type Config struct { + // Verbosity controls the preamble's minimum log level. It may be overridden at run time by the + // value of the PLZ_PEX_PREAMBLE_VERBOSITY environment variable. + Verbosity Verbosity `json:"verbosity,omitempty"` + + // Interpreters is a list of relative or absolute paths to Python interpreters that the preamble + // should attempt to invoke in the order in which they are given. The preamble duplicates the + // actions of the shell when attempting to locate an interpreter with a relative path that does not + // contain a forward slash. + Interpreters []string `json:"interpreters"` + + // InterpreterArgs is a list of command line arguments that the preamble should pass to the Python + // interpreters when attempting to invoke them. + InterpreterArgs []string `json:"interpreter_args,omitempty"` +} diff --git a/tools/please_pex/preamble/util.h b/tools/please_pex/preamble/util.h new file mode 100644 index 00000000..5bb2f93d --- /dev/null +++ b/tools/please_pex/preamble/util.h @@ -0,0 +1,47 @@ +#ifndef __UTIL_H__ + +#define __UTIL_H__ + +#include +#include +#include +#include + +#define STREQ(x, y) (strcmp((x), (y)) == 0) +#define STRPREFIX(s, pre) (strncmp(pre, s, strlen(pre)) == 0) + +#define NELEMS(x) ((int)(sizeof(x) / sizeof((x)[0]))) + +#define MALLOC(ptr, type, size) \ + if (((ptr) = malloc(sizeof(type) * (size))) == NULL) { \ + perror("malloc"); \ + exit(EX_OSERR); \ + } + +#define FREE(x) if ((x) != NULL) { free(x); (x) = NULL; } + +/* + * STAILQ_NEW allocates memory for and initialises a singly-linked tail queue. + */ +#define STAILQ_NEW(headname, type) \ + MALLOC(headname, type, 1); \ + (*headname) = (type){ 0 }; \ + STAILQ_INIT(headname); + +/* + * STAILQ_ENTRY_NEW allocates memory for and initialises an entry in a singly-linked tail queue. + */ +#define STAILQ_ENTRY_NEW(var, type) \ + MALLOC(var, type, 1); \ + (*var) = (type){ 0 }; + +typedef struct strlist_elem_t { + char *str; + STAILQ_ENTRY(strlist_elem_t) next; +} strlist_elem_t; + +STAILQ_HEAD(strlist_t, strlist_elem_t); + +typedef struct strlist_t strlist_t; + +#endif /* !__UTIL_H__ */ diff --git a/tools/please_pex/zip/writer.go b/tools/please_pex/zip/writer.go index 6949fa67..ed024822 100644 --- a/tools/please_pex/zip/writer.go +++ b/tools/please_pex/zip/writer.go @@ -7,6 +7,7 @@ import ( "encoding/binary" "fmt" "io" + "io/fs" "os" "path" "path/filepath" @@ -28,7 +29,7 @@ const modTimeDOS = 10785 type File struct { f io.WriteCloser w *zip.Writer - preambleLength int + preambleLength int64 filename string input string // Include and Exclude are prefixes of filenames to include or exclude from the zipfile. @@ -415,14 +416,22 @@ func (f *File) WriteDir(filename string) error { return nil } -// WritePreamble writes a preamble to the zipfile. -func (f *File) WritePreamble(preamble []byte) error { - f.preambleLength += len(preamble) - f.w.SetOffset(int64(f.preambleLength)) +// WritePreambleBytes writes a byte slice as a preamble to the zipfile. +func (f *File) WritePreambleBytes(preamble []byte) error { + f.preambleLength += int64(len(preamble)) + f.w.SetOffset(f.preambleLength) _, err := f.f.Write(preamble) return err } +// WritePreambleFile writes a file as a preamble to the zipfile. +func (f *File) WritePreambleFile(preamble fs.File) error { + length, err := io.Copy(f.f, preamble) + f.preambleLength += length + f.w.SetOffset(f.preambleLength) + return err +} + // StripBytecodeTimestamp strips a timestamp from a .pyc or .pyo file. // This is important so our output is deterministic. func (f *File) StripBytecodeTimestamp(filename string, contents []byte) error { From d6dfcf89e9dcbaf19ebc6a7cfb0bfb459acd7d6f Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Tue, 2 Dec 2025 22:17:55 +0000 Subject: [PATCH 02/14] Put `defines` on a single line --- tools/please_pex/preamble/BUILD | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/please_pex/preamble/BUILD b/tools/please_pex/preamble/BUILD index 7541045e..162c44a3 100644 --- a/tools/please_pex/preamble/BUILD +++ b/tools/please_pex/preamble/BUILD @@ -8,9 +8,7 @@ c_binary( srcs = glob(["*.c"]), hdrs = glob(["*.h"]), compiler_flags = ["-std=c11"], - defines = [ - "_XOPEN_SOURCE=600", - ], + defines = ["_XOPEN_SOURCE=600"], static = True, visibility = [ "//tools/please_pex/pex:preamble", From 9b2381a9cc78c9d85eff2ad9b9443ab2cce8a2d6 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Tue, 2 Dec 2025 22:22:52 +0000 Subject: [PATCH 03/14] More sensible var name for preamble file --- tools/please_pex/pex/pex.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/please_pex/pex/pex.go b/tools/please_pex/pex/pex.go index 2dfc63db..6bfb6b1d 100644 --- a/tools/please_pex/pex/pex.go +++ b/tools/please_pex/pex/pex.go @@ -158,9 +158,9 @@ func (pw *Writer) Write(out, moduleDir string) error { defer f.Close() // Write preamble (i.e. the binary that makes the .pex executable) - nf := mustOpen("preamble") - defer nf.Close() - if err := f.WritePreambleFile(nf); err != nil { + preambleFile := mustOpen("preamble") + defer preambleFile.Close() + if err := f.WritePreambleFile(preambleFile); err != nil { return err } From 40ff77ccc3f5ce6cce5a3b94a4de248c41a65091 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Tue, 2 Dec 2025 22:58:04 +0000 Subject: [PATCH 04/14] Always free `exe_path` --- tools/please_pex/preamble/path.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/please_pex/preamble/path.c b/tools/please_pex/preamble/path.c index 9d04d1a2..e065f575 100644 --- a/tools/please_pex/preamble/path.c +++ b/tools/please_pex/preamble/path.c @@ -137,9 +137,7 @@ err_t *get_pex_dir(char **pex_dir) { } end: -#ifdef __APPLE__ FREE(exe_path); -#endif return err; } From 74f023ade2193787a27e3087006bcc33ebee9a65 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Tue, 2 Dec 2025 22:59:43 +0000 Subject: [PATCH 05/14] `_dir_realpath` var names --- tools/please_pex/preamble/path.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/please_pex/preamble/path.c b/tools/please_pex/preamble/path.c index e065f575..84e8a043 100644 --- a/tools/please_pex/preamble/path.c +++ b/tools/please_pex/preamble/path.c @@ -156,8 +156,8 @@ err_t *get_pex_dir(char **pex_dir) { err_t *get_plz_bin_path(char **path) { char *pex_dir = NULL; char *tmp_dir = NULL; - char *pex_realpath = NULL; - char *tmp_realpath = NULL; + char *pex_dir_realpath = NULL; + char *tmp_dir_realpath = NULL; size_t pex_len = 0; size_t tmp_len = 0; err_t *err = NULL; @@ -180,26 +180,26 @@ err_t *get_plz_bin_path(char **path) { } // Identify whether the .pex file exists inside the build environment. - if ((pex_realpath = realpath(pex_dir, NULL)) == NULL) { + if ((pex_dir_realpath = realpath(pex_dir, NULL)) == NULL) { err = err_from_errno("realpath pex_dir"); goto end; } - if ((tmp_realpath = realpath(tmp_dir, NULL)) == NULL) { + if ((tmp_dir_realpath = realpath(tmp_dir, NULL)) == NULL) { err = err_from_errno("realpath tmp_dir"); goto end; } - pex_len = strlen(pex_realpath); - tmp_len = strlen(tmp_realpath); + pex_len = strlen(pex_dir_realpath); + tmp_len = strlen(tmp_dir_realpath); if ( - strncmp(tmp_realpath, pex_realpath, tmp_len) == 0 && + strncmp(tmp_dir_realpath, pex_dir_realpath, tmp_len) == 0 && ( (pex_len == tmp_len) || - (pex_len - tmp_len >= 1 && pex_realpath[tmp_len] == '/') + (pex_len - tmp_len >= 1 && pex_dir_realpath[tmp_len] == '/') ) ) { - if (((*path) = strdup(tmp_realpath)) == NULL) { + if (((*path) = strdup(tmp_dir_realpath)) == NULL) { err = err_from_errno("strdup"); } goto end; @@ -225,8 +225,8 @@ err_t *get_plz_bin_path(char **path) { } end: - FREE(pex_realpath); - FREE(tmp_realpath); + FREE(pex_dir_realpath); + FREE(tmp_dir_realpath); return err; } From 15741e77233f840d15f3f1401ab57b56b0984a28 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Tue, 2 Dec 2025 23:05:13 +0000 Subject: [PATCH 06/14] Simplify length comparisons --- tools/please_pex/preamble/path.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/please_pex/preamble/path.c b/tools/please_pex/preamble/path.c index 84e8a043..5efd83e7 100644 --- a/tools/please_pex/preamble/path.c +++ b/tools/please_pex/preamble/path.c @@ -196,7 +196,7 @@ err_t *get_plz_bin_path(char **path) { strncmp(tmp_dir_realpath, pex_dir_realpath, tmp_len) == 0 && ( (pex_len == tmp_len) || - (pex_len - tmp_len >= 1 && pex_dir_realpath[tmp_len] == '/') + (pex_len > tmp_len && pex_dir_realpath[tmp_len] == '/') ) ) { if (((*path) = strdup(tmp_dir_realpath)) == NULL) { From d5f21d7499a3c608c59dabf616dc13a35edac9b9 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Wed, 3 Dec 2025 00:28:44 +0000 Subject: [PATCH 07/14] Use `err_t` and `const` more --- tools/please_pex/preamble/pexerror.c | 6 +++--- tools/please_pex/preamble/pexerror.h | 8 ++++---- tools/please_pex/preamble/preamble.c | 27 +++++++++++++++------------ 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/tools/please_pex/preamble/pexerror.c b/tools/please_pex/preamble/pexerror.c index 31157c52..c0a1aa4f 100644 --- a/tools/please_pex/preamble/pexerror.c +++ b/tools/please_pex/preamble/pexerror.c @@ -17,7 +17,7 @@ /* * err_from_str returns an error from the given diagnostic message. */ -err_t *err_from_str(char *msg) { +err_t *err_from_str(const char *msg) { err_t *err = NULL; MALLOC(err, err_t, 1); @@ -39,7 +39,7 @@ err_t *err_from_str(char *msg) { * the responsibility of the caller to ensure that a system call or library function that sets * errno has actually returned a value indicating failure before calling err_from_errno. */ -err_t *err_from_errno(char *msg) { +err_t *err_from_errno(const char *msg) { err_t *err = err_from_str(strerror(errno)); return err_wrap(msg, err); @@ -49,7 +49,7 @@ err_t *err_from_errno(char *msg) { * err_wrap creates an error from the given diagnostic message, using it to wrap the given error, * and then returns the newly-created error. */ -err_t *err_wrap(char *msg, err_t *wrapped) { +err_t *err_wrap(const char *msg, err_t *wrapped) { errmsg_t *first = SLIST_FIRST(wrapped); errmsg_t *errmsg = NULL; size_t len = strlen(msg); diff --git a/tools/please_pex/preamble/pexerror.h b/tools/please_pex/preamble/pexerror.h index ddcc9af1..306d0afa 100644 --- a/tools/please_pex/preamble/pexerror.h +++ b/tools/please_pex/preamble/pexerror.h @@ -22,7 +22,7 @@ typedef struct errmsg_t { /* * msg is a diagnostic message describing this error. */ - char *msg; + const char *msg; /* * wrapped is an error that is wrapped by this error. @@ -34,9 +34,9 @@ SLIST_HEAD(err_t, errmsg_t); typedef struct err_t err_t; -err_t *err_from_str(char *msg); -err_t *err_from_errno(char *msg); -err_t *err_wrap(char *msg, err_t *wrapped); +err_t *err_from_str(const char *msg); +err_t *err_from_errno(const char *msg); +err_t *err_wrap(const char *msg, err_t *wrapped); char *err_str(err_t *err); #endif /* __PEXERROR_H__ */ diff --git a/tools/please_pex/preamble/preamble.c b/tools/please_pex/preamble/preamble.c index 548091cb..da23f108 100644 --- a/tools/please_pex/preamble/preamble.c +++ b/tools/please_pex/preamble/preamble.c @@ -99,45 +99,47 @@ static void set_log_verbosity(const cJSON *config) { /* * get_config reads and parses the .pex preamble configuration, a JSON-encoded file at - * .bootstrap/PLZ_PREAMBLE_CONFIG in the zip archive, and returns it as a cJSON struct. It returns - * NULL on failure. + * .bootstrap/PLZ_PREAMBLE_CONFIG in the zip archive, and stores it in config. It returns NULL on + * success and an error on failure. */ -static const cJSON *get_config(char *pex_path) { +static err_t *get_config(char *pex_path, const cJSON **config) { struct zip_t *zip = NULL; int ziperr = 0; char *configbuf = NULL; size_t configsize = 0; - const cJSON *config = NULL; + err_t *err = NULL; zip = zip_openwitherror(pex_path, 0, 'r', &ziperr); if (ziperr < 0) { - log_fatal("Failed to open .pex as zip file: %s", zip_strerror(ziperr)); + err = err_wrap("open .pex", err_from_str(zip_strerror(ziperr))); goto end; } if ((ziperr = zip_entry_open(zip, PREAMBLE_CONFIG_PATH)) < 0) { - log_fatal("Failed to open .pex preamble configuration: %s", zip_strerror(ziperr)); + err = err_wrap("open .pex configuration", err_from_str(zip_strerror(ziperr))); goto end; } if ((ziperr = (int)zip_entry_read(zip, (void **)&configbuf, &configsize)) < 0) { - log_fatal("Failed to read .pex preamble configuration: %s", zip_strerror(ziperr)); + err = err_wrap("read .pex configuration", err_from_str(zip_strerror(ziperr))); goto end; } zip_entry_close(zip); - zip_close(zip); - if ((config = cJSON_ParseWithLength(configbuf, configsize)) == NULL) { - log_fatal("Failed to parse .pex preamble configuration: JSON syntax error near '%s'", cJSON_GetErrorPtr()); + if (((*config) = cJSON_ParseWithLength(configbuf, configsize)) == NULL) { + err = err_wrap( + "parse .pex configuration", + err_wrap("JSON syntax error near", err_from_str(cJSON_GetErrorPtr())) + ); goto end; } end: FREE(configbuf); - return config; + return err; } /* @@ -299,7 +301,8 @@ int main(int argc, char **argv) { return 1; } - if ((config = get_config(argv[0])) == NULL) { + if ((err = get_config(argv[0], &config)) != NULL) { + log_fatal("Failed to get .pex preamble configuration: %s", err_str(err)); return 1; } From 1b0c04f5261bfcbf388bf55484bb3c58e217b4dd Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Wed, 3 Dec 2025 00:34:33 +0000 Subject: [PATCH 08/14] Return an error if `get_plz_bin_path` badly fails --- tools/please_pex/preamble/path.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/please_pex/preamble/path.c b/tools/please_pex/preamble/path.c index 5efd83e7..8376f762 100644 --- a/tools/please_pex/preamble/path.c +++ b/tools/please_pex/preamble/path.c @@ -217,6 +217,7 @@ err_t *get_plz_bin_path(char **path) { if (STREQ(pex_dir, "/") || STREQ(pex_dir, ".")) { // If we reached the left-most component in the path, the .pex file doesn't exist within a // plz-out/ directory tree. + err = err_from_str(".pex file is in neither a Please build environment nor a Please repo"); goto end; } MALLOC(*path, char, strlen(pex_dir) + strlen("/bin") + 1); From fa3203cd84cd726f6df3044fca580919d6bb2af0 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Wed, 3 Dec 2025 01:10:24 +0000 Subject: [PATCH 09/14] Remove `readlink`-based logic for getting .pex path on Linux --- tools/please_pex/preamble/path.c | 80 ++------------------------------ 1 file changed, 3 insertions(+), 77 deletions(-) diff --git a/tools/please_pex/preamble/path.c b/tools/please_pex/preamble/path.c index 8376f762..e6d23df7 100644 --- a/tools/please_pex/preamble/path.c +++ b/tools/please_pex/preamble/path.c @@ -1,10 +1,6 @@ #include -#include -#if defined(__linux__) -#include -#include -#elif defined(__APPLE__) +#if defined(__APPLE__) #include #elif defined(__FreeBSD__) #include @@ -16,76 +12,6 @@ #include "pexerror.h" #include "util.h" -#ifdef __linux__ - -/* - * get_path_max returns the maximum length of a relative path name when the given path is the - * current working directory (although the path need not actually be a directory, or even exist). - */ -static int get_path_max(char *path) { - int path_max = pathconf(path, _PC_PATH_MAX); - - if (path_max <= 0) { - // PATH_MAX may be defined in , but this is not a POSIX requirement. If it isn't - // defined, fall back to 4096 (as recommended by Linux's realpath(3) man page). -#ifdef PATH_MAX - path_max = PATH_MAX; -#else - path_max = 4096; -#endif - } - - return path_max; -} - -/* - * get_link_path resolves the symbolic link at the path lpath and stores the link's destination path - * in rpath. It returns NULL on success and an error on failure. - */ -static err_t *get_link_path(char *lpath, char **rpath) { - struct stat sb = { 0 }; - int slen = 0; - int rlen = 0; - err_t *err = NULL; - - // Get the length of lpath's destination path so we can allocate a buffer of that length for - // readlink to write to (plus one byte, so we can determine whether readlink has truncated the - // path it writes, and also for the trailing null we'll append to the path afterwards). If lpath - // is a magic symlink (in which case sb.st_size is 0), assume the destination path is PATH_MAX - // bytes long - it's an overestimate, but at least the buffer will be large enough for readlink - // to safely write to. - if (lstat(lpath, &sb) == -1) { - err = err_from_errno("lstat"); - goto end; - } - slen = (sb.st_size == 0 ? get_path_max(lpath) : sb.st_size) + 1; - - MALLOC(*rpath, char *, slen); - - if ((rlen = readlink(lpath, (*rpath), slen)) == -1) { - err = err_from_errno("readlink"); - goto end; - } - // If readlink filled the buffer, it truncated the destination path it wrote. In this case, the - // value is untrustworthy, so we're better off not using it. - if (slen == rlen) { - err = err_from_str("readlink truncated destination path"); - goto end; - } - - // Otherwise, add the trailing null to the destination path that readlink omitted. - (*rpath)[rlen] = 0; - -end: - if (err != NULL) { - FREE(*rpath); - } - - return err; -} - -#endif /* __linux__ */ - /* * get_pex_dir stores the path to the .pex file in pex_dir. It returns NULL on success and an error * on failure. @@ -96,8 +22,8 @@ err_t *get_pex_dir(char **pex_dir) { err_t *err = NULL; #if defined(__linux__) - if ((err = get_link_path("/proc/self/exe", &exe_path)) != NULL) { - err = err_wrap("get_link_path", err); + if ((exe_path = realpath("/proc/self/exe", NULL)) == NULL) { + err = err_from_errno("realpath /proc/self/exe"); goto end; } #elif defined(__APPLE__) From f8b19e7ff29c2efd2bda1481ce5b388ffbaebee4 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Wed, 3 Dec 2025 01:30:41 +0000 Subject: [PATCH 10/14] Simplify `path.c` further --- tools/please_pex/preamble/BUILD | 2 +- tools/please_pex/preamble/path.c | 41 ++++++++++++++++---------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/tools/please_pex/preamble/BUILD b/tools/please_pex/preamble/BUILD index 162c44a3..ee59249e 100644 --- a/tools/please_pex/preamble/BUILD +++ b/tools/please_pex/preamble/BUILD @@ -8,7 +8,7 @@ c_binary( srcs = glob(["*.c"]), hdrs = glob(["*.h"]), compiler_flags = ["-std=c11"], - defines = ["_XOPEN_SOURCE=600"], + defines = ["_XOPEN_SOURCE=700"], static = True, visibility = [ "//tools/please_pex/pex:preamble", diff --git a/tools/please_pex/preamble/path.c b/tools/please_pex/preamble/path.c index e6d23df7..de7d749d 100644 --- a/tools/please_pex/preamble/path.c +++ b/tools/please_pex/preamble/path.c @@ -13,19 +13,17 @@ #include "util.h" /* - * get_pex_dir stores the path to the .pex file in pex_dir. It returns NULL on success and an error - * on failure. + * get_pex_dir stores the canonical absolute path to the .pex file in pex_dir. It returns NULL on + * success and an error on failure. */ err_t *get_pex_dir(char **pex_dir) { char *exe_path = NULL; + char *exe_realpath = NULL; char *exe_dir = NULL; err_t *err = NULL; #if defined(__linux__) - if ((exe_path = realpath("/proc/self/exe", NULL)) == NULL) { - err = err_from_errno("realpath /proc/self/exe"); - goto end; - } + exe_path = "/proc/self/exe"; #elif defined(__APPLE__) uint32_t len = 0; @@ -56,14 +54,22 @@ err_t *get_pex_dir(char **pex_dir) { #error "Unsupported operating system" #endif - exe_dir = dirname(exe_path); + if ((exe_realpath = realpath(exe_path, NULL)) == NULL) { + err = err_from_errno("realpath"); + goto end; + } + + exe_dir = dirname(exe_realpath); if (((*pex_dir) = strdup(exe_dir)) == NULL) { err = err_from_errno("strdup"); goto end; } end: +#ifndef __linux__ FREE(exe_path); +#endif + FREE(exe_realpath); return err; } @@ -82,10 +88,9 @@ err_t *get_pex_dir(char **pex_dir) { err_t *get_plz_bin_path(char **path) { char *pex_dir = NULL; char *tmp_dir = NULL; - char *pex_dir_realpath = NULL; char *tmp_dir_realpath = NULL; - size_t pex_len = 0; - size_t tmp_len = 0; + size_t pex_dir_len = 0; + size_t tmp_dir_len = 0; err_t *err = NULL; if ((err = get_pex_dir(&pex_dir)) != NULL) { @@ -106,23 +111,19 @@ err_t *get_plz_bin_path(char **path) { } // Identify whether the .pex file exists inside the build environment. - if ((pex_dir_realpath = realpath(pex_dir, NULL)) == NULL) { - err = err_from_errno("realpath pex_dir"); - goto end; - } if ((tmp_dir_realpath = realpath(tmp_dir, NULL)) == NULL) { err = err_from_errno("realpath tmp_dir"); goto end; } - pex_len = strlen(pex_dir_realpath); - tmp_len = strlen(tmp_dir_realpath); + pex_dir_len = strlen(pex_dir); + tmp_dir_len = strlen(tmp_dir_realpath); if ( - strncmp(tmp_dir_realpath, pex_dir_realpath, tmp_len) == 0 && + strncmp(tmp_dir_realpath, pex_dir, tmp_dir_len) == 0 && ( - (pex_len == tmp_len) || - (pex_len > tmp_len && pex_dir_realpath[tmp_len] == '/') + (pex_dir_len == tmp_dir_len) || + (pex_dir_len > tmp_dir_len && pex_dir[tmp_dir_len] == '/') ) ) { if (((*path) = strdup(tmp_dir_realpath)) == NULL) { @@ -152,7 +153,7 @@ err_t *get_plz_bin_path(char **path) { } end: - FREE(pex_dir_realpath); + FREE(pex_dir); FREE(tmp_dir_realpath); return err; From de89e0ba626e2ec9be8bf7293646b7613f3da8b4 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Wed, 3 Dec 2025 01:37:34 +0000 Subject: [PATCH 11/14] Document `_XOPEN_SOURCE` requirement --- tools/please_pex/preamble/BUILD | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/please_pex/preamble/BUILD b/tools/please_pex/preamble/BUILD index ee59249e..d4b504c6 100644 --- a/tools/please_pex/preamble/BUILD +++ b/tools/please_pex/preamble/BUILD @@ -8,7 +8,11 @@ c_binary( srcs = glob(["*.c"]), hdrs = glob(["*.h"]), compiler_flags = ["-std=c11"], - defines = ["_XOPEN_SOURCE=700"], + defines = [ + # path.c relies on realpath(3)'s resolved_path == NULL feature, which was not standardised + # until POSIX.1-2008: + "_XOPEN_SOURCE=700", + ], static = True, visibility = [ "//tools/please_pex/pex:preamble", From f039131b4c09a2ebc4bbaf5a487382d183061473 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Wed, 3 Dec 2025 11:16:15 +0000 Subject: [PATCH 12/14] Remove unnecessary slice initialiser --- tools/please_pex/pex/pex.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/please_pex/pex/pex.go b/tools/please_pex/pex/pex.go index 6bfb6b1d..2a758ebb 100644 --- a/tools/please_pex/pex/pex.go +++ b/tools/please_pex/pex/pex.go @@ -51,9 +51,6 @@ type Writer struct { // NewWriter constructs a new Writer. func NewWriter(entryPoint string, interpreters []string, interpreterArgs []string, stamp string, zipSafe, noSite bool) *Writer { - if interpreterArgs == nil { - interpreterArgs = make([]string, 0) - } pw := &Writer{ preambleConfig: &preamble.Config{ Interpreters: interpreters, From 3490d25c05357a192fe047f32be99a50b491d8e1 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Wed, 3 Dec 2025 13:41:22 +0000 Subject: [PATCH 13/14] More sensible `wrapped` null check --- tools/please_pex/preamble/pexerror.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/please_pex/preamble/pexerror.c b/tools/please_pex/preamble/pexerror.c index c0a1aa4f..b0edb1c6 100644 --- a/tools/please_pex/preamble/pexerror.c +++ b/tools/please_pex/preamble/pexerror.c @@ -50,7 +50,7 @@ err_t *err_from_errno(const char *msg) { * and then returns the newly-created error. */ err_t *err_wrap(const char *msg, err_t *wrapped) { - errmsg_t *first = SLIST_FIRST(wrapped); + errmsg_t *first = NULL; errmsg_t *errmsg = NULL; size_t len = strlen(msg); @@ -59,6 +59,8 @@ err_t *err_wrap(const char *msg, err_t *wrapped) { exit(EX_SOFTWARE); } + first = SLIST_FIRST(wrapped); + MALLOC(errmsg, errmsg_t, 1); errmsg->msg = msg; errmsg->len = len; From a7262eb961a5e3a82dea300ed718802e891c98f3 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Wed, 3 Dec 2025 13:46:11 +0000 Subject: [PATCH 14/14] `!__PEXERROR_H__` --- tools/please_pex/preamble/pexerror.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/please_pex/preamble/pexerror.h b/tools/please_pex/preamble/pexerror.h index 306d0afa..4f44f948 100644 --- a/tools/please_pex/preamble/pexerror.h +++ b/tools/please_pex/preamble/pexerror.h @@ -39,4 +39,4 @@ err_t *err_from_errno(const char *msg); err_t *err_wrap(const char *msg, err_t *wrapped); char *err_str(err_t *err); -#endif /* __PEXERROR_H__ */ +#endif /* !__PEXERROR_H__ */