diff --git a/.github/workflows/plugin_test.yaml b/.github/workflows/plugin_test.yaml index 4d1b1418..d4877e10 100644 --- a/.github/workflows/plugin_test.yaml +++ b/.github/workflows/plugin_test.yaml @@ -28,6 +28,8 @@ jobs: runs-on: ${{ (inputs.platform == 'freebsd_amd64' || inputs.platform == 'linux_amd64') && 'ubuntu-24.04' || (inputs.platform == 'linux_arm64' && 'ubuntu-24.04-arm' || (inputs.platform == 'darwin_amd64' && 'macos-15-intel' || (inputs.platform == 'darwin_arm64' && 'macos-15' || 'unknown'))) }} steps: - name: Install Python in CI environment + # actions/setup-python doesn't support FreeBSD - instead, interpreters for the supported Python versions are + # pre-installed in the FreeBSD runner image. if: ${{ inputs.platform != 'freebsd_amd64' }} id: python uses: actions/setup-python@v6 @@ -36,14 +38,11 @@ jobs: update-environment: false - name: Check out code uses: actions/checkout@v5 - - name: Configure plugin's default Python interpreter - run: | - _python_path="${{ inputs.platform == 'freebsd_amd64' && format('python{0}', inputs.python) || steps.python.outputs.python-path }}" - echo "PLZ_ARGS=-o plugin.python.defaultinterpreter:$_python_path" >> $GITHUB_ENV - name: Configure plugin to use please_pex tool built from source if: inputs.please_pex_from_repo - run: | - echo "PLZ_ARGS=${PLZ_ARGS:+$PLZ_ARGS }-o plugin.python.pextool://tools/please_pex" >> $GITHUB_ENV + # PexTool can't be set to //tools/please_pex for e2e tests because it can't be built inside the e2e test + # environment - skip the e2e tests if we're using //tools/please_pex as the pex tool. + run: echo "PLZ_ARGS=${PLZ_ARGS:+$PLZ_ARGS }-o plugin.python.pextool://tools/please_pex -e plz_e2e_test" >> $GITHUB_ENV - name: Run tests (in nested runner) if: ${{ inputs.platform == 'freebsd_amd64' }} uses: cross-platform-actions/action@v0.30.0 @@ -55,11 +54,22 @@ jobs: shell: bash environment_variables: PLZ_ARGS shutdown_vm: true - run: ./pleasew test --keep_going --log_file plz-out/log/test.log ${{ inputs.test_targets }} + run: | + _external_interpreter="python${{ inputs.python }}" + _in_repo_interpreter="//third_party/cc/cpython:cpython_${{ inputs.python }}|python" + echo "*** Running tests using external Python interpreter: $_external_interpreter" + ./pleasew -o "plugin.python.defaultinterpreter:$_external_interpreter" test --rerun --keep_going --log_file plz-out/log/test-external_interpreter.log ${{ inputs.test_targets }} + echo "*** Running tests using in-repo Python interpreter: $_in_repo_interpreter" + ./pleasew -o "plugin.python.defaultinterpreter:$_in_repo_interpreter" test --rerun --keep_going --log_file plz-out/log/test-in_repo_interpreter.log ${{ inputs.test_targets }} - name: Run tests (on host) if: ${{ inputs.platform != 'freebsd_amd64' }} run: | - ./pleasew test --keep_going --log_file plz-out/log/test.log ${{ inputs.test_targets }} + _external_interpreter="${{ steps.python.outputs.python-path }}" + _in_repo_interpreter="//third_party/cc/cpython:cpython_${{ inputs.python }}|python" + echo "*** Running tests using external Python interpreter: $_external_interpreter" + ./pleasew -o "plugin.python.defaultinterpreter:$_external_interpreter" test --rerun --keep_going --log_file plz-out/log/test-external_interpreter.log ${{ inputs.test_targets }} + echo "*** Running tests using in-repo Python interpreter: $_in_repo_interpreter" + ./pleasew -o "plugin.python.defaultinterpreter:$_in_repo_interpreter" test --rerun --keep_going --log_file plz-out/log/test-in_repo_interpreter.log ${{ inputs.test_targets }} - name: Archive logs uses: actions/upload-artifact@v4 with: diff --git a/.plzconfig b/.plzconfig index 4d24c805..b12da7e7 100644 --- a/.plzconfig +++ b/.plzconfig @@ -1,11 +1,31 @@ [Please] -Version = >=17.21.0 +Version = >=17.25.1 [Build] hashcheckers = sha256 [Plugin "cc"] Target = //plugins:cc +LdGarbageCollection = true + +DefaultOptCFlags = -std=c99 +DefaultOptCFlags = -Os +DefaultOptCFlags = -DNDEBUG +DefaultOptCFlags = -Wall +DefaultOptCFlags = -Wextra +DefaultOptCFlags = -Werror +DefaultOptCFlags = -flto +DefaultOptCFlags = -D_LARGEFILE64_SOURCE +DefaultOptCFlags = -D_FILE_OFFSET_BITS=64 + +DefaultDbgCFlags = -std=c99 +DefaultDbgCFlags = -g3 +DefaultDbgCFlags = -DDEBUG +DefaultDbgCFlags = -Wall +DefaultDbgCFlags = -Wextra +DefaultDbgCFlags = -Werror +DefaultDbgCFlags = -D_LARGEFILE64_SOURCE +DefaultDbgCFlags = -D_FILE_OFFSET_BITS=64 [Plugin "go"] Target = //plugins:go @@ -21,6 +41,7 @@ Target = //plugins:shell [Plugin "e2e"] Target = //plugins:e2e +PleaseVersion = 17.25.1 DefaultPlugin = //test:python-rules [PluginDefinition] @@ -30,12 +51,27 @@ name = python ConfigKey = DefaultInterpreter DefaultValue = python3 Inherit = true -Help = The Python interpreter to use for any case where one is not explicitly specified +Help = The default Python interpreter to use when building .pex files. -[PluginConfig "default_shebang"] +[PluginConfig "interpreter_options"] +ConfigKey = InterpreterOptions +Optional = true +Repeatable = true +Help = A list of additional arguments to pass to the Python interpreter when building .pex files. + +[PluginConfig "default_runtime_interpreters"] +ConfigKey = DefaultRuntimeInterpreters +Optional = true +Repeatable = true Inherit = true +Help = A list of possible paths to Python interpreters with which to run .pex files. + +[PluginConfig "default_runtime_interpreter_args"] +ConfigKey = DefaultRuntimeInterpreterArgs Optional = true -Help = The shebang to set on binaries / tests where one is not explicitly specified +Repeatable = true +Inherit = true +Help = A list of additional arguments to pass to the Python interpreter when running .pex files. [PluginConfig "pex_tool"] ConfigKey = PexTool @@ -43,12 +79,6 @@ DefaultValue = //tools:please_pex Inherit = true Help = A path or build label for the pex tool, which is used to create .pex files from python sources -[PluginConfig "interpreter_options"] -ConfigKey = InterpreterOptions -Optional = true -Repeatable = true -Help = Additional command line arguments to pass to the Python interpreter. - [PluginConfig "test_runner"] ConfigKey = TestRunner DefaultValue = unittest @@ -146,6 +176,12 @@ DefaultValue = "info" Help = Passes verbosity level to wheel_tool, for example: wheel_tool --verbosity info Optional = true +[PluginConfig "default_preamble_verbosity"] +ConfigKey = DefaultPreambleVerbosity +DefaultValue = error +Inherit = true +Help = The default logging level for the .pex preamble. Must be one of trace, debug, info, warn, error or fatal. + [PluginConfig "feature_flags"] DefaultValue = "" Repeatable = true diff --git a/build_defs/python.build_defs b/build_defs/python.build_defs index e41da18e..a394f9be 100644 --- a/build_defs/python.build_defs +++ b/build_defs/python.build_defs @@ -94,8 +94,8 @@ def python_library(name:str, srcs:list=[], resources:list=[], deps:list=[], visi def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=None, deps:list=[], data:list=None, visibility:list=None, test_only:bool=False, zip_safe:bool=None, site:bool=False, strip:bool=False, interpreter:str=CONFIG.PYTHON.DEFAULT_INTERPRETER, - shebang:str=CONFIG.PYTHON.DEFAULT_SHEBANG, module_dir:str=CONFIG.PYTHON.MODULE_DIR, - labels:list&features&tags=[]): + runtime_interpreters:list=None, runtime_interpreter_args:list=None, + module_dir:str=CONFIG.PYTHON.MODULE_DIR, labels:list&features&tags=[]): """Generates a Python binary target. This compiles all source files together into a single .pex file which can @@ -123,11 +123,19 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N strip (bool): Strips source code from the output .pex file, leaving just bytecode. site (bool): Allows the Python interpreter to import site; conversely if False, it will be started with the -S flag to avoid importing site. - interpreter (str): The Python interpreter to use. Defaults to the value of the - Python.DefaultInterpreter setting. - shebang (str): Exact shebang to apply to the generated file. Defaults to the value - of the python.defaultshebang setting, if not set we will - determine something appropriate for the given interpreter. + interpreter (str): The path to the Python interpreter with which to compile the Python source + files. Defaults to the value of the DefaultInterpreter plugin configuration + option. + runtime_interpreters (list): A list of possible paths to Python interpreters with which to run + the generated .pex file. At run time, the leftmost element containing + a path to an executable file will be chosen. If unspecified, the + value of the DefaultRuntimeInterpreters plugin configuration option + will be used; if that is unspecified, the value of the interpreter + parameter will be used (meaning that the interpreter used to compile + the sources must exist at the same path in the run-time environment). + runtime_interpreter_args (list): A list of additional arguments to pass to the Python interpreter + when running the generated .pex file. Defaults to the value of + the DefaultRuntimeInterpreterArgs plugin configuration option. module_dir (str): The path to the third party python directory in python import path format. Defaults to value of python.moduledir setting. labels (list): Labels to apply to this rule. @@ -145,11 +153,14 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N test_only=test_only, ) - shebang = shebang or _interpreter_cmd(interpreter) + interpreter_opts, interpreter_deps = _runtime_interpreter(interpreter, runtime_interpreters, runtime_interpreter_args) zipsafe_flag = '' if zip_safe is False else '--zip_safe' - interpreter_opts = _interpreter_opts(CONFIG.PYTHON.INTERPRETER_OPTIONS) - cmd = '$TOOLS_PEX -s "%s" -m "%s" %s %s' % ( - shebang, module_dir, zipsafe_flag, interpreter_opts) + cmd = '$TOOLS_PEX --preamble_verbosity="%s" -m "%s" %s %s' % ( + CONFIG.PYTHON.DEFAULT_PREAMBLE_VERBOSITY, + module_dir, + zipsafe_flag, + interpreter_opts, + ) # If content hashing feature flag is enabled, we use the hash of the built # dependencies instead of the RULE_HASH as the base of the pex extraction @@ -180,13 +191,14 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N output_is_complete=True, tools={ 'interpreter': [interpreter], + 'runtime_interpreters': interpreter_deps, 'pex': [CONFIG.PYTHON.PEX_TOOL], }, test_only=test_only, ) # This rule concatenates the .pex with all the other precompiled zip files from dependent rules. - cmd = '$TOOL z -i . -s .pex.zip -s .whl --preamble_from="$SRC" --include_other --add_init_py --strict' + cmd = '$TOOL z --include "$SRC" -i . -s .pex.zip -s .whl --preamble_from "$SRC" --include_other --add_init_py --strict' if strip: cmd += ' --strip_py' @@ -195,6 +207,7 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N return build_rule( name=name, srcs=[pex_rule], + runtime_deps=interpreter_deps, deps=[lib_rule], outs=[out or (name + '.pex')], data=data, @@ -205,7 +218,7 @@ def python_binary(name:str, main:str, srcs:list=[], resources:list=[], out:str=N output_is_complete=True, building_description="Creating pex...", visibility=visibility, - requires=['py', interpreter], + requires=['py'], tools=[CONFIG.ARCAT_TOOL], # This makes the python_library rule the dependency for other python_library or # python_test rules that try to import it. Does mean that they cannot collect a .pex @@ -223,8 +236,8 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: labels:list&features&tags=[], size:str=None, flags:str='', visibility:list=None, sandbox:bool=None, timeout:int=0, flaky:bool|int=0, env:dict=None, test_outputs:list=None, zip_safe:bool=None, interpreter:str=CONFIG.PYTHON.DEFAULT_INTERPRETER, - site:bool=False, test_runner:str=None, shebang:str=CONFIG.PYTHON.DEFAULT_SHEBANG, - module_dir:str=CONFIG.PYTHON.MODULE_DIR): + runtime_interpreters:list=None, runtime_interpreter_args:list=None, + site:bool=False, test_runner:str=None, module_dir:str=CONFIG.PYTHON.MODULE_DIR): """Generates a Python test target. This works very similarly to python_binary; it is also a single .pex file @@ -257,11 +270,19 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: If set to explicitly True or False, the output will be marked appropriately; by default it will be safe unless any of the transitive dependencies are themselves marked as not zip-safe. - interpreter (str): The Python interpreter to use. Defaults to the value of the - Python.DefaultInterpreter setting. - shebang (str): Exact shebang to apply to the generated file. Defaults to the value - of the python.defaultshebang setting, if not set we will - determine something appropriate for the given interpreter. + interpreter (str): The path to the Python interpreter with which to compile the Python source + files. Defaults to the value of the DefaultInterpreter plugin configuration + option. + runtime_interpreters (list): A list of possible paths to Python interpreters with which to run + the generated .pex file. At run time, the leftmost element containing + a path to an executable file will be chosen. If unspecified, the + value of the DefaultRuntimeInterpreters plugin configuration option + will be used; if that is unspecified, the value of the interpreter + parameter will be used (meaning that the interpreter used to compile + the sources must exist at the same path in the run-time environment). + runtime_interpreter_args (list): A list of additional arguments to pass to the Python interpreter + when running the generated .pex file. Defaults to the value of + the DefaultRuntimeInterpreterArgs plugin configuration option. module_dir (str): The path to the third party python directory in python import path format. Defaults to value of python.moduledir setting. site (bool): Allows the Python interpreter to import site; conversely if False, it will be @@ -270,9 +291,13 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: `unittest`, `pytest`, or a custom test runner entry point. """ test_runner = test_runner or CONFIG.PYTHON.TEST_RUNNER - shebang = shebang or _interpreter_cmd(interpreter) - interpreter_opts = _interpreter_opts(CONFIG.PYTHON.INTERPRETER_OPTIONS) - cmd = f'$TOOLS_PEX -t -s "{shebang}" -m "{module_dir}" -r "{test_runner}" --zip_safe --add_test_runner_deps {interpreter_opts} --stamp="$RULE_HASH"' + interpreter_opts, interpreter_deps = _runtime_interpreter(interpreter, runtime_interpreters, runtime_interpreter_args) + cmd = '$TOOLS_PEX --preamble_verbosity="%s" -t -m "%s" -r "%s" --zip_safe --add_test_runner_deps %s --stamp="$RULE_HASH"' % ( + CONFIG.PYTHON.DEFAULT_PREAMBLE_VERBOSITY, + module_dir, + test_runner, + interpreter_opts, + ) if site: cmd += ' -S' @@ -306,6 +331,7 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: deps=deps, tools={ 'interpreter': [interpreter], + 'runtime_interpreters': interpreter_deps, 'pex': [CONFIG.PYTHON.PEX_TOOL], }, labels = labels, @@ -326,7 +352,7 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: deps = [pex_rule, lib_rule] - test_cmd = f'$TOOL $TEST {flags}' if site else f'$TOOL -S $TEST {flags}' + test_cmd = f"$TEST {flags}" worker_cmd = f'$(worker {worker})' if worker else "" if worker_cmd: test_cmd = f'{worker_cmd} && {test_cmd} ' @@ -349,11 +375,12 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: return build_rule( name=name, srcs=[pex_rule], + runtime_deps=interpreter_deps, deps=deps, data=data, outs=[f'{name}.pex'], labels=labels + ['test_results_dir'], - cmd='$TOOL z -i . -s .pex.zip -s .whl --preamble_from="$SRC" --include_other --add_init_py --strict', + cmd='$TOOL z --include "$SRC" -i . -s .pex.zip -s .whl --preamble_from "$SRC" --include_other --add_init_py --strict', test_cmd=test_cmd, debug_cmd=debug_cmd, needs_transitive_deps=True, @@ -368,9 +395,8 @@ def python_test(name:str, srcs:list, data:list|dict=[], resources:list=[], deps: flaky=flaky, env=env, test_outputs=test_outputs, - requires=['py', 'test', interpreter], + requires=['py', 'test'], tools=[CONFIG.ARCAT_TOOL], - test_tools = [interpreter], ) @@ -744,12 +770,44 @@ def _wheel_entrypoint_binary(name:str, entrypoint:str, lib_rule, visibility, tes visibility = visibility, ) -def _interpreter_cmd(interpreter:str): - return f'$(out {interpreter})' if looks_like_build_label(interpreter) else interpreter - -def _interpreter_opts(opts:list): - return " ".join([f'--interpreter_options="{o}"' for o in opts]) +def _runtime_interpreter(build_interp:str, run_interps:list, run_args:list): + """Returns a string of please_pex options defining the run-time Python interpreter paths and + arguments to pass to the interpreter during invocation (falling back to default values where + necessary), and a list containing the names of the subset of those interpreters that are defined + as build targets. + """ + opts = [] + targets = [] + for i in (run_interps or CONFIG.PYTHON.DEFAULT_RUNTIME_INTERPRETERS or [build_interp]): + if looks_like_build_label(i): + i_target = i.rpartition("|")[0] if "|" in i else i + # The location of a Python interpreter defined as a build target within the repo + # relative to the .pex output of a python_binary/python_test target that lists it as a + # run-time dependency varies depending on the context. If the target is running within a + # build environment (e.g. under `plz build` or `plz test`, either directly or as a + # dependency of some other target), Please will also copy the interpreter into the build + # directory if it is listed as a run-time dependency of the target. If the target is + # running outside a build environment (e.g. under `plz run`), Please will build the + # interpreter target, outputting it to plz-out/bin/. Relative to each of these paths, + # the interpreter will exist at the path given by passing its target name to the + # $(location) built-in function. However, this context is a run-time property of the + # .pex file; by prepending the interpreter path with "$PLZ_BIN_PATH/", the preamble will + # detect the context and substitute an appropriate path in place of $PLZ_BIN_PATH. + opts += [f"--interpreters='$PLZ_BIN_PATH/$(location {i})'"] + # If the target name ends with an entry point name, strip it - runtime_deps won't accept + # target names containing entry point names (which isn't really a problem for us, + # because the entire target will have to be built for the entry point to be provided + # anyway). + if canonicalise(i_target) != i: + targets += [i_target] + else: + opts += [f"--interpreters='{i}'"] + opts += [ + f"--interpreter_args='{a}'" + for a in (run_args or CONFIG.PYTHON.DEFAULT_RUNTIME_INTERPRETER_ARGS or []) + ] + return " ".join(opts), targets def _patch_cmd(patch): diff --git a/plugins/BUILD b/plugins/BUILD index a0c714c2..5f173fe9 100644 --- a/plugins/BUILD +++ b/plugins/BUILD @@ -1,6 +1,6 @@ plugin_repo( name = "cc", - revision = "v0.7.0", + revision = "v0.7.1", ) plugin_repo( diff --git a/test/BUILD b/test/BUILD index 56de60d2..dfdc610f 100644 --- a/test/BUILD +++ b/test/BUILD @@ -23,8 +23,8 @@ e2e_test_plugin( "//build_defs:archs", "//build_defs:python", "//build_defs:version", - "//tools:please_pex", "//:plzconfig", + CONFIG.PYTHON.PEX_TOOL, ], visibility = [ "//test/...", diff --git a/test/interpreter_options_repo/BUILD_FILE b/test/interpreter_options_repo/BUILD_FILE index 94913ca1..611bf3e3 100644 --- a/test/interpreter_options_repo/BUILD_FILE +++ b/test/interpreter_options_repo/BUILD_FILE @@ -1,11 +1,11 @@ subinclude("///python//build_defs:python") package( - # If InterpreterOptions is being honoured, the Python interpreter should execute plzmodule.py - # as the __main__ module instead of main.py, causing "Executed plzmodule.py" to be printed on - # stdout rather than "Executed binary.py". + # If DefaultRuntimeInterpreterArgs is being honoured, the Python interpreter should execute + # plzmodule.py as the __main__ module instead of main.py, causing "Executed plzmodule.py" to be + # printed on stdout rather than "Executed binary.py". python = { - "interpreter_options": ["-m", "plzmodule"], + "default_runtime_interpreter_args": ["-m", "plzmodule"], }, ) diff --git a/test/strip_source_test.py b/test/strip_source_test.py index c6df1d99..7b67229e 100644 --- a/test/strip_source_test.py +++ b/test/strip_source_test.py @@ -13,7 +13,7 @@ def setUp(self): def test_can_run_binary(self): """Test that the dependent binary can be run successfully.""" - subprocess.check_call([os.environ["TOOLS"], self.filename]) + subprocess.check_call([self.filename]) def test_does_not_have_py_file(self): """Test that the binary doesn't have the source in it.""" diff --git a/third_party/cc/cjson/BUILD b/third_party/cc/cjson/BUILD new file mode 100644 index 00000000..8205d91d --- /dev/null +++ b/third_party/cc/cjson/BUILD @@ -0,0 +1,7 @@ +github_repo( + name = "cjson", + build_file = "cjson.build", + hashes = ["83fb7750db0601dca735868b8fb1da1318da2d2a1331a9a7da923cb891d26ea9"], + repo = "DaveGamble/cJSON", + revision = "v1.7.19", +) diff --git a/third_party/cc/cjson/cjson.build b/third_party/cc/cjson/cjson.build new file mode 100644 index 00000000..1505c429 --- /dev/null +++ b/third_party/cc/cjson/cjson.build @@ -0,0 +1,18 @@ +subinclude("///cc//build_defs:c") + +package( + cc = { + "default_dbg_cflags": CONFIG.CC.DEFAULT_DBG_CFLAGS + ["-std=c89"], + "default_opt_cflags": CONFIG.CC.DEFAULT_OPT_CFLAGS + ["-std=c89"], + }, +) + +c_library( + name = "cjson", + srcs = ["cJSON.c"], + hdrs = ["cJSON.h"], + includes = ["."], + visibility = [ + "@//tools/please_pex/preamble/...", + ], +) diff --git a/third_party/cc/kuba_zip/BUILD b/third_party/cc/kuba_zip/BUILD new file mode 100644 index 00000000..ae99e0e7 --- /dev/null +++ b/third_party/cc/kuba_zip/BUILD @@ -0,0 +1,7 @@ +github_repo( + name = "kuba_zip", + build_file = "kuba_zip.build", + hashes = ["39961a0598a2f62272eb237df8923077a0508e6ec89fdb82bd500706b579daf3"], + repo = "kuba--/zip", + revision = "v0.3.5", +) diff --git a/third_party/cc/kuba_zip/kuba_zip.build b/third_party/cc/kuba_zip/kuba_zip.build new file mode 100644 index 00000000..f7ff97cc --- /dev/null +++ b/third_party/cc/kuba_zip/kuba_zip.build @@ -0,0 +1,24 @@ +subinclude("///cc//build_defs:c") + +package( + cc = { + "default_dbg_cflags": CONFIG.CC.DEFAULT_DBG_CFLAGS + ["-std=gnu99"], + "default_opt_cflags": CONFIG.CC.DEFAULT_OPT_CFLAGS + ["-std=gnu99"], + }, +) + +c_library( + name = "kuba_zip", + srcs = ["src/zip.c"], + hdrs = ["src/zip.h"], + defines = [ + # Disable some unnecessary miniz features to save space. + "MINIZ_NO_TIME", + "MINIZ_NO_ZLIB_COMPATIBLE_NAMES", + ], + includes = ["src"], + private_hdrs = ["src/miniz.h"], + visibility = [ + "@//tools/please_pex/preamble/...", + ], +) diff --git a/third_party/cc/log_c/BUILD b/third_party/cc/log_c/BUILD new file mode 100644 index 00000000..cb64f1c3 --- /dev/null +++ b/third_party/cc/log_c/BUILD @@ -0,0 +1,7 @@ +github_repo( + name = "log_c", + build_file = "log_c.build", + hashes = ["a1972da6886cfa3abf30b1bc8dbf2f67e4d219dbadcea91b96e1a979dab6267d"], + repo = "chrisnovakovic/log.c", + revision = "20d9411165ed1072229f982e2259b515e0301814", +) diff --git a/third_party/cc/log_c/log_c.build b/third_party/cc/log_c/log_c.build new file mode 100644 index 00000000..35166d6c --- /dev/null +++ b/third_party/cc/log_c/log_c.build @@ -0,0 +1,11 @@ +subinclude("///cc//build_defs:c") + +c_library( + name = "log_c", + srcs = ["src/log.c"], + hdrs = ["src/log.h"], + includes = ["src"], + visibility = [ + "@//tools/please_pex/preamble/...", + ], +) diff --git a/third_party/cc/openbsd/BUILD b/third_party/cc/openbsd/BUILD new file mode 100644 index 00000000..ec5e3e2c --- /dev/null +++ b/third_party/cc/openbsd/BUILD @@ -0,0 +1,25 @@ +subinclude("///cc//build_defs:c") + +remote_file( + name = "queue_h_upstream", + out = "queue_upstream.h", + url = "https://raw.githubusercontent.com/openbsd/src/f9d861d10df22eff555a4387e0151a3215209fd1/sys/sys/queue.h", +) + +genrule( + name = "queue_h_patched", + srcs = [":queue_h_upstream"], + outs = ["queue.h"], + # On OpenBSD, sys/_null.h defines NULL if it isn't already defined. NULL is widely defined on the platforms we + # support, so strip out this include to save ourselves another dependency. + cmd = "grep -vFx '#include ' $SRCS > $OUTS", +) + +c_library( + name = "queue", + hdrs = [":queue_h_patched"], + includes = ["."], + visibility = [ + "//tools/please_pex/preamble/...", + ], +) diff --git a/third_party/go/BUILD b/third_party/go/BUILD index 7e8c7d34..0be24b3b 100644 --- a/third_party/go/BUILD +++ b/third_party/go/BUILD @@ -59,7 +59,7 @@ go_repo( go_repo( module = "golang.org/x/sys", - version = "v0.20.0", + version = "v0.26.0", ) go_repo( diff --git a/tools/please_pex/BUILD b/tools/please_pex/BUILD index d641609b..14831086 100644 --- a/tools/please_pex/BUILD +++ b/tools/please_pex/BUILD @@ -14,6 +14,7 @@ go_binary( "///third_party/go/github.com_peterebden_go-cli-init_v5//logging", "///third_party/go/gopkg.in_op_go-logging.v1//:go-logging.v1", "//tools/please_pex/pex", + "//tools/please_pex/preamble:preamble_go", ], ) diff --git a/tools/please_pex/pex/BUILD b/tools/please_pex/pex/BUILD index 2eb95343..bdc776e4 100644 --- a/tools/please_pex/pex/BUILD +++ b/tools/please_pex/pex/BUILD @@ -3,9 +3,19 @@ subinclude("///go//build_defs:go") go_library( name = "pex", srcs = ["pex.go"], - resources = glob(["**/*.py"]), - visibility = ["//tools/please_pex:all"], + resources = glob(["**/*.py"]) + [":preamble"], + visibility = [ + "//tools/please_pex:all", + ], deps = [ + "//tools/please_pex/preamble:preamble_go", "//tools/please_pex/zip", ], ) + +genrule( + name = "preamble", + srcs = ["//tools/please_pex/preamble"], + outs = ["preamble"], + cmd = "cp $SRCS $OUTS", +) diff --git a/tools/please_pex/pex/pex.go b/tools/please_pex/pex/pex.go index 1bb8e4d4..2a758ebb 100644 --- a/tools/please_pex/pex/pex.go +++ b/tools/please_pex/pex/pex.go @@ -7,13 +7,16 @@ package pex import ( "bytes" "embed" + "encoding/json" "fmt" + "io/fs" "log" "os" "path" "path/filepath" "strings" + "github.com/please-build/python-rules/tools/please_pex/preamble" "github.com/please-build/python-rules/tools/please_pex/zip" ) @@ -30,13 +33,13 @@ const ( //go:embed *.py //go:embed test_runners/*.py //go:embed debuggers/*.py +//go:embed preamble var files embed.FS // A Writer implements writing a .pex file in various steps. type Writer struct { + preambleConfig *preamble.Config zipSafe bool - noSite bool - shebang string realEntryPoint string pexStamp string testSrcs []string @@ -47,36 +50,25 @@ type Writer struct { } // NewWriter constructs a new Writer. -func NewWriter(entryPoint, interpreter string, options []string, stamp string, zipSafe, noSite bool) *Writer { +func NewWriter(entryPoint string, interpreters []string, interpreterArgs []string, stamp string, zipSafe, noSite bool) *Writer { pw := &Writer{ + preambleConfig: &preamble.Config{ + Interpreters: interpreters, + InterpreterArgs: interpreterArgs, + }, zipSafe: zipSafe, - noSite: noSite, realEntryPoint: toPythonPath(entryPoint), pexStamp: stamp, } - pw.SetShebang(interpreter, options) + if noSite { + pw.preambleConfig.InterpreterArgs = append(pw.preambleConfig.InterpreterArgs, "-S") + } return pw } -// SetShebang sets the leading shebang that will be written to the file. -func (pw *Writer) SetShebang(shebang string, options []string) { - shebang = strings.TrimSpace(fmt.Sprintf("%s %s", shebang, strings.Join(options, " "))) - if !path.IsAbs(shebang) { - shebang = "/usr/bin/env " + shebang - } - if pw.noSite { - // In many environments shebangs cannot have more than one argument; we can work around - // that by treating it as a shell script. - if strings.Contains(shebang, " ") { - shebang = "#!/bin/sh\nexec " + shebang + ` -S $0 "$@"` - } else { - shebang += " -S" - } - } - if !strings.HasPrefix(shebang, "#") { - shebang = "#!" + shebang - } - pw.shebang = shebang + "\n" +// SetPreambleVerbosity sets the preamble's default minimum logging level. +func (pw *Writer) SetPreambleVerbosity(verbosity preamble.Verbosity) { + pw.preambleConfig.Verbosity = verbosity } // SetTest sets this Writer to write tests using the given sources. @@ -162,10 +154,22 @@ func (pw *Writer) Write(out, moduleDir string) error { f := zip.NewFile(out, true) defer f.Close() - // Write preamble (i.e. the shebang that makes it executable) - if err := f.WritePreamble([]byte(pw.shebang)); err != nil { + // Write preamble (i.e. the binary that makes the .pex executable) + preambleFile := mustOpen("preamble") + defer preambleFile.Close() + if err := f.WritePreambleFile(preambleFile); err != nil { return err } + + // Write preamble configuration file + preambleConfig, err := json.Marshal(&pw.preambleConfig) + if err != nil { + return fmt.Errorf("marshal preamble configuration: %w", err) + } + if err := f.WriteFile(preamble.ConfigPath, preambleConfig, 0644); err != nil { + return fmt.Errorf("write preamble configuration: %w", err) + } + // Non-zip-safe pexes need portalocker if !pw.zipSafe { pw.includeLibs = append(pw.includeLibs, ".bootstrap/portalocker") @@ -229,6 +233,15 @@ func toPythonPath(p string) string { return strings.ReplaceAll(p[:len(p)-len(ext)], "/", ".") } +// mustOpen opens the given file from the embedded set. It dies on error. +func mustOpen(filename string) fs.File { + f, err := files.Open(filename) + if err != nil { + panic(err) + } + return f +} + // mustRead reads the given file from the embedded set. It dies on error. func mustRead(filename string) []byte { b, err := files.ReadFile(filename) diff --git a/tools/please_pex/pex_main.go b/tools/please_pex/pex_main.go index f5f86594..a2f49088 100644 --- a/tools/please_pex/pex_main.go +++ b/tools/please_pex/pex_main.go @@ -7,27 +7,28 @@ import ( cli "github.com/peterebden/go-cli-init/v5/flags" l "github.com/peterebden/go-cli-init/v5/logging" "github.com/please-build/python-rules/tools/please_pex/pex" + "github.com/please-build/python-rules/tools/please_pex/preamble" ) var log = logging.MustGetLogger("please_pex") var opts = struct { - Usage string - Verbosity l.Verbosity `short:"v" long:"verbosity" default:"warning" description:"Verbosity of output (higher number = more output)"` - Out string `short:"o" long:"out" env:"OUT" description:"Output file"` - EntryPoint string `short:"e" long:"entry_point" env:"SRC" description:"Entry point to pex file"` - ModuleDir string `short:"m" long:"module_dir" description:"Python module dir to implicitly load modules from"` - TestSrcs []string `long:"test_srcs" env:"SRCS" env-delim:" " description:"Test source files"` - Interpreter string `short:"i" long:"interpreter" env:"TOOLS_INTERPRETER" description:"Python interpreter to use"` - TestRunner string `short:"r" long:"test_runner" default:"unittest" description:"Test runner to use"` - Shebang string `short:"s" long:"shebang" description:"Explicitly set shebang to this"` - Stamp string `long:"stamp" description:"Unique value used to derive cache directory for pex"` - InterpreterOptions []string `long:"interpreter_options" description:"Additional command line arguments to pass to the Python interpreter"` - Test bool `short:"t" long:"test" description:"True if we're to build a test"` - Debug pex.Debugger `short:"d" long:"debug" optional:"true" optional-value:"pdb" choice:"pdb" choice:"debugpy" description:"Debugger to generate a debugging pex"` - Site bool `short:"S" long:"site" description:"Allow the pex to import site at startup"` - ZipSafe bool `long:"zip_safe" description:"Marks this pex as zip-safe"` - AddTestRunnerDeps bool `long:"add_test_runner_deps" description:"True if test-runner dependencies should be baked into test binaries"` + Usage string + Verbosity l.Verbosity `short:"v" long:"verbosity" default:"warning" description:"Verbosity of output (higher number = more output)"` + Out string `short:"o" long:"out" env:"OUT" description:"Output file"` + EntryPoint string `short:"e" long:"entry_point" env:"SRC" description:"Entry point to pex file"` + ModuleDir string `short:"m" long:"module_dir" description:"Python module dir to implicitly load modules from"` + TestSrcs []string `long:"test_srcs" env:"SRCS" env-delim:" " description:"Test source files"` + PreambleVerbosity preamble.Verbosity `long:"preamble_verbosity" optional:"true" optional-value:"level" choice:"trace" choice:"debug" choice:"info" choice:"warn" choice:"error" choice:"fatal" description:"The preamble's default minimum logging level"` + Interpreters []string `short:"i" long:"interpreters" required:"true" description:"Python interpreters to attempt to invoke at run-time"` + InterpreterArgs []string `long:"interpreter_args" description:"Additional command line arguments to pass to the Python interpreter"` + TestRunner string `short:"r" long:"test_runner" default:"unittest" description:"Test runner to use"` + Stamp string `long:"stamp" description:"Unique value used to derive cache directory for pex"` + Test bool `short:"t" long:"test" description:"True if we're to build a test"` + Debug pex.Debugger `short:"d" long:"debug" optional:"true" optional-value:"pdb" choice:"pdb" choice:"debugpy" description:"Debugger to generate a debugging pex"` + Site bool `short:"S" long:"site" description:"Allow the pex to import site at startup"` + ZipSafe bool `long:"zip_safe" description:"Marks this pex as zip-safe"` + AddTestRunnerDeps bool `long:"add_test_runner_deps" description:"True if test-runner dependencies should be baked into test binaries"` }{ Usage: ` please_pex is a tool to create .pex files for Python. @@ -42,10 +43,15 @@ func main() { cli.ParseFlagsOrDie("please_pex", &opts, nil) l.InitLogging(opts.Verbosity) w := pex.NewWriter( - opts.EntryPoint, opts.Interpreter, opts.InterpreterOptions, opts.Stamp, - opts.ZipSafe, !opts.Site) - if opts.Shebang != "" { - w.SetShebang(opts.Shebang, opts.InterpreterOptions) + opts.EntryPoint, + opts.Interpreters, + opts.InterpreterArgs, + opts.Stamp, + opts.ZipSafe, + !opts.Site, + ) + if opts.PreambleVerbosity != "" { + w.SetPreambleVerbosity(opts.PreambleVerbosity) } if opts.Test { w.SetTest(opts.TestSrcs, opts.TestRunner, opts.AddTestRunnerDeps) diff --git a/tools/please_pex/preamble/BUILD b/tools/please_pex/preamble/BUILD new file mode 100644 index 00000000..d4b504c6 --- /dev/null +++ b/tools/please_pex/preamble/BUILD @@ -0,0 +1,35 @@ +subinclude( + "///cc//build_defs:c", + "///go//build_defs:go", +) + +c_binary( + name = "preamble", + srcs = glob(["*.c"]), + hdrs = glob(["*.h"]), + compiler_flags = ["-std=c11"], + defines = [ + # path.c relies on realpath(3)'s resolved_path == NULL feature, which was not standardised + # until POSIX.1-2008: + "_XOPEN_SOURCE=700", + ], + static = True, + visibility = [ + "//tools/please_pex/pex:preamble", + ], + deps = [ + "///third_party/cc/cjson/cjson//:cjson", + "///third_party/cc/kuba_zip/kuba_zip//:kuba_zip", + "///third_party/cc/log_c/log_c//:log_c", + "//third_party/cc/openbsd:queue", + ], +) + +go_library( + name = "preamble_go", + srcs = glob(["*.go"]), + visibility = [ + "//tools/please_pex/pex", + "//tools/please_pex:pex_main", + ], +) diff --git a/tools/please_pex/preamble/path.c b/tools/please_pex/preamble/path.c new file mode 100644 index 00000000..de7d749d --- /dev/null +++ b/tools/please_pex/preamble/path.c @@ -0,0 +1,160 @@ +#include + +#if defined(__APPLE__) +#include +#elif defined(__FreeBSD__) +#include +#include +#endif + +#include "log.h" +#include "path.h" +#include "pexerror.h" +#include "util.h" + +/* + * get_pex_dir stores the canonical absolute path to the .pex file in pex_dir. It returns NULL on + * success and an error on failure. + */ +err_t *get_pex_dir(char **pex_dir) { + char *exe_path = NULL; + char *exe_realpath = NULL; + char *exe_dir = NULL; + err_t *err = NULL; + +#if defined(__linux__) + exe_path = "/proc/self/exe"; +#elif defined(__APPLE__) + uint32_t len = 0; + + // Call _NSGetExecutablePath once to find out how long the executable path is, then again after + // we've allocated that much memory to store it. + _NSGetExecutablePath(NULL, &len); + MALLOC(exe_path, char, len); + if (_NSGetExecutablePath(exe_path, &len) != 0) { + err = err_from_str("_NSGetExecutablePath failure"); + goto end; + } +#elif defined(__FreeBSD__) + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; + size_t len = 0; + + // Call sysctl once to find out how long the executable path is, then again after we've + // allocated that much memory to store it. + if (sysctl(mib, NELEMS(mib), NULL, &len, NULL, 0) == -1) { + err = err_from_errno("sysctl lookup"); + goto end; + } + MALLOC(exe_path, char, len); + if (sysctl(mib, NELEMS(mib), exe_path, &len, NULL, 0) == -1) { + err = err_from_errno("sysctl write"); + goto end; + } +#else +#error "Unsupported operating system" +#endif + + if ((exe_realpath = realpath(exe_path, NULL)) == NULL) { + err = err_from_errno("realpath"); + goto end; + } + + exe_dir = dirname(exe_realpath); + if (((*pex_dir) = strdup(exe_dir)) == NULL) { + err = err_from_errno("strdup"); + goto end; + } + +end: +#ifndef __linux__ + FREE(exe_path); +#endif + FREE(exe_realpath); + + return err; +} + +/* + * get_plz_bin_path stores the path to the directory containing Please's binary outputs in path. It + * returns NULL on success and an error on failure. + * + * If the .pex file is running from within a Please build environment (which is assumed if PLZ_ENV + * is defined in the environment), get_plz_bin_path returns the path to the build environment's root + * directory (which is assumed to be the value of TMP_DIR in the environment, as created by Please). + * Otherwise, get_plz_bin_path returns the path to the bin/ subdirectory within the plz-out/ directory + * to which this .pex file belongs. If the .pex file does not exist within a plz-out/ directory + * tree, get_plz_bin_path fails. + */ +err_t *get_plz_bin_path(char **path) { + char *pex_dir = NULL; + char *tmp_dir = NULL; + char *tmp_dir_realpath = NULL; + size_t pex_dir_len = 0; + size_t tmp_dir_len = 0; + err_t *err = NULL; + + if ((err = get_pex_dir(&pex_dir)) != NULL) { + err = err_wrap("get_pex_dir", err); + goto end; + } + + if (getenv("PLZ_ENV") != NULL) { + if ((tmp_dir = getenv("TMP_DIR")) == NULL) { + // There are two circumstances under which this might happen: either if this is a Please + // build environment but TMP_DIR was unset before the .pex file was executed, or if this + // isn't a Please build environment and PLZ_ENV just happened to be set for some other + // reason. The latter is more likely than the former (many build definitions rely on + // TMP_DIR being defined, which would lead to much more breakage besides this function + // failing) - therefore, optimise for the latter case. + log_warn("PLZ_ENV is defined but TMP_DIR is not; assuming this is not a Please build environment"); + goto no_plz_env; + } + + // Identify whether the .pex file exists inside the build environment. + if ((tmp_dir_realpath = realpath(tmp_dir, NULL)) == NULL) { + err = err_from_errno("realpath tmp_dir"); + goto end; + } + + pex_dir_len = strlen(pex_dir); + tmp_dir_len = strlen(tmp_dir_realpath); + + if ( + strncmp(tmp_dir_realpath, pex_dir, tmp_dir_len) == 0 && + ( + (pex_dir_len == tmp_dir_len) || + (pex_dir_len > tmp_dir_len && pex_dir[tmp_dir_len] == '/') + ) + ) { + if (((*path) = strdup(tmp_dir_realpath)) == NULL) { + err = err_from_errno("strdup"); + } + goto end; + } + } + +no_plz_env: + // Try searching the .pex file path component-wise for the last instance of "plz-out". If one is + // found, assume the binary outputs are in the bin/ subdirectory within that directory. There's + // no need to check now whether bin/ actually exists - if it doesn't, the call to execvp() later + // will fail anyway. + while (!STREQ(basename(pex_dir), "plz-out") && !STREQ(pex_dir, "/") && !STREQ(pex_dir, ".")) { + pex_dir = dirname(pex_dir); + } + if (STREQ(pex_dir, "/") || STREQ(pex_dir, ".")) { + // If we reached the left-most component in the path, the .pex file doesn't exist within a + // plz-out/ directory tree. + err = err_from_str(".pex file is in neither a Please build environment nor a Please repo"); + goto end; + } + MALLOC(*path, char, strlen(pex_dir) + strlen("/bin") + 1); + if (sprintf(*path, "%s/bin", pex_dir) < 0) { + err = err_from_errno("sprintf path"); + } + +end: + FREE(pex_dir); + FREE(tmp_dir_realpath); + + return err; +} diff --git a/tools/please_pex/preamble/path.h b/tools/please_pex/preamble/path.h new file mode 100644 index 00000000..560b6af9 --- /dev/null +++ b/tools/please_pex/preamble/path.h @@ -0,0 +1,9 @@ +#ifndef __PATH_H__ + +#define __PATH_H__ + +#include "pexerror.h" + +err_t *get_plz_bin_path(char **path); + +#endif /* !__PATH_H__ */ diff --git a/tools/please_pex/preamble/pexerror.c b/tools/please_pex/preamble/pexerror.c new file mode 100644 index 00000000..b0edb1c6 --- /dev/null +++ b/tools/please_pex/preamble/pexerror.c @@ -0,0 +1,100 @@ +/* + * pexerror.c implements a simple error reporting mechanism. + * + * Errors are singly-linked lists (of type err_t) whose nodes (of type errmsg_t) are strings + * containing diagnostic messages. This allows for the formation of a Go-like error chain, with + * diagnostic messages becoming progressively more specific as the tail of the list is approached. + */ + +#include +#include +#include +#include + +#include "pexerror.h" +#include "util.h" + +/* + * err_from_str returns an error from the given diagnostic message. + */ +err_t *err_from_str(const char *msg) { + err_t *err = NULL; + + MALLOC(err, err_t, 1); + *err = (err_t){ 0 }; + SLIST_INIT(err); + + return err_wrap(msg, err); +} + +/* + * err_from_errno returns an error from the given diagnostic message that wraps another error + * whose diagnotic message is derived from the current value of errno. + * + * This function is used to report the failure of a system call or library function. Because errno + * doesn't indicate which system call or library function encountered the error, the diagnostic + * message passed in msg typically includes the name of the system call or library function. + * + * Note that err_from_errno doesn't check that the value of errno is significant; it is therefore + * the responsibility of the caller to ensure that a system call or library function that sets + * errno has actually returned a value indicating failure before calling err_from_errno. + */ +err_t *err_from_errno(const char *msg) { + err_t *err = err_from_str(strerror(errno)); + + return err_wrap(msg, err); +} + +/* + * err_wrap creates an error from the given diagnostic message, using it to wrap the given error, + * and then returns the newly-created error. + */ +err_t *err_wrap(const char *msg, err_t *wrapped) { + errmsg_t *first = NULL; + errmsg_t *errmsg = NULL; + size_t len = strlen(msg); + + if (wrapped == NULL) { + fprintf(stderr, "err_wrap called with null value for wrapped"); + exit(EX_SOFTWARE); + } + + first = SLIST_FIRST(wrapped); + + MALLOC(errmsg, errmsg_t, 1); + errmsg->msg = msg; + errmsg->len = len; + errmsg->flen = len + (first == NULL ? 0 : 2 + first->flen); // 2 extra bytes for ": " separator + + SLIST_INSERT_HEAD(wrapped, errmsg, wrapped); + + return wrapped; +} + +/* + * err_str extracts the diagnostic messages from the given error chain and formats them as a + * null-terminated string. Messages are included in the returned string in the order in which they + * appear in the error chain, with successive errors delimited by ": ". + */ +char *err_str(err_t *err) { + errmsg_t *errmsg = NULL; + char *catmsg = NULL; + size_t i = 0; + + errmsg = SLIST_FIRST(err); + MALLOC(catmsg, char, errmsg->flen + 1); // 1 extra byte for trailing null + + SLIST_FOREACH(errmsg, err, wrapped) { + strncpy(catmsg + i, errmsg->msg, errmsg->len); + i += errmsg->len; + // Don't append ": " to the final error in the list. + if (errmsg->len != errmsg->flen) { + strcpy(catmsg + i, ": "); + i += 2; + } + } + + catmsg[i] = 0; + + return catmsg; +} diff --git a/tools/please_pex/preamble/pexerror.h b/tools/please_pex/preamble/pexerror.h new file mode 100644 index 00000000..4f44f948 --- /dev/null +++ b/tools/please_pex/preamble/pexerror.h @@ -0,0 +1,42 @@ +#ifndef __PEXERROR_H__ + +#define __PEXERROR_H__ + +#include "queue.h" + +typedef struct errmsg_t { + /* + * len is the length of the diagnostic message stored in msg. It excludes the trailing null. + */ + size_t len; + + /* + * flen is the expected length of the error chain with this error at its head when it is + * formatted by err_str. It is the length of the diagnostic message in this error plus the + * lengths of the diagnostic messages stored in each of the errors wrapped by this error, with + * an additional two bytes per wrapped error to account for the ": " separator that err_str + * inserts between diagnostic messages. + */ + size_t flen; + + /* + * msg is a diagnostic message describing this error. + */ + const char *msg; + + /* + * wrapped is an error that is wrapped by this error. + */ + SLIST_ENTRY(errmsg_t) wrapped; +} errmsg_t; + +SLIST_HEAD(err_t, errmsg_t); + +typedef struct err_t err_t; + +err_t *err_from_str(const char *msg); +err_t *err_from_errno(const char *msg); +err_t *err_wrap(const char *msg, err_t *wrapped); +char *err_str(err_t *err); + +#endif /* !__PEXERROR_H__ */ diff --git a/tools/please_pex/preamble/preamble.c b/tools/please_pex/preamble/preamble.c new file mode 100644 index 00000000..da23f108 --- /dev/null +++ b/tools/please_pex/preamble/preamble.c @@ -0,0 +1,369 @@ +#include +#include + +#include "cJSON.h" +#include "log.h" +#include "path.h" +#include "pexerror.h" +#include "queue.h" +#include "util.h" +#include "zip.h" + +#define JSON_STRING(x) (cJSON_IsString(x) && (x)->valuestring != NULL) +#define JSON_STRLEN(x) strlen((x)->valuestring) + +#define PREAMBLE_CONFIG_PATH ".bootstrap/PLZ_PREAMBLE_CONFIG" + +/* + * init_log configures log.c to log initially at error level and to include the current date and + * time (but not source location) in log messages. + */ +static void init_log() { + // In the early stages of the preamble, default to LOG_ERROR (although everything will be fatal + // until we're in a position to call set_log_verbosity anyway, so this is more of a formality). + log_set_level(LOG_ERROR); + + log_set_time_format("%Y-%m-%d %H:%M:%S"); +} + +/* + * parse_log_level returns the log.c logging level represented by the given string, or -1 if the + * given string does not represent a known logging level. It is the inverse of the string_log_level + * function. + */ +static int parse_log_level(char *level) { + if (level == NULL) { + return -1; + } else if (STREQ(level, "trace")) { + return LOG_TRACE; + } else if (STREQ(level, "debug")) { + return LOG_DEBUG; + } else if (STREQ(level, "info")) { + return LOG_INFO; + } else if (STREQ(level, "warn")) { + return LOG_WARN; + } else if (STREQ(level, "error")) { + return LOG_ERROR; + } else if (STREQ(level, "fatal")) { + return LOG_FATAL; + } else { + return -1; + } +} + +/* + * string_log_level returns a stringified representation of the given log.c logging level. It is + * equivalent to log.c's log_level_string function, except it returns lower-case text. + */ +static char *string_log_level(int level) { + if (level == LOG_TRACE) { + return "trace"; + } else if (level == LOG_DEBUG) { + return "debug"; + } else if (level == LOG_INFO) { + return "info"; + } else if (level == LOG_WARN) { + return "warn"; + } else if (level == LOG_ERROR) { + return "error"; + } else if (level == LOG_FATAL) { + return "fatal"; + } + return ""; +} + +/* + * set_log_verbosity sets log.c's logging level based on the value of the PLZ_PEX_PREAMBLE_VERBOSITY + * environment variable, falling back to the default logging level set in the .pex preamble + * configuration if PLZ_PEX_PREAMBLE_VERBOSITY does not have a recognised value (and falling back + * further to "error" if the .pex preamble configuration also does not contain a recognised value). + */ +static void set_log_verbosity(const cJSON *config) { + char *env_verbosity = getenv("PLZ_PEX_PREAMBLE_VERBOSITY"); + int level = parse_log_level(env_verbosity); + const cJSON *json_verbosity = NULL; + + if (level == -1) { + json_verbosity = cJSON_GetObjectItemCaseSensitive(config, "verbosity"); + if (!JSON_STRING(json_verbosity) || (level = parse_log_level(json_verbosity->valuestring)) == -1) { + level = LOG_ERROR; + } + + if (env_verbosity != NULL) { + log_error("Unknown logging level '%s'; defaulting to '%s'", env_verbosity, string_log_level(level)); + } + } + + log_set_level(level); +} + +/* + * get_config reads and parses the .pex preamble configuration, a JSON-encoded file at + * .bootstrap/PLZ_PREAMBLE_CONFIG in the zip archive, and stores it in config. It returns NULL on + * success and an error on failure. + */ +static err_t *get_config(char *pex_path, const cJSON **config) { + struct zip_t *zip = NULL; + int ziperr = 0; + char *configbuf = NULL; + size_t configsize = 0; + err_t *err = NULL; + + zip = zip_openwitherror(pex_path, 0, 'r', &ziperr); + if (ziperr < 0) { + err = err_wrap("open .pex", err_from_str(zip_strerror(ziperr))); + goto end; + } + + if ((ziperr = zip_entry_open(zip, PREAMBLE_CONFIG_PATH)) < 0) { + err = err_wrap("open .pex configuration", err_from_str(zip_strerror(ziperr))); + goto end; + } + + if ((ziperr = (int)zip_entry_read(zip, (void **)&configbuf, &configsize)) < 0) { + err = err_wrap("read .pex configuration", err_from_str(zip_strerror(ziperr))); + goto end; + } + + zip_entry_close(zip); + zip_close(zip); + + if (((*config) = cJSON_ParseWithLength(configbuf, configsize)) == NULL) { + err = err_wrap( + "parse .pex configuration", + err_wrap("JSON syntax error near", err_from_str(cJSON_GetErrorPtr())) + ); + goto end; + } + +end: + FREE(configbuf); + + return err; +} + +/* + * get_interpreter_args extracts the array of interpreter command line arguments from the value of + * "interpreter_args" in the given .pex preamble configuration and stores them in the same order in + * the tail queue given by args; it also stores the length of this queue in len. It returns NULL on + * success and an error on failure. + */ +static err_t *get_interpreter_args(const cJSON *config, int *len, strlist_t **args) { + const cJSON *json_args = NULL; + const cJSON *json_arg = NULL; + strlist_elem_t *arg = NULL; + err_t *err = NULL; + + STAILQ_NEW(*args, strlist_t); + + json_args = cJSON_GetObjectItemCaseSensitive(config, "interpreter_args"); + if (json_args == NULL || cJSON_IsNull(json_args)) { + log_debug("interpreter_args is null; no arguments added"); + goto end; + } else if (!cJSON_IsArray(json_args)) { + err = err_from_str("interpreter_args must be an array or null"); + goto end; + } + + *len = cJSON_GetArraySize(json_args); + if (*len == 0) { + log_debug("interpreter_args is empty; no arguments added"); + goto end; + } + + cJSON_ArrayForEach(json_arg, json_args) { + if (!JSON_STRING(json_arg)) { + err = err_from_str("elements in interpreter_args must be strings"); + goto end; + } + + STAILQ_ENTRY_NEW(arg, strlist_elem_t); + MALLOC(arg->str, char, JSON_STRLEN(json_arg) + 1); + strcpy(arg->str, json_arg->valuestring); + STAILQ_INSERT_TAIL(*args, arg, next); + log_debug("Added argument from interpreter_args: %s", arg->str); + } + +end: + if (err != NULL) { + if (arg != NULL) { + FREE(arg->str); + FREE(arg); + } + + if (*args != NULL) { + while (!STAILQ_EMPTY(*args)) { + arg = STAILQ_FIRST(*args); + STAILQ_REMOVE_HEAD(*args, next); + FREE(arg->str); + FREE(arg); + } + } + } + + return err; +} + +/* + * get_interpreters extracts the array of interpreter paths from the value of "interpreters" in the + * given .pex preamble configuration and stores them in the same order in the tail queue given by + * interps. It returns NULL on success and an error on failure. + */ +static err_t *get_interpreters(const cJSON *config, strlist_t **interps) { + strlist_elem_t *interp = NULL; + char *plz_bin_path = NULL; + const cJSON *json_interps = NULL; + const cJSON *json_interp = NULL; + err_t *err = NULL; + + json_interps = cJSON_GetObjectItemCaseSensitive(config, "interpreters"); + if (!cJSON_IsArray(json_interps)) { + err = err_from_str("interpreters must be an array"); + goto end; + } + + if (cJSON_GetArraySize(json_interps) == 0) { + err = err_from_str("interpreters must not be empty"); + goto end; + } + + STAILQ_NEW(*interps, strlist_t); + + cJSON_ArrayForEach(json_interp, json_interps) { + if (!JSON_STRING(json_interp)) { + err = err_from_str("elements in interpreters must be strings"); + goto end; + } + + STAILQ_ENTRY_NEW(interp, strlist_elem_t); + + if (STRPREFIX(json_interp->valuestring, "$PLZ_BIN_PATH/")) { + if (plz_bin_path == NULL) { + if ((err = get_plz_bin_path(&plz_bin_path)) != NULL) { + err = err_wrap("PLZ_BIN_PATH resolution failure", err); + goto end; + } + log_debug("Resolved $PLZ_BIN_PATH to %s", plz_bin_path); + } + + // Replace "$PLZ_BIN_PATH" with the path to Please's binary outputs directory at the + // start of the interpreter path. + MALLOC(interp->str, char, strlen(plz_bin_path) + JSON_STRLEN(json_interp) - strlen("$PLZ_BIN_PATH") + 1); // 1 extra byte for trailing null + strncpy(interp->str, plz_bin_path, strlen(plz_bin_path)); + strcpy(interp->str + strlen(plz_bin_path), json_interp->valuestring + strlen("$PLZ_BIN_PATH")); + } else { + MALLOC(interp->str, char, JSON_STRLEN(json_interp) + 1); // 1 extra byte for trailing null + strcpy(interp->str, json_interp->valuestring); + } + + STAILQ_INSERT_TAIL(*interps, interp, next); + + log_debug("Added interpreter to search path: %s", interp->str); + } + +end: + FREE(plz_bin_path); + + if (err != NULL) { + if (interp != NULL) { + FREE(interp->str); + FREE(interp); + } + + if (*interps != NULL) { + while (!STAILQ_EMPTY(*interps)) { + interp = STAILQ_FIRST(*interps); + STAILQ_REMOVE_HEAD(*interps, next); + FREE(interp->str); + FREE(interp); + } + } + } + + return err; +} + +int main(int argc, char **argv) { + err_t *err = NULL; + const cJSON *config = NULL; + int config_args_len = 0; + strlist_t *config_args = NULL; + strlist_elem_t *config_arg = NULL; + strlist_t *interps = NULL; + strlist_elem_t *interp = NULL; + char **interp_argv = NULL; + int i = 0; + + init_log(); + + if (argc == 0) { + log_fatal("Failed to execute .pex preamble: argv is empty"); + return 1; + } + + if ((err = get_config(argv[0], &config)) != NULL) { + log_fatal("Failed to get .pex preamble configuration: %s", err_str(err)); + return 1; + } + + set_log_verbosity(config); + + if ((err = get_interpreter_args(config, &config_args_len, &config_args)) != NULL) { + log_fatal("Failed to get interpreter arguments from .pex preamble configuration: %s", err_str(err)); + return 1; + } + + if ((err = get_interpreters(config, &interps)) != NULL) { + log_fatal("Failed to get interpreters from .pex preamble configuration: %s", err_str(err)); + return 1; + } + + // interp_argv is the array of arguments that will be passed to execvp(): + // + // { + // interpreter, + // config_args[0], + // # ... + // config_args[n], + // argv[0], + // # ... + // argv[n], + // NULL + // } + MALLOC(interp_argv, char *, 1 + config_args_len + argc + 1); + + // The first element is the Python interpreter path, which is substituted in during each + // execvp() invocation. The remaining elements are static for each execvp() invocation: + // - the interpreter arguments from the .pex preamble configuration (if any); + STAILQ_FOREACH(config_arg, config_args, next) { + interp_argv[1 + i] = config_arg->str; + i++; + } + // - the name of the currently-executing program (which is also the .pex file that the + // Python interpreter needs to execute) and the command line arguments given to this + // process (if any); + for (i = 0; i < argc; i++) { + interp_argv[1 + config_args_len + i] = argv[i]; + } + // - NULL (the terminator for execvp()). + interp_argv[1 + config_args_len + argc] = NULL; + + STAILQ_FOREACH(interp, interps, next) { + interp_argv[0] = interp->str; + + log_debug("Attempting to execute interpreter %s", interp_argv[0]); + + execvp(interp_argv[0], interp_argv); + if (errno == ENOENT) { + // ENOENT isn't necessarily an error case (there are legitimate reasons for any given + // interpreter not to exist), hence logging this at info level rather than error. + log_info("%s does not exist or could not be executed", interp_argv[0]); + } else { + log_error("Failed to execute %s: %s", interp_argv[0], strerror(errno)); + } + } + + log_fatal("Failed to execute any interpreters in the interpreter search path"); + + return 1; +} diff --git a/tools/please_pex/preamble/preamble.go b/tools/please_pex/preamble/preamble.go new file mode 100644 index 00000000..846cb306 --- /dev/null +++ b/tools/please_pex/preamble/preamble.go @@ -0,0 +1,67 @@ +// Package preamble is the Go interface to the .pex preamble format. +package preamble + +import ( + "fmt" + "strings" +) + +// ConfigPath is the zip file member within the .pex archive containing the .pex preamble +// configuration. It is typically the first member of the archive (for performance reasons), but +// does not necessarily have to be. +const ConfigPath = ".bootstrap/PLZ_PREAMBLE_CONFIG" + +// Verbosity represents a logging level supported by the .pex preamble. +type Verbosity string + +const ( + LogTrace Verbosity = "trace" + LogDebug = "debug" + LogInfo = "info" + LogWarn = "warn" + LogError = "error" + LogFatal = "fatal" +) + +// UnmarshalFlag parses a string representation of a logging level and returns its corresponding +// constant. +func (v *Verbosity) UnmarshalFlag(in string) error { + switch strings.ToLower(in) { + case "trace": + *v = LogTrace + return nil + case "debug": + *v = LogDebug + return nil + case "info": + *v = LogInfo + return nil + case "warn": + *v = LogWarn + return nil + case "error": + *v = LogError + return nil + case "fatal": + *v = LogFatal + return nil + } + return fmt.Errorf("invalid preamble logging level '%s'", in) +} + +// Config represents a .pex preamble configuration. +type Config struct { + // Verbosity controls the preamble's minimum log level. It may be overridden at run time by the + // value of the PLZ_PEX_PREAMBLE_VERBOSITY environment variable. + Verbosity Verbosity `json:"verbosity,omitempty"` + + // Interpreters is a list of relative or absolute paths to Python interpreters that the preamble + // should attempt to invoke in the order in which they are given. The preamble duplicates the + // actions of the shell when attempting to locate an interpreter with a relative path that does not + // contain a forward slash. + Interpreters []string `json:"interpreters"` + + // InterpreterArgs is a list of command line arguments that the preamble should pass to the Python + // interpreters when attempting to invoke them. + InterpreterArgs []string `json:"interpreter_args,omitempty"` +} diff --git a/tools/please_pex/preamble/util.h b/tools/please_pex/preamble/util.h new file mode 100644 index 00000000..5bb2f93d --- /dev/null +++ b/tools/please_pex/preamble/util.h @@ -0,0 +1,47 @@ +#ifndef __UTIL_H__ + +#define __UTIL_H__ + +#include +#include +#include +#include + +#define STREQ(x, y) (strcmp((x), (y)) == 0) +#define STRPREFIX(s, pre) (strncmp(pre, s, strlen(pre)) == 0) + +#define NELEMS(x) ((int)(sizeof(x) / sizeof((x)[0]))) + +#define MALLOC(ptr, type, size) \ + if (((ptr) = malloc(sizeof(type) * (size))) == NULL) { \ + perror("malloc"); \ + exit(EX_OSERR); \ + } + +#define FREE(x) if ((x) != NULL) { free(x); (x) = NULL; } + +/* + * STAILQ_NEW allocates memory for and initialises a singly-linked tail queue. + */ +#define STAILQ_NEW(headname, type) \ + MALLOC(headname, type, 1); \ + (*headname) = (type){ 0 }; \ + STAILQ_INIT(headname); + +/* + * STAILQ_ENTRY_NEW allocates memory for and initialises an entry in a singly-linked tail queue. + */ +#define STAILQ_ENTRY_NEW(var, type) \ + MALLOC(var, type, 1); \ + (*var) = (type){ 0 }; + +typedef struct strlist_elem_t { + char *str; + STAILQ_ENTRY(strlist_elem_t) next; +} strlist_elem_t; + +STAILQ_HEAD(strlist_t, strlist_elem_t); + +typedef struct strlist_t strlist_t; + +#endif /* !__UTIL_H__ */ diff --git a/tools/please_pex/zip/writer.go b/tools/please_pex/zip/writer.go index 6949fa67..ed024822 100644 --- a/tools/please_pex/zip/writer.go +++ b/tools/please_pex/zip/writer.go @@ -7,6 +7,7 @@ import ( "encoding/binary" "fmt" "io" + "io/fs" "os" "path" "path/filepath" @@ -28,7 +29,7 @@ const modTimeDOS = 10785 type File struct { f io.WriteCloser w *zip.Writer - preambleLength int + preambleLength int64 filename string input string // Include and Exclude are prefixes of filenames to include or exclude from the zipfile. @@ -415,14 +416,22 @@ func (f *File) WriteDir(filename string) error { return nil } -// WritePreamble writes a preamble to the zipfile. -func (f *File) WritePreamble(preamble []byte) error { - f.preambleLength += len(preamble) - f.w.SetOffset(int64(f.preambleLength)) +// WritePreambleBytes writes a byte slice as a preamble to the zipfile. +func (f *File) WritePreambleBytes(preamble []byte) error { + f.preambleLength += int64(len(preamble)) + f.w.SetOffset(f.preambleLength) _, err := f.f.Write(preamble) return err } +// WritePreambleFile writes a file as a preamble to the zipfile. +func (f *File) WritePreambleFile(preamble fs.File) error { + length, err := io.Copy(f.f, preamble) + f.preambleLength += length + f.w.SetOffset(f.preambleLength) + return err +} + // StripBytecodeTimestamp strips a timestamp from a .pyc or .pyo file. // This is important so our output is deterministic. func (f *File) StripBytecodeTimestamp(filename string, contents []byte) error {