diff --git a/.bazelignore b/.bazelignore index 9bcb523a38..713903d832 100644 --- a/.bazelignore +++ b/.bazelignore @@ -25,4 +25,5 @@ examples/pip_parse_vendored/bazel-pip_parse_vendored examples/py_proto_library/bazel-py_proto_library tests/integration/compile_pip_requirements/bazel-compile_pip_requirements tests/integration/ignore_root_user_error/bazel-ignore_root_user_error +tests/integration/local_toolchains/bazel-local_toolchains tests/integration/pip_repository_entry_points/bazel-pip_repository_entry_points diff --git a/.bazelrc b/.bazelrc index 61fd0e7601..94cfb93350 100644 --- a/.bazelrc +++ b/.bazelrc @@ -4,8 +4,8 @@ # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, execute # `bazel run @rules_bazel_integration_test//tools:update_deleted_packages` -build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered -query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered +build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered +query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/dupe_requirements,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/pip_repository_entry_points,tests/integration/py_cc_toolchain_registered test --test_output=errors diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b0d526870..61e9d95cd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ A brief description of the categories of changes: [x.x.x]: https://github.com/bazelbuild/rules_python/releases/tag/x.x.x ### Changed +* (toolchains) Optional toolchain dependency: `py_binary`, `py_test`, and + `py_library` now depend on the `//python:exec_tools_toolchain_type` for build + tools. * (deps): Bumped `bazel_skylib` to 1.6.1. * (bzlmod): The `python` and internal `rules_python` extensions have been @@ -37,6 +40,28 @@ A brief description of the categories of changes: for this package before will be deleted automatically. ### Added +* (rules) Precompiling Python source at build time is available. but is + disabled by default, for now. Set + `@rules_python//python/config_settings:precompile=enabled` to enable it + by default. A subsequent release will enable it by default. See the + [Precompiling docs][precompile-docs] and API reference docs for more + information on precompiling. Note this requires Bazel 7+ and the Pystar rule + implementation enabled. + ([#1761](https://github.com/bazelbuild/rules_python/issues/1761)) +* (rules) Attributes and flags to control precompile behavior: `precompile`, + `precompile_optimize_level`, `precompile_source_retention`, + `precompile_invalidation_mode`, and `pyc_collection` +* (toolchains) The target runtime toolchain (`//python:toolchain_type`) has + two new optional attributes: `pyc_tag` (tells the pyc filename infix to use) and + `implementation_name` (tells the Python implementation name). +* (toolchains) A toolchain type for build tools has been added: + `//python:exec_tools_toolchain_type`. +* (providers) `PyInfo` has two new attributes: `direct_pyc_files` and + `transitive_pyc_files`, which tell the pyc files a target makes available + directly and transitively, respectively. +* `//python:features.bzl` added to allow easy feature-detection in the future. + +[precompile-docs]: /precompiling ## [0.32.2] - 2024-05-14 diff --git a/docs/sphinx/index.md b/docs/sphinx/index.md index eadd3ac11a..13cfa56aa4 100644 --- a/docs/sphinx/index.md +++ b/docs/sphinx/index.md @@ -60,6 +60,7 @@ pypi-dependencies toolchains pip coverage +precompiling gazelle Contributing support diff --git a/docs/sphinx/precompiling.md b/docs/sphinx/precompiling.md new file mode 100644 index 0000000000..791fc41145 --- /dev/null +++ b/docs/sphinx/precompiling.md @@ -0,0 +1,95 @@ +# Precompiling + +Precompiling is compiling Python source files (`.py` files) into byte code (`.pyc` +files) at build +time instead of runtime. Doing it at build time can improve performance by +skipping that work at runtime. + +Precompiling is enabled by default, so there typically isn't anything special +you must do to use it. + + +## Overhead of precompiling + +While precompiling helps runtime performance, it has two main costs: +1. Increasing the size (count and disk usage) of runfiles. It approximately + double the count of the runfiles because for every `.py` file, there is also + a `.pyc` file. Compiled files are generally around the same size as the + source files, so it approximately doubles the disk usage. +2. Precompiling requires running an extra action at build time. While + compiling itself isn't that expensive, the overhead can become noticable + as more files need to be compiled. + +## Binary-level opt-in + +Because of the costs of precompiling, it may not be feasible to globally enable it +for your repo for everything. For example, some binaries may be +particularly large, and doubling the number of runfiles isn't doable. + +If this is the case, there's an alternative way to more selectively and +incrementally control precompiling on a per-binry basis. + +To use this approach, the two basic steps are: +1. Disable pyc files from being automatically added to runfiles: + `--@rules_python//python/config_settings:precompile_add_to_runfiles=decided_elsewhere`, +2. Set the `pyc_collection` attribute on the binaries/tests that should or should + not use precompiling. + +The default for the `pyc_collection` attribute is controlled by a flag, so you +can use an opt-in or opt-out approach by setting the flag: +* targets must opt-out: `--@rules_python//python/config_settings:pyc_collection=include_pyc`, +* targets must opt-in: `--@rules_python//python/config_settings:pyc_collection=disabled`, + +## Advanced precompiler customization + +The default implementation of the precompiler is a persistent, multiplexed, +sandbox-aware, cancellation-enabled, json-protocol worker that uses the same +interpreter as the target toolchain. This works well for local builds, but may +not work as well for remote execution builds. To customize the precompiler, two +mechanisms are available: + +* The exec tools toolchain allows customizing the precompiler binary used with + the `precompiler` attribute. Arbitrary binaries are supported. +* The execution requirements can be customized using + `--@rules_python//tools/precompiler:execution_requirements`. This is a list + flag that can be repeated. Each entry is a key=value that is added to the + execution requirements of the `PyPrecompile` action. Note that this flag + is specific to the rules_python precompiler. If a custom binary is used, + this flag will have to be propagated from the custom binary using the + `testing.ExecutionInfo` provider; refer to the `py_interpreter_program` an + +The default precompiler implementation is an asynchronous/concurrent +implementation. If you find it has bugs or hangs, please report them. In the +meantime, the flag `--worker_extra_flag=PyPrecompile=--worker_impl=serial` can +be used to switch to a synchronous/serial implementation that may not perform +as well, but is less likely to have issues. + +The `execution_requirements` keys of most relevance are: +* `supports-workers`: 1 or 0, to indicate if a regular persistent worker is + desired. +* `supports-multiplex-workers`: 1 o 0, to indicate if a multiplexed persistent + worker is desired. +* `requires-worker-protocol`: json or proto; the rules_python precompiler + currently only supports json. +* `supports-multiplex-sandboxing`: 1 or 0, to indicate if sanboxing is of the + worker is supported. +* `supports-worker-cancellation`: 1 or 1, to indicate if requests to the worker + can be cancelled. + +Note that any execution requirements values can be specified in the flag. + +## Known issues, caveats, and idiosyncracies + +* Precompiling requires Bazel 7+ with the Pystar rule implementation enabled. +* Mixing rules_python PyInfo with Bazel builtin PyInfo will result in pyc files + being dropped. +* Precompiled files may not be used in certain cases prior to Python 3.11. This + occurs due Python adding the directory of the binary's main `.py` file, which + causes the module to be found in the workspace source directory instead of + within the binary's runfiles directory (where the pyc files are). This can + usually be worked around by removing `sys.path[0]` (or otherwise ensuring the + runfiles directory comes before the repos source directory in `sys.path`). +* The pyc filename does not include the optimization level (e.g. + `foo.cpython-39.opt-2.pyc`). This works fine (it's all byte code), but also + means the interpreter `-O` argument can't be used -- doing so will cause the + interpreter to look for the non-existent `opt-N` named files. diff --git a/python/BUILD.bazel b/python/BUILD.bazel index d5863473d7..51404701a5 100644 --- a/python/BUILD.bazel +++ b/python/BUILD.bazel @@ -71,6 +71,11 @@ bzl_library( ], ) +bzl_library( + name = "features_bzl", + srcs = ["features.bzl"], +) + bzl_library( name = "packaging_bzl", srcs = ["packaging.bzl"], @@ -292,6 +297,11 @@ alias( actual = "@bazel_tools//tools/python:toolchain_type", ) +toolchain_type( + name = "exec_tools_toolchain_type", + visibility = ["//visibility:public"], +) + # Definitions for a Python toolchain that, at execution time, attempts to detect # a platform runtime having the appropriate major Python version. Consider this # a toolchain of last resort. diff --git a/python/config_settings/BUILD.bazel b/python/config_settings/BUILD.bazel index a017f9767e..a0e59f70c0 100644 --- a/python/config_settings/BUILD.bazel +++ b/python/config_settings/BUILD.bazel @@ -1,3 +1,11 @@ +load("@bazel_skylib//rules:common_settings.bzl", "string_flag") +load( + "//python/private:flags.bzl", + "PrecompileAddToRunfilesFlag", + "PrecompileFlag", + "PrecompileSourceRetentionFlag", + "PycCollectionFlag", +) load(":config_settings.bzl", "construct_config_settings") filegroup( @@ -12,3 +20,35 @@ filegroup( construct_config_settings( name = "construct_config_settings", ) + +string_flag( + name = "precompile", + build_setting_default = PrecompileFlag.AUTO, + values = sorted(PrecompileFlag.__members__.values()), + # NOTE: Only public because its an implicit dependency + visibility = ["//visibility:public"], +) + +string_flag( + name = "precompile_source_retention", + build_setting_default = PrecompileSourceRetentionFlag.KEEP_SOURCE, + values = sorted(PrecompileSourceRetentionFlag.__members__.values()), + # NOTE: Only public because its an implicit dependency + visibility = ["//visibility:public"], +) + +string_flag( + name = "precompile_add_to_runfiles", + build_setting_default = PrecompileAddToRunfilesFlag.ALWAYS, + values = sorted(PrecompileAddToRunfilesFlag.__members__.values()), + # NOTE: Only public because its an implicit dependency + visibility = ["//visibility:public"], +) + +string_flag( + name = "pyc_collection", + build_setting_default = PycCollectionFlag.DISABLED, + values = sorted(PycCollectionFlag.__members__.values()), + # NOTE: Only public because its an implicit dependency + visibility = ["//visibility:public"], +) diff --git a/tests/support/test_platforms.bzl b/python/features.bzl similarity index 61% rename from tests/support/test_platforms.bzl rename to python/features.bzl index 3ff3c507fc..3a10532c6e 100644 --- a/tests/support/test_platforms.bzl +++ b/python/features.bzl @@ -1,4 +1,4 @@ -# Copyright 2023 The Bazel Authors. All rights reserved. +# Copyright 2024 The Bazel Authors. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Constants for referring to platforms.""" +"""Allows detecting of rules_python features that aren't easily detected.""" -# Explicit Label() calls are required so that it resolves in @rules_python -# context instead of e.g. the @rules_testing context. -MAC = Label("//tests/support:mac") -LINUX = Label("//tests/support:linux") -WINDOWS = Label("//tests/support:windows") +features = struct( + precompile = True, +) diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index fdbd20b896..181175679a 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -86,11 +86,25 @@ bzl_library( ], ) +bzl_library( + name = "enum_bzl", + srcs = ["enum.bzl"], +) + bzl_library( name = "envsubst_bzl", srcs = ["envsubst.bzl"], ) +bzl_library( + name = "flags_bzl", + srcs = ["flags.bzl"], + deps = [ + ":enum_bzl", + "@bazel_skylib//rules:common_settings", + ], +) + bzl_library( name = "full_version_bzl", srcs = ["full_version.bzl"], @@ -166,6 +180,18 @@ bzl_library( ], ) +bzl_library( + name = "py_exec_tools_toolchain_bzl", + srcs = ["py_exec_tools_toolchain.bzl"], + deps = ["//python/private/common:providers_bzl"], +) + +bzl_library( + name = "py_interpreter_program_bzl", + srcs = ["py_interpreter_program.bzl"], + deps = ["@bazel_skylib//rules:common_settings"], +) + bzl_library( name = "py_package_bzl", srcs = ["py_package.bzl"], @@ -192,6 +218,7 @@ bzl_library( name = "py_toolchain_suite_bzl", srcs = ["py_toolchain_suite.bzl"], deps = [ + ":toolchain_types_bzl", "@bazel_skylib//lib:selects", ], ) @@ -256,6 +283,11 @@ bzl_library( ], ) +bzl_library( + name = "toolchain_types_bzl", + srcs = ["toolchain_types.bzl"], +) + bzl_library( name = "util_bzl", srcs = ["util.bzl"], diff --git a/python/private/common/BUILD.bazel b/python/private/common/BUILD.bazel index 2f0683bf1b..e258f8a5eb 100644 --- a/python/private/common/BUILD.bazel +++ b/python/private/common/BUILD.bazel @@ -32,7 +32,10 @@ bzl_library( ":providers_bzl", ":py_internal_bzl", ":semantics_bzl", + "//python/private:enum_bzl", + "//python/private:flags_bzl", "//python/private:reexports_bzl", + "@bazel_skylib//rules:common_settings", ], ) @@ -46,9 +49,12 @@ bzl_library( name = "common_bazel_bzl", srcs = ["common_bazel.bzl"], deps = [ + ":attributes_bzl", ":common_bzl", ":providers_bzl", ":py_internal_bzl", + "//python/private:py_interpreter_program_bzl", + "//python/private:toolchain_types_bzl", "@bazel_skylib//lib:paths", ], ) @@ -124,8 +130,11 @@ bzl_library( ":common_bzl", ":providers_bzl", ":py_internal_bzl", + "//python/private:flags_bzl", "//python/private:rules_cc_srcs_bzl", + "//python/private:toolchain_types_bzl", "@bazel_skylib//lib:dicts", + "@bazel_skylib//rules:common_settings", ], ) @@ -143,7 +152,10 @@ bzl_library( ":common_bzl", ":providers_bzl", ":py_internal_bzl", + "//python/private:flags_bzl", + "//python/private:toolchain_types_bzl", "@bazel_skylib//lib:dicts", + "@bazel_skylib//rules:common_settings", ], ) diff --git a/python/private/common/attributes.bzl b/python/private/common/attributes.bzl index d4f853247f..eb70055787 100644 --- a/python/private/common/attributes.bzl +++ b/python/private/common/attributes.bzl @@ -13,7 +13,10 @@ # limitations under the License. """Attributes for Python rules.""" +load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") load("@rules_cc//cc:defs.bzl", "CcInfo") +load("//python/private:enum.bzl", "enum") +load("//python/private:flags.bzl", "PrecompileFlag") load("//python/private:reexports.bzl", "BuiltinPyInfo") load(":common.bzl", "union_attrs") load(":providers.bzl", "PyInfo") @@ -28,6 +31,97 @@ _PackageSpecificationInfo = getattr(py_internal, "PackageSpecificationInfo", Non _STAMP_VALUES = [-1, 0, 1] +def _precompile_attr_get_effective_value(ctx): + precompile_flag = PrecompileFlag.get_effective_value(ctx) + + if precompile_flag == PrecompileFlag.FORCE_ENABLED: + return PrecompileAttr.ENABLED + if precompile_flag == PrecompileFlag.FORCE_DISABLED: + return PrecompileAttr.DISABLED + + precompile_attr = ctx.attr.precompile + if precompile_attr == PrecompileAttr.INHERIT: + precompile = precompile_flag + else: + precompile = precompile_attr + + # Guard against bad final states because the two enums are similar with + # magic values. + if precompile not in ( + PrecompileAttr.ENABLED, + PrecompileAttr.DISABLED, + PrecompileAttr.IF_GENERATED_SOURCE, + ): + fail("Unexpected final precompile value: {}".format(repr(precompile))) + + return precompile + +# buildifier: disable=name-conventions +PrecompileAttr = enum( + # Determine the effective value from --precompile + INHERIT = "inherit", + # Compile Python source files at build time. Note that + # --precompile_add_to_runfiles affects how the compiled files are included + # into a downstream binary. + ENABLED = "enabled", + # Don't compile Python source files at build time. + DISABLED = "disabled", + # Compile Python source files, but only if they're a generated file. + IF_GENERATED_SOURCE = "if_generated_source", + get_effective_value = _precompile_attr_get_effective_value, +) + +# buildifier: disable=name-conventions +PrecompileInvalidationModeAttr = enum( + # Automatically pick a value based on build settings. + AUTO = "auto", + # Use the pyc file if the hash of the originating source file matches the + # hash recorded in the pyc file. + CHECKED_HASH = "checked_hash", + # Always use the pyc file, even if the originating source has changed. + UNCHECKED_HASH = "unchecked_hash", +) + +def _precompile_source_retention_get_effective_value(ctx): + attr_value = ctx.attr.precompile_source_retention + if attr_value == PrecompileSourceRetentionAttr.INHERIT: + attr_value = ctx.attr._precompile_source_retention_flag[BuildSettingInfo].value + + if attr_value not in ( + PrecompileSourceRetentionAttr.KEEP_SOURCE, + PrecompileSourceRetentionAttr.OMIT_SOURCE, + PrecompileSourceRetentionAttr.OMIT_IF_GENERATED_SOURCE, + ): + fail("Unexpected final precompile_source_retention value: {}".format(repr(attr_value))) + return attr_value + +# buildifier: disable=name-conventions +PrecompileSourceRetentionAttr = enum( + INHERIT = "inherit", + KEEP_SOURCE = "keep_source", + OMIT_SOURCE = "omit_source", + OMIT_IF_GENERATED_SOURCE = "omit_if_generated_source", + get_effective_value = _precompile_source_retention_get_effective_value, +) + +def _pyc_collection_attr_is_pyc_collection_enabled(ctx): + pyc_collection = ctx.attr.pyc_collection + if pyc_collection == PycCollectionAttr.INHERIT: + pyc_collection = ctx.attr._pyc_collection_flag[BuildSettingInfo].value + + if pyc_collection not in (PycCollectionAttr.INCLUDE_PYC, PycCollectionAttr.DISABLED): + fail("Unexpected final pyc_collection value: {}".format(repr(pyc_collection))) + + return pyc_collection == PycCollectionAttr.INCLUDE_PYC + +# buildifier: disable=name-conventions +PycCollectionAttr = enum( + INHERIT = "inherit", + INCLUDE_PYC = "include_pyc", + DISABLED = "disabled", + is_pyc_collection_enabled = _pyc_collection_attr_is_pyc_collection_enabled, +) + def create_stamp_attr(**kwargs): return { "stamp": attr.int( @@ -180,6 +274,70 @@ These are typically `py_library` rules. Targets that only provide data files used at runtime belong in the `data` attribute. +""", + ), + "precompile": attr.string( + doc = """ +Whether py source files should be precompiled. + +See also: `--precompile` flag, which can override this attribute in some cases. + +Values: + +* `inherit`: Determine the value from the --precompile flag. +* `enabled`: Compile Python source files at build time. Note that + --precompile_add_to_runfiles affects how the compiled files are included into + a downstream binary. +* `disabled`: Don't compile Python source files at build time. +* `if_generated_source`: Compile Python source files, but only if they're a + generated file. +""", + default = PrecompileAttr.INHERIT, + values = sorted(PrecompileAttr.__members__.values()), + ), + "precompile_invalidation_mode": attr.string( + doc = """ +How precompiled files should be verified to be up-to-date with their associated +source files. Possible values are: +* `auto`: The effective value will be automatically determined by other build + settings. +* `checked_hash`: Use the pyc file if the hash of the source file matches the hash + recorded in the pyc file. This is most useful when working with code that + you may modify. +* `unchecked_hash`: Always use the pyc file; don't check the pyc's hash against + the source file. This is most useful when the code won't be modified. + +For more information on pyc invalidation modes, see +https://docs.python.org/3/library/py_compile.html#py_compile.PycInvalidationMode +""", + default = PrecompileInvalidationModeAttr.AUTO, + values = sorted(PrecompileInvalidationModeAttr.__members__.values()), + ), + "precompile_optimize_level": attr.int( + doc = """ +The optimization level for precompiled files. + +For more information about optimization levels, see the `compile()` function's +`optimize` arg docs at https://docs.python.org/3/library/functions.html#compile + +NOTE: The value `-1` means "current interpreter", which will be the interpreter +used _at build time when pycs are generated_, not the interpreter used at +runtime when the code actually runs. +""", + default = 0, + ), + "precompile_source_retention": attr.string( + default = PrecompileSourceRetentionAttr.INHERIT, + values = sorted(PrecompileSourceRetentionAttr.__members__.values()), + doc = """ +Determines, when a source file is compiled, if the source file is kept +in the resulting output or not. Valid values are: + +* `inherit`: Inherit the value from the `--precompile_source_retention` flag. +* `keep_source`: Include the original Python source. +* `omit_source`: Don't include the orignal py source. +* `omit_if_generated_source`: Keep the original source if it's a regular source + file, but omit it if it's a generated file. """, ), # Required attribute, but details vary by rule. @@ -191,6 +349,18 @@ attribute. # Required attribute, but the details vary by rule. # Use create_srcs_version_attr to create one. "srcs_version": None, + "_precompile_add_to_runfiles_flag": attr.label( + default = "//python/config_settings:precompile_add_to_runfiles", + providers = [BuildSettingInfo], + ), + "_precompile_flag": attr.label( + default = "//python/config_settings:precompile", + providers = [BuildSettingInfo], + ), + "_precompile_source_retention_flag": attr.label( + default = "//python/config_settings:precompile_source_retention", + providers = [BuildSettingInfo], + ), }, allow_none = True, ) diff --git a/python/private/common/common.bzl b/python/private/common/common.bzl index 75c117f5cd..cfa7db7a2d 100644 --- a/python/private/common/common.bzl +++ b/python/private/common/common.bzl @@ -29,8 +29,6 @@ _coverage_common = coverage_common _py_builtins = py_internal PackageSpecificationInfo = getattr(py_internal, "PackageSpecificationInfo", None) -TOOLCHAIN_TYPE = "@bazel_tools//tools/python:toolchain_type" - # Extensions without the dot _PYTHON_SOURCE_EXTENSIONS = ["py"] @@ -338,7 +336,7 @@ def collect_runfiles(ctx, files): collect_default = True, ) -def create_py_info(ctx, *, direct_sources, imports): +def create_py_info(ctx, *, direct_sources, direct_pyc_files, imports): """Create PyInfo provider. Args: @@ -346,6 +344,7 @@ def create_py_info(ctx, *, direct_sources, imports): direct_sources: depset of Files; the direct, raw `.py` sources for the target. This should only be Python source files. It should not include pyc files. + direct_pyc_files: depset of Files; the direct `.pyc` sources for the target. imports: depset of strings; the import path values to propagate. Returns: @@ -358,6 +357,7 @@ def create_py_info(ctx, *, direct_sources, imports): has_py3_only_sources = ctx.attr.srcs_version in ("PY3", "PY3ONLY") transitive_sources_depsets = [] # list of depsets transitive_sources_files = [] # list of Files + transitive_pyc_depsets = [direct_pyc_files] # list of depsets for target in ctx.attr.deps: # PyInfo may not be present e.g. cc_library rules. if PyInfo in target or BuiltinPyInfo in target: @@ -366,6 +366,10 @@ def create_py_info(ctx, *, direct_sources, imports): uses_shared_libraries = uses_shared_libraries or info.uses_shared_libraries has_py2_only_sources = has_py2_only_sources or info.has_py2_only_sources has_py3_only_sources = has_py3_only_sources or info.has_py3_only_sources + + # BuiltinPyInfo doesn't have this field. + if hasattr(info, "transitive_pyc_files"): + transitive_pyc_depsets.append(info.transitive_pyc_files) else: # TODO(b/228692666): Remove this once non-PyInfo targets are no # longer supported in `deps`. @@ -412,11 +416,17 @@ def create_py_info(ctx, *, direct_sources, imports): has_py2_only_sources = has_py2_only_sources, has_py3_only_sources = has_py3_only_sources, uses_shared_libraries = uses_shared_libraries, + direct_pyc_files = direct_pyc_files, + transitive_pyc_files = depset(transitive = transitive_pyc_depsets), ) # TODO(b/203567235): Set `uses_shared_libraries` field, though the Bazel # docs indicate it's unused in Bazel and may be removed. py_info = PyInfo(**py_info_kwargs) + + # Remove args that BuiltinPyInfo doesn't support + py_info_kwargs.pop("direct_pyc_files") + py_info_kwargs.pop("transitive_pyc_files") builtin_py_info = BuiltinPyInfo(**py_info_kwargs) return py_info, deps_transitive_sources, builtin_py_info diff --git a/python/private/common/common_bazel.bzl b/python/private/common/common_bazel.bzl index a03721334d..8f86fb5796 100644 --- a/python/private/common/common_bazel.bzl +++ b/python/private/common/common_bazel.bzl @@ -15,6 +15,9 @@ load("@bazel_skylib//lib:paths.bzl", "paths") load("@rules_cc//cc:defs.bzl", "CcInfo", "cc_common") +load("//python/private:py_interpreter_program.bzl", "PyInterpreterProgramInfo") +load("//python/private:toolchain_types.bzl", "EXEC_TOOLS_TOOLCHAIN_TYPE", "TARGET_TOOLCHAIN_TYPE") +load(":attributes.bzl", "PrecompileAttr", "PrecompileInvalidationModeAttr", "PrecompileSourceRetentionAttr") load(":common.bzl", "is_bool") load(":providers.bzl", "PyCcLinkParamsProvider") load(":py_internal.bzl", "py_internal") @@ -55,12 +58,160 @@ def maybe_precompile(ctx, srcs): srcs: List of Files; the inputs to maybe precompile. Returns: - List of Files; the desired output files derived from the input sources. + Struct of precompiling results with fields: + * `keep_srcs`: list of File; the input sources that should be included + as default outputs and runfiles. + * `pyc_files`: list of File; the precompiled files. + * `py_to_pyc_map`: dict of src File input to pyc File output. If a source + file wasn't precompiled, it won't be in the dict. """ - _ = ctx # @unused - # Precompilation isn't implemented yet, so just return srcs as-is - return srcs + # The exec tools toolchain and precompiler are optional. Rather than + # fail, just skip precompiling, as its mostly just an optimization. + exec_tools_toolchain = ctx.toolchains[EXEC_TOOLS_TOOLCHAIN_TYPE] + if exec_tools_toolchain == None or exec_tools_toolchain.exec_tools.precompiler == None: + precompile = PrecompileAttr.DISABLED + else: + precompile = PrecompileAttr.get_effective_value(ctx) + + source_retention = PrecompileSourceRetentionAttr.get_effective_value(ctx) + + result = struct( + keep_srcs = [], + pyc_files = [], + py_to_pyc_map = {}, + ) + for src in srcs: + # The logic below is a bit convoluted. The gist is: + # * If precompiling isn't done, add the py source to default outputs. + # Otherwise, the source retention flag decides. + # * In order to determine `use_pycache`, we have to know if the source + # is being added to the default outputs. + is_generated_source = not src.is_source + should_precompile = ( + precompile == PrecompileAttr.ENABLED or + (precompile == PrecompileAttr.IF_GENERATED_SOURCE and is_generated_source) + ) + keep_source = ( + not should_precompile or + source_retention == PrecompileSourceRetentionAttr.KEEP_SOURCE or + (source_retention == PrecompileSourceRetentionAttr.OMIT_IF_GENERATED_SOURCE and not is_generated_source) + ) + if should_precompile: + pyc = _precompile(ctx, src, use_pycache = keep_source) + result.pyc_files.append(pyc) + result.py_to_pyc_map[src] = pyc + if keep_source: + result.keep_srcs.append(src) + + return result + +def _precompile(ctx, src, *, use_pycache): + """Compile a py file to pyc. + + Args: + ctx: rule context. + src: File object to compile + use_pycache: bool. True if the output should be within the `__pycache__` + sub-directory. False if it should be alongside the original source + file. + + Returns: + File of the generated pyc file. + """ + exec_tools_info = ctx.toolchains[EXEC_TOOLS_TOOLCHAIN_TYPE].exec_tools + target_toolchain = ctx.toolchains[TARGET_TOOLCHAIN_TYPE].py3_runtime + + # These args control starting the precompiler, e.g., when run as a worker, + # these args are only passed once. + precompiler_startup_args = ctx.actions.args() + + env = {} + tools = [] + + precompiler = exec_tools_info.precompiler + if PyInterpreterProgramInfo in precompiler: + precompiler_executable = exec_tools_info.exec_interpreter[DefaultInfo].files_to_run + program_info = precompiler[PyInterpreterProgramInfo] + env.update(program_info.env) + precompiler_startup_args.add_all(program_info.interpreter_args) + default_info = precompiler[DefaultInfo] + precompiler_startup_args.add(default_info.files_to_run.executable) + tools.append(default_info.files_to_run) + elif precompiler[DefaultInfo].files_to_run: + precompiler_executable = precompiler[DefaultInfo].files_to_run + else: + fail(("Unrecognized precompiler: target '{}' does not provide " + + "PyInterpreterProgramInfo nor appears to be executable").format( + precompiler, + )) + + stem = src.basename[:-(len(src.extension) + 1)] + if use_pycache: + if not target_toolchain.pyc_tag: + fail("Unable to create __pycache__ pyc: pyc_tag is empty") + pyc_path = "__pycache__/{stem}.{tag}.pyc".format( + stem = stem, + tag = target_toolchain.pyc_tag, + ) + else: + pyc_path = "{}.pyc".format(stem) + + pyc = ctx.actions.declare_file(pyc_path, sibling = src) + + invalidation_mode = ctx.attr.precompile_invalidation_mode + if invalidation_mode == PrecompileInvalidationModeAttr.AUTO: + if ctx.var["COMPILATION_MODE"] == "opt": + invalidation_mode = PrecompileInvalidationModeAttr.UNCHECKED_HASH + else: + invalidation_mode = PrecompileInvalidationModeAttr.CHECKED_HASH + + # Though --modify_execution_info exists, it can only set keys with + # empty values, which doesn't work for persistent worker settings. + execution_requirements = {} + if testing.ExecutionInfo in precompiler: + execution_requirements.update(precompiler[testing.ExecutionInfo].requirements) + + # These args are passed for every precompilation request, e.g. as part of + # a request to a worker process. + precompile_request_args = ctx.actions.args() + + # Always use param files so that it can be run as a persistent worker + precompile_request_args.use_param_file("@%s", use_always = True) + precompile_request_args.set_param_file_format("multiline") + + precompile_request_args.add("--invalidation_mode", invalidation_mode) + precompile_request_args.add("--src", src) + + # NOTE: src.short_path is used because src.path contains the platform and + # build-specific hash portions of the path, which we don't want in the + # pyc data. Note, however, for remote-remote files, short_path will + # have the repo name, which is likely to contain extraneous info. + precompile_request_args.add("--src_name", src.short_path) + precompile_request_args.add("--pyc", pyc) + precompile_request_args.add("--optimize", ctx.attr.precompile_optimize_level) + + version_info = target_toolchain.interpreter_version_info + python_version = "{}.{}".format(version_info.major, version_info.minor) + precompile_request_args.add("--python_version", python_version) + + ctx.actions.run( + executable = precompiler_executable, + arguments = [precompiler_startup_args, precompile_request_args], + inputs = [src], + outputs = [pyc], + mnemonic = "PyPrecompile", + progress_message = "Python precompiling %{input} into %{output}", + tools = tools, + env = env | { + "PYTHONHASHSEED": "0", # Helps avoid non-deterministic behavior + "PYTHONNOUSERSITE": "1", # Helps avoid non-deterministic behavior + "PYTHONSAFEPATH": "1", # Helps avoid incorrect import issues + }, + execution_requirements = execution_requirements, + toolchain = EXEC_TOOLS_TOOLCHAIN_TYPE, + ) + return pyc def get_imports(ctx): """Gets the imports from a rule's `imports` attribute. diff --git a/python/private/common/providers.bzl b/python/private/common/providers.bzl index 0b43413dc0..ab56fbef4d 100644 --- a/python/private/common/providers.bzl +++ b/python/private/common/providers.bzl @@ -34,13 +34,47 @@ def _define_provider(doc, fields, **kwargs): return provider("Stub, not used", fields = []), None return provider(doc = doc, fields = fields, **kwargs) +def _optional_int(value): + return int(value) if value != None else None + +def interpreter_version_info_struct_from_dict(info_dict): + """Create a struct of interpreter version info from a dict from an attribute. + + Args: + info_dict: dict of versio info fields. See interpreter_version_info + provider field docs. + + Returns: + struct of version info; see interpreter_version_info provider field docs. + """ + info_dict = dict(info_dict) # Copy in case the original is frozen + if info_dict: + if not ("major" in info_dict and "minor" in info_dict): + fail("interpreter_version_info must have at least two keys, 'major' and 'minor'") + version_info_struct = struct( + major = _optional_int(info_dict.pop("major", None)), + minor = _optional_int(info_dict.pop("minor", None)), + micro = _optional_int(info_dict.pop("micro", None)), + releaselevel = str(info_dict.pop("releaselevel")) if "releaselevel" in info_dict else None, + serial = _optional_int(info_dict.pop("serial", None)), + ) + + if len(info_dict.keys()) > 0: + fail("unexpected keys {} in interpreter_version_info".format( + str(info_dict.keys()), + )) + + return version_info_struct + def _PyRuntimeInfo_init( *, + implementation_name = None, interpreter_path = None, interpreter = None, files = None, coverage_tool = None, coverage_files = None, + pyc_tag = None, python_version, stub_shebang = None, bootstrap_template = None, @@ -81,32 +115,16 @@ def _PyRuntimeInfo_init( if not stub_shebang: stub_shebang = DEFAULT_STUB_SHEBANG - if interpreter_version_info: - if not ("major" in interpreter_version_info and "minor" in interpreter_version_info): - fail("interpreter_version_info must have at least two keys, 'major' and 'minor'") - - _interpreter_version_info = dict(**interpreter_version_info) - interpreter_version_info = struct( - major = int(_interpreter_version_info.pop("major")), - minor = int(_interpreter_version_info.pop("minor")), - micro = int(_interpreter_version_info.pop("micro")) if "micro" in _interpreter_version_info else None, - releaselevel = str(_interpreter_version_info.pop("releaselevel")) if "releaselevel" in _interpreter_version_info else None, - serial = int(_interpreter_version_info.pop("serial")) if "serial" in _interpreter_version_info else None, - ) - - if len(_interpreter_version_info.keys()) > 0: - fail("unexpected keys {} in interpreter_version_info".format( - str(_interpreter_version_info.keys()), - )) - return { "bootstrap_template": bootstrap_template, "coverage_files": coverage_files, "coverage_tool": coverage_tool, "files": files, + "implementation_name": implementation_name, "interpreter": interpreter, "interpreter_path": interpreter_path, - "interpreter_version_info": interpreter_version_info, + "interpreter_version_info": interpreter_version_info_struct_from_dict(interpreter_version_info), + "pyc_tag": pyc_tag, "python_version": python_version, "stub_shebang": stub_shebang, } @@ -143,6 +161,7 @@ the same conventions as the standard CPython interpreter. "The value of `interpreter` need not be included in this field. If " + "this is a platform runtime then this field is `None`." ), + "implementation_name": "Optional string; the Python implementation name (`sys.implementation.name`)", "interpreter": ( "If this is an in-build runtime, this field is a `File` representing " + "the interpreter. Otherwise, this is `None`. Note that an in-build " + @@ -166,6 +185,12 @@ the same conventions as the standard CPython interpreter. " * releaselevel: optional str, the release level\n" + " * serial: optional int, the serial number of the release" ), + "pyc_tag": """ +Optional string; the tag portion of a pyc filename, e.g. the `cpython-39` infix +of `foo.cpython-39.pyc`. See PEP 3147. If not specified, it will be computed +from `implementation_name` and `interpreter_version_info`. If no pyc_tag is +available, then only source-less pyc generation will function correctly. +""", "python_version": ( "Indicates whether this runtime uses Python major version 2 or 3. " + "Valid values are (only) `\"PY2\"` and " + @@ -194,7 +219,9 @@ def _PyInfo_init( uses_shared_libraries = False, imports = depset(), has_py2_only_sources = False, - has_py3_only_sources = False): + has_py3_only_sources = False, + direct_pyc_files = depset(), + transitive_pyc_files = depset()): _check_arg_type("transitive_sources", "depset", transitive_sources) # Verify it's postorder compatible, but retain is original ordering. @@ -204,10 +231,14 @@ def _PyInfo_init( _check_arg_type("imports", "depset", imports) _check_arg_type("has_py2_only_sources", "bool", has_py2_only_sources) _check_arg_type("has_py3_only_sources", "bool", has_py3_only_sources) + _check_arg_type("direct_pyc_files", "depset", direct_pyc_files) + _check_arg_type("transitive_pyc_files", "depset", transitive_pyc_files) return { + "direct_pyc_files": direct_pyc_files, "has_py2_only_sources": has_py2_only_sources, "has_py3_only_sources": has_py2_only_sources, "imports": imports, + "transitive_pyc_files": transitive_pyc_files, "transitive_sources": transitive_sources, "uses_shared_libraries": uses_shared_libraries, } @@ -216,6 +247,10 @@ PyInfo, _unused_raw_py_info_ctor = _define_provider( doc = "Encapsulates information provided by the Python rules.", init = _PyInfo_init, fields = { + "direct_pyc_files": """ +depset[File] of precompiled Python files that are considered directly provided +by the target. +""", "has_py2_only_sources": "Whether any of this target's transitive sources requires a Python 2 runtime.", "has_py3_only_sources": "Whether any of this target's transitive sources requires a Python 3 runtime.", "imports": """\ @@ -223,6 +258,10 @@ A depset of import path strings to be added to the `PYTHONPATH` of executable Python targets. These are accumulated from the transitive `deps`. The order of the depset is not guaranteed and may be changed in the future. It is recommended to use `default` order (the default). +""", + "transitive_pyc_files": """ +depset[File] of direct and transitive precompiled Python files that are provied +by the target. """, "transitive_sources": """\ A (`postorder`-compatible) depset of `.py` files appearing in the target's diff --git a/python/private/common/py_executable.bzl b/python/private/common/py_executable.bzl index 03608930f0..cf7d6fad50 100644 --- a/python/private/common/py_executable.bzl +++ b/python/private/common/py_executable.bzl @@ -14,13 +14,21 @@ """Common functionality between test/binary executables.""" load("@bazel_skylib//lib:dicts.bzl", "dicts") +load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") load("@rules_cc//cc:defs.bzl", "cc_common") +load("//python/private:flags.bzl", "PrecompileAddToRunfilesFlag") load("//python/private:reexports.bzl", "BuiltinPyRuntimeInfo") +load( + "//python/private:toolchain_types.bzl", + "EXEC_TOOLS_TOOLCHAIN_TYPE", + TOOLCHAIN_TYPE = "TARGET_TOOLCHAIN_TYPE", +) load( ":attributes.bzl", "AGNOSTIC_EXECUTABLE_ATTRS", "COMMON_ATTRS", "PY_SRCS_ATTRS", + "PycCollectionAttr", "SRCS_VERSION_ALL_VALUES", "create_srcs_attr", "create_srcs_version_attr", @@ -28,7 +36,6 @@ load( load(":cc_helper.bzl", "cc_helper") load( ":common.bzl", - "TOOLCHAIN_TYPE", "check_native_allowed", "collect_imports", "collect_runfiles", @@ -43,6 +50,7 @@ load( load( ":providers.bzl", "PyCcLinkParamsProvider", + "PyInfo", "PyRuntimeInfo", ) load(":py_internal.bzl", "py_internal") @@ -80,6 +88,23 @@ Optional; the name of the source file that is the main entry point of the application. This file must also be listed in `srcs`. If left unspecified, `name`, with `.py` appended, is used instead. If `name` does not match any filename in `srcs`, `main` must be specified. +""", + ), + "pyc_collection": attr.string( + default = PycCollectionAttr.INHERIT, + values = sorted(PycCollectionAttr.__members__.values()), + doc = """ +Determines whether pyc files from dependencies should be manually included. + +NOTE: This setting is only useful with `--precompile_add_to_runfiles=decided_elsewhere`. + +Valid values are: +* `include_pyc`: Add pyc files from dependencies in the binary (from + `PyInfo.transitive_pyc_files`. +* `disabled`: Don't explicitly add pyc files from dependencies. Note that + pyc files may still come from dependencies if a target includes them as + part of their runfiles (such as when `--precompile_add_to_runfiles=always` + is used). """, ), # TODO(b/203567235): In Google, this attribute is deprecated, and can @@ -93,6 +118,10 @@ filename in `srcs`, `main` must be specified. values = ["PY2", "PY3"], doc = "Defunct, unused, does nothing.", ), + "_pyc_collection_flag": attr.label( + default = "//python/config_settings:pyc_collection", + providers = [BuildSettingInfo], + ), "_windows_constraints": attr.label_list( default = [ "@platforms//os:windows", @@ -125,9 +154,19 @@ def py_executable_base_impl(ctx, *, semantics, is_test, inherited_environment = main_py = determine_main(ctx) direct_sources = filter_to_py_srcs(ctx.files.srcs) - output_sources = semantics.maybe_precompile(ctx, direct_sources) + precompile_result = semantics.maybe_precompile(ctx, direct_sources) + + # Sourceless precompiled builds omit the main py file from outputs, so + # main has to be pointed to the precompiled main instead. + if main_py not in precompile_result.keep_srcs: + main_py = precompile_result.py_to_pyc_map[main_py] + direct_pyc_files = depset(precompile_result.pyc_files) + + default_outputs = precompile_result.keep_srcs + precompile_result.pyc_files + executable = _declare_executable_file(ctx) + default_outputs.append(executable) + imports = collect_imports(ctx, semantics) - executable, files_to_build = _compute_outputs(ctx, output_sources) runtime_details = _get_runtime_details(ctx, semantics) if ctx.configuration.coverage_enabled: @@ -151,7 +190,8 @@ def py_executable_base_impl(ctx, *, semantics, is_test, inherited_environment = ctx, executable = executable, extra_deps = extra_deps, - files_to_build = files_to_build, + main_py_files = depset([main_py] + precompile_result.keep_srcs), + direct_pyc_files = direct_pyc_files, extra_common_runfiles = [ runtime_details.runfiles, cc_details.extra_runfiles, @@ -171,11 +211,8 @@ def py_executable_base_impl(ctx, *, semantics, is_test, inherited_environment = native_deps_details = native_deps_details, runfiles_details = runfiles_details, ) - files_to_build = depset(transitive = [ - exec_result.extra_files_to_build, - files_to_build, - ]) - extra_exec_runfiles = ctx.runfiles(transitive_files = files_to_build) + + extra_exec_runfiles = ctx.runfiles(transitive_files = exec_result.extra_files_to_build) runfiles_details = struct( default_runfiles = runfiles_details.default_runfiles.merge(extra_exec_runfiles), data_runfiles = runfiles_details.data_runfiles.merge(extra_exec_runfiles), @@ -188,7 +225,8 @@ def py_executable_base_impl(ctx, *, semantics, is_test, inherited_environment = main_py = main_py, imports = imports, direct_sources = direct_sources, - files_to_build = files_to_build, + direct_pyc_files = direct_pyc_files, + default_outputs = depset(default_outputs, transitive = [exec_result.extra_files_to_build]), runtime_details = runtime_details, cc_info = cc_details.cc_info_for_propagating, inherited_environment = inherited_environment, @@ -208,15 +246,13 @@ def _validate_executable(ctx): fail("It is not allowed to use Python 2") check_native_allowed(ctx) -def _compute_outputs(ctx, output_sources): +def _declare_executable_file(ctx): if target_platform_has_any_constraint(ctx, ctx.attr._windows_constraints): executable = ctx.actions.declare_file(ctx.label.name + ".exe") else: executable = ctx.actions.declare_file(ctx.label.name) - # TODO(b/208657718): Remove output_sources from the default outputs - # once the depot is cleaned up. - return executable, depset([executable] + output_sources) + return executable def _get_runtime_details(ctx, semantics): """Gets various information about the Python runtime to use. @@ -347,7 +383,8 @@ def _get_base_runfiles_for_binary( *, executable, extra_deps, - files_to_build, + main_py_files, + direct_pyc_files, extra_common_runfiles, semantics): """Returns the set of runfiles necessary prior to executable creation. @@ -360,7 +397,8 @@ def _get_base_runfiles_for_binary( executable: The main executable output. extra_deps: List of Targets; additional targets whose runfiles will be added to the common runfiles. - files_to_build: depset of File of the default outputs to add into runfiles. + main_py_files: depset of File of the default outputs to add into runfiles. + direct_pyc_files: depset of File of pyc files directly from this target. extra_common_runfiles: List of runfiles; additional runfiles that will be added to the common runfiles. semantics: A `BinarySemantics` struct; see `create_binary_semantics_struct`. @@ -370,9 +408,20 @@ def _get_base_runfiles_for_binary( * default_runfiles: The default runfiles * data_runfiles: The data runfiles """ + common_runfiles_depsets = [main_py_files] + + if ctx.attr._precompile_add_to_runfiles_flag[BuildSettingInfo].value == PrecompileAddToRunfilesFlag.ALWAYS: + common_runfiles_depsets.append(direct_pyc_files) + elif PycCollectionAttr.is_pyc_collection_enabled(ctx): + common_runfiles_depsets.append(direct_pyc_files) + for dep in (ctx.attr.deps + extra_deps): + if PyInfo not in dep: + continue + common_runfiles_depsets.append(dep[PyInfo].transitive_pyc_files) + common_runfiles = collect_runfiles(ctx, depset( direct = [executable], - transitive = [files_to_build], + transitive = common_runfiles_depsets, )) if extra_deps: common_runfiles = common_runfiles.merge_all([ @@ -712,7 +761,8 @@ def _create_providers( executable, main_py, direct_sources, - files_to_build, + direct_pyc_files, + default_outputs, runfiles_details, imports, cc_info, @@ -729,7 +779,8 @@ def _create_providers( direct_sources: list of Files; the direct, raw `.py` sources for the target. This should only be Python source files. It should not include pyc files. - files_to_build: depset of Files; the files for DefaultInfo.files + direct_pyc_files: depset of File; the direct pyc files for the target. + default_outputs: depset of Files; the files for DefaultInfo.files runfiles_details: runfiles that will become the default and data runfiles. imports: depset of strings; the import paths to propagate cc_info: optional CcInfo; Linking information to propagate as @@ -748,7 +799,7 @@ def _create_providers( providers = [ DefaultInfo( executable = executable, - files = files_to_build, + files = default_outputs, default_runfiles = _py_builtins.make_runfiles_respect_legacy_external_runfiles( ctx, runfiles_details.default_runfiles, @@ -798,6 +849,7 @@ def _create_providers( py_info, deps_transitive_sources, builtin_py_info = create_py_info( ctx, direct_sources = depset(direct_sources), + direct_pyc_files = direct_pyc_files, imports = imports, ) @@ -852,7 +904,10 @@ def create_base_executable_rule(*, attrs, fragments = [], **kwargs): return rule( # TODO: add ability to remove attrs, i.e. for imports attr attrs = dicts.add(EXECUTABLE_ATTRS, attrs), - toolchains = [TOOLCHAIN_TYPE] + _CC_TOOLCHAINS, + toolchains = [ + TOOLCHAIN_TYPE, + config_common.toolchain_type(EXEC_TOOLS_TOOLCHAIN_TYPE, mandatory = False), + ] + _CC_TOOLCHAINS, fragments = fragments, **kwargs ) diff --git a/python/private/common/py_executable_bazel.bzl b/python/private/common/py_executable_bazel.bzl index ff4e3c1fb0..1c41fc15e5 100644 --- a/python/private/common/py_executable_bazel.bzl +++ b/python/private/common/py_executable_bazel.bzl @@ -59,8 +59,10 @@ the `srcs` of Python targets as required. ), "_launcher": attr.label( cfg = "target", - default = "@bazel_tools//tools/launcher:launcher", - executable = True, + # NOTE: This is an executable, but is only used for Windows. It + # can't have executable=True because the backing target is an + # empty target for other platforms. + default = "//tools/launcher:launcher", ), "_py_interpreter": attr.label( # The configuration_field args are validated when called; @@ -332,10 +334,11 @@ def _create_windows_exe_launcher( launch_info.add(python_binary_path, format = "python_bin_path=%s") launch_info.add("1" if use_zip_file else "0", format = "use_zip_file=%s") + launcher = ctx.attr._launcher[DefaultInfo].files_to_run.executable ctx.actions.run( executable = ctx.executable._windows_launcher_maker, - arguments = [ctx.executable._launcher.path, launch_info, output.path], - inputs = [ctx.executable._launcher], + arguments = [launcher.path, launch_info, output.path], + inputs = [launcher], outputs = [output], mnemonic = "PyBuildLauncher", progress_message = "Creating launcher for %{label}", diff --git a/python/private/common/py_library.bzl b/python/private/common/py_library.bzl index 28ee7bf4b6..b927a4fb9a 100644 --- a/python/private/common/py_library.bzl +++ b/python/private/common/py_library.bzl @@ -14,6 +14,13 @@ """Implementation of py_library rule.""" load("@bazel_skylib//lib:dicts.bzl", "dicts") +load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") +load("//python/private:flags.bzl", "PrecompileAddToRunfilesFlag") +load( + "//python/private:toolchain_types.bzl", + "EXEC_TOOLS_TOOLCHAIN_TYPE", + TOOLCHAIN_TYPE = "TARGET_TOOLCHAIN_TYPE", +) load( ":attributes.bzl", "COMMON_ATTRS", @@ -57,14 +64,25 @@ def py_library_impl(ctx, *, semantics): """ check_native_allowed(ctx) direct_sources = filter_to_py_srcs(ctx.files.srcs) - output_sources = depset(semantics.maybe_precompile(ctx, direct_sources)) - runfiles = collect_runfiles(ctx = ctx, files = output_sources) + precompile_result = semantics.maybe_precompile(ctx, direct_sources) + direct_pyc_files = depset(precompile_result.pyc_files) + default_outputs = depset(precompile_result.keep_srcs, transitive = [direct_pyc_files]) + + extra_runfiles_depsets = [depset(precompile_result.keep_srcs)] + if ctx.attr._precompile_add_to_runfiles_flag[BuildSettingInfo].value == PrecompileAddToRunfilesFlag.ALWAYS: + extra_runfiles_depsets.append(direct_pyc_files) + + runfiles = collect_runfiles( + ctx = ctx, + files = depset(transitive = extra_runfiles_depsets), + ) cc_info = semantics.get_cc_info_for_library(ctx) py_info, deps_transitive_sources, builtins_py_info = create_py_info( ctx, direct_sources = depset(direct_sources), imports = collect_imports(ctx, semantics), + direct_pyc_files = direct_pyc_files, ) # TODO(b/253059598): Remove support for extra actions; https://github.com/bazelbuild/bazel/issues/16455 @@ -76,7 +94,7 @@ def py_library_impl(ctx, *, semantics): ) return [ - DefaultInfo(files = output_sources, runfiles = runfiles), + DefaultInfo(files = default_outputs, runfiles = runfiles), py_info, builtins_py_info, create_instrumented_files_info(ctx), @@ -95,8 +113,23 @@ def create_py_library_rule(*, attrs = {}, **kwargs): """ return rule( attrs = dicts.add(LIBRARY_ATTRS, attrs), + toolchains = [ + config_common.toolchain_type(TOOLCHAIN_TYPE, mandatory = False), + config_common.toolchain_type(EXEC_TOOLS_TOOLCHAIN_TYPE, mandatory = False), + ], # TODO(b/253818097): fragments=py is only necessary so that # RequiredConfigFragmentsTest passes fragments = ["py"], + doc = """ +A library of Python code that can be depended upon. + +Default outputs: +* The input Python sources +* The precompiled artifacts from the sources. + +NOTE: Precompilation affects which of the default outputs are included in the +resulting runfiles. See the precompile-related attributes and flags for +more information. +""", **kwargs ) diff --git a/python/private/common/py_runtime_rule.bzl b/python/private/common/py_runtime_rule.bzl index 190158bed0..53d925cdba 100644 --- a/python/private/common/py_runtime_rule.bzl +++ b/python/private/common/py_runtime_rule.bzl @@ -86,21 +86,35 @@ def _py_runtime_impl(ctx): # fail("Using Python 2 is not supported and disabled; see " + # "https://github.com/bazelbuild/bazel/issues/15684") + pyc_tag = ctx.attr.pyc_tag + if not pyc_tag and (ctx.attr.implementation_name and + interpreter_version_info.get("major") and + interpreter_version_info.get("minor")): + pyc_tag = "{}-{}{}".format( + ctx.attr.implementation_name, + interpreter_version_info["major"], + interpreter_version_info["minor"], + ) + py_runtime_info_kwargs = dict( interpreter_path = interpreter_path or None, interpreter = interpreter, files = runtime_files if hermetic else None, coverage_tool = coverage_tool, coverage_files = coverage_files, + pyc_tag = pyc_tag, python_version = python_version, stub_shebang = ctx.attr.stub_shebang, bootstrap_template = ctx.file.bootstrap_template, interpreter_version_info = interpreter_version_info, + implementation_name = ctx.attr.implementation_name, ) builtin_py_runtime_info_kwargs = dict(py_runtime_info_kwargs) - # Pop this property as it does not exist on BuiltinPyRuntimeInfo + # Pop these because they don't exist on BuiltinPyRuntimeInfo builtin_py_runtime_info_kwargs.pop("interpreter_version_info") + builtin_py_runtime_info_kwargs.pop("pyc_tag") + builtin_py_runtime_info_kwargs.pop("implementation_name") if not IS_BAZEL_7_OR_HIGHER: builtin_py_runtime_info_kwargs.pop("bootstrap_template") @@ -211,6 +225,9 @@ These files will be added to the runfiles of Python binaries that use this runtime. For a platform runtime this attribute must not be set. """, ), + "implementation_name": attr.string( + doc = "The Python implementation name (`sys.implementation.name`)", + ), "interpreter": attr.label( # We set `allow_files = True` to allow specifying executable # targets from rules that have more than one default output, @@ -253,6 +270,14 @@ values are strings, most are converted to ints. The supported keys are: """, mandatory = False, ), + "pyc_tag": attr.string( + doc = """ +Optional string; the tag portion of a pyc filename, e.g. the `cpython-39` infix +of `foo.cpython-39.pyc`. See PEP 3147. If not specified, it will be computed +from `implementation_name` and `interpreter_version_info`. If no pyc_tag is +available, then only source-less pyc generation will function correctly. +""", + ), "python_version": attr.string( default = "PY3", values = ["PY2", "PY3"], diff --git a/python/private/enum.bzl b/python/private/enum.bzl new file mode 100644 index 0000000000..011d9fbda1 --- /dev/null +++ b/python/private/enum.bzl @@ -0,0 +1,36 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Enum-like object utilities + +This is a separate file to minimize transitive loads. +""" + +def enum(**kwargs): + """Creates a struct whose primary purpose is to be like an enum. + + Args: + **kwargs: The fields of the returned struct. All uppercase names will + be treated as enum values and added to `__members__`. + + Returns: + `struct` with the given values. It also has the field `__members__`, + which is a dict of the enum names and values. + """ + members = { + key: value + for key, value in kwargs.items() + if key.upper() == key + } + return struct(__members__ = members, **kwargs) diff --git a/python/private/flags.bzl b/python/private/flags.bzl new file mode 100644 index 0000000000..36d305da8a --- /dev/null +++ b/python/private/flags.bzl @@ -0,0 +1,93 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Values and helpers for flags. + +NOTE: The transitive loads of this should be kept minimal. This avoids loading +unnecessary files when all that are needed are flag definitions. +""" + +load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") +load("//python/private:enum.bzl", "enum") + +def _precompile_flag_get_effective_value(ctx): + value = ctx.attr._precompile_flag[BuildSettingInfo].value + if value == PrecompileFlag.AUTO: + value = PrecompileFlag.DISABLED + return value + +# Determines if Python source files should be compiled at build time. +# +# NOTE: The flag value is overridden by the target-level attribute, except +# for the case of `force_enabled` and `forced_disabled`. +# buildifier: disable=name-conventions +PrecompileFlag = enum( + # Automatically decide the effective value based on environment, + # target platform, etc. + AUTO = "auto", + # Compile Python source files at build time. Note that + # --precompile_add_to_runfiles affects how the compiled files are included + # into a downstream binary. + ENABLED = "enabled", + # Don't compile Python source files at build time. + DISABLED = "disabled", + # Compile Python source files, but only if they're a generated file. + IF_GENERATED_SOURCE = "if_generated_source", + # Like `enabled`, except overrides target-level setting. This is mostly + # useful for development, testing enabling precompilation more broadly, or + # as an escape hatch if build-time compiling is not available. + FORCE_ENABLED = "force_enabled", + # Like `disabled`, except overrides target-level setting. This is useful + # useful for development, testing enabling precompilation more broadly, or + # as an escape hatch if build-time compiling is not available. + FORCE_DISABLED = "force_disabled", + get_effective_value = _precompile_flag_get_effective_value, +) + +# Determines if, when a source file is compiled, if the source file is kept +# in the resulting output or not. +# buildifier: disable=name-conventions +PrecompileSourceRetentionFlag = enum( + # Include the original py source in the output. + KEEP_SOURCE = "keep_source", + # Don't include the original py source. + OMIT_SOURCE = "omit_source", + # Keep the original py source if it's a regular source file, but omit it + # if it's a generated file. + OMIT_IF_GENERATED_SOURCE = "omit_if_generated_source", +) + +# Determines if a target adds its compiled files to its runfiles. When a target +# compiles its files, but doesn't add them to its own runfiles, it relies on +# a downstream target to retrieve them from `PyInfo.transitive_pyc_files` +# buildifier: disable=name-conventions +PrecompileAddToRunfilesFlag = enum( + # Always include the compiled files in the target's runfiles. + ALWAYS = "always", + # Don't include the compiled files in the target's runfiles; they are + # still added to `PyInfo.transitive_pyc_files`. See also: + # `py_binary.pyc_collection` attribute. This is useful for allowing + # incrementally enabling precompilation on a per-binary basis. + DECIDED_ELSEWHERE = "decided_elsewhere", +) + +# Determine if `py_binary` collects transitive pyc files. +# NOTE: This flag is only respect if `py_binary.pyc_collection` is `inherit`. +# buildifier: disable=name-conventions +PycCollectionFlag = enum( + # Include `PyInfo.transitive_pyc_files` as part of the binary. + INCLUDE_PYC = "include_pyc", + # Don't include `PyInfo.transitive_pyc_files` as part of the binary. + DISABLED = "disabled", +) diff --git a/python/private/py_exec_tools_info.bzl b/python/private/py_exec_tools_info.bzl new file mode 100644 index 0000000000..3011f531c8 --- /dev/null +++ b/python/private/py_exec_tools_info.bzl @@ -0,0 +1,77 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Implementation of the exec tools toolchain provider.""" + +PyExecToolsInfo = provider( + doc = "Build tools used as part of building Python programs.", + fields = { + "exec_interpreter": """ +Optional Target; an interpreter valid for running in the exec configuration. +When running it in an action, use `DefaultInfo.files_to_run` to ensure all its +files are appropriately available. An exec interpreter may not be available, +e.g. if all the exec tools are prebuilt binaries. + +NOTE: this interpreter is really only for use when a build tool cannot use +the Python toolchain itself. When possible, prefeer to define a `py_binary` +instead and use it via a `cfg=exec` attribute; this makes it much easier +to setup the runtime environment for the binary. See also: +`py_interpreter_program` rule. + +NOTE: What interpreter is used depends on the toolchain constraints. Ensure +the proper target constraints are being applied when obtaining this from +the toolchain. +""", + "exec_interpreter_version_info": """ +struct of interpreter version info for `exec_interpreter`. Note this +is for the exec interpreter, not the target interpreter. For version information +about the target Python runtime, use the `//python:toolchain_type` toolchain +information. +""", + "precompiler": """ +Optional Target. The tool to use for generating pyc files. If not available, +precompiling will not be available. + +Must provide one of the following: + * PyInterpreterProgramInfo + * DefaultInfo.files_to_run + +This target provides either the `PyInterpreterProgramInfo` provider or is a +regular executable binary (provides DefaultInfo.files_to_run). When the +`PyInterpreterProgramInfo` provider is present, it means the precompiler program +doesn't know how to find the interpreter itself, so the caller must provide it +when constructing the action invocation for running the precompiler program +(typically `exec_interpreter`). See the `PyInterpreterProgramInfo` provider docs +for details on how to construct an invocation. + +If `testing.ExecutionInfo` is provided, it will be used to set execution +requirements. This can be used to control persistent worker settings. + +The precompiler command line API is: +* `--invalidation_mode`: The type of pyc invalidation mode to use. Should be + one of `unchecked_hash` or `checked_hash`. +* `--optimize`: The optimization level as an integer. +* `--python_version`: The Python version, in `Major.Minor` format, e.g. `3.12` + +The following args are repeated and form a list of 3-tuples of their values. At +least one 3-tuple will be passed. +* `--src`: Path to the source `.py` file to precompile. +* `--src_name`: The human-friendly file name to record in the pyc output. +* `--pyc`: Path to where pyc output should be written. + +NOTE: These arguments _may_ be stored in a file instead, in which case, the +path to that file will be a positional arg starting with `@`, e.g. `@foo/bar`. +The format of the file is one arg per line. +""", + }, +) diff --git a/python/private/py_exec_tools_toolchain.bzl b/python/private/py_exec_tools_toolchain.bzl new file mode 100644 index 0000000000..6036db4e7c --- /dev/null +++ b/python/private/py_exec_tools_toolchain.bzl @@ -0,0 +1,47 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Rule that defines a toolchain for build tools.""" + +load("//python/private/common:providers.bzl", "interpreter_version_info_struct_from_dict") +load(":py_exec_tools_info.bzl", "PyExecToolsInfo") + +def _py_exec_tools_toolchain_impl(ctx): + return [platform_common.ToolchainInfo(exec_tools = PyExecToolsInfo( + exec_interpreter = ctx.attr.exec_interpreter, + exec_interpreter_version_info = interpreter_version_info_struct_from_dict( + ctx.attr.exec_interpreter_version_info, + ), + precompiler = ctx.attr.precompiler, + ))] + +py_exec_tools_toolchain = rule( + implementation = _py_exec_tools_toolchain_impl, + attrs = { + "exec_interpreter": attr.label( + cfg = "exec", + allow_files = True, + doc = "See PyExecToolsInfo.exec_interpreter", + executable = True, + ), + "exec_interpreter_version_info": attr.string_dict( + doc = "See PyExecToolsInfo.exec_interpreter_version_info", + ), + "precompiler": attr.label( + allow_files = True, + cfg = "exec", + doc = "See PyExecToolsInfo.precompiler", + ), + }, +) diff --git a/python/private/py_interpreter_program.bzl b/python/private/py_interpreter_program.bzl new file mode 100644 index 0000000000..833ea53b1c --- /dev/null +++ b/python/private/py_interpreter_program.bzl @@ -0,0 +1,103 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Internal only bootstrap level binary-like rule.""" + +load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") + +PyInterpreterProgramInfo = provider( + doc = "Information about how to run a program with an external interpreter.", + fields = { + "env": "dict[str, str] of environment variables to set prior to execution.", + "interpreter_args": "List of strings; additional args to pass " + + "to the interpreter before the main program.", + "main": "File; the .py file that is the entry point.", + }, +) + +def _py_interpreter_program_impl(ctx): + # Bazel requires the executable file to be an output created by this target. + executable = ctx.actions.declare_file(ctx.label.name) + ctx.actions.symlink(output = executable, target_file = ctx.file.main) + execution_requirements = {} + execution_requirements.update([ + value.split("=", 1) + for value in ctx.attr.execution_requirements[BuildSettingInfo].value + if value.strip() + ]) + + return [ + DefaultInfo( + executable = executable, + files = depset([executable]), + runfiles = ctx.runfiles(files = [ + executable, + ]), + ), + PyInterpreterProgramInfo( + env = ctx.attr.env, + interpreter_args = ctx.attr.interpreter_args, + main = ctx.file.main, + ), + testing.ExecutionInfo( + requirements = execution_requirements, + ), + ] + +py_interpreter_program = rule( + doc = """ +Binary-like rule that doesn't require a toolchain becaues its part of +implementing build tools for the toolchain. This rule expects the Python +interprter to be externally provided. + +To run a `py_interpreter_program` as an action, pass it as a tool that is +used by the actual interpreter executable. This ensures its runfiles are +setup. Also pass along any interpreter args, environment, and requirements. + +```starlark +ctx.actions.run( + executable = , + args = ( + target[PyInterpreterProgramInfo].interpreter_args + + [target[DefaultInfo].files_to_run.executable] + ), + tools = target[DefaultInfo].files_to_run, + env = target[PyInterpreterProgramInfo].env, + execution_requirements = target[testing.ExecutionInfo].requirements, +) +``` + +""", + implementation = _py_interpreter_program_impl, + attrs = { + "env": attr.string_dict( + doc = "Environment variables that should set prior to running.", + ), + "execution_requirements": attr.label( + doc = "Execution requirements to set when running it as an action", + providers = [BuildSettingInfo], + ), + "interpreter_args": attr.string_list( + doc = "Args that should be passed to the interpreter.", + ), + "main": attr.label( + doc = "The entry point Python file.", + allow_single_file = True, + ), + }, + # This is set to False because this isn't a binary/executable in the usual + # Bazel sense (even though it sets DefaultInfo.files_to_run). It just holds + # information so that a caller can construct how to execute it correctly. + executable = False, +) diff --git a/python/private/py_toolchain_suite.bzl b/python/private/py_toolchain_suite.bzl index 5b4b6f8972..9971a8a4c3 100644 --- a/python/private/py_toolchain_suite.bzl +++ b/python/private/py_toolchain_suite.bzl @@ -15,9 +15,12 @@ """Create the toolchain defs in a BUILD.bazel file.""" load("@bazel_skylib//lib:selects.bzl", "selects") - -_py_toolchain_type = Label("@bazel_tools//tools/python:toolchain_type") -_py_cc_toolchain_type = Label("//python/cc:toolchain_type") +load( + ":toolchain_types.bzl", + "EXEC_TOOLS_TOOLCHAIN_TYPE", + "PY_CC_TOOLCHAIN_TYPE", + "TARGET_TOOLCHAIN_TYPE", +) def py_toolchain_suite(*, prefix, user_repository_name, python_version, set_python_version_constraint, **kwargs): """For internal use only. @@ -65,7 +68,7 @@ def py_toolchain_suite(*, prefix, user_repository_name, python_version, set_pyth toolchain = "@{user_repository_name}//:python_runtimes".format( user_repository_name = user_repository_name, ), - toolchain_type = _py_toolchain_type, + toolchain_type = TARGET_TOOLCHAIN_TYPE, target_settings = target_settings, **kwargs ) @@ -75,7 +78,23 @@ def py_toolchain_suite(*, prefix, user_repository_name, python_version, set_pyth toolchain = "@{user_repository_name}//:py_cc_toolchain".format( user_repository_name = user_repository_name, ), - toolchain_type = _py_cc_toolchain_type, + toolchain_type = PY_CC_TOOLCHAIN_TYPE, target_settings = target_settings, **kwargs ) + + native.toolchain( + name = "{prefix}_py_exec_tools_toolchain".format(prefix = prefix), + toolchain = "@{user_repository_name}//:py_exec_tools_toolchain".format( + user_repository_name = user_repository_name, + ), + toolchain_type = EXEC_TOOLS_TOOLCHAIN_TYPE, + # The target settings capture the Python version + target_settings = target_settings, + exec_compatible_with = kwargs.get("target_compatible_with"), + ) + + # NOTE: When adding a new toolchain, for WORKSPACE builds to see the + # toolchain, the name must be added to the native.register_toolchains() + # call in python/repositories.bzl. Bzlmod doesn't need anything; it will + # register `:all`. diff --git a/python/private/python_bootstrap_template.txt b/python/private/python_bootstrap_template.txt index 49f4cb5445..8eaedbc4dc 100644 --- a/python/private/python_bootstrap_template.txt +++ b/python/private/python_bootstrap_template.txt @@ -89,6 +89,10 @@ def FindPythonBinary(module_space): """Finds the real Python binary if it's not a normal absolute path.""" return FindBinary(module_space, PYTHON_BINARY) +def PrintVerbose(*args): + if os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE"): + print("bootstrap:", *args, file=sys.stderr) + def PrintVerboseCoverage(*args): """Print output if VERBOSE_COVERAGE is non-empty in the environment.""" if os.environ.get("VERBOSE_COVERAGE"): @@ -376,7 +380,10 @@ def _RunExecv(python_program, main_filename, args, env): # type: (str, str, list[str], dict[str, str]) -> ... """Executes the given Python file using the various environment settings.""" os.environ.update(env) - os.execv(python_program, [python_program, main_filename] + args) + PrintVerbose("RunExecv: environ:", os.environ) + argv = [python_program, main_filename] + args + PrintVerbose("RunExecv: argv:", python_program, argv) + os.execv(python_program, argv) def _RunForCoverage(python_program, main_filename, args, env, coverage_entrypoint, workspace): diff --git a/python/private/toolchain_types.bzl b/python/private/toolchain_types.bzl new file mode 100644 index 0000000000..ef81bf3bd4 --- /dev/null +++ b/python/private/toolchain_types.bzl @@ -0,0 +1,23 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Labels to identify toolchain types. + +This is a separate file because things needing the toolchain types (in +particular, toolchain() registrations) shouldn't need to load the entire +implementation of the toolchain. +""" + +TARGET_TOOLCHAIN_TYPE = Label("//python:toolchain_type") +EXEC_TOOLS_TOOLCHAIN_TYPE = Label("//python:exec_tools_toolchain_type") +PY_CC_TOOLCHAIN_TYPE = Label("//python/cc:toolchain_type") diff --git a/python/repositories.bzl b/python/repositories.bzl index eb122b6e7b..26081a6b48 100644 --- a/python/repositories.bzl +++ b/python/repositories.bzl @@ -304,6 +304,7 @@ def _python_repository_impl(rctx): load("@rules_python//python:py_runtime.bzl", "py_runtime") load("@rules_python//python:py_runtime_pair.bzl", "py_runtime_pair") load("@rules_python//python/cc:py_cc_toolchain.bzl", "py_cc_toolchain") +load("@rules_python//python/private:py_exec_tools_toolchain.bzl", "py_exec_tools_toolchain") package(default_visibility = ["//visibility:public"]) @@ -373,6 +374,8 @@ py_runtime( "micro": "{interpreter_version_info_micro}", }}, python_version = "PY3", + implementation_name = 'cpython', + pyc_tag = "cpython-{interpreter_version_info_major}{interpreter_version_info_minor}", ) py_runtime_pair( @@ -387,6 +390,17 @@ py_cc_toolchain( libs = ":libpython", python_version = "{python_version}", ) + +py_exec_tools_toolchain( + name = "py_exec_tools_toolchain", + exec_interpreter = "{python_path}", + exec_interpreter_version_info = {{ + "major": "{interpreter_version_info_major}", + "minor": "{interpreter_version_info_minor}", + "micro": "{interpreter_version_info_micro}", + }}, + precompiler = "@rules_python//tools/precompiler:precompiler", +) """.format( glob_exclude = repr(glob_exclude), glob_include = repr(glob_include), @@ -631,6 +645,10 @@ def python_register_toolchains( toolchain_repo_name = toolchain_repo_name, platform = platform, )) + native.register_toolchains("@{toolchain_repo_name}//:{platform}_py_exec_tools_toolchain".format( + toolchain_repo_name = toolchain_repo_name, + platform = platform, + )) host_toolchain( name = name + "_host", diff --git a/tests/base_rules/precompile/BUILD.bazel b/tests/base_rules/precompile/BUILD.bazel new file mode 100644 index 0000000000..201adbadd6 --- /dev/null +++ b/tests/base_rules/precompile/BUILD.bazel @@ -0,0 +1,3 @@ +load(":precompile_tests.bzl", "precompile_test_suite") + +precompile_test_suite(name = "precompile_tests") diff --git a/tests/base_rules/precompile/precompile_tests.bzl b/tests/base_rules/precompile/precompile_tests.bzl new file mode 100644 index 0000000000..f88a438685 --- /dev/null +++ b/tests/base_rules/precompile/precompile_tests.bzl @@ -0,0 +1,315 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for precompiling behavior.""" + +load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config") +load("@rules_testing//lib:analysis_test.bzl", "analysis_test") +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("@rules_testing//lib:truth.bzl", "matching") +load("@rules_testing//lib:util.bzl", rt_util = "util") +load("//python:py_binary.bzl", "py_binary") +load("//python:py_info.bzl", "PyInfo") +load("//python:py_library.bzl", "py_library") +load("//python:py_test.bzl", "py_test") +load("//tests/base_rules:py_info_subject.bzl", "py_info_subject") +load( + "//tests/support:support.bzl", + "CC_TOOLCHAIN", + "PLATFORM_TOOLCHAIN", + "PRECOMPILE", + "PRECOMPILE_ADD_TO_RUNFILES", + "PRECOMPILE_SOURCE_RETENTION", +) + +_TEST_TOOLCHAINS = [PLATFORM_TOOLCHAIN, CC_TOOLCHAIN] + +_tests = [] + +def _test_precompile_enabled_setup(name, py_rule, **kwargs): + if not rp_config.enable_pystar: + rt_util.skip_test(name = name) + return + rt_util.helper_target( + py_rule, + name = name + "_subject", + precompile = "enabled", + srcs = ["main.py"], + deps = [name + "_lib"], + **kwargs + ) + rt_util.helper_target( + py_library, + name = name + "_lib", + srcs = ["lib.py"], + precompile = "enabled", + ) + analysis_test( + name = name, + impl = _test_precompile_enabled_impl, + target = name + "_subject", + config_settings = { + "//command_line_option:extra_toolchains": _TEST_TOOLCHAINS, + }, + ) + +def _test_precompile_enabled_impl(env, target): + target = env.expect.that_target(target) + runfiles = target.runfiles() + runfiles.contains_predicate( + matching.str_matches("__pycache__/main.fakepy-45.pyc"), + ) + runfiles.contains_predicate( + matching.str_matches("/main.py"), + ) + target.default_outputs().contains_at_least_predicates([ + matching.file_path_matches("__pycache__/main.fakepy-45.pyc"), + matching.file_path_matches("/main.py"), + ]) + py_info = target.provider(PyInfo, factory = py_info_subject) + py_info.direct_pyc_files().contains_exactly([ + "{package}/__pycache__/main.fakepy-45.pyc", + ]) + py_info.transitive_pyc_files().contains_exactly([ + "{package}/__pycache__/main.fakepy-45.pyc", + "{package}/__pycache__/lib.fakepy-45.pyc", + ]) + +def _test_precompile_enabled_py_binary(name): + _test_precompile_enabled_setup(name = name, py_rule = py_binary, main = "main.py") + +_tests.append(_test_precompile_enabled_py_binary) + +def _test_precompile_enabled_py_test(name): + _test_precompile_enabled_setup(name = name, py_rule = py_test, main = "main.py") + +_tests.append(_test_precompile_enabled_py_test) + +def _test_precompile_enabled_py_library(name): + _test_precompile_enabled_setup(name = name, py_rule = py_library) + +_tests.append(_test_precompile_enabled_py_library) + +def _test_pyc_only(name): + if not rp_config.enable_pystar: + rt_util.skip_test(name = name) + return + rt_util.helper_target( + py_binary, + name = name + "_subject", + precompile = "enabled", + srcs = ["main.py"], + main = "main.py", + precompile_source_retention = "omit_source", + ) + analysis_test( + name = name, + impl = _test_pyc_only_impl, + config_settings = { + "//command_line_option:extra_toolchains": _TEST_TOOLCHAINS, + ##PRECOMPILE_SOURCE_RETENTION: "omit_source", + }, + target = name + "_subject", + ) + +_tests.append(_test_pyc_only) + +def _test_pyc_only_impl(env, target): + target = env.expect.that_target(target) + runfiles = target.runfiles() + runfiles.contains_predicate( + matching.str_matches("/main.pyc"), + ) + runfiles.not_contains_predicate( + matching.str_endswith("/main.py"), + ) + target.default_outputs().contains_at_least_predicates([ + matching.file_path_matches("/main.pyc"), + ]) + target.default_outputs().not_contains_predicate( + matching.file_basename_equals("main.py"), + ) + +def _test_precompile_if_generated(name): + if not rp_config.enable_pystar: + rt_util.skip_test(name = name) + return + rt_util.helper_target( + py_binary, + name = name + "_subject", + srcs = [ + "main.py", + rt_util.empty_file("generated1.py"), + ], + main = "main.py", + precompile = "if_generated_source", + ) + analysis_test( + name = name, + impl = _test_precompile_if_generated_impl, + target = name + "_subject", + config_settings = { + "//command_line_option:extra_toolchains": _TEST_TOOLCHAINS, + }, + ) + +_tests.append(_test_precompile_if_generated) + +def _test_precompile_if_generated_impl(env, target): + target = env.expect.that_target(target) + runfiles = target.runfiles() + runfiles.contains_predicate( + matching.str_matches("/__pycache__/generated1.fakepy-45.pyc"), + ) + runfiles.not_contains_predicate( + matching.str_matches("main.*pyc"), + ) + target.default_outputs().contains_at_least_predicates([ + matching.file_path_matches("/__pycache__/generated1.fakepy-45.pyc"), + ]) + target.default_outputs().not_contains_predicate( + matching.file_path_matches("main.*pyc"), + ) + +def _test_omit_source_if_generated_source(name): + if not rp_config.enable_pystar: + rt_util.skip_test(name = name) + return + rt_util.helper_target( + py_binary, + name = name + "_subject", + srcs = [ + "main.py", + rt_util.empty_file("generated2.py"), + ], + main = "main.py", + precompile = "enabled", + ) + analysis_test( + name = name, + impl = _test_omit_source_if_generated_source_impl, + target = name + "_subject", + config_settings = { + "//command_line_option:extra_toolchains": _TEST_TOOLCHAINS, + PRECOMPILE_SOURCE_RETENTION: "omit_if_generated_source", + }, + ) + +_tests.append(_test_omit_source_if_generated_source) + +def _test_omit_source_if_generated_source_impl(env, target): + target = env.expect.that_target(target) + runfiles = target.runfiles() + runfiles.contains_predicate( + matching.str_matches("/generated2.pyc"), + ) + runfiles.contains_predicate( + matching.str_matches("__pycache__/main.fakepy-45.pyc"), + ) + target.default_outputs().contains_at_least_predicates([ + matching.file_path_matches("generated2.pyc"), + ]) + target.default_outputs().contains_predicate( + matching.file_path_matches("__pycache__/main.fakepy-45.pyc"), + ) + +def _test_precompile_add_to_runfiles_decided_elsewhere(name): + if not rp_config.enable_pystar: + rt_util.skip_test(name = name) + return + rt_util.helper_target( + py_binary, + name = name + "_binary", + srcs = ["bin.py"], + main = "bin.py", + deps = [name + "_lib"], + pyc_collection = "include_pyc", + ) + rt_util.helper_target( + py_library, + name = name + "_lib", + srcs = ["lib.py"], + ) + analysis_test( + name = name, + impl = _test_precompile_add_to_runfiles_decided_elsewhere_impl, + targets = { + "binary": name + "_binary", + "library": name + "_lib", + }, + config_settings = { + "//command_line_option:extra_toolchains": _TEST_TOOLCHAINS, + PRECOMPILE_ADD_TO_RUNFILES: "decided_elsewhere", + PRECOMPILE: "enabled", + }, + ) + +_tests.append(_test_precompile_add_to_runfiles_decided_elsewhere) + +def _test_precompile_add_to_runfiles_decided_elsewhere_impl(env, targets): + env.expect.that_target(targets.binary).runfiles().contains_at_least([ + "{workspace}/tests/base_rules/precompile/__pycache__/bin.fakepy-45.pyc", + "{workspace}/tests/base_rules/precompile/__pycache__/lib.fakepy-45.pyc", + "{workspace}/tests/base_rules/precompile/bin.py", + "{workspace}/tests/base_rules/precompile/lib.py", + ]) + + env.expect.that_target(targets.library).runfiles().contains_exactly([ + "{workspace}/tests/base_rules/precompile/lib.py", + ]) + +def _test_precompiler_action(name): + if not rp_config.enable_pystar: + rt_util.skip_test(name = name) + return + rt_util.helper_target( + py_binary, + name = name + "_subject", + srcs = ["main2.py"], + main = "main2.py", + precompile = "enabled", + precompile_optimize_level = 2, + precompile_invalidation_mode = "unchecked_hash", + ) + analysis_test( + name = name, + impl = _test_precompiler_action_impl, + target = name + "_subject", + config_settings = { + "//command_line_option:extra_toolchains": _TEST_TOOLCHAINS, + }, + ) + +_tests.append(_test_precompiler_action) + +def _test_precompiler_action_impl(env, target): + #env.expect.that_target(target).runfiles().contains_exactly([]) + action = env.expect.that_target(target).action_named("PyPrecompile") + action.contains_flag_values([ + ("--optimize", "2"), + ("--python_version", "4.5"), + ("--invalidation_mode", "unchecked_hash"), + ]) + action.has_flags_specified(["--src", "--pyc", "--src_name"]) + action.env().contains_at_least({ + "PYTHONHASHSEED": "0", + "PYTHONNOUSERSITE": "1", + "PYTHONSAFEPATH": "1", + }) + +def precompile_test_suite(name): + test_suite( + name = name, + tests = _tests, + ) diff --git a/tests/base_rules/py_executable_base_tests.bzl b/tests/base_rules/py_executable_base_tests.bzl index ce86ecaf11..b6f28026db 100644 --- a/tests/base_rules/py_executable_base_tests.bzl +++ b/tests/base_rules/py_executable_base_tests.bzl @@ -20,7 +20,7 @@ load("@rules_testing//lib:truth.bzl", "matching") load("@rules_testing//lib:util.bzl", rt_util = "util") load("//tests/base_rules:base_tests.bzl", "create_base_tests") load("//tests/base_rules:util.bzl", "WINDOWS_ATTR", pt_util = "util") -load("//tests/support:test_platforms.bzl", "WINDOWS") +load("//tests/support:support.bzl", "WINDOWS") _BuiltinPyRuntimeInfo = PyRuntimeInfo diff --git a/tests/base_rules/py_info_subject.bzl b/tests/base_rules/py_info_subject.bzl index b23308c388..bfed0b335d 100644 --- a/tests/base_rules/py_info_subject.bzl +++ b/tests/base_rules/py_info_subject.bzl @@ -31,9 +31,11 @@ def py_info_subject(info, *, meta): # buildifier: disable=uninitialized public = struct( # go/keep-sorted start + direct_pyc_files = lambda *a, **k: _py_info_subject_direct_pyc_files(self, *a, **k), has_py2_only_sources = lambda *a, **k: _py_info_subject_has_py2_only_sources(self, *a, **k), has_py3_only_sources = lambda *a, **k: _py_info_subject_has_py3_only_sources(self, *a, **k), imports = lambda *a, **k: _py_info_subject_imports(self, *a, **k), + transitive_pyc_files = lambda *a, **k: _py_info_subject_transitive_pyc_files(self, *a, **k), transitive_sources = lambda *a, **k: _py_info_subject_transitive_sources(self, *a, **k), uses_shared_libraries = lambda *a, **k: _py_info_subject_uses_shared_libraries(self, *a, **k), # go/keep-sorted end @@ -44,6 +46,16 @@ def py_info_subject(info, *, meta): ) return public +def _py_info_subject_direct_pyc_files(self): + """Returns a `DepsetFileSubject` for the `direct_pyc_files` attribute. + + Method: PyInfoSubject.direct_pyc_files + """ + return subjects.depset_file( + self.actual.direct_pyc_files, + meta = self.meta.derive("direct_pyc_files()"), + ) + def _py_info_subject_has_py2_only_sources(self): """Returns a `BoolSubject` for the `has_py2_only_sources` attribute. @@ -74,6 +86,16 @@ def _py_info_subject_imports(self): meta = self.meta.derive("imports()"), ) +def _py_info_subject_transitive_pyc_files(self): + """Returns a `DepsetFileSubject` for the `transitive_pyc_files` attribute. + + Method: PyInfoSubject.transitive_pyc_files + """ + return subjects.depset_file( + self.actual.transitive_pyc_files, + meta = self.meta.derive("transitive_pyc_files()"), + ) + def _py_info_subject_transitive_sources(self): """Returns a `DepsetFileSubject` for the `transitive_sources` attribute. diff --git a/tests/base_rules/py_test/py_test_tests.bzl b/tests/base_rules/py_test/py_test_tests.bzl index f4b704e6ca..50c1db27cf 100644 --- a/tests/base_rules/py_test/py_test_tests.bzl +++ b/tests/base_rules/py_test/py_test_tests.bzl @@ -21,7 +21,7 @@ load( "create_executable_tests", ) load("//tests/base_rules:util.bzl", pt_util = "util") -load("//tests/support:test_platforms.bzl", "LINUX", "MAC") +load("//tests/support:support.bzl", "LINUX", "MAC") # Explicit Label() calls are required so that it resolves in @rules_python # context instead of @rules_testing context. diff --git a/tests/support/BUILD.bazel b/tests/support/BUILD.bazel index 0a4c98ccce..3b77cde0c5 100644 --- a/tests/support/BUILD.bazel +++ b/tests/support/BUILD.bazel @@ -17,6 +17,10 @@ # Otherwise, you'll probably have to manually call Label() on these targets # to force them to resolve in the proper context. # ==================== + +load("//python:py_runtime.bzl", "py_runtime") +load("//python:py_runtime_pair.bzl", "py_runtime_pair") + platform( name = "mac", constraint_values = [ @@ -63,3 +67,24 @@ platform( "@platforms//os:windows", ], ) + +py_runtime( + name = "platform_runtime", + implementation_name = "fakepy", + interpreter_path = "/fake/python3.9", + interpreter_version_info = { + "major": "4", + "minor": "5", + }, +) + +py_runtime_pair( + name = "platform_runtime_pair", + py3_runtime = ":platform_runtime", +) + +toolchain( + name = "platform_toolchain", + toolchain = ":platform_runtime_pair", + toolchain_type = "//python:toolchain_type", +) diff --git a/tests/support/support.bzl b/tests/support/support.bzl new file mode 100644 index 0000000000..14a743b8a2 --- /dev/null +++ b/tests/support/support.bzl @@ -0,0 +1,34 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Code that support testing of rules_python code.""" + +# NOTE: Explicit Label() calls are required so that it resolves in @rules_python +# context instead of e.g. the @rules_testing context. +# NOTE: Some labels require str() around Label() because they are passed onto +# rules_testing or as config_setting values, which don't support Label in some +# places. + +MAC = Label("//tests/support:mac") +LINUX = Label("//tests/support:linux") +WINDOWS = Label("//tests/support:windows") + +PLATFORM_TOOLCHAIN = str(Label("//tests/support:platform_toolchain")) +CC_TOOLCHAIN = str(Label("//tests/cc:all")) + +# str() around Label() is necessary because rules_testing's config_settings +# doesn't accept yet Label objects. +PRECOMPILE = str(Label("//python/config_settings:precompile")) +PYC_COLLECTION = str(Label("//python/config_settings:pyc_collection")) +PRECOMPILE_SOURCE_RETENTION = str(Label("//python/config_settings:precompile_source_retention")) +PRECOMPILE_ADD_TO_RUNFILES = str(Label("//python/config_settings:precompile_add_to_runfiles")) diff --git a/tests/toolchains/defs.bzl b/tests/toolchains/defs.bzl index 8776eba919..bfc55199d3 100644 --- a/tests/toolchains/defs.bzl +++ b/tests/toolchains/defs.bzl @@ -193,5 +193,10 @@ def acceptance_tests(): ), python_version = python_version, target_compatible_with = meta.compatible_with, - tags = ["acceptance-test"], + tags = [ + "acceptance-test", + # For some inexplicable reason, these fail locally with + # sandboxing enabled, but not on CI. + "no-sandbox", + ], ) diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index b2aca5cd87..4f42bcb02d 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -29,6 +29,8 @@ filegroup( srcs = [ "BUILD.bazel", "wheelmaker.py", + "//tools/launcher:distribution", + "//tools/precompiler:distribution", "//tools/publish:distribution", ], visibility = ["//:__pkg__"], diff --git a/tools/launcher/BUILD.bazel b/tools/launcher/BUILD.bazel new file mode 100644 index 0000000000..aa4610671b --- /dev/null +++ b/tools/launcher/BUILD.bazel @@ -0,0 +1,33 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filegroup( + name = "distribution", + srcs = glob(["**"]), + visibility = ["//:__subpackages__"], +) + +alias( + name = "launcher", + actual = select({ + "@platforms//os:windows": "@bazel_tools//tools/launcher:launcher", + # The alias.actual value must be non-None, so use an empty target. + "//conditions:default": ":_sentinel_no_launcher", + }), + visibility = ["//visibility:public"], +) + +filegroup( + name = "_sentinel_no_launcher", +) diff --git a/tools/precompiler/BUILD.bazel b/tools/precompiler/BUILD.bazel new file mode 100644 index 0000000000..268f41b032 --- /dev/null +++ b/tools/precompiler/BUILD.bazel @@ -0,0 +1,44 @@ +# Copyright 2017 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@bazel_skylib//rules:common_settings.bzl", "string_list_flag") +load("//python/private:py_interpreter_program.bzl", "py_interpreter_program") # buildifier: disable=bzl-visibility + +filegroup( + name = "distribution", + srcs = glob(["**"]), + visibility = ["//:__subpackages__"], +) + +py_interpreter_program( + name = "precompiler", + execution_requirements = ":execution_requirements", + main = "precompiler.py", + visibility = [ + # Not actually public. Only public so rules_python-generated toolchains + # are able to reference it. + "//visibility:public", + ], +) + +string_list_flag( + name = "execution_requirements", + build_setting_default = [ + "supports-workers=1", + "requires-worker-protocol=json", + "supports-multiplex-sandboxing=1", + "supports-multiplex-workers=1", + "supports-worker-cancellation=1", + ], +) diff --git a/tools/precompiler/precompiler.py b/tools/precompiler/precompiler.py new file mode 100644 index 0000000000..d1b17132e7 --- /dev/null +++ b/tools/precompiler/precompiler.py @@ -0,0 +1,296 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A simple precompiler to generate deterministic pyc files for Bazel.""" + +# NOTE: Imports specific to the persistent worker should only be imported +# when a persistent worker is used. Avoiding the unnecessary imports +# saves significant startup time for non-worker invocations. +import argparse +import py_compile +import sys + + +def _create_parser() -> "argparse.Namespace": + parser = argparse.ArgumentParser(fromfile_prefix_chars="@") + parser.add_argument("--invalidation_mode") + parser.add_argument("--optimize", type=int) + parser.add_argument("--python_version") + + parser.add_argument("--src", action="append", dest="srcs") + parser.add_argument("--src_name", action="append", dest="src_names") + parser.add_argument("--pyc", action="append", dest="pycs") + + parser.add_argument("--persistent_worker", action="store_true") + parser.add_argument("--log_level", default="ERROR") + parser.add_argument("--worker_impl", default="async") + return parser + + +def _compile(options: "argparse.Namespace") -> None: + try: + invalidation_mode = getattr( + py_compile.PycInvalidationMode, options.invalidation_mode.upper() + ) + except AttributeError as e: + raise ValueError( + f"Unknown PycInvalidationMode: {options.invalidation_mode}" + ) from e + + if len(options.srcs) != len(options.src_names) != len(options.pycs): + raise AssertionError( + "Mismatched number of --src, --src_name, and/or --pyc args" + ) + + for src, src_name, pyc in zip(options.srcs, options.src_names, options.pycs): + py_compile.compile( + src, + pyc, + doraise=True, + dfile=src_name, + optimize=options.optimize, + invalidation_mode=invalidation_mode, + ) + return 0 + + +# A stub type alias for readability. +# See the Bazel WorkRequest object definition: +# https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/worker_protocol.proto +JsonWorkerRequest = object + +# A stub type alias for readability. +# See the Bazel WorkResponse object definition: +# https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/worker_protocol.proto +JsonWorkerResponse = object + + +class _SerialPersistentWorker: + """Simple, synchronous, serial persistent worker.""" + + def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO"): + self._instream = instream + self._outstream = outstream + self._parser = _create_parser() + + def run(self) -> None: + try: + while True: + request = None + try: + request = self._get_next_request() + if request is None: + _logger.info("Empty request: exiting") + break + response = self._process_request(request) + if response: # May be none for cancel request + self._send_response(response) + except Exception: + _logger.exception("Unhandled error: request=%s", request) + output = ( + f"Unhandled error:\nRequest: {request}\n" + + traceback.format_exc() + ) + request_id = 0 if not request else request.get("requestId", 0) + self._send_response( + { + "exitCode": 3, + "output": output, + "requestId": request_id, + } + ) + finally: + _logger.info("Worker shutting down") + + def _get_next_request(self) -> "object | None": + line = self._instream.readline() + if not line: + return None + return json.loads(line) + + def _process_request(self, request: "JsonWorkRequest") -> "JsonWorkResponse | None": + if request.get("cancel"): + return None + options = self._options_from_request(request) + _compile(options) + response = { + "requestId": request.get("requestId", 0), + "exitCode": 0, + } + return response + + def _options_from_request( + self, request: "JsonWorkResponse" + ) -> "argparse.Namespace": + options = self._parser.parse_args(request["arguments"]) + if request.get("sandboxDir"): + prefix = request["sandboxDir"] + options.srcs = [os.path.join(prefix, v) for v in options.srcs] + options.pycs = [os.path.join(prefix, v) for v in options.pycs] + return options + + def _send_response(self, response: "JsonWorkResponse") -> None: + self._outstream.write(json.dumps(response) + "\n") + self._outstream.flush() + + +class _AsyncPersistentWorker: + """Asynchronous, concurrent, persistent worker.""" + + def __init__(self, reader: "typing.TextIO", writer: "typing.TextIO"): + self._reader = reader + self._writer = writer + self._parser = _create_parser() + self._request_id_to_task = {} + self._task_to_request_id = {} + + @classmethod + async def main(cls, instream: "typing.TextIO", outstream: "typing.TextIO") -> None: + reader, writer = await cls._connect_streams(instream, outstream) + await cls(reader, writer).run() + + @classmethod + async def _connect_streams( + cls, instream: "typing.TextIO", outstream: "typing.TextIO" + ) -> "tuple[asyncio.StreamReader, asyncio.StreamWriter]": + loop = asyncio.get_event_loop() + reader = asyncio.StreamReader() + protocol = asyncio.StreamReaderProtocol(reader) + await loop.connect_read_pipe(lambda: protocol, instream) + + w_transport, w_protocol = await loop.connect_write_pipe( + asyncio.streams.FlowControlMixin, outstream + ) + writer = asyncio.StreamWriter(w_transport, w_protocol, reader, loop) + return reader, writer + + async def run(self) -> None: + while True: + _logger.info("pending requests: %s", len(self._request_id_to_task)) + request = await self._get_next_request() + request_id = request.get("requestId", 0) + task = asyncio.create_task( + self._process_request(request), name=f"request_{request_id}" + ) + self._request_id_to_task[request_id] = task + self._task_to_request_id[task] = request_id + task.add_done_callback(self._handle_task_done) + + async def _get_next_request(self) -> "JsonWorkRequest": + _logger.debug("awaiting line") + line = await self._reader.readline() + _logger.debug("recv line: %s", line) + return json.loads(line) + + def _handle_task_done(self, task: "asyncio.Task") -> None: + request_id = self._task_to_request_id[task] + _logger.info("task done: %s %s", request_id, task) + del self._task_to_request_id[task] + del self._request_id_to_task[request_id] + + async def _process_request(self, request: "JsonWorkRequest") -> None: + _logger.info("request %s: start: %s", request.get("requestId"), request) + try: + if request.get("cancel", False): + await self._process_cancel_request(request) + else: + await self._process_compile_request(request) + except asyncio.CancelledError: + _logger.info( + "request %s: cancel received, stopping processing", + request.get("requestId"), + ) + # We don't send a response because we assume the request that + # triggered cancelling sent the response + raise + except: + _logger.exception("Unhandled error: request=%s", request) + self._send_response( + { + "exitCode": 3, + "output": f"Unhandled error:\nRequest: {request}\n" + + traceback.format_exc(), + "requestId": 0 if not request else request.get("requestId", 0), + } + ) + + async def _process_cancel_request(self, request: "JsonWorkRequest") -> None: + request_id = request.get("requestId", 0) + task = self._request_id_to_task.get(request_id) + if not task: + # It must be already completed, so ignore the request, per spec + return + + task.cancel() + self._send_response({"requestId": request_id, "wasCancelled": True}) + + async def _process_compile_request(self, request: "JsonWorkRequest") -> None: + options = self._options_from_request(request) + # _compile performs a varity of blocking IO calls, so run it separately + await asyncio.to_thread(_compile, options) + self._send_response( + { + "requestId": request.get("requestId", 0), + "exitCode": 0, + } + ) + + def _options_from_request(self, request: "JsonWorkRequest") -> "argparse.Namespace": + options = self._parser.parse_args(request["arguments"]) + if request.get("sandboxDir"): + prefix = request["sandboxDir"] + options.srcs = [os.path.join(prefix, v) for v in options.srcs] + options.pycs = [os.path.join(prefix, v) for v in options.pycs] + return options + + def _send_response(self, response: "JsonWorkResponse") -> None: + _logger.info("request %s: respond: %s", response.get("requestId"), response) + self._writer.write(json.dumps(response).encode("utf8") + b"\n") + + +def main(args: "list[str]") -> int: + options = _create_parser().parse_args(args) + + # Persistent workers are started with the `--persistent_worker` flag. + # See the following docs for details on persistent workers: + # https://bazel.build/remote/persistent + # https://bazel.build/remote/multiplex + # https://bazel.build/remote/creating + if options.persistent_worker: + global asyncio, itertools, json, logging, os, traceback, _logger + import asyncio + import itertools + import json + import logging + import os.path + import traceback + + _logger = logging.getLogger("precompiler") + # Only configure logging for workers. This prevents non-worker + # invocations from spamming stderr with logging info + logging.basicConfig(level=getattr(logging, options.log_level)) + _logger.info("persistent worker: impl=%s", options.worker_impl) + if options.worker_impl == "serial": + _SerialPersistentWorker(sys.stdin, sys.stdout).run() + elif options.worker_impl == "async": + asyncio.run(_AsyncPersistentWorker.main(sys.stdin, sys.stdout)) + else: + raise ValueError(f"Unknown worker impl: {options.worker_impl}") + else: + _compile(options) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:]))