Skip to content
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ Other changes:
* (zipapp) Resolve issue passing through compression settings in
`py_zipapp_binary` targets
([#3646](https://github.com/bazel-contrib/rules_python/issues/3646)).
* (toolchains) The pyc files created at runtime in the stdlib should no longer
cause the Python runtime repository to be invalidated. The stdlib pyc files
_may_ be reused in between invocations, depending upon the sandboxing
configuration. See the {any}`RULES_PYTHON_PYCACHE_DIR` environment variable
for more information.
([#3643](https://github.com/bazel-contrib/rules_python/issues/3643)).

{#v0-0-0-added}
### Added
Expand Down
28 changes: 28 additions & 0 deletions docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,34 @@ Valid values:
* Other non-empty values mean to use isolated mode.
:::

:::{envvar} RULES_PYTHON_PYCACHE_DIR

Determines the directory that runtime-generated pyc cache files will
be stored in.

This directory may be reused between invocations, depending on the sandboxing
configuration. Setting it to `/dev/null` will, in effect, disable runtime
pyc caching. By setting e.g.
`--sandbox_add_mount_pair=/tmp/rules_python_pycache`, it's possible for pyc
caching to persist across invocations.

**Behavior specific to downloaded runtimes:**
First `RULES_PYTHON_PYCACHE_DIR` is checked. If set, it is used as-is for
the root pycache directory; a subdirectory named after the runtime repository
is created beneath it.

Otherwise, the following environment variables are checked in the following
order. Their values will have `rules_python_pycache` appended to them to form
the root pycache directory:
1. `XDG_CACHE_HOME`.
2. `TMP` (non-Windows) or `TEMP` (Windows).
3. The common platform-specific temporary directory (`/tmp` (non-Windows) or
`C:\Temp` (Windows)).

If such a directory cannot be found, or created, then `/dev/null` will be used,
which will effectively disable pyc caching.

:::

:::{envvar} RULES_PYTHON_REPO_DEBUG

When `1`, repository rules will print debug information about what they're
Expand Down
43 changes: 24 additions & 19 deletions python/private/hermetic_runtime_repo_setup.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -58,30 +58,35 @@ def define_hermetic_runtime_toolchain_impl(
"major": version_info.release[0],
"minor": version_info.release[1],
}
files_include = [
"bin/**",
"extensions/**",
"include/**",
"libs/**",
"share/**",
]
files_include += extra_files_glob_include
files_exclude = [
# Unused shared libraries. `python` executable and the `:libpython` target
# depend on `libpython{python_version}.so.1.0`.
"lib/libpython{major}.{minor}*.so".format(**version_dict),
# static libraries
"lib/**/*.a",
# tests for the standard libraries.
"lib/python{major}.{minor}*/**/test/**".format(**version_dict),
"lib/python{major}.{minor}*/**/tests/**".format(**version_dict),
# During pyc creation, temp files named *.pyc.NNN are created
"**/__pycache__/*.pyc.*",
]
files_exclude += extra_files_glob_exclude

native.filegroup(
name = "files",
srcs = native.glob(
include = [
"bin/**",
"extensions/**",
"include/**",
"libs/**",
"share/**",
] + extra_files_glob_include,
include = files_include,
# Platform-agnostic filegroup can't match on all patterns.
allow_empty = True,
exclude = [
# Unused shared libraries. `python` executable and the `:libpython` target
# depend on `libpython{python_version}.so.1.0`.
"lib/libpython{major}.{minor}*.so".format(**version_dict),
# static libraries
"lib/**/*.a",
# tests for the standard libraries.
"lib/python{major}.{minor}*/**/test/**".format(**version_dict),
"lib/python{major}.{minor}*/**/tests/**".format(**version_dict),
# During pyc creation, temp files named *.pyc.NNN are created
"**/__pycache__/*.pyc.*",
] + extra_files_glob_exclude,
exclude = files_exclude,
),
)
cc_import(
Expand Down
93 changes: 93 additions & 0 deletions python/private/python_repository.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,98 @@ def is_standalone_interpreter(rctx, python_interpreter_path, *, logger = None):
logger = logger,
).return_code == 0

def _get_pycache_root(rctx):
    """Picks a location for the pycache root and creates the directory.

    Candidate locations are tried in this order: `RULES_PYTHON_PYCACHE_DIR`,
    `XDG_CACHE_HOME`, `TMP` or `TEMP`, and finally the platform's conventional
    temporary directory.

    Args:
        rctx: {type}`repository_ctx` The repository rule's context object.

    Returns:
        {type}`path | None` The path to the pycache root, or None if it couldn't
        be created.
    """
    on_windows = repo_utils.get_platforms_os_name(rctx) == "windows"

    # 1. RULES_PYTHON_PYCACHE_DIR
    # An explicit setting is final: if the directory cannot be created, the
    # later candidates are not tried.
    explicit_dir = rctx.getenv("RULES_PYTHON_PYCACHE_DIR")
    if explicit_dir:
        return repo_utils.mkdir(rctx, explicit_dir + "/" + rctx.name)

    # Sub-path used by candidates 2-4.
    # The first level directory is static and documented so that it is easy to
    # use with e.g. --sandbox_add_mount_pair=/tmp/rules_python_pycache
    subdir = "rules_python_pycache/{}/{}".format(
        hash(str(rctx.workspace_root)),
        rctx.name,
    )

    # 2. XDG_CACHE_HOME, then 3. TMP or TEMP.
    # Environment variables are read lazily, so a later variable is only
    # consulted when the earlier candidates were unset or unusable.
    for env_names in [["XDG_CACHE_HOME"], ["TMP", "TEMP"]]:
        base_dir = None
        for env_name in env_names:
            base_dir = rctx.getenv(env_name)
            if base_dir:
                break
        if not base_dir:
            continue
        created = repo_utils.mkdir(rctx, rctx.path(base_dir).get_child(subdir))
        if created:
            return created

    # 4. /tmp or Windows equivalent
    fallback_base = "C:/Temp" if on_windows else "/tmp"
    return repo_utils.mkdir(rctx, rctx.path(fallback_base).get_child(subdir))

def _create_pycache_symlinks(rctx, logger):
    """Finds all directories with a .py file and creates __pycache__ symlinks.

    Each `__pycache__` entry is replaced with a symlink that points into the
    pycache root (mirroring the directory's repo-relative path). When no
    pycache root is available, or the mirrored directory cannot be created,
    the symlink points at the platform's null device instead.

    Args:
        rctx: {type}`repository_ctx` The repository rule's context object.
        logger: Optional logger to use for operations.
    """
    pycache_root = _get_pycache_root(rctx)
    if logger:
        logger.info(lambda: "pycache root: {}".format(pycache_root))
    pycache_root_str = str(pycache_root) if pycache_root else None

    os_name = repo_utils.get_platforms_os_name(rctx)
    null_device = "NUL" if os_name == "windows" else "/dev/null"

    queue = [rctx.path(".")]

    # Starlark doesn't support recursion, use a loop with a queue.
    # Using a large range as a safeguard.
    for _ in range(1000000):
        if not queue:
            break
        p = queue.pop()

        has_py = False
        for child in p.readdir():
            # Skip hidden files and directories
            if child.basename.startswith("."):
                continue

            if child.is_dir:
                # Don't descend into existing pycache dirs, nor into the
                # pycache root itself should it live inside the repo.
                if child.basename == "__pycache__" or str(child) == pycache_root_str:
                    continue
                queue.append(child)
            elif child.basename.endswith(".py"):
                has_py = True

        if not has_py:
            continue

        pycache_dir = p.get_child("__pycache__")

        # Default to the null device; only point into the pycache root when
        # the mirrored directory actually exists, so that a failed mkdir
        # doesn't leave a dangling symlink behind.
        link_target = null_device
        if pycache_root:
            pycache_relative = repo_utils.repo_root_relative_path(rctx, pycache_dir)
            target_dir = repo_utils.mkdir(rctx, pycache_root.get_child(pycache_relative))
            if target_dir:
                link_target = target_dir

        # Remove whatever is currently at __pycache__ before linking.
        rctx.delete(pycache_dir)
        rctx.symlink(link_target, pycache_dir)

def _python_repository_impl(rctx):
if rctx.attr.distutils and rctx.attr.distutils_content:
fail("Only one of (distutils, distutils_content) should be set.")
Expand Down Expand Up @@ -123,6 +215,7 @@ def _python_repository_impl(rctx):
logger = logger,
)

_create_pycache_symlinks(rctx, logger)
python_bin = "python.exe" if ("windows" in platform) else "bin/python3"

if "linux" in platform:
Expand Down
45 changes: 45 additions & 0 deletions python/private/repo_utils.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,49 @@ def _which_describe_failure(binary_name, path):
path = path,
)

def _mkdir(mrctx, path):
    """Creates a directory if it doesn't already exist.

    Directories outside the repository are created by running `mkdir -p`.
    Directories inside the repository are created by writing, then deleting,
    a placeholder file within them.

    Args:
        mrctx: module_ctx or repository_ctx
        path: {type}`str | path` the directory to create.

    Returns:
        {type}`path | None` The path object for the directory, or None if the
        directory is outside the repository and `mkdir` is unavailable or
        failed.
    """
    path = mrctx.path(path)
    if path.exists:
        return path

    repo_root = str(mrctx.path("."))
    path_str = str(path)

    # Compare on a path-component boundary so that a sibling directory whose
    # name merely starts with the repo root's name (e.g. `<repo_root>_host`)
    # isn't mistaken for a location inside this repository.
    in_repo = path_str == repo_root or path_str.startswith(repo_root + "/")

    if in_repo:
        # There is no mkdir call used here; this relies on mrctx.file()
        # creating the missing parent directories of the placeholder.
        placeholder = path.get_child(".placeholder")
        mrctx.file(placeholder)
        mrctx.delete(placeholder)
        return path

    mkdir_bin = mrctx.which("mkdir")
    if not mkdir_bin:
        return None
    res = mrctx.execute([mkdir_bin, "-p", path_str])
    if res.return_code != 0:
        return None
    return path

def _repo_root_relative_path(mrctx, path):
    """Takes a path object and returns a repo-relative path string.

    Fails if `path` is not strictly beneath the repository root (this
    includes `path` being the repository root itself).

    Args:
        mrctx: module_ctx or repository_ctx
        path: {type}`path` a path within `mrctx`

    Returns:
        {type}`str` a repo-root-relative path string.
    """
    repo_root = str(mrctx.path("."))
    path_str = str(path)

    # Require the full `<repo_root>/` prefix. Checking only the character at
    # offset len(repo_root) would accept unrelated paths that happen to have
    # a separator there, and would index past the end of the string when
    # path == repo_root.
    prefix = repo_root + "/"
    if not path_str.startswith(prefix):
        fail("{path} not under {repo_root}".format(
            path = path,
            repo_root = repo_root,
        ))
    return path_str[len(prefix):]

def _args_to_str(arguments):
return " ".join([_arg_repr(a) for a in arguments])

Expand Down Expand Up @@ -465,6 +508,8 @@ repo_utils = struct(
get_platforms_os_name = _get_platforms_os_name,
is_repo_debug_enabled = _is_repo_debug_enabled,
logger = _logger,
mkdir = _mkdir,
repo_root_relative_path = _repo_root_relative_path,
which_checked = _which_checked,
which_unchecked = _which_unchecked,
)