diff --git a/examples/pip_install/pip_install_test.py b/examples/pip_install/pip_install_test.py index f9a62ca6e8..eb4d7d8e95 100644 --- a/examples/pip_install/pip_install_test.py +++ b/examples/pip_install/pip_install_test.py @@ -37,12 +37,11 @@ def test_data(self): self.assertListEqual( env.split(" "), [ - "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/INSTALL.md", - "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/LICENSE", - "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/NEWS", - "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/README.md", - "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/data/share/man/man1/s3cmd.1", - "external/pip/pypi__s3cmd/s3cmd-2.1.0.data/scripts/s3cmd", + "external/pip/pypi__s3cmd/data/share/doc/packages/s3cmd/INSTALL.md", + "external/pip/pypi__s3cmd/data/share/doc/packages/s3cmd/LICENSE", + "external/pip/pypi__s3cmd/data/share/doc/packages/s3cmd/NEWS", + "external/pip/pypi__s3cmd/data/share/doc/packages/s3cmd/README.md", + "external/pip/pypi__s3cmd/data/share/man/man1/s3cmd.1", ], ) @@ -52,12 +51,13 @@ def test_dist_info(self): self.assertListEqual( env.split(" "), [ - "external/pip/pypi__boto3/boto3-1.14.51.dist-info/DESCRIPTION.rst", - "external/pip/pypi__boto3/boto3-1.14.51.dist-info/METADATA", - "external/pip/pypi__boto3/boto3-1.14.51.dist-info/RECORD", - "external/pip/pypi__boto3/boto3-1.14.51.dist-info/WHEEL", - "external/pip/pypi__boto3/boto3-1.14.51.dist-info/metadata.json", - "external/pip/pypi__boto3/boto3-1.14.51.dist-info/top_level.txt", + "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/DESCRIPTION.rst", + 'external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/INSTALLER', + "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/METADATA", + "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/RECORD", + "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/WHEEL", + "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/metadata.json", + "external/pip/pypi__boto3/site-packages/boto3-1.14.51.dist-info/top_level.txt", ], ) diff --git a/examples/pip_parse/pip_parse_test.py b/examples/pip_parse/pip_parse_test.py index ef684c4294..030e38c077 100644 --- a/examples/pip_parse/pip_parse_test.py +++ b/examples/pip_parse/pip_parse_test.py @@ -35,12 +35,11 @@ def test_data(self): self.assertListEqual( env.split(" "), [ - "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/INSTALL.md", - "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/LICENSE", - "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/NEWS", - "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/doc/packages/s3cmd/README.md", - "external/pypi_s3cmd/s3cmd-2.1.0.data/data/share/man/man1/s3cmd.1", - "external/pypi_s3cmd/s3cmd-2.1.0.data/scripts/s3cmd", + "external/pypi_s3cmd/data/share/doc/packages/s3cmd/INSTALL.md", + "external/pypi_s3cmd/data/share/doc/packages/s3cmd/LICENSE", + "external/pypi_s3cmd/data/share/doc/packages/s3cmd/NEWS", + "external/pypi_s3cmd/data/share/doc/packages/s3cmd/README.md", + "external/pypi_s3cmd/data/share/man/man1/s3cmd.1", ], ) @@ -50,11 +49,12 @@ def test_dist_info(self): self.assertListEqual( env.split(" "), [ - "external/pypi_requests/requests-2.25.1.dist-info/LICENSE", - "external/pypi_requests/requests-2.25.1.dist-info/METADATA", - "external/pypi_requests/requests-2.25.1.dist-info/RECORD", - "external/pypi_requests/requests-2.25.1.dist-info/WHEEL", - "external/pypi_requests/requests-2.25.1.dist-info/top_level.txt", + 'external/pypi_requests/site-packages/requests-2.25.1.dist-info/INSTALLER', + "external/pypi_requests/site-packages/requests-2.25.1.dist-info/LICENSE", + "external/pypi_requests/site-packages/requests-2.25.1.dist-info/METADATA", + "external/pypi_requests/site-packages/requests-2.25.1.dist-info/RECORD", + "external/pypi_requests/site-packages/requests-2.25.1.dist-info/WHEEL", + "external/pypi_requests/site-packages/requests-2.25.1.dist-info/top_level.txt", ], ) diff --git a/examples/pip_repository_annotations/WORKSPACE b/examples/pip_repository_annotations/WORKSPACE index eb712cfc6b..8ee885d468 100644 --- a/examples/pip_repository_annotations/WORKSPACE +++ b/examples/pip_repository_annotations/WORKSPACE @@ -42,7 +42,7 @@ write_file( copy_executables = {"@pip_repository_annotations_example//:data/copy_executable.py": "copied_content/executable.py"}, copy_files = {"@pip_repository_annotations_example//:data/copy_file.txt": "copied_content/file.txt"}, data = [":generated_file"], - data_exclude_glob = ["*.dist-info/WHEEL"], + data_exclude_glob = ["site-packages/*.dist-info/WHEEL"], ), } diff --git a/examples/pip_repository_annotations/pip_repository_annotations_test.py b/examples/pip_repository_annotations/pip_repository_annotations_test.py index 79c354d105..468788f50b 100644 --- a/examples/pip_repository_annotations/pip_repository_annotations_test.py +++ b/examples/pip_repository_annotations/pip_repository_annotations_test.py @@ -69,7 +69,7 @@ def test_data_exclude_glob(self): r = runfiles.Create() dist_info_dir = ( - "pip_repository_annotations_example/external/{}/wheel-{}.dist-info".format( + "pip_repository_annotations_example/external/{}/site-packages/wheel-{}.dist-info".format( self.wheel_pkg_dir(), current_wheel_version, ) diff --git a/python/pip_install/extract_wheels/lib/BUILD b/python/pip_install/extract_wheels/lib/BUILD index 48214126e4..31d6bb8918 100644 --- a/python/pip_install/extract_wheels/lib/BUILD +++ b/python/pip_install/extract_wheels/lib/BUILD @@ -9,7 +9,6 @@ py_library( "arguments.py", "bazel.py", "namespace_pkgs.py", - "purelib.py", "requirements.py", "wheel.py", ], @@ -135,17 +134,6 @@ py_test( ], ) -py_test( - name = "purelib_test", - size = "small", - srcs = [ - "purelib_test.py", - ], - deps = [ - ":lib", - ], -) - filegroup( name = "distribution", srcs = glob( diff --git a/python/pip_install/extract_wheels/lib/bazel.py b/python/pip_install/extract_wheels/lib/bazel.py index aaca68b563..da1e52c4d5 100644 --- a/python/pip_install/extract_wheels/lib/bazel.py +++ b/python/pip_install/extract_wheels/lib/bazel.py @@ -9,7 +9,6 @@ from python.pip_install.extract_wheels.lib import ( annotation, namespace_pkgs, - purelib, wheel, ) @@ -137,27 +136,18 @@ def generate_build_file_contents( there may be no Python sources whatsoever (e.g. packages written in Cython: like `pymssql`). """ - dist_info_ignores = [ - # RECORD is known to contain sha256 checksums of files which might include the checksums - # of generated files produced when wheels are installed. The file is ignored to avoid - # Bazel caching issues. - "**/*.dist-info/RECORD", - ] - data_exclude = list( set( [ - "*.whl", - "**/__pycache__/**", "**/* *", "**/*.py", "**/*.pyc", - "BUILD.bazel", - "WORKSPACE", - f"{WHEEL_ENTRY_POINT_PREFIX}*.py", + # RECORD is known to contain sha256 checksums of files which might include the checksums + # of generated files produced when wheels are installed. The file is ignored to avoid + # Bazel caching issues. + "**/*.dist-info/RECORD", ] + data_exclude - + dist_info_ignores ) ) @@ -172,12 +162,12 @@ def generate_build_file_contents( filegroup( name = "{dist_info_label}", - srcs = glob(["*.dist-info/**"], allow_empty = True), + srcs = glob(["site-packages/*.dist-info/**"], allow_empty = True), ) filegroup( name = "{data_label}", - srcs = glob(["*.data/**"], allow_empty = True), + srcs = glob(["data/**"], allow_empty = True), ) filegroup( @@ -188,11 +178,11 @@ def generate_build_file_contents( py_library( name = "{name}", - srcs = glob(["**/*.py"], exclude={srcs_exclude}, allow_empty = True), - data = {data} + glob(["**/*"], exclude={data_exclude}), + srcs = glob(["site-packages/**/*.py"], exclude={srcs_exclude}, allow_empty = True), + data = {data} + glob(["site-packages/**/*"], exclude={data_exclude}), # This makes this directory a top-level in the python import # search path for anything that depends on this. - imports = ["."], + imports = ["site-packages"], deps = [{dependencies}], tags = [{tags}], ) @@ -377,9 +367,6 @@ def extract_wheel( shutil.copy(whl.path, directory) whl.unzip(directory) - # Note: Order of operations matters here - purelib.spread_purelib_into_root(directory) - if not enable_implicit_namespace_pkgs: setup_namespace_pkg_compatibility(directory) diff --git a/python/pip_install/extract_wheels/lib/purelib.py b/python/pip_install/extract_wheels/lib/purelib.py deleted file mode 100644 index 978e0f18f5..0000000000 --- a/python/pip_install/extract_wheels/lib/purelib.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Functions to make purelibs Bazel compatible""" -import os -import pathlib -import shutil - -from python.pip_install.extract_wheels.lib import wheel - - -def spread_purelib_into_root(wheel_dir: str) -> None: - """Unpacks purelib directories into the root. - - Args: - wheel_dir: The root of the extracted wheel directory. - """ - dist_info = wheel.get_dist_info(wheel_dir) - wheel_metadata_file_path = pathlib.Path(dist_info, "WHEEL") - wheel_metadata_dict = wheel.parse_wheel_meta_file(str(wheel_metadata_file_path)) - - # It is not guaranteed that a WHEEL file author populates 'Root-Is-Purelib'. - # See: https://github.com/bazelbuild/rules_python/issues/435 - root_is_purelib: str = wheel_metadata_dict.get("Root-Is-Purelib", "") - if root_is_purelib.lower() == "true": - # The Python package code is in the root of the Wheel, so no need to 'spread' anything. - return - - dot_data_dir = wheel.get_dot_data_directory(wheel_dir) - # 'Root-Is-Purelib: false' is no guarantee a .data directory exists with - # package code in it. eg. the 'markupsafe' package. - if not dot_data_dir: - return - - for child in pathlib.Path(dot_data_dir).iterdir(): - # TODO(Jonathon): Should all other potential folders get ignored? eg. 'platlib' - if str(child).endswith("purelib"): - _spread_purelib(child, wheel_dir) - - -def backport_copytree(src: pathlib.Path, dst: pathlib.Path): - """Implementation similar to shutil.copytree. - - shutil.copytree before python3.8 does not allow merging one tree with - an existing one. This function does that, while ignoring complications around symlinks, which - can't exist is wheels (See https://bugs.python.org/issue27318). - """ - os.makedirs(dst, exist_ok=True) - for path in src.iterdir(): - if path.is_dir(): - backport_copytree(path, pathlib.Path(dst, path.name)) - elif not pathlib.Path(dst, path.name).exists(): - shutil.copy(path, dst) - - -def _spread_purelib(purelib_dir: pathlib.Path, root_dir: str) -> None: - """Recursively moves all sibling directories of the purelib to the root. - - Args: - purelib_dir: The directory of the purelib. - root_dir: The directory to move files into. - """ - for child in purelib_dir.iterdir(): - if child.is_dir(): - backport_copytree(src=child, dst=pathlib.Path(root_dir, child.name)) - elif not pathlib.Path(root_dir, child.name).exists(): - shutil.copy( - src=str(child), - dst=root_dir, - ) diff --git a/python/pip_install/extract_wheels/lib/purelib_test.py b/python/pip_install/extract_wheels/lib/purelib_test.py deleted file mode 100644 index 02fd9220c5..0000000000 --- a/python/pip_install/extract_wheels/lib/purelib_test.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -import unittest -from contextlib import contextmanager -from pathlib import Path -from tempfile import TemporaryDirectory - -from python.pip_install.extract_wheels.lib import purelib - - -class TestPurelibTestCase(unittest.TestCase): - @contextmanager - def setup_faux_unzipped_wheel(self): - files = [ - ("faux_wheel.data/purelib/toplevel/foo.py", "# foo"), - ("faux_wheel.data/purelib/toplevel/dont_overwrite.py", "overwritten"), - ("faux_wheel.data/purelib/toplevel/subdir/baz.py", "overwritten"), - ("toplevel/bar.py", "# bar"), - ("toplevel/dont_overwrite.py", "original"), - ] - with TemporaryDirectory() as td: - self.td_path = Path(td) - self.purelib_path = self.td_path / Path("faux_wheel.data/purelib") - for file_, content in files: - path = self.td_path / Path(file_) - path.parent.mkdir(parents=True, exist_ok=True) - with open(str(path), "w") as f: - f.write(content) - yield - - def test_spread_purelib_(self): - with self.setup_faux_unzipped_wheel(): - purelib._spread_purelib(self.purelib_path, self.td_path) - self.assertTrue(Path(self.td_path, "toplevel/foo.py").exists()) - self.assertTrue(Path(self.td_path, "toplevel/subdir/baz.py").exists()) - with open(Path(self.td_path, "toplevel/dont_overwrite.py")) as original: - self.assertEqual(original.read().strip(), "original") - - -if __name__ == "__main__": - unittest.main() diff --git a/python/pip_install/extract_wheels/lib/wheel.py b/python/pip_install/extract_wheels/lib/wheel.py index 6dab311637..3f101005d0 100644 --- a/python/pip_install/extract_wheels/lib/wheel.py +++ b/python/pip_install/extract_wheels/lib/wheel.py @@ -1,9 +1,5 @@ """Utility class to inspect an extracted wheel directory""" import email -import glob -import os -import stat -import zipfile from typing import Dict, Optional, Set, Tuple import installer @@ -11,21 +7,6 @@ from pip._vendor.packaging.utils import canonicalize_name -def current_umask() -> int: - """Get the current umask which involves having to set it temporarily.""" - mask = os.umask(0) - os.umask(mask) - return mask - - -def set_extracted_file_to_default_mode_plus_executable(path: str) -> None: - """ - Make file present at path have execute for user/group/world - (chmod +x) is no-op on windows per python docs - """ - os.chmod(path, (0o777 & ~current_umask() | 0o111)) - - class Wheel: """Representation of the compressed .whl file""" @@ -90,91 +71,26 @@ def dependencies(self, extras_requested: Optional[Set[str]] = None) -> Set[str]: return dependency_set def unzip(self, directory: str) -> None: - with zipfile.ZipFile(self.path, "r") as whl: - whl.extractall(directory) - # The following logic is borrowed from Pip: - # https://github.com/pypa/pip/blob/cc48c07b64f338ac5e347d90f6cb4efc22ed0d0b/src/pip/_internal/utils/unpacking.py#L240 - for info in whl.infolist(): - name = info.filename - # Do not attempt to modify directories. - if name.endswith("/") or name.endswith("\\"): - continue - mode = info.external_attr >> 16 - # if mode and regular file and any execute permissions for - # user/group/world? - if mode and stat.S_ISREG(mode) and mode & 0o111: - name = os.path.join(directory, name) - set_extracted_file_to_default_mode_plus_executable(name) - - -def get_dist_info(wheel_dir: str) -> str: - """ "Returns the relative path to the dist-info directory if it exists. - - Args: - wheel_dir: The root of the extracted wheel directory. - - Returns: - Relative path to the dist-info directory if it exists, else, None. - """ - dist_info_dirs = glob.glob(os.path.join(wheel_dir, "*.dist-info")) - if not dist_info_dirs: - raise ValueError( - "No *.dist-info directory found. %s is not a valid Wheel." % wheel_dir - ) - - if len(dist_info_dirs) > 1: - raise ValueError( - "Found more than 1 *.dist-info directory. %s is not a valid Wheel." - % wheel_dir - ) - - return dist_info_dirs[0] - - -def get_dot_data_directory(wheel_dir: str) -> Optional[str]: - """Returns the relative path to the data directory if it exists. - - See: https://www.python.org/dev/peps/pep-0491/#the-data-directory - - Args: - wheel_dir: The root of the extracted wheel directory. - - Returns: - Relative path to the data directory if it exists, else, None. - """ - - dot_data_dirs = glob.glob(os.path.join(wheel_dir, "*.data")) - if not dot_data_dirs: - return None - - if len(dot_data_dirs) > 1: - raise ValueError( - "Found more than 1 *.data directory. %s is not a valid Wheel." % wheel_dir + installation_schemes = { + "purelib": "/site-packages", + "platlib": "/site-packages", + "headers": "/include", + "scripts": "/bin", + "data": "/data", + } + destination = installer.destinations.SchemeDictionaryDestination( + installation_schemes, + # TODO Should entry_point scripts also be handled by installer rather than custom code? + interpreter="/dev/null", + script_kind="posix", + destdir=directory, ) - return dot_data_dirs[0] - - -def parse_wheel_meta_file(wheel_dir: str) -> Dict[str, str]: - """Parses the given WHEEL file into a dictionary. - - Args: - wheel_dir: The file path of the WHEEL metadata file in dist-info. - - Returns: - The WHEEL file mapped into a dictionary. - """ - contents = {} - with open(wheel_dir, "r") as wheel_file: - for line in wheel_file: - cleaned = line.strip() - if not cleaned: - continue - try: - key, value = cleaned.split(":", maxsplit=1) - contents[key] = value.strip() - except ValueError: - raise RuntimeError( - "Encounted invalid line in WHEEL file: '%s'" % cleaned - ) - return contents + with installer.sources.WheelFile.open(self.path) as wheel_source: + installer.install( + source=wheel_source, + destination=destination, + additional_metadata={ + "INSTALLER": b"https://github.com/bazelbuild/rules_python", + }, + ) diff --git a/python/pip_install/private/srcs.bzl b/python/pip_install/private/srcs.bzl index 3f20c4596f..a253b66bbb 100644 --- a/python/pip_install/private/srcs.bzl +++ b/python/pip_install/private/srcs.bzl @@ -14,7 +14,6 @@ PIP_INSTALL_PY_SRCS = [ "@rules_python//python/pip_install/extract_wheels/lib:arguments.py", "@rules_python//python/pip_install/extract_wheels/lib:bazel.py", "@rules_python//python/pip_install/extract_wheels/lib:namespace_pkgs.py", - "@rules_python//python/pip_install/extract_wheels/lib:purelib.py", "@rules_python//python/pip_install/extract_wheels/lib:requirements.py", "@rules_python//python/pip_install/extract_wheels/lib:wheel.py", "@rules_python//python/pip_install/parse_requirements_to_bzl:__init__.py",