Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/pip_repository_annotations/WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ write_file(
copy_executables = {"@pip_repository_annotations_example//:data/copy_executable.py": "copied_content/executable.py"},
copy_files = {"@pip_repository_annotations_example//:data/copy_file.txt": "copied_content/file.txt"},
data = [":generated_file"],
data_exclude_glob = ["*.dist-info/WHEEL"],
),
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,34 @@ def test_copy_executables(self):
stdout = proc.stdout.decode("utf-8").strip()
self.assertEqual(stdout, "Hello world from copied executable")

def test_data_exclude_glob(self):
current_wheel_version = "0.37.1"

r = runfiles.Create()
dist_info_dir = (
"pip_repository_annotations_example/external/{}/wheel-{}.dist-info".format(
self.wheel_pkg_dir(),
current_wheel_version,
)
)

# Note: `METADATA` is important as it's consumed by https://docs.python.org/3/library/importlib.metadata.html
# `METADATA` is expected to be there to show dist-info files are included in the runfiles.
metadata_path = r.Rlocation("{}/METADATA".format(dist_info_dir))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a restored regression test from #626 but has new meaning as it now uses METADATA files to ensure this isn't removed from the data attributes of generated targets.


# However, `WHEEL` was explicitly excluded, so it should be missing
wheel_path = r.Rlocation("{}/WHEEL".format(dist_info_dir))

# Because windows does not have `--enable_runfiles` on by default, the
# `runfiles.Rlocation` results will be different on this platform vs
# unix platforms. See `@rules_python//python/runfiles` for more details.
if platform.system() == "Windows":
self.assertIsNotNone(metadata_path)
self.assertIsNone(wheel_path)
else:
self.assertTrue(Path(metadata_path).exists())
self.assertFalse(Path(wheel_path).exists())


if __name__ == "__main__":
unittest.main()
8 changes: 3 additions & 5 deletions python/pip_install/extract_wheels/lib/bazel.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,10 @@ def generate_build_file_contents(
there may be no Python sources whatsoever (e.g. packages written in Cython: like `pymssql`).
"""

# `dist-info` contains non-determinisitc files which can change any time
# the repository rules run. Below is a list of known patterns to these
# files. However, not all files should be ignored as certain packages
# require things like `top_level.txt`.
dist_info_ignores = [
"**/*.dist-info/METADATA",
# RECORD is known to contain sha256 checksums of files which might include the checksums
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there known examples this happens in? Some import lib functions fail still because they use data from RECORD, the files() method on a distribution for instance.

Copy link
Collaborator

@groodt groodt Jun 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I too have been wondering why we're doing so much non-standard unpacking and exclusion of files from wheel packages.

I'm trying to land a change that uses installer to bring some PEP standards into this rule set. See: #715

I think we need a better way to handle non-determinism. I think we need to either make it opt-in somehow, or direct people in how to use a "wheel only" setup (using --only-binary=:all:). If you do that, then the only non-determinism is generated .pyc files, which can be excluded/included via some parameters.

# of generated files produced when wheels are installed. The file is ignored to avoid
# Bazel caching issues.
"**/*.dist-info/RECORD",
]

Expand Down