From 8f477eafe2d563995c6af4c1b46cd2713ab401e7 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 Jan 2024 19:05:51 +0000 Subject: [PATCH 1/2] GH-113225: Speed up `pathlib._abc.PathBase.walk(top_down=False)` Use `_make_child_entry()` rather than `_make_child_relpath()` to retrieve path objects for directories to visit. This saves the allocation of one path object per directory in user subclasses of `PathBase`, and avoids a second loop. This trick does not apply when walking top-down, because users can affect the walk by modifying *dirnames* in-place. A side effect of this change is that, in bottom-up mode, subdirectories of each directory are visited in reverse order, and that this order doesn't match that of the names in *dirnames*. I suspect this is fine as the order is arbitrary anyway. --- Lib/pathlib/_abc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index f75b20a1d5f1e5..d76bc054fe498d 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -921,6 +921,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): with scandir_obj as scandir_it: dirnames = [] filenames = [] + if not top_down: + paths.append((path, dirnames, filenames)) for entry in scandir_it: try: is_dir = entry.is_dir(follow_symlinks=follow_symlinks) @@ -929,16 +931,15 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): is_dir = False if is_dir: + if not top_down: + paths.append(path._make_child_entry(entry)) dirnames.append(entry.name) else: filenames.append(entry.name) if top_down: yield path, dirnames, filenames - else: - paths.append((path, dirnames, filenames)) - - paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + paths += [path._make_child_relpath(d) for d in reversed(dirnames)] def absolute(self): """Return an absolute version of this path From 32b6a332439bf0c98d1acbe5faece283126244bd Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 4 Jan 2024 20:58:25 +0000 Subject: [PATCH 2/2] Add NEWS --- .../next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst diff --git a/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst new file mode 100644 index 00000000000000..0c07f42fd065d2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst @@ -0,0 +1,2 @@ +Speed up :meth:`pathlib.Path.walk` by using :attr:`os.DirEntry.path` where +possible.