Skip to content

Commit af5c383

Browse files
committed
Add benchmark comparing powerwalk vs os.walk for package discovery
1 parent 558e62c commit af5c383

File tree

2 files changed

+169
-0
lines changed

2 files changed

+169
-0
lines changed

benchmark_walk.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""Benchmark os.walk vs powerwalk for package discovery."""
2+
3+
import os
4+
import time
5+
from collections.abc import Iterable
6+
7+
import powerwalk
8+
9+
from grimp.adaptors.filesystem import FileSystem
10+
from grimp.adaptors.modulefinder import ModuleFinder
11+
from grimp.application.ports import modulefinder
12+
from grimp.domain.valueobjects import Module
13+
14+
15+
class PowerwalkModuleFinder(modulefinder.AbstractModuleFinder):
    """ModuleFinder using powerwalk directly, ignoring AbstractFileSystem."""

    def find_package(
        self, package_name: str, package_directory: str, file_system=None
    ) -> modulefinder.FoundPackage:
        """
        Build a FoundPackage describing every Python file under ``package_directory``.

        Args:
            package_name (string) - the importable name of the top level package.
            package_directory (string) - the path of the top level Python package directory.
            file_system - unused; this finder walks the directory with powerwalk and reads
                          modification times with os.path.getmtime.
        Returns:
            FoundPackage whose module_files hold one ModuleFile (module plus mtime) per file.
        """
        module_files = frozenset(
            modulefinder.ModuleFile(
                module=Module(
                    self._module_name_from_filename(
                        package_name, module_filename, package_directory
                    )
                ),
                mtime=os.path.getmtime(module_filename),
            )
            for module_filename in self._get_python_files_inside_package(package_directory)
        )

        return modulefinder.FoundPackage(
            name=package_name,
            directory=package_directory,
            module_files=module_files,
        )

    def _get_python_files_inside_package(self, directory: str) -> Iterable[str]:
        """
        Get the Python files within the supplied package directory using powerwalk.

        Return:
            Generator of Python file names (the ``path_str`` of each non-directory entry).
        """
        # NOTE(review): presumably the filter is a glob limiting results to *.py at any
        # depth - confirm against the powerwalk documentation.
        for entry in powerwalk.walk(directory, filter="**/*.py"):
            if entry.is_dir:
                continue

            yield entry.path_str

    def _module_name_from_filename(
        self, package_name: str, filename_and_path: str, package_directory: str
    ) -> str:
        """
        Args:
            package_name (string) - the importable name of the top level package. Could
                                    be namespaced.
            filename_and_path (string) - the full name of the Python file.
            package_directory (string) - the full path of the top level Python package directory.
        Returns:
            Absolute module name for importing (string).
        """
        # Drop the directory prefix, then any separator left at the front. Stripping
        # separators (rather than always exactly one character) keeps the name intact
        # when package_directory was supplied with a trailing separator.
        internal_filename_and_path = filename_and_path[len(package_directory) :].lstrip(os.sep)
        # Remove the trailing ".py".
        internal_filename_and_path_without_extension = internal_filename_and_path[:-3]
        components = [package_name] + internal_filename_and_path_without_extension.split(os.sep)
        # A package's __init__.py is imported under the package's own name.
        if components[-1] == "__init__":
            components.pop()
        return ".".join(components)
69+
70+
71+
def _report_only_found_by(finder_label: str, module_names: set[str], limit: int = 10) -> None:
    """Print a warning listing up to ``limit`` modules that only one finder discovered."""
    if not module_names:
        return

    print(f"\nWARNING: {len(module_names)} modules only found by {finder_label}:")
    for name in sorted(module_names)[:limit]:
        print(f" {name}")
    if len(module_names) > limit:
        print(f" ... and {len(module_names) - limit} more")


def _time_runs(label: str, find_package, num_runs: int) -> list[float]:
    """Call the zero-argument ``find_package`` ``num_runs`` times, printing and returning timings."""
    timings = []
    for run_number in range(1, num_runs + 1):
        start = time.perf_counter()
        find_package()
        elapsed = time.perf_counter() - start
        timings.append(elapsed)
        print(f"{label} run {run_number}: {elapsed:.4f}s")
    return timings


def _print_stats(label: str, timings: list[float]) -> float:
    """Print average/min/max of ``timings`` under ``label`` and return the average."""
    average = sum(timings) / len(timings)
    print(f"\n{label}:")
    print(f" Average: {average:.4f}s")
    print(f" Min: {min(timings):.4f}s")
    print(f" Max: {max(timings):.4f}s")
    return average


def benchmark(package_name: str, package_directory: str, num_runs: int = 10) -> None:
    """
    Run benchmarks comparing both module finder implementations.

    Performs one untimed warm-up call per finder, reports any difference between the
    module names they discover, then times ``num_runs`` calls of each and prints
    per-run timings, summary statistics and the ratio of the average times.

    Args:
        package_name: the importable name of the top level package.
        package_directory: the path of the top level Python package directory.
        num_runs: number of timed iterations per finder; must be at least 1.
    Raises:
        ValueError: if ``num_runs`` is less than 1 (no timings to summarise).
    """
    # Fail before doing any work: with no timed runs the averages below are undefined.
    if num_runs < 1:
        raise ValueError(f"num_runs must be at least 1, got {num_runs}")

    print("Benchmarking package discovery")
    print(f"Package: {package_name}")
    print(f"Directory: {package_directory}\n")

    os_walk_finder = ModuleFinder()
    powerwalk_finder = PowerwalkModuleFinder()
    file_system = FileSystem()

    # Warm-up and verify both produce same results
    print("Running warm-up and verification...")
    result_os_walk = os_walk_finder.find_package(package_name, package_directory, file_system)
    result_powerwalk = powerwalk_finder.find_package(package_name, package_directory)

    modules_os_walk = {mf.module.name for mf in result_os_walk.module_files}
    modules_powerwalk = {mf.module.name for mf in result_powerwalk.module_files}

    print(f"os.walk found: {len(modules_os_walk)} modules")
    print(f"powerwalk found: {len(modules_powerwalk)} modules")

    # Check for differences
    _report_only_found_by("os.walk", modules_os_walk - modules_powerwalk)
    _report_only_found_by("powerwalk", modules_powerwalk - modules_os_walk)

    print(f"\n{'=' * 60}")
    print(f"Running {num_runs} iterations each...\n")

    # Benchmark os.walk
    os_walk_times = _time_runs(
        "os.walk",
        lambda: os_walk_finder.find_package(package_name, package_directory, file_system),
        num_runs,
    )

    print()

    # Benchmark powerwalk
    powerwalk_times = _time_runs(
        "powerwalk",
        lambda: powerwalk_finder.find_package(package_name, package_directory),
        num_runs,
    )

    # Calculate statistics
    print(f"\n{'=' * 60}")
    print("Results:")
    print(f"{'=' * 60}")

    os_walk_avg = _print_stats("os.walk", os_walk_times)
    powerwalk_avg = _print_stats("powerwalk", powerwalk_times)

    # Ratio above 1 means powerwalk took less time on average.
    speedup = os_walk_avg / powerwalk_avg
    print(f"\nSpeedup: {speedup:.2f}x")

    if speedup > 1:
        print(f"✓ powerwalk is {speedup:.2f}x faster")
    elif speedup < 1:
        print(f"✗ powerwalk is {1 / speedup:.2f}x slower")
    else:
        print("≈ Both methods have similar performance")
165+
166+
167+
if __name__ == "__main__":
    # Imported here because only the command-line entry point needs it.
    import argparse

    parser = argparse.ArgumentParser(
        description="Benchmark os.walk vs powerwalk for package discovery."
    )
    # Both positionals are optional; omitting them benchmarks the original hard-coded package.
    parser.add_argument(
        "package_name",
        nargs="?",
        default="octoenergy",
        help="importable name of the top level package (default: %(default)s)",
    )
    parser.add_argument(
        "package_directory",
        nargs="?",
        default="/Users/peter.byfield/projects/kraken-core/src/octoenergy",
        help="path of the top level package directory (default: %(default)s)",
    )
    parser.add_argument(
        "--num-runs",
        type=int,
        default=10,
        help="number of timed iterations per finder (default: %(default)s)",
    )
    args = parser.parse_args()

    benchmark(args.package_name, args.package_directory, num_runs=args.num_runs)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ dev = [
     "sqlalchemy==2.0.35",
     "google-cloud-audit-log==0.3.0",
     "pyupgrade>=3.21.0",
+    "powerwalk==0.5.0",
 ]
 docs = [
     "sphinx>=7.4.7",

0 commit comments

Comments
 (0)