From d6541af2726514eaf5206de76781651e27f4fd41 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:04:01 +0000 Subject: [PATCH 1/5] Initial plan From 3ac6260ac747530423b31a1f40d8a6b7406215d4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:15:11 +0000 Subject: [PATCH 2/5] Fix: --ignore flag now overrides --erase flag targets Agent-Logs-Url: https://github.com/bittner/pyclean/sessions/3ed23a61-cc64-49a2-ae9f-50c4b6b3b529 Co-authored-by: bittner <665072+bittner@users.noreply.github.com> --- pyclean/erase.py | 10 ++++++ tests/test_erase.py | 76 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/pyclean/erase.py b/pyclean/erase.py index 1ec0af3..f0829c5 100644 --- a/pyclean/erase.py +++ b/pyclean/erase.py @@ -8,10 +8,18 @@ from pathlib import Path from .runner import Runner +from .traversal import should_ignore log = logging.getLogger(__name__) +def _path_is_ignored(path: Path, ignore_patterns: list[str]) -> bool: + """Check if path or any of its ancestors matches an ignore pattern.""" + if not isinstance(path, Path): + path = Path(str(path)) + return any(should_ignore(str(p), ignore_patterns) for p in [path, *path.parents]) + + def confirm(message): """An interactive confirmation prompt.""" try: @@ -38,6 +46,8 @@ def delete_filesystem_objects( are empty (for both files & directories) when we attempt to remove them. """ all_names = sorted(directory.glob(path_glob), reverse=True) + if Runner.ignore: + all_names = [n for n in all_names if not _path_is_ignored(n, Runner.ignore)] dirs = (name for name in all_names if name.is_dir() and not name.is_symlink()) files = (name for name in all_names if not name.is_dir() or name.is_symlink()) diff --git a/tests/test_erase.py b/tests/test_erase.py index b557fa0..ed5f39f 100644 --- a/tests/test_erase.py +++ b/tests/test_erase.py @@ -14,7 +14,12 @@ import pyclean.cli import pyclean.main -from pyclean.erase import confirm, delete_filesystem_objects, remove_freeform_targets +from pyclean.erase import ( + _path_is_ignored, + confirm, + delete_filesystem_objects, + remove_freeform_targets, +) @patch('pyclean.main.remove_freeform_targets') @@ -213,3 +218,72 @@ def test_confirm_no(mock_input): Does confirm return False for 'no' answer? """ assert confirm('Test message') is False + + +def test_path_is_ignored_for_dir_itself(): + """ + Does _path_is_ignored return True for an ignored directory itself? + """ + assert _path_is_ignored(Path('allure-results'), ['allure-results']) + + +def test_path_is_ignored_for_file_in_ignored_dir(): + """ + Does _path_is_ignored return True for a file inside an ignored directory? + """ + assert _path_is_ignored(Path('allure-results/foo.txt'), ['allure-results']) + + +def test_path_is_ignored_for_nested_path_in_ignored_dir(): + """ + Does _path_is_ignored return True for a deeply nested path inside an ignored directory? + """ + assert _path_is_ignored(Path('allure-results/sub/deep/file.txt'), ['allure-results']) + + +def test_path_is_not_ignored_for_unrelated_path(): + """ + Does _path_is_ignored return False for a path not matching any ignore pattern? + """ + assert not _path_is_ignored(Path('keep.txt'), ['allure-results']) + assert not _path_is_ignored(Path('other/foo.txt'), ['allure-results']) + + +def test_delete_filesystem_objects_skips_ignored_dirs(tmp_path): + """ + Does delete_filesystem_objects skip files and directories in ignored paths? + """ + ignored_dir = tmp_path / 'allure-results' + ignored_dir.mkdir() + ignored_file = ignored_dir / 'foo.txt' + ignored_file.write_text('test') + + args = Namespace(dry_run=False, ignore=['allure-results']) + pyclean.main.Runner.configure(args) + + delete_filesystem_objects(tmp_path, 'allure-results/**/*', prompt=False) + + assert ignored_file.exists(), 'File in ignored directory should not be deleted' + + +def test_delete_filesystem_objects_erases_non_ignored(tmp_path): + """ + Does delete_filesystem_objects still erase non-ignored paths when ignore is set? + """ + ignored_dir = tmp_path / 'allure-results' + ignored_dir.mkdir() + ignored_file = ignored_dir / 'foo.txt' + ignored_file.write_text('test') + non_ignored_file1 = tmp_path / 'keep.txt' + non_ignored_file1.write_text('keep') + non_ignored_file2 = tmp_path / 'erase.txt' + non_ignored_file2.write_text('erase') + + args = Namespace(dry_run=False, ignore=['allure-results']) + pyclean.main.Runner.configure(args) + + delete_filesystem_objects(tmp_path, '*.txt', prompt=False) + + assert ignored_file.exists(), 'File in ignored directory should not be deleted' + assert not non_ignored_file1.exists(), 'Non-ignored file should be deleted' + assert not non_ignored_file2.exists(), 'Non-ignored file should be deleted' From 328c13ddafa9f6cfd8a73aa2c440d2ce35b571b5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:21:59 +0000 Subject: [PATCH 3/5] Refactor: move path_is_ignored helper to traversal module Agent-Logs-Url: https://github.com/bittner/pyclean/sessions/50532ba3-817f-47df-a8fa-c01c1c8f78e3 Co-authored-by: bittner <665072+bittner@users.noreply.github.com> --- pyclean/erase.py | 11 ++--------- pyclean/traversal.py | 7 +++++++ tests/test_erase.py | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pyclean/erase.py b/pyclean/erase.py index f0829c5..a70260a 100644 --- a/pyclean/erase.py +++ b/pyclean/erase.py @@ -8,18 +8,11 @@ from pathlib import Path from .runner import Runner -from .traversal import should_ignore +from .traversal import path_is_ignored log = logging.getLogger(__name__) -def _path_is_ignored(path: Path, ignore_patterns: list[str]) -> bool: - """Check if path or any of its ancestors matches an ignore pattern.""" - if not isinstance(path, Path): - path = Path(str(path)) - return any(should_ignore(str(p), ignore_patterns) for p in [path, *path.parents]) - - def confirm(message): """An interactive confirmation prompt.""" try: @@ -47,7 +40,7 @@ def delete_filesystem_objects( """ all_names = sorted(directory.glob(path_glob), reverse=True) if Runner.ignore: - all_names = [n for n in all_names if not _path_is_ignored(n, Runner.ignore)] + all_names = [n for n in all_names if not path_is_ignored(n, Runner.ignore)] dirs = (name for name in all_names if name.is_dir() and not name.is_symlink()) files = (name for name in all_names if not name.is_dir() or name.is_symlink()) diff --git a/pyclean/traversal.py b/pyclean/traversal.py index ddf967d..01b99d3 100644 --- a/pyclean/traversal.py +++ b/pyclean/traversal.py @@ -53,6 +53,13 @@ def should_ignore(pathname: str, ignore_patterns: list[str] | None) -> bool: return False +def path_is_ignored(path: Path, ignore_patterns: list[str]) -> bool: + """Check if a path or any of its ancestors matches an ignore pattern.""" + if not isinstance(path, Path): + path = Path(str(path)) + return any(should_ignore(str(p), ignore_patterns) for p in [path, *path.parents]) + + def descend_and_clean(directory, file_types, dir_names): """ Walk and descend a directory tree, cleaning up files of a certain type diff --git a/tests/test_erase.py b/tests/test_erase.py index ed5f39f..6045260 100644 --- a/tests/test_erase.py +++ b/tests/test_erase.py @@ -15,11 +15,11 @@ import pyclean.cli import pyclean.main from pyclean.erase import ( - _path_is_ignored, confirm, delete_filesystem_objects, remove_freeform_targets, ) +from pyclean.traversal import path_is_ignored @patch('pyclean.main.remove_freeform_targets') @@ -222,31 +222,31 @@ def test_confirm_no(mock_input): def test_path_is_ignored_for_dir_itself(): """ - Does _path_is_ignored return True for an ignored directory itself? + Does path_is_ignored return True for an ignored directory itself? """ - assert _path_is_ignored(Path('allure-results'), ['allure-results']) + assert path_is_ignored(Path('allure-results'), ['allure-results']) def test_path_is_ignored_for_file_in_ignored_dir(): """ - Does _path_is_ignored return True for a file inside an ignored directory? + Does path_is_ignored return True for a file inside an ignored directory? """ - assert _path_is_ignored(Path('allure-results/foo.txt'), ['allure-results']) + assert path_is_ignored(Path('allure-results/foo.txt'), ['allure-results']) def test_path_is_ignored_for_nested_path_in_ignored_dir(): """ - Does _path_is_ignored return True for a deeply nested path inside an ignored directory? + Does path_is_ignored return True for a deeply nested path inside an ignored directory? """ - assert _path_is_ignored(Path('allure-results/sub/deep/file.txt'), ['allure-results']) + assert path_is_ignored(Path('allure-results/sub/deep/file.txt'), ['allure-results']) def test_path_is_not_ignored_for_unrelated_path(): """ - Does _path_is_ignored return False for a path not matching any ignore pattern? + Does path_is_ignored return False for a path not matching any ignore pattern? """ - assert not _path_is_ignored(Path('keep.txt'), ['allure-results']) - assert not _path_is_ignored(Path('other/foo.txt'), ['allure-results']) + assert not path_is_ignored(Path('keep.txt'), ['allure-results']) + assert not path_is_ignored(Path('other/foo.txt'), ['allure-results']) def test_delete_filesystem_objects_skips_ignored_dirs(tmp_path): From 20fb76ac1955123711e9f43b3de3ccc2dcb5affa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:40:56 +0000 Subject: [PATCH 4/5] Refactor: add Runner.is_ignored() method, remove path_is_ignored free-function call in erase.py Agent-Logs-Url: https://github.com/bittner/pyclean/sessions/6eb036ae-d200-4616-bb20-7d471ee1178c Co-authored-by: bittner <665072+bittner@users.noreply.github.com> --- pyclean/erase.py | 3 +-- pyclean/runner.py | 6 ++++++ pyclean/traversal.py | 2 +- tests/test_erase.py | 23 +++++++++++++---------- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pyclean/erase.py b/pyclean/erase.py index a70260a..e06a3a7 100644 --- a/pyclean/erase.py +++ b/pyclean/erase.py @@ -8,7 +8,6 @@ from pathlib import Path from .runner import Runner -from .traversal import path_is_ignored log = logging.getLogger(__name__) @@ -40,7 +39,7 @@ def delete_filesystem_objects( """ all_names = sorted(directory.glob(path_glob), reverse=True) if Runner.ignore: - all_names = [n for n in all_names if not path_is_ignored(n, Runner.ignore)] + all_names = [n for n in all_names if not Runner.is_ignored(n)] dirs = (name for name in all_names if name.is_dir() and not name.is_symlink()) files = (name for name in all_names if not name.is_dir() or name.is_symlink()) diff --git a/pyclean/runner.py b/pyclean/runner.py index 4cd167e..c3ec7b5 100644 --- a/pyclean/runner.py +++ b/pyclean/runner.py @@ -43,6 +43,12 @@ def configure(self, args: Namespace) -> None: self.rmdir_count = 0 self.rmdir_failed = 0 + def is_ignored(self, path: Path) -> bool: + """Check if a path or any of its ancestors matches an ignore pattern.""" + from .traversal import path_is_ignored # avoid circular import at module level + + return path_is_ignored(path, self.ignore) + Runner = CleanupRunner() diff --git a/pyclean/traversal.py b/pyclean/traversal.py index 01b99d3..a1716da 100644 --- a/pyclean/traversal.py +++ b/pyclean/traversal.py @@ -70,7 +70,7 @@ def descend_and_clean(directory, file_types, dir_names): if Path(child.path).suffix in file_types: Runner.unlink(Path(child.path)) elif child.is_dir(): - if should_ignore(child.path, Runner.ignore): + if Runner.is_ignored(Path(child.path)): log.debug('Skipping %s', child.name) else: descend_and_clean(child.path, file_types, dir_names) diff --git a/tests/test_erase.py b/tests/test_erase.py index 6045260..e6c6035 100644 --- a/tests/test_erase.py +++ b/tests/test_erase.py @@ -19,7 +19,6 @@ delete_filesystem_objects, remove_freeform_targets, ) -from pyclean.traversal import path_is_ignored @patch('pyclean.main.remove_freeform_targets') @@ -222,31 +221,35 @@ def test_confirm_no(mock_input): def test_path_is_ignored_for_dir_itself(): """ - Does path_is_ignored return True for an ignored directory itself? + Does Runner.is_ignored return True for an ignored directory itself? """ - assert path_is_ignored(Path('allure-results'), ['allure-results']) + pyclean.main.Runner.ignore = ['allure-results'] + assert pyclean.main.Runner.is_ignored(Path('allure-results')) def test_path_is_ignored_for_file_in_ignored_dir(): """ - Does path_is_ignored return True for a file inside an ignored directory? + Does Runner.is_ignored return True for a file inside an ignored directory? """ - assert path_is_ignored(Path('allure-results/foo.txt'), ['allure-results']) + pyclean.main.Runner.ignore = ['allure-results'] + assert pyclean.main.Runner.is_ignored(Path('allure-results/foo.txt')) def test_path_is_ignored_for_nested_path_in_ignored_dir(): """ - Does path_is_ignored return True for a deeply nested path inside an ignored directory? + Does Runner.is_ignored return True for a deeply nested path inside an ignored directory? """ - assert path_is_ignored(Path('allure-results/sub/deep/file.txt'), ['allure-results']) + pyclean.main.Runner.ignore = ['allure-results'] + assert pyclean.main.Runner.is_ignored(Path('allure-results/sub/deep/file.txt')) def test_path_is_not_ignored_for_unrelated_path(): """ - Does path_is_ignored return False for a path not matching any ignore pattern? + Does Runner.is_ignored return False for a path not matching any ignore pattern? """ - assert not path_is_ignored(Path('keep.txt'), ['allure-results']) - assert not path_is_ignored(Path('other/foo.txt'), ['allure-results']) + pyclean.main.Runner.ignore = ['allure-results'] + assert not pyclean.main.Runner.is_ignored(Path('keep.txt')) + assert not pyclean.main.Runner.is_ignored(Path('other/foo.txt')) def test_delete_filesystem_objects_skips_ignored_dirs(tmp_path): From 8f5407322e43589873173f252be0c0458745e3fc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:47:31 +0000 Subject: [PATCH 5/5] Extract ignore utilities to ignore.py, fix circular import and lint/format issues Agent-Logs-Url: https://github.com/bittner/pyclean/sessions/47d57474-ae0e-4870-acae-97947226a21e Co-authored-by: bittner <665072+bittner@users.noreply.github.com> --- pyclean/debris.py | 2 +- pyclean/folders.py | 2 +- pyclean/ignore.py | 55 +++++++++++++++++++++++++++++++++++++++++ pyclean/runner.py | 4 +-- pyclean/traversal.py | 45 --------------------------------- tests/test_debris.py | 3 ++- tests/test_erase.py | 3 ++- tests/test_traversal.py | 3 ++- 8 files changed, 65 insertions(+), 52 deletions(-) create mode 100644 pyclean/ignore.py diff --git a/pyclean/debris.py b/pyclean/debris.py index 5759eb6..c16cd08 100644 --- a/pyclean/debris.py +++ b/pyclean/debris.py @@ -9,8 +9,8 @@ from pathlib import Path from .erase import delete_filesystem_objects +from .ignore import should_ignore from .runner import Runner -from .traversal import should_ignore log = logging.getLogger(__name__) diff --git a/pyclean/folders.py b/pyclean/folders.py index 3b7c0c3..c71a474 100644 --- a/pyclean/folders.py +++ b/pyclean/folders.py @@ -8,8 +8,8 @@ import os from pathlib import Path +from .ignore import should_ignore from .runner import Runner -from .traversal import should_ignore log = logging.getLogger(__name__) diff --git a/pyclean/ignore.py b/pyclean/ignore.py new file mode 100644 index 0000000..3e8a81d --- /dev/null +++ b/pyclean/ignore.py @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: 2020 Peter Bittner +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Ignore pattern matching utilities.""" + +from __future__ import annotations + +import os +from pathlib import Path + + +def normalize(path_pattern: str) -> str: + """ + Normalize path separators in a pattern for cross-platform support. + + On Windows, both forward slash and backslash are valid path separators. + On Unix/Posix, only forward slash is valid (backslash can be part of filename). + """ + return path_pattern.replace(os.sep, os.altsep or os.sep) + + +def should_ignore(pathname: str, ignore_patterns: list[str] | None) -> bool: + """ + Check if a path should be ignored based on ignore patterns. + + Patterns can be: + - Simple names like 'bar': matches any directory with that name + - Paths like 'foo/bar': matches 'bar' directory inside 'foo' directory + and also ignores everything inside that directory + """ + if not ignore_patterns: + return False + + path = Path(pathname) + + for pattern in ignore_patterns: + pattern_parts = Path(normalize(pattern)).parts + if len(pattern_parts) > 1: + if len(path.parts) < len(pattern_parts): + continue + for i in range(len(path.parts) - len(pattern_parts) + 1): + path_slice = path.parts[i : i + len(pattern_parts)] + if path_slice == pattern_parts: + return True + elif path.name == pattern: + return True + return False + + +def path_is_ignored(path: Path, ignore_patterns: list[str]) -> bool: + """Check if a path or any of its ancestors matches an ignore pattern.""" + if not isinstance(path, Path): + path = Path(str(path)) + return any(should_ignore(str(p), ignore_patterns) for p in [path, *path.parents]) diff --git a/pyclean/runner.py b/pyclean/runner.py index c3ec7b5..0ca9c3d 100644 --- a/pyclean/runner.py +++ b/pyclean/runner.py @@ -9,6 +9,8 @@ import logging from typing import TYPE_CHECKING +from .ignore import path_is_ignored + if TYPE_CHECKING: from argparse import Namespace from pathlib import Path @@ -45,8 +47,6 @@ def configure(self, args: Namespace) -> None: def is_ignored(self, path: Path) -> bool: """Check if a path or any of its ancestors matches an ignore pattern.""" - from .traversal import path_is_ignored # avoid circular import at module level - return path_is_ignored(path, self.ignore) diff --git a/pyclean/traversal.py b/pyclean/traversal.py index a1716da..49e07c2 100644 --- a/pyclean/traversal.py +++ b/pyclean/traversal.py @@ -15,51 +15,6 @@ log = logging.getLogger(__name__) -def normalize(path_pattern: str) -> str: - """ - Normalize path separators in a pattern for cross-platform support. - - On Windows, both forward slash and backslash are valid path separators. - On Unix/Posix, only forward slash is valid (backslash can be part of filename). - """ - return path_pattern.replace(os.sep, os.altsep or os.sep) - - -def should_ignore(pathname: str, ignore_patterns: list[str] | None) -> bool: - """ - Check if a path should be ignored based on ignore patterns. - - Patterns can be: - - Simple names like 'bar': matches any directory with that name - - Paths like 'foo/bar': matches 'bar' directory inside 'foo' directory - and also ignores everything inside that directory - """ - if not ignore_patterns: - return False - - path = Path(pathname) - - for pattern in ignore_patterns: - pattern_parts = Path(normalize(pattern)).parts - if len(pattern_parts) > 1: - if len(path.parts) < len(pattern_parts): - continue - for i in range(len(path.parts) - len(pattern_parts) + 1): - path_slice = path.parts[i : i + len(pattern_parts)] - if path_slice == pattern_parts: - return True - elif path.name == pattern: - return True - return False - - -def path_is_ignored(path: Path, ignore_patterns: list[str]) -> bool: - """Check if a path or any of its ancestors matches an ignore pattern.""" - if not isinstance(path, Path): - path = Path(str(path)) - return any(should_ignore(str(p), ignore_patterns) for p in [path, *path.parents]) - - def descend_and_clean(directory, file_types, dir_names): """ Walk and descend a directory tree, cleaning up files of a certain type diff --git a/tests/test_debris.py b/tests/test_debris.py index 9309891..5baa565 100644 --- a/tests/test_debris.py +++ b/tests/test_debris.py @@ -13,6 +13,7 @@ from cli_test_helpers import ArgvContext import pyclean.cli +import pyclean.ignore import pyclean.main import pyclean.traversal from pyclean.debris import ( @@ -223,7 +224,7 @@ def test_debris_cleanup_scans_directories_once(): (directory / 'subdir1').mkdir() (directory / 'subdir2').mkdir() - original_should_ignore = pyclean.traversal.should_ignore + original_should_ignore = pyclean.ignore.should_ignore call_count = {'total': 0, 'git_checks': 0} def counting_should_ignore(path, patterns): diff --git a/tests/test_erase.py b/tests/test_erase.py index e6c6035..8da0c76 100644 --- a/tests/test_erase.py +++ b/tests/test_erase.py @@ -237,7 +237,8 @@ def test_path_is_ignored_for_file_in_ignored_dir(): def test_path_is_ignored_for_nested_path_in_ignored_dir(): """ - Does Runner.is_ignored return True for a deeply nested path inside an ignored directory? + Does Runner.is_ignored return True for a deeply nested path inside an ignored + directory? """ pyclean.main.Runner.ignore = ['allure-results'] assert pyclean.main.Runner.is_ignored(Path('allure-results/sub/deep/file.txt')) diff --git a/tests/test_traversal.py b/tests/test_traversal.py index 117c144..982c671 100644 --- a/tests/test_traversal.py +++ b/tests/test_traversal.py @@ -15,7 +15,8 @@ import pyclean.main from pyclean.bytecode import BYTECODE_DIRS, BYTECODE_FILES -from pyclean.traversal import descend_and_clean, normalize, should_ignore +from pyclean.ignore import normalize, should_ignore +from pyclean.traversal import descend_and_clean @patch('pyclean.main.Runner.unlink')