diff --git a/pyclean/debris.py b/pyclean/debris.py
index 5759eb6..c16cd08 100644
--- a/pyclean/debris.py
+++ b/pyclean/debris.py
@@ -9,8 +9,8 @@
 from pathlib import Path
 
 from .erase import delete_filesystem_objects
+from .ignore import should_ignore
 from .runner import Runner
-from .traversal import should_ignore
 
 log = logging.getLogger(__name__)
 
diff --git a/pyclean/erase.py b/pyclean/erase.py
index 1ec0af3..e06a3a7 100644
--- a/pyclean/erase.py
+++ b/pyclean/erase.py
@@ -38,6 +38,8 @@ def delete_filesystem_objects(
     are empty (for both files & directories) when we attempt to remove them.
     """
     all_names = sorted(directory.glob(path_glob), reverse=True)
+    if Runner.ignore:
+        all_names = [n for n in all_names if not Runner.is_ignored(n)]
     dirs = (name for name in all_names if name.is_dir() and not name.is_symlink())
     files = (name for name in all_names if not name.is_dir() or name.is_symlink())
 
diff --git a/pyclean/folders.py b/pyclean/folders.py
index 3b7c0c3..c71a474 100644
--- a/pyclean/folders.py
+++ b/pyclean/folders.py
@@ -8,8 +8,8 @@
 import os
 from pathlib import Path
 
+from .ignore import should_ignore
 from .runner import Runner
-from .traversal import should_ignore
 
 log = logging.getLogger(__name__)
 
diff --git a/pyclean/ignore.py b/pyclean/ignore.py
new file mode 100644
--- /dev/null
+++ b/pyclean/ignore.py
@@ -0,0 +1,65 @@
+# SPDX-FileCopyrightText: 2020 Peter Bittner
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Ignore pattern matching utilities."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+
+def normalize(path_pattern: str) -> str:
+    """
+    Normalize path separators in a pattern for cross-platform support.
+
+    On Windows, both forward slash and backslash are valid path separators.
+    On Unix/Posix, only forward slash is valid (backslash can be part of filename).
+    """
+    return path_pattern.replace(os.sep, os.altsep or os.sep)
+
+
+def should_ignore(pathname: str, ignore_patterns: list[str] | None) -> bool:
+    """
+    Check if a path should be ignored based on ignore patterns.
+
+    Patterns can be:
+    - Simple names like 'bar': matches any directory with that name
+    - Paths like 'foo/bar': matches 'bar' directory inside 'foo' directory
+      and also ignores everything inside that directory
+
+    Patterns are compared by their path components, so a trailing separator
+    ('bar/') is equivalent to 'bar'. Patterns without any component
+    ('' or '.') never match.
+    """
+    if not ignore_patterns:
+        return False
+
+    path = Path(pathname)
+
+    for pattern in ignore_patterns:
+        pattern_parts = Path(normalize(pattern)).parts
+        if not pattern_parts:
+            # '' and '.' have no components. Comparing them against path.name
+            # would match '.' and the filesystem root (both have an empty
+            # name), which path_is_ignored() visits for every path.
+            continue
+        if len(pattern_parts) == 1:
+            if path.name == pattern_parts[0]:
+                return True
+            continue
+        if len(path.parts) < len(pattern_parts):
+            continue
+        for i in range(len(path.parts) - len(pattern_parts) + 1):
+            path_slice = path.parts[i : i + len(pattern_parts)]
+            if path_slice == pattern_parts:
+                return True
+    return False
+
+
+def path_is_ignored(path: Path, ignore_patterns: list[str]) -> bool:
+    """Check if a path or any of its ancestors matches an ignore pattern."""
+    if not isinstance(path, Path):
+        path = Path(str(path))
+    return any(should_ignore(str(p), ignore_patterns) for p in [path, *path.parents])
diff --git a/pyclean/runner.py b/pyclean/runner.py
index 4cd167e..0ca9c3d 100644
--- a/pyclean/runner.py
+++ b/pyclean/runner.py
@@ -9,6 +9,8 @@
 import logging
 from typing import TYPE_CHECKING
 
+from .ignore import path_is_ignored
+
 if TYPE_CHECKING:
     from argparse import Namespace
     from pathlib import Path
@@ -43,6 +45,10 @@ def configure(self, args: Namespace) -> None:
         self.rmdir_count = 0
         self.rmdir_failed = 0
 
+    def is_ignored(self, path: Path) -> bool:
+        """Check if a path or any of its ancestors matches an ignore pattern."""
+        return path_is_ignored(path, self.ignore)
+
 
 Runner = CleanupRunner()
 
diff --git a/pyclean/traversal.py b/pyclean/traversal.py
index ddf967d..49e07c2 100644
--- a/pyclean/traversal.py
+++ b/pyclean/traversal.py
@@ -15,44 +15,6 @@
 log = logging.getLogger(__name__)
 
 
-def normalize(path_pattern: str) -> str:
-    """
-    Normalize path separators in a pattern for cross-platform support.
-
-    On Windows, both forward slash and backslash are valid path separators.
-    On Unix/Posix, only forward slash is valid (backslash can be part of filename).
-    """
-    return path_pattern.replace(os.sep, os.altsep or os.sep)
-
-
-def should_ignore(pathname: str, ignore_patterns: list[str] | None) -> bool:
-    """
-    Check if a path should be ignored based on ignore patterns.
-
-    Patterns can be:
-    - Simple names like 'bar': matches any directory with that name
-    - Paths like 'foo/bar': matches 'bar' directory inside 'foo' directory
-      and also ignores everything inside that directory
-    """
-    if not ignore_patterns:
-        return False
-
-    path = Path(pathname)
-
-    for pattern in ignore_patterns:
-        pattern_parts = Path(normalize(pattern)).parts
-        if len(pattern_parts) > 1:
-            if len(path.parts) < len(pattern_parts):
-                continue
-            for i in range(len(path.parts) - len(pattern_parts) + 1):
-                path_slice = path.parts[i : i + len(pattern_parts)]
-                if path_slice == pattern_parts:
-                    return True
-        elif path.name == pattern:
-            return True
-    return False
-
-
 def descend_and_clean(directory, file_types, dir_names):
     """
     Walk and descend a directory tree, cleaning up files of a certain type
@@ -63,7 +25,7 @@ def descend_and_clean(directory, file_types, dir_names):
             if Path(child.path).suffix in file_types:
                 Runner.unlink(Path(child.path))
         elif child.is_dir():
-            if should_ignore(child.path, Runner.ignore):
+            if Runner.is_ignored(Path(child.path)):
                 log.debug('Skipping %s', child.name)
             else:
                 descend_and_clean(child.path, file_types, dir_names)
diff --git a/tests/test_debris.py b/tests/test_debris.py
index 9309891..5baa565 100644
--- a/tests/test_debris.py
+++ b/tests/test_debris.py
@@ -13,6 +13,7 @@
 from cli_test_helpers import ArgvContext
 
 import pyclean.cli
+import pyclean.ignore
 import pyclean.main
 import pyclean.traversal
 from pyclean.debris import (
@@ -223,7 +224,7 @@ def test_debris_cleanup_scans_directories_once():
         (directory / 'subdir1').mkdir()
         (directory / 'subdir2').mkdir()
 
-        original_should_ignore = pyclean.traversal.should_ignore
+        original_should_ignore = pyclean.ignore.should_ignore
         call_count = {'total': 0, 'git_checks': 0}
 
         def counting_should_ignore(path, patterns):
diff --git a/tests/test_erase.py b/tests/test_erase.py
index b557fa0..8da0c76 100644
--- a/tests/test_erase.py
+++ b/tests/test_erase.py
@@ -14,7 +14,11 @@
 
 import pyclean.cli
 import pyclean.main
-from pyclean.erase import confirm, delete_filesystem_objects, remove_freeform_targets
+from pyclean.erase import (
+    confirm,
+    delete_filesystem_objects,
+    remove_freeform_targets,
+)
 
 
 @patch('pyclean.main.remove_freeform_targets')
@@ -213,3 +217,77 @@ def test_confirm_no(mock_input):
     Does confirm return False for 'no' answer?
     """
     assert confirm('Test message') is False
+
+
+def test_path_is_ignored_for_dir_itself():
+    """
+    Does Runner.is_ignored return True for an ignored directory itself?
+    """
+    pyclean.main.Runner.ignore = ['allure-results']
+    assert pyclean.main.Runner.is_ignored(Path('allure-results'))
+
+
+def test_path_is_ignored_for_file_in_ignored_dir():
+    """
+    Does Runner.is_ignored return True for a file inside an ignored directory?
+    """
+    pyclean.main.Runner.ignore = ['allure-results']
+    assert pyclean.main.Runner.is_ignored(Path('allure-results/foo.txt'))
+
+
+def test_path_is_ignored_for_nested_path_in_ignored_dir():
+    """
+    Does Runner.is_ignored return True for a deeply nested path inside an ignored
+    directory?
+    """
+    pyclean.main.Runner.ignore = ['allure-results']
+    assert pyclean.main.Runner.is_ignored(Path('allure-results/sub/deep/file.txt'))
+
+
+def test_path_is_not_ignored_for_unrelated_path():
+    """
+    Does Runner.is_ignored return False for a path not matching any ignore pattern?
+    """
+    pyclean.main.Runner.ignore = ['allure-results']
+    assert not pyclean.main.Runner.is_ignored(Path('keep.txt'))
+    assert not pyclean.main.Runner.is_ignored(Path('other/foo.txt'))
+
+
+def test_delete_filesystem_objects_skips_ignored_dirs(tmp_path):
+    """
+    Does delete_filesystem_objects skip files and directories in ignored paths?
+    """
+    ignored_dir = tmp_path / 'allure-results'
+    ignored_dir.mkdir()
+    ignored_file = ignored_dir / 'foo.txt'
+    ignored_file.write_text('test')
+
+    args = Namespace(dry_run=False, ignore=['allure-results'])
+    pyclean.main.Runner.configure(args)
+
+    delete_filesystem_objects(tmp_path, 'allure-results/**/*', prompt=False)
+
+    assert ignored_file.exists(), 'File in ignored directory should not be deleted'
+
+
+def test_delete_filesystem_objects_erases_non_ignored(tmp_path):
+    """
+    Does delete_filesystem_objects still erase non-ignored paths when ignore is set?
+    """
+    ignored_dir = tmp_path / 'allure-results'
+    ignored_dir.mkdir()
+    ignored_file = ignored_dir / 'foo.txt'
+    ignored_file.write_text('test')
+    non_ignored_file1 = tmp_path / 'keep.txt'
+    non_ignored_file1.write_text('keep')
+    non_ignored_file2 = tmp_path / 'erase.txt'
+    non_ignored_file2.write_text('erase')
+
+    args = Namespace(dry_run=False, ignore=['allure-results'])
+    pyclean.main.Runner.configure(args)
+
+    delete_filesystem_objects(tmp_path, '*.txt', prompt=False)
+
+    assert ignored_file.exists(), 'File in ignored directory should not be deleted'
+    assert not non_ignored_file1.exists(), 'Non-ignored file should be deleted'
+    assert not non_ignored_file2.exists(), 'Non-ignored file should be deleted'
diff --git a/tests/test_traversal.py b/tests/test_traversal.py
index 117c144..982c671 100644
--- a/tests/test_traversal.py
+++ b/tests/test_traversal.py
@@ -15,7 +15,8 @@
 
 import pyclean.main
 from pyclean.bytecode import BYTECODE_DIRS, BYTECODE_FILES
-from pyclean.traversal import descend_and_clean, normalize, should_ignore
+from pyclean.ignore import normalize, should_ignore
+from pyclean.traversal import descend_and_clean
 
 
 @patch('pyclean.main.Runner.unlink')