From 4d493934fe7237edbfac871af0fa949441e76452 Mon Sep 17 00:00:00 2001 From: sergio-sisternes-epam Date: Wed, 4 Mar 2026 17:09:19 +0000 Subject: [PATCH 1/2] fix: replace substring matching with path-component matching in exclusions Replace `ignore in str(current_path)` with `part in (...) for part in relative_parts` in _analyze_project_structure() to prevent false positive exclusions. Directories like 'rebuild/', 'redistribution/', or 'apm_modules_guide/' were incorrectly excluded because their names contain exclusion tokens ('build', 'dist', 'apm_modules') as substrings. Fixes #158 --- src/apm_cli/compilation/context_optimizer.py | 4 +- .../compilation/test_context_optimizer.py | 54 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/apm_cli/compilation/context_optimizer.py b/src/apm_cli/compilation/context_optimizer.py index 66a9aea2..c7b6b381 100644 --- a/src/apm_cli/compilation/context_optimizer.py +++ b/src/apm_cli/compilation/context_optimizer.py @@ -423,7 +423,9 @@ def _analyze_project_structure(self) -> None: continue # Default hardcoded exclusions for backwards compatibility - if any(ignore in str(current_path) for ignore in ['node_modules', '__pycache__', '.git', 'dist', 'build']): + # Use path-component matching to avoid false positives (e.g. "rebuild" matching "build") + relative_parts = current_path.relative_to(self.base_dir).parts + if any(part in ('node_modules', '__pycache__', '.git', 'dist', 'build') for part in relative_parts): continue # Apply configurable exclusion patterns diff --git a/tests/unit/compilation/test_context_optimizer.py b/tests/unit/compilation/test_context_optimizer.py index 9ec96799..9ecf9abc 100644 --- a/tests/unit/compilation/test_context_optimizer.py +++ b/tests/unit/compilation/test_context_optimizer.py @@ -677,6 +677,60 @@ def test_default_exclusions_still_work(self): assert base_path / "custom_exclude" not in cached_dirs # Custom exclusion +class TestSubstringExclusionFalsePositives: + """Test that directory exclusions use component matching, not substring matching (Fixes #158).""" + + def test_directory_containing_exclusion_token_not_excluded(self): + """Directories like 'rebuild/' must NOT be excluded just because 'build' is a substring.""" + with tempfile.TemporaryDirectory() as tmpdir: + base = Path(tmpdir).resolve() + for name in ["rebuild", "apm_modules_guide", "redistribution", "node_modules_compat"]: + d = base / "src" / name + d.mkdir(parents=True, exist_ok=True) + (d / "file.py").touch() + + optimizer = ContextOptimizer(base_dir=str(base)) + optimizer._analyze_project_structure() + + cached = set(optimizer._directory_cache.keys()) + for name in ["rebuild", "apm_modules_guide", "redistribution", "node_modules_compat"]: + assert base / "src" / name in cached, f"'{name}' was incorrectly excluded" + + def test_exact_exclusion_names_still_excluded(self): + """Directories exactly named 'build', 'dist', etc. must still be excluded.""" + with tempfile.TemporaryDirectory() as tmpdir: + base = Path(tmpdir).resolve() + for name in ["build", "dist", "node_modules", "__pycache__"]: + d = base / name + d.mkdir(parents=True, exist_ok=True) + (d / "file.py").touch() + (base / "src").mkdir(exist_ok=True) + (base / "src" / "app.py").touch() + + optimizer = ContextOptimizer(base_dir=str(base)) + optimizer._analyze_project_structure() + + cached = set(optimizer._directory_cache.keys()) + assert base / "src" in cached + for name in ["build", "dist", "node_modules", "__pycache__"]: + assert base / name not in cached, f"'{name}' should be excluded" + + def test_nested_exclusion_name_still_excluded(self): + """A directory named 'build' nested under a non-excluded parent must still be excluded.""" + with tempfile.TemporaryDirectory() as tmpdir: + base = Path(tmpdir).resolve() + (base / "src" / "build").mkdir(parents=True) + (base / "src" / "build" / "output.js").touch() + (base / "src" / "app.py").touch() + + optimizer = ContextOptimizer(base_dir=str(base)) + optimizer._analyze_project_structure() + + cached = set(optimizer._directory_cache.keys()) + assert base / "src" in cached + assert base / "src" / "build" not in cached + + class TestExpandGlobPattern: """Test _expand_glob_pattern brace expansion.""" From 004926370840afeae53c66d16ddec223a8b5e1e1 Mon Sep 17 00:00:00 2001 From: sergio-sisternes-epam Date: Wed, 4 Mar 2026 17:31:21 +0000 Subject: [PATCH 2/2] refactor: address PR #159 review feedback - Reuse already-computed relative_path instead of calling relative_to() a second time, avoiding unguarded ValueError (review comment 1) - Extract DEFAULT_EXCLUDED_DIRNAMES frozenset constant shared by _analyze_project_structure and _should_exclude_subdir (review comment 2) - Update docs/cli-reference.md and docs/compilation.md to clarify that exclusions match on exact path components (review comment 3) --- docs/cli-reference.md | 2 +- docs/compilation.md | 2 +- src/apm_cli/compilation/context_optimizer.py | 14 +++++++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 6daacf95..66447e8c 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -724,7 +724,7 @@ Use the `exclude` field to skip directories during compilation. This improves pe - `coverage/**` - Matches "coverage" and all subdirectories - `projects/**/apm/**` - Complex nested matching with `**` -**Default exclusions** (always applied): +**Default exclusions** (always applied, matched on exact path components): - `node_modules`, `__pycache__`, `.git`, `dist`, `build` - Hidden directories (starting with `.`) diff --git a/docs/compilation.md b/docs/compilation.md index 5f7fa76b..31dc92de 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -290,7 +290,7 @@ Use the `exclude` field to skip directories during compilation, improving perfor - Prevent duplicate instruction discovery **Default Exclusions:** -APM always excludes these directories (no configuration needed): +APM always excludes directories whose path contains an exact component matching one of these names (no configuration needed). A directory named `rebuild/` is **not** excluded just because it contains `build` as a substring. - `node_modules` - `__pycache__` - `.git` diff --git a/src/apm_cli/compilation/context_optimizer.py b/src/apm_cli/compilation/context_optimizer.py index c7b6b381..e93ca511 100644 --- a/src/apm_cli/compilation/context_optimizer.py +++ b/src/apm_cli/compilation/context_optimizer.py @@ -29,6 +29,12 @@ list = builtins.list dict = builtins.dict +# Default directory names excluded from compilation scanning. +# Shared across _analyze_project_structure and _should_exclude_subdir. +DEFAULT_EXCLUDED_DIRNAMES = frozenset({ + 'node_modules', '__pycache__', '.git', 'dist', 'build', +}) + @dataclass class DirectoryAnalysis: @@ -422,10 +428,8 @@ def _analyze_project_structure(self) -> None: if any(part.startswith('.') for part in current_path.parts[len(self.base_dir.parts):]): continue - # Default hardcoded exclusions for backwards compatibility - # Use path-component matching to avoid false positives (e.g. "rebuild" matching "build") - relative_parts = current_path.relative_to(self.base_dir).parts - if any(part in ('node_modules', '__pycache__', '.git', 'dist', 'build') for part in relative_parts): + # Default hardcoded exclusions — match on exact path components + if any(part in DEFAULT_EXCLUDED_DIRNAMES for part in relative_path.parts): continue # Apply configurable exclusion patterns @@ -477,7 +481,7 @@ def _should_exclude_subdir(self, path: Path) -> bool: # Also check if subdirectory is a default exclusion dir_name = path.name - if dir_name in ['node_modules', '__pycache__', '.git', 'dist', 'build']: + if dir_name in DEFAULT_EXCLUDED_DIRNAMES: return True # Skip hidden directories