Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/cli-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ Use the `exclude` field to skip directories during compilation. This improves pe
- `projects/**/apm/**` - Complex nested matching with `**`

**Default exclusions** (always applied):
- `node_modules`, `__pycache__`, `.git`, `dist`, `build`
- `node_modules`, `__pycache__`, `.git`, `dist`, `build`, `apm_modules`
- Hidden directories (starting with `.`)

Command-line options always override `apm.yml` settings. Priority order:
Expand Down
1 change: 1 addition & 0 deletions docs/compilation.md
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ APM always excludes these directories (no configuration needed):
- `.git`
- `dist`
- `build`
- `apm_modules`
- Hidden directories (starting with `.`)

## Advanced Optimization Features
Expand Down
22 changes: 15 additions & 7 deletions src/apm_cli/compilation/context_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
list = builtins.list
dict = builtins.dict

# Default directory names excluded from compilation scanning.
# Shared across _analyze_project_structure, _should_exclude_subdir, and _get_all_files.
DEFAULT_EXCLUDED_DIRNAMES = frozenset({
'node_modules', '__pycache__', '.git', 'dist', 'build', 'apm_modules',
})


@dataclass
class DirectoryAnalysis:
Expand Down Expand Up @@ -113,6 +119,7 @@ def __init__(self, base_dir: str = ".", exclude_patterns: Optional[List[str]] =

# Performance optimization caches
self._glob_cache: Dict[str, List[str]] = {}
self._glob_set_cache: Dict[str, Set[Path]] = {}
self._file_list_cache: Optional[List[Path]] = None
self._timing_enabled = False
self._phase_timings: Dict[str, float] = {}
Expand Down Expand Up @@ -162,8 +169,8 @@ def _get_all_files(self) -> List[Path]:
if self._file_list_cache is None:
self._file_list_cache = []
for root, dirs, files in os.walk(self.base_dir):
# Skip hidden directories for performance
dirs[:] = [d for d in dirs if not d.startswith('.')]
# Skip hidden and excluded directories for performance
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in DEFAULT_EXCLUDED_DIRNAMES]
for file in files:
if not file.startswith('.'):
self._file_list_cache.append(Path(root) / file)
Expand Down Expand Up @@ -423,7 +430,7 @@ def _analyze_project_structure(self) -> None:
continue

# Default hardcoded exclusions for backwards compatibility
if any(ignore in str(current_path) for ignore in ['node_modules', '__pycache__', '.git', 'dist', 'build']):
if any(ignore in str(current_path) for ignore in DEFAULT_EXCLUDED_DIRNAMES):
continue

# Apply configurable exclusion patterns
Expand Down Expand Up @@ -475,7 +482,7 @@ def _should_exclude_subdir(self, path: Path) -> bool:

# Also check if subdirectory is a default exclusion
dir_name = path.name
if dir_name in ['node_modules', '__pycache__', '.git', 'dist', 'build']:
if dir_name in DEFAULT_EXCLUDED_DIRNAMES:
return True

# Skip hidden directories
Expand Down Expand Up @@ -786,9 +793,10 @@ def _file_matches_pattern(self, file_path: Path, pattern: str) -> bool:

# Use cached glob results instead of repeated glob calls
matches = self._cached_glob(expanded_pattern)
# Convert to Path objects for comparison
match_paths = {Path(match) for match in matches}
if rel_path in match_paths:
# Use cached Set[Path] to avoid recreating on every call
if expanded_pattern not in self._glob_set_cache:
self._glob_set_cache[expanded_pattern] = {Path(match) for match in matches}
if rel_path in self._glob_set_cache[expanded_pattern]:
return True
except (ValueError, OSError):
pass
Expand Down
116 changes: 116 additions & 0 deletions tests/unit/compilation/test_context_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,122 @@ def test_single_item_brace_group(self):
result = optimizer._expand_glob_pattern("**/*.{ts}")
assert result == ["**/*.ts"]

def test_three_brace_groups(self):
"""Test expansion of three brace groups."""
optimizer = ContextOptimizer(base_dir="/tmp")
result = optimizer._expand_glob_pattern("{src,lib}/*.{test,spec}.{ts,js}")
expected = [
"src/*.test.ts", "src/*.test.js",
"src/*.spec.ts", "src/*.spec.js",
"lib/*.test.ts", "lib/*.test.js",
"lib/*.spec.ts", "lib/*.spec.js",
]
assert sorted(result) == sorted(expected)


class TestApmModulesExclusion:
"""Test that apm_modules/ is excluded from project scanning (Fixes #154)."""

@pytest.fixture
def project_with_apm_modules(self):
"""Create a project with a realistic apm_modules/ tree."""
with tempfile.TemporaryDirectory() as temp_dir:
base = Path(temp_dir)

# Real project directories
for d in ["src", "src/components", "tests", "docs"]:
(base / d).mkdir(parents=True, exist_ok=True)

# Real project files
for f in [
"src/index.ts", "src/components/App.tsx",
"tests/app.test.ts", "docs/README.md",
]:
(base / f).touch()

# Simulate apm_modules with many nested packages
for i in range(50):
pkg_dir = base / "apm_modules" / f"org/package-{i}" / "sub"
pkg_dir.mkdir(parents=True, exist_ok=True)
(pkg_dir / "SKILL.md").touch()
(pkg_dir / "instructions.md").touch()

yield base

def test_apm_modules_excluded_from_directory_cache(self, project_with_apm_modules):
"""apm_modules directories must not appear in _directory_cache."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

cached_paths = set(optimizer._directory_cache.keys())
# Resolve to handle macOS /var -> /private/var symlink
resolved_base = project_with_apm_modules.resolve()

# Real dirs are cached
assert resolved_base / "src" in cached_paths
assert resolved_base / "tests" in cached_paths

# No apm_modules path is cached
apm_paths = [p for p in cached_paths if "apm_modules" in str(p)]
assert apm_paths == [], f"apm_modules leaked into cache: {apm_paths}"

def test_cache_size_unaffected_by_apm_modules(self, project_with_apm_modules):
"""Directory cache size should reflect only project dirs, not apm_modules."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

# 50 packages × sub-directory each = 150+ dirs if not excluded.
# Real project has at most ~5 dirs (root, src, src/components, tests, docs).
assert len(optimizer._directory_cache) <= 10, (
f"Cache has {len(optimizer._directory_cache)} dirs — "
f"apm_modules likely not excluded"
)

def test_os_walk_prunes_apm_modules(self, project_with_apm_modules):
"""os.walk must not descend into apm_modules subdirectories."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

# _should_exclude_subdir must flag apm_modules
apm_modules_path = project_with_apm_modules / "apm_modules"
assert optimizer._should_exclude_subdir(apm_modules_path) is True

def test_find_matching_dirs_ignores_apm_modules(self, project_with_apm_modules):
"""Pattern matching must not return directories inside apm_modules."""
optimizer = ContextOptimizer(base_dir=str(project_with_apm_modules))
optimizer._analyze_project_structure()

matching = optimizer._find_matching_directories("**/*.md")
apm_matches = [p for p in matching if "apm_modules" in str(p)]
assert apm_matches == [], f"apm_modules dirs matched: {apm_matches}"


class TestGlobCacheReuse:
"""Test that glob results are cached and Set[Path] conversion is not repeated."""

def test_set_path_cached_across_calls(self):
"""_file_matches_pattern must cache the Set[Path] conversion."""
with tempfile.TemporaryDirectory() as temp_dir:
base = Path(temp_dir)
(base / "src").mkdir()
(base / "src" / "a.ts").touch()
(base / "src" / "b.ts").touch()

optimizer = ContextOptimizer(base_dir=str(base))
optimizer._analyze_project_structure()

file_a = base / "src" / "a.ts"
file_b = base / "src" / "b.ts"

# First call populates cache
optimizer._file_matches_pattern(file_a, "**/*.ts")
assert "**/*.ts" in optimizer._glob_set_cache

# Second call should reuse the cached set (not recreate it)
cached_set_id = id(optimizer._glob_set_cache["**/*.ts"])
optimizer._file_matches_pattern(file_b, "**/*.ts")
assert id(optimizer._glob_set_cache["**/*.ts"]) == cached_set_id


if __name__ == "__main__":
pytest.main([__file__])