diff --git a/.gitignore b/.gitignore index fd122d98..74473bd6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ APPROACH.md next-steps.md apm-action-plan.md +tests/benchmarks/BASELINE.md # Python __pycache__/ diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 1c1dbd6d..87b257fa 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -122,6 +122,7 @@ apm install [PACKAGES...] [OPTIONS] - `--update` - Update dependencies to latest Git references - `--force` - Overwrite locally-authored files on collision - `--dry-run` - Show what would be installed without installing +- `--parallel-downloads INT` - Max concurrent package downloads (default: 4, 0 to disable) - `--verbose` - Show detailed installation information - `--trust-transitive-mcp` - Trust self-defined MCP servers from transitive packages (skip re-declaration requirement) diff --git a/docs/dependencies.md b/docs/dependencies.md index fe38dcc3..33cdb4de 100644 --- a/docs/dependencies.md +++ b/docs/dependencies.md @@ -406,6 +406,14 @@ When both packages are installed, your project gains: 4. **Build Dependency Graph**: Resolve transitive dependencies recursively 5. **Check Conflicts**: Identify any circular dependencies or conflicts +#### Resilient Downloads + +APM automatically retries failed HTTP requests with exponential backoff and jitter. Rate-limited responses (HTTP 429/503) are handled transparently, respecting `Retry-After` headers when provided. This ensures reliable installs even under heavy API usage or transient network issues. + +#### Parallel Downloads + +APM downloads packages in parallel using a thread pool, significantly reducing wall-clock time for large dependency trees. The concurrency level defaults to 4 and is configurable via `--parallel-downloads` (set to 0 to disable). For subdirectory packages in monorepos, APM attempts git sparse-checkout (git 2.25+) to download only the needed directory, falling back to a shallow clone if sparse-checkout is unavailable. 
+ ### File Processing and Content Merging APM uses instruction-level merging rather than file-level precedence. When local and dependency files contribute instructions with overlapping `applyTo` patterns: @@ -527,7 +535,7 @@ The `deployed_files` field tracks exactly which files APM placed in your project ### How It Works 1. **First install**: APM resolves dependencies, downloads packages, and writes `apm.lock` -2. **Subsequent installs**: APM reads `apm.lock` and uses locked commits for exact reproducibility +2. **Subsequent installs**: APM reads `apm.lock` and uses locked commits for exact reproducibility. If the local checkout already matches the locked commit SHA, the download is skipped entirely. 3. **Updating**: Use `--update` to re-resolve dependencies and generate a fresh lockfile ### Version Control diff --git a/docs/enhanced-primitive-discovery.md b/docs/enhanced-primitive-discovery.md index cea1f063..692318a7 100644 --- a/docs/enhanced-primitive-discovery.md +++ b/docs/enhanced-primitive-discovery.md @@ -186,6 +186,8 @@ The enhanced discovery system integrates with: - Keep existing primitive (higher priority) - Record conflict with losing source information +Conflict detection uses O(1) name-indexed lookups, so performance remains constant regardless of collection size. 
+ ### Error Handling - Gracefully handles missing `apm_modules/` directory diff --git a/pyproject.toml b/pyproject.toml index 87b8351e..3198f352 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,9 @@ warn_return_any = true warn_unused_configs = true [tool.pytest.ini_options] +addopts = "-m 'not benchmark'" markers = [ "integration: marks tests as integration tests that may require network access", "slow: marks tests as slow running tests", + "benchmark: marks performance benchmark tests (deselected by default, run with -m benchmark)", ] diff --git a/src/apm_cli/cli.py b/src/apm_cli/cli.py index e90847e3..56712e23 100644 --- a/src/apm_cli/cli.py +++ b/src/apm_cli/cli.py @@ -653,8 +653,15 @@ def _validate_package_exists(package): is_flag=True, help="Trust self-defined MCP servers from transitive packages (skip re-declaration requirement)", ) +@click.option( + "--parallel-downloads", + type=int, + default=4, + show_default=True, + help="Max concurrent package downloads (0 to disable parallelism)", +) @click.pass_context -def install(ctx, packages, runtime, exclude, only, update, dry_run, force, verbose, trust_transitive_mcp): +def install(ctx, packages, runtime, exclude, only, update, dry_run, force, verbose, trust_transitive_mcp, parallel_downloads): """Install APM and MCP dependencies from apm.yml (like npm install). 
This command automatically detects AI runtimes from your apm.yml scripts and installs @@ -753,7 +760,8 @@ def install(ctx, packages, runtime, exclude, only, update, dry_run, force, verbo # Otherwise install all from apm.yml only_pkgs = builtins.list(packages) if packages else None apm_count, prompt_count, agent_count = _install_apm_dependencies( - apm_package, update, verbose, only_pkgs, force=force + apm_package, update, verbose, only_pkgs, force=force, + parallel_downloads=parallel_downloads, ) except Exception as e: _rich_error(f"Failed to install APM dependencies: {e}") @@ -1554,6 +1562,7 @@ def _install_apm_dependencies( verbose: bool = False, only_packages: "builtins.list" = None, force: bool = False, + parallel_downloads: int = 4, ): """Install APM package dependencies. @@ -1563,6 +1572,7 @@ def _install_apm_dependencies( verbose: Show detailed installation information only_packages: If provided, only install these specific packages (not all from apm.yml) force: Whether to overwrite locally-authored files on collision + parallel_downloads: Max concurrent downloads (0 disables parallelism) """ if not APM_DEPS_AVAILABLE: raise RuntimeError("APM dependency system not available") @@ -1765,7 +1775,75 @@ def download_callback(dep_ref, modules_dir): # downloader already created above for transitive resolution installed_count = 0 - # Create progress display for downloads + # Phase 4 (#171): Parallel package downloads using ThreadPoolExecutor + # Pre-download all non-cached packages in parallel for wall-clock speedup. + # Results are stored and consumed by the sequential integration loop below. 
+ from concurrent.futures import ThreadPoolExecutor, as_completed as _futures_completed + + _pre_download_results = {} # dep_key -> PackageInfo + _need_download = [] + for _pd_ref in deps_to_install: + _pd_key = _pd_ref.get_unique_key() + _pd_path = (apm_modules_dir / _pd_ref.alias) if _pd_ref.alias else _pd_ref.get_install_path(apm_modules_dir) + # Skip if already downloaded during BFS resolution + if _pd_key in callback_downloaded: + continue + # Skip if lockfile SHA matches local HEAD (Phase 5 check) + if _pd_path.exists() and existing_lockfile and not update_refs: + _pd_locked = existing_lockfile.get_dependency(_pd_key) + if _pd_locked and _pd_locked.resolved_commit and _pd_locked.resolved_commit != "cached": + try: + from git import Repo as _PDGitRepo + if _PDGitRepo(_pd_path).head.commit.hexsha == _pd_locked.resolved_commit: + continue + except Exception: + pass + # Build download ref (use locked commit for reproducibility) + _pd_dlref = str(_pd_ref) + if existing_lockfile: + _pd_locked = existing_lockfile.get_dependency(_pd_key) + if _pd_locked and _pd_locked.resolved_commit and _pd_locked.resolved_commit != "cached": + _pd_base = _pd_ref.repo_url + if _pd_ref.virtual_path: + _pd_base = f"{_pd_base}/{_pd_ref.virtual_path}" + _pd_dlref = f"{_pd_base}#{_pd_locked.resolved_commit}" + _need_download.append((_pd_ref, _pd_path, _pd_dlref)) + + if _need_download and parallel_downloads > 0: + with Progress( + SpinnerColumn(), + TextColumn("[cyan]{task.description}[/cyan]"), + BarColumn(), + TaskProgressColumn(), + transient=True, + ) as _dl_progress: + _max_workers = min(parallel_downloads, len(_need_download)) + with ThreadPoolExecutor(max_workers=_max_workers) as _executor: + _futures = {} + for _pd_ref, _pd_path, _pd_dlref in _need_download: + _pd_disp = str(_pd_ref) if _pd_ref.is_virtual else _pd_ref.repo_url + _pd_short = _pd_disp.split("/")[-1] if "/" in _pd_disp else _pd_disp + _pd_tid = _dl_progress.add_task(description=f"Fetching {_pd_short}", total=None) + 
_pd_fut = _executor.submit( + downloader.download_package, _pd_dlref, _pd_path, + progress_task_id=_pd_tid, progress_obj=_dl_progress, + ) + _futures[_pd_fut] = (_pd_ref, _pd_tid, _pd_disp) + for _pd_fut in _futures_completed(_futures): + _pd_ref, _pd_tid, _pd_disp = _futures[_pd_fut] + _pd_key = _pd_ref.get_unique_key() + try: + _pd_info = _pd_fut.result() + _pre_download_results[_pd_key] = _pd_info + _dl_progress.update(_pd_tid, visible=False) + _dl_progress.refresh() + except Exception: + _dl_progress.remove_task(_pd_tid) + # Silent: sequential loop below will retry and report errors + + _pre_downloaded_keys = set(_pre_download_results.keys()) + + # Create progress display for sequential integration with Progress( SpinnerColumn(), TextColumn("[cyan]{task.description}[/cyan]"), @@ -1791,7 +1869,7 @@ def download_callback(dep_ref, modules_dir): from apm_cli.models.apm_package import GitReferenceType resolved_ref = None - if dep_ref.reference: + if dep_ref.reference and dep_ref.get_unique_key() not in _pre_downloaded_keys: try: resolved_ref = downloader.resolve_git_reference( f"{dep_ref.repo_url}@{dep_ref.reference}" @@ -1806,8 +1884,20 @@ def download_callback(dep_ref, modules_dir): ] # Skip download if: already fetched by resolver callback, or cached tag/commit already_resolved = dep_ref.get_unique_key() in callback_downloaded + # Phase 5 (#171): Also skip when lockfile SHA matches local HEAD + lockfile_match = False + if install_path.exists() and existing_lockfile and not update_refs: + locked_dep = existing_lockfile.get_dependency(dep_ref.get_unique_key()) + if locked_dep and locked_dep.resolved_commit and locked_dep.resolved_commit != "cached": + try: + from git import Repo as GitRepo + local_repo = GitRepo(install_path) + if local_repo.head.commit.hexsha == locked_dep.resolved_commit: + lockfile_match = True + except Exception: + pass # Not a git repo or invalid — fall through to download skip_download = install_path.exists() and ( - (is_cacheable and not 
update_refs) or already_resolved + (is_cacheable and not update_refs) or already_resolved or lockfile_match ) if skip_download: @@ -2081,13 +2171,18 @@ def download_callback(dep_ref, modules_dir): base_ref = f"{base_ref}/{dep_ref.virtual_path}" download_ref = f"{base_ref}#{locked_dep.resolved_commit}" - # Download with live progress bar - package_info = downloader.download_package( - download_ref, - install_path, - progress_task_id=task_id, - progress_obj=progress, - ) + # Phase 4 (#171): Use pre-downloaded result if available + _dep_key = dep_ref.get_unique_key() + if _dep_key in _pre_download_results: + package_info = _pre_download_results[_dep_key] + else: + # Fallback: sequential download (should rarely happen) + package_info = downloader.download_package( + download_ref, + install_path, + progress_task_id=task_id, + progress_obj=progress, + ) # CRITICAL: Hide progress BEFORE printing success message to avoid overlap progress.update(task_id, visible=False) diff --git a/src/apm_cli/compilation/constitution.py b/src/apm_cli/compilation/constitution.py index 1a9e4b27..cdbaa351 100644 --- a/src/apm_cli/compilation/constitution.py +++ b/src/apm_cli/compilation/constitution.py @@ -2,10 +2,18 @@ from __future__ import annotations from pathlib import Path -from typing import Optional +from typing import Dict, Optional from .constants import CONSTITUTION_RELATIVE_PATH +# Module-level cache: resolved base_dir -> constitution content (#171) +_constitution_cache: Dict[Path, Optional[str]] = {} + + +def clear_constitution_cache() -> None: + """Clear the constitution read cache. Call in tests for isolation.""" + _constitution_cache.clear() + def find_constitution(base_dir: Path) -> Path: """Return path to constitution.md if present, else Path that does not exist. @@ -19,15 +27,24 @@ def find_constitution(base_dir: Path) -> Path: def read_constitution(base_dir: Path) -> Optional[str]: """Read full constitution content if file exists. 
+ Results are cached by resolved base_dir for the lifetime of the process. + Args: base_dir: Repository root path. Returns: Full file text or None if absent. """ + resolved = base_dir.resolve() + if resolved in _constitution_cache: + return _constitution_cache[resolved] path = find_constitution(base_dir) if not path.exists() or not path.is_file(): + _constitution_cache[resolved] = None return None try: - return path.read_text(encoding="utf-8") + content = path.read_text(encoding="utf-8") + _constitution_cache[resolved] = content + return content except OSError: + _constitution_cache[resolved] = None return None diff --git a/src/apm_cli/compilation/context_optimizer.py b/src/apm_cli/compilation/context_optimizer.py index 0910cc25..33601ace 100644 --- a/src/apm_cli/compilation/context_optimizer.py +++ b/src/apm_cli/compilation/context_optimizer.py @@ -121,6 +121,7 @@ def __init__(self, base_dir: str = ".", exclude_patterns: Optional[List[str]] = self._glob_cache: Dict[str, List[str]] = {} self._glob_set_cache: Dict[str, Set[Path]] = {} self._file_list_cache: Optional[List[Path]] = None + self._inheritance_cache: Dict[Path, List[Path]] = {} # (#171) self._timing_enabled = False self._phase_timings: Dict[str, float] = {} @@ -1234,6 +1235,10 @@ def _get_inheritance_chain(self, working_directory: Path) -> List[Path]: Returns: List[Path]: Inheritance chain (most specific to root). 
""" + cached = self._inheritance_cache.get(working_directory) + if cached is not None: + return cached + chain = [] # Resolve the starting directory to ensure consistent path comparison try: @@ -1261,6 +1266,7 @@ def _get_inheritance_chain(self, working_directory: Path) -> List[Path]: except (OSError, ValueError): break + self._inheritance_cache[working_directory] = chain return chain def _is_child_directory(self, child: Path, parent: Path) -> bool: diff --git a/src/apm_cli/config.py b/src/apm_cli/config.py index d957571f..1415c4de 100644 --- a/src/apm_cli/config.py +++ b/src/apm_cli/config.py @@ -2,11 +2,14 @@ import os import json +from typing import Optional CONFIG_DIR = os.path.expanduser("~/.apm") CONFIG_FILE = os.path.join(CONFIG_DIR, "config.json") +_config_cache: Optional[dict] = None + def ensure_config_exists(): """Ensure the configuration directory and file exist.""" @@ -21,12 +24,24 @@ def ensure_config_exists(): def get_config(): """Get the current configuration. + Results are cached for the lifetime of the process. + Returns: dict: Current configuration. """ + global _config_cache + if _config_cache is not None: + return _config_cache ensure_config_exists() with open(CONFIG_FILE, "r") as f: - return json.load(f) + _config_cache = json.load(f) + return _config_cache + + +def _invalidate_config_cache(): + """Invalidate the config cache (called after writes).""" + global _config_cache + _config_cache = None def update_config(updates): @@ -35,11 +50,13 @@ def update_config(updates): Args: updates (dict): Dictionary of configuration values to update. 
""" + _invalidate_config_cache() config = get_config() config.update(updates) with open(CONFIG_FILE, "w") as f: json.dump(config, f, indent=2) + _invalidate_config_cache() def get_default_client(): diff --git a/src/apm_cli/deps/apm_resolver.py b/src/apm_cli/deps/apm_resolver.py index 7f943ab5..3172042d 100644 --- a/src/apm_cli/deps/apm_resolver.py +++ b/src/apm_cli/deps/apm_resolver.py @@ -225,6 +225,7 @@ def detect_circular_dependencies(self, tree: DependencyTree) -> List[CircularRef circular_deps = [] visited: Set[str] = set() current_path: List[str] = [] + current_path_set: Set[str] = set() # O(1) membership test (#171) def dfs_detect_cycles(node: DependencyNode) -> None: """Recursive DFS function to detect cycles.""" @@ -234,7 +235,7 @@ def dfs_detect_cycles(node: DependencyNode) -> None: unique_key = node.dependency_ref.get_unique_key() # Check if this unique key is already in our current path (cycle detected) - if unique_key in current_path: + if unique_key in current_path_set: # Found a cycle - create the cycle path cycle_start_index = current_path.index(unique_key) cycle_path = current_path[cycle_start_index:] + [unique_key] @@ -249,23 +250,25 @@ def dfs_detect_cycles(node: DependencyNode) -> None: # Mark current node as visited and add unique key to path visited.add(node_id) current_path.append(unique_key) + current_path_set.add(unique_key) # Check all children for child in node.children: child_id = child.get_id() # Only recurse if we haven't processed this subtree completely - if child_id not in visited or child.dependency_ref.get_unique_key() in current_path: + if child_id not in visited or child.dependency_ref.get_unique_key() in current_path_set: dfs_detect_cycles(child) # Remove from path when backtracking (but keep in visited) - current_path.pop() + current_path_set.discard(current_path.pop()) # Start DFS from all root level dependencies (depth 1) root_deps = tree.get_nodes_at_depth(1) for root_dep in root_deps: if root_dep.get_id() not in visited: 
current_path = [] # Reset path for each root + current_path_set = set() dfs_detect_cycles(root_dep) return circular_deps diff --git a/src/apm_cli/deps/dependency_graph.py b/src/apm_cli/deps/dependency_graph.py index e94fa255..42ddb43d 100644 --- a/src/apm_cli/deps/dependency_graph.py +++ b/src/apm_cli/deps/dependency_graph.py @@ -1,5 +1,6 @@ """Data structures for dependency graph representation and resolution.""" +from collections import defaultdict from dataclasses import dataclass, field from pathlib import Path from typing import Dict, List, Optional, Set, Tuple, Any @@ -55,11 +56,16 @@ class DependencyTree: """Hierarchical representation of dependencies before flattening.""" root_package: APMPackage nodes: Dict[str, DependencyNode] = field(default_factory=dict) + _nodes_by_depth: Dict[int, List[DependencyNode]] = field(default_factory=lambda: defaultdict(list)) max_depth: int = 0 def add_node(self, node: DependencyNode) -> None: """Add a node to the tree.""" - self.nodes[node.get_id()] = node + node_id = node.get_id() + is_new = node_id not in self.nodes + self.nodes[node_id] = node + if is_new: + self._nodes_by_depth[node.depth].append(node) self.max_depth = max(self.max_depth, node.depth) def get_node(self, unique_key: str) -> Optional[DependencyNode]: @@ -68,7 +74,7 @@ def get_node(self, unique_key: str) -> Optional[DependencyNode]: def get_nodes_at_depth(self, depth: int) -> List[DependencyNode]: """Get all nodes at a specific depth level.""" - return [node for node in self.nodes.values() if node.depth == depth] + return list(self._nodes_by_depth.get(depth, [])) def has_dependency(self, repo_url: str) -> bool: """Check if a dependency exists in the tree.""" diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index abb08abe..13189a34 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -3,9 +3,11 @@ import os import shutil import sys +import time from datetime import 
datetime from pathlib import Path from typing import Optional, Dict, Any, Callable +import random import re import requests @@ -159,6 +161,65 @@ def _setup_git_environment(self) -> Dict[str, Any]: return env + def _resilient_get(self, url: str, headers: Dict[str, str], timeout: int = 30, max_retries: int = 3) -> requests.Response: + """HTTP GET with retry on 429/503 and rate-limit header awareness (#171). + + Args: + url: Request URL + headers: HTTP headers + timeout: Request timeout in seconds + max_retries: Maximum retry attempts for transient failures + + Returns: + requests.Response (caller should call .raise_for_status() as needed) + + Raises: + requests.exceptions.RequestException: After all retries exhausted + """ + last_exc = None + for attempt in range(max_retries): + try: + response = requests.get(url, headers=headers, timeout=timeout) + + # Handle rate limiting + if response.status_code in (429, 503): + retry_after = response.headers.get("Retry-After") + if retry_after: + try: + wait = min(float(retry_after), 60) + except (TypeError, ValueError): + # Retry-After may be an HTTP-date; fall back to exponential backoff + wait = min(2 ** attempt, 30) * (0.5 + random.random()) + else: + wait = min(2 ** attempt, 30) * (0.5 + random.random()) + _debug(f"Rate limited ({response.status_code}), retry in {wait:.1f}s (attempt {attempt + 1}/{max_retries})") + time.sleep(wait) + continue + + # Log rate limit proximity + remaining = response.headers.get("X-RateLimit-Remaining") + try: + if remaining and int(remaining) < 10: + _debug(f"GitHub API rate limit low: {remaining} requests remaining") + except (TypeError, ValueError): + pass + + return response + except requests.exceptions.ConnectionError as e: + last_exc = e + if attempt < max_retries - 1: + wait = min(2 ** attempt, 30) * (0.5 + random.random()) + _debug(f"Connection error, retry in {wait:.1f}s (attempt {attempt + 1}/{max_retries})") + time.sleep(wait) + except requests.exceptions.Timeout as e: + last_exc = e 
+ if attempt < max_retries - 1: + _debug(f"Timeout, retrying (attempt {attempt + 1}/{max_retries})") + + if last_exc: + raise last_exc + raise requests.exceptions.RequestException(f"All {max_retries} attempts failed for {url}") + def _sanitize_git_error(self, error_message: str) -> str: """Sanitize Git error messages to remove potentially sensitive authentication information. @@ -523,7 +584,7 @@ def _download_ado_file(self, dep_ref: DependencyReference, file_path: str, ref: headers['Authorization'] = f'Basic {auth}' try: - response = requests.get(api_url, headers=headers, timeout=30) + response = self._resilient_get(api_url, headers=headers, timeout=30) response.raise_for_status() return response.content except requests.exceptions.HTTPError as e: @@ -543,7 +604,7 @@ def _download_ado_file(self, dep_ref: DependencyReference, file_path: str, ref: ) try: - response = requests.get(fallback_url, headers=headers, timeout=30) + response = self._resilient_get(fallback_url, headers=headers, timeout=30) response.raise_for_status() return response.content except requests.exceptions.HTTPError: @@ -599,7 +660,7 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re # Try to download with the specified ref try: - response = requests.get(api_url, headers=headers, timeout=30) + response = self._resilient_get(api_url, headers=headers, timeout=30) response.raise_for_status() return response.content except requests.exceptions.HTTPError as e: @@ -621,7 +682,7 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re fallback_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}" try: - response = requests.get(fallback_url, headers=headers, timeout=30) + response = self._resilient_get(fallback_url, headers=headers, timeout=30) response.raise_for_status() return response.content except requests.exceptions.HTTPError: @@ -637,7 +698,7 @@ def _download_github_file(self, dep_ref: DependencyReference, 
file_path: str, re if self.github_token and not host.lower().endswith(".ghe.com"): try: unauth_headers = {'Accept': 'application/vnd.github.v3.raw'} - response = requests.get(api_url, headers=unauth_headers, timeout=30) + response = self._resilient_get(api_url, headers=unauth_headers, timeout=30) response.raise_for_status() return response.content except requests.exceptions.HTTPError: @@ -994,6 +1055,44 @@ def download_collection_package(self, dep_ref: DependencyReference, target_path: dependency_ref=dep_ref # Store for canonical dependency string ) + def _try_sparse_checkout(self, dep_ref: DependencyReference, temp_clone_path: Path, subdir_path: str, ref: str = None) -> bool: + """Attempt sparse-checkout to download only a subdirectory (git 2.25+). + + Returns True on success. Falls back silently on failure. + """ + import subprocess + try: + temp_clone_path.mkdir(parents=True, exist_ok=True) + env = {**os.environ, **(self.git_env or {})} + auth_url = self._build_repo_url(dep_ref.repo_url, use_ssh=False, dep_ref=dep_ref) + + cmds = [ + ['git', 'init'], + ['git', 'remote', 'add', 'origin', auth_url], + ['git', 'sparse-checkout', 'init', '--cone'], + ['git', 'sparse-checkout', 'set', subdir_path], + ] + fetch_cmd = ['git', 'fetch', 'origin'] + if ref: + fetch_cmd.append(ref) + fetch_cmd.append('--depth=1') + cmds.append(fetch_cmd) + cmds.append(['git', 'checkout', 'FETCH_HEAD']) + + for cmd in cmds: + result = subprocess.run( + cmd, cwd=str(temp_clone_path), env=env, + capture_output=True, text=True, timeout=120, + ) + if result.returncode != 0: + _debug(f"Sparse-checkout step failed ({' '.join(cmd)}): {result.stderr.strip()}") + return False + + return True + except Exception as e: + _debug(f"Sparse-checkout failed: {e}") + return False + def download_subdirectory_package(self, dep_ref: DependencyReference, target_path: Path, progress_task_id=None, progress_obj=None) -> PackageInfo: """Download a subdirectory from a repo as an APM package. 
@@ -1036,31 +1135,37 @@ def download_subdirectory_package(self, dep_ref: DependencyReference, target_pat if progress_obj and progress_task_id is not None: progress_obj.update(progress_task_id, completed=20, total=100) - # Clone the repository (shallow clone for performance) - # Don't specify branch if none provided - let git use repo's default - package_display_name = subdir_path.split('/')[-1] - progress_reporter = GitProgressReporter(progress_task_id, progress_obj, package_display_name) if progress_task_id and progress_obj else None + # Phase 4 (#171): Try sparse-checkout first (git 2.25+), fall back to full clone + sparse_ok = self._try_sparse_checkout(dep_ref, temp_clone_path, subdir_path, ref) - clone_kwargs = { - 'dep_ref': dep_ref, - 'depth': 1, - } - if ref: - clone_kwargs['branch'] = ref - - try: - self._clone_with_fallback( - dep_ref.repo_url, - temp_clone_path, - progress_reporter=progress_reporter, - **clone_kwargs - ) - except Exception as e: - raise RuntimeError(f"Failed to clone repository: {e}") - - # Disable progress reporter after clone - if progress_reporter: - progress_reporter.disabled = True + if not sparse_ok: + # Full shallow clone fallback + if temp_clone_path.exists(): + shutil.rmtree(temp_clone_path) + + package_display_name = subdir_path.split('/')[-1] + progress_reporter = GitProgressReporter(progress_task_id, progress_obj, package_display_name) if progress_task_id and progress_obj else None + + clone_kwargs = { + 'dep_ref': dep_ref, + 'depth': 1, + } + if ref: + clone_kwargs['branch'] = ref + + try: + self._clone_with_fallback( + dep_ref.repo_url, + temp_clone_path, + progress_reporter=progress_reporter, + **clone_kwargs + ) + except Exception as e: + raise RuntimeError(f"Failed to clone repository: {e}") + + # Disable progress reporter after clone + if progress_reporter: + progress_reporter.disabled = True # Update progress - extracting subdirectory if progress_obj and progress_task_id is not None: diff --git 
a/src/apm_cli/models/apm_package.py b/src/apm_cli/models/apm_package.py index 3397f0de..4b27945c 100644 --- a/src/apm_cli/models/apm_package.py +++ b/src/apm_cli/models/apm_package.py @@ -9,6 +9,14 @@ from pathlib import Path from typing import Optional, List, Dict, Any, Union +# Module-level parse cache: resolved path -> APMPackage (#171) +_apm_yml_cache: Dict[Path, "APMPackage"] = {} + + +def clear_apm_yml_cache() -> None: + """Clear the from_apm_yml parse cache. Call in tests for isolation.""" + _apm_yml_cache.clear() + class GitReferenceType(Enum): """Types of Git references supported.""" @@ -1084,6 +1092,8 @@ class APMPackage: def from_apm_yml(cls, apm_yml_path: Path) -> "APMPackage": """Load APM package from apm.yml file. + Results are cached by resolved path for the lifetime of the process. + Args: apm_yml_path: Path to the apm.yml file @@ -1097,6 +1107,11 @@ def from_apm_yml(cls, apm_yml_path: Path) -> "APMPackage": if not apm_yml_path.exists(): raise FileNotFoundError(f"apm.yml not found: {apm_yml_path}") + resolved = apm_yml_path.resolve() + cached = _apm_yml_cache.get(resolved) + if cached is not None: + return cached + try: with open(apm_yml_path, 'r', encoding='utf-8') as f: data = yaml.safe_load(f) @@ -1159,7 +1174,7 @@ def from_apm_yml(cls, apm_yml_path: Path) -> "APMPackage": except ValueError as e: raise ValueError(f"Invalid 'type' field in apm.yml: {e}") - return cls( + result = cls( name=data['name'], version=data['version'], description=data.get('description'), @@ -1171,6 +1186,8 @@ def from_apm_yml(cls, apm_yml_path: Path) -> "APMPackage": target=data.get('target'), type=pkg_type, ) + _apm_yml_cache[resolved] = result + return result def get_apm_dependencies(self) -> List[DependencyReference]: """Get list of APM dependencies.""" diff --git a/src/apm_cli/primitives/models.py b/src/apm_cli/primitives/models.py index a84ce3be..1c7b090d 100644 --- a/src/apm_cli/primitives/models.py +++ b/src/apm_cli/primitives/models.py @@ -148,7 +148,23 @@ def 
__init__(self): self.contexts = [] self.skills = [] self.conflicts = [] + # Name→index maps for O(1) conflict lookups (see #171) + self._chatmode_index: Dict[str, int] = {} + self._instruction_index: Dict[str, int] = {} + self._context_index: Dict[str, int] = {} + self._skill_index: Dict[str, int] = {} + def _index_for(self, primitive_type: str) -> Dict[str, int]: + """Return the name→index map for the given primitive type.""" + if primitive_type == "chatmode": + return self._chatmode_index + elif primitive_type == "instruction": + return self._instruction_index + elif primitive_type == "context": + return self._context_index + else: + return self._skill_index + def add_primitive(self, primitive: Primitive) -> None: """Add a primitive to the appropriate collection. @@ -169,15 +185,12 @@ def add_primitive(self, primitive: Primitive) -> None: def _add_with_conflict_detection(self, new_primitive: Primitive, collection: List[Primitive], primitive_type: str) -> None: """Add primitive with conflict detection.""" - # Find existing primitive with same name - existing_index = None - for i, existing in enumerate(collection): - if existing.name == new_primitive.name: - existing_index = i - break + name_index = self._index_for(primitive_type) + existing_index = name_index.get(new_primitive.name) if existing_index is None: # No conflict, just add the primitive + name_index[new_primitive.name] = len(collection) collection.append(new_primitive) else: # Conflict detected - apply priority rules diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/benchmarks/run_baseline.py b/tests/benchmarks/run_baseline.py new file mode 100644 index 00000000..76ffdc86 --- /dev/null +++ b/tests/benchmarks/run_baseline.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +"""Standalone baseline benchmark — works against both original and optimized code. + +Measures raw performance of the bottleneck paths identified in #171. 
"""Baseline performance benchmark for APM dependency resolution hot paths.

Measures the cost of the paths identified in issue #171 (primitive conflict
detection, depth lookups, cycle detection, tree flattening, apm.yml parsing,
and parallel-download overhead) using only APIs that exist both before and
after the optimization work.
No dependency on cache-clearing functions (those only exist post-optimization).

Usage: uv run python tests/benchmarks/run_baseline.py
"""

import importlib
import sys
import time
import statistics
from pathlib import Path

# Ensure the project is importable when run as a plain script
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "src"))

from apm_cli.primitives.models import PrimitiveCollection, Instruction
from apm_cli.deps.dependency_graph import DependencyTree, DependencyNode
from apm_cli.deps.apm_resolver import APMDependencyResolver
from apm_cli.models.apm_package import APMPackage, DependencyReference


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _make_instruction(name: str, source: str = "local") -> Instruction:
    """Build an in-memory Instruction primitive (no file I/O)."""
    return Instruction(
        name=name,
        file_path=Path(f"/fake/{name}.instructions.md"),
        description=f"Test instruction {name}",
        apply_to="**",
        content=f"Content for {name}",
        source=source,
    )


def _make_node(owner: str, repo: str, depth: int) -> DependencyNode:
    """Build a DependencyNode for owner/repo pinned to the `main` ref."""
    dep_ref = DependencyReference.parse(f"{owner}/{repo}#main")
    pkg = APMPackage(name=repo, version="1.0.0", source=f"{owner}/{repo}")
    return DependencyNode(package=pkg, dependency_ref=dep_ref, depth=depth)


def _build_tree(n: int, max_depth: int) -> DependencyTree:
    """Build a tree of `n` packages spread round-robin across depths 1..max_depth."""
    root = APMPackage(name="root", version="1.0.0")
    tree = DependencyTree(root_package=root)
    for i in range(n):
        depth = (i % max_depth) + 1
        tree.add_node(_make_node("owner", f"pkg-{i}", depth))
    return tree


def bench(fn, *, runs: int = 5, label: str = ""):
    """Run `fn` `runs` times; return (median_ms, min_ms, max_ms).

    `label` is accepted purely so call sites read self-descriptively;
    it does not affect the measurement.
    """
    times = []
    for _ in range(runs):
        start = time.perf_counter()
        fn()
        times.append((time.perf_counter() - start) * 1000)
    return statistics.median(times), min(times), max(times)


# ---------------------------------------------------------------------------
# Benchmarks
# ---------------------------------------------------------------------------

def bench_primitive_conflict_detection(count: int):
    """Add `count` unique primitives — measures conflict-check cost."""
    def run():
        coll = PrimitiveCollection()
        for i in range(count):
            coll.add_primitive(_make_instruction(f"instr-{i}"))
        assert coll.count() == count
    return bench(run, label=f"primitive_add_{count}")


def bench_primitive_conflict_50pct(count: int):
    """Add `count` primitives with 50% name collisions."""
    half = count // 2
    def run():
        coll = PrimitiveCollection()
        for i in range(count):
            coll.add_primitive(_make_instruction(f"instr-{i % half}", source="dep:pkg"))
        assert coll.count() == half
    return bench(run, label=f"primitive_conflict_50pct_{count}")


def bench_depth_lookup(n_packages: int, max_depth: int):
    """Build tree then query every depth level."""
    tree = _build_tree(n_packages, max_depth)
    def run():
        total = 0
        for d in range(1, max_depth + 1):
            total += len(tree.get_nodes_at_depth(d))
        assert total == n_packages
    return bench(run, runs=20, label=f"depth_lookup_{n_packages}x{max_depth}")


def bench_cycle_detection_chain(length: int):
    """Detect cycles in a linear (cycle-free) chain of `length` nodes."""
    root = APMPackage(name="root", version="1.0.0")
    tree = DependencyTree(root_package=root)
    prev = None
    for i in range(length):
        node = _make_node("owner", f"chain-{i}", depth=1)
        tree.add_node(node)
        if prev:
            prev.children.append(node)
        prev = node
    resolver = APMDependencyResolver()
    def run():
        cycles = resolver.detect_circular_dependencies(tree)
        assert len(cycles) == 0
    return bench(run, label=f"cycle_detect_chain_{length}")


def bench_flatten(n_packages: int, max_depth: int):
    """Flatten a tree of n packages across max_depth levels."""
    tree = _build_tree(n_packages, max_depth)
    resolver = APMDependencyResolver()
    def run():
        flat = resolver.flatten_dependencies(tree)
        assert flat.total_dependencies() == n_packages
    return bench(run, label=f"flatten_{n_packages}x{max_depth}")


def bench_from_apm_yml_repeated(tmp_dir: Path, repeats: int):
    """Parse the same apm.yml file `repeats` times."""
    apm_yml = tmp_dir / "apm.yml"
    apm_yml.write_text("name: bench-pkg\nversion: 1.0.0\n")
    def run():
        for _ in range(repeats):
            # Module-level cache state differs between the baseline and the
            # optimized builds; here we only measure wall-clock time for
            # `repeats` calls against whichever build is under test.
            APMPackage.from_apm_yml(apm_yml)
    return bench(run, runs=3, label=f"from_apm_yml_x{repeats}")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def _record(results: dict, key: str, med: float, lo: float, hi: float) -> None:
    """Store one benchmark result and print its formatted summary line."""
    results[key] = (med, lo, hi)
    print(f"  {key:45s} median={med:8.2f}ms min={lo:8.2f}ms max={hi:8.2f}ms")


def main():
    """Run every baseline benchmark; return {key: (median, min, max)} in ms."""
    import tempfile

    results = {}

    print("=" * 70)
    print("APM Performance Baseline Benchmark")
    print("=" * 70)

    # 1. Primitive conflict detection
    for count in [100, 500, 1000]:
        med, lo, hi = bench_primitive_conflict_detection(count)
        _record(results, f"primitive_add_unique_{count}", med, lo, hi)

    for count in [100, 500, 1000]:
        med, lo, hi = bench_primitive_conflict_50pct(count)
        _record(results, f"primitive_conflict_50pct_{count}", med, lo, hi)

    # 2. Depth lookups
    for n, d in [(50, 5), (100, 10), (500, 10)]:
        med, lo, hi = bench_depth_lookup(n, d)
        _record(results, f"depth_lookup_{n}x{d}", med, lo, hi)

    # 3. Cycle detection
    for length in [20, 50, 100]:
        med, lo, hi = bench_cycle_detection_chain(length)
        _record(results, f"cycle_detect_chain_{length}", med, lo, hi)

    # 4. Flatten
    for n, d in [(50, 5), (100, 10), (500, 10)]:
        med, lo, hi = bench_flatten(n, d)
        _record(results, f"flatten_{n}x{d}", med, lo, hi)

    # 5. from_apm_yml repeated parsing
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        for repeats in [10, 50]:
            med, lo, hi = bench_from_apm_yml_repeated(tmp_path, repeats)
            _record(results, f"from_apm_yml_x{repeats}", med, lo, hi)

    # 6. Phase 4: parallel execution overhead (ThreadPoolExecutor vs sequential)
    from concurrent.futures import ThreadPoolExecutor, as_completed

    def _simulated_work(ms: float):
        """Simulate I/O-bound work by sleeping for `ms` milliseconds."""
        time.sleep(ms / 1000)
        return True

    # Sequential: 10 tasks x 50ms each = ~500ms
    def _seq_10():
        for _ in range(10):
            _simulated_work(50)

    med, lo, hi = bench(_seq_10, runs=3, label="sequential_10x50ms")
    _record(results, "sequential_10x50ms", med, lo, hi)

    # Parallel: 10 tasks x 50ms, 4 workers -> ~150ms (ceil(10/4) x 50ms)
    def _par_10():
        with ThreadPoolExecutor(max_workers=4) as executor:
            futs = [executor.submit(_simulated_work, 50) for _ in range(10)]
            for f in as_completed(futs):
                f.result()

    med, lo, hi = bench(_par_10, runs=3, label="parallel_4w_10x50ms")
    _record(results, "parallel_4w_10x50ms", med, lo, hi)

    print("=" * 70)
    print(f"Total benchmarks: {len(results)}")
    print("=" * 70)

    return results


if __name__ == "__main__":
    main()
"""Performance benchmarks for APM dependency resolution and compilation.

Covers the bottlenecks identified in issue #171:
- Primitive conflict detection (O(m) after fix, was O(m²))
- Cycle detection (O(V+E) after fix, was O(V×D))
- get_nodes_at_depth() (O(1) after fix, was O(V × max_depth))
- from_apm_yml() parse caching
- Inheritance chain caching

Run with: uv run pytest tests/benchmarks/ -v --tb=short -m benchmark
"""

import time
from pathlib import Path

import pytest

from apm_cli.primitives.models import (
    PrimitiveCollection,
    Instruction,
)
from apm_cli.deps.dependency_graph import DependencyTree, DependencyNode
from apm_cli.deps.apm_resolver import APMDependencyResolver
from apm_cli.models.apm_package import APMPackage, DependencyReference, clear_apm_yml_cache
from apm_cli.compilation.constitution import read_constitution, clear_constitution_cache


# ---------------------------------------------------------------------------
# Helpers to build synthetic data
# ---------------------------------------------------------------------------

def _make_instruction(name: str, source: str = "local") -> Instruction:
    """Build an in-memory Instruction primitive (no file I/O)."""
    return Instruction(
        name=name,
        file_path=Path(f"/fake/{name}.instructions.md"),
        description=f"Test instruction {name}",
        apply_to="**",
        content=f"Content for {name}",
        source=source,
    )


def _make_dep_ref(owner: str, repo: str, ref: str = "main") -> DependencyReference:
    """Parse an owner/repo#ref spec into a DependencyReference."""
    return DependencyReference.parse(f"{owner}/{repo}#{ref}")


def _make_node(owner: str, repo: str, depth: int, ref: str = "main") -> DependencyNode:
    """Build a DependencyNode for owner/repo at the given tree depth."""
    dep_ref = _make_dep_ref(owner, repo, ref)
    pkg = APMPackage(name=repo, version="1.0.0", source=f"{owner}/{repo}")
    return DependencyNode(package=pkg, dependency_ref=dep_ref, depth=depth)


def _build_deep_tree(n_packages: int, max_depth: int) -> DependencyTree:
    """Build a synthetic dependency tree with n packages spread across depths."""
    root = APMPackage(name="root", version="1.0.0")
    tree = DependencyTree(root_package=root)
    for i in range(n_packages):
        depth = (i % max_depth) + 1
        tree.add_node(_make_node("owner", f"pkg-{i}", depth))
    return tree


# ---------------------------------------------------------------------------
# Benchmark: Primitive conflict detection
# ---------------------------------------------------------------------------

@pytest.mark.benchmark
class TestPrimitiveConflictDetectionPerf:
    """Verify O(m) conflict detection (was O(m²) before #171)."""

    @pytest.mark.parametrize("count", [100, 500, 1000])
    def test_add_unique_primitives(self, count: int):
        """Adding N unique primitives should scale linearly."""
        coll = PrimitiveCollection()
        start = time.perf_counter()
        for i in range(count):
            coll.add_primitive(_make_instruction(f"instr-{i}"))
        elapsed = time.perf_counter() - start

        assert coll.count() == count
        assert not coll.has_conflicts()
        # Rough sanity bound: even 1000 unique adds should be well under 0.5s.
        assert elapsed < 0.5, f"Adding {count} primitives took {elapsed:.3f}s"

    def test_conflict_detection_with_duplicates(self):
        """500 adds with 50% conflicts should still be fast."""
        coll = PrimitiveCollection()
        names = [f"instr-{i % 250}" for i in range(500)]
        start = time.perf_counter()
        for name in names:
            coll.add_primitive(_make_instruction(name, source="dep:pkg"))
        elapsed = time.perf_counter() - start

        assert coll.count() == 250
        assert elapsed < 0.5, f"500 conflicting adds took {elapsed:.3f}s"


# ---------------------------------------------------------------------------
# Benchmark: Dependency tree depth-index
# ---------------------------------------------------------------------------

@pytest.mark.benchmark
class TestDepthIndexPerf:
    """Verify O(1) depth lookups (was O(V × max_depth) before #171)."""

    def test_get_nodes_at_depth_large_tree(self):
        """100 packages across 10 depths — lookups should be instant."""
        tree = _build_deep_tree(100, 10)
        start = time.perf_counter()
        total = 0
        for depth in range(1, 11):
            total += len(tree.get_nodes_at_depth(depth))
        elapsed = time.perf_counter() - start

        assert total == 100
        assert elapsed < 0.01, f"Depth lookups took {elapsed:.3f}s"

    def test_depth_index_consistency(self):
        """Depth index returns the same nodes as a brute-force scan."""
        tree = _build_deep_tree(50, 5)
        for depth in range(1, 6):
            indexed = set(n.get_id() for n in tree.get_nodes_at_depth(depth))
            brute = set(
                n.get_id()
                for n in tree.nodes.values()
                if n.depth == depth
            )
            assert indexed == brute, f"Mismatch at depth {depth}"


# ---------------------------------------------------------------------------
# Benchmark: Cycle detection
# ---------------------------------------------------------------------------

@pytest.mark.benchmark
class TestCycleDetectionPerf:
    """Verify O(V+E) cycle detection (was O(V×D) before #171)."""

    def test_no_cycles_deep_chain(self):
        """50-node linear chain — O(V+E) detection, no false positives."""
        root = APMPackage(name="root", version="1.0.0")
        tree = DependencyTree(root_package=root)
        prev = None
        for i in range(50):
            node = _make_node("owner", f"chain-{i}", depth=1)
            tree.add_node(node)
            if prev:
                prev.children.append(node)
            prev = node

        resolver = APMDependencyResolver()
        start = time.perf_counter()
        cycles = resolver.detect_circular_dependencies(tree)
        elapsed = time.perf_counter() - start

        assert len(cycles) == 0
        assert elapsed < 0.05, f"Cycle detection took {elapsed:.3f}s"


# ---------------------------------------------------------------------------
# Benchmark: from_apm_yml caching
# ---------------------------------------------------------------------------

@pytest.mark.benchmark
class TestFromApmYmlCachePerf:
    """Verify parse caching eliminates repeated disk I/O."""

    def setup_method(self):
        # Start each test from a cold cache so cold/warm timings are meaningful.
        clear_apm_yml_cache()

    def test_cache_hit_is_faster(self, tmp_path: Path):
        """Second parse of same file should be near-instant (cache hit)."""
        apm_yml = tmp_path / "apm.yml"
        apm_yml.write_text("name: bench-pkg\nversion: 1.0.0\n")

        # Cold parse
        start = time.perf_counter()
        pkg1 = APMPackage.from_apm_yml(apm_yml)
        cold = time.perf_counter() - start

        # Warm parse (cache hit)
        start = time.perf_counter()
        pkg2 = APMPackage.from_apm_yml(apm_yml)
        warm = time.perf_counter() - start

        assert pkg1.name == pkg2.name
        # Cache hit should be faster than the cold parse; the absolute bound
        # tolerates timer jitter when both calls are extremely fast.
        assert warm < cold or warm < 0.001, f"warm={warm:.6f}s cold={cold:.6f}s"


# ---------------------------------------------------------------------------
# Benchmark: read_constitution caching
# ---------------------------------------------------------------------------

@pytest.mark.benchmark
class TestConstitutionCachePerf:
    """Verify constitution read caching."""

    def setup_method(self):
        # Start each test from a cold cache.
        clear_constitution_cache()

    def test_cache_hit(self, tmp_path: Path):
        """Repeated reads of constitution should be cached."""
        const_dir = tmp_path / "memory"
        const_dir.mkdir()
        (const_dir / "constitution.md").write_text("# Constitution\nTest content\n")

        # Cold read
        content1 = read_constitution(tmp_path)
        # Warm read
        content2 = read_constitution(tmp_path)

        assert content1 == content2
        # Identity proves the cache returned its stored object, not a re-read.
        assert content1 is content2
"""Synthetic fixtures for deep dependency tree testing.

Generates apm.yml manifests that simulate real-world dependency patterns:
- 100 packages across 10 depth levels
- Diamond dependencies (A→B, A→C, B→D, C→D)
- Large primitive counts per package
"""

import textwrap
from pathlib import Path
from typing import Dict, List


def generate_deep_tree_fixtures(base_dir: Path, n_packages: int = 100, max_depth: int = 10) -> Dict[str, Path]:
    """Generate a tree of apm.yml files simulating deep transitive deps.

    Each package gets one dependency on a package "further along" the index
    space, with a second dependency every third package to form diamonds.

    Returns a dict of package_name -> apm.yml path.
    """
    packages: Dict[str, Path] = {}

    for i in range(n_packages):
        depth = (i % max_depth) + 1
        pkg_name = f"test-pkg-{i}"
        pkg_dir = base_dir / f"owner/{pkg_name}"
        pkg_dir.mkdir(parents=True, exist_ok=True)

        # Build dependency list (each package depends on 1-2 others at next depth)
        deps = []
        if depth < max_depth:
            deps.append(f"owner/test-pkg-{(i + max_depth) % n_packages}")
            # Diamond: add a second dep sometimes
            if i % 3 == 0:
                deps.append(f"owner/test-pkg-{(i + max_depth + 1) % n_packages}")

        deps_yaml = ""
        if deps:
            dep_lines = "\n".join(f"    - {d}" for d in deps)
            deps_yaml = f"\ndependencies:\n  apm:\n{dep_lines}\n"

        # Assemble the manifest from plain flush-left strings. The previous
        # textwrap.dedent(f"""...""") approach is fragile: interpolating the
        # flush-left multi-line `deps_yaml` block into an indented template
        # defeats dedent's common-prefix stripping and can emit indented
        # (invalid) YAML.
        content = (
            f"name: {pkg_name}\n"
            "version: 1.0.0\n"
            f"description: Synthetic package at depth {depth}\n"
        ) + deps_yaml

        apm_yml = pkg_dir / "apm.yml"
        apm_yml.write_text(content)
        packages[pkg_name] = apm_yml

    return packages


def generate_primitive_heavy_package(base_dir: Path, n_instructions: int = 200, n_contexts: int = 50) -> Path:
    """Generate a package with many primitives for conflict detection benchmarks.

    Returns the package directory, which contains an apm.yml manifest,
    `.apm/instructions/` with `n_instructions` files, and `.apm/contexts/`
    with `n_contexts` files.
    """
    pkg_dir = base_dir / "heavy-pkg"
    pkg_dir.mkdir(parents=True, exist_ok=True)

    apm_yml = pkg_dir / "apm.yml"
    apm_yml.write_text("name: heavy-pkg\nversion: 1.0.0\n")

    # Create instruction files with minimal front matter
    instructions_dir = pkg_dir / ".apm" / "instructions"
    instructions_dir.mkdir(parents=True, exist_ok=True)
    for i in range(n_instructions):
        (instructions_dir / f"instr-{i}.instructions.md").write_text(
            f"---\ndescription: Instruction {i}\napplyTo: '**'\n---\nContent {i}\n"
        )

    # Create plain-markdown context files
    contexts_dir = pkg_dir / ".apm" / "contexts"
    contexts_dir.mkdir(parents=True, exist_ok=True)
    for i in range(n_contexts):
        (contexts_dir / f"ctx-{i}.md").write_text(f"Context content {i}\n")

    return pkg_dir