Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 85 additions & 28 deletions src/apm_cli/deps/github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import os
import shutil
import stat
import sys
import tempfile
import time
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -63,6 +65,46 @@ def _debug(message: str) -> None:
print(f"[DEBUG] {message}", file=sys.stderr)


def _close_repo(repo) -> None:
"""Release GitPython handles so directories can be deleted on Windows."""
if repo is None:
return
try:
repo.git.clear_cache()
except Exception:
pass
try:
repo.close()
except Exception:
pass


def _rmtree(path) -> None:
"""Remove a directory tree, handling read-only files and brief Windows locks.

Git pack/index files are often read-only, and on Windows git processes may
hold brief locks even after the Repo object is closed. This wrapper uses
an onerror callback for read-only files and a single retry for lock races.
"""
def _on_readonly(func, fpath, _exc_info):
"""onerror callback: make read-only files writable and retry."""
try:
os.chmod(fpath, stat.S_IWRITE)
func(fpath)
except OSError:
pass

try:
shutil.rmtree(path, onerror=_on_readonly)
except PermissionError:
if sys.platform == 'win32':
# Single retry after a brief wait for lingering git handles
time.sleep(0.5)
shutil.rmtree(path, ignore_errors=True)
# On all platforms: don't raise from cleanup — just leave the
# temp dir behind (the OS will clean it up eventually).


class GitProgressReporter(RemoteProgress):
"""Report git clone progress to Rich Progress."""

Expand Down Expand Up @@ -439,7 +481,6 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
# Create a temporary directory for Git operations
temp_dir = None
try:
import tempfile
temp_dir = Path(tempfile.mkdtemp())

if is_likely_commit:
Expand Down Expand Up @@ -512,7 +553,6 @@ def resolve_git_reference(self, repo_ref: str) -> ResolvedReference:
raise RuntimeError(f"Failed to clone repository {dep_ref.repo_url}: {sanitized_error}")

finally:
# Clean up temporary directory
if temp_dir and temp_dir.exists():
shutil.rmtree(temp_dir, ignore_errors=True)

Expand Down Expand Up @@ -1149,29 +1189,36 @@ def download_subdirectory_package(self, dep_ref: DependencyReference, target_pat
if progress_obj and progress_task_id is not None:
progress_obj.update(progress_task_id, completed=10, total=100)

# Clone to a temporary directory first
import tempfile
with tempfile.TemporaryDirectory() as temp_dir:
temp_clone_path = Path(temp_dir) / "repo"

# Use mkdtemp + explicit cleanup so we control when rmtree runs.
# tempfile.TemporaryDirectory().__exit__ calls shutil.rmtree without our
# retry logic, which raises WinError 32 when git processes still hold
# handles at the end of the with-block.
temp_dir = tempfile.mkdtemp()
try:
# Sparse checkout always targets "repo/". If it fails we clone into
# "repo_clone/" so we never have to rmtree a directory that may still
# have live git handles from the failed subprocess.
sparse_clone_path = Path(temp_dir) / "repo"
temp_clone_path = sparse_clone_path

# Update progress - cloning
if progress_obj and progress_task_id is not None:
progress_obj.update(progress_task_id, completed=20, total=100)

# Phase 4 (#171): Try sparse-checkout first (git 2.25+), fall back to full clone
sparse_ok = self._try_sparse_checkout(dep_ref, temp_clone_path, subdir_path, ref)
sparse_ok = self._try_sparse_checkout(dep_ref, sparse_clone_path, subdir_path, ref)

if not sparse_ok:
# Full clone fallback
if temp_clone_path.exists():
shutil.rmtree(temp_clone_path)
# Full clone into a fresh subdirectory so we don't have to touch
# the (possibly locked) sparse-checkout directory at all.
temp_clone_path = Path(temp_dir) / "repo_clone"

package_display_name = subdir_path.split('/')[-1]
progress_reporter = GitProgressReporter(progress_task_id, progress_obj, package_display_name) if progress_task_id and progress_obj else None

# Detect if ref is a commit SHA (can't be used with --branch in shallow clones)
is_commit_sha = ref and re.match(r'^[a-f0-9]{7,40}$', ref) is not None

clone_kwargs = {
'dep_ref': dep_ref,
}
Expand All @@ -1183,7 +1230,7 @@ def download_subdirectory_package(self, dep_ref: DependencyReference, target_pat
clone_kwargs['depth'] = 1
if ref:
clone_kwargs['branch'] = ref

try:
self._clone_with_fallback(
dep_ref.repo_url,
Expand All @@ -1193,38 +1240,41 @@ def download_subdirectory_package(self, dep_ref: DependencyReference, target_pat
)
except Exception as e:
raise RuntimeError(f"Failed to clone repository: {e}") from e

if is_commit_sha:
repo_obj = None
try:
repo_obj = Repo(temp_clone_path)
repo_obj.git.checkout(ref)
except Exception as e:
raise RuntimeError(f"Failed to checkout commit {ref}: {e}") from e

finally:
_close_repo(repo_obj)

# Disable progress reporter after clone
if progress_reporter:
progress_reporter.disabled = True

# Update progress - extracting subdirectory
if progress_obj and progress_task_id is not None:
progress_obj.update(progress_task_id, completed=70, total=100)

# Check if subdirectory exists
source_subdir = temp_clone_path / subdir_path
if not source_subdir.exists():
raise RuntimeError(f"Subdirectory '{subdir_path}' not found in repository")

if not source_subdir.is_dir():
raise RuntimeError(f"Path '{subdir_path}' is not a directory")

# Create target directory
target_path.mkdir(parents=True, exist_ok=True)

# If target exists and has content, remove it
if target_path.exists() and any(target_path.iterdir()):
shutil.rmtree(target_path)
target_path.mkdir(parents=True, exist_ok=True)

# Copy subdirectory contents to target
for item in source_subdir.iterdir():
src = source_subdir / item.name
Expand All @@ -1233,17 +1283,24 @@ def download_subdirectory_package(self, dep_ref: DependencyReference, target_pat
shutil.copytree(src, dst)
else:
shutil.copy2(src, dst)

# Capture commit SHA before temp dir is destroyed

# Capture commit SHA; close the Repo object immediately so its file
# handles are released before _rmtree() runs in the finally block.
repo = None
try:
repo = Repo(temp_clone_path)
resolved_commit = repo.head.commit.hexsha
except Exception:
resolved_commit = "unknown"

finally:
_close_repo(repo)

# Update progress - validating
if progress_obj and progress_task_id is not None:
progress_obj.update(progress_task_id, completed=90, total=100)

finally:
_rmtree(temp_dir)

# Validate the extracted package (after temp dir is cleaned up)
validation_result = validate_apm_package(target_path)
Expand Down
128 changes: 128 additions & 0 deletions tests/test_github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import os
import pytest
import stat
import tempfile
import shutil
from pathlib import Path
Expand Down Expand Up @@ -998,5 +999,132 @@ def setup_subdir(*args, **kwargs):
downloader.download_subdirectory_package(dep_ref, target)


class TestWindowsCleanupHelpers:
"""Test _rmtree and _close_repo helpers for Windows compatibility."""

def test_rmtree_removes_normal_directory(self):
from apm_cli.deps.github_downloader import _rmtree
d = Path(tempfile.mkdtemp())
(d / "file.txt").write_text("hello")
_rmtree(d)
assert not d.exists()

def test_rmtree_handles_readonly_files(self):
from apm_cli.deps.github_downloader import _rmtree
d = Path(tempfile.mkdtemp())
f = d / "readonly.txt"
f.write_text("locked")
os.chmod(str(f), stat.S_IREAD)
_rmtree(d)
assert not d.exists()

def test_close_repo_none_is_safe(self):
from apm_cli.deps.github_downloader import _close_repo
# Must not raise when passed None
_close_repo(None)

def test_close_repo_releases_gitpython_handles(self):
from apm_cli.deps.github_downloader import _close_repo
repo = MagicMock()
_close_repo(repo)
repo.git.clear_cache.assert_called_once()
repo.close.assert_called_once()

def test_close_repo_swallows_exceptions(self):
from apm_cli.deps.github_downloader import _close_repo
repo = MagicMock()
repo.git.clear_cache.side_effect = RuntimeError("git gone")
# Must not propagate
_close_repo(repo)
Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_close_repo_swallows_exceptions() only asserts that _close_repo() doesn’t raise when repo.git.clear_cache() fails, but it doesn’t verify that repo.close() is still invoked. Since the main goal is to release handles, the test should assert that close() is called even if clear_cache() throws (this will also catch regressions where _close_repo stops short of closing on errors).

Suggested change
_close_repo(repo)
_close_repo(repo)
# Even if clear_cache fails, we must still attempt it and close the repo
repo.git.clear_cache.assert_called_once()
repo.close.assert_called_once()

Copilot uses AI. Check for mistakes.
# Even if clear_cache fails, we must still attempt it and close the repo
repo.git.clear_cache.assert_called_once()
repo.close.assert_called_once()


class TestDownloadSubdirectoryPackageWindowsCleanup:
"""Verify that WinError 32 file-lock races don't surface to the caller.

The root issue on Windows is that TemporaryDirectory.__exit__ calls
shutil.rmtree without retry logic, and git subprocess handles may still
be alive when the cleanup runs. The fix: manual mkdtemp + try/finally
+ _rmtree, plus _close_repo() before cleanup.
"""

def _make_dep_ref(self):
"""Return a minimal DependencyReference for a subdirectory package."""
# owner/repo/skills/test-pkg → virtual subdirectory reference
return DependencyReference.parse("owner/repo/skills/test-pkg")

def test_sparse_checkout_success_closes_sha_repo_before_rmtree(self, tmp_path):
"""When sparse checkout succeeds the SHA-capture Repo is closed before _rmtree."""
from apm_cli.deps.github_downloader import _close_repo # noqa
downloader = GitHubPackageDownloader()
dep = self._make_dep_ref()
target = tmp_path / "out"

call_order = []

def fake_close_repo(repo):
if repo is not None:
call_order.append(("close_repo", repo))

def fake_rmtree(path):
call_order.append(("rmtree", path))

fake_repo = MagicMock()
fake_repo.head.commit.hexsha = "abc1234"

def fake_sparse(dep_ref, clone_path, subdir, ref):
# Simulate sparse checkout writing the subdir
(clone_path / subdir).mkdir(parents=True, exist_ok=True)
(clone_path / subdir / "apm.yml").write_text("name: test-pkg\nversion: 1.0.0\n")
return True

with patch("apm_cli.deps.github_downloader._close_repo", side_effect=fake_close_repo), \
patch("apm_cli.deps.github_downloader._rmtree", side_effect=fake_rmtree), \
patch.object(downloader, "_try_sparse_checkout", side_effect=fake_sparse), \
patch("apm_cli.deps.github_downloader.Repo", return_value=fake_repo), \
patch("apm_cli.deps.github_downloader.validate_apm_package") as mock_validate:
mock_validate.return_value = MagicMock(is_valid=True, errors=[])
downloader.download_subdirectory_package(dep, target)

# Verify both _close_repo and _rmtree were called
assert any(op == "close_repo" for op, _ in call_order), "_close_repo was not called"
assert any(op == "rmtree" for op, _ in call_order), "_rmtree was not called"
# Verify _close_repo was called before _rmtree
close_repo_idx = next(i for i, (op, _) in enumerate(call_order) if op == "close_repo")
rmtree_idx = next(i for i, (op, _) in enumerate(call_order) if op == "rmtree")
assert close_repo_idx < rmtree_idx, "_close_repo must be called before _rmtree"

def test_sparse_checkout_failure_uses_fresh_clone_path(self, tmp_path):
"""When sparse checkout fails the full clone goes to a fresh path (repo_clone/)."""
downloader = GitHubPackageDownloader()
dep = self._make_dep_ref()
target = tmp_path / "out"

cloned_paths = []

def fake_clone_with_fallback(url, path, progress_reporter=None, **kwargs):
cloned_paths.append(path)
# Simulate clone writing the subdir
(path / dep.virtual_path).mkdir(parents=True, exist_ok=True)
(path / dep.virtual_path / "apm.yml").write_text("name: test-pkg\nversion: 1.0.0\n")

fake_repo = MagicMock()
fake_repo.head.commit.hexsha = "abc1234"

with patch.object(downloader, "_try_sparse_checkout", return_value=False), \
patch.object(downloader, "_clone_with_fallback", side_effect=fake_clone_with_fallback), \
patch("apm_cli.deps.github_downloader.Repo", return_value=fake_repo), \
patch("apm_cli.deps.github_downloader._close_repo"), \
patch("apm_cli.deps.github_downloader.validate_apm_package") as mock_validate:
mock_validate.return_value = MagicMock(is_valid=True, errors=[])
downloader.download_subdirectory_package(dep, target)

# Full clone must NOT reuse the sparse-checkout path "repo/"
assert len(cloned_paths) == 1
assert cloned_paths[0].name == "repo_clone"


if __name__ == '__main__':
pytest.main([__file__])
Loading