Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9b21c79
perf(check-run): cache branch protection settings to reduce API calls
myakove Nov 13, 2025
91d49d6
feat(webhook): clone repository for all PR events to enable local fil…
myakove Nov 13, 2025
8c7bd27
perf(git): replace multiple clones with single clone + worktrees
myakove Nov 13, 2025
182d7c1
perf(tests): update tests for _checkout_worktree refactoring
myakove Nov 13, 2025
2084a7f
test(coverage): add tests for _clone_repository_for_pr method
myakove Nov 13, 2025
f465f1d
test(pull-request): fix async property mocking and RuntimeWarnings
myakove Nov 13, 2025
03458f3
fix(tests): add missing asyncio import and remove unused parameter
myakove Nov 14, 2025
d4aecdb
perf(owners): read OWNERS files from local clone instead of GitHub API
myakove Nov 14, 2025
d36a426
fix(owners): improve error handling and logging for OWNERS file proce…
myakove Nov 14, 2025
249b3f0
refactor(owners): centralize OWNERS test data and improve code clarity
myakove Nov 14, 2025
be8dcd3
refactor(owners): implement CodeRabbit review improvements
myakove Nov 14, 2025
7af6233
security(owners): enforce OWNERS files read from PR base branch using…
myakove Nov 14, 2025
2498418
STDIN
myakove Nov 14, 2025
22109b0
remove unused fixtures
myakove Nov 14, 2025
a67831a
perf(owners): use git diff instead of API to fetch changed files
myakove Nov 14, 2025
e105e0c
fix(owners): implement CodeRabbit review improvements
myakove Nov 14, 2025
27db52c
fix(runner): only set check progress when set_check=True
myakove Nov 14, 2025
4f6b585
refactor(tests): remove obsolete Repository class from OWNERS tests
myakove Nov 15, 2025
d1e1a46
fix(git): resolve worktree bug for push events
myakove Nov 16, 2025
1533f6d
fix(tests): resolve CodeRabbit review improvements
myakove Nov 16, 2025
7490573
refactor(tests): improve test_github_api.py code style
myakove Nov 16, 2025
a9116b7
refactor(tests): extract duplicated test helpers to fixtures
myakove Nov 16, 2025
6b57217
Merge branch 'main' into reduce-api-calls
myakove Nov 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 154 additions & 5 deletions webhook_server/libs/github_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,13 @@
get_repository_github_app_api,
)
from webhook_server.utils.helpers import (
_redact_secrets,
format_task_fields,
get_api_with_highest_rate_limit,
get_apis_and_tokes_from_config,
get_github_repo_api,
prepare_log_prefix,
run_command,
)


Expand Down Expand Up @@ -123,6 +125,7 @@ def __init__(self, hook_data: dict[Any, Any], headers: Headers, logger: logging.
# Format: /tmp/tmp{random}/github-webhook-{repo_name}
# This prevents predictable paths and ensures isolation between concurrent webhook handlers
self.clone_repo_dir: str = tempfile.mkdtemp(prefix=f"github-webhook-{self.repository_name}-")
self._repo_cloned: bool = False # Track if repository has been cloned
# Initialize auto-verified users from API users
self.add_api_users_to_auto_verified_and_merged_users()

Expand Down Expand Up @@ -166,6 +169,143 @@ async def _get_token_metrics(self) -> str:
self.logger.debug(f"{self.log_prefix} Failed to get token metrics: {ex}")
return ""

async def _clone_repository(
    self,
    pull_request: PullRequest | None = None,
    checkout_ref: str | None = None,
) -> None:
    """Clone repository for webhook processing with worktrees.

    Clones the repository to self.clone_repo_dir.
    Handlers create isolated worktrees from this single clone for their operations.

    Args:
        pull_request: PullRequest object (for PR events)
        checkout_ref: Git ref to checkout (for push events, e.g., "refs/tags/v11.0.104")

    Raises:
        ValueError: If neither pull_request nor checkout_ref is provided
        RuntimeError: If clone or checkout fails (aborts webhook processing)
    """
    # Log start FIRST - even before early returns
    self.logger.step(  # type: ignore[attr-defined]
        f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'started')} "
        "Cloning repository for handler worktrees"
    )

    # Idempotent: a second call is a no-op once the clone succeeded.
    if self._repo_cloned:
        self.logger.debug(f"{self.log_prefix} Repository already cloned")
        return

    # Validate that at least one argument is provided
    if pull_request is None and not checkout_ref:
        self.logger.error(
            f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'failed')} "
            "Invalid arguments: either pull_request or checkout_ref must be provided"
        )
        raise ValueError(
            f"{self.log_prefix} _clone_repository() requires either pull_request or checkout_ref to be provided"
        )

    try:
        github_token = self.token
        # Embed the token in the remote URL for authenticated clone; every
        # command output below is redacted so the token never reaches logs.
        clone_url_with_token = self.repository.clone_url.replace("https://", f"https://{github_token}@")

        def redact_output(value: str) -> str:
            # Centralized scrubbing of the token from any command output.
            return _redact_secrets(value or "", [github_token], mask_sensitive=self.mask_sensitive)

        rc, _, err = await run_command(
            command=f"git clone {clone_url_with_token} {self.clone_repo_dir}",
            log_prefix=self.log_prefix,
            redact_secrets=[github_token],
            mask_sensitive=self.mask_sensitive,
        )

        if not rc:
            redacted_err = redact_output(err)
            self.logger.error(
                f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'failed')} "
                f"Failed to clone repository: {redacted_err}"
            )
            raise RuntimeError(f"Failed to clone repository: {redacted_err}")

        # Configure git user (needed for any commits made from worktrees).
        # Config/fetch failures below are non-fatal: log a warning and continue.
        git_cmd = f"git -C {self.clone_repo_dir}"
        rc, _, _ = await run_command(
            command=f"{git_cmd} config user.name '{self.repository.owner.login}'",
            log_prefix=self.log_prefix,
            mask_sensitive=self.mask_sensitive,
        )
        if not rc:
            self.logger.warning(f"{self.log_prefix} Failed to configure git user.name")

        rc, _, _ = await run_command(
            command=f"{git_cmd} config user.email '{self.repository.owner.login}@users.noreply.github.com'",
            log_prefix=self.log_prefix,
            mask_sensitive=self.mask_sensitive,
        )
        if not rc:
            self.logger.warning(f"{self.log_prefix} Failed to configure git user.email")

        # Configure PR fetch to enable origin/pr/* checkouts
        rc, _, _ = await run_command(
            command=(
                f"{git_cmd} config --local --add remote.origin.fetch +refs/pull/*/head:refs/remotes/origin/pr/*"
            ),
            log_prefix=self.log_prefix,
            mask_sensitive=self.mask_sensitive,
        )
        if not rc:
            self.logger.warning(f"{self.log_prefix} Failed to configure PR fetch refs")

        # Fetch all refs including PRs
        rc, _, _ = await run_command(
            command=f"{git_cmd} remote update",
            log_prefix=self.log_prefix,
            mask_sensitive=self.mask_sensitive,
        )
        if not rc:
            self.logger.warning(f"{self.log_prefix} Failed to fetch remote refs")

        # Determine checkout target
        if pull_request is not None:
            # PyGithub attribute access may lazily hit the API; keep it off the event loop.
            checkout_target = await asyncio.to_thread(lambda: pull_request.base.ref)
        else:
            # For push events: "refs/tags/v11.0.104" → "v11.0.104"
            #                  "refs/heads/main"     → "main"
            # checkout_ref guaranteed to be non-None by validation at function start
            assert checkout_ref is not None  # mypy type narrowing
            # removeprefix only strips a leading prefix, so refs whose *name*
            # happens to contain "refs/heads/" or "refs/tags/" are not mangled
            # (str.replace would strip the substring anywhere in the string).
            checkout_target = checkout_ref.removeprefix("refs/tags/").removeprefix("refs/heads/")

        # Checkout target branch/tag
        rc, _, err = await run_command(
            command=f"{git_cmd} checkout {checkout_target}",
            log_prefix=self.log_prefix,
            mask_sensitive=self.mask_sensitive,
        )
        if not rc:
            redacted_err = redact_output(err)
            self.logger.error(
                f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'failed')} "
                f"Failed to checkout {checkout_target}: {redacted_err}"
            )
            raise RuntimeError(f"Failed to checkout {checkout_target}: {redacted_err}")

        self._repo_cloned = True
        self.logger.success(  # type: ignore[attr-defined]
            f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'completed')} "
            f"Repository cloned to {self.clone_repo_dir} (ref: {checkout_target})"
        )

    except RuntimeError:
        # Re-raise RuntimeError unchanged to avoid double-wrapping
        raise
    except Exception as ex:
        self.logger.exception(
            f"{self.log_prefix} {format_task_fields('webhook_processing', 'repo_clone', 'failed')} "
            f"Exception during repository clone: {ex}"
        )
        raise RuntimeError(f"Repository clone failed: {ex}") from ex

async def process(self) -> Any:
event_log: str = f"Event type: {self.github_event}. event ID: {self.x_github_delivery}"
self.logger.step( # type: ignore[attr-defined]
Expand All @@ -192,6 +332,10 @@ async def process(self) -> Any:
f"Processing push event",
)
self.logger.debug(f"{self.log_prefix} {event_log}")

# Clone repository for push operations (PyPI uploads, container builds)
await self._clone_repository(checkout_ref=self.hook_data["ref"])

await PushHandler(github_webhook=self).process_push_webhook_data()
token_metrics = await self._get_token_metrics()
self.logger.success( # type: ignore[attr-defined]
Expand Down Expand Up @@ -242,6 +386,9 @@ async def process(self) -> Any:
self.parent_committer = pull_request.user.login
self.last_committer = getattr(self.last_commit.committer, "login", self.parent_committer)

# Clone repository for local file processing (OWNERS, changed files)
await self._clone_repository(pull_request=pull_request)

if self.github_event == "issue_comment":
self.logger.step( # type: ignore[attr-defined]
f"{self.log_prefix} {format_task_fields('webhook_processing', 'webhook_routing', 'processing')} "
Expand Down Expand Up @@ -444,6 +591,8 @@ def _repo_data_from_config(self, repository_config: dict[str, Any]) -> None:
value="create-issue-for-new-pr", return_on_none=global_create_issue_for_new_pr, extra_dict=repository_config
)

self.mask_sensitive = self.config.get_value("mask-sensitive-data", return_on_none=True)

async def get_pull_request(self, number: int | None = None) -> PullRequest | None:
if number:
self.logger.debug(f"{self.log_prefix} Attempting to get PR by number: {number}")
Expand Down Expand Up @@ -552,12 +701,12 @@ def _current_pull_request_supported_retest(self) -> list[str]:
return current_pull_request_supported_retest

def __del__(self) -> None:
"""Cleanup temporary clone directory on object destruction.
"""Remove the shared clone directory when the webhook object is destroyed.

This ensures the base temp directory created by tempfile.mkdtemp() is removed
when the webhook handler is destroyed, preventing temp directory leaks.
The subdirectories (created with -uuid4() suffix) are cleaned up by
_prepare_cloned_repo_dir context manager in handlers.
GithubWebhook now creates a single clone via tempfile.mkdtemp() and individual
handlers operate on worktrees created by git_worktree_checkout, which already
clean up their own directories. Only the base clone directory must be removed
here to prevent accumulating stale repositories on disk.
"""
if hasattr(self, "clone_repo_dir") and os.path.exists(self.clone_repo_dir):
try:
Expand Down
15 changes: 13 additions & 2 deletions webhook_server/libs/handlers/check_run_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def __init__(self, github_webhook: "GithubWebhook", owners_file_handler: OwnersF
self.logger = self.github_webhook.logger
self.log_prefix: str = self.github_webhook.log_prefix
self.repository: Repository = self.github_webhook.repository
self._repository_private: bool | None = None
self._branch_required_status_checks: list[str] | None = None
if isinstance(self.owners_file_handler, OwnersFileHandler):
self.labels_handler = LabelsHandler(
github_webhook=self.github_webhook, owners_file_handler=self.owners_file_handler
Expand Down Expand Up @@ -393,17 +395,26 @@ async def all_required_status_checks(self, pull_request: PullRequest) -> list[st
return _all_required_status_checks

async def get_branch_required_status_checks(self, pull_request: PullRequest) -> list[str]:
    """Return the required status-check contexts for the PR's base branch.

    Private repositories are skipped (branch protection is not queried) and
    yield an empty list. Both the repository's privacy flag and the branch
    protection contexts are memoized on the instance so repeated calls during
    a single webhook do not re-hit the GitHub API.
    """
    # Memoize the privacy flag on first use to avoid repeated API lookups.
    if self._repository_private is None:
        self._repository_private = self.repository.private

    if self._repository_private:
        self.logger.info(
            f"{self.log_prefix} Repository is private, skipping getting branch protection required status checks"
        )
        return []

    # Serve from the instance-level cache when branch protection was already fetched.
    cached = self._branch_required_status_checks
    if cached is not None:
        return cached

    # PyGithub calls are blocking; run them off the event loop.
    base_branch = await asyncio.to_thread(self.repository.get_branch, pull_request.base.ref)
    protection = await asyncio.to_thread(base_branch.get_protection)
    contexts = protection.required_status_checks.contexts
    self.logger.debug(f"{self.log_prefix} branch_required_status_checks: {contexts}")
    self._branch_required_status_checks = contexts
    return contexts

async def required_check_in_progress(
self, pull_request: PullRequest, last_commit_check_runs: list[CheckRun]
Expand Down
Loading