From 77ad3a8ed11f3cbf02b1b71659e2458043d60c60 Mon Sep 17 00:00:00 2001 From: Meni Yakove Date: Fri, 21 Nov 2025 16:48:40 +0200 Subject: [PATCH] feat: make webhook URL path configurable - Remove hardcoded /webhook_server suffix from webhook URL - webhook-ip config now requires full URL including path - Enables compatibility with services like smee.io that provide URLs with built-in paths - Updated schema, examples, and documentation to reflect new requirement Examples: - Direct: https://your-domain.com/webhook_server - Smee.io: https://smee.io/your-channel BREAKING CHANGE: webhook-ip must now include the full path (e.g., /webhook_server) --- README.md | 80 +++++++++++----------- examples/config.yaml | 2 +- webhook_server/config/schema.yaml | 2 +- webhook_server/tests/manifests/config.yaml | 2 +- webhook_server/tests/test_webhook.py | 20 +++--- webhook_server/utils/webhook.py | 2 +- 6 files changed, 54 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 07fabc350..644a467f7 100644 --- a/README.md +++ b/README.md @@ -206,7 +206,7 @@ Create `config.yaml` in your data directory: # yaml-language-server: $schema=https://raw.githubusercontent.com/myk-org/github-webhook-server/refs/heads/main/webhook_server/config/schema.yaml github-app-id: 123456 -webhook-ip: https://your-domain.com +webhook-ip: https://your-domain.com/webhook_server # Full URL with path (for smee.io use: https://smee.io/your-channel) github-tokens: - ghp_your_github_token @@ -245,7 +245,7 @@ auto-verified-and-merged-users: - "renovate[bot]" - "dependabot[bot]" -auto-verify-cherry-picked-prs: true # Auto-verify cherry-picked PRs (default: true) +auto-verify-cherry-picked-prs: true # Auto-verify cherry-picked PRs (default: true) # Global PR Size Labels (optional) pr-size-thresholds: @@ -708,14 +708,12 @@ The webhook server includes a comprehensive log viewer web interface for monitor **Memory-Optimized Streaming**: The log viewer uses advanced streaming and chunked processing techniques that replaced traditional bulk loading: - - **Constant Memory Usage**: Handles log files of any size with consistent memory footprint - **Early Filtering**: Reduces data transfer by filtering at the source before transmission - **Streaming Processing**: Real-time log processing without loading entire files into memory - **90% Memory Reduction**: Optimized for enterprise environments with gigabytes of log data - **Sub-second Response Times**: Fast query responses even with large datasets - ### 🔒 Security Warning **🚨 CRITICAL SECURITY NOTICE**: The log viewer endpoints (`/logs/*`) are **NOT PROTECTED** by @@ -757,6 +755,7 @@ processing microsecond before load (100-1000 updates ``` **Memory Efficiency**: + - **Streaming Parser**: Reads log files line-by-line instead of loading entire files - **Early Filtering**: Applies search criteria during parsing to reduce memory usage - **Chunked Responses**: Delivers results in small batches for responsive UI @@ -992,6 +991,7 @@ curl "http://localhost:5000/logs/api/export?format=json&level=ERROR" -o errors.j #### Performance Benchmarks The memory optimization work has achieved: + - **90% reduction** in memory usage compared to bulk loading - **Sub-second response times** for filtered queries on multi-GB log files - **Constant memory footprint** regardless of log file size @@ -1006,7 +1006,7 @@ of your GitHub webhook processing workflows. ### 🤖 MCP Features - **Real-time Log Analysis**: AI agents can query, filter, and analyze webhook processing logs -- **System Monitoring**: Access to health status and system metrics +- **System Monitoring**: Access to health status and system metrics - **Workflow Analysis**: Programmatic access to PR flow visualization and timing data - **Secure Architecture**: Only safe, read-only endpoints exposed to AI agents - **Intelligent Troubleshooting**: AI-powered error pattern recognition and debugging assistance @@ -1023,13 +1023,13 @@ The MCP integration follows a **security-first approach** with strict endpoint i ### 📡 Available MCP Endpoints -| Endpoint | Description | Use Case | -|----------|-------------|----------| -| `/mcp/webhook_server/healthcheck` | Server health status | System monitoring and uptime checks | -| `/mcp/logs/api/entries` | Historical log data with filtering | Log analysis and debugging | -| `/mcp/logs/api/export` | Log export functionality | Data analysis and reporting | -| `/mcp/logs/api/pr-flow/{identifier}` | PR flow visualization data | Workflow analysis and timing | -| `/mcp/logs/api/workflow-steps/{identifier}` | Workflow timeline data | Performance analysis | +| Endpoint | Description | Use Case | +| ------------------------------------------- | ---------------------------------- | ----------------------------------- | +| `/mcp/webhook_server/healthcheck` | Server health status | System monitoring and uptime checks | +| `/mcp/logs/api/entries` | Historical log data with filtering | Log analysis and debugging | +| `/mcp/logs/api/export` | Log export functionality | Data analysis and reporting | +| `/mcp/logs/api/pr-flow/{identifier}` | PR flow visualization data | Workflow analysis and timing | +| `/mcp/logs/api/workflow-steps/{identifier}` | Workflow timeline data | Performance analysis | **Note:** All MCP endpoints are proxied under the `/mcp` mount point. The MCP server creates a separate FastAPI app instance that duplicates the core API endpoints while excluding webhook processing, static files, @@ -1039,7 +1039,7 @@ and HTML pages for security. **IMPORTANT**: The `/mcp/logs/*` endpoints expose potentially **highly sensitive data** including: -- 🔑 **GitHub Personal Access Tokens** and API credentials +- 🔑 **GitHub Personal Access Tokens** and API credentials - 👤 **User information and GitHub usernames** - 📋 **Repository details and webhook payloads** - 🔒 **Internal system information and error details** @@ -1048,7 +1048,7 @@ and HTML pages for security. - ✅ Deploy **only on trusted networks** (VPN, internal network) - ✅ **Never expose MCP endpoints** directly to the internet -- ✅ Implement **reverse proxy authentication** for any external access +- ✅ Implement **reverse proxy authentication** for any external access - ✅ Use **firewall rules** to restrict access to authorized IP ranges only - ✅ Monitor and **audit access** to these endpoints @@ -1082,11 +1082,7 @@ http://your-server:5000/mcp "mcpServers": { "github-webhook-server-logs": { "command": "npx", - "args": [ - "mcp-remote", - "http://your-server:port/mcp", - "--allow-http" - ] + "args": ["mcp-remote", "http://your-server:port/mcp", "--allow-http"] } } } @@ -1096,13 +1092,13 @@ http://your-server:5000/mcp Once configured, you can ask AI agents natural language questions: -- *"Show me recent webhook errors from the last hour"* -- *"What's the current health status of my webhook server?"* -- *"Analyze the processing time for PR #123 and identify bottlenecks"* -- *"Find all webhook failures for repository myorg/myrepo today"* -- *"Export error logs from the last 24 hours for analysis"* -- *"Compare processing times between successful and failed webhooks"* -- *"Show me memory usage patterns in recent webhook processing"* +- _"Show me recent webhook errors from the last hour"_ +- _"What's the current health status of my webhook server?"_ +- _"Analyze the processing time for PR #123 and identify bottlenecks"_ +- _"Find all webhook failures for repository myorg/myrepo today"_ +- _"Export error logs from the last 24 hours for analysis"_ +- _"Compare processing times between successful and failed webhooks"_ +- _"Show me memory usage patterns in recent webhook processing"_ ### 🎯 Use Cases @@ -1139,18 +1135,18 @@ Users can interact with the webhook server through GitHub comments on pull reque ### Pull Request Commands -| Command | Description | Example | -| ------------------- | --------------------------------------------------- | ------------------- | -| `/verified` | Mark PR as verified | `/verified` | -| `/verified cancel` | Remove verification | `/verified cancel` | -| `/hold` | Block PR merging | `/hold` | -| `/hold cancel` | Unblock PR merging | `/hold cancel` | -| `/wip` | Mark as work in progress | `/wip` | -| `/wip cancel` | Remove WIP status | `/wip cancel` | -| `/lgtm` | Approve changes | `/lgtm` | -| `/approve` | Approve PR | `/approve` | -| `/assign-reviewers` | Assign OWNERS-based reviewers | `/assign-reviewers` | -| `/check-can-merge` | Check merge readiness | `/check-can-merge` | +| Command | Description | Example | +| ------------------- | ------------------------------------------------------- | ------------------- | +| `/verified` | Mark PR as verified | `/verified` | +| `/verified cancel` | Remove verification | `/verified cancel` | +| `/hold` | Block PR merging | `/hold` | +| `/hold cancel` | Unblock PR merging | `/hold cancel` | +| `/wip` | Mark as work in progress | `/wip` | +| `/wip cancel` | Remove WIP status | `/wip cancel` | +| `/lgtm` | Approve changes | `/lgtm` | +| `/approve` | Approve PR | `/approve` | +| `/assign-reviewers` | Assign OWNERS-based reviewers | `/assign-reviewers` | +| `/check-can-merge` | Check merge readiness | `/check-can-merge` | | `/reprocess` | Trigger complete PR workflow reprocessing (OWNERS only) | `/reprocess` | ### Workflow Management @@ -1162,6 +1158,7 @@ The `/reprocess` command triggers complete PR workflow reprocessing from scratch **Permissions**: Requires user to be in repository OWNERS file (same as `/retest`) **Use Cases**: + - Webhook delivery failed or was missed - Processing interrupted mid-workflow - OWNERS file changed and reviewers need reassignment @@ -1169,6 +1166,7 @@ The `/reprocess` command triggers complete PR workflow reprocessing from scratch - PR got into inconsistent state and needs full reset **Behavior**: + - Re-runs entire PR workflow including reviewer assignment, label updates, check queuing, and CI/CD tests - Won't create duplicate welcome messages or tracking issues if they already exist - Respects current repository configuration and OWNERS file @@ -1207,9 +1205,11 @@ Cherry-picked PRs can be automatically verified or require manual verification d | `/cherry-pick branch1 branch2` | Cherry-pick to multiple branches | `/cherry-pick v1.0 v2.0` | **Configuration**: Control auto-verification of cherry-picked PRs: + ```yaml -auto-verify-cherry-picked-prs: true # Default: true (auto-verify) - # Set to false to require manual verification +auto-verify-cherry-picked-prs: + true # Default: true (auto-verify) + # Set to false to require manual verification ``` ### Label Commands diff --git a/examples/config.yaml b/examples/config.yaml index 41fc40843..86f6c39b7 100644 --- a/examples/config.yaml +++ b/examples/config.yaml @@ -14,7 +14,7 @@ github-tokens: - - -webhook-ip: +webhook-ip: # Full URL with path (e.g., https://your-domain.com/webhook_server or https://smee.io/your-channel) docker: # Used to pull images from docker.io username: diff --git a/webhook_server/config/schema.yaml b/webhook_server/config/schema.yaml index 92d67b263..c6111a9b2 100644 --- a/webhook_server/config/schema.yaml +++ b/webhook_server/config/schema.yaml @@ -32,7 +32,7 @@ properties: description: Global GitHub token for all repositories webhook-ip: type: string - description: IP or FQDN address of the webhook server for adding to the repositories + description: Full webhook URL including path (e.g., https://your-domain.com/webhook_server or https://smee.io/your-channel). Must include the complete path where webhooks will be received. ip-bind: type: string diff --git a/webhook_server/tests/manifests/config.yaml b/webhook_server/tests/manifests/config.yaml index da1c14bbd..88b5fb4cf 100644 --- a/webhook_server/tests/manifests/config.yaml +++ b/webhook_server/tests/manifests/config.yaml @@ -7,7 +7,7 @@ github-tokens: - GITHIB TOKEN1 - GITHIB TOKEN2 -webhook-ip: HTTP://IP OR URL:PORT +webhook-ip: HTTP://IP OR URL:PORT/webhook_server webhook-secret: test-webhook-secret # pragma: allowlist secret docker: # Used to pull images from docker.io diff --git a/webhook_server/tests/test_webhook.py b/webhook_server/tests/test_webhook.py index 245643598..893a4d8d5 100644 --- a/webhook_server/tests/test_webhook.py +++ b/webhook_server/tests/test_webhook.py @@ -54,7 +54,7 @@ def test_process_github_webhook_success_no_existing_hooks( # Verify webhook creation was called mock_repo.create_hook.assert_called_once_with( name="web", - config={"url": "http://example.com/webhook_server", "content_type": "json"}, + config={"url": "http://example.com", "content_type": "json"}, events=["push", "pull_request"], active=True, ) @@ -84,7 +84,7 @@ def test_process_github_webhook_success_with_secret( mock_repo.create_hook.assert_called_once_with( name="web", config={ - "url": "http://example.com/webhook_server", + "url": "http://example.com", "content_type": "json", "secret": "test-secret", # pragma: allowlist secret }, @@ -109,7 +109,7 @@ def test_process_github_webhook_default_events( # Verify default events are used mock_repo.create_hook.assert_called_once_with( name="web", - config={"url": "http://example.com/webhook_server", "content_type": "json"}, + config={"url": "http://example.com", "content_type": "json"}, events=["*"], # Default events active=True, ) @@ -125,7 +125,7 @@ def test_process_github_webhook_existing_hook_same_config( """Test when webhook already exists with same configuration.""" # Mock existing hook with matching URL existing_hook = Mock() - existing_hook.config = {"url": "http://example.com/webhook_server", "content_type": "json"} + existing_hook.config = {"url": "http://example.com", "content_type": "json"} mock_repo.get_hooks.return_value = [existing_hook] mock_get_repo_api.return_value = mock_repo @@ -153,7 +153,7 @@ def test_process_github_webhook_secret_mismatch_deletes_old_hook( # Mock existing hook without secret, but we're adding a secret existing_hook = Mock() existing_hook.config = { - "url": "http://example.com/webhook_server", + "url": "http://example.com", "content_type": "json", # No secret in existing hook } @@ -177,7 +177,7 @@ def test_process_github_webhook_secret_mismatch_deletes_old_hook( mock_repo.create_hook.assert_called_once_with( name="web", config={ - "url": "http://example.com/webhook_server", + "url": "http://example.com", "content_type": "json", "secret": "new-secret", # pragma: allowlist secret }, @@ -197,7 +197,7 @@ def test_process_github_webhook_secret_removal_deletes_old_hook( # Mock existing hook with secret, but we're removing it existing_hook = Mock() existing_hook.config = { - "url": "http://example.com/webhook_server", + "url": "http://example.com", "content_type": "json", "secret": "old-secret", # pragma: allowlist secret } @@ -220,7 +220,7 @@ def test_process_github_webhook_secret_removal_deletes_old_hook( # Verify new hook was created without secret mock_repo.create_hook.assert_called_once_with( name="web", - config={"url": "http://example.com/webhook_server", "content_type": "json"}, + config={"url": "http://example.com", "content_type": "json"}, events=["push", "pull_request"], active=True, ) @@ -291,10 +291,10 @@ def test_process_github_webhook_multiple_existing_hooks( """Test handling of multiple existing hooks, only matching ones are affected.""" # Mock multiple existing hooks matching_hook = Mock() - matching_hook.config = {"url": "http://example.com/webhook_server", "content_type": "json"} + matching_hook.config = {"url": "http://example.com", "content_type": "json"} non_matching_hook = Mock() - non_matching_hook.config = {"url": "http://different.com/webhook_server", "content_type": "json"} + non_matching_hook.config = {"url": "http://different.com", "content_type": "json"} mock_repo.get_hooks.return_value = [matching_hook, non_matching_hook] mock_get_repo_api.return_value = mock_repo diff --git a/webhook_server/utils/webhook.py b/webhook_server/utils/webhook.py index f905bc0d8..e54ee797a 100644 --- a/webhook_server/utils/webhook.py +++ b/webhook_server/utils/webhook.py @@ -32,7 +32,7 @@ def process_github_webhook( if not repo: return False, f"[API user {api_user}] - Could not find repository {full_repository_name}", LOGGER.error - config_: dict[str, str] = {"url": f"{webhook_ip}/webhook_server", "content_type": "json"} + config_: dict[str, str] = {"url": webhook_ip, "content_type": "json"} if secret: config_["secret"] = secret