From 77ad3a8ed11f3cbf02b1b71659e2458043d60c60 Mon Sep 17 00:00:00 2001
From: Meni Yakove <myakove@gmail.com>
Date: Fri, 21 Nov 2025 16:48:40 +0200
Subject: [PATCH] feat: make webhook URL path configurable

- Remove hardcoded /webhook_server suffix from webhook URL
- webhook-ip config now requires full URL including path
- Enables compatibility with services like smee.io that provide URLs with built-in paths
- Updated schema, examples, and documentation to reflect new requirement

Examples:
- Direct: https://your-domain.com/webhook_server
- Smee.io: https://smee.io/your-channel

BREAKING CHANGE: webhook-ip must now include the full path (e.g., /webhook_server)
---
 README.md                                  | 80 +++++++++++-----------
 examples/config.yaml                       |  2 +-
 webhook_server/config/schema.yaml          |  2 +-
 webhook_server/tests/manifests/config.yaml |  2 +-
 webhook_server/tests/test_webhook.py       | 20 +++---
 webhook_server/utils/webhook.py            |  2 +-
 6 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/README.md b/README.md
index 07fabc350..644a467f7 100644
--- a/README.md
+++ b/README.md
@@ -206,7 +206,7 @@ Create `config.yaml` in your data directory:
 # yaml-language-server: $schema=https://raw.githubusercontent.com/myk-org/github-webhook-server/refs/heads/main/webhook_server/config/schema.yaml
 
 github-app-id: 123456
-webhook-ip: https://your-domain.com
+webhook-ip: https://your-domain.com/webhook_server # Full URL with path (for smee.io use: https://smee.io/your-channel)
 github-tokens:
   - ghp_your_github_token
 
@@ -245,7 +245,7 @@ auto-verified-and-merged-users:
   - "renovate[bot]"
   - "dependabot[bot]"
 
-auto-verify-cherry-picked-prs: true  # Auto-verify cherry-picked PRs (default: true)
+auto-verify-cherry-picked-prs: true # Auto-verify cherry-picked PRs (default: true)
 
 # Global PR Size Labels (optional)
 pr-size-thresholds:
@@ -708,14 +708,12 @@ The webhook server includes a comprehensive log viewer web interface for monitor
 
 **Memory-Optimized Streaming**: The log viewer uses advanced streaming and chunked processing techniques that replaced traditional bulk loading:
 
-
 - **Constant Memory Usage**: Handles log files of any size with consistent memory footprint
 - **Early Filtering**: Reduces data transfer by filtering at the source before transmission
 - **Streaming Processing**: Real-time log processing without loading entire files into memory
 - **90% Memory Reduction**: Optimized for enterprise environments with gigabytes of log data
 - **Sub-second Response Times**: Fast query responses even with large datasets
 
-
 ### 🔒 Security Warning
 
 **🚨 CRITICAL SECURITY NOTICE**: The log viewer endpoints (`/logs/*`) are **NOT PROTECTED** by
@@ -757,6 +755,7 @@ processing   microsecond     before load     (100-1000       updates
 ```
 
 **Memory Efficiency**:
+
 - **Streaming Parser**: Reads log files line-by-line instead of loading entire files
 - **Early Filtering**: Applies search criteria during parsing to reduce memory usage
 - **Chunked Responses**: Delivers results in small batches for responsive UI
@@ -992,6 +991,7 @@ curl "http://localhost:5000/logs/api/export?format=json&level=ERROR" -o errors.j
 #### Performance Benchmarks
 
 The memory optimization work has achieved:
+
 - **90% reduction** in memory usage compared to bulk loading
 - **Sub-second response times** for filtered queries on multi-GB log files
 - **Constant memory footprint** regardless of log file size
@@ -1006,7 +1006,7 @@ of your GitHub webhook processing workflows.
 ### 🤖 MCP Features
 
 - **Real-time Log Analysis**: AI agents can query, filter, and analyze webhook processing logs
-- **System Monitoring**: Access to health status and system metrics  
+- **System Monitoring**: Access to health status and system metrics
 - **Workflow Analysis**: Programmatic access to PR flow visualization and timing data
 - **Secure Architecture**: Only safe, read-only endpoints exposed to AI agents
 - **Intelligent Troubleshooting**: AI-powered error pattern recognition and debugging assistance
@@ -1023,13 +1023,13 @@ The MCP integration follows a **security-first approach** with strict endpoint i
 
 ### 📡 Available MCP Endpoints
 
-| Endpoint | Description | Use Case |
-|----------|-------------|----------|
-| `/mcp/webhook_server/healthcheck` | Server health status | System monitoring and uptime checks |
-| `/mcp/logs/api/entries` | Historical log data with filtering | Log analysis and debugging |
-| `/mcp/logs/api/export` | Log export functionality | Data analysis and reporting |
-| `/mcp/logs/api/pr-flow/{identifier}` | PR flow visualization data | Workflow analysis and timing |
-| `/mcp/logs/api/workflow-steps/{identifier}` | Workflow timeline data | Performance analysis |
+| Endpoint                                    | Description                        | Use Case                            |
+| ------------------------------------------- | ---------------------------------- | ----------------------------------- |
+| `/mcp/webhook_server/healthcheck`           | Server health status               | System monitoring and uptime checks |
+| `/mcp/logs/api/entries`                     | Historical log data with filtering | Log analysis and debugging          |
+| `/mcp/logs/api/export`                      | Log export functionality           | Data analysis and reporting         |
+| `/mcp/logs/api/pr-flow/{identifier}`        | PR flow visualization data         | Workflow analysis and timing        |
+| `/mcp/logs/api/workflow-steps/{identifier}` | Workflow timeline data             | Performance analysis                |
 
 **Note:** All MCP endpoints are proxied under the `/mcp` mount point. The MCP server creates a separate
 FastAPI app instance that duplicates the core API endpoints while excluding webhook processing, static files,
@@ -1039,7 +1039,7 @@ and HTML pages for security.
 
 **IMPORTANT**: The `/mcp/logs/*` endpoints expose potentially **highly sensitive data** including:
 
-- 🔑 **GitHub Personal Access Tokens** and API credentials  
+- 🔑 **GitHub Personal Access Tokens** and API credentials
 - 👤 **User information and GitHub usernames**
 - 📋 **Repository details and webhook payloads**
 - 🔒 **Internal system information and error details**
@@ -1048,7 +1048,7 @@ and HTML pages for security.
 
 - ✅ Deploy **only on trusted networks** (VPN, internal network)
 - ✅ **Never expose MCP endpoints** directly to the internet
-- ✅ Implement **reverse proxy authentication** for any external access  
+- ✅ Implement **reverse proxy authentication** for any external access
 - ✅ Use **firewall rules** to restrict access to authorized IP ranges only
 - ✅ Monitor and **audit access** to these endpoints
 
@@ -1082,11 +1082,7 @@ http://your-server:5000/mcp
   "mcpServers": {
     "github-webhook-server-logs": {
       "command": "npx",
-      "args": [
-        "mcp-remote",
-        "http://your-server:port/mcp",
-        "--allow-http"
-      ]
+      "args": ["mcp-remote", "http://your-server:port/mcp", "--allow-http"]
     }
   }
 }
@@ -1096,13 +1092,13 @@ http://your-server:5000/mcp
 
 Once configured, you can ask AI agents natural language questions:
 
-- *"Show me recent webhook errors from the last hour"*
-- *"What's the current health status of my webhook server?"*
-- *"Analyze the processing time for PR #123 and identify bottlenecks"*
-- *"Find all webhook failures for repository myorg/myrepo today"*
-- *"Export error logs from the last 24 hours for analysis"*
-- *"Compare processing times between successful and failed webhooks"*
-- *"Show me memory usage patterns in recent webhook processing"*
+- _"Show me recent webhook errors from the last hour"_
+- _"What's the current health status of my webhook server?"_
+- _"Analyze the processing time for PR #123 and identify bottlenecks"_
+- _"Find all webhook failures for repository myorg/myrepo today"_
+- _"Export error logs from the last 24 hours for analysis"_
+- _"Compare processing times between successful and failed webhooks"_
+- _"Show me memory usage patterns in recent webhook processing"_
 
 ### 🎯 Use Cases
 
@@ -1139,18 +1135,18 @@ Users can interact with the webhook server through GitHub comments on pull reque
 
 ### Pull Request Commands
 
-| Command             | Description                                         | Example             |
-| ------------------- | --------------------------------------------------- | ------------------- |
-| `/verified`         | Mark PR as verified                                 | `/verified`         |
-| `/verified cancel`  | Remove verification                                 | `/verified cancel`  |
-| `/hold`             | Block PR merging                                    | `/hold`             |
-| `/hold cancel`      | Unblock PR merging                                  | `/hold cancel`      |
-| `/wip`              | Mark as work in progress                            | `/wip`              |
-| `/wip cancel`       | Remove WIP status                                   | `/wip cancel`       |
-| `/lgtm`             | Approve changes                                     | `/lgtm`             |
-| `/approve`          | Approve PR                                          | `/approve`          |
-| `/assign-reviewers` | Assign OWNERS-based reviewers                       | `/assign-reviewers` |
-| `/check-can-merge`  | Check merge readiness                               | `/check-can-merge`  |
+| Command             | Description                                             | Example             |
+| ------------------- | ------------------------------------------------------- | ------------------- |
+| `/verified`         | Mark PR as verified                                     | `/verified`         |
+| `/verified cancel`  | Remove verification                                     | `/verified cancel`  |
+| `/hold`             | Block PR merging                                        | `/hold`             |
+| `/hold cancel`      | Unblock PR merging                                      | `/hold cancel`      |
+| `/wip`              | Mark as work in progress                                | `/wip`              |
+| `/wip cancel`       | Remove WIP status                                       | `/wip cancel`       |
+| `/lgtm`             | Approve changes                                         | `/lgtm`             |
+| `/approve`          | Approve PR                                              | `/approve`          |
+| `/assign-reviewers` | Assign OWNERS-based reviewers                           | `/assign-reviewers` |
+| `/check-can-merge`  | Check merge readiness                                   | `/check-can-merge`  |
 | `/reprocess`        | Trigger complete PR workflow reprocessing (OWNERS only) | `/reprocess`        |
 
 ### Workflow Management
@@ -1162,6 +1158,7 @@ The `/reprocess` command triggers complete PR workflow reprocessing from scratch
 **Permissions**: Requires user to be in repository OWNERS file (same as `/retest`)
 
 **Use Cases**:
+
 - Webhook delivery failed or was missed
 - Processing interrupted mid-workflow
 - OWNERS file changed and reviewers need reassignment
@@ -1169,6 +1166,7 @@ The `/reprocess` command triggers complete PR workflow reprocessing from scratch
 - PR got into inconsistent state and needs full reset
 
 **Behavior**:
+
 - Re-runs entire PR workflow including reviewer assignment, label updates, check queuing, and CI/CD tests
 - Won't create duplicate welcome messages or tracking issues if they already exist
 - Respects current repository configuration and OWNERS file
@@ -1207,9 +1205,11 @@ Cherry-picked PRs can be automatically verified or require manual verification d
 | `/cherry-pick branch1 branch2` | Cherry-pick to multiple branches | `/cherry-pick v1.0 v2.0` |
 
 **Configuration**: Control auto-verification of cherry-picked PRs:
+
 ```yaml
-auto-verify-cherry-picked-prs: true  # Default: true (auto-verify)
-                                      # Set to false to require manual verification
+auto-verify-cherry-picked-prs:
+  true # Default: true (auto-verify)
+  # Set to false to require manual verification
 ```
 
 ### Label Commands
diff --git a/examples/config.yaml b/examples/config.yaml
index 41fc40843..86f6c39b7 100644
--- a/examples/config.yaml
+++ b/examples/config.yaml
@@ -14,7 +14,7 @@ github-tokens:
   - <GITHIB TOKEN1>
   - <GITHIB TOKEN2>
 
-webhook-ip: <HTTP://IP OR URL:PORT>
+webhook-ip: <HTTP://IP OR URL:PORT/webhook_server> # Full URL with path (e.g., https://your-domain.com/webhook_server or https://smee.io/your-channel)
 
 docker: # Used to pull images from docker.io
   username: <username>
diff --git a/webhook_server/config/schema.yaml b/webhook_server/config/schema.yaml
index 92d67b263..c6111a9b2 100644
--- a/webhook_server/config/schema.yaml
+++ b/webhook_server/config/schema.yaml
@@ -32,7 +32,7 @@ properties:
       description: Global GitHub token for all repositories
   webhook-ip:
     type: string
-    description: IP or FQDN address of the webhook server for adding to the repositories
+    description: Full webhook URL including path (e.g., https://your-domain.com/webhook_server or https://smee.io/your-channel). Must include the complete path where webhooks will be received.
 
   ip-bind:
     type: string
diff --git a/webhook_server/tests/manifests/config.yaml b/webhook_server/tests/manifests/config.yaml
index da1c14bbd..88b5fb4cf 100644
--- a/webhook_server/tests/manifests/config.yaml
+++ b/webhook_server/tests/manifests/config.yaml
@@ -7,7 +7,7 @@ github-tokens:
   - GITHIB TOKEN1
   - GITHIB TOKEN2
 
-webhook-ip: HTTP://IP OR URL:PORT
+webhook-ip: HTTP://IP OR URL:PORT/webhook_server
 webhook-secret: test-webhook-secret # pragma: allowlist secret
 
 docker: # Used to pull images from docker.io
diff --git a/webhook_server/tests/test_webhook.py b/webhook_server/tests/test_webhook.py
index 245643598..893a4d8d5 100644
--- a/webhook_server/tests/test_webhook.py
+++ b/webhook_server/tests/test_webhook.py
@@ -54,7 +54,7 @@ def test_process_github_webhook_success_no_existing_hooks(
         # Verify webhook creation was called
         mock_repo.create_hook.assert_called_once_with(
             name="web",
-            config={"url": "http://example.com/webhook_server", "content_type": "json"},
+            config={"url": "http://example.com", "content_type": "json"},
             events=["push", "pull_request"],
             active=True,
         )
@@ -84,7 +84,7 @@ def test_process_github_webhook_success_with_secret(
         mock_repo.create_hook.assert_called_once_with(
             name="web",
             config={
-                "url": "http://example.com/webhook_server",
+                "url": "http://example.com",
                 "content_type": "json",
                 "secret": "test-secret",  # pragma: allowlist secret
             },
@@ -109,7 +109,7 @@ def test_process_github_webhook_default_events(
         # Verify default events are used
         mock_repo.create_hook.assert_called_once_with(
             name="web",
-            config={"url": "http://example.com/webhook_server", "content_type": "json"},
+            config={"url": "http://example.com", "content_type": "json"},
             events=["*"],  # Default events
             active=True,
         )
@@ -125,7 +125,7 @@ def test_process_github_webhook_existing_hook_same_config(
         """Test when webhook already exists with same configuration."""
         # Mock existing hook with matching URL
         existing_hook = Mock()
-        existing_hook.config = {"url": "http://example.com/webhook_server", "content_type": "json"}
+        existing_hook.config = {"url": "http://example.com", "content_type": "json"}
         mock_repo.get_hooks.return_value = [existing_hook]
         mock_get_repo_api.return_value = mock_repo
 
@@ -153,7 +153,7 @@ def test_process_github_webhook_secret_mismatch_deletes_old_hook(
         # Mock existing hook without secret, but we're adding a secret
         existing_hook = Mock()
         existing_hook.config = {
-            "url": "http://example.com/webhook_server",
+            "url": "http://example.com",
             "content_type": "json",
             # No secret in existing hook
         }
@@ -177,7 +177,7 @@ def test_process_github_webhook_secret_mismatch_deletes_old_hook(
         mock_repo.create_hook.assert_called_once_with(
             name="web",
             config={
-                "url": "http://example.com/webhook_server",
+                "url": "http://example.com",
                 "content_type": "json",
                 "secret": "new-secret",  # pragma: allowlist secret
             },
@@ -197,7 +197,7 @@ def test_process_github_webhook_secret_removal_deletes_old_hook(
         # Mock existing hook with secret, but we're removing it
         existing_hook = Mock()
         existing_hook.config = {
-            "url": "http://example.com/webhook_server",
+            "url": "http://example.com",
             "content_type": "json",
             "secret": "old-secret",  # pragma: allowlist secret
         }
@@ -220,7 +220,7 @@ def test_process_github_webhook_secret_removal_deletes_old_hook(
         # Verify new hook was created without secret
         mock_repo.create_hook.assert_called_once_with(
             name="web",
-            config={"url": "http://example.com/webhook_server", "content_type": "json"},
+            config={"url": "http://example.com", "content_type": "json"},
             events=["push", "pull_request"],
             active=True,
         )
@@ -291,10 +291,10 @@ def test_process_github_webhook_multiple_existing_hooks(
         """Test handling of multiple existing hooks, only matching ones are affected."""
         # Mock multiple existing hooks
         matching_hook = Mock()
-        matching_hook.config = {"url": "http://example.com/webhook_server", "content_type": "json"}
+        matching_hook.config = {"url": "http://example.com", "content_type": "json"}
 
         non_matching_hook = Mock()
-        non_matching_hook.config = {"url": "http://different.com/webhook_server", "content_type": "json"}
+        non_matching_hook.config = {"url": "http://different.com", "content_type": "json"}
 
         mock_repo.get_hooks.return_value = [matching_hook, non_matching_hook]
         mock_get_repo_api.return_value = mock_repo
diff --git a/webhook_server/utils/webhook.py b/webhook_server/utils/webhook.py
index f905bc0d8..e54ee797a 100644
--- a/webhook_server/utils/webhook.py
+++ b/webhook_server/utils/webhook.py
@@ -32,7 +32,7 @@ def process_github_webhook(
     if not repo:
         return False, f"[API user {api_user}] - Could not find repository {full_repository_name}", LOGGER.error
 
-    config_: dict[str, str] = {"url": f"{webhook_ip}/webhook_server", "content_type": "json"}
+    config_: dict[str, str] = {"url": webhook_ip, "content_type": "json"}
 
     if secret:
         config_["secret"] = secret