From c14fea91cc418ebbf468454acbffd64b2c60f755 Mon Sep 17 00:00:00 2001
From: pablohashescobar <nikhilschacko@gmail.com>
Date: Mon, 5 May 2025 22:11:45 +0530
Subject: [PATCH] chore: correct live urls in background tasks

---
 apiserver/plane/bgtasks/copy_s3_object.py | 14 ++++--------
 apiserver/plane/utils/url.py              | 27 ++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/apiserver/plane/bgtasks/copy_s3_object.py b/apiserver/plane/bgtasks/copy_s3_object.py
index 97287339619..a92d7fe4e76 100644
--- a/apiserver/plane/bgtasks/copy_s3_object.py
+++ b/apiserver/plane/bgtasks/copy_s3_object.py
@@ -3,7 +3,7 @@
 import base64
 import requests
 from bs4 import BeautifulSoup
-from urllib.parse import urljoin
+
 # Django imports
 from django.conf import settings
 
@@ -12,7 +12,7 @@
 from plane.utils.exception_logger import log_exception
 from plane.settings.storage import S3Storage
 from celery import shared_task
-from plane.utils.url import get_url_components
+from plane.utils.url import normalize_url_path
 
 
 def get_entity_id_field(entity_type, entity_id):
@@ -69,17 +69,11 @@ def sync_with_external_service(entity_name, description_html):
             "variant": "rich" if entity_name == "PAGE" else "document",
         }
 
-        if not settings.LIVE_URL:
-            return {}
-
-        live_url = get_url_components(settings.LIVE_URL)
+        live_url = settings.LIVE_URL
         if not live_url:
             return {}
 
-        base_url = (
-            f"{live_url.get('scheme')}://{live_url.get('netloc')}{live_url.get('path')}"
-        )
-        url = urljoin(base_url, "convert-document/")
+        url = normalize_url_path(f"{live_url}/convert-document/")
 
         response = requests.post(url, json=data, headers=None)
         if response.status_code == 200:
diff --git a/apiserver/plane/utils/url.py b/apiserver/plane/utils/url.py
index 0658572bfe8..e485f93df61 100644
--- a/apiserver/plane/utils/url.py
+++ b/apiserver/plane/utils/url.py
@@ -1,6 +1,7 @@
 # Python imports
+import re
 from typing import Optional
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlunparse
 
 
 def is_valid_url(url: str) -> bool:
@@ -52,3 +53,27 @@ def get_url_components(url: str) -> Optional[dict]:
         "query": result.query,
         "fragment": result.fragment,
     }
+
+
+def normalize_url_path(url: str) -> str:
+    """
+    Collapse every run of consecutive slashes in a URL's path into a single slash.
+
+    The URL is split with ``urlparse`` and only its ``path`` attribute is rewritten, so the
+    "//" after the scheme, the host, the query string and the fragment are not touched by the regex.
+
+    Args:
+        url (str): The input URL string to normalize.
+
+    Returns:
+        str: The URL rebuilt with ``urlunparse`` after redundant slashes in the path are removed.
+
+    Example:
+        >>> normalize_url_path('https://example.com//foo///bar//baz?x=1#frag')
+        'https://example.com/foo/bar/baz?x=1#frag'
+    """
+    parts = urlparse(url)
+    # Only parts.path is rewritten, so the "//" separating scheme and host is left alone
+    normalized_path = re.sub(r"/+", "/", parts.path)
+    # Swap the cleaned path into the parsed result and reassemble the full URL
+    return urlunparse(parts._replace(path=normalized_path))