From ab958cddb632a55c4c91efcad41c8f700538e88c Mon Sep 17 00:00:00 2001 From: sangeethailango Date: Fri, 19 Dec 2025 15:04:29 +0530 Subject: [PATCH 1/8] fix: using base url of a redirect url --- apps/api/plane/bgtasks/work_item_link_task.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/api/plane/bgtasks/work_item_link_task.py b/apps/api/plane/bgtasks/work_item_link_task.py index 7ceaacaf5ad..029fcc4642d 100644 --- a/apps/api/plane/bgtasks/work_item_link_task.py +++ b/apps/api/plane/bgtasks/work_item_link_task.py @@ -49,9 +49,11 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: soup = None title = None + final_url = url # Use final URL after redirects for favicon resolution try: response = requests.get(url, headers=headers, timeout=1) + final_url = response.url # Get the final URL after any redirects soup = BeautifulSoup(response.content, "html.parser") title_tag = soup.find("title") @@ -60,8 +62,8 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: except requests.RequestException as e: logger.warning(f"Failed to fetch HTML for title: {str(e)}") - # Fetch and encode favicon - favicon_base64 = fetch_and_encode_favicon(headers, soup, url) + # Fetch and encode favicon using final URL (after redirects) + favicon_base64 = fetch_and_encode_favicon(headers, soup, final_url) # Prepare result result = { From ebf9516959301cc73dd6473eba67028da231450c Mon Sep 17 00:00:00 2001 From: sangeethailango Date: Wed, 24 Dec 2025 14:18:06 +0530 Subject: [PATCH 2/8] chore: internal networks check for the final_url --- apps/api/plane/bgtasks/work_item_link_task.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/apps/api/plane/bgtasks/work_item_link_task.py b/apps/api/plane/bgtasks/work_item_link_task.py index 029fcc4642d..a5e8a2707b5 100644 --- a/apps/api/plane/bgtasks/work_item_link_task.py +++ b/apps/api/plane/bgtasks/work_item_link_task.py @@ -31,17 +31,6 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: str: JSON string containing title and base64-encoded favicon """ try: - # Prevent access to private IP ranges - parsed = urlparse(url) - - try: - ip = ipaddress.ip_address(parsed.hostname) - if ip.is_private or ip.is_loopback or ip.is_reserved: - raise ValueError("Access to private/internal networks is not allowed") - except ValueError: - # Not an IP address, continue with domain validation - pass - # Set up headers to mimic a real browser headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" # noqa: E501 @@ -49,12 +38,22 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: soup = None title = None - final_url = url # Use final URL after redirects for favicon resolution + final_url = None try: response = requests.get(url, headers=headers, timeout=1) final_url = response.url # Get the final URL after any redirects + parsed = urlparse(final_url) + + try: + ip = ipaddress.ip_address(parsed.hostname) + if ip.is_private or ip.is_loopback or ip.is_reserved: + raise ValueError("Access to private/internal networks is not allowed") + except ValueError: + # Not an IP address, continue with domain validation + pass + soup = BeautifulSoup(response.content, "html.parser") title_tag = soup.find("title") title = title_tag.get_text().strip() if title_tag else None From 267981b590f32a6138a1e181a8ddc481ae03ca99 Mon Sep 17 00:00:00 2001 From: sangeethailango Date: Wed, 24 Dec 2025 14:29:42 +0530 Subject: [PATCH 3/8] fix: none final_url --- apps/api/plane/bgtasks/work_item_link_task.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/apps/api/plane/bgtasks/work_item_link_task.py b/apps/api/plane/bgtasks/work_item_link_task.py index a5e8a2707b5..00f0165f233 100644 --- a/apps/api/plane/bgtasks/work_item_link_task.py +++ b/apps/api/plane/bgtasks/work_item_link_task.py @@ -38,7 +38,17 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: soup = None title = None - final_url = None + final_url = url + + parsed = urlparse(url) + + try: + ip = ipaddress.ip_address(parsed.hostname) + if ip.is_private or ip.is_loopback or ip.is_reserved: + raise ValueError("Access to private/internal networks is not allowed") + except ValueError: + # Not an IP address, continue with domain validation + pass try: response = requests.get(url, headers=headers, timeout=1) From 2507fce3b230ce33c20713bddbd0f22e7f3b3337 Mon Sep 17 00:00:00 2001 From: sangeethailango Date: Wed, 24 Dec 2025 14:45:22 +0530 Subject: [PATCH 4/8] fix: exception handling --- apps/api/plane/bgtasks/work_item_link_task.py | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/apps/api/plane/bgtasks/work_item_link_task.py b/apps/api/plane/bgtasks/work_item_link_task.py index 00f0165f233..07679a985f9 100644 --- a/apps/api/plane/bgtasks/work_item_link_task.py +++ b/apps/api/plane/bgtasks/work_item_link_task.py @@ -1,5 +1,6 @@ # Python imports import logging +import socket # Third party imports @@ -20,6 +21,26 @@ DEFAULT_FAVICON = "PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiIGNsYXNzPSJsdWNpZGUgbHVjaWRlLWxpbmstaWNvbiBsdWNpZGUtbGluayI+PHBhdGggZD0iTTEwIDEzYTUgNSAwIDAgMCA3LjU0LjU0bDMtM2E1IDUgMCAwIDAtNy4wNy03LjA3bC0xLjcyIDEuNzEiLz48cGF0aCBkPSJNMTQgMTFhNSA1IDAgMCAwLTcuNTQtLjU0bC0zIDNhNSA1IDAgMCAwIDcuMDcgNy4wN2wxLjcxLTEuNzEiLz48L3N2Zz4=" # noqa: E501 +def is_private_ip(hostname: str) -> bool: + """ + Check if a hostname resolves to a private/internal IP address. + + Args: + hostname: The hostname to check + + Returns: + bool: True if the hostname resolves to a private IP, False otherwise + """ + try: + # Resolve hostname to IP address + ip_str = socket.gethostbyname(hostname) + ip = ipaddress.ip_address(ip_str) + return ip.is_private or ip.is_loopback or ip.is_reserved + except (socket.gaierror, ValueError): + # If we can't resolve the hostname, allow it (will fail later on request) + return False + + def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: """ Crawls a URL to extract the title and favicon. @@ -42,13 +63,8 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: parsed = urlparse(url) - try: - ip = ipaddress.ip_address(parsed.hostname) - if ip.is_private or ip.is_loopback or ip.is_reserved: - raise ValueError("Access to private/internal networks is not allowed") - except ValueError: - # Not an IP address, continue with domain validation - pass + if is_private_ip(parsed.hostname): + raise ValueError("Access to private/internal networks is not allowed") try: response = requests.get(url, headers=headers, timeout=1) @@ -56,13 +72,8 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: parsed = urlparse(final_url) - try: - ip = ipaddress.ip_address(parsed.hostname) - if ip.is_private or ip.is_loopback or ip.is_reserved: - raise ValueError("Access to private/internal networks is not allowed") - except ValueError: - # Not an IP address, continue with domain validation - pass + if is_private_ip(parsed.hostname): + raise ValueError("Access to private/internal networks is not allowed") soup = BeautifulSoup(response.content, "html.parser") title_tag = soup.find("title") From 94bc7eac8928f62b7bf473ddc39fd9e3c495d25d Mon Sep 17 00:00:00 2001 From: sangeethailango Date: Wed, 24 Dec 2025 15:09:07 +0530 Subject: [PATCH 5/8] fix: exception handling --- apps/api/plane/bgtasks/work_item_link_task.py | 44 +++++++------------ 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/apps/api/plane/bgtasks/work_item_link_task.py b/apps/api/plane/bgtasks/work_item_link_task.py index 07679a985f9..0915ec6c046 100644 --- a/apps/api/plane/bgtasks/work_item_link_task.py +++ b/apps/api/plane/bgtasks/work_item_link_task.py @@ -21,26 +21,6 @@ DEFAULT_FAVICON = "PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiIGNsYXNzPSJsdWNpZGUgbHVjaWRlLWxpbmstaWNvbiBsdWNpZGUtbGluayI+PHBhdGggZD0iTTEwIDEzYTUgNSAwIDAgMCA3LjU0LjU0bDMtM2E1IDUgMCAwIDAtNy4wNy03LjA3bC0xLjcyIDEuNzEiLz48cGF0aCBkPSJNMTQgMTFhNSA1IDAgMCAwLTcuNTQtLjU0bC0zIDNhNSA1IDAgMCAwIDcuMDcgNy4wN2wxLjcxLTEuNzEiLz48L3N2Zz4=" # noqa: E501 -def is_private_ip(hostname: str) -> bool: - """ - Check if a hostname resolves to a private/internal IP address. - - Args: - hostname: The hostname to check - - Returns: - bool: True if the hostname resolves to a private IP, False otherwise - """ - try: - # Resolve hostname to IP address - ip_str = socket.gethostbyname(hostname) - ip = ipaddress.ip_address(ip_str) - return ip.is_private or ip.is_loopback or ip.is_reserved - except (socket.gaierror, ValueError): - # If we can't resolve the hostname, allow it (will fail later on request) - return False - - def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: """ Crawls a URL to extract the title and favicon. @@ -61,19 +41,29 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: title = None final_url = url - parsed = urlparse(url) + parsed = urlparse(final_url) - if is_private_ip(parsed.hostname): - raise ValueError("Access to private/internal networks is not allowed") + try: + ip = ipaddress.ip_address(parsed.hostname) + if ip.is_private or ip.is_loopback or ip.is_reserved: + raise ValueError("Access to private/internal networks is not allowed") + except ValueError: + # Not an IP address, continue with domain validation + pass try: - response = requests.get(url, headers=headers, timeout=1) + response = requests.get(final_url, headers=headers, timeout=1) final_url = response.url # Get the final URL after any redirects - parsed = urlparse(final_url) + try: + parsed = urlparse(final_url) - if is_private_ip(parsed.hostname): - raise ValueError("Access to private/internal networks is not allowed") + ip = ipaddress.ip_address(parsed.hostname) + if ip.is_private or ip.is_loopback or ip.is_reserved: + raise ValueError("Access to private/internal networks is not allowed") + except ValueError: + # Not an IP address, continue with domain validation + pass soup = BeautifulSoup(response.content, "html.parser") title_tag = soup.find("title") From c88f4e0f0ae67d3b460bbefc47f38e51aba391f3 Mon Sep 17 00:00:00 2001 From: sangeethailango Date: Wed, 24 Dec 2025 15:14:53 +0530 Subject: [PATCH 6/8] chore: remove unused imports --- apps/api/plane/bgtasks/work_item_link_task.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/apps/api/plane/bgtasks/work_item_link_task.py b/apps/api/plane/bgtasks/work_item_link_task.py index 0915ec6c046..63df3ee7f90 100644 --- a/apps/api/plane/bgtasks/work_item_link_task.py +++ b/apps/api/plane/bgtasks/work_item_link_task.py @@ -1,7 +1,5 @@ # Python imports import logging -import socket - # Third party imports from celery import shared_task From 9d4d5f9690f2bff5d54c3d1b684ee1940cffa16e Mon Sep 17 00:00:00 2001 From: sangeethailango Date: Wed, 24 Dec 2025 15:31:04 +0530 Subject: [PATCH 7/8] refactor: moved ip address check logic into separate function --- apps/api/plane/bgtasks/work_item_link_task.py | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/apps/api/plane/bgtasks/work_item_link_task.py b/apps/api/plane/bgtasks/work_item_link_task.py index 63df3ee7f90..021fbb9585b 100644 --- a/apps/api/plane/bgtasks/work_item_link_task.py +++ b/apps/api/plane/bgtasks/work_item_link_task.py @@ -19,6 +19,18 @@ DEFAULT_FAVICON = "PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiIGNsYXNzPSJsdWNpZGUgbHVjaWRlLWxpbmstaWNvbiBsdWNpZGUtbGluayI+PHBhdGggZD0iTTEwIDEzYTUgNSAwIDAgMCA3LjU0LjU0bDMtM2E1IDUgMCAwIDAtNy4wNy03LjA3bC0xLjcyIDEuNzEiLz48cGF0aCBkPSJNMTQgMTFhNSA1IDAgMCAwLTcuNTQtLjU0bC0zIDNhNSA1IDAgMCAwIDcuMDcgNy4wN2wxLjcxLTEuNzEiLz48L3N2Zz4=" # noqa: E501 +def is_ip_address(url): + parsed = urlparse(url) + + try: + ip = ipaddress.ip_address(parsed.hostname) + if ip.is_private or ip.is_loopback or ip.is_reserved: + raise ValueError("Access to private/internal networks is not allowed") + except ValueError: + # Not an IP address, continue with domain validation + pass + + def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: """ Crawls a URL to extract the title and favicon. @@ -39,29 +51,14 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: title = None final_url = url - parsed = urlparse(final_url) - - try: - ip = ipaddress.ip_address(parsed.hostname) - if ip.is_private or ip.is_loopback or ip.is_reserved: - raise ValueError("Access to private/internal networks is not allowed") - except ValueError: - # Not an IP address, continue with domain validation - pass + is_ip_address(final_url) try: response = requests.get(final_url, headers=headers, timeout=1) final_url = response.url # Get the final URL after any redirects - try: - parsed = urlparse(final_url) - - ip = ipaddress.ip_address(parsed.hostname) - if ip.is_private or ip.is_loopback or ip.is_reserved: - raise ValueError("Access to private/internal networks is not allowed") - except ValueError: - # Not an IP address, continue with domain validation - pass + # check for redirected url also + is_ip_address(final_url) soup = BeautifulSoup(response.content, "html.parser") title_tag = soup.find("title") From c60ae296fc149b7cd070aaacc722f1b3bd7cb623 Mon Sep 17 00:00:00 2001 From: sangeethailango Date: Wed, 24 Dec 2025 16:17:48 +0530 Subject: [PATCH 8/8] fix: ValueError logic --- apps/api/plane/bgtasks/work_item_link_task.py | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/apps/api/plane/bgtasks/work_item_link_task.py b/apps/api/plane/bgtasks/work_item_link_task.py index 021fbb9585b..e436c1e8f85 100644 --- a/apps/api/plane/bgtasks/work_item_link_task.py +++ b/apps/api/plane/bgtasks/work_item_link_task.py @@ -19,16 +19,32 @@ DEFAULT_FAVICON = "PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiIGNsYXNzPSJsdWNpZGUgbHVjaWRlLWxpbmstaWNvbiBsdWNpZGUtbGluayI+PHBhdGggZD0iTTEwIDEzYTUgNSAwIDAgMCA3LjU0LjU0bDMtM2E1IDUgMCAwIDAtNy4wNy03LjA3bC0xLjcyIDEuNzEiLz48cGF0aCBkPSJNMTQgMTFhNSA1IDAgMCAwLTcuNTQtLjU0bC0zIDNhNSA1IDAgMCAwIDcuMDcgNy4wN2wxLjcxLTEuNzEiLz48L3N2Zz4=" # noqa: E501 -def is_ip_address(url): +def validate_url_ip(url: str) -> None: + """ + Validate that a URL doesn't point to a private/internal IP address. + Only checks if the hostname is a direct IP address. + + Args: + url: The URL to validate + + Raises: + ValueError: If the URL points to a private/internal IP + """ parsed = urlparse(url) + hostname = parsed.hostname + + if not hostname: + return try: - ip = ipaddress.ip_address(parsed.hostname) - if ip.is_private or ip.is_loopback or ip.is_reserved: - raise ValueError("Access to private/internal networks is not allowed") + ip = ipaddress.ip_address(hostname) except ValueError: - # Not an IP address, continue with domain validation - pass + # Not an IP address (it's a domain name), nothing to check here + return + + # It IS an IP address - check if it's private/internal + if ip.is_private or ip.is_loopback or ip.is_reserved: + raise ValueError("Access to private/internal networks is not allowed") def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: @@ -51,14 +67,14 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]: title = None final_url = url - is_ip_address(final_url) + validate_url_ip(final_url) try: response = requests.get(final_url, headers=headers, timeout=1) final_url = response.url # Get the final URL after any redirects # check for redirected url also - is_ip_address(final_url) + validate_url_ip(final_url) soup = BeautifulSoup(response.content, "html.parser") title_tag = soup.find("title")