From d9c9e1b7ac1229bb03cc8c11e19a392a3a9097c5 Mon Sep 17 00:00:00 2001 From: NarayanBavisetti Date: Mon, 25 Aug 2025 13:29:25 +0530 Subject: [PATCH 1/3] chore: changed the html validation --- apps/api/plane/api/serializers/issue.py | 7 +- apps/api/plane/api/serializers/project.py | 5 +- apps/api/plane/app/serializers/draft.py | 7 +- apps/api/plane/app/serializers/issue.py | 7 +- apps/api/plane/app/serializers/page.py | 5 +- apps/api/plane/app/serializers/project.py | 5 +- apps/api/plane/app/serializers/workspace.py | 7 +- apps/api/plane/space/serializer/issue.py | 7 +- apps/api/plane/utils/content_validator.py | 107 ++------------------ apps/api/plane/utils/test_nh3.py | 88 ++++++++++++++++ 10 files changed, 140 insertions(+), 105 deletions(-) create mode 100644 apps/api/plane/utils/test_nh3.py diff --git a/apps/api/plane/api/serializers/issue.py b/apps/api/plane/api/serializers/issue.py index 69c35624658..5a347d2cb05 100644 --- a/apps/api/plane/api/serializers/issue.py +++ b/apps/api/plane/api/serializers/issue.py @@ -95,9 +95,14 @@ def validate(self, data): raise serializers.ValidationError({"description": error_msg}) if data.get("description_html"): - is_valid, error_msg = validate_html_content(data["description_html"]) + is_valid, error_msg, sanitized_html = validate_html_content( + data["description_html"] + ) if not is_valid: raise serializers.ValidationError({"description_html": error_msg}) + # Update the data with sanitized HTML if available + if sanitized_html is not None: + data["description_html"] = sanitized_html if data.get("description_binary"): is_valid, error_msg = validate_binary_data(data["description_binary"]) diff --git a/apps/api/plane/api/serializers/project.py b/apps/api/plane/api/serializers/project.py index e6a257f3eb4..edc782bb04e 100644 --- a/apps/api/plane/api/serializers/project.py +++ b/apps/api/plane/api/serializers/project.py @@ -216,9 +216,12 @@ def validate(self, data): if isinstance(data["description_html"], dict): is_valid, error_msg = validate_json_content(data["description_html"]) else: - is_valid, error_msg = validate_html_content( + is_valid, error_msg, sanitized_html = validate_html_content( str(data["description_html"]) ) + # Update the data with sanitized HTML if available + if sanitized_html is not None: + data["description_html"] = sanitized_html if not is_valid: raise serializers.ValidationError({"description_html": error_msg}) diff --git a/apps/api/plane/app/serializers/draft.py b/apps/api/plane/app/serializers/draft.py index 38fa65527b9..49d62da807b 100644 --- a/apps/api/plane/app/serializers/draft.py +++ b/apps/api/plane/app/serializers/draft.py @@ -82,9 +82,14 @@ def validate(self, attrs): raise serializers.ValidationError({"description": error_msg}) if "description_html" in attrs and attrs["description_html"]: - is_valid, error_msg = validate_html_content(attrs["description_html"]) + is_valid, error_msg, sanitized_html = validate_html_content( + attrs["description_html"] + ) if not is_valid: raise serializers.ValidationError({"description_html": error_msg}) + # Update the attrs with sanitized HTML if available + if sanitized_html is not None: + attrs["description_html"] = sanitized_html if "description_binary" in attrs and attrs["description_binary"]: is_valid, error_msg = validate_binary_data(attrs["description_binary"]) diff --git a/apps/api/plane/app/serializers/issue.py b/apps/api/plane/app/serializers/issue.py index 691140eba03..c03d5502995 100644 --- a/apps/api/plane/app/serializers/issue.py +++ b/apps/api/plane/app/serializers/issue.py @@ -134,9 +134,14 @@ def validate(self, attrs): raise serializers.ValidationError({"description": error_msg}) if "description_html" in attrs and attrs["description_html"]: - is_valid, error_msg = validate_html_content(attrs["description_html"]) + is_valid, error_msg, sanitized_html = validate_html_content( + attrs["description_html"] + ) if not is_valid: raise serializers.ValidationError({"description_html": error_msg}) + # Update the attrs with sanitized HTML if available + if sanitized_html is not None: + attrs["description_html"] = sanitized_html if "description_binary" in attrs and attrs["description_binary"]: is_valid, error_msg = validate_binary_data(attrs["description_binary"]) diff --git a/apps/api/plane/app/serializers/page.py b/apps/api/plane/app/serializers/page.py index 78762e4b4e1..29ed3510b09 100644 --- a/apps/api/plane/app/serializers/page.py +++ b/apps/api/plane/app/serializers/page.py @@ -229,11 +229,12 @@ def validate_description_html(self, value): return value # Use the validation function from utils - is_valid, error_message = validate_html_content(value) + is_valid, error_message, sanitized_html = validate_html_content(value) if not is_valid: raise serializers.ValidationError(error_message) - return value + # Return sanitized HTML if available, otherwise return original + return sanitized_html if sanitized_html is not None else value def validate_description(self, value): """Validate the JSON description""" diff --git a/apps/api/plane/app/serializers/project.py b/apps/api/plane/app/serializers/project.py index dfa541d9f17..c222043872c 100644 --- a/apps/api/plane/app/serializers/project.py +++ b/apps/api/plane/app/serializers/project.py @@ -81,9 +81,12 @@ def validate(self, data): if isinstance(data["description_html"], dict): is_valid, error_msg = validate_json_content(data["description_html"]) else: - is_valid, error_msg = validate_html_content( + is_valid, error_msg, sanitized_html = validate_html_content( str(data["description_html"]) ) + # Update the data with sanitized HTML if available + if sanitized_html is not None: + data["description_html"] = sanitized_html if not is_valid: raise serializers.ValidationError({"description_html": error_msg}) diff --git a/apps/api/plane/app/serializers/workspace.py b/apps/api/plane/app/serializers/workspace.py index ec4c4bf63e0..d1381b22d8e 100644 --- a/apps/api/plane/app/serializers/workspace.py +++ b/apps/api/plane/app/serializers/workspace.py @@ -325,9 +325,14 @@ def validate(self, data): raise serializers.ValidationError({"description": error_msg}) if "description_html" in data and data["description_html"]: - is_valid, error_msg = validate_html_content(data["description_html"]) + is_valid, error_msg, sanitized_html = validate_html_content( + data["description_html"] + ) if not is_valid: raise serializers.ValidationError({"description_html": error_msg}) + # Update the data with sanitized HTML if available + if sanitized_html is not None: + data["description_html"] = sanitized_html if "description_binary" in data and data["description_binary"]: is_valid, error_msg = validate_binary_data(data["description_binary"]) diff --git a/apps/api/plane/space/serializer/issue.py b/apps/api/plane/space/serializer/issue.py index 3549e76262c..8de6d000ebd 100644 --- a/apps/api/plane/space/serializer/issue.py +++ b/apps/api/plane/space/serializer/issue.py @@ -296,9 +296,14 @@ def validate(self, data): raise serializers.ValidationError({"description": error_msg}) if "description_html" in data and data["description_html"]: - is_valid, error_msg = validate_html_content(data["description_html"]) + is_valid, error_msg, sanitized_html = validate_html_content( + data["description_html"] + ) if not is_valid: raise serializers.ValidationError({"description_html": error_msg}) + # Update the data with sanitized HTML if available + if sanitized_html is not None: + data["description_html"] = sanitized_html if "description_binary" in data and data["description_binary"]: is_valid, error_msg = validate_binary_data(data["description_binary"]) diff --git a/apps/api/plane/utils/content_validator.py b/apps/api/plane/utils/content_validator.py index d28b83fc700..81ee5019a62 100644 --- a/apps/api/plane/utils/content_validator.py +++ b/apps/api/plane/utils/content_validator.py @@ -2,6 +2,7 @@ import base64 import json import re +import nh3 # Maximum allowed size for binary data (10MB) @@ -41,69 +42,6 @@ "]*>", - r"", - # JavaScript URLs in various attributes - r'(?:href|src|action)\s*=\s*["\']?\s*javascript:', - # Data URLs with text/html (potential XSS) - r'(?:href|src|action)\s*=\s*["\']?\s*data:text/html', - # Dangerous event handlers with JavaScript-like content - r'on(?:load|error|click|focus|blur|change|submit|reset|select|resize|scroll|unload|beforeunload|hashchange|popstate|storage|message|offline|online)\s*=\s*["\']?[^"\']*(?:javascript|alert|eval|document\.|window\.|location\.|history\.)[^"\']*["\']?', - # Object and embed tags that could load external content - r"<(?:object|embed)[^>]*(?:data|src)\s*=", - # Base tag that could change relative URL resolution - r"]*href\s*=", - # Dangerous iframe sources - r']*src\s*=\s*["\']?(?:javascript:|data:text/html)', - # Meta refresh redirects - r']*http-equiv\s*=\s*["\']?refresh["\']?', - # Link tags - simplified patterns - r']*rel\s*=\s*["\']?stylesheet["\']?', - r']*href\s*=\s*["\']?https?://', - r']*href\s*=\s*["\']?//', - r']*href\s*=\s*["\']?(?:data:|javascript:)', - # Style tags with external imports - r"]*>.*?@import.*?(?:https?://|//)", - # Link tags with dangerous rel types - r']*rel\s*=\s*["\']?(?:import|preload|prefetch|dns-prefetch|preconnect)["\']?', - # Forms with action attributes - r"]*action\s*=", -] - -# Dangerous JavaScript patterns for event handlers -DANGEROUS_JS_PATTERNS = [ - r"alert\s*\(", - r"eval\s*\(", - r"document\s*\.", - r"window\s*\.", - r"location\s*\.", - r"fetch\s*\(", - r"XMLHttpRequest", - r"innerHTML\s*=", - r"outerHTML\s*=", - r"document\.write", - r"script\s*>", -] - -# HTML self-closing tags that don't need closing tags -SELF_CLOSING_TAGS = { - "img", - "br", - "hr", - "input", - "meta", - "link", - "area", - "base", - "col", - "embed", - "source", - "track", - "wbr", -} def validate_binary_data(data): @@ -149,46 +87,23 @@ def validate_binary_data(data): return True, None -def validate_html_content(html_content): +def validate_html_content(html_content: str): """ - Validate that HTML content is safe and doesn't contain malicious patterns. - - Args: - html_content (str): The HTML content to validate - - Returns: - tuple: (is_valid: bool, error_message: str or None) + Sanitize HTML content using nh3. + Returns a tuple: (is_valid, error_message, clean_html) """ if not html_content: - return True, None # Empty is OK + return True, None, None # Size check - 10MB limit (consistent with binary validation) if len(html_content.encode("utf-8")) > MAX_SIZE: - return False, "HTML content exceeds maximum size limit (10MB)" - - # Check for specific malicious patterns (simplified and more reliable) - for pattern in MALICIOUS_HTML_PATTERNS: - if re.search(pattern, html_content, re.IGNORECASE | re.DOTALL): - return ( - False, - f"HTML content contains potentially malicious patterns: {pattern}", - ) - - # Additional check for inline event handlers that contain suspicious content - # This is more permissive - only blocks if the event handler contains actual dangerous code - event_handler_pattern = r'on\w+\s*=\s*["\']([^"\']*)["\']' - event_matches = re.findall(event_handler_pattern, html_content, re.IGNORECASE) - - for handler_content in event_matches: - for js_pattern in DANGEROUS_JS_PATTERNS: - if re.search(js_pattern, handler_content, re.IGNORECASE): - return ( - False, - f"HTML content contains dangerous JavaScript in event handler: {handler_content[:100]}", - ) - + return False, "HTML content exceeds maximum size limit (10MB)", None - return True, None + try: + clean_html = nh3.clean(html_content) + return True, None, clean_html + except Exception as e: + return False, f"Failed to sanitize HTML: {str(e)}", None def validate_json_content(json_content): diff --git a/apps/api/plane/utils/test_nh3.py b/apps/api/plane/utils/test_nh3.py new file mode 100644 index 00000000000..8c1395bd566 --- /dev/null +++ b/apps/api/plane/utils/test_nh3.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +""" +Simple test script for the nh3 HTML sanitization. +Run this to test the HTML sanitization functionality. +""" + +import sys +import os + +# Add the parent directory to the path so we can import the module +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +try: + from content_validator import validate_html_content + + print("โœ… Successfully imported validate_html_content") +except ImportError as e: + print(f"โŒ Failed to import: {e}") + sys.exit(1) + + +def test_html_sanitization(): + """Test the HTML sanitization functionality""" + + print("\nTesting HTML content validation and sanitization...") + print("=" * 60) + + # Test cases + test_cases = [ + # Safe HTML + ("

Hello World

", "Safe HTML - should be valid"), + ("
Bold text
", "Safe HTML with formatting"), + ("Link", "Safe HTML with link"), + # Malicious HTML that should be sanitized + ("

Hello

", "Script tag should be removed"), + ("

Click me

", "onclick attribute should be removed"), + ("", "iframe should be removed"), + ("", "onerror attribute should be removed"), + # Mixed content + ( + "

Safe text

More safe text
", + "Mixed content should be sanitized", + ), + # Empty content + ("", "Empty content should be valid"), + (None, "None content should be valid"), + ] + + passed = 0 + total = len(test_cases) + + for html_content, description in test_cases: + print(f"\n๐Ÿงช Test: {description}") + print(f"๐Ÿ“ฅ Input: {html_content}") + + try: + is_valid, error_msg, sanitized_html = validate_html_content(html_content) + + if is_valid: + print(f"โœ… Valid: {is_valid}") + print(f"๐Ÿ“ Error: {error_msg}") + print(f"๐Ÿงน Sanitized: {sanitized_html}") + + if html_content and sanitized_html != html_content: + print("๐Ÿ”’ Content was sanitized (malicious parts removed)") + else: + print("โœ… Content was already safe (no sanitization needed)") + passed += 1 + else: + print(f"โŒ Invalid: {is_valid}") + print(f"๐Ÿšจ Error: {error_msg}") + print(f"๐Ÿงน Sanitized: {sanitized_html}") + + except Exception as e: + print(f"๐Ÿ’ฅ Exception: {e}") + + print("-" * 40) + + print(f"\n๐Ÿ“Š Results: {passed}/{total} tests passed") + + if passed == total: + print("๐ŸŽ‰ All tests passed! HTML sanitization is working correctly.") + else: + print("โš ๏ธ Some tests failed. Please check the implementation.") + + +if __name__ == "__main__": + test_html_sanitization() From 41e35c79fea22bfdce8cbae0e805ac7f93923d2a Mon Sep 17 00:00:00 2001 From: NarayanBavisetti Date: Tue, 26 Aug 2025 13:55:58 +0530 Subject: [PATCH 2/3] chore: added requirements for nh3 --- apps/api/plane/utils/content_validator.py | 6 +- apps/api/plane/utils/test_nh3.py | 88 ----------------------- apps/api/requirements/base.txt | 4 +- 3 files changed, 6 insertions(+), 92 deletions(-) delete mode 100644 apps/api/plane/utils/test_nh3.py diff --git a/apps/api/plane/utils/content_validator.py b/apps/api/plane/utils/content_validator.py index 81ee5019a62..c2d6d066d6f 100644 --- a/apps/api/plane/utils/content_validator.py +++ b/apps/api/plane/utils/content_validator.py @@ -3,7 +3,7 @@ import json import re import nh3 - +from plane.utils.exception_logger import log_exception # Maximum allowed size for binary data (10MB) MAX_SIZE = 10 * 1024 * 1024 @@ -43,7 +43,6 @@ ] - def validate_binary_data(data): """ Validate that binary data appears to be valid document format and doesn't contain malicious content. @@ -103,7 +102,8 @@ def validate_html_content(html_content: str): clean_html = nh3.clean(html_content) return True, None, clean_html except Exception as e: - return False, f"Failed to sanitize HTML: {str(e)}", None + log_exception(e) + return False, "Failed to sanitize HTML", None def validate_json_content(json_content): diff --git a/apps/api/plane/utils/test_nh3.py b/apps/api/plane/utils/test_nh3.py deleted file mode 100644 index 8c1395bd566..00000000000 --- a/apps/api/plane/utils/test_nh3.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple test script for the nh3 HTML sanitization. -Run this to test the HTML sanitization functionality. -""" - -import sys -import os - -# Add the parent directory to the path so we can import the module -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -try: - from content_validator import validate_html_content - - print("โœ… Successfully imported validate_html_content") -except ImportError as e: - print(f"โŒ Failed to import: {e}") - sys.exit(1) - - -def test_html_sanitization(): - """Test the HTML sanitization functionality""" - - print("\nTesting HTML content validation and sanitization...") - print("=" * 60) - - # Test cases - test_cases = [ - # Safe HTML - ("

Hello World

", "Safe HTML - should be valid"), - ("
Bold text
", "Safe HTML with formatting"), - ("Link", "Safe HTML with link"), - # Malicious HTML that should be sanitized - ("

Hello

", "Script tag should be removed"), - ("

Click me

", "onclick attribute should be removed"), - ("", "iframe should be removed"), - ("", "onerror attribute should be removed"), - # Mixed content - ( - "

Safe text

More safe text
", - "Mixed content should be sanitized", - ), - # Empty content - ("", "Empty content should be valid"), - (None, "None content should be valid"), - ] - - passed = 0 - total = len(test_cases) - - for html_content, description in test_cases: - print(f"\n๐Ÿงช Test: {description}") - print(f"๐Ÿ“ฅ Input: {html_content}") - - try: - is_valid, error_msg, sanitized_html = validate_html_content(html_content) - - if is_valid: - print(f"โœ… Valid: {is_valid}") - print(f"๐Ÿ“ Error: {error_msg}") - print(f"๐Ÿงน Sanitized: {sanitized_html}") - - if html_content and sanitized_html != html_content: - print("๐Ÿ”’ Content was sanitized (malicious parts removed)") - else: - print("โœ… Content was already safe (no sanitization needed)") - passed += 1 - else: - print(f"โŒ Invalid: {is_valid}") - print(f"๐Ÿšจ Error: {error_msg}") - print(f"๐Ÿงน Sanitized: {sanitized_html}") - - except Exception as e: - print(f"๐Ÿ’ฅ Exception: {e}") - - print("-" * 40) - - print(f"\n๐Ÿ“Š Results: {passed}/{total} tests passed") - - if passed == total: - print("๐ŸŽ‰ All tests passed! HTML sanitization is working correctly.") - else: - print("โš ๏ธ Some tests failed. Please check the implementation.") - - -if __name__ == "__main__": - test_html_sanitization() diff --git a/apps/api/requirements/base.txt b/apps/api/requirements/base.txt index 78e9efed343..8c1a5beb35c 100644 --- a/apps/api/requirements/base.txt +++ b/apps/api/requirements/base.txt @@ -66,4 +66,6 @@ opentelemetry-sdk==1.28.1 opentelemetry-instrumentation-django==0.49b1 opentelemetry-exporter-otlp==1.28.1 # OpenAPI Specification -drf-spectacular==0.28.0 \ No newline at end of file +drf-spectacular==0.28.0 +# html sanitizer +nh3==0.2.18 \ No newline at end of file From 1a9f3817b2dbdc4367f7f2aa797854544e4fe328 Mon Sep 17 00:00:00 2001 From: NarayanBavisetti Date: Tue, 26 Aug 2025 14:28:23 +0530 Subject: [PATCH 3/3] chore: removed the json validations --- apps/api/plane/api/serializers/issue.py | 14 +- apps/api/plane/api/serializers/project.py | 19 +-- apps/api/plane/app/serializers/draft.py | 14 +- apps/api/plane/app/serializers/issue.py | 14 +- apps/api/plane/app/serializers/page.py | 12 -- apps/api/plane/app/serializers/project.py | 33 ++-- apps/api/plane/app/serializers/workspace.py | 14 +- apps/api/plane/space/serializer/issue.py | 12 +- apps/api/plane/utils/content_validator.py | 174 -------------------- 9 files changed, 41 insertions(+), 265 deletions(-) diff --git a/apps/api/plane/api/serializers/issue.py b/apps/api/plane/api/serializers/issue.py index 5a347d2cb05..075823cbfed 100644 --- a/apps/api/plane/api/serializers/issue.py +++ b/apps/api/plane/api/serializers/issue.py @@ -24,7 +24,6 @@ ) from plane.utils.content_validator import ( validate_html_content, - validate_json_content, validate_binary_data, ) @@ -89,17 +88,14 @@ def validate(self, data): raise serializers.ValidationError("Invalid HTML passed") # Validate description content for security - if data.get("description"): - is_valid, error_msg = validate_json_content(data["description"]) - if not is_valid: - raise serializers.ValidationError({"description": error_msg}) - if data.get("description_html"): is_valid, error_msg, sanitized_html = validate_html_content( data["description_html"] ) if not is_valid: - raise serializers.ValidationError({"description_html": error_msg}) + raise serializers.ValidationError( + {"error": "html content is not valid"} + ) # Update the data with sanitized HTML if available if sanitized_html is not None: data["description_html"] = sanitized_html @@ -107,7 +103,9 @@ def validate(self, data): if data.get("description_binary"): is_valid, error_msg = validate_binary_data(data["description_binary"]) if not is_valid: - raise serializers.ValidationError({"description_binary": error_msg}) + raise serializers.ValidationError( + {"description_binary": "Invalid binary data"} + ) # Validate assignees are from project if data.get("assignees", []): diff --git a/apps/api/plane/api/serializers/project.py b/apps/api/plane/api/serializers/project.py index edc782bb04e..d860c46b2d2 100644 --- a/apps/api/plane/api/serializers/project.py +++ b/apps/api/plane/api/serializers/project.py @@ -12,7 +12,6 @@ from plane.utils.content_validator import ( validate_html_content, - validate_json_content, ) from .base import BaseSerializer @@ -200,22 +199,8 @@ def validate(self, data): ) # Validate description content for security - if "description" in data and data["description"]: - # For Project, description might be text field, not JSON - if isinstance(data["description"], dict): - is_valid, error_msg = validate_json_content(data["description"]) - if not is_valid: - raise serializers.ValidationError({"description": error_msg}) - - if "description_text" in data and data["description_text"]: - is_valid, error_msg = validate_json_content(data["description_text"]) - if not is_valid: - raise serializers.ValidationError({"description_text": error_msg}) - if "description_html" in data and data["description_html"]: if isinstance(data["description_html"], dict): - is_valid, error_msg = validate_json_content(data["description_html"]) - else: is_valid, error_msg, sanitized_html = validate_html_content( str(data["description_html"]) ) @@ -223,7 +208,9 @@ def validate(self, data): if sanitized_html is not None: data["description_html"] = sanitized_html if not is_valid: - raise serializers.ValidationError({"description_html": error_msg}) + raise serializers.ValidationError( + {"error": "html content is not valid"} + ) return data diff --git a/apps/api/plane/app/serializers/draft.py b/apps/api/plane/app/serializers/draft.py index 49d62da807b..852caf8bf36 100644 --- a/apps/api/plane/app/serializers/draft.py +++ b/apps/api/plane/app/serializers/draft.py @@ -23,7 +23,6 @@ ) from plane.utils.content_validator import ( validate_html_content, - validate_json_content, validate_binary_data, ) from plane.app.permissions import ROLE @@ -76,17 +75,14 @@ def validate(self, attrs): raise serializers.ValidationError("Start date cannot exceed target date") # Validate description content for security - if "description" in attrs and attrs["description"]: - is_valid, error_msg = validate_json_content(attrs["description"]) - if not is_valid: - raise serializers.ValidationError({"description": error_msg}) - if "description_html" in attrs and attrs["description_html"]: is_valid, error_msg, sanitized_html = validate_html_content( attrs["description_html"] ) if not is_valid: - raise serializers.ValidationError({"description_html": error_msg}) + raise serializers.ValidationError( + {"error": "html content is not valid"} + ) # Update the attrs with sanitized HTML if available if sanitized_html is not None: attrs["description_html"] = sanitized_html @@ -94,7 +90,9 @@ def validate(self, attrs): if "description_binary" in attrs and attrs["description_binary"]: is_valid, error_msg = validate_binary_data(attrs["description_binary"]) if not is_valid: - raise serializers.ValidationError({"description_binary": error_msg}) + raise serializers.ValidationError( + {"description_binary": "Invalid binary data"} + ) # Validate assignees are from project if attrs.get("assignee_ids", []): diff --git a/apps/api/plane/app/serializers/issue.py b/apps/api/plane/app/serializers/issue.py index c03d5502995..1eda376015b 100644 --- a/apps/api/plane/app/serializers/issue.py +++ b/apps/api/plane/app/serializers/issue.py @@ -43,7 +43,6 @@ ) from plane.utils.content_validator import ( validate_html_content, - validate_json_content, validate_binary_data, ) @@ -128,17 +127,14 @@ def validate(self, attrs): raise serializers.ValidationError("Start date cannot exceed target date") # Validate description content for security - if "description" in attrs and attrs["description"]: - is_valid, error_msg = validate_json_content(attrs["description"]) - if not is_valid: - raise serializers.ValidationError({"description": error_msg}) - if "description_html" in attrs and attrs["description_html"]: is_valid, error_msg, sanitized_html = validate_html_content( attrs["description_html"] ) if not is_valid: - raise serializers.ValidationError({"description_html": error_msg}) + raise serializers.ValidationError( + {"error": "html content is not valid"} + ) # Update the attrs with sanitized HTML if available if sanitized_html is not None: attrs["description_html"] = sanitized_html @@ -146,7 +142,9 @@ def validate(self, attrs): if "description_binary" in attrs and attrs["description_binary"]: is_valid, error_msg = validate_binary_data(attrs["description_binary"]) if not is_valid: - raise serializers.ValidationError({"description_binary": error_msg}) + raise serializers.ValidationError( + {"description_binary": "Invalid binary data"} + ) # Validate assignees are from project if attrs.get("assignee_ids", []): diff --git a/apps/api/plane/app/serializers/page.py b/apps/api/plane/app/serializers/page.py index 29ed3510b09..9ac6cc414f8 100644 --- a/apps/api/plane/app/serializers/page.py +++ b/apps/api/plane/app/serializers/page.py @@ -7,7 +7,6 @@ from plane.utils.content_validator import ( validate_binary_data, validate_html_content, - validate_json_content, ) from plane.db.models import ( Page, @@ -236,17 +235,6 @@ def validate_description_html(self, value): # Return sanitized HTML if available, otherwise return original return sanitized_html if sanitized_html is not None else value - def validate_description(self, value): - """Validate the JSON description""" - if not value: - return value - - # Use the validation function from utils - is_valid, error_message = validate_json_content(value) - if not is_valid: - raise serializers.ValidationError(error_message) - - return value def update(self, instance, validated_data): """Update the page instance with validated data""" diff --git a/apps/api/plane/app/serializers/project.py b/apps/api/plane/app/serializers/project.py index c222043872c..1d1ea927d1c 100644 --- a/apps/api/plane/app/serializers/project.py +++ b/apps/api/plane/app/serializers/project.py @@ -15,7 +15,6 @@ ) from plane.utils.content_validator import ( validate_html_content, - validate_json_content, validate_binary_data, ) @@ -65,30 +64,18 @@ def validate_identifier(self, identifier): def validate(self, data): # Validate description content for security - if "description" in data and data["description"]: - # For Project, description might be text field, not JSON - if isinstance(data["description"], dict): - is_valid, error_msg = validate_json_content(data["description"]) - if not is_valid: - raise serializers.ValidationError({"description": error_msg}) - - if "description_text" in data and data["description_text"]: - is_valid, error_msg = validate_json_content(data["description_text"]) - if not is_valid: - raise serializers.ValidationError({"description_text": error_msg}) - if "description_html" in data and data["description_html"]: - if isinstance(data["description_html"], dict): - is_valid, error_msg = validate_json_content(data["description_html"]) - else: - is_valid, error_msg, sanitized_html = validate_html_content( - str(data["description_html"]) - ) - # Update the data with sanitized HTML if available - if sanitized_html is not None: - data["description_html"] = sanitized_html + is_valid, error_msg, sanitized_html = validate_html_content( + str(data["description_html"]) + ) + # Update the data with sanitized HTML if available + if sanitized_html is not None: + data["description_html"] = sanitized_html + if not is_valid: - raise serializers.ValidationError({"description_html": error_msg}) + raise serializers.ValidationError( + {"error": "html content is not valid"} + ) return data diff --git a/apps/api/plane/app/serializers/workspace.py b/apps/api/plane/app/serializers/workspace.py index d1381b22d8e..6b22f59e83d 100644 --- a/apps/api/plane/app/serializers/workspace.py +++ b/apps/api/plane/app/serializers/workspace.py @@ -26,7 +26,6 @@ from plane.utils.url import contains_url from plane.utils.content_validator import ( validate_html_content, - validate_json_content, validate_binary_data, ) @@ -319,17 +318,14 @@ class Meta: def validate(self, data): # Validate description content for security - if "description" in data and data["description"]: - is_valid, error_msg = validate_json_content(data["description"]) - if not is_valid: - raise serializers.ValidationError({"description": error_msg}) - if "description_html" in data and data["description_html"]: is_valid, error_msg, sanitized_html = validate_html_content( data["description_html"] ) if not is_valid: - raise serializers.ValidationError({"description_html": error_msg}) + raise serializers.ValidationError( + {"error": "html content is not valid"} + ) # Update the data with sanitized HTML if available if sanitized_html is not None: data["description_html"] = sanitized_html @@ -337,7 +333,9 @@ def validate(self, data): if "description_binary" in data and data["description_binary"]: is_valid, error_msg = validate_binary_data(data["description_binary"]) if not is_valid: - raise serializers.ValidationError({"description_binary": error_msg}) + raise serializers.ValidationError( + {"description_binary": "Invalid binary data"} + ) return data diff --git a/apps/api/plane/space/serializer/issue.py b/apps/api/plane/space/serializer/issue.py index 8de6d000ebd..64f151a2d20 100644 --- a/apps/api/plane/space/serializer/issue.py +++ b/apps/api/plane/space/serializer/issue.py @@ -30,7 +30,6 @@ ) from plane.utils.content_validator import ( validate_html_content, - validate_json_content, validate_binary_data, ) @@ -290,17 +289,14 @@ def validate(self, data): raise serializers.ValidationError("Start date cannot exceed target date") # Validate description content for security - if "description" in data and data["description"]: - is_valid, error_msg = validate_json_content(data["description"]) - if not is_valid: - raise serializers.ValidationError({"description": error_msg}) - if "description_html" in data and data["description_html"]: is_valid, error_msg, sanitized_html = validate_html_content( data["description_html"] ) if not is_valid: - raise serializers.ValidationError({"description_html": error_msg}) + raise serializers.ValidationError( + {"error": "html content is not valid"} + ) # Update the data with sanitized HTML if available if sanitized_html is not None: data["description_html"] = sanitized_html @@ -308,7 +304,7 @@ def validate(self, data): if "description_binary" in data and data["description_binary"]: is_valid, error_msg = validate_binary_data(data["description_binary"]) if not is_valid: - raise serializers.ValidationError({"description_binary": error_msg}) + raise serializers.ValidationError({"description_binary": "Invalid binary data"}) return data diff --git a/apps/api/plane/utils/content_validator.py b/apps/api/plane/utils/content_validator.py index c2d6d066d6f..47ee663ffc1 100644 --- a/apps/api/plane/utils/content_validator.py +++ b/apps/api/plane/utils/content_validator.py @@ -1,37 +1,11 @@ # Python imports import base64 -import json -import re import nh3 from plane.utils.exception_logger import log_exception # Maximum allowed size for binary data (10MB) MAX_SIZE = 10 * 1024 * 1024 -# Maximum recursion depth to prevent stack overflow -MAX_RECURSION_DEPTH = 20 - -# Dangerous text patterns that could indicate XSS or script injection -DANGEROUS_TEXT_PATTERNS = [ - r"]*>.*?", - r"javascript\s*:", - r"data\s*:\s*text/html", - r"eval\s*\(", - r"document\s*\.", - r"window\s*\.", - r"location\s*\.", -] - -# Dangerous attribute patterns for HTML attributes -DANGEROUS_ATTR_PATTERNS = [ - r"javascript\s*:", - r"data\s*:\s*text/html", - r"eval\s*\(", - r"alert\s*\(", - r"document\s*\.", - r"window\s*\.", -] - # Suspicious patterns for binary data content SUSPICIOUS_BINARY_PATTERNS = [ " MAX_SIZE: - return False, "JSON content exceeds maximum size limit (10MB)" - - # Basic structure validation for page description JSON - if isinstance(json_content, dict): - # Check for expected page description structure - # This is based on ProseMirror/Tiptap JSON structure - if "type" in json_content and json_content.get("type") == "doc": - # Valid document structure - if "content" in json_content and isinstance( - json_content["content"], list - ): - # Recursively check content for suspicious patterns - is_valid, error_msg = _validate_json_content_array( - json_content["content"] - ) - if not is_valid: - return False, error_msg - elif "type" not in json_content and "content" not in json_content: - # Allow other JSON structures but validate for suspicious content - is_valid, error_msg = _validate_json_content_recursive(json_content) - if not is_valid: - return False, error_msg - else: - return False, "JSON description must be a valid object" - - except (TypeError, ValueError) as e: - return False, "Invalid JSON structure" - except Exception as e: - return False, "Failed to validate JSON content" - - return True, None - - -def _validate_json_content_array(content, depth=0): - """ - Validate JSON content array for suspicious patterns. - - Args: - content (list): Array of content nodes to validate - depth (int): Current recursion depth (default: 0) - - Returns: - tuple: (is_valid: bool, error_message: str or None) - """ - # Check recursion depth to prevent stack overflow - if depth > MAX_RECURSION_DEPTH: - return False, f"Maximum recursion depth ({MAX_RECURSION_DEPTH}) exceeded" - - if not isinstance(content, list): - return True, None - - for node in content: - if isinstance(node, dict): - # Check text content for suspicious patterns (more targeted) - if node.get("type") == "text" and "text" in node: - text_content = node["text"] - for pattern in DANGEROUS_TEXT_PATTERNS: - if re.search(pattern, text_content, re.IGNORECASE): - return ( - False, - "JSON content contains suspicious script patterns in text", - ) - - # Check attributes for suspicious content (more targeted) - if "attrs" in node and isinstance(node["attrs"], dict): - for attr_name, attr_value in node["attrs"].items(): - if isinstance(attr_value, str): - # Only check specific attributes that could be dangerous - if attr_name.lower() in [ - "href", - "src", - "action", - "onclick", - "onload", - "onerror", - ]: - for pattern in DANGEROUS_ATTR_PATTERNS: - if re.search(pattern, attr_value, re.IGNORECASE): - return ( - False, - f"JSON content contains dangerous pattern in {attr_name} attribute", - ) - - # Recursively check nested content - if "content" in node and isinstance(node["content"], list): - is_valid, error_msg = _validate_json_content_array( - node["content"], depth + 1 - ) - if not is_valid: - return False, error_msg - - return True, None - - -def _validate_json_content_recursive(obj, depth=0): - """ - Recursively validate JSON object for suspicious content. - - Args: - obj: JSON object (dict, list, or primitive) to validate - depth (int): Current recursion depth (default: 0) - - Returns: - tuple: (is_valid: bool, error_message: str or None) - """ - # Check recursion depth to prevent stack overflow - if depth > MAX_RECURSION_DEPTH: - return False, f"Maximum recursion depth ({MAX_RECURSION_DEPTH}) exceeded" - if isinstance(obj, dict): - for key, value in obj.items(): - if isinstance(value, str): - # Check for dangerous patterns using module constants - for pattern in DANGEROUS_TEXT_PATTERNS: - if re.search(pattern, value, re.IGNORECASE): - return ( - False, - "JSON content contains suspicious script patterns", - ) - elif isinstance(value, (dict, list)): - is_valid, error_msg = _validate_json_content_recursive(value, depth + 1) - if not is_valid: - return False, error_msg - elif isinstance(obj, list): - for item in obj: - is_valid, error_msg = _validate_json_content_recursive(item, depth + 1) - if not is_valid: - return False, error_msg - - return True, None