From ab12c3e4e1a6921b8c5b947a964412597096b051 Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Ketireddy Date: Tue, 29 Jul 2025 11:26:02 +0530 Subject: [PATCH 1/4] added PageBinaryUpdateSerializer for binary data validation and update --- apps/api/plane/app/serializers/page.py | 51 ++++++++++++++++++++++++ apps/api/plane/app/views/page/base.py | 28 ++++++------- apps/api/plane/utils/binary_validator.py | 42 +++++++++++++++++++ 3 files changed, 105 insertions(+), 16 deletions(-) create mode 100644 apps/api/plane/utils/binary_validator.py diff --git a/apps/api/plane/app/serializers/page.py b/apps/api/plane/app/serializers/page.py index 1fd2f4d3c84..c0988c33f24 100644 --- a/apps/api/plane/app/serializers/page.py +++ b/apps/api/plane/app/serializers/page.py @@ -1,8 +1,10 @@ # Third party imports from rest_framework import serializers +import base64 # Module imports from .base import BaseSerializer +from plane.utils.binary_validator import validate_binary_data from plane.db.models import ( Page, PageLog, @@ -186,3 +188,52 @@ class Meta: "updated_by", ] read_only_fields = ["workspace", "page"] + + +class PageBinaryUpdateSerializer(serializers.Serializer): + """Serializer for updating page binary description with validation""" + + description_binary = serializers.CharField(required=False, allow_blank=True) + description_html = serializers.CharField(required=False, allow_blank=True) + description = serializers.JSONField(required=False, allow_null=True) + + def validate_description_binary(self, value): + """Validate the base64-encoded binary data""" + if not value: + return value + + try: + # Decode the base64 data + binary_data = base64.b64decode(value) + + # Validate the binary data + is_valid, error_message = validate_binary_data(binary_data) + if not is_valid: + raise serializers.ValidationError( + f"Invalid binary data: {error_message}" + ) + + return value + except Exception as e: + if isinstance(e, serializers.ValidationError): + raise + raise serializers.ValidationError("Failed to decode base64 data") + + def update(self, instance, validated_data): + """Update the page instance with validated data""" + if "description_binary" in validated_data: + if validated_data["description_binary"]: + instance.description_binary = base64.b64decode( + validated_data["description_binary"] + ) + else: + instance.description_binary = None + + if "description_html" in validated_data: + instance.description_html = validated_data["description_html"] + + if "description" in validated_data: + instance.description = validated_data["description"] + + instance.save() + return instance diff --git a/apps/api/plane/app/views/page/base.py b/apps/api/plane/app/views/page/base.py index cb9b0e09231..96de81abfb2 100644 --- a/apps/api/plane/app/views/page/base.py +++ b/apps/api/plane/app/views/page/base.py @@ -25,6 +25,7 @@ PageSerializer, SubPageSerializer, PageDetailSerializer, + PageBinaryUpdateSerializer, ) from plane.db.models import ( Page, @@ -538,32 +539,27 @@ def partial_update(self, request, slug, project_id, pk): {"description_html": page.description_html}, cls=DjangoJSONEncoder ) - # Get the base64 data from the request - base64_data = request.data.get("description_binary") - - # If base64 data is provided - if base64_data: - # Decode the base64 data to bytes - new_binary_data = base64.b64decode(base64_data) - # capture the page transaction + # Use serializer for validation and update + serializer = PageBinaryUpdateSerializer(page, data=request.data, partial=True) + if serializer.is_valid(): + # Capture the page transaction if request.data.get("description_html"): page_transaction.delay( new_value=request.data, old_value=existing_instance, page_id=pk ) - # Store the updated binary data - page.description_binary = new_binary_data - page.description_html = request.data.get("description_html") - page.description = request.data.get("description") - page.save() - # Return a success response + + # Update the page using serializer + updated_page = serializer.save() + + # Run background tasks page_version.delay( - page_id=page.id, + page_id=updated_page.id, existing_instance=existing_instance, user_id=request.user.id, ) return Response({"message": "Updated successfully"}) else: - return Response({"error": "No binary data provided"}) + return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) class PageDuplicateEndpoint(BaseAPIView): diff --git a/apps/api/plane/utils/binary_validator.py b/apps/api/plane/utils/binary_validator.py new file mode 100644 index 00000000000..47f7719ef8b --- /dev/null +++ b/apps/api/plane/utils/binary_validator.py @@ -0,0 +1,42 @@ +# Python imports + + +def validate_binary_data(binary_data): + """ + Validate that binary data appears to be valid document format and doesn't contain malicious content. + + Args: + binary_data (bytes): The binary data to validate + + Returns: + tuple: (is_valid: bool, error_message: str or None) + """ + if not binary_data: + return True, None # Empty is OK + + # Size check - 10MB limit + MAX_SIZE = 10 * 1024 * 1024 + if len(binary_data) > MAX_SIZE: + return False, "Binary data exceeds maximum size limit (10MB)" + + # Basic format validation + if len(binary_data) < 4: + return False, "Binary data too short to be valid document format" + + # Check for suspicious text patterns (HTML/JS) + try: + decoded_text = binary_data.decode("utf-8", errors="ignore")[:200] + suspicious_patterns = [ + " Date: Wed, 30 Jul 2025 04:10:32 +0530 Subject: [PATCH 2/4] chore: added validation for description --- apps/api/plane/api/serializers/issue.py | 21 ++ apps/api/plane/api/serializers/project.py | 28 ++ apps/api/plane/app/serializers/__init__.py | 1 + apps/api/plane/app/serializers/draft.py | 22 ++ apps/api/plane/app/serializers/issue.py | 21 ++ apps/api/plane/app/serializers/page.py | 30 +- apps/api/plane/app/serializers/project.py | 31 ++ apps/api/plane/app/serializers/workspace.py | 24 ++ apps/api/plane/space/serializer/issue.py | 22 ++ apps/api/plane/utils/content_validator.py | 346 ++++++++++++++++++++ 10 files changed, 545 insertions(+), 1 deletion(-) create mode 100644 apps/api/plane/utils/content_validator.py diff --git a/apps/api/plane/api/serializers/issue.py b/apps/api/plane/api/serializers/issue.py index f906d4085f3..c57f9d35978 100644 --- a/apps/api/plane/api/serializers/issue.py +++ b/apps/api/plane/api/serializers/issue.py @@ -21,6 +21,11 @@ State, User, ) +from plane.utils.content_validator import ( + validate_html_content, + validate_json_content, + validate_binary_data, +) from .base import BaseSerializer from .cycle import CycleLiteSerializer, CycleSerializer @@ -75,6 +80,22 @@ def validate(self, data): except Exception: raise serializers.ValidationError("Invalid HTML passed") + # Validate description content for security + if data.get("description"): + is_valid, error_msg = validate_json_content(data["description"]) + if not is_valid: + raise serializers.ValidationError({"description": error_msg}) + + if data.get("description_html"): + is_valid, error_msg = validate_html_content(data["description_html"]) + if not is_valid: + raise serializers.ValidationError({"description_html": error_msg}) + + if data.get("description_binary"): + is_valid, error_msg = validate_binary_data(data["description_binary"]) + if not is_valid: + raise serializers.ValidationError({"description_binary": error_msg}) + # Validate assignees are from project if data.get("assignees", []): data["assignees"] = ProjectMember.objects.filter( diff --git a/apps/api/plane/api/serializers/project.py b/apps/api/plane/api/serializers/project.py index c76652e1e7c..10ae7f4de32 100644 --- a/apps/api/plane/api/serializers/project.py +++ b/apps/api/plane/api/serializers/project.py @@ -3,6 +3,11 @@ # Module imports from plane.db.models import Project, ProjectIdentifier, WorkspaceMember +from plane.utils.content_validator import ( + validate_html_content, + validate_json_content, + validate_binary_data, +) from .base import BaseSerializer @@ -57,6 +62,29 @@ def validate(self, data): "Default assignee should be a user in the workspace" ) + # Validate description content for security + if "description" in data and data["description"]: + # For Project, description might be text field, not JSON + if isinstance(data["description"], dict): + is_valid, error_msg = validate_json_content(data["description"]) + if not is_valid: + raise serializers.ValidationError({"description": error_msg}) + + if "description_text" in data and data["description_text"]: + is_valid, error_msg = validate_json_content(data["description_text"]) + if not is_valid: + raise serializers.ValidationError({"description_text": error_msg}) + + if "description_html" in data and data["description_html"]: + if isinstance(data["description_html"], dict): + is_valid, error_msg = validate_json_content(data["description_html"]) + else: + is_valid, error_msg = validate_html_content( + str(data["description_html"]) + ) + if not is_valid: + raise serializers.ValidationError({"description_html": error_msg}) + return data def create(self, validated_data): diff --git a/apps/api/plane/app/serializers/__init__.py b/apps/api/plane/app/serializers/__init__.py index f0d98886e3b..0116b206138 100644 --- a/apps/api/plane/app/serializers/__init__.py +++ b/apps/api/plane/app/serializers/__init__.py @@ -96,6 +96,7 @@ SubPageSerializer, PageDetailSerializer, PageVersionSerializer, + PageBinaryUpdateSerializer, PageVersionDetailSerializer, ) diff --git a/apps/api/plane/app/serializers/draft.py b/apps/api/plane/app/serializers/draft.py index f308352633b..6479d44c836 100644 --- a/apps/api/plane/app/serializers/draft.py +++ b/apps/api/plane/app/serializers/draft.py @@ -17,6 +17,11 @@ DraftIssueCycle, DraftIssueModule, ) +from plane.utils.content_validator import ( + validate_html_content, + validate_json_content, + validate_binary_data, +) class DraftIssueCreateSerializer(BaseSerializer): @@ -64,6 +69,23 @@ def validate(self, data): and data.get("start_date", None) > data.get("target_date", None) ): raise serializers.ValidationError("Start date cannot exceed target date") + + # Validate description content for security + if "description" in data and data["description"]: + is_valid, error_msg = validate_json_content(data["description"]) + if not is_valid: + raise serializers.ValidationError({"description": error_msg}) + + if "description_html" in data and data["description_html"]: + is_valid, error_msg = validate_html_content(data["description_html"]) + if not is_valid: + raise serializers.ValidationError({"description_html": error_msg}) + + if "description_binary" in data and data["description_binary"]: + is_valid, error_msg = validate_binary_data(data["description_binary"]) + if not is_valid: + raise serializers.ValidationError({"description_binary": error_msg}) + return data def create(self, validated_data): diff --git a/apps/api/plane/app/serializers/issue.py b/apps/api/plane/app/serializers/issue.py index 965d78aa2b6..98ceaaad6c0 100644 --- a/apps/api/plane/app/serializers/issue.py +++ b/apps/api/plane/app/serializers/issue.py @@ -38,6 +38,11 @@ IssueDescriptionVersion, ProjectMember, ) +from plane.utils.content_validator import ( + validate_html_content, + validate_json_content, + validate_binary_data, +) class IssueFlatSerializer(BaseSerializer): @@ -127,6 +132,22 @@ def validate(self, attrs): member_id__in=attrs["assignee_ids"], ).values_list("member_id", flat=True) + # Validate description content for security + if "description" in attrs and attrs["description"]: + is_valid, error_msg = validate_json_content(attrs["description"]) + if not is_valid: + raise serializers.ValidationError({"description": error_msg}) + + if "description_html" in attrs and attrs["description_html"]: + is_valid, error_msg = validate_html_content(attrs["description_html"]) + if not is_valid: + raise serializers.ValidationError({"description_html": error_msg}) + + if "description_binary" in attrs and attrs["description_binary"]: + is_valid, error_msg = validate_binary_data(attrs["description_binary"]) + if not is_valid: + raise serializers.ValidationError({"description_binary": error_msg}) + return attrs def create(self, validated_data): diff --git a/apps/api/plane/app/serializers/page.py b/apps/api/plane/app/serializers/page.py index c0988c33f24..c61ab8356ec 100644 --- a/apps/api/plane/app/serializers/page.py +++ b/apps/api/plane/app/serializers/page.py @@ -4,7 +4,11 @@ # Module imports from .base import BaseSerializer -from plane.utils.binary_validator import validate_binary_data +from plane.utils.content_validator import ( + validate_binary_data, + validate_html_content, + validate_json_content, +) from plane.db.models import ( Page, PageLog, @@ -219,6 +223,30 @@ def validate_description_binary(self, value): raise raise serializers.ValidationError("Failed to decode base64 data") + def validate_description_html(self, value): + """Validate the HTML content""" + if not value: + return value + + # Use the validation function from utils + is_valid, error_message = validate_html_content(value) + if not is_valid: + raise serializers.ValidationError(error_message) + + return value + + def validate_description(self, value): + """Validate the JSON description""" + if not value: + return value + + # Use the validation function from utils + is_valid, error_message = validate_json_content(value) + if not is_valid: + raise serializers.ValidationError(error_message) + + return value + def update(self, instance, validated_data): """Update the page instance with validated data""" if "description_binary" in validated_data: diff --git a/apps/api/plane/app/serializers/project.py b/apps/api/plane/app/serializers/project.py index 813cb068f48..dfa541d9f17 100644 --- a/apps/api/plane/app/serializers/project.py +++ b/apps/api/plane/app/serializers/project.py @@ -13,6 +13,11 @@ DeployBoard, ProjectPublicMember, ) +from plane.utils.content_validator import ( + validate_html_content, + validate_json_content, + validate_binary_data, +) class ProjectSerializer(BaseSerializer): @@ -58,6 +63,32 @@ def validate_identifier(self, identifier): return identifier + def validate(self, data): + # Validate description content for security + if "description" in data and data["description"]: + # For Project, description might be text field, not JSON + if isinstance(data["description"], dict): + is_valid, error_msg = validate_json_content(data["description"]) + if not is_valid: + raise serializers.ValidationError({"description": error_msg}) + + if "description_text" in data and data["description_text"]: + is_valid, error_msg = validate_json_content(data["description_text"]) + if not is_valid: + raise serializers.ValidationError({"description_text": error_msg}) + + if "description_html" in data and data["description_html"]: + if isinstance(data["description_html"], dict): + is_valid, error_msg = validate_json_content(data["description_html"]) + else: + is_valid, error_msg = validate_html_content( + str(data["description_html"]) + ) + if not is_valid: + raise serializers.ValidationError({"description_html": error_msg}) + + return data + def create(self, validated_data): workspace_id = self.context["workspace_id"] diff --git a/apps/api/plane/app/serializers/workspace.py b/apps/api/plane/app/serializers/workspace.py index 60866cade2f..ec4c4bf63e0 100644 --- a/apps/api/plane/app/serializers/workspace.py +++ b/apps/api/plane/app/serializers/workspace.py @@ -24,6 +24,11 @@ ) from plane.utils.constants import RESTRICTED_WORKSPACE_SLUGS from plane.utils.url import contains_url +from plane.utils.content_validator import ( + validate_html_content, + validate_json_content, + validate_binary_data, +) # Django imports from django.core.validators import URLValidator @@ -312,6 +317,25 @@ class Meta: read_only_fields = ["workspace", "owner"] extra_kwargs = {"name": {"required": False}} + def validate(self, data): + # Validate description content for security + if "description" in data and data["description"]: + is_valid, error_msg = validate_json_content(data["description"]) + if not is_valid: + raise serializers.ValidationError({"description": error_msg}) + + if "description_html" in data and data["description_html"]: + is_valid, error_msg = validate_html_content(data["description_html"]) + if not is_valid: + raise serializers.ValidationError({"description_html": error_msg}) + + if "description_binary" in data and data["description_binary"]: + is_valid, error_msg = validate_binary_data(data["description_binary"]) + if not is_valid: + raise serializers.ValidationError({"description_binary": error_msg}) + + return data + class WorkspaceUserPreferenceSerializer(BaseSerializer): class Meta: diff --git a/apps/api/plane/space/serializer/issue.py b/apps/api/plane/space/serializer/issue.py index e1445b4e636..3549e76262c 100644 --- a/apps/api/plane/space/serializer/issue.py +++ b/apps/api/plane/space/serializer/issue.py @@ -28,6 +28,11 @@ IssueVote, IssueRelation, ) +from plane.utils.content_validator import ( + validate_html_content, + validate_json_content, + validate_binary_data, +) class IssueStateFlatSerializer(BaseSerializer): @@ -283,6 +288,23 @@ def validate(self, data): and data.get("start_date", None) > data.get("target_date", None) ): raise serializers.ValidationError("Start date cannot exceed target date") + + # Validate description content for security + if "description" in data and data["description"]: + is_valid, error_msg = validate_json_content(data["description"]) + if not is_valid: + raise serializers.ValidationError({"description": error_msg}) + + if "description_html" in data and data["description_html"]: + is_valid, error_msg = validate_html_content(data["description_html"]) + if not is_valid: + raise serializers.ValidationError({"description_html": error_msg}) + + if "description_binary" in data and data["description_binary"]: + is_valid, error_msg = validate_binary_data(data["description_binary"]) + if not is_valid: + raise serializers.ValidationError({"description_binary": error_msg}) + return data def create(self, validated_data): diff --git a/apps/api/plane/utils/content_validator.py b/apps/api/plane/utils/content_validator.py new file mode 100644 index 00000000000..c0490cfbab1 --- /dev/null +++ b/apps/api/plane/utils/content_validator.py @@ -0,0 +1,346 @@ +# Python imports +import json +import re + + +def validate_binary_data(binary_data): + """ + Validate that binary data appears to be valid document format and doesn't contain malicious content. + + Args: + binary_data (bytes): The binary data to validate + + Returns: + tuple: (is_valid: bool, error_message: str or None) + """ + if not binary_data: + return True, None # Empty is OK + + # Size check - 10MB limit + MAX_SIZE = 10 * 1024 * 1024 + if len(binary_data) > MAX_SIZE: + return False, "Binary data exceeds maximum size limit (10MB)" + + # Basic format validation + if len(binary_data) < 4: + return False, "Binary data too short to be valid document format" + + # Check for suspicious text patterns (HTML/JS) + try: + decoded_text = binary_data.decode("utf-8", errors="ignore")[:200] + suspicious_patterns = [ + " MAX_SIZE: + return False, "HTML content exceeds maximum size limit (10MB)" + + # Check for specific malicious patterns (simplified and more reliable) + malicious_patterns = [ + # Script tags with any content + r"]*>", + r"", + # JavaScript URLs in various attributes + r'(?:href|src|action)\s*=\s*["\']?\s*javascript:', + # Data URLs with text/html (potential XSS) + r'(?:href|src|action)\s*=\s*["\']?\s*data:text/html', + # Dangerous event handlers with JavaScript-like content + r'on(?:load|error|click|focus|blur|change|submit|reset|select|resize|scroll|unload|beforeunload|hashchange|popstate|storage|message|offline|online)\s*=\s*["\']?[^"\']*(?:javascript|alert|eval|document\.|window\.|location\.|history\.)[^"\']*["\']?', + # Object and embed tags that could load external content + r"<(?:object|embed)[^>]*(?:data|src)\s*=", + # Base tag that could change relative URL resolution + r"]*href\s*=", + # Dangerous iframe sources + r']*src\s*=\s*["\']?(?!(?:https?://(?:www\.)?(?:youtube\.com|vimeo\.com|codepen\.io)|about:blank))', + # Meta refresh redirects + r']*http-equiv\s*=\s*["\']?refresh["\']?', + # Link tags - simplified patterns + r']*rel\s*=\s*["\']?stylesheet["\']?', + r']*href\s*=\s*["\']?https?://', + r']*href\s*=\s*["\']?//', + r']*href\s*=\s*["\']?(?:data:|javascript:)', + # Style tags with external imports + r"]*>.*?@import.*?(?:https?://|//)", + # Link tags with dangerous rel types + r']*rel\s*=\s*["\']?(?:import|preload|prefetch|dns-prefetch|preconnect)["\']?', + ] + + for pattern in malicious_patterns: + if re.search(pattern, html_content, re.IGNORECASE | re.DOTALL): + return ( + False, + f"HTML content contains potentially malicious patterns: {pattern}", + ) + + # Additional check for inline event handlers that contain suspicious content + # This is more permissive - only blocks if the event handler contains actual dangerous code + event_handler_pattern = r'on\w+\s*=\s*["\']([^"\']*)["\']' + event_matches = re.findall(event_handler_pattern, html_content, re.IGNORECASE) + + for handler_content in event_matches: + dangerous_js_patterns = [ + r"alert\s*\(", + r"eval\s*\(", + r"document\s*\.", + r"window\s*\.", + r"location\s*\.", + r"fetch\s*\(", + r"XMLHttpRequest", + r"innerHTML\s*=", + r"outerHTML\s*=", + r"document\.write", + r"script\s*>", + ] + + for js_pattern in dangerous_js_patterns: + if re.search(js_pattern, handler_content, re.IGNORECASE): + return ( + False, + f"HTML content contains dangerous JavaScript in event handler: {handler_content[:100]}", + ) + # Check for suspicious script patterns for XSS prevention + suspicious_patterns = [ + r"]*>.*?", + r"javascript:", + r"data:text/html", + r"on\w+\s*=", # Event handlers like onclick, onload, etc. + r']*src\s*=\s*["\'](?!https?://)', # Suspicious iframe sources + r"]*>", + r"]*>", + r"]*action\s*=", # Forms with action attributes + ] + + for pattern in suspicious_patterns: + if re.search(pattern, html_content, re.IGNORECASE | re.DOTALL): + return ( + False, + "HTML content contains suspicious patterns that may pose security risks", + ) + + # Basic HTML structure validation - check for common malformed tags + try: + # Count opening and closing tags for basic structure validation + opening_tags = re.findall(r"<(\w+)[^>]*>", html_content) + closing_tags = re.findall(r"", html_content) + + # Check for some self-closing tags that don't need closing + self_closing_tags = { + "img", + "br", + "hr", + "input", + "meta", + "link", + "area", + "base", + "col", + "embed", + "source", + "track", + "wbr", + } + + # Filter out self-closing tags from opening tags + opening_tags_filtered = [ + tag for tag in opening_tags if tag.lower() not in self_closing_tags + ] + + # Basic check - if we have significantly more opening than closing tags, it might be malformed + if len(opening_tags_filtered) > len(closing_tags) + 10: # Allow some tolerance + return False, "HTML content appears to be malformed (unmatched tags)" + + except Exception: + # If HTML parsing fails, we'll allow it + pass + + return True, None + + +def validate_json_content(json_content): + """ + Validate that JSON content is safe and doesn't contain malicious patterns. + + Args: + json_content (dict): The JSON content to validate + + Returns: + tuple: (is_valid: bool, error_message: str or None) + """ + if not json_content: + return True, None # Empty is OK + + try: + # Size check - 10MB limit (consistent with other validations) + MAX_SIZE = 10 * 1024 * 1024 + json_str = json.dumps(json_content) + if len(json_str.encode("utf-8")) > MAX_SIZE: + return False, "JSON content exceeds maximum size limit (10MB)" + + # Basic structure validation for page description JSON + if isinstance(json_content, dict): + # Check for expected page description structure + # This is based on ProseMirror/Tiptap JSON structure + if "type" in json_content and json_content.get("type") == "doc": + # Valid document structure + if "content" in json_content and isinstance( + json_content["content"], list + ): + # Recursively check content for suspicious patterns + is_valid, error_msg = _validate_json_content_array( + json_content["content"] + ) + if not is_valid: + return False, error_msg + elif "type" not in json_content and "content" not in json_content: + # Allow other JSON structures but validate for suspicious content + is_valid, error_msg = _validate_json_content_recursive(json_content) + if not is_valid: + return False, error_msg + else: + return False, "JSON description must be a valid object" + + except (TypeError, ValueError) as e: + return False, "Invalid JSON structure" + except Exception as e: + return False, "Failed to validate JSON content" + + return True, None + + +def _validate_json_content_array(content): + """ + Validate JSON content array for suspicious patterns. + + Args: + content (list): Array of content nodes to validate + + Returns: + tuple: (is_valid: bool, error_message: str or None) + """ + if not isinstance(content, list): + return True, None + + for node in content: + if isinstance(node, dict): + # Check text content for suspicious patterns (more targeted) + if node.get("type") == "text" and "text" in node: + text_content = node["text"] + dangerous_patterns = [ + r"]*>.*?", + r"javascript\s*:", + r"data\s*:\s*text/html", + r"eval\s*\(", + r"document\s*\.", + r"window\s*\.", + r"location\s*\.", + ] + for pattern in dangerous_patterns: + if re.search(pattern, text_content, re.IGNORECASE): + return ( + False, + "JSON content contains suspicious script patterns in text", + ) + + # Check attributes for suspicious content (more targeted) + if "attrs" in node and isinstance(node["attrs"], dict): + for attr_name, attr_value in node["attrs"].items(): + if isinstance(attr_value, str): + # Only check specific attributes that could be dangerous + if attr_name.lower() in [ + "href", + "src", + "action", + "onclick", + "onload", + "onerror", + ]: + dangerous_attr_patterns = [ + r"javascript\s*:", + r"data\s*:\s*text/html", + r"eval\s*\(", + r"alert\s*\(", + r"document\s*\.", + r"window\s*\.", + ] + for pattern in dangerous_attr_patterns: + if re.search(pattern, attr_value, re.IGNORECASE): + return ( + False, + f"JSON content contains dangerous pattern in {attr_name} attribute", + ) + + # Recursively check nested content + if "content" in node and isinstance(node["content"], list): + is_valid, error_msg = _validate_json_content_array(node["content"]) + if not is_valid: + return False, error_msg + + return True, None + + +def _validate_json_content_recursive(obj): + """ + Recursively validate JSON object for suspicious content. + + Args: + obj: JSON object (dict, list, or primitive) to validate + + Returns: + tuple: (is_valid: bool, error_message: str or None) + """ + if isinstance(obj, dict): + for key, value in obj.items(): + if isinstance(value, str): + # More targeted dangerous patterns + dangerous_patterns = [ + r"]*>.*?", + r"javascript\s*:", + r"data\s*:\s*text/html", + r"eval\s*\(", + r"document\s*\.", + r"window\s*\.", + ] + for pattern in dangerous_patterns: + if re.search(pattern, value, re.IGNORECASE): + return ( + False, + "JSON content contains suspicious script patterns", + ) + elif isinstance(value, (dict, list)): + is_valid, error_msg = _validate_json_content_recursive(value) + if not is_valid: + return False, error_msg + elif isinstance(obj, list): + for item in obj: + is_valid, error_msg = _validate_json_content_recursive(item) + if not is_valid: + return False, error_msg + + return True, None From 45e5831a043f393694649ebbb3db2a9c8ff2792b Mon Sep 17 00:00:00 2001 From: NarayanBavisetti Date: Wed, 30 Jul 2025 04:39:21 +0530 Subject: [PATCH 3/4] chore: removed the duplicated file --- apps/api/plane/utils/binary_validator.py | 42 ---------------------- apps/api/plane/utils/content_validator.py | 43 ++++++++++------------- 2 files changed, 18 insertions(+), 67 deletions(-) delete mode 100644 apps/api/plane/utils/binary_validator.py diff --git a/apps/api/plane/utils/binary_validator.py b/apps/api/plane/utils/binary_validator.py deleted file mode 100644 index 47f7719ef8b..00000000000 --- a/apps/api/plane/utils/binary_validator.py +++ /dev/null @@ -1,42 +0,0 @@ -# Python imports - - -def validate_binary_data(binary_data): - """ - Validate that binary data appears to be valid document format and doesn't contain malicious content. - - Args: - binary_data (bytes): The binary data to validate - - Returns: - tuple: (is_valid: bool, error_message: str or None) - """ - if not binary_data: - return True, None # Empty is OK - - # Size check - 10MB limit - MAX_SIZE = 10 * 1024 * 1024 - if len(binary_data) > MAX_SIZE: - return False, "Binary data exceeds maximum size limit (10MB)" - - # Basic format validation - if len(binary_data) < 4: - return False, "Binary data too short to be valid document format" - - # Check for suspicious text patterns (HTML/JS) - try: - decoded_text = binary_data.decode("utf-8", errors="ignore")[:200] - suspicious_patterns = [ - " MAX_SIZE: return False, "Binary data exceeds maximum size limit (10MB)" @@ -38,7 +49,7 @@ def validate_binary_data(binary_data): ] if any(pattern in decoded_text.lower() for pattern in suspicious_patterns): return False, "Binary data contains suspicious content patterns" - except: + except Exception: pass # Binary data might not be decodable as text, which is fine return True, None @@ -58,7 +69,6 @@ def validate_html_content(html_content): return True, None # Empty is OK # Size check - 10MB limit (consistent with binary validation) - MAX_SIZE = 10 * 1024 * 1024 if len(html_content.encode("utf-8")) > MAX_SIZE: return False, "HTML content exceeds maximum size limit (10MB)" @@ -90,6 +100,8 @@ def validate_html_content(html_content): r"]*>.*?@import.*?(?:https?://|//)", # Link tags with dangerous rel types r']*rel\s*=\s*["\']?(?:import|preload|prefetch|dns-prefetch|preconnect)["\']?', + # Forms with action attributes + r"]*action\s*=", ] for pattern in malicious_patterns: @@ -125,24 +137,6 @@ def validate_html_content(html_content): False, f"HTML content contains dangerous JavaScript in event handler: {handler_content[:100]}", ) - # Check for suspicious script patterns for XSS prevention - suspicious_patterns = [ - r"]*>.*?", - r"javascript:", - r"data:text/html", - r"on\w+\s*=", # Event handlers like onclick, onload, etc. - r']*src\s*=\s*["\'](?!https?://)', # Suspicious iframe sources - r"]*>", - r"]*>", - r"]*action\s*=", # Forms with action attributes - ] - - for pattern in suspicious_patterns: - if re.search(pattern, html_content, re.IGNORECASE | re.DOTALL): - return ( - False, - "HTML content contains suspicious patterns that may pose security risks", - ) # Basic HTML structure validation - check for common malformed tags try: @@ -198,7 +192,6 @@ def validate_json_content(json_content): try: # Size check - 10MB limit (consistent with other validations) - MAX_SIZE = 10 * 1024 * 1024 json_str = json.dumps(json_content) if len(json_str.encode("utf-8")) > MAX_SIZE: return False, "JSON content exceeds maximum size limit (10MB)" From 55b745819fa2916e931b2c7236aeae1f8366f948 Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Ketireddy Date: Wed, 30 Jul 2025 13:46:57 +0530 Subject: [PATCH 4/4] Fixed coderabbit comments - Improve content validation by consolidating patterns and enhancing recursion checks - Updated `PageBinaryUpdateSerializer` to simplify assignment of validated data. - Enhanced `content_validator.py` with consolidated dangerous patterns and added recursion depth checks to prevent stack overflow during validation. - Improved readability and maintainability of validation functions by using constants for patterns. --- apps/api/plane/app/serializers/page.py | 13 +- apps/api/plane/utils/content_validator.py | 234 ++++++++++++---------- 2 files changed, 130 insertions(+), 117 deletions(-) diff --git a/apps/api/plane/app/serializers/page.py b/apps/api/plane/app/serializers/page.py index c61ab8356ec..78762e4b4e1 100644 --- a/apps/api/plane/app/serializers/page.py +++ b/apps/api/plane/app/serializers/page.py @@ -217,7 +217,7 @@ def validate_description_binary(self, value): f"Invalid binary data: {error_message}" ) - return value + return binary_data except Exception as e: if isinstance(e, serializers.ValidationError): raise @@ -250,18 +250,13 @@ def validate_description(self, value): def update(self, instance, validated_data): """Update the page instance with validated data""" if "description_binary" in validated_data: - if validated_data["description_binary"]: - instance.description_binary = base64.b64decode( - validated_data["description_binary"] - ) - else: - instance.description_binary = None + instance.description_binary = validated_data.get("description_binary") if "description_html" in validated_data: - instance.description_html = validated_data["description_html"] + instance.description_html = validated_data.get("description_html") if "description" in validated_data: - instance.description = validated_data["description"] + instance.description = validated_data.get("description") instance.save() return instance diff --git a/apps/api/plane/utils/content_validator.py b/apps/api/plane/utils/content_validator.py index 21f3223d899..7b9932a35b9 100644 --- a/apps/api/plane/utils/content_validator.py +++ b/apps/api/plane/utils/content_validator.py @@ -3,8 +3,108 @@ import json import re + +# Maximum allowed size for binary data (10MB) MAX_SIZE = 10 * 1024 * 1024 +# Maximum recursion depth to prevent stack overflow +MAX_RECURSION_DEPTH = 20 + +# Dangerous text patterns that could indicate XSS or script injection +DANGEROUS_TEXT_PATTERNS = [ + r"]*>.*?", + r"javascript\s*:", + r"data\s*:\s*text/html", + r"eval\s*\(", + r"document\s*\.", + r"window\s*\.", + r"location\s*\.", +] + +# Dangerous attribute patterns for HTML attributes +DANGEROUS_ATTR_PATTERNS = [ + r"javascript\s*:", + r"data\s*:\s*text/html", + r"eval\s*\(", + r"alert\s*\(", + r"document\s*\.", + r"window\s*\.", +] + +# Suspicious patterns for binary data content +SUSPICIOUS_BINARY_PATTERNS = [ + "]*>", + r"", + # JavaScript URLs in various attributes + r'(?:href|src|action)\s*=\s*["\']?\s*javascript:', + # Data URLs with text/html (potential XSS) + r'(?:href|src|action)\s*=\s*["\']?\s*data:text/html', + # Dangerous event handlers with JavaScript-like content + r'on(?:load|error|click|focus|blur|change|submit|reset|select|resize|scroll|unload|beforeunload|hashchange|popstate|storage|message|offline|online)\s*=\s*["\']?[^"\']*(?:javascript|alert|eval|document\.|window\.|location\.|history\.)[^"\']*["\']?', + # Object and embed tags that could load external content + r"<(?:object|embed)[^>]*(?:data|src)\s*=", + # Base tag that could change relative URL resolution + r"]*href\s*=", + # Dangerous iframe sources + r']*src\s*=\s*["\']?(?:javascript:|data:text/html)', + # Meta refresh redirects + r']*http-equiv\s*=\s*["\']?refresh["\']?', + # Link tags - simplified patterns + r']*rel\s*=\s*["\']?stylesheet["\']?', + r']*href\s*=\s*["\']?https?://', + r']*href\s*=\s*["\']?//', + r']*href\s*=\s*["\']?(?:data:|javascript:)', + # Style tags with external imports + r"]*>.*?@import.*?(?:https?://|//)", + # Link tags with dangerous rel types + r']*rel\s*=\s*["\']?(?:import|preload|prefetch|dns-prefetch|preconnect)["\']?', + # Forms with action attributes + r"]*action\s*=", +] + +# Dangerous JavaScript patterns for event handlers +DANGEROUS_JS_PATTERNS = [ + r"alert\s*\(", + r"eval\s*\(", + r"document\s*\.", + r"window\s*\.", + r"location\s*\.", + r"fetch\s*\(", + r"XMLHttpRequest", + r"innerHTML\s*=", + r"outerHTML\s*=", + r"document\.write", + r"script\s*>", +] + +# HTML self-closing tags that don't need closing tags +SELF_CLOSING_TAGS = { + "img", + "br", + "hr", + "input", + "meta", + "link", + "area", + "base", + "col", + "embed", + "source", + "track", + "wbr", +} + def validate_binary_data(data): """ @@ -39,15 +139,9 @@ def validate_binary_data(data): # Check for suspicious text patterns (HTML/JS) try: decoded_text = binary_data.decode("utf-8", errors="ignore")[:200] - suspicious_patterns = [ - "]*>", - r"", - # JavaScript URLs in various attributes - r'(?:href|src|action)\s*=\s*["\']?\s*javascript:', - # Data URLs with text/html (potential XSS) - r'(?:href|src|action)\s*=\s*["\']?\s*data:text/html', - # Dangerous event handlers with JavaScript-like content - r'on(?:load|error|click|focus|blur|change|submit|reset|select|resize|scroll|unload|beforeunload|hashchange|popstate|storage|message|offline|online)\s*=\s*["\']?[^"\']*(?:javascript|alert|eval|document\.|window\.|location\.|history\.)[^"\']*["\']?', - # Object and embed tags that could load external content - r"<(?:object|embed)[^>]*(?:data|src)\s*=", - # Base tag that could change relative URL resolution - r"]*href\s*=", - # Dangerous iframe sources - r']*src\s*=\s*["\']?(?!(?:https?://(?:www\.)?(?:youtube\.com|vimeo\.com|codepen\.io)|about:blank))', - # Meta refresh redirects - r']*http-equiv\s*=\s*["\']?refresh["\']?', - # Link tags - simplified patterns - r']*rel\s*=\s*["\']?stylesheet["\']?', - r']*href\s*=\s*["\']?https?://', - r']*href\s*=\s*["\']?//', - r']*href\s*=\s*["\']?(?:data:|javascript:)', - # Style tags with external imports - r"]*>.*?@import.*?(?:https?://|//)", - # Link tags with dangerous rel types - r']*rel\s*=\s*["\']?(?:import|preload|prefetch|dns-prefetch|preconnect)["\']?', - # Forms with action attributes - r"]*action\s*=", - ] - - for pattern in malicious_patterns: + for pattern in MALICIOUS_HTML_PATTERNS: if re.search(pattern, html_content, re.IGNORECASE | re.DOTALL): return ( False, @@ -117,21 +180,7 @@ def validate_html_content(html_content): event_matches = re.findall(event_handler_pattern, html_content, re.IGNORECASE) for handler_content in event_matches: - dangerous_js_patterns = [ - r"alert\s*\(", - r"eval\s*\(", - r"document\s*\.", - r"window\s*\.", - r"location\s*\.", - r"fetch\s*\(", - r"XMLHttpRequest", - r"innerHTML\s*=", - r"outerHTML\s*=", - r"document\.write", - r"script\s*>", - ] - - for js_pattern in dangerous_js_patterns: + for js_pattern in DANGEROUS_JS_PATTERNS: if re.search(js_pattern, handler_content, re.IGNORECASE): return ( False, @@ -144,26 +193,9 @@ def validate_html_content(html_content): opening_tags = re.findall(r"<(\w+)[^>]*>", html_content) closing_tags = re.findall(r"", html_content) - # Check for some self-closing tags that don't need closing - self_closing_tags = { - "img", - "br", - "hr", - "input", - "meta", - "link", - "area", - "base", - "col", - "embed", - "source", - "track", - "wbr", - } - # Filter out self-closing tags from opening tags opening_tags_filtered = [ - tag for tag in opening_tags if tag.lower() not in self_closing_tags + tag for tag in opening_tags if tag.lower() not in SELF_CLOSING_TAGS ] # Basic check - if we have significantly more opening than closing tags, it might be malformed @@ -227,16 +259,21 @@ def validate_json_content(json_content): return True, None -def _validate_json_content_array(content): +def _validate_json_content_array(content, depth=0): """ Validate JSON content array for suspicious patterns. Args: content (list): Array of content nodes to validate + depth (int): Current recursion depth (default: 0) Returns: tuple: (is_valid: bool, error_message: str or None) """ + # Check recursion depth to prevent stack overflow + if depth > MAX_RECURSION_DEPTH: + return False, f"Maximum recursion depth ({MAX_RECURSION_DEPTH}) exceeded" + if not isinstance(content, list): return True, None @@ -245,16 +282,7 @@ def _validate_json_content_array(content): # Check text content for suspicious patterns (more targeted) if node.get("type") == "text" and "text" in node: text_content = node["text"] - dangerous_patterns = [ - r"]*>.*?", - r"javascript\s*:", - r"data\s*:\s*text/html", - r"eval\s*\(", - r"document\s*\.", - r"window\s*\.", - r"location\s*\.", - ] - for pattern in dangerous_patterns: + for pattern in DANGEROUS_TEXT_PATTERNS: if re.search(pattern, text_content, re.IGNORECASE): return ( False, @@ -274,15 +302,7 @@ def _validate_json_content_array(content): "onload", "onerror", ]: - dangerous_attr_patterns = [ - r"javascript\s*:", - r"data\s*:\s*text/html", - r"eval\s*\(", - r"alert\s*\(", - r"document\s*\.", - r"window\s*\.", - ] - for pattern in dangerous_attr_patterns: + for pattern in DANGEROUS_ATTR_PATTERNS: if re.search(pattern, attr_value, re.IGNORECASE): return ( False, @@ -291,48 +311,46 @@ def _validate_json_content_array(content): # Recursively check nested content if "content" in node and isinstance(node["content"], list): - is_valid, error_msg = _validate_json_content_array(node["content"]) + is_valid, error_msg = _validate_json_content_array( + node["content"], depth + 1 + ) if not is_valid: return False, error_msg return True, None -def _validate_json_content_recursive(obj): +def _validate_json_content_recursive(obj, depth=0): """ Recursively validate JSON object for suspicious content. Args: obj: JSON object (dict, list, or primitive) to validate + depth (int): Current recursion depth (default: 0) Returns: tuple: (is_valid: bool, error_message: str or None) """ + # Check recursion depth to prevent stack overflow + if depth > MAX_RECURSION_DEPTH: + return False, f"Maximum recursion depth ({MAX_RECURSION_DEPTH}) exceeded" if isinstance(obj, dict): for key, value in obj.items(): if isinstance(value, str): - # More targeted dangerous patterns - dangerous_patterns = [ - r"]*>.*?", - r"javascript\s*:", - r"data\s*:\s*text/html", - r"eval\s*\(", - r"document\s*\.", - r"window\s*\.", - ] - for pattern in dangerous_patterns: + # Check for dangerous patterns using module constants + for pattern in DANGEROUS_TEXT_PATTERNS: if re.search(pattern, value, re.IGNORECASE): return ( False, "JSON content contains suspicious script patterns", ) elif isinstance(value, (dict, list)): - is_valid, error_msg = _validate_json_content_recursive(value) + is_valid, error_msg = _validate_json_content_recursive(value, depth + 1) if not is_valid: return False, error_msg elif isinstance(obj, list): for item in obj: - is_valid, error_msg = _validate_json_content_recursive(item) + is_valid, error_msg = _validate_json_content_recursive(item, depth + 1) if not is_valid: return False, error_msg