diff --git a/.github/workflows/pr-orchestrator.yml b/.github/workflows/pr-orchestrator.yml index 50b5a06f..93e32080 100644 --- a/.github/workflows/pr-orchestrator.yml +++ b/.github/workflows/pr-orchestrator.yml @@ -93,6 +93,8 @@ jobs: name: Compatibility (Python 3.11) runs-on: ubuntu-latest needs: tests + permissions: + contents: read steps: - uses: actions/checkout@v4 - name: Set up Python 3.11 @@ -118,6 +120,8 @@ jobs: name: Contract-First CI runs-on: ubuntu-latest needs: [tests, compat-py311] + permissions: + contents: read steps: - uses: actions/checkout@v4 - name: Set up Python 3.12 @@ -142,6 +146,8 @@ jobs: name: CLI Command Validation runs-on: ubuntu-latest needs: contract-first-ci + permissions: + contents: read steps: - uses: actions/checkout@v4 - name: Set up Python 3.12 @@ -168,6 +174,8 @@ jobs: runs-on: ubuntu-latest needs: [tests] if: needs.tests.outputs.run_unit_coverage == 'true' + permissions: + contents: read steps: - uses: actions/checkout@v4 - name: Set up Python 3.12 @@ -203,6 +211,8 @@ jobs: name: Type Checking (basedpyright) runs-on: ubuntu-latest needs: [tests] + permissions: + contents: read steps: - uses: actions/checkout@v4 - name: Set up Python 3.12 @@ -226,6 +236,8 @@ jobs: name: Linting (ruff, pylint) runs-on: ubuntu-latest needs: [tests] + permissions: + contents: read steps: - name: Checkout uses: actions/checkout@v4 @@ -250,6 +262,8 @@ jobs: runs-on: ubuntu-latest needs: [tests, compat-py311, contract-first-ci, cli-validation, type-checking, linting] if: github.event_name == 'push' && github.ref == 'refs/heads/main' + permissions: + contents: read steps: - name: Checkout uses: actions/checkout@v4 diff --git a/src/specfact_cli/adapters/ado.py b/src/specfact_cli/adapters/ado.py index 36db8704..b958feac 100644 --- a/src/specfact_cli/adapters/ado.py +++ b/src/specfact_cli/adapters/ado.py @@ -15,6 +15,7 @@ from datetime import UTC, datetime from pathlib import Path from typing import Any +from urllib.parse import urlparse import requests 
from beartype import beartype @@ -745,13 +746,18 @@ def export_artifact( if not entry_repo: source_url = entry.get("source_url", "") # Try ADO URL pattern - match by org (GUIDs in URLs) - if source_url and "dev.azure.com" in source_url and "/" in target_repo: - target_org = target_repo.split("/")[0] - ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url) - if ado_org_match and ado_org_match.group(1) == target_org: - # Org matches - this is likely the same ADO organization - work_item_id = entry.get("source_id") - break + if source_url and "/" in target_repo: + try: + parsed = urlparse(source_url) + if parsed.hostname and parsed.hostname.lower() == "dev.azure.com": + target_org = target_repo.split("/")[0] + ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url) + if ado_org_match and ado_org_match.group(1) == target_org: + # Org matches - this is likely the same ADO organization + work_item_id = entry.get("source_id") + break + except Exception: + pass # Tertiary match: for ADO, only match by org when project is truly unknown (GUID-only URLs) # This prevents cross-project matches when both entry_repo and target_repo have project names diff --git a/src/specfact_cli/adapters/github.py b/src/specfact_cli/adapters/github.py index c767e421..f7eb3cb8 100644 --- a/src/specfact_cli/adapters/github.py +++ b/src/specfact_cli/adapters/github.py @@ -19,6 +19,7 @@ from datetime import UTC, datetime from pathlib import Path from typing import Any +from urllib.parse import urlparse import requests from beartype import beartype @@ -447,8 +448,23 @@ def detect(self, repo_path: Path, bridge_config: BridgeConfig | None = None) -> if git_config.exists(): try: config_content = git_config.read_text(encoding="utf-8") - if "github.com" in config_content.lower(): - return True + # Use proper URL parsing to avoid substring matching vulnerabilities + # Look for URL patterns in git config and validate the hostname + url_pattern = 
re.compile(r"url\s*=\s*(https?://[^\s]+|git@[^:]+:[^\s]+)") + # Official GitHub SSH hostnames + github_ssh_hosts = {"github.com", "ssh.github.com"} + for match in url_pattern.finditer(config_content): + url_str = match.group(1) + # Handle git@ format: git@github.com:user/repo.git or git@ssh.github.com:user/repo.git (host compared case-insensitively, like the HTTP branch) + if url_str.startswith("git@"): + host_part = url_str.split(":")[0].replace("git@", "").lower() + if host_part in github_ssh_hosts: + return True + else: + # Parse HTTP/HTTPS URLs properly + parsed = urlparse(url_str) + if parsed.hostname and parsed.hostname.lower() == "github.com": + return True except Exception: pass diff --git a/src/specfact_cli/backlog/mappers/github_mapper.py b/src/specfact_cli/backlog/mappers/github_mapper.py index 47d5d412..02ea9b89 100644 --- a/src/specfact_cli/backlog/mappers/github_mapper.py +++ b/src/specfact_cli/backlog/mappers/github_mapper.py @@ -176,9 +176,21 @@ def _extract_default_content(self, body: str) -> str: Default content (body without ## headings) """ # Remove all sections starting with ## - pattern = r"^##.*?$(?:\n.*?)*?(?=^##|\Z)" - default_content = re.sub(pattern, "", body, flags=re.MULTILINE | re.DOTALL) - return default_content.strip() + # Use a more efficient pattern to avoid ReDoS: match lines starting with ## + # and everything up to the next ## or end of string, using non-backtracking approach + lines = body.split("\n") + result_lines: list[str] = [] + skip_section = False + + for line in lines: + # Check if this line starts a new section (## heading) + if re.match(r"^##+", line): + skip_section = True + else: + if not skip_section: + result_lines.append(line) + + return "\n".join(result_lines).strip() @beartype @require(lambda self, body: isinstance(body, str), "Body must be str") diff --git a/src/specfact_cli/sync/bridge_sync.py b/src/specfact_cli/sync/bridge_sync.py index e2dfcb72..49353747 100644 --- a/src/specfact_cli/sync/bridge_sync.py +++ b/src/specfact_cli/sync/bridge_sync.py @@ -13,6 +13,7 @@ import 
re import subprocess from dataclasses import dataclass +from urllib.parse import urlparse try: @@ -1247,11 +1248,17 @@ def _read_openspec_change_proposals(self, include_archived: bool = True) -> list if url_repo_match: entry["source_repo"] = url_repo_match.group(1) # Try ADO URL pattern - extract org, but we need project name from elsewhere - elif "dev.azure.com" in source_url: - # For ADO, we can't reliably extract project name from URL (GUID) - # The source_repo should have been saved in the hidden comment - # If not, we'll need to match by org only later - pass + else: + # Use proper URL parsing to validate ADO URLs + try: + parsed = urlparse(source_url) + if parsed.hostname and parsed.hostname.lower() == "dev.azure.com": + # For ADO, we can't reliably extract project name from URL (GUID) + # The source_repo should have been saved in the hidden comment + # If not, we'll need to match by org only later + pass + except Exception: + pass source_tracking_list.append(entry) # Check for status indicators in proposal content or directory name @@ -1539,16 +1546,21 @@ def _find_source_tracking_entry( return source_tracking # Try ADO URL pattern (ADO URLs contain GUIDs, not project names) # For ADO, match by org if target_repo contains the org - elif "dev.azure.com" in source_url and "/" in target_repo: - target_org = target_repo.split("/")[0] - ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url) - # Org matches and source_type is "ado" - return entry (project name may differ due to GUID in URL) - if ( - ado_org_match - and ado_org_match.group(1) == target_org - and (entry_type == "ado" or entry_type == "") - ): - return source_tracking + elif "/" in target_repo: + try: + parsed = urlparse(source_url) + if parsed.hostname and parsed.hostname.lower() == "dev.azure.com": + target_org = target_repo.split("/")[0] + ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url) + # Org matches and source_type is "ado" - return entry (project name may differ 
due to GUID in URL) + if ( + ado_org_match + and ado_org_match.group(1) == target_org + and (entry_type == "ado" or entry_type == "") + ): + return source_tracking + except Exception: + pass # Tertiary match: for ADO, only match by org when project is truly unknown (GUID-only URLs) # This prevents cross-project matches when both entry_repo and target_repo have project names @@ -1617,16 +1629,21 @@ def _find_source_tracking_entry( return entry # Try ADO URL pattern (but note: ADO URLs contain GUIDs, not project names) # For ADO, match by org if target_repo contains the org - elif "dev.azure.com" in source_url and "/" in target_repo: - target_org = target_repo.split("/")[0] - ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url) - # Org matches and source_type is "ado" - return entry (project name may differ due to GUID in URL) - if ( - ado_org_match - and ado_org_match.group(1) == target_org - and (entry_type == "ado" or entry_type == "") - ): - return entry + elif "/" in target_repo: + try: + parsed = urlparse(source_url) + if parsed.hostname and parsed.hostname.lower() == "dev.azure.com": + target_org = target_repo.split("/")[0] + ado_org_match = re.search(r"dev\.azure\.com/([^/]+)/", source_url) + # Org matches and source_type is "ado" - return entry (project name may differ due to GUID in URL) + if ( + ado_org_match + and ado_org_match.group(1) == target_org + and (entry_type == "ado" or entry_type == "") + ): + return entry + except Exception: + pass # Tertiary match: for ADO, only match by org when project is truly unknown (GUID-only URLs) # This prevents cross-project matches when both entry_repo and target_repo have project names diff --git a/src/specfact_cli/utils/progress.py b/src/specfact_cli/utils/progress.py index e0dc4194..674673a6 100644 --- a/src/specfact_cli/utils/progress.py +++ b/src/specfact_cli/utils/progress.py @@ -214,4 +214,5 @@ def save_bundle_with_progress( pass # No progress display - just save directly + # In test mode, skip 
progress entirely to avoid async cleanup issues save_project_bundle(bundle, bundle_dir, atomic=atomic, progress_callback=None)