diff --git a/src/apm_cli/models/apm_package.py b/src/apm_cli/models/apm_package.py index 31c42149..9de71844 100644 --- a/src/apm_cli/models/apm_package.py +++ b/src/apm_cli/models/apm_package.py @@ -305,7 +305,10 @@ def parse(cls, dependency_str: str) -> "DependencyReference": """ if not dependency_str.strip(): raise ValueError("Empty dependency string") - + + # Decode percent-encoded characters (e.g., %20 for spaces in ADO project names) + dependency_str = urllib.parse.unquote(dependency_str) + # Check for control characters (newlines, tabs, etc.) if any(ord(c) < 32 for c in dependency_str): raise ValueError("Dependency string contains invalid control characters") @@ -557,12 +560,16 @@ def parse(cls, dependency_str: str) -> "DependencyReference": raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo'") # Security: validate characters to prevent injection + # ADO project names may contain spaces + allowed_pattern = r'^[a-zA-Z0-9._\- ]+$' if is_ado_host else r'^[a-zA-Z0-9._-]+$' for part in uparts: - if not re.match(r'^[a-zA-Z0-9._-]+$', part.rstrip('.git')): + if not re.match(allowed_pattern, part.rstrip('.git')): raise ValueError(f"Invalid repository path component: {part}") # Safely construct URL using detected host - github_url = urllib.parse.urljoin(f"https://{host}/", user_repo) + # Quote path components to handle spaces in ADO project names + quoted_repo = '/'.join(urllib.parse.quote(p, safe='') for p in uparts) + github_url = urllib.parse.urljoin(f"https://{host}/", quoted_repo) parsed_url = urllib.parse.urlparse(github_url) # SECURITY: Validate that this is actually a supported Git host URL. @@ -581,28 +588,31 @@ def parse(cls, dependency_str: str) -> "DependencyReference": path = path[:-4] # Handle _git in parsed path for ADO URLs - path_parts = path.split("/") + # Decode percent-encoded path components (e.g., spaces in ADO project names) + path_parts = [urllib.parse.unquote(p) for p in path.split("/")] if '_git' in path_parts: git_idx = path_parts.index('_git') path_parts = path_parts[:git_idx] + path_parts[git_idx+1:] - + # Validate path format based on host type is_ado_host = is_azure_devops_hostname(hostname) expected_parts = 3 if is_ado_host else 2 - + if len(path_parts) != expected_parts: if is_ado_host: raise ValueError(f"Invalid Azure DevOps repository path: expected 'org/project/repo', got '{path}'") else: raise ValueError(f"Invalid repository path: expected 'user/repo', got '{path}'") - + # Validate all path parts contain only allowed characters + # ADO project names may contain spaces + allowed_pattern = r'^[a-zA-Z0-9._\- ]+$' if is_ado_host else r'^[a-zA-Z0-9._-]+$' for i, part in enumerate(path_parts): if not part: raise ValueError(f"Invalid repository format: path component {i+1} cannot be empty") - if not re.match(r'^[a-zA-Z0-9._-]+$', part): + if not re.match(allowed_pattern, part): raise ValueError(f"Invalid repository path component: {part}") - + repo_url = "/".join(path_parts) # If host not set via SSH or parsed parts, default to default_host() @@ -613,8 +623,8 @@ def parse(cls, dependency_str: str) -> "DependencyReference": # Validate repo format based on host type is_ado_final = host and is_azure_devops_hostname(host) if is_ado_final: - # ADO format: org/project/repo (3 segments) - if not re.match(r'^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$', repo_url): + # ADO format: org/project/repo (3 segments, project may contain spaces) + if not re.match(r'^[a-zA-Z0-9._-]+/[a-zA-Z0-9._\- ]+/[a-zA-Z0-9._-]+$', repo_url): raise ValueError(f"Invalid Azure DevOps repository format: {repo_url}. Expected 'org/project/repo'") # Extract ADO-specific fields ado_parts = repo_url.split('/') @@ -655,7 +665,8 @@ def to_github_url(self) -> str: if self.is_azure_devops(): # ADO format: https://dev.azure.com/org/project/_git/repo - return f"https://{host}/{self.ado_organization}/{self.ado_project}/_git/{self.ado_repo}" + project = urllib.parse.quote(self.ado_project, safe='') + return f"https://{host}/{self.ado_organization}/{project}/_git/{self.ado_repo}" else: # GitHub format: https://github.com/owner/repo return f"https://{host}/{self.repo_url}" diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py index c186aaee..5ec20887 100644 --- a/src/apm_cli/utils/github_host.py +++ b/src/apm_cli/utils/github_host.py @@ -153,10 +153,11 @@ def build_ado_https_clone_url(org: str, project: str, repo: str, token: Optional Returns: str: HTTPS clone URL for Azure DevOps """ + quoted_project = urllib.parse.quote(project, safe='') if token: # ADO uses PAT as password with empty username - return f"https://{token}@{host}/{org}/{project}/_git/{repo}" - return f"https://{host}/{org}/{project}/_git/{repo}" + return f"https://{token}@{host}/{org}/{quoted_project}/_git/{repo}" + return f"https://{host}/{org}/{quoted_project}/_git/{repo}" def build_ado_ssh_url(org: str, project: str, repo: str, host: str = "ssh.dev.azure.com") -> str: @@ -177,12 +178,13 @@ def build_ado_ssh_url(org: str, project: str, repo: str, host: str = "ssh.dev.az Returns: str: SSH clone URL for Azure DevOps """ + quoted_project = urllib.parse.quote(project, safe='') if host == "ssh.dev.azure.com": # Cloud format - return f"git@ssh.dev.azure.com:v3/{org}/{project}/{repo}" + return f"git@ssh.dev.azure.com:v3/{org}/{quoted_project}/{repo}" else: # Server format (user@host is optional, but commonly 'git@host') - return f"ssh://git@{host}/{org}/{project}/_git/{repo}" + return f"ssh://git@{host}/{org}/{quoted_project}/_git/{repo}" def build_ado_api_url(org: str, project: str, repo: str, path: str, ref: str = "main", host: str = "dev.azure.com") -> str: @@ -202,8 +204,9 @@ def build_ado_api_url(org: str, project: str, repo: str, path: str, ref: str = " str: API URL for retrieving file contents """ encoded_path = urllib.parse.quote(path, safe='') + quoted_project = urllib.parse.quote(project, safe='') return ( - f"https://{host}/{org}/{project}/_apis/git/repositories/{repo}/items" + f"https://{host}/{org}/{quoted_project}/_apis/git/repositories/{repo}/items" f"?path={encoded_path}&versionDescriptor.version={ref}&api-version=7.0" ) diff --git a/tests/test_apm_package_models.py b/tests/test_apm_package_models.py index fdbae36d..3496fa56 100644 --- a/tests/test_apm_package_models.py +++ b/tests/test_apm_package_models.py @@ -225,6 +225,43 @@ def test_parse_azure_devops_invalid_virtual_package(self): assert dep.is_virtual == False assert dep.repo_url == "myorg/myproject/myrepo" + def test_parse_azure_devops_project_with_spaces(self): + """Test that ADO project names with spaces are correctly parsed. + + Azure DevOps project names can contain spaces (e.g., 'My Project'). + Users may specify them with %20 encoding or literal spaces (shell-quoted). + """ + # Percent-encoded space in project name with _git segment + dep = DependencyReference.parse("dev.azure.com/myorg/My%20Project/_git/myrepo") + assert dep.host == "dev.azure.com" + assert dep.ado_organization == "myorg" + assert dep.ado_project == "My Project" + assert dep.ado_repo == "myrepo" + assert dep.is_azure_devops() == True + assert dep.repo_url == "myorg/My Project/myrepo" + + # Literal space in project name (simplified format without _git) + dep = DependencyReference.parse("dev.azure.com/myorg/My Project/myrepo") + assert dep.host == "dev.azure.com" + assert dep.ado_organization == "myorg" + assert dep.ado_project == "My Project" + assert dep.ado_repo == "myrepo" + assert dep.is_azure_devops() == True + + # Percent-encoded space in simplified format + dep = DependencyReference.parse("dev.azure.com/org/America%20Oh%20Yeah/repo") + assert dep.ado_project == "America Oh Yeah" + assert dep.ado_repo == "repo" + + # to_github_url() should produce a properly percent-encoded URL + dep = DependencyReference.parse("dev.azure.com/myorg/My%20Project/_git/myrepo") + url = dep.to_github_url() + assert url == "https://dev.azure.com/myorg/My%20Project/_git/myrepo" + + # Spaces should NOT be allowed in GitHub owner/repo names + with pytest.raises(ValueError): + DependencyReference.parse("github.com/my%20owner/repo") + def test_parse_virtual_package_with_malicious_host(self): """Test that virtual packages with malicious hosts are rejected.""" malicious_virtual_formats = [ diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index afd73595..a85d4ad0 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -584,6 +584,38 @@ def test_build_repo_url_for_ado_ssh(self): # Should build ADO SSH URL (git@ssh.dev.azure.com:v3/org/project/repo) assert url.startswith('git@ssh.dev.azure.com:') + def test_build_ado_urls_with_spaces_in_project(self): + """Test that URL builders properly encode spaces in ADO project names.""" + from apm_cli.utils.github_host import ( + build_ado_https_clone_url, + build_ado_ssh_url, + build_ado_api_url, + ) + + # HTTPS clone URL with token + url = build_ado_https_clone_url("myorg", "My Project", "myrepo", token="tok") + assert "My%20Project" in url + assert "My Project" not in url + assert url == "https://tok@dev.azure.com/myorg/My%20Project/_git/myrepo" + + # HTTPS clone URL without token + url = build_ado_https_clone_url("myorg", "My Project", "myrepo") + assert url == "https://dev.azure.com/myorg/My%20Project/_git/myrepo" + + # SSH cloud URL + url = build_ado_ssh_url("myorg", "My Project", "myrepo") + assert "My%20Project" in url + assert url == "git@ssh.dev.azure.com:v3/myorg/My%20Project/myrepo" + + # SSH server URL + url = build_ado_ssh_url("myorg", "My Project", "myrepo", host="ado.company.com") + assert "My%20Project" in url + + # API URL + url = build_ado_api_url("myorg", "My Project", "myrepo", "path/file.md") + assert "My%20Project" in url + assert "My Project" not in url + def test_build_repo_url_github_not_affected_by_ado_token(self): """Test that GitHub URL building uses GitHub token, not ADO token.""" with patch.dict(os.environ, {