Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions src/apm_cli/models/apm_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,10 @@ def parse(cls, dependency_str: str) -> "DependencyReference":
"""
if not dependency_str.strip():
raise ValueError("Empty dependency string")


# Decode percent-encoded characters (e.g., %20 for spaces in ADO project names)
dependency_str = urllib.parse.unquote(dependency_str)

# Check for control characters (newlines, tabs, etc.)
if any(ord(c) < 32 for c in dependency_str):
raise ValueError("Dependency string contains invalid control characters")
Expand Down Expand Up @@ -557,12 +560,16 @@ def parse(cls, dependency_str: str) -> "DependencyReference":
raise ValueError(f"Invalid repository format: {repo_url}. Expected 'user/repo'")

# Security: validate characters to prevent injection
# ADO project names may contain spaces
allowed_pattern = r'^[a-zA-Z0-9._\- ]+$' if is_ado_host else r'^[a-zA-Z0-9._-]+$'
for part in uparts:
if not re.match(r'^[a-zA-Z0-9._-]+$', part.rstrip('.git')):
if not re.match(allowed_pattern, part.rstrip('.git')):
raise ValueError(f"Invalid repository path component: {part}")

# Safely construct URL using detected host
github_url = urllib.parse.urljoin(f"https://{host}/", user_repo)
# Quote path components to handle spaces in ADO project names
quoted_repo = '/'.join(urllib.parse.quote(p, safe='') for p in uparts)
github_url = urllib.parse.urljoin(f"https://{host}/", quoted_repo)
parsed_url = urllib.parse.urlparse(github_url)

# SECURITY: Validate that this is actually a supported Git host URL.
Expand All @@ -581,28 +588,31 @@ def parse(cls, dependency_str: str) -> "DependencyReference":
path = path[:-4]

# Handle _git in parsed path for ADO URLs
path_parts = path.split("/")
# Decode percent-encoded path components (e.g., spaces in ADO project names)
path_parts = [urllib.parse.unquote(p) for p in path.split("/")]
if '_git' in path_parts:
git_idx = path_parts.index('_git')
path_parts = path_parts[:git_idx] + path_parts[git_idx+1:]

# Validate path format based on host type
is_ado_host = is_azure_devops_hostname(hostname)
expected_parts = 3 if is_ado_host else 2

if len(path_parts) != expected_parts:
if is_ado_host:
raise ValueError(f"Invalid Azure DevOps repository path: expected 'org/project/repo', got '{path}'")
else:
raise ValueError(f"Invalid repository path: expected 'user/repo', got '{path}'")

# Validate all path parts contain only allowed characters
# ADO project names may contain spaces
allowed_pattern = r'^[a-zA-Z0-9._\- ]+$' if is_ado_host else r'^[a-zA-Z0-9._-]+$'
for i, part in enumerate(path_parts):
if not part:
raise ValueError(f"Invalid repository format: path component {i+1} cannot be empty")
if not re.match(r'^[a-zA-Z0-9._-]+$', part):
if not re.match(allowed_pattern, part):
raise ValueError(f"Invalid repository path component: {part}")

repo_url = "/".join(path_parts)

# If host not set via SSH or parsed parts, default to default_host()
Expand All @@ -613,8 +623,8 @@ def parse(cls, dependency_str: str) -> "DependencyReference":
# Validate repo format based on host type
is_ado_final = host and is_azure_devops_hostname(host)
if is_ado_final:
# ADO format: org/project/repo (3 segments)
if not re.match(r'^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$', repo_url):
# ADO format: org/project/repo (3 segments, project may contain spaces)
if not re.match(r'^[a-zA-Z0-9._-]+/[a-zA-Z0-9._\- ]+/[a-zA-Z0-9._-]+$', repo_url):
raise ValueError(f"Invalid Azure DevOps repository format: {repo_url}. Expected 'org/project/repo'")
# Extract ADO-specific fields
ado_parts = repo_url.split('/')
Expand Down Expand Up @@ -655,7 +665,8 @@ def to_github_url(self) -> str:

if self.is_azure_devops():
# ADO format: https://dev.azure.com/org/project/_git/repo
return f"https://{host}/{self.ado_organization}/{self.ado_project}/_git/{self.ado_repo}"
project = urllib.parse.quote(self.ado_project, safe='')
return f"https://{host}/{self.ado_organization}/{project}/_git/{self.ado_repo}"
else:
# GitHub format: https://github.com/owner/repo
return f"https://{host}/{self.repo_url}"
Expand Down
13 changes: 8 additions & 5 deletions src/apm_cli/utils/github_host.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,11 @@ def build_ado_https_clone_url(org: str, project: str, repo: str, token: Optional
Returns:
str: HTTPS clone URL for Azure DevOps
"""
quoted_project = urllib.parse.quote(project, safe='')
if token:
# ADO uses PAT as password with empty username
return f"https://{token}@{host}/{org}/{project}/_git/{repo}"
return f"https://{host}/{org}/{project}/_git/{repo}"
return f"https://{token}@{host}/{org}/{quoted_project}/_git/{repo}"
return f"https://{host}/{org}/{quoted_project}/_git/{repo}"


def build_ado_ssh_url(org: str, project: str, repo: str, host: str = "ssh.dev.azure.com") -> str:
Expand All @@ -177,12 +178,13 @@ def build_ado_ssh_url(org: str, project: str, repo: str, host: str = "ssh.dev.az
Returns:
str: SSH clone URL for Azure DevOps
"""
quoted_project = urllib.parse.quote(project, safe='')
if host == "ssh.dev.azure.com":
# Cloud format
return f"git@ssh.dev.azure.com:v3/{org}/{project}/{repo}"
return f"git@ssh.dev.azure.com:v3/{org}/{quoted_project}/{repo}"
else:
# Server format (user@host is optional, but commonly 'git@host')
return f"ssh://git@{host}/{org}/{project}/_git/{repo}"
return f"ssh://git@{host}/{org}/{quoted_project}/_git/{repo}"


def build_ado_api_url(org: str, project: str, repo: str, path: str, ref: str = "main", host: str = "dev.azure.com") -> str:
Expand All @@ -202,8 +204,9 @@ def build_ado_api_url(org: str, project: str, repo: str, path: str, ref: str = "
str: API URL for retrieving file contents
"""
encoded_path = urllib.parse.quote(path, safe='')
quoted_project = urllib.parse.quote(project, safe='')
return (
f"https://{host}/{org}/{project}/_apis/git/repositories/{repo}/items"
f"https://{host}/{org}/{quoted_project}/_apis/git/repositories/{repo}/items"
f"?path={encoded_path}&versionDescriptor.version={ref}&api-version=7.0"
)

Expand Down
37 changes: 37 additions & 0 deletions tests/test_apm_package_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,43 @@ def test_parse_azure_devops_invalid_virtual_package(self):
assert dep.is_virtual == False
assert dep.repo_url == "myorg/myproject/myrepo"

def test_parse_azure_devops_project_with_spaces(self):
"""Test that ADO project names with spaces are correctly parsed.

Azure DevOps project names can contain spaces (e.g., 'My Project').
Users may specify them with %20 encoding or literal spaces (shell-quoted).
"""
# Percent-encoded space in project name with _git segment
dep = DependencyReference.parse("dev.azure.com/myorg/My%20Project/_git/myrepo")
assert dep.host == "dev.azure.com"
assert dep.ado_organization == "myorg"
assert dep.ado_project == "My Project"
assert dep.ado_repo == "myrepo"
assert dep.is_azure_devops() == True
assert dep.repo_url == "myorg/My Project/myrepo"

# Literal space in project name (simplified format without _git)
dep = DependencyReference.parse("dev.azure.com/myorg/My Project/myrepo")
assert dep.host == "dev.azure.com"
assert dep.ado_organization == "myorg"
assert dep.ado_project == "My Project"
assert dep.ado_repo == "myrepo"
assert dep.is_azure_devops() == True

# Percent-encoded space in simplified format
dep = DependencyReference.parse("dev.azure.com/org/America%20Oh%20Yeah/repo")
assert dep.ado_project == "America Oh Yeah"
assert dep.ado_repo == "repo"

# to_github_url() should produce a properly percent-encoded URL
dep = DependencyReference.parse("dev.azure.com/myorg/My%20Project/_git/myrepo")
url = dep.to_github_url()
assert url == "https://dev.azure.com/myorg/My%20Project/_git/myrepo"

# Spaces should NOT be allowed in GitHub owner/repo names
with pytest.raises(ValueError):
DependencyReference.parse("github.com/my%20owner/repo")

def test_parse_virtual_package_with_malicious_host(self):
"""Test that virtual packages with malicious hosts are rejected."""
malicious_virtual_formats = [
Expand Down
32 changes: 32 additions & 0 deletions tests/test_github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,38 @@ def test_build_repo_url_for_ado_ssh(self):
# Should build ADO SSH URL (git@ssh.dev.azure.com:v3/org/project/repo)
assert url.startswith('git@ssh.dev.azure.com:')

def test_build_ado_urls_with_spaces_in_project(self):
"""Test that URL builders properly encode spaces in ADO project names."""
from apm_cli.utils.github_host import (
build_ado_https_clone_url,
build_ado_ssh_url,
build_ado_api_url,
)

# HTTPS clone URL with token
url = build_ado_https_clone_url("myorg", "My Project", "myrepo", token="tok")
assert "My%20Project" in url
assert "My Project" not in url
assert url == "https://tok@dev.azure.com/myorg/My%20Project/_git/myrepo"

# HTTPS clone URL without token
url = build_ado_https_clone_url("myorg", "My Project", "myrepo")
assert url == "https://dev.azure.com/myorg/My%20Project/_git/myrepo"

# SSH cloud URL
url = build_ado_ssh_url("myorg", "My Project", "myrepo")
assert "My%20Project" in url
assert url == "git@ssh.dev.azure.com:v3/myorg/My%20Project/myrepo"

# SSH server URL
url = build_ado_ssh_url("myorg", "My Project", "myrepo", host="ado.company.com")
assert "My%20Project" in url

# API URL
url = build_ado_api_url("myorg", "My Project", "myrepo", "path/file.md")
assert "My%20Project" in url
assert "My Project" not in url

def test_build_repo_url_github_not_affected_by_ado_token(self):
"""Test that GitHub URL building uses GitHub token, not ADO token."""
with patch.dict(os.environ, {
Expand Down
Loading