Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a28fda6
install codeql improvement and calculate optimal workers based on mac…
fernandomatsuosantos Jul 11, 2025
2bbf665
fix tests
fernandomatsuosantos Jul 11, 2025
95859f4
fix tests
fernandomatsuosantos Jul 11, 2025
f7b2ef1
fix tests
fernandomatsuosantos Jul 11, 2025
da76249
copilot improvements
fernandomatsuosantos Jul 11, 2025
558b9b4
copilot improvements
fernandomatsuosantos Jul 11, 2025
5a1eede
copilot improvements
fernandomatsuosantos Jul 11, 2025
2dc7ad7
copilot improvements
fernandomatsuosantos Jul 11, 2025
601a51b
fix codeql duplicated download
fernandomatsuosantos Jul 11, 2025
adaa80e
add fix sugested by mateus
fernandomatsuosantos Jul 11, 2025
4557044
add fix
fernandomatsuosantos Jul 11, 2025
215f79c
add fix lint
fernandomatsuosantos Jul 11, 2025
f8f46fd
add fix lint
fernandomatsuosantos Jul 11, 2025
0551482
fix get memory
fernandomatsuosantos Jul 11, 2025
b4f82bc
fix codeql download
fernandomatsuosantos Jul 11, 2025
1e32d48
fix codeql download
fernandomatsuosantos Jul 11, 2025
515e68d
fix os
fernandomatsuosantos Jul 11, 2025
edcae0e
change force
fernandomatsuosantos Jul 11, 2025
970dda8
remove code delete overwrite database
fernandomatsuosantos Jul 11, 2025
0524e9e
remove code delete overwrite database
fernandomatsuosantos Jul 11, 2025
41382a9
output project
fernandomatsuosantos Jul 12, 2025
1d4d276
output project
fernandomatsuosantos Jul 12, 2025
55505f7
fix lint issues
fernandomatsuosantos Jul 12, 2025
97ab97f
fix lint issues
fernandomatsuosantos Jul 12, 2025
8955d6c
add tests
fernandomatsuosantos Jul 12, 2025
7b4bab0
improve language detection
fernandomatsuosantos Jul 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 43 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ packages = [{include = "codeql_wrapper", from = "src"}]
python = "^3.8.1"
click = "^8.0.0"
colorama = "^0.4.6"
psutil = "^5.9.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.0.0"
Expand All @@ -34,6 +35,7 @@ black = ">=23,<25"
flake8 = "^6.0.0"
mypy = "^1.0.0"
types-colorama = "^0.4.15"
types-psutil = "^5.9.0"

[tool.poetry.scripts]
codeql-wrapper = "codeql_wrapper.cli:cli"
Expand Down
23 changes: 23 additions & 0 deletions src/codeql_wrapper/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ def cli(ctx: click.Context, verbose: bool = False) -> None:
envvar="GITHUB_TOKEN",
help="GitHub token for SARIF upload (or set GITHUB_TOKEN env var)",
)
@click.option(
"--max-workers",
type=int,
help="Maximum number of worker processes for concurrent analysis "
"(default: adaptive based on system resources)",
)
@click.pass_context
def analyze(
ctx: click.Context,
Expand All @@ -121,6 +127,7 @@ def analyze(
commit_sha: Optional[str],
ref: Optional[str],
github_token: Optional[str],
max_workers: Optional[int],
) -> None:
"""
Run CodeQL analysis on a repository.
Expand Down Expand Up @@ -207,6 +214,21 @@ def analyze(
else:
logger.warning(f"Unsupported language: {lang}")

# Validate max_workers parameter
if max_workers is not None:
if max_workers < 1:
click.echo(
click.style("ERROR:", fg="red", bold=True)
+ " --max-workers must be at least 1",
err=True,
)
sys.exit(1)
if max_workers > 16:
click.echo(
click.style("WARNING:", fg="yellow", bold=True)
+ f" Using {max_workers} workers may cause resource exhaustion on some systems"
)

# Create analysis request
request = CodeQLAnalysisRequest(
repository_path=Path(repository_path),
Expand All @@ -215,6 +237,7 @@ def analyze(
verbose=verbose,
force_install=force_install,
monorepo=monorepo,
max_workers=max_workers,
)

# Execute analysis
Expand Down
1 change: 1 addition & 0 deletions src/codeql_wrapper/domain/entities/codeql_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class CodeQLAnalysisRequest:
build_mode: Optional[str] = None
build_script: Optional[str] = None
queries: Optional[List[str]] = None
max_workers: Optional[int] = None

def __post_init__(self) -> None:
"""Validate analysis request."""
Expand Down
153 changes: 138 additions & 15 deletions src/codeql_wrapper/domain/use_cases/codeql_analysis_use_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
from pathlib import Path
from typing import Any, List, Optional, Set

# Try to import psutil, fallback gracefully if not available
try:
import psutil

PSUTIL_AVAILABLE = True
except ImportError:
PSUTIL_AVAILABLE = False

from ..entities.codeql_analysis import (
CodeQLAnalysisRequest,
CodeQLAnalysisResult,
Expand All @@ -24,15 +32,107 @@
class CodeQLAnalysisUseCase:
"""Use case for running CodeQL analysis on repositories."""

DEFAULT_MAX_WORKERS: int = 10

def __init__(self, logger: Any) -> None:
    """
    Store the logger, build collaborators, and size the worker pool.

    Args:
        logger: Logger used by this use case; it must already be assigned
            before the worker calculation runs, because that calculation
            logs through ``self._logger``.
    """
    self._logger = logger

    # Collaborators created eagerly; the runner is filled in later.
    self._language_detector = LanguageDetector()
    self._codeql_installer = CodeQLInstaller()
    self._codeql_runner: Optional[CodeQLRunner] = None

    # No manual override until set_max_workers() supplies one.
    self._manual_max_workers: Optional[int] = None

    # Default worker count derived from system resources.
    self._adaptive_max_workers = self._calculate_optimal_workers()

def _get_available_memory_gb(self) -> float:
    """
    Return the machine's total memory in GB.

    Despite the method name, the value is read from the ``total`` field of
    ``psutil.virtual_memory()`` (not the ``available`` field), so it
    reflects installed memory rather than what is currently free.

    Returns:
        Total memory in GB (bytes divided by 1024**3). Falls back to 7.0
        when psutil could not be imported or when the psutil query raises.
    """
    # Single definition of the fallback used by both failure paths below;
    # 7 GB is the figure this module associates with a standard GitHub
    # Actions runner.
    fallback_gb = 7.0

    if not PSUTIL_AVAILABLE:
        self._logger.debug(
            "psutil not available, using conservative memory estimate"
        )
        return fallback_gb

    try:
        return psutil.virtual_memory().total / (1024**3)
    except Exception as e:
        # Deliberately best-effort: any psutil failure degrades to the
        # fallback instead of aborting worker sizing.
        self._logger.debug(
            f"Failed to get memory info from psutil: {e}, "
            "using conservative memory estimate"
        )
        return fallback_gb

def _calculate_optimal_workers(self) -> int:
"""
Calculate optimal number of workers based on system resources.

Takes into account CPU cores and available memory to prevent
resource exhaustion, especially important for GitHub Actions runners.

Returns:
Optimal number of worker processes for CodeQL analysis
"""
try:
# Get system specifications
cpu_count = os.cpu_count() or 2
memory_gb = self._get_available_memory_gb()

# Conservative calculation for CodeQL analysis
# Each CodeQL worker typically needs:
# - 1+ CPU cores for optimal performance
# - 2-4GB RAM for database creation and analysis

# Calculate limits based on available resources
max_by_cpu = min(cpu_count, 8) # Cap at 8 for efficiency
max_by_memory = max(
1, int(memory_gb / 2.5)
) # 2.5GB per worker (conservative)

# Take the minimum to avoid resource exhaustion
# Also apply reasonable bounds: min 1, max 6
optimal = max(1, min(max_by_cpu, max_by_memory, 6))

self._logger.debug(
f"Calculated optimal workers: {optimal} "
f"(CPU: {cpu_count}, Memory: {memory_gb:.1f}GB, "
f"Limits - CPU: {max_by_cpu}, Memory: {max_by_memory})"
)

return optimal

except Exception as e:
self._logger.warning(f"Failed to calculate optimal workers: {e}")
return 4 # Safe fallback for most environments

@property
def max_workers(self) -> int:
"""Get the maximum number of workers for this instance."""
return (
self._manual_max_workers
if self._manual_max_workers is not None
else self._adaptive_max_workers
)

def set_max_workers(self, max_workers: Optional[int]) -> None:
"""Set the maximum number of workers manually."""
if max_workers is not None:
if max_workers < 1:
raise ValueError("max_workers must be at least 1")
if max_workers > 16:
self._logger.warning(
f"Using {max_workers} workers may cause resource exhaustion"
)
self._logger.info(f"Using manual max_workers: {max_workers}")
else:
self._logger.info(
f"Using adaptive max_workers: {self._adaptive_max_workers}"
)

self._manual_max_workers = max_workers

def execute(self, request: CodeQLAnalysisRequest) -> RepositoryAnalysisSummary:
"""
Execute CodeQL analysis on a repo or monorepo.
Expand All @@ -48,6 +148,24 @@ def execute(self, request: CodeQLAnalysisRequest) -> RepositoryAnalysisSummary:
Exception: If analysis fails
"""
try:
# Set max workers from request if provided
self.set_max_workers(request.max_workers)

# Step 1: Verify CodeQL installation once for all projects
self._logger.info("Verifying CodeQL installation...")
installation_info = self._verify_codeql_installation(request.force_install)
if not installation_info.is_valid:
raise Exception(
f"CodeQL installation error: {installation_info.error_message}"
)

# Step 2: Initialize CodeQL runner once for all projects
self._codeql_runner = CodeQLRunner(str(installation_info.path))
self._logger.info(
f"CodeQL runner initialized with version {installation_info.version}"
)

# Step 3: Execute analysis based on repository type
if request.monorepo:
# Run scan based on .codeql.json if it exists
root_config_path = request.repository_path / ".codeql.json"
Expand Down Expand Up @@ -127,7 +245,7 @@ def _execute_monorepo_analysis(
all_analysis_results = []
error_messages = []

max_workers = min(os.cpu_count() or 1, self.DEFAULT_MAX_WORKERS)
max_workers = self.max_workers
with ProcessPoolExecutor(max_workers=max_workers) as executor:
futures = []
for project_cfg in projects_config:
Expand Down Expand Up @@ -235,32 +353,37 @@ def _execute_single_repo_analysis(
f"{request.repository_path}"
)

# Step 1: Verify CodeQL installation
installation_info = self._verify_codeql_installation(request.force_install)
if not installation_info.is_valid:
raise Exception(
f"CodeQL installation error: {installation_info.error_message}"
# Initialize CodeQL runner if not already done (for subprocess calls)
if self._codeql_runner is None:
self._logger.debug("CodeQL runner not initialized, initializing now...")
installation_info = self._verify_codeql_installation(
request.force_install
)
if not installation_info.is_valid:
raise Exception(
f"CodeQL installation error: {installation_info.error_message}"
)
self._codeql_runner = CodeQLRunner(str(installation_info.path))
self._logger.debug(
f"CodeQL runner initialized with version {installation_info.version}"
)

# Step 2: Initialize CodeQL runner
self._codeql_runner = CodeQLRunner(str(installation_info.path))

# Step 3: Detect projects and languages
# Step 1: Detect projects and languages
detected_projects = self._detect_projects(request.repository_path)
self._logger.info(f"Detected {len(detected_projects)} project(s)")

# Step 4: Filter projects by target languages if specified
# Step 2: Filter projects by target languages if specified
filtered_projects = self._filter_projects_by_language(
detected_projects, request.target_languages
)

# Step 5: Run analysis on each project
# Step 3: Run analysis on each project
analysis_results = []
for project in filtered_projects:
result = self._analyze_project(project, request)
analysis_results.append(result)

# Step 6: Create summary
# Step 4: Create summary
summary = RepositoryAnalysisSummary(
repository_path=request.repository_path,
detected_projects=detected_projects,
Expand Down
Empty file removed test_cli_debug.py
Empty file.
Loading