Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
FROM python:3.12-slim

# Install uv
RUN pip install --no-cache-dir uv
# Install uv and git
RUN pip install --no-cache-dir uv && apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd -m -u 1000 codeassureuser

WORKDIR /app

# Copy project files
COPY pyproject.toml ./
COPY sast_verify/ ./sast_verify/
# COPY codeassure.json ./

# Install the package
RUN uv pip install --system --no-cache .
# Install codeassure
RUN uv pip install --system --no-cache git+https://github.com/accuknox/codeassure-cli.git@v0.1.0

# Set ownership
RUN chown -R codeassureuser:codeassureuser /app
Expand Down
98 changes: 90 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,96 @@ codeassure --codebase DIR --findings FILE --output FILE [--config PATH] [--jobs
```json
{
"model": {
"provider": "openai",
"name": "qwen35-nvfp4",
"api_base": "http://localhost:5000/v1"
"provider": "openai-compatible",
"name": "your-model-name",
"api_base": "http://localhost:5000",
"api_key": "$YOUR_API_KEY_ENV_VAR",
"tool_calling": true
},
"concurrency": 16,
"concurrency": 4,
"stage_timeout": 120,
"finding_timeout": 300
}
```

### Model fields

| Field | Required | Description |
|---|---|---|
| `provider` | yes | One of `openai`, `openai-compatible`, `anthropic`, `google`, `gemini` |
| `name` | yes | Model name as known by the provider |
| `api_base` | no | Root host URL — always provide without `/v1` (see table below) |
| `api_key` | no | API key literal or `$ENV_VAR` reference (e.g. `"$OPENAI_API_KEY"`) |
| `tool_calling` | no | `true` (default) — set to `false` for models that don't support tool/function calling |

### `api_base` per provider

Always provide the root host. The SDK or CodeAssure appends the correct path automatically:

| Provider | You set `api_base` | Actual endpoint called |
|---|---|---|
| `openai` / `openai-compatible` | `http://localhost:5000` | `http://localhost:5000/v1/chat/completions` |
| `anthropic` | `https://your-proxy.example.com` | `https://your-proxy.example.com/v1/messages` |
| `google` / `gemini` | `https://your-proxy.example.com` | `https://your-proxy.example.com/v1beta/models/{model}:generateContent` |

### Provider examples

**Local vLLM / OpenAI-compatible:**
```json
{
"model": {
"provider": "openai-compatible",
"name": "qwen/qwen3.5-9b",
"api_base": "http://localhost:5000",
"tool_calling": false
}
}
```

**Anthropic-compatible proxy:**
```json
{
"model": {
"provider": "anthropic",
"name": "qwen/qwen3.5-9b",
"api_base": "https://your-proxy.example.com",
"api_key": "$ANTHROPIC_API_KEY",
"tool_calling": false
}
}
```

**Anthropic (direct):**
```json
{
"model": {
"provider": "anthropic",
"name": "claude-sonnet-4-6",
"api_key": "$ANTHROPIC_API_KEY"
}
}
```

**Google Gemini:**
```json
{
"model": {
"provider": "gemini",
"name": "gemini-2.0-flash",
"api_key": "$GEMINI_API_KEY"
}
}
```

### Other config fields

| Field | Default | Description |
|---|---|---|
| `concurrency` | `4` | Max concurrent LLM requests |
| `stage_timeout` | `120` | Seconds per LLM stage (analyzer or formatter) |
| `finding_timeout` | `300` | Seconds for the entire finding (both stages + repair) |
| `request_limit` | `200` | Max requests per `agent.run()` call |

## Brev Setup (Remote GPU Instance)

> Instance: `accuknox-nemotron-super-3`
Expand All @@ -69,7 +151,7 @@ brev list
brev port-forward accuknox-nemotron-super-3 --port 5000:5000
```

The vLLM endpoint is now available at `http://localhost:5000/v1`. The default `codeassure.json` is already configured to use this.
The vLLM endpoint is now available at `http://localhost:5000`. Set `api_base` to `http://localhost:5000` in `codeassure.json`.

## Output

Expand All @@ -78,9 +160,9 @@ Each finding gets a `verification` block:
{
"verification": {
"verdict": "true_positive",
"finding_correct": true,
"is_security_vulnerability": true,
"confidence": "high",
"severity": "high",
"reason": "subprocess.run called with dynamic user input and shell=True.",
"evidence": [{"location": "app/utils.py:42"}]
}
Expand All @@ -90,9 +172,9 @@ Each finding gets a `verification` block:
| Field | Values | Description |
|---|---|---|
| `verdict` | `true_positive`, `false_positive`, `uncertain` | Did the scanner correctly detect the pattern? |
| `finding_correct` | `true`, `false`, `null` | Does the flagged pattern exist in the code? |
| `is_security_vulnerability` | `true`, `false`, `null` | Is this exploitable? Assessed from code context, independent of verdict |
| `is_security_vulnerability` | `true`, `false` | Is this exploitable? Assessed from code context, independent of verdict |
| `confidence` | `high`, `medium`, `low` | Confidence level |
| `severity` | `critical`, `high`, `medium`, `low` | Assessed severity for `true_positive`; always `low` for `false_positive`/`uncertain` |

## Benchmarking

Expand Down
9 changes: 6 additions & 3 deletions codeassure.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
{
"model": {
"provider": "openai",
"name": "qwen35-nvfp4",
"api_base": "http://localhost:5000/v1"
"name": "qwen/qwen3.5-9b",
"api_base": "https://openrouter.ai/api",
"api_key": "$OPENROUTER_KEY"
},
"concurrency": 4
"concurrency": 2,
"stage_timeout": 300,
"finding_timeout": 600
}
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ build-backend = "setuptools.build_meta"

[project]
name = "codeassure"
version = "0.1.0"
version = "0.1.1"
description = "AI-powered SAST finding verification"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "MIT" }
dependencies = [
"pydantic-ai-slim[openai]",
"pydantic-ai-slim[openai,anthropic]",
"pydantic>=2.0",
"anthropic>=0.40.0",
]

[project.optional-dependencies]
google = ["pydantic-ai-slim[google]", "google-genai"]
build = ["pyinstaller>=6.0"]

[tool.setuptools.packages.find]
Expand Down
28 changes: 22 additions & 6 deletions sast_verify/agents/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from ..config import get_config
from ..prompts.analyzer import (
ANALYZER_INSTRUCTION,
ANALYZER_INSTRUCTION_NO_TOOLS,
GROUP_ANALYZER_INSTRUCTION,
GROUP_ANALYZER_INSTRUCTION_NO_TOOLS,
GROUP_VERDICT_FORMATTER_INSTRUCTION,
VERDICT_FORMATTER_INSTRUCTION,
)
Expand All @@ -14,11 +16,18 @@


def build_analyzer() -> Agent[AnalyzerDeps, str]:
cfg = get_config()
if cfg.model.tool_calling:
return Agent(
cfg.build_model(),
deps_type=AnalyzerDeps,
instructions=ANALYZER_INSTRUCTION,
tools=[read_file, grep_code],
)
return Agent(
get_config().build_model(),
cfg.build_model(),
deps_type=AnalyzerDeps,
instructions=ANALYZER_INSTRUCTION,
tools=[read_file, grep_code],
instructions=ANALYZER_INSTRUCTION_NO_TOOLS,
)


Expand All @@ -30,11 +39,18 @@ def build_verdict_formatter() -> Agent[None, str]:


def build_group_analyzer() -> Agent[AnalyzerDeps, str]:
cfg = get_config()
if cfg.model.tool_calling:
return Agent(
cfg.build_model(),
deps_type=AnalyzerDeps,
instructions=GROUP_ANALYZER_INSTRUCTION,
tools=[read_file, grep_code],
)
return Agent(
get_config().build_model(),
cfg.build_model(),
deps_type=AnalyzerDeps,
instructions=GROUP_ANALYZER_INSTRUCTION,
tools=[read_file, grep_code],
instructions=GROUP_ANALYZER_INSTRUCTION_NO_TOOLS,
)


Expand Down
36 changes: 28 additions & 8 deletions sast_verify/agents/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,25 @@

import re

from pydantic_ai.exceptions import UnexpectedModelBehavior


async def _run_with_retry(agent, message, *, retries: int = 3, base_delay: float = 2.0, **kwargs):
    """Run ``agent.run`` and retry on transient ``UnexpectedModelBehavior``.

    Args:
        agent: Object exposing an awaitable ``run(message, **kwargs)``.
        message: Prompt forwarded unchanged to ``agent.run``.
        retries: Total number of attempts (not *extra* attempts). Values
            below 1 are treated as 1 so the agent is always called once.
        base_delay: Seconds slept after the first failed attempt; the delay
            doubles after every further failure (2s, 4s, ... by default).
        **kwargs: Extra keyword arguments forwarded to ``agent.run``.

    Returns:
        Whatever ``agent.run`` returns on the first successful attempt.

    Raises:
        UnexpectedModelBehavior: If the final attempt fails as well. Any
            other exception type propagates immediately, without a retry.

    Note:
        The back-off sleeps happen inside this coroutine, so a caller that
        wraps it in ``asyncio.wait_for`` spends part of its timeout on them.
    """
    # Guard: a non-positive ``retries`` would skip the loop entirely and
    # return None without ever calling the agent, which callers would then
    # dereference (``result.output``) and crash on.
    attempts = max(1, retries)
    for attempt in range(attempts):
        try:
            return await agent.run(message, **kwargs)
        except UnexpectedModelBehavior as exc:
            if attempt >= attempts - 1:
                # Out of attempts: surface the original error to the caller.
                raise
            delay = base_delay * (2 ** attempt)
            # Only the exception type is logged, not its message.
            log.warning(
                "Transient model error (attempt %d/%d), retrying in %.1fs: %s",
                attempt + 1, attempts, delay, type(exc).__name__,
            )
            await asyncio.sleep(delay)


def _fix_unquoted_strings(text: str) -> str:
"""Fix JSON with unquoted string values — common with some models.
Expand Down Expand Up @@ -330,7 +349,7 @@ async def _analyze_one(
# Stage 1: Analysis (tool-using only when the model has tool_calling enabled)
try:
analysis_result = await asyncio.wait_for(
analyzer.run(build_user_message(bundle), **run_kwargs),
_run_with_retry(analyzer, build_user_message(bundle), **run_kwargs),
timeout=stage_timeout,
)
analysis = analysis_result.output
Expand All @@ -356,15 +375,15 @@ async def _analyze_one(

try:
format_result = await asyncio.wait_for(
formatter.run(format_message, **formatter_kwargs),
_run_with_retry(formatter, format_message, **formatter_kwargs),
timeout=stage_timeout,
)
response = format_result.output
except asyncio.TimeoutError:
log.warning("Formatter timed out for finding %d", index)
response = ""
except Exception as exc:
log.error("Formatter failed for finding %d: %s", index, exc)
log.error("Formatter failed for finding %d: %s", index, type(exc).__name__)
response = ""

verdict = None
Expand All @@ -386,7 +405,8 @@ async def _analyze_one(
)
try:
repair_result = await asyncio.wait_for(
formatter.run(
_run_with_retry(
formatter,
repair_message,
message_history=format_result.all_messages(),
**formatter_kwargs,
Expand Down Expand Up @@ -464,15 +484,15 @@ def _uncertain_all(reason: str) -> dict[int, Verdict]:
# Stage 1: Analysis (tool-using only when the model has tool_calling enabled)
try:
analysis_result = await asyncio.wait_for(
analyzer.run(build_group_message(group), **run_kwargs),
_run_with_retry(analyzer, build_group_message(group), **run_kwargs),
timeout=stage_timeout,
)
analysis = analysis_result.output
except asyncio.TimeoutError:
log.warning("Group analyzer timed out for %s", group.group_key)
return _uncertain_all(f"Analyzer stage timed out after {stage_timeout}s.")
except Exception as exc:
log.error("Group analyzer failed for %s: %s", group.group_key, exc)
log.error("Group analyzer failed for %s: %s", group.group_key, type(exc).__name__)
return _uncertain_all(f"Analyzer error: {type(exc).__name__}")

if not analysis.strip():
Expand All @@ -487,15 +507,15 @@ def _uncertain_all(reason: str) -> dict[int, Verdict]:

try:
format_result = await asyncio.wait_for(
formatter.run(format_message, **formatter_kwargs),
_run_with_retry(formatter, format_message, **formatter_kwargs),
timeout=stage_timeout,
)
response = format_result.output
except asyncio.TimeoutError:
log.warning("Group formatter timed out for %s", group.group_key)
response = ""
except Exception as exc:
log.error("Group formatter failed for %s: %s", group.group_key, exc)
log.error("Group formatter failed for %s: %s", group.group_key, type(exc).__name__)
response = ""

verdicts: dict[str, Verdict] | None = None
Expand Down
Loading