accuknox · Eshrath027 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -1,20 +1,15 @@
 FROM python:3.12-slim
 
-# Install uv
-RUN pip install --no-cache-dir uv
+# Install uv and git
+RUN pip install --no-cache-dir uv && apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
 
 # Create non-root user
 RUN useradd -m -u 1000 codeassureuser
 
 WORKDIR /app
 
-# Copy project files
-COPY pyproject.toml ./
-COPY sast_verify/ ./sast_verify/
-# COPY codeassure.json ./
-
-# Install the package
-RUN uv pip install --system --no-cache .
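+# Install codeassure from the pinned v0.1.0 tag (NOTE(review): pyproject.toml is bumped to 0.1.1 in this change — confirm the tag)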
+RUN uv pip install --system --no-cache git+https://github.com/accuknox/codeassure-cli.git@v0.1.0
 
 # Set ownership
 RUN chown -R codeassureuser:codeassureuser /app

diff --git a/README.md b/README.md
@@ -49,14 +49,96 @@ codeassure --codebase DIR --findings FILE --output FILE [--config PATH] [--jobs
 ```json
 {
   "model": {
-    "provider": "openai",
-    "name": "qwen35-nvfp4",
-    "api_base": "http://localhost:5000/v1"
+    "provider": "openai-compatible",
+    "name": "your-model-name",
+    "api_base": "http://localhost:5000",
+    "api_key": "$YOUR_API_KEY_ENV_VAR",
+    "tool_calling": true
   },
-  "concurrency": 16,
+  "concurrency": 4,
+  "stage_timeout": 120,
+  "finding_timeout": 300
 }
 ```
 
+### Model fields
+
+| Field | Required | Description |
+|---|---|---|
+| `provider` | yes | One of `openai`, `openai-compatible`, `anthropic`, `google`, `gemini` (`google`/`gemini` need the optional `google` extra, e.g. `pip install "codeassure[google]"`) |
+| `name` | yes | Model name as known by the provider |
+| `api_base` | no | Root host URL — always provide without `/v1` (see table below) |
+| `api_key` | no | API key literal or `$ENV_VAR` reference (e.g. `"$OPENAI_API_KEY"`) |
+| `tool_calling` | no | `true` (default) — set to `false` for models that don't support tool/function calling |
+
+### `api_base` per provider
+
+Always provide the root host. The SDK or CodeAssure appends the correct path automatically:
+
+| Provider | You set `api_base` | Actual endpoint called |
+|---|---|---|
+| `openai` / `openai-compatible` | `http://localhost:5000` | `http://localhost:5000/v1/chat/completions` |
+| `anthropic` | `https://your-proxy.example.com` | `https://your-proxy.example.com/v1/messages` |
+| `google` / `gemini` | `https://your-proxy.example.com` | `https://your-proxy.example.com/v1beta/models/{model}:generateContent` |
+
+### Provider examples
+
+**Local vLLM / OpenAI-compatible:**
+```json
+{
+  "model": {
+    "provider": "openai-compatible",
+    "name": "qwen/qwen3.5-9b",
+    "api_base": "http://localhost:5000",
+    "tool_calling": false
+  }
+}
+```
+
+**Anthropic-compatible proxy:**
+```json
+{
+  "model": {
+    "provider": "anthropic",
+    "name": "qwen/qwen3.5-9b",
+    "api_base": "https://your-proxy.example.com",
+    "api_key": "$ANTHROPIC_API_KEY",
+    "tool_calling": false
+  }
+}
+```
+
+**Anthropic (direct):**
+```json
+{
+  "model": {
+    "provider": "anthropic",
+    "name": "claude-sonnet-4-6",
+    "api_key": "$ANTHROPIC_API_KEY"
+  }
+}
+```
+
+**Google Gemini:**
+```json
+{
+  "model": {
+    "provider": "gemini",
+    "name": "gemini-2.0-flash",
+    "api_key": "$GEMINI_API_KEY"
+  }
+}
+```
+
+### Other config fields
+
+| Field | Default | Description |
+|---|---|---|
+| `concurrency` | `4` | Max concurrent LLM requests |
+| `stage_timeout` | `120` | Seconds per LLM stage (analyzer or formatter) |
+| `finding_timeout` | `300` | Seconds for the entire finding (both stages + repair) |
+| `request_limit` | `200` | Max requests per `agent.run()` call |
+
 ## Brev Setup (Remote GPU Instance)
 
 > Instance: `accuknox-nemotron-super-3`
@@ -69,7 +151,7 @@ brev list
 brev port-forward accuknox-nemotron-super-3 --port 5000:5000
 ```
 
-The vLLM endpoint is now available at `http://localhost:5000/v1`. The default `codeassure.json` is already configured to use this.
+The vLLM endpoint is now available at `http://localhost:5000`. Set `api_base` to `http://localhost:5000` in `codeassure.json`.
 
 ## Output
 
@@ -78,9 +160,9 @@ Each finding gets a `verification` block:
 {
   "verification": {
     "verdict": "true_positive",
-    "finding_correct": true,
     "is_security_vulnerability": true,
     "confidence": "high",
+    "severity": "high",
     "reason": "subprocess.run called with dynamic user input and shell=True.",
     "evidence": [{"location": "app/utils.py:42"}]
   }
@@ -90,9 +172,9 @@ Each finding gets a `verification` block:
 | Field | Values | Description |
 |---|---|---|
 | `verdict` | `true_positive`, `false_positive`, `uncertain` | Did the scanner correctly detect the pattern? |
-| `finding_correct` | `true`, `false`, `null` | Does the flagged pattern exist in the code? |
-| `is_security_vulnerability` | `true`, `false`, `null` | Is this exploitable? Assessed from code context, independent of verdict |
+| `is_security_vulnerability` | `true`, `false` | Is this exploitable? Assessed from code context, independent of verdict |
 | `confidence` | `high`, `medium`, `low` | Confidence level |
+| `severity` | `critical`, `high`, `medium`, `low` | Assessed severity for `true_positive`; always `low` for `false_positive`/`uncertain` |
 
 ## Benchmarking
 

diff --git a/codeassure.json b/codeassure.json
@@ -1,8 +1,11 @@
 {
   "model": {
     "provider": "openai",
-    "name": "qwen35-nvfp4",
-    "api_base": "http://localhost:5000/v1"
+    "name": "qwen/qwen3.5-9b",
+    "api_base": "https://openrouter.ai/api",
+    "api_key": "$OPENROUTER_KEY"
   },
-  "concurrency": 4
+  "concurrency": 2,
+  "stage_timeout": 300,
+  "finding_timeout": 600
 }
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,18 +4,19 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "codeassure"
-version = "0.1.0"
+version = "0.1.1"
 description = "AI-powered SAST finding verification"
 readme = "README.md"
 requires-python = ">=3.11"
 license = { text = "MIT" }
 dependencies = [
-    "pydantic-ai-slim[openai]",
+    "pydantic-ai-slim[openai,anthropic]",
     "pydantic>=2.0",
     "anthropic>=0.40.0",
 ]
 
 [project.optional-dependencies]
+google = ["pydantic-ai-slim[google]", "google-genai"]
 build = ["pyinstaller>=6.0"]
 
 [tool.setuptools.packages.find]

diff --git a/sast_verify/agents/analyzer.py b/sast_verify/agents/analyzer.py
@@ -5,7 +5,9 @@
 from ..config import get_config
 from ..prompts.analyzer import (
     ANALYZER_INSTRUCTION,
+    ANALYZER_INSTRUCTION_NO_TOOLS,
     GROUP_ANALYZER_INSTRUCTION,
+    GROUP_ANALYZER_INSTRUCTION_NO_TOOLS,
     GROUP_VERDICT_FORMATTER_INSTRUCTION,
     VERDICT_FORMATTER_INSTRUCTION,
 )
@@ -14,11 +16,18 @@
 
 
 def build_analyzer() -> Agent[AnalyzerDeps, str]:
+    """Build the analyzer agent for the configured model.
+
+    read_file/grep_code are attached only when the model config enables tool_calling; otherwise the no-tools prompt is used.
+    """
+    cfg = get_config()
+    with_tools = cfg.model.tool_calling
+    tool_kwargs = {"tools": [read_file, grep_code]} if with_tools else {}
     return Agent(
-        get_config().build_model(),
+        cfg.build_model(),
         deps_type=AnalyzerDeps,
-        instructions=ANALYZER_INSTRUCTION,
-        tools=[read_file, grep_code],
+        instructions=ANALYZER_INSTRUCTION if with_tools else ANALYZER_INSTRUCTION_NO_TOOLS,
+        **tool_kwargs,
     )
 
 
@@ -30,11 +39,18 @@ def build_verdict_formatter() -> Agent[None, str]:
 
 
 def build_group_analyzer() -> Agent[AnalyzerDeps, str]:
+    """Build the group analyzer agent for the configured model.
+
+    read_file/grep_code are attached only when the model config enables tool_calling; otherwise the no-tools prompt is used.
+    """
+    cfg = get_config()
+    with_tools = cfg.model.tool_calling
+    tool_kwargs = {"tools": [read_file, grep_code]} if with_tools else {}
     return Agent(
-        get_config().build_model(),
+        cfg.build_model(),
         deps_type=AnalyzerDeps,
-        instructions=GROUP_ANALYZER_INSTRUCTION,
-        tools=[read_file, grep_code],
+        instructions=GROUP_ANALYZER_INSTRUCTION if with_tools else GROUP_ANALYZER_INSTRUCTION_NO_TOOLS,
+        **tool_kwargs,
     )
 
 

diff --git a/sast_verify/agents/runner.py b/sast_verify/agents/runner.py
@@ -35,6 +35,25 @@
 
 import re
 
+from pydantic_ai.exceptions import UnexpectedModelBehavior
+
+
+async def _run_with_retry(agent, message, *, retries: int = 3, base_delay: float = 2.0, **kwargs):
+    """Run ``agent.run(message, **kwargs)``, retrying on transient UnexpectedModelBehavior (e.g. null API response).
+
+    Always makes at least one attempt, backs off base_delay * 2**attempt seconds, and re-raises the final error.
+    """
+    for attempt in range(max(1, retries)):  # retries <= 0 used to skip the loop and return None
+        try:
+            return await agent.run(message, **kwargs)
+        except UnexpectedModelBehavior as exc:
+            if attempt >= retries - 1:
+                raise  # out of attempts: surface the final failure to the caller
+            delay = base_delay * (2 ** attempt)
+            log.warning("Transient model error (attempt %d/%d), retrying in %.1fs: %s",
+                        attempt + 1, retries, delay, type(exc).__name__)
+            await asyncio.sleep(delay)
+
 
 def _fix_unquoted_strings(text: str) -> str:
     """Fix JSON with unquoted string values — common with some models.
@@ -330,7 +349,7 @@ async def _analyze_one(
     # Stage 1: Tool-using analysis
     try:
         analysis_result = await asyncio.wait_for(
-            analyzer.run(build_user_message(bundle), **run_kwargs),
+            _run_with_retry(analyzer, build_user_message(bundle), **run_kwargs),
             timeout=stage_timeout,
         )
         analysis = analysis_result.output
@@ -356,15 +375,15 @@ async def _analyze_one(
 
     try:
         format_result = await asyncio.wait_for(
-            formatter.run(format_message, **formatter_kwargs),
+            _run_with_retry(formatter, format_message, **formatter_kwargs),
             timeout=stage_timeout,
         )
         response = format_result.output
     except asyncio.TimeoutError:
         log.warning("Formatter timed out for finding %d", index)
         response = ""
     except Exception as exc:
-        log.error("Formatter failed for finding %d: %s", index, exc)
+        log.error("Formatter failed for finding %d: %s", index, type(exc).__name__)
         response = ""
 
     verdict = None
@@ -386,7 +405,8 @@ async def _analyze_one(
             )
             try:
                 repair_result = await asyncio.wait_for(
-                    formatter.run(
+                    _run_with_retry(
+                        formatter,
                         repair_message,
                         message_history=format_result.all_messages(),
                         **formatter_kwargs,
@@ -464,15 +484,15 @@ def _uncertain_all(reason: str) -> dict[int, Verdict]:
     # Stage 1: Tool-using analysis
     try:
         analysis_result = await asyncio.wait_for(
-            analyzer.run(build_group_message(group), **run_kwargs),
+            _run_with_retry(analyzer, build_group_message(group), **run_kwargs),
             timeout=stage_timeout,
         )
         analysis = analysis_result.output
     except asyncio.TimeoutError:
         log.warning("Group analyzer timed out for %s", group.group_key)
         return _uncertain_all(f"Analyzer stage timed out after {stage_timeout}s.")
     except Exception as exc:
-        log.error("Group analyzer failed for %s: %s", group.group_key, exc)
+        log.error("Group analyzer failed for %s: %s", group.group_key, type(exc).__name__)
         return _uncertain_all(f"Analyzer error: {type(exc).__name__}")
 
     if not analysis.strip():
@@ -487,15 +507,15 @@ def _uncertain_all(reason: str) -> dict[int, Verdict]:
 
     try:
         format_result = await asyncio.wait_for(
-            formatter.run(format_message, **formatter_kwargs),
+            _run_with_retry(formatter, format_message, **formatter_kwargs),
             timeout=stage_timeout,
         )
         response = format_result.output
     except asyncio.TimeoutError:
         log.warning("Group formatter timed out for %s", group.group_key)
         response = ""
     except Exception as exc:
-        log.error("Group formatter failed for %s: %s", group.group_key, exc)
+        log.error("Group formatter failed for %s: %s", group.group_key, type(exc).__name__)
         response = ""
 
     verdicts: dict[str, Verdict] | None = None