Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ llms-full.txt
.vscode/
*.swp
.DS_Store

build/
26 changes: 26 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
FROM python:3.12-slim

# Install uv (fast installer) without keeping a pip wheel cache in the layer.
RUN pip install --no-cache-dir uv

# Create a non-root user to run the tool.
RUN useradd -m -u 1000 codeassureuser

WORKDIR /app

# Copy only what the install needs.
COPY pyproject.toml ./
COPY sast_verify/ ./sast_verify/
# The ENTRYPOINT below hardcodes /app/codeassure.json, so the default config
# must be baked into the image (it can still be overridden at runtime with a
# bind mount over the same path). Leaving this COPY commented out would make
# the container fail to start without a mount.
COPY codeassure.json ./

# Install the package into the system environment (no venv inside the image).
RUN uv pip install --system --no-cache .

# Hand ownership of the install directory to the non-root user.
RUN chown -R codeassureuser:codeassureuser /app

USER codeassureuser

# Scans run against the mounted workspace, not the install directory.
WORKDIR /workspace

ENTRYPOINT ["codeassure", "--config", "/app/codeassure.json"]
40 changes: 40 additions & 0 deletions brev_docker_files/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
services:
  qwen35-server:
    image: vllm/vllm-openai:v0.17.1-x86_64-cu130
    ports:
      - "5000:5000"
    volumes:
      # Reuse the host Hugging Face cache so weights are not re-downloaded.
      - /home/shadeform/.cache/huggingface:/root/.cache/huggingface
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Items are appended to the image's `vllm serve` entrypoint; the first
    # (positional) item is the model to serve. Numeric values are quoted so
    # they are passed through as CLI strings.
    command:
      - Sehyo/Qwen3.5-122B-A10B-NVFP4
      - --served-model-name
      - qwen35-nvfp4
      - --swap-space
      - "16"
      - --max-num-seqs
      - "32"
      - --max-model-len
      - "65536"
      - --gpu-memory-utilization
      - "0.9"
      - --tensor-parallel-size
      - "1"
      - --language-model-only
      - --enable-auto-tool-choice
      - --tool-call-parser
      - qwen3_coder
      - --reasoning-parser
      - qwen3
      - --trust-remote-code
      - --host
      - "0.0.0.0"
      - --port
      - "5000"
53 changes: 53 additions & 0 deletions brev_docker_files/docker-compose_nemotron.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
services:
  nemotron-server:
    image: vllm/vllm-openai:v0.17.1-x86_64-cu130
    ports:
      - "5000:5000"
    volumes:
      # NOTE(review): the HF cache is under /home/ubuntu while the parser
      # plugin is under /home/shadeform — these look copied from hosts with
      # different login users; confirm both paths exist on the target machine.
      - /home/ubuntu/.cache/huggingface:/root/.cache/huggingface
      - /home/shadeform/super_v3_reasoning_parser.py:/app/super_v3_reasoning_parser.py:ro
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Items are appended to the image's `vllm serve` entrypoint.
    command:
      - --model
      - nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4
      - --async-scheduling
      - --served-model-name
      - nvidia/nemotron-3-super
      - --dtype
      - auto
      - --kv-cache-dtype
      - fp8
      - --tensor-parallel-size
      - "1"
      - --pipeline-parallel-size
      - "1"
      - --data-parallel-size
      - "1"
      - --swap-space
      - "0"
      - --trust-remote-code
      - --attention-backend
      - TRITON_ATTN
      - --gpu-memory-utilization
      - "0.9"
      - --enable-chunked-prefill
      - --max-num-seqs
      - "512"
      - --host
      - "0.0.0.0"
      - --port
      - "5000"
      - --enable-auto-tool-choice
      - --tool-call-parser
      - qwen3_coder
      # Custom reasoning parser mounted read-only above.
      - --reasoning-parser-plugin
      - /app/super_v3_reasoning_parser.py
      - --reasoning-parser
      - super_v3
21 changes: 21 additions & 0 deletions brev_docker_files/init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Host firewall lockdown: allow only SSH inbound via ufw, then restrict the
# Docker forwarding path (the DOCKER-USER chain) so published container
# ports are not reachable from outside, while container-to-container and
# container-to-internet traffic keeps working.

# Reset ufw to a known state: deny inbound, allow outbound, SSH only.
ufw --force reset
ufw default deny incoming
ufw default allow outgoing
ufw allow 22/tcp
ufw allow 2222/tcp
ufw allow in from 0.0.0.0/0 to any port 22
ufw --force enable

# Rebuild the DOCKER-USER chain from scratch.
iptables -F DOCKER-USER
# Allow reply traffic for connections the containers initiated.
iptables -A DOCKER-USER -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
# Allow container-originated traffic leaving each bridge interface type
# (default docker0 bridge, user bridges, CNI, Calico).
iptables -A DOCKER-USER -i docker0 ! -o docker0 -j ACCEPT
iptables -A DOCKER-USER -i br+ ! -o br+ -j ACCEPT
iptables -A DOCKER-USER -i cni+ ! -o cni+ -j ACCEPT
iptables -A DOCKER-USER -i cali+ ! -o cali+ -j ACCEPT
# Allow traffic that stays on the same bridge (container <-> container).
iptables -A DOCKER-USER -i docker0 -o docker0 -j ACCEPT
iptables -A DOCKER-USER -i br+ -o br+ -j ACCEPT
iptables -A DOCKER-USER -i cni+ -o cni+ -j ACCEPT
iptables -A DOCKER-USER -i cali+ -o cali+ -j ACCEPT
iptables -A DOCKER-USER -i lo -j ACCEPT
# Drop everything else (i.e. external traffic to published ports).
# The original script also appended `-j RETURN` after this rule; DROP is a
# terminal target, so that rule was unreachable and has been removed.
iptables -A DOCKER-USER -j DROP
28 changes: 28 additions & 0 deletions brev_docker_files/super_v3_reasoning_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser


@ReasoningParserManager.register_module("super_v3")
class SuperV3ReasoningParser(DeepSeekR1ReasoningParser):
    """Reasoning parser for Nemotron Super v3.

    The inherited ``deepseek_r1`` parser puts everything into the reasoning
    content whenever it cannot find a ``</think>`` delimiter — acceptable for
    DeepSeek R1, which was never run with thinking disabled. Nemotron 3
    Nano/Super can run with ``enable_thinking=false`` via chat-template
    kwargs, and there are also rare cases where generation stops before the
    end-think token (e.g. reasoning exceeds max length), leaving empty
    content. In both situations this subclass reclassifies the parsed text
    as final content instead of reasoning.
    """

    def extract_reasoning(self, model_output, request):
        reasoning, content = super().extract_reasoning(model_output, request)

        # Chat-template kwargs may be absent or empty on the request.
        kwargs = getattr(request, "chat_template_kwargs", None) or {}
        thinking_disabled = kwargs.get("enable_thinking") is False
        wants_content = kwargs.get("force_nonempty_content") is True

        if content is None and (thinking_disabled or wants_content):
            # Move the non-empty text into content rather than returning
            # reasoning with an empty content field.
            reasoning, content = None, reasoning

        return reasoning, content
12 changes: 12 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Build the standalone codeassure binary with PyInstaller.
set -euo pipefail

main() {
    # Install the optional build extra (PyInstaller) into the system env.
    echo "==> Installing build deps..."
    uv pip install --system -e ".[build]"

    # Produce dist/codeassure from the spec file.
    echo "==> Building standalone binary..."
    pyinstaller codeassure.spec --clean

    echo ""
    echo "Binary ready: dist/codeassure"
    echo "Test it: ./dist/codeassure --help"
}

main "$@"
4 changes: 4 additions & 0 deletions build_entry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Standalone entry point analyzed by PyInstaller (see codeassure.spec)."""
from sast_verify import cli

if __name__ == "__main__":
    cli.main()
2 changes: 1 addition & 1 deletion codeassure.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
"name": "qwen35-nvfp4",
"api_base": "http://localhost:5000/v1"
},
"concurrency": 16
"concurrency": 4
}
85 changes: 85 additions & 0 deletions codeassure.spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller spec for the codeassure standalone binary.
#
# Entry point is build_entry.py; `hiddenimports` lists modules PyInstaller's
# static analysis misses (dynamic imports inside pydantic-ai, the
# anthropic/openai SDKs, and the project's own packages), and `datas` bundles
# the package metadata those libraries read at runtime.
from PyInstaller.utils.hooks import copy_metadata

a = Analysis(
    ['build_entry.py'],
    pathex=[],
    binaries=[],
    # Distribution metadata queried via importlib.metadata at runtime.
    datas=[
        *copy_metadata('genai_prices'),
        *copy_metadata('pydantic_ai_slim'),
    ],
    hiddenimports=[
        # genai_prices (used by pydantic-ai messages.py at import time)
        'genai_prices',
        # pydantic / pydantic-ai
        'pydantic',
        'pydantic.v1',
        'pydantic_core',
        'pydantic_ai',
        'pydantic_ai.models',
        'pydantic_ai.models.openai',
        'pydantic_ai_slim',
        # anthropic SDK
        'anthropic',
        'anthropic._client',
        'anthropic.resources',
        # httpx (used by both anthropic and pydantic-ai)
        'httpx',
        'httpcore',
        # async
        'anyio',
        'anyio._backends._asyncio',
        'sniffio',
        # openai client (pydantic-ai-slim[openai])
        'openai',
        'openai._client',
        # project internals
        'sast_verify',
        'sast_verify.cli',
        'sast_verify.config',
        'sast_verify.pipeline',
        'sast_verify.preprocess',
        'sast_verify.retrieval',
        'sast_verify.schema',
        'sast_verify.agents',
        'sast_verify.agents.analyzer',
        'sast_verify.agents.runner',
        'sast_verify.agents.tools',
        'sast_verify.agents.deps',
        'sast_verify.prompts',
        'sast_verify.prompts.analyzer',
        'sast_verify.prompts.rule_policies',
        'sast_verify.eval',
        'sast_verify.eval.evaluate',
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
)

pyz = PYZ(a.pure)

# One-file console executable; runtime_tmpdir=None keeps the default
# self-extraction location.
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    [],
    name='codeassure',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)
56 changes: 27 additions & 29 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# Reconstructed post-change state of docker-compose.yml: the diff hunk
# interleaved removed nemotron-server lines with the added qwen35-server
# lines and diff metadata, which is not valid YAML as plain text.
services:
  qwen35-server:
    image: vllm/vllm-openai:v0.17.1-x86_64-cu130
    ports:
      - "5000:5000"
    volumes:
      # Reuse the host Hugging Face cache so weights are not re-downloaded.
      - /home/shadeform/.cache/huggingface:/root/.cache/huggingface
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Items are appended to the image's `vllm serve` entrypoint; the first
    # (positional) item is the model to serve.
    command:
      - Sehyo/Qwen3.5-122B-A10B-NVFP4
      - --served-model-name
      - qwen35-nvfp4
      - --swap-space
      - "16"
      - --max-num-seqs
      - "32"
      - --max-model-len
      - "65536"
      - --gpu-memory-utilization
      - "0.9"
      - --tensor-parallel-size
      - "1"
      - --language-model-only
      - --enable-auto-tool-choice
      - --tool-call-parser
      - qwen3_coder
      - --reasoning-parser
      - qwen3
      - --trust-remote-code
      - --host
      - "0.0.0.0"
      - --port
      - "5000"
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
# Build backend: plain setuptools (PEP 517/518).
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"

# Package metadata for the codeassure CLI.
[project]
name = "codeassure"
version = "0.1.0"
description = "AI-powered SAST finding verification"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "MIT" }
# Runtime dependencies; model clients plus pydantic for schemas.
dependencies = [
"pydantic-ai-slim[openai]",
"pydantic>=2.0",
"anthropic>=0.40.0",
]

# Extra used by build.sh to produce the standalone binary.
[project.optional-dependencies]
build = ["pyinstaller>=6.0"]

# Ship only the sast_verify package (distribution name differs: codeassure).
[tool.setuptools.packages.find]
include = ["sast_verify*"]

Expand Down
Loading