Commits (34)
All 34 commits are by TimPietruskyRunPod.

6a4b630 feat: rename repo to Moltbot (Jan 29, 2026)
e843e25 chore: document branch image tags (Jan 29, 2026)
5998e37 fix: tag PR images by head branch (Jan 29, 2026)
328269d fix: enforce moltbot binary in gguf entrypoint (Jan 29, 2026)
11c416b fix: avoid duplicate builds (Jan 29, 2026)
d38cf84 fix: publish latest on main (Jan 29, 2026)
bbb2b7c fix: install moltbot CLI in gguf image (Jan 29, 2026)
929c9be fix: install clawdbot with moltbot shim (Jan 29, 2026)
3f7d76d fix: use clawdbot state dir env vars (Jan 29, 2026)
f022778 fix: harden clawdbot state dir setup (Jan 29, 2026)
ccf2c1a docs: refresh model matrix (Jan 30, 2026)
3c6ed9a feat: rename to openclaw (Jan 30, 2026)
7bc877a refactor: share entrypoint helpers (Jan 30, 2026)
f090059 docs: add RunPod pairing steps (Jan 30, 2026)
eb4310a feat: add image gen skill and CLI (Jan 30, 2026)
24b3afc fix(gguf): use PyTorch cu128 + diffusers git for RTX 5090 image gen (Jan 31, 2026)
01745de docs(agents): add image generation skill reference (Jan 31, 2026)
113f6d6 docs: add AGENTS.md for high-level onboarding (Jan 31, 2026)
c3708c3 docs: rewrite AGENTS.md per agents.md spec (Jan 31, 2026)
617af10 docs: comprehensive AGENTS.md with structure and key decisions (Jan 31, 2026)
b28128e feat(audio): add GPU-accelerated TTS/STT with LFM2.5-Audio (Feb 1, 2026)
d21405d perf(audio): use persistent server for instant TTS/STT (Feb 1, 2026)
bab81f3 feat(image-gen): add persistent image generation server (Feb 1, 2026)
885ee0c fix(image-server): enable image server with all 3 servers running (Feb 1, 2026)
7bcdb36 fix(dockerfile): add image server script and port 8002 (Feb 1, 2026)
6e85325 perf(entrypoint): default 150k ctx and full gpu offload (Feb 2, 2026)
cce3caf feat(media): add public image urls and output dirs (Feb 2, 2026)
6fdab81 docs(images): add generated sample images (Feb 2, 2026)
8795d1a feat(media): add proxy ui and toolresult images (Feb 2, 2026)
e40ab72 feat(media): add audio links and proxy ui tts/stt (Feb 2, 2026)
1068ed0 docs(runpod): normalize runpod capitalization (Feb 2, 2026)
a3a3dd3 chore(release): align image tags with model names (Feb 2, 2026)
a81af86 chore(startup): default tokens to changeme (Feb 2, 2026)
a2cdef8 fix(startup): normalize SSH keys and defaults (Feb 3, 2026)
8 changes: 5 additions & 3 deletions .env.example
@@ -10,12 +10,14 @@ TOOL_CALL_PARSER=hermes
# HuggingFace (optional - for gated models)
HF_TOKEN=hf_your_token_here

# RunPod Configuration (for remote access)
# Runpod Configuration (for remote access)
RUNPOD_POD_ID=your-pod-id
RUNPOD_VLLM_API_KEY=your-secure-api-key-here

# Clawdbot Web UI (password for accessing the control panel)
CLAWDBOT_WEB_PASSWORD=clawdbot
# OpenClaw Configuration
OPENCLAW_STATE_DIR=/workspace/.openclaw
OPENCLAW_WORKSPACE=/workspace/openclaw
OPENCLAW_WEB_PASSWORD=changeme

# Messaging Integrations (optional)
TELEGRAM_BOT_TOKEN=
127 changes: 80 additions & 47 deletions .github/workflows/docker-build.yml
@@ -2,11 +2,8 @@ name: Build and Push Docker Images

on:
push:
branches: [main, feat/add-model-folders, gguf-v3]
paths:
- 'models/**'
- 'Dockerfile'
- '.github/workflows/docker-build.yml'
branches: [main]
tags: ['*']
pull_request:
paths:
- 'models/**'
@@ -26,12 +23,6 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Determine build type
id: build_type
run: |
@@ -44,6 +35,12 @@
- name: Set environment variables
run: |
echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
BRANCH_NAME="${{ github.head_ref }}"
else
BRANCH_NAME="${GITHUB_REF##refs/heads/}"
fi
BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g')

if [[ "${{ github.ref }}" == refs/tags/* ]]; then
echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV
@@ -52,22 +49,34 @@
echo "VERSION=latest" >> $GITHUB_ENV
echo "ALSO_LATEST=false" >> $GITHUB_ENV
else
BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g')
echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV
echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV
echo "ALSO_LATEST=false" >> $GITHUB_ENV
fi
if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then
echo "PUSH_IMAGES=false" >> $GITHUB_ENV
else
echo "PUSH_IMAGES=true" >> $GITHUB_ENV
fi

- name: Login to Docker Hub
if: env.PUSH_IMAGES == 'true'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build and push
uses: docker/build-push-action@v6
with:
context: models/glm47-flash-awq-4bit
push: true
context: .
file: models/glm47-flash-awq-4bit/Dockerfile
push: ${{ env.PUSH_IMAGES == 'true' }}
tags: |
${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:${{ env.VERSION }}
${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }}
${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-awq-4bit:${{ env.VERSION }}
${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-stack-glm4.7-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }}
platforms: linux/amd64
cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:buildcache
cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:buildcache,mode=max
cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-awq-4bit:buildcache
cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-awq-4bit:buildcache,mode=max

# Disabled - vLLM NVFP4 has bugs with GLM-4.7 MLA on Blackwell
# See models/glm47-flash-nvfp4-5090/ISSUES.md
@@ -81,12 +90,6 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Determine build type
id: build_type
run: |
@@ -99,6 +102,12 @@
- name: Set environment variables
run: |
echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
BRANCH_NAME="${{ github.head_ref }}"
else
BRANCH_NAME="${GITHUB_REF##refs/heads/}"
fi
BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g')

if [[ "${{ github.ref }}" == refs/tags/* ]]; then
echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV
@@ -107,22 +116,34 @@
echo "VERSION=latest" >> $GITHUB_ENV
echo "ALSO_LATEST=false" >> $GITHUB_ENV
else
BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g')
echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV
echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV
echo "ALSO_LATEST=false" >> $GITHUB_ENV
fi
if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then
echo "PUSH_IMAGES=false" >> $GITHUB_ENV
else
echo "PUSH_IMAGES=true" >> $GITHUB_ENV
fi

- name: Login to Docker Hub
if: env.PUSH_IMAGES == 'true'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build and push
uses: docker/build-push-action@v6
with:
context: models/glm47-flash-nvfp4-5090
push: true
context: .
file: models/glm47-flash-nvfp4-5090/Dockerfile
push: ${{ env.PUSH_IMAGES == 'true' }}
tags: |
${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:${{ env.VERSION }}
${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }}
${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-nvfp4-5090:${{ env.VERSION }}
${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-stack-glm4.7-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }}
platforms: linux/amd64
cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:buildcache
cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:buildcache,mode=max
cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-nvfp4-5090:buildcache
cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-nvfp4-5090:buildcache,mode=max

# GLM-4.7-Flash GGUF with llama.cpp - WORKING on RTX 5090!
build-glm47-flash-gguf-llamacpp:
@@ -134,12 +155,6 @@
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Determine build type
id: build_type
run: |
@@ -152,6 +167,12 @@
- name: Set environment variables
run: |
echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
BRANCH_NAME="${{ github.head_ref }}"
else
BRANCH_NAME="${GITHUB_REF##refs/heads/}"
fi
BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g')

if [[ "${{ github.ref }}" == refs/tags/* ]]; then
echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV
@@ -160,22 +181,34 @@
echo "VERSION=latest" >> $GITHUB_ENV
echo "ALSO_LATEST=false" >> $GITHUB_ENV
else
BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g')
echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV
echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV
echo "ALSO_LATEST=false" >> $GITHUB_ENV
fi
if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then
echo "PUSH_IMAGES=false" >> $GITHUB_ENV
else
echo "PUSH_IMAGES=true" >> $GITHUB_ENV
fi

- name: Login to Docker Hub
if: env.PUSH_IMAGES == 'true'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build and push
uses: docker/build-push-action@v6
with:
context: models/glm47-flash-gguf-llamacpp
push: true
context: .
file: models/glm47-flash-gguf-llamacpp/Dockerfile
push: ${{ env.PUSH_IMAGES == 'true' }}
tags: |
${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:${{ env.VERSION }}
${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }}
${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:${{ env.VERSION }}
${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:latest', env.DOCKERHUB_REPO) || '' }}
platforms: linux/amd64
cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:buildcache
cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:buildcache,mode=max
cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:buildcache
cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:buildcache,mode=max

# Disabled for now
# build-glm47-flash-fp16:
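For reference, the branch-to-tag logic added in each job above behaves like this (branch name below is hypothetical, shown only to illustrate the sed sanitization):

```bash
# Illustration of the VERSION derivation added in this workflow.
BRANCH_NAME="feat/add-model-folders"              # e.g. a PR head branch
BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') # slashes are invalid in Docker tags
echo "$BRANCH_TAG"                                # -> feat-add-model-folders
# A push of a git tag (e.g. v1.2.0) sets VERSION to the tag name instead,
# and a push to main sets VERSION=latest.
```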
2 changes: 1 addition & 1 deletion .gitignore
@@ -20,7 +20,7 @@ Thumbs.db
*.swp
*.swo

# Node (if running Clawdbot locally)
# Node (if running OpenClaw locally)
node_modules/

# Python
102 changes: 102 additions & 0 deletions AGENTS.md
@@ -0,0 +1,102 @@
# AGENTS.md

OpenClaw Stack on Runpod: self-contained Docker images with LLM + media services for GPU pods.

## Codebase Structure

```
openclaw-stack/
├── models/ # GPU-specific Dockerfiles
│ ├── glm47-flash-gguf-llamacpp/ # RTX 5090 - llama.cpp (primary)
│ ├── glm47-flash-awq-4bit/ # A100 80GB - vLLM
│ ├── glm47-flash-fp16/ # H100/A100 - vLLM
│ ├── glm47-flash-nvfp4-5090/ # RTX 5090 - vLLM (experimental)
│ └── glm47-reap-w4a16/ # B200 - vLLM
├── scripts/ # Startup and utilities
│ ├── entrypoint.sh # Main container entrypoint
│ ├── entrypoint-common.sh # Shared entrypoint logic
│ └── openclaw-image-gen # Image generation CLI
├── skills/ # Agent capabilities
│ └── image-gen/ # FLUX.2 image generation
├── config/
│ ├── openclaw.json # OpenClaw config template
│ └── workspace/ # Files copied to /workspace/openclaw/
├── tests/ # Test scripts
└── Dockerfile # Base/fallback Dockerfile
```

## Key Decisions

- **RTX 5090 uses llama.cpp** (`glm47-flash-gguf-llamacpp/`) — vLLM has dimension mismatch bugs with GLM-4.7 MLA attention on NVFP4
- **PyTorch cu128 required for RTX 5090** — cu124 doesn't support Blackwell sm_120 architecture
- **Diffusers from git** — stable release lacks `Flux2KleinPipeline` for image generation
- **llama.cpp built from source** with `DCMAKE_CUDA_ARCHITECTURES="120"` for sm_120 support
- **LLM and Audio binaries MUST be separate** — LLM uses main llama.cpp branch, Audio uses PR #18641 branch. They have incompatible shared libraries. LLM libs go to `/usr/local/lib/`, Audio libs go to `/usr/local/bin/` (see Dockerfile lines 52 vs 73). Mixing them breaks LLM server startup.
- **Persistent servers for low latency** — Audio (port 8001) and Image (port 8002) run as persistent servers with models pre-loaded in VRAM. CLI scripts (`openclaw-tts`, `openclaw-stt`, `openclaw-image-gen`) call these servers over HTTP for near-instant inference (~0.3-0.8s vs 2-3s with per-request loading); see the sketch after this list. These ports are internal-only; public access goes through the proxy on 8080.
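A minimal sketch of how those CLI wrappers reach the image server, assuming a JSON-over-HTTP interface (the route and field names below are illustrative assumptions, not taken from the server code):

```bash
# Hypothetical request to the persistent image server on port 8002.
# Endpoint path and JSON fields are assumptions for illustration.
curl -s http://localhost:8002/generate \
  -H "Content-Type: application/json" \
  -d '{"prompt": "a red fox", "width": 512, "height": 512}' \
  -o /tmp/fox.png
```

Because the model already sits in VRAM, request latency is dominated by inference itself, which is where the ~0.3-0.8s figure comes from.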

## Build Commands

```bash
# Build primary RTX 5090 image
docker build -f models/glm47-flash-gguf-llamacpp/Dockerfile -t openclaw-gguf .

# Build other variants
docker build -f models/glm47-flash-awq-4bit/Dockerfile -t openclaw-awq .
docker build -f models/glm47-flash-fp16/Dockerfile -t openclaw-fp16 .
```

## Testing

```bash
# Health check
curl http://localhost:8000/health

# Test suites
./tests/test-vllm.sh
./tests/test-tool-calling.sh

# Image generation
openclaw-image-gen --prompt "test" --width 512 --height 512 --output /tmp/test.png
```

## Runpod Pod Access

```bash
# SSH into pod (use Runpod MCP tools to get IP/port)
ssh -i ~/.ssh/id_runpod root@<ip> -p <port>

# Common debugging
nvidia-smi
curl http://localhost:8000/health
curl http://localhost:8000/v1/models
```

## Where to Make Changes

| Task | Location |
|------|----------|
| Add new GPU variant | Create new folder in `models/` with Dockerfile + entrypoint.sh |
| Change startup logic | `scripts/entrypoint-common.sh` (shared) or model-specific entrypoint |
| Add agent skill | Create folder in `skills/` with SKILL.md (see the sketch after this table) |
| Modify OpenClaw workspace | `config/workspace/` |
| Update CI/CD | `.github/workflows/docker-build.yml` |
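Of these, adding a skill is the lightest change; a new skill can start as a single SKILL.md, roughly like this (the skill name and wording are placeholders):

```bash
# Scaffold a new skill (hypothetical name and contents).
mkdir -p skills/my-skill
cat > skills/my-skill/SKILL.md <<'EOF'
# my-skill

Describe what the skill does, which CLI or server it calls,
and any ports or env vars it depends on.
EOF
```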

## VRAM Usage (RTX 5090 - 32GB)

| Component | VRAM | Notes |
|-----------|------|-------|
| GLM-4.7 LLM (200k ctx) | ~22.5 GB | Model + KV cache (q8_0), `LLAMA_GPU_LAYERS=44` |
| Audio Server (TTS/STT) | ~2 GB | LFM2.5-Audio-1.5B-Q4_0 |
| Image Server (FLUX.2) | ~3-4 GB | FLUX.2-klein-4B-SDNQ-4bit-dynamic |
| **Total (all 3)** | **~29-30 GB** | **~2 GB free** |
| **LLM + Audio only** | **~26 GB** | **~6 GB free** |

**Note**: 200k context fits with all 3 servers on 32GB when `LLAMA_PARALLEL=1` and `LLAMA_GPU_LAYERS=44`. If memory pressure occurs, reduce `MAX_MODEL_LEN` or lower `LLAMA_GPU_LAYERS`.
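As a sketch, those knobs can be set when starting the container (values below are illustrative, not tested defaults):

```bash
# Example: trade context length for free VRAM on a 32 GB card.
# Tune the values against nvidia-smi output.
docker run --gpus all \
  -e MAX_MODEL_LEN=150000 \
  -e LLAMA_GPU_LAYERS=40 \
  -e LLAMA_PARALLEL=1 \
  openclaw-gguf
```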

## Important Notes

- Never start/stop servers in code — user handles that
- Use Runpod MCP tools to manage pods
- RTX 5090 image gen requires: PyTorch cu128 + diffusers from git
- Model downloads go to `/workspace/huggingface/` (persisted volume)
- **CRITICAL**: LLM binaries (main branch) and Audio binaries (PR #18641) must use separate library paths. Never copy audio `.so` files to `/usr/local/lib/`; they will break the LLM server. A quick check follows below.
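One way to sanity-check the separation after a build, assuming llama.cpp's standard `llama-server` binary name (a diagnostic sketch, not a required step):

```bash
# Confirm the LLM server's shared libraries resolve, and that the two
# library locations stay separate as described above.
ldd "$(command -v llama-server)" | grep -q 'not found' && echo "broken libs" || echo "libs OK"
ls /usr/local/lib/*.so* 2>/dev/null   # main-branch llama.cpp libs only
ls /usr/local/bin/*.so* 2>/dev/null   # audio-branch (PR #18641) libs live here
```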