From 6a4b6309f9d07c1af4256d080f6871144f4defb0 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 11:24:48 +0100 Subject: [PATCH 01/12] feat: rename repo to Moltbot Refresh images, docs, and scripts to use Moltbot naming and env vars. Update Docker build workflow to tag images with branch names. --- .env.example | 4 +- .github/workflows/docker-build.yml | 30 +-- .gitignore | 2 +- CLAUDE.md | 28 +-- Dockerfile | 20 +- README.md | 44 ++--- config/{clawdbot.json => moltbot.json} | 6 +- config/workspace/AGENTS.md | 2 +- config/workspace/IDENTITY.md | 2 +- docker-compose.yml | 10 +- docs/video-script.md | 175 ++++++++++++++++++ models/glm47-flash-awq-4bit/Dockerfile | 10 +- models/glm47-flash-awq-4bit/README.md | 16 +- models/glm47-flash-awq-4bit/entrypoint.sh | 41 ++-- models/glm47-flash-fp16/Dockerfile | 12 +- models/glm47-flash-fp16/README.md | 4 +- models/glm47-flash-fp16/entrypoint.sh | 35 ++-- models/glm47-flash-gguf-llamacpp/Dockerfile | 8 +- models/glm47-flash-gguf-llamacpp/README.md | 22 +-- .../glm47-flash-gguf-llamacpp/entrypoint.sh | 42 +++-- models/glm47-flash-nvfp4-5090/Dockerfile | 6 +- models/glm47-flash-nvfp4-5090/README.md | 4 +- models/glm47-flash-nvfp4-5090/entrypoint.sh | 41 ++-- models/glm47-reap-w4a16/Dockerfile | 8 +- models/glm47-reap-w4a16/README.md | 4 +- models/glm47-reap-w4a16/entrypoint.sh | 35 ++-- scripts/entrypoint.sh | 41 ++-- .../{setup-clawdbot.sh => setup-moltbot.sh} | 44 +++-- scripts/start-vllm.sh | 4 +- .../{clawdbot-vllm.json => moltbot-vllm.json} | 8 +- templates/runpod-template.json | 10 +- 31 files changed, 464 insertions(+), 254 deletions(-) rename config/{clawdbot.json => moltbot.json} (88%) create mode 100644 docs/video-script.md rename scripts/{setup-clawdbot.sh => setup-moltbot.sh} (80%) mode change 100755 => 100644 rename templates/{clawdbot-vllm.json => moltbot-vllm.json} (97%) diff --git a/.env.example b/.env.example index 8fd6b96..22deea3 100644 --- a/.env.example +++ b/.env.example @@ -14,8 +14,8 @@ HF_TOKEN=hf_your_token_here RUNPOD_POD_ID=your-pod-id RUNPOD_VLLM_API_KEY=your-secure-api-key-here -# Clawdbot Web UI (password for accessing the control panel) -CLAWDBOT_WEB_PASSWORD=clawdbot +# Moltbot Web UI (password for accessing the control panel) +MOLTBOT_WEB_PASSWORD=moltbot # Messaging Integrations (optional) TELEGRAM_BOT_TOKEN= diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 43996c7..2c54159 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -53,7 +53,7 @@ jobs: echo "ALSO_LATEST=false" >> $GITHUB_ENV else BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_NAME}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi @@ -63,11 +63,11 @@ jobs: context: models/glm47-flash-awq-4bit push: true tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-awq-4bit:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:buildcache,mode=max + cache-from: type=registry,ref=${{ 
env.DOCKERHUB_REPO }}/moltbot-glm47-flash-awq-4bit:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-awq-4bit:buildcache,mode=max # Disabled - vLLM NVFP4 has bugs with GLM-4.7 MLA on Blackwell # See models/glm47-flash-nvfp4-5090/ISSUES.md @@ -108,7 +108,7 @@ jobs: echo "ALSO_LATEST=false" >> $GITHUB_ENV else BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_NAME}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi @@ -118,11 +118,11 @@ jobs: context: models/glm47-flash-nvfp4-5090 push: true tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-nvfp4-5090:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-nvfp4-5090:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-nvfp4-5090:buildcache,mode=max # GLM-4.7-Flash GGUF with llama.cpp - WORKING on RTX 5090! build-glm47-flash-gguf-llamacpp: @@ -161,7 +161,7 @@ jobs: echo "ALSO_LATEST=false" >> $GITHUB_ENV else BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_NAME}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi @@ -171,11 +171,11 @@ jobs: context: models/glm47-flash-gguf-llamacpp push: true tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-gguf:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-gguf:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-gguf:buildcache,mode=max # Disabled for now # build-glm47-flash-fp16: diff --git a/.gitignore b/.gitignore index 8a344f7..85eee5d 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ Thumbs.db *.swp *.swo -# Node (if running Clawdbot locally) +# Node (if running Moltbot locally) node_modules/ # Python diff --git a/CLAUDE.md b/CLAUDE.md index 9eb13c2..d80de81 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,19 +4,19 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -RunPod-optimized Docker deployment for running Clawdbot (AI coding assistant) with GLM-4.7 language models using vLLM for inference. Multiple model variants are optimized for different GPU tiers (A100, H100, B200, RTX 5090). 
+RunPod-optimized Docker deployment for running Moltbot (AI coding assistant) with GLM-4.7 language models using vLLM for inference. Multiple model variants are optimized for different GPU tiers (A100, H100, B200, RTX 5090). ## Build Commands ```bash # Build a specific model variant -docker build -t clawdbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ -docker build -t clawdbot-glm47-flash-fp16 models/glm47-flash-fp16/ -docker build -t clawdbot-glm47-flash-nvfp4-5090 models/glm47-flash-nvfp4-5090/ +docker build -t moltbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ +docker build -t moltbot-glm47-flash-fp16 models/glm47-flash-fp16/ +docker build -t moltbot-glm47-flash-nvfp4-5090 models/glm47-flash-nvfp4-5090/ # Push to Docker Hub -docker tag clawdbot-glm47-flash-awq-4bit yourusername/clawdbot-glm47-flash-awq-4bit:latest -docker push yourusername/clawdbot-glm47-flash-awq-4bit:latest +docker tag moltbot-glm47-flash-awq-4bit yourusername/moltbot-glm47-flash-awq-4bit:latest +docker push yourusername/moltbot-glm47-flash-awq-4bit:latest ``` ## Local Development @@ -56,12 +56,12 @@ models/ # Model-specific Dockerfiles and configs └── glm47-reap-w4a16/ # REAP W4A16 (B200) scripts/ # Startup orchestration -├── entrypoint.sh # Docker entrypoint (starts vLLM + Clawdbot) +├── entrypoint.sh # Docker entrypoint (starts vLLM + Moltbot) ├── start-vllm.sh # vLLM server with GPU detection -└── setup-clawdbot.sh # Clawdbot installation +└── setup-moltbot.sh # Moltbot installation config/ # Runtime configuration -├── clawdbot.json # Clawdbot config template +├── moltbot.json # Moltbot config template └── workspace/ # Agent identity and system docs ``` @@ -70,9 +70,9 @@ config/ # Runtime configuration | Port | Service | |-------|-------------------| | 8000 | vLLM API | -| 18789 | Clawdbot Gateway | -| 18790 | Clawdbot Bridge | -| 18793 | Clawdbot Canvas | +| 18789 | Moltbot Gateway | +| 18790 | Moltbot Bridge | +| 18793 | Moltbot Canvas | | 22 | SSH | ## CI/CD (GitHub Actions) @@ -98,10 +98,10 @@ Key variables from `.env.example`: ## Entrypoint Flow 1. Configure environment and detect GPU count -2. Generate `clawdbot.json` with vLLM provider settings +2. Generate `clawdbot.json` with vLLM provider settings (legacy file name used by Moltbot) 3. Start vLLM server in background 4. Wait for health check (max 5 minutes) -5. Start Clawdbot gateway +5. Start Moltbot gateway 6. 
Handle graceful shutdown on SIGTERM/SIGINT ## RunPod SSH Access diff --git a/Dockerfile b/Dockerfile index ec61858..34d195f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,14 @@ -# Clawdbot + vLLM Docker Image for RunPod +# Moltbot + vLLM Docker Image for RunPod # Pre-configured with everything needed for AI coding assistant FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04 -LABEL maintainer="RunPod Clawdbot" -LABEL description="Clawdbot AI assistant with vLLM for local LLM inference" +LABEL maintainer="RunPod Moltbot" +LABEL description="Moltbot AI assistant with vLLM for local LLM inference" # Avoid interactive prompts ENV DEBIAN_FRONTEND=noninteractive ENV HF_HOME=/workspace/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV MOLTBOT_STATE_DIR=/workspace/.clawdbot # Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -27,8 +27,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ # Install vLLM RUN pip install --no-cache-dir vllm -# Install Clawdbot -RUN npm install -g clawdbot@latest +# Install Moltbot +RUN npm install -g moltbot@latest # Create workspace directories RUN mkdir -p /workspace/huggingface \ @@ -40,14 +40,14 @@ RUN mkdir -p /workspace/huggingface \ COPY scripts/entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh -# Copy default Clawdbot workspace files +# Copy default Moltbot workspace files COPY config/workspace/ /workspace/clawd/ # Expose ports # 8000 - vLLM API -# 18789 - Clawdbot Gateway WebSocket -# 18790 - Clawdbot Bridge -# 18793 - Clawdbot Canvas +# 18789 - Moltbot Gateway WebSocket +# 18790 - Moltbot Bridge +# 18793 - Moltbot Canvas # 22 - SSH (RunPod adds this) EXPOSE 8000 18789 18790 18793 diff --git a/README.md b/README.md index 14e4923..0b12775 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Clawdbot on RunPod with vLLM +# Moltbot on RunPod with vLLM -Run Clawdbot with GLM-4.7 and other open-source coding models on RunPod using vLLM. Chat with your AI assistant via Telegram! +Run Moltbot with GLM-4.7 and other open-source coding models on RunPod using vLLM. Chat with your AI assistant via Telegram! ## Model Comparison @@ -21,16 +21,16 @@ Best value option with full 114k context window at $1.19/hr on A100 80GB. ```bash # GLM-4.7-Flash AWQ 4-bit (Best value, A100 80GB) -IMAGE=yourusername/clawdbot-glm47-flash-awq-4bit:latest +IMAGE=yourusername/moltbot-glm47-flash-awq-4bit:latest # GLM-4.7-Flash FP16 (Full precision, H100/A100 80GB) -IMAGE=yourusername/clawdbot-glm47-flash-fp16:latest +IMAGE=yourusername/moltbot-glm47-flash-fp16:latest # GLM-4.7-REAP W4A16 (High-end, B200) -IMAGE=yourusername/clawdbot-glm47-reap-w4a16:latest +IMAGE=yourusername/moltbot-glm47-reap-w4a16:latest # Base (Qwen2.5-7B, any GPU) -IMAGE=yourusername/clawdbot-vllm:latest +IMAGE=yourusername/moltbot-vllm:latest ``` ### 2. Create RunPod Pod @@ -71,15 +71,15 @@ Images are automatically built and pushed to Docker Hub via GitHub Actions. 
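+Once a tag is published (see the table below for the available images), you can pull and run it directly. A minimal sketch — the Docker Hub namespace, host paths, and secrets are placeholders; substitute your own and make sure a GPU with enough VRAM is available:
+
+```bash
+# Pull a published tag (replace the namespace with your Docker Hub account)
+docker pull yourusername/moltbot-glm47-flash-awq-4bit:latest
+
+# Run it locally with GPU access and a persistent /workspace volume
+docker run --gpus all \
+  -p 8000:8000 -p 18789:18789 \
+  -v /path/to/workspace:/workspace \
+  -e VLLM_API_KEY=your-secure-api-key \
+  -e MOLTBOT_WEB_PASSWORD=your-password \
+  yourusername/moltbot-glm47-flash-awq-4bit:latest
+```
+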
| Image | Description | |-------|-------------| -| `clawdbot-glm47-flash-awq-4bit` | GLM-4.7-Flash AWQ 4-bit for A100 80GB | -| `clawdbot-glm47-flash-fp16` | GLM-4.7-Flash FP16 for H100/A100 80GB | -| `clawdbot-glm47-reap-w4a16` | GLM-4.7-REAP W4A16 for B200 | -| `clawdbot-vllm` | Base image with Qwen2.5-7B | +| `moltbot-glm47-flash-awq-4bit` | GLM-4.7-Flash AWQ 4-bit for A100 80GB | +| `moltbot-glm47-flash-fp16` | GLM-4.7-Flash FP16 for H100/A100 80GB | +| `moltbot-glm47-reap-w4a16` | GLM-4.7-REAP W4A16 for B200 | +| `moltbot-vllm` | Base image with Qwen2.5-7B | ## Project Structure ``` -runpod-clawdbot/ +runpod-moltbot/ ├── README.md # This file ├── .github/ │ └── workflows/ @@ -102,15 +102,15 @@ runpod-clawdbot/ │ └── entrypoint.sh │ ├── scripts/ -│ ├── setup-clawdbot.sh +│ ├── setup-moltbot.sh │ └── start-vllm.sh │ ├── config/ -│ ├── clawdbot.json +│ ├── moltbot.json │ └── workspace/ │ ├── templates/ -│ └── clawdbot-vllm.json +│ └── moltbot-vllm.json │ ├── tests/ │ ├── test-vllm.sh @@ -149,13 +149,13 @@ Images are built automatically on: ```bash # Build locally -docker build -t clawdbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ -docker build -t clawdbot-glm47-flash-fp16 models/glm47-flash-fp16/ -docker build -t clawdbot-glm47-reap-w4a16 models/glm47-reap-w4a16/ +docker build -t moltbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ +docker build -t moltbot-glm47-flash-fp16 models/glm47-flash-fp16/ +docker build -t moltbot-glm47-reap-w4a16 models/glm47-reap-w4a16/ # Push to Docker Hub -docker tag clawdbot-glm47-flash-awq-4bit yourusername/clawdbot-glm47-flash-awq-4bit:latest -docker push yourusername/clawdbot-glm47-flash-awq-4bit:latest +docker tag moltbot-glm47-flash-awq-4bit yourusername/moltbot-glm47-flash-awq-4bit:latest +docker push yourusername/moltbot-glm47-flash-awq-4bit:latest ``` ## Configuration @@ -172,9 +172,9 @@ docker push yourusername/clawdbot-glm47-flash-awq-4bit:latest | `TELEGRAM_BOT_TOKEN` | | Telegram bot token from @BotFather | | `GITHUB_TOKEN` | | GitHub PAT for git/gh operations | -### Clawdbot Configuration +### Moltbot Configuration -Config is auto-generated at `/workspace/.clawdbot/clawdbot.json`: +Config is auto-generated at `/workspace/.clawdbot/clawdbot.json` (legacy path used by Moltbot): ```json { @@ -282,7 +282,7 @@ curl http://localhost:8000/v1/chat/completions \ ## Resources -- [Clawdbot Documentation](https://github.com/clawdbot/clawdbot) +- [Moltbot Documentation](https://github.com/moltbot/moltbot) - [vLLM Documentation](https://docs.vllm.ai/) - [RunPod Documentation](https://docs.runpod.io/) - [GLM-4.7 Announcement](https://z.ai/blog/glm-4.7) diff --git a/config/clawdbot.json b/config/moltbot.json similarity index 88% rename from config/clawdbot.json rename to config/moltbot.json index a344968..f12195e 100644 --- a/config/clawdbot.json +++ b/config/moltbot.json @@ -1,10 +1,10 @@ { - "$schema": "https://clawdbot.com/schema/config.json", - "_comment": "Clawdbot configuration for RunPod vLLM integration", + "$schema": "https://clawd.bot/schema/config.json", + "_comment": "Moltbot configuration for RunPod vLLM integration", "_instructions": [ "Replace with your RunPod pod ID", "Replace with your vLLM API key", - "Adjust model settings based on your tier (see templates/clawdbot-vllm.json)" + "Adjust model settings based on your tier (see templates/moltbot-vllm.json)" ], "agents": { diff --git a/config/workspace/AGENTS.md b/config/workspace/AGENTS.md index f3d8d6e..5d1a3fc 100644 --- a/config/workspace/AGENTS.md +++ 
b/config/workspace/AGENTS.md @@ -1,4 +1,4 @@ -# AGENTS.md - Clawdbot Workspace +# AGENTS.md - Moltbot Workspace This folder is the assistant's working directory. diff --git a/config/workspace/IDENTITY.md b/config/workspace/IDENTITY.md index 547ff69..73d2de7 100644 --- a/config/workspace/IDENTITY.md +++ b/config/workspace/IDENTITY.md @@ -1,6 +1,6 @@ # Identity -You are a helpful AI coding assistant running on RunPod with a local LLM. +You are a helpful Moltbot AI coding assistant running on RunPod with a local LLM. You can help with: - Writing and debugging code - Explaining programming concepts diff --git a/docker-compose.yml b/docker-compose.yml index d72968a..666f1d5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -# docker-compose.yml - Local development setup for Clawdbot + vLLM +# docker-compose.yml - Local development setup for Moltbot + vLLM # Note: This is for local testing only. For production, use RunPod. version: "3.8" @@ -7,7 +7,7 @@ services: # vLLM Server - requires NVIDIA GPU with sufficient VRAM vllm: image: vllm/vllm-openai:v0.12.0 - container_name: clawdbot-vllm + container_name: moltbot-vllm runtime: nvidia deploy: resources: @@ -46,7 +46,7 @@ services: # Mock vLLM for testing without GPU (uses smaller model) vllm-mock: image: vllm/vllm-openai:v0.12.0 - container_name: clawdbot-vllm-mock + container_name: moltbot-vllm-mock profiles: ["mock"] ports: - "8001:8000" @@ -64,7 +64,7 @@ services: # Test runner tests: image: curlimages/curl:latest - container_name: clawdbot-tests + container_name: moltbot-tests profiles: ["test"] depends_on: vllm: @@ -84,4 +84,4 @@ volumes: networks: default: - name: clawdbot-network + name: moltbot-network diff --git a/docs/video-script.md b/docs/video-script.md new file mode 100644 index 0000000..3fcc4cb --- /dev/null +++ b/docs/video-script.md @@ -0,0 +1,175 @@ +# Video Script: Moltbot fully self-hosted on RTX 5090 (GLM‑4.7‑Flash GGUF + llama.cpp) + +This doc turns the repo learnings into a demo-first video script for two audiences: + +- **How to set it up and use it** (first half) +- **How it works** (later), with **vLLM/NVFP4** as a short end note + +--- + +## Benchmark slide: where to get the “graph” + the numbers (Artificial Analysis) + +### Option A (fastest): screenshot Artificial Analysis model pages + +Use these pages and grab the **Artificial Analysis Intelligence Index** number shown on each page: + +- **GLM-4.7-Flash (Reasoning)**: 30 — +- **GLM-4.7 (Reasoning)**: 42 — +- **GPT-5.2 (xhigh)**: 51 — +- **GPT-5.2 Codex (xhigh)**: 48 — +- **Claude Opus 4.5 (Reasoning)**: 50 — +- **Claude 4.5 Sonnet (Reasoning)**: 42 — + +If you want a single AA page on screen as a citation backdrop, use a comparison page: + +- **GLM‑4.7 vs GPT‑5.2**: + +### Option B (cleanest): create your own bar chart, cite AA + +- Build a simple bar chart using the numbers above. +- Add a footer like: **Source: Artificial Analysis (Intelligence Index v4.0), accessed Jan 2026**. + +**Note on “Composer 1”**: The AA model page for “Composer 1” wasn’t reliably fetchable during prep (timeouts). If you want “Composer 1” in the slide, verify its page exists in AA and grab the index number from there; otherwise swap it for a different widely-known coding model that AA lists reliably. 
+ +--- + +## Video script (demo-first; usage first; deep technical notes last) + +### 0:00–0:25 — Cold open / hook (call out fake “self-hosted”) + +**On screen**: quick montage: Telegram/WhatsApp agent convo → “Powered by Claude API” / billing pain → cut to local terminal + GPU. + +**You say**: +People call these “self-hosted agents”… but then the brain is still a paid API. If your agent stops working the second Claude is down or your token budget runs out, that’s not self-hosted. + +Today I’ll show a fully self-contained Moltbot setup: local model, local inference, agent UI—no external model API needed. + +### 0:25–0:55 — What you’ll build + requirements (set expectations) + +**On screen**: one slide: “Moltbot + GLM‑4.7‑Flash + llama.cpp (OpenAI API)”. + +**You say**: +We’re running GLM‑4.7‑Flash locally via llama.cpp and pointing Moltbot at it using an OpenAI-compatible API. + +If you’ve got an RTX 5090 (32GB), you can run the full 200k context. With 24GB, it can still work, just with a reduced context window—because the model weights alone are ~17GB. + +### 0:55–2:10 — Quick demo first (prove it works before you explain anything) + +**On screen**: +- Open Moltbot web UI +- Show the agent doing a quick code task (small repo change / explanation) +- Show a raw API call to the model (`/v1/chat/completions`) + +**You say**: +Let me prove it’s real before we talk architecture. This is Moltbot running against a model in the same environment. No Claude key. No OpenAI key. + +If you’re using Telegram integration, the same idea applies: messages go to a local model, not a hosted API. + +### 2:10–3:40 — Two ways to run it: local GPU vs RunPod (choose your path) + +**On screen**: split screen: local machine vs RunPod pod. + +**You say**: +You’ve got two options: + +- Local: lowest latency and everything stays on your machine. +- RunPod: if you don’t have a 5090—or you don’t want your workstation pinned all day—you can still keep it self-contained. You pay for compute time, not per-token API calls. + +### 3:40–5:30 — RunPod setup walkthrough (the “do this, then this” part) + +**On screen**: RunPod UI checklist. + +**You say (walkthrough voice)**: +Here’s the setup that actually matters: + +- **Image**: `runpod/moltbot-glm47-flash-gguf:latest` +- **Ports**: `8000/http` (llama.cpp), `18789/http` (Moltbot UI), `22/tcp` (SSH) +- **Network volume mounted to `/workspace`** (non-negotiable; model is ~17GB and you want persistence across restarts) +- **Environment variables**: + - `LLAMA_API_KEY` (protects the model API) + - `MOLTBOT_WEB_PASSWORD` (protects the web UI token) + - optionally `TELEGRAM_BOT_TOKEN` (Telegram) + +### 5:30–6:40 — Health check + raw chat completion (OpenAI-compat API) + +**On screen**: terminal showing `curl` to `/health` then `/v1/chat/completions`. + +**You say**: +llama.cpp runs an OpenAI-compatible API. That’s the trick: Moltbot doesn’t need to know it’s llama.cpp. + +**Show (copy/paste):** + +- Health check: `GET /health` on `:8000` +- Chat completion: `POST /v1/chat/completions` with `Authorization: Bearer $LLAMA_API_KEY` and `model: "glm-4.7-flash"` + +### 6:40–8:10 — The “gotcha”: first-time device pairing (and why it’s good) + +**On screen**: web UI says “pairing required” → SSH → approve device → refresh UI. + +**You say**: +First time you open the web UI, it won’t just let any browser control your agent. You must approve the device. 
+ +**On screen (commands):** + +- List requests: + - `MOLTBOT_STATE_DIR=/workspace/.clawdbot moltbot devices list` +- Approve: + - `MOLTBOT_STATE_DIR=/workspace/.clawdbot moltbot devices approve ` + +**You say**: +This is the right default for something that can run commands and touch repos. + +### 8:10–9:10 — Benchmark slide (short, no methodology detour) + +**On screen**: your bar chart + tiny citation footer (Artificial Analysis URLs). + +**You say**: +Why GLM‑4.7‑Flash? Because it’s an open-weights model with serious benchmark performance. On Artificial Analysis’ Intelligence Index, you can see where it sits relative to the usual suspects. + +Quick callout list (keep it fast): + +- GLM‑4.7: 42 +- GLM‑4.7‑Flash: 30 +- GPT‑5.2: 51 +- GPT‑5.2 Codex: 48 +- Claude Opus 4.5 (Reasoning): 50 +- Claude 4.5 Sonnet (Reasoning): 42 + +### 9:10–10:45 — How it works (high level, but concrete) + +**On screen**: simple block diagram. + +**You say**: +Architecture is simple: + +- llama.cpp (`llama-server`) hosts the model and exposes OpenAI-style endpoints on `:8000` +- Moltbot points its provider config at `http://localhost:8000/v1` +- The container stores everything under `/workspace` so restarts don’t wipe model + state + +Then the “why it fits”: + +We’re running a GGUF quantization (Q4_K_M) and using Q8 KV cache quantization—this is what makes 200k context feasible on a 32GB card. + +### 10:45–12:00 — Ending note: what happened with vLLM/NVFP4 (keep it tight) + +**On screen**: one screenshot of the core error + a short bullet list. + +**You say**: +We tried the obvious path first: vLLM with NVFP4 for Blackwell. But as of Jan 2026, it’s blocked for GLM‑4.7 on the 5090. + +Root cause: GLM‑4.7’s MLA attention isn’t handled correctly in vLLM’s fallback path, leading to an attention output dimension mismatch. + +When those pieces land upstream (vLLM + cuDNN support), we’ll revisit and benchmark it. + +**On screen takeaway**: +Today’s working answer: GGUF + llama.cpp. 
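+
+---
+
+## Copy/paste commands for the 5:30–6:40 segment (sketch)
+
+A minimal sketch of the two calls shown on screen, assuming you run them inside the pod against `http://localhost:8000` with `LLAMA_API_KEY` set in your shell; the model name matches the default `SERVED_MODEL_NAME`.
+
+```bash
+# Health check: llama-server answers on /health
+curl -s http://localhost:8000/health
+
+# Chat completion via the OpenAI-compatible endpoint
+curl -s http://localhost:8000/v1/chat/completions \
+  -H "Authorization: Bearer $LLAMA_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "glm-4.7-flash",
+    "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
+    "max_tokens": 64
+  }'
+```
+
+From outside the pod, the same calls work against the `:8000` proxy URL shown in the pod logs.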
+ +--- + +## Suggested on-screen callouts (quick checklist) + +- **Ports**: `8000` (model API), `18789` (web UI), `22` (SSH) +- **Persistence**: “Network volume mounted to `/workspace`” +- **Security**: “API key for model + web token + device pairing” +- **Performance tagline (repo docs)**: “~175 tok/s, ~28GB VRAM, 200k context on RTX 5090” + diff --git a/models/glm47-flash-awq-4bit/Dockerfile b/models/glm47-flash-awq-4bit/Dockerfile index d8bda90..561b21d 100644 --- a/models/glm47-flash-awq-4bit/Dockerfile +++ b/models/glm47-flash-awq-4bit/Dockerfile @@ -34,8 +34,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# Install Clawdbot and Claude Code -RUN npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ +# Install Moltbot and Claude Code +RUN npm install -g --prefer-offline @anthropic-ai/claude-code moltbot && \ npm cache clean --force # Environment defaults @@ -45,9 +45,9 @@ ENV SERVED_MODEL_NAME="glm-4.7-flash" ENV MAX_MODEL_LEN="114688" ENV VLLM_API_KEY="changeme" -# Clawdbot workspace -ENV CLAWDBOT_HOME="/workspace/.clawdbot" -ENV CLAWDBOT_WORKSPACE="/workspace/clawd" +# Moltbot workspace (legacy paths kept for compatibility) +ENV MOLTBOT_HOME="/workspace/.clawdbot" +ENV MOLTBOT_WORKSPACE="/workspace/clawd" COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/models/glm47-flash-awq-4bit/README.md b/models/glm47-flash-awq-4bit/README.md index dbd68e0..f8535df 100644 --- a/models/glm47-flash-awq-4bit/README.md +++ b/models/glm47-flash-awq-4bit/README.md @@ -22,7 +22,7 @@ Quantized version of GLM-4.7-Flash for **A100 80GB** GPUs. Best value for GLM-4. ### 1. Create RunPod Pod **Settings:** -- **Image**: `runpod/clawdbot-glm47-flash-awq-4bit:latest` +- **Image**: `runpod/moltbot-glm47-flash-awq-4bit:latest` - **GPU**: 1x A100 80GB - **Volume**: 150GB at `/workspace` (network storage) - **Container Disk**: 50GB @@ -37,7 +37,7 @@ Quantized version of GLM-4.7-Flash for **A100 80GB** GPUs. Best value for GLM-4. | `HF_TOKEN` | Recommended | - | [HuggingFace token](https://huggingface.co/settings/tokens) for faster model downloads | | `TELEGRAM_BOT_TOKEN` | No | - | Telegram bot token for chat integration | | `GITHUB_TOKEN` | No | - | GitHub token for `gh` CLI | -| `CLAWDBOT_WEB_PASSWORD` | No | `clawdbot` | Password for web UI | +| `MOLTBOT_WEB_PASSWORD` | No | `moltbot` | Password for web UI | ### 3. Access Points @@ -46,7 +46,7 @@ After the pod starts (~90 seconds for cached starts, longer for first start): | Service | URL | Auth | |---------|-----|------| | vLLM API | `https://-8000.proxy.runpod.net` | Bearer token (`VLLM_API_KEY`) | -| Web UI | `https://-18789.proxy.runpod.net` | Password (`CLAWDBOT_WEB_PASSWORD`) | +| Web UI | `https://-18789.proxy.runpod.net` | Password (`MOLTBOT_WEB_PASSWORD`) | | SSH | `ssh root@ -p ` | SSH key | ### 4. 
Test It @@ -87,12 +87,12 @@ All persistent data is stored on the network volume `/workspace`: ├── .cache/ │ ├── vllm/ # CUDA graphs & torch compile cache (~400MB) │ └── huggingface/ # HF cache -├── .clawdbot/ +├── .clawdbot/ # Legacy Moltbot state path │ ├── clawdbot.json # Config │ ├── agents/ # Agent state │ └── telegram/ # Telegram session ├── .config/gh/ # GitHub CLI config -└── clawd/ # Claude Code workspace +└── clawd/ # Workspace ``` **Startup times:** @@ -101,9 +101,9 @@ All persistent data is stored on the network volume `/workspace`: ## Web UI -Access the Clawdbot web UI at `https://-18789.proxy.runpod.net`: +Access the Moltbot web UI at `https://-18789.proxy.runpod.net`: -1. Enter the password (default: `clawdbot` or your `CLAWDBOT_WEB_PASSWORD`) +1. Enter the password (default: `moltbot` or your `MOLTBOT_WEB_PASSWORD`) 2. Chat with the model through the web interface 3. No CLI access required @@ -196,7 +196,7 @@ pkill -9 -f vllm **Web UI won't connect:** - Ensure port 18789 is exposed -- Check that gateway is running: `ps aux | grep clawdbot` +- Check that gateway is running: `ps aux | grep moltbot` - Verify bind mode is `lan` in config **Model download fails:** diff --git a/models/glm47-flash-awq-4bit/entrypoint.sh b/models/glm47-flash-awq-4bit/entrypoint.sh index 8bcb18c..d43bf7b 100644 --- a/models/glm47-flash-awq-4bit/entrypoint.sh +++ b/models/glm47-flash-awq-4bit/entrypoint.sh @@ -48,11 +48,16 @@ fi VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-114688}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +MOLTBOT_HOME="${MOLTBOT_HOME:-/workspace/.clawdbot}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -# Web UI password - users enter this to access the Clawdbot control panel -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +# Web UI password - users enter this to access the Moltbot control panel +MOLTBOT_WEB_PASSWORD="${MOLTBOT_WEB_PASSWORD:-moltbot}" + +BOT_CMD="moltbot" +if ! command -v "$BOT_CMD" >/dev/null 2>&1; then + BOT_CMD="clawdbot" +fi echo "Starting vLLM server..." echo " Model: $MODEL_PATH" @@ -99,11 +104,11 @@ if [ $WAITED -ge $MAX_WAIT ]; then # Don't exit - keep container running for debugging fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup Moltbot config +mkdir -p "$MOLTBOT_HOME" -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then + echo "Creating Moltbot config (legacy clawdbot.json)..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -112,8 +117,8 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - # Create a minimal config - clawdbot doctor will fix any missing fields - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + # Create a minimal config - moltbot doctor will fix any missing fields + cat > "$MOLTBOT_HOME/clawdbot.json" << EOF { "models": { "providers": { @@ -149,12 +154,12 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$MOLTBOT_HOME/clawdbot.json" fi -# Auto-fix config to match current Clawdbot version's schema -echo "Running clawdbot doctor to validate/fix config..." 
-CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix || true +# Auto-fix config to match current Moltbot version's schema +echo "Running moltbot doctor to validate/fix config..." +MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -174,19 +179,19 @@ fi export OPENAI_API_KEY="$VLLM_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway with password auth for web UI access +# Start Moltbot gateway with password auth for web UI access echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot gateway --auth password --password "$CLAWDBOT_WEB_PASSWORD" & +echo "Starting Moltbot gateway..." +MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" gateway --auth password --password "$MOLTBOT_WEB_PASSWORD" & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready!" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " Moltbot Gateway: ws://localhost:18789" echo " Web UI: https://-18789.proxy.runpod.net" -echo " Web UI Password: $CLAWDBOT_WEB_PASSWORD" +echo " Web UI Password: $MOLTBOT_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME" echo " Context: $MAX_MODEL_LEN tokens" echo "================================================" diff --git a/models/glm47-flash-fp16/Dockerfile b/models/glm47-flash-fp16/Dockerfile index c7f5ad6..8375223 100644 --- a/models/glm47-flash-fp16/Dockerfile +++ b/models/glm47-flash-fp16/Dockerfile @@ -33,18 +33,18 @@ RUN uv pip install --system -U vllm \ --extra-index-url https://wheels.vllm.ai/nightly && \ uv pip install --system git+https://github.com/huggingface/transformers.git -# Install Clawdbot -RUN npm install -g --prefer-offline clawdbot@latest && \ +# Install Moltbot +RUN npm install -g --prefer-offline moltbot@latest && \ npm cache clean --force # Keep model files on container disk (requires 100GB) -# Only use workspace for persistent Clawdbot state +# Only use workspace for persistent Moltbot state RUN mkdir -p /workspace/.clawdbot /workspace/clawd # Environment variables -# HF_HOME on container disk (100GB needed), Clawdbot state on workspace +# HF_HOME on container disk (100GB needed), Moltbot state on workspace ENV HF_HOME=/root/.cache/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV MOLTBOT_STATE_DIR=/workspace/.clawdbot ENV MODEL_NAME=zai-org/GLM-4.7-Flash ENV SERVED_MODEL_NAME=glm-4.7-flash ENV VLLM_API_KEY=changeme @@ -58,7 +58,7 @@ RUN chmod +x /entrypoint.sh # Expose ports # 8000: vLLM API -# 18789: Clawdbot Gateway +# 18789: Moltbot Gateway # 22: SSH EXPOSE 8000 18789 22 diff --git a/models/glm47-flash-fp16/README.md b/models/glm47-flash-fp16/README.md index 6f3eb42..63585e0 100644 --- a/models/glm47-flash-fp16/README.md +++ b/models/glm47-flash-fp16/README.md @@ -29,7 +29,7 @@ Best quality with auto-detected context based on GPU. ### 1. 
Create RunPod Pod -- **Image**: `yourusername/clawdbot-glm47-flash-fp16:latest` +- **Image**: `yourusername/moltbot-glm47-flash-fp16:latest` - **GPU**: 1x H100 80GB or A100 80GB - **Volume**: 50GB at `/workspace` - **Container Disk**: 100GB (model stored here) @@ -67,7 +67,7 @@ Model is stored on container disk (100GB required), state persists on workspace ``` /root/.cache/huggingface/ # Model files (container disk) /workspace/ -├── .clawdbot/ +├── .clawdbot/ # Legacy Moltbot state path │ ├── clawdbot.json # Config │ ├── agents/ # State │ └── telegram/ # Session diff --git a/models/glm47-flash-fp16/entrypoint.sh b/models/glm47-flash-fp16/entrypoint.sh index 500953e..ee236e4 100644 --- a/models/glm47-flash-fp16/entrypoint.sh +++ b/models/glm47-flash-fp16/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - GLM-4.7-Flash FP16 + Clawdbot startup script +# entrypoint.sh - GLM-4.7-Flash FP16 + Moltbot startup script set -e echo "============================================" -echo " GLM-4.7-Flash FP16 + Clawdbot Startup" +echo " GLM-4.7-Flash FP16 + Moltbot Startup" echo "============================================" echo "" echo "IMPORTANT: This requires vLLM NIGHTLY (not PyPI stable)!" @@ -50,7 +50,7 @@ GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.92}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-glm47}" # Keep model on container disk (requires 100GB containerDiskInGb) HF_HOME="${HF_HOME:-/root/.cache/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +MOLTBOT_STATE_DIR="${MOLTBOT_STATE_DIR:-/workspace/.clawdbot}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" @@ -63,9 +63,14 @@ else fi export HF_HOME -export CLAWDBOT_STATE_DIR +export MOLTBOT_STATE_DIR export MAX_MODEL_LEN +BOT_CMD="moltbot" +if ! command -v "$BOT_CMD" >/dev/null 2>&1; then + BOT_CMD="clawdbot" +fi + # Set CUDA 13.1 paths for B200 (no-op on other GPUs if not installed) if [ -d "/usr/local/cuda-13.1" ]; then export PATH=/usr/local/cuda-13.1/bin:$PATH @@ -75,7 +80,7 @@ if [ -d "/usr/local/cuda-13.1" ]; then fi # Ensure directories exist (HF cache on container disk, state on workspace) -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$MOLTBOT_STATE_DIR" /workspace/clawd # Configure GitHub CLI # Priority: 1) GITHUB_TOKEN env var, 2) Persisted config in /workspace/.config/gh @@ -115,9 +120,9 @@ if command -v nvcc &> /dev/null; then fi echo "" -# Initialize Clawdbot config if not exists -if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize Moltbot config if not exists +if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then + echo "Creating Moltbot configuration (legacy clawdbot.json)..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -135,7 +140,7 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then # Reserve tokens for compaction: 15% of context RESERVE_TOKENS=$((MAX_MODEL_LEN * 15 / 100)) - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$MOLTBOT_STATE_DIR/clawdbot.json" << EOF { "agents": { "defaults": { @@ -180,10 +185,10 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$MOLTBOT_STATE_DIR/clawdbot.json" echo "Config created. 
Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $MOLTBOT_STATE_DIR/clawdbot.json - preserving it" fi # Build vLLM command @@ -232,10 +237,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start Moltbot gateway echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_STATE_DIR clawdbot gateway & +echo "Starting Moltbot gateway..." +MOLTBOT_STATE_DIR=$MOLTBOT_STATE_DIR "$BOT_CMD" gateway & GATEWAY_PID=$! echo "" @@ -243,7 +248,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " Moltbot Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/models/glm47-flash-gguf-llamacpp/Dockerfile b/models/glm47-flash-gguf-llamacpp/Dockerfile index 6489bb6..5830188 100644 --- a/models/glm47-flash-gguf-llamacpp/Dockerfile +++ b/models/glm47-flash-gguf-llamacpp/Dockerfile @@ -66,8 +66,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install huggingface_hub for model downloads (using Python API, not CLI) RUN python3 -m pip install --no-cache-dir huggingface_hub -# Install Clawdbot and Claude Code -RUN npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ +# Install Moltbot and Claude Code +RUN npm install -g --prefer-offline @anthropic-ai/claude-code moltbot && \ npm cache clean --force WORKDIR / @@ -79,8 +79,8 @@ ENV MODEL_NAME="unsloth/GLM-4.7-Flash-GGUF" \ SERVED_MODEL_NAME="glm-4.7-flash" \ MAX_MODEL_LEN="200000" \ LLAMA_API_KEY="changeme" \ - CLAWDBOT_HOME="/workspace/.clawdbot" \ - CLAWDBOT_WORKSPACE="/workspace/clawd" + MOLTBOT_HOME="/workspace/.clawdbot" \ + MOLTBOT_WORKSPACE="/workspace/clawd" COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/models/glm47-flash-gguf-llamacpp/README.md b/models/glm47-flash-gguf-llamacpp/README.md index a2d0640..51ebb96 100644 --- a/models/glm47-flash-gguf-llamacpp/README.md +++ b/models/glm47-flash-gguf-llamacpp/README.md @@ -23,7 +23,7 @@ llama.cpp has native support for `Glm4MoeLite` architecture (PR #18936 merged Ja - **200k context** - Full model capacity on 32GB GPU - **Q8 KV cache quantization** - Fits 200k context in VRAM -- **OpenAI-compatible API** - Works with Clawdbot, Claude Code, etc. +- **OpenAI-compatible API** - Works with Moltbot, Claude Code, etc. - **Native chat template** - Uses `--jinja` for correct GLM-4.7 formatting ## Runpod Deployment @@ -33,26 +33,26 @@ llama.cpp has native support for `Glm4MoeLite` architecture (PR #18936 merged Ja 1. **Add your SSH key** to [Runpod Account Settings → SSH Public Keys](https://www.runpod.io/console/user/settings) (required for device pairing later). If you don't have an SSH key, follow the [Runpod SSH guide](https://docs.runpod.io/pods/configuration/use-ssh). 2. 
**Create a Pod** with: - - Image: `runpod/clawdbot-glm47-flash-gguf:latest` + - Image: `runpod/moltbot-glm47-flash-gguf:latest` - GPU: RTX 5090 (or any 32GB+ GPU) - Ports: `8000/http`, `18789/http`, `22/tcp` - Network Volume: **30GB minimum**, mounted to `/workspace` - Required for model download (~17GB) and config persistence - Without a network volume, data is lost on pod restart - Environment Variables: - - `CLAWDBOT_WEB_PASSWORD` - Token for Web UI (default: `clawdbot`) + - `MOLTBOT_WEB_PASSWORD` - Token for Web UI (default: `moltbot`) - `LLAMA_API_KEY` - API key for llama.cpp (default: `changeme`) 3. **Wait for startup** - First launch downloads the model (~17GB), which takes a few minutes. Check pod logs for progress. 4. **Access the Web UI**: ``` - https://-18789.proxy.runpod.net/?token= + https://-18789.proxy.runpod.net/?token= ``` ### First-Time Device Pairing -Clawdbot requires device pairing for security. On first access, you'll see "pairing required". +Moltbot requires device pairing for security. On first access, you'll see "pairing required". **To approve your browser:** @@ -61,10 +61,10 @@ Clawdbot requires device pairing for security. On first access, you'll see "pair ssh root@ -p # List pending pairing requests -CLAWDBOT_STATE_DIR=/workspace/.clawdbot clawdbot devices list +MOLTBOT_STATE_DIR=/workspace/.clawdbot moltbot devices list # Approve your device (use the Request ID from the list) -CLAWDBOT_STATE_DIR=/workspace/.clawdbot clawdbot devices approve +MOLTBOT_STATE_DIR=/workspace/.clawdbot moltbot devices approve ``` After approval, refresh the Web UI - it will work permanently for that browser. @@ -74,7 +74,7 @@ After approval, refresh the Web UI - it will work permanently for that browser. | Port | Service | |------|---------| | 8000 | llama.cpp API (OpenAI-compatible) | -| 18789 | Clawdbot Web UI | +| 18789 | Moltbot Web UI | | 22 | SSH | ## Environment Variables @@ -84,7 +84,7 @@ After approval, refresh the Web UI - it will work permanently for that browser. | `MODEL_FILE` | `GLM-4.7-Flash-Q4_K_M.gguf` | GGUF file to use | | `MAX_MODEL_LEN` | `200000` | Context length | | `LLAMA_API_KEY` | `changeme` | API authentication | -| `CLAWDBOT_WEB_PASSWORD` | `clawdbot` | Web UI token | +| `MOLTBOT_WEB_PASSWORD` | `moltbot` | Web UI token | | `TELEGRAM_BOT_TOKEN` | - | Optional Telegram integration | | `GITHUB_TOKEN` | - | Optional GitHub CLI auth | @@ -92,13 +92,13 @@ After approval, refresh the Web UI - it will work permanently for that browser. ```bash # Build -docker build -t clawdbot-glm47-gguf-llamacpp . +docker build -t moltbot-glm47-gguf-llamacpp . 
# Run on RTX 5090 docker run --gpus all -p 8000:8000 -p 18789:18789 \ -v /path/to/workspace:/workspace \ -e LLAMA_API_KEY=your-key \ - clawdbot-glm47-gguf-llamacpp + moltbot-glm47-gguf-llamacpp ``` ## API Usage diff --git a/models/glm47-flash-gguf-llamacpp/entrypoint.sh b/models/glm47-flash-gguf-llamacpp/entrypoint.sh index 8125386..8f67512 100644 --- a/models/glm47-flash-gguf-llamacpp/entrypoint.sh +++ b/models/glm47-flash-gguf-llamacpp/entrypoint.sh @@ -77,10 +77,15 @@ fi LLAMA_API_KEY="${LLAMA_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +MOLTBOT_HOME="${MOLTBOT_HOME:-/workspace/.clawdbot}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +MOLTBOT_WEB_PASSWORD="${MOLTBOT_WEB_PASSWORD:-moltbot}" + +BOT_CMD="moltbot" +if ! command -v "$BOT_CMD" >/dev/null 2>&1; then + BOT_CMD="clawdbot" +fi echo "Starting llama.cpp server..." echo " Model: $MODEL_PATH/$MODEL_FILE" @@ -127,11 +132,11 @@ if [ $WAITED -ge $MAX_WAIT ]; then echo "Container will stay running for debugging." fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup Moltbot config +mkdir -p "$MOLTBOT_HOME" -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then + echo "Creating Moltbot config (legacy clawdbot.json)..." if [ -n "$TELEGRAM_BOT_TOKEN" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true, \"botToken\": \"${TELEGRAM_BOT_TOKEN}\" }" @@ -139,7 +144,7 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + cat > "$MOLTBOT_HOME/clawdbot.json" << EOF { "models": { "providers": { @@ -171,18 +176,18 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then "gateway": { "mode": "local", "bind": "lan", - "auth": { "token": "$CLAWDBOT_WEB_PASSWORD" }, - "remote": { "token": "$CLAWDBOT_WEB_PASSWORD" } + "auth": { "token": "$MOLTBOT_WEB_PASSWORD" }, + "remote": { "token": "$MOLTBOT_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$MOLTBOT_HOME/clawdbot.json" fi # Auto-fix config -echo "Running clawdbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix || true +echo "Running moltbot doctor to validate/fix config..." +MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -202,19 +207,20 @@ fi export OPENAI_API_KEY="$LLAMA_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway (use token auth for URL parameter support) +# Start Moltbot gateway (use token auth for URL parameter support) echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME CLAWDBOT_GATEWAY_TOKEN="$CLAWDBOT_WEB_PASSWORD" clawdbot gateway --auth token --token "$CLAWDBOT_WEB_PASSWORD" & +echo "Starting Moltbot gateway..." +MOLTBOT_STATE_DIR=$MOLTBOT_HOME MOLTBOT_GATEWAY_TOKEN="$MOLTBOT_WEB_PASSWORD" \ +"$BOT_CMD" gateway --auth token --token "$MOLTBOT_WEB_PASSWORD" & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready!" 
echo " llama.cpp API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" -echo " Web UI: https://-18789.proxy.runpod.net/?token=$CLAWDBOT_WEB_PASSWORD" -echo " Web UI Token: $CLAWDBOT_WEB_PASSWORD" +echo " Moltbot Gateway: ws://localhost:18789" +echo " Web UI: https://-18789.proxy.runpod.net/?token=$MOLTBOT_WEB_PASSWORD" +echo " Web UI Token: $MOLTBOT_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME" echo " Context: $MAX_MODEL_LEN tokens (200k!)" echo " VRAM: ~28GB / 32GB" diff --git a/models/glm47-flash-nvfp4-5090/Dockerfile b/models/glm47-flash-nvfp4-5090/Dockerfile index 157c028..4ad3a82 100644 --- a/models/glm47-flash-nvfp4-5090/Dockerfile +++ b/models/glm47-flash-nvfp4-5090/Dockerfile @@ -53,7 +53,7 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install tools RUN uv pip install --system "huggingface_hub[cli]" && \ - npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ + npm install -g --prefer-offline @anthropic-ai/claude-code moltbot && \ npm cache clean --force WORKDIR / @@ -63,8 +63,8 @@ ENV MODEL_NAME="GadflyII/GLM-4.7-Flash-NVFP4" \ SERVED_MODEL_NAME="glm-4.7-flash" \ MAX_MODEL_LEN="200000" \ VLLM_API_KEY="changeme" \ - CLAWDBOT_HOME="/workspace/.clawdbot" \ - CLAWDBOT_WORKSPACE="/workspace/clawd" + MOLTBOT_HOME="/workspace/.clawdbot" \ + MOLTBOT_WORKSPACE="/workspace/clawd" COPY entrypoint.sh benchmark.sh / RUN chmod +x /entrypoint.sh /benchmark.sh diff --git a/models/glm47-flash-nvfp4-5090/README.md b/models/glm47-flash-nvfp4-5090/README.md index eae8680..0eb8c8d 100644 --- a/models/glm47-flash-nvfp4-5090/README.md +++ b/models/glm47-flash-nvfp4-5090/README.md @@ -39,7 +39,7 @@ Full 200K context window with MLA for reduced KV cache memory. ### 1. Create RunPod Pod **Settings:** -- **Image**: `runpod/clawdbot-glm47-flash-nvfp4-5090:latest` +- **Image**: `runpod/moltbot-glm47-flash-nvfp4-5090:latest` - **GPU**: 1x RTX 5090 32GB - **Volume**: 100GB at `/workspace` (network storage) - **Container Disk**: 50GB @@ -54,7 +54,7 @@ Full 200K context window with MLA for reduced KV cache memory. | `HF_TOKEN` | Recommended | - | [HuggingFace token](https://huggingface.co/settings/tokens) for faster model downloads | | `TELEGRAM_BOT_TOKEN` | No | - | Telegram bot token | | `GITHUB_TOKEN` | No | - | GitHub token for `gh` CLI | -| `CLAWDBOT_WEB_PASSWORD` | No | `clawdbot` | Password for web UI | +| `MOLTBOT_WEB_PASSWORD` | No | `moltbot` | Password for web UI | ### 3. Test It diff --git a/models/glm47-flash-nvfp4-5090/entrypoint.sh b/models/glm47-flash-nvfp4-5090/entrypoint.sh index 5642d59..8ca4c21 100644 --- a/models/glm47-flash-nvfp4-5090/entrypoint.sh +++ b/models/glm47-flash-nvfp4-5090/entrypoint.sh @@ -75,11 +75,16 @@ fi VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +MOLTBOT_HOME="${MOLTBOT_HOME:-/workspace/.clawdbot}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -# Web UI password - users enter this to access the Clawdbot control panel -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +# Web UI password - users enter this to access the Moltbot control panel +MOLTBOT_WEB_PASSWORD="${MOLTBOT_WEB_PASSWORD:-moltbot}" + +BOT_CMD="moltbot" +if ! command -v "$BOT_CMD" >/dev/null 2>&1; then + BOT_CMD="clawdbot" +fi echo "Starting vLLM server..." 
echo " Model: $MODEL_PATH" @@ -128,11 +133,11 @@ if [ $WAITED -ge $MAX_WAIT ]; then # Don't exit - keep container running for debugging fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup Moltbot config +mkdir -p "$MOLTBOT_HOME" -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then + echo "Creating Moltbot config (legacy clawdbot.json)..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -141,9 +146,9 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - # Create a minimal config - clawdbot doctor will fix any missing fields + # Create a minimal config - moltbot doctor will fix any missing fields # contextTokens: 180000 leaves room for output within 200K context - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + cat > "$MOLTBOT_HOME/clawdbot.json" << EOF { "models": { "providers": { @@ -179,12 +184,12 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$MOLTBOT_HOME/clawdbot.json" fi -# Auto-fix config to match current Clawdbot version's schema -echo "Running clawdbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix 2>/dev/null || true +# Auto-fix config to match current Moltbot version's schema +echo "Running moltbot doctor to validate/fix config..." +MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix 2>/dev/null || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -204,19 +209,19 @@ fi export OPENAI_API_KEY="$VLLM_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway with password auth for web UI access +# Start Moltbot gateway with password auth for web UI access echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot gateway --auth password --password "$CLAWDBOT_WEB_PASSWORD" 2>/dev/null & +echo "Starting Moltbot gateway..." +MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" gateway --auth password --password "$MOLTBOT_WEB_PASSWORD" 2>/dev/null & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready! 
(RTX 5090 Blackwell SM120)" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " Moltbot Gateway: ws://localhost:18789" echo " Web UI: https://-18789.proxy.runpod.net" -echo " Web UI Password: $CLAWDBOT_WEB_PASSWORD" +echo " Web UI Password: $MOLTBOT_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME (NVFP4)" echo " Context: $MAX_MODEL_LEN tokens" echo " Cost: ~\$0.89/hr (36% savings vs A100)" diff --git a/models/glm47-reap-w4a16/Dockerfile b/models/glm47-reap-w4a16/Dockerfile index 9aba99f..58c74d6 100644 --- a/models/glm47-reap-w4a16/Dockerfile +++ b/models/glm47-reap-w4a16/Dockerfile @@ -30,13 +30,13 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64:$LD_LIBRARY_PATH # Install vLLM nightly (required for GLM-4.7 support) RUN uv pip install --system -U vllm --pre -# Install Clawdbot -RUN npm install -g --prefer-offline clawdbot@latest && \ +# Install Moltbot +RUN npm install -g --prefer-offline moltbot@latest && \ npm cache clean --force # Environment variables ENV HF_HOME=/workspace/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV MOLTBOT_STATE_DIR=/workspace/.clawdbot ENV MODEL_NAME=0xSero/GLM-4.7-REAP-40-W4A16 ENV SERVED_MODEL_NAME=glm-4.7-reap ENV VLLM_API_KEY=changeme @@ -53,7 +53,7 @@ RUN chmod +x /entrypoint.sh # Expose ports # 8000: vLLM API -# 18789: Clawdbot Gateway +# 18789: Moltbot Gateway # 22: SSH EXPOSE 8000 18789 22 diff --git a/models/glm47-reap-w4a16/README.md b/models/glm47-reap-w4a16/README.md index ff66fdc..e32188d 100644 --- a/models/glm47-reap-w4a16/README.md +++ b/models/glm47-reap-w4a16/README.md @@ -21,7 +21,7 @@ High-end option for maximum performance. ### 1. Create RunPod Pod -- **Image**: `yourusername/clawdbot-glm47-reap-w4a16:latest` +- **Image**: `yourusername/moltbot-glm47-reap-w4a16:latest` - **GPU**: 1x B200 180GB - **Volume**: 200GB at `/workspace` - **Container Disk**: 50GB @@ -59,7 +59,7 @@ Files persist on network volume `/workspace`: ``` /workspace/ ├── huggingface/ # Model cache -├── .clawdbot/ +├── .clawdbot/ # Legacy Moltbot state path │ ├── clawdbot.json # Config │ ├── agents/ # State │ └── telegram/ # Session diff --git a/models/glm47-reap-w4a16/entrypoint.sh b/models/glm47-reap-w4a16/entrypoint.sh index 973f989..4411988 100644 --- a/models/glm47-reap-w4a16/entrypoint.sh +++ b/models/glm47-reap-w4a16/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - GLM-4.7-REAP W4A16 + Clawdbot startup script for RunPod B200 +# entrypoint.sh - GLM-4.7-REAP W4A16 + Moltbot startup script for RunPod B200 set -e echo "============================================" -echo " GLM-4.7-REAP W4A16 + Clawdbot Startup" +echo " GLM-4.7-REAP W4A16 + Moltbot Startup" echo "============================================" # Configuration from environment @@ -14,18 +14,23 @@ MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}" GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.90}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-glm45}" HF_HOME="${HF_HOME:-/workspace/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +MOLTBOT_STATE_DIR="${MOLTBOT_STATE_DIR:-/workspace/.clawdbot}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" export HF_HOME -export CLAWDBOT_STATE_DIR +export MOLTBOT_STATE_DIR export PATH=/usr/local/cuda-13.1/bin:$PATH export CUDA_HOME=/usr/local/cuda-13.1 export LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64:$LD_LIBRARY_PATH # Ensure directories exist -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$MOLTBOT_STATE_DIR" /workspace/clawd + 
+BOT_CMD="moltbot" +if ! command -v "$BOT_CMD" >/dev/null 2>&1; then + BOT_CMD="clawdbot" +fi # Configure GitHub CLI if [ -n "$GITHUB_TOKEN" ]; then @@ -54,9 +59,9 @@ echo " Tool parser: $TOOL_CALL_PARSER" echo " CUDA: $(nvcc --version | grep release | awk '{print $5}' | tr -d ',')" echo "" -# Initialize Clawdbot config if not exists -if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize Moltbot config if not exists +if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then + echo "Creating Moltbot configuration (legacy clawdbot.json)..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -65,7 +70,7 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$MOLTBOT_STATE_DIR/clawdbot.json" << EOF { "agents": { "defaults": { @@ -100,10 +105,10 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$MOLTBOT_STATE_DIR/clawdbot.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $MOLTBOT_STATE_DIR/clawdbot.json - preserving it" fi # Build vLLM command @@ -148,10 +153,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start Moltbot gateway echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_STATE_DIR clawdbot gateway & +echo "Starting Moltbot gateway..." +MOLTBOT_STATE_DIR=$MOLTBOT_STATE_DIR "$BOT_CMD" gateway & GATEWAY_PID=$! echo "" @@ -159,7 +164,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " Moltbot Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index dba14ae..ebe77f4 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - Clawdbot + vLLM startup script for RunPod +# entrypoint.sh - Moltbot + vLLM startup script for RunPod set -e echo "============================================" -echo " Clawdbot + vLLM Startup" +echo " Moltbot + vLLM Startup" echo "============================================" # Configuration from environment @@ -15,14 +15,19 @@ GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.90}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-hermes}" TENSOR_PARALLEL_SIZE="${TENSOR_PARALLEL_SIZE:-auto}" HF_HOME="${HF_HOME:-/workspace/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +MOLTBOT_STATE_DIR="${MOLTBOT_STATE_DIR:-/workspace/.clawdbot}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" export HF_HOME -export CLAWDBOT_STATE_DIR +export MOLTBOT_STATE_DIR + +BOT_CMD="moltbot" +if ! 
command -v "$BOT_CMD" >/dev/null 2>&1; then + BOT_CMD="clawdbot" +fi # Ensure directories exist -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$MOLTBOT_STATE_DIR" /workspace/clawd # Auto-detect tensor parallel size if [ "$TENSOR_PARALLEL_SIZE" = "auto" ]; then @@ -39,9 +44,9 @@ echo " Tensor parallel: $TENSOR_PARALLEL_SIZE" echo " Tool parser: $TOOL_CALL_PARSER" echo "" -# Initialize Clawdbot config if not exists -if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize Moltbot config if not exists +if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then + echo "Creating Moltbot configuration (legacy clawdbot.json)..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -50,7 +55,7 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$MOLTBOT_STATE_DIR/clawdbot.json" << EOF { "agents": { "defaults": { @@ -85,16 +90,16 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$MOLTBOT_STATE_DIR/clawdbot.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $MOLTBOT_STATE_DIR/clawdbot.json - preserving it" fi -# Initialize Clawdbot workspace if empty +# Initialize Moltbot workspace if empty if [ ! -f "/workspace/clawd/AGENTS.md" ]; then - echo "Initializing Clawdbot workspace..." - clawdbot setup --non-interactive --accept-risk --workspace /workspace/clawd 2>/dev/null || true + echo "Initializing Moltbot workspace..." + "$BOT_CMD" setup --non-interactive --accept-risk --workspace /workspace/clawd 2>/dev/null || true fi # Build vLLM command @@ -138,10 +143,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start Moltbot gateway echo "" -echo "Starting Clawdbot gateway..." -clawdbot gateway & +echo "Starting Moltbot gateway..." +"$BOT_CMD" gateway & GATEWAY_PID=$! 
echo "" @@ -149,7 +154,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " Moltbot Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/scripts/setup-clawdbot.sh b/scripts/setup-moltbot.sh old mode 100755 new mode 100644 similarity index 80% rename from scripts/setup-clawdbot.sh rename to scripts/setup-moltbot.sh index 61889c2..0efd285 --- a/scripts/setup-clawdbot.sh +++ b/scripts/setup-moltbot.sh @@ -1,5 +1,5 @@ #!/bin/bash -# setup-clawdbot.sh - Install and configure Clawdbot on RunPod +# setup-moltbot.sh - Install and configure Moltbot on RunPod # Prerequisites: vLLM server running on port 8000 set -e @@ -21,13 +21,13 @@ VLLM_HOST="${VLLM_HOST:-localhost}" VLLM_PORT="${VLLM_PORT:-8000}" VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-qwen3-30b-a3b}" -CLAWDBOT_CONFIG_DIR="${CLAWDBOT_CONFIG_DIR:-$HOME/.clawdbot}" +MOLTBOT_CONFIG_DIR="${MOLTBOT_CONFIG_DIR:-$HOME/.clawdbot}" RUNPOD_POD_ID="${RUNPOD_POD_ID:-}" # Print banner echo "" echo "===========================================" -echo " Clawdbot Setup Script" +echo " Moltbot Setup Script" echo "===========================================" echo "" @@ -55,10 +55,14 @@ if ! command -v npm &> /dev/null; then fi log_info "npm version: $(npm --version)" -# Step 2: Install Clawdbot -log_info "Installing Clawdbot..." -npm install -g clawdbot@latest -log_success "Clawdbot installed: $(clawdbot --version 2>/dev/null || echo 'version check failed')" +# Step 2: Install Moltbot +log_info "Installing Moltbot..." +npm install -g moltbot@latest +BOT_CMD="moltbot" +if ! command -v "$BOT_CMD" &> /dev/null; then + BOT_CMD="clawdbot" +fi +log_success "Moltbot installed: $("$BOT_CMD" --version 2>/dev/null || echo 'version check failed')" # Step 3: Wait for vLLM to be ready log_info "Waiting for vLLM server to be ready..." @@ -86,9 +90,9 @@ MODELS_RESPONSE=$(curl -s "http://${VLLM_HOST}:${VLLM_PORT}/v1/models" \ -H "Authorization: Bearer ${VLLM_API_KEY}") echo "Available models: $MODELS_RESPONSE" -# Step 4: Create Clawdbot configuration directory -log_info "Creating Clawdbot configuration..." -mkdir -p "$CLAWDBOT_CONFIG_DIR" +# Step 4: Create Moltbot configuration directory +log_info "Creating Moltbot configuration..." +mkdir -p "$MOLTBOT_CONFIG_DIR" # Determine the base URL for the vLLM endpoint if [ -n "$RUNPOD_POD_ID" ]; then @@ -99,8 +103,8 @@ else VLLM_BASE_URL="http://${VLLM_HOST}:${VLLM_PORT}/v1" fi -# Step 5: Create Clawdbot configuration file -cat > "$CLAWDBOT_CONFIG_DIR/clawdbot.json" << EOF +# Step 5: Create Moltbot configuration file +cat > "$MOLTBOT_CONFIG_DIR/clawdbot.json" << EOF { "agents": { "defaults": { @@ -132,15 +136,15 @@ cat > "$CLAWDBOT_CONFIG_DIR/clawdbot.json" << EOF } EOF -log_success "Clawdbot configuration created at $CLAWDBOT_CONFIG_DIR/clawdbot.json" +log_success "Moltbot configuration created at $MOLTBOT_CONFIG_DIR/clawdbot.json (legacy file name)" -# Step 6: Test Clawdbot connection -log_info "Testing Clawdbot configuration..." +# Step 6: Test Moltbot connection +log_info "Testing Moltbot configuration..." 
echo "" echo "Configuration summary:" echo " vLLM URL: $VLLM_BASE_URL" echo " Model: $SERVED_MODEL_NAME" -echo " Config dir: $CLAWDBOT_CONFIG_DIR" +echo " Config dir: $MOLTBOT_CONFIG_DIR" echo "" # Test a simple completion @@ -166,11 +170,11 @@ echo "===========================================" echo " Setup Complete!" echo "===========================================" echo "" -echo "To start Clawdbot, run:" -echo " clawdbot" +echo "To start Moltbot, run:" +echo " moltbot" echo "" echo "To start with daemon mode:" -echo " clawdbot onboard --install-daemon" +echo " moltbot onboard --install-daemon" echo "" -echo "Configuration file: $CLAWDBOT_CONFIG_DIR/clawdbot.json" +echo "Configuration file: $MOLTBOT_CONFIG_DIR/clawdbot.json" echo "" diff --git a/scripts/start-vllm.sh b/scripts/start-vllm.sh index 4af4bdd..75466b6 100755 --- a/scripts/start-vllm.sh +++ b/scripts/start-vllm.sh @@ -1,5 +1,5 @@ #!/bin/bash -# start-vllm.sh - vLLM startup script for Clawdbot on RunPod +# start-vllm.sh - vLLM startup script for Moltbot on RunPod # Handles model download, GPU detection, and vLLM server startup set -e @@ -36,7 +36,7 @@ export HF_HOME # Print banner echo "" echo "===========================================" -echo " Clawdbot vLLM Server Startup" +echo " Moltbot vLLM Server Startup" echo "===========================================" echo "" diff --git a/templates/clawdbot-vllm.json b/templates/moltbot-vllm.json similarity index 97% rename from templates/clawdbot-vllm.json rename to templates/moltbot-vllm.json index 726f483..2226522 100644 --- a/templates/clawdbot-vllm.json +++ b/templates/moltbot-vllm.json @@ -1,7 +1,7 @@ { "tiers": { "tier1": { - "name": "clawdbot-vllm-qwen3", + "name": "moltbot-vllm-qwen3", "description": "Tier 1: Qwen3-30B-A3B on 1x H100 (~$2/hr) - Best for validation", "imageName": "vllm/vllm-openai:v0.12.0", "containerDiskInGb": 50, @@ -25,7 +25,7 @@ ] }, "tier2": { - "name": "clawdbot-vllm-mimo", + "name": "moltbot-vllm-mimo", "description": "Tier 2: MiMo-V2-Flash on 2x H100 (~$4/hr) - Fastest inference", "imageName": "vllm/vllm-openai:v0.12.0", "containerDiskInGb": 50, @@ -48,7 +48,7 @@ ] }, "tier3": { - "name": "clawdbot-vllm-glm47", + "name": "moltbot-vllm-glm47", "description": "Tier 3: GLM-4.7-FP8 on 4x H100 or 2x H200 (~$7-8/hr) - SOTA tool calling", "imageName": "vllm/vllm-openai:latest", "containerDiskInGb": 100, @@ -73,7 +73,7 @@ ] }, "tier3_h200": { - "name": "clawdbot-vllm-glm47-h200", + "name": "moltbot-vllm-glm47-h200", "description": "Tier 3 Alt: GLM-4.7-FP8 on 2x H200 (~$7/hr) - Best value for SOTA", "imageName": "vllm/vllm-openai:latest", "containerDiskInGb": 100, diff --git a/templates/runpod-template.json b/templates/runpod-template.json index f070e66..a603969 100644 --- a/templates/runpod-template.json +++ b/templates/runpod-template.json @@ -1,7 +1,7 @@ { - "name": "clawdbot-vllm", - "description": "Clawdbot AI assistant with vLLM for local LLM inference. Includes Telegram integration.", - "imageName": "your-dockerhub-username/clawdbot-vllm:latest", + "name": "moltbot-vllm", + "description": "Moltbot AI assistant with vLLM for local LLM inference. 
Includes Telegram integration.", + "imageName": "your-dockerhub-username/moltbot-vllm:latest", "containerDiskInGb": 50, "volumeInGb": 150, "volumeMountPath": "/workspace", @@ -15,8 +15,8 @@ "TOOL_CALL_PARSER": "hermes", "TENSOR_PARALLEL_SIZE": "auto", "HF_HOME": "/workspace/huggingface", - "CLAWDBOT_STATE_DIR": "/workspace/.clawdbot", + "MOLTBOT_STATE_DIR": "/workspace/.clawdbot", "TELEGRAM_BOT_TOKEN": "" }, - "readme": "# Clawdbot + vLLM\n\nAI coding assistant with local LLM inference.\n\n## Quick Start\n1. Set TELEGRAM_BOT_TOKEN env var (get from @BotFather)\n2. Start the pod - services auto-start\n3. Message your bot on Telegram\n4. First time: approve pairing via SSH: `clawdbot pairing list telegram` then `clawdbot pairing approve telegram CODE --notify`\n\n## Persistence\n- Config & pairings stored in /workspace/.clawdbot (survives restarts)\n- Model cache in /workspace/huggingface\n\n## Environment Variables\n- `MODEL_NAME`: HuggingFace model ID\n- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token\n- `VLLM_API_KEY`: API key for vLLM\n- `MAX_MODEL_LEN`: Context length\n\n## Ports\n- 8000: vLLM API\n- 18789: Clawdbot Gateway" + "readme": "# Moltbot + vLLM\n\nAI coding assistant with local LLM inference.\n\n## Quick Start\n1. Set TELEGRAM_BOT_TOKEN env var (get from @BotFather)\n2. Start the pod - services auto-start\n3. Message your bot on Telegram\n4. First time: approve pairing via SSH: `moltbot pairing list telegram` then `moltbot pairing approve telegram CODE --notify`\n\n## Persistence\n- Config & pairings stored in /workspace/.clawdbot (legacy path used by Moltbot)\n- Model cache in /workspace/huggingface\n\n## Environment Variables\n- `MODEL_NAME`: HuggingFace model ID\n- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token\n- `VLLM_API_KEY`: API key for vLLM\n- `MAX_MODEL_LEN`: Context length\n\n## Ports\n- 8000: vLLM API\n- 18789: Moltbot Gateway" } From e843e25cc6e138d3482651f4350ed367c22ae1c3 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 11:29:44 +0100 Subject: [PATCH 02/12] chore: document branch image tags Clarify that branch builds publish tags using the branch name with slashes normalized. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0b12775..9789772 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ runpod-moltbot/ Images are built automatically on: - Push to `main` → tagged as `:latest` -- Push to other branches → tagged as `:dev-{branch-name}` (e.g., `:dev-feature-xyz`) +- Push to other branches → tagged as `:{branch-name}` (slashes → `-`, e.g., `:feature-xyz`) - Push git tag (e.g., `v1.0.0`) → tagged as `:v1.0.0` + `:latest` - Pull requests → build only, no push (validation) - Manual workflow dispatch → select specific model From 5998e37077959fce766781de2e77b2ab5b680446 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 13:15:04 +0100 Subject: [PATCH 03/12] fix: tag PR images by head branch Push images on branch and PR builds using the source branch name and allow all branches/tags to trigger builds. 
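
For reference, the tag derivation used by the workflow's shell steps can be previewed locally with roughly the following logic. This is an illustrative sketch, not part of the workflow file; it uses the standard GitHub Actions environment variables (GITHUB_EVENT_NAME, GITHUB_HEAD_REF, GITHUB_REF) in place of the `${{ ... }}` expressions.

```bash
#!/usr/bin/env bash
# Preview which image tag a build would publish, mirroring the workflow logic.
set -euo pipefail

EVENT="${GITHUB_EVENT_NAME:-push}"
if [[ "$EVENT" == "pull_request" ]]; then
  # On PR builds the source branch is exposed as GITHUB_HEAD_REF.
  BRANCH_NAME="${GITHUB_HEAD_REF:?GITHUB_HEAD_REF is set on pull_request events}"
else
  BRANCH_NAME="${GITHUB_REF##refs/heads/}"
fi

# Docker tags cannot contain slashes, so feat/xyz becomes feat-xyz.
BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g')

if [[ "${GITHUB_REF:-}" == refs/tags/* ]]; then
  VERSION="${GITHUB_REF##refs/tags/}"
elif [[ "${GITHUB_REF:-}" == "refs/heads/main" ]]; then
  VERSION="latest"
else
  VERSION="$BRANCH_TAG"
fi

echo "image tag: $VERSION"
```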
--- .github/workflows/docker-build.yml | 90 ++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 2c54159..c9ea743 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -2,7 +2,8 @@ name: Build and Push Docker Images on: push: - branches: [main, feat/add-model-folders, gguf-v3] + branches: ['**'] + tags: ['*'] paths: - 'models/**' - 'Dockerfile' @@ -26,12 +27,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -44,6 +39,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -52,16 +53,27 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: context: models/glm47-flash-awq-4bit - push: true + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | ${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-awq-4bit:${{ env.VERSION }} ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} @@ -81,12 +93,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -99,6 +105,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -107,16 +119,27 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ 
github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: context: models/glm47-flash-nvfp4-5090 - push: true + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | ${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-nvfp4-5090:${{ env.VERSION }} ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} @@ -134,12 +157,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -152,6 +169,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -160,16 +183,27 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: context: models/glm47-flash-gguf-llamacpp - push: true + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | ${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-gguf:${{ env.VERSION }} ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }} From 328269d737a538d4dff5050c4c502045682edcba Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 13:21:55 +0100 Subject: [PATCH 04/12] fix: enforce moltbot binary in gguf entrypoint Fail fast when moltbot is missing so the rename does not silently fall back. --- models/glm47-flash-gguf-llamacpp/entrypoint.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/models/glm47-flash-gguf-llamacpp/entrypoint.sh b/models/glm47-flash-gguf-llamacpp/entrypoint.sh index 8f67512..787222d 100644 --- a/models/glm47-flash-gguf-llamacpp/entrypoint.sh +++ b/models/glm47-flash-gguf-llamacpp/entrypoint.sh @@ -84,7 +84,10 @@ MOLTBOT_WEB_PASSWORD="${MOLTBOT_WEB_PASSWORD:-moltbot}" BOT_CMD="moltbot" if ! command -v "$BOT_CMD" >/dev/null 2>&1; then - BOT_CMD="clawdbot" + echo "ERROR: moltbot command not found in PATH" + echo "PATH=$PATH" + echo "Container staying alive for debugging." 
+ sleep infinity fi echo "Starting llama.cpp server..." From 11c416bf7579105bd75246f9128c23fface356bd Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 13:22:43 +0100 Subject: [PATCH 05/12] fix: avoid duplicate builds Trigger image builds on pull requests (branch tag) and release tags only, with documentation to match. --- .github/workflows/docker-build.yml | 5 ----- README.md | 4 +--- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index c9ea743..0afc94a 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -2,12 +2,7 @@ name: Build and Push Docker Images on: push: - branches: ['**'] tags: ['*'] - paths: - - 'models/**' - - 'Dockerfile' - - '.github/workflows/docker-build.yml' pull_request: paths: - 'models/**' diff --git a/README.md b/README.md index 9789772..98f3a54 100644 --- a/README.md +++ b/README.md @@ -124,10 +124,8 @@ runpod-moltbot/ ## GitHub Actions Images are built automatically on: -- Push to `main` → tagged as `:latest` -- Push to other branches → tagged as `:{branch-name}` (slashes → `-`, e.g., `:feature-xyz`) +- Pull requests → tagged as `:{branch-name}` (slashes → `-`, e.g., `:feature-xyz`) - Push git tag (e.g., `v1.0.0`) → tagged as `:v1.0.0` + `:latest` -- Pull requests → build only, no push (validation) - Manual workflow dispatch → select specific model ### Required Setup From d38cf84f4eed127d95a4436ef3ac9bd2d085c354 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 13:27:47 +0100 Subject: [PATCH 06/12] fix: publish latest on main Trigger builds on main pushes so :latest is published while keeping PR builds for branches. --- .github/workflows/docker-build.yml | 1 + README.md | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 0afc94a..5ee216b 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -2,6 +2,7 @@ name: Build and Push Docker Images on: push: + branches: [main] tags: ['*'] pull_request: paths: diff --git a/README.md b/README.md index 98f3a54..8a93a75 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ runpod-moltbot/ Images are built automatically on: - Pull requests → tagged as `:{branch-name}` (slashes → `-`, e.g., `:feature-xyz`) +- Push to `main` → tagged as `:latest` - Push git tag (e.g., `v1.0.0`) → tagged as `:v1.0.0` + `:latest` - Manual workflow dispatch → select specific model From bbb2b7ce053f8774defcec504c2159b86aee08bf Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 13:29:36 +0100 Subject: [PATCH 07/12] fix: install moltbot CLI in gguf image Pin to the beta tag so the image gets the moltbot binary. --- models/glm47-flash-gguf-llamacpp/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/glm47-flash-gguf-llamacpp/Dockerfile b/models/glm47-flash-gguf-llamacpp/Dockerfile index 5830188..cee1fe4 100644 --- a/models/glm47-flash-gguf-llamacpp/Dockerfile +++ b/models/glm47-flash-gguf-llamacpp/Dockerfile @@ -67,7 +67,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ RUN python3 -m pip install --no-cache-dir huggingface_hub # Install Moltbot and Claude Code -RUN npm install -g --prefer-offline @anthropic-ai/claude-code moltbot && \ +# NOTE: npm "latest" lacks a CLI; use beta tag for moltbot binary. 
+RUN npm install -g --prefer-offline @anthropic-ai/claude-code moltbot@2026.1.27-beta.1 && \ npm cache clean --force WORKDIR / From 929c9be0831ff537db411491617b163d220ff185 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 13:31:01 +0100 Subject: [PATCH 08/12] fix: install clawdbot with moltbot shim Use the supported clawdbot package and provide a moltbot symlink. --- models/glm47-flash-gguf-llamacpp/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/models/glm47-flash-gguf-llamacpp/Dockerfile b/models/glm47-flash-gguf-llamacpp/Dockerfile index cee1fe4..d8548a4 100644 --- a/models/glm47-flash-gguf-llamacpp/Dockerfile +++ b/models/glm47-flash-gguf-llamacpp/Dockerfile @@ -66,10 +66,10 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install huggingface_hub for model downloads (using Python API, not CLI) RUN python3 -m pip install --no-cache-dir huggingface_hub -# Install Moltbot and Claude Code -# NOTE: npm "latest" lacks a CLI; use beta tag for moltbot binary. -RUN npm install -g --prefer-offline @anthropic-ai/claude-code moltbot@2026.1.27-beta.1 && \ - npm cache clean --force +# Install Clawdbot and Claude Code (moltbot binary is a symlink) +RUN npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ + npm cache clean --force && \ + ln -s "$(npm config get prefix)/bin/clawdbot" "$(npm config get prefix)/bin/moltbot" WORKDIR / From 3f7d76d2748911167bc5fa05af0b9e8895e631f2 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 14:47:24 +0100 Subject: [PATCH 09/12] fix: use clawdbot state dir env vars Ensure clawdbot reads the intended state directory in the gguf entrypoint. --- models/glm47-flash-gguf-llamacpp/entrypoint.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/models/glm47-flash-gguf-llamacpp/entrypoint.sh b/models/glm47-flash-gguf-llamacpp/entrypoint.sh index 787222d..a91963c 100644 --- a/models/glm47-flash-gguf-llamacpp/entrypoint.sh +++ b/models/glm47-flash-gguf-llamacpp/entrypoint.sh @@ -78,6 +78,9 @@ LLAMA_API_KEY="${LLAMA_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" MOLTBOT_HOME="${MOLTBOT_HOME:-/workspace/.clawdbot}" +CLAWDBOT_STATE_DIR="$MOLTBOT_HOME" +MOLTBOT_STATE_DIR="$MOLTBOT_HOME" +export CLAWDBOT_STATE_DIR MOLTBOT_STATE_DIR TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" MOLTBOT_WEB_PASSWORD="${MOLTBOT_WEB_PASSWORD:-moltbot}" @@ -190,7 +193,7 @@ fi # Auto-fix config echo "Running moltbot doctor to validate/fix config..." -MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix || true +CLAWDBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -213,7 +216,7 @@ export OPENAI_BASE_URL="http://localhost:8000/v1" # Start Moltbot gateway (use token auth for URL parameter support) echo "" echo "Starting Moltbot gateway..." -MOLTBOT_STATE_DIR=$MOLTBOT_HOME MOLTBOT_GATEWAY_TOKEN="$MOLTBOT_WEB_PASSWORD" \ +CLAWDBOT_STATE_DIR=$MOLTBOT_HOME MOLTBOT_GATEWAY_TOKEN="$MOLTBOT_WEB_PASSWORD" \ "$BOT_CMD" gateway --auth token --token "$MOLTBOT_WEB_PASSWORD" & GATEWAY_PID=$! From f0227782f08ef2ae2c1eaddd8b291330704f4929 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Thu, 29 Jan 2026 15:35:35 +0100 Subject: [PATCH 10/12] fix: harden clawdbot state dir setup Create required state directories and lock down permissions after doctor. 
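
The intent is that the state dir stays owner-only (700 for directories, 600 for the config) even after `doctor --fix` rewrites the file. A quick spot-check on a running pod could look like the sketch below; paths mirror the entrypoint defaults, and the script itself is illustrative rather than shipped in the image.

```bash
#!/usr/bin/env bash
# Print the permissions of the hardened state-dir layout for a quick audit.
MOLTBOT_HOME="${MOLTBOT_HOME:-/workspace/.clawdbot}"

for dir in "$MOLTBOT_HOME" \
           "$MOLTBOT_HOME/agents" \
           "$MOLTBOT_HOME/agents/main" \
           "$MOLTBOT_HOME/agents/main/sessions" \
           "$MOLTBOT_HOME/credentials"; do
  printf '%-55s %s\n' "$dir" \
    "$( [ -d "$dir" ] && stat -c '%a' "$dir" || echo MISSING )"
done

# The config should end up 600 after the entrypoint re-chmods it post-doctor.
printf '%-55s %s\n' "$MOLTBOT_HOME/clawdbot.json" \
  "$( [ -f "$MOLTBOT_HOME/clawdbot.json" ] && stat -c '%a' "$MOLTBOT_HOME/clawdbot.json" || echo MISSING )"
```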
--- models/glm47-flash-gguf-llamacpp/entrypoint.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/models/glm47-flash-gguf-llamacpp/entrypoint.sh b/models/glm47-flash-gguf-llamacpp/entrypoint.sh index a91963c..ffbade5 100644 --- a/models/glm47-flash-gguf-llamacpp/entrypoint.sh +++ b/models/glm47-flash-gguf-llamacpp/entrypoint.sh @@ -139,7 +139,9 @@ if [ $WAITED -ge $MAX_WAIT ]; then fi # Setup Moltbot config -mkdir -p "$MOLTBOT_HOME" +mkdir -p "$MOLTBOT_HOME" "$MOLTBOT_HOME/agents/main/sessions" "$MOLTBOT_HOME/credentials" +chmod 700 "$MOLTBOT_HOME" "$MOLTBOT_HOME/agents" "$MOLTBOT_HOME/agents/main" \ + "$MOLTBOT_HOME/agents/main/sessions" "$MOLTBOT_HOME/credentials" 2>/dev/null || true if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then echo "Creating Moltbot config (legacy clawdbot.json)..." @@ -194,6 +196,7 @@ fi # Auto-fix config echo "Running moltbot doctor to validate/fix config..." CLAWDBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix || true +chmod 600 "$MOLTBOT_HOME/clawdbot.json" 2>/dev/null || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then From ccf2c1ade3086d7fb236d22b72ebf66357e9b860 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Fri, 30 Jan 2026 11:27:39 +0100 Subject: [PATCH 11/12] docs: refresh model matrix Rewrite the root README to focus on Moltbot images, context sizes, and status summary. --- README.md | 331 +++++++++--------------------------------------------- 1 file changed, 52 insertions(+), 279 deletions(-) diff --git a/README.md b/README.md index 8a93a75..a7ce1e1 100644 --- a/README.md +++ b/README.md @@ -1,291 +1,64 @@ -# Moltbot on RunPod with vLLM - -Run Moltbot with GLM-4.7 and other open-source coding models on RunPod using vLLM. Chat with your AI assistant via Telegram! - -## Model Comparison - -| Model | GPU | VRAM | Cost/hr | Context | Folder | -|-------|-----|------|---------|---------|--------| -| **Base (Qwen2.5-7B)** | Any | 16GB | $0.50 | 16k | `Dockerfile` | -| **GLM-4.7-Flash FP16** | H100/A100 80GB | 56GB | $1.20-1.99 | 32k-64k | `models/glm47-flash-fp16/` | -| **GLM-4.7-Flash AWQ 4-bit** | A100 80GB | 71GB | $1.19 | 114k | `models/glm47-flash-awq-4bit/` | -| **GLM-4.7-REAP W4A16** | B200 | 108GB | $5.19 | 32k | `models/glm47-reap-w4a16/` | - -### Recommended: GLM-4.7-Flash AWQ 4-bit - -Best value option with full 114k context window at $1.19/hr on A100 80GB. - -## Quick Start - -### 1. Choose Your Model - -```bash -# GLM-4.7-Flash AWQ 4-bit (Best value, A100 80GB) -IMAGE=yourusername/moltbot-glm47-flash-awq-4bit:latest - -# GLM-4.7-Flash FP16 (Full precision, H100/A100 80GB) -IMAGE=yourusername/moltbot-glm47-flash-fp16:latest - -# GLM-4.7-REAP W4A16 (High-end, B200) -IMAGE=yourusername/moltbot-glm47-reap-w4a16:latest - -# Base (Qwen2.5-7B, any GPU) -IMAGE=yourusername/moltbot-vllm:latest -``` - -### 2. Create RunPod Pod - -- **Image**: Your chosen image from above -- **GPU**: Match model requirements -- **Volume**: 150GB at `/workspace` -- **Container Disk**: 50-100GB (depending on model) -- **Ports**: `8000/http, 18789/http, 22/tcp` - -### 3. Set Environment Variables - -```bash -VLLM_API_KEY=your-secure-key # Required -TELEGRAM_BOT_TOKEN=your-telegram-token # Optional -GITHUB_TOKEN=ghp_xxx # Optional -``` - -### 4. 
Test It - -```bash -# Health check -curl http://localhost:8000/health - -# Chat completion -curl http://localhost:8000/v1/chat/completions \ - -H "Authorization: Bearer $VLLM_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "glm-4.7-flash", - "messages": [{"role": "user", "content": "Hello!"}] - }' -``` - -## Docker Images - -Images are automatically built and pushed to Docker Hub via GitHub Actions. - -| Image | Description | -|-------|-------------| -| `moltbot-glm47-flash-awq-4bit` | GLM-4.7-Flash AWQ 4-bit for A100 80GB | -| `moltbot-glm47-flash-fp16` | GLM-4.7-Flash FP16 for H100/A100 80GB | -| `moltbot-glm47-reap-w4a16` | GLM-4.7-REAP W4A16 for B200 | -| `moltbot-vllm` | Base image with Qwen2.5-7B | - -## Project Structure - -``` -runpod-moltbot/ -├── README.md # This file -├── .github/ -│ └── workflows/ -│ └── docker-build.yml # Build & push to Docker Hub -│ -├── models/ -│ ├── glm47-flash-fp16/ # Full precision FP16 (H100/A100 80GB) -│ │ ├── README.md -│ │ ├── Dockerfile -│ │ └── entrypoint.sh -│ │ -│ ├── glm47-flash-awq-4bit/ # AWQ 4-bit quantized (A100 80GB) -│ │ ├── README.md -│ │ ├── Dockerfile -│ │ └── entrypoint.sh -│ │ -│ └── glm47-reap-w4a16/ # Pruned W4A16 quantized (B200) -│ ├── README.md -│ ├── Dockerfile -│ └── entrypoint.sh -│ -├── scripts/ -│ ├── setup-moltbot.sh -│ └── start-vllm.sh -│ -├── config/ -│ ├── moltbot.json -│ └── workspace/ -│ -├── templates/ -│ └── moltbot-vllm.json -│ -├── tests/ -│ ├── test-vllm.sh -│ └── test-tool-calling.sh -│ -├── Dockerfile # Base image (Qwen2.5-7B) -├── docker-compose.yml -└── .env.example -``` - -## GitHub Actions - -Images are built automatically on: -- Pull requests → tagged as `:{branch-name}` (slashes → `-`, e.g., `:feature-xyz`) -- Push to `main` → tagged as `:latest` -- Push git tag (e.g., `v1.0.0`) → tagged as `:v1.0.0` + `:latest` -- Manual workflow dispatch → select specific model - -### Required Setup - -**Secrets** (Repository → Settings → Secrets → Actions): - -| Secret | Description | -|--------|-------------| -| `DOCKERHUB_USERNAME` | Your Docker Hub username | -| `DOCKERHUB_TOKEN` | Docker Hub access token (not password) | - -**Variables** (Repository → Settings → Variables → Actions): - -| Variable | Description | -|----------|-------------| -| `DOCKERHUB_REPO` | (Optional) Custom repo name, defaults to username | - -### Manual Build - +# Moltbot on RunPod: self-contained LLM images + +This repository provides Docker images that bundle **Moltbot** with different LLMs so you can run a fully self-contained assistant on RunPod (or any GPU host). Each model variant has its own folder under `models/` with a dedicated README and startup script. 
+ +## Model matrix (status + context) + +| Image tag | Backend | Weights | GPU target | Context | VRAM (approx) | Status | Notes | +|----------|---------|---------|------------|---------|----------------|--------|-------| +| `moltbot-glm47-flash-awq-4bit` | vLLM | `cyankiwi/GLM-4.7-Flash-AWQ-4bit` | A100 80GB | 114k | ~75GB | Working | Best value on A100; long context | +| `moltbot-glm47-flash-fp16` | vLLM | `zai-org/GLM-4.7-Flash` | H100/A100 80GB | 32k-64k | ~56GB+ | Working | Full precision | +| `moltbot-glm47-flash-gguf` | llama.cpp | `unsloth/GLM-4.7-Flash-GGUF` (Q4_K_M) | RTX 5090 32GB | 200k | ~28GB | Working | Recommended for 5090 | +| `moltbot-glm47-flash-nvfp4-5090` | vLLM | `GadflyII/GLM-4.7-Flash-NVFP4` | RTX 5090 32GB | 200k | ~30GB | Not working | vLLM MLA issues on Blackwell | +| `moltbot-glm47-reap-w4a16` | vLLM | `0xSero/GLM-4.7-REAP-40-W4A16` | B200 180GB | 32k | ~108GB | Working | High-end B200 | +| `moltbot-vllm` | vLLM | `Qwen/Qwen2.5-Coder-7B-Instruct` | 16GB+ | 16k | ~16GB | Working | Base image | + +Notes: +- Context values are defaults; some variants allow tuning via `MAX_MODEL_LEN`. +- NVFP4 status details live in `models/glm47-flash-nvfp4-5090/ISSUES.md`. + +## Quick start + +1. **Pick an image** from the table above. +2. **Create a RunPod pod**: + - Volume: 150GB at `/workspace` + - Ports: `8000/http, 18789/http, 22/tcp` +3. **Set environment variables**: + - `VLLM_API_KEY` (for vLLM variants) + - `MOLTBOT_WEB_PASSWORD` (web UI token) + - `HF_TOKEN` (optional, faster downloads) + - `TELEGRAM_BOT_TOKEN` (optional) + - For GGUF + llama.cpp: use `LLAMA_API_KEY` instead of `VLLM_API_KEY` + +4. **Health check**: ```bash -# Build locally -docker build -t moltbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ -docker build -t moltbot-glm47-flash-fp16 models/glm47-flash-fp16/ -docker build -t moltbot-glm47-reap-w4a16 models/glm47-reap-w4a16/ - -# Push to Docker Hub -docker tag moltbot-glm47-flash-awq-4bit yourusername/moltbot-glm47-flash-awq-4bit:latest -docker push yourusername/moltbot-glm47-flash-awq-4bit:latest -``` - -## Configuration - -### Environment Variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `VLLM_API_KEY` | `changeme` | API key for vLLM authentication | -| `MODEL_NAME` | Model-specific | HuggingFace model ID | -| `SERVED_MODEL_NAME` | `glm-4.7-flash` | Model name in API responses | -| `MAX_MODEL_LEN` | Auto-detected | Maximum context length | -| `GPU_MEMORY_UTILIZATION` | `0.92` | GPU memory to use | -| `TELEGRAM_BOT_TOKEN` | | Telegram bot token from @BotFather | -| `GITHUB_TOKEN` | | GitHub PAT for git/gh operations | - -### Moltbot Configuration - -Config is auto-generated at `/workspace/.clawdbot/clawdbot.json` (legacy path used by Moltbot): - -```json -{ - "models": { - "providers": { - "local-vllm": { - "baseUrl": "http://localhost:8000/v1", - "apiKey": "your-vllm-api-key", - "api": "openai-completions" - } - } - } -} -``` - -## Telegram Setup - -1. Create a bot with [@BotFather](https://t.me/BotFather) -2. Copy the bot token -3. Set `TELEGRAM_BOT_TOKEN` environment variable -4. Start or restart the pod -5. Message your bot on Telegram! - -## GitHub Authentication - -For git operations inside the container: - -1. Create a [GitHub Personal Access Token](https://github.com/settings/tokens) -2. Select scopes: `repo`, `read:org`, `workflow` -3. Set `GITHUB_TOKEN` environment variable -4. 
Token is auto-configured on startup - -## Testing - -```bash -# Basic health check curl http://localhost:8000/health - -# List models -curl http://localhost:8000/v1/models \ - -H "Authorization: Bearer $VLLM_API_KEY" - -# Tool calling test -curl http://localhost:8000/v1/chat/completions \ - -H "Authorization: Bearer $VLLM_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "glm-4.7-flash", - "messages": [{"role": "user", "content": "What is 2+2?"}], - "tools": [{ - "type": "function", - "function": { - "name": "calculate", - "description": "Perform a calculation", - "parameters": { - "type": "object", - "properties": { - "expression": {"type": "string"} - } - } - } - }] - }' ``` -## Troubleshooting - -### vLLM doesn't start -- Check GPU availability: `nvidia-smi` -- Verify VRAM is sufficient for model -- Check logs: `journalctl -u vllm` or container logs - -### Model loading is slow -- First load downloads model from HuggingFace (can be 18-60GB) -- Use network volume to persist model across restarts -- AWQ 4-bit model (18GB) loads faster than FP16 (31GB) +## Folder map -### Tool calling not working -- Verify `--enable-auto-tool-choice` is set -- Check tool parser matches model (`glm47` for GLM-4.7) -- Run test script: `./tests/test-tool-calling.sh` +| Folder | Purpose | +|--------|---------| +| `models/` | Model-specific Dockerfiles + entrypoints | +| `scripts/` | Base entrypoint + setup helpers | +| `templates/` | RunPod template JSONs | +| `config/` | Moltbot config templates | -### Orphaned GPU memory -- If vLLM crashes, GPU memory may stay allocated -- Restart the pod to clear memory -- Check with: `nvidia-smi` +## Build + release -### SSH port changes -- RunPod assigns random SSH ports after restart -- Check port via RunPod console or API -- Use RunPod web terminal as alternative +Images build on: +- Pull requests -> tag = branch name (slashes -> `-`) +- Push to `main` -> `:latest` +- Git tag (e.g., `v1.0.0`) -> `:v1.0.0` + `:latest` -## Known Issues +## Known issues -1. **GGUF not supported** - vLLM doesn't support GLM-4.7's GGUF format. Use AWQ. -2. **Container disk doesn't persist** - Only `/workspace` survives restarts. -3. **B200 requires CUDA 13.1+** - The REAP image includes this automatically. - -## Cost Optimization - -1. **Use AWQ 4-bit** - Same model, lower VRAM, cheaper GPU ($1.19 vs $1.99/hr) -2. **Stop pods when idle** - RunPod charges per minute -3. **Use network volumes** - Avoid re-downloading models -4. **Consider spot instances** - Up to 80% cheaper +- **NVFP4 on RTX 5090** is not working in vLLM due to MLA attention shape issues and missing Blackwell kernel support. See `models/glm47-flash-nvfp4-5090/ISSUES.md`. +- **GGUF is not supported in vLLM** (use llama.cpp image). +- **Container disk doesn't persist**; only `/workspace` survives restarts. ## Resources -- [Moltbot Documentation](https://github.com/moltbot/moltbot) -- [vLLM Documentation](https://docs.vllm.ai/) -- [RunPod Documentation](https://docs.runpod.io/) -- [GLM-4.7 Announcement](https://z.ai/blog/glm-4.7) - -## License - -MIT +- Moltbot: https://github.com/moltbot/moltbot +- vLLM: https://docs.vllm.ai/ +- RunPod: https://docs.runpod.io/ From 3c6ed9acfa9ced99af71044f5cd67febc48d7016 Mon Sep 17 00:00:00 2001 From: Tim Pietrusky Date: Fri, 30 Jan 2026 15:43:21 +0100 Subject: [PATCH 12/12] feat: rename to openclaw Align images, configs, and entrypoints with OpenClaw branding and paths. Update docs and templates to drop Moltbot/Clawdbot references. 
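
For pods that already have state on a `/workspace` volume, a one-off migration to the new paths might look like the sketch below. This assumes the existing Moltbot state remains usable by OpenClaw once `doctor --fix` has normalized the config; it is an illustrative helper, not an official migration script, so back up the volume first.

```bash
#!/usr/bin/env bash
# One-off migration of legacy state/workspace paths to the OpenClaw layout.
set -euo pipefail

OLD_STATE=/workspace/.clawdbot
NEW_STATE=/workspace/.openclaw
OLD_WS=/workspace/clawd
NEW_WS=/workspace/openclaw

if [ -d "$OLD_STATE" ] && [ ! -d "$NEW_STATE" ]; then
  cp -a "$OLD_STATE" "$NEW_STATE"
  # The config file name changes along with the directory.
  [ -f "$NEW_STATE/clawdbot.json" ] && mv "$NEW_STATE/clawdbot.json" "$NEW_STATE/openclaw.json"
fi

if [ -d "$OLD_WS" ] && [ ! -d "$NEW_WS" ]; then
  cp -a "$OLD_WS" "$NEW_WS"
fi

chmod 700 "$NEW_STATE" 2>/dev/null || true
chmod 600 "$NEW_STATE/openclaw.json" 2>/dev/null || true

# Let the new CLI auto-migrate anything schema-related.
OPENCLAW_STATE_DIR="$NEW_STATE" openclaw doctor --fix || true
```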
--- .env.example | 6 +- .github/workflows/docker-build.yml | 24 ++-- .gitignore | 2 +- CLAUDE.md | 28 ++--- Dockerfile | 27 ++--- README.md | 22 ++-- config/{moltbot.json => openclaw.json} | 5 +- config/workspace/AGENTS.md | 2 +- config/workspace/IDENTITY.md | 2 +- docker-compose.yml | 10 +- docs/openclaw-migration-plan.md | 75 +++++++++++++ docs/video-script.md | 26 ++--- models/glm47-flash-awq-4bit/Dockerfile | 10 +- models/glm47-flash-awq-4bit/README.md | 18 +-- models/glm47-flash-awq-4bit/entrypoint.sh | 51 +++++---- models/glm47-flash-fp16/Dockerfile | 15 +-- models/glm47-flash-fp16/README.md | 8 +- models/glm47-flash-fp16/entrypoint.sh | 46 ++++---- models/glm47-flash-gguf-llamacpp/Dockerfile | 11 +- models/glm47-flash-gguf-llamacpp/README.md | 22 ++-- .../glm47-flash-gguf-llamacpp/entrypoint.sh | 56 +++++----- models/glm47-flash-nvfp4-5090/Dockerfile | 6 +- models/glm47-flash-nvfp4-5090/ISSUES.md | 104 +++++++++++++++++- models/glm47-flash-nvfp4-5090/README.md | 4 +- models/glm47-flash-nvfp4-5090/entrypoint.sh | 52 +++++---- models/glm47-reap-w4a16/Dockerfile | 11 +- models/glm47-reap-w4a16/README.md | 8 +- models/glm47-reap-w4a16/entrypoint.sh | 46 ++++---- scripts/entrypoint.sh | 51 +++++---- .../{setup-moltbot.sh => setup-openclaw.sh} | 45 ++++---- scripts/start-vllm.sh | 4 +- .../{moltbot-vllm.json => openclaw-vllm.json} | 8 +- templates/runpod-template.json | 11 +- 33 files changed, 499 insertions(+), 317 deletions(-) rename config/{moltbot.json => openclaw.json} (88%) create mode 100644 docs/openclaw-migration-plan.md rename scripts/{setup-moltbot.sh => setup-openclaw.sh} (80%) rename templates/{moltbot-vllm.json => openclaw-vllm.json} (97%) diff --git a/.env.example b/.env.example index 22deea3..a203fbe 100644 --- a/.env.example +++ b/.env.example @@ -14,8 +14,10 @@ HF_TOKEN=hf_your_token_here RUNPOD_POD_ID=your-pod-id RUNPOD_VLLM_API_KEY=your-secure-api-key-here -# Moltbot Web UI (password for accessing the control panel) -MOLTBOT_WEB_PASSWORD=moltbot +# OpenClaw Configuration +OPENCLAW_STATE_DIR=/workspace/.openclaw +OPENCLAW_WORKSPACE=/workspace/openclaw +OPENCLAW_WEB_PASSWORD=openclaw # Messaging Integrations (optional) TELEGRAM_BOT_TOKEN= diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 5ee216b..a478857 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -71,11 +71,11 @@ jobs: context: models/glm47-flash-awq-4bit push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-awq-4bit:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-awq-4bit:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-awq-4bit:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-awq-4bit:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-awq-4bit:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-awq-4bit:buildcache,mode=max # Disabled - vLLM NVFP4 has bugs with GLM-4.7 MLA on Blackwell # See models/glm47-flash-nvfp4-5090/ISSUES.md @@ -137,11 +137,11 @@ jobs: context: models/glm47-flash-nvfp4-5090 push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ 
env.DOCKERHUB_REPO }}/moltbot-glm47-flash-nvfp4-5090:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-nvfp4-5090:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-nvfp4-5090:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-nvfp4-5090:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-nvfp4-5090:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-nvfp4-5090:buildcache,mode=max # GLM-4.7-Flash GGUF with llama.cpp - WORKING on RTX 5090! build-glm47-flash-gguf-llamacpp: @@ -201,11 +201,11 @@ jobs: context: models/glm47-flash-gguf-llamacpp push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-gguf:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/moltbot-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-gguf:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-gguf:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/moltbot-glm47-flash-gguf:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-gguf:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-gguf:buildcache,mode=max # Disabled for now # build-glm47-flash-fp16: diff --git a/.gitignore b/.gitignore index 85eee5d..0935446 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ Thumbs.db *.swp *.swo -# Node (if running Moltbot locally) +# Node (if running OpenClaw locally) node_modules/ # Python diff --git a/CLAUDE.md b/CLAUDE.md index d80de81..35f8052 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,19 +4,19 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -RunPod-optimized Docker deployment for running Moltbot (AI coding assistant) with GLM-4.7 language models using vLLM for inference. Multiple model variants are optimized for different GPU tiers (A100, H100, B200, RTX 5090). +RunPod-optimized Docker deployment for running OpenClaw (AI coding assistant) with GLM-4.7 language models using vLLM for inference. Multiple model variants are optimized for different GPU tiers (A100, H100, B200, RTX 5090). 
## Build Commands ```bash # Build a specific model variant -docker build -t moltbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ -docker build -t moltbot-glm47-flash-fp16 models/glm47-flash-fp16/ -docker build -t moltbot-glm47-flash-nvfp4-5090 models/glm47-flash-nvfp4-5090/ +docker build -t openclaw-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ +docker build -t openclaw-glm47-flash-fp16 models/glm47-flash-fp16/ +docker build -t openclaw-glm47-flash-nvfp4-5090 models/glm47-flash-nvfp4-5090/ # Push to Docker Hub -docker tag moltbot-glm47-flash-awq-4bit yourusername/moltbot-glm47-flash-awq-4bit:latest -docker push yourusername/moltbot-glm47-flash-awq-4bit:latest +docker tag openclaw-glm47-flash-awq-4bit yourusername/openclaw-glm47-flash-awq-4bit:latest +docker push yourusername/openclaw-glm47-flash-awq-4bit:latest ``` ## Local Development @@ -56,12 +56,12 @@ models/ # Model-specific Dockerfiles and configs └── glm47-reap-w4a16/ # REAP W4A16 (B200) scripts/ # Startup orchestration -├── entrypoint.sh # Docker entrypoint (starts vLLM + Moltbot) +├── entrypoint.sh # Docker entrypoint (starts vLLM + OpenClaw) ├── start-vllm.sh # vLLM server with GPU detection -└── setup-moltbot.sh # Moltbot installation +└── setup-openclaw.sh # OpenClaw installation config/ # Runtime configuration -├── moltbot.json # Moltbot config template +├── openclaw.json # OpenClaw config template └── workspace/ # Agent identity and system docs ``` @@ -70,9 +70,9 @@ config/ # Runtime configuration | Port | Service | |-------|-------------------| | 8000 | vLLM API | -| 18789 | Moltbot Gateway | -| 18790 | Moltbot Bridge | -| 18793 | Moltbot Canvas | +| 18789 | OpenClaw Gateway | +| 18790 | OpenClaw Bridge | +| 18793 | OpenClaw Canvas | | 22 | SSH | ## CI/CD (GitHub Actions) @@ -98,10 +98,10 @@ Key variables from `.env.example`: ## Entrypoint Flow 1. Configure environment and detect GPU count -2. Generate `clawdbot.json` with vLLM provider settings (legacy file name used by Moltbot) +2. Generate `openclaw.json` with vLLM provider settings 3. Start vLLM server in background 4. Wait for health check (max 5 minutes) -5. Start Moltbot gateway +5. Start OpenClaw gateway 6. 
Handle graceful shutdown on SIGTERM/SIGINT ## RunPod SSH Access diff --git a/Dockerfile b/Dockerfile index 34d195f..3c24639 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,15 @@ -# Moltbot + vLLM Docker Image for RunPod +# OpenClaw + vLLM Docker Image for RunPod # Pre-configured with everything needed for AI coding assistant FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04 -LABEL maintainer="RunPod Moltbot" -LABEL description="Moltbot AI assistant with vLLM for local LLM inference" +LABEL maintainer="RunPod OpenClaw" +LABEL description="OpenClaw AI assistant with vLLM for local LLM inference" # Avoid interactive prompts ENV DEBIAN_FRONTEND=noninteractive ENV HF_HOME=/workspace/huggingface -ENV MOLTBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw # Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -27,27 +28,27 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ # Install vLLM RUN pip install --no-cache-dir vllm -# Install Moltbot -RUN npm install -g moltbot@latest +# Install OpenClaw +RUN npm install -g openclaw@latest # Create workspace directories RUN mkdir -p /workspace/huggingface \ - /workspace/.clawdbot \ - /workspace/clawd \ + /workspace/.openclaw \ + /workspace/openclaw \ /workspace/scripts # Copy startup script COPY scripts/entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh -# Copy default Moltbot workspace files -COPY config/workspace/ /workspace/clawd/ +# Copy default OpenClaw workspace files +COPY config/workspace/ /workspace/openclaw/ # Expose ports # 8000 - vLLM API -# 18789 - Moltbot Gateway WebSocket -# 18790 - Moltbot Bridge -# 18793 - Moltbot Canvas +# 18789 - OpenClaw Gateway WebSocket +# 18790 - OpenClaw Bridge +# 18793 - OpenClaw Canvas # 22 - SSH (RunPod adds this) EXPOSE 8000 18789 18790 18793 diff --git a/README.md b/README.md index a7ce1e1..f325cfe 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,17 @@ -# Moltbot on RunPod: self-contained LLM images +# OpenClaw on RunPod: self-contained LLM images -This repository provides Docker images that bundle **Moltbot** with different LLMs so you can run a fully self-contained assistant on RunPod (or any GPU host). Each model variant has its own folder under `models/` with a dedicated README and startup script. +This repository provides Docker images that bundle **OpenClaw** with different LLMs so you can run a fully self-contained assistant on RunPod (or any GPU host). Each model variant has its own folder under `models/` with a dedicated README and startup script. 
## Model matrix (status + context) | Image tag | Backend | Weights | GPU target | Context | VRAM (approx) | Status | Notes | |----------|---------|---------|------------|---------|----------------|--------|-------| -| `moltbot-glm47-flash-awq-4bit` | vLLM | `cyankiwi/GLM-4.7-Flash-AWQ-4bit` | A100 80GB | 114k | ~75GB | Working | Best value on A100; long context | -| `moltbot-glm47-flash-fp16` | vLLM | `zai-org/GLM-4.7-Flash` | H100/A100 80GB | 32k-64k | ~56GB+ | Working | Full precision | -| `moltbot-glm47-flash-gguf` | llama.cpp | `unsloth/GLM-4.7-Flash-GGUF` (Q4_K_M) | RTX 5090 32GB | 200k | ~28GB | Working | Recommended for 5090 | -| `moltbot-glm47-flash-nvfp4-5090` | vLLM | `GadflyII/GLM-4.7-Flash-NVFP4` | RTX 5090 32GB | 200k | ~30GB | Not working | vLLM MLA issues on Blackwell | -| `moltbot-glm47-reap-w4a16` | vLLM | `0xSero/GLM-4.7-REAP-40-W4A16` | B200 180GB | 32k | ~108GB | Working | High-end B200 | -| `moltbot-vllm` | vLLM | `Qwen/Qwen2.5-Coder-7B-Instruct` | 16GB+ | 16k | ~16GB | Working | Base image | +| `openclaw-glm47-flash-awq-4bit` | vLLM | `cyankiwi/GLM-4.7-Flash-AWQ-4bit` | A100 80GB | 114k | ~75GB | Working | Best value on A100; long context | +| `openclaw-glm47-flash-fp16` | vLLM | `zai-org/GLM-4.7-Flash` | H100/A100 80GB | 32k-64k | ~56GB+ | Working | Full precision | +| `openclaw-glm47-flash-gguf` | llama.cpp | `unsloth/GLM-4.7-Flash-GGUF` (Q4_K_M) | RTX 5090 32GB | 200k | ~28GB | Working | Recommended for 5090 | +| `openclaw-glm47-flash-nvfp4-5090` | vLLM | `GadflyII/GLM-4.7-Flash-NVFP4` | RTX 5090 32GB | 200k | ~30GB | Not working | vLLM MLA issues on Blackwell | +| `openclaw-glm47-reap-w4a16` | vLLM | `0xSero/GLM-4.7-REAP-40-W4A16` | B200 180GB | 32k | ~108GB | Working | High-end B200 | +| `openclaw-vllm` | vLLM | `Qwen/Qwen2.5-Coder-7B-Instruct` | 16GB+ | 16k | ~16GB | Working | Base image | Notes: - Context values are defaults; some variants allow tuning via `MAX_MODEL_LEN`. @@ -25,7 +25,7 @@ Notes: - Ports: `8000/http, 18789/http, 22/tcp` 3. 
**Set environment variables**: - `VLLM_API_KEY` (for vLLM variants) - - `MOLTBOT_WEB_PASSWORD` (web UI token) + - `OPENCLAW_WEB_PASSWORD` (web UI token) - `HF_TOKEN` (optional, faster downloads) - `TELEGRAM_BOT_TOKEN` (optional) - For GGUF + llama.cpp: use `LLAMA_API_KEY` instead of `VLLM_API_KEY` @@ -42,7 +42,7 @@ curl http://localhost:8000/health | `models/` | Model-specific Dockerfiles + entrypoints | | `scripts/` | Base entrypoint + setup helpers | | `templates/` | RunPod template JSONs | -| `config/` | Moltbot config templates | +| `config/` | OpenClaw config templates | ## Build + release @@ -59,6 +59,6 @@ Images build on: ## Resources -- Moltbot: https://github.com/moltbot/moltbot +- OpenClaw: https://github.com/openclaw/openclaw - vLLM: https://docs.vllm.ai/ - RunPod: https://docs.runpod.io/ diff --git a/config/moltbot.json b/config/openclaw.json similarity index 88% rename from config/moltbot.json rename to config/openclaw.json index f12195e..b31b116 100644 --- a/config/moltbot.json +++ b/config/openclaw.json @@ -1,10 +1,9 @@ { - "$schema": "https://clawd.bot/schema/config.json", - "_comment": "Moltbot configuration for RunPod vLLM integration", + "_comment": "OpenClaw configuration for RunPod vLLM integration", "_instructions": [ "Replace with your RunPod pod ID", "Replace with your vLLM API key", - "Adjust model settings based on your tier (see templates/moltbot-vllm.json)" + "Adjust model settings based on your tier (see templates/openclaw-vllm.json)" ], "agents": { diff --git a/config/workspace/AGENTS.md b/config/workspace/AGENTS.md index 5d1a3fc..d6ded55 100644 --- a/config/workspace/AGENTS.md +++ b/config/workspace/AGENTS.md @@ -1,4 +1,4 @@ -# AGENTS.md - Moltbot Workspace +# AGENTS.md - OpenClaw Workspace This folder is the assistant's working directory. diff --git a/config/workspace/IDENTITY.md b/config/workspace/IDENTITY.md index 73d2de7..554aa69 100644 --- a/config/workspace/IDENTITY.md +++ b/config/workspace/IDENTITY.md @@ -1,6 +1,6 @@ # Identity -You are a helpful Moltbot AI coding assistant running on RunPod with a local LLM. +You are a helpful OpenClaw AI coding assistant running on RunPod with a local LLM. You can help with: - Writing and debugging code - Explaining programming concepts diff --git a/docker-compose.yml b/docker-compose.yml index 666f1d5..74545db 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -# docker-compose.yml - Local development setup for Moltbot + vLLM +# docker-compose.yml - Local development setup for OpenClaw + vLLM # Note: This is for local testing only. For production, use RunPod. 
version: "3.8" @@ -7,7 +7,7 @@ services: # vLLM Server - requires NVIDIA GPU with sufficient VRAM vllm: image: vllm/vllm-openai:v0.12.0 - container_name: moltbot-vllm + container_name: openclaw-vllm runtime: nvidia deploy: resources: @@ -46,7 +46,7 @@ services: # Mock vLLM for testing without GPU (uses smaller model) vllm-mock: image: vllm/vllm-openai:v0.12.0 - container_name: moltbot-vllm-mock + container_name: openclaw-vllm-mock profiles: ["mock"] ports: - "8001:8000" @@ -64,7 +64,7 @@ services: # Test runner tests: image: curlimages/curl:latest - container_name: moltbot-tests + container_name: openclaw-tests profiles: ["test"] depends_on: vllm: @@ -84,4 +84,4 @@ volumes: networks: default: - name: moltbot-network + name: openclaw-network diff --git a/docs/openclaw-migration-plan.md b/docs/openclaw-migration-plan.md new file mode 100644 index 0000000..c36ea05 --- /dev/null +++ b/docs/openclaw-migration-plan.md @@ -0,0 +1,75 @@ +# OpenClaw Migration Plan (RunPod Images) + +## Background & upstream signals + +From the upstream OpenClaw project: +- The repository is now `openclaw/openclaw`, and the CLI shown in the README is `openclaw`. +- Install guidance includes `npm install -g openclaw@latest` and the one‑liner `curl -fsSL https://openclaw.ai/install.sh | bash`. +- The OpenClaw README documents new default paths: + - Config file: `~/.openclaw/openclaw.json` + - Workspace root: `~/.openclaw/workspace` + +Sources: +- https://github.com/openclaw/openclaw (README) +- https://openclaw.ai (installer + quick start) + +## Repo scan findings (current state) + +The repo still referenced legacy names and paths in many places before migration: +- Dockerfiles: base image installs, labels, ENVs, entrypoint banners +- Entrypoints: legacy CLI names and legacy state dir paths +- Docs: `README.md`, model READMEs, `docs/video-script.md` +- Templates: `templates/runpod-template.json`, `templates/openclaw-vllm.json` +- Config: `config/openclaw.json`, `config/workspace/IDENTITY.md` +- Scripts: `scripts/entrypoint.sh`, `scripts/setup-openclaw.sh` +- Env examples: `.env.example` + +No `OpenClaw` references exist yet in the repo. + +## Decisions (no legacy) + +1. **Package + binary naming** + - Install `openclaw@latest`. + - Use `openclaw` CLI only (no legacy binaries or symlinks). + +2. **State directory** + - Use `/workspace/.openclaw` as the only state directory in containers. + +3. **Config file name** + - Use `openclaw.json` only. + +## Migration plan (proposed steps) + +### 1) Dependency + CLI alignment +- Update Dockerfiles to install `openclaw@latest`. +- Use `openclaw` as the only CLI. + +### 2) State dir and workspace setup +- Use `/workspace/.openclaw` for all state. +- Create expected subdirectories (`agents/main/sessions`, `credentials`) and enforce permissions. + +### 3) Config generation + naming +- Generate `openclaw.json` with OpenAI‑compatible provider settings for the local model. +- Run `openclaw doctor --fix` to auto‑migrate schema after config write. + +### 4) Rename commands and docs +- Update all scripts/entrypoints to call `openclaw`. +- Replace docs and templates to use “OpenClaw” branding and new paths. +- Update README tables and sample image tags if the Docker repo/name changes. + +### 5) Environment variables and config keys +- Standardize on `OPENCLAW_STATE_DIR`, `OPENCLAW_WORKSPACE`, `OPENCLAW_WEB_PASSWORD`. +- Reflect in `.env.example` and RunPod templates. + +### 6) Validation +- Build images for each model variant. 
+- Smoke test: + - `openclaw doctor --fix` works + - `openclaw gateway` starts + - Web UI reachable via RunPod proxy + - Model inference via `/v1/chat/completions` +- Confirm the state dir and workspace are created under `/workspace/.openclaw`. + +## Open questions + +- Should image tags be renamed immediately or keep existing tags for continuity? diff --git a/docs/video-script.md b/docs/video-script.md index 3fcc4cb..4d4275e 100644 --- a/docs/video-script.md +++ b/docs/video-script.md @@ -1,4 +1,4 @@ -# Video Script: Moltbot fully self-hosted on RTX 5090 (GLM‑4.7‑Flash GGUF + llama.cpp) +# Video Script: OpenClaw fully self-hosted on RTX 5090 (GLM‑4.7‑Flash GGUF + llama.cpp) This doc turns the repo learnings into a demo-first video script for two audiences: @@ -42,26 +42,26 @@ If you want a single AA page on screen as a citation backdrop, use a comparison **You say**: People call these “self-hosted agents”… but then the brain is still a paid API. If your agent stops working the second Claude is down or your token budget runs out, that’s not self-hosted. -Today I’ll show a fully self-contained Moltbot setup: local model, local inference, agent UI—no external model API needed. +Today I’ll show a fully self-contained OpenClaw setup: local model, local inference, agent UI—no external model API needed. ### 0:25–0:55 — What you’ll build + requirements (set expectations) -**On screen**: one slide: “Moltbot + GLM‑4.7‑Flash + llama.cpp (OpenAI API)”. +**On screen**: one slide: “OpenClaw + GLM‑4.7‑Flash + llama.cpp (OpenAI API)”. **You say**: -We’re running GLM‑4.7‑Flash locally via llama.cpp and pointing Moltbot at it using an OpenAI-compatible API. +We’re running GLM‑4.7‑Flash locally via llama.cpp and pointing OpenClaw at it using an OpenAI-compatible API. If you’ve got an RTX 5090 (32GB), you can run the full 200k context. With 24GB, it can still work, just with a reduced context window—because the model weights alone are ~17GB. ### 0:55–2:10 — Quick demo first (prove it works before you explain anything) **On screen**: -- Open Moltbot web UI +- Open OpenClaw web UI - Show the agent doing a quick code task (small repo change / explanation) - Show a raw API call to the model (`/v1/chat/completions`) **You say**: -Let me prove it’s real before we talk architecture. This is Moltbot running against a model in the same environment. No Claude key. No OpenAI key. +Let me prove it’s real before we talk architecture. This is OpenClaw running against a model in the same environment. No Claude key. No OpenAI key. If you’re using Telegram integration, the same idea applies: messages go to a local model, not a hosted API. 
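If you want the raw call on screen at this point, here is a minimal sketch (assuming the llama.cpp server on port 8000 and the `LLAMA_API_KEY` you set for the pod; substitute your own key and pod URL):

```bash
# Minimal OpenAI-compatible chat completion against the local llama.cpp server.
# Uses the served model name from this setup; adjust if you changed SERVED_MODEL_NAME.
curl -s http://localhost:8000/v1/chat/completions \
  -H "Authorization: Bearer $LLAMA_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "glm-4.7-flash",
    "messages": [{"role": "user", "content": "Say hi in one sentence."}],
    "max_tokens": 64
  }'
```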
@@ -82,12 +82,12 @@ You’ve got two options: **You say (walkthrough voice)**: Here’s the setup that actually matters: -- **Image**: `runpod/moltbot-glm47-flash-gguf:latest` -- **Ports**: `8000/http` (llama.cpp), `18789/http` (Moltbot UI), `22/tcp` (SSH) +- **Image**: `runpod/openclaw-glm47-flash-gguf:latest` +- **Ports**: `8000/http` (llama.cpp), `18789/http` (OpenClaw UI), `22/tcp` (SSH) - **Network volume mounted to `/workspace`** (non-negotiable; model is ~17GB and you want persistence across restarts) - **Environment variables**: - `LLAMA_API_KEY` (protects the model API) - - `MOLTBOT_WEB_PASSWORD` (protects the web UI token) + - `OPENCLAW_WEB_PASSWORD` (protects the web UI token) - optionally `TELEGRAM_BOT_TOKEN` (Telegram) ### 5:30–6:40 — Health check + raw chat completion (OpenAI-compat API) @@ -95,7 +95,7 @@ Here’s the setup that actually matters: **On screen**: terminal showing `curl` to `/health` then `/v1/chat/completions`. **You say**: -llama.cpp runs an OpenAI-compatible API. That’s the trick: Moltbot doesn’t need to know it’s llama.cpp. +llama.cpp runs an OpenAI-compatible API. That’s the trick: OpenClaw doesn’t need to know it’s llama.cpp. **Show (copy/paste):** @@ -112,9 +112,9 @@ First time you open the web UI, it won’t just let any browser control your age **On screen (commands):** - List requests: - - `MOLTBOT_STATE_DIR=/workspace/.clawdbot moltbot devices list` + - `OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing list telegram` - Approve: - - `MOLTBOT_STATE_DIR=/workspace/.clawdbot moltbot devices approve ` + - `OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing approve telegram ` **You say**: This is the right default for something that can run commands and touch repos. @@ -143,7 +143,7 @@ Quick callout list (keep it fast): Architecture is simple: - llama.cpp (`llama-server`) hosts the model and exposes OpenAI-style endpoints on `:8000` -- Moltbot points its provider config at `http://localhost:8000/v1` +- OpenClaw points its provider config at `http://localhost:8000/v1` - The container stores everything under `/workspace` so restarts don’t wipe model + state Then the “why it fits”: diff --git a/models/glm47-flash-awq-4bit/Dockerfile b/models/glm47-flash-awq-4bit/Dockerfile index 561b21d..21ad16d 100644 --- a/models/glm47-flash-awq-4bit/Dockerfile +++ b/models/glm47-flash-awq-4bit/Dockerfile @@ -34,8 +34,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# Install Moltbot and Claude Code -RUN npm install -g --prefer-offline @anthropic-ai/claude-code moltbot && \ +# Install OpenClaw and Claude Code +RUN npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ npm cache clean --force # Environment defaults @@ -45,9 +45,9 @@ ENV SERVED_MODEL_NAME="glm-4.7-flash" ENV MAX_MODEL_LEN="114688" ENV VLLM_API_KEY="changeme" -# Moltbot workspace (legacy paths kept for compatibility) -ENV MOLTBOT_HOME="/workspace/.clawdbot" -ENV MOLTBOT_WORKSPACE="/workspace/clawd" +# OpenClaw workspace +ENV OPENCLAW_STATE_DIR="/workspace/.openclaw" +ENV OPENCLAW_WORKSPACE="/workspace/openclaw" COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/models/glm47-flash-awq-4bit/README.md b/models/glm47-flash-awq-4bit/README.md index f8535df..c9451f9 100644 --- a/models/glm47-flash-awq-4bit/README.md +++ b/models/glm47-flash-awq-4bit/README.md @@ -22,7 +22,7 @@ Quantized version of GLM-4.7-Flash for **A100 80GB** GPUs. Best value for GLM-4. ### 1. 
Create RunPod Pod **Settings:** -- **Image**: `runpod/moltbot-glm47-flash-awq-4bit:latest` +- **Image**: `runpod/openclaw-glm47-flash-awq-4bit:latest` - **GPU**: 1x A100 80GB - **Volume**: 150GB at `/workspace` (network storage) - **Container Disk**: 50GB @@ -37,7 +37,7 @@ Quantized version of GLM-4.7-Flash for **A100 80GB** GPUs. Best value for GLM-4. | `HF_TOKEN` | Recommended | - | [HuggingFace token](https://huggingface.co/settings/tokens) for faster model downloads | | `TELEGRAM_BOT_TOKEN` | No | - | Telegram bot token for chat integration | | `GITHUB_TOKEN` | No | - | GitHub token for `gh` CLI | -| `MOLTBOT_WEB_PASSWORD` | No | `moltbot` | Password for web UI | +| `OPENCLAW_WEB_PASSWORD` | No | `openclaw` | Password for web UI | ### 3. Access Points @@ -46,7 +46,7 @@ After the pod starts (~90 seconds for cached starts, longer for first start): | Service | URL | Auth | |---------|-----|------| | vLLM API | `https://-8000.proxy.runpod.net` | Bearer token (`VLLM_API_KEY`) | -| Web UI | `https://-18789.proxy.runpod.net` | Password (`MOLTBOT_WEB_PASSWORD`) | +| Web UI | `https://-18789.proxy.runpod.net` | Password (`OPENCLAW_WEB_PASSWORD`) | | SSH | `ssh root@ -p ` | SSH key | ### 4. Test It @@ -87,12 +87,12 @@ All persistent data is stored on the network volume `/workspace`: ├── .cache/ │ ├── vllm/ # CUDA graphs & torch compile cache (~400MB) │ └── huggingface/ # HF cache -├── .clawdbot/ # Legacy Moltbot state path -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # Agent state │ └── telegram/ # Telegram session ├── .config/gh/ # GitHub CLI config -└── clawd/ # Workspace +└── openclaw/ # Workspace ``` **Startup times:** @@ -101,9 +101,9 @@ All persistent data is stored on the network volume `/workspace`: ## Web UI -Access the Moltbot web UI at `https://-18789.proxy.runpod.net`: +Access the OpenClaw web UI at `https://-18789.proxy.runpod.net`: -1. Enter the password (default: `moltbot` or your `MOLTBOT_WEB_PASSWORD`) +1. Enter the password (default: `openclaw` or your `OPENCLAW_WEB_PASSWORD`) 2. Chat with the model through the web interface 3. No CLI access required @@ -196,7 +196,7 @@ pkill -9 -f vllm **Web UI won't connect:** - Ensure port 18789 is exposed -- Check that gateway is running: `ps aux | grep moltbot` +- Check that gateway is running: `ps aux | grep openclaw` - Verify bind mode is `lan` in config **Model download fails:** diff --git a/models/glm47-flash-awq-4bit/entrypoint.sh b/models/glm47-flash-awq-4bit/entrypoint.sh index d43bf7b..7b86333 100644 --- a/models/glm47-flash-awq-4bit/entrypoint.sh +++ b/models/glm47-flash-awq-4bit/entrypoint.sh @@ -48,16 +48,14 @@ fi VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-114688}" -MOLTBOT_HOME="${MOLTBOT_HOME:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -# Web UI password - users enter this to access the Moltbot control panel -MOLTBOT_WEB_PASSWORD="${MOLTBOT_WEB_PASSWORD:-moltbot}" +# Web UI token/password - users enter this to access the OpenClaw control panel +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" -BOT_CMD="moltbot" -if ! command -v "$BOT_CMD" >/dev/null 2>&1; then - BOT_CMD="clawdbot" -fi +BOT_CMD="openclaw" echo "Starting vLLM server..." 
echo " Model: $MODEL_PATH" @@ -104,11 +102,13 @@ if [ $WAITED -ge $MAX_WAIT ]; then # Don't exit - keep container running for debugging fi -# Setup Moltbot config -mkdir -p "$MOLTBOT_HOME" +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then - echo "Creating Moltbot config (legacy clawdbot.json)..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -117,8 +117,8 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - # Create a minimal config - moltbot doctor will fix any missing fields - cat > "$MOLTBOT_HOME/clawdbot.json" << EOF + # Create a minimal config - openclaw doctor will fix any missing fields + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -141,7 +141,8 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-vllm/$SERVED_MODEL_NAME" }, - "contextTokens": 98304 + "contextTokens": 98304, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { @@ -149,17 +150,19 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then }, "gateway": { "mode": "local", - "bind": "lan" + "bind": "lan", + "auth": { "mode": "password", "password": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$MOLTBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" fi -# Auto-fix config to match current Moltbot version's schema -echo "Running moltbot doctor to validate/fix config..." -MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix || true +# Auto-fix config to match current OpenClaw version's schema +echo "Running openclaw doctor to validate/fix config..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -179,19 +182,19 @@ fi export OPENAI_API_KEY="$VLLM_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Moltbot gateway with password auth for web UI access +# Start OpenClaw gateway with password auth for web UI access echo "" -echo "Starting Moltbot gateway..." -MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" gateway --auth password --password "$MOLTBOT_WEB_PASSWORD" & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready!" 
echo " vLLM API: http://localhost:8000" -echo " Moltbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo " Web UI: https://-18789.proxy.runpod.net" -echo " Web UI Password: $MOLTBOT_WEB_PASSWORD" +echo " Web UI Password: $OPENCLAW_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME" echo " Context: $MAX_MODEL_LEN tokens" echo "================================================" diff --git a/models/glm47-flash-fp16/Dockerfile b/models/glm47-flash-fp16/Dockerfile index 8375223..d1d62b6 100644 --- a/models/glm47-flash-fp16/Dockerfile +++ b/models/glm47-flash-fp16/Dockerfile @@ -33,18 +33,19 @@ RUN uv pip install --system -U vllm \ --extra-index-url https://wheels.vllm.ai/nightly && \ uv pip install --system git+https://github.com/huggingface/transformers.git -# Install Moltbot -RUN npm install -g --prefer-offline moltbot@latest && \ +# Install OpenClaw +RUN npm install -g --prefer-offline openclaw@latest && \ npm cache clean --force # Keep model files on container disk (requires 100GB) -# Only use workspace for persistent Moltbot state -RUN mkdir -p /workspace/.clawdbot /workspace/clawd +# Only use workspace for persistent OpenClaw state +RUN mkdir -p /workspace/.openclaw /workspace/openclaw # Environment variables -# HF_HOME on container disk (100GB needed), Moltbot state on workspace +# HF_HOME on container disk (100GB needed), OpenClaw state on workspace ENV HF_HOME=/root/.cache/huggingface -ENV MOLTBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw ENV MODEL_NAME=zai-org/GLM-4.7-Flash ENV SERVED_MODEL_NAME=glm-4.7-flash ENV VLLM_API_KEY=changeme @@ -58,7 +59,7 @@ RUN chmod +x /entrypoint.sh # Expose ports # 8000: vLLM API -# 18789: Moltbot Gateway +# 18789: OpenClaw Gateway # 22: SSH EXPOSE 8000 18789 22 diff --git a/models/glm47-flash-fp16/README.md b/models/glm47-flash-fp16/README.md index 63585e0..780f831 100644 --- a/models/glm47-flash-fp16/README.md +++ b/models/glm47-flash-fp16/README.md @@ -29,7 +29,7 @@ Best quality with auto-detected context based on GPU. ### 1. Create RunPod Pod -- **Image**: `yourusername/moltbot-glm47-flash-fp16:latest` +- **Image**: `yourusername/openclaw-glm47-flash-fp16:latest` - **GPU**: 1x H100 80GB or A100 80GB - **Volume**: 50GB at `/workspace` - **Container Disk**: 100GB (model stored here) @@ -67,11 +67,11 @@ Model is stored on container disk (100GB required), state persists on workspace ``` /root/.cache/huggingface/ # Model files (container disk) /workspace/ -├── .clawdbot/ # Legacy Moltbot state path -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # State │ └── telegram/ # Session -└── clawd/ # Workspace +└── openclaw/ # Workspace ``` ## vLLM Configuration diff --git a/models/glm47-flash-fp16/entrypoint.sh b/models/glm47-flash-fp16/entrypoint.sh index ee236e4..122cb51 100644 --- a/models/glm47-flash-fp16/entrypoint.sh +++ b/models/glm47-flash-fp16/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - GLM-4.7-Flash FP16 + Moltbot startup script +# entrypoint.sh - GLM-4.7-Flash FP16 + OpenClaw startup script set -e echo "============================================" -echo " GLM-4.7-Flash FP16 + Moltbot Startup" +echo " GLM-4.7-Flash FP16 + OpenClaw Startup" echo "============================================" echo "" echo "IMPORTANT: This requires vLLM NIGHTLY (not PyPI stable)!" 
@@ -50,9 +50,11 @@ GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.92}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-glm47}" # Keep model on container disk (requires 100GB containerDiskInGb) HF_HOME="${HF_HOME:-/root/.cache/huggingface}" -MOLTBOT_STATE_DIR="${MOLTBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" # Auto-detect optimal context if not explicitly set if [ -z "$MAX_MODEL_LEN" ]; then @@ -63,13 +65,10 @@ else fi export HF_HOME -export MOLTBOT_STATE_DIR +export OPENCLAW_STATE_DIR export MAX_MODEL_LEN -BOT_CMD="moltbot" -if ! command -v "$BOT_CMD" >/dev/null 2>&1; then - BOT_CMD="clawdbot" -fi +BOT_CMD="openclaw" # Set CUDA 13.1 paths for B200 (no-op on other GPUs if not installed) if [ -d "/usr/local/cuda-13.1" ]; then @@ -80,7 +79,10 @@ if [ -d "/usr/local/cuda-13.1" ]; then fi # Ensure directories exist (HF cache on container disk, state on workspace) -mkdir -p "$HF_HOME" "$MOLTBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true # Configure GitHub CLI # Priority: 1) GITHUB_TOKEN env var, 2) Persisted config in /workspace/.config/gh @@ -120,9 +122,9 @@ if command -v nvcc &> /dev/null; then fi echo "" -# Initialize Moltbot config if not exists -if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Moltbot configuration (legacy clawdbot.json)..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -140,12 +142,12 @@ if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then # Reserve tokens for compaction: 15% of context RESERVE_TOKENS=$((MAX_MODEL_LEN * 15 / 100)) - cat > "$MOLTBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd", + "workspace": "/workspace/openclaw", "contextTokens": ${CONTEXT_TOKENS}, "systemPrompt": "Be concise and direct. Avoid unnecessary verbosity.", "compaction": { @@ -180,15 +182,17 @@ if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then ${TELEGRAM_CONFIG} }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "password", "password": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$MOLTBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $MOLTBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi # Build vLLM command @@ -237,10 +241,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Moltbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Moltbot gateway..." 
-MOLTBOT_STATE_DIR=$MOLTBOT_STATE_DIR "$BOT_CMD" gateway & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" @@ -248,7 +252,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Moltbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/models/glm47-flash-gguf-llamacpp/Dockerfile b/models/glm47-flash-gguf-llamacpp/Dockerfile index d8548a4..e4b8fc7 100644 --- a/models/glm47-flash-gguf-llamacpp/Dockerfile +++ b/models/glm47-flash-gguf-llamacpp/Dockerfile @@ -66,10 +66,9 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install huggingface_hub for model downloads (using Python API, not CLI) RUN python3 -m pip install --no-cache-dir huggingface_hub -# Install Clawdbot and Claude Code (moltbot binary is a symlink) -RUN npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ - npm cache clean --force && \ - ln -s "$(npm config get prefix)/bin/clawdbot" "$(npm config get prefix)/bin/moltbot" +# Install OpenClaw and Claude Code +RUN npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ + npm cache clean --force WORKDIR / @@ -80,8 +79,8 @@ ENV MODEL_NAME="unsloth/GLM-4.7-Flash-GGUF" \ SERVED_MODEL_NAME="glm-4.7-flash" \ MAX_MODEL_LEN="200000" \ LLAMA_API_KEY="changeme" \ - MOLTBOT_HOME="/workspace/.clawdbot" \ - MOLTBOT_WORKSPACE="/workspace/clawd" + OPENCLAW_STATE_DIR="/workspace/.openclaw" \ + OPENCLAW_WORKSPACE="/workspace/openclaw" COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/models/glm47-flash-gguf-llamacpp/README.md b/models/glm47-flash-gguf-llamacpp/README.md index 51ebb96..55249b9 100644 --- a/models/glm47-flash-gguf-llamacpp/README.md +++ b/models/glm47-flash-gguf-llamacpp/README.md @@ -23,7 +23,7 @@ llama.cpp has native support for `Glm4MoeLite` architecture (PR #18936 merged Ja - **200k context** - Full model capacity on 32GB GPU - **Q8 KV cache quantization** - Fits 200k context in VRAM -- **OpenAI-compatible API** - Works with Moltbot, Claude Code, etc. +- **OpenAI-compatible API** - Works with OpenClaw, Claude Code, etc. - **Native chat template** - Uses `--jinja` for correct GLM-4.7 formatting ## Runpod Deployment @@ -33,26 +33,26 @@ llama.cpp has native support for `Glm4MoeLite` architecture (PR #18936 merged Ja 1. **Add your SSH key** to [Runpod Account Settings → SSH Public Keys](https://www.runpod.io/console/user/settings) (required for device pairing later). If you don't have an SSH key, follow the [Runpod SSH guide](https://docs.runpod.io/pods/configuration/use-ssh). 2. **Create a Pod** with: - - Image: `runpod/moltbot-glm47-flash-gguf:latest` + - Image: `runpod/openclaw-glm47-flash-gguf:latest` - GPU: RTX 5090 (or any 32GB+ GPU) - Ports: `8000/http`, `18789/http`, `22/tcp` - Network Volume: **30GB minimum**, mounted to `/workspace` - Required for model download (~17GB) and config persistence - Without a network volume, data is lost on pod restart - Environment Variables: - - `MOLTBOT_WEB_PASSWORD` - Token for Web UI (default: `moltbot`) + - `OPENCLAW_WEB_PASSWORD` - Token for Web UI (default: `openclaw`) - `LLAMA_API_KEY` - API key for llama.cpp (default: `changeme`) 3. 
**Wait for startup** - First launch downloads the model (~17GB), which takes a few minutes. Check pod logs for progress. 4. **Access the Web UI**: ``` - https://-18789.proxy.runpod.net/?token= + https://-18789.proxy.runpod.net/?token= ``` ### First-Time Device Pairing -Moltbot requires device pairing for security. On first access, you'll see "pairing required". +OpenClaw requires device pairing for security. On first access, you'll see "pairing required". **To approve your browser:** @@ -61,10 +61,10 @@ Moltbot requires device pairing for security. On first access, you'll see "pairi ssh root@ -p # List pending pairing requests -MOLTBOT_STATE_DIR=/workspace/.clawdbot moltbot devices list +OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing list telegram # Approve your device (use the Request ID from the list) -MOLTBOT_STATE_DIR=/workspace/.clawdbot moltbot devices approve +OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing approve telegram ``` After approval, refresh the Web UI - it will work permanently for that browser. @@ -74,7 +74,7 @@ After approval, refresh the Web UI - it will work permanently for that browser. | Port | Service | |------|---------| | 8000 | llama.cpp API (OpenAI-compatible) | -| 18789 | Moltbot Web UI | +| 18789 | OpenClaw Web UI | | 22 | SSH | ## Environment Variables @@ -84,7 +84,7 @@ After approval, refresh the Web UI - it will work permanently for that browser. | `MODEL_FILE` | `GLM-4.7-Flash-Q4_K_M.gguf` | GGUF file to use | | `MAX_MODEL_LEN` | `200000` | Context length | | `LLAMA_API_KEY` | `changeme` | API authentication | -| `MOLTBOT_WEB_PASSWORD` | `moltbot` | Web UI token | +| `OPENCLAW_WEB_PASSWORD` | `openclaw` | Web UI token | | `TELEGRAM_BOT_TOKEN` | - | Optional Telegram integration | | `GITHUB_TOKEN` | - | Optional GitHub CLI auth | @@ -92,13 +92,13 @@ After approval, refresh the Web UI - it will work permanently for that browser. ```bash # Build -docker build -t moltbot-glm47-gguf-llamacpp . +docker build -t openclaw-glm47-gguf-llamacpp . # Run on RTX 5090 docker run --gpus all -p 8000:8000 -p 18789:18789 \ -v /path/to/workspace:/workspace \ -e LLAMA_API_KEY=your-key \ - moltbot-glm47-gguf-llamacpp + openclaw-glm47-gguf-llamacpp ``` ## API Usage diff --git a/models/glm47-flash-gguf-llamacpp/entrypoint.sh b/models/glm47-flash-gguf-llamacpp/entrypoint.sh index ffbade5..afbd24a 100644 --- a/models/glm47-flash-gguf-llamacpp/entrypoint.sh +++ b/models/glm47-flash-gguf-llamacpp/entrypoint.sh @@ -77,17 +77,16 @@ fi LLAMA_API_KEY="${LLAMA_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" -MOLTBOT_HOME="${MOLTBOT_HOME:-/workspace/.clawdbot}" -CLAWDBOT_STATE_DIR="$MOLTBOT_HOME" -MOLTBOT_STATE_DIR="$MOLTBOT_HOME" -export CLAWDBOT_STATE_DIR MOLTBOT_STATE_DIR +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" +export OPENCLAW_STATE_DIR OPENCLAW_WORKSPACE TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -MOLTBOT_WEB_PASSWORD="${MOLTBOT_WEB_PASSWORD:-moltbot}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" -BOT_CMD="moltbot" +BOT_CMD="openclaw" if ! command -v "$BOT_CMD" >/dev/null 2>&1; then - echo "ERROR: moltbot command not found in PATH" + echo "ERROR: openclaw command not found in PATH" echo "PATH=$PATH" echo "Container staying alive for debugging." 
sleep infinity @@ -138,13 +137,13 @@ if [ $WAITED -ge $MAX_WAIT ]; then echo "Container will stay running for debugging." fi -# Setup Moltbot config -mkdir -p "$MOLTBOT_HOME" "$MOLTBOT_HOME/agents/main/sessions" "$MOLTBOT_HOME/credentials" -chmod 700 "$MOLTBOT_HOME" "$MOLTBOT_HOME/agents" "$MOLTBOT_HOME/agents/main" \ - "$MOLTBOT_HOME/agents/main/sessions" "$MOLTBOT_HOME/credentials" 2>/dev/null || true +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then - echo "Creating Moltbot config (legacy clawdbot.json)..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." if [ -n "$TELEGRAM_BOT_TOKEN" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true, \"botToken\": \"${TELEGRAM_BOT_TOKEN}\" }" @@ -152,7 +151,7 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$MOLTBOT_HOME/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -175,7 +174,8 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-llamacpp/$SERVED_MODEL_NAME" }, - "contextTokens": 180000 + "contextTokens": 180000, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { @@ -184,19 +184,19 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then "gateway": { "mode": "local", "bind": "lan", - "auth": { "token": "$MOLTBOT_WEB_PASSWORD" }, - "remote": { "token": "$MOLTBOT_WEB_PASSWORD" } + "auth": { "mode": "token", "token": "$OPENCLAW_WEB_PASSWORD" }, + "remote": { "token": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$MOLTBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" fi # Auto-fix config -echo "Running moltbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix || true -chmod 600 "$MOLTBOT_HOME/clawdbot.json" 2>/dev/null || true +echo "Running openclaw doctor to validate/fix config..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -216,20 +216,20 @@ fi export OPENAI_API_KEY="$LLAMA_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Moltbot gateway (use token auth for URL parameter support) +# Start OpenClaw gateway (use token auth for URL parameter support) echo "" -echo "Starting Moltbot gateway..." -CLAWDBOT_STATE_DIR=$MOLTBOT_HOME MOLTBOT_GATEWAY_TOKEN="$MOLTBOT_WEB_PASSWORD" \ -"$BOT_CMD" gateway --auth token --token "$MOLTBOT_WEB_PASSWORD" & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR OPENCLAW_GATEWAY_TOKEN="$OPENCLAW_WEB_PASSWORD" \ +"$BOT_CMD" gateway --auth token --token "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready!" 
echo " llama.cpp API: http://localhost:8000" -echo " Moltbot Gateway: ws://localhost:18789" -echo " Web UI: https://-18789.proxy.runpod.net/?token=$MOLTBOT_WEB_PASSWORD" -echo " Web UI Token: $MOLTBOT_WEB_PASSWORD" +echo " OpenClaw Gateway: ws://localhost:18789" +echo " Web UI: https://-18789.proxy.runpod.net/?token=$OPENCLAW_WEB_PASSWORD" +echo " Web UI Token: $OPENCLAW_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME" echo " Context: $MAX_MODEL_LEN tokens (200k!)" echo " VRAM: ~28GB / 32GB" diff --git a/models/glm47-flash-nvfp4-5090/Dockerfile b/models/glm47-flash-nvfp4-5090/Dockerfile index 4ad3a82..4869d22 100644 --- a/models/glm47-flash-nvfp4-5090/Dockerfile +++ b/models/glm47-flash-nvfp4-5090/Dockerfile @@ -53,7 +53,7 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install tools RUN uv pip install --system "huggingface_hub[cli]" && \ - npm install -g --prefer-offline @anthropic-ai/claude-code moltbot && \ + npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ npm cache clean --force WORKDIR / @@ -63,8 +63,8 @@ ENV MODEL_NAME="GadflyII/GLM-4.7-Flash-NVFP4" \ SERVED_MODEL_NAME="glm-4.7-flash" \ MAX_MODEL_LEN="200000" \ VLLM_API_KEY="changeme" \ - MOLTBOT_HOME="/workspace/.clawdbot" \ - MOLTBOT_WORKSPACE="/workspace/clawd" + OPENCLAW_STATE_DIR="/workspace/.openclaw" \ + OPENCLAW_WORKSPACE="/workspace/openclaw" COPY entrypoint.sh benchmark.sh / RUN chmod +x /entrypoint.sh /benchmark.sh diff --git a/models/glm47-flash-nvfp4-5090/ISSUES.md b/models/glm47-flash-nvfp4-5090/ISSUES.md index 10a4d42..5ebb642 100644 --- a/models/glm47-flash-nvfp4-5090/ISSUES.md +++ b/models/glm47-flash-nvfp4-5090/ISSUES.md @@ -7,7 +7,16 @@ ## Summary -Attempting to run `GadflyII/GLM-4.7-Flash-NVFP4` with vLLM 0.14.0 on RTX 5090 fails due to multiple issues with the GLM-4.7 MLA (Multi-Latent Attention) architecture not being properly supported by vLLM's TransformersMoE fallback. +Attempting to run `GadflyII/GLM-4.7-Flash-NVFP4` with vLLM on RTX 5090 fails due to multiple issues with the GLM-4.7 MLA (Multi-Latent Attention) architecture and SM120 kernel support. + +## Upstream status (as of 2026-01-29) + +- vLLM Issue #32109 is **closed** and was closed by PR #33285. +- PR #33285 **restricts** FP8 MoE CUTLASS backend to SM90/SM100 (does not add SM120 MoE support). +- PR #32237 (SM120 FP8 MoE support) was **closed and not merged**. +- vLLM now includes `Glm4MoeLiteForCausalLM` support, but NVFP4 on SM120 is still unverified. + +Net: there is no confirmed upstream fix for NVFP4 + GLM-4.7 on RTX 5090 yet. ## Issues Encountered @@ -53,7 +62,7 @@ Actual: 5 heads × 256 = 1280 vLLM's attention produces wrong output dimensions for GLM-4.7's MLA architecture. -**Status**: UNRESOLVED - requires native Glm4MoeLite support in vLLM +**Status**: UNRESOLVED - still reproduced on SM120 ### 5. SGLang cuDNN Error @@ -70,7 +79,7 @@ cuDNN doesn't support NVFP4 GEMM on Blackwell SM120. ``` GPU: RTX 5090 (Blackwell SM120, 32GB) CUDA: 12.8 -vLLM: 0.14.0 (nightly) +vLLM: 0.14.x (nightly at the time) Transformers: 5.0.0.dev0 Model: GadflyII/GLM-4.7-Flash-NVFP4 ``` @@ -78,9 +87,92 @@ Model: GadflyII/GLM-4.7-Flash-NVFP4 ## When to Retry Check these before retrying: -1. vLLM has native `Glm4MoeLiteForCausalLM` (not TransformersMoE fallback) -2. vLLM Issue #32109 resolved -3. NVIDIA cuDNN Blackwell FP4 support +1. vLLM has native `Glm4MoeLiteForCausalLM` path for GLM-4.7 in production builds +2. SM120 FP8 MoE kernels are supported (not just gated off) +3. 
NVIDIA cuDNN Blackwell FP4 support is available + +## Known working nightly tag (from upstream reports) + +Community reports in vLLM Issue #32109 mention the following as working at the time: +- `docker.io/vllm/vllm-openai:nightly-0d4044edd85de30d7d4558aeea4d1e95c7c556d6` + +Reported commit window: +- last working: `ffc0a2798b118f7ceb21645df59d2bfdfc461d42` +- first broken: `5dcd7ef1f219068e6b6be5b614bc43978f028651` + +These are historical references for retesting. + +## Verification plan (recommended) + +1. Baseline: run the known working nightly image above with NVFP4 and confirm it still starts. +2. Candidate: run the latest vLLM release or nightly (v0.15.x) with the same config. +3. Compare logs for MLA mismatch or SM120 kernel selection errors. +4. Record results here and update status. + +## RunPod test checklist (NVFP4, no custom image) + +Goal: validate NVFP4 on RTX 5090 using official vLLM images (no custom build). + +### 1) Create a pod +- GPU: RTX 5090 32GB +- Volume: 100GB+ mounted at `/workspace` +- Ports: `8000/http`, `22/tcp` +- Image: use one of the two images below: + - Baseline (reported working): `vllm/vllm-openai:nightly-0d4044edd85de30d7d4558aeea4d1e95c7c556d6` + - Candidate (latest): `vllm/vllm-openai:latest` + +### 2) Environment variables +- `HF_TOKEN` (optional but recommended) +- `VLLM_API_KEY` (required) +- `MODEL_NAME=GadflyII/GLM-4.7-Flash-NVFP4` +- `SERVED_MODEL_NAME=glm-4.7-flash` +- `MAX_MODEL_LEN=200000` +- `TOOL_CALL_PARSER=glm47` +- `REASONING_PARSER=glm45` +- `GPU_MEMORY_UTILIZATION=0.95` +- `HF_HOME=/workspace/huggingface` + +### 3) Start command +Use the same command for both baseline and candidate images: +``` +vllm serve ${MODEL_NAME} \ + --host 0.0.0.0 \ + --port 8000 \ + --max-model-len ${MAX_MODEL_LEN} \ + --gpu-memory-utilization ${GPU_MEMORY_UTILIZATION} \ + --served-model-name ${SERVED_MODEL_NAME} \ + --api-key ${VLLM_API_KEY} \ + --enable-auto-tool-choice \ + --tool-call-parser ${TOOL_CALL_PARSER} \ + --reasoning-parser ${REASONING_PARSER} +``` + +### 4) Health check +``` +curl http://localhost:8000/health +``` + +### 5) Minimal chat test +``` +curl http://localhost:8000/v1/chat/completions \ + -H "Authorization: Bearer ${VLLM_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-4.7-flash", + "messages": [{"role": "user", "content": "Hello!"}], + "max_tokens": 64 + }' +``` + +### 6) Log triage (what to watch for) +- `No compiled cutlass_scaled_mm for CUDA device capability: 120` +- `mat1 and mat2 shapes cannot be multiplied` (MLA mismatch) +- CUDA graph or cuDNN errors on SM120 + +### 7) Record results +- Image tag used +- vLLM version reported in logs +- Pass/fail and error signatures ## Working Alternative diff --git a/models/glm47-flash-nvfp4-5090/README.md b/models/glm47-flash-nvfp4-5090/README.md index 0eb8c8d..d674f01 100644 --- a/models/glm47-flash-nvfp4-5090/README.md +++ b/models/glm47-flash-nvfp4-5090/README.md @@ -39,7 +39,7 @@ Full 200K context window with MLA for reduced KV cache memory. ### 1. Create RunPod Pod **Settings:** -- **Image**: `runpod/moltbot-glm47-flash-nvfp4-5090:latest` +- **Image**: `runpod/openclaw-glm47-flash-nvfp4-5090:latest` - **GPU**: 1x RTX 5090 32GB - **Volume**: 100GB at `/workspace` (network storage) - **Container Disk**: 50GB @@ -54,7 +54,7 @@ Full 200K context window with MLA for reduced KV cache memory. 
| `HF_TOKEN` | Recommended | - | [HuggingFace token](https://huggingface.co/settings/tokens) for faster model downloads | | `TELEGRAM_BOT_TOKEN` | No | - | Telegram bot token | | `GITHUB_TOKEN` | No | - | GitHub token for `gh` CLI | -| `MOLTBOT_WEB_PASSWORD` | No | `moltbot` | Password for web UI | +| `OPENCLAW_WEB_PASSWORD` | No | `openclaw` | Password for web UI | ### 3. Test It diff --git a/models/glm47-flash-nvfp4-5090/entrypoint.sh b/models/glm47-flash-nvfp4-5090/entrypoint.sh index 8ca4c21..9685a85 100644 --- a/models/glm47-flash-nvfp4-5090/entrypoint.sh +++ b/models/glm47-flash-nvfp4-5090/entrypoint.sh @@ -75,16 +75,14 @@ fi VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" -MOLTBOT_HOME="${MOLTBOT_HOME:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -# Web UI password - users enter this to access the Moltbot control panel -MOLTBOT_WEB_PASSWORD="${MOLTBOT_WEB_PASSWORD:-moltbot}" +# Web UI password - users enter this to access the OpenClaw control panel +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" -BOT_CMD="moltbot" -if ! command -v "$BOT_CMD" >/dev/null 2>&1; then - BOT_CMD="clawdbot" -fi +BOT_CMD="openclaw" echo "Starting vLLM server..." echo " Model: $MODEL_PATH" @@ -133,11 +131,14 @@ if [ $WAITED -ge $MAX_WAIT ]; then # Don't exit - keep container running for debugging fi -# Setup Moltbot config -mkdir -p "$MOLTBOT_HOME" +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then - echo "Creating Moltbot config (legacy clawdbot.json)..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -146,9 +147,9 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - # Create a minimal config - moltbot doctor will fix any missing fields + # Create a minimal config - openclaw doctor will fix any missing fields # contextTokens: 180000 leaves room for output within 200K context - cat > "$MOLTBOT_HOME/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -171,7 +172,8 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-vllm/$SERVED_MODEL_NAME" }, - "contextTokens": 180000 + "contextTokens": 180000, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { @@ -179,17 +181,19 @@ if [ ! -f "$MOLTBOT_HOME/clawdbot.json" ]; then }, "gateway": { "mode": "local", - "bind": "lan" + "bind": "lan", + "auth": { "mode": "password", "password": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$MOLTBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" fi -# Auto-fix config to match current Moltbot version's schema -echo "Running moltbot doctor to validate/fix config..." 
-MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" doctor --fix 2>/dev/null || true +# Auto-fix config to match current OpenClaw version's schema +echo "Running openclaw doctor to validate/fix config..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix 2>/dev/null || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -209,19 +213,19 @@ fi export OPENAI_API_KEY="$VLLM_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Moltbot gateway with password auth for web UI access +# Start OpenClaw gateway with password auth for web UI access echo "" -echo "Starting Moltbot gateway..." -MOLTBOT_STATE_DIR=$MOLTBOT_HOME "$BOT_CMD" gateway --auth password --password "$MOLTBOT_WEB_PASSWORD" 2>/dev/null & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" 2>/dev/null & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready! (RTX 5090 Blackwell SM120)" echo " vLLM API: http://localhost:8000" -echo " Moltbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo " Web UI: https://-18789.proxy.runpod.net" -echo " Web UI Password: $MOLTBOT_WEB_PASSWORD" +echo " Web UI Password: $OPENCLAW_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME (NVFP4)" echo " Context: $MAX_MODEL_LEN tokens" echo " Cost: ~\$0.89/hr (36% savings vs A100)" diff --git a/models/glm47-reap-w4a16/Dockerfile b/models/glm47-reap-w4a16/Dockerfile index 58c74d6..6da889f 100644 --- a/models/glm47-reap-w4a16/Dockerfile +++ b/models/glm47-reap-w4a16/Dockerfile @@ -30,13 +30,14 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64:$LD_LIBRARY_PATH # Install vLLM nightly (required for GLM-4.7 support) RUN uv pip install --system -U vllm --pre -# Install Moltbot -RUN npm install -g --prefer-offline moltbot@latest && \ +# Install OpenClaw +RUN npm install -g --prefer-offline openclaw@latest && \ npm cache clean --force # Environment variables ENV HF_HOME=/workspace/huggingface -ENV MOLTBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw ENV MODEL_NAME=0xSero/GLM-4.7-REAP-40-W4A16 ENV SERVED_MODEL_NAME=glm-4.7-reap ENV VLLM_API_KEY=changeme @@ -45,7 +46,7 @@ ENV GPU_MEMORY_UTILIZATION=0.90 ENV TOOL_CALL_PARSER=glm45 # Create directories -RUN mkdir -p /workspace/huggingface /workspace/.clawdbot /workspace/clawd +RUN mkdir -p /workspace/huggingface /workspace/.openclaw /workspace/openclaw # Copy entrypoint script COPY entrypoint.sh /entrypoint.sh @@ -53,7 +54,7 @@ RUN chmod +x /entrypoint.sh # Expose ports # 8000: vLLM API -# 18789: Moltbot Gateway +# 18789: OpenClaw Gateway # 22: SSH EXPOSE 8000 18789 22 diff --git a/models/glm47-reap-w4a16/README.md b/models/glm47-reap-w4a16/README.md index e32188d..3a472f8 100644 --- a/models/glm47-reap-w4a16/README.md +++ b/models/glm47-reap-w4a16/README.md @@ -21,7 +21,7 @@ High-end option for maximum performance. ### 1. 
Create RunPod Pod -- **Image**: `yourusername/moltbot-glm47-reap-w4a16:latest` +- **Image**: `yourusername/openclaw-glm47-reap-w4a16:latest` - **GPU**: 1x B200 180GB - **Volume**: 200GB at `/workspace` - **Container Disk**: 50GB @@ -59,11 +59,11 @@ Files persist on network volume `/workspace`: ``` /workspace/ ├── huggingface/ # Model cache -├── .clawdbot/ # Legacy Moltbot state path -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # State │ └── telegram/ # Session -└── clawd/ # Workspace +└── openclaw/ # Workspace ``` ## B200 (Blackwell) Support diff --git a/models/glm47-reap-w4a16/entrypoint.sh b/models/glm47-reap-w4a16/entrypoint.sh index 4411988..4695ff9 100644 --- a/models/glm47-reap-w4a16/entrypoint.sh +++ b/models/glm47-reap-w4a16/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - GLM-4.7-REAP W4A16 + Moltbot startup script for RunPod B200 +# entrypoint.sh - GLM-4.7-REAP W4A16 + OpenClaw startup script for RunPod B200 set -e echo "============================================" -echo " GLM-4.7-REAP W4A16 + Moltbot Startup" +echo " GLM-4.7-REAP W4A16 + OpenClaw Startup" echo "============================================" # Configuration from environment @@ -14,23 +14,25 @@ MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}" GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.90}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-glm45}" HF_HOME="${HF_HOME:-/workspace/huggingface}" -MOLTBOT_STATE_DIR="${MOLTBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" export HF_HOME -export MOLTBOT_STATE_DIR +export OPENCLAW_STATE_DIR export PATH=/usr/local/cuda-13.1/bin:$PATH export CUDA_HOME=/usr/local/cuda-13.1 export LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64:$LD_LIBRARY_PATH # Ensure directories exist -mkdir -p "$HF_HOME" "$MOLTBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -BOT_CMD="moltbot" -if ! command -v "$BOT_CMD" >/dev/null 2>&1; then - BOT_CMD="clawdbot" -fi +BOT_CMD="openclaw" # Configure GitHub CLI if [ -n "$GITHUB_TOKEN" ]; then @@ -59,9 +61,9 @@ echo " Tool parser: $TOOL_CALL_PARSER" echo " CUDA: $(nvcc --version | grep release | awk '{print $5}' | tr -d ',')" echo "" -# Initialize Moltbot config if not exists -if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Moltbot configuration (legacy clawdbot.json)..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -70,12 +72,12 @@ if [ ! 
-f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$MOLTBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd" + "workspace": "/workspace/openclaw" } }, "models": { @@ -100,15 +102,17 @@ if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then ${TELEGRAM_CONFIG} }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "password", "password": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$MOLTBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $MOLTBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi # Build vLLM command @@ -153,10 +157,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Moltbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Moltbot gateway..." -MOLTBOT_STATE_DIR=$MOLTBOT_STATE_DIR "$BOT_CMD" gateway & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" @@ -164,7 +168,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Moltbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index ebe77f4..fb33021 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - Moltbot + vLLM startup script for RunPod +# entrypoint.sh - OpenClaw + vLLM startup script for RunPod set -e echo "============================================" -echo " Moltbot + vLLM Startup" +echo " OpenClaw + vLLM Startup" echo "============================================" # Configuration from environment @@ -15,19 +15,20 @@ GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.90}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-hermes}" TENSOR_PARALLEL_SIZE="${TENSOR_PARALLEL_SIZE:-auto}" HF_HOME="${HF_HOME:-/workspace/huggingface}" -MOLTBOT_STATE_DIR="${MOLTBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" export HF_HOME -export MOLTBOT_STATE_DIR +export OPENCLAW_STATE_DIR -BOT_CMD="moltbot" -if ! 
command -v "$BOT_CMD" >/dev/null 2>&1; then - BOT_CMD="clawdbot" -fi +BOT_CMD="openclaw" # Ensure directories exist -mkdir -p "$HF_HOME" "$MOLTBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" /workspace/openclaw +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true # Auto-detect tensor parallel size if [ "$TENSOR_PARALLEL_SIZE" = "auto" ]; then @@ -44,9 +45,9 @@ echo " Tensor parallel: $TENSOR_PARALLEL_SIZE" echo " Tool parser: $TOOL_CALL_PARSER" echo "" -# Initialize Moltbot config if not exists -if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Moltbot configuration (legacy clawdbot.json)..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -55,12 +56,12 @@ if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$MOLTBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd" + "workspace": "/workspace/openclaw" } }, "models": { @@ -85,22 +86,20 @@ if [ ! -f "$MOLTBOT_STATE_DIR/clawdbot.json" ]; then ${TELEGRAM_CONFIG} }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "token", "token": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$MOLTBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $MOLTBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi -# Initialize Moltbot workspace if empty -if [ ! -f "/workspace/clawd/AGENTS.md" ]; then - echo "Initializing Moltbot workspace..." - "$BOT_CMD" setup --non-interactive --accept-risk --workspace /workspace/clawd 2>/dev/null || true -fi +# Workspace files are seeded during image build. # Build vLLM command VLLM_CMD="vllm serve $MODEL_NAME" @@ -143,10 +142,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Moltbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Moltbot gateway..." -"$BOT_CMD" gateway & +echo "Starting OpenClaw gateway..." +"$BOT_CMD" gateway --auth token --token "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! 
echo "" @@ -154,7 +153,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Moltbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/scripts/setup-moltbot.sh b/scripts/setup-openclaw.sh similarity index 80% rename from scripts/setup-moltbot.sh rename to scripts/setup-openclaw.sh index 0efd285..1c2e0c1 100644 --- a/scripts/setup-moltbot.sh +++ b/scripts/setup-openclaw.sh @@ -1,5 +1,5 @@ #!/bin/bash -# setup-moltbot.sh - Install and configure Moltbot on RunPod +# setup-openclaw.sh - Install and configure OpenClaw on RunPod # Prerequisites: vLLM server running on port 8000 set -e @@ -21,13 +21,13 @@ VLLM_HOST="${VLLM_HOST:-localhost}" VLLM_PORT="${VLLM_PORT:-8000}" VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-qwen3-30b-a3b}" -MOLTBOT_CONFIG_DIR="${MOLTBOT_CONFIG_DIR:-$HOME/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-$HOME/.openclaw}" RUNPOD_POD_ID="${RUNPOD_POD_ID:-}" # Print banner echo "" echo "===========================================" -echo " Moltbot Setup Script" +echo " OpenClaw Setup Script" echo "===========================================" echo "" @@ -55,14 +55,11 @@ if ! command -v npm &> /dev/null; then fi log_info "npm version: $(npm --version)" -# Step 2: Install Moltbot -log_info "Installing Moltbot..." -npm install -g moltbot@latest -BOT_CMD="moltbot" -if ! command -v "$BOT_CMD" &> /dev/null; then - BOT_CMD="clawdbot" -fi -log_success "Moltbot installed: $("$BOT_CMD" --version 2>/dev/null || echo 'version check failed')" +# Step 2: Install OpenClaw +log_info "Installing OpenClaw..." +npm install -g openclaw@latest +BOT_CMD="openclaw" +log_success "OpenClaw installed: $("$BOT_CMD" --version 2>/dev/null || echo 'version check failed')" # Step 3: Wait for vLLM to be ready log_info "Waiting for vLLM server to be ready..." @@ -90,9 +87,9 @@ MODELS_RESPONSE=$(curl -s "http://${VLLM_HOST}:${VLLM_PORT}/v1/models" \ -H "Authorization: Bearer ${VLLM_API_KEY}") echo "Available models: $MODELS_RESPONSE" -# Step 4: Create Moltbot configuration directory -log_info "Creating Moltbot configuration..." -mkdir -p "$MOLTBOT_CONFIG_DIR" +# Step 4: Create OpenClaw configuration directory +log_info "Creating OpenClaw configuration..." +mkdir -p "$OPENCLAW_STATE_DIR" # Determine the base URL for the vLLM endpoint if [ -n "$RUNPOD_POD_ID" ]; then @@ -103,8 +100,8 @@ else VLLM_BASE_URL="http://${VLLM_HOST}:${VLLM_PORT}/v1" fi -# Step 5: Create Moltbot configuration file -cat > "$MOLTBOT_CONFIG_DIR/clawdbot.json" << EOF +# Step 5: Create OpenClaw configuration file +cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { @@ -136,15 +133,15 @@ cat > "$MOLTBOT_CONFIG_DIR/clawdbot.json" << EOF } EOF -log_success "Moltbot configuration created at $MOLTBOT_CONFIG_DIR/clawdbot.json (legacy file name)" +log_success "OpenClaw configuration created at $OPENCLAW_STATE_DIR/openclaw.json" -# Step 6: Test Moltbot connection -log_info "Testing Moltbot configuration..." +# Step 6: Test OpenClaw connection +log_info "Testing OpenClaw configuration..." 
echo "" echo "Configuration summary:" echo " vLLM URL: $VLLM_BASE_URL" echo " Model: $SERVED_MODEL_NAME" -echo " Config dir: $MOLTBOT_CONFIG_DIR" +echo " Config dir: $OPENCLAW_STATE_DIR" echo "" # Test a simple completion @@ -170,11 +167,11 @@ echo "===========================================" echo " Setup Complete!" echo "===========================================" echo "" -echo "To start Moltbot, run:" -echo " moltbot" +echo "To start OpenClaw, run:" +echo " openclaw" echo "" echo "To start with daemon mode:" -echo " moltbot onboard --install-daemon" +echo " openclaw onboard --install-daemon" echo "" -echo "Configuration file: $MOLTBOT_CONFIG_DIR/clawdbot.json" +echo "Configuration file: $OPENCLAW_STATE_DIR/openclaw.json" echo "" diff --git a/scripts/start-vllm.sh b/scripts/start-vllm.sh index 75466b6..cc75e5f 100755 --- a/scripts/start-vllm.sh +++ b/scripts/start-vllm.sh @@ -1,5 +1,5 @@ #!/bin/bash -# start-vllm.sh - vLLM startup script for Moltbot on RunPod +# start-vllm.sh - vLLM startup script for OpenClaw on RunPod # Handles model download, GPU detection, and vLLM server startup set -e @@ -36,7 +36,7 @@ export HF_HOME # Print banner echo "" echo "===========================================" -echo " Moltbot vLLM Server Startup" +echo " OpenClaw vLLM Server Startup" echo "===========================================" echo "" diff --git a/templates/moltbot-vllm.json b/templates/openclaw-vllm.json similarity index 97% rename from templates/moltbot-vllm.json rename to templates/openclaw-vllm.json index 2226522..c6787ee 100644 --- a/templates/moltbot-vllm.json +++ b/templates/openclaw-vllm.json @@ -1,7 +1,7 @@ { "tiers": { "tier1": { - "name": "moltbot-vllm-qwen3", + "name": "openclaw-vllm-qwen3", "description": "Tier 1: Qwen3-30B-A3B on 1x H100 (~$2/hr) - Best for validation", "imageName": "vllm/vllm-openai:v0.12.0", "containerDiskInGb": 50, @@ -25,7 +25,7 @@ ] }, "tier2": { - "name": "moltbot-vllm-mimo", + "name": "openclaw-vllm-mimo", "description": "Tier 2: MiMo-V2-Flash on 2x H100 (~$4/hr) - Fastest inference", "imageName": "vllm/vllm-openai:v0.12.0", "containerDiskInGb": 50, @@ -48,7 +48,7 @@ ] }, "tier3": { - "name": "moltbot-vllm-glm47", + "name": "openclaw-vllm-glm47", "description": "Tier 3: GLM-4.7-FP8 on 4x H100 or 2x H200 (~$7-8/hr) - SOTA tool calling", "imageName": "vllm/vllm-openai:latest", "containerDiskInGb": 100, @@ -73,7 +73,7 @@ ] }, "tier3_h200": { - "name": "moltbot-vllm-glm47-h200", + "name": "openclaw-vllm-glm47-h200", "description": "Tier 3 Alt: GLM-4.7-FP8 on 2x H200 (~$7/hr) - Best value for SOTA", "imageName": "vllm/vllm-openai:latest", "containerDiskInGb": 100, diff --git a/templates/runpod-template.json b/templates/runpod-template.json index a603969..845d258 100644 --- a/templates/runpod-template.json +++ b/templates/runpod-template.json @@ -1,7 +1,7 @@ { - "name": "moltbot-vllm", - "description": "Moltbot AI assistant with vLLM for local LLM inference. Includes Telegram integration.", - "imageName": "your-dockerhub-username/moltbot-vllm:latest", + "name": "openclaw-vllm", + "description": "OpenClaw AI assistant with vLLM for local LLM inference. 
Includes Telegram integration.", + "imageName": "your-dockerhub-username/openclaw-vllm:latest", "containerDiskInGb": 50, "volumeInGb": 150, "volumeMountPath": "/workspace", @@ -15,8 +15,9 @@ "TOOL_CALL_PARSER": "hermes", "TENSOR_PARALLEL_SIZE": "auto", "HF_HOME": "/workspace/huggingface", - "MOLTBOT_STATE_DIR": "/workspace/.clawdbot", + "OPENCLAW_STATE_DIR": "/workspace/.openclaw", + "OPENCLAW_WORKSPACE": "/workspace/openclaw", "TELEGRAM_BOT_TOKEN": "" }, - "readme": "# Moltbot + vLLM\n\nAI coding assistant with local LLM inference.\n\n## Quick Start\n1. Set TELEGRAM_BOT_TOKEN env var (get from @BotFather)\n2. Start the pod - services auto-start\n3. Message your bot on Telegram\n4. First time: approve pairing via SSH: `moltbot pairing list telegram` then `moltbot pairing approve telegram CODE --notify`\n\n## Persistence\n- Config & pairings stored in /workspace/.clawdbot (legacy path used by Moltbot)\n- Model cache in /workspace/huggingface\n\n## Environment Variables\n- `MODEL_NAME`: HuggingFace model ID\n- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token\n- `VLLM_API_KEY`: API key for vLLM\n- `MAX_MODEL_LEN`: Context length\n\n## Ports\n- 8000: vLLM API\n- 18789: Moltbot Gateway" + "readme": "# OpenClaw + vLLM\n\nAI coding assistant with local LLM inference.\n\n## Quick Start\n1. Set TELEGRAM_BOT_TOKEN env var (get from @BotFather)\n2. Start the pod - services auto-start\n3. Message your bot on Telegram\n4. First time: approve pairing via SSH: `openclaw pairing list telegram` then `openclaw pairing approve telegram CODE --notify`\n\n## Persistence\n- Config & pairings stored in /workspace/.openclaw\n- Model cache in /workspace/huggingface\n\n## Environment Variables\n- `MODEL_NAME`: HuggingFace model ID\n- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token\n- `VLLM_API_KEY`: API key for vLLM\n- `MAX_MODEL_LEN`: Context length\n\n## Ports\n- 8000: vLLM API\n- 18789: OpenClaw Gateway" }