diff --git a/.env.example b/.env.example index 8fd6b96..a203fbe 100644 --- a/.env.example +++ b/.env.example @@ -14,8 +14,10 @@ HF_TOKEN=hf_your_token_here RUNPOD_POD_ID=your-pod-id RUNPOD_VLLM_API_KEY=your-secure-api-key-here -# Clawdbot Web UI (password for accessing the control panel) -CLAWDBOT_WEB_PASSWORD=clawdbot +# OpenClaw Configuration +OPENCLAW_STATE_DIR=/workspace/.openclaw +OPENCLAW_WORKSPACE=/workspace/openclaw +OPENCLAW_WEB_PASSWORD=openclaw # Messaging Integrations (optional) TELEGRAM_BOT_TOKEN= diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 43996c7..a478857 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -2,11 +2,8 @@ name: Build and Push Docker Images on: push: - branches: [main, feat/add-model-folders, gguf-v3] - paths: - - 'models/**' - - 'Dockerfile' - - '.github/workflows/docker-build.yml' + branches: [main] + tags: ['*'] pull_request: paths: - 'models/**' @@ -26,12 +23,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -44,6 +35,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -52,22 +49,33 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: context: models/glm47-flash-awq-4bit - push: true + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-awq-4bit:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-awq-4bit:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-awq-4bit:buildcache,mode=max # Disabled - vLLM NVFP4 has bugs with GLM-4.7 MLA on Blackwell # See 
models/glm47-flash-nvfp4-5090/ISSUES.md @@ -81,12 +89,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -99,6 +101,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -107,22 +115,33 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: context: models/glm47-flash-nvfp4-5090 - push: true + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-nvfp4-5090:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-nvfp4-5090:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-nvfp4-5090:buildcache,mode=max # GLM-4.7-Flash GGUF with llama.cpp - WORKING on RTX 5090! 
build-glm47-flash-gguf-llamacpp: @@ -134,12 +153,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -152,6 +165,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -160,22 +179,33 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: context: models/glm47-flash-gguf-llamacpp - push: true + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-gguf:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-gguf:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-glm47-flash-gguf:buildcache,mode=max # Disabled for now # build-glm47-flash-fp16: diff --git a/.gitignore b/.gitignore index 8a344f7..0935446 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ Thumbs.db *.swp *.swo -# Node (if running Clawdbot locally) +# Node (if running OpenClaw locally) node_modules/ # Python diff --git a/CLAUDE.md b/CLAUDE.md index 9eb13c2..35f8052 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,19 +4,19 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -RunPod-optimized Docker deployment for running Clawdbot (AI coding assistant) with GLM-4.7 language models using vLLM for inference. Multiple model variants are optimized for different GPU tiers (A100, H100, B200, RTX 5090). +RunPod-optimized Docker deployment for running OpenClaw (AI coding assistant) with GLM-4.7 language models using vLLM for inference. Multiple model variants are optimized for different GPU tiers (A100, H100, B200, RTX 5090). 
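The three build jobs in the workflow diff above now share the same tag and push resolution: branch names become image tags (slashes replaced with `-`), `main` maps to `latest`, git tags are used verbatim, and pushes are skipped for fork PRs that have no registry credentials. A standalone sketch of that resolution logic, runnable outside Actions for local reasoning (the variable names mirror the workflow's `${{ github.* }}` expressions and are stand-ins, not part of the repo):

```bash
#!/usr/bin/env bash
# Sketch of the workflow's tag/push resolution, with assumed stand-in inputs.
GITHUB_REF="${GITHUB_REF:-refs/heads/feat/some-change}"
EVENT_NAME="${EVENT_NAME:-pull_request}"      # push | pull_request
HEAD_REF="${HEAD_REF:-feat/some-change}"      # PR source branch
PR_REPO="${PR_REPO:-fork/runpod-openclaw}"    # PR head repository
REPO="${REPO:-owner/runpod-openclaw}"         # this repository

# Branch tag: PRs use the head branch, pushes use the ref; slashes -> dashes
if [[ "$EVENT_NAME" == "pull_request" ]]; then
  BRANCH_NAME="$HEAD_REF"
else
  BRANCH_NAME="${GITHUB_REF##refs/heads/}"
fi
BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g')

# Version: git tag > main (latest) > branch tag
if [[ "$GITHUB_REF" == refs/tags/* ]]; then
  VERSION="${GITHUB_REF##refs/tags/}"; ALSO_LATEST=true
elif [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
  VERSION=latest; ALSO_LATEST=false
else
  VERSION="$BRANCH_TAG"; ALSO_LATEST=false
fi

# Fork PRs are built but never pushed
if [[ "$EVENT_NAME" == "pull_request" && "$PR_REPO" != "$REPO" ]]; then
  PUSH_IMAGES=false
else
  PUSH_IMAGES=true
fi

echo "VERSION=$VERSION ALSO_LATEST=$ALSO_LATEST PUSH_IMAGES=$PUSH_IMAGES"
```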
## Build Commands ```bash # Build a specific model variant -docker build -t clawdbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ -docker build -t clawdbot-glm47-flash-fp16 models/glm47-flash-fp16/ -docker build -t clawdbot-glm47-flash-nvfp4-5090 models/glm47-flash-nvfp4-5090/ +docker build -t openclaw-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ +docker build -t openclaw-glm47-flash-fp16 models/glm47-flash-fp16/ +docker build -t openclaw-glm47-flash-nvfp4-5090 models/glm47-flash-nvfp4-5090/ # Push to Docker Hub -docker tag clawdbot-glm47-flash-awq-4bit yourusername/clawdbot-glm47-flash-awq-4bit:latest -docker push yourusername/clawdbot-glm47-flash-awq-4bit:latest +docker tag openclaw-glm47-flash-awq-4bit yourusername/openclaw-glm47-flash-awq-4bit:latest +docker push yourusername/openclaw-glm47-flash-awq-4bit:latest ``` ## Local Development @@ -56,12 +56,12 @@ models/ # Model-specific Dockerfiles and configs └── glm47-reap-w4a16/ # REAP W4A16 (B200) scripts/ # Startup orchestration -├── entrypoint.sh # Docker entrypoint (starts vLLM + Clawdbot) +├── entrypoint.sh # Docker entrypoint (starts vLLM + OpenClaw) ├── start-vllm.sh # vLLM server with GPU detection -└── setup-clawdbot.sh # Clawdbot installation +└── setup-openclaw.sh # OpenClaw installation config/ # Runtime configuration -├── clawdbot.json # Clawdbot config template +├── openclaw.json # OpenClaw config template └── workspace/ # Agent identity and system docs ``` @@ -70,9 +70,9 @@ config/ # Runtime configuration | Port | Service | |-------|-------------------| | 8000 | vLLM API | -| 18789 | Clawdbot Gateway | -| 18790 | Clawdbot Bridge | -| 18793 | Clawdbot Canvas | +| 18789 | OpenClaw Gateway | +| 18790 | OpenClaw Bridge | +| 18793 | OpenClaw Canvas | | 22 | SSH | ## CI/CD (GitHub Actions) @@ -98,10 +98,10 @@ Key variables from `.env.example`: ## Entrypoint Flow 1. Configure environment and detect GPU count -2. Generate `clawdbot.json` with vLLM provider settings +2. Generate `openclaw.json` with vLLM provider settings 3. Start vLLM server in background 4. Wait for health check (max 5 minutes) -5. Start Clawdbot gateway +5. Start OpenClaw gateway 6. 
Handle graceful shutdown on SIGTERM/SIGINT ## RunPod SSH Access diff --git a/Dockerfile b/Dockerfile index ec61858..3c24639 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,15 @@ -# Clawdbot + vLLM Docker Image for RunPod +# OpenClaw + vLLM Docker Image for RunPod # Pre-configured with everything needed for AI coding assistant FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04 -LABEL maintainer="RunPod Clawdbot" -LABEL description="Clawdbot AI assistant with vLLM for local LLM inference" +LABEL maintainer="RunPod OpenClaw" +LABEL description="OpenClaw AI assistant with vLLM for local LLM inference" # Avoid interactive prompts ENV DEBIAN_FRONTEND=noninteractive ENV HF_HOME=/workspace/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw # Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -27,27 +28,27 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ # Install vLLM RUN pip install --no-cache-dir vllm -# Install Clawdbot -RUN npm install -g clawdbot@latest +# Install OpenClaw +RUN npm install -g openclaw@latest # Create workspace directories RUN mkdir -p /workspace/huggingface \ - /workspace/.clawdbot \ - /workspace/clawd \ + /workspace/.openclaw \ + /workspace/openclaw \ /workspace/scripts # Copy startup script COPY scripts/entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh -# Copy default Clawdbot workspace files -COPY config/workspace/ /workspace/clawd/ +# Copy default OpenClaw workspace files +COPY config/workspace/ /workspace/openclaw/ # Expose ports # 8000 - vLLM API -# 18789 - Clawdbot Gateway WebSocket -# 18790 - Clawdbot Bridge -# 18793 - Clawdbot Canvas +# 18789 - OpenClaw Gateway WebSocket +# 18790 - OpenClaw Bridge +# 18793 - OpenClaw Canvas # 22 - SSH (RunPod adds this) EXPOSE 8000 18789 18790 18793 diff --git a/README.md b/README.md index 14e4923..f325cfe 100644 --- a/README.md +++ b/README.md @@ -1,292 +1,64 @@ -# Clawdbot on RunPod with vLLM - -Run Clawdbot with GLM-4.7 and other open-source coding models on RunPod using vLLM. Chat with your AI assistant via Telegram! - -## Model Comparison - -| Model | GPU | VRAM | Cost/hr | Context | Folder | -|-------|-----|------|---------|---------|--------| -| **Base (Qwen2.5-7B)** | Any | 16GB | $0.50 | 16k | `Dockerfile` | -| **GLM-4.7-Flash FP16** | H100/A100 80GB | 56GB | $1.20-1.99 | 32k-64k | `models/glm47-flash-fp16/` | -| **GLM-4.7-Flash AWQ 4-bit** | A100 80GB | 71GB | $1.19 | 114k | `models/glm47-flash-awq-4bit/` | -| **GLM-4.7-REAP W4A16** | B200 | 108GB | $5.19 | 32k | `models/glm47-reap-w4a16/` | - -### Recommended: GLM-4.7-Flash AWQ 4-bit - -Best value option with full 114k context window at $1.19/hr on A100 80GB. - -## Quick Start - -### 1. Choose Your Model - -```bash -# GLM-4.7-Flash AWQ 4-bit (Best value, A100 80GB) -IMAGE=yourusername/clawdbot-glm47-flash-awq-4bit:latest - -# GLM-4.7-Flash FP16 (Full precision, H100/A100 80GB) -IMAGE=yourusername/clawdbot-glm47-flash-fp16:latest - -# GLM-4.7-REAP W4A16 (High-end, B200) -IMAGE=yourusername/clawdbot-glm47-reap-w4a16:latest - -# Base (Qwen2.5-7B, any GPU) -IMAGE=yourusername/clawdbot-vllm:latest -``` - -### 2. Create RunPod Pod - -- **Image**: Your chosen image from above -- **GPU**: Match model requirements -- **Volume**: 150GB at `/workspace` -- **Container Disk**: 50-100GB (depending on model) -- **Ports**: `8000/http, 18789/http, 22/tcp` - -### 3. 
Set Environment Variables - -```bash -VLLM_API_KEY=your-secure-key # Required -TELEGRAM_BOT_TOKEN=your-telegram-token # Optional -GITHUB_TOKEN=ghp_xxx # Optional -``` - -### 4. Test It - -```bash -# Health check -curl http://localhost:8000/health - -# Chat completion -curl http://localhost:8000/v1/chat/completions \ - -H "Authorization: Bearer $VLLM_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "glm-4.7-flash", - "messages": [{"role": "user", "content": "Hello!"}] - }' -``` - -## Docker Images - -Images are automatically built and pushed to Docker Hub via GitHub Actions. - -| Image | Description | -|-------|-------------| -| `clawdbot-glm47-flash-awq-4bit` | GLM-4.7-Flash AWQ 4-bit for A100 80GB | -| `clawdbot-glm47-flash-fp16` | GLM-4.7-Flash FP16 for H100/A100 80GB | -| `clawdbot-glm47-reap-w4a16` | GLM-4.7-REAP W4A16 for B200 | -| `clawdbot-vllm` | Base image with Qwen2.5-7B | - -## Project Structure - -``` -runpod-clawdbot/ -├── README.md # This file -├── .github/ -│ └── workflows/ -│ └── docker-build.yml # Build & push to Docker Hub -│ -├── models/ -│ ├── glm47-flash-fp16/ # Full precision FP16 (H100/A100 80GB) -│ │ ├── README.md -│ │ ├── Dockerfile -│ │ └── entrypoint.sh -│ │ -│ ├── glm47-flash-awq-4bit/ # AWQ 4-bit quantized (A100 80GB) -│ │ ├── README.md -│ │ ├── Dockerfile -│ │ └── entrypoint.sh -│ │ -│ └── glm47-reap-w4a16/ # Pruned W4A16 quantized (B200) -│ ├── README.md -│ ├── Dockerfile -│ └── entrypoint.sh -│ -├── scripts/ -│ ├── setup-clawdbot.sh -│ └── start-vllm.sh -│ -├── config/ -│ ├── clawdbot.json -│ └── workspace/ -│ -├── templates/ -│ └── clawdbot-vllm.json -│ -├── tests/ -│ ├── test-vllm.sh -│ └── test-tool-calling.sh -│ -├── Dockerfile # Base image (Qwen2.5-7B) -├── docker-compose.yml -└── .env.example -``` - -## GitHub Actions - -Images are built automatically on: -- Push to `main` → tagged as `:latest` -- Push to other branches → tagged as `:dev-{branch-name}` (e.g., `:dev-feature-xyz`) -- Push git tag (e.g., `v1.0.0`) → tagged as `:v1.0.0` + `:latest` -- Pull requests → build only, no push (validation) -- Manual workflow dispatch → select specific model - -### Required Setup - -**Secrets** (Repository → Settings → Secrets → Actions): - -| Secret | Description | -|--------|-------------| -| `DOCKERHUB_USERNAME` | Your Docker Hub username | -| `DOCKERHUB_TOKEN` | Docker Hub access token (not password) | - -**Variables** (Repository → Settings → Variables → Actions): - -| Variable | Description | -|----------|-------------| -| `DOCKERHUB_REPO` | (Optional) Custom repo name, defaults to username | - -### Manual Build - +# OpenClaw on RunPod: self-contained LLM images + +This repository provides Docker images that bundle **OpenClaw** with different LLMs so you can run a fully self-contained assistant on RunPod (or any GPU host). Each model variant has its own folder under `models/` with a dedicated README and startup script. 
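The new README intro mentions "any GPU host", so a minimal non-RunPod sketch may help orient readers; this assumes the GGUF image name from the matrix below and a local directory standing in for the `/workspace` volume (image name, key, and password are placeholders):

```bash
# Run the llama.cpp GGUF variant on a plain Docker host with an NVIDIA GPU
docker run --gpus all \
  -p 8000:8000 -p 18789:18789 \
  -v "$PWD/workspace:/workspace" \
  -e LLAMA_API_KEY=change-me \
  -e OPENCLAW_WEB_PASSWORD=change-me \
  yourusername/openclaw-glm47-flash-gguf:latest
```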
+ +## Model matrix (status + context) + +| Image tag | Backend | Weights | GPU target | Context | VRAM (approx) | Status | Notes | +|----------|---------|---------|------------|---------|----------------|--------|-------| +| `openclaw-glm47-flash-awq-4bit` | vLLM | `cyankiwi/GLM-4.7-Flash-AWQ-4bit` | A100 80GB | 114k | ~75GB | Working | Best value on A100; long context | +| `openclaw-glm47-flash-fp16` | vLLM | `zai-org/GLM-4.7-Flash` | H100/A100 80GB | 32k-64k | ~56GB+ | Working | Full precision | +| `openclaw-glm47-flash-gguf` | llama.cpp | `unsloth/GLM-4.7-Flash-GGUF` (Q4_K_M) | RTX 5090 32GB | 200k | ~28GB | Working | Recommended for 5090 | +| `openclaw-glm47-flash-nvfp4-5090` | vLLM | `GadflyII/GLM-4.7-Flash-NVFP4` | RTX 5090 32GB | 200k | ~30GB | Not working | vLLM MLA issues on Blackwell | +| `openclaw-glm47-reap-w4a16` | vLLM | `0xSero/GLM-4.7-REAP-40-W4A16` | B200 180GB | 32k | ~108GB | Working | High-end B200 | +| `openclaw-vllm` | vLLM | `Qwen/Qwen2.5-Coder-7B-Instruct` | 16GB+ | 16k | ~16GB | Working | Base image | + +Notes: +- Context values are defaults; some variants allow tuning via `MAX_MODEL_LEN`. +- NVFP4 status details live in `models/glm47-flash-nvfp4-5090/ISSUES.md`. + +## Quick start + +1. **Pick an image** from the table above. +2. **Create a RunPod pod**: + - Volume: 150GB at `/workspace` + - Ports: `8000/http, 18789/http, 22/tcp` +3. **Set environment variables**: + - `VLLM_API_KEY` (for vLLM variants) + - `OPENCLAW_WEB_PASSWORD` (web UI token) + - `HF_TOKEN` (optional, faster downloads) + - `TELEGRAM_BOT_TOKEN` (optional) + - For GGUF + llama.cpp: use `LLAMA_API_KEY` instead of `VLLM_API_KEY` + +4. **Health check**: ```bash -# Build locally -docker build -t clawdbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ -docker build -t clawdbot-glm47-flash-fp16 models/glm47-flash-fp16/ -docker build -t clawdbot-glm47-reap-w4a16 models/glm47-reap-w4a16/ - -# Push to Docker Hub -docker tag clawdbot-glm47-flash-awq-4bit yourusername/clawdbot-glm47-flash-awq-4bit:latest -docker push yourusername/clawdbot-glm47-flash-awq-4bit:latest -``` - -## Configuration - -### Environment Variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `VLLM_API_KEY` | `changeme` | API key for vLLM authentication | -| `MODEL_NAME` | Model-specific | HuggingFace model ID | -| `SERVED_MODEL_NAME` | `glm-4.7-flash` | Model name in API responses | -| `MAX_MODEL_LEN` | Auto-detected | Maximum context length | -| `GPU_MEMORY_UTILIZATION` | `0.92` | GPU memory to use | -| `TELEGRAM_BOT_TOKEN` | | Telegram bot token from @BotFather | -| `GITHUB_TOKEN` | | GitHub PAT for git/gh operations | - -### Clawdbot Configuration - -Config is auto-generated at `/workspace/.clawdbot/clawdbot.json`: - -```json -{ - "models": { - "providers": { - "local-vllm": { - "baseUrl": "http://localhost:8000/v1", - "apiKey": "your-vllm-api-key", - "api": "openai-completions" - } - } - } -} -``` - -## Telegram Setup - -1. Create a bot with [@BotFather](https://t.me/BotFather) -2. Copy the bot token -3. Set `TELEGRAM_BOT_TOKEN` environment variable -4. Start or restart the pod -5. Message your bot on Telegram! - -## GitHub Authentication - -For git operations inside the container: - -1. Create a [GitHub Personal Access Token](https://github.com/settings/tokens) -2. Select scopes: `repo`, `read:org`, `workflow` -3. Set `GITHUB_TOKEN` environment variable -4. 
Token is auto-configured on startup - -## Testing - -```bash -# Basic health check curl http://localhost:8000/health - -# List models -curl http://localhost:8000/v1/models \ - -H "Authorization: Bearer $VLLM_API_KEY" - -# Tool calling test -curl http://localhost:8000/v1/chat/completions \ - -H "Authorization: Bearer $VLLM_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "glm-4.7-flash", - "messages": [{"role": "user", "content": "What is 2+2?"}], - "tools": [{ - "type": "function", - "function": { - "name": "calculate", - "description": "Perform a calculation", - "parameters": { - "type": "object", - "properties": { - "expression": {"type": "string"} - } - } - } - }] - }' ``` -## Troubleshooting - -### vLLM doesn't start -- Check GPU availability: `nvidia-smi` -- Verify VRAM is sufficient for model -- Check logs: `journalctl -u vllm` or container logs - -### Model loading is slow -- First load downloads model from HuggingFace (can be 18-60GB) -- Use network volume to persist model across restarts -- AWQ 4-bit model (18GB) loads faster than FP16 (31GB) +## Folder map -### Tool calling not working -- Verify `--enable-auto-tool-choice` is set -- Check tool parser matches model (`glm47` for GLM-4.7) -- Run test script: `./tests/test-tool-calling.sh` +| Folder | Purpose | +|--------|---------| +| `models/` | Model-specific Dockerfiles + entrypoints | +| `scripts/` | Base entrypoint + setup helpers | +| `templates/` | RunPod template JSONs | +| `config/` | OpenClaw config templates | -### Orphaned GPU memory -- If vLLM crashes, GPU memory may stay allocated -- Restart the pod to clear memory -- Check with: `nvidia-smi` +## Build + release -### SSH port changes -- RunPod assigns random SSH ports after restart -- Check port via RunPod console or API -- Use RunPod web terminal as alternative +Images build on: +- Pull requests -> tag = branch name (slashes -> `-`) +- Push to `main` -> `:latest` +- Git tag (e.g., `v1.0.0`) -> `:v1.0.0` + `:latest` -## Known Issues +## Known issues -1. **GGUF not supported** - vLLM doesn't support GLM-4.7's GGUF format. Use AWQ. -2. **Container disk doesn't persist** - Only `/workspace` survives restarts. -3. **B200 requires CUDA 13.1+** - The REAP image includes this automatically. - -## Cost Optimization - -1. **Use AWQ 4-bit** - Same model, lower VRAM, cheaper GPU ($1.19 vs $1.99/hr) -2. **Stop pods when idle** - RunPod charges per minute -3. **Use network volumes** - Avoid re-downloading models -4. **Consider spot instances** - Up to 80% cheaper +- **NVFP4 on RTX 5090** is not working in vLLM due to MLA attention shape issues and missing Blackwell kernel support. See `models/glm47-flash-nvfp4-5090/ISSUES.md`. +- **GGUF is not supported in vLLM** (use llama.cpp image). +- **Container disk doesn't persist**; only `/workspace` survives restarts. 
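Beyond the health check in the quick start, a fuller end-to-end smoke test is an OpenAI-style chat completion against port 8000. A minimal example with placeholder values, following the pattern the model READMEs use (vLLM variants authenticate with `VLLM_API_KEY`; the GGUF image uses `LLAMA_API_KEY` instead):

```bash
# Replace <pod-id> with your RunPod pod ID, or use http://localhost:8000 inside the pod
curl "https://<pod-id>-8000.proxy.runpod.net/v1/chat/completions" \
  -H "Authorization: Bearer $VLLM_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "glm-4.7-flash",
    "messages": [{"role": "user", "content": "Write a one-line Python hello world."}]
  }'
```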
## Resources -- [Clawdbot Documentation](https://github.com/clawdbot/clawdbot) -- [vLLM Documentation](https://docs.vllm.ai/) -- [RunPod Documentation](https://docs.runpod.io/) -- [GLM-4.7 Announcement](https://z.ai/blog/glm-4.7) - -## License - -MIT +- OpenClaw: https://github.com/openclaw/openclaw +- vLLM: https://docs.vllm.ai/ +- RunPod: https://docs.runpod.io/ diff --git a/config/clawdbot.json b/config/openclaw.json similarity index 88% rename from config/clawdbot.json rename to config/openclaw.json index a344968..b31b116 100644 --- a/config/clawdbot.json +++ b/config/openclaw.json @@ -1,10 +1,9 @@ { - "$schema": "https://clawdbot.com/schema/config.json", - "_comment": "Clawdbot configuration for RunPod vLLM integration", + "_comment": "OpenClaw configuration for RunPod vLLM integration", "_instructions": [ "Replace with your RunPod pod ID", "Replace with your vLLM API key", - "Adjust model settings based on your tier (see templates/clawdbot-vllm.json)" + "Adjust model settings based on your tier (see templates/openclaw-vllm.json)" ], "agents": { diff --git a/config/workspace/AGENTS.md b/config/workspace/AGENTS.md index f3d8d6e..d6ded55 100644 --- a/config/workspace/AGENTS.md +++ b/config/workspace/AGENTS.md @@ -1,4 +1,4 @@ -# AGENTS.md - Clawdbot Workspace +# AGENTS.md - OpenClaw Workspace This folder is the assistant's working directory. diff --git a/config/workspace/IDENTITY.md b/config/workspace/IDENTITY.md index 547ff69..554aa69 100644 --- a/config/workspace/IDENTITY.md +++ b/config/workspace/IDENTITY.md @@ -1,6 +1,6 @@ # Identity -You are a helpful AI coding assistant running on RunPod with a local LLM. +You are a helpful OpenClaw AI coding assistant running on RunPod with a local LLM. You can help with: - Writing and debugging code - Explaining programming concepts diff --git a/docker-compose.yml b/docker-compose.yml index d72968a..74545db 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -# docker-compose.yml - Local development setup for Clawdbot + vLLM +# docker-compose.yml - Local development setup for OpenClaw + vLLM # Note: This is for local testing only. For production, use RunPod. version: "3.8" @@ -7,7 +7,7 @@ services: # vLLM Server - requires NVIDIA GPU with sufficient VRAM vllm: image: vllm/vllm-openai:v0.12.0 - container_name: clawdbot-vllm + container_name: openclaw-vllm runtime: nvidia deploy: resources: @@ -46,7 +46,7 @@ services: # Mock vLLM for testing without GPU (uses smaller model) vllm-mock: image: vllm/vllm-openai:v0.12.0 - container_name: clawdbot-vllm-mock + container_name: openclaw-vllm-mock profiles: ["mock"] ports: - "8001:8000" @@ -64,7 +64,7 @@ services: # Test runner tests: image: curlimages/curl:latest - container_name: clawdbot-tests + container_name: openclaw-tests profiles: ["test"] depends_on: vllm: @@ -84,4 +84,4 @@ volumes: networks: default: - name: clawdbot-network + name: openclaw-network diff --git a/docs/openclaw-migration-plan.md b/docs/openclaw-migration-plan.md new file mode 100644 index 0000000..c36ea05 --- /dev/null +++ b/docs/openclaw-migration-plan.md @@ -0,0 +1,75 @@ +# OpenClaw Migration Plan (RunPod Images) + +## Background & upstream signals + +From the upstream OpenClaw project: +- The repository is now `openclaw/openclaw`, and the CLI shown in the README is `openclaw`. +- Install guidance includes `npm install -g openclaw@latest` and the one‑liner `curl -fsSL https://openclaw.ai/install.sh | bash`. 
+- The OpenClaw README documents new default paths: + - Config file: `~/.openclaw/openclaw.json` + - Workspace root: `~/.openclaw/workspace` + +Sources: +- https://github.com/openclaw/openclaw (README) +- https://openclaw.ai (installer + quick start) + +## Repo scan findings (current state) + +The repo still referenced legacy names and paths in many places before migration: +- Dockerfiles: base image installs, labels, ENVs, entrypoint banners +- Entrypoints: legacy CLI names and legacy state dir paths +- Docs: `README.md`, model READMEs, `docs/video-script.md` +- Templates: `templates/runpod-template.json`, `templates/openclaw-vllm.json` +- Config: `config/openclaw.json`, `config/workspace/IDENTITY.md` +- Scripts: `scripts/entrypoint.sh`, `scripts/setup-openclaw.sh` +- Env examples: `.env.example` + +No `OpenClaw` references exist yet in the repo. + +## Decisions (no legacy) + +1. **Package + binary naming** + - Install `openclaw@latest`. + - Use `openclaw` CLI only (no legacy binaries or symlinks). + +2. **State directory** + - Use `/workspace/.openclaw` as the only state directory in containers. + +3. **Config file name** + - Use `openclaw.json` only. + +## Migration plan (proposed steps) + +### 1) Dependency + CLI alignment +- Update Dockerfiles to install `openclaw@latest`. +- Use `openclaw` as the only CLI. + +### 2) State dir and workspace setup +- Use `/workspace/.openclaw` for all state. +- Create expected subdirectories (`agents/main/sessions`, `credentials`) and enforce permissions. + +### 3) Config generation + naming +- Generate `openclaw.json` with OpenAI‑compatible provider settings for the local model. +- Run `openclaw doctor --fix` to auto‑migrate schema after config write. + +### 4) Rename commands and docs +- Update all scripts/entrypoints to call `openclaw`. +- Replace docs and templates to use “OpenClaw” branding and new paths. +- Update README tables and sample image tags if the Docker repo/name changes. + +### 5) Environment variables and config keys +- Standardize on `OPENCLAW_STATE_DIR`, `OPENCLAW_WORKSPACE`, `OPENCLAW_WEB_PASSWORD`. +- Reflect in `.env.example` and RunPod templates. + +### 6) Validation +- Build images for each model variant. +- Smoke test: + - `openclaw doctor --fix` works + - `openclaw gateway` starts + - Web UI reachable via RunPod proxy + - Model inference via `/v1/chat/completions` +- Confirm the state dir and workspace are created under `/workspace/.openclaw`. + +## Open questions + +- Should image tags be renamed immediately or keep existing tags for continuity? 
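To make migration steps 2 and 3 above concrete, here is a compressed sketch of the state-dir setup and config generation, mirroring what the per-model entrypoints in this repo do (paths, the `local-vllm` provider block, and the default key come from the AWQ entrypoint; the exact schema is left to `openclaw doctor --fix`):

```bash
# Steps 2-3: state dir, minimal config, then schema auto-migration
OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}"
OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}"
VLLM_API_KEY="${VLLM_API_KEY:-changeme}"

mkdir -p "$OPENCLAW_STATE_DIR/agents/main/sessions" \
         "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE"
chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/credentials"

# Minimal config pointing OpenClaw at the local OpenAI-compatible endpoint
cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF
{
  "models": {
    "providers": {
      "local-vllm": {
        "baseUrl": "http://localhost:8000/v1",
        "apiKey": "$VLLM_API_KEY",
        "api": "openai-completions"
      }
    }
  },
  "agents": {
    "defaults": {
      "model": { "primary": "local-vllm/glm-4.7-flash" },
      "workspace": "$OPENCLAW_WORKSPACE"
    }
  }
}
EOF
chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json"

# Let the CLI validate and migrate the schema for the installed version
OPENCLAW_STATE_DIR="$OPENCLAW_STATE_DIR" openclaw doctor --fix
```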
diff --git a/docs/video-script.md b/docs/video-script.md new file mode 100644 index 0000000..4d4275e --- /dev/null +++ b/docs/video-script.md @@ -0,0 +1,175 @@ +# Video Script: OpenClaw fully self-hosted on RTX 5090 (GLM‑4.7‑Flash GGUF + llama.cpp) + +This doc turns the repo learnings into a demo-first video script for two audiences: + +- **How to set it up and use it** (first half) +- **How it works** (later), with **vLLM/NVFP4** as a short end note + +--- + +## Benchmark slide: where to get the “graph” + the numbers (Artificial Analysis) + +### Option A (fastest): screenshot Artificial Analysis model pages + +Use these pages and grab the **Artificial Analysis Intelligence Index** number shown on each page: + +- **GLM-4.7-Flash (Reasoning)**: 30 — +- **GLM-4.7 (Reasoning)**: 42 — +- **GPT-5.2 (xhigh)**: 51 — +- **GPT-5.2 Codex (xhigh)**: 48 — +- **Claude Opus 4.5 (Reasoning)**: 50 — +- **Claude 4.5 Sonnet (Reasoning)**: 42 — + +If you want a single AA page on screen as a citation backdrop, use a comparison page: + +- **GLM‑4.7 vs GPT‑5.2**: + +### Option B (cleanest): create your own bar chart, cite AA + +- Build a simple bar chart using the numbers above. +- Add a footer like: **Source: Artificial Analysis (Intelligence Index v4.0), accessed Jan 2026**. + +**Note on “Composer 1”**: The AA model page for “Composer 1” wasn’t reliably fetchable during prep (timeouts). If you want “Composer 1” in the slide, verify its page exists in AA and grab the index number from there; otherwise swap it for a different widely-known coding model that AA lists reliably. + +--- + +## Video script (demo-first; usage first; deep technical notes last) + +### 0:00–0:25 — Cold open / hook (call out fake “self-hosted”) + +**On screen**: quick montage: Telegram/WhatsApp agent convo → “Powered by Claude API” / billing pain → cut to local terminal + GPU. + +**You say**: +People call these “self-hosted agents”… but then the brain is still a paid API. If your agent stops working the second Claude is down or your token budget runs out, that’s not self-hosted. + +Today I’ll show a fully self-contained OpenClaw setup: local model, local inference, agent UI—no external model API needed. + +### 0:25–0:55 — What you’ll build + requirements (set expectations) + +**On screen**: one slide: “OpenClaw + GLM‑4.7‑Flash + llama.cpp (OpenAI API)”. + +**You say**: +We’re running GLM‑4.7‑Flash locally via llama.cpp and pointing OpenClaw at it using an OpenAI-compatible API. + +If you’ve got an RTX 5090 (32GB), you can run the full 200k context. With 24GB, it can still work, just with a reduced context window—because the model weights alone are ~17GB. + +### 0:55–2:10 — Quick demo first (prove it works before you explain anything) + +**On screen**: +- Open OpenClaw web UI +- Show the agent doing a quick code task (small repo change / explanation) +- Show a raw API call to the model (`/v1/chat/completions`) + +**You say**: +Let me prove it’s real before we talk architecture. This is OpenClaw running against a model in the same environment. No Claude key. No OpenAI key. + +If you’re using Telegram integration, the same idea applies: messages go to a local model, not a hosted API. + +### 2:10–3:40 — Two ways to run it: local GPU vs RunPod (choose your path) + +**On screen**: split screen: local machine vs RunPod pod. + +**You say**: +You’ve got two options: + +- Local: lowest latency and everything stays on your machine. 
+- RunPod: if you don’t have a 5090—or you don’t want your workstation pinned all day—you can still keep it self-contained. You pay for compute time, not per-token API calls. + +### 3:40–5:30 — RunPod setup walkthrough (the “do this, then this” part) + +**On screen**: RunPod UI checklist. + +**You say (walkthrough voice)**: +Here’s the setup that actually matters: + +- **Image**: `runpod/openclaw-glm47-flash-gguf:latest` +- **Ports**: `8000/http` (llama.cpp), `18789/http` (OpenClaw UI), `22/tcp` (SSH) +- **Network volume mounted to `/workspace`** (non-negotiable; model is ~17GB and you want persistence across restarts) +- **Environment variables**: + - `LLAMA_API_KEY` (protects the model API) + - `OPENCLAW_WEB_PASSWORD` (protects the web UI token) + - optionally `TELEGRAM_BOT_TOKEN` (Telegram) + +### 5:30–6:40 — Health check + raw chat completion (OpenAI-compat API) + +**On screen**: terminal showing `curl` to `/health` then `/v1/chat/completions`. + +**You say**: +llama.cpp runs an OpenAI-compatible API. That’s the trick: OpenClaw doesn’t need to know it’s llama.cpp. + +**Show (copy/paste):** + +- Health check: `GET /health` on `:8000` +- Chat completion: `POST /v1/chat/completions` with `Authorization: Bearer $LLAMA_API_KEY` and `model: "glm-4.7-flash"` + +### 6:40–8:10 — The “gotcha”: first-time device pairing (and why it’s good) + +**On screen**: web UI says “pairing required” → SSH → approve device → refresh UI. + +**You say**: +First time you open the web UI, it won’t just let any browser control your agent. You must approve the device. + +**On screen (commands):** + +- List requests: + - `OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing list telegram` +- Approve: + - `OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing approve telegram ` + +**You say**: +This is the right default for something that can run commands and touch repos. + +### 8:10–9:10 — Benchmark slide (short, no methodology detour) + +**On screen**: your bar chart + tiny citation footer (Artificial Analysis URLs). + +**You say**: +Why GLM‑4.7‑Flash? Because it’s an open-weights model with serious benchmark performance. On Artificial Analysis’ Intelligence Index, you can see where it sits relative to the usual suspects. + +Quick callout list (keep it fast): + +- GLM‑4.7: 42 +- GLM‑4.7‑Flash: 30 +- GPT‑5.2: 51 +- GPT‑5.2 Codex: 48 +- Claude Opus 4.5 (Reasoning): 50 +- Claude 4.5 Sonnet (Reasoning): 42 + +### 9:10–10:45 — How it works (high level, but concrete) + +**On screen**: simple block diagram. + +**You say**: +Architecture is simple: + +- llama.cpp (`llama-server`) hosts the model and exposes OpenAI-style endpoints on `:8000` +- OpenClaw points its provider config at `http://localhost:8000/v1` +- The container stores everything under `/workspace` so restarts don’t wipe model + state + +Then the “why it fits”: + +We’re running a GGUF quantization (Q4_K_M) and using Q8 KV cache quantization—this is what makes 200k context feasible on a 32GB card. + +### 10:45–12:00 — Ending note: what happened with vLLM/NVFP4 (keep it tight) + +**On screen**: one screenshot of the core error + a short bullet list. + +**You say**: +We tried the obvious path first: vLLM with NVFP4 for Blackwell. But as of Jan 2026, it’s blocked for GLM‑4.7 on the 5090. + +Root cause: GLM‑4.7’s MLA attention isn’t handled correctly in vLLM’s fallback path, leading to an attention output dimension mismatch. + +When those pieces land upstream (vLLM + cuDNN support), we’ll revisit and benchmark it. 
+ +**On screen takeaway**: +Today’s working answer: GGUF + llama.cpp. + +--- + +## Suggested on-screen callouts (quick checklist) + +- **Ports**: `8000` (model API), `18789` (web UI), `22` (SSH) +- **Persistence**: “Network volume mounted to `/workspace`” +- **Security**: “API key for model + web token + device pairing” +- **Performance tagline (repo docs)**: “~175 tok/s, ~28GB VRAM, 200k context on RTX 5090” + diff --git a/models/glm47-flash-awq-4bit/Dockerfile b/models/glm47-flash-awq-4bit/Dockerfile index d8bda90..21ad16d 100644 --- a/models/glm47-flash-awq-4bit/Dockerfile +++ b/models/glm47-flash-awq-4bit/Dockerfile @@ -34,8 +34,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# Install Clawdbot and Claude Code -RUN npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ +# Install OpenClaw and Claude Code +RUN npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ npm cache clean --force # Environment defaults @@ -45,9 +45,9 @@ ENV SERVED_MODEL_NAME="glm-4.7-flash" ENV MAX_MODEL_LEN="114688" ENV VLLM_API_KEY="changeme" -# Clawdbot workspace -ENV CLAWDBOT_HOME="/workspace/.clawdbot" -ENV CLAWDBOT_WORKSPACE="/workspace/clawd" +# OpenClaw workspace +ENV OPENCLAW_STATE_DIR="/workspace/.openclaw" +ENV OPENCLAW_WORKSPACE="/workspace/openclaw" COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/models/glm47-flash-awq-4bit/README.md b/models/glm47-flash-awq-4bit/README.md index dbd68e0..c9451f9 100644 --- a/models/glm47-flash-awq-4bit/README.md +++ b/models/glm47-flash-awq-4bit/README.md @@ -22,7 +22,7 @@ Quantized version of GLM-4.7-Flash for **A100 80GB** GPUs. Best value for GLM-4. ### 1. Create RunPod Pod **Settings:** -- **Image**: `runpod/clawdbot-glm47-flash-awq-4bit:latest` +- **Image**: `runpod/openclaw-glm47-flash-awq-4bit:latest` - **GPU**: 1x A100 80GB - **Volume**: 150GB at `/workspace` (network storage) - **Container Disk**: 50GB @@ -37,7 +37,7 @@ Quantized version of GLM-4.7-Flash for **A100 80GB** GPUs. Best value for GLM-4. | `HF_TOKEN` | Recommended | - | [HuggingFace token](https://huggingface.co/settings/tokens) for faster model downloads | | `TELEGRAM_BOT_TOKEN` | No | - | Telegram bot token for chat integration | | `GITHUB_TOKEN` | No | - | GitHub token for `gh` CLI | -| `CLAWDBOT_WEB_PASSWORD` | No | `clawdbot` | Password for web UI | +| `OPENCLAW_WEB_PASSWORD` | No | `openclaw` | Password for web UI | ### 3. Access Points @@ -46,7 +46,7 @@ After the pod starts (~90 seconds for cached starts, longer for first start): | Service | URL | Auth | |---------|-----|------| | vLLM API | `https://-8000.proxy.runpod.net` | Bearer token (`VLLM_API_KEY`) | -| Web UI | `https://-18789.proxy.runpod.net` | Password (`CLAWDBOT_WEB_PASSWORD`) | +| Web UI | `https://-18789.proxy.runpod.net` | Password (`OPENCLAW_WEB_PASSWORD`) | | SSH | `ssh root@ -p ` | SSH key | ### 4. 
Test It @@ -87,12 +87,12 @@ All persistent data is stored on the network volume `/workspace`: ├── .cache/ │ ├── vllm/ # CUDA graphs & torch compile cache (~400MB) │ └── huggingface/ # HF cache -├── .clawdbot/ -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # Agent state │ └── telegram/ # Telegram session ├── .config/gh/ # GitHub CLI config -└── clawd/ # Claude Code workspace +└── openclaw/ # Workspace ``` **Startup times:** @@ -101,9 +101,9 @@ All persistent data is stored on the network volume `/workspace`: ## Web UI -Access the Clawdbot web UI at `https://-18789.proxy.runpod.net`: +Access the OpenClaw web UI at `https://-18789.proxy.runpod.net`: -1. Enter the password (default: `clawdbot` or your `CLAWDBOT_WEB_PASSWORD`) +1. Enter the password (default: `openclaw` or your `OPENCLAW_WEB_PASSWORD`) 2. Chat with the model through the web interface 3. No CLI access required @@ -196,7 +196,7 @@ pkill -9 -f vllm **Web UI won't connect:** - Ensure port 18789 is exposed -- Check that gateway is running: `ps aux | grep clawdbot` +- Check that gateway is running: `ps aux | grep openclaw` - Verify bind mode is `lan` in config **Model download fails:** diff --git a/models/glm47-flash-awq-4bit/entrypoint.sh b/models/glm47-flash-awq-4bit/entrypoint.sh index 8bcb18c..7b86333 100644 --- a/models/glm47-flash-awq-4bit/entrypoint.sh +++ b/models/glm47-flash-awq-4bit/entrypoint.sh @@ -48,11 +48,14 @@ fi VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-114688}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -# Web UI password - users enter this to access the Clawdbot control panel -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +# Web UI token/password - users enter this to access the OpenClaw control panel +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" + +BOT_CMD="openclaw" echo "Starting vLLM server..." echo " Model: $MODEL_PATH" @@ -99,11 +102,13 @@ if [ $WAITED -ge $MAX_WAIT ]; then # Don't exit - keep container running for debugging fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -112,8 +117,8 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - # Create a minimal config - clawdbot doctor will fix any missing fields - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + # Create a minimal config - openclaw doctor will fix any missing fields + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -136,7 +141,8 @@ if [ ! 
-f "$CLAWDBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-vllm/$SERVED_MODEL_NAME" }, - "contextTokens": 98304 + "contextTokens": 98304, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { @@ -144,17 +150,19 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then }, "gateway": { "mode": "local", - "bind": "lan" + "bind": "lan", + "auth": { "mode": "password", "password": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" fi -# Auto-fix config to match current Clawdbot version's schema -echo "Running clawdbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix || true +# Auto-fix config to match current OpenClaw version's schema +echo "Running openclaw doctor to validate/fix config..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -174,19 +182,19 @@ fi export OPENAI_API_KEY="$VLLM_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway with password auth for web UI access +# Start OpenClaw gateway with password auth for web UI access echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot gateway --auth password --password "$CLAWDBOT_WEB_PASSWORD" & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready!" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo " Web UI: https://-18789.proxy.runpod.net" -echo " Web UI Password: $CLAWDBOT_WEB_PASSWORD" +echo " Web UI Password: $OPENCLAW_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME" echo " Context: $MAX_MODEL_LEN tokens" echo "================================================" diff --git a/models/glm47-flash-fp16/Dockerfile b/models/glm47-flash-fp16/Dockerfile index c7f5ad6..d1d62b6 100644 --- a/models/glm47-flash-fp16/Dockerfile +++ b/models/glm47-flash-fp16/Dockerfile @@ -33,18 +33,19 @@ RUN uv pip install --system -U vllm \ --extra-index-url https://wheels.vllm.ai/nightly && \ uv pip install --system git+https://github.com/huggingface/transformers.git -# Install Clawdbot -RUN npm install -g --prefer-offline clawdbot@latest && \ +# Install OpenClaw +RUN npm install -g --prefer-offline openclaw@latest && \ npm cache clean --force # Keep model files on container disk (requires 100GB) -# Only use workspace for persistent Clawdbot state -RUN mkdir -p /workspace/.clawdbot /workspace/clawd +# Only use workspace for persistent OpenClaw state +RUN mkdir -p /workspace/.openclaw /workspace/openclaw # Environment variables -# HF_HOME on container disk (100GB needed), Clawdbot state on workspace +# HF_HOME on container disk (100GB needed), OpenClaw state on workspace ENV HF_HOME=/root/.cache/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw ENV MODEL_NAME=zai-org/GLM-4.7-Flash ENV SERVED_MODEL_NAME=glm-4.7-flash ENV VLLM_API_KEY=changeme @@ -58,7 +59,7 @@ RUN chmod +x /entrypoint.sh # Expose ports # 8000: vLLM API -# 18789: Clawdbot Gateway +# 18789: OpenClaw Gateway # 22: SSH EXPOSE 
8000 18789 22 diff --git a/models/glm47-flash-fp16/README.md b/models/glm47-flash-fp16/README.md index 6f3eb42..780f831 100644 --- a/models/glm47-flash-fp16/README.md +++ b/models/glm47-flash-fp16/README.md @@ -29,7 +29,7 @@ Best quality with auto-detected context based on GPU. ### 1. Create RunPod Pod -- **Image**: `yourusername/clawdbot-glm47-flash-fp16:latest` +- **Image**: `yourusername/openclaw-glm47-flash-fp16:latest` - **GPU**: 1x H100 80GB or A100 80GB - **Volume**: 50GB at `/workspace` - **Container Disk**: 100GB (model stored here) @@ -67,11 +67,11 @@ Model is stored on container disk (100GB required), state persists on workspace ``` /root/.cache/huggingface/ # Model files (container disk) /workspace/ -├── .clawdbot/ -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # State │ └── telegram/ # Session -└── clawd/ # Workspace +└── openclaw/ # Workspace ``` ## vLLM Configuration diff --git a/models/glm47-flash-fp16/entrypoint.sh b/models/glm47-flash-fp16/entrypoint.sh index 500953e..122cb51 100644 --- a/models/glm47-flash-fp16/entrypoint.sh +++ b/models/glm47-flash-fp16/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - GLM-4.7-Flash FP16 + Clawdbot startup script +# entrypoint.sh - GLM-4.7-Flash FP16 + OpenClaw startup script set -e echo "============================================" -echo " GLM-4.7-Flash FP16 + Clawdbot Startup" +echo " GLM-4.7-Flash FP16 + OpenClaw Startup" echo "============================================" echo "" echo "IMPORTANT: This requires vLLM NIGHTLY (not PyPI stable)!" @@ -50,9 +50,11 @@ GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.92}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-glm47}" # Keep model on container disk (requires 100GB containerDiskInGb) HF_HOME="${HF_HOME:-/root/.cache/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" # Auto-detect optimal context if not explicitly set if [ -z "$MAX_MODEL_LEN" ]; then @@ -63,9 +65,11 @@ else fi export HF_HOME -export CLAWDBOT_STATE_DIR +export OPENCLAW_STATE_DIR export MAX_MODEL_LEN +BOT_CMD="openclaw" + # Set CUDA 13.1 paths for B200 (no-op on other GPUs if not installed) if [ -d "/usr/local/cuda-13.1" ]; then export PATH=/usr/local/cuda-13.1/bin:$PATH @@ -75,7 +79,10 @@ if [ -d "/usr/local/cuda-13.1" ]; then fi # Ensure directories exist (HF cache on container disk, state on workspace) -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true # Configure GitHub CLI # Priority: 1) GITHUB_TOKEN env var, 2) Persisted config in /workspace/.config/gh @@ -115,9 +122,9 @@ if command -v nvcc &> /dev/null; then fi echo "" -# Initialize Clawdbot config if not exists -if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." 
# Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -135,12 +142,12 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then # Reserve tokens for compaction: 15% of context RESERVE_TOKENS=$((MAX_MODEL_LEN * 15 / 100)) - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd", + "workspace": "/workspace/openclaw", "contextTokens": ${CONTEXT_TOKENS}, "systemPrompt": "Be concise and direct. Avoid unnecessary verbosity.", "compaction": { @@ -175,15 +182,17 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then ${TELEGRAM_CONFIG} }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "password", "password": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi # Build vLLM command @@ -232,10 +241,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_STATE_DIR clawdbot gateway & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" @@ -243,7 +252,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/models/glm47-flash-gguf-llamacpp/Dockerfile b/models/glm47-flash-gguf-llamacpp/Dockerfile index 6489bb6..e4b8fc7 100644 --- a/models/glm47-flash-gguf-llamacpp/Dockerfile +++ b/models/glm47-flash-gguf-llamacpp/Dockerfile @@ -66,8 +66,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install huggingface_hub for model downloads (using Python API, not CLI) RUN python3 -m pip install --no-cache-dir huggingface_hub -# Install Clawdbot and Claude Code -RUN npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ +# Install OpenClaw and Claude Code +RUN npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ npm cache clean --force WORKDIR / @@ -79,8 +79,8 @@ ENV MODEL_NAME="unsloth/GLM-4.7-Flash-GGUF" \ SERVED_MODEL_NAME="glm-4.7-flash" \ MAX_MODEL_LEN="200000" \ LLAMA_API_KEY="changeme" \ - CLAWDBOT_HOME="/workspace/.clawdbot" \ - CLAWDBOT_WORKSPACE="/workspace/clawd" + OPENCLAW_STATE_DIR="/workspace/.openclaw" \ + OPENCLAW_WORKSPACE="/workspace/openclaw" COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh diff --git a/models/glm47-flash-gguf-llamacpp/README.md b/models/glm47-flash-gguf-llamacpp/README.md index a2d0640..55249b9 100644 --- a/models/glm47-flash-gguf-llamacpp/README.md +++ b/models/glm47-flash-gguf-llamacpp/README.md @@ -23,7 +23,7 @@ llama.cpp has native support for `Glm4MoeLite` architecture (PR #18936 merged Ja - **200k context** - Full model 
capacity on 32GB GPU - **Q8 KV cache quantization** - Fits 200k context in VRAM -- **OpenAI-compatible API** - Works with Clawdbot, Claude Code, etc. +- **OpenAI-compatible API** - Works with OpenClaw, Claude Code, etc. - **Native chat template** - Uses `--jinja` for correct GLM-4.7 formatting ## Runpod Deployment @@ -33,26 +33,26 @@ llama.cpp has native support for `Glm4MoeLite` architecture (PR #18936 merged Ja 1. **Add your SSH key** to [Runpod Account Settings → SSH Public Keys](https://www.runpod.io/console/user/settings) (required for device pairing later). If you don't have an SSH key, follow the [Runpod SSH guide](https://docs.runpod.io/pods/configuration/use-ssh). 2. **Create a Pod** with: - - Image: `runpod/clawdbot-glm47-flash-gguf:latest` + - Image: `runpod/openclaw-glm47-flash-gguf:latest` - GPU: RTX 5090 (or any 32GB+ GPU) - Ports: `8000/http`, `18789/http`, `22/tcp` - Network Volume: **30GB minimum**, mounted to `/workspace` - Required for model download (~17GB) and config persistence - Without a network volume, data is lost on pod restart - Environment Variables: - - `CLAWDBOT_WEB_PASSWORD` - Token for Web UI (default: `clawdbot`) + - `OPENCLAW_WEB_PASSWORD` - Token for Web UI (default: `openclaw`) - `LLAMA_API_KEY` - API key for llama.cpp (default: `changeme`) 3. **Wait for startup** - First launch downloads the model (~17GB), which takes a few minutes. Check pod logs for progress. 4. **Access the Web UI**: ``` - https://-18789.proxy.runpod.net/?token= + https://-18789.proxy.runpod.net/?token= ``` ### First-Time Device Pairing -Clawdbot requires device pairing for security. On first access, you'll see "pairing required". +OpenClaw requires device pairing for security. On first access, you'll see "pairing required". **To approve your browser:** @@ -61,10 +61,10 @@ Clawdbot requires device pairing for security. On first access, you'll see "pair ssh root@ -p # List pending pairing requests -CLAWDBOT_STATE_DIR=/workspace/.clawdbot clawdbot devices list +OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing list telegram # Approve your device (use the Request ID from the list) -CLAWDBOT_STATE_DIR=/workspace/.clawdbot clawdbot devices approve +OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing approve telegram ``` After approval, refresh the Web UI - it will work permanently for that browser. @@ -74,7 +74,7 @@ After approval, refresh the Web UI - it will work permanently for that browser. | Port | Service | |------|---------| | 8000 | llama.cpp API (OpenAI-compatible) | -| 18789 | Clawdbot Web UI | +| 18789 | OpenClaw Web UI | | 22 | SSH | ## Environment Variables @@ -84,7 +84,7 @@ After approval, refresh the Web UI - it will work permanently for that browser. | `MODEL_FILE` | `GLM-4.7-Flash-Q4_K_M.gguf` | GGUF file to use | | `MAX_MODEL_LEN` | `200000` | Context length | | `LLAMA_API_KEY` | `changeme` | API authentication | -| `CLAWDBOT_WEB_PASSWORD` | `clawdbot` | Web UI token | +| `OPENCLAW_WEB_PASSWORD` | `openclaw` | Web UI token | | `TELEGRAM_BOT_TOKEN` | - | Optional Telegram integration | | `GITHUB_TOKEN` | - | Optional GitHub CLI auth | @@ -92,13 +92,13 @@ After approval, refresh the Web UI - it will work permanently for that browser. ```bash # Build -docker build -t clawdbot-glm47-gguf-llamacpp . +docker build -t openclaw-glm47-gguf-llamacpp . 
# Run on RTX 5090 docker run --gpus all -p 8000:8000 -p 18789:18789 \ -v /path/to/workspace:/workspace \ -e LLAMA_API_KEY=your-key \ - clawdbot-glm47-gguf-llamacpp + openclaw-glm47-gguf-llamacpp ``` ## API Usage diff --git a/models/glm47-flash-gguf-llamacpp/entrypoint.sh b/models/glm47-flash-gguf-llamacpp/entrypoint.sh index 8125386..afbd24a 100644 --- a/models/glm47-flash-gguf-llamacpp/entrypoint.sh +++ b/models/glm47-flash-gguf-llamacpp/entrypoint.sh @@ -77,10 +77,20 @@ fi LLAMA_API_KEY="${LLAMA_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" +export OPENCLAW_STATE_DIR OPENCLAW_WORKSPACE TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" + +BOT_CMD="openclaw" +if ! command -v "$BOT_CMD" >/dev/null 2>&1; then + echo "ERROR: openclaw command not found in PATH" + echo "PATH=$PATH" + echo "Container staying alive for debugging." + sleep infinity +fi echo "Starting llama.cpp server..." echo " Model: $MODEL_PATH/$MODEL_FILE" @@ -127,11 +137,13 @@ if [ $WAITED -ge $MAX_WAIT ]; then echo "Container will stay running for debugging." fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." if [ -n "$TELEGRAM_BOT_TOKEN" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true, \"botToken\": \"${TELEGRAM_BOT_TOKEN}\" }" @@ -139,7 +151,7 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -162,7 +174,8 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-llamacpp/$SERVED_MODEL_NAME" }, - "contextTokens": 180000 + "contextTokens": 180000, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { @@ -171,18 +184,19 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then "gateway": { "mode": "local", "bind": "lan", - "auth": { "token": "$CLAWDBOT_WEB_PASSWORD" }, - "remote": { "token": "$CLAWDBOT_WEB_PASSWORD" } + "auth": { "mode": "token", "token": "$OPENCLAW_WEB_PASSWORD" }, + "remote": { "token": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" fi # Auto-fix config -echo "Running clawdbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix || true +echo "Running openclaw doctor to validate/fix config..." 
+OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -202,19 +216,20 @@ fi export OPENAI_API_KEY="$LLAMA_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway (use token auth for URL parameter support) +# Start OpenClaw gateway (use token auth for URL parameter support) echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME CLAWDBOT_GATEWAY_TOKEN="$CLAWDBOT_WEB_PASSWORD" clawdbot gateway --auth token --token "$CLAWDBOT_WEB_PASSWORD" & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR OPENCLAW_GATEWAY_TOKEN="$OPENCLAW_WEB_PASSWORD" \ +"$BOT_CMD" gateway --auth token --token "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready!" echo " llama.cpp API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" -echo " Web UI: https://-18789.proxy.runpod.net/?token=$CLAWDBOT_WEB_PASSWORD" -echo " Web UI Token: $CLAWDBOT_WEB_PASSWORD" +echo " OpenClaw Gateway: ws://localhost:18789" +echo " Web UI: https://-18789.proxy.runpod.net/?token=$OPENCLAW_WEB_PASSWORD" +echo " Web UI Token: $OPENCLAW_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME" echo " Context: $MAX_MODEL_LEN tokens (200k!)" echo " VRAM: ~28GB / 32GB" diff --git a/models/glm47-flash-nvfp4-5090/Dockerfile b/models/glm47-flash-nvfp4-5090/Dockerfile index 157c028..4869d22 100644 --- a/models/glm47-flash-nvfp4-5090/Dockerfile +++ b/models/glm47-flash-nvfp4-5090/Dockerfile @@ -53,7 +53,7 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install tools RUN uv pip install --system "huggingface_hub[cli]" && \ - npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ + npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ npm cache clean --force WORKDIR / @@ -63,8 +63,8 @@ ENV MODEL_NAME="GadflyII/GLM-4.7-Flash-NVFP4" \ SERVED_MODEL_NAME="glm-4.7-flash" \ MAX_MODEL_LEN="200000" \ VLLM_API_KEY="changeme" \ - CLAWDBOT_HOME="/workspace/.clawdbot" \ - CLAWDBOT_WORKSPACE="/workspace/clawd" + OPENCLAW_STATE_DIR="/workspace/.openclaw" \ + OPENCLAW_WORKSPACE="/workspace/openclaw" COPY entrypoint.sh benchmark.sh / RUN chmod +x /entrypoint.sh /benchmark.sh diff --git a/models/glm47-flash-nvfp4-5090/ISSUES.md b/models/glm47-flash-nvfp4-5090/ISSUES.md index 10a4d42..5ebb642 100644 --- a/models/glm47-flash-nvfp4-5090/ISSUES.md +++ b/models/glm47-flash-nvfp4-5090/ISSUES.md @@ -7,7 +7,16 @@ ## Summary -Attempting to run `GadflyII/GLM-4.7-Flash-NVFP4` with vLLM 0.14.0 on RTX 5090 fails due to multiple issues with the GLM-4.7 MLA (Multi-Latent Attention) architecture not being properly supported by vLLM's TransformersMoE fallback. +Attempting to run `GadflyII/GLM-4.7-Flash-NVFP4` with vLLM on RTX 5090 fails due to multiple issues with the GLM-4.7 MLA (Multi-Latent Attention) architecture and SM120 kernel support. + +## Upstream status (as of 2026-01-29) + +- vLLM Issue #32109 is **closed** and was closed by PR #33285. +- PR #33285 **restricts** FP8 MoE CUTLASS backend to SM90/SM100 (does not add SM120 MoE support). +- PR #32237 (SM120 FP8 MoE support) was **closed and not merged**. +- vLLM now includes `Glm4MoeLiteForCausalLM` support, but NVFP4 on SM120 is still unverified. + +Net: there is no confirmed upstream fix for NVFP4 + GLM-4.7 on RTX 5090 yet. 
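Before re-running anything, it can help to confirm what the installed vLLM build actually registers. A minimal sketch, assuming `python3` and the `vllm` package are available inside the container and that the `ModelRegistry` helper is importable (its API has moved between releases, so treat a failure here as "unknown" rather than "unsupported"):

```bash
# Sketch: check whether the installed vLLM build registers the GLM-4.7 architecture.
# Assumes python3 + the vllm package are importable in the image; the ModelRegistry
# helper differs across vLLM versions, so failures fall through to the echo below.
python3 -c 'import vllm; print("vLLM version:", vllm.__version__)'
python3 -c 'from vllm import ModelRegistry; print("Glm4MoeLiteForCausalLM registered:", "Glm4MoeLiteForCausalLM" in ModelRegistry.get_supported_archs())' \
  || echo "ModelRegistry query failed - check architecture support via the vLLM release notes instead"
```
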
## Issues Encountered @@ -53,7 +62,7 @@ Actual: 5 heads × 256 = 1280 vLLM's attention produces wrong output dimensions for GLM-4.7's MLA architecture. -**Status**: UNRESOLVED - requires native Glm4MoeLite support in vLLM +**Status**: UNRESOLVED - still reproduced on SM120 ### 5. SGLang cuDNN Error @@ -70,7 +79,7 @@ cuDNN doesn't support NVFP4 GEMM on Blackwell SM120. ``` GPU: RTX 5090 (Blackwell SM120, 32GB) CUDA: 12.8 -vLLM: 0.14.0 (nightly) +vLLM: 0.14.x (nightly at the time) Transformers: 5.0.0.dev0 Model: GadflyII/GLM-4.7-Flash-NVFP4 ``` @@ -78,9 +87,92 @@ Model: GadflyII/GLM-4.7-Flash-NVFP4 ## When to Retry Check these before retrying: -1. vLLM has native `Glm4MoeLiteForCausalLM` (not TransformersMoE fallback) -2. vLLM Issue #32109 resolved -3. NVIDIA cuDNN Blackwell FP4 support +1. vLLM has native `Glm4MoeLiteForCausalLM` path for GLM-4.7 in production builds +2. SM120 FP8 MoE kernels are supported (not just gated off) +3. NVIDIA cuDNN Blackwell FP4 support is available + +## Known working nightly tag (from upstream reports) + +Community reports in vLLM Issue #32109 mention the following as working at the time: +- `docker.io/vllm/vllm-openai:nightly-0d4044edd85de30d7d4558aeea4d1e95c7c556d6` + +Reported commit window: +- last working: `ffc0a2798b118f7ceb21645df59d2bfdfc461d42` +- first broken: `5dcd7ef1f219068e6b6be5b614bc43978f028651` + +These are historical references for retesting. + +## Verification plan (recommended) + +1. Baseline: run the known working nightly image above with NVFP4 and confirm it still starts. +2. Candidate: run the latest vLLM release or nightly (v0.15.x) with the same config. +3. Compare logs for MLA mismatch or SM120 kernel selection errors. +4. Record results here and update status. + +## RunPod test checklist (NVFP4, no custom image) + +Goal: validate NVFP4 on RTX 5090 using official vLLM images (no custom build). 
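For anyone with a local RTX 5090, roughly the same baseline-vs-candidate comparison can be reproduced without RunPod by running the official images directly. A sketch, assuming Docker with the NVIDIA container runtime and a local HuggingFace cache; the server flags mirror the start command in step 3 below:

```bash
# Sketch: local (non-RunPod) repro of the baseline/candidate comparison.
# Assumes Docker + NVIDIA container runtime on an RTX 5090 host; adjust the cache path.
IMAGE="vllm/vllm-openai:latest"   # swap in the pinned nightly tag above for the baseline run
docker run --runtime nvidia --gpus all --ipc=host -p 8000:8000 \
  -v "$HOME/.cache/huggingface:/root/.cache/huggingface" \
  -e HF_TOKEN="$HF_TOKEN" \
  "$IMAGE" \
  --model GadflyII/GLM-4.7-Flash-NVFP4 \
  --served-model-name glm-4.7-flash \
  --max-model-len 200000 \
  --gpu-memory-utilization 0.95 \
  --api-key changeme \
  --enable-auto-tool-choice \
  --tool-call-parser glm47 \
  --reasoning-parser glm45
```
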
+ +### 1) Create a pod +- GPU: RTX 5090 32GB +- Volume: 100GB+ mounted at `/workspace` +- Ports: `8000/http`, `22/tcp` +- Image: use one of the two images below: + - Baseline (reported working): `vllm/vllm-openai:nightly-0d4044edd85de30d7d4558aeea4d1e95c7c556d6` + - Candidate (latest): `vllm/vllm-openai:latest` + +### 2) Environment variables +- `HF_TOKEN` (optional but recommended) +- `VLLM_API_KEY` (required) +- `MODEL_NAME=GadflyII/GLM-4.7-Flash-NVFP4` +- `SERVED_MODEL_NAME=glm-4.7-flash` +- `MAX_MODEL_LEN=200000` +- `TOOL_CALL_PARSER=glm47` +- `REASONING_PARSER=glm45` +- `GPU_MEMORY_UTILIZATION=0.95` +- `HF_HOME=/workspace/huggingface` + +### 3) Start command +Use the same command for both baseline and candidate images: +``` +vllm serve ${MODEL_NAME} \ + --host 0.0.0.0 \ + --port 8000 \ + --max-model-len ${MAX_MODEL_LEN} \ + --gpu-memory-utilization ${GPU_MEMORY_UTILIZATION} \ + --served-model-name ${SERVED_MODEL_NAME} \ + --api-key ${VLLM_API_KEY} \ + --enable-auto-tool-choice \ + --tool-call-parser ${TOOL_CALL_PARSER} \ + --reasoning-parser ${REASONING_PARSER} +``` + +### 4) Health check +``` +curl http://localhost:8000/health +``` + +### 5) Minimal chat test +``` +curl http://localhost:8000/v1/chat/completions \ + -H "Authorization: Bearer ${VLLM_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-4.7-flash", + "messages": [{"role": "user", "content": "Hello!"}], + "max_tokens": 64 + }' +``` + +### 6) Log triage (what to watch for) +- `No compiled cutlass_scaled_mm for CUDA device capability: 120` +- `mat1 and mat2 shapes cannot be multiplied` (MLA mismatch) +- CUDA graph or cuDNN errors on SM120 + +### 7) Record results +- Image tag used +- vLLM version reported in logs +- Pass/fail and error signatures ## Working Alternative diff --git a/models/glm47-flash-nvfp4-5090/README.md b/models/glm47-flash-nvfp4-5090/README.md index eae8680..d674f01 100644 --- a/models/glm47-flash-nvfp4-5090/README.md +++ b/models/glm47-flash-nvfp4-5090/README.md @@ -39,7 +39,7 @@ Full 200K context window with MLA for reduced KV cache memory. ### 1. Create RunPod Pod **Settings:** -- **Image**: `runpod/clawdbot-glm47-flash-nvfp4-5090:latest` +- **Image**: `runpod/openclaw-glm47-flash-nvfp4-5090:latest` - **GPU**: 1x RTX 5090 32GB - **Volume**: 100GB at `/workspace` (network storage) - **Container Disk**: 50GB @@ -54,7 +54,7 @@ Full 200K context window with MLA for reduced KV cache memory. | `HF_TOKEN` | Recommended | - | [HuggingFace token](https://huggingface.co/settings/tokens) for faster model downloads | | `TELEGRAM_BOT_TOKEN` | No | - | Telegram bot token | | `GITHUB_TOKEN` | No | - | GitHub token for `gh` CLI | -| `CLAWDBOT_WEB_PASSWORD` | No | `clawdbot` | Password for web UI | +| `OPENCLAW_WEB_PASSWORD` | No | `openclaw` | Password for web UI | ### 3. 
Test It diff --git a/models/glm47-flash-nvfp4-5090/entrypoint.sh b/models/glm47-flash-nvfp4-5090/entrypoint.sh index 5642d59..9685a85 100644 --- a/models/glm47-flash-nvfp4-5090/entrypoint.sh +++ b/models/glm47-flash-nvfp4-5090/entrypoint.sh @@ -75,11 +75,14 @@ fi VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -# Web UI password - users enter this to access the Clawdbot control panel -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +# Web UI password - users enter this to access the OpenClaw control panel +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" + +BOT_CMD="openclaw" echo "Starting vLLM server..." echo " Model: $MODEL_PATH" @@ -128,11 +131,14 @@ if [ $WAITED -ge $MAX_WAIT ]; then # Don't exit - keep container running for debugging fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -141,9 +147,9 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - # Create a minimal config - clawdbot doctor will fix any missing fields + # Create a minimal config - openclaw doctor will fix any missing fields # contextTokens: 180000 leaves room for output within 200K context - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -166,7 +172,8 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-vllm/$SERVED_MODEL_NAME" }, - "contextTokens": 180000 + "contextTokens": 180000, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { @@ -174,17 +181,19 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then }, "gateway": { "mode": "local", - "bind": "lan" + "bind": "lan", + "auth": { "mode": "password", "password": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" fi -# Auto-fix config to match current Clawdbot version's schema -echo "Running clawdbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix 2>/dev/null || true +# Auto-fix config to match current OpenClaw version's schema +echo "Running openclaw doctor to validate/fix config..." 
+OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix 2>/dev/null || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -204,19 +213,19 @@ fi export OPENAI_API_KEY="$VLLM_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway with password auth for web UI access +# Start OpenClaw gateway with password auth for web UI access echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot gateway --auth password --password "$CLAWDBOT_WEB_PASSWORD" 2>/dev/null & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" 2>/dev/null & GATEWAY_PID=$! echo "" echo "================================================" echo " Ready! (RTX 5090 Blackwell SM120)" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo " Web UI: https://-18789.proxy.runpod.net" -echo " Web UI Password: $CLAWDBOT_WEB_PASSWORD" +echo " Web UI Password: $OPENCLAW_WEB_PASSWORD" echo " Model: $SERVED_MODEL_NAME (NVFP4)" echo " Context: $MAX_MODEL_LEN tokens" echo " Cost: ~\$0.89/hr (36% savings vs A100)" diff --git a/models/glm47-reap-w4a16/Dockerfile b/models/glm47-reap-w4a16/Dockerfile index 9aba99f..6da889f 100644 --- a/models/glm47-reap-w4a16/Dockerfile +++ b/models/glm47-reap-w4a16/Dockerfile @@ -30,13 +30,14 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64:$LD_LIBRARY_PATH # Install vLLM nightly (required for GLM-4.7 support) RUN uv pip install --system -U vllm --pre -# Install Clawdbot -RUN npm install -g --prefer-offline clawdbot@latest && \ +# Install OpenClaw +RUN npm install -g --prefer-offline openclaw@latest && \ npm cache clean --force # Environment variables ENV HF_HOME=/workspace/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw ENV MODEL_NAME=0xSero/GLM-4.7-REAP-40-W4A16 ENV SERVED_MODEL_NAME=glm-4.7-reap ENV VLLM_API_KEY=changeme @@ -45,7 +46,7 @@ ENV GPU_MEMORY_UTILIZATION=0.90 ENV TOOL_CALL_PARSER=glm45 # Create directories -RUN mkdir -p /workspace/huggingface /workspace/.clawdbot /workspace/clawd +RUN mkdir -p /workspace/huggingface /workspace/.openclaw /workspace/openclaw # Copy entrypoint script COPY entrypoint.sh /entrypoint.sh @@ -53,7 +54,7 @@ RUN chmod +x /entrypoint.sh # Expose ports # 8000: vLLM API -# 18789: Clawdbot Gateway +# 18789: OpenClaw Gateway # 22: SSH EXPOSE 8000 18789 22 diff --git a/models/glm47-reap-w4a16/README.md b/models/glm47-reap-w4a16/README.md index ff66fdc..3a472f8 100644 --- a/models/glm47-reap-w4a16/README.md +++ b/models/glm47-reap-w4a16/README.md @@ -21,7 +21,7 @@ High-end option for maximum performance. ### 1. 
Create RunPod Pod -- **Image**: `yourusername/clawdbot-glm47-reap-w4a16:latest` +- **Image**: `yourusername/openclaw-glm47-reap-w4a16:latest` - **GPU**: 1x B200 180GB - **Volume**: 200GB at `/workspace` - **Container Disk**: 50GB @@ -59,11 +59,11 @@ Files persist on network volume `/workspace`: ``` /workspace/ ├── huggingface/ # Model cache -├── .clawdbot/ -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # State │ └── telegram/ # Session -└── clawd/ # Workspace +└── openclaw/ # Workspace ``` ## B200 (Blackwell) Support diff --git a/models/glm47-reap-w4a16/entrypoint.sh b/models/glm47-reap-w4a16/entrypoint.sh index 973f989..4695ff9 100644 --- a/models/glm47-reap-w4a16/entrypoint.sh +++ b/models/glm47-reap-w4a16/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - GLM-4.7-REAP W4A16 + Clawdbot startup script for RunPod B200 +# entrypoint.sh - GLM-4.7-REAP W4A16 + OpenClaw startup script for RunPod B200 set -e echo "============================================" -echo " GLM-4.7-REAP W4A16 + Clawdbot Startup" +echo " GLM-4.7-REAP W4A16 + OpenClaw Startup" echo "============================================" # Configuration from environment @@ -14,18 +14,25 @@ MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}" GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.90}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-glm45}" HF_HOME="${HF_HOME:-/workspace/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" export HF_HOME -export CLAWDBOT_STATE_DIR +export OPENCLAW_STATE_DIR export PATH=/usr/local/cuda-13.1/bin:$PATH export CUDA_HOME=/usr/local/cuda-13.1 export LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64:$LD_LIBRARY_PATH # Ensure directories exist -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true + +BOT_CMD="openclaw" # Configure GitHub CLI if [ -n "$GITHUB_TOKEN" ]; then @@ -54,9 +61,9 @@ echo " Tool parser: $TOOL_CALL_PARSER" echo " CUDA: $(nvcc --version | grep release | awk '{print $5}' | tr -d ',')" echo "" -# Initialize Clawdbot config if not exists -if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -65,12 +72,12 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd" + "workspace": "/workspace/openclaw" } }, "models": { @@ -95,15 +102,17 @@ if [ ! 
-f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then ${TELEGRAM_CONFIG} }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "password", "password": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi # Build vLLM command @@ -148,10 +157,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_STATE_DIR clawdbot gateway & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" @@ -159,7 +168,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index dba14ae..fb33021 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -1,9 +1,9 @@ #!/bin/bash -# entrypoint.sh - Clawdbot + vLLM startup script for RunPod +# entrypoint.sh - OpenClaw + vLLM startup script for RunPod set -e echo "============================================" -echo " Clawdbot + vLLM Startup" +echo " OpenClaw + vLLM Startup" echo "============================================" # Configuration from environment @@ -15,14 +15,20 @@ GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.90}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-hermes}" TENSOR_PARALLEL_SIZE="${TENSOR_PARALLEL_SIZE:-auto}" HF_HOME="${HF_HOME:-/workspace/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" export HF_HOME -export CLAWDBOT_STATE_DIR +export OPENCLAW_STATE_DIR + +BOT_CMD="openclaw" # Ensure directories exist -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" /workspace/openclaw +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true # Auto-detect tensor parallel size if [ "$TENSOR_PARALLEL_SIZE" = "auto" ]; then @@ -39,9 +45,9 @@ echo " Tensor parallel: $TENSOR_PARALLEL_SIZE" echo " Tool parser: $TOOL_CALL_PARSER" echo "" -# Initialize Clawdbot config if not exists -if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -50,12 +56,12 @@ if [ ! 
-f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd" + "workspace": "/workspace/openclaw" } }, "models": { @@ -80,22 +86,20 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then ${TELEGRAM_CONFIG} }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "token", "token": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi -# Initialize Clawdbot workspace if empty -if [ ! -f "/workspace/clawd/AGENTS.md" ]; then - echo "Initializing Clawdbot workspace..." - clawdbot setup --non-interactive --accept-risk --workspace /workspace/clawd 2>/dev/null || true -fi +# Workspace files are seeded during image build. # Build vLLM command VLLM_CMD="vllm serve $MODEL_NAME" @@ -138,10 +142,10 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Clawdbot gateway..." -clawdbot gateway & +echo "Starting OpenClaw gateway..." +"$BOT_CMD" gateway --auth token --token "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" @@ -149,7 +153,7 @@ echo "============================================" echo " Services Running" echo "============================================" echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" +echo " OpenClaw Gateway: ws://localhost:18789" echo "" echo " vLLM PID: $VLLM_PID" echo " Gateway PID: $GATEWAY_PID" diff --git a/scripts/setup-clawdbot.sh b/scripts/setup-openclaw.sh old mode 100755 new mode 100644 similarity index 81% rename from scripts/setup-clawdbot.sh rename to scripts/setup-openclaw.sh index 61889c2..1c2e0c1 --- a/scripts/setup-clawdbot.sh +++ b/scripts/setup-openclaw.sh @@ -1,5 +1,5 @@ #!/bin/bash -# setup-clawdbot.sh - Install and configure Clawdbot on RunPod +# setup-openclaw.sh - Install and configure OpenClaw on RunPod # Prerequisites: vLLM server running on port 8000 set -e @@ -21,13 +21,13 @@ VLLM_HOST="${VLLM_HOST:-localhost}" VLLM_PORT="${VLLM_PORT:-8000}" VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-qwen3-30b-a3b}" -CLAWDBOT_CONFIG_DIR="${CLAWDBOT_CONFIG_DIR:-$HOME/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-$HOME/.openclaw}" RUNPOD_POD_ID="${RUNPOD_POD_ID:-}" # Print banner echo "" echo "===========================================" -echo " Clawdbot Setup Script" +echo " OpenClaw Setup Script" echo "===========================================" echo "" @@ -55,10 +55,11 @@ if ! command -v npm &> /dev/null; then fi log_info "npm version: $(npm --version)" -# Step 2: Install Clawdbot -log_info "Installing Clawdbot..." -npm install -g clawdbot@latest -log_success "Clawdbot installed: $(clawdbot --version 2>/dev/null || echo 'version check failed')" +# Step 2: Install OpenClaw +log_info "Installing OpenClaw..." 
+npm install -g openclaw@latest +BOT_CMD="openclaw" +log_success "OpenClaw installed: $("$BOT_CMD" --version 2>/dev/null || echo 'version check failed')" # Step 3: Wait for vLLM to be ready log_info "Waiting for vLLM server to be ready..." @@ -86,9 +87,9 @@ MODELS_RESPONSE=$(curl -s "http://${VLLM_HOST}:${VLLM_PORT}/v1/models" \ -H "Authorization: Bearer ${VLLM_API_KEY}") echo "Available models: $MODELS_RESPONSE" -# Step 4: Create Clawdbot configuration directory -log_info "Creating Clawdbot configuration..." -mkdir -p "$CLAWDBOT_CONFIG_DIR" +# Step 4: Create OpenClaw configuration directory +log_info "Creating OpenClaw configuration..." +mkdir -p "$OPENCLAW_STATE_DIR" # Determine the base URL for the vLLM endpoint if [ -n "$RUNPOD_POD_ID" ]; then @@ -99,8 +100,8 @@ else VLLM_BASE_URL="http://${VLLM_HOST}:${VLLM_PORT}/v1" fi -# Step 5: Create Clawdbot configuration file -cat > "$CLAWDBOT_CONFIG_DIR/clawdbot.json" << EOF +# Step 5: Create OpenClaw configuration file +cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { @@ -132,15 +133,15 @@ cat > "$CLAWDBOT_CONFIG_DIR/clawdbot.json" << EOF } EOF -log_success "Clawdbot configuration created at $CLAWDBOT_CONFIG_DIR/clawdbot.json" +log_success "OpenClaw configuration created at $OPENCLAW_STATE_DIR/openclaw.json" -# Step 6: Test Clawdbot connection -log_info "Testing Clawdbot configuration..." +# Step 6: Test OpenClaw connection +log_info "Testing OpenClaw configuration..." echo "" echo "Configuration summary:" echo " vLLM URL: $VLLM_BASE_URL" echo " Model: $SERVED_MODEL_NAME" -echo " Config dir: $CLAWDBOT_CONFIG_DIR" +echo " Config dir: $OPENCLAW_STATE_DIR" echo "" # Test a simple completion @@ -166,11 +167,11 @@ echo "===========================================" echo " Setup Complete!" 
echo "===========================================" echo "" -echo "To start Clawdbot, run:" -echo " clawdbot" +echo "To start OpenClaw, run:" +echo " openclaw" echo "" echo "To start with daemon mode:" -echo " clawdbot onboard --install-daemon" +echo " openclaw onboard --install-daemon" echo "" -echo "Configuration file: $CLAWDBOT_CONFIG_DIR/clawdbot.json" +echo "Configuration file: $OPENCLAW_STATE_DIR/openclaw.json" echo "" diff --git a/scripts/start-vllm.sh b/scripts/start-vllm.sh index 4af4bdd..cc75e5f 100755 --- a/scripts/start-vllm.sh +++ b/scripts/start-vllm.sh @@ -1,5 +1,5 @@ #!/bin/bash -# start-vllm.sh - vLLM startup script for Clawdbot on RunPod +# start-vllm.sh - vLLM startup script for OpenClaw on RunPod # Handles model download, GPU detection, and vLLM server startup set -e @@ -36,7 +36,7 @@ export HF_HOME # Print banner echo "" echo "===========================================" -echo " Clawdbot vLLM Server Startup" +echo " OpenClaw vLLM Server Startup" echo "===========================================" echo "" diff --git a/templates/clawdbot-vllm.json b/templates/openclaw-vllm.json similarity index 97% rename from templates/clawdbot-vllm.json rename to templates/openclaw-vllm.json index 726f483..c6787ee 100644 --- a/templates/clawdbot-vllm.json +++ b/templates/openclaw-vllm.json @@ -1,7 +1,7 @@ { "tiers": { "tier1": { - "name": "clawdbot-vllm-qwen3", + "name": "openclaw-vllm-qwen3", "description": "Tier 1: Qwen3-30B-A3B on 1x H100 (~$2/hr) - Best for validation", "imageName": "vllm/vllm-openai:v0.12.0", "containerDiskInGb": 50, @@ -25,7 +25,7 @@ ] }, "tier2": { - "name": "clawdbot-vllm-mimo", + "name": "openclaw-vllm-mimo", "description": "Tier 2: MiMo-V2-Flash on 2x H100 (~$4/hr) - Fastest inference", "imageName": "vllm/vllm-openai:v0.12.0", "containerDiskInGb": 50, @@ -48,7 +48,7 @@ ] }, "tier3": { - "name": "clawdbot-vllm-glm47", + "name": "openclaw-vllm-glm47", "description": "Tier 3: GLM-4.7-FP8 on 4x H100 or 2x H200 (~$7-8/hr) - SOTA tool calling", "imageName": "vllm/vllm-openai:latest", "containerDiskInGb": 100, @@ -73,7 +73,7 @@ ] }, "tier3_h200": { - "name": "clawdbot-vllm-glm47-h200", + "name": "openclaw-vllm-glm47-h200", "description": "Tier 3 Alt: GLM-4.7-FP8 on 2x H200 (~$7/hr) - Best value for SOTA", "imageName": "vllm/vllm-openai:latest", "containerDiskInGb": 100, diff --git a/templates/runpod-template.json b/templates/runpod-template.json index f070e66..845d258 100644 --- a/templates/runpod-template.json +++ b/templates/runpod-template.json @@ -1,7 +1,7 @@ { - "name": "clawdbot-vllm", - "description": "Clawdbot AI assistant with vLLM for local LLM inference. Includes Telegram integration.", - "imageName": "your-dockerhub-username/clawdbot-vllm:latest", + "name": "openclaw-vllm", + "description": "OpenClaw AI assistant with vLLM for local LLM inference. Includes Telegram integration.", + "imageName": "your-dockerhub-username/openclaw-vllm:latest", "containerDiskInGb": 50, "volumeInGb": 150, "volumeMountPath": "/workspace", @@ -15,8 +15,9 @@ "TOOL_CALL_PARSER": "hermes", "TENSOR_PARALLEL_SIZE": "auto", "HF_HOME": "/workspace/huggingface", - "CLAWDBOT_STATE_DIR": "/workspace/.clawdbot", + "OPENCLAW_STATE_DIR": "/workspace/.openclaw", + "OPENCLAW_WORKSPACE": "/workspace/openclaw", "TELEGRAM_BOT_TOKEN": "" }, - "readme": "# Clawdbot + vLLM\n\nAI coding assistant with local LLM inference.\n\n## Quick Start\n1. Set TELEGRAM_BOT_TOKEN env var (get from @BotFather)\n2. Start the pod - services auto-start\n3. Message your bot on Telegram\n4. 
First time: approve pairing via SSH: `clawdbot pairing list telegram` then `clawdbot pairing approve telegram CODE --notify`\n\n## Persistence\n- Config & pairings stored in /workspace/.clawdbot (survives restarts)\n- Model cache in /workspace/huggingface\n\n## Environment Variables\n- `MODEL_NAME`: HuggingFace model ID\n- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token\n- `VLLM_API_KEY`: API key for vLLM\n- `MAX_MODEL_LEN`: Context length\n\n## Ports\n- 8000: vLLM API\n- 18789: Clawdbot Gateway" + "readme": "# OpenClaw + vLLM\n\nAI coding assistant with local LLM inference.\n\n## Quick Start\n1. Set TELEGRAM_BOT_TOKEN env var (get from @BotFather)\n2. Start the pod - services auto-start\n3. Message your bot on Telegram\n4. First time: approve pairing via SSH: `openclaw pairing list telegram` then `openclaw pairing approve telegram CODE --notify`\n\n## Persistence\n- Config & pairings stored in /workspace/.openclaw\n- Model cache in /workspace/huggingface\n\n## Environment Variables\n- `MODEL_NAME`: HuggingFace model ID\n- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token\n- `VLLM_API_KEY`: API key for vLLM\n- `MAX_MODEL_LEN`: Context length\n\n## Ports\n- 8000: vLLM API\n- 18789: OpenClaw Gateway" }
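Once a pod launched from this template is up, the vLLM endpoint can also be exercised from outside the pod through the RunPod HTTP proxy. A sketch, assuming port 8000 is exposed as HTTP, the default `VLLM_API_KEY`, and `qwen3-30b-a3b` as the served model name (an assumption here; use whatever your pod lists under `/v1/models`):

```bash
# Sketch: external smoke test against a pod created from this template.
# <pod-id> is your RunPod pod ID; adjust the API key and model name to your pod.
POD_ID="<pod-id>"

# List the models the server actually exposes
curl "https://${POD_ID}-8000.proxy.runpod.net/v1/models" \
  -H "Authorization: Bearer changeme"

# Minimal chat completion
curl "https://${POD_ID}-8000.proxy.runpod.net/v1/chat/completions" \
  -H "Authorization: Bearer changeme" \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3-30b-a3b", "messages": [{"role": "user", "content": "Hello!"}], "max_tokens": 32}'
```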