diff --git a/.env.example b/.env.example index 8fd6b96..fa94123 100644 --- a/.env.example +++ b/.env.example @@ -10,12 +10,14 @@ TOOL_CALL_PARSER=hermes # HuggingFace (optional - for gated models) HF_TOKEN=hf_your_token_here -# RunPod Configuration (for remote access) +# Runpod Configuration (for remote access) RUNPOD_POD_ID=your-pod-id RUNPOD_VLLM_API_KEY=your-secure-api-key-here -# Clawdbot Web UI (password for accessing the control panel) -CLAWDBOT_WEB_PASSWORD=clawdbot +# OpenClaw Configuration +OPENCLAW_STATE_DIR=/workspace/.openclaw +OPENCLAW_WORKSPACE=/workspace/openclaw +OPENCLAW_WEB_PASSWORD=changeme # Messaging Integrations (optional) TELEGRAM_BOT_TOKEN= diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 43996c7..fe65e99 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -2,11 +2,8 @@ name: Build and Push Docker Images on: push: - branches: [main, feat/add-model-folders, gguf-v3] - paths: - - 'models/**' - - 'Dockerfile' - - '.github/workflows/docker-build.yml' + branches: [main] + tags: ['*'] pull_request: paths: - 'models/**' @@ -26,12 +23,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -44,6 +35,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -52,22 +49,34 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: - context: models/glm47-flash-awq-4bit - push: true + context: . 
+ file: models/glm47-flash-awq-4bit/Dockerfile + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-awq-4bit:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-stack-glm4.7-flash-awq-4bit:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-awq-4bit:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-awq-4bit:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-awq-4bit:buildcache,mode=max # Disabled - vLLM NVFP4 has bugs with GLM-4.7 MLA on Blackwell # See models/glm47-flash-nvfp4-5090/ISSUES.md @@ -81,12 +90,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -99,6 +102,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -107,22 +116,34 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: - context: models/glm47-flash-nvfp4-5090 - push: true + context: . 
+ file: models/glm47-flash-nvfp4-5090/Dockerfile + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-nvfp4-5090:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-stack-glm4.7-flash-nvfp4-5090:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-nvfp4-5090:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-nvfp4-5090:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-nvfp4-5090:buildcache,mode=max # GLM-4.7-Flash GGUF with llama.cpp - WORKING on RTX 5090! build-glm47-flash-gguf-llamacpp: @@ -134,12 +155,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine build type id: build_type run: | @@ -152,6 +167,12 @@ jobs: - name: Set environment variables run: | echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO || secrets.DOCKERHUB_USERNAME }}" >> $GITHUB_ENV + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + BRANCH_NAME="${{ github.head_ref }}" + else + BRANCH_NAME="${GITHUB_REF##refs/heads/}" + fi + BRANCH_TAG=$(echo "$BRANCH_NAME" | sed 's|/|-|g') if [[ "${{ github.ref }}" == refs/tags/* ]]; then echo "VERSION=${GITHUB_REF##refs/tags/}" >> $GITHUB_ENV @@ -160,22 +181,34 @@ jobs: echo "VERSION=latest" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV else - BRANCH_NAME=$(echo ${GITHUB_REF##refs/heads/} | sed 's/\//-/g') - echo "VERSION=dev-${BRANCH_NAME}" >> $GITHUB_ENV + echo "VERSION=${BRANCH_TAG}" >> $GITHUB_ENV echo "ALSO_LATEST=false" >> $GITHUB_ENV fi + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then + echo "PUSH_IMAGES=false" >> $GITHUB_ENV + else + echo "PUSH_IMAGES=true" >> $GITHUB_ENV + fi + + - name: Login to Docker Hub + if: env.PUSH_IMAGES == 'true' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 with: - context: models/glm47-flash-gguf-llamacpp - push: true + context: . 
+ file: models/glm47-flash-gguf-llamacpp/Dockerfile + push: ${{ env.PUSH_IMAGES == 'true' }} tags: | - ${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:${{ env.VERSION }} - ${{ env.ALSO_LATEST == 'true' && format('{0}/clawdbot-glm47-flash-gguf:latest', env.DOCKERHUB_REPO) || '' }} + ${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:${{ env.VERSION }} + ${{ env.ALSO_LATEST == 'true' && format('{0}/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:latest', env.DOCKERHUB_REPO) || '' }} platforms: linux/amd64 - cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:buildcache - cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/clawdbot-glm47-flash-gguf:buildcache,mode=max + cache-from: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:buildcache + cache-to: type=registry,ref=${{ env.DOCKERHUB_REPO }}/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:buildcache,mode=max # Disabled for now # build-glm47-flash-fp16: diff --git a/.gitignore b/.gitignore index 8a344f7..0935446 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ Thumbs.db *.swp *.swo -# Node (if running Clawdbot locally) +# Node (if running OpenClaw locally) node_modules/ # Python diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..05934da --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,102 @@ +# AGENTS.md + +OpenClaw Stack on Runpod: self-contained Docker images with LLM + media services for GPU pods. + +## Codebase Structure + +``` +openclaw-stack/ +├── models/ # GPU-specific Dockerfiles +│ ├── glm47-flash-gguf-llamacpp/ # RTX 5090 - llama.cpp (primary) +│ ├── glm47-flash-awq-4bit/ # A100 80GB - vLLM +│ ├── glm47-flash-fp16/ # H100/A100 - vLLM +│ ├── glm47-flash-nvfp4-5090/ # RTX 5090 - vLLM (experimental) +│ └── glm47-reap-w4a16/ # B200 - vLLM +├── scripts/ # Startup and utilities +│ ├── entrypoint.sh # Main container entrypoint +│ ├── entrypoint-common.sh # Shared entrypoint logic +│ └── openclaw-image-gen # Image generation CLI +├── skills/ # Agent capabilities +│ └── image-gen/ # FLUX.2 image generation +├── config/ +│ ├── openclaw.json # OpenClaw config template +│ └── workspace/ # Files copied to /workspace/openclaw/ +├── tests/ # Test scripts +└── Dockerfile # Base/fallback Dockerfile +``` + +## Key Decisions + +- **RTX 5090 uses llama.cpp** (`glm47-flash-gguf-llamacpp/`) — vLLM has dimension mismatch bugs with GLM-4.7 MLA attention on NVFP4 +- **PyTorch cu128 required for RTX 5090** — cu124 doesn't support Blackwell sm_120 architecture +- **Diffusers from git** — stable release lacks `Flux2KleinPipeline` for image generation +- **llama.cpp built from source** with `DCMAKE_CUDA_ARCHITECTURES="120"` for sm_120 support +- **LLM and Audio binaries MUST be separate** — LLM uses main llama.cpp branch, Audio uses PR #18641 branch. They have incompatible shared libraries. LLM libs go to `/usr/local/lib/`, Audio libs go to `/usr/local/bin/` (see Dockerfile lines 52 vs 73). Mixing them breaks LLM server startup. +- **Persistent servers for low latency** — Audio (port 8001) and Image (port 8002) run as persistent servers with models pre-loaded in VRAM. CLI scripts (`openclaw-tts`, `openclaw-stt`, `openclaw-image-gen`) call these servers via HTTP API for instant inference (~0.3-0.8s vs 2-3s with per-request loading). 
These ports are internal-only; public access goes through the proxy on 8080. + +## Build Commands + +```bash +# Build primary RTX 5090 image +docker build -f models/glm47-flash-gguf-llamacpp/Dockerfile -t openclaw-gguf . + +# Build other variants +docker build -f models/glm47-flash-awq-4bit/Dockerfile -t openclaw-awq . +docker build -f models/glm47-flash-fp16/Dockerfile -t openclaw-fp16 . +``` + +## Testing + +```bash +# Health check +curl http://localhost:8000/health + +# Test suites +./tests/test-vllm.sh +./tests/test-tool-calling.sh + +# Image generation +openclaw-image-gen --prompt "test" --width 512 --height 512 --output /tmp/test.png +``` + +## Runpod Pod Access + +```bash +# SSH into pod (use Runpod MCP tools to get IP/port) +ssh -i ~/.ssh/id_runpod root@ -p + +# Common debugging +nvidia-smi +curl http://localhost:8000/health +curl http://localhost:8000/v1/models +``` + +## Where to Make Changes + +| Task | Location | +|------|----------| +| Add new GPU variant | Create new folder in `models/` with Dockerfile + entrypoint.sh | +| Change startup logic | `scripts/entrypoint-common.sh` (shared) or model-specific entrypoint | +| Add agent skill | Create folder in `skills/` with SKILL.md | +| Modify OpenClaw workspace | `config/workspace/` | +| Update CI/CD | `.github/workflows/docker-build.yml` | + +## VRAM Usage (RTX 5090 - 32GB) + +| Component | VRAM | Notes | +|-----------|------|-------| +| GLM-4.7 LLM (200k ctx) | ~22.5 GB | Model + KV cache (q8_0), `LLAMA_GPU_LAYERS=44` | +| Audio Server (TTS/STT) | ~2 GB | LFM2.5-Audio-1.5B-Q4_0 | +| Image Server (FLUX.2) | ~3-4 GB | FLUX.2-klein-4B-SDNQ-4bit-dynamic | +| **Total (all 3)** | **~29-30 GB** | **~2 GB free** | +| **LLM + Audio only** | **~26 GB** | **~6 GB free** | + +**Note**: 200k context fits with all 3 servers on 32GB when `LLAMA_PARALLEL=1` and `LLAMA_GPU_LAYERS=44`. If memory pressure occurs, reduce `MAX_MODEL_LEN` or lower `LLAMA_GPU_LAYERS`. + +## Important Notes + +- Never start/stop servers in code — user handles that +- Use Runpod MCP tools to manage pods +- RTX 5090 image gen requires: PyTorch cu128 + diffusers from git +- Model downloads go to `/workspace/huggingface/` (persisted volume) +- **CRITICAL**: LLM binaries (main branch) and Audio binaries (PR #18641) must use separate library paths. Never copy audio `.so` files to `/usr/local/lib/` - they will break LLM server. diff --git a/CLAUDE.md b/CLAUDE.md index 9eb13c2..885736e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,134 +1,3 @@ # CLAUDE.md -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Project Overview - -RunPod-optimized Docker deployment for running Clawdbot (AI coding assistant) with GLM-4.7 language models using vLLM for inference. Multiple model variants are optimized for different GPU tiers (A100, H100, B200, RTX 5090). 
- -## Build Commands - -```bash -# Build a specific model variant -docker build -t clawdbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ -docker build -t clawdbot-glm47-flash-fp16 models/glm47-flash-fp16/ -docker build -t clawdbot-glm47-flash-nvfp4-5090 models/glm47-flash-nvfp4-5090/ - -# Push to Docker Hub -docker tag clawdbot-glm47-flash-awq-4bit yourusername/clawdbot-glm47-flash-awq-4bit:latest -docker push yourusername/clawdbot-glm47-flash-awq-4bit:latest -``` - -## Local Development - -```bash -# Run vLLM server with GPU -docker-compose up vllm - -# Run with mock vLLM (no GPU required) -docker-compose --profile mock up vllm-mock - -# Run test suite -docker-compose --profile test up tests -``` - -## Testing - -```bash -# Health check -curl http://localhost:8000/health - -# List models -curl http://localhost:8000/v1/models -H "Authorization: Bearer $VLLM_API_KEY" - -# Run full test suites -./tests/test-vllm.sh # 6 tests: health, models, chat, coding, tokens, streaming -./tests/test-tool-calling.sh # Tool calling functionality -``` - -## Architecture - -``` -models/ # Model-specific Dockerfiles and configs -├── glm47-flash-awq-4bit/ # AWQ 4-bit quantized (A100 80GB) -├── glm47-flash-fp16/ # Full precision (H100/A100) -├── glm47-flash-nvfp4-5090/# NVFP4 quantized (RTX 5090) -└── glm47-reap-w4a16/ # REAP W4A16 (B200) - -scripts/ # Startup orchestration -├── entrypoint.sh # Docker entrypoint (starts vLLM + Clawdbot) -├── start-vllm.sh # vLLM server with GPU detection -└── setup-clawdbot.sh # Clawdbot installation - -config/ # Runtime configuration -├── clawdbot.json # Clawdbot config template -└── workspace/ # Agent identity and system docs -``` - -## Key Ports - -| Port | Service | -|-------|-------------------| -| 8000 | vLLM API | -| 18789 | Clawdbot Gateway | -| 18790 | Clawdbot Bridge | -| 18793 | Clawdbot Canvas | -| 22 | SSH | - -## CI/CD (GitHub Actions) - -Workflow at `.github/workflows/docker-build.yml`: -- Push to `main` → tagged `:latest` -- Push to branches → tagged `:dev-{branch}` -- Git tags (v1.0.0) → tagged with version + `:latest` -- PRs → build validation only - -Required secrets: `DOCKERHUB_USERNAME`, `DOCKERHUB_TOKEN` - -## Environment Variables - -Key variables from `.env.example`: -- `VLLM_API_KEY` - API authentication -- `MODEL_NAME` - HuggingFace model path -- `SERVED_MODEL_NAME` - Model alias for API -- `MAX_MODEL_LEN` - Context window size -- `TOOL_CALL_PARSER` - Parser type (hermes) -- `HF_TOKEN` - HuggingFace authentication (for gated models) - -## Entrypoint Flow - -1. Configure environment and detect GPU count -2. Generate `clawdbot.json` with vLLM provider settings -3. Start vLLM server in background -4. Wait for health check (max 5 minutes) -5. Start Clawdbot gateway -6. 
Handle graceful shutdown on SIGTERM/SIGINT - -## RunPod SSH Access - -Always use the local RunPod SSH key when connecting to pods: - -```bash -ssh -i ~/.ssh/id_runpod root@ -p -``` - -## Debugging on RunPod Pods - -When SSH'd into a RunPod pod, check these locations for logs: - -```bash -# vLLM logs (runs in foreground, check container logs in RunPod UI) -# Or if debugging after SSH: -ps aux | grep vllm # Check if vLLM is running -nvidia-smi # Check GPU memory usage - -# System logs -journalctl -u ssh # SSH service logs -dmesg | tail -50 # Kernel messages (CUDA errors appear here) - -# Container startup logs visible in RunPod web UI under "Logs" tab - -# Common debugging commands -curl http://localhost:8000/health # vLLM health check -curl http://localhost:8000/v1/models # List loaded models -``` +Agents **MUST** read [AGENTS.md](./AGENTS.md) to get context of the project. diff --git a/Dockerfile b/Dockerfile index ec61858..1baf230 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,15 @@ -# Clawdbot + vLLM Docker Image for RunPod +# OpenClaw + vLLM Docker Image for Runpod # Pre-configured with everything needed for AI coding assistant FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04 -LABEL maintainer="RunPod Clawdbot" -LABEL description="Clawdbot AI assistant with vLLM for local LLM inference" +LABEL maintainer="Runpod OpenClaw Stack" +LABEL description="OpenClaw Stack with vLLM for local LLM inference" # Avoid interactive prompts ENV DEBIAN_FRONTEND=noninteractive ENV HF_HOME=/workspace/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw # Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -27,32 +28,39 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ # Install vLLM RUN pip install --no-cache-dir vllm -# Install Clawdbot -RUN npm install -g clawdbot@latest +# Image generation dependencies (SDNQ + Diffusers) +RUN python3 -m pip install --no-cache-dir sdnq diffusers transformers accelerate safetensors + +# Install OpenClaw +RUN npm install -g openclaw@latest # Create workspace directories RUN mkdir -p /workspace/huggingface \ - /workspace/.clawdbot \ - /workspace/clawd \ + /workspace/.openclaw \ + /workspace/openclaw \ /workspace/scripts -# Copy startup script +# Copy startup scripts + skills + CLI +COPY skills/ /opt/openclaw/skills/ +COPY scripts/openclaw-image-gen /usr/local/bin/openclaw-image-gen +COPY scripts/entrypoint-common.sh /opt/openclaw/entrypoint-common.sh COPY scripts/entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +RUN chmod +x /entrypoint.sh /usr/local/bin/openclaw-image-gen -# Copy default Clawdbot workspace files -COPY config/workspace/ /workspace/clawd/ +# Copy default OpenClaw workspace files +COPY config/workspace/ /workspace/openclaw/ # Expose ports # 8000 - vLLM API -# 18789 - Clawdbot Gateway WebSocket -# 18790 - Clawdbot Bridge -# 18793 - Clawdbot Canvas -# 22 - SSH (RunPod adds this) +# 18789 - OpenClaw Gateway WebSocket +# 18790 - OpenClaw Bridge +# 18793 - OpenClaw Canvas +# 22 - SSH (Runpod adds this) EXPOSE 8000 18789 18790 18793 # Environment variables (can be overridden at runtime) ENV VLLM_API_KEY=changeme +ENV OPENCLAW_WEB_PASSWORD=changeme ENV MODEL_NAME=Qwen/Qwen2.5-Coder-7B-Instruct ENV SERVED_MODEL_NAME=local-coder ENV MAX_MODEL_LEN=16384 diff --git a/README.md b/README.md index 14e4923..b09fbd5 100644 --- a/README.md +++ b/README.md @@ -1,292 +1,107 @@ -# Clawdbot on 
RunPod with vLLM - -Run Clawdbot with GLM-4.7 and other open-source coding models on RunPod using vLLM. Chat with your AI assistant via Telegram! - -## Model Comparison - -| Model | GPU | VRAM | Cost/hr | Context | Folder | -|-------|-----|------|---------|---------|--------| -| **Base (Qwen2.5-7B)** | Any | 16GB | $0.50 | 16k | `Dockerfile` | -| **GLM-4.7-Flash FP16** | H100/A100 80GB | 56GB | $1.20-1.99 | 32k-64k | `models/glm47-flash-fp16/` | -| **GLM-4.7-Flash AWQ 4-bit** | A100 80GB | 71GB | $1.19 | 114k | `models/glm47-flash-awq-4bit/` | -| **GLM-4.7-REAP W4A16** | B200 | 108GB | $5.19 | 32k | `models/glm47-reap-w4a16/` | - -### Recommended: GLM-4.7-Flash AWQ 4-bit - -Best value option with full 114k context window at $1.19/hr on A100 80GB. - -## Quick Start - -### 1. Choose Your Model - -```bash -# GLM-4.7-Flash AWQ 4-bit (Best value, A100 80GB) -IMAGE=yourusername/clawdbot-glm47-flash-awq-4bit:latest - -# GLM-4.7-Flash FP16 (Full precision, H100/A100 80GB) -IMAGE=yourusername/clawdbot-glm47-flash-fp16:latest - -# GLM-4.7-REAP W4A16 (High-end, B200) -IMAGE=yourusername/clawdbot-glm47-reap-w4a16:latest - -# Base (Qwen2.5-7B, any GPU) -IMAGE=yourusername/clawdbot-vllm:latest -``` - -### 2. Create RunPod Pod - -- **Image**: Your chosen image from above -- **GPU**: Match model requirements -- **Volume**: 150GB at `/workspace` -- **Container Disk**: 50-100GB (depending on model) -- **Ports**: `8000/http, 18789/http, 22/tcp` - -### 3. Set Environment Variables - -```bash -VLLM_API_KEY=your-secure-key # Required -TELEGRAM_BOT_TOKEN=your-telegram-token # Optional -GITHUB_TOKEN=ghp_xxx # Optional -``` - -### 4. Test It - +# OpenClaw Stack on Runpod + +OpenClaw Stack is a self-contained stack that includes an LLM plus image/audio services and the OpenClaw UI, so you can run a fully self-contained assistant on Runpod (or any GPU host). Each model variant has its own folder under `models/` with a dedicated README and startup script. 
+ +## Primary release (published) + +| Image tag | LLM | Audio | Image | GPU target | Context | Status | +|----------|-----|-------|-------|------------|---------|--------| +| `openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf` | [unsloth/GLM-4.7-Flash-GGUF](https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF) (Q4_K_M) | [LiquidAI/LFM2.5-Audio-1.5B-GGUF](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B-GGUF) | [Disty0/FLUX.2-klein-4B-SDNQ-4bit-dynamic](https://huggingface.co/Disty0/FLUX.2-klein-4B-SDNQ-4bit-dynamic) | RTX 5090 32GB | 150k (default) | Published | + +## Testing images (not published) + +| Image tag | Backend | LLM weights | GPU target | Status | Notes | +|----------|---------|-------------|------------|--------|-------| +| `openclaw-stack-glm4.7-flash-awq-4bit` | vLLM | [cyankiwi/GLM-4.7-Flash-AWQ-4bit](https://huggingface.co/cyankiwi/GLM-4.7-Flash-AWQ-4bit) | A100 80GB | Testing | Best value on A100; long context (LLM-only) | +| `openclaw-stack-glm4.7-flash-fp16` | vLLM | [zai-org/GLM-4.7-Flash](https://huggingface.co/zai-org/GLM-4.7-Flash) | H100/A100 80GB | Testing | Full precision (LLM-only) | +| `openclaw-stack-glm4.7-flash-nvfp4-5090` | vLLM | [GadflyII/GLM-4.7-Flash-NVFP4](https://huggingface.co/GadflyII/GLM-4.7-Flash-NVFP4) | RTX 5090 32GB | Not working | vLLM MLA issues on Blackwell (LLM-only) | +| `openclaw-stack-glm4.7-reap-w4a16` | vLLM | [0xSero/GLM-4.7-REAP-40-W4A16](https://huggingface.co/0xSero/GLM-4.7-REAP-40-W4A16) | B200 180GB | Testing | High-end B200 (LLM-only) | +| `openclaw-stack-vllm` | vLLM | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | 16GB+ | Testing | Base image (LLM-only) | + +Notes: +- Only the primary image is published right now. +- Context values are defaults; some variants allow tuning via `MAX_MODEL_LEN`. +- NVFP4 status details live in `models/glm47-flash-nvfp4-5090/ISSUES.md`. + +## Deployment on Runpod + +1. **Pick an image** from the table above. +2. **Create a Runpod pod**: + - Volume: 30GB minimum at `/workspace` (increase for vLLM models) + - Ports: `8000/http, 8080/http, 18789/http, 22/tcp` +3. **Set environment variables**: + - `VLLM_API_KEY` (for vLLM variants) + - `OPENCLAW_WEB_PASSWORD` (web UI token) + - `HF_TOKEN` (optional, faster downloads) + - `TELEGRAM_BOT_TOKEN` (optional) + - For GGUF + llama.cpp: use `LLAMA_API_KEY` instead of `VLLM_API_KEY` +4. **Open the Control UI** (use your Runpod pod ID): + - `https://-18789.proxy.runpod.net/?token=` +5. **Open the Media UI (proxy)**: + - `https://-8080.proxy.runpod.net` +6. **Approve device pairing** (first time only): + - When you see “pairing required”, SSH into the pod and run: + - `OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw devices list --json` + - `OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw devices approve ` + - Pairing requests expire quickly; refresh the Web UI if it disappears. +7. **Health check**: ```bash -# Health check curl http://localhost:8000/health - -# Chat completion -curl http://localhost:8000/v1/chat/completions \ - -H "Authorization: Bearer $VLLM_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "glm-4.7-flash", - "messages": [{"role": "user", "content": "Hello!"}] - }' ``` -## Docker Images - -Images are automatically built and pushed to Docker Hub via GitHub Actions. 
+## Folder map -| Image | Description | -|-------|-------------| -| `clawdbot-glm47-flash-awq-4bit` | GLM-4.7-Flash AWQ 4-bit for A100 80GB | -| `clawdbot-glm47-flash-fp16` | GLM-4.7-Flash FP16 for H100/A100 80GB | -| `clawdbot-glm47-reap-w4a16` | GLM-4.7-REAP W4A16 for B200 | -| `clawdbot-vllm` | Base image with Qwen2.5-7B | +| Folder | Purpose | +|--------|---------| +| `models/` | Model-specific Dockerfiles + entrypoints | +| `scripts/` | Base entrypoint + setup helpers | +| `templates/` | Runpod template JSONs | +| `config/` | OpenClaw config templates | -## Project Structure +## Port map (published image) -``` -runpod-clawdbot/ -├── README.md # This file -├── .github/ -│ └── workflows/ -│ └── docker-build.yml # Build & push to Docker Hub -│ -├── models/ -│ ├── glm47-flash-fp16/ # Full precision FP16 (H100/A100 80GB) -│ │ ├── README.md -│ │ ├── Dockerfile -│ │ └── entrypoint.sh -│ │ -│ ├── glm47-flash-awq-4bit/ # AWQ 4-bit quantized (A100 80GB) -│ │ ├── README.md -│ │ ├── Dockerfile -│ │ └── entrypoint.sh -│ │ -│ └── glm47-reap-w4a16/ # Pruned W4A16 quantized (B200) -│ ├── README.md -│ ├── Dockerfile -│ └── entrypoint.sh -│ -├── scripts/ -│ ├── setup-clawdbot.sh -│ └── start-vllm.sh -│ -├── config/ -│ ├── clawdbot.json -│ └── workspace/ -│ -├── templates/ -│ └── clawdbot-vllm.json -│ -├── tests/ -│ ├── test-vllm.sh -│ └── test-tool-calling.sh -│ -├── Dockerfile # Base image (Qwen2.5-7B) -├── docker-compose.yml -└── .env.example -``` +- `8000/http` — LLM API (OpenAI-compatible) +- `8080/http` — Media proxy + UI (image/audio links) +- `18789/http` — OpenClaw Control UI +- `22/tcp` — SSH -## GitHub Actions +Note: audio/image servers run on `8001/8002` **internally only** and should not be exposed. -Images are built automatically on: -- Push to `main` → tagged as `:latest` -- Push to other branches → tagged as `:dev-{branch-name}` (e.g., `:dev-feature-xyz`) -- Push git tag (e.g., `v1.0.0`) → tagged as `:v1.0.0` + `:latest` -- Pull requests → build only, no push (validation) -- Manual workflow dispatch → select specific model +## Image naming + tags -### Required Setup +We publish one image per variant under: -**Secrets** (Repository → Settings → Secrets → Actions): +- `openclaw-stack---flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf` (full stack) +- `openclaw-stack--` (LLM-only testing images) -| Secret | Description | -|--------|-------------| -| `DOCKERHUB_USERNAME` | Your Docker Hub username | -| `DOCKERHUB_TOKEN` | Docker Hub access token (not password) | +Dots are valid in Docker repository names and tags, so we keep model versions like `glm4.7`, `flux.2`, and `lfm2.5`. 
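+For example, pulling the published full-stack image by its composed name (the `runpod/` namespace matches the Runpod template example; images built from your own fork land under your `DOCKERHUB_REPO`/username instead):
+
+```bash
+docker pull runpod/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:latest
+```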
-**Variables** (Repository → Settings → Variables → Actions): +Current published image: -| Variable | Description | -|----------|-------------| -| `DOCKERHUB_REPO` | (Optional) Custom repo name, defaults to username | +- `openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf` -### Manual Build +Tags: -```bash -# Build locally -docker build -t clawdbot-glm47-flash-awq-4bit models/glm47-flash-awq-4bit/ -docker build -t clawdbot-glm47-flash-fp16 models/glm47-flash-fp16/ -docker build -t clawdbot-glm47-reap-w4a16 models/glm47-reap-w4a16/ - -# Push to Docker Hub -docker tag clawdbot-glm47-flash-awq-4bit yourusername/clawdbot-glm47-flash-awq-4bit:latest -docker push yourusername/clawdbot-glm47-flash-awq-4bit:latest -``` +- `:latest` for main branch +- `:` for branch builds +- `:vX.Y.Z` for version tags -## Configuration +## Build + release -### Environment Variables +Images build on: +- Pull requests -> tag = branch name (slashes -> `-`) +- Push to `main` -> `:latest` +- Git tag (e.g., `v1.0.0`) -> `:v1.0.0` + `:latest` -| Variable | Default | Description | -|----------|---------|-------------| -| `VLLM_API_KEY` | `changeme` | API key for vLLM authentication | -| `MODEL_NAME` | Model-specific | HuggingFace model ID | -| `SERVED_MODEL_NAME` | `glm-4.7-flash` | Model name in API responses | -| `MAX_MODEL_LEN` | Auto-detected | Maximum context length | -| `GPU_MEMORY_UTILIZATION` | `0.92` | GPU memory to use | -| `TELEGRAM_BOT_TOKEN` | | Telegram bot token from @BotFather | -| `GITHUB_TOKEN` | | GitHub PAT for git/gh operations | +## Known issues -### Clawdbot Configuration - -Config is auto-generated at `/workspace/.clawdbot/clawdbot.json`: - -```json -{ - "models": { - "providers": { - "local-vllm": { - "baseUrl": "http://localhost:8000/v1", - "apiKey": "your-vllm-api-key", - "api": "openai-completions" - } - } - } -} -``` - -## Telegram Setup - -1. Create a bot with [@BotFather](https://t.me/BotFather) -2. Copy the bot token -3. Set `TELEGRAM_BOT_TOKEN` environment variable -4. Start or restart the pod -5. Message your bot on Telegram! - -## GitHub Authentication - -For git operations inside the container: - -1. Create a [GitHub Personal Access Token](https://github.com/settings/tokens) -2. Select scopes: `repo`, `read:org`, `workflow` -3. Set `GITHUB_TOKEN` environment variable -4. 
Token is auto-configured on startup - -## Testing - -```bash -# Basic health check -curl http://localhost:8000/health - -# List models -curl http://localhost:8000/v1/models \ - -H "Authorization: Bearer $VLLM_API_KEY" - -# Tool calling test -curl http://localhost:8000/v1/chat/completions \ - -H "Authorization: Bearer $VLLM_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "glm-4.7-flash", - "messages": [{"role": "user", "content": "What is 2+2?"}], - "tools": [{ - "type": "function", - "function": { - "name": "calculate", - "description": "Perform a calculation", - "parameters": { - "type": "object", - "properties": { - "expression": {"type": "string"} - } - } - } - }] - }' -``` - -## Troubleshooting - -### vLLM doesn't start -- Check GPU availability: `nvidia-smi` -- Verify VRAM is sufficient for model -- Check logs: `journalctl -u vllm` or container logs - -### Model loading is slow -- First load downloads model from HuggingFace (can be 18-60GB) -- Use network volume to persist model across restarts -- AWQ 4-bit model (18GB) loads faster than FP16 (31GB) - -### Tool calling not working -- Verify `--enable-auto-tool-choice` is set -- Check tool parser matches model (`glm47` for GLM-4.7) -- Run test script: `./tests/test-tool-calling.sh` - -### Orphaned GPU memory -- If vLLM crashes, GPU memory may stay allocated -- Restart the pod to clear memory -- Check with: `nvidia-smi` - -### SSH port changes -- RunPod assigns random SSH ports after restart -- Check port via RunPod console or API -- Use RunPod web terminal as alternative - -## Known Issues - -1. **GGUF not supported** - vLLM doesn't support GLM-4.7's GGUF format. Use AWQ. -2. **Container disk doesn't persist** - Only `/workspace` survives restarts. -3. **B200 requires CUDA 13.1+** - The REAP image includes this automatically. - -## Cost Optimization - -1. **Use AWQ 4-bit** - Same model, lower VRAM, cheaper GPU ($1.19 vs $1.99/hr) -2. **Stop pods when idle** - RunPod charges per minute -3. **Use network volumes** - Avoid re-downloading models -4. **Consider spot instances** - Up to 80% cheaper +- **NVFP4 on RTX 5090** is not working in vLLM due to MLA attention shape issues and missing Blackwell kernel support. See `models/glm47-flash-nvfp4-5090/ISSUES.md`. +- **GGUF is not supported in vLLM** (use llama.cpp image). +- **Container disk doesn't persist**; only `/workspace` survives restarts. 
## Resources -- [Clawdbot Documentation](https://github.com/clawdbot/clawdbot) -- [vLLM Documentation](https://docs.vllm.ai/) -- [RunPod Documentation](https://docs.runpod.io/) -- [GLM-4.7 Announcement](https://z.ai/blog/glm-4.7) - -## License - -MIT +- OpenClaw Stack: https://github.com/runpod-workers/openclaw-stack +- OpenClaw: https://github.com/openclaw/openclaw +- vLLM: https://docs.vllm.ai/ +- Runpod: https://docs.runpod.io/ diff --git a/config/clawdbot.json b/config/openclaw.json similarity index 85% rename from config/clawdbot.json rename to config/openclaw.json index a344968..6c7100b 100644 --- a/config/clawdbot.json +++ b/config/openclaw.json @@ -1,10 +1,9 @@ { - "$schema": "https://clawdbot.com/schema/config.json", - "_comment": "Clawdbot configuration for RunPod vLLM integration", + "_comment": "OpenClaw configuration for Runpod vLLM integration", "_instructions": [ - "Replace with your RunPod pod ID", + "Replace with your Runpod pod ID", "Replace with your vLLM API key", - "Adjust model settings based on your tier (see templates/clawdbot-vllm.json)" + "Adjust model settings based on your tier (see templates/openclaw-vllm.json)" ], "agents": { diff --git a/config/workspace/AGENTS.md b/config/workspace/AGENTS.md index f3d8d6e..65d05a4 100644 --- a/config/workspace/AGENTS.md +++ b/config/workspace/AGENTS.md @@ -1,4 +1,4 @@ -# AGENTS.md - Clawdbot Workspace +# AGENTS.md - OpenClaw Workspace This folder is the assistant's working directory. @@ -7,6 +7,18 @@ This folder is the assistant's working directory. - Your agent identity lives in IDENTITY.md. - Your profile lives in USER.md. +## Skills + +### Image Generation +Generate images using FLUX.2 Klein SDNQ (4-bit quantized, runs on RTX 5090). + +```bash +openclaw-image-gen --prompt "" --width 1024 --height 1024 --output /workspace/openclaw/images/output.png +openclaw-image-gen --prompt "" --aspect 16:9 --output /workspace/openclaw/images/output.png +``` + +Default to 1024x1024 if user doesn't specify size. Images saved to `/workspace/openclaw/images/`. + ## Safety defaults - Don't exfiltrate secrets or private data. - Don't run destructive commands unless explicitly asked. @@ -16,6 +28,3 @@ This folder is the assistant's working directory. - Keep a short daily log at memory/YYYY-MM-DD.md (create memory/ if needed). - On session start, read today + yesterday if present. - Capture durable facts, preferences, and decisions; avoid secrets. - -## Customize -- Add your preferred style, rules, and "memory" here. diff --git a/config/workspace/IDENTITY.md b/config/workspace/IDENTITY.md index 547ff69..b6757da 100644 --- a/config/workspace/IDENTITY.md +++ b/config/workspace/IDENTITY.md @@ -1,6 +1,6 @@ # Identity -You are a helpful AI coding assistant running on RunPod with a local LLM. +You are a helpful OpenClaw AI coding assistant running on Runpod with a local LLM. You can help with: - Writing and debugging code - Explaining programming concepts diff --git a/docker-compose.yml b/docker-compose.yml index d72968a..073c854 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,5 @@ -# docker-compose.yml - Local development setup for Clawdbot + vLLM -# Note: This is for local testing only. For production, use RunPod. +# docker-compose.yml - Local development setup for OpenClaw + vLLM +# Note: This is for local testing only. For production, use Runpod. 
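+#
+# Typical usage (from the repo root; GPU required for the default service):
+#   docker-compose up vllm                      # vLLM server (needs NVIDIA GPU)
+#   docker-compose --profile mock up vllm-mock  # mock server, no GPU required
+#   docker-compose --profile test up tests      # curl-based test suite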
version: "3.8" @@ -7,7 +7,7 @@ services: # vLLM Server - requires NVIDIA GPU with sufficient VRAM vllm: image: vllm/vllm-openai:v0.12.0 - container_name: clawdbot-vllm + container_name: openclaw-vllm runtime: nvidia deploy: resources: @@ -46,7 +46,7 @@ services: # Mock vLLM for testing without GPU (uses smaller model) vllm-mock: image: vllm/vllm-openai:v0.12.0 - container_name: clawdbot-vllm-mock + container_name: openclaw-vllm-mock profiles: ["mock"] ports: - "8001:8000" @@ -64,7 +64,7 @@ services: # Test runner tests: image: curlimages/curl:latest - container_name: clawdbot-tests + container_name: openclaw-tests profiles: ["test"] depends_on: vllm: @@ -84,4 +84,4 @@ volumes: networks: default: - name: clawdbot-network + name: openclaw-network diff --git a/docs/images/flux2-klein-1024.png b/docs/images/flux2-klein-1024.png new file mode 100644 index 0000000..534e4cf Binary files /dev/null and b/docs/images/flux2-klein-1024.png differ diff --git a/docs/images/test-robot.png b/docs/images/test-robot.png new file mode 100644 index 0000000..88a0d84 Binary files /dev/null and b/docs/images/test-robot.png differ diff --git a/docs/openclaw-migration-plan.md b/docs/openclaw-migration-plan.md new file mode 100644 index 0000000..f34367a --- /dev/null +++ b/docs/openclaw-migration-plan.md @@ -0,0 +1,75 @@ +# OpenClaw Migration Plan (Runpod Images) + +## Background & upstream signals + +From the upstream OpenClaw project: +- The repository is now `openclaw/openclaw`, and the CLI shown in the README is `openclaw`. +- Install guidance includes `npm install -g openclaw@latest` and the one‑liner `curl -fsSL https://openclaw.ai/install.sh | bash`. +- The OpenClaw README documents new default paths: + - Config file: `~/.openclaw/openclaw.json` + - Workspace root: `~/.openclaw/workspace` + +Sources: +- https://github.com/openclaw/openclaw (README) +- https://openclaw.ai (installer + quick start) + +## Repo scan findings (current state) + +The repo still referenced legacy names and paths in many places before migration: +- Dockerfiles: base image installs, labels, ENVs, entrypoint banners +- Entrypoints: legacy CLI names and legacy state dir paths +- Docs: `README.md`, model READMEs, `docs/video-script.md` +- Templates: `templates/runpod-template.json`, `templates/openclaw-vllm.json` +- Config: `config/openclaw.json`, `config/workspace/IDENTITY.md` +- Scripts: `scripts/entrypoint.sh`, `scripts/setup-openclaw.sh` +- Env examples: `.env.example` + +No `OpenClaw` references exist yet in the repo. + +## Decisions (no legacy) + +1. **Package + binary naming** + - Install `openclaw@latest`. + - Use `openclaw` CLI only (no legacy binaries or symlinks). + +2. **State directory** + - Use `/workspace/.openclaw` as the only state directory in containers. + +3. **Config file name** + - Use `openclaw.json` only. + +## Migration plan (proposed steps) + +### 1) Dependency + CLI alignment +- Update Dockerfiles to install `openclaw@latest`. +- Use `openclaw` as the only CLI. + +### 2) State dir and workspace setup +- Use `/workspace/.openclaw` for all state. +- Create expected subdirectories (`agents/main/sessions`, `credentials`) and enforce permissions. + +### 3) Config generation + naming +- Generate `openclaw.json` with OpenAI‑compatible provider settings for the local model. +- Run `openclaw doctor --fix` to auto‑migrate schema after config write. + +### 4) Rename commands and docs +- Update all scripts/entrypoints to call `openclaw`. +- Replace docs and templates to use “OpenClaw” branding and new paths. 
+- Update README tables and sample image tags if the Docker repo/name changes. + +### 5) Environment variables and config keys +- Standardize on `OPENCLAW_STATE_DIR`, `OPENCLAW_WORKSPACE`, `OPENCLAW_WEB_PASSWORD`. +- Reflect in `.env.example` and Runpod templates. + +### 6) Validation +- Build images for each model variant. +- Smoke test: + - `openclaw doctor --fix` works + - `openclaw gateway` starts +- Web UI reachable via Runpod proxy + - Model inference via `/v1/chat/completions` +- Confirm the state dir and workspace are created under `/workspace/.openclaw`. + +## Open questions + +- Should image tags be renamed immediately or keep existing tags for continuity? diff --git a/docs/video-script.md b/docs/video-script.md new file mode 100644 index 0000000..7108106 --- /dev/null +++ b/docs/video-script.md @@ -0,0 +1,175 @@ +# Video Script: OpenClaw fully self-hosted on RTX 5090 (GLM‑4.7‑Flash GGUF + llama.cpp) + +This doc turns the repo learnings into a demo-first video script for two audiences: + +- **How to set it up and use it** (first half) +- **How it works** (later), with **vLLM/NVFP4** as a short end note + +--- + +## Benchmark slide: where to get the “graph” + the numbers (Artificial Analysis) + +### Option A (fastest): screenshot Artificial Analysis model pages + +Use these pages and grab the **Artificial Analysis Intelligence Index** number shown on each page: + +- **GLM-4.7-Flash (Reasoning)**: 30 — +- **GLM-4.7 (Reasoning)**: 42 — +- **GPT-5.2 (xhigh)**: 51 — +- **GPT-5.2 Codex (xhigh)**: 48 — +- **Claude Opus 4.5 (Reasoning)**: 50 — +- **Claude 4.5 Sonnet (Reasoning)**: 42 — + +If you want a single AA page on screen as a citation backdrop, use a comparison page: + +- **GLM‑4.7 vs GPT‑5.2**: + +### Option B (cleanest): create your own bar chart, cite AA + +- Build a simple bar chart using the numbers above. +- Add a footer like: **Source: Artificial Analysis (Intelligence Index v4.0), accessed Jan 2026**. + +**Note on “Composer 1”**: The AA model page for “Composer 1” wasn’t reliably fetchable during prep (timeouts). If you want “Composer 1” in the slide, verify its page exists in AA and grab the index number from there; otherwise swap it for a different widely-known coding model that AA lists reliably. + +--- + +## Video script (demo-first; usage first; deep technical notes last) + +### 0:00–0:25 — Cold open / hook (call out fake “self-hosted”) + +**On screen**: quick montage: Telegram/WhatsApp agent convo → “Powered by Claude API” / billing pain → cut to local terminal + GPU. + +**You say**: +People call these “self-hosted agents”… but then the brain is still a paid API. If your agent stops working the second Claude is down or your token budget runs out, that’s not self-hosted. + +Today I’ll show a fully self-contained OpenClaw setup: local model, local inference, agent UI—no external model API needed. + +### 0:25–0:55 — What you’ll build + requirements (set expectations) + +**On screen**: one slide: “OpenClaw + GLM‑4.7‑Flash + llama.cpp (OpenAI API)”. + +**You say**: +We’re running GLM‑4.7‑Flash locally via llama.cpp and pointing OpenClaw at it using an OpenAI-compatible API. + +If you’ve got an RTX 5090 (32GB), you can run the full 200k context. With 24GB, it can still work, just with a reduced context window—because the model weights alone are ~17GB. 
+ +### 0:55–2:10 — Quick demo first (prove it works before you explain anything) + +**On screen**: +- Open OpenClaw web UI +- Show the agent doing a quick code task (small repo change / explanation) +- Show a raw API call to the model (`/v1/chat/completions`) + +**You say**: +Let me prove it’s real before we talk architecture. This is OpenClaw running against a model in the same environment. No Claude key. No OpenAI key. + +If you’re using Telegram integration, the same idea applies: messages go to a local model, not a hosted API. + +### 2:10–3:40 — Two ways to run it: local GPU vs Runpod (choose your path) + +**On screen**: split screen: local machine vs Runpod pod. + +**You say**: +You’ve got two options: + +- Local: lowest latency and everything stays on your machine. +- Runpod: if you don’t have a 5090—or you don’t want your workstation pinned all day—you can still keep it self-contained. You pay for compute time, not per-token API calls. + +### 3:40–5:30 — Runpod setup walkthrough (the “do this, then this” part) + +**On screen**: Runpod UI checklist. + +**You say (walkthrough voice)**: +Here’s the setup that actually matters: + +- **Image**: `runpod/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:latest` +- **Ports**: `8000/http` (llama.cpp), `8080/http` (media proxy UI), `18789/http` (OpenClaw UI), `22/tcp` (SSH) +- **Network volume mounted to `/workspace`** (non-negotiable; model is ~17GB and you want persistence across restarts) +- **Environment variables**: + - `LLAMA_API_KEY` (protects the model API) + - `OPENCLAW_WEB_PASSWORD` (protects the web UI token) + - optionally `TELEGRAM_BOT_TOKEN` (Telegram) + +### 5:30–6:40 — Health check + raw chat completion (OpenAI-compat API) + +**On screen**: terminal showing `curl` to `/health` then `/v1/chat/completions`. + +**You say**: +llama.cpp runs an OpenAI-compatible API. That’s the trick: OpenClaw doesn’t need to know it’s llama.cpp. + +**Show (copy/paste):** + +- Health check: `GET /health` on `:8000` +- Chat completion: `POST /v1/chat/completions` with `Authorization: Bearer $LLAMA_API_KEY` and `model: "glm-4.7-flash"` + +### 6:40–8:10 — The “gotcha”: first-time device pairing (and why it’s good) + +**On screen**: web UI says “pairing required” → SSH → approve device → refresh UI. + +**You say**: +First time you open the web UI, it won’t just let any browser control your agent. You must approve the device. + +**On screen (commands):** + +- List requests: + - `OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing list telegram` +- Approve: + - `OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing approve telegram ` + +**You say**: +This is the right default for something that can run commands and touch repos. + +### 8:10–9:10 — Benchmark slide (short, no methodology detour) + +**On screen**: your bar chart + tiny citation footer (Artificial Analysis URLs). + +**You say**: +Why GLM‑4.7‑Flash? Because it’s an open-weights model with serious benchmark performance. On Artificial Analysis’ Intelligence Index, you can see where it sits relative to the usual suspects. + +Quick callout list (keep it fast): + +- GLM‑4.7: 42 +- GLM‑4.7‑Flash: 30 +- GPT‑5.2: 51 +- GPT‑5.2 Codex: 48 +- Claude Opus 4.5 (Reasoning): 50 +- Claude 4.5 Sonnet (Reasoning): 42 + +### 9:10–10:45 — How it works (high level, but concrete) + +**On screen**: simple block diagram. 
+ +**You say**: +Architecture is simple: + +- llama.cpp (`llama-server`) hosts the model and exposes OpenAI-style endpoints on `:8000` +- OpenClaw points its provider config at `http://localhost:8000/v1` +- The container stores everything under `/workspace` so restarts don’t wipe model + state + +Then the “why it fits”: + +We’re running a GGUF quantization (Q4_K_M) and using Q8 KV cache quantization—this is what makes 200k context feasible on a 32GB card. + +### 10:45–12:00 — Ending note: what happened with vLLM/NVFP4 (keep it tight) + +**On screen**: one screenshot of the core error + a short bullet list. + +**You say**: +We tried the obvious path first: vLLM with NVFP4 for Blackwell. But as of Jan 2026, it’s blocked for GLM‑4.7 on the 5090. + +Root cause: GLM‑4.7’s MLA attention isn’t handled correctly in vLLM’s fallback path, leading to an attention output dimension mismatch. + +When those pieces land upstream (vLLM + cuDNN support), we’ll revisit and benchmark it. + +**On screen takeaway**: +Today’s working answer: GGUF + llama.cpp. + +--- + +## Suggested on-screen callouts (quick checklist) + +- **Ports**: `8000` (model API), `18789` (web UI), `22` (SSH) +- **Persistence**: “Network volume mounted to `/workspace`” +- **Security**: “API key for model + web token + device pairing” +- **Performance tagline (repo docs)**: “~175 tok/s, ~28GB VRAM, 200k context on RTX 5090” + diff --git a/models/glm47-flash-awq-4bit/Dockerfile b/models/glm47-flash-awq-4bit/Dockerfile index d8bda90..a2f8b6e 100644 --- a/models/glm47-flash-awq-4bit/Dockerfile +++ b/models/glm47-flash-awq-4bit/Dockerfile @@ -22,6 +22,9 @@ RUN VLLM_WHEEL="vllm-0.14.0rc2.dev187+g22375f8d1-cp38-abi3-manylinux_2_31_x86_64 pip install git+https://github.com/huggingface/transformers.git && \ rm "/tmp/${VLLM_WHEEL}" +# Image generation dependencies (SDNQ + Diffusers) +RUN python3 -m pip install --no-cache-dir sdnq diffusers transformers accelerate safetensors + # Install Node.js 22.x and GitHub CLI in single layer to reduce image size RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | \ @@ -34,8 +37,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# Install Clawdbot and Claude Code -RUN npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ +# Install OpenClaw and Claude Code +RUN npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ npm cache clean --force # Environment defaults @@ -45,12 +48,15 @@ ENV SERVED_MODEL_NAME="glm-4.7-flash" ENV MAX_MODEL_LEN="114688" ENV VLLM_API_KEY="changeme" -# Clawdbot workspace -ENV CLAWDBOT_HOME="/workspace/.clawdbot" -ENV CLAWDBOT_WORKSPACE="/workspace/clawd" +# OpenClaw workspace +ENV OPENCLAW_STATE_DIR="/workspace/.openclaw" +ENV OPENCLAW_WORKSPACE="/workspace/openclaw" -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +COPY skills/ /opt/openclaw/skills/ +COPY scripts/openclaw-image-gen /usr/local/bin/openclaw-image-gen +COPY scripts/entrypoint-common.sh /opt/openclaw/entrypoint-common.sh +COPY models/glm47-flash-awq-4bit/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh /usr/local/bin/openclaw-image-gen EXPOSE 8000 18789 22 ENTRYPOINT ["/entrypoint.sh"] diff --git a/models/glm47-flash-awq-4bit/README.md b/models/glm47-flash-awq-4bit/README.md index dbd68e0..32fd6f4 100644 --- a/models/glm47-flash-awq-4bit/README.md +++ 
b/models/glm47-flash-awq-4bit/README.md @@ -19,10 +19,10 @@ Quantized version of GLM-4.7-Flash for **A100 80GB** GPUs. Best value for GLM-4. ## Quick Start -### 1. Create RunPod Pod +### 1. Create Runpod Pod **Settings:** -- **Image**: `runpod/clawdbot-glm47-flash-awq-4bit:latest` +- **Image**: `runpod/openclaw-stack-glm4.7-flash-awq-4bit:latest` - **GPU**: 1x A100 80GB - **Volume**: 150GB at `/workspace` (network storage) - **Container Disk**: 50GB @@ -37,7 +37,7 @@ Quantized version of GLM-4.7-Flash for **A100 80GB** GPUs. Best value for GLM-4. | `HF_TOKEN` | Recommended | - | [HuggingFace token](https://huggingface.co/settings/tokens) for faster model downloads | | `TELEGRAM_BOT_TOKEN` | No | - | Telegram bot token for chat integration | | `GITHUB_TOKEN` | No | - | GitHub token for `gh` CLI | -| `CLAWDBOT_WEB_PASSWORD` | No | `clawdbot` | Password for web UI | +| `OPENCLAW_WEB_PASSWORD` | No | `changeme` | Password for web UI | ### 3. Access Points @@ -46,7 +46,7 @@ After the pod starts (~90 seconds for cached starts, longer for first start): | Service | URL | Auth | |---------|-----|------| | vLLM API | `https://-8000.proxy.runpod.net` | Bearer token (`VLLM_API_KEY`) | -| Web UI | `https://-18789.proxy.runpod.net` | Password (`CLAWDBOT_WEB_PASSWORD`) | +| Web UI | `https://-18789.proxy.runpod.net` | Password (`OPENCLAW_WEB_PASSWORD`) | | SSH | `ssh root@ -p ` | SSH key | ### 4. Test It @@ -87,12 +87,12 @@ All persistent data is stored on the network volume `/workspace`: ├── .cache/ │ ├── vllm/ # CUDA graphs & torch compile cache (~400MB) │ └── huggingface/ # HF cache -├── .clawdbot/ -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # Agent state │ └── telegram/ # Telegram session ├── .config/gh/ # GitHub CLI config -└── clawd/ # Claude Code workspace +└── openclaw/ # Workspace ``` **Startup times:** @@ -101,9 +101,9 @@ All persistent data is stored on the network volume `/workspace`: ## Web UI -Access the Clawdbot web UI at `https://-18789.proxy.runpod.net`: +Access the OpenClaw web UI at `https://-18789.proxy.runpod.net`: -1. Enter the password (default: `clawdbot` or your `CLAWDBOT_WEB_PASSWORD`) +1. Enter the password (default: `changeme` or your `OPENCLAW_WEB_PASSWORD`) 2. Chat with the model through the web interface 3. No CLI access required @@ -175,7 +175,7 @@ The entrypoint is optimized for A100 80GB: ## Known Issues -1. **SSH port changes after restart** - Check the new SSH port via RunPod dashboard +1. **SSH port changes after restart** - Check the new SSH port via Runpod dashboard 2. **Orphaned GPU processes** - If vLLM crashes, restart the pod to free GPU memory 3. **GGUF not supported** - vLLM doesn't support GLM-4.7's GGUF format; use AWQ 4. 
**Container disk doesn't persist** - Only `/workspace` survives restarts @@ -196,7 +196,7 @@ pkill -9 -f vllm **Web UI won't connect:** - Ensure port 18789 is exposed -- Check that gateway is running: `ps aux | grep clawdbot` +- Check that gateway is running: `ps aux | grep openclaw` - Verify bind mode is `lan` in config **Model download fails:** diff --git a/models/glm47-flash-awq-4bit/entrypoint.sh b/models/glm47-flash-awq-4bit/entrypoint.sh index 8bcb18c..8adaa5f 100644 --- a/models/glm47-flash-awq-4bit/entrypoint.sh +++ b/models/glm47-flash-awq-4bit/entrypoint.sh @@ -1,18 +1,14 @@ #!/bin/bash set -e +source /opt/openclaw/entrypoint-common.sh echo "================================================" echo " GLM-4.7-Flash AWQ (4-bit) on A100 80GB" echo "================================================" -# RunPod's /start.sh handles SSH setup using PUBLIC_KEY env var +# Runpod's /start.sh handles SSH setup using PUBLIC_KEY env var # It ends with 'sleep infinity' so we run it in background -if [ -f /start.sh ]; then - echo "Running RunPod start script (background)..." - /start.sh & - # Give it a moment to set up SSH - sleep 5 -fi +oc_start_runpod_ssh # Persist vLLM cache (CUDA graphs, torch compile) on network storage # This speeds up subsequent pod starts by reusing cached compiled kernels @@ -48,11 +44,14 @@ fi VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-114688}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -# Web UI password - users enter this to access the Clawdbot control panel -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +# Web UI token/password - users enter this to access the OpenClaw control panel +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-changeme}" + +BOT_CMD="openclaw" echo "Starting vLLM server..." echo " Model: $MODEL_PATH" @@ -99,11 +98,13 @@ if [ $WAITED -ge $MAX_WAIT ]; then # Don't exit - keep container running for debugging fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -112,8 +113,8 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - # Create a minimal config - clawdbot doctor will fix any missing fields - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + # Create a minimal config - openclaw doctor will fix any missing fields + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -136,25 +137,32 @@ if [ ! 
-f "$CLAWDBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-vllm/$SERVED_MODEL_NAME" }, - "contextTokens": 98304 + "contextTokens": 98304, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { ${TELEGRAM_CONFIG} }, + "skills": { + "load": { "extraDirs": ["/opt/openclaw/skills"] } + }, "gateway": { "mode": "local", - "bind": "lan" + "bind": "lan", + "auth": { "mode": "password", "password": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" fi -# Auto-fix config to match current Clawdbot version's schema -echo "Running clawdbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix || true +# Auto-fix config to match current OpenClaw version's schema +echo "Running openclaw doctor to validate/fix config..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true +oc_sync_gateway_auth "password" # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -174,22 +182,14 @@ fi export OPENAI_API_KEY="$VLLM_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway with password auth for web UI access +# Start OpenClaw gateway with password auth for web UI access echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot gateway --auth password --password "$CLAWDBOT_WEB_PASSWORD" & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" -echo "================================================" -echo " Ready!" -echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" -echo " Web UI: https://-18789.proxy.runpod.net" -echo " Web UI Password: $CLAWDBOT_WEB_PASSWORD" -echo " Model: $SERVED_MODEL_NAME" -echo " Context: $MAX_MODEL_LEN tokens" -echo "================================================" +oc_print_ready "vLLM API" "$SERVED_MODEL_NAME" "$MAX_MODEL_LEN tokens" "password" # Handle shutdown cleanup() { diff --git a/models/glm47-flash-fp16/Dockerfile b/models/glm47-flash-fp16/Dockerfile index c7f5ad6..bf6fa53 100644 --- a/models/glm47-flash-fp16/Dockerfile +++ b/models/glm47-flash-fp16/Dockerfile @@ -33,18 +33,22 @@ RUN uv pip install --system -U vllm \ --extra-index-url https://wheels.vllm.ai/nightly && \ uv pip install --system git+https://github.com/huggingface/transformers.git -# Install Clawdbot -RUN npm install -g --prefer-offline clawdbot@latest && \ +# Image generation dependencies (SDNQ + Diffusers) +RUN uv pip install --system sdnq diffusers accelerate safetensors + +# Install OpenClaw +RUN npm install -g --prefer-offline openclaw@latest && \ npm cache clean --force # Keep model files on container disk (requires 100GB) -# Only use workspace for persistent Clawdbot state -RUN mkdir -p /workspace/.clawdbot /workspace/clawd +# Only use workspace for persistent OpenClaw state +RUN mkdir -p /workspace/.openclaw /workspace/openclaw # Environment variables -# HF_HOME on container disk (100GB needed), Clawdbot state on workspace +# HF_HOME on container disk (100GB needed), OpenClaw state on workspace ENV HF_HOME=/root/.cache/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw ENV MODEL_NAME=zai-org/GLM-4.7-Flash ENV 
SERVED_MODEL_NAME=glm-4.7-flash ENV VLLM_API_KEY=changeme @@ -52,13 +56,16 @@ ENV GPU_MEMORY_UTILIZATION=0.92 ENV TOOL_CALL_PARSER=glm47 # MAX_MODEL_LEN is auto-detected based on GPU - don't set here -# Copy entrypoint script -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +# Copy entrypoint scripts + skills + CLI +COPY skills/ /opt/openclaw/skills/ +COPY scripts/openclaw-image-gen /usr/local/bin/openclaw-image-gen +COPY scripts/entrypoint-common.sh /opt/openclaw/entrypoint-common.sh +COPY models/glm47-flash-fp16/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh /usr/local/bin/openclaw-image-gen # Expose ports # 8000: vLLM API -# 18789: Clawdbot Gateway +# 18789: OpenClaw Gateway # 22: SSH EXPOSE 8000 18789 22 diff --git a/models/glm47-flash-fp16/README.md b/models/glm47-flash-fp16/README.md index 6f3eb42..d5b6c8f 100644 --- a/models/glm47-flash-fp16/README.md +++ b/models/glm47-flash-fp16/README.md @@ -27,9 +27,9 @@ Best quality with auto-detected context based on GPU. ## Quick Start -### 1. Create RunPod Pod +### 1. Create Runpod Pod -- **Image**: `yourusername/clawdbot-glm47-flash-fp16:latest` +- **Image**: `yourusername/openclaw-stack-glm4.7-flash-fp16:latest` - **GPU**: 1x H100 80GB or A100 80GB - **Volume**: 50GB at `/workspace` - **Container Disk**: 100GB (model stored here) @@ -67,11 +67,11 @@ Model is stored on container disk (100GB required), state persists on workspace ``` /root/.cache/huggingface/ # Model files (container disk) /workspace/ -├── .clawdbot/ -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # State │ └── telegram/ # Session -└── clawd/ # Workspace +└── openclaw/ # Workspace ``` ## vLLM Configuration diff --git a/models/glm47-flash-fp16/entrypoint.sh b/models/glm47-flash-fp16/entrypoint.sh index 500953e..aa2ce40 100644 --- a/models/glm47-flash-fp16/entrypoint.sh +++ b/models/glm47-flash-fp16/entrypoint.sh @@ -1,9 +1,10 @@ #!/bin/bash -# entrypoint.sh - GLM-4.7-Flash FP16 + Clawdbot startup script +# entrypoint.sh - GLM-4.7-Flash FP16 + OpenClaw startup script set -e +source /opt/openclaw/entrypoint-common.sh echo "============================================" -echo " GLM-4.7-Flash FP16 + Clawdbot Startup" +echo " GLM-4.7-Flash FP16 + OpenClaw Startup" echo "============================================" echo "" echo "IMPORTANT: This requires vLLM NIGHTLY (not PyPI stable)!" 
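The nightly requirement called out above is easy to lose when the image is rebuilt; a quick sanity check (a sketch, not part of the shipped entrypoint) is to print the installed vLLM version and confirm it is a nightly build rather than a plain PyPI release:

```bash
# Sketch: confirm a nightly vLLM build is installed (not part of the entrypoint).
# Nightly wheels from wheels.vllm.ai typically carry a ".dev" suffix; a plain
# release version usually means the glm47 tool-call parser is not available.
python3 -c "import vllm; print(vllm.__version__)"
```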
@@ -13,31 +14,25 @@ echo "" # Auto-detect GPU and set optimal context length # GLM-4.7-Flash: ~31GB model weights, KV cache ~160KB/token (BF16) or ~80KB/token (FP8) detect_optimal_context() { - local gpu_mem_mb=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1) - local gpu_name=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1) + local gpu_mem_mb + gpu_mem_mb=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1) + local gpu_name + gpu_name=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1) echo "Detected GPU: $gpu_name with ${gpu_mem_mb}MB VRAM" - # Calculate optimal context based on GPU memory - # Model weights: ~31GB, leaving rest for KV cache - # Using conservative estimates with FP8 KV cache if [ -z "$gpu_mem_mb" ]; then - echo "32768" # Fallback + echo "32768" elif [ "$gpu_mem_mb" -ge 180000 ]; then - # B200 180GB: Can do 200k+ easily - echo "196608" # 192k + echo "196608" elif [ "$gpu_mem_mb" -ge 140000 ]; then - # H200 141GB: Can do ~150k - echo "131072" # 128k + echo "131072" elif [ "$gpu_mem_mb" -ge 80000 ]; then - # H100/A100 80GB: Can do ~64k safely, maybe 96k with FP8 KV - echo "65536" # 64k + echo "65536" elif [ "$gpu_mem_mb" -ge 48000 ]; then - # A100 40GB or similar: ~32k - echo "32768" # 32k + echo "32768" else - # Smaller GPUs - echo "16384" # 16k + echo "16384" fi } @@ -46,15 +41,17 @@ MODEL_NAME="${MODEL_NAME:-zai-org/GLM-4.7-Flash}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" VLLM_API_KEY="${VLLM_API_KEY:-changeme}" GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.92}" +MAX_MODEL_LEN="${MAX_MODEL_LEN:-}" # glm47 parser requires vLLM nightly from wheels.vllm.ai TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-glm47}" # Keep model on container disk (requires 100GB containerDiskInGb) HF_HOME="${HF_HOME:-/root/.cache/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-changeme}" -# Auto-detect optimal context if not explicitly set if [ -z "$MAX_MODEL_LEN" ]; then MAX_MODEL_LEN=$(detect_optimal_context) echo "Auto-detected optimal context length: $MAX_MODEL_LEN tokens" @@ -63,9 +60,11 @@ else fi export HF_HOME -export CLAWDBOT_STATE_DIR +export OPENCLAW_STATE_DIR export MAX_MODEL_LEN +BOT_CMD="openclaw" + # Set CUDA 13.1 paths for B200 (no-op on other GPUs if not installed) if [ -d "/usr/local/cuda-13.1" ]; then export PATH=/usr/local/cuda-13.1/bin:$PATH @@ -75,7 +74,10 @@ if [ -d "/usr/local/cuda-13.1" ]; then fi # Ensure directories exist (HF cache on container disk, state on workspace) -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true # Configure GitHub CLI # Priority: 1) GITHUB_TOKEN env var, 2) Persisted config in /workspace/.config/gh @@ -115,9 +117,9 @@ if command -v nvcc &> /dev/null; then fi echo "" -# Initialize Clawdbot config if not exists -if [ ! 
-f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -135,12 +137,12 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then # Reserve tokens for compaction: 15% of context RESERVE_TOKENS=$((MAX_MODEL_LEN * 15 / 100)) - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd", + "workspace": "/workspace/openclaw", "contextTokens": ${CONTEXT_TOKENS}, "systemPrompt": "Be concise and direct. Avoid unnecessary verbosity.", "compaction": { @@ -174,18 +176,25 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then "channels": { ${TELEGRAM_CONFIG} }, + "skills": { + "load": { "extraDirs": ["/opt/openclaw/skills"] } + }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "password", "password": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi +oc_sync_gateway_auth "password" + # Build vLLM command # Note: GLM-4.7-Flash requires: # - --block-size 32 (workaround for FlashInfer bug with head_size 256) @@ -232,23 +241,14 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_STATE_DIR clawdbot gateway & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" -echo "============================================" -echo " Services Running" -echo "============================================" -echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" -echo "" -echo " vLLM PID: $VLLM_PID" -echo " Gateway PID: $GATEWAY_PID" -echo "============================================" -echo "" +oc_print_ready "vLLM API" "$SERVED_MODEL_NAME" "$MAX_MODEL_LEN tokens" "password" # Keep container running and handle signals trap "kill $VLLM_PID $GATEWAY_PID 2>/dev/null; exit 0" SIGTERM SIGINT diff --git a/models/glm47-flash-gguf-llamacpp/Dockerfile b/models/glm47-flash-gguf-llamacpp/Dockerfile index 6489bb6..e49daa9 100644 --- a/models/glm47-flash-gguf-llamacpp/Dockerfile +++ b/models/glm47-flash-gguf-llamacpp/Dockerfile @@ -7,7 +7,7 @@ # - VRAM (model): ~17.3GB # - VRAM (KV cache): ~10GB with Q8 quantization # - Total VRAM: ~28GB -> fits on RTX 5090 (32GB) -# - Context Window: 200,000 tokens (full model capacity!) +# - Context Window: 150,000 tokens (default; increase if VRAM allows) # - Inference: ~175 tokens/sec on RTX 5090 # # Why llama.cpp instead of vLLM? 
@@ -25,7 +25,7 @@ ENV DEBIAN_FRONTEND=noninteractive # Install build dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential cmake git curl sudo \ + build-essential cmake git curl sudo unzip \ python3 python3-dev python3-venv python3-pip \ openssh-server \ && rm -f /etc/ssh/ssh_host_* && mkdir -p /var/run/sshd \ @@ -37,7 +37,7 @@ WORKDIR /workspace # Build llama.cpp from source with CUDA SM120 support # Using --allow-shlib-undefined to defer libcuda.so resolution to runtime -# Using GGML_NATIVE=OFF for portable binary (GitHub runner has different CPU than RunPod) +# Using GGML_NATIVE=OFF for portable binary (GitHub runner has different CPU than Runpod) RUN git clone --depth 1 https://github.com/ggml-org/llama.cpp.git && \ cd llama.cpp && \ cmake -B build \ @@ -53,6 +53,27 @@ RUN git clone --depth 1 https://github.com/ggml-org/llama.cpp.git && \ ldconfig && \ cd / && rm -rf /workspace/llama.cpp +# Build LFM2.5-Audio runners with CUDA SM120 support (GPU inference) +# Built from llama.cpp PR #18641 (LiquidAI audio model support) +# This gives ~80x speedup over CPU-only prebuilt runners +RUN git clone https://github.com/ggml-org/llama.cpp.git /tmp/llama-audio && \ + cd /tmp/llama-audio && \ + git fetch origin pull/18641/head:liquid-audio && \ + git checkout liquid-audio && \ + cmake -B build \ + -DGGML_CUDA=ON \ + -DGGML_NATIVE=OFF \ + -DCMAKE_CUDA_ARCHITECTURES="120" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ + -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined && \ + cmake --build build --target llama-liquid-audio-cli llama-liquid-audio-server -j$(nproc) && \ + cp build/bin/llama-liquid-audio-cli /usr/local/bin/ && \ + cp build/bin/llama-liquid-audio-server /usr/local/bin/ && \ + cp build/bin/*.so* /usr/local/bin/ 2>/dev/null || true && \ + chmod +x /usr/local/bin/llama-liquid-audio-cli /usr/local/bin/llama-liquid-audio-server && \ + rm -rf /tmp/llama-audio + # Install Node.js and GitHub CLI RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | \ @@ -66,8 +87,14 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install huggingface_hub for model downloads (using Python API, not CLI) RUN python3 -m pip install --no-cache-dir huggingface_hub -# Install Clawdbot and Claude Code -RUN npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ +# Image generation dependencies (SDNQ + Diffusers + Torch) +# PyTorch cu128 required for RTX 5090 (Blackwell sm_120) support +# Diffusers from git required for Flux2KleinPipeline +RUN python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu128 && \ + python3 -m pip install --no-cache-dir sdnq git+https://github.com/huggingface/diffusers.git transformers accelerate safetensors + +# Install OpenClaw and Claude Code +RUN npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ npm cache clean --force WORKDIR / @@ -77,13 +104,26 @@ ENV MODEL_NAME="unsloth/GLM-4.7-Flash-GGUF" \ MODEL_FILE="GLM-4.7-Flash-Q4_K_M.gguf" \ MODEL_PATH="/workspace/models/GLM-4.7-Flash-GGUF" \ SERVED_MODEL_NAME="glm-4.7-flash" \ - MAX_MODEL_LEN="200000" \ + MAX_MODEL_LEN="150000" \ LLAMA_API_KEY="changeme" \ - CLAWDBOT_HOME="/workspace/.clawdbot" \ - CLAWDBOT_WORKSPACE="/workspace/clawd" + OPENCLAW_WEB_PASSWORD="changeme" \ + OPENCLAW_STATE_DIR="/workspace/.openclaw" \ + OPENCLAW_WORKSPACE="/workspace/openclaw" \ + 
AUDIO_MODEL_NAME="LiquidAI/LFM2.5-Audio-1.5B-GGUF" \ + AUDIO_MODEL_PATH="/workspace/models/LFM2.5-Audio-GGUF" \ + AUDIO_QUANT="Q4_0" -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +COPY skills/ /opt/openclaw/skills/ +COPY scripts/openclaw-image-gen /usr/local/bin/openclaw-image-gen +COPY scripts/openclaw-image-server /usr/local/bin/openclaw-image-server +COPY scripts/openclaw-tts /usr/local/bin/openclaw-tts +COPY scripts/openclaw-stt /usr/local/bin/openclaw-stt +COPY scripts/openclaw-web-proxy /usr/local/bin/openclaw-web-proxy +COPY web/ /opt/openclaw/web/ +COPY plugins/ /opt/openclaw/plugins/ +COPY scripts/entrypoint-common.sh /opt/openclaw/entrypoint-common.sh +COPY models/glm47-flash-gguf-llamacpp/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh /usr/local/bin/openclaw-image-gen /usr/local/bin/openclaw-image-server /usr/local/bin/openclaw-tts /usr/local/bin/openclaw-stt /usr/local/bin/openclaw-web-proxy -EXPOSE 8000 18789 22 +EXPOSE 8000 8080 18789 22 ENTRYPOINT ["/entrypoint.sh"] diff --git a/models/glm47-flash-gguf-llamacpp/README.md b/models/glm47-flash-gguf-llamacpp/README.md index a2d0640..dd46416 100644 --- a/models/glm47-flash-gguf-llamacpp/README.md +++ b/models/glm47-flash-gguf-llamacpp/README.md @@ -15,15 +15,15 @@ llama.cpp has native support for `Glm4MoeLite` architecture (PR #18936 merged Ja | Model | unsloth/GLM-4.7-Flash-GGUF (Q4_K_M) | | Model Size | ~17GB | | VRAM (total) | ~28GB | -| Context Window | **200,000 tokens** | +| Context Window | **150,000 tokens (default)** | | GPU | RTX 5090 (32GB, Blackwell SM120) | | Inference Speed | ~175 tokens/sec | ## Key Features -- **200k context** - Full model capacity on 32GB GPU -- **Q8 KV cache quantization** - Fits 200k context in VRAM -- **OpenAI-compatible API** - Works with Clawdbot, Claude Code, etc. +- **150k default context** - Balanced for stability on 32GB GPU +- **Q8 KV cache quantization** - Fits 150k default context in VRAM +- **OpenAI-compatible API** - Works with OpenClaw, Claude Code, etc. - **Native chat template** - Uses `--jinja` for correct GLM-4.7 formatting ## Runpod Deployment @@ -33,26 +33,30 @@ llama.cpp has native support for `Glm4MoeLite` architecture (PR #18936 merged Ja 1. **Add your SSH key** to [Runpod Account Settings → SSH Public Keys](https://www.runpod.io/console/user/settings) (required for device pairing later). If you don't have an SSH key, follow the [Runpod SSH guide](https://docs.runpod.io/pods/configuration/use-ssh). 2. **Create a Pod** with: - - Image: `runpod/clawdbot-glm47-flash-gguf:latest` + - Image: `runpod/openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf:latest` - GPU: RTX 5090 (or any 32GB+ GPU) - - Ports: `8000/http`, `18789/http`, `22/tcp` + - Ports: `8000/http`, `8080/http`, `18789/http`, `22/tcp` - Network Volume: **30GB minimum**, mounted to `/workspace` - Required for model download (~17GB) and config persistence - Without a network volume, data is lost on pod restart - Environment Variables: - - `CLAWDBOT_WEB_PASSWORD` - Token for Web UI (default: `clawdbot`) + - `OPENCLAW_WEB_PASSWORD` - Token for Web UI (default: `changeme`) - `LLAMA_API_KEY` - API key for llama.cpp (default: `changeme`) 3. **Wait for startup** - First launch downloads the model (~17GB), which takes a few minutes. Check pod logs for progress. -4. **Access the Web UI**: +4. **Access the Control UI**: ``` - https://-18789.proxy.runpod.net/?token= + https://-18789.proxy.runpod.net/?token= + ``` +5. 
**Access the Media UI (proxy)**: + ``` + https://-8080.proxy.runpod.net ``` ### First-Time Device Pairing -Clawdbot requires device pairing for security. On first access, you'll see "pairing required". +OpenClaw requires device pairing for security. On first access, you'll see "pairing required". **To approve your browser:** @@ -61,10 +65,10 @@ Clawdbot requires device pairing for security. On first access, you'll see "pair ssh root@ -p # List pending pairing requests -CLAWDBOT_STATE_DIR=/workspace/.clawdbot clawdbot devices list +OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing list telegram # Approve your device (use the Request ID from the list) -CLAWDBOT_STATE_DIR=/workspace/.clawdbot clawdbot devices approve +OPENCLAW_STATE_DIR=/workspace/.openclaw openclaw pairing approve telegram ``` After approval, refresh the Web UI - it will work permanently for that browser. @@ -74,17 +78,20 @@ After approval, refresh the Web UI - it will work permanently for that browser. | Port | Service | |------|---------| | 8000 | llama.cpp API (OpenAI-compatible) | -| 18789 | Clawdbot Web UI | +| 8080 | Media proxy + UI (image/audio links) | +| 18789 | OpenClaw Control UI | | 22 | SSH | +Note: audio/image servers run on `8001/8002` internally and are not exposed. + ## Environment Variables | Variable | Default | Description | |----------|---------|-------------| | `MODEL_FILE` | `GLM-4.7-Flash-Q4_K_M.gguf` | GGUF file to use | -| `MAX_MODEL_LEN` | `200000` | Context length | +| `MAX_MODEL_LEN` | `150000` | Context length | | `LLAMA_API_KEY` | `changeme` | API authentication | -| `CLAWDBOT_WEB_PASSWORD` | `clawdbot` | Web UI token | +| `OPENCLAW_WEB_PASSWORD` | `changeme` | Web UI token | | `TELEGRAM_BOT_TOKEN` | - | Optional Telegram integration | | `GITHUB_TOKEN` | - | Optional GitHub CLI auth | @@ -92,13 +99,13 @@ After approval, refresh the Web UI - it will work permanently for that browser. ```bash # Build -docker build -t clawdbot-glm47-gguf-llamacpp . +docker build -f models/glm47-flash-gguf-llamacpp/Dockerfile -t openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf . # Run on RTX 5090 -docker run --gpus all -p 8000:8000 -p 18789:18789 \ +docker run --gpus all -p 8000:8000 -p 8080:8080 -p 18789:18789 \ -v /path/to/workspace:/workspace \ -e LLAMA_API_KEY=your-key \ - clawdbot-glm47-gguf-llamacpp + openclaw-stack-glm4.7-flash-gguf-flux.2-klein-4b-sdnq-4bit-dynamic-lfm2.5-audio-1.5b-gguf ``` ## API Usage @@ -133,6 +140,6 @@ You can use different GGUF quantizations by changing `MODEL_FILE`: | Feature | llama.cpp GGUF | vLLM NVFP4 | |---------|---------------|------------| | Works on RTX 5090 | ✅ Yes | ❌ No (bugs) | -| 200k context | ✅ Yes | ❌ OOM | +| Context length (default) | 150k (200k optional) | ❌ OOM at 200k | | Inference speed | ~175 tok/s | N/A | | KV cache quant | ✅ Q8 | ❌ FP16 only | diff --git a/models/glm47-flash-gguf-llamacpp/entrypoint.sh b/models/glm47-flash-gguf-llamacpp/entrypoint.sh index 8125386..8d7fa67 100644 --- a/models/glm47-flash-gguf-llamacpp/entrypoint.sh +++ b/models/glm47-flash-gguf-llamacpp/entrypoint.sh @@ -2,31 +2,12 @@ # Don't exit on error - we want the container to stay alive for debugging set +e +source /opt/openclaw/entrypoint-common.sh + # ============================================================ # Setup SSH server FIRST so we can always connect # ============================================================ -echo "Setting up SSH server..." - -# Generate host keys if they don't exist -if [ ! 
-f /etc/ssh/ssh_host_rsa_key ]; then - ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N '' - ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key -N '' - ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N '' -fi - -# Setup authorized_keys from PUBLIC_KEY env var -if [ -n "$PUBLIC_KEY" ]; then - mkdir -p ~/.ssh - echo "$PUBLIC_KEY" > ~/.ssh/authorized_keys - chmod 700 ~/.ssh - chmod 600 ~/.ssh/authorized_keys - echo "SSH public key configured" -fi - -# Start SSH daemon -mkdir -p /var/run/sshd -/usr/sbin/sshd -echo "SSH server started on port 22" +oc_setup_ssh_manual echo "" echo "================================================" @@ -73,24 +54,176 @@ print('Download complete!') } fi +# ============================================================ +# Download LFM2.5-Audio model for TTS/STT +# ============================================================ +AUDIO_MODEL_PATH="${AUDIO_MODEL_PATH:-/workspace/models/LFM2.5-Audio-GGUF}" +AUDIO_MODEL_NAME="${AUDIO_MODEL_NAME:-LiquidAI/LFM2.5-Audio-1.5B-GGUF}" +AUDIO_QUANT="${AUDIO_QUANT:-Q4_0}" + +# Files needed for audio model +AUDIO_FILES=( + "LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" + "mmproj-LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" + "vocoder-LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" + "tokenizer-LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" +) + +# Check if all audio files exist +AUDIO_DOWNLOAD_NEEDED=false +for audio_file in "${AUDIO_FILES[@]}"; do + if [ ! -f "$AUDIO_MODEL_PATH/$audio_file" ]; then + AUDIO_DOWNLOAD_NEEDED=true + break + fi +done + +if [ "$AUDIO_DOWNLOAD_NEEDED" = true ]; then + echo "" + echo "Downloading LFM2.5-Audio model for TTS/STT..." + mkdir -p "$AUDIO_MODEL_PATH" + + for audio_file in "${AUDIO_FILES[@]}"; do + if [ ! -f "$AUDIO_MODEL_PATH/$audio_file" ]; then + echo " Downloading $audio_file..." + python3 -c " +from huggingface_hub import hf_hub_download +hf_hub_download( + repo_id='$AUDIO_MODEL_NAME', + filename='$audio_file', + local_dir='$AUDIO_MODEL_PATH', + local_dir_use_symlinks=False +) +print(' Done: $audio_file') +" || echo " WARNING: Failed to download $audio_file" + fi + done + echo "Audio model download complete!" +else + echo "Audio model files already present at $AUDIO_MODEL_PATH" +fi + # Set defaults LLAMA_API_KEY="${LLAMA_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" -MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +MAX_MODEL_LEN="${MAX_MODEL_LEN:-150000}" +LLAMA_PARALLEL="${LLAMA_PARALLEL:-1}" +LLAMA_GPU_LAYERS="${LLAMA_GPU_LAYERS:-999}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" +OPENCLAW_WEB_PROXY_PORT="${OPENCLAW_WEB_PROXY_PORT:-8080}" +export OPENCLAW_STATE_DIR OPENCLAW_WORKSPACE OPENCLAW_WEB_PROXY_PORT +if [ -n "${RUNPOD_POD_ID:-}" ] && [ -z "${OPENCLAW_IMAGE_PUBLIC_BASE_URL:-}" ]; then + OPENCLAW_IMAGE_PUBLIC_BASE_URL="https://${RUNPOD_POD_ID}-${OPENCLAW_WEB_PROXY_PORT}.proxy.runpod.net" + export OPENCLAW_IMAGE_PUBLIC_BASE_URL +fi TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-changeme}" + +BOT_CMD="openclaw" +if ! command -v "$BOT_CMD" >/dev/null 2>&1; then + echo "ERROR: openclaw command not found in PATH" + echo "PATH=$PATH" + echo "Container staying alive for debugging." 
+ sleep infinity +fi + +oc_fatal_gpu() { + local details="$1" + echo "" + echo "================================================================================" + echo "================================================================================" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + echo "!!!!!!!!!!!!!!!!!!!! GPU INITIALIZATION FAILED - ABORTING !!!!!!!!!!!!!!!!!!!!!" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + echo "================================================================================" + echo "We can not continue. The GPU or GPU driver has a problem that we can not resolve." + echo "Contact Runpod support at help@runpod.io" + echo "--------------------------------------------------------------------------------" + if [ -n "$details" ]; then + echo "Details:" + echo "$details" + echo "--------------------------------------------------------------------------------" + fi + cat <<'EOF' + ____ ________ __ __ + / __ \____ ___ ____ / ____/ /___ __ _____/ /_ ____ _/ /__________ + / / / / __ \/ _ \/ __ \/ / / / __ \ | /| / / __ / / / / __ / / ___/ ___/ +/ /_/ / /_/ / __/ / / / /___/ / /_/ / |/ |/ / /_/ / /_/ / /_/ / / / (__ ) +\____/ .___/\___/_/ /_/\____/_/\____/|__/|__/\__,_/\__,_/\__,_/_/_/ /____/ + /_/ +EOF + echo "================================================================================" + exit 1 +} + +oc_check_cuda() { + if ! command -v python3 >/dev/null 2>&1; then + oc_fatal_gpu "python3 is missing; unable to verify CUDA availability." + fi + local check_output="" + check_output="$(python3 - <<'PY' +import ctypes +import os +import sys +from ctypes import c_int, c_char_p + +def err_string(lib, code): + msg = c_char_p() + try: + lib.cuGetErrorString(code, ctypes.byref(msg)) + return msg.value.decode() if msg.value else "unknown" + except Exception: + return "unknown" + +try: + lib = ctypes.CDLL("libcuda.so.1") +except OSError as exc: + print(f"libcuda.so.1 load failed: {exc}") + sys.exit(1) + +lib.cuInit.argtypes = [ctypes.c_uint] +lib.cuInit.restype = c_int +err = lib.cuInit(0) +if err != 0: + print(f"cuInit failed: {err} {err_string(lib, err)}") + sys.exit(1) + +lib.cuDeviceGetCount.argtypes = [ctypes.POINTER(c_int)] +lib.cuDeviceGetCount.restype = c_int +count = c_int() +err2 = lib.cuDeviceGetCount(ctypes.byref(count)) +if err2 != 0 or count.value < 1: + print(f"cuDeviceGetCount failed: {err2} {err_string(lib, err2)} count={count.value}") + sys.exit(1) + +visible = os.environ.get("CUDA_VISIBLE_DEVICES", "") +nvidia_visible = os.environ.get("NVIDIA_VISIBLE_DEVICES", "") +print(f"CUDA_VISIBLE_DEVICES={visible or '(unset)'}") +print(f"NVIDIA_VISIBLE_DEVICES={nvidia_visible or '(unset)'}") +print(f"cuda_device_count={count.value}") +PY +)" + local check_status=$? + if [ $check_status -ne 0 ]; then + oc_fatal_gpu "$check_output" + fi +} + +oc_check_cuda echo "Starting llama.cpp server..." echo " Model: $MODEL_PATH/$MODEL_FILE" echo " Context: $MAX_MODEL_LEN tokens" +echo " Parallel slots: $LLAMA_PARALLEL" +echo " GPU layers: $LLAMA_GPU_LAYERS" echo " API Key: ${LLAMA_API_KEY:0:4}..." 
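If the GPU banner from `oc_fatal_gpu` fires, the same driver probe that `oc_check_cuda` performs can be repeated by hand over SSH to capture details for a support ticket; a minimal sketch (illustrative only, not part of the entrypoint):

```bash
# Sketch: re-run the driver probe oc_check_cuda performs, outside the entrypoint.
# A non-zero cuInit return code means the driver is unusable in this container.
python3 -c 'import ctypes; lib = ctypes.CDLL("libcuda.so.1"); print("cuInit:", lib.cuInit(0))'
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
```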
# Start llama-server with OpenAI-compatible API # Key flags: # -ngl 999: Offload all layers to GPU -# -c: Context length (200k tokens) +# -c: Context length (default 150k tokens) # --jinja: Required for GLM-4.7 chat template # -ctk q8_0 -ctv q8_0: Quantize KV cache to fit 200k in 32GB VRAM # --api-key: Enable API key authentication @@ -98,7 +231,8 @@ llama-server \ -m "$MODEL_PATH/$MODEL_FILE" \ --host 0.0.0.0 \ --port 8000 \ - -ngl 999 \ + -ngl "$LLAMA_GPU_LAYERS" \ + --parallel "$LLAMA_PARALLEL" \ -c "$MAX_MODEL_LEN" \ --jinja \ -ctk q8_0 \ @@ -108,6 +242,39 @@ llama-server \ LLAMA_PID=$! +# Start LFM2.5-Audio server for TTS/STT (GPU inference) +echo "" +echo "Starting LFM2.5-Audio server for TTS/STT..." +echo " Model: $AUDIO_MODEL_PATH/LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" +echo " Port: 8001 (GPU accelerated, ~845 MiB VRAM)" + +env LD_LIBRARY_PATH="/usr/local/bin" llama-liquid-audio-server \ + -m "$AUDIO_MODEL_PATH/LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" \ + -mm "$AUDIO_MODEL_PATH/mmproj-LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" \ + -mv "$AUDIO_MODEL_PATH/vocoder-LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" \ + --tts-speaker-file "$AUDIO_MODEL_PATH/tokenizer-LFM2.5-Audio-1.5B-${AUDIO_QUANT}.gguf" \ + -ngl 99 \ + --host 0.0.0.0 \ + --port 8001 \ + 2>&1 & + +AUDIO_PID=$! + +# Start FLUX.2 Klein image generation server +echo "" +echo "Starting FLUX.2 Klein image generation server..." +echo " Model: Disty0/FLUX.2-klein-4B-SDNQ-4bit-dynamic" +echo " Port: 8002 (GPU accelerated, ~3-4 GB VRAM)" + +openclaw-image-server --port 8002 > /tmp/image-server.log 2>&1 & +IMAGE_PID=$! + +# Start lightweight media proxy + UI +echo "" +echo "Starting OpenClaw media web proxy..." +openclaw-web-proxy --port "$OPENCLAW_WEB_PROXY_PORT" --web-root "/opt/openclaw/web" > /tmp/openclaw-web-proxy.log 2>&1 & +WEB_PROXY_PID=$! + # Wait for llama-server to be ready echo "Waiting for llama-server to start..." MAX_WAIT=600 @@ -127,11 +294,23 @@ if [ $WAITED -ge $MAX_WAIT ]; then echo "Container will stay running for debugging." fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +mkdir -p "$OPENCLAW_WORKSPACE/images" "$OPENCLAW_WORKSPACE/audio" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true + +# Install tool_result hook plugins into workspace (if bundled) +OPENCLAW_EXT_DIR="$OPENCLAW_WORKSPACE/.openclaw/extensions" +if [ -d "/opt/openclaw/plugins/toolresult-images" ]; then + mkdir -p "$OPENCLAW_EXT_DIR" + if [ ! -d "$OPENCLAW_EXT_DIR/toolresult-images" ]; then + cp -r "/opt/openclaw/plugins/toolresult-images" "$OPENCLAW_EXT_DIR/" + fi +fi -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." if [ -n "$TELEGRAM_BOT_TOKEN" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true, \"botToken\": \"${TELEGRAM_BOT_TOKEN}\" }" @@ -139,7 +318,7 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -162,27 +341,42 @@ if [ ! 
-f "$CLAWDBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-llamacpp/$SERVED_MODEL_NAME" }, - "contextTokens": 180000 + "contextTokens": 135000, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { ${TELEGRAM_CONFIG} }, + "skills": { + "load": { "extraDirs": ["/opt/openclaw/skills"] } + }, + "plugins": { + "load": { "paths": ["$OPENCLAW_WORKSPACE/.openclaw/extensions"] }, + "entries": { "toolresult-images": { "enabled": true } } + }, "gateway": { "mode": "local", "bind": "lan", - "auth": { "token": "$CLAWDBOT_WEB_PASSWORD" }, - "remote": { "token": "$CLAWDBOT_WEB_PASSWORD" } + "auth": { "mode": "token", "token": "$OPENCLAW_WEB_PASSWORD" }, + "remote": { "token": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" +fi + +IMAGE_BASE_URL_FILE="$OPENCLAW_WORKSPACE/image-base-url.txt" +if [ -n "${OPENCLAW_IMAGE_PUBLIC_BASE_URL:-}" ] && [ ! -f "$IMAGE_BASE_URL_FILE" ]; then + echo "$OPENCLAW_IMAGE_PUBLIC_BASE_URL" > "$IMAGE_BASE_URL_FILE" fi # Auto-fix config -echo "Running clawdbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix || true +echo "Running openclaw doctor to validate/fix config..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true +oc_sync_gateway_auth "token" # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -202,28 +396,40 @@ fi export OPENAI_API_KEY="$LLAMA_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway (use token auth for URL parameter support) +# Start OpenClaw gateway (use token auth for URL parameter support) echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME CLAWDBOT_GATEWAY_TOKEN="$CLAWDBOT_WEB_PASSWORD" clawdbot gateway --auth token --token "$CLAWDBOT_WEB_PASSWORD" & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR OPENCLAW_GATEWAY_TOKEN="$OPENCLAW_WEB_PASSWORD" \ +"$BOT_CMD" gateway --auth token --token "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! +MEDIA_PROXY_URL="" +if [ -n "${RUNPOD_POD_ID:-}" ]; then + MEDIA_PROXY_URL="https://${RUNPOD_POD_ID}-${OPENCLAW_WEB_PROXY_PORT}.proxy.runpod.net" +fi + echo "" -echo "================================================" -echo " Ready!" 
-echo " llama.cpp API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" -echo " Web UI: https://-18789.proxy.runpod.net/?token=$CLAWDBOT_WEB_PASSWORD" -echo " Web UI Token: $CLAWDBOT_WEB_PASSWORD" -echo " Model: $SERVED_MODEL_NAME" -echo " Context: $MAX_MODEL_LEN tokens (200k!)" -echo " VRAM: ~28GB / 32GB" -echo "================================================" +oc_print_ready "llama.cpp API" "$SERVED_MODEL_NAME" "$MAX_MODEL_LEN tokens" "token" \ + "VRAM: LLM ~24GB + Audio ~2GB + Image ~3-4GB = ~29-30GB / 32GB" \ + "Media UI (local): http://localhost:${OPENCLAW_WEB_PROXY_PORT}" \ + "${MEDIA_PROXY_URL:+Media UI (public): ${MEDIA_PROXY_URL}}" +echo "" +echo " Audio Server (internal): http://localhost:8001 (not exposed)" +echo " - openclaw-tts \"Hello world\" --output /tmp/hello.wav" +echo " - openclaw-stt /path/to/audio.wav" +echo "" +echo " Image Server (internal): http://localhost:8002 (not exposed)" +echo " - openclaw-image-gen --prompt \"A robot\" --output /tmp/robot.png" +echo "" +echo " Media UI: http://localhost:${OPENCLAW_WEB_PROXY_PORT}" # Handle shutdown cleanup() { echo "Shutting down..." [ -n "$GATEWAY_PID" ] && kill $GATEWAY_PID 2>/dev/null + [ -n "$IMAGE_PID" ] && kill $IMAGE_PID 2>/dev/null + [ -n "$AUDIO_PID" ] && kill $AUDIO_PID 2>/dev/null + [ -n "$WEB_PROXY_PID" ] && kill $WEB_PROXY_PID 2>/dev/null kill $LLAMA_PID 2>/dev/null exit 0 } diff --git a/models/glm47-flash-nvfp4-5090/Dockerfile b/models/glm47-flash-nvfp4-5090/Dockerfile index 157c028..6f8c8d0 100644 --- a/models/glm47-flash-nvfp4-5090/Dockerfile +++ b/models/glm47-flash-nvfp4-5090/Dockerfile @@ -53,9 +53,12 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ # Install tools RUN uv pip install --system "huggingface_hub[cli]" && \ - npm install -g --prefer-offline @anthropic-ai/claude-code clawdbot && \ + npm install -g --prefer-offline @anthropic-ai/claude-code openclaw@latest && \ npm cache clean --force +# Image generation dependencies (SDNQ + Diffusers) +RUN uv pip install --system sdnq diffusers accelerate safetensors + WORKDIR / ENV MODEL_NAME="GadflyII/GLM-4.7-Flash-NVFP4" \ @@ -63,11 +66,14 @@ ENV MODEL_NAME="GadflyII/GLM-4.7-Flash-NVFP4" \ SERVED_MODEL_NAME="glm-4.7-flash" \ MAX_MODEL_LEN="200000" \ VLLM_API_KEY="changeme" \ - CLAWDBOT_HOME="/workspace/.clawdbot" \ - CLAWDBOT_WORKSPACE="/workspace/clawd" + OPENCLAW_STATE_DIR="/workspace/.openclaw" \ + OPENCLAW_WORKSPACE="/workspace/openclaw" -COPY entrypoint.sh benchmark.sh / -RUN chmod +x /entrypoint.sh /benchmark.sh +COPY skills/ /opt/openclaw/skills/ +COPY scripts/openclaw-image-gen /usr/local/bin/openclaw-image-gen +COPY scripts/entrypoint-common.sh /opt/openclaw/entrypoint-common.sh +COPY models/glm47-flash-nvfp4-5090/entrypoint.sh models/glm47-flash-nvfp4-5090/benchmark.sh / +RUN chmod +x /entrypoint.sh /benchmark.sh /usr/local/bin/openclaw-image-gen EXPOSE 8000 18789 22 ENTRYPOINT ["/entrypoint.sh"] diff --git a/models/glm47-flash-nvfp4-5090/ISSUES.md b/models/glm47-flash-nvfp4-5090/ISSUES.md index 10a4d42..e8285ef 100644 --- a/models/glm47-flash-nvfp4-5090/ISSUES.md +++ b/models/glm47-flash-nvfp4-5090/ISSUES.md @@ -7,7 +7,16 @@ ## Summary -Attempting to run `GadflyII/GLM-4.7-Flash-NVFP4` with vLLM 0.14.0 on RTX 5090 fails due to multiple issues with the GLM-4.7 MLA (Multi-Latent Attention) architecture not being properly supported by vLLM's TransformersMoE fallback. 
+Attempting to run `GadflyII/GLM-4.7-Flash-NVFP4` with vLLM on RTX 5090 fails due to multiple issues with the GLM-4.7 MLA (Multi-Latent Attention) architecture and SM120 kernel support. + +## Upstream status (as of 2026-01-29) + +- vLLM Issue #32109 is **closed** and was closed by PR #33285. +- PR #33285 **restricts** FP8 MoE CUTLASS backend to SM90/SM100 (does not add SM120 MoE support). +- PR #32237 (SM120 FP8 MoE support) was **closed and not merged**. +- vLLM now includes `Glm4MoeLiteForCausalLM` support, but NVFP4 on SM120 is still unverified. + +Net: there is no confirmed upstream fix for NVFP4 + GLM-4.7 on RTX 5090 yet. ## Issues Encountered @@ -53,7 +62,7 @@ Actual: 5 heads × 256 = 1280 vLLM's attention produces wrong output dimensions for GLM-4.7's MLA architecture. -**Status**: UNRESOLVED - requires native Glm4MoeLite support in vLLM +**Status**: UNRESOLVED - still reproduced on SM120 ### 5. SGLang cuDNN Error @@ -70,7 +79,7 @@ cuDNN doesn't support NVFP4 GEMM on Blackwell SM120. ``` GPU: RTX 5090 (Blackwell SM120, 32GB) CUDA: 12.8 -vLLM: 0.14.0 (nightly) +vLLM: 0.14.x (nightly at the time) Transformers: 5.0.0.dev0 Model: GadflyII/GLM-4.7-Flash-NVFP4 ``` @@ -78,9 +87,92 @@ Model: GadflyII/GLM-4.7-Flash-NVFP4 ## When to Retry Check these before retrying: -1. vLLM has native `Glm4MoeLiteForCausalLM` (not TransformersMoE fallback) -2. vLLM Issue #32109 resolved -3. NVIDIA cuDNN Blackwell FP4 support +1. vLLM has native `Glm4MoeLiteForCausalLM` path for GLM-4.7 in production builds +2. SM120 FP8 MoE kernels are supported (not just gated off) +3. NVIDIA cuDNN Blackwell FP4 support is available + +## Known working nightly tag (from upstream reports) + +Community reports in vLLM Issue #32109 mention the following as working at the time: +- `docker.io/vllm/vllm-openai:nightly-0d4044edd85de30d7d4558aeea4d1e95c7c556d6` + +Reported commit window: +- last working: `ffc0a2798b118f7ceb21645df59d2bfdfc461d42` +- first broken: `5dcd7ef1f219068e6b6be5b614bc43978f028651` + +These are historical references for retesting. + +## Verification plan (recommended) + +1. Baseline: run the known working nightly image above with NVFP4 and confirm it still starts. +2. Candidate: run the latest vLLM release or nightly (v0.15.x) with the same config. +3. Compare logs for MLA mismatch or SM120 kernel selection errors. +4. Record results here and update status. + +## Runpod test checklist (NVFP4, no custom image) + +Goal: validate NVFP4 on RTX 5090 using official vLLM images (no custom build). 
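For step 3 of the verification plan above, the baseline-vs-candidate log comparison can be scripted; a minimal sketch using the failure signatures listed in the issues above and in the log-triage step of the checklist below (`vllm.log` is a placeholder for wherever the pod logs are captured):

```bash
# Sketch: scan a captured vLLM log for the known SM120/MLA failure signatures.
grep -E "cutlass_scaled_mm for CUDA device capability: 120|mat1 and mat2 shapes cannot be multiplied" vllm.log \
  && echo "known failure signature found" \
  || echo "no known failure signature found"
```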
+ +### 1) Create a pod +- GPU: RTX 5090 32GB +- Volume: 100GB+ mounted at `/workspace` +- Ports: `8000/http`, `22/tcp` +- Image: use one of the two images below: + - Baseline (reported working): `vllm/vllm-openai:nightly-0d4044edd85de30d7d4558aeea4d1e95c7c556d6` + - Candidate (latest): `vllm/vllm-openai:latest` + +### 2) Environment variables +- `HF_TOKEN` (optional but recommended) +- `VLLM_API_KEY` (required) +- `MODEL_NAME=GadflyII/GLM-4.7-Flash-NVFP4` +- `SERVED_MODEL_NAME=glm-4.7-flash` +- `MAX_MODEL_LEN=200000` +- `TOOL_CALL_PARSER=glm47` +- `REASONING_PARSER=glm45` +- `GPU_MEMORY_UTILIZATION=0.95` +- `HF_HOME=/workspace/huggingface` + +### 3) Start command +Use the same command for both baseline and candidate images: +``` +vllm serve ${MODEL_NAME} \ + --host 0.0.0.0 \ + --port 8000 \ + --max-model-len ${MAX_MODEL_LEN} \ + --gpu-memory-utilization ${GPU_MEMORY_UTILIZATION} \ + --served-model-name ${SERVED_MODEL_NAME} \ + --api-key ${VLLM_API_KEY} \ + --enable-auto-tool-choice \ + --tool-call-parser ${TOOL_CALL_PARSER} \ + --reasoning-parser ${REASONING_PARSER} +``` + +### 4) Health check +``` +curl http://localhost:8000/health +``` + +### 5) Minimal chat test +``` +curl http://localhost:8000/v1/chat/completions \ + -H "Authorization: Bearer ${VLLM_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-4.7-flash", + "messages": [{"role": "user", "content": "Hello!"}], + "max_tokens": 64 + }' +``` + +### 6) Log triage (what to watch for) +- `No compiled cutlass_scaled_mm for CUDA device capability: 120` +- `mat1 and mat2 shapes cannot be multiplied` (MLA mismatch) +- CUDA graph or cuDNN errors on SM120 + +### 7) Record results +- Image tag used +- vLLM version reported in logs +- Pass/fail and error signatures ## Working Alternative diff --git a/models/glm47-flash-nvfp4-5090/README.md b/models/glm47-flash-nvfp4-5090/README.md index eae8680..84018b9 100644 --- a/models/glm47-flash-nvfp4-5090/README.md +++ b/models/glm47-flash-nvfp4-5090/README.md @@ -36,10 +36,10 @@ Full 200K context window with MLA for reduced KV cache memory. ## Quick Start -### 1. Create RunPod Pod +### 1. Create Runpod Pod **Settings:** -- **Image**: `runpod/clawdbot-glm47-flash-nvfp4-5090:latest` +- **Image**: `runpod/openclaw-stack-glm4.7-flash-nvfp4-5090:latest` - **GPU**: 1x RTX 5090 32GB - **Volume**: 100GB at `/workspace` (network storage) - **Container Disk**: 50GB @@ -54,7 +54,7 @@ Full 200K context window with MLA for reduced KV cache memory. | `HF_TOKEN` | Recommended | - | [HuggingFace token](https://huggingface.co/settings/tokens) for faster model downloads | | `TELEGRAM_BOT_TOKEN` | No | - | Telegram bot token | | `GITHUB_TOKEN` | No | - | GitHub token for `gh` CLI | -| `CLAWDBOT_WEB_PASSWORD` | No | `clawdbot` | Password for web UI | +| `OPENCLAW_WEB_PASSWORD` | No | `changeme` | Password for web UI | ### 3. Test It diff --git a/models/glm47-flash-nvfp4-5090/entrypoint.sh b/models/glm47-flash-nvfp4-5090/entrypoint.sh index 5642d59..05c459a 100644 --- a/models/glm47-flash-nvfp4-5090/entrypoint.sh +++ b/models/glm47-flash-nvfp4-5090/entrypoint.sh @@ -1,48 +1,14 @@ #!/bin/bash # Don't use set -e - we want to continue even if some commands fail +source /opt/openclaw/entrypoint-common.sh + echo "================================================" echo " GLM-4.7-Flash NVFP4 on RTX 5090 (Blackwell)" echo "================================================" -# Setup SSH for remote access (mirrors RunPod's /start.sh behavior) -setup_ssh() { - echo "Setting up SSH..." 
- - # Setup authorized_keys from PUBLIC_KEY env var - if [ -n "$PUBLIC_KEY" ]; then - mkdir -p ~/.ssh - echo "$PUBLIC_KEY" >> ~/.ssh/authorized_keys - chmod 700 ~/.ssh - chmod 600 ~/.ssh/authorized_keys - echo " Added public key to authorized_keys" - else - echo " WARNING: No PUBLIC_KEY set - SSH login will not work" - fi - - # Generate host keys if they don't exist - for keytype in rsa ecdsa ed25519; do - keyfile="/etc/ssh/ssh_host_${keytype}_key" - if [ ! -f "$keyfile" ]; then - ssh-keygen -t $keytype -f $keyfile -N "" -q 2>/dev/null || true - echo " Generated $keytype host key" - fi - done - - # Create run directory for sshd - mkdir -p /var/run/sshd - - # Start sshd directly (not via service command which may not exist) - if [ -x /usr/sbin/sshd ]; then - /usr/sbin/sshd - echo " SSH daemon started" - else - echo " WARNING: sshd not found, SSH will not be available" - fi -} - -# Run SSH setup (errors are non-fatal) -setup_ssh || echo "SSH setup had issues but continuing..." +# Setup SSH for remote access (mirrors Runpod's /start.sh behavior) +oc_setup_ssh_manual || echo "SSH setup had issues but continuing..." # Persist vLLM cache (CUDA graphs, torch compile) on network storage # This speeds up subsequent pod starts by reusing cached compiled kernels @@ -75,11 +41,14 @@ fi VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-glm-4.7-flash}" MAX_MODEL_LEN="${MAX_MODEL_LEN:-200000}" -CLAWDBOT_HOME="${CLAWDBOT_HOME:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" -# Web UI password - users enter this to access the Clawdbot control panel -CLAWDBOT_WEB_PASSWORD="${CLAWDBOT_WEB_PASSWORD:-clawdbot}" +# Web UI password - users enter this to access the OpenClaw control panel +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-changeme}" + +BOT_CMD="openclaw" echo "Starting vLLM server..." echo " Model: $MODEL_PATH" @@ -128,11 +97,14 @@ if [ $WAITED -ge $MAX_WAIT ]; then # Don't exit - keep container running for debugging fi -# Setup Clawdbot config -mkdir -p "$CLAWDBOT_HOME" +# Setup OpenClaw config +mkdir -p "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true -if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then - echo "Creating Clawdbot config..." +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw config..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -141,9 +113,9 @@ if [ ! -f "$CLAWDBOT_HOME/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - # Create a minimal config - clawdbot doctor will fix any missing fields + # Create a minimal config - openclaw doctor will fix any missing fields # contextTokens: 180000 leaves room for output within 200K context - cat > "$CLAWDBOT_HOME/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "models": { "providers": { @@ -166,25 +138,32 @@ if [ ! 
-f "$CLAWDBOT_HOME/clawdbot.json" ]; then "agents": { "defaults": { "model": { "primary": "local-vllm/$SERVED_MODEL_NAME" }, - "contextTokens": 180000 + "contextTokens": 180000, + "workspace": "$OPENCLAW_WORKSPACE" } }, "channels": { ${TELEGRAM_CONFIG} }, + "skills": { + "load": { "extraDirs": ["/opt/openclaw/skills"] } + }, "gateway": { "mode": "local", - "bind": "lan" + "bind": "lan", + "auth": { "mode": "password", "password": "$OPENCLAW_WEB_PASSWORD" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_HOME/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" fi -# Auto-fix config to match current Clawdbot version's schema -echo "Running clawdbot doctor to validate/fix config..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot doctor --fix 2>/dev/null || true +# Auto-fix config to match current OpenClaw version's schema +echo "Running openclaw doctor to validate/fix config..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" doctor --fix 2>/dev/null || true +chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" 2>/dev/null || true +oc_sync_gateway_auth "password" # Setup GitHub CLI if token provided if [ -n "$GITHUB_TOKEN" ]; then @@ -204,23 +183,15 @@ fi export OPENAI_API_KEY="$VLLM_API_KEY" export OPENAI_BASE_URL="http://localhost:8000/v1" -# Start Clawdbot gateway with password auth for web UI access +# Start OpenClaw gateway with password auth for web UI access echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_HOME clawdbot gateway --auth password --password "$CLAWDBOT_WEB_PASSWORD" 2>/dev/null & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" 2>/dev/null & GATEWAY_PID=$! echo "" -echo "================================================" -echo " Ready! 
(RTX 5090 Blackwell SM120)" -echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" -echo " Web UI: https://-18789.proxy.runpod.net" -echo " Web UI Password: $CLAWDBOT_WEB_PASSWORD" -echo " Model: $SERVED_MODEL_NAME (NVFP4)" -echo " Context: $MAX_MODEL_LEN tokens" -echo " Cost: ~\$0.89/hr (36% savings vs A100)" -echo "================================================" +oc_print_ready "vLLM API" "$SERVED_MODEL_NAME (NVFP4)" "$MAX_MODEL_LEN tokens" "password" \ + "Cost: ~\$0.89/hr (36% savings vs A100)" # Handle shutdown cleanup() { diff --git a/models/glm47-reap-w4a16/Dockerfile b/models/glm47-reap-w4a16/Dockerfile index 9aba99f..224e9c1 100644 --- a/models/glm47-reap-w4a16/Dockerfile +++ b/models/glm47-reap-w4a16/Dockerfile @@ -1,4 +1,4 @@ -# Dockerfile for GLM-4.7-REAP W4A16 on RunPod B200 +# Dockerfile for GLM-4.7-REAP W4A16 on Runpod B200 # This uses the 40% expert-pruned, INT4 quantized version of GLM-4.7 # Model: 0xSero/GLM-4.7-REAP-40-W4A16 (~108GB VRAM) # @@ -30,13 +30,17 @@ ENV LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64:$LD_LIBRARY_PATH # Install vLLM nightly (required for GLM-4.7 support) RUN uv pip install --system -U vllm --pre -# Install Clawdbot -RUN npm install -g --prefer-offline clawdbot@latest && \ +# Image generation dependencies (SDNQ + Diffusers) +RUN uv pip install --system sdnq diffusers accelerate safetensors + +# Install OpenClaw +RUN npm install -g --prefer-offline openclaw@latest && \ npm cache clean --force # Environment variables ENV HF_HOME=/workspace/huggingface -ENV CLAWDBOT_STATE_DIR=/workspace/.clawdbot +ENV OPENCLAW_STATE_DIR=/workspace/.openclaw +ENV OPENCLAW_WORKSPACE=/workspace/openclaw ENV MODEL_NAME=0xSero/GLM-4.7-REAP-40-W4A16 ENV SERVED_MODEL_NAME=glm-4.7-reap ENV VLLM_API_KEY=changeme @@ -45,15 +49,18 @@ ENV GPU_MEMORY_UTILIZATION=0.90 ENV TOOL_CALL_PARSER=glm45 # Create directories -RUN mkdir -p /workspace/huggingface /workspace/.clawdbot /workspace/clawd +RUN mkdir -p /workspace/huggingface /workspace/.openclaw /workspace/openclaw -# Copy entrypoint script -COPY entrypoint.sh /entrypoint.sh -RUN chmod +x /entrypoint.sh +# Copy entrypoint script + skills + CLI +COPY skills/ /opt/openclaw/skills/ +COPY scripts/openclaw-image-gen /usr/local/bin/openclaw-image-gen +COPY scripts/entrypoint-common.sh /opt/openclaw/entrypoint-common.sh +COPY models/glm47-reap-w4a16/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh /usr/local/bin/openclaw-image-gen # Expose ports # 8000: vLLM API -# 18789: Clawdbot Gateway +# 18789: OpenClaw Gateway # 22: SSH EXPOSE 8000 18789 22 diff --git a/models/glm47-reap-w4a16/README.md b/models/glm47-reap-w4a16/README.md index ff66fdc..7756ba9 100644 --- a/models/glm47-reap-w4a16/README.md +++ b/models/glm47-reap-w4a16/README.md @@ -19,9 +19,9 @@ High-end option for maximum performance. ## Quick Start -### 1. Create RunPod Pod +### 1. 
Create Runpod Pod -- **Image**: `yourusername/clawdbot-glm47-reap-w4a16:latest` +- **Image**: `yourusername/openclaw-stack-glm4.7-reap-w4a16:latest` - **GPU**: 1x B200 180GB - **Volume**: 200GB at `/workspace` - **Container Disk**: 50GB @@ -59,11 +59,11 @@ Files persist on network volume `/workspace`: ``` /workspace/ ├── huggingface/ # Model cache -├── .clawdbot/ -│ ├── clawdbot.json # Config +├── .openclaw/ # OpenClaw state path +│ ├── openclaw.json # Config │ ├── agents/ # State │ └── telegram/ # Session -└── clawd/ # Workspace +└── openclaw/ # Workspace ``` ## B200 (Blackwell) Support diff --git a/models/glm47-reap-w4a16/entrypoint.sh b/models/glm47-reap-w4a16/entrypoint.sh index 973f989..36a780e 100644 --- a/models/glm47-reap-w4a16/entrypoint.sh +++ b/models/glm47-reap-w4a16/entrypoint.sh @@ -1,9 +1,10 @@ #!/bin/bash -# entrypoint.sh - GLM-4.7-REAP W4A16 + Clawdbot startup script for RunPod B200 +# entrypoint.sh - GLM-4.7-REAP W4A16 + OpenClaw startup script for Runpod B200 set -e +source /opt/openclaw/entrypoint-common.sh echo "============================================" -echo " GLM-4.7-REAP W4A16 + Clawdbot Startup" +echo " GLM-4.7-REAP W4A16 + OpenClaw Startup" echo "============================================" # Configuration from environment @@ -14,18 +15,25 @@ MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}" GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.90}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-glm45}" HF_HOME="${HF_HOME:-/workspace/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WORKSPACE="${OPENCLAW_WORKSPACE:-/workspace/openclaw}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" GITHUB_TOKEN="${GITHUB_TOKEN:-}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-changeme}" export HF_HOME -export CLAWDBOT_STATE_DIR +export OPENCLAW_STATE_DIR export PATH=/usr/local/cuda-13.1/bin:$PATH export CUDA_HOME=/usr/local/cuda-13.1 export LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64:$LD_LIBRARY_PATH # Ensure directories exist -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" "$OPENCLAW_WORKSPACE" +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true + +BOT_CMD="openclaw" # Configure GitHub CLI if [ -n "$GITHUB_TOKEN" ]; then @@ -54,9 +62,9 @@ echo " Tool parser: $TOOL_CALL_PARSER" echo " CUDA: $(nvcc --version | grep release | awk '{print $5}' | tr -d ',')" echo "" -# Initialize Clawdbot config if not exists -if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -65,12 +73,12 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd" + "workspace": "/workspace/openclaw" } }, "models": { @@ -94,18 +102,25 @@ if [ ! 
-f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then "channels": { ${TELEGRAM_CONFIG} }, + "skills": { + "load": { "extraDirs": ["/opt/openclaw/skills"] } + }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "password", "password": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi +oc_sync_gateway_auth "password" + # Build vLLM command # Note: GLM-4.7-REAP requires: # - FlashInfer attention (downloads pre-compiled B200 cubins from NVIDIA) @@ -148,23 +163,14 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Clawdbot gateway..." -CLAWDBOT_STATE_DIR=$CLAWDBOT_STATE_DIR clawdbot gateway & +echo "Starting OpenClaw gateway..." +OPENCLAW_STATE_DIR=$OPENCLAW_STATE_DIR "$BOT_CMD" gateway --auth password --password "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" -echo "============================================" -echo " Services Running" -echo "============================================" -echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" -echo "" -echo " vLLM PID: $VLLM_PID" -echo " Gateway PID: $GATEWAY_PID" -echo "============================================" -echo "" +oc_print_ready "vLLM API" "$SERVED_MODEL_NAME" "$MAX_MODEL_LEN tokens" "password" # Keep container running and handle signals trap "kill $VLLM_PID $GATEWAY_PID 2>/dev/null; exit 0" SIGTERM SIGINT diff --git a/plugins/toolresult-images/index.mjs b/plugins/toolresult-images/index.mjs new file mode 100644 index 0000000..8c1adf1 --- /dev/null +++ b/plugins/toolresult-images/index.mjs @@ -0,0 +1,214 @@ +const IMAGE_EXT_RE = /\.(png|jpg|jpeg|webp|gif)$/i; +const IMAGE_PATH_RE = /\/(images\/[^"'\\s<>]+|latest)\b/i; +const WORKSPACE_IMAGE_RE = /\/workspace\/openclaw\/images\/([^\s"'<>]+)/i; +const IMAGE_JSON_RE = + /"(image_public_url|image_proxy_url|image_local_url|image_url)"\s*:\s*"([^"]+)"/i; +const IMAGE_PATH_JSON_RE = /"image_path"\s*:\s*"([^"]+)"/i; +const WORKSPACE_AUDIO_RE = /\/workspace\/openclaw\/audio\/([^\s"'<>]+)/i; +const AUDIO_JSON_RE = /"(audio_url|audio_link)"\s*:\s*"([^"]+)"/i; +const AUDIO_PATH_JSON_RE = /"audio_path"\s*:\s*"([^"]+)"/i; + +function trimUrl(value) { + return value.replace(/[)\].,;]+$/, ""); +} + +function resolveBaseUrl() { + const envBase = process.env.OPENCLAW_IMAGE_PUBLIC_BASE_URL; + if (envBase && envBase.trim()) { + return envBase.trim().replace(/\/+$/, ""); + } + const podId = process.env.RUNPOD_POD_ID; + if (podId && podId.trim()) { + const port = process.env.OPENCLAW_WEB_PROXY_PORT || "8080"; + return `https://${podId.trim()}-${port}.proxy.runpod.net`; + } + return ""; +} + +function extractImageUrl(text) { + if (!text || typeof text !== "string") { + return ""; + } + + const jsonMatch = text.match(IMAGE_JSON_RE); + if (jsonMatch && jsonMatch[2]) { + return trimUrl(jsonMatch[2]); + } + + const jsonPathMatch = text.match(IMAGE_PATH_JSON_RE); + if (jsonPathMatch && jsonPathMatch[1]) { + const local = jsonPathMatch[1]; + const file = local.match(WORKSPACE_IMAGE_RE); + if (file && file[1]) { + return `/images/${trimUrl(file[1])}`; + } + } + + const 
keyMatch = text.match( + /(image_public_url|image_proxy_url|image_local_url|image_url)\s*[:=]\s*["']?([^\s"'<>]+)["']?/i, + ); + if (keyMatch && keyMatch[2]) { + return trimUrl(keyMatch[2]); + } + + const urlLine = text.match(/URL:\s*([^\s"'<>]+)/i); + if (urlLine && urlLine[1]) { + return trimUrl(urlLine[1]); + } + + const localPath = text.match(WORKSPACE_IMAGE_RE); + if (localPath && localPath[1]) { + return `/images/${trimUrl(localPath[1])}`; + } + + const urls = text.match(/https?:\/\/[^\s"'<>]+/gi) || []; + for (const candidate of urls) { + const cleaned = trimUrl(candidate); + if (IMAGE_EXT_RE.test(cleaned) || IMAGE_PATH_RE.test(cleaned)) { + return cleaned; + } + } + + const rel = text.match(/(\/images\/[^\s"'<>]+|\/latest)\b/i); + if (rel && rel[1]) { + return trimUrl(rel[1]); + } + + return ""; +} + +function normalizeImageUrl(url) { + if (!url) { + return ""; + } + if (url.startsWith("/")) { + const base = resolveBaseUrl(); + if (!base) { + return ""; + } + return `${base}${url}`; + } + return url; +} + +function extractAudioUrl(text) { + if (!text || typeof text !== "string") { + return ""; + } + + const jsonMatch = text.match(AUDIO_JSON_RE); + if (jsonMatch && jsonMatch[2]) { + return trimUrl(jsonMatch[2]); + } + + const jsonPathMatch = text.match(AUDIO_PATH_JSON_RE); + if (jsonPathMatch && jsonPathMatch[1]) { + const local = jsonPathMatch[1]; + const file = local.match(WORKSPACE_AUDIO_RE); + if (file && file[1]) { + return `/audio/${trimUrl(file[1])}`; + } + } + + const savedLine = text.match(/Audio saved to:\s*([^\s"'<>]+)/i); + if (savedLine && savedLine[1]) { + const local = trimUrl(savedLine[1]); + const file = local.match(WORKSPACE_AUDIO_RE); + if (file && file[1]) { + return `/audio/${trimUrl(file[1])}`; + } + } + + const localPath = text.match(WORKSPACE_AUDIO_RE); + if (localPath && localPath[1]) { + return `/audio/${trimUrl(localPath[1])}`; + } + + return ""; +} + +function normalizeAudioUrl(url) { + if (!url) { + return ""; + } + if (url.startsWith("/")) { + const base = resolveBaseUrl(); + if (!base) { + return ""; + } + return `${base}${url}`; + } + return url; +} + +function hasImageBlock(content) { + if (!Array.isArray(content)) { + return false; + } + return content.some((block) => { + if (!block || typeof block !== "object") { + return false; + } + return block.type === "image" || block.type === "image_url"; + }); +} + +function collectText(content, details) { + const parts = []; + if (Array.isArray(content)) { + for (const block of content) { + if (!block || typeof block !== "object") { + continue; + } + if (block.type === "text" && typeof block.text === "string") { + parts.push(block.text); + } + } + } + if (details && typeof details === "object" && typeof details.aggregated === "string") { + parts.push(details.aggregated); + } + return parts.filter(Boolean).join("\n"); +} + +export default { + id: "toolresult-images", + register(api) { + api.on( + "tool_result_persist", + (event) => { + const msg = event?.message; + if (!msg || typeof msg !== "object") { + return; + } + if (msg.role !== "toolResult") { + return; + } + const content = Array.isArray(msg.content) ? 
msg.content : []; + const text = collectText(content, msg.details); + const updates = []; + + if (!hasImageBlock(content)) { + const url = extractImageUrl(text); + const resolved = normalizeImageUrl(url); + if (resolved) { + updates.push({ type: "image_url", image_url: { url: resolved } }); + } + } + + const audioUrl = normalizeAudioUrl(extractAudioUrl(text)); + if (audioUrl) { + updates.push({ type: "text", text: `Audio: ${audioUrl}` }); + } + + if (updates.length === 0) { + return; + } + + const finalContent = [...content, ...updates]; + return { message: { ...msg, content: finalContent } }; + }, + { priority: 30 }, + ); + }, +}; diff --git a/plugins/toolresult-images/openclaw.plugin.json b/plugins/toolresult-images/openclaw.plugin.json new file mode 100644 index 0000000..519c76b --- /dev/null +++ b/plugins/toolresult-images/openclaw.plugin.json @@ -0,0 +1,11 @@ +{ + "id": "toolresult-images", + "name": "Tool Result Images", + "description": "Convert image/audio outputs in tool results into renderable links.", + "version": "1.0.0", + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": {} + } +} diff --git a/scripts/entrypoint-common.sh b/scripts/entrypoint-common.sh new file mode 100644 index 0000000..184f144 --- /dev/null +++ b/scripts/entrypoint-common.sh @@ -0,0 +1,190 @@ +#!/bin/bash +# Common helpers for OpenClaw Runpod entrypoints. + +oc_init_web_ui() { + local pod_id="${RUNPOD_POD_ID:-}" + if [ -n "$pod_id" ]; then + WEB_UI_BASE="https://${pod_id}-18789.proxy.runpod.net" + else + WEB_UI_BASE="https://-18789.proxy.runpod.net" + fi + + WEB_UI_TOKEN="${OPENCLAW_WEB_PASSWORD:-changeme}" + WEB_UI_URL="${WEB_UI_BASE}/?token=${WEB_UI_TOKEN}" +} + +oc_print_ready() { + local api_label="$1" + local model_label="$2" + local context_label="$3" + local auth_mode="$4" + shift 4 || true + + oc_init_web_ui + + echo "================================================" + echo " Ready!" + echo " ${api_label}: http://localhost:8000" + echo " OpenClaw Gateway: ws://localhost:18789" + + if [ "$auth_mode" = "token" ]; then + echo " Web UI: ${WEB_UI_URL}" + echo " Web UI Token: ${WEB_UI_TOKEN}" + else + echo " Web UI: ${WEB_UI_BASE}" + echo " Web UI Password: ${WEB_UI_TOKEN}" + fi + + if [ -n "$model_label" ]; then + echo " Model: ${model_label}" + fi + if [ -n "$context_label" ]; then + echo " Context: ${context_label}" + fi + + for extra in "$@"; do + if [ -n "$extra" ]; then + echo " ${extra}" + fi + done + + echo " Status: ready for requests" + echo "================================================" +} + +oc_sync_gateway_auth() { + local mode="${1:-token}" + local cfg="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}/openclaw.json" + if [ ! -f "$cfg" ]; then + return + fi + if ! 
command -v python3 >/dev/null 2>&1; then + echo "WARNING: python3 not found; skipping gateway auth sync" + return + fi + + OPENCLAW_GATEWAY_AUTH_MODE="$mode" python3 - <<'PY' +import json +import os + +cfg = os.path.join(os.environ.get("OPENCLAW_STATE_DIR", "/workspace/.openclaw"), "openclaw.json") +mode = os.environ.get("OPENCLAW_GATEWAY_AUTH_MODE", "token") +token = os.environ.get("OPENCLAW_WEB_PASSWORD", "changeme") + +with open(cfg, "r", encoding="utf-8") as f: + data = json.load(f) + +gw = data.setdefault("gateway", {}) +auth = gw.setdefault("auth", {}) +changed = False + +if mode == "token": + if auth.get("mode") != "token": + auth["mode"] = "token" + changed = True + if auth.get("token") != token: + auth["token"] = token + changed = True + remote = gw.setdefault("remote", {}) + if remote.get("token") != token: + remote["token"] = token + changed = True +elif mode == "password": + if auth.get("mode") != "password": + auth["mode"] = "password" + changed = True + if auth.get("password") != token: + auth["password"] = token + changed = True + +if changed: + with open(cfg, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) +PY + chmod 600 "$cfg" 2>/dev/null || true +} + +oc_setup_ssh_manual() { + echo "Initializing SSH..." + + if [ -n "${PUBLIC_KEY:-}" ]; then + mkdir -p ~/.ssh + if command -v python3 >/dev/null 2>&1; then + python3 - <<'PY' +import os +import re + +raw = os.environ.get("PUBLIC_KEY", "") +raw = raw.strip() +if raw: + raw = raw.replace("\\r\\n", "\n").replace("\\n", "\n").replace("\\r", "\n") + raw = raw.replace("\r\n", "\n").replace("\r", "\n") + key_start = r"(?:ssh-|ecdsa-|sk-)" + raw = re.sub(rf"\s+(?={key_start}[^\s]+\s+[A-Za-z0-9+/=]{{20,}})", "\n", raw) + +lines = [line.strip() for line in raw.splitlines() if line.strip()] + +path = os.path.expanduser("~/.ssh/authorized_keys") +with open(path, "w", encoding="utf-8") as f: + f.write("\n".join(lines) + ("\n" if lines else "")) + +print(f"SSH keys written: {len(lines)}") +PY + else + printf '%b' "$PUBLIC_KEY" | awk ' + BEGIN { + key_re = "^(ssh-|ecdsa-|sk-)"; + base_re = "^[A-Za-z0-9+/=]{20,}$"; + } + { + for (i = 1; i <= NF; i++) { + token = $i; + next_token = (i < NF ? $(i + 1) : ""); + if (token ~ key_re && next_token ~ base_re) { + if (line != "") { + print line; + } + line = token; + } else if (line != "") { + line = line " " token; + } else { + line = token; + } + } + } + END { + if (line != "") { + print line; + } + } + ' > ~/.ssh/authorized_keys + fi + chmod 700 ~/.ssh + chmod 600 ~/.ssh/authorized_keys + else + echo "WARNING: PUBLIC_KEY not set - SSH login disabled" + fi + + for keytype in rsa ecdsa ed25519; do + local keyfile="/etc/ssh/ssh_host_${keytype}_key" + if [ ! -f "$keyfile" ]; then + ssh-keygen -t "$keytype" -f "$keyfile" -N "" -q >/dev/null 2>&1 || true + fi + done + + mkdir -p /var/run/sshd + if [ -x /usr/sbin/sshd ]; then + /usr/sbin/sshd + echo "SSH ready" + else + echo "WARNING: sshd not found - SSH unavailable" + fi +} + +oc_start_runpod_ssh() { + if [ -f /start.sh ]; then + echo "Starting SSH setup..." 
+ /start.sh >/var/log/runpod-start.log 2>&1 & + sleep 5 + fi +} diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index dba14ae..c7d7122 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -1,9 +1,10 @@ #!/bin/bash -# entrypoint.sh - Clawdbot + vLLM startup script for RunPod +# entrypoint.sh - OpenClaw + vLLM startup script for Runpod set -e +source /opt/openclaw/entrypoint-common.sh echo "============================================" -echo " Clawdbot + vLLM Startup" +echo " OpenClaw + vLLM Startup" echo "============================================" # Configuration from environment @@ -15,14 +16,20 @@ GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.90}" TOOL_CALL_PARSER="${TOOL_CALL_PARSER:-hermes}" TENSOR_PARALLEL_SIZE="${TENSOR_PARALLEL_SIZE:-auto}" HF_HOME="${HF_HOME:-/workspace/huggingface}" -CLAWDBOT_STATE_DIR="${CLAWDBOT_STATE_DIR:-/workspace/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-/workspace/.openclaw}" +OPENCLAW_WEB_PASSWORD="${OPENCLAW_WEB_PASSWORD:-changeme}" TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}" export HF_HOME -export CLAWDBOT_STATE_DIR +export OPENCLAW_STATE_DIR + +BOT_CMD="openclaw" # Ensure directories exist -mkdir -p "$HF_HOME" "$CLAWDBOT_STATE_DIR" /workspace/clawd +mkdir -p "$HF_HOME" "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents/main/sessions" \ + "$OPENCLAW_STATE_DIR/credentials" /workspace/openclaw +chmod 700 "$OPENCLAW_STATE_DIR" "$OPENCLAW_STATE_DIR/agents" "$OPENCLAW_STATE_DIR/agents/main" \ + "$OPENCLAW_STATE_DIR/agents/main/sessions" "$OPENCLAW_STATE_DIR/credentials" 2>/dev/null || true # Auto-detect tensor parallel size if [ "$TENSOR_PARALLEL_SIZE" = "auto" ]; then @@ -39,9 +46,9 @@ echo " Tensor parallel: $TENSOR_PARALLEL_SIZE" echo " Tool parser: $TOOL_CALL_PARSER" echo "" -# Initialize Clawdbot config if not exists -if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then - echo "Creating Clawdbot configuration..." +# Initialize OpenClaw config if not exists +if [ ! -f "$OPENCLAW_STATE_DIR/openclaw.json" ]; then + echo "Creating OpenClaw configuration..." # Build telegram config based on whether token is provided if [ -n "$TELEGRAM_BOT_TOKEN" ]; then @@ -50,12 +57,12 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then TELEGRAM_CONFIG="\"telegram\": { \"enabled\": true }" fi - cat > "$CLAWDBOT_STATE_DIR/clawdbot.json" << EOF + cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { "model": { "primary": "local-vllm/${SERVED_MODEL_NAME}" }, - "workspace": "/workspace/clawd" + "workspace": "/workspace/openclaw" } }, "models": { @@ -79,23 +86,27 @@ if [ ! -f "$CLAWDBOT_STATE_DIR/clawdbot.json" ]; then "channels": { ${TELEGRAM_CONFIG} }, + "skills": { + "load": { "extraDirs": ["/opt/openclaw/skills"] } + }, "gateway": { - "mode": "local" + "mode": "local", + "bind": "lan", + "auth": { "mode": "token", "token": "${OPENCLAW_WEB_PASSWORD}" } }, "logging": { "level": "info" } } EOF - chmod 600 "$CLAWDBOT_STATE_DIR/clawdbot.json" + chmod 600 "$OPENCLAW_STATE_DIR/openclaw.json" echo "Config created. Telegram token: ${TELEGRAM_BOT_TOKEN:+provided}${TELEGRAM_BOT_TOKEN:-NOT SET - add manually}" else - echo "Existing config found at $CLAWDBOT_STATE_DIR/clawdbot.json - preserving it" + echo "Existing config found at $OPENCLAW_STATE_DIR/openclaw.json - preserving it" fi -# Initialize Clawdbot workspace if empty -if [ ! -f "/workspace/clawd/AGENTS.md" ]; then - echo "Initializing Clawdbot workspace..." 
- clawdbot setup --non-interactive --accept-risk --workspace /workspace/clawd 2>/dev/null || true -fi +# Keep gateway tokens in sync with OPENCLAW_WEB_PASSWORD. +oc_sync_gateway_auth "token" + +# Workspace files are seeded during image build. # Build vLLM command VLLM_CMD="vllm serve $MODEL_NAME" @@ -138,22 +149,14 @@ if [ $WAITED -ge $MAX_WAIT ]; then exit 1 fi -# Start Clawdbot gateway +# Start OpenClaw gateway echo "" -echo "Starting Clawdbot gateway..." -clawdbot gateway & +echo "Starting OpenClaw gateway..." +"$BOT_CMD" gateway --auth token --token "$OPENCLAW_WEB_PASSWORD" & GATEWAY_PID=$! echo "" -echo "============================================" -echo " Services Running" -echo "============================================" -echo " vLLM API: http://localhost:8000" -echo " Clawdbot Gateway: ws://localhost:18789" -echo "" -echo " vLLM PID: $VLLM_PID" -echo " Gateway PID: $GATEWAY_PID" -echo "============================================" +oc_print_ready "vLLM API" "$SERVED_MODEL_NAME" "$MAX_MODEL_LEN tokens" "token" echo "" # Keep container running and handle signals diff --git a/scripts/openclaw-image-gen b/scripts/openclaw-image-gen new file mode 100644 index 0000000..5a13b79 --- /dev/null +++ b/scripts/openclaw-image-gen @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +""" +openclaw-image-gen - Generate images using persistent FLUX.2 Klein server +Fast inference with pre-loaded model + +Usage: + openclaw-image-gen --prompt "A cute robot" --output /tmp/robot.png + openclaw-image-gen --prompt "Sunset" --aspect 16:9 --output /tmp/sunset.png +""" + +import argparse +import base64 +import json +import os +import sys +import urllib.request + +IMAGE_SERVER_URL = "http://localhost:8002/generate" + + +def generate_image(prompt, width=None, height=None, aspect=None, long_side=1024, + steps=4, guidance=1.0, seed=0, filename=None): + """Call image server and return PNG bytes + metadata.""" + payload = { + "prompt": prompt, + "steps": steps, + "guidance": guidance, + "seed": seed, + "long_side": long_side, + } + if width: + payload["width"] = width + if height: + payload["height"] = height + if aspect: + payload["aspect"] = aspect + if filename: + payload["filename"] = filename + + req = urllib.request.Request( + IMAGE_SERVER_URL, + data=json.dumps(payload).encode('utf-8'), + headers={"Content-Type": "application/json"}, + method="POST" + ) + + try: + with urllib.request.urlopen(req, timeout=120) as response: + data = json.loads(response.read()) + if "error" in data: + raise RuntimeError(data["error"]) + return base64.b64decode(data["image"]), data["width"], data["height"], data + except urllib.error.URLError as e: + print(f"Error: Cannot connect to image server at {IMAGE_SERVER_URL}", file=sys.stderr) + print("Make sure openclaw-image-server is running", file=sys.stderr) + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser(description="Generate images with FLUX.2 Klein") + parser.add_argument("--prompt", required=True, help="Image prompt") + parser.add_argument("--width", type=int, help="Output width") + parser.add_argument("--height", type=int, help="Output height") + parser.add_argument("--aspect", help="Aspect ratio (e.g. 
1:1, 16:9)") + parser.add_argument("--long-side", type=int, default=1024, help="Long side when using aspect") + parser.add_argument("--steps", type=int, default=4, help="Inference steps") + parser.add_argument("--guidance", type=float, default=1.0, help="Guidance scale") + parser.add_argument("--seed", type=int, default=0, help="Random seed") + parser.add_argument("--output", default="output.png", help="Output file path") + # Legacy flag - ignored, server always uses GPU + parser.add_argument("--cpu-offload", action="store_true", help="(ignored, server uses GPU)") + parser.add_argument("--no-quant-matmul", action="store_true", help="(ignored)") + parser.add_argument("--model", help="(ignored, server has model loaded)") + + args = parser.parse_args() + + print(f"Generating: {args.prompt}", file=sys.stderr) + + img_bytes, w, h, meta = generate_image( + prompt=args.prompt, + width=args.width, + height=args.height, + aspect=args.aspect, + long_side=args.long_side, + steps=args.steps, + guidance=args.guidance, + seed=args.seed, + filename=os.path.basename(args.output), + ) + + output_dir = os.path.dirname(os.path.abspath(args.output)) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + with open(args.output, "wb") as f: + f.write(img_bytes) + + print(f"Saved {w}x{h} image to: {args.output}", file=sys.stderr) + if isinstance(meta, dict): + url = meta.get("image_public_url") or meta.get("image_proxy_url") or meta.get("image_local_url") or meta.get("image_url") + if url: + if url.startswith("/"): + base = IMAGE_SERVER_URL.rsplit("/", 1)[0] + url = f"{base}{url}" + print(f"URL: {url}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/scripts/openclaw-image-server b/scripts/openclaw-image-server new file mode 100644 index 0000000..41c7285 --- /dev/null +++ b/scripts/openclaw-image-server @@ -0,0 +1,342 @@ +#!/usr/bin/env python3 +""" +openclaw-image-server - Persistent FLUX.2 Klein image generation server +Keeps model loaded in VRAM for instant inference + +Starts on port 8002, accepts POST requests with JSON body +""" + +import argparse +import base64 +import io +import json +import math +import os +import sys +import time +from http.server import HTTPServer, BaseHTTPRequestHandler + +# Reduce VRAM pressure from torch.compile/inductor +os.environ.setdefault("TORCH_COMPILE_DISABLE", "1") +os.environ.setdefault("TORCHDYNAMO_DISABLE", "1") +os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1") +os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True") + +# Global pipeline reference +pipe = None +device = None +generator_device = None +last_image_path = None +server_port = 8002 + +IMAGE_OUTPUT_DIR = os.environ.get("OPENCLAW_IMAGE_OUTPUT_DIR", "/workspace/openclaw/images") +IMAGE_PUBLIC_BASE_URL = os.environ.get("OPENCLAW_IMAGE_PUBLIC_BASE_URL") +IMAGE_PUBLIC_BASE_URL_FILE = os.environ.get( + "OPENCLAW_IMAGE_PUBLIC_BASE_URL_FILE", + "/workspace/openclaw/image-base-url.txt", +) + + +def ensure_output_dir(): + os.makedirs(IMAGE_OUTPUT_DIR, exist_ok=True) + + +def safe_basename(name): + return os.path.basename(name) + + +def build_image_name(seed, width, height): + ts = int(time.time()) + return f"openclaw-{ts}-{seed}-{width}x{height}.png" + + +def get_public_base_url(): + if IMAGE_PUBLIC_BASE_URL: + return IMAGE_PUBLIC_BASE_URL + try: + if os.path.isfile(IMAGE_PUBLIC_BASE_URL_FILE): + with open(IMAGE_PUBLIC_BASE_URL_FILE, "r", encoding="utf-8") as f: + value = f.read().strip() + if value: + return value + except Exception: + pass + return None + + +def 
round_to_multiple(value, multiple=8): + return int(math.ceil(value / multiple) * multiple) + + +def parse_aspect(aspect): + if ":" in aspect: + parts = aspect.split(":") + elif "x" in aspect: + parts = aspect.split("x") + else: + raise ValueError("Aspect ratio must be like 1:1 or 16:9.") + w = float(parts[0]) + h = float(parts[1]) + if w <= 0 or h <= 0: + raise ValueError("Aspect ratio values must be positive.") + return w / h + + +def resolve_size(width, height, aspect, long_side=1024): + if width and height: + w = width + h = height + elif aspect: + ratio = parse_aspect(aspect) + if ratio >= 1: + w = long_side + h = long_side / ratio + else: + h = long_side + w = long_side * ratio + else: + w = long_side + h = long_side + w = round_to_multiple(max(256, int(w))) + h = round_to_multiple(max(256, int(h))) + return w, h + + +class ImageHandler(BaseHTTPRequestHandler): + def log_message(self, format, *args): + print(f"[ImageServer] {args[0]}") + + def do_GET(self): + if self.path == "/health": + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(json.dumps({"status": "ok", "model_loaded": pipe is not None}).encode()) + elif self.path == "/latest": + if last_image_path: + self._serve_file(last_image_path) + else: + self.send_response(404) + self.end_headers() + elif self.path.startswith("/images/"): + image_name = safe_basename(self.path[len("/images/"):]) + image_path = os.path.join(IMAGE_OUTPUT_DIR, image_name) + self._serve_file(image_path) + else: + self.send_response(404) + self.end_headers() + + def _serve_file(self, path): + real_root = os.path.realpath(IMAGE_OUTPUT_DIR) + real_path = os.path.realpath(path) + if not real_path.startswith(real_root + os.sep) and real_path != real_root: + self.send_response(403) + self.end_headers() + return + if not os.path.isfile(real_path): + self.send_response(404) + self.end_headers() + return + try: + with open(real_path, "rb") as f: + data = f.read() + content_type = "image/png" if real_path.lower().endswith(".png") else "application/octet-stream" + self.send_response(200) + self.send_header("Content-Type", content_type) + self.send_header("Content-Length", str(len(data))) + self.end_headers() + self.wfile.write(data) + except Exception: + self.send_response(500) + self.end_headers() + + def do_POST(self): + if self.path != "/generate": + self.send_response(404) + self.end_headers() + return + + try: + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length) + data = json.loads(body) + + prompt = data.get("prompt", "") + if not prompt: + self.send_response(400) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(json.dumps({"error": "prompt required"}).encode()) + return + + width = data.get("width") + height = data.get("height") + aspect = data.get("aspect") + long_side = data.get("long_side", 1024) + steps = data.get("steps", 4) + guidance = data.get("guidance", 1.0) + seed = data.get("seed", 0) + + w, h = resolve_size(width, height, aspect, long_side) + print(f"[ImageServer] Generating {w}x{h}: {prompt[:50]}...") + + import torch + gen = torch.Generator(device=generator_device).manual_seed(seed) + + result = pipe( + prompt=prompt, + height=h, + width=w, + guidance_scale=guidance, + num_inference_steps=steps, + generator=gen, + ) + + # Convert to PNG bytes + img_buffer = io.BytesIO() + result.images[0].save(img_buffer, format="PNG") + img_bytes = img_buffer.getvalue() + + ensure_output_dir() + 
filename = data.get("filename") or data.get("name") + if filename: + filename = safe_basename(filename) + else: + filename = build_image_name(seed, w, h) + image_path = os.path.join(IMAGE_OUTPUT_DIR, filename) + with open(image_path, "wb") as f: + f.write(img_bytes) + global last_image_path + last_image_path = image_path + + image_url = f"/images/{filename}" + local_url = f"http://localhost:{server_port}{image_url}" + proxy_url = None + public_url = None + public_base = get_public_base_url() + if public_base: + public_base = public_base.rstrip("/") + public_url = f"{public_base}{image_url}" + proxy_url = public_url + else: + pod_id = os.environ.get("RUNPOD_POD_ID") + if pod_id: + proxy_url = f"https://{pod_id}-{server_port}.proxy.runpod.net{image_url}" + + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(json.dumps({ + "image": base64.b64encode(img_bytes).decode(), + "width": w, + "height": h, + "format": "png", + "image_name": filename, + "image_path": image_path, + "image_url": image_url, + "image_local_url": local_url, + "image_proxy_url": proxy_url, + "image_public_url": public_url + }).encode()) + print(f"[ImageServer] Done, {len(img_bytes)} bytes") + + except Exception as e: + import traceback + print(f"[ImageServer] Error: {e}") + traceback.print_exc() + self.send_response(500) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(json.dumps({"error": str(e)}).encode()) + + +def load_model(model_id): + global pipe, device, generator_device + import torch + import diffusers + + # Import SDNQ for the quantized model + try: + import sdnq # noqa: F401 + from sdnq.loader import apply_sdnq_options_to_model + from sdnq.quantizer import SDNQConfig, SDNQQuantizer, QuantizationMethod + has_sdnq = True + except ImportError: + has_sdnq = False + print("[ImageServer] WARNING: sdnq not installed, may not work with SDNQ models") + + print(f"[ImageServer] Loading model: {model_id}") + device = "cuda" if torch.cuda.is_available() else "cpu" + generator_device = device + dtype = torch.bfloat16 if device == "cuda" else torch.float32 + + # Register SDNQ quantizer with diffusers if available + if has_sdnq: + try: + from diffusers.quantizers import auto as diff_auto + diff_auto.AUTO_QUANTIZATION_CONFIG_MAPPING.setdefault(QuantizationMethod.SDNQ.value, SDNQConfig) + diff_auto.AUTO_QUANTIZATION_CONFIG_MAPPING.setdefault(QuantizationMethod.SDNQ_TRAINING.value, SDNQConfig) + diff_auto.AUTO_QUANTIZER_MAPPING.setdefault(QuantizationMethod.SDNQ.value, SDNQQuantizer) + diff_auto.AUTO_QUANTIZER_MAPPING.setdefault(QuantizationMethod.SDNQ_TRAINING.value, SDNQQuantizer) + print("[ImageServer] SDNQ quantizer registered with diffusers") + except Exception as exc: + print(f"[ImageServer] WARNING: failed to register SDNQ quantizer: {exc}") + + # Load pipeline - SDNQ models need special handling + pipe = diffusers.Flux2KleinPipeline.from_pretrained(model_id, torch_dtype=dtype) + + # Apply SDNQ optimizations if available + if has_sdnq: + triton_available = False + try: + import triton # noqa: F401 + triton_available = True + except ImportError: + pass + + use_quantized = triton_available and torch.cuda.is_available() + try: + pipe.transformer = apply_sdnq_options_to_model(pipe.transformer, use_quantized_matmul=use_quantized) + pipe.text_encoder = apply_sdnq_options_to_model(pipe.text_encoder, use_quantized_matmul=use_quantized) + print("[ImageServer] SDNQ optimizations applied") + except Exception as e: + 
print(f"[ImageServer] SDNQ optimization failed: {e}") + + pipe.to(device) + if device == "cuda": + try: + pipe.enable_attention_slicing() + pipe.enable_vae_slicing() + pipe.enable_vae_tiling() + print("[ImageServer] Enabled attention/vae slicing for lower VRAM") + except Exception as exc: + print(f"[ImageServer] WARNING: could not enable VRAM optimizations: {exc}") + print(f"[ImageServer] Model loaded on {device}") + + # Report VRAM usage + if device == "cuda": + allocated = torch.cuda.memory_allocated() / 1024**3 + print(f"[ImageServer] VRAM allocated: {allocated:.2f} GB") + + +def main(): + global server_port + parser = argparse.ArgumentParser(description="FLUX.2 Klein image generation server") + parser.add_argument("--model", default="Disty0/FLUX.2-klein-4B-SDNQ-4bit-dynamic", + help="HuggingFace model ID") + parser.add_argument("--host", default="0.0.0.0", help="Bind host") + parser.add_argument("--port", type=int, default=8002, help="Bind port") + args = parser.parse_args() + server_port = args.port + + ensure_output_dir() + load_model(args.model) + + server = HTTPServer((args.host, args.port), ImageHandler) + print(f"[ImageServer] Listening on http://{args.host}:{args.port}") + print("[ImageServer] POST /generate with JSON: {prompt, width, height, aspect, steps, guidance, seed}") + server.serve_forever() + + +if __name__ == "__main__": + main() diff --git a/scripts/openclaw-stt b/scripts/openclaw-stt new file mode 100644 index 0000000..f147631 --- /dev/null +++ b/scripts/openclaw-stt @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +""" +openclaw-stt - Speech-to-Text using LFM2.5-Audio server +Transcribes audio using the persistent audio server (near-instant inference) + +Usage: + openclaw-stt /path/to/audio.wav + openclaw-stt --audio /path/to/recording.wav --output /tmp/transcript.txt +""" + +import argparse +import base64 +import json +import sys +import urllib.request + +AUDIO_SERVER_URL = "http://localhost:8001/v1/chat/completions" + + +def stt_stream(wav_path: str) -> str: + """Stream ASR from server, return transcribed text.""" + # Read and encode audio file + with open(wav_path, "rb") as f: + audio_data = base64.b64encode(f.read()).decode("utf-8") + + payload = { + "model": "", + "messages": [ + {"role": "system", "content": "Perform ASR."}, + { + "role": "user", + "content": [ + { + "type": "input_audio", + "input_audio": {"data": audio_data, "format": "wav"} + } + ] + } + ], + "stream": True, + "max_tokens": 4096 + } + + req = urllib.request.Request( + AUDIO_SERVER_URL, + data=json.dumps(payload).encode('utf-8'), + headers={"Content-Type": "application/json"}, + method="POST" + ) + + text_chunks = [] + + try: + with urllib.request.urlopen(req, timeout=120) as response: + for line in response: + line = line.decode('utf-8').strip() + if not line or line == "data: [DONE]": + continue + if line.startswith("data: "): + try: + data = json.loads(line[6:]) + delta = data.get("choices", [{}])[0].get("delta", {}) + if "content" in delta and delta["content"]: + text_chunks.append(delta["content"]) + except json.JSONDecodeError: + pass + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + return "".join(text_chunks).strip() + + +def main(): + parser = argparse.ArgumentParser(description="Transcribe audio using LFM2.5-Audio") + parser.add_argument("audio", nargs="?", help="Input WAV audio file") + parser.add_argument("-a", "--audio", dest="audio_opt", help="Input WAV file (alternative)") + parser.add_argument("-o", "--output", help="Output text file (optional, 
prints to stdout)") + + args = parser.parse_args() + audio_path = args.audio or args.audio_opt + + if not audio_path: + print("Error: Audio file is required", file=sys.stderr) + parser.print_help() + sys.exit(1) + + print(f"Transcribing: {audio_path}", file=sys.stderr) + + transcript = stt_stream(audio_path) + + if args.output: + with open(args.output, "w") as f: + f.write(transcript) + print(f"Transcript saved to: {args.output}", file=sys.stderr) + else: + print(transcript) + + +if __name__ == "__main__": + main() diff --git a/scripts/openclaw-tts b/scripts/openclaw-tts new file mode 100644 index 0000000..f9b1d83 --- /dev/null +++ b/scripts/openclaw-tts @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +""" +openclaw-tts - Text-to-Speech using LFM2.5-Audio server +Converts text to speech using the persistent audio server (near-instant inference) + +Usage: + openclaw-tts "Hello world" --output /tmp/hello.wav + openclaw-tts --text "Hello" --output /tmp/hello.wav --voice "UK female" +""" + +import argparse +import base64 +import json +import os +import struct +import sys +import wave +import urllib.request + +AUDIO_SERVER_URL = "http://localhost:8001/v1/chat/completions" +SAMPLE_RATE = 24000 + +VOICES = { + "US male": "Perform TTS. Use the US male voice.", + "UK male": "Perform TTS. Use the UK male voice.", + "US female": "Perform TTS. Use the US female voice.", + "UK female": "Perform TTS. Use the UK female voice.", +} + + +def tts_stream(text: str, voice: str = "US male") -> list: + """Stream TTS from server, return audio samples as 16-bit integers.""" + system_prompt = VOICES.get(voice, VOICES["US male"]) + + payload = { + "model": "", + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": text} + ], + "stream": True, + "max_tokens": 4096 + } + + req = urllib.request.Request( + AUDIO_SERVER_URL, + data=json.dumps(payload).encode('utf-8'), + headers={"Content-Type": "application/json"}, + method="POST" + ) + + all_samples = [] + + try: + with urllib.request.urlopen(req, timeout=120) as response: + for line in response: + line = line.decode('utf-8').strip() + if not line or line == "data: [DONE]": + continue + if line.startswith("data: "): + try: + data = json.loads(line[6:]) + delta = data.get("choices", [{}])[0].get("delta", {}) + if "audio" in delta and delta["audio"] and "data" in delta["audio"]: + chunk_data = delta["audio"]["data"] + pcm_bytes = base64.b64decode(chunk_data) + n_samples = len(pcm_bytes) // 2 + if n_samples > 0: + samples = struct.unpack(f"<{n_samples}h", pcm_bytes) + all_samples.extend(samples) + except (json.JSONDecodeError, struct.error): + pass + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + return all_samples + + +def save_wav(samples: list, output_path: str, sample_rate: int = SAMPLE_RATE): + """Save 16-bit samples to WAV file.""" + output_dir = os.path.dirname(os.path.abspath(output_path)) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + with wave.open(output_path, 'wb') as wav_file: + wav_file.setnchannels(1) + wav_file.setsampwidth(2) # 16-bit + wav_file.setframerate(sample_rate) + wav_file.writeframes(struct.pack(f"<{len(samples)}h", *samples)) + + +def main(): + parser = argparse.ArgumentParser(description="Convert text to speech using LFM2.5-Audio") + parser.add_argument("text", nargs="?", help="Text to convert to speech") + parser.add_argument("-t", "--text", dest="text_opt", help="Text to convert (alternative)") + parser.add_argument("-o", "--output", required=True, help="Output 
WAV file path") + parser.add_argument("-v", "--voice", default="US male", + choices=list(VOICES.keys()), help="Voice to use") + + args = parser.parse_args() + text = args.text or args.text_opt + + if not text: + print("Error: Text is required", file=sys.stderr) + parser.print_help() + sys.exit(1) + + print(f"Generating speech for: \"{text}\"", file=sys.stderr) + + samples = tts_stream(text, args.voice) + + if not samples: + print("Error: No audio generated", file=sys.stderr) + sys.exit(1) + + save_wav(samples, args.output) + duration = len(samples) / SAMPLE_RATE + print(f"Success! Audio saved to: {args.output} ({duration:.2f}s)", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/scripts/openclaw-web-proxy b/scripts/openclaw-web-proxy new file mode 100644 index 0000000..fe18279 --- /dev/null +++ b/scripts/openclaw-web-proxy @@ -0,0 +1,426 @@ +#!/usr/bin/env node +"use strict"; + +const http = require("http"); +const https = require("https"); +const fs = require("fs"); +const path = require("path"); +const { URL } = require("url"); + +function readArg(flag, fallback) { + const idx = process.argv.indexOf(flag); + if (idx !== -1 && process.argv[idx + 1]) { + return process.argv[idx + 1]; + } + return fallback; +} + +const host = readArg("--host", process.env.OPENCLAW_WEB_PROXY_HOST || "0.0.0.0"); +const port = parseInt( + readArg("--port", process.env.OPENCLAW_WEB_PROXY_PORT || "8080"), + 10, +); +const webRoot = readArg("--web-root", process.env.OPENCLAW_WEB_ROOT || "/opt/openclaw/web"); +const llmBase = readArg("--llm-url", process.env.OPENCLAW_LLM_URL || "http://localhost:8000"); +const audioBase = readArg("--audio-url", process.env.OPENCLAW_AUDIO_URL || "http://localhost:8001"); +const imageBase = readArg("--image-url", process.env.OPENCLAW_IMAGE_URL || "http://localhost:8002"); +const audioOutputDir = readArg( + "--audio-dir", + process.env.OPENCLAW_AUDIO_OUTPUT_DIR || "/workspace/openclaw/audio", +); + +const TTS_VOICES = { + "US male": "Perform TTS. Use the US male voice.", + "UK male": "Perform TTS. Use the UK male voice.", + "US female": "Perform TTS. Use the US female voice.", + "UK female": "Perform TTS. 
Use the UK female voice.", +}; + +const configPayload = { + llmBasePath: "/api/llm", + audioBasePath: "/api/audio", + imageBasePath: "/api/image", + imagePublicBaseUrl: process.env.OPENCLAW_IMAGE_PUBLIC_BASE_URL || "", + llmModel: process.env.SERVED_MODEL_NAME || "glm-4.7-flash", +}; + +const mimeTypes = { + ".html": "text/html; charset=utf-8", + ".js": "application/javascript; charset=utf-8", + ".css": "text/css; charset=utf-8", + ".png": "image/png", + ".svg": "image/svg+xml", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".wav": "audio/wav", + ".mp3": "audio/mpeg", + ".m4a": "audio/mp4", + ".ogg": "audio/ogg", + ".ico": "image/x-icon", + ".json": "application/json; charset=utf-8", +}; + +function sendJson(res, status, payload) { + res.statusCode = status; + res.setHeader("Content-Type", "application/json; charset=utf-8"); + res.end(JSON.stringify(payload)); +} + +function sendText(res, status, payload) { + res.statusCode = status; + res.setHeader("Content-Type", "text/plain; charset=utf-8"); + res.end(payload); +} + +function readBody(req) { + return new Promise((resolve, reject) => { + const chunks = []; + req.on("data", (chunk) => chunks.push(chunk)); + req.on("end", () => resolve(Buffer.concat(chunks))); + req.on("error", reject); + }); +} + +async function readJson(req) { + const body = await readBody(req); + if (!body.length) { + return {}; + } + try { + return JSON.parse(body.toString("utf-8")); + } catch (err) { + throw new Error("invalid_json"); + } +} + +function safeFilePath(requestPath) { + const cleanPath = requestPath.split("?")[0]; + const normalized = path.normalize(cleanPath).replace(/^(\.\.(\/|\\|$))+/, ""); + return path.join(webRoot, normalized); +} + +function serveFile(filePath, res) { + fs.stat(filePath, (err, stat) => { + if (err || !stat.isFile()) { + res.statusCode = 404; + res.end("Not found"); + return; + } + const ext = path.extname(filePath).toLowerCase(); + res.statusCode = 200; + res.setHeader("Content-Type", mimeTypes[ext] || "application/octet-stream"); + res.setHeader("Content-Length", String(stat.size)); + fs.createReadStream(filePath).pipe(res); + }); +} + +function serveMediaFile(filePath, res) { + const root = path.resolve(audioOutputDir); + const target = path.resolve(filePath); + if (!target.startsWith(root + path.sep) && target !== root) { + res.statusCode = 403; + res.end("Forbidden"); + return; + } + fs.stat(target, (err, stat) => { + if (err || !stat.isFile()) { + res.statusCode = 404; + res.end("Not found"); + return; + } + const ext = path.extname(target).toLowerCase(); + res.statusCode = 200; + res.setHeader("Content-Type", mimeTypes[ext] || "application/octet-stream"); + res.setHeader("Content-Length", String(stat.size)); + fs.createReadStream(target).pipe(res); + }); +} + +function serveStatic(req, res) { + const url = new URL(req.url, `http://${req.headers.host || "localhost"}`); + if (url.pathname === "/") { + return serveFile(path.join(webRoot, "index.html"), res); + } + return serveFile(safeFilePath(url.pathname), res); +} + +function proxyRequest(req, res, targetBase, stripPrefix) { + const original = new URL(req.url, `http://${req.headers.host || "localhost"}`); + let proxiedPath = original.pathname; + if (stripPrefix && proxiedPath.startsWith(stripPrefix)) { + proxiedPath = proxiedPath.slice(stripPrefix.length) || "/"; + } + const targetUrl = new URL(proxiedPath + original.search, targetBase); + const proxyHeaders = { ...req.headers, host: targetUrl.host }; + const client = targetUrl.protocol === "https:" ? 
https : http; + const proxyReq = client.request( + { + hostname: targetUrl.hostname, + port: targetUrl.port || (targetUrl.protocol === "https:" ? 443 : 80), + method: req.method, + path: targetUrl.pathname + targetUrl.search, + headers: proxyHeaders, + }, + (proxyRes) => { + res.writeHead(proxyRes.statusCode || 502, proxyRes.headers); + proxyRes.pipe(res); + }, + ); + + proxyReq.on("error", (err) => { + if (!res.headersSent) { + sendJson(res, 502, { error: "proxy_error", details: err.message }); + } else { + res.end(); + } + }); + + req.pipe(proxyReq); + req.on("aborted", () => proxyReq.destroy()); + res.on("close", () => proxyReq.destroy()); +} + +async function parseSse(response, handlers) { + const reader = response.body?.getReader(); + if (!reader) { + return; + } + const decoder = new TextDecoder(); + let buffer = ""; + while (true) { + const { value, done } = await reader.read(); + if (done) { + break; + } + buffer += decoder.decode(value, { stream: true }); + let idx = buffer.indexOf("\n"); + while (idx !== -1) { + const line = buffer.slice(0, idx).trim(); + buffer = buffer.slice(idx + 1); + idx = buffer.indexOf("\n"); + if (!line.startsWith("data:")) { + continue; + } + const payload = line.slice(5).trim(); + if (!payload || payload === "[DONE]") { + continue; + } + try { + const data = JSON.parse(payload); + handlers(data); + } catch { + // ignore invalid json chunk + } + } + } +} + +function pcmToWav(pcmBuffer, sampleRate = 24000, channels = 1, bitDepth = 16) { + const blockAlign = (channels * bitDepth) / 8; + const byteRate = sampleRate * blockAlign; + const header = Buffer.alloc(44); + header.write("RIFF", 0); + header.writeUInt32LE(36 + pcmBuffer.length, 4); + header.write("WAVE", 8); + header.write("fmt ", 12); + header.writeUInt32LE(16, 16); + header.writeUInt16LE(1, 20); + header.writeUInt16LE(channels, 22); + header.writeUInt32LE(sampleRate, 24); + header.writeUInt32LE(byteRate, 28); + header.writeUInt16LE(blockAlign, 32); + header.writeUInt16LE(bitDepth, 34); + header.write("data", 36); + header.writeUInt32LE(pcmBuffer.length, 40); + return Buffer.concat([header, pcmBuffer]); +} + +async function callAudioServer(payload) { + const url = new URL("/v1/chat/completions", audioBase); + const response = await fetch(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }); + if (!response.ok) { + const text = await response.text().catch(() => ""); + throw new Error(text || `audio server error (${response.status})`); + } + return response; +} + +async function handleTts(text, voice) { + const prompt = TTS_VOICES[voice] || TTS_VOICES["US male"]; + const payload = { + model: "", + messages: [ + { role: "system", content: prompt }, + { role: "user", content: text }, + ], + stream: true, + max_tokens: 4096, + }; + const response = await callAudioServer(payload); + const chunks = []; + await parseSse(response, (data) => { + const delta = data?.choices?.[0]?.delta || {}; + const audio = delta.audio; + if (audio && audio.data) { + chunks.push(Buffer.from(audio.data, "base64")); + } + }); + const pcm = Buffer.concat(chunks); + return pcmToWav(pcm); +} + +async function handleStt(audioBase64, format) { + const payload = { + model: "", + messages: [ + { role: "system", content: "Perform ASR." 
}, + { + role: "user", + content: [ + { + type: "input_audio", + input_audio: { data: audioBase64, format: format || "wav" }, + }, + ], + }, + ], + stream: true, + max_tokens: 4096, + }; + const response = await callAudioServer(payload); + let text = ""; + await parseSse(response, (data) => { + const delta = data?.choices?.[0]?.delta || {}; + if (typeof delta.content === "string") { + text += delta.content; + } + }); + return text.trim(); +} + +function probe(url) { + return new Promise((resolve) => { + const target = new URL(url); + const client = target.protocol === "https:" ? https : http; + const req = client.request( + { + hostname: target.hostname, + port: target.port || (target.protocol === "https:" ? 443 : 80), + path: target.pathname + target.search, + method: "GET", + timeout: 2000, + }, + (res) => { + res.resume(); + resolve({ ok: res.statusCode && res.statusCode < 500, status: res.statusCode }); + }, + ); + req.on("error", () => resolve({ ok: false, status: null })); + req.on("timeout", () => { + req.destroy(); + resolve({ ok: false, status: null }); + }); + req.end(); + }); +} + +const server = http.createServer(async (req, res) => { + const url = new URL(req.url, `http://${req.headers.host || "localhost"}`); + + if (url.pathname === "/config.json") { + return sendJson(res, 200, configPayload); + } + + if (url.pathname === "/health") { + const [llm, image, audio] = await Promise.all([ + probe(new URL("/health", llmBase).toString()), + probe(new URL("/health", imageBase).toString()), + probe(new URL("/v1/models", audioBase).toString()), + ]); + return sendJson(res, 200, { llm, image, audio }); + } + + if (url.pathname === "/api/audio/tts") { + if (req.method !== "POST") { + return sendText(res, 405, "Method not allowed"); + } + try { + const body = await readJson(req); + const text = typeof body.text === "string" ? body.text.trim() : ""; + const voice = typeof body.voice === "string" ? body.voice.trim() : "US male"; + if (!text) { + return sendJson(res, 400, { error: "text required" }); + } + const wav = await handleTts(text, voice); + res.statusCode = 200; + res.setHeader("Content-Type", "audio/wav"); + res.setHeader("Content-Length", String(wav.length)); + res.end(wav); + } catch (err) { + return sendJson(res, 500, { error: String(err.message || err) }); + } + return; + } + + if (url.pathname === "/api/audio/stt") { + if (req.method !== "POST") { + return sendText(res, 405, "Method not allowed"); + } + try { + const body = await readJson(req); + const raw = + (typeof body.audioBase64 === "string" && body.audioBase64) || + (typeof body.audio === "string" && body.audio) || + ""; + const format = typeof body.format === "string" ? body.format : "wav"; + if (!raw) { + return sendJson(res, 400, { error: "audioBase64 required" }); + } + const match = /^data:[^;]+;base64,(.*)$/.exec(raw); + const audioBase64 = match ? 
match[1] : raw; + const text = await handleStt(audioBase64, format); + return sendJson(res, 200, { text }); + } catch (err) { + return sendJson(res, 500, { error: String(err.message || err) }); + } + } + + if (url.pathname.startsWith("/api/llm/")) { + return proxyRequest(req, res, llmBase, "/api/llm"); + } + if (url.pathname.startsWith("/api/audio/")) { + return proxyRequest(req, res, audioBase, "/api/audio"); + } + if (url.pathname.startsWith("/api/image/")) { + return proxyRequest(req, res, imageBase, "/api/image"); + } + if (url.pathname.startsWith("/audio/")) { + const filename = decodeURIComponent(url.pathname.slice("/audio/".length)); + const safeName = path.basename(filename); + const target = path.join(audioOutputDir, safeName); + return serveMediaFile(target, res); + } + + if (url.pathname === "/generate" || url.pathname.startsWith("/images/") || url.pathname === "/latest") { + return proxyRequest(req, res, imageBase, ""); + } + + if (req.method === "GET" || req.method === "HEAD") { + return serveStatic(req, res); + } + + res.statusCode = 404; + res.end("Not found"); +}); + +server.listen(port, host, () => { + console.log(`[WebProxy] Listening on http://${host}:${port}`); + console.log(`[WebProxy] Web root: ${webRoot}`); + console.log(`[WebProxy] LLM: ${llmBase}`); + console.log(`[WebProxy] Audio: ${audioBase}`); + console.log(`[WebProxy] Image: ${imageBase}`); +}); diff --git a/scripts/setup-clawdbot.sh b/scripts/setup-openclaw.sh old mode 100755 new mode 100644 similarity index 79% rename from scripts/setup-clawdbot.sh rename to scripts/setup-openclaw.sh index 61889c2..3d2f911 --- a/scripts/setup-clawdbot.sh +++ b/scripts/setup-openclaw.sh @@ -1,5 +1,5 @@ #!/bin/bash -# setup-clawdbot.sh - Install and configure Clawdbot on RunPod +# setup-openclaw.sh - Install and configure OpenClaw on Runpod # Prerequisites: vLLM server running on port 8000 set -e @@ -21,17 +21,17 @@ VLLM_HOST="${VLLM_HOST:-localhost}" VLLM_PORT="${VLLM_PORT:-8000}" VLLM_API_KEY="${VLLM_API_KEY:-changeme}" SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-qwen3-30b-a3b}" -CLAWDBOT_CONFIG_DIR="${CLAWDBOT_CONFIG_DIR:-$HOME/.clawdbot}" +OPENCLAW_STATE_DIR="${OPENCLAW_STATE_DIR:-$HOME/.openclaw}" RUNPOD_POD_ID="${RUNPOD_POD_ID:-}" # Print banner echo "" echo "===========================================" -echo " Clawdbot Setup Script" +echo " OpenClaw Setup Script" echo "===========================================" echo "" -# Check if running as root (common on RunPod) +# Check if running as root (common on Runpod) if [ "$EUID" -eq 0 ]; then log_info "Running as root" fi @@ -55,10 +55,11 @@ if ! command -v npm &> /dev/null; then fi log_info "npm version: $(npm --version)" -# Step 2: Install Clawdbot -log_info "Installing Clawdbot..." -npm install -g clawdbot@latest -log_success "Clawdbot installed: $(clawdbot --version 2>/dev/null || echo 'version check failed')" +# Step 2: Install OpenClaw +log_info "Installing OpenClaw..." +npm install -g openclaw@latest +BOT_CMD="openclaw" +log_success "OpenClaw installed: $("$BOT_CMD" --version 2>/dev/null || echo 'version check failed')" # Step 3: Wait for vLLM to be ready log_info "Waiting for vLLM server to be ready..." @@ -86,21 +87,21 @@ MODELS_RESPONSE=$(curl -s "http://${VLLM_HOST}:${VLLM_PORT}/v1/models" \ -H "Authorization: Bearer ${VLLM_API_KEY}") echo "Available models: $MODELS_RESPONSE" -# Step 4: Create Clawdbot configuration directory -log_info "Creating Clawdbot configuration..." 
-mkdir -p "$CLAWDBOT_CONFIG_DIR" +# Step 4: Create OpenClaw configuration directory +log_info "Creating OpenClaw configuration..." +mkdir -p "$OPENCLAW_STATE_DIR" # Determine the base URL for the vLLM endpoint if [ -n "$RUNPOD_POD_ID" ]; then - # Running on RunPod - use proxy URL + # Running on Runpod - use proxy URL VLLM_BASE_URL="https://${RUNPOD_POD_ID}-${VLLM_PORT}.proxy.runpod.net/v1" else # Local or direct connection VLLM_BASE_URL="http://${VLLM_HOST}:${VLLM_PORT}/v1" fi -# Step 5: Create Clawdbot configuration file -cat > "$CLAWDBOT_CONFIG_DIR/clawdbot.json" << EOF +# Step 5: Create OpenClaw configuration file +cat > "$OPENCLAW_STATE_DIR/openclaw.json" << EOF { "agents": { "defaults": { @@ -132,15 +133,15 @@ cat > "$CLAWDBOT_CONFIG_DIR/clawdbot.json" << EOF } EOF -log_success "Clawdbot configuration created at $CLAWDBOT_CONFIG_DIR/clawdbot.json" +log_success "OpenClaw configuration created at $OPENCLAW_STATE_DIR/openclaw.json" -# Step 6: Test Clawdbot connection -log_info "Testing Clawdbot configuration..." +# Step 6: Test OpenClaw connection +log_info "Testing OpenClaw configuration..." echo "" echo "Configuration summary:" echo " vLLM URL: $VLLM_BASE_URL" echo " Model: $SERVED_MODEL_NAME" -echo " Config dir: $CLAWDBOT_CONFIG_DIR" +echo " Config dir: $OPENCLAW_STATE_DIR" echo "" # Test a simple completion @@ -166,11 +167,11 @@ echo "===========================================" echo " Setup Complete!" echo "===========================================" echo "" -echo "To start Clawdbot, run:" -echo " clawdbot" +echo "To start OpenClaw, run:" +echo " openclaw" echo "" echo "To start with daemon mode:" -echo " clawdbot onboard --install-daemon" +echo " openclaw onboard --install-daemon" echo "" -echo "Configuration file: $CLAWDBOT_CONFIG_DIR/clawdbot.json" +echo "Configuration file: $OPENCLAW_STATE_DIR/openclaw.json" echo "" diff --git a/scripts/start-vllm.sh b/scripts/start-vllm.sh index 4af4bdd..0d610c2 100755 --- a/scripts/start-vllm.sh +++ b/scripts/start-vllm.sh @@ -1,5 +1,5 @@ #!/bin/bash -# start-vllm.sh - vLLM startup script for Clawdbot on RunPod +# start-vllm.sh - vLLM startup script for OpenClaw on Runpod # Handles model download, GPU detection, and vLLM server startup set -e @@ -36,7 +36,7 @@ export HF_HOME # Print banner echo "" echo "===========================================" -echo " Clawdbot vLLM Server Startup" +echo " OpenClaw vLLM Server Startup" echo "===========================================" echo "" diff --git a/skills/image-gen/SKILL.md b/skills/image-gen/SKILL.md new file mode 100644 index 0000000..5998669 --- /dev/null +++ b/skills/image-gen/SKILL.md @@ -0,0 +1,17 @@ +--- +name: image-gen +description: Generate images with a local FLUX.2 Klein SDNQ model. +metadata: {"openclaw":{"emoji":"🖼️","requires":{"bins":["openclaw-image-gen"]}}} +--- +Use this skill to generate an image from a prompt. Invoke the `exec` tool to run +the CLI and save the output under `/workspace/openclaw/images/`. + +Required inputs: +- prompt (string) +- width/height *or* aspect ratio (e.g. `1:1`, `16:9`) + +Examples: +- `openclaw-image-gen --prompt "" --width 1024 --height 1024 --output /workspace/openclaw/images/output.png` +- `openclaw-image-gen --prompt "" --aspect 16:9 --output /workspace/openclaw/images/output.png` + +If the user does not specify size, default to 1024x1024. 
diff --git a/skills/stt/SKILL.md b/skills/stt/SKILL.md new file mode 100644 index 0000000..a09ddf1 --- /dev/null +++ b/skills/stt/SKILL.md @@ -0,0 +1,24 @@ +--- +name: stt +description: Transcribe speech audio to text with a local LFM2.5-Audio model. +metadata: {"openclaw":{"emoji":"🎤","requires":{"bins":["openclaw-stt"]}}} +--- +Use this skill to transcribe speech audio to text (ASR - Automatic Speech Recognition). +Invoke the `exec` tool to run the CLI. + +Required inputs: +- audio (string) - Path to the input WAV audio file + +Optional inputs: +- output (string) - Path to save the transcript (if not provided, prints to stdout) + +Examples: +- `openclaw-stt /workspace/openclaw/audio/recording.wav` +- `openclaw-stt --audio /tmp/voice.wav --output /workspace/openclaw/transcripts/voice.txt` +- `openclaw-stt /workspace/openclaw/audio/meeting.wav` + +Notes: +- Input should be WAV audio format +- The model runs on CPU, so longer audio may take more time +- Best results with clear speech and minimal background noise +- The transcript is printed to stdout unless --output is specified diff --git a/skills/tts/SKILL.md b/skills/tts/SKILL.md new file mode 100644 index 0000000..7c19d27 --- /dev/null +++ b/skills/tts/SKILL.md @@ -0,0 +1,25 @@ +--- +name: tts +description: Convert text to speech audio with a local LFM2.5-Audio model. +metadata: {"openclaw":{"emoji":"🔊","requires":{"bins":["openclaw-tts"]}}} +--- +Use this skill to convert text to speech audio. Invoke the `exec` tool to run +the CLI and save the output under `/workspace/openclaw/audio/`. + +Required inputs: +- text (string) - The text to convert to speech +- output (string) - The output WAV file path + +Optional inputs: +- voice (string) - Voice to use: "US male", "UK male", "US female", "UK female" (default: US male) + +Examples: +- `openclaw-tts "Hello, how can I help you today?" 
--output /workspace/openclaw/audio/greeting.wav` +- `openclaw-tts --text "Welcome to OpenClaw" --output /workspace/openclaw/audio/welcome.wav` +- `openclaw-tts "Good morning" --output /tmp/greeting.wav --voice "UK female"` + +Notes: +- Output format is WAV audio at 24kHz +- The model runs on CPU, so longer texts may take more time +- Keep text reasonably short for best results (a few sentences) +- Available voices: US male, UK male, US female, UK female diff --git a/templates/clawdbot-vllm.json b/templates/openclaw-vllm.json similarity index 96% rename from templates/clawdbot-vllm.json rename to templates/openclaw-vllm.json index 726f483..44ed80f 100644 --- a/templates/clawdbot-vllm.json +++ b/templates/openclaw-vllm.json @@ -1,7 +1,7 @@ { "tiers": { "tier1": { - "name": "clawdbot-vllm-qwen3", + "name": "openclaw-stack-vllm-qwen3", "description": "Tier 1: Qwen3-30B-A3B on 1x H100 (~$2/hr) - Best for validation", "imageName": "vllm/vllm-openai:v0.12.0", "containerDiskInGb": 50, @@ -25,7 +25,7 @@ ] }, "tier2": { - "name": "clawdbot-vllm-mimo", + "name": "openclaw-stack-vllm-mimo", "description": "Tier 2: MiMo-V2-Flash on 2x H100 (~$4/hr) - Fastest inference", "imageName": "vllm/vllm-openai:v0.12.0", "containerDiskInGb": 50, @@ -48,7 +48,7 @@ ] }, "tier3": { - "name": "clawdbot-vllm-glm47", + "name": "openclaw-stack-vllm-glm47", "description": "Tier 3: GLM-4.7-FP8 on 4x H100 or 2x H200 (~$7-8/hr) - SOTA tool calling", "imageName": "vllm/vllm-openai:latest", "containerDiskInGb": 100, @@ -73,7 +73,7 @@ ] }, "tier3_h200": { - "name": "clawdbot-vllm-glm47-h200", + "name": "openclaw-stack-vllm-glm47-h200", "description": "Tier 3 Alt: GLM-4.7-FP8 on 2x H200 (~$7/hr) - Best value for SOTA", "imageName": "vllm/vllm-openai:latest", "containerDiskInGb": 100, diff --git a/templates/runpod-template.json b/templates/runpod-template.json index f070e66..a600c9f 100644 --- a/templates/runpod-template.json +++ b/templates/runpod-template.json @@ -1,7 +1,7 @@ { - "name": "clawdbot-vllm", - "description": "Clawdbot AI assistant with vLLM for local LLM inference. Includes Telegram integration.", - "imageName": "your-dockerhub-username/clawdbot-vllm:latest", + "name": "openclaw-stack-vllm", + "description": "OpenClaw AI assistant with vLLM for local LLM inference. Includes Telegram integration.", + "imageName": "your-dockerhub-username/openclaw-stack-vllm:latest", "containerDiskInGb": 50, "volumeInGb": 150, "volumeMountPath": "/workspace", @@ -15,8 +15,9 @@ "TOOL_CALL_PARSER": "hermes", "TENSOR_PARALLEL_SIZE": "auto", "HF_HOME": "/workspace/huggingface", - "CLAWDBOT_STATE_DIR": "/workspace/.clawdbot", + "OPENCLAW_STATE_DIR": "/workspace/.openclaw", + "OPENCLAW_WORKSPACE": "/workspace/openclaw", "TELEGRAM_BOT_TOKEN": "" }, - "readme": "# Clawdbot + vLLM\n\nAI coding assistant with local LLM inference.\n\n## Quick Start\n1. Set TELEGRAM_BOT_TOKEN env var (get from @BotFather)\n2. Start the pod - services auto-start\n3. Message your bot on Telegram\n4. 
First time: approve pairing via SSH: `clawdbot pairing list telegram` then `clawdbot pairing approve telegram CODE --notify`\n\n## Persistence\n- Config & pairings stored in /workspace/.clawdbot (survives restarts)\n- Model cache in /workspace/huggingface\n\n## Environment Variables\n- `MODEL_NAME`: HuggingFace model ID\n- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token\n- `VLLM_API_KEY`: API key for vLLM\n- `MAX_MODEL_LEN`: Context length\n\n## Ports\n- 8000: vLLM API\n- 18789: Clawdbot Gateway" + "readme": "# OpenClaw + vLLM\n\nAI coding assistant with local LLM inference.\n\n## Quick Start\n1. Set TELEGRAM_BOT_TOKEN env var (get from @BotFather)\n2. Start the pod - services auto-start\n3. Message your bot on Telegram\n4. First time: approve pairing via SSH: `openclaw pairing list telegram` then `openclaw pairing approve telegram CODE --notify`\n\n## Persistence\n- Config & pairings stored in /workspace/.openclaw\n- Model cache in /workspace/huggingface\n\n## Environment Variables\n- `MODEL_NAME`: HuggingFace model ID\n- `TELEGRAM_BOT_TOKEN`: Your Telegram bot token\n- `VLLM_API_KEY`: API key for vLLM\n- `MAX_MODEL_LEN`: Context length\n\n## Ports\n- 8000: vLLM API\n- 18789: OpenClaw Gateway" } diff --git a/web/app.js b/web/app.js new file mode 100644 index 0000000..97a0453 --- /dev/null +++ b/web/app.js @@ -0,0 +1,518 @@ +const { useEffect, useState } = React; + +function Pill({ label, status }) { + const className = status === "ok" ? "pill ok" : status === "warn" ? "pill warn" : "pill err"; + return React.createElement("span", { className }, label); +} + +function Section({ title, children }) { + return React.createElement( + "section", + { className: "card" }, + React.createElement("h2", null, title), + children, + ); +} + +function App() { + const [config, setConfig] = useState(null); + const [health, setHealth] = useState(null); + const [healthError, setHealthError] = useState(""); + + const [llmPrompt, setLlmPrompt] = useState(""); + const [llmApiKey, setLlmApiKey] = useState(""); + const [llmOutput, setLlmOutput] = useState(""); + const [llmError, setLlmError] = useState(""); + const [llmLoading, setLlmLoading] = useState(false); + + const [imgPrompt, setImgPrompt] = useState(""); + const [imgAspect, setImgAspect] = useState(""); + const [imgWidth, setImgWidth] = useState(""); + const [imgHeight, setImgHeight] = useState(""); + const [imgSeed, setImgSeed] = useState("0"); + const [imgSteps, setImgSteps] = useState("4"); + const [imgResult, setImgResult] = useState(null); + const [imgError, setImgError] = useState(""); + const [imgLoading, setImgLoading] = useState(false); + const [ttsText, setTtsText] = useState(""); + const [ttsVoice, setTtsVoice] = useState("US male"); + const [ttsAudioUrl, setTtsAudioUrl] = useState(""); + const [ttsError, setTtsError] = useState(""); + const [ttsLoading, setTtsLoading] = useState(false); + const [sttFile, setSttFile] = useState(null); + const [sttText, setSttText] = useState(""); + const [sttError, setSttError] = useState(""); + const [sttLoading, setSttLoading] = useState(false); + + useEffect(() => { + fetch("/config.json") + .then((res) => res.json()) + .then(setConfig) + .catch(() => setConfig(null)); + }, []); + + useEffect(() => { + return () => { + if (ttsAudioUrl) { + URL.revokeObjectURL(ttsAudioUrl); + } + }; + }, [ttsAudioUrl]); + + const refreshHealth = () => { + setHealthError(""); + fetch("/health") + .then((res) => res.json()) + .then(setHealth) + .catch(() => { + setHealth(null); + setHealthError("Health check failed."); + }); + 
}; + + useEffect(() => { + refreshHealth(); + }, []); + + const runLlm = async () => { + setLlmError(""); + setLlmOutput(""); + setLlmLoading(true); + try { + const model = config?.llmModel || "glm-4.7-flash"; + const payload = { + model, + stream: false, + messages: [{ role: "user", content: llmPrompt }], + }; + const headers = { "Content-Type": "application/json" }; + if (llmApiKey.trim()) { + headers.Authorization = `Bearer ${llmApiKey.trim()}`; + } + const res = await fetch("/api/llm/v1/chat/completions", { + method: "POST", + headers, + body: JSON.stringify(payload), + }); + const data = await res.json().catch(() => ({})); + if (!res.ok) { + throw new Error(data?.error?.message || `Request failed (${res.status})`); + } + const content = + data?.choices?.[0]?.message?.content || + data?.choices?.[0]?.text || + "No response content."; + setLlmOutput(content); + } catch (err) { + setLlmError(err.message || "LLM request failed."); + } finally { + setLlmLoading(false); + } + }; + + const runImage = async () => { + setImgError(""); + setImgResult(null); + setImgLoading(true); + try { + const payload = { + prompt: imgPrompt, + steps: imgSteps ? Number(imgSteps) : 4, + seed: imgSeed ? Number(imgSeed) : 0, + }; + if (imgAspect.trim()) { + payload.aspect = imgAspect.trim(); + } else { + if (imgWidth) payload.width = Number(imgWidth); + if (imgHeight) payload.height = Number(imgHeight); + } + const res = await fetch("/api/image/generate", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }); + const data = await res.json().catch(() => ({})); + if (!res.ok || data.error) { + throw new Error(data?.error || `Request failed (${res.status})`); + } + let url = + data.image_public_url || + data.image_proxy_url || + data.image_local_url || + data.image_url || + ""; + if (url.startsWith("/")) { + url = `${window.location.origin}${url}`; + } + setImgResult({ + url, + name: data.image_name, + width: data.width, + height: data.height, + }); + } catch (err) { + setImgError(err.message || "Image request failed."); + } finally { + setImgLoading(false); + } + }; + + const readFileAsDataUrl = (file) => + new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => resolve(reader.result); + reader.onerror = () => reject(new Error("Failed to read file.")); + reader.readAsDataURL(file); + }); + + const runTts = async () => { + setTtsError(""); + setTtsAudioUrl(""); + setTtsLoading(true); + try { + const res = await fetch("/api/audio/tts", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: ttsText, voice: ttsVoice }), + }); + if (!res.ok) { + const msg = await res.text(); + throw new Error(msg || `Request failed (${res.status})`); + } + const blob = await res.blob(); + const url = URL.createObjectURL(blob); + setTtsAudioUrl(url); + } catch (err) { + setTtsError(err.message || "TTS request failed."); + } finally { + setTtsLoading(false); + } + }; + + const runStt = async () => { + setSttError(""); + setSttText(""); + setSttLoading(true); + try { + if (!sttFile) { + throw new Error("Select a WAV file first."); + } + const dataUrl = await readFileAsDataUrl(sttFile); + const res = await fetch("/api/audio/stt", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ audio: dataUrl, format: "wav" }), + }); + const data = await res.json().catch(() => ({})); + if (!res.ok || data.error) { + throw new Error(data?.error || `Request failed 
(${res.status})`); + } + setSttText(data.text || ""); + } catch (err) { + setSttError(err.message || "STT request failed."); + } finally { + setSttLoading(false); + } + }; + + const healthPill = (key, label) => { + if (!health || !health[key]) { + return React.createElement(Pill, { label: `${label}: unknown`, status: "warn" }); + } + const status = health[key].ok ? "ok" : "err"; + const text = `${label}: ${health[key].status || "error"}`; + return React.createElement(Pill, { label: text, status }); + }; + + return React.createElement( + "div", + { className: "container" }, + React.createElement("h1", { className: "title" }, "OpenClaw Media Proxy"), + React.createElement( + "p", + { className: "subtitle" }, + "Single endpoint for LLM, audio, and image services.", + ), + + Section({ + title: "Status", + children: React.createElement( + "div", + { className: "grid two" }, + React.createElement( + "div", + null, + React.createElement("div", { className: "row" }, [ + healthPill("llm", "llm"), + healthPill("audio", "audio"), + healthPill("image", "image"), + ]), + healthError ? React.createElement("div", { className: "error" }, healthError) : null, + React.createElement( + "div", + { className: "row", style: { marginTop: "12px" } }, + React.createElement("button", { onClick: refreshHealth }, "Refresh"), + ), + ), + React.createElement( + "div", + null, + React.createElement( + "div", + { className: "muted" }, + "Public image base: ", + config?.imagePublicBaseUrl || "not set", + ), + React.createElement( + "div", + { className: "muted", style: { marginTop: "6px" } }, + "LLM model: ", + config?.llmModel || "glm-4.7-flash", + ), + ), + ), + }), + + Section({ + title: "LLM (glm-4.7-flash)", + children: React.createElement( + "div", + { className: "grid" }, + React.createElement( + "div", + null, + React.createElement("label", null, "Prompt"), + React.createElement("textarea", { + value: llmPrompt, + onChange: (e) => setLlmPrompt(e.target.value), + placeholder: "Ask the model something...", + }), + ), + React.createElement( + "div", + null, + React.createElement("label", null, "API key (Bearer)"), + React.createElement("input", { + value: llmApiKey, + onChange: (e) => setLlmApiKey(e.target.value), + placeholder: "LLAMA_API_KEY", + type: "password", + }), + React.createElement( + "div", + { className: "muted" }, + "Default is often 'changeme' unless you set it.", + ), + ), + React.createElement( + "div", + { className: "row" }, + React.createElement( + "button", + { onClick: runLlm, disabled: llmLoading || !llmPrompt.trim() }, + llmLoading ? "Running..." : "Send", + ), + llmError ? React.createElement("span", { className: "error" }, llmError) : null, + ), + llmOutput + ? 
React.createElement( + "div", + null, + React.createElement("label", null, "Response"), + React.createElement("pre", null, llmOutput), + ) + : null, + ), + }), + + Section({ + title: "Image (FLUX.2 Klein)", + children: React.createElement( + "div", + { className: "grid" }, + React.createElement( + "div", + null, + React.createElement("label", null, "Prompt"), + React.createElement("textarea", { + value: imgPrompt, + onChange: (e) => setImgPrompt(e.target.value), + placeholder: "A friendly robot on a desk, photorealistic...", + }), + ), + React.createElement( + "div", + { className: "grid two" }, + React.createElement( + "div", + null, + React.createElement("label", null, "Aspect ratio (optional)"), + React.createElement("input", { + value: imgAspect, + onChange: (e) => setImgAspect(e.target.value), + placeholder: "1:1 or 16:9", + }), + ), + React.createElement( + "div", + null, + React.createElement("label", null, "Steps / Seed"), + React.createElement( + "div", + { className: "row" }, + React.createElement("input", { + value: imgSteps, + onChange: (e) => setImgSteps(e.target.value), + placeholder: "4", + }), + React.createElement("input", { + value: imgSeed, + onChange: (e) => setImgSeed(e.target.value), + placeholder: "0", + }), + ), + ), + React.createElement( + "div", + null, + React.createElement("label", null, "Width (optional)"), + React.createElement("input", { + value: imgWidth, + onChange: (e) => setImgWidth(e.target.value), + placeholder: "1024", + }), + ), + React.createElement( + "div", + null, + React.createElement("label", null, "Height (optional)"), + React.createElement("input", { + value: imgHeight, + onChange: (e) => setImgHeight(e.target.value), + placeholder: "1024", + }), + ), + ), + React.createElement( + "div", + { className: "row" }, + React.createElement( + "button", + { onClick: runImage, disabled: imgLoading || !imgPrompt.trim() }, + imgLoading ? "Generating..." : "Generate", + ), + imgError ? React.createElement("span", { className: "error" }, imgError) : null, + ), + imgResult + ? React.createElement( + "div", + null, + React.createElement( + "div", + { className: "muted" }, + `Saved as ${imgResult.name || "image"}. ${imgResult.width}x${imgResult.height}`, + ), + React.createElement( + "div", + { className: "muted" }, + "URL: ", + React.createElement("a", { href: imgResult.url, target: "_blank" }, imgResult.url), + ), + imgResult.url + ? React.createElement("img", { className: "preview", src: imgResult.url }) + : null, + ) + : null, + ), + }), + + Section({ + title: "Audio (LFM2.5)", + children: React.createElement( + "div", + { className: "grid two" }, + React.createElement( + "div", + null, + React.createElement("label", null, "Text to speech"), + React.createElement("textarea", { + value: ttsText, + onChange: (e) => setTtsText(e.target.value), + placeholder: "Type text to synthesize...", + }), + React.createElement("label", null, "Voice"), + React.createElement( + "select", + { value: ttsVoice, onChange: (e) => setTtsVoice(e.target.value) }, + React.createElement("option", { value: "US male" }, "US male"), + React.createElement("option", { value: "UK male" }, "UK male"), + React.createElement("option", { value: "US female" }, "US female"), + React.createElement("option", { value: "UK female" }, "UK female"), + ), + React.createElement( + "div", + { className: "row", style: { marginTop: "12px" } }, + React.createElement( + "button", + { onClick: runTts, disabled: ttsLoading || !ttsText.trim() }, + ttsLoading ? "Generating..." 
: "Generate speech", + ), + ttsError ? React.createElement("span", { className: "error" }, ttsError) : null, + ), + ttsAudioUrl + ? React.createElement( + "div", + { style: { marginTop: "12px" } }, + React.createElement("audio", { + controls: true, + src: ttsAudioUrl, + style: { width: "100%" }, + }), + React.createElement( + "div", + { className: "muted", style: { marginTop: "6px" } }, + React.createElement("a", { href: ttsAudioUrl, download: "tts.wav" }, "Download"), + ), + ) + : null, + ), + React.createElement( + "div", + null, + React.createElement("label", null, "Speech to text (WAV)"), + React.createElement("input", { + type: "file", + accept: "audio/wav", + onChange: (e) => setSttFile(e.target.files?.[0] || null), + }), + React.createElement( + "div", + { className: "muted" }, + "Upload a WAV file to transcribe.", + ), + React.createElement( + "div", + { className: "row", style: { marginTop: "12px" } }, + React.createElement( + "button", + { onClick: runStt, disabled: sttLoading || !sttFile }, + sttLoading ? "Transcribing..." : "Transcribe", + ), + sttError ? React.createElement("span", { className: "error" }, sttError) : null, + ), + sttText + ? React.createElement( + "div", + { style: { marginTop: "12px" } }, + React.createElement("label", null, "Transcript"), + React.createElement("pre", null, sttText), + ) + : null, + ), + ), + }), + ); +} + +const root = ReactDOM.createRoot(document.getElementById("root")); +root.render(React.createElement(App)); diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..2f5e1d1 --- /dev/null +++ b/web/index.html @@ -0,0 +1,15 @@ + + + + + + OpenClaw Media Proxy + + + +
+ + + + +
diff --git a/web/styles.css b/web/styles.css
new file mode 100644
index 0000000..2270a84
--- /dev/null
+++ b/web/styles.css
@@ -0,0 +1,161 @@
+* {
+  box-sizing: border-box;
+}
+
+:root {
+  color-scheme: light dark;
+}
+
+body {
+  margin: 0;
+  font-family: "Inter", "Segoe UI", system-ui, -apple-system, sans-serif;
+  background: #0b0c0f;
+  color: #e8eaf0;
+}
+
+a {
+  color: #7cc4ff;
+  text-decoration: none;
+}
+
+a:hover {
+  text-decoration: underline;
+}
+
+.container {
+  max-width: 980px;
+  margin: 0 auto;
+  padding: 32px 20px 48px;
+}
+
+.title {
+  font-size: 28px;
+  margin: 0 0 6px;
+}
+
+.subtitle {
+  color: #b1b6c1;
+  margin: 0 0 24px;
+}
+
+.card {
+  background: #151821;
+  border: 1px solid #2b303b;
+  border-radius: 14px;
+  padding: 20px;
+  margin-bottom: 20px;
+  box-shadow: 0 8px 24px rgba(0, 0, 0, 0.25);
+}
+
+.card h2 {
+  margin: 0 0 12px;
+  font-size: 20px;
+}
+
+.grid {
+  display: grid;
+  gap: 16px;
+}
+
+.grid.two {
+  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
+}
+
+label {
+  display: block;
+  font-size: 13px;
+  color: #b1b6c1;
+  margin-bottom: 6px;
+}
+
+input,
+textarea,
+select {
+  width: 100%;
+  padding: 10px 12px;
+  border-radius: 10px;
+  border: 1px solid #343a46;
+  background: #0f1117;
+  color: #e8eaf0;
+  font-size: 14px;
+}
+
+textarea {
+  min-height: 96px;
+  resize: vertical;
+}
+
+button {
+  padding: 10px 16px;
+  border-radius: 10px;
+  border: none;
+  background: #3b82f6;
+  color: white;
+  font-weight: 600;
+  cursor: pointer;
+}
+
+button:disabled {
+  opacity: 0.6;
+  cursor: not-allowed;
+}
+
+.row {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+  flex-wrap: wrap;
+}
+
+.pill {
+  display: inline-flex;
+  align-items: center;
+  padding: 4px 10px;
+  border-radius: 999px;
+  font-size: 12px;
+  font-weight: 600;
+  background: #1f2937;
+  color: #d1d5db;
+}
+
+.pill.ok {
+  background: rgba(34, 197, 94, 0.15);
+  color: #4ade80;
+}
+
+.pill.warn {
+  background: rgba(250, 204, 21, 0.15);
+  color: #facc15;
+}
+
+.pill.err {
+  background: rgba(248, 113, 113, 0.18);
+  color: #fca5a5;
+}
+
+.muted {
+  color: #9aa1ad;
+  font-size: 13px;
+}
+
+pre {
+  background: #0f1117;
+  border: 1px solid #2b303b;
+  border-radius: 10px;
+  padding: 12px;
+  overflow: auto;
+  white-space: pre-wrap;
+  word-break: break-word;
+}
+
+img.preview {
+  max-width: 100%;
+  border-radius: 12px;
+  border: 1px solid #2b303b;
+  margin-top: 12px;
+}
+
+.error {
+  color: #fca5a5;
+  font-size: 13px;
+}
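
The panels in web/app.js are thin wrappers over the media-proxy HTTP routes, so the same endpoints can be exercised from a shell. A minimal sketch with curl, assuming the proxy is already running and reachable at $PROXY_URL (host and port depend on how the stack is deployed) and that $LLM_API_KEY is whatever bearer key the LLM backend expects; the prompts and output paths below are illustrative only:

    # Backend status, as shown in the Status card (llm / audio / image)
    curl -s "$PROXY_URL/health"

    # Chat completion via the OpenAI-compatible route used by the LLM panel
    curl -s "$PROXY_URL/api/llm/v1/chat/completions" \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer $LLM_API_KEY" \
      -d '{"model": "glm-4.7-flash", "stream": false, "messages": [{"role": "user", "content": "Hello"}]}'

    # Text to speech; the proxy returns WAV bytes, so write them to a file
    curl -s "$PROXY_URL/api/audio/tts" \
      -H "Content-Type: application/json" \
      -d '{"text": "Welcome to OpenClaw", "voice": "US male"}' \
      -o /tmp/welcome.wav

    # Image generation with the same fields the UI sends (aspect, or width/height, plus steps and seed)
    curl -s "$PROXY_URL/api/image/generate" \
      -H "Content-Type: application/json" \
      -d '{"prompt": "a friendly robot on a desk", "steps": 4, "seed": 0, "aspect": "1:1"}'

Speech to text expects the audio as a data URL in the JSON body ({"audio": "data:audio/wav;base64,...", "format": "wav"}), which is how the UI submits it via FileReader, so from a shell the WAV has to be base64-encoded and wrapped that way before POSTing to /api/audio/stt.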