Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,5 @@ DOCUMENT_CHUNK_OVERLAP=200

# Embedding configurations
EMBEDDING_MAX_TEXT_LENGTH=3072

DOCKER_BACKEND_DOCKERFILE=Dockerfile.backend.cuda
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,8 @@ run/*
.backend.pid
.frontend.pid
logs/train/
llama_cpp_backup/llama.cpp.zip
scripts/check_cuda_status.ps1
scripts/test_cuda_detection.bat
.env
.gpu_selected
11 changes: 10 additions & 1 deletion Dockerfile.backend
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ RUN mkdir -p /app/dependencies /app/data/sqlite /app/data/chroma_db /app/logs /a
COPY dependencies/graphrag-1.2.1.dev27.tar.gz /app/dependencies/
COPY dependencies/llama.cpp.zip /app/dependencies/

# Copy GPU checker script (only used for status reporting, not rebuilding)
COPY docker/app/check_gpu_support.sh /app/
COPY docker/app/check_torch_cuda.py /app/
RUN chmod +x /app/check_gpu_support.sh

# Build llama.cpp
RUN LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" \
&& echo "Using local llama.cpp archive..." \
Expand All @@ -33,7 +38,11 @@ RUN LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" \
echo "Successfully built llama-server"; \
fi

#
# Mark as CPU-only build for runtime reference
RUN mkdir -p /app/data && \
echo "{ \"gpu_optimized\": false, \"optimized_on\": \"$(date -u +\"%Y-%m-%dT%H:%M:%SZ\")\" }" > /app/data/gpu_optimized.json && \
echo "Created CPU-only marker file"

# Copy project configuration - Files that occasionally change
COPY pyproject.toml README.md /app/

Expand Down
111 changes: 111 additions & 0 deletions Dockerfile.backend.cuda
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# CUDA-enabled backend image: identical to the CPU image except llama.cpp is
# compiled with GGML_CUDA=ON so llama-server can use GPU acceleration.
FROM nvidia/cuda:12.8.1-devel-ubuntu24.04

# Set working directory
WORKDIR /app

# Build argument to conditionally skip the (very long) llama.cpp build,
# e.g. when an existing build is supplied through a Docker volume.
ARG SKIP_LLAMA_BUILD=false

# Install system dependencies with noninteractive mode to avoid prompts
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    build-essential cmake git curl wget lsof vim unzip sqlite3 \
    python3-pip python3-venv python3-full python3-poetry pipx \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python

# Create a virtual environment to avoid PEP 668 ("externally managed
# environment") restrictions on Ubuntu 24.04.
RUN python -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"
ENV VIRTUAL_ENV="/app/venv"

# Use the virtual environment's pip to install packages
RUN pip install --upgrade pip \
    && pip install poetry \
    && poetry config virtualenvs.create false

# Create directories
RUN mkdir -p /app/dependencies /app/data/sqlite /app/data/chroma_db /app/logs /app/run /app/resources

# Copy dependency files - Files that rarely change
COPY dependencies/graphrag-1.2.1.dev27.tar.gz /app/dependencies/
COPY dependencies/llama.cpp.zip /app/dependencies/

# Copy GPU checker script (used at runtime for status reporting)
COPY docker/app/check_gpu_support.sh /app/
COPY docker/app/check_torch_cuda.py /app/
RUN chmod +x /app/check_gpu_support.sh

# Unpack llama.cpp and build with CUDA support (conditionally, based on SKIP_LLAMA_BUILD)
RUN if [ "$SKIP_LLAMA_BUILD" = "false" ]; then \
    echo "=====================================================================" && \
    echo "STARTING LLAMA.CPP BUILD WITH CUDA SUPPORT - THIS WILL TAKE SOME TIME" && \
    echo "=====================================================================" && \
    LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" && \
    echo "Using local llama.cpp archive..." && \
    unzip -q "$LLAMA_LOCAL_ZIP" && \
    cd llama.cpp && \
    mkdir -p build && \
    cd build && \
    echo "Starting CMake configuration with CUDA support..." && \
    cmake -DGGML_CUDA=ON \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_SHARED_LIBS=OFF \
    -DLLAMA_NATIVE=OFF \
    -DCMAKE_CUDA_FLAGS="-Wno-deprecated-gpu-targets" \
    .. && \
    echo "Starting build process (this will take several minutes)..." && \
    cmake --build . --config Release -j --verbose && \
    echo "Build completed successfully" && \
    chmod +x /app/llama.cpp/build/bin/llama-server /app/llama.cpp/build/bin/llama-cli && \
    echo "====================================================================" && \
    echo "CUDA BUILD COMPLETED SUCCESSFULLY! GPU ACCELERATION IS NOW AVAILABLE" && \
    echo "===================================================================="; \
    else \
    echo "=====================================================================" && \
    echo "SKIPPING LLAMA.CPP BUILD (SKIP_LLAMA_BUILD=$SKIP_LLAMA_BUILD)" && \
    echo "Using existing llama.cpp build from Docker volume" && \
    echo "=====================================================================" && \
    LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" && \
    echo "Just unpacking llama.cpp archive (no build)..." && \
    unzip -q "$LLAMA_LOCAL_ZIP" && \
    cd llama.cpp && \
    mkdir -p build; \
    fi

# Mark as GPU-optimized build for runtime reference (check_gpu_support.sh and
# friends can read this marker to report build type).
RUN mkdir -p /app/data && \
    echo "{ \"gpu_optimized\": true, \"optimized_on\": \"$(date -u +\"%Y-%m-%dT%H:%M:%SZ\")\" }" > /app/data/gpu_optimized.json && \
    echo "Created GPU-optimized marker file"

# Copy project configuration - Files that occasionally change
COPY pyproject.toml README.md /app/

# Fix for potential package installation issues with Poetry; retry without the
# dev group if the full install fails.
RUN pip install --upgrade setuptools wheel
RUN poetry install --no-interaction --no-root || poetry install --no-interaction --no-root --without dev
RUN pip install --force-reinstall dependencies/graphrag-1.2.1.dev27.tar.gz

# Copy source code - Files that frequently change
COPY docker/ /app/docker/
COPY lpm_kernel/ /app/lpm_kernel/

# Check module import
RUN python -c "import lpm_kernel; print('Module import check passed')"

# Set environment variables.
# NOTE: LD_LIBRARY_PATH uses ${var:+...} expansion so that an unset inherited
# value does not leave a trailing ':' (an empty component makes the dynamic
# linker search the current directory).
ENV PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    BASE_DIR=/app/data \
    LOCAL_LOG_DIR=/app/logs \
    RUN_DIR=/app/run \
    RESOURCES_DIR=/app/resources \
    APP_ROOT=/app \
    FLASK_APP=lpm_kernel.app \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

# Expose ports
EXPOSE 8002 8080

# Set the startup command
CMD ["bash", "-c", "echo 'Checking SQLite database...' && if [ ! -s /app/data/sqlite/lpm.db ]; then echo 'SQLite database not found or empty, initializing...' && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db '.read /app/docker/sqlite/init.sql' && echo 'SQLite database initialized successfully' && echo 'Tables created:' && sqlite3 /app/data/sqlite/lpm.db '.tables'; else echo 'SQLite database already exists, skipping initialization'; fi && echo 'Checking ChromaDB...' && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo 'ChromaDB collections not found, initializing...' && python /app/docker/app/init_chroma.py && echo 'ChromaDB initialized successfully'; else echo 'ChromaDB already exists, skipping initialization'; fi && echo 'Starting application at ' $(date) >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=${LOCAL_APP_PORT:-8002} >> /app/logs/backend.log 2>&1"]
141 changes: 122 additions & 19 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
.PHONY: install test format lint all setup start stop restart restart-backend restart-force help docker-build docker-up docker-down docker-build-backend docker-build-frontend docker-restart-backend docker-restart-frontend docker-restart-all
.PHONY: install test format lint all setup start stop restart restart-backend restart-force help docker-build docker-up docker-down docker-build-backend docker-build-frontend docker-restart-backend docker-restart-backend-fast docker-restart-backend-smart docker-restart-frontend docker-restart-all docker-check-cuda docker-use-gpu docker-use-cpu

# Check for GPU flag file and set Docker Compose file accordingly
ifeq ($(wildcard .gpu_selected),)
# No GPU flag file found, use CPU configuration
DOCKER_COMPOSE_FILE := docker-compose.yml
else
# GPU flag file found, use GPU configuration
DOCKER_COMPOSE_FILE := docker-compose-gpu.yml
endif

# Detect operating system and set environment
ifeq ($(OS),Windows_NT)
Expand Down Expand Up @@ -39,6 +48,9 @@ else
COLOR_RED := \033[1;31m
endif

# NOTE: DOCKER_COMPOSE_FILE is selected near the top of this Makefile based on
# the presence of the .gpu_selected flag file (docker-compose-gpu.yml when it
# exists, docker-compose.yml otherwise).  The unconditional assignment that
# used to live here clobbered that selection and silently forced CPU-only
# configuration for every $(DOCKER_COMPOSE_FILE)-based target — do not
# reassign the variable here.

# Show help message
help:
ifeq ($(WINDOWS),1)
Expand Down Expand Up @@ -69,8 +81,12 @@ ifeq ($(WINDOWS),1)
@echo make docker-build-backend - Build only backend Docker image
@echo make docker-build-frontend - Build only frontend Docker image
@echo make docker-restart-backend - Restart only backend container
@echo make docker-restart-backend-fast - Restart backend+cuda without rebuilding llama.cpp
@echo make docker-restart-frontend - Restart only frontend container
@echo make docker-restart-all - Restart all Docker containers
@echo make docker-check-cuda - Check CUDA support in containers
@echo make docker-use-gpu - Switch to GPU configuration
@echo make docker-use-cpu - Switch to CPU-only configuration
@echo.
@echo All Available Commands:
@echo make help - Show this help message
Expand Down Expand Up @@ -106,9 +122,13 @@ else
@echo " make docker-down - Stop all Docker containers"
@echo " make docker-build-backend - Build only backend Docker image"
@echo " make docker-build-frontend - Build only frontend Docker image"
@echo " make docker-restart-backend - Restart only backend container"
@echo " make docker-restart-backend - Restart only backend container (with rebuild)"
@echo " make docker-restart-backend-fast - Restart backend+cuda without rebuilding llama.cpp"
@echo " make docker-restart-frontend - Restart only frontend container"
@echo " make docker-restart-all - Restart all Docker containers"
@echo " make docker-check-cuda - Check CUDA support in containers"
@echo " make docker-use-gpu - Switch to GPU configuration"
@echo " make docker-use-cpu - Switch to CPU-only configuration"
@echo ""
@echo "$(COLOR_BOLD)All Available Commands:$(COLOR_RESET)"
@echo " make help - Show this help message"
Expand All @@ -124,6 +144,27 @@ else
fi
endif

# Configuration switchers for Docker.
# The .gpu_selected flag file is the single source of truth: its presence
# makes the DOCKER_COMPOSE_FILE selection (top of this Makefile) pick
# docker-compose-gpu.yml; its absence selects docker-compose.yml.

# Create the flag file to enable GPU configuration.
docker-use-gpu:
	@echo "Switching to GPU configuration..."
ifeq ($(WINDOWS),1)
	@echo GPU mode enabled. Docker commands will use docker-compose-gpu.yml
	@echo gpu > .gpu_selected
else
	@echo "$(COLOR_GREEN)GPU mode enabled. Docker commands will use docker-compose-gpu.yml$(COLOR_RESET)"
	@echo "gpu" > .gpu_selected
endif

# Remove the flag file to fall back to the CPU-only configuration.
docker-use-cpu:
	@echo "Switching to CPU-only configuration..."
ifeq ($(WINDOWS),1)
	@echo CPU-only mode enabled. Docker commands will use docker-compose.yml
	@if exist .gpu_selected del .gpu_selected
else
	@echo "$(COLOR_GREEN)CPU-only mode enabled. Docker commands will use docker-compose.yml$(COLOR_RESET)"
	@rm -f .gpu_selected
endif

setup:
./scripts/setup.sh

Expand Down Expand Up @@ -156,37 +197,99 @@ DOCKER_COMPOSE_CMD := $(shell if command -v docker-compose >/dev/null 2>&1; then
endif

docker-build:
$(DOCKER_COMPOSE_CMD) build
ifeq ($(WINDOWS),1)
@echo "Prompting for CUDA preference..."
@scripts\prompt_cuda.bat
else
@echo "Prompting for CUDA preference..."
@chmod +x ./scripts/prompt_cuda.sh
@./scripts/prompt_cuda.sh
endif
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build

docker-up:
$(DOCKER_COMPOSE_CMD) up -d
@echo "Building and starting Docker containers..."
ifeq ($(WINDOWS),1)
@echo "Prompting for CUDA preference..."
@scripts\prompt_cuda.bat
@echo "Checking CUDA preference..."
@cmd /c "if exist .gpu_selected ( echo CUDA support detected, using GPU configuration... & docker compose -f docker-compose-gpu.yml build & docker compose -f docker-compose-gpu.yml up -d ) else ( echo No CUDA support selected, using CPU-only configuration... & docker compose -f docker-compose.yml build & docker compose -f docker-compose.yml up -d )"
else
@echo "Prompting for CUDA preference..."
@chmod +x ./scripts/prompt_cuda.sh
@./scripts/prompt_cuda.sh
@echo "Checking CUDA preference..."
@if [ -f .gpu_selected ]; then \
echo "CUDA support detected, using GPU configuration..."; \
$(DOCKER_COMPOSE_CMD) -f docker-compose-gpu.yml build; \
$(DOCKER_COMPOSE_CMD) -f docker-compose-gpu.yml up -d; \
else \
echo "No CUDA support selected, using CPU-only configuration..."; \
$(DOCKER_COMPOSE_CMD) -f docker-compose.yml build; \
$(DOCKER_COMPOSE_CMD) -f docker-compose.yml up -d; \
fi
endif
@echo "Container startup complete"
@echo "Check CUDA support with: make docker-check-cuda"

docker-down:
$(DOCKER_COMPOSE_CMD) down
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) down

docker-build-backend:
$(DOCKER_COMPOSE_CMD) build backend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build backend

docker-build-frontend:
$(DOCKER_COMPOSE_CMD) build frontend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build frontend

# Standard backend restart with complete rebuild
docker-restart-backend:
$(DOCKER_COMPOSE_CMD) stop backend
$(DOCKER_COMPOSE_CMD) rm -f backend
$(DOCKER_COMPOSE_CMD) build backend || { echo "$(COLOR_RED)โŒ Backend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) up -d backend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) stop backend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) rm -f backend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build backend || { echo "$(COLOR_RED)โŒ Backend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) up -d backend


# Fast backend restart: preserves llama.cpp build.
# Rebuilds the backend image with --build-arg SKIP_LLAMA_BUILD=true so the
# Dockerfiles skip the lengthy llama.cpp compile (they only unpack the
# archive in that case), then recreates and starts the container.
docker-restart-backend-fast:
	@echo "Smart restarting backend container (preserving llama.cpp build)..."
	@echo "Stopping backend container..."
	$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) stop backend
	@echo "Removing backend container..."
	$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) rm -f backend
	@echo "Building backend image with build-arg to skip llama.cpp build..."
# Parse-time report of which compose file the .gpu_selected flag selected.
ifeq ($(wildcard .gpu_selected),)
	@echo "Using CPU configuration (docker-compose.yml)..."
else
	@echo "Using GPU configuration (docker-compose-gpu.yml)..."
endif
	$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build --build-arg SKIP_LLAMA_BUILD=true backend || { echo "$(COLOR_RED)โŒ Backend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
	@echo "Starting backend container..."
	$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) up -d backend
	@echo "Backend container smart-restarted successfully"
	@echo "Check CUDA support with: make docker-check-cuda"

docker-restart-frontend:
$(DOCKER_COMPOSE_CMD) stop frontend
$(DOCKER_COMPOSE_CMD) rm -f frontend
$(DOCKER_COMPOSE_CMD) build frontend || { echo "$(COLOR_RED)โŒ Frontend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) up -d frontend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) stop frontend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) rm -f frontend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build frontend || { echo "$(COLOR_RED)โŒ Frontend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) up -d frontend

docker-restart-all:
$(DOCKER_COMPOSE_CMD) stop
$(DOCKER_COMPOSE_CMD) rm -f
$(DOCKER_COMPOSE_CMD) build || { echo "$(COLOR_RED)โŒ Build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) up -d
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) stop
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) rm -f
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build || { echo "$(COLOR_RED)โŒ Build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) up -d

# New command to check CUDA support in containers.
# Runs the bundled check_gpu_support.sh inside the running backend container;
# the `|| echo` fallback keeps the target from failing (non-zero exit) when
# no GPU support is detected.
docker-check-cuda:
	@echo "Checking CUDA support in Docker containers..."
ifeq ($(WINDOWS),1)
	@echo Running CUDA support check in backend container
	@docker exec second-me-backend /app/check_gpu_support.sh || echo No GPU support detected in backend container
else
	@echo "$(COLOR_CYAN)Running CUDA support check in backend container:$(COLOR_RESET)"
	@docker exec second-me-backend /app/check_gpu_support.sh || echo "$(COLOR_RED)No GPU support detected in backend container$(COLOR_RESET)"
endif

install:
poetry install
Expand Down
Loading