Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,5 @@ DOCUMENT_CHUNK_OVERLAP=200

# Embedding configurations
EMBEDDING_MAX_TEXT_LENGTH=3072

DOCKER_BACKEND_DOCKERFILE=Dockerfile.backend.cuda
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,8 @@ run/*
.backend.pid
.frontend.pid
logs/train/
llama_cpp_backup/llama.cpp.zip
scripts/check_cuda_status.ps1
scripts/test_cuda_detection.bat
.env
.gpu_selected
11 changes: 10 additions & 1 deletion Dockerfile.backend
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ RUN mkdir -p /app/dependencies /app/data/sqlite /app/data/chroma_db /app/logs /a
COPY dependencies/graphrag-1.2.1.dev27.tar.gz /app/dependencies/
COPY dependencies/llama.cpp.zip /app/dependencies/

# Copy GPU checker script (only used for status reporting, not rebuilding)
COPY docker/app/check_gpu_support.sh /app/
COPY docker/app/check_torch_cuda.py /app/
RUN chmod +x /app/check_gpu_support.sh

# Build llama.cpp
RUN LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" \
&& echo "Using local llama.cpp archive..." \
Expand All @@ -33,7 +38,11 @@ RUN LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" \
echo "Successfully built llama-server"; \
fi

#
# Mark as CPU-only build for runtime reference
RUN mkdir -p /app/data && \
echo "{ \"gpu_optimized\": false, \"optimized_on\": \"$(date -u +\"%Y-%m-%dT%H:%M:%SZ\")\" }" > /app/data/gpu_optimized.json && \
echo "Created CPU-only marker file"

# Copy project configuration - Files that occasionally change
COPY pyproject.toml README.md /app/

Expand Down
111 changes: 111 additions & 0 deletions Dockerfile.backend.cuda
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# CUDA-enabled backend image: identical to the CPU image except llama.cpp is
# compiled with GGML_CUDA=ON so llama-server can use GPU acceleration.
FROM nvidia/cuda:12.8.1-devel-ubuntu24.04

# Set working directory
WORKDIR /app

# Build argument to conditionally skip the (very long) llama.cpp build,
# e.g. when an existing build is supplied through a Docker volume.
ARG SKIP_LLAMA_BUILD=false

# Install system dependencies with noninteractive mode to avoid prompts
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    build-essential cmake git curl wget lsof vim unzip sqlite3 \
    python3-pip python3-venv python3-full python3-poetry pipx \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python

# Create a virtual environment to avoid PEP 668 ("externally managed
# environment") restrictions on Ubuntu 24.04.
RUN python -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"
ENV VIRTUAL_ENV="/app/venv"

# Use the virtual environment's pip to install packages
RUN pip install --upgrade pip \
    && pip install poetry \
    && poetry config virtualenvs.create false

# Create directories
RUN mkdir -p /app/dependencies /app/data/sqlite /app/data/chroma_db /app/logs /app/run /app/resources

# Copy dependency files - Files that rarely change
COPY dependencies/graphrag-1.2.1.dev27.tar.gz /app/dependencies/
COPY dependencies/llama.cpp.zip /app/dependencies/

# Copy GPU checker script (used at runtime for status reporting)
COPY docker/app/check_gpu_support.sh /app/
COPY docker/app/check_torch_cuda.py /app/
RUN chmod +x /app/check_gpu_support.sh

# Unpack llama.cpp and build with CUDA support (conditionally, based on SKIP_LLAMA_BUILD)
RUN if [ "$SKIP_LLAMA_BUILD" = "false" ]; then \
    echo "=====================================================================" && \
    echo "STARTING LLAMA.CPP BUILD WITH CUDA SUPPORT - THIS WILL TAKE SOME TIME" && \
    echo "=====================================================================" && \
    LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" && \
    echo "Using local llama.cpp archive..." && \
    unzip -q "$LLAMA_LOCAL_ZIP" && \
    cd llama.cpp && \
    mkdir -p build && \
    cd build && \
    echo "Starting CMake configuration with CUDA support..." && \
    cmake -DGGML_CUDA=ON \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_SHARED_LIBS=OFF \
    -DLLAMA_NATIVE=OFF \
    -DCMAKE_CUDA_FLAGS="-Wno-deprecated-gpu-targets" \
    .. && \
    echo "Starting build process (this will take several minutes)..." && \
    cmake --build . --config Release -j --verbose && \
    echo "Build completed successfully" && \
    chmod +x /app/llama.cpp/build/bin/llama-server /app/llama.cpp/build/bin/llama-cli && \
    echo "====================================================================" && \
    echo "CUDA BUILD COMPLETED SUCCESSFULLY! GPU ACCELERATION IS NOW AVAILABLE" && \
    echo "===================================================================="; \
    else \
    echo "=====================================================================" && \
    echo "SKIPPING LLAMA.CPP BUILD (SKIP_LLAMA_BUILD=$SKIP_LLAMA_BUILD)" && \
    echo "Using existing llama.cpp build from Docker volume" && \
    echo "=====================================================================" && \
    LLAMA_LOCAL_ZIP="dependencies/llama.cpp.zip" && \
    echo "Just unpacking llama.cpp archive (no build)..." && \
    unzip -q "$LLAMA_LOCAL_ZIP" && \
    cd llama.cpp && \
    mkdir -p build; \
    fi

# Mark as GPU-optimized build for runtime reference (check_gpu_support.sh and
# friends can read this marker to report build type).
RUN mkdir -p /app/data && \
    echo "{ \"gpu_optimized\": true, \"optimized_on\": \"$(date -u +\"%Y-%m-%dT%H:%M:%SZ\")\" }" > /app/data/gpu_optimized.json && \
    echo "Created GPU-optimized marker file"

# Copy project configuration - Files that occasionally change
COPY pyproject.toml README.md /app/

# Fix for potential package installation issues with Poetry; retry without the
# dev group if the full install fails.
RUN pip install --upgrade setuptools wheel
RUN poetry install --no-interaction --no-root || poetry install --no-interaction --no-root --without dev
RUN pip install --force-reinstall dependencies/graphrag-1.2.1.dev27.tar.gz

# Copy source code - Files that frequently change
COPY docker/ /app/docker/
COPY lpm_kernel/ /app/lpm_kernel/

# Check module import
RUN python -c "import lpm_kernel; print('Module import check passed')"

# Set environment variables.
# NOTE: LD_LIBRARY_PATH uses ${var:+...} expansion so that an unset inherited
# value does not leave a trailing ':' (an empty component makes the dynamic
# linker search the current directory).
ENV PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    BASE_DIR=/app/data \
    LOCAL_LOG_DIR=/app/logs \
    RUN_DIR=/app/run \
    RESOURCES_DIR=/app/resources \
    APP_ROOT=/app \
    FLASK_APP=lpm_kernel.app \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

# Expose ports
EXPOSE 8002 8080

# Set the startup command
CMD ["bash", "-c", "echo 'Checking SQLite database...' && if [ ! -s /app/data/sqlite/lpm.db ]; then echo 'SQLite database not found or empty, initializing...' && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db '.read /app/docker/sqlite/init.sql' && echo 'SQLite database initialized successfully' && echo 'Tables created:' && sqlite3 /app/data/sqlite/lpm.db '.tables'; else echo 'SQLite database already exists, skipping initialization'; fi && echo 'Checking ChromaDB...' && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo 'ChromaDB collections not found, initializing...' && python /app/docker/app/init_chroma.py && echo 'ChromaDB initialized successfully'; else echo 'ChromaDB already exists, skipping initialization'; fi && echo 'Starting application at ' $(date) >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=${LOCAL_APP_PORT:-8002} >> /app/logs/backend.log 2>&1"]
141 changes: 122 additions & 19 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
.PHONY: install test format lint all setup start stop restart restart-backend restart-force help docker-build docker-up docker-down docker-build-backend docker-build-frontend docker-restart-backend docker-restart-frontend docker-restart-all
.PHONY: install test format lint all setup start stop restart restart-backend restart-force help docker-build docker-up docker-down docker-build-backend docker-build-frontend docker-restart-backend docker-restart-backend-fast docker-restart-backend-smart docker-restart-frontend docker-restart-all docker-check-cuda docker-use-gpu docker-use-cpu

# Check for GPU flag file and set Docker Compose file accordingly
ifeq ($(wildcard .gpu_selected),)
# No GPU flag file found, use CPU configuration
DOCKER_COMPOSE_FILE := docker-compose.yml
else
# GPU flag file found, use GPU configuration
DOCKER_COMPOSE_FILE := docker-compose-gpu.yml
endif

# Detect operating system and set environment
ifeq ($(OS),Windows_NT)
Expand Down Expand Up @@ -39,6 +48,9 @@ else
COLOR_RED := \033[1;31m
endif

# NOTE: DOCKER_COMPOSE_FILE is selected near the top of this Makefile based on
# the presence of the .gpu_selected flag file (docker-compose-gpu.yml when it
# exists, docker-compose.yml otherwise).  The unconditional assignment that
# used to live here clobbered that selection and silently forced CPU-only
# configuration for every $(DOCKER_COMPOSE_FILE)-based target — do not
# reassign the variable here.

# Show help message
help:
ifeq ($(WINDOWS),1)
Expand Down Expand Up @@ -69,8 +81,12 @@ ifeq ($(WINDOWS),1)
@echo make docker-build-backend - Build only backend Docker image
@echo make docker-build-frontend - Build only frontend Docker image
@echo make docker-restart-backend - Restart only backend container
@echo make docker-restart-backend-fast - Restart backend+cuda without rebuilding llama.cpp
@echo make docker-restart-frontend - Restart only frontend container
@echo make docker-restart-all - Restart all Docker containers
@echo make docker-check-cuda - Check CUDA support in containers
@echo make docker-use-gpu - Switch to GPU configuration
@echo make docker-use-cpu - Switch to CPU-only configuration
@echo.
@echo All Available Commands:
@echo make help - Show this help message
Expand Down Expand Up @@ -106,9 +122,13 @@ else
@echo " make docker-down - Stop all Docker containers"
@echo " make docker-build-backend - Build only backend Docker image"
@echo " make docker-build-frontend - Build only frontend Docker image"
@echo " make docker-restart-backend - Restart only backend container"
@echo " make docker-restart-backend - Restart only backend container (with rebuild)"
@echo " make docker-restart-backend-fast - Restart backend+cuda without rebuilding llama.cpp"
@echo " make docker-restart-frontend - Restart only frontend container"
@echo " make docker-restart-all - Restart all Docker containers"
@echo " make docker-check-cuda - Check CUDA support in containers"
@echo " make docker-use-gpu - Switch to GPU configuration"
@echo " make docker-use-cpu - Switch to CPU-only configuration"
@echo ""
@echo "$(COLOR_BOLD)All Available Commands:$(COLOR_RESET)"
@echo " make help - Show this help message"
Expand All @@ -124,6 +144,27 @@ else
fi
endif

# Configuration switchers for Docker.
# The .gpu_selected flag file is the single source of truth: its presence
# makes the DOCKER_COMPOSE_FILE selection (top of this Makefile) pick
# docker-compose-gpu.yml; its absence selects docker-compose.yml.

# Create the flag file to enable GPU configuration.
docker-use-gpu:
	@echo "Switching to GPU configuration..."
ifeq ($(WINDOWS),1)
	@echo GPU mode enabled. Docker commands will use docker-compose-gpu.yml
	@echo gpu > .gpu_selected
else
	@echo "$(COLOR_GREEN)GPU mode enabled. Docker commands will use docker-compose-gpu.yml$(COLOR_RESET)"
	@echo "gpu" > .gpu_selected
endif

# Remove the flag file to fall back to the CPU-only configuration.
docker-use-cpu:
	@echo "Switching to CPU-only configuration..."
ifeq ($(WINDOWS),1)
	@echo CPU-only mode enabled. Docker commands will use docker-compose.yml
	@if exist .gpu_selected del .gpu_selected
else
	@echo "$(COLOR_GREEN)CPU-only mode enabled. Docker commands will use docker-compose.yml$(COLOR_RESET)"
	@rm -f .gpu_selected
endif

setup:
./scripts/setup.sh

Expand Down Expand Up @@ -156,37 +197,99 @@ DOCKER_COMPOSE_CMD := $(shell if command -v docker-compose >/dev/null 2>&1; then
endif

docker-build:
$(DOCKER_COMPOSE_CMD) build
ifeq ($(WINDOWS),1)
@echo "Prompting for CUDA preference..."
@scripts\prompt_cuda.bat
else
@echo "Prompting for CUDA preference..."
@chmod +x ./scripts/prompt_cuda.sh
@./scripts/prompt_cuda.sh
endif
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build

docker-up:
$(DOCKER_COMPOSE_CMD) up -d
@echo "Building and starting Docker containers..."
ifeq ($(WINDOWS),1)
@echo "Prompting for CUDA preference..."
@scripts\prompt_cuda.bat
@echo "Checking CUDA preference..."
@cmd /c "if exist .gpu_selected ( echo CUDA support detected, using GPU configuration... & docker compose -f docker-compose-gpu.yml build & docker compose -f docker-compose-gpu.yml up -d ) else ( echo No CUDA support selected, using CPU-only configuration... & docker compose -f docker-compose.yml build & docker compose -f docker-compose.yml up -d )"
else
@echo "Prompting for CUDA preference..."
@chmod +x ./scripts/prompt_cuda.sh
@./scripts/prompt_cuda.sh
@echo "Checking CUDA preference..."
@if [ -f .gpu_selected ]; then \
echo "CUDA support detected, using GPU configuration..."; \
$(DOCKER_COMPOSE_CMD) -f docker-compose-gpu.yml build; \
$(DOCKER_COMPOSE_CMD) -f docker-compose-gpu.yml up -d; \
else \
echo "No CUDA support selected, using CPU-only configuration..."; \
$(DOCKER_COMPOSE_CMD) -f docker-compose.yml build; \
$(DOCKER_COMPOSE_CMD) -f docker-compose.yml up -d; \
fi
endif
@echo "Container startup complete"
@echo "Check CUDA support with: make docker-check-cuda"

docker-down:
$(DOCKER_COMPOSE_CMD) down
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) down

docker-build-backend:
$(DOCKER_COMPOSE_CMD) build backend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build backend

docker-build-frontend:
$(DOCKER_COMPOSE_CMD) build frontend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build frontend

# Standard backend restart with complete rebuild
docker-restart-backend:
$(DOCKER_COMPOSE_CMD) stop backend
$(DOCKER_COMPOSE_CMD) rm -f backend
$(DOCKER_COMPOSE_CMD) build backend || { echo "$(COLOR_RED)โŒ Backend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) up -d backend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) stop backend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) rm -f backend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build backend || { echo "$(COLOR_RED)โŒ Backend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) up -d backend


# Fast backend restart: preserves llama.cpp build.
# Rebuilds the backend image with --build-arg SKIP_LLAMA_BUILD=true so the
# Dockerfiles skip the lengthy llama.cpp compile (they only unpack the
# archive in that case), then recreates and starts the container.
docker-restart-backend-fast:
	@echo "Smart restarting backend container (preserving llama.cpp build)..."
	@echo "Stopping backend container..."
	$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) stop backend
	@echo "Removing backend container..."
	$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) rm -f backend
	@echo "Building backend image with build-arg to skip llama.cpp build..."
# Parse-time report of which compose file the .gpu_selected flag selected.
ifeq ($(wildcard .gpu_selected),)
	@echo "Using CPU configuration (docker-compose.yml)..."
else
	@echo "Using GPU configuration (docker-compose-gpu.yml)..."
endif
	$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build --build-arg SKIP_LLAMA_BUILD=true backend || { echo "$(COLOR_RED)โŒ Backend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
	@echo "Starting backend container..."
	$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) up -d backend
	@echo "Backend container smart-restarted successfully"
	@echo "Check CUDA support with: make docker-check-cuda"

docker-restart-frontend:
$(DOCKER_COMPOSE_CMD) stop frontend
$(DOCKER_COMPOSE_CMD) rm -f frontend
$(DOCKER_COMPOSE_CMD) build frontend || { echo "$(COLOR_RED)โŒ Frontend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) up -d frontend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) stop frontend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) rm -f frontend
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build frontend || { echo "$(COLOR_RED)โŒ Frontend build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) up -d frontend

docker-restart-all:
$(DOCKER_COMPOSE_CMD) stop
$(DOCKER_COMPOSE_CMD) rm -f
$(DOCKER_COMPOSE_CMD) build || { echo "$(COLOR_RED)โŒ Build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) up -d
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) stop
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) rm -f
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) build || { echo "$(COLOR_RED)โŒ Build failed! Aborting operation...$(COLOR_RESET)"; exit 1; }
$(DOCKER_COMPOSE_CMD) -f $(DOCKER_COMPOSE_FILE) up -d

# New command to check CUDA support in containers.
# Runs the bundled check_gpu_support.sh inside the running backend container;
# the `|| echo` fallback keeps the target from failing (non-zero exit) when
# no GPU support is detected.
docker-check-cuda:
	@echo "Checking CUDA support in Docker containers..."
ifeq ($(WINDOWS),1)
	@echo Running CUDA support check in backend container
	@docker exec second-me-backend /app/check_gpu_support.sh || echo No GPU support detected in backend container
else
	@echo "$(COLOR_CYAN)Running CUDA support check in backend container:$(COLOR_RESET)"
	@docker exec second-me-backend /app/check_gpu_support.sh || echo "$(COLOR_RED)No GPU support detected in backend container$(COLOR_RESET)"
endif

install:
poetry install
Expand Down
Loading