Merged
1 change: 1 addition & 0 deletions .gitignore
@@ -186,6 +186,7 @@ eng_plans/

# RL training artifacts
rl/models/*.zip
!rl/models/cuttle_rl_final.zip
rl/logs/

.DS_Store
10 changes: 9 additions & 1 deletion Makefile
@@ -14,6 +14,14 @@ run:

run-with-rl:
source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python main_with_rl_ai.py

# Dockerized dev environment (backend + Vite)
dev:
docker compose -f docker-compose.dev.yaml up --build -d

dev-down:
docker compose -f docker-compose.dev.yaml down

# Generate documentation using pdoc
docs:
source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python docs.py
@@ -57,4 +65,4 @@ test-rl:
@echo "Quick RL training test with action masking (10K timesteps, ~2-3 minutes)..."
source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python -c \
"from rl import config; config.TRAINING_CONFIG['total_timesteps'] = 10000; \
exec(open('rl/train.py').read())"
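The `test-rl` target above works because `exec()` without an explicit globals dict runs the script in the caller's namespace, so the config override made before the `exec` is visible inside the training script. In the real target the shared state is the `rl.config` module itself; the sketch below simplifies that to a shared global, with a stand-in for `rl/train.py` (file names here are hypothetical):

```bash
# Stand-in for rl/train.py: reads a config dict it never defines itself.
cat > /tmp/fake_train.py <<'EOF'
print("training for", CONFIG["total_timesteps"], "timesteps")
EOF

# Override first, then exec: the script sees the mutated value.
python3 -c "CONFIG = {'total_timesteps': 1000000}; \
CONFIG['total_timesteps'] = 10000; \
exec(open('/tmp/fake_train.py').read())"
# prints: training for 10000 timesteps
```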
41 changes: 41 additions & 0 deletions README.md
@@ -2,22 +2,62 @@


# Setup
## Local dev (no Docker)
### Create a virtual environment

```bash
python3 -m venv cuttle-bot-3.12
source ./cuttle-bot-3.12/bin/activate
```

Or use the Makefile helper (requires `python3.12` on PATH):

```bash
make setup
```

### Install requirements

```bash
pip install -r requirements.txt
```

### Run the dev servers

Backend API (FastAPI + reload):

```bash
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
```

Frontend (Vite):

```bash
cd web && npm run dev
```

Open http://localhost:5173

## Docker dev (hot reload)

```bash
make dev
```

Open http://localhost:5173 (API at http://localhost:8000).

## Set up AI player

The game currently supports two types of AI player: RL-based and LLM-based.

### RL-based AI

The repo ships with a pretrained model zip (`rl/models/cuttle_rl_final.zip`), which the game server loads.

The model can be trained locally (see later sections).

### LLM-based AI

The LLM player uses Ollama to generate actions, so you'll need to install Ollama and pull a model.

Follow the installation guide here: https://github.com/ollama/ollama
@@ -101,3 +141,4 @@ Adjust training parameters in `rl/config.py`:
- Models saved to: `rl/models/`
- Training logs: `rl/logs/` (view with TensorBoard)
- Checkpoints every 10K timesteps
- Checkpoints are gitignored, but the final model, `cuttle_rl_final.zip`, is tracked in git
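The ignore/track split comes from a negation pattern in `.gitignore`: `rl/models/*.zip` ignores every model zip, then `!rl/models/cuttle_rl_final.zip` re-includes the final model. You can sanity-check this behavior in a scratch repo (assumes `git` is on PATH; the checkpoint file name is just an example):

```bash
# Reproduce this repo's ignore rules in a throwaway repo.
tmp=$(mktemp -d) && cd "$tmp" && git init -q
mkdir -p rl/models
printf 'rl/models/*.zip\n!rl/models/cuttle_rl_final.zip\n' > .gitignore
touch rl/models/checkpoint_10000.zip rl/models/cuttle_rl_final.zip

git check-ignore rl/models/checkpoint_10000.zip   # ignored: path is printed
git check-ignore rl/models/cuttle_rl_final.zip \
  || echo "not ignored (trackable)"               # negation wins for the final model
```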
51 changes: 0 additions & 51 deletions codex-workflows/codex_mcp.py

This file was deleted.
