diff --git a/.gitignore b/.gitignore index 66bfb0f..17a9f02 100644 --- a/.gitignore +++ b/.gitignore @@ -186,6 +186,7 @@ eng_plans/ # RL training artifacts rl/models/*.zip +!rl/models/cuttle_rl_final.zip rl/logs/ .DS_Store diff --git a/Makefile b/Makefile index 7b87083..0533d60 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,14 @@ run: run-with-rl: source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python main_with_rl_ai.py + +# Dockerized dev environment (backend + Vite) +dev: + docker compose -f docker-compose.dev.yaml up --build -d + +dev-down: + docker compose -f docker-compose.dev.yaml down + # Generate documentation using pdoc docs: source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python docs.py @@ -57,4 +65,4 @@ test-rl: @echo "Quick RL training test with action masking (10K timesteps, ~2-3 minutes)..." source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python -c \ "from rl import config; config.TRAINING_CONFIG['total_timesteps'] = 10000; \ - exec(open('rl/train.py').read())" \ No newline at end of file + exec(open('rl/train.py').read())" diff --git a/README.md b/README.md index c4a6d1b..a247234 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ # Set Up +## Local dev (no Docker) ## Create a virtual environment ```bash @@ -9,6 +10,11 @@ python3 -m venv cuttle-bot-3.12 source ./cuttle-bot-3.12/bin/activate ``` +Or use the Makefile helper (requires `python3.12` on PATH): + +```bash +make setup +``` ## Install requirements @@ -16,8 +22,42 @@ source ./cuttle-bot-3.12/bin/activate pip install -r requirements.txt ``` +## Run the dev servers + +Backend API (FastAPI + reload): + +```bash +uvicorn server.app:app --reload --host 0.0.0.0 --port 8000 +``` + +Frontend (Vite): + +```bash +cd web && npm run dev +``` + +Open http://localhost:5173 + +## Docker dev (hot reload) + +```bash +make dev +``` + +Open http://localhost:5173 (API at http://localhost:8000). 
+ ## Set up AI player +The game currently supports two types of AI players: RL-based AI and LLM-based AI. + +### RL-based AI + +The repo comes with a model zip file which is loaded into the game server. + +The model can be trained locally (see later sections). + +### LLM-based AI + The AI player uses ollama to generate actions. You'll need to install ollama and set up a model. Follow the installation guide here: https://github.com/ollama/ollama @@ -101,3 +141,4 @@ Adjust training parameters in `rl/config.py`: - Models saved to: `rl/models/` - Training logs: `rl/logs/` (view with TensorBoard) - Checkpoints every 10K timesteps +- Checkpoints are gitignored, but the final model, `cuttle_rl_final.zip`, is tracked in version control (git) diff --git a/codex-workflows/codex_mcp.py b/codex-workflows/codex_mcp.py deleted file mode 100644 index 3e65a53..0000000 --- a/codex-workflows/codex_mcp.py +++ /dev/null @@ -1,51 +0,0 @@ -import asyncio -import os -import shlex - -from dotenv import load_dotenv - -from agents import Agent, Runner, set_default_openai_api -from agents.mcp import MCPServerStdio - -load_dotenv(override=True) -set_default_openai_api(os.getenv("OPENAI_API_KEY")) - - -async def main() -> None: - command = os.getenv("CODEX_MCP_COMMAND", "npx") - args = shlex.split( - os.getenv("CODEX_MCP_ARGS", "-y @openai/codex mcp-server") - ) - async with MCPServerStdio( - name="Codex CLI", - params={ - "command": command, - "args": args, - }, - client_session_timeout_seconds=360000, - ) as codex_mcp_server: - developer_agent = Agent( - name="Game Developer", - instructions=( - "You are an expert in building simple games using basic html + css + javascript with no dependencies. " - "Save your work in a file called index.html in the current directory. " - "Always call codex with \"approval-policy\": \"never\" and \"sandbox\": \"workspace-write\"." 
- ), - mcp_servers=[codex_mcp_server], - ) - - designer_agent = Agent( - name="Game Designer", - instructions=( - "You are an indie game connoisseur. Come up with an idea for a single page html + css + javascript game that a developer could build in about 50 lines of code. " - "Format your request as a 3 sentence design brief for a game developer and call the Game Developer coder with your idea." - ), - model="gpt-5", - handoffs=[developer_agent], - ) - - await Runner.run(designer_agent, "Implement a fun new game!") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/codex-workflows/index.html b/codex-workflows/index.html deleted file mode 100644 index 92d0cd2..0000000 --- a/codex-workflows/index.html +++ /dev/null @@ -1,391 +0,0 @@ - - -
- - -