Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ test_outputs/

eng_plans/

# RL experiments
rl/experiments/
rl/gameplay_logs/

# RL training artifacts
rl/models/*.zip
!rl/models/cuttle_rl_final.zip
Expand Down
35 changes: 35 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,38 @@ test-rl:
source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python -c \
"from rl import config; config.TRAINING_CONFIG['total_timesteps'] = 10000; \
exec(open('rl/train.py').read())"

# Play RL games with detailed per-move logging (writes to rl/gameplay_logs/).
# NOTE(review): `source` is a bashism — requires SHELL := bash somewhere above,
# or replace with POSIX `.`; confirm against the file's prologue.
.PHONY: debug-rl
debug-rl:  ## Run RL games with detailed logging
	@echo "Running RL games with detailed logging..."
	source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python rl/debug_gameplay.py

# Aggregate and analyze previously recorded RL gameplay logs.
# NOTE(review): `source` is a bashism — needs SHELL := bash; confirm prologue.
.PHONY: analyze-rl
analyze-rl:  ## Analyze RL gameplay logs
	@echo "Analyzing RL gameplay logs..."
	source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python rl/analyze_logs.py

# Interactively view a recorded RL game log.
# NOTE(review): `source` is a bashism — needs SHELL := bash; confirm prologue.
.PHONY: view-rl
view-rl:  ## View RL gameplay logs
	@echo "Viewing RL gameplay logs..."
	source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python rl/view_game.py

# Run the full hyperparameter search (long-running; results land in rl/experiments/).
# NOTE(review): `source` is a bashism — needs SHELL := bash; confirm prologue.
.PHONY: hypersearch-rl
hypersearch-rl:  ## Run hyperparameter search (full)
	@echo "Running hyperparameter search (full)..."
	source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python rl/hyperparameter_search.py

# Run a reduced hyperparameter search via the script's --quick flag.
# NOTE(review): `source` is a bashism — needs SHELL := bash; confirm prologue.
.PHONY: hypersearch-quick-rl
hypersearch-quick-rl:  ## Run quick hyperparameter search
	@echo "Running quick hyperparameter search..."
	source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python rl/hyperparameter_search.py --quick

# Compare the results of one experiment run against its baselines.
# Usage: make compare-rl DIR=rl/experiments/20260125_120000
# Fixes vs. original: the usage hint was echoed on EVERY invocation (including
# successful ones) — it now prints only when DIR is missing; $(DIR) is quoted
# so paths with spaces survive the shell.
# NOTE(review): `source` is a bashism — needs SHELL := bash; confirm prologue.
.PHONY: compare-rl
compare-rl:  ## Compare experiment results (requires DIR=...)
	@echo "Compare experiment results..."
	@if [ -z "$(DIR)" ]; then \
		echo "Error: DIR not specified"; \
		echo "Usage: make compare-rl DIR=rl/experiments/20260125_120000"; \
		exit 1; \
	fi
	source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python rl/compare_experiments.py "$(DIR)"

# One-shot snapshot of current RL training progress.
# NOTE(review): `source` is a bashism — needs SHELL := bash; confirm prologue.
.PHONY: monitor-rl
monitor-rl:  ## Show RL training progress once
	@source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python rl/monitor.py

# Continuously monitor RL training via the script's --watch flag (Ctrl-C to stop).
# NOTE(review): `source` is a bashism — needs SHELL := bash; confirm prologue.
.PHONY: watch-rl
watch-rl:  ## Continuously watch RL training progress
	@source $(VENV_NAME)/bin/activate && PYTHONPATH=$(CURRENT_DIR) python rl/monitor.py --watch
4 changes: 4 additions & 0 deletions game/game_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,10 @@ def play_one_off(
if card not in self.discard_pile:
self._move_card_to_discard(card)

# One-off resolution is complete (counter accepted or effect applied).
self.resolving_one_off = False
self.one_off_card_to_counter = None

# Turn is finished after resolution
return True, None

Expand Down
29 changes: 12 additions & 17 deletions game/rl_ai_player.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,10 @@ def _encode_game_state(self, game_state: GameState) -> np.ndarray:
return self.env.env.unwrapped._encode_state()

def _get_action_mask(self, legal_actions: List[Action]) -> np.ndarray:
"""Get action mask for the current legal actions.

Args:
legal_actions (List[Action]): List of legal actions.

Returns:
np.ndarray: Boolean mask for valid actions.
"""
mask = np.zeros(50, dtype=bool) # Max 50 actions
mask[:len(legal_actions)] = True
return mask
"""Get action mask for the current legal actions."""
from rl.action_mapping import legal_action_mask_from_actions

return legal_action_mask_from_actions(legal_actions)

async def get_action(
self,
Expand Down Expand Up @@ -167,12 +160,14 @@ async def get_action(
deterministic=True
)

# Ensure action index is valid
if action_index >= len(legal_actions):
action_index = 0 # Fallback to first legal action

# Return the chosen action
return legal_actions[action_index]
action_index = int(action_index)

from rl.action_mapping import build_action_map

action_map = build_action_map(legal_actions)
if action_index not in action_map:
return legal_actions[0]
return action_map[action_index]

except Exception as e:
last_error = e
Expand Down
60 changes: 30 additions & 30 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
astroid==3.2.4
black==24.8.0
click==8.1.7
dill==0.3.9
flake8==7.1.1
fastapi==0.115.0
isort==5.13.2
astroid==4.0.3
black==26.1.0
click==8.3.1
dill==0.4.1
flake8==7.3.0
fastapi==0.128.0
isort==7.0.0
mccabe==0.7.0
mypy==1.13.0
mypy-extensions==1.0.0
packaging==24.2
pathspec==0.12.1
platformdirs==4.3.6
pdoc==14.7.0
pycodestyle==2.12.1
pyflakes==3.2.0
pylint==3.2.7
pytest==8.1.1
pytest-timeout==2.3.1
tomli==2.2.1
tomlkit==0.13.2
typing-extensions==4.12.2
uvicorn[standard]==0.30.6
ollama==0.4.6
pytest-asyncio==0.23.8
mypy==1.19.1
mypy-extensions==1.1.0
packaging==26.0
pathspec==1.0.3
platformdirs==4.5.1
pdoc==16.0.0
pycodestyle==2.14.0
pyflakes==3.4.0
pylint==4.0.4
pytest==9.0.2
pytest-timeout==2.4.0
tomli==2.4.0
tomlkit==0.14.0
typing-extensions==4.15.0
uvicorn[standard]==0.40.0
ollama==0.6.1
pytest-asyncio==1.3.0

# RL Training Dependencies
gymnasium==0.29.1
stable-baselines3==2.2.1
sb3-contrib==2.2.1
torch>=2.0.0
tensorboard==2.13.0
numpy>=1.24.0
gymnasium==1.2.3
stable-baselines3==2.7.1
sb3-contrib==2.7.1
torch==2.10.0
tensorboard==2.20.0
numpy==2.4.1
Loading
Loading