Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
327 changes: 233 additions & 94 deletions pacman/pacman.pyw
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ from pygame.locals import *
# WIN???
SCRIPT_PATH=sys.path[0]

# ---------------------------------------------------------------------------
# Q-Learning AI configuration
# Set AI_ENABLED = False to play manually with the keyboard.
# ---------------------------------------------------------------------------
AI_ENABLED = True
AI_QTABLE_PATH = os.path.join(SCRIPT_PATH, 'q_table.json')

if AI_ENABLED:
from q_learning_ai import QLearningAgent

# NO_GIF_TILES -- tile numbers which do not correspond to a GIF file
# currently only "23" for the high-score list
NO_GIF_TILES=[23]
Expand Down Expand Up @@ -666,13 +676,14 @@ class ghost ():
if not self.state == 3:
# chase pac-man
self.currentPath = path.FindPath( (self.nearestRow, self.nearestCol), (player.nearestRow, player.nearestCol) )
self.FollowNextPathWay()

if self.currentPath:
self.FollowNextPathWay()

else:
# glasses found way back to ghost box
self.state = 1
self.speed = self.speed / 4

# give ghost a path to a random spot (containing a pellet)
(randRow, randCol) = (0, 0)

Expand All @@ -681,7 +692,8 @@ class ghost ():
randCol = random.randint(1, thisLevel.lvlWidth - 2)

self.currentPath = path.FindPath( (self.nearestRow, self.nearestCol), (randRow, randCol) )
self.FollowNextPathWay()
if self.currentPath:
self.FollowNextPathWay()

class fruit ():
def __init__ (self):
Expand Down Expand Up @@ -1371,12 +1383,103 @@ def CheckInputs():
elif thisGame.mode == 3:
if pygame.key.get_pressed()[ pygame.K_RETURN ] or (js!=None and js.get_button(JS_STARTBUTTON)):
thisGame.StartNewGame()





# _____________________________________
# ___/ Q-Learning AI step (called per frame) \_______________________________________

# How many game frames between AI decisions (one tile = 16px / 2px-per-frame = 8 frames).
AI_DECISION_INTERVAL = 8

def AIStep():
"""Drive Pac-Man with the Q-Learning agent each frame."""
# Takes no arguments and has no return statement; it works through the
# module-level game objects (thisGame, player, ghosts, thisLevel, ai_agent).
# Branches on thisGame.mode: 1 = choose/learn, 2 = death penalty,
# 3 = save and restart, 6 = level-complete bonus.
# NOTE(review): indentation is not visible in this listing, so any nesting
# implied by the comments below is inferred from statement order -- confirm
# against the real file.
# ai_prev_mode and ai_frame_counter are module-level names rebound here.
global ai_prev_mode, ai_frame_counter

# --- Mode 1: normal gameplay -- make periodic movement decisions ---
if thisGame.mode == 1:
# Count calls made in gameplay mode; a decision is only taken once the
# counter reaches AI_DECISION_INTERVAL, after which it restarts from zero.
ai_frame_counter += 1

if ai_frame_counter >= AI_DECISION_INTERVAL:
ai_frame_counter = 0

# State as produced by the agent from the current game objects.
curr_state = ai_agent.get_state(player, ghosts, thisLevel, thisGame)

# Update Q-table from the previous decision step
if ai_agent.prev_state is not None:
# Reward = points scored since the previous decision, minus 1.
score_delta = thisGame.score - ai_agent.prev_score
reward = score_delta - 1 # small step penalty to encourage speed
ai_agent.update(ai_agent.prev_state, ai_agent.prev_action, reward, curr_state)
# presumably lowers ai_agent.epsilon; implemented in q_learning_ai -- confirm
ai_agent.decay_epsilon()

# Choose and immediately apply the next action
action = ai_agent.choose_action(curr_state, player, thisLevel)
# ACTION_VELS[action] is unpacked as a (dx, dy) pair and scaled by the
# player's speed (presumably a unit direction per action -- confirm).
speed = player.speed
dx, dy = ai_agent.ACTION_VELS[action]
dx *= speed
dy *= speed

# The new velocity is only assigned when CheckIfHitWall reports no wall
# at the position one step ahead.
if not thisLevel.CheckIfHitWall(player.x + dx, player.y + dy,
player.nearestRow, player.nearestCol):
player.velX = dx
player.velY = dy

# Remember this decision; the next decision step reads these three values
# to compute the reward and update the Q-table.
ai_agent.prev_state = curr_state
ai_agent.prev_action = action
ai_agent.prev_score = thisGame.score

# --- Mode 2: Pac-Man just died -- apply death penalty ---
elif thisGame.mode == 2:
# ai_prev_mode == 1 means the previous call still saw gameplay mode, i.e.
# this is the first call since the switch to mode 2.
if ai_prev_mode == 1 and ai_agent.prev_state is not None:
# Placeholder next-state: a 13-element tuple of zeros.
# assumes 13 matches the length of the states returned by get_state -- TODO confirm
terminal = (0,) * 13
ai_agent.update(ai_agent.prev_state, ai_agent.prev_action, -500, terminal)
# NOTE(review): the lines from here down to "This ensures ..." appear to be a
# pasted code-review comment with a suggested patch, not part of the function
# body; the mode-2 branch resumes at ai_agent.decay_epsilon() below.
Comment on lines +1434 to +1435
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (bug_risk): Consider handling terminal transitions without bootstrapping off a dummy next state.

Using a synthetic terminal = (0,) * 13 still allows the update to bootstrap from whatever Q-values get learned for that dummy state, which can distort the intended -500 terminal penalty. Instead, handle terminal transitions without a next-state value (e.g., new_q = current + alpha * (reward - current) with no gamma * max Q(s'), or by allowing next_state=None and skipping the best_next term) so the terminal reward isn’t coupled to an arbitrary placeholder state.

Suggested implementation:

    # --- Mode 2: Pac-Man just died -- apply death penalty ---
    elif thisGame.mode == 2:
        if ai_prev_mode == 1 and ai_agent.prev_state is not None:
            # Terminal transition: no next state, so apply pure terminal penalty
            ai_agent.update(ai_agent.prev_state, ai_agent.prev_action, -500, None)
            ai_agent.decay_epsilon()
            ai_agent.prev_state = None

To fully implement the suggested behavior, you should also adjust the ai_agent.update method (likely in the AI agent class) so that:

  1. Its signature allows next_state to be None.
  2. When next_state is None, it performs a non-bootstrapping terminal update, e.g.:
    • new_q = current_q + alpha * (reward - current_q)
    • i.e., do not add gamma * max_a' Q(next_state, a') in this branch.
  3. When next_state is not None, keep the existing Q-learning update with the bootstrap term.
    This ensures the -500 death penalty is not coupled to any arbitrary placeholder state.

ai_agent.decay_epsilon()
# Clearing prev_state stops the mode-1 branch from updating the Q-table on its
# next decision with a transition that spans the death.
ai_agent.prev_state = None

# --- Mode 3: game over -- save Q-table and auto-restart ---
elif thisGame.mode == 3:
# Each game over increments the agent's episode counter.
ai_agent.episode += 1
# Save every 5 episodes so progress is not lost
if ai_agent.episode % 5 == 0:
ai_agent.save(AI_QTABLE_PATH)
ai_agent.prev_state = None
# Restart directly; in manual play CheckInputs calls StartNewGame from mode 3
# when RETURN or the joystick start button is pressed.
thisGame.StartNewGame()

# --- Mode 6: level complete -- apply level-win bonus ---
elif thisGame.mode == 6:
# Same first-call-after-gameplay check and placeholder next-state as the death
# branch, with a +1000 reward; decay_epsilon is not called here.
if ai_prev_mode == 1 and ai_agent.prev_state is not None:
terminal = (0,) * 13
ai_agent.update(ai_agent.prev_state, ai_agent.prev_action, 1000, terminal)
ai_agent.prev_state = None

# Record the mode seen on this call; the ai_prev_mode == 1 checks above compare
# against it on the next call.
ai_prev_mode = thisGame.mode

# Escape: save Q-table and quit
if pygame.key.get_pressed()[pygame.K_ESCAPE]:
ai_agent.save(AI_QTABLE_PATH)
sys.exit(0)


def DrawAIStats():
    """Render the Q-learning progress read-out near the top-left of the screen.

    Draws six rows of yellow text with ``ai_font`` onto ``screen``: a title,
    then the agent's episode count, epsilon (three decimals), number of
    Q-table entries, step count, and the current simulation speed.
    Rows start at pixel (4, 4) and are spaced 10 pixels apart vertically.
    """
    left, top, row_height = 4, 4, 10
    yellow = (255, 255, 0)

    hud_rows = (
        "AI Q-Learning",
        f"Episode : {ai_agent.episode}",
        f"Epsilon : {ai_agent.epsilon:.3f}",
        f"States : {len(ai_agent.q_table)}",
        f"Steps : {ai_agent.steps}",
        f"Speed : {sim_speed}x (+/- to change)",
    )

    for row_index, text in enumerate(hud_rows):
        rendered = ai_font.render(text, True, yellow)
        screen.blit(rendered, (left, top + row_index * row_height))


# _____________________________________________
# ___/ function: Get ID-Tilename Cross References \______________________________________
# ___/ function: Get ID-Tilename Cross References \______________________________________

def GetCrossRef ():

f = open(os.path.join(SCRIPT_PATH,"res","crossref.txt"), 'r')
Expand Down Expand Up @@ -1473,91 +1576,124 @@ if pygame.joystick.get_count()>0:
js.init()
else: js=None

while True:
# ---------------------------------------------------------------------------
# Q-Learning AI initialisation
# ---------------------------------------------------------------------------
# sim_speed must always be one of the SIM_SPEEDS entries: the +/- key handler in
# the main loop looks it up with SIM_SPEEDS.index(sim_speed) before stepping to a
# neighbouring entry.
sim_speed = 1 # simulation steps per rendered frame
SIM_SPEEDS = [1, 2, 4, 8, 16] # available speed levels (toggle with +/-)

# The names below are the module-level state used by AIStep() and DrawAIStats().
# NOTE(review): indentation is not visible in this listing; presumably all four
# assignments sit under the AI_ENABLED guard -- confirm against the real file.
if AI_ENABLED:
# presumably loads a previously saved table from AI_QTABLE_PATH when one exists;
# that behaviour lives in q_learning_ai -- confirm
ai_agent = QLearningAgent(qtable_path=AI_QTABLE_PATH)
ai_prev_mode = thisGame.mode # track mode transitions for reward signals
ai_frame_counter = 0 # counts frames between AI decisions
# Size-9 font that DrawAIStats() uses to render the HUD text rows.
ai_font = pygame.font.Font(os.path.join(SCRIPT_PATH, "res", "VeraMoBd.ttf"), 9)

while True:

events = pygame.event.get()
CheckIfCloseButton(events)

# Speed control: +/= to go faster, - to go slower (AI mode only)
if AI_ENABLED:
for event in events:
if event.type == KEYDOWN:
if event.key in (K_EQUALS, K_PLUS):
idx = SIM_SPEEDS.index(sim_speed)
sim_speed = SIM_SPEEDS[min(idx + 1, len(SIM_SPEEDS) - 1)]
elif event.key == K_MINUS:
idx = SIM_SPEEDS.index(sim_speed)
sim_speed = SIM_SPEEDS[max(idx - 1, 0)]

# Run the game update sim_speed times per rendered frame
for _step in range(sim_speed):

# Q-Learning AI handles all input and Q-table updates when enabled
if AI_ENABLED:
AIStep()

CheckIfCloseButton( pygame.event.get() )

if thisGame.mode == 1:
# normal gameplay mode
CheckInputs()

thisGame.modeTimer += 1
player.Move()
for i in range(0, 4, 1):
ghosts[i].Move()
thisFruit.Move()
elif thisGame.mode == 2:
# waiting after getting hit by a ghost
thisGame.modeTimer += 1

if thisGame.modeTimer == 90:
thisLevel.Restart()

thisGame.lives -= 1
if thisGame.lives == -1:
thisGame.updatehiscores(thisGame.score)
thisGame.SetMode( 3 )
thisGame.drawmidgamehiscores()
else:
thisGame.SetMode( 4 )
elif thisGame.mode == 3:
# game over
CheckInputs()
elif thisGame.mode == 4:
# waiting to start
thisGame.modeTimer += 1
if thisGame.modeTimer == 90:
thisGame.SetMode( 1 )
player.velX = player.speed
elif thisGame.mode == 5:
# brief pause after munching a vulnerable ghost
thisGame.modeTimer += 1
if thisGame.modeTimer == 30:
thisGame.SetMode( 1 )
elif thisGame.mode == 6:
# pause after eating all the pellets
thisGame.modeTimer += 1
if thisGame.modeTimer == 60:
thisGame.SetMode( 7 )
oldEdgeLightColor = thisLevel.edgeLightColor
oldEdgeShadowColor = thisLevel.edgeShadowColor
oldFillColor = thisLevel.fillColor
elif thisGame.mode == 7:
# flashing maze after finishing level
thisGame.modeTimer += 1
whiteSet = [10, 30, 50, 70]
normalSet = [20, 40, 60, 80]
if not whiteSet.count(thisGame.modeTimer) == 0:
# member of white set
thisLevel.edgeLightColor = (255, 255, 255, 255)
thisLevel.edgeShadowColor = (255, 255, 255, 255)
thisLevel.fillColor = (0, 0, 0, 255)
GetCrossRef()
elif not normalSet.count(thisGame.modeTimer) == 0:
# member of normal set
thisLevel.edgeLightColor = oldEdgeLightColor
thisLevel.edgeShadowColor = oldEdgeShadowColor
thisLevel.fillColor = oldFillColor
GetCrossRef()
elif thisGame.modeTimer == 150:
thisGame.SetMode ( 8 )
elif thisGame.mode == 8:
# blank screen before changing levels
thisGame.modeTimer += 1
if thisGame.modeTimer == 10:
thisGame.SetNextLevel()
if thisGame.mode == 1:
# normal gameplay mode
if not AI_ENABLED:
CheckInputs()

thisGame.modeTimer += 1
player.Move()
for i in range(0, 4, 1):
ghosts[i].Move()
thisFruit.Move()

elif thisGame.mode == 2:
# waiting after getting hit by a ghost
thisGame.modeTimer += 1

if thisGame.modeTimer == 90:
thisLevel.Restart()

thisGame.lives -= 1
if thisGame.lives == -1:
thisGame.updatehiscores(thisGame.score)
thisGame.SetMode( 3 )
thisGame.drawmidgamehiscores()
else:
thisGame.SetMode( 4 )

elif thisGame.mode == 3:
# game over
if not AI_ENABLED:
CheckInputs()

elif thisGame.mode == 4:
# waiting to start
thisGame.modeTimer += 1

if thisGame.modeTimer == 90:
thisGame.SetMode( 1 )
player.velX = player.speed

elif thisGame.mode == 5:
# brief pause after munching a vulnerable ghost
thisGame.modeTimer += 1

if thisGame.modeTimer == 30:
thisGame.SetMode( 1 )

elif thisGame.mode == 6:
# pause after eating all the pellets
thisGame.modeTimer += 1

if thisGame.modeTimer == 60:
thisGame.SetMode( 7 )
oldEdgeLightColor = thisLevel.edgeLightColor
oldEdgeShadowColor = thisLevel.edgeShadowColor
oldFillColor = thisLevel.fillColor

elif thisGame.mode == 7:
# flashing maze after finishing level
thisGame.modeTimer += 1

whiteSet = [10, 30, 50, 70]
normalSet = [20, 40, 60, 80]

if not whiteSet.count(thisGame.modeTimer) == 0:
# member of white set
thisLevel.edgeLightColor = (255, 255, 255, 255)
thisLevel.edgeShadowColor = (255, 255, 255, 255)
thisLevel.fillColor = (0, 0, 0, 255)
GetCrossRef()
elif not normalSet.count(thisGame.modeTimer) == 0:
# member of normal set
thisLevel.edgeLightColor = oldEdgeLightColor
thisLevel.edgeShadowColor = oldEdgeShadowColor
thisLevel.fillColor = oldFillColor
GetCrossRef()
elif thisGame.modeTimer == 150:
thisGame.SetMode ( 8 )

elif thisGame.mode == 8:
# blank screen before changing levels
thisGame.modeTimer += 1
if thisGame.modeTimer == 10:
thisGame.SetNextLevel()

thisGame.SmartMoveScreen()

Expand All @@ -1584,7 +1720,10 @@ while True:


thisGame.DrawScore()


if AI_ENABLED:
DrawAIStats()

pygame.display.flip()

clock.tick (60)
Loading