131 lines
4.1 KiB
Python
131 lines
4.1 KiB
Python
import typing
|
|
from copy import deepcopy
|
|
|
|
State = tuple[int, list[list[int | None]]] # Tuple of player (whose turn it is),
|
|
# and board
|
|
Action = tuple[int, int] # Where to place the player's piece
|
|
|
|
|
|
class Game:
|
|
def initial_state(self) -> State:
|
|
return (0, [[None, None, None], [None, None, None], [None, None, None]])
|
|
|
|
def to_move(self, state: State) -> int:
|
|
player_index, _ = state
|
|
return player_index
|
|
|
|
def actions(self, state: State) -> list[Action]:
|
|
_, board = state
|
|
actions = []
|
|
for row in range(3):
|
|
for col in range(3):
|
|
if board[row][col] is None:
|
|
actions.append((row, col))
|
|
return actions
|
|
|
|
def result(self, state: State, action: Action) -> State:
|
|
_, board = state
|
|
row, col = action
|
|
next_board = deepcopy(board)
|
|
next_board[row][col] = self.to_move(state)
|
|
return (self.to_move(state) + 1) % 2, next_board
|
|
|
|
def is_winner(self, state: State, player: int) -> bool:
|
|
_, board = state
|
|
for row in range(3):
|
|
if all(board[row][col] == player for col in range(3)):
|
|
return True
|
|
for col in range(3):
|
|
if all(board[row][col] == player for row in range(3)):
|
|
return True
|
|
if all(board[i][i] == player for i in range(3)):
|
|
return True
|
|
return all(board[i][2 - i] == player for i in range(3))
|
|
|
|
def is_terminal(self, state: State) -> bool:
|
|
_, board = state
|
|
if self.is_winner(state, (self.to_move(state) + 1) % 2):
|
|
return True
|
|
return all(board[row][col] is not None for row in range(3) for col in range(3))
|
|
|
|
def utility(self, state, player):
|
|
assert self.is_terminal(state)
|
|
if self.is_winner(state, player):
|
|
return 1
|
|
if self.is_winner(state, (player + 1) % 2):
|
|
return -1
|
|
return 0
|
|
|
|
def print(self, state: State):
|
|
_, board = state
|
|
print()
|
|
for row in range(3):
|
|
cells = [
|
|
" " if board[row][col] is None else "x" if board[row][col] == 0 else "o"
|
|
for col in range(3)
|
|
]
|
|
print(f" {cells[0]} | {cells[1]} | {cells[2]}")
|
|
if row < 2:
|
|
print("---+---+---")
|
|
print()
|
|
if self.is_terminal(state):
|
|
if self.utility(state, 0) > 0:
|
|
print(f"P1 won")
|
|
elif self.utility(state, 1) > 0:
|
|
print(f"P2 won")
|
|
else:
|
|
print("The game is a draw")
|
|
else:
|
|
print(f"It is P{self.to_move(state)+1}'s turn to move")
|
|
|
|
|
|
def alpha_beta_search(game: Game, state: State) -> Action | None:
    """Return the move that ``max_value`` selects for ``state``.

    The search starts with the widest possible window, ``(-inf, +inf)``.
    Only the chosen action is returned; the value found alongside it is
    discarded. ``max_value`` yields ``None`` as the action for a terminal
    state, so ``None`` is returned in that case.
    """
    value_and_move = max_value(game, state, float("-inf"), float("+inf"))
    return value_and_move[1]
|
|
|
|
|
|
def max_value(
    game: "Game", state: "State", alpha: float, beta: float
) -> "tuple[float, Action | None]":
    """Return ``(value, move)`` for the maximising player at ``state``.

    Args:
        game: Object providing ``is_terminal``, ``utility``, ``to_move``,
            ``actions`` and ``result``.
        state: Position in which the maximising (MAX) player is to move.
        alpha: Best value MAX can already guarantee on the path to ``state``.
        beta: Best (lowest) value MIN can already guarantee on that path.

    Returns:
        The best value found for MAX and the action achieving it. The action
        is ``None`` when ``state`` is terminal or offers no actions.
    """
    if game.is_terminal(state):
        # Turns alternate, so the player to move in any state handed to
        # max_value is MAX; score the outcome from that player's side rather
        # than relying on a module-level variable.
        return game.utility(state, game.to_move(state)), None
    v: float = float("-inf")
    move: "Action | None" = None
    for a in game.actions(state):
        v2, _ = min_value(game, game.result(state, a), alpha, beta)
        if v2 > v:
            v, move = v2, a
            alpha = max(alpha, v)
        if v >= beta:
            # MIN already has a better option elsewhere: prune the rest.
            return v, move
    return v, move


def min_value(
    game: "Game", state: "State", alpha: float, beta: float
) -> "tuple[float, Action | None]":
    """Return ``(value, move)`` for the minimising player at ``state``.

    Args:
        game: Object providing ``is_terminal``, ``utility``, ``to_move``,
            ``actions`` and ``result``.
        state: Position in which the minimising (MIN) player is to move.
        alpha: Best value MAX can already guarantee on the path to ``state``.
        beta: Best (lowest) value MIN can already guarantee on that path.

    Returns:
        The lowest value (from MAX's point of view) MIN can force and the
        action achieving it. The action is ``None`` when ``state`` is
        terminal or offers no actions.
    """
    if game.is_terminal(state):
        # The player to move here is MIN, so MAX is the other of the two
        # players (indices 0 and 1); values are always from MAX's side.
        return game.utility(state, (game.to_move(state) + 1) % 2), None
    v: float = float("+inf")
    move: "Action | None" = None
    for a in game.actions(state):
        v2, _ = max_value(game, game.result(state, a), alpha, beta)
        if v2 < v:
            v, move = v2, a
            # MIN tightens the upper bound (beta), never alpha.
            beta = min(beta, v)
        if v <= alpha:
            # MAX already has a better option elsewhere: prune the rest.
            return v, move
    return v, move
|
|
|
|
|
|
# Self-play driver: both sides choose their moves with alpha-beta search and
# the board is printed after every move until the game is over.
game = Game()
state = game.initial_state()
game.print(state)

while True:
    if game.is_terminal(state):
        break
    # Remember who is moving before the move is applied, for the log line.
    player = game.to_move(state)
    # The search treats the player whose turn it is as the MAX player.
    action = alpha_beta_search(game, state)
    print(f"P{player+1}'s action: {action}")
    # A non-terminal state is expected to yield a move.
    assert action is not None
    state = game.result(state, action)
    game.print(state)
|