TDT4136/assignment3/tic_tac_toe.py

import typing
from copy import deepcopy

State = tuple[int, list[list[int | None]]]  # Tuple of player (whose turn it is),
# and board
Action = tuple[int, int]  # Where to place the player's piece


class Game:
    def initial_state(self) -> State:
        return (0, [[None, None, None], [None, None, None], [None, None, None]])

    def to_move(self, state: State) -> int:
        player_index, _ = state
        return player_index

    def actions(self, state: State) -> list[Action]:
        _, board = state
        actions = []
        for row in range(3):
            for col in range(3):
                if board[row][col] is None:
                    actions.append((row, col))
        return actions

    def result(self, state: State, action: Action) -> State:
        _, board = state
        row, col = action
        next_board = deepcopy(board)
        next_board[row][col] = self.to_move(state)
        return (self.to_move(state) + 1) % 2, next_board

    def is_winner(self, state: State, player: int) -> bool:
        _, board = state
        for row in range(3):
            if all(board[row][col] == player for col in range(3)):
                return True
        for col in range(3):
            if all(board[row][col] == player for row in range(3)):
                return True
        if all(board[i][i] == player for i in range(3)):
            return True
        return all(board[i][2 - i] == player for i in range(3))

    def is_terminal(self, state: State) -> bool:
        _, board = state
        if self.is_winner(state, (self.to_move(state) + 1) % 2):
            return True
        return all(board[row][col] is not None for row in range(3) for col in range(3))

    def utility(self, state, player):
        assert self.is_terminal(state)
        if self.is_winner(state, player):
            return 1
        if self.is_winner(state, (player + 1) % 2):
            return -1
        return 0

    def print(self, state: State):
        _, board = state
        print()
        for row in range(3):
            cells = [
                " " if board[row][col] is None else "x" if board[row][col] == 0 else "o"
                for col in range(3)
            ]
            print(f" {cells[0]} | {cells[1]} | {cells[2]}")
            if row < 2:
                print("---+---+---")
        print()
        if self.is_terminal(state):
            if self.utility(state, 0) > 0:
                print(f"P1 won")
            elif self.utility(state, 1) > 0:
                print(f"P2 won")
            else:
                print("The game is a draw")
        else:
            print(f"It is P{self.to_move(state)+1}'s turn to move")


def alpha_beta_search(game: Game, state: State) -> Action | None:
    _, result = max_value(game, state, float("-inf"), float("+inf"))
    return result


def max_value(
    game: Game, state: State, alpha: float, beta: float
) -> tuple[float, Action | None]:
    if game.is_terminal(state):
        return game.utility(state, player), None
    v, move = float("-inf"), float("-inf")
    for a in game.actions(state):
        v2, a2 = min_value(game, game.result(state, a), alpha, beta)
        if v2 > v:
            v, move = v2, a
            alpha = max(alpha, v)
        if v >= beta:
            return v, typing.cast(Action, move)
    return v, typing.cast(Action, move)


def min_value(
    game: Game, state: State, alpha: float, beta: float
) -> tuple[float, Action | None]:
    if game.is_terminal(state):
        return game.utility(state, player), None
    v, move = float("+inf"), float("+inf")
    for a in game.actions(state):
        v2, a2 = max_value(game, game.result(state, a), alpha, beta)
        if v2 < v:
            v, move = v2, a
            alpha = min(beta, v)
        if v <= alpha:
            return v, typing.cast(Action, move)
    return v, typing.cast(Action, move)


game = Game()

state = game.initial_state()
game.print(state)
while not game.is_terminal(state):
    player = game.to_move(state)
    action = alpha_beta_search(game, state)  # The player whose turn it is
    # is the MAX player
    print(f"P{player+1}'s action: {action}")
    assert action is not None
    state = game.result(state, action)
    game.print(state)