diff --git a/assignment3/tic_tac_toe.py b/assignment3/tic_tac_toe.py index d07fb5f..a66b0a8 100644 --- a/assignment3/tic_tac_toe.py +++ b/assignment3/tic_tac_toe.py @@ -1,3 +1,4 @@ +import typing from copy import deepcopy State = tuple[int, list[list[int | None]]] # Tuple of player (whose turn it is), @@ -76,3 +77,54 @@ class Game: print("The game is a draw") else: print(f"It is P{self.to_move(state)+1}'s turn to move") + + +def alpha_beta_search(game: Game, state: State) -> Action | None: + _, result = max_value(game, state, float("-inf"), float("+inf")) + return result + + +def max_value( + game: Game, state: State, alpha: float, beta: float +) -> tuple[float, Action | None]: + if game.is_terminal(state): + return game.utility(state, player), None + v, move = float("-inf"), float("-inf") + for a in game.actions(state): + v2, a2 = min_value(game, game.result(state, a), alpha, beta) + if v2 > v: + v, move = v2, a + alpha = max(alpha, v) + if v >= beta: + return v, typing.cast(Action, move) + return v, typing.cast(Action, move) + + +def min_value( + game: Game, state: State, alpha: float, beta: float +) -> tuple[float, Action | None]: + if game.is_terminal(state): + return game.utility(state, player), None + v, move = float("+inf"), float("+inf") + for a in game.actions(state): + v2, a2 = max_value(game, game.result(state, a), alpha, beta) + if v2 < v: + v, move = v2, a + alpha = min(beta, v) + if v <= alpha: + return v, typing.cast(Action, move) + return v, typing.cast(Action, move) + + +game = Game() + +state = game.initial_state() +game.print(state) +while not game.is_terminal(state): + player = game.to_move(state) + action = alpha_beta_search(game, state) # The player whose turn it is + # is the MAX player + print(f"P{player+1}'s action: {action}") + assert action is not None + state = game.result(state, action) + game.print(state)