From 1da4eefc6f217e25fc1fb4d90fd271f9a3a5fc1f Mon Sep 17 00:00:00 2001
From: Jon <vitale.jonathan@ymail.com>
Date: Sun, 4 Jun 2023 14:02:48 +1000
Subject: [PATCH] Add solution for workshop week 4

---
 week4/solution/agent_programs.py | 141 +++++++++++++++++++++++++++++++
 week4/solution/tictactoe.py      |   8 ++
 2 files changed, 149 insertions(+)
 create mode 100644 week4/solution/agent_programs.py
 create mode 100644 week4/solution/tictactoe.py

diff --git a/week4/solution/agent_programs.py b/week4/solution/agent_programs.py
new file mode 100644
index 0000000..f2f3926
--- /dev/null
+++ b/week4/solution/agent_programs.py
@@ -0,0 +1,141 @@
+import random
+import time
+from une_ai.tictactoe import TicTacToeGameEnvironment as gm
+
+def player_marker(player_name):
+    if player_name == 'MAX':
+        return 'X'
+    else:
+        return 'O'
+
+def minimax(game_state, depth, player, is_maximising):
+    move_best = None
+
+    if is_maximising:
+        value = float('-Inf') # MAX starts from the worst value it can get
+    else:
+        value = float('+Inf') # MIN starts from the worst value it can get
+    if depth == 0 or gm.is_terminal(game_state): # base case: evaluate the state
+        value = gm.payoff(game_state, player)
+        return value, move_best
+
+    legal_actions = gm.get_legal_actions(game_state)
+    for action in legal_actions:
+        new_state = gm.transition_result(game_state, action)
+        value_new, move_new = minimax(new_state, depth - 1, player, not is_maximising)
+        if (is_maximising and value_new > value) or (not is_maximising and value_new < value):
+            value = value_new
+            move_best = action
+
+    return value, move_best
+
+def minimax_alpha_beta(game_state, depth, player, is_maximising, alpha, beta):
+    move_best = None
+    legal_actions = gm.get_legal_actions(game_state)
+    if is_maximising:
+        value = float('-Inf')
+    else:
+        value = float('+Inf')
+    if depth == 0 or gm.is_terminal(game_state): # base case: evaluate the state
+        value = gm.payoff(game_state, player)
+        return value, move_best
+
+    for action in legal_actions:
+        new_state = gm.transition_result(game_state, action)
+        value_new, move_new = minimax_alpha_beta(new_state, depth - 1, player, not is_maximising, alpha, beta)
+        if is_maximising:
+            if value_new > value:
+                value = value_new
+                move_best = action
+            alpha = max(value, alpha) # best value MAX can guarantee so far
+            if value >= beta: # MIN will never let the game reach this branch
+                break
+        else:
+            if value_new < value:
+                value = value_new
+                move_best = action
+            beta = min(value, beta) # best value MIN can guarantee so far
+            if value <= alpha: # MAX will never let the game reach this branch
+                break
+
+
+    return value, move_best
+
+def random_game_simulation(game_state, player, next_move):
+    new_game_state = gm.transition_result(game_state, next_move)
+    if gm.is_terminal(new_game_state):
+        value = gm.payoff(new_game_state, player)
+        return value
+    else:
+        new_legal_actions = gm.get_legal_actions(new_game_state)
+        next_move_rnd = random.choice(new_legal_actions) # play the rest of the game with random moves
+        value = random_game_simulation(new_game_state, player, next_move_rnd)
+
+    return value
+
+def montecarlo_simulation(game_state, player, N):
+    if gm.is_terminal(game_state):
+        return None
+
+    legal_actions = gm.get_legal_actions(game_state)
+    k = len(legal_actions)
+    payoffs = [None]*k
+    for i in range(0, k): # estimate the average payoff of each legal move
+        total_payoff = 0
+        for j in range(0, N): # with N random playouts per move
+            cur_payoff = random_game_simulation(game_state, player, legal_actions[i])
+            total_payoff += cur_payoff
+
+        avg_payoff = total_payoff / N
+        payoffs[i] = avg_payoff
+
+    avg_payoff_best = float('-Inf')
+    move_best = None
+    for i, avg in enumerate(payoffs): # pick the move with the best average payoff
+        if move_best is None or avg > avg_payoff_best:
+            avg_payoff_best = avg
+            move_best = legal_actions[i]
+
+    return move_best
+
+def max_agent_program_random(percepts, actuators):
+    game_state = percepts['game-board-sensor']
+    free_pos = game_state.find_value(None)
+    if len(free_pos) > 0:
+        move = random.choice(free_pos)
+        actions = ['mark-{0}-{1}'.format(move[0], move[1])]
+        return actions
+
+    return []
+
+def max_agent_program_minimax(percepts, actuators):
+    game_state = percepts['game-board-sensor']
+    free_pos = game_state.find_value(None)
+    if len(free_pos) > 0:
+        _, best_move = minimax(game_state, 9, player_marker('MAX'), True) # depth 9 covers the full tic-tac-toe game tree
+        return [best_move]
+
+    return []
+
+def max_agent_program(percepts, actuators):
+    game_state = percepts['game-board-sensor']
+    free_pos = game_state.find_value(None)
+    if len(free_pos) > 0:
+        _, best_move = minimax_alpha_beta(game_state, 100, 'X', True, float('-Inf'), float('+Inf'))
+        #best_move = montecarlo_simulation(game_state, 'X', 100)
+        return [best_move]
+
+    return []
+
+def min_agent_program(percepts, actuators):
+    game_state = percepts['game-board-sensor']
+    free_pos = game_state.find_value(None)
+    if len(free_pos) > 0:
+        #move = random.choice(free_pos)
+        #actions = ['mark-{0}-{1}'.format(move[0], move[1])]
+        #return actions
+        #_, best_move = minimax_alpha_beta(game_state, 100, 'O', True, float('-Inf'), float('+Inf'))
+        best_move = montecarlo_simulation(game_state, 'O', 100)
+        return [best_move]
+
+    return []
\ No newline at end of file
diff --git a/week4/solution/tictactoe.py b/week4/solution/tictactoe.py
new file mode 100644
index 0000000..12f6650
--- /dev/null
+++ b/week4/solution/tictactoe.py
@@ -0,0 +1,8 @@
+from une_ai.tictactoe import TicTacToeGame
+from agent_programs import max_agent_program, min_agent_program
+
+if __name__ == '__main__':
+    game = TicTacToeGame(
+        max_agent_program,
+        min_agent_program
+    )
\ No newline at end of file
-- 
GitLab