Commit 1da4eefc authored by Jon
Add solution for workshop week 4

parent 5c69baca
# agent_programs.py
import random
import time
from une_ai.tictactoe import TicTacToeGameEnvironment as gm
def player_marker(player_name):
    # the MAX player uses the 'X' marker, the MIN player uses 'O'
    if player_name == 'MAX':
        return 'X'
    else:
        return 'O'
def minimax(game_state, depth, player, is_maximising):
    move_best = None
    # the maximising player starts at -inf and improves upwards,
    # the minimising player starts at +inf and improves downwards
    if is_maximising:
        value = float('-Inf')
    else:
        value = float('+Inf')

    # base case: depth limit reached or game over, return the payoff for player
    if depth == 0 or gm.is_terminal(game_state):
        value = gm.payoff(game_state, player)
        return value, move_best

    # recursive case: evaluate every legal action and keep the best one
    legal_actions = gm.get_legal_actions(game_state)
    for action in legal_actions:
        new_state = gm.transition_result(game_state, action)
        value_new, _ = minimax(new_state, depth - 1, player, not is_maximising)
        if (is_maximising and value_new > value) or (not is_maximising and value_new < value):
            value = value_new
            move_best = action

    return value, move_best
def minimax_alpha_beta(game_state, depth, player, is_maximising, alpha, beta):
    move_best = None
    if is_maximising:
        value = float('-Inf')
    else:
        value = float('+Inf')

    # base case: depth limit reached or game over, return the payoff for player
    if depth == 0 or gm.is_terminal(game_state):
        value = gm.payoff(game_state, player)
        return value, move_best

    legal_actions = gm.get_legal_actions(game_state)
    for action in legal_actions:
        new_state = gm.transition_result(game_state, action)
        value_new, _ = minimax_alpha_beta(new_state, depth - 1, player, not is_maximising, alpha, beta)
        if is_maximising:
            if value_new > value:
                value = value_new
                move_best = action
            alpha = max(value, alpha)
            # beta cut-off: MIN already has a better option elsewhere
            if value >= beta:
                break
        else:
            if value_new < value:
                value = value_new
                move_best = action
            beta = min(value, beta)
            # alpha cut-off: MAX already has a better option elsewhere
            if value <= alpha:
                break

    return value, move_best
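
# ---------------------------------------------------------------------------
# Sanity-check sketch, not part of the workshop solution or the une_ai API:
# the same alpha-beta pruning idea applied to a hypothetical hand-built game
# tree, where a leaf is a payoff and an internal node is a list of children.
# The helper name and the example values below are invented for illustration.
def _alphabeta_toy(node, is_maximising, alpha=float('-Inf'), beta=float('+Inf')):
    if not isinstance(node, list):
        return node  # leaf: its payoff
    value = float('-Inf') if is_maximising else float('+Inf')
    for child in node:
        child_value = _alphabeta_toy(child, not is_maximising, alpha, beta)
        if is_maximising:
            value = max(value, child_value)
            alpha = max(alpha, value)
        else:
            value = min(value, child_value)
            beta = min(beta, value)
        if beta <= alpha:
            break  # prune the remaining siblings
    return value
# Example: _alphabeta_toy([[3, 5], [2, 9]], True) returns 3; while exploring
# the right subtree the leaf 9 is pruned because MIN can already force 2 there.
# ---------------------------------------------------------------------------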
def random_game_simulation(game_state, player, next_move):
    # play next_move and then complete the game with uniformly random moves,
    # returning the final payoff for player
    new_game_state = gm.transition_result(game_state, next_move)
    if gm.is_terminal(new_game_state):
        return gm.payoff(new_game_state, player)

    new_legal_actions = gm.get_legal_actions(new_game_state)
    next_move_rnd = random.choice(new_legal_actions)
    return random_game_simulation(new_game_state, player, next_move_rnd)
def montecarlo_simulation(game_state, player, N):
    # flat Monte Carlo: estimate the value of each legal action with N random
    # playouts and return the action with the best average payoff for player
    if gm.is_terminal(game_state):
        return None

    legal_actions = gm.get_legal_actions(game_state)
    k = len(legal_actions)
    payoffs = [None]*k
    for i in range(0, k):
        total_payoff = 0
        for j in range(0, N):
            cur_payoff = random_game_simulation(game_state, player, legal_actions[i])
            total_payoff += cur_payoff
        payoffs[i] = total_payoff / N

    # pick the action with the highest average payoff
    avg_payoff_best = float('-Inf')
    move_best = None
    for i, avg in enumerate(payoffs):
        if move_best is None or avg > avg_payoff_best:
            avg_payoff_best = avg
            move_best = legal_actions[i]

    return move_best
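
# ---------------------------------------------------------------------------
# Standalone sketch, independent of une_ai: the same flat Monte Carlo idea on
# a hypothetical one-shot choice between two biased coins. The coin names and
# win probabilities are invented purely for illustration.
def _montecarlo_toy(N=1000):
    coins = {'coin-a': 0.6, 'coin-b': 0.4}  # hypothetical win probabilities
    best_coin = None
    best_avg = float('-Inf')
    for coin, p_win in coins.items():
        # average payoff of N random playouts (+1 for a win, -1 for a loss)
        avg = sum(1 if random.random() < p_win else -1 for _ in range(N)) / N
        if best_coin is None or avg > best_avg:
            best_coin = coin
            best_avg = avg
    return best_coin  # almost always 'coin-a' for a large enough N
# ---------------------------------------------------------------------------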
def max_agent_program_random(percepts, actuators):
    # baseline MAX agent: mark a free cell chosen uniformly at random
    game_state = percepts['game-board-sensor']
    free_pos = game_state.find_value(None)
    if len(free_pos) > 0:
        move = random.choice(free_pos)
        actions = ['mark-{0}-{1}'.format(move[0], move[1])]
        return actions
    return []
def max_agent_program_minimax(percepts, actuators):
    # MAX agent using plain minimax (tic-tac-toe has at most 9 plies,
    # so a depth of 9 explores the full game tree)
    game_state = percepts['game-board-sensor']
    free_pos = game_state.find_value(None)
    if len(free_pos) > 0:
        _, best_move = minimax(game_state, 9, 'X', True)
        return [best_move]
    return []
def max_agent_program(percepts, actuators):
    # MAX agent using minimax with alpha-beta pruning ('X' marker)
    game_state = percepts['game-board-sensor']
    free_pos = game_state.find_value(None)
    if len(free_pos) > 0:
        _, best_move = minimax_alpha_beta(game_state, 100, 'X', True, float('-Inf'), float('+Inf'))
        # alternative: best_move = montecarlo_simulation(game_state, 'X', 100)
        return [best_move]
    return []
def min_agent_program(percepts, actuators):
    # MIN agent using flat Monte Carlo simulations ('O' marker)
    game_state = percepts['game-board-sensor']
    free_pos = game_state.find_value(None)
    if len(free_pos) > 0:
        # alternative (random): move = random.choice(free_pos)
        #                       return ['mark-{0}-{1}'.format(move[0], move[1])]
        # alternative (alpha-beta): _, best_move = minimax_alpha_beta(game_state, 100, 'O', True, float('-Inf'), float('+Inf'))
        best_move = montecarlo_simulation(game_state, 'O', 100)
        return [best_move]
    return []
# --- end of agent_programs.py ---

# --- main script: creates the TicTacToeGame with the two agent programs ---
from une_ai.tictactoe import TicTacToeGame
from agent_programs import max_agent_program, min_agent_program

if __name__ == '__main__':
    # create the game with the MAX and MIN agent programs defined above
    game = TicTacToeGame(
        max_agent_program,
        min_agent_program
    )