Commit 1da4eefc authored by Jon
Add solution for workshop week 4

parent 5c69baca
# agent_programs.py
import random
import time
from une_ai.tictactoe import TicTacToeGameEnvironment as gm
def player_marker(player_name):
    # the MAX player uses the 'X' marker, the MIN player uses 'O'
    if player_name == 'MAX':
        return 'X'
    else:
        return 'O'
def minimax(game_state, depth, player, is_maximising):
    move_best = None
    # the maximising player starts at -inf and improves upwards,
    # the minimising player starts at +inf and improves downwards
    if is_maximising:
        value = float('-Inf')
    else:
        value = float('+Inf')

    # base case: depth limit reached or game over, return the payoff for player
    if depth == 0 or gm.is_terminal(game_state):
        value = gm.payoff(game_state, player)
        return value, move_best

    # recursive case: evaluate every legal action and keep the best one
    legal_actions = gm.get_legal_actions(game_state)
    for action in legal_actions:
        new_state = gm.transition_result(game_state, action)
        value_new, _ = minimax(new_state, depth - 1, player, not is_maximising)
        if (is_maximising and value_new > value) or (not is_maximising and value_new < value):
            value = value_new
            move_best = action

    return value, move_best
def minimax_alpha_beta(game_state, depth, player, is_maximising, alpha, beta):
    move_best = None
    if is_maximising:
        value = float('-Inf')
    else:
        value = float('+Inf')

    # base case: depth limit reached or game over, return the payoff for player
    if depth == 0 or gm.is_terminal(game_state):
        value = gm.payoff(game_state, player)
        return value, move_best

    legal_actions = gm.get_legal_actions(game_state)
    for action in legal_actions:
        new_state = gm.transition_result(game_state, action)
        value_new, _ = minimax_alpha_beta(new_state, depth - 1, player, not is_maximising, alpha, beta)
        if is_maximising:
            if value_new > value:
                value = value_new
                move_best = action
            alpha = max(value, alpha)
            # beta cut-off: MIN already has a better option elsewhere
            if value >= beta:
                break
        else:
            if value_new < value:
                value = value_new
                move_best = action
            beta = min(value, beta)
            # alpha cut-off: MAX already has a better option elsewhere
            if value <= alpha:
                break

    return value, move_best
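
# ---------------------------------------------------------------------------
# Sanity-check sketch, not part of the workshop solution or the une_ai API:
# the same alpha-beta pruning idea applied to a hypothetical hand-built game
# tree, where a leaf is a payoff and an internal node is a list of children.
# The helper name and the example values below are invented for illustration.
def _alphabeta_toy(node, is_maximising, alpha=float('-Inf'), beta=float('+Inf')):
    if not isinstance(node, list):
        return node  # leaf: its payoff
    value = float('-Inf') if is_maximising else float('+Inf')
    for child in node:
        child_value = _alphabeta_toy(child, not is_maximising, alpha, beta)
        if is_maximising:
            value = max(value, child_value)
            alpha = max(alpha, value)
        else:
            value = min(value, child_value)
            beta = min(beta, value)
        if beta <= alpha:
            break  # prune the remaining siblings
    return value
# Example: _alphabeta_toy([[3, 5], [2, 9]], True) returns 3; while exploring
# the right subtree the leaf 9 is pruned because MIN can already force 2 there.
# ---------------------------------------------------------------------------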
def random_game_simulation(game_state, player, next_move):
    # play next_move and then complete the game with uniformly random moves,
    # returning the final payoff for player
    new_game_state = gm.transition_result(game_state, next_move)
    if gm.is_terminal(new_game_state):
        return gm.payoff(new_game_state, player)

    new_legal_actions = gm.get_legal_actions(new_game_state)
    next_move_rnd = random.choice(new_legal_actions)
    return random_game_simulation(new_game_state, player, next_move_rnd)
def montecarlo_simulation(game_state, player, N):
    # flat Monte Carlo: estimate the value of each legal action with N random
    # playouts and return the action with the best average payoff for player
    if gm.is_terminal(game_state):
        return None

    legal_actions = gm.get_legal_actions(game_state)
    k = len(legal_actions)
    payoffs = [None]*k
    for i in range(0, k):
        total_payoff = 0
        for j in range(0, N):
            cur_payoff = random_game_simulation(game_state, player, legal_actions[i])
            total_payoff += cur_payoff
        payoffs[i] = total_payoff / N

    # pick the action with the highest average payoff
    avg_payoff_best = float('-Inf')
    move_best = None
    for i, avg in enumerate(payoffs):
        if move_best is None or avg > avg_payoff_best:
            avg_payoff_best = avg
            move_best = legal_actions[i]

    return move_best
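
# ---------------------------------------------------------------------------
# Standalone sketch, independent of une_ai: the same flat Monte Carlo idea on
# a hypothetical one-shot choice between two biased coins. The coin names and
# win probabilities are invented purely for illustration.
def _montecarlo_toy(N=1000):
    coins = {'coin-a': 0.6, 'coin-b': 0.4}  # hypothetical win probabilities
    best_coin = None
    best_avg = float('-Inf')
    for coin, p_win in coins.items():
        # average payoff of N random playouts (+1 for a win, -1 for a loss)
        avg = sum(1 if random.random() < p_win else -1 for _ in range(N)) / N
        if best_coin is None or avg > best_avg:
            best_coin = coin
            best_avg = avg
    return best_coin  # almost always 'coin-a' for a large enough N
# ---------------------------------------------------------------------------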
def max_agent_program_random(percepts, actuators):
    # baseline MAX agent: mark a free cell chosen uniformly at random
    game_state = percepts['game-board-sensor']
    free_pos = game_state.find_value(None)
    if len(free_pos) > 0:
        move = random.choice(free_pos)
        actions = ['mark-{0}-{1}'.format(move[0], move[1])]
        return actions
    return []
def max_agent_program_minimax(percepts, actuators):
    # MAX agent using plain minimax (tic-tac-toe has at most 9 plies,
    # so a depth of 9 explores the full game tree)
    game_state = percepts['game-board-sensor']
    free_pos = game_state.find_value(None)
    if len(free_pos) > 0:
        _, best_move = minimax(game_state, 9, 'X', True)
        return [best_move]
    return []
def max_agent_program(percepts, actuators):
    # MAX agent using minimax with alpha-beta pruning ('X' marker)
    game_state = percepts['game-board-sensor']
    free_pos = game_state.find_value(None)
    if len(free_pos) > 0:
        _, best_move = minimax_alpha_beta(game_state, 100, 'X', True, float('-Inf'), float('+Inf'))
        # alternative: best_move = montecarlo_simulation(game_state, 'X', 100)
        return [best_move]
    return []
def min_agent_program(percepts, actuators):
    # MIN agent using flat Monte Carlo simulations ('O' marker)
    game_state = percepts['game-board-sensor']
    free_pos = game_state.find_value(None)
    if len(free_pos) > 0:
        # alternative (random): move = random.choice(free_pos)
        #                       return ['mark-{0}-{1}'.format(move[0], move[1])]
        # alternative (alpha-beta): _, best_move = minimax_alpha_beta(game_state, 100, 'O', True, float('-Inf'), float('+Inf'))
        best_move = montecarlo_simulation(game_state, 'O', 100)
        return [best_move]
    return []
# --- end of agent_programs.py ---

# --- main script: creates the TicTacToeGame with the two agent programs ---
from une_ai.tictactoe import TicTacToeGame
from agent_programs import max_agent_program, min_agent_program

if __name__ == '__main__':
    # create the game with the MAX and MIN agent programs defined above
    game = TicTacToeGame(
        max_agent_program,
        min_agent_program
    )