Commit 9195ad7b authored by Jon

Add material and solution for week 9

parent 914b0ded
Showing with 1503 additions and 0 deletions
File added
import math
from dt_node import DTNode
from guess_who_samples import SamplesSet
training_set = SamplesSet()
def entropy(samples, feature):
global training_set
total_entropy = 0
# generating a list with all the values for the considered
# feature in the samples
# you can use the class method get_values_by_feature from the class SamplesSet
# computing entropy by summing up
# the partial entropy for each feature value
# You can retrieve the possible values of a feature
# by using the method get_feature_values from the training_set instance
# for each possible value v of the considered feature
# count the number of positive samples,
# i.e. the samples whose value for the considered feature
# equals the current value v
#
# The probability is then the number of positive samples divided
# by the total number of samples
#
# If the probability is 0 or 1, the partial entropy is 0, so skip
# Else, the current partial entropy is -P(positive)*log2(P(positive))
#
# sum the partial entropy to the total entropy
return total_entropy
def remainder(samples, feature):
global training_set
total_remainder = 0
# Retrieve the possible values for the considered feature
# you can use the method .get_feature_values from the training_set instance
# For each possible value v of the considered feature
# Retrieve the samples having value = v for the considered feature
# you can use the class method get_samples_by_feature_value from the class SamplesSet
#
# If the number of retrieved samples is > 0
# compute the partial remainder as [#(retrieved samples) / #(samples)] * entropy(retrieved samples, decision feature)
# (the decision feature can be gathered from training_set.get_decision_feature())
# sum the partial remainder to the total remainder
return total_remainder
def information_gain(samples, feature):
global training_set
# Information gain can be computed as
# entropy(samples, decision feature) - remainder(samples, feature)
# The decision feature can be gathered from training_set.get_decision_feature()
ig = 0
return ig
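# Usage sketch (illustrative only, assuming the three functions above are completed):
# learn_tree below calls
#   information_gain(samples, 'IsFemale')
# for every candidate feature, which in turn relies on
#   entropy(samples, training_set.get_decision_feature()) and
#   remainder(samples, 'IsFemale')
# and splits on the feature with the largest information gain (always >= 0).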
def learn_tree(samples, features, parent_samples, parent_node=None, edge=None):
global training_set
if len(samples) == 0:
leaf = training_set.plurality_value(parent_samples)
return DTNode(leaf, parent_node, edge)
labels = SamplesSet.get_values_by_feature(samples, training_set.get_decision_feature())
if len(set(labels)) == 1:
leaf = labels[0]
return DTNode(leaf, parent_node, edge)
if len(features) == 0:
leaf = training_set.plurality_value(samples)
return DTNode(leaf, parent_node, edge)
best_feature = None
best_ig = None
for feature in features:
if feature != training_set.get_decision_feature():
cur_ig = information_gain(samples, feature)
if best_ig is None or cur_ig > best_ig:
best_ig = cur_ig
best_feature = feature
tree = DTNode(best_feature, parent_node, edge)
for value in training_set.get_feature_values(best_feature):
subsamples = SamplesSet.get_samples_by_feature_value(samples, best_feature, value)
subfeatures = features.copy()
subfeatures.remove(best_feature)
subtree = learn_tree(subsamples, subfeatures, samples, tree, best_feature)
tree.add_successor(subtree, value)
return tree
def classify(sample, decision_tree):
print("\n--------------------------")
print("Classifying the sample {0}".format(sample))
cur_node = decision_tree
while len(cur_node.get_successors()) > 0:
successors = cur_node.get_successors()
cur_feat = cur_node.get_state()
print("Evaluating feature '{0}'".format(cur_feat))
feat_val = sample.get_feature_value(cur_feat)
print("Feature value for the sample is '{0}'".format(feat_val))
cur_node = successors[feat_val]
print("Classification: {0}".format(cur_node.get_state()))
return cur_node.get_state()
if __name__ == '__main__':
print("Learning decision tree...")
decision_tree = learn_tree(
training_set.get_samples(),
training_set.get_classification_features(),
[], None, None)
print("Decision tree learned!")
print("Classification...\n")
n_misclassifications = 0
for sample in training_set.get_samples():
ground_truth = sample.get_label()
prediction = classify(sample, decision_tree)
if ground_truth != prediction:
n_misclassifications += 1
print("Misclassification for sample {0}".format(sample))
print("Ground truth was '{0}' and prediction was '{1}'".format(ground_truth, prediction))
print("\nNumber of misclassifications: {0}".format(n_misclassifications))
from une_ai.models import GraphNode
class DTNode(GraphNode):
def __init__(self, state, parent_node, edge):
super().__init__(state, parent_node, edge, 0)
self._successors = {}
def add_successor(self, successor, edge):
self._successors[edge] = successor
return self._successors[edge]
def get_successors(self):
successors = {}
for key, val in self._successors.items():
successors[key] = val
return successors
\ No newline at end of file
Name,IsFemale,HasDarkColourEyes,HasBlackHair,HasBlondHair,HasRedHair,HasWhiteHair,HasLongHair,HasBigLips,HasMoustache,HasBeard,WearHat,IsBald,WearGlasses,WearEarrings
Alex,No,Yes,Yes,No,No,No,No,Yes,Yes,No,No,No,No,No
Alfred,No,No,No,No,Yes,No,No,No,Yes,No,No,No,No,No
Anita,Yes,No,No,No,No,Yes,No,No,No,No,No,No,No,No
Anne,Yes,Yes,Yes,No,No,No,No,No,No,No,No,No,No,Yes
Bernard,No,Yes,No,No,No,No,No,No,No,No,Yes,No,No,No
Bill,No,Yes,No,No,Yes,No,No,No,No,Yes,No,Yes,No,No
Charles,No,Yes,No,Yes,No,No,No,Yes,Yes,No,No,No,No,No
Claire,Yes,Yes,No,No,Yes,No,Yes,No,No,No,Yes,No,Yes,No
David,No,Yes,No,Yes,No,No,Yes,No,No,Yes,No,No,No,No
Eric,No,Yes,No,Yes,No,No,No,No,No,No,Yes,No,No,No
Frans,No,Yes,No,No,Yes,No,No,No,No,No,No,No,No,No
George,No,Yes,No,No,No,Yes,No,No,No,No,Yes,No,No,No
Herman,No,Yes,No,No,Yes,No,No,No,No,No,No,Yes,No,No
Joe,No,Yes,No,Yes,No,No,No,No,No,No,No,No,Yes,No
Maria,Yes,Yes,No,No,No,No,Yes,No,No,No,Yes,No,No,Yes
Max,No,Yes,Yes,No,No,No,No,Yes,Yes,No,No,No,No,No
Paul,No,Yes,No,No,No,Yes,No,No,No,No,No,No,Yes,No
Peter,No,No,No,No,No,Yes,No,Yes,No,No,No,No,No,No
Philip,No,Yes,Yes,No,No,No,No,No,No,Yes,No,No,No,No
Richard,No,Yes,No,No,No,No,No,No,Yes,Yes,No,Yes,No,No
Robert,No,No,No,No,No,No,No,No,No,No,No,No,No,No
Sam,No,Yes,No,No,No,Yes,No,No,No,No,No,Yes,Yes,No
Susan,Yes,Yes,No,Yes,No,No,Yes,Yes,No,No,No,No,No,Yes
Tom,No,No,Yes,No,No,No,No,No,No,No,No,Yes,Yes,No
\ No newline at end of file
import csv
class Sample():
def __init__(self, sample_vector, features, decision_feature):
self._x = {}
self._y = {}
for i, feature in enumerate(features):
if feature != decision_feature:
self._x[feature] = sample_vector[i]
else:
self._y[feature] = sample_vector[i]
def get_sample_values(self):
sample_vector = []
for value in self._x.values():
sample_vector.append(value)
return sample_vector
def get_feature_value(self, feature):
assert feature in self._x.keys() or feature in self._y.keys(), "'{0}' is not a valid feature.".format(feature)
if feature in self._x.keys():
return self._x[feature]
else:
return self.get_label()
def get_label(self):
decision_feature = list(self._y.keys())[0]
return self._y[decision_feature]
def __str__(self):
return str(self._x) + ' -> ' + str(self._y)
class SamplesSet():
def __init__(self, guess_who_file='guess_who.csv', decision_feature='Name'):
self._features_values = {}
self._features = []
self._decision_feature = decision_feature
self._samples = []
with open(guess_who_file, newline='', mode='r', encoding='utf-8-sig') as f:
reader = csv.reader(f)
for i, row in enumerate(reader):
if i == 0:
# First row with the name of the features
self._features = row
for feature in self._features:
self._features_values[feature] = []
continue
# rows with names and values for the features
# the name is our label we want to predict from the features
cur_sample = row
self.add_sample(cur_sample)
for i, feature_val in enumerate(cur_sample):
cur_feature = self._features[i]
if feature_val not in self._features_values[cur_feature]:
self._features_values[cur_feature].append(feature_val)
def add_sample(self, sample_vector):
new_sample = Sample(sample_vector, self._features, self._decision_feature)
self._samples.append(new_sample)
return len(self._samples)-1
def get_classification_features(self):
classification_features = self._features.copy()
classification_features.remove(self._decision_feature)
return classification_features
def get_decision_feature(self):
return self._decision_feature
def get_feature_values(self, feature):
assert feature in self._features_values.keys(), "'{0}' is not a valid feature.".format(feature)
return self._features_values[feature]
def get_sample_at_index(self, index):
assert index >= 0 and index < len(self._samples), "The parameter index must be >= 0 and less than the number of samples"
return self._samples[index]
def get_samples(self):
return self._samples
def get_values_by_feature(samples, feature):
values = []
for sample in samples:
values.append(sample.get_feature_value(feature))
return values
def get_samples_by_feature_value(samples, feature, feature_value):
subsamples = []
for sample in samples:
if sample.get_feature_value(feature) == feature_value:
subsamples.append(sample)
return subsamples
def plurality_value(self, samples):
max_count = None
best_label = None
labels = SamplesSet.get_values_by_feature(samples, self._decision_feature)
for value in self._features_values[self._decision_feature]:
cur_count = labels.count(value)
if max_count is None or cur_count > max_count:
max_count = cur_count
best_label = value
return best_label
\ No newline at end of file
Name,Gender,EyeColour,HairColour,HairLength,LipsSize,HasMoustache,HasBeard,WearHat,IsBald,WearGlasses,WearEarrings
Alex,Male,Brown,Black,Short,Big,Yes,No,No,No,No,No
Alfred,Male,Blue,Red,Short,Small,Yes,No,No,No,No,No
Anita,Female,Blue,White,Short,Small,No,No,No,No,No,No
Anne,Female,Brown,Black,Short,Small,No,No,No,No,No,Yes
Bernard,Male,Brown,Brown,Short,Small,No,No,Yes,No,No,No
Bill,Male,Brown,Red,Short,Small,No,Yes,No,Yes,No,No
Charles,Male,Brown,Blond,Short,Big,Yes,No,No,No,No,No
Claire,Female,Brown,Red,Long,Small,No,No,Yes,No,Yes,No
David,Male,Brown,Blond,Long,Small,No,Yes,No,No,No,No
Eric,Male,Brown,Blond,Short,Small,No,No,Yes,No,No,No
Frans,Male,Brown,Red,Short,Small,No,No,No,No,No,No
George,Male,Brown,White,Short,Small,No,No,Yes,No,No,No
Herman,Male,Brown,Red,Short,Small,No,No,No,Yes,No,No
Joe,Male,Brown,Blond,Short,Small,No,No,No,No,Yes,No
Maria,Female,Brown,Brown,Long,Small,No,No,Yes,No,No,Yes
Max,Male,Brown,Black,Short,Big,Yes,No,No,No,No,No
Paul,Male,Brown,White,Short,Small,No,No,No,No,Yes,No
Peter,Male,Blue,White,Short,Big,No,No,No,No,No,No
Philip,Male,Brown,Black,Short,Small,No,Yes,No,No,No,No
Richard,Male,Brown,Brown,Short,Small,Yes,Yes,No,Yes,No,No
Robert,Male,Blue,Brown,Short,Small,No,No,No,No,No,No
Sam,Male,Brown,White,Short,Small,No,No,No,Yes,Yes,No
Susan,Female,Brown,Blond,Long,Big,No,No,No,No,No,Yes
Tom,Male,Blue,Black,Short,Small,No,No,No,Yes,Yes,No
\ No newline at end of file
import random
import json
from tictactoe_game_environment import TicTacToeGameEnvironment
from reinforcement_learning import state_to_str
def agent_program_random(percepts, actuators):
game_board = percepts['game-board-sensor']
player_turn = percepts['turn-taking-indicator']
game_state = {
'game-board': game_board.copy(),
'player-turn': player_turn
}
legal_moves = TicTacToeGameEnvironment.get_legal_actions(game_state)
if len(legal_moves) > 0:
return [random.choice(legal_moves)]
return []
def agent_program_RL(percepts, actuators):
game_board = percepts['game-board-sensor']
player_turn = percepts['turn-taking-indicator']
game_state = {
'game-board': game_board.copy(),
'player-turn': player_turn
}
with open('vfunction.json', 'r') as f:
vfunction = json.load(f)
opponent = 'X' if player_turn == 'O' else 'O'
v_player = vfunction['player-{0}'.format(player_turn)]
v_opponent = vfunction['player-{0}'.format(opponent)]
if not TicTacToeGameEnvironment.is_terminal(game_state):
best_action = None
max_advantage = None
for action in TicTacToeGameEnvironment.get_legal_actions(game_state):
new_state = TicTacToeGameEnvironment.transition_result(game_state, action)
future_state_str = state_to_str(new_state)
if future_state_str in v_player.keys() and future_state_str in v_opponent.keys():
advantage = v_player[future_state_str] - v_opponent[future_state_str]
if best_action is None or advantage > max_advantage:
best_action = action
max_advantage = advantage
if best_action is not None:
return [best_action]
else:
print("No best action found in v function for state {0}. Selecting it randomly".format(state_to_str(game_state)))
actions = TicTacToeGameEnvironment.get_legal_actions(game_state)
selected_action = random.choice(actions)
return [selected_action]
return []
import random
import json
import sys
from tictactoe_game_environment import TicTacToeGameEnvironment
# code taken from https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
# A function to print a progress bar to keep track
# of the learning process
def progress(count, total, suffix=''):
bar_len = 60
filled_len = int(round(bar_len * count / float(total)))
percents = round(100.0 * count / float(total), 1)
bar = '=' * filled_len + '-' * (bar_len - filled_len)
sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
sys.stdout.flush()
# A function transforming a game board state into a
# string representing it
def state_to_str(state):
board_str = ''
game_board = state['game-board']
for i in range(game_board.get_width()):
for j in range(game_board.get_height()):
value = game_board.get_item_value(i, j)
if value is None:
value = '*'
board_str += value
return board_str
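# For illustration (hypothetical states): an empty 3x3 board is encoded as
# '*********', while a board whose only mark is an 'X' at get_item_value(0, 0)
# becomes 'X********'. These strings are later used as dictionary keys for the
# learned v functions.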
# A function that picks the best next action for the
# current player given the current policies.
# The best action for the player is the one maximising the
# difference between the player's v-value for the
# transitioned state and the opponent's v-value for
# that same transitioned state.
# With probability off_policy_likelihood, a random action
# is selected instead of the best one.
def pick_best_action(vfunction, state, off_policy_likelihood):
player = 'player-{0}'.format(state['player-turn'])
opponent = 'player-X' if state['player-turn'] == 'O' else 'player-O'
v_player = vfunction[player]
v_opponent = vfunction[opponent]
# getting the legal actions
legal_actions = [] # replace this line with the correct list of legal actions
# setting the selected action as None
selected_action = None
# keeping track of the states not explored yet
# (for exploration mode)
unvisited_future_states = []
# selecting the action based on the current policy from the
# so far learned v function for the player
max_advantage = float('-Inf')
for action in legal_actions:
# Transition the state given the current action
# Transform the state into a string identifier with the
# function state_to_str
# If the state identifier is present in the v functions of both players
# compute the advantage by subtracting the v-value of the player for
# the transitioned state with the v-value of the opponent for the transitioned state
# update the max_advantage if advantage > max_advantage
# else, if the state identifier is not in the vfunction of the current player
# add the state identifier to the unvisited_future_states
pass
# checking if we should use exploration instead of exploitation
if random.random() < off_policy_likelihood or selected_action is None:
# exploration mode, selecting random action
if len(unvisited_future_states) > 0:
item = random.choice(unvisited_future_states)
selected_action = item[0]
else:
selected_action = random.choice(legal_actions)
return selected_action
# A function to update the v function of a player
# The v function is updated according to the TD(0) equation
def update_v_function(v, state, new_state, reward, alpha, gamma):
# Compute the string state identifier for state
# If the identifier is in the v function, take that value as the
# old v-value
# else, set the v-value for that state identifier as random.random()*0.1
# and set old v-value to that random value
# Compute the string state identifier for new_state
# If the identifier is in the v function, take that value as the
# v-value at state s'
# else, set the v-value for that state identifier as random.random()*0.1
# and set s' v-value to that random value
# updated v value = old_v-value + alpha*(reward + gamma*s'_v-value - old_v-value)
# It is not necessary to return any value:
# just update the v-value in the dictionary; since the dictionary is
# passed by reference, the change will persist after the function returns
pass
# The learning function
def learn(alpha=0.1, gamma=0.9, off_policy_likelihood=0.1, n_episodes=100000):
# setting the v functions for the players as empty dictionaries
vfunction = {'player-X': {}, 'player-O': {}}
# We start from the opening state of the game
starting_environment = TicTacToeGameEnvironment()
state = starting_environment.get_game_state()
# We need to keep track of the last state to update for the
# other player
past_states = {'player-X': None, 'player-O': None}
# Loop for n_episodes
i = 0
while i < n_episodes:
progress(i, n_episodes)
cur_player = state['player-turn']
last_player = 'X' if state['player-turn'] == 'O' else 'O'
v_cur_player = vfunction['player-{0}'.format(cur_player)]
v_last_player = vfunction['player-{0}'.format(last_player)]
# from the current state, we pick the best action
selected_action = pick_best_action(
vfunction,
state,
off_policy_likelihood
)
# Transitioning state
new_state = TicTacToeGameEnvironment.transition_result(state, selected_action)
# We store the transitioned state as the state to update for the current player
# during the next iteration
past_states['player-{0}'.format(state['player-turn'])] = new_state
# We compute the reward for the current player for performing
# the selected action and transitioning to new_state
# If new_state is not terminal, the reward will be 0
cur_player_reward = TicTacToeGameEnvironment.payoff(new_state, cur_player)
# Given the received reward, we update the v function of the current player
# Even if the reward = 0, the v function for the current state will be updated
# given the gamma*v-value of the new_state, thus propagating the rewards from
# terminal states back to early states of the game
update_v_function(v_cur_player, state, new_state, cur_player_reward, alpha, gamma)
# We also need to update the v function of the last player
# If we have a past state for the last player, we can do so
if past_states['player-{0}'.format(last_player)] is not None:
last_player_new_state = past_states['player-{0}'.format(last_player)]
# The reward is given by the payoff of the same new_state but for the last_player
# In this game it is simply -1*cur_player_reward
last_player_reward = TicTacToeGameEnvironment.payoff(new_state, last_player)
# We need to update the v-value for the past state of the last player
# when transitioning to the new_state with the achieved reward for new_state
update_v_function(v_last_player, last_player_new_state, new_state, last_player_reward, alpha, gamma)
# We check if the episode terminated
if TicTacToeGameEnvironment.is_terminal(new_state):
# Yes, we restart
state = starting_environment.get_game_state()
past_states = {'player-X': None, 'player-O': None}
i += 1
else:
# No, we continue
state = new_state
return vfunction
if __name__ == "__main__":
vfunction = learn()
with open('vfunction.json', 'w+') as f:
json.dump(vfunction, f)
\ No newline at end of file
from une_ai.tictactoe import TicTacToeGame
from une_ai.tictactoe import TicTacToePlayer
from tictactoe_game_environment import TicTacToeGameEnvironment
from agent_programs import agent_program_RL, agent_program_random
if __name__ == '__main__':
player_X = TicTacToePlayer('X', agent_program_RL)
player_O = TicTacToePlayer('O', agent_program_random)
# DO NOT EDIT THE FOLLOWING INSTRUCTIONS!
environment = TicTacToeGameEnvironment()
environment.add_player(player_X)
environment.add_player(player_O)
game = TicTacToeGame(player_X, player_O, environment)
import numpy as np
from scipy.signal import convolve2d
from une_ai.models import GameEnvironment, GridMap, Agent
class IllegalMove(Exception):
pass
class TicTacToeGameEnvironment(GameEnvironment):
def __init__(self, board_size=3):
super().__init__("Tic Tac Toe")
self._board_size = board_size
self._game_board = GridMap(board_size, board_size, None)
self._player_turn = 'X' # X always starts
# TODO
# implement the abstract method add_player
# the GameEnvironment superclass uses a dictionary self._players
# to store the players of the game.
# For this game, we must limit the number of players to 2:
# the first added player will be X and the second O
def add_player(self, player):
assert isinstance(player, Agent), "The parameter player must be an instance of a subclass of the class Agent"
assert len(self._players) < 2, "It is not possible to add more than 2 players for this game."
if len(self._players) == 0:
marker = 'X'
else:
marker = 'O'
self._players[marker] = player
return marker
# TODO
# implement the abstract method get_game_state
# the method must return the current state of the game
# as a dictionary with the following keys:
# 'game-board' -> a copy of the game board (as 3x3 GridMap)
# 'player-turn' -> 'X' or 'O' depending on the current player turn
# You may first create properties in the constructor function __init__
# to store the game board and the current turn
def get_game_state(self):
gs = {
'game-board': self._game_board.copy(),
'player-turn': self._player_turn
}
return gs
# TODO
# implement the abstract method get_percepts
# this method returns a dictionary with keys the sensors of the agent
# and values the percepts gathered for that sensor at time t
# the sensors are:
# 'game-board-sensor' -> the 'game-board' value from the current game state
# 'turn-taking-indicator' -> the 'player-turn' value from the current game state
def get_percepts(self):
gs = self.get_game_state()
return {
'game-board-sensor': gs['game-board'],
'turn-taking-indicator': gs['player-turn']
}
# TODO
# implement the abstract method get_legal_actions
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state as input and it returns the list of
# legal actions in that game state
# An action is legal in a given game state if the game board cell
# for that action is free from marks
def get_legal_actions(game_state):
legal_actions = []
game_board = game_state['game-board']
empty_cells = game_board.find_value(None)
for empty_cell in empty_cells:
legal_actions.append('mark-{0}-{1}'.format(empty_cell[0], empty_cell[1]))
return legal_actions
# TODO
# implement the abstract method transition_result
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state and an action to perform as input and it returns
# the new game state.
def transition_result(game_state, action):
legal_actions = TicTacToeGameEnvironment.get_legal_actions(game_state)
if action not in legal_actions:
raise(IllegalMove('The action {0} is not a legal move for the given game state {1}.'.format(action, game_state['game-board'].get_map())))
marker = TicTacToeGameEnvironment.turn(game_state)
tokens = action.split('-')
x, y = (int(tokens[1]), int(tokens[2]))
new_game_board = game_state['game-board'].copy()
new_game_board.set_item_value(x, y, marker)
new_game_state = {
'game-board': new_game_board,
'player-turn': 'O' if game_state['player-turn'] == 'X' else 'X'
}
return new_game_state
# TODO
# implement the abstract method state_transition
# this method takes as input the agent's actuators
# and it changes the game environment state based
# on the values of the agent's actuators
# This agent has only one actuator, 'marker'
# the value of this actuator is a tuple with the x and y
# coordinates where the agent will place its marker on the game board
# We can implement this method by re-using the static method
# transition_result we just implemented
def state_transition(self, agent_actuators):
assert agent_actuators['marker'] is not None, "During a turn, the player must have set the 'marker' actuator value to a coordinate (x, y) of the game board where to place the marker."
x, y = agent_actuators['marker']
gs = self.get_game_state()
action = 'mark-{0}-{1}'.format(x, y)
new_gs = TicTacToeGameEnvironment.transition_result(gs, action)
self._player_turn = new_gs['player-turn']
self._game_board = new_gs['game-board'].copy()
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It returns the turn of the player given a game state.
def turn(game_state):
assert 'player-turn' in game_state.keys(), "Invalid game state. A game state must have the key 'player-turn'"
return game_state['player-turn']
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state as input and it returns the winner ('X' or 'O') if there is any
# or None if there is no winner (a tie or a non-terminal state)
# This method is already provided to you. You should look at its implementation
# and try to understand how it is finding a winner with the convolution operation
def get_winner(game_state):
game_board = game_state['game-board']
horizontal_kernel = np.array([[ 1, 1, 1]])
vertical_kernel = np.transpose(horizontal_kernel)
diag_kernel = np.eye(3, dtype=np.uint8)
flipped_diag_kernel = np.fliplr(diag_kernel)
detection_kernels = [horizontal_kernel, vertical_kernel, diag_kernel, flipped_diag_kernel]
for marker in ['X', 'O']:
player_markers = game_board.get_map() == marker
for kernel in detection_kernels:
convolved_values = convolve2d(player_markers, kernel, mode="valid")
if (convolved_values == 3).any():
return marker
return None
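# A small worked example of the detection idea (hypothetical board, not part of
# the environment itself): if X holds a whole row, the boolean mask
# game_board.get_map() == 'X' is
#   [[ True,  True,  True],
#    [False,  True, False],
#    [False, False, False]]
# and convolving it with the horizontal kernel [[1, 1, 1]] in 'valid' mode gives
#   [[3], [1], [0]]
# The value 3 means three aligned markers were found, so 'X' is the winner.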
# TODO
# implement the abstract method is_terminal
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state as input and it returns True if the game state
# is terminal and False otherwise.
# In this game, a state is terminal if there are no more legal actions
# or if there is a winner.
def is_terminal(game_state):
# the game is over if the board is full or if there is a winner
remaining_actions = TicTacToeGameEnvironment.get_legal_actions(game_state)
winner = TicTacToeGameEnvironment.get_winner(game_state)
return len(remaining_actions) == 0 or winner is not None
# TODO
# implement the abstract method payoff
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state and the player name ('X' or 'O') as input and it returns
# the payoff value for that player in the given game state
# In this scenario, we only consider terminal states with a winner;
# if there is no winner yet (or the game is a tie) we return 0
# In other games the payoff function may be more complex
def payoff(game_state, player_name):
winner = TicTacToeGameEnvironment.get_winner(game_state)
if winner is None:
return 0
elif winner == player_name:
return 1
else:
return -1
\ No newline at end of file
File added
import math
from dt_node import DTNode
from guess_who_samples import SamplesSet
training_set = SamplesSet()
def entropy(samples, feature):
global training_set
# generating a list with all the values for the considered
# feature in the given samples
values = SamplesSet.get_values_by_feature(samples, feature)
# computing entropy by summing up
# the partial entropy for each feature value
feature_vals = training_set.get_feature_values(feature)
total_entropy = 0
for value in feature_vals:
n_positive = values.count(value)
prob = n_positive / len(values)
if prob == 0 or prob == 1:
# current partial entropy is 0, skip
continue
total_entropy += prob*math.log2(prob)
return -1*total_entropy
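# A quick numeric check of the formula (hypothetical counts, not tied to the CSV):
# with 8 samples taking value 'Yes' and 16 taking value 'No' for a feature,
# p('Yes') = 1/3 and p('No') = 2/3, so
#   entropy = -(1/3*math.log2(1/3) + 2/3*math.log2(2/3)) ≈ 0.918 bits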
def remainder(samples, feature):
global training_set
feature_vals = training_set.get_feature_values(feature)
total_remainder = 0
for value in feature_vals:
cur_samples = SamplesSet.get_samples_by_feature_value(samples, feature, value)
if len(cur_samples) > 0:
total_remainder += (len(cur_samples) / len(samples)) * entropy(cur_samples, training_set.get_decision_feature())
return total_remainder
def information_gain(samples, feature):
global training_set
return entropy(samples, training_set.get_decision_feature()) - remainder(samples, feature)
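# Worked example on the guess_who.csv data (values are approximate): the decision
# feature 'Name' takes 24 distinct values, one per sample, so
#   entropy(samples, 'Name') = log2(24) ≈ 4.585 bits
# Splitting on 'IsFemale' separates 5 female from 19 male samples, giving
#   remainder = (5/24)*log2(5) + (19/24)*log2(19) ≈ 3.847 bits
# and therefore information_gain(samples, 'IsFemale') ≈ 4.585 - 3.847 ≈ 0.74 bits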
def learn_tree(samples, features, parent_samples, parent_node=None, edge=None):
global training_set
if len(samples) == 0:
leaf = training_set.plurality_value(parent_samples)
return DTNode(leaf, parent_node, edge)
labels = SamplesSet.get_values_by_feature(samples, training_set.get_decision_feature())
if len(set(labels)) == 1:
leaf = labels[0]
return DTNode(leaf, parent_node, edge)
if len(features) == 0:
leaf = training_set.plurality_value(samples)
return DTNode(leaf, parent_node, edge)
best_feature = None
best_ig = None
for feature in features:
if feature != training_set.get_decision_feature():
cur_ig = information_gain(samples, feature)
if best_ig is None or cur_ig > best_ig:
best_ig = cur_ig
best_feature = feature
tree = DTNode(best_feature, parent_node, edge)
for value in training_set.get_feature_values(best_feature):
subsamples = SamplesSet.get_samples_by_feature_value(samples, best_feature, value)
subfeatures = features.copy()
subfeatures.remove(best_feature)
subtree = learn_tree(subsamples, subfeatures, samples, tree, best_feature)
tree.add_successor(subtree, value)
return tree
def classify(sample, decision_tree):
print("\n--------------------------")
print("Classifying the sample {0}".format(sample))
cur_node = decision_tree
while len(cur_node.get_successors()) > 0:
successors = cur_node.get_successors()
cur_feat = cur_node.get_state()
print("Evaluating feature '{0}'".format(cur_feat))
feat_val = sample.get_feature_value(cur_feat)
print("Feature value for the sample is '{0}'".format(feat_val))
cur_node = successors[feat_val]
print("Classification: {0}".format(cur_node.get_state()))
return cur_node.get_state()
if __name__ == '__main__':
print("Learning decision tree...")
decision_tree = learn_tree(
training_set.get_samples(),
training_set.get_classification_features(),
[], None, None)
print("Decision tree learned!")
print("Classification...\n")
n_misclassifications = 0
for sample in training_set.get_samples():
ground_truth = sample.get_label()
prediction = classify(sample, decision_tree)
if ground_truth != prediction:
n_misclassifications += 1
print("Misclassification for sample {0}".format(sample))
print("Ground truth was '{0}' and prediction was '{1}'".format(ground_truth, prediction))
print("\nNumber of misclassifications: {0}".format(n_misclassifications))
from une_ai.models import GraphNode
class DTNode(GraphNode):
def __init__(self, state, parent_node, edge):
super().__init__(state, parent_node, edge, 0)
self._successors = {}
def add_successor(self, successor, edge):
self._successors[edge] = successor
return self._successors[edge]
def get_successors(self):
successors = {}
for key, val in self._successors.items():
successors[key] = val
return successors
\ No newline at end of file
Name,IsFemale,HasDarkColourEyes,HasBlackHair,HasBlondHair,HasRedHair,HasWhiteHair,HasLongHair,HasBigLips,HasMoustache,HasBeard,WearHat,IsBald,WearGlasses,WearEarrings
Alex,No,Yes,Yes,No,No,No,No,Yes,Yes,No,No,No,No,No
Alfred,No,No,No,No,Yes,No,No,No,Yes,No,No,No,No,No
Anita,Yes,No,No,No,No,Yes,No,No,No,No,No,No,No,No
Anne,Yes,Yes,Yes,No,No,No,No,No,No,No,No,No,No,Yes
Bernard,No,Yes,No,No,No,No,No,No,No,No,Yes,No,No,No
Bill,No,Yes,No,No,Yes,No,No,No,No,Yes,No,Yes,No,No
Charles,No,Yes,No,Yes,No,No,No,Yes,Yes,No,No,No,No,No
Claire,Yes,Yes,No,No,Yes,No,Yes,No,No,No,Yes,No,Yes,No
David,No,Yes,No,Yes,No,No,Yes,No,No,Yes,No,No,No,No
Eric,No,Yes,No,Yes,No,No,No,No,No,No,Yes,No,No,No
Frans,No,Yes,No,No,Yes,No,No,No,No,No,No,No,No,No
George,No,Yes,No,No,No,Yes,No,No,No,No,Yes,No,No,No
Herman,No,Yes,No,No,Yes,No,No,No,No,No,No,Yes,No,No
Joe,No,Yes,No,Yes,No,No,No,No,No,No,No,No,Yes,No
Maria,Yes,Yes,No,No,No,No,Yes,No,No,No,Yes,No,No,Yes
Max,No,Yes,Yes,No,No,No,No,Yes,Yes,No,No,No,No,No
Paul,No,Yes,No,No,No,Yes,No,No,No,No,No,No,Yes,No
Peter,No,No,No,No,No,Yes,No,Yes,No,No,No,No,No,No
Philip,No,Yes,Yes,No,No,No,No,No,No,Yes,No,No,No,No
Richard,No,Yes,No,No,No,No,No,No,Yes,Yes,No,Yes,No,No
Robert,No,No,No,No,No,No,No,No,No,No,No,No,No,No
Sam,No,Yes,No,No,No,Yes,No,No,No,No,No,Yes,Yes,No
Susan,Yes,Yes,No,Yes,No,No,Yes,Yes,No,No,No,No,No,Yes
Tom,No,No,Yes,No,No,No,No,No,No,No,No,Yes,Yes,No
\ No newline at end of file
import csv
class Sample():
def __init__(self, sample_vector, features, decision_feature):
self._x = {}
self._y = {}
for i, feature in enumerate(features):
if feature != decision_feature:
self._x[feature] = sample_vector[i]
else:
self._y[feature] = sample_vector[i]
def get_sample_values(self):
sample_vector = []
for value in self._x.values():
sample_vector.append(value)
return sample_vector
def get_feature_value(self, feature):
assert feature in self._x.keys() or feature in self._y.keys(), "'{0}' is not a valid feature.".format(feature)
if feature in self._x.keys():
return self._x[feature]
else:
return self.get_label()
def get_label(self):
decision_feature = list(self._y.keys())[0]
return self._y[decision_feature]
def __str__(self):
return str(self._x) + ' -> ' + str(self._y)
class SamplesSet():
def __init__(self, guess_who_file='guess_who.csv', decision_feature='Name'):
self._features_values = {}
self._features = []
self._decision_feature = decision_feature
self._samples = []
with open(guess_who_file, newline='', mode='r', encoding='utf-8-sig') as f:
reader = csv.reader(f)
for i, row in enumerate(reader):
if i == 0:
# First row with the name of the features
self._features = row
for feature in self._features:
self._features_values[feature] = []
continue
# rows with names and values for the features
# the name is our label we want to predict from the features
cur_sample = row
self.add_sample(cur_sample)
for i, feature_val in enumerate(cur_sample):
cur_feature = self._features[i]
if feature_val not in self._features_values[cur_feature]:
self._features_values[cur_feature].append(feature_val)
def add_sample(self, sample_vector):
new_sample = Sample(sample_vector, self._features, self._decision_feature)
self._samples.append(new_sample)
return len(self._samples)-1
def get_classification_features(self):
classification_features = self._features.copy()
classification_features.remove(self._decision_feature)
return classification_features
def get_decision_feature(self):
return self._decision_feature
def get_feature_values(self, feature):
assert feature in self._features_values.keys(), "'{0}' is not a valid feature.".format(feature)
return self._features_values[feature]
def get_sample_at_index(self, index):
assert index >= 0 and index < len(self._samples), "The parameter index must be >= 0 and less than the number of samples"
return self._samples[index]
def get_samples(self):
return self._samples
def get_values_by_feature(samples, feature):
values = []
for sample in samples:
values.append(sample.get_feature_value(feature))
return values
def get_samples_by_feature_value(samples, feature, feature_value):
subsamples = []
for sample in samples:
if sample.get_feature_value(feature) == feature_value:
subsamples.append(sample)
return subsamples
def plurality_value(self, samples):
max_count = None
best_label = None
labels = SamplesSet.get_values_by_feature(samples, self._decision_feature)
for value in self._features_values[self._decision_feature]:
cur_count = labels.count(value)
if max_count is None or cur_count > max_count:
max_count = cur_count
best_label = value
return best_label
\ No newline at end of file
import random
import json
from tictactoe_game_environment import TicTacToeGameEnvironment
from reinforcement_learning import state_to_str
def agent_program_random(percepts, actuators):
game_board = percepts['game-board-sensor']
player_turn = percepts['turn-taking-indicator']
game_state = {
'game-board': game_board.copy(),
'player-turn': player_turn
}
legal_moves = TicTacToeGameEnvironment.get_legal_actions(game_state)
if len(legal_moves) > 0:
return [random.choice(legal_moves)]
return []
def agent_program_RL(percepts, actuators):
game_board = percepts['game-board-sensor']
player_turn = percepts['turn-taking-indicator']
game_state = {
'game-board': game_board.copy(),
'player-turn': player_turn
}
with open('vfunction.json', 'r') as f:
vfunction = json.load(f)
opponent = 'X' if player_turn == 'O' else 'O'
v_player = vfunction['player-{0}'.format(player_turn)]
v_opponent = vfunction['player-{0}'.format(opponent)]
if not TicTacToeGameEnvironment.is_terminal(game_state):
best_action = None
max_advantage = None
for action in TicTacToeGameEnvironment.get_legal_actions(game_state):
new_state = TicTacToeGameEnvironment.transition_result(game_state, action)
future_state_str = state_to_str(new_state)
if future_state_str in v_player.keys() and future_state_str in v_opponent.keys():
advantage = v_player[future_state_str] - v_opponent[future_state_str]
if best_action is None or advantage > max_advantage:
best_action = action
max_advantage = advantage
if best_action is not None:
return [best_action]
else:
print("No best action found in v function for state {0}. Selecting it randomly".format(state_to_str(game_state)))
actions = TicTacToeGameEnvironment.get_legal_actions(game_state)
selected_action = random.choice(actions)
return [selected_action]
return []
import random
import json
import sys
from tictactoe_game_environment import TicTacToeGameEnvironment
# code taken from https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
# A function to print a progress bar to keep track
# of the learning process
def progress(count, total, suffix=''):
bar_len = 60
filled_len = int(round(bar_len * count / float(total)))
percents = round(100.0 * count / float(total), 1)
bar = '=' * filled_len + '-' * (bar_len - filled_len)
sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
sys.stdout.flush()
# A function transforming a game board state into a
# string representing it
def state_to_str(state):
board_str = ''
game_board = state['game-board']
for i in range(game_board.get_width()):
for j in range(game_board.get_height()):
value = game_board.get_item_value(i, j)
if value is None:
value = '*'
board_str += value
return board_str
# A function that picks the best next action for the
# current player given the current policies.
# The best action for the player is the one maximising the
# difference between the player's v-value for the
# transitioned state and the opponent's v-value for
# that same transitioned state.
# With probability off_policy_likelihood, a random action
# is selected instead of the best one.
def pick_best_action(vfunction, state, off_policy_likelihood):
player = 'player-{0}'.format(state['player-turn'])
opponent = 'player-X' if state['player-turn'] == 'O' else 'player-O'
v_player = vfunction[player]
v_opponent = vfunction[opponent]
# getting the legal actions
legal_actions = TicTacToeGameEnvironment.get_legal_actions(state)
# setting the selected action as None
selected_action = None
# keeping track of the states not explored yet
# (for exploration mode)
unvisited_future_states = []
# selecting the action based on the current policy from the
# so far learned v function for the player
max_advantage = float('-Inf')
for action in legal_actions:
future_state = TicTacToeGameEnvironment.transition_result(state, action)
future_state_str = state_to_str(future_state)
if future_state_str in v_player.keys() and future_state_str in v_opponent.keys():
advantage = v_player[future_state_str] - v_opponent[future_state_str]
if advantage > max_advantage:
selected_action = action
max_advantage = advantage
elif future_state_str not in v_player.keys():
unvisited_future_states.append((action, future_state))
# checking if we should use exploration instead of exploitation
if random.random() < off_policy_likelihood or selected_action is None:
# exploration mode, selecting random action
if len(unvisited_future_states) > 0:
item = random.choice(unvisited_future_states)
selected_action = item[0]
else:
selected_action = random.choice(legal_actions)
return selected_action
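# Illustrative numbers (hypothetical v-values): if a transitioned state s' has
# v_player[s'] = 0.62 and v_opponent[s'] = 0.15, its advantage is 0.47, and the
# legal action with the largest such advantage is selected. With probability
# off_policy_likelihood (or when no visited successor exists) a random or
# unvisited action is chosen instead, so the agent keeps exploring new states.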
# A function to update the v function of a player
# The v function is updated according to the TD(0) equation
def update_v_function(v, state, new_state, reward, alpha, gamma):
state_str = state_to_str(state)
if state_str not in v.keys():
v[state_str] = random.random()*0.1
future_state_str = state_to_str(new_state)
if future_state_str not in v.keys():
v[future_state_str] = random.random()*0.1
v_old = v[state_str]
v_new = v_old + alpha*(reward + gamma*v[future_state_str] - v_old)
v[state_str] = v_new
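# Numeric example of the TD(0) update (hypothetical values): with alpha = 0.1,
# gamma = 0.9, v[s] = 0.50, v[s'] = 0.20 and reward = 1,
#   v[s] <- 0.50 + 0.1*(1 + 0.9*0.20 - 0.50) = 0.568
# i.e. the value of s moves a small step towards the reward plus the discounted
# value of the successor state.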
# The learning function
def learn(alpha=0.1, gamma=0.9, off_policy_likelihood=0.1, n_episodes=100000):
# setting the v functions for the players as empty dictionaries
vfunction = {'player-X': {}, 'player-O': {}}
# We start from the opening state of the game
starting_environment = TicTacToeGameEnvironment()
state = starting_environment.get_game_state()
# We need to keep track of the last state to update for the
# other player
past_states = {'player-X': None, 'player-O': None}
# Loop for n_episodes
i = 0
while i < n_episodes:
progress(i, n_episodes)
cur_player = state['player-turn']
last_player = 'X' if state['player-turn'] == 'O' else 'O'
v_cur_player = vfunction['player-{0}'.format(cur_player)]
v_last_player = vfunction['player-{0}'.format(last_player)]
# from the current state, we pick the best action
selected_action = pick_best_action(
vfunction,
state,
off_policy_likelihood
)
# Transitioning state
new_state = TicTacToeGameEnvironment.transition_result(state, selected_action)
# We store the transitioned state as the state to update for the current player
# during the next iteration
past_states['player-{0}'.format(state['player-turn'])] = new_state
# We compute the reward for the current player for performing
# the selected action and transitioning to new_state
# If new_state is not terminal, the reward will be 0
cur_player_reward = TicTacToeGameEnvironment.payoff(new_state, cur_player)
# Given the received reward, we update the v function of the current player
# Even if the reward = 0, the v function for the current state will be updated
# given the gamma*v-value of the new_state, thus propagating the rewards from
# terminal states back to early states of the game
update_v_function(v_cur_player, state, new_state, cur_player_reward, alpha, gamma)
# We also need to update the v function of the last player
# If we have a past state for the last player, we can do so
if past_states['player-{0}'.format(last_player)] is not None:
last_player_new_state = past_states['player-{0}'.format(last_player)]
# The reward is given by the payoff of the same new_state but for the last_player
# In this game it is simply -1*cur_player_reward
last_player_reward = TicTacToeGameEnvironment.payoff(new_state, last_player)
# We need to update the v-value for the past state of the last player
# when transitioning to the new_state with the achieved reward for new_state
update_v_function(v_last_player, last_player_new_state, new_state, last_player_reward, alpha, gamma)
# We check if the episode terminated
if TicTacToeGameEnvironment.is_terminal(new_state):
# Yes, we restart
state = starting_environment.get_game_state()
past_states = {'player-X': None, 'player-O': None}
i += 1
else:
# No, we continue
state = new_state
return vfunction
if __name__ == "__main__":
vfunction = learn()
with open('vfunction.json', 'w+') as f:
json.dump(vfunction, f)
\ No newline at end of file
from une_ai.tictactoe import TicTacToeGame
from une_ai.tictactoe import TicTacToePlayer
from tictactoe_game_environment import TicTacToeGameEnvironment
from agent_programs import agent_program_RL, agent_program_random
if __name__ == '__main__':
player_X = TicTacToePlayer('X', agent_program_RL)
player_O = TicTacToePlayer('O', agent_program_random)
# DO NOT EDIT THE FOLLOWING INSTRUCTIONS!
environment = TicTacToeGameEnvironment()
environment.add_player(player_X)
environment.add_player(player_O)
game = TicTacToeGame(player_X, player_O, environment)
import numpy as np
from scipy.signal import convolve2d
from une_ai.models import GameEnvironment, GridMap, Agent
class IllegalMove(Exception):
pass
class TicTacToeGameEnvironment(GameEnvironment):
def __init__(self, board_size=3):
super().__init__("Tic Tac Toe")
self._board_size = board_size
self._game_board = GridMap(board_size, board_size, None)
self._player_turn = 'X' # X always starts
# TODO
# implement the abstract method add_player
# the GameEnvironment superclass uses a dictionary self._players
# to store the players of the game.
# For this game, we must limit the number of players to 2:
# the first added player will be X and the second O
def add_player(self, player):
assert isinstance(player, Agent), "The parameter player must be an instance of a subclass of the class Agent"
assert len(self._players) < 2, "It is not possible to add more than 2 players for this game."
if len(self._players) == 0:
marker = 'X'
else:
marker = 'O'
self._players[marker] = player
return marker
# TODO
# implement the abstract method get_game_state
# the method must return the current state of the game
# as a dictionary with the following keys:
# 'game-board' -> a copy of the game board (as 3x3 GridMap)
# 'player-turn' -> 'X' or 'O' depending on the current player turn
# You may first create properties in the constructor function __init__
# to store the game board and the current turn
def get_game_state(self):
gs = {
'game-board': self._game_board.copy(),
'player-turn': self._player_turn
}
return gs
# TODO
# implement the abstract method get_percepts
# this method returns a dictionary with keys the sensors of the agent
# and values the percepts gathered for that sensor at time t
# the sensors are:
# 'game-board-sensor' -> the 'game-board' value from the current game state
# 'turn-taking-indicator' -> the 'player-turn' value from the current game state
def get_percepts(self):
gs = self.get_game_state()
return {
'game-board-sensor': gs['game-board'],
'turn-taking-indicator': gs['player-turn']
}
# TODO
# implement the abstract method get_legal_actions
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state as input and it returns the list of
# legal actions in that game state
# An action is legal in a given game state if the game board cell
# for that action is free from marks
def get_legal_actions(game_state):
legal_actions = []
game_board = game_state['game-board']
empty_cells = game_board.find_value(None)
for empty_cell in empty_cells:
legal_actions.append('mark-{0}-{1}'.format(empty_cell[0], empty_cell[1]))
return legal_actions
# TODO
# implement the abstract method transition_result
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state and an action to perform as input and it returns
# the new game state.
def transition_result(game_state, action):
legal_actions = TicTacToeGameEnvironment.get_legal_actions(game_state)
if action not in legal_actions:
raise(IllegalMove('The action {0} is not a legal move for the given game state {1}.'.format(action, game_state['game-board'].get_map())))
marker = TicTacToeGameEnvironment.turn(game_state)
tokens = action.split('-')
x, y = (int(tokens[1]), int(tokens[2]))
new_game_board = game_state['game-board'].copy()
new_game_board.set_item_value(x, y, marker)
new_game_state = {
'game-board': new_game_board,
'player-turn': 'O' if game_state['player-turn'] == 'X' else 'X'
}
return new_game_state
# TODO
# implement the abstract method state_transition
# this method takes as input the agent's actuators
# and it changes the game environment state based
# on the values of the agent's actuators
# This agent has only one actuator, 'marker'
# the value of this actuator is a tuple with the x and y
# coordinates where the agent will place its marker on the game board
# We can implement this method by re-using the static method
# transition_result we just implemented
def state_transition(self, agent_actuators):
assert agent_actuators['marker'] is not None, "During a turn, the player must have set the 'marker' actuator value to a coordinate (x, y) of the game board where to place the marker."
x, y = agent_actuators['marker']
gs = self.get_game_state()
action = 'mark-{0}-{1}'.format(x, y)
new_gs = TicTacToeGameEnvironment.transition_result(gs, action)
self._player_turn = new_gs['player-turn']
self._game_board = new_gs['game-board'].copy()
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It returns the turn of the player given a game state.
def turn(game_state):
assert 'player-turn' in game_state.keys(), "Invalid game state. A game state must have the key 'player-turn'"
return game_state['player-turn']
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state as input and it returns the winner ('X' or 'O') if there is any
# or None if there is no winner (a tie or a non-terminal state)
# This method is already provided to you. You should look at its implementation
# and try to understand how it is finding a winner with the convolution operation
def get_winner(game_state):
game_board = game_state['game-board']
horizontal_kernel = np.array([[ 1, 1, 1]])
vertical_kernel = np.transpose(horizontal_kernel)
diag_kernel = np.eye(3, dtype=np.uint8)
flipped_diag_kernel = np.fliplr(diag_kernel)
detection_kernels = [horizontal_kernel, vertical_kernel, diag_kernel, flipped_diag_kernel]
for marker in ['X', 'O']:
player_markers = game_board.get_map() == marker
for kernel in detection_kernels:
convolved_values = convolve2d(player_markers, kernel, mode="valid")
if (convolved_values == 3).any():
return marker
return None
# TODO
# implement the abstract method is_terminal
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state as input and it returns True if the game state
# is terminal and False otherwise.
# In this game, a state is terminal if there are no more legal actions
# or if there is a winner.
def is_terminal(game_state):
# the game is over if the board is full or if there is a winner
remaining_actions = TicTacToeGameEnvironment.get_legal_actions(game_state)
winner = TicTacToeGameEnvironment.get_winner(game_state)
return len(remaining_actions) == 0 or winner is not None
# TODO
# implement the abstract method payoff
# This method is a static method (i.e. we do not have access to self
# and it can only be accessed via the class TicTacToeGameEnvironment)
# It takes a game_state and the player name ('X' or 'O') as input and it returns
# the payoff value for that player in the given game state
# In this scenario, we only consider terminal states with a winner;
# if there is no winner yet (or the game is a tie) we return 0
# In other games the payoff function may be more complex
def payoff(game_state, player_name):
winner = TicTacToeGameEnvironment.get_winner(game_state)
if winner is None:
return 0
elif winner == player_name:
return 1
else:
return -1
\ No newline at end of file