diff --git a/week10/material/classification.py b/week10/material/classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e6deffb672cfb00198c6f601824a62756f7dbc4
--- /dev/null
+++ b/week10/material/classification.py
@@ -0,0 +1,164 @@
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import accuracy_score
+import matplotlib.pyplot as plt
+
+# Generate random samples for two classes distributed
+# according to two gaussian distributions
+def generate_samples(num_samples, mean1, cov1, mean2, cov2):
+    class_labels = np.random.randint(2, size=num_samples)  # 0, 1
+
+    X = []
+    y = []
+    for label in class_labels:
+        if label == 0:
+            sample = np.random.multivariate_normal(mean1, cov1)
+        elif label == 1:
+            sample = np.random.multivariate_normal(mean2, cov2)
+        X.append(sample)
+        y.append(label)
+
+    return np.array(X), np.array(y)
+
+def create_ann(input_size, hidden_size, output_size):
+    np.random.seed(0)  # random seed for replicability
+    model = {
+        'W1': np.random.randn(input_size, hidden_size),  # weights input -> hidden layer
+        'b1': np.zeros((1, hidden_size)),                # biases hidden layer
+        'W2': np.random.rand(hidden_size, output_size),  # weights hidden -> output layer
+        'b2': np.zeros((1, output_size))                 # biases output layer
+    }
+
+    return model
+
+# Sigmoid activation function and its derivative
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+# The derivative is in the form of accepting the activation
+# a = sigma(x)
+def sigmoid_derivative(a):
+    return a * (1 - a)
+
+# One-hot encode the target labels
+def one_hot_encode(labels, num_classes):
+    encoded = np.zeros((len(labels), num_classes))
+    encoded[np.arange(len(labels)), labels] = 1
+    return encoded
+
+# Forward propagation
+def forward_propagation(X, model):
+    hidden_output = sigmoid(np.dot(X, model['W1']) + model['b1'])
+    output = sigmoid(np.dot(hidden_output, model['W2']) + model['b2'])
+
+    cache = {
+        'A0': X,
+        'A1': hidden_output,
+        'A2': output
+    }
+
+    return output, cache
+
+# Backpropagation
+def backpropagation(y, model, cache, learning_rate):
+    predicted_output = cache['A2']
+
+    # Compute loss and error
+    loss = np.mean((y - predicted_output) ** 2)
+    output_error = (-2 / y.shape[0]) * (y - predicted_output)  # gradient of the error w.r.t. the prediction
+
+    # Backpropagation
+    output_delta = output_error * sigmoid_derivative(predicted_output)
+    hidden_error = output_delta.dot(model['W2'].T)  # dot product to backpropagate the error back
+    hidden_output = cache['A1']  # activations of the hidden layer
+    hidden_delta = hidden_error * sigmoid_derivative(hidden_output)
+
+    # Update weights and biases
+    model['W2'] -= hidden_output.T.dot(output_delta) * learning_rate
+    model['b2'] -= np.sum(output_delta, axis=0, keepdims=True) * learning_rate
+    model['W1'] -= cache['A0'].T.dot(hidden_delta) * learning_rate
+    model['b1'] -= np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate
+
+    return loss  # no need to return the model, we are already updating it by reference
+
+def train(X, y, model, num_epochs=10000, learning_rate=0.1):
+
+    plt.ion()  # Turn on interactive mode for live updating
+
+    # Creating a plot to show the learning process (two classes in this exercise)
+    fig, ax = plt.subplots()
+    ax.set_xlabel('Feature 1')
+    ax.set_ylabel('Feature 2')
+    sc0 = ax.scatter([], [], label='Class 0')
+    sc1 = ax.scatter([], [], label='Class 1')
+    ax.legend()
+    ax.set_xlim(-4, 4)
+    ax.set_ylim(-4, 4)
+
+    for epoch in range(num_epochs):
+        # Forward propagation
+        predicted_output, cache = forward_propagation(X, model)
+
+        # Backpropagation
+        loss = backpropagation(y, model, cache, learning_rate)
+
+        if epoch < 10 or epoch % 100 == 0:
+            print(f"Epoch {epoch}, Loss: {loss:.4f}")
+
+        # Update the plot with the currently predicted classes
+        y_pred_classes = np.argmax(predicted_output, axis=1)
+        sc0.set_offsets(X[y_pred_classes == 0, :2])
+        sc1.set_offsets(X[y_pred_classes == 1, :2])
+        ax.relim()
+        ax.autoscale_view()
+        fig.canvas.flush_events()
+
+    plt.ioff()
+    plt.show()
+
+if __name__ == '__main__':
+
+    # Generating the training and test samples
+    # (the covariance matrices must be symmetric and positive semi-definite)
+    mean1 = [2, 2]
+    cov1 = [[0.2, 0.1], [0.1, 0.3]]
+    mean2 = [4, 4]
+    cov2 = [[0.2, -0.1], [-0.1, 0.2]]
+
+    num_samples = 1000
+    X, y = generate_samples(num_samples, mean1, cov1, mean2, cov2)
+
+    # Split the data into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+
+    # Normalize the input features using StandardScaler
+    scaler_input = StandardScaler()
+    X_train_scaled = scaler_input.fit_transform(X_train)
+    X_test_scaled = scaler_input.transform(X_test)
+
+    # We need to one-hot encode the class labels
+    num_classes = 2
+    y_train_encoded = one_hot_encode(y_train, num_classes)
+    y_test_encoded = one_hot_encode(y_test, num_classes)
+
+    # Neural network architecture
+    input_size = X_train_scaled.shape[1]
+    hidden_size = 4  # you can change this parameter to see what happens
+    output_size = num_classes
+    model = create_ann(input_size, hidden_size, output_size)
+
+    # Training the model
+    n_epochs = 10000
+    learning_rate = 0.1
+    train(X_train_scaled, y_train_encoded, model, n_epochs, learning_rate)
+
+    # Testing the performance of the model
+    # Prediction on test data
+    y_pred, _ = forward_propagation(X_test_scaled, model)
+    y_pred_classes = np.argmax(y_pred, axis=1)
+
+    # Now we can compute the classification accuracy
+    accuracy = accuracy_score(y_test, y_pred_classes)
+    print(f"Accuracy on Test Data: {accuracy:.4f}")
\ No newline at end of file
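A quick way to sanity-check the hand-written backpropagation in week10/material/classification.py is a finite-difference gradient check on a tiny batch. The sketch below is illustrative only: it assumes classification.py can be imported as a module, and the helpers loss_for_grad and numerical_grad_W2 are ours, not part of the exercise. The -2 / y.shape[0] factor in backpropagation corresponds to the per-sample average of the squared errors summed over the output units, so that is the loss used for the check.

import copy
import numpy as np

# Assumes week10/material is on the Python path so classification.py is importable
from classification import create_ann, forward_propagation, backpropagation

def loss_for_grad(y, y_hat):
    # Squared error summed over output units, averaged over samples:
    # its gradient w.r.t. y_hat is exactly (-2 / y.shape[0]) * (y - y_hat)
    return np.sum((y - y_hat) ** 2) / y.shape[0]

def numerical_grad_W2(X, y, model, eps=1e-5):
    """Central-difference estimate of d loss / d W2."""
    grad = np.zeros_like(model['W2'])
    for i in range(model['W2'].shape[0]):
        for j in range(model['W2'].shape[1]):
            model['W2'][i, j] += eps
            loss_plus = loss_for_grad(y, forward_propagation(X, model)[0])
            model['W2'][i, j] -= 2 * eps
            loss_minus = loss_for_grad(y, forward_propagation(X, model)[0])
            model['W2'][i, j] += eps  # restore the original weight
            grad[i, j] = (loss_plus - loss_minus) / (2 * eps)
    return grad

rng = np.random.default_rng(0)
X_tiny = rng.normal(size=(4, 2))
y_tiny = np.eye(2)[rng.integers(0, 2, size=4)]  # one-hot labels for 4 samples

model = create_ann(input_size=2, hidden_size=4, output_size=2)
num_grad = numerical_grad_W2(X_tiny, y_tiny, model)

# With learning_rate=1.0 the in-place update subtracts exactly the analytic gradient,
# so W2_before - W2_after recovers it
reference = copy.deepcopy(model)
_, cache = forward_propagation(X_tiny, reference)
backpropagation(y_tiny, reference, cache, learning_rate=1.0)
analytic_grad = model['W2'] - reference['W2']

print(np.max(np.abs(analytic_grad - num_grad)))  # should be tiny, roughly 1e-7 or smaller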
diff --git a/week10/material/regression.py b/week10/material/regression.py
new file mode 100644
index 0000000000000000000000000000000000000000..38a288e11e3ce111cdc7a8568ef2799affb772c7
--- /dev/null
+++ b/week10/material/regression.py
@@ -0,0 +1,168 @@
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.metrics import mean_squared_error
+import matplotlib.pyplot as plt
+
+# Generate random samples of a function with 3 inputs and 1 output
+def generate_samples(num_samples):
+    X = np.random.rand(num_samples, 3)
+    y = np.sin(X[:, 0]) + 2 * np.cos(X[:, 1]) - 0.5 * X[:, 2]
+    y = y.reshape((y.shape[0], 1))
+    return X, y
+
+def create_ann(input_size, hidden_size, output_size):
+    np.random.seed(0)  # random seed for replicability
+    model = {
+        'W1': np.random.randn(input_size, hidden_size),  # weights input -> hidden layer
+        'b1': np.zeros((1, hidden_size)),                # biases hidden layer
+        'W2': np.random.rand(hidden_size, output_size),  # weights hidden -> output layer
+        'b2': np.zeros((1, output_size))                 # biases output layer
+    }
+
+    return model
+
+# Sigmoid activation function and its derivative
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+# The derivative is in the form of accepting the activation
+# a = sigma(x)
+def sigmoid_derivative(a):
+    return a * (1 - a)
+
+# Forward propagation
+def forward_propagation(X, model):
+    # Compute the activation of the hidden layer:
+    # sigmoid(dot_product(X, W1) + b1)
+    # numpy provides the method .dot for the dot product
+    hidden_activation = ...
+
+    # Now compute the output by performing the dot
+    # product between the activation of the hidden layer
+    # and the weights W2, sum b2 and use the sigmoid for the activation
+    output = ...
+
+    # Store the activation of the hidden layer and the output layer in
+    # the following dictionary
+    cache = {
+        'A0': X,
+        'A1': ...,
+        'A2': ...
+    }
+
+    return output, cache
+
+# Backpropagation
+def backpropagation(y, model, cache, learning_rate):
+    predicted_output = cache['A2']
+
+    # Compute loss and error
+    # The loss is the ground truth y minus the predicted output, all squared and averaged
+    # numpy offers the method .mean to compute the average
+    loss = ...
+
+    # The output error gradient:
+    # -2/num_of_samples * (ground truth - prediction)
+    output_error = ...
+
+    # Backpropagation
+    # Delta for the output layer:
+    # output_error * sigmoid_derivative(prediction)
+    output_delta = ...
+
+    # Hidden layer error:
+    # dot product between the output_delta and W2 transposed (you can use .T)
+    hidden_error = ...
+
+    # Hidden layer cached activation
+    hidden_output = cache['A1']
+
+    # Final hidden layer delta:
+    # hidden_error * sigmoid_derivative(hidden_output)
+    hidden_delta = ...
+
+    # Update weights and biases
+    # Hint: each weight update is (activation of the previous layer).T dot the layer delta,
+    # scaled by the learning rate; each bias update is the column-wise sum of the delta
+    # (np.sum with axis=0 and keepdims=True), scaled by the learning rate
+
+    model['W2'] -= ...
+    model['b2'] -= ...
+    model['W1'] -= ...
+    model['b1'] -= ...
+
+    return loss  # no need to return the model, we are already updating it by reference
+
+def train(X, y, model, num_epochs=10000, learning_rate=0.1):
+
+    plt.ion()  # Turn on interactive mode for live updating
+
+    # Creating a plot to show the learning process
+    fig, ax = plt.subplots()
+    ax.set_xlabel('Training sample')
+    ax.set_ylabel('Y')
+    line_gt, = ax.plot([], [], label='Ground truth')
+    line_pred, = ax.plot([], [], label='Prediction')
+    ax.legend()
+
+    for epoch in range(num_epochs):
+        # Forward propagation
+        predicted_output, cache = forward_propagation(X, model)
+
+        # Backpropagation
+        loss = backpropagation(y, model, cache, learning_rate)
+
+        if epoch % 100 == 0:
+            print(f"Epoch {epoch}, Loss: {loss:.4f}")
+
+        # Update the plot
+        line_gt.set_data(range(y.shape[0]), y)  # ground truth
+        line_pred.set_data(range(predicted_output.shape[0]), predicted_output)  # predictions
+        ax.relim()
+        ax.autoscale_view()
+        fig.canvas.flush_events()
+
+    plt.ioff()
+    plt.show()
+
+if __name__ == '__main__':
+
+    # Generating the training and test samples
+    num_samples = 1000
+    X, y = generate_samples(num_samples)
+
+    # Split the data into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+
+    # Normalize the input features using StandardScaler
+    scaler_input = StandardScaler()
+    X_train_scaled = scaler_input.fit_transform(X_train)
+    X_test_scaled = scaler_input.transform(X_test)
+
+    # We are trying to learn a function whose co-domain is not bounded
+    # to [0, 1]. The activation function we use (sigmoid) only outputs
+    # values between 0 and 1, so we need to scale the targets
+    # to the [0, 1] range as well
+    scaler_output = MinMaxScaler()
+    y_train_scaled = scaler_output.fit_transform(y_train)
+    y_test_scaled = scaler_output.transform(y_test)
+
+    # Neural network architecture
+    input_size = X_train_scaled.shape[1]
+    hidden_size = 8  # you can change this parameter to see what happens
+    output_size = 1
+    model = create_ann(input_size, hidden_size, output_size)
+
+    # Training the model
+    n_epochs = 10000
+    learning_rate = 0.1
+    train(X_train_scaled, y_train_scaled, model, n_epochs, learning_rate)
+
+    # Testing the performance of the model
+    # Prediction on test data
+    y_pred_scaled, _ = forward_propagation(X_test_scaled, model)
+
+    # We need to scale the predictions back to the original range
+    y_pred = scaler_output.inverse_transform(y_pred_scaled)
+
+    # Now we can compute the mean squared error
+    mse = mean_squared_error(y_test, y_pred)
+    print(f"\nMSE for the test set: {mse:.4f}")
\ No newline at end of file
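The MinMaxScaler step in regression.py exists because the sigmoid output layer can only produce values in (0, 1), while the target function takes values outside that interval. A minimal round-trip illustration with made-up numbers (not part of the exercise):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

y_train = np.array([[0.6], [1.4], [2.8], [2.1]])  # toy targets outside [0, 1]

scaler_output = MinMaxScaler()
y_scaled = scaler_output.fit_transform(y_train)       # mapped into [0, 1], reachable by a sigmoid
y_restored = scaler_output.inverse_transform(y_scaled)  # back to the original scale

print(y_scaled.min(), y_scaled.max())   # 0.0 1.0
print(np.allclose(y_train, y_restored))  # True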
diff --git a/week10/solution/classification.py b/week10/solution/classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..242b67c8f5550e14de8abb2cbe3777dc1021e055
--- /dev/null
+++ b/week10/solution/classification.py
@@ -0,0 +1,167 @@
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import accuracy_score
+import matplotlib.pyplot as plt
+
+# Generate random samples for three classes distributed
+# according to three gaussian distributions
+def generate_samples(num_samples, mean1, cov1, mean2, cov2, mean3, cov3):
+    class_labels = np.random.randint(3, size=num_samples)  # 0, 1, 2
+
+    X = []
+    y = []
+    for label in class_labels:
+        if label == 0:
+            sample = np.random.multivariate_normal(mean1, cov1)
+        elif label == 1:
+            sample = np.random.multivariate_normal(mean2, cov2)
+        elif label == 2:
+            sample = np.random.multivariate_normal(mean3, cov3)
+        X.append(sample)
+        y.append(label)
+
+    return np.array(X), np.array(y)
+
+def create_ann(input_size, hidden_size, output_size):
+    np.random.seed(0)  # random seed for replicability
+    model = {
+        'W1': np.random.randn(input_size, hidden_size),  # weights input -> hidden layer
+        'b1': np.zeros((1, hidden_size)),                # biases hidden layer
+        'W2': np.random.rand(hidden_size, output_size),  # weights hidden -> output layer
+        'b2': np.zeros((1, output_size))                 # biases output layer
+    }
+
+    return model
+
+# Sigmoid activation function and its derivative
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+# The derivative is in the form of accepting the activation
+# a = sigma(x)
+def sigmoid_derivative(a):
+    return a * (1 - a)
+
+# One-hot encode the target labels
+def one_hot_encode(labels, num_classes):
+    encoded = np.zeros((len(labels), num_classes))
+    encoded[np.arange(len(labels)), labels] = 1
+    return encoded
+
+# Forward propagation
+def forward_propagation(X, model):
+    hidden_output = sigmoid(np.dot(X, model['W1']) + model['b1'])
+    output = sigmoid(np.dot(hidden_output, model['W2']) + model['b2'])
+
+    cache = {
+        'A0': X,
+        'A1': hidden_output,
+        'A2': output
+    }
+
+    return output, cache
+
+# Backpropagation
+def backpropagation(y, model, cache, learning_rate):
+    predicted_output = cache['A2']
+
+    # Compute loss and error
+    loss = np.mean((y - predicted_output) ** 2)
+    output_error = (-2 / y.shape[0]) * (y - predicted_output)  # gradient of the error w.r.t. the prediction
+
+    # Backpropagation
+    output_delta = output_error * sigmoid_derivative(predicted_output)
+    hidden_error = output_delta.dot(model['W2'].T)  # dot product to backpropagate the error back
+    hidden_output = cache['A1']  # activations of the hidden layer
+    hidden_delta = hidden_error * sigmoid_derivative(hidden_output)
+
+    # Update weights and biases
+    model['W2'] -= hidden_output.T.dot(output_delta) * learning_rate
+    model['b2'] -= np.sum(output_delta, axis=0, keepdims=True) * learning_rate
+    model['W1'] -= cache['A0'].T.dot(hidden_delta) * learning_rate
+    model['b1'] -= np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate
+
+    return loss  # no need to return the model, we are already updating it by reference
+
+def train(X, y, model, num_epochs=10000, learning_rate=0.1):
+
+    plt.ion()  # Turn on interactive mode for live updating
+
+    # Creating a plot to show the learning process
+    fig, ax = plt.subplots()
+    ax.set_xlabel('Feature 1')
+    ax.set_ylabel('Feature 2')
+    sc0 = ax.scatter([], [], label='Class 0')
+    sc1 = ax.scatter([], [], label='Class 1')
+    sc2 = ax.scatter([], [], label='Class 2')
+    ax.legend()
+    ax.set_xlim(-4, 4)
+    ax.set_ylim(-4, 4)
+
+    for epoch in range(num_epochs):
+        # Forward propagation
+        predicted_output, cache = forward_propagation(X, model)
+
+        # Backpropagation
+        loss = backpropagation(y, model, cache, learning_rate)
+
+        if epoch < 10 or epoch % 100 == 0:
+            print(f"Epoch {epoch}, Loss: {loss:.4f}")
+
+        # Update the plot with the currently predicted classes
+        y_pred_classes = np.argmax(predicted_output, axis=1)
+        sc0.set_offsets(X[y_pred_classes == 0, :2])
+        sc1.set_offsets(X[y_pred_classes == 1, :2])
+        sc2.set_offsets(X[y_pred_classes == 2, :2])
+        ax.relim()
+        ax.autoscale_view()
+        fig.canvas.flush_events()
+
+    plt.ioff()
+    plt.show()
+
+if __name__ == '__main__':
+
+    # Generating the training and test samples
+    # (the covariance matrices must be symmetric and positive semi-definite)
+    mean1 = [2, 2]
+    cov1 = [[0.2, 0.1], [0.1, 0.2]]
+    mean2 = [4, 4]
+    cov2 = [[0.2, -0.1], [-0.1, 0.2]]
+    mean3 = [6, 2]
+    cov3 = [[0.5, -0.2], [-0.2, 0.1]]
+
+    num_samples = 1000
+    X, y = generate_samples(num_samples, mean1, cov1, mean2, cov2, mean3, cov3)
+
+    # Split the data into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+
+    # Normalize the input features using StandardScaler
+    scaler_input = StandardScaler()
+    X_train_scaled = scaler_input.fit_transform(X_train)
+    X_test_scaled = scaler_input.transform(X_test)
+
+    # We need to one-hot encode the class labels
+    num_classes = 3
+    y_train_encoded = one_hot_encode(y_train, num_classes)
+    y_test_encoded = one_hot_encode(y_test, num_classes)
+
+    # Neural network architecture
+    input_size = X_train_scaled.shape[1]
+    hidden_size = 4  # you can change this parameter to see what happens
+    output_size = num_classes
+    model = create_ann(input_size, hidden_size, output_size)
+
+    # Training the model
+    n_epochs = 10000
+    learning_rate = 0.1
+    train(X_train_scaled, y_train_encoded, model, n_epochs, learning_rate)
+
+    # Testing the performance of the model
+    # Prediction on test data
+    y_pred, _ = forward_propagation(X_test_scaled, model)
+    y_pred_classes = np.argmax(y_pred, axis=1)
+
+    # Now we can compute the classification accuracy
+    accuracy = accuracy_score(y_test, y_pred_classes)
+    print(f"Accuracy on Test Data: {accuracy:.4f}")
\ No newline at end of file
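np.random.multivariate_normal expects each covariance matrix passed to generate_samples to be symmetric and positive semi-definite. A small, self-contained check that can be run on a candidate matrix (the helper name is ours, purely illustrative):

import numpy as np

def is_valid_cov(cov, tol=1e-10):
    """True if cov is (numerically) symmetric and positive semi-definite."""
    cov = np.asarray(cov, dtype=float)
    return bool(np.allclose(cov, cov.T) and np.all(np.linalg.eigvalsh(cov) >= -tol))

print(is_valid_cov([[0.2, 0.1], [0.1, 0.2]]))    # True
print(is_valid_cov([[0.5, -0.2], [0.1, -0.1]]))  # False: not symmetric, and the second variance is negative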
diff --git a/week10/solution/regression.py b/week10/solution/regression.py
new file mode 100644
index 0000000000000000000000000000000000000000..a39514a86fda59c54db4e09b5ecf4e9425104b33
--- /dev/null
+++ b/week10/solution/regression.py
@@ -0,0 +1,143 @@
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.metrics import mean_squared_error
+import matplotlib.pyplot as plt
+
+# Generate random samples of a function with 3 inputs and 1 output
+def generate_samples(num_samples):
+    X = np.random.rand(num_samples, 3)
+    y = np.sin(X[:, 0]) + 2 * np.cos(X[:, 1]) - 0.5 * X[:, 2]
+    y = y.reshape((y.shape[0], 1))
+    return X, y
+
+def create_ann(input_size, hidden_size, output_size):
+    np.random.seed(0)  # random seed for replicability
+    model = {
+        'W1': np.random.randn(input_size, hidden_size),  # weights input -> hidden layer
+        'b1': np.zeros((1, hidden_size)),                # biases hidden layer
+        'W2': np.random.rand(hidden_size, output_size),  # weights hidden -> output layer
+        'b2': np.zeros((1, output_size))                 # biases output layer
+    }
+
+    return model
+
+# Sigmoid activation function and its derivative
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+# The derivative is in the form of accepting the activation
+# a = sigma(x)
+def sigmoid_derivative(a):
+    return a * (1 - a)
+
+# Forward propagation
+def forward_propagation(X, model):
+    hidden_activation = sigmoid(np.dot(X, model['W1']) + model['b1'])
+    output = sigmoid(np.dot(hidden_activation, model['W2']) + model['b2'])
+
+    cache = {
+        'A0': X,
+        'A1': hidden_activation,
+        'A2': output
+    }
+
+    return output, cache
+
+# Backpropagation
+def backpropagation(y, model, cache, learning_rate):
+    predicted_output = cache['A2']
+
+    # Compute loss and error
+    loss = np.mean((y - predicted_output) ** 2)
+    output_error = (-2 / y.shape[0]) * (y - predicted_output)  # gradient of the error w.r.t. the prediction
+
+    # Backpropagation
+    output_delta = output_error * sigmoid_derivative(predicted_output)
+    hidden_error = output_delta.dot(model['W2'].T)  # dot product to backpropagate the error back
+    hidden_output = cache['A1']  # activations of the hidden layer
+    hidden_delta = hidden_error * sigmoid_derivative(hidden_output)
+
+    # Update weights and biases
+    model['W2'] -= hidden_output.T.dot(output_delta) * learning_rate
+    model['b2'] -= np.sum(output_delta, axis=0, keepdims=True) * learning_rate
+    model['W1'] -= cache['A0'].T.dot(hidden_delta) * learning_rate
+    model['b1'] -= np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate
+
+    return loss  # no need to return the model, we are already updating it by reference
+
+def train(X, y, model, num_epochs=10000, learning_rate=0.1):
+
+    plt.ion()  # Turn on interactive mode for live updating
+
+    # Creating a plot to show the learning process
+    fig, ax = plt.subplots()
+    ax.set_xlabel('Training sample')
+    ax.set_ylabel('Y')
+    line_gt, = ax.plot([], [], label='Ground truth')
+    line_pred, = ax.plot([], [], label='Prediction')
+    ax.legend()
+
+    for epoch in range(num_epochs):
+        # Forward propagation
+        predicted_output, cache = forward_propagation(X, model)
+
+        # Backpropagation
+        loss = backpropagation(y, model, cache, learning_rate)
+
+        if epoch % 100 == 0:
+            print(f"Epoch {epoch}, Loss: {loss:.4f}")
+
+        # Update the plot
+        line_gt.set_data(range(y.shape[0]), y)  # ground truth
+        line_pred.set_data(range(predicted_output.shape[0]), predicted_output)  # predictions
+        ax.relim()
+        ax.autoscale_view()
+        fig.canvas.flush_events()
+
+    plt.ioff()
+    plt.show()
+
+if __name__ == '__main__':
+
+    # Generating the training and test samples
+    num_samples = 1000
+    X, y = generate_samples(num_samples)
+
+    # Split the data into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+
+    # Normalize the input features using StandardScaler
+    scaler_input = StandardScaler()
+    X_train_scaled = scaler_input.fit_transform(X_train)
+    X_test_scaled = scaler_input.transform(X_test)
+
+    # We are trying to learn a function whose co-domain is not bounded
+    # to [0, 1]. The activation function we use (sigmoid) only outputs
+    # values between 0 and 1, so we need to scale the targets
+    # to the [0, 1] range as well
+    scaler_output = MinMaxScaler()
+    y_train_scaled = scaler_output.fit_transform(y_train)
+    y_test_scaled = scaler_output.transform(y_test)
+
+    # Neural network architecture
+    input_size = X_train_scaled.shape[1]
+    hidden_size = 8  # you can change this parameter to see what happens
+    output_size = 1
+    model = create_ann(input_size, hidden_size, output_size)
+
+    # Training the model
+    n_epochs = 10000
+    learning_rate = 0.1
+    train(X_train_scaled, y_train_scaled, model, n_epochs, learning_rate)
+
+    # Testing the performance of the model
+    # Prediction on test data
+    y_pred_scaled, _ = forward_propagation(X_test_scaled, model)
+
+    # We need to scale the predictions back to the original range
+    y_pred = scaler_output.inverse_transform(y_pred_scaled)
+
+    # Now we can compute the mean squared error
+    mse = mean_squared_error(y_test, y_pred)
+    print(f"\nMSE for the test set: {mse:.4f}")
\ No newline at end of file
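As an optional cross-check of the NumPy network in solution/regression.py, scikit-learn's MLPRegressor can be trained on the same target function with a single logistic hidden layer; a comparable test MSE suggests the hand-written implementation is in the right ballpark. This sketch is for comparison only, and its hyperparameters are illustrative assumptions, not part of the exercise.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

rng = np.random.default_rng(0)
X = rng.random((1000, 3))
y = np.sin(X[:, 0]) + 2 * np.cos(X[:, 1]) - 0.5 * X[:, 2]  # same target function as the exercise

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# Single hidden layer with logistic (sigmoid) activation, roughly mirroring the hand-written network
mlp = MLPRegressor(hidden_layer_sizes=(8,), activation='logistic', max_iter=5000, random_state=0)
mlp.fit(X_train_s, y_train)
print(f"Reference MLPRegressor test MSE: {mean_squared_error(y_test, mlp.predict(X_test_s)):.4f}")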