Commit 5a209c2b authored by Jon

Add week10 workshop

parent 2db3dadb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# Generate random samples for two classes distributed
# according to two gaussian distributions
def generate_samples(num_samples, mean1, cov1, mean2, cov2):
    class_labels = np.random.randint(2, size=num_samples)  # 0, 1
    X = []
    y = []
    for label in class_labels:
        if label == 0:
            sample = np.random.multivariate_normal(mean1, cov1)
        elif label == 1:
            sample = np.random.multivariate_normal(mean2, cov2)
        X.append(sample)
        y.append(label)
    return np.array(X), np.array(y)
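# An equivalent, loop-free way to draw the same data (illustrative alternative,
# not called anywhere below): sample each class in a single call and place the
# rows according to the label mask.
def generate_samples_vectorized(num_samples, mean1, cov1, mean2, cov2):
    labels = np.random.randint(2, size=num_samples)
    X = np.empty((num_samples, len(mean1)))
    X[labels == 0] = np.random.multivariate_normal(mean1, cov1, size=int((labels == 0).sum()))
    X[labels == 1] = np.random.multivariate_normal(mean2, cov2, size=int((labels == 1).sum()))
    return X, labels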
def create_ann(input_size, hidden_size, output_size):
    np.random.seed(0)  # random seed for replicability
    model = {
        'W1': np.random.randn(input_size, hidden_size),  # weights input layer
        'b1': np.zeros((1, hidden_size)),  # biases input layer
        'W2': np.random.rand(hidden_size, output_size),  # weights output layer
        'b2': np.zeros((1, output_size))  # biases output layer
    }
    return model
# Sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# The derivative is written so that it takes the already-computed
# activation a = sigmoid(x) as its argument
def sigmoid_derivative(a):
    return a * (1 - a)
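# Why a * (1 - a): with a = sigmoid(x) = 1 / (1 + exp(-x)), the derivative is
#   d/dx sigmoid(x) = exp(-x) / (1 + exp(-x))**2 = sigmoid(x) * (1 - sigmoid(x)) = a * (1 - a),
# so it can be evaluated from the cached activation alone, without recomputing exp(-x).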
# One-hot encode the target labels
def one_hot_encode(labels, num_classes):
    encoded = np.zeros((len(labels), num_classes))
    encoded[np.arange(len(labels)), labels] = 1
    return encoded
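# For example, one_hot_encode(np.array([0, 1, 1]), 2) returns
#   [[1., 0.],
#    [0., 1.],
#    [0., 1.]]
# i.e. one row per label, with a 1 in the column of that label's class.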
# Forward propagation
def forward_propagation(X, model):
    hidden_output = sigmoid(np.dot(X, model['W1']) + model['b1'])
    output = sigmoid(np.dot(hidden_output, model['W2']) + model['b2'])
    cache = {
        'A0': X,
        'A1': hidden_output,
        'A2': output
    }
    return output, cache
# Backpropagation
def backpropagation(y, model, cache, learning_rate):
    predicted_output = cache['A2']
    # Compute loss and error
    loss = np.mean((y - predicted_output) ** 2)
    output_error = (-2 / y.shape[0]) * (y - predicted_output)
    # Backpropagation
    output_delta = output_error * sigmoid_derivative(predicted_output)
    hidden_error = output_delta.dot(model['W2'].T)  # dot product to backpropagate the error
    hidden_output = cache['A1']  # activations of the hidden layer
    hidden_delta = hidden_error * sigmoid_derivative(hidden_output)
    # Update weights and biases
    model['W2'] -= hidden_output.T.dot(output_delta) * learning_rate
    model['b2'] -= np.sum(output_delta, axis=0, keepdims=True) * learning_rate
    model['W1'] -= cache['A0'].T.dot(hidden_delta) * learning_rate
    model['b1'] -= np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate
    return loss  # no need to return the model, we are already updating it by reference
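# Shapes behind the updates above (N = number of samples in X):
#   output_error  = -2/N * (y - A2)               (N, output_size)
#   output_delta  = output_error * A2 * (1 - A2)  (N, output_size)
#   grad W2       = A1.T @ output_delta           (hidden_size, output_size)
#   hidden_error  = output_delta @ W2.T           (N, hidden_size)
#   hidden_delta  = hidden_error * A1 * (1 - A1)  (N, hidden_size)
#   grad W1       = A0.T @ hidden_delta           (input_size, hidden_size)
# The bias gradients are the column-wise sums of the corresponding deltas.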
def train(X, y, model, num_epochs=10000, learning_rate=0.1):
    plt.ion()  # Turn on interactive mode for live updating
    # Creating a plot to show the learning process
    fig, ax = plt.subplots()
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    sc0 = ax.scatter([], [], label='Class 0')
    sc1 = ax.scatter([], [], label='Class 1')
    ax.legend()
    ax.set_xlim(-4, 4)
    ax.set_ylim(-4, 4)
    for epoch in range(num_epochs):
        # Forward propagation
        predicted_output, cache = forward_propagation(X, model)
        # Backpropagation
        loss = backpropagation(y, model, cache, learning_rate)
        if epoch < 10 or epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
            # Update the plot
            y_pred_classes = np.argmax(predicted_output, axis=1)
            sc0.set_offsets(X[y_pred_classes == 0, :2])
            sc1.set_offsets(X[y_pred_classes == 1, :2])
            ax.relim()
            ax.autoscale_view()
            fig.canvas.flush_events()
    plt.ioff()
    plt.show()
if __name__ == '__main__':
    # Generating the training and test samples
    # (covariance matrices must be symmetric and positive semi-definite)
    mean1 = [2, 2]
    cov1 = [[0.2, 0.1], [0.1, 0.3]]
    mean2 = [4, 4]
    cov2 = [[0.2, -0.1], [-0.1, 0.2]]
    num_samples = 1000
    X, y = generate_samples(num_samples, mean1, cov1, mean2, cov2)
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    # Normalize the input features using StandardScaler
    scaler_input = StandardScaler()
    X_train_scaled = scaler_input.fit_transform(X_train)
    X_test_scaled = scaler_input.transform(X_test)
    # We need to encode the labels as one-hot vectors
    num_classes = 2
    y_train_encoded = one_hot_encode(y_train, num_classes)
    y_test_encoded = one_hot_encode(y_test, num_classes)
    # Neural network architecture
    input_size = X_train_scaled.shape[1]
    hidden_size = 4  # you can change this parameter to see what happens
    output_size = num_classes
    model = create_ann(input_size, hidden_size, output_size)
    # Training the model
    n_epochs = 10000
    learning_rate = 0.1
    train(X_train_scaled, y_train_encoded, model, n_epochs, learning_rate)
    # Testing the performance of the model
    # Prediction on test data
    y_pred, _ = forward_propagation(X_test_scaled, model)
    y_pred_classes = np.argmax(y_pred, axis=1)
    # Now we can compute the classification accuracy
    accuracy = accuracy_score(y_test, y_pred_classes)
    print(f"Accuracy on Test Data: {accuracy:.4f}")
\ No newline at end of file
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Generate random samples of a function with 3 inputs and 1 output
def generate_samples(num_samples):
    X = np.random.rand(num_samples, 3)
    y = np.sin(X[:, 0]) + 2 * np.cos(X[:, 1]) - 0.5 * X[:, 2]
    y = y.reshape((y.shape[0], 1))
    return X, y
def create_ann(input_size, hidden_size, output_size):
    np.random.seed(0)  # random seed for replicability
    model = {
        'W1': np.random.randn(input_size, hidden_size),  # weights input layer
        'b1': np.zeros((1, hidden_size)),  # biases input layer
        'W2': np.random.rand(hidden_size, output_size),  # weights output layer
        'b2': np.zeros((1, output_size))  # biases output layer
    }
    return model
# Sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# The derivative is written so that it takes the already-computed
# activation a = sigmoid(x) as its argument
def sigmoid_derivative(a):
    return a * (1 - a)
# Forward propagation
def forward_propagation(X, model):
    # Compute the activation of the hidden layer:
    # sigmoid(dot_product(X, W1) + b1)
    # numpy provides the method .dot for the dot product
    hidden_activation = ...
    # Now compute the output by performing the dot
    # product between the activation of the hidden layer
    # and the weights W2, add b2 and apply the sigmoid activation
    output = ...
    # Store the activation of the hidden layer and the output layer in
    # the following dictionary
    cache = {
        'A0': X,
        'A1': ...,
        'A2': ...
    }
    return output, cache
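# Shape check for the completed function (N = number of rows in X):
# 'A1' should come out as (N, hidden layer size) and the returned output / 'A2'
# as (N, output layer size); if the operands of a dot product are swapped,
# numpy will raise a dimension-mismatch error here.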
# Backpropagation
def backpropagation(y, model, cache, learning_rate):
    predicted_output = cache['A2']
    # Compute loss and error
    # The loss is the mean of (ground truth y - predicted output) squared;
    # numpy offers the method .mean to compute the average
    loss = ...
    # The output error gradient:
    # -2/num_of_samples * (ground truth - prediction)
    output_error = ...
    # Backpropagation
    # Delta for the output layer:
    # output_error * sigmoid_derivative(prediction)
    output_delta = ...
    # Hidden layer error:
    # dot product between the output_delta and W2 transposed (you can use .T)
    hidden_error = ...
    # Hidden layer cached activation
    hidden_output = cache['A1']
    # Final hidden layer delta:
    # hidden_error * sigmoid_derivative(hidden_output)
    hidden_delta = ...
    # Update weights and biases
    model['W2'] -= ...
    model['b2'] -= ...
    model['W1'] -= ...
    model['b1'] -= ...
    return loss  # no need to return the model, we are already updating it by reference
def train(X, y, model, num_epochs=10000, learning_rate=0.1):
    plt.ion()  # Turn on interactive mode for live updating
    # Creating a plot to show the learning process
    fig, ax = plt.subplots()
    ax.set_xlabel('Training sample')
    ax.set_ylabel('Y')
    line_gt, = ax.plot([], [], label='Ground truth')
    line_pred, = ax.plot([], [], label='Prediction')
    ax.legend()
    for epoch in range(num_epochs):
        # Forward propagation
        predicted_output, cache = forward_propagation(X, model)
        # Backpropagation
        loss = backpropagation(y, model, cache, learning_rate)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
            # Update the plot
            line_gt.set_data(range(y.shape[0]), y)  # ground truth
            line_pred.set_data(range(predicted_output.shape[0]), predicted_output)  # predictions
            ax.relim()
            ax.autoscale_view()
            fig.canvas.flush_events()
    plt.ioff()
    plt.show()
if __name__ == '__main__':
    # Generating the training and test samples
    num_samples = 1000
    X, y = generate_samples(num_samples)
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    # Normalize the input features using StandardScaler
    scaler_input = StandardScaler()
    X_train_scaled = scaler_input.fit_transform(X_train)
    X_test_scaled = scaler_input.transform(X_test)
    # We are trying to learn a function whose co-domain is not bounded
    # to [0, 1]. The activation function we use (sigmoid) only outputs
    # values between 0 and 1, so we need to scale the targets to the
    # [0, 1] range as well
    scaler_output = MinMaxScaler()
    y_train_scaled = scaler_output.fit_transform(y_train)
    y_test_scaled = scaler_output.transform(y_test)
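    # MinMaxScaler maps the training targets linearly so that their minimum
    # becomes 0 and their maximum becomes 1: y_scaled = (y - y_min) / (y_max - y_min).
    # The same y_min / y_max, fitted on the training set, are reused for the test
    # targets and later inverted with inverse_transform to report the error in the
    # original units.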
    # Neural network architecture
    input_size = X_train_scaled.shape[1]
    hidden_size = 8  # you can change this parameter to see what happens
    output_size = 1
    model = create_ann(input_size, hidden_size, output_size)
    # Training the model
    n_epochs = 10000
    learning_rate = 0.1
    train(X_train_scaled, y_train_scaled, model, n_epochs, learning_rate)
    # Testing the performance of the model
    # Prediction on test data
    y_pred_scaled, _ = forward_propagation(X_test_scaled, model)
    # We need to scale back the predictions
    y_pred = scaler_output.inverse_transform(y_pred_scaled)
    # Now we can compute the mean squared error
    mse = mean_squared_error(y_test, y_pred)
    print(f"\nMSE for the test set: {mse:.4f}")
\ No newline at end of file
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# Generate random samples for three classes distributed
# according to three gaussian distributions
def generate_samples(num_samples, mean1, cov1, mean2, cov2, mean3, cov3):
    class_labels = np.random.randint(3, size=num_samples)  # 0, 1, 2
    X = []
    y = []
    for label in class_labels:
        if label == 0:
            sample = np.random.multivariate_normal(mean1, cov1)
        elif label == 1:
            sample = np.random.multivariate_normal(mean2, cov2)
        elif label == 2:
            sample = np.random.multivariate_normal(mean3, cov3)
        X.append(sample)
        y.append(label)
    return np.array(X), np.array(y)
def create_ann(input_size, hidden_size, output_size):
    np.random.seed(0)  # random seed for replicability
    model = {
        'W1': np.random.randn(input_size, hidden_size),  # weights input layer
        'b1': np.zeros((1, hidden_size)),  # biases input layer
        'W2': np.random.rand(hidden_size, output_size),  # weights output layer
        'b2': np.zeros((1, output_size))  # biases output layer
    }
    return model
# Sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# The derivative is written so that it takes the already-computed
# activation a = sigmoid(x) as its argument
def sigmoid_derivative(a):
    return a * (1 - a)
# One-hot encode the target labels
def one_hot_encode(labels, num_classes):
    encoded = np.zeros((len(labels), num_classes))
    encoded[np.arange(len(labels)), labels] = 1
    return encoded
# Forward propagation
def forward_propagation(X, model):
    hidden_output = sigmoid(np.dot(X, model['W1']) + model['b1'])
    output = sigmoid(np.dot(hidden_output, model['W2']) + model['b2'])
    cache = {
        'A0': X,
        'A1': hidden_output,
        'A2': output
    }
    return output, cache
# Backpropagation
def backpropagation(y, model, cache, learning_rate):
    predicted_output = cache['A2']
    # Compute loss and error
    loss = np.mean((y - predicted_output) ** 2)
    output_error = (-2 / y.shape[0]) * (y - predicted_output)
    # Backpropagation
    output_delta = output_error * sigmoid_derivative(predicted_output)
    hidden_error = output_delta.dot(model['W2'].T)  # dot product to backpropagate the error
    hidden_output = cache['A1']  # activations of the hidden layer
    hidden_delta = hidden_error * sigmoid_derivative(hidden_output)
    # Update weights and biases
    model['W2'] -= hidden_output.T.dot(output_delta) * learning_rate
    model['b2'] -= np.sum(output_delta, axis=0, keepdims=True) * learning_rate
    model['W1'] -= cache['A0'].T.dot(hidden_delta) * learning_rate  # gradient w.r.t. W1 uses the cached input activations
    model['b1'] -= np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate
    return loss  # no need to return the model, we are already updating it by reference
def train(X, y, model, num_epochs=10000, learning_rate=0.1):
    plt.ion()  # Turn on interactive mode for live updating
    # Creating a plot to show the learning process
    fig, ax = plt.subplots()
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    sc0 = ax.scatter([], [], label='Class 0')
    sc1 = ax.scatter([], [], label='Class 1')
    sc2 = ax.scatter([], [], label='Class 2')
    ax.legend()
    ax.set_xlim(-4, 4)
    ax.set_ylim(-4, 4)
    for epoch in range(num_epochs):
        # Forward propagation
        predicted_output, cache = forward_propagation(X, model)
        # Backpropagation
        loss = backpropagation(y, model, cache, learning_rate)
        if epoch < 10 or epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
            # Update the plot
            y_pred_classes = np.argmax(predicted_output, axis=1)
            sc0.set_offsets(X[y_pred_classes == 0, :2])
            sc1.set_offsets(X[y_pred_classes == 1, :2])
            sc2.set_offsets(X[y_pred_classes == 2, :2])
            ax.relim()
            ax.autoscale_view()
            fig.canvas.flush_events()
    plt.ioff()
    plt.show()
if __name__ == '__main__':
    # Generating the training and test samples
    # (covariance matrices must be symmetric and positive semi-definite)
    mean1 = [2, 2]
    cov1 = [[0.2, 0.1], [0.1, 0.2]]
    mean2 = [4, 4]
    cov2 = [[0.2, -0.1], [-0.1, 0.2]]
    mean3 = [6, 2]
    cov3 = [[0.5, -0.1], [-0.1, 0.2]]
    num_samples = 1000
    X, y = generate_samples(num_samples, mean1, cov1, mean2, cov2, mean3, cov3)
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    # Normalize the input features using StandardScaler
    scaler_input = StandardScaler()
    X_train_scaled = scaler_input.fit_transform(X_train)
    X_test_scaled = scaler_input.transform(X_test)
    # We need to encode the labels as one-hot vectors
    num_classes = 3
    y_train_encoded = one_hot_encode(y_train, num_classes)
    y_test_encoded = one_hot_encode(y_test, num_classes)
    # Neural network architecture
    input_size = X_train_scaled.shape[1]
    hidden_size = 4  # you can change this parameter to see what happens
    output_size = num_classes
    model = create_ann(input_size, hidden_size, output_size)
    # Training the model
    n_epochs = 10000
    learning_rate = 0.1
    train(X_train_scaled, y_train_encoded, model, n_epochs, learning_rate)
    # Testing the performance of the model
    # Prediction on test data
    y_pred, _ = forward_propagation(X_test_scaled, model)
    y_pred_classes = np.argmax(y_pred, axis=1)
    # Now we can compute the classification accuracy
    accuracy = accuracy_score(y_test, y_pred_classes)
    print(f"Accuracy on Test Data: {accuracy:.4f}")
\ No newline at end of file
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Generate random samples of a function with 3 inputs and 1 output
def generate_samples(num_samples):
    X = np.random.rand(num_samples, 3)
    y = np.sin(X[:, 0]) + 2 * np.cos(X[:, 1]) - 0.5 * X[:, 2]
    y = y.reshape((y.shape[0], 1))
    return X, y
def create_ann(input_size, hidden_size, output_size):
    np.random.seed(0)  # random seed for replicability
    model = {
        'W1': np.random.randn(input_size, hidden_size),  # weights input layer
        'b1': np.zeros((1, hidden_size)),  # biases input layer
        'W2': np.random.rand(hidden_size, output_size),  # weights output layer
        'b2': np.zeros((1, output_size))  # biases output layer
    }
    return model
# Sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# The derivative is written so that it takes the already-computed
# activation a = sigmoid(x) as its argument
def sigmoid_derivative(a):
    return a * (1 - a)
# Forward propagation
def forward_propagation(X, model):
    hidden_activation = sigmoid(np.dot(X, model['W1']) + model['b1'])
    output = sigmoid(np.dot(hidden_activation, model['W2']) + model['b2'])
    cache = {
        'A0': X,
        'A1': hidden_activation,
        'A2': output
    }
    return output, cache
# Backpropagation
def backpropagation(y, model, cache, learning_rate):
    predicted_output = cache['A2']
    # Compute loss and error
    loss = np.mean((y - predicted_output) ** 2)
    output_error = (-2 / y.shape[0]) * (y - predicted_output)
    # Backpropagation
    output_delta = output_error * sigmoid_derivative(predicted_output)
    hidden_error = output_delta.dot(model['W2'].T)  # dot product to backpropagate the error
    hidden_output = cache['A1']  # activations of the hidden layer
    hidden_delta = hidden_error * sigmoid_derivative(hidden_output)
    # Update weights and biases
    model['W2'] -= hidden_output.T.dot(output_delta) * learning_rate
    model['b2'] -= np.sum(output_delta, axis=0, keepdims=True) * learning_rate
    model['W1'] -= cache['A0'].T.dot(hidden_delta) * learning_rate
    model['b1'] -= np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate
    return loss  # no need to return the model, we are already updating it by reference
def train(X, y, model, num_epochs=10000, learning_rate=0.1):
    plt.ion()  # Turn on interactive mode for live updating
    # Creating a plot to show the learning process
    fig, ax = plt.subplots()
    ax.set_xlabel('Training sample')
    ax.set_ylabel('Y')
    line_gt, = ax.plot([], [], label='Ground truth')
    line_pred, = ax.plot([], [], label='Prediction')
    ax.legend()
    for epoch in range(num_epochs):
        # Forward propagation
        predicted_output, cache = forward_propagation(X, model)
        # Backpropagation
        loss = backpropagation(y, model, cache, learning_rate)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
            # Update the plot
            line_gt.set_data(range(y.shape[0]), y)  # ground truth
            line_pred.set_data(range(predicted_output.shape[0]), predicted_output)  # predictions
            ax.relim()
            ax.autoscale_view()
            fig.canvas.flush_events()
    plt.ioff()
    plt.show()
if __name__ == '__main__':
    # Generating the training and test samples
    num_samples = 1000
    X, y = generate_samples(num_samples)
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    # Normalize the input features using StandardScaler
    scaler_input = StandardScaler()
    X_train_scaled = scaler_input.fit_transform(X_train)
    X_test_scaled = scaler_input.transform(X_test)
    # We are trying to learn a function whose co-domain is not bounded
    # to [0, 1]. The activation function we use (sigmoid) only outputs
    # values between 0 and 1, so we need to scale the targets to the
    # [0, 1] range as well
    scaler_output = MinMaxScaler()
    y_train_scaled = scaler_output.fit_transform(y_train)
    y_test_scaled = scaler_output.transform(y_test)
    # Neural network architecture
    input_size = X_train_scaled.shape[1]
    hidden_size = 8  # you can change this parameter to see what happens
    output_size = 1
    model = create_ann(input_size, hidden_size, output_size)
    # Training the model
    n_epochs = 10000
    learning_rate = 0.1
    train(X_train_scaled, y_train_scaled, model, n_epochs, learning_rate)
    # Testing the performance of the model
    # Prediction on test data
    y_pred_scaled, _ = forward_propagation(X_test_scaled, model)
    # We need to scale back the predictions
    y_pred = scaler_output.inverse_transform(y_pred_scaled)
    # Now we can compute the mean squared error
    mse = mean_squared_error(y_test, y_pred)
    print(f"\nMSE for the test set: {mse:.4f}")
\ No newline at end of file