  • SML_Assignment4

    Task 1: Neural Networks

    1a) Multi-layer Perceptron

We use the mean cross-entropy loss because it is well suited to multi-class problems (note that the training loop below actually optimizes the mean squared error; the cross-entropy loss is defined in the code for reference).
    We use ReLU as the hidden-layer activation function because it works well for multi-class classification; we also tested the sigmoid, but it suffers from vanishing gradients.
    The input layer has 784 neurons because each image has 28 × 28 = 784 pixels.
    The output layer has 10 neurons because we need to classify 10 categories.
    The learning rate should change over the course of training: at the beginning the parameters are essentially random, so a relatively large learning rate makes the loss fall faster; once training has progressed, the parameter updates should be smaller, so we switch to a smaller learning rate, as sketched below.
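
    The train() routine below keeps the learning rate fixed within a run; a minimal sketch of the decaying schedule described above could look like this (the decay factor and interval are illustrative assumptions, not values from the assignment):

    # hypothetical step-decay schedule: start with a large rate, shrink it periodically
    def step_decay(initial_lr, epoch, drop=0.5, epochs_per_drop=100):
        return initial_lr * (drop ** (epoch // epochs_per_drop))

    # usage inside the epoch loop: lr = step_decay(0.05, epoch)
    # -> 0.05 for epochs 0-99, 0.025 for epochs 100-199, and so on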

    Full code is below:

    import numpy as np
    from matplotlib import pyplot as plt
    
    
    def initialize(hidden_dim, output_dim):
        # retrieve mnist data
        X_train = np.loadtxt('./dataSets/mnist_small_train_in.txt', delimiter=',')
        X_test = np.loadtxt('./dataSets/mnist_small_test_in.txt', delimiter=',')
        y_train = np.loadtxt('./dataSets/mnist_small_train_out.txt', delimiter=',', dtype="int32")
        y_test = np.loadtxt('./dataSets/mnist_small_test_out.txt', delimiter=',', dtype="int32")
    
        # normalize both sets with the training-set statistics
        mu, sigma = np.mean(X_train), np.std(X_train)
        X_train = (X_train - mu) / sigma
        X_test = (X_test - mu) / sigma
    
        # one-hot encode labels
        y_train = np.eye(10)[y_train]
        y_test = np.eye(10)[y_test]
    
        # weights for single hidden layer
        W1 = np.random.randn(hidden_dim, X_train.shape[1]) * 0.01
        b1 = np.zeros((hidden_dim,))
        W2 = np.random.randn(output_dim, hidden_dim) * 0.01
        b2 = np.zeros((output_dim,))
    
        parameters = [W1, b1, W2, b2]
    
        # return to column vectors
        return parameters, X_train.T, X_test.T, y_train.T, y_test.T
    
    
    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))
    
    
    def dsigmoid(x):
        # input x is already sigmoid, no need to recompute
        return x * (1.0 - x)
    
    
    def ReLU(x):
        return x * (x > 0)
    
    
    def dReLU(x):
        return 1. * (x > 0)
    
    # Cross-entropy loss (defined for reference; the training loop below uses the squared-error loss)
    def closs(pred, y):
        return np.squeeze(-np.sum(np.multiply(np.log(pred), y)) / y.shape[1])

    # gradient of the cross-entropy w.r.t. the pre-softmax logits
    def dcloss(pred, y):
        return pred - y

    # Mean squared error loss, averaged over the mini-batch
    def loss(pred, y):
        return np.sum(.5 * np.sum((pred - y) ** 2, axis=0), axis=0) / y.shape[1]

    def dloss(pred, y):
        return (pred - y) / y.shape[1]
    
    
    class NeuralNet(object):
    
        def __init__(self, hidden_dim, output_dim):
            # size of layers
            self.hidden_dim_1 = hidden_dim
            self.output_dim = output_dim
    
            # weights and data
            parameters, self.x_train, self.x_test, self.y_train, self.y_test = initialize(hidden_dim, output_dim)
            self.W1, self.b1, self.W2, self.b2 = parameters
    
            # activation placeholders (overwritten by forward_pass); data is stored as column vectors
            n_samples = self.x_train.shape[1]

            self.ai = np.ones((self.x_train.shape[0], n_samples))
            self.ah1 = np.ones((self.hidden_dim_1, n_samples))
            self.ao = np.ones((self.output_dim, n_samples))
    
            # classification output for transformed OHE
            self.classification = np.ones(self.ao.shape)
    
            # container for loss progress
            self.loss = None
            self.test_error = None
    
        def forward_pass(self, x):
            # input activations
            self.ai = x
    
            # hidden_1 activations (ReLU, matching dReLU in the backward pass)
            self.ah1 = ReLU(self.W1 @ self.ai + self.b1[:, np.newaxis])
    
            # output activations
            self.ao = sigmoid(self.W2 @ self.ah1 + self.b2[:, np.newaxis])
    
            # transform to OHE for classification
            self.classification = (self.ao == self.ao.max(axis=0, keepdims=True)).astype(float)
    
        def backward_pass(self, target):
            # calculate error for output
            out_error = dloss(self.ao, target)
            out_delta = out_error * dsigmoid(self.ao)
    
            # calculate error for hidden_1
            hidden_1_error = self.W2.T @ out_delta
            hidden_1_delta = hidden_1_error * dReLU(self.ah1)
    
            # derivative for W2/b2 (hidden_1 --> out)
            w2_deriv = out_delta @ self.ah1.T
            b2_deriv = np.sum(out_delta, axis=1)
    
            # derivative for W1/b1 (input --> hidden_1)
            w1_deriv = hidden_1_delta @ self.ai.T
            b1_deriv = np.sum(hidden_1_delta, axis=1)
    
            return [w1_deriv, b1_deriv, w2_deriv, b2_deriv]
    
        def train(self, epochs, lr, batch_size=128):
    
            self.loss = np.zeros((epochs,))
            self.test_error = np.zeros((epochs,))
    
            for epoch in range(epochs):
    
                indices = np.arange(self.x_train.shape[1])
                np.random.shuffle(indices)
    
                for i in range(0, self.x_train.shape[1] - batch_size + 1, batch_size):
                    excerpt = indices[i:i + batch_size]
    
                    batch_x, batch_y = self.x_train[:, excerpt], self.y_train[:, excerpt]
    
                    # compute output of forward pass
                    self.forward_pass(batch_x)
    
                    # back prop error
                    w1_deriv, b1_deriv, w2_deriv, b2_deriv = self.backward_pass(batch_y)
    
                    # adjust weights with simple SGD
                    self.W1 -= lr * w1_deriv
                    self.b1 -= lr * b1_deriv
                    self.W2 -= lr * w2_deriv
                    self.b2 -= lr * b2_deriv
    
                # compute error
                self.forward_pass(self.x_test)
                error = loss(self.ao, self.y_test)
                self.loss[epoch] = error
    
                t = 0
                for i, pred in enumerate(self.classification.T):
                    if np.argmax(self.y_test.T[i]) == np.argmax(pred):
                        t += 1
    
                acc = t / self.classification.shape[1]
                self.test_error[epoch] = 1 - acc
    
                # print progress
                if (epoch + 1) % 50 == 0:
                    print("Epoch {}/{} -> loss: {:.5f} -> val_acc: {:.5f}".format(epoch + 1, epochs, error, acc))
    
        def predict(self, x):
            self.forward_pass(x)
            return self.classification
    
        def visualize_prediction(self, expected, predicted):
            # plot loss progress
            plt.figure(2)
            plt.plot(self.loss)
            plt.xlabel("Epochs")
            plt.ylabel("Loss")
            plt.show()
    
            # plot test error progress
            plt.figure(3)
            plt.plot(self.test_error)
            plt.xlabel("Epochs")
            plt.ylabel("Test Error")
            plt.show()
    
    
    nn = NeuralNet(hidden_dim=784, output_dim=10)
    nn.train(epochs=500, lr=.05)
    
    y_hat = nn.predict(nn.x_test)
    t = 0
    for i, pred in enumerate(y_hat.T):
        if np.argmax(nn.y_test.T[i]) == np.argmax(pred):
            t += 1
    
    print("Accuracy:", t / y_hat.shape[1])
    nn.visualize_prediction(nn.y_test.T, y_hat.T)
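
    For reference, the gradients returned by backward_pass are the standard backpropagation equations for the squared-error loss with a sigmoid output layer and a ReLU hidden layer (N is the mini-batch size, z^{(h)} the hidden pre-activation):

        \delta^{(o)} = \tfrac{1}{N}\,(a^{(o)} - y) \odot a^{(o)} \odot (1 - a^{(o)})
        \delta^{(h)} = (W_2^\top \delta^{(o)}) \odot \mathbf{1}[z^{(h)} > 0]
        \partial L/\partial W_2 = \delta^{(o)} (a^{(h)})^\top, \qquad \partial L/\partial b_2 = \textstyle\sum_n \delta^{(o)}_{:,n}
        \partial L/\partial W_1 = \delta^{(h)} x^\top, \qquad \partial L/\partial b_1 = \textstyle\sum_n \delta^{(h)}_{:,n}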
    

    Show how the misclassification error (in percent) on the testing set evolves during learning:
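
    The test_error array filled in train() already holds the per-epoch misclassification rate, so a minimal sketch for the requested plot (reusing the nn object from the script above) is:

    # misclassification error in percent over the training epochs
    plt.figure()
    plt.plot(100 * nn.test_error)
    plt.xlabel("Epochs")
    plt.ylabel("Misclassification error (%)")
    plt.show()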

    Task 3: Gaussian Processes

    3a) GP Regression

    Full code is below:

    import numpy as np
    from matplotlib import pyplot as plt
    
    def target(x):
        s = np.sin(x)
        return s + np.square(s)
    
    
    def kernel(x, z):
        return np.exp(-(x - z) ** 2)
    
    
    def compute_c(x, noise):
        return kernel(x, x) + noise
    
    
    def predict(x, X, Y, C_inv, noise):
        k = np.array([kernel(x, val) for val in X])
        mean = k.T.dot(C_inv).dot(Y)
        # clip tiny negative values from numerical error before taking the square root
        var = compute_c(x, noise) - k.T.dot(C_inv).dot(k)
        std = np.sqrt(max(var, 0.0))
        return mean, std
    
    
    def plot_gp(X, mean, std, iteration):
        y1 = mean - 2 * std
        y2 = mean + 2 * std
    
        plt.plot(X, target(X), "-", color="red", label=r"$\sin(x) + \sin^2(x)$")
        plt.plot(X, mean, color="black", label=r"$\mu$")
        plt.fill_between(X, y1, y2, facecolor='blue', interpolate=True, alpha=.5, label=r"$2\sigma$")
        plt.title(r"$\mu$ and $\sigma$ for iteration={}".format(iteration))
        plt.xlabel("x")
        plt.ylabel("y")
    
    
    def gpr():
        noise = 0.001
        step_size = 0.005
        X = np.arange(0, 2 * np.pi + step_size, step_size)
        iterations = 15
    
        std = np.array([np.sqrt(compute_c(x, noise)) for x in X])
        j = np.argmax(std)
    
        Xn = np.array([X[j]])
        Yn = target(Xn)  # keep Yn one-dimensional
    
        C = np.array(compute_c(X[j], noise)).reshape((1, 1))
        C_inv = np.linalg.solve(C, np.identity(C.shape[0]))
    
        for iteration in range(0, iterations):
            mean = np.zeros(X.shape[0])
            std = np.zeros(X.shape[0])
    
            for i, x in enumerate(X):
                mean[i], std[i] = predict(x, Xn, Yn, C_inv, noise)
    
            if iteration + 1 in [1, 2, 5, 10, 15]:
                plot_gp(X, mean, std, iteration + 1)
                plt.plot(Xn, Yn, "o", c="blue", label="sampled")
                plt.legend()
                plt.show()
    
            j = np.argmax(std)
            Xn = np.append(Xn, X[j])
            Yn = np.append(Yn, target(Xn[-1]))
    
            # update C matrix
            x_new = Xn[-1]
            k = np.array([kernel(x_new, val) for val in Xn])
            c = kernel(x_new, x_new) + noise
    
            # grow C by one row/column; set the diagonal last so the noise term is kept
            dim = C.shape[0]
            C_new = np.empty((dim + 1, dim + 1))
            C_new[:-1, :-1] = C
            C_new[:-1, -1] = k[:-1]
            C_new[-1, :-1] = k[:-1]
            C_new[-1, -1] = c
    
            C = C_new
            C_inv = np.linalg.solve(C, np.identity(C.shape[0]))
    
    gpr()
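
    For reference, predict() above implements the standard GP posterior equations for the squared-exponential kernel k(x, z) = exp(-(x - z)^2) with noise variance \sigma_n^2 = 0.001:

        \mu(x_*) = \mathbf{k}_*^\top C^{-1} \mathbf{y}
        \sigma^2(x_*) = k(x_*, x_*) + \sigma_n^2 - \mathbf{k}_*^\top C^{-1} \mathbf{k}_*

    where \mathbf{k}_* = (k(x_*, x_1), \dots, k(x_*, x_n))^\top and C_{ij} = k(x_i, x_j) + \sigma_n^2 \delta_{ij}. Each iteration then samples the target where the predictive standard deviation is largest (the argmax over std in the loop above), i.e. uncertainty sampling.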
    






    Author: Rest探路者
    Source: http://www.cnblogs.com/Java-Starter/
    Copyright of this article belongs to the author and cnblogs. Reposting is welcome, but unless the author agrees otherwise, please keep this notice and give a clearly visible link to the original article on the page.
    Github: https://github.com/cjy513203427
    Original post: https://www.cnblogs.com/Java-Starter/p/15096534.html