  • SML_Assignment4

    Task 1: Neural Networks

    1a) Multi-layer Perceptron

    We use the mean cross-entropy loss because it is well suited to multi-class classification problems.
    We use ReLU as the activation function. We also tested sigmoid, but it suffers from vanishing gradients (a short numeric check of this is given below).
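    The vanishing-gradient issue can be checked numerically: the sigmoid derivative peaks at 0.25 and decays towards zero for inputs of large magnitude, whereas the ReLU derivative stays at 1 for all positive inputs. A minimal illustration (not part of the submitted solution, using the same derivative definitions as the code below):

    import numpy as np

    x = np.array([-10.0, -2.0, 0.0, 2.0, 10.0])
    sig = 1.0 / (1.0 + np.exp(-x))
    print(sig * (1.0 - sig))  # sigmoid derivative: approx. [4.5e-05, 0.105, 0.25, 0.105, 4.5e-05]
    print(1. * (x > 0))       # ReLU derivative:    [0. 0. 0. 1. 1.]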
    The input layer has 784 neurons, because each image consists of $28 \times 28$ pixels.
    The output layer has 10 neurons, because there are 10 classes to distinguish.
    The learning rate is adjusted over the course of training. At the beginning the parameters are essentially random, so a relatively large learning rate is chosen to make the loss fall quickly. Once training has progressed, the parameter updates should become smaller, so a smaller learning rate is used; a sketch of such a decay schedule is given below.
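    A minimal sketch of such a step-decay schedule (the initial rate, decay factor, and interval here are illustrative assumptions; the submitted code below uses a fixed learning rate):

    def decayed_lr(initial_lr, epoch, decay_factor=0.5, decay_every=100):
        # halve the learning rate every `decay_every` epochs (illustrative values)
        return initial_lr * (decay_factor ** (epoch // decay_every))

    # e.g. inside the training loop: lr_t = decayed_lr(0.05, epoch)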

    Full code is below:

    import numpy as np
    from matplotlib import pyplot as plt
    
    
    def initialize(hidden_dim, output_dim):
        # retrieve mnist data
        X_train = np.loadtxt('./dataSets/mnist_small_train_in.txt', delimiter=',')
        X_test = np.loadtxt('./dataSets/mnist_small_test_in.txt', delimiter=',')
        y_train = np.loadtxt('./dataSets/mnist_small_train_out.txt', delimiter=',', dtype="int32")
        y_test = np.loadtxt('./dataSets/mnist_small_test_out.txt', delimiter=',', dtype="int32")
    
        # normalize data
        X_train = (X_train - np.mean(X_train)) / np.std(X_train)
        X_test = (X_test - np.mean(X_test)) / np.std(X_test)
    
        # one-hot encode labels
        y_train = np.eye(10)[y_train]
        y_test = np.eye(10)[y_test]
    
        # weights for single hidden layer
        W1 = np.random.randn(hidden_dim, X_train.shape[1]) * 0.01
        b1 = np.zeros((hidden_dim,))
        W2 = np.random.randn(output_dim, hidden_dim) * 0.01
        b2 = np.zeros((output_dim,))
    
        parameters = [W1, b1, W2, b2]
    
        # return data with samples as columns (features x samples)
        return parameters, X_train.T, X_test.T, y_train.T, y_test.T
    
    
    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))
    
    
    def dsigmoid(x):
        # input x is already sigmoid, no need to recompute
        return x * (1.0 - x)
    
    
    def ReLU(x):
        return x * (x > 0)
    
    
    def dReLU(x):
        return 1. * (x > 0)
    
    # Cross entropy loss (mean over the batch; pred and y are (10, N) column-wise)
    def closs(pred, y):
        return np.squeeze(-np.sum(np.multiply(np.log(pred), y)) / y.shape[1])
    
    def dcloss(pred, y):
        return pred - y
    
    # Mean squared error loss
    def loss(pred, y):
        return np.sum(.5 * np.sum((pred - y) ** 2, axis=0), axis=0) / y.shape[1]
    
    def dloss(pred, y):
        return (pred - y) / y.shape[1]
    
    
    class NeuralNet(object):
    
        def __init__(self, hidden_dim, output_dim):
            # size of layers
            self.hidden_dim_1 = hidden_dim
            self.output_dim = output_dim
    
            # weights and data
            parameters, self.x_train, self.x_test, self.y_train, self.y_test = initialize(hidden_dim, output_dim)
            self.W1, self.b1, self.W2, self.b2 = parameters
    
            # placeholder activations (overwritten on every forward pass)
            n_samples = self.x_train.shape[1]

            self.ai = np.ones((self.x_train.shape[0], n_samples))
            self.ah1 = np.ones((self.hidden_dim_1, n_samples))
            self.ao = np.ones((self.output_dim, n_samples))
    
            # classification output for transformed OHE
            self.classification = np.ones(self.ao.shape)
    
            # container for loss progress
            self.loss = None
            self.test_error = None
    
        def forward_pass(self, x):
            # input activations
            self.ai = x
    
            # hidden_1 activations (ReLU, consistent with dReLU in the backward pass)
            self.ah1 = ReLU(self.W1 @ self.ai + self.b1[:, np.newaxis])
    
            # output activations
            self.ao = sigmoid(self.W2 @ self.ah1 + self.b2[:, np.newaxis])
    
            # transform to one-hot encoding for classification
            self.classification = (self.ao == self.ao.max(axis=0, keepdims=True)).astype(float)
    
        def backward_pass(self, target):
            # calculate error for output
            out_error = dloss(self.ao, target)
            out_delta = out_error * dsigmoid(self.ao)
    
            # calculate error for hidden_1
            hidden_1_error = self.W2.T @ out_delta
            hidden_1_delta = hidden_1_error * dReLU(self.ah1)
    
            # derivative for W2/b2 (hidden_1 --> out)
            w2_deriv = out_delta @ self.ah1.T
            b2_deriv = np.sum(out_delta, axis=1)
    
            # derivative for W1/b1 (input --> hidden_1)
            w1_deriv = hidden_1_delta @ self.ai.T
            b1_deriv = np.sum(hidden_1_delta, axis=1)
    
            return [w1_deriv, b1_deriv, w2_deriv, b2_deriv]
    
        def train(self, epochs, lr, batch_size=128):
    
            self.loss = np.zeros((epochs,))
            self.test_error = np.zeros((epochs,))
    
            for epoch in range(epochs):
    
                indices = np.arange(self.x_train.shape[1])
                np.random.shuffle(indices)
    
                for i in range(0, self.x_train.shape[1] - batch_size + 1, batch_size):
                    excerpt = indices[i:i + batch_size]
    
                    batch_x, batch_y = self.x_train[:, excerpt], self.y_train[:, excerpt]
    
                    # compute output of forward pass
                    self.forward_pass(batch_x)
    
                    # back prop error
                    w1_deriv, b1_deriv, w2_deriv, b2_deriv = self.backward_pass(batch_y)
    
                    # adjust weights with simple SGD
                    self.W1 -= lr * w1_deriv
                    self.b1 -= lr * b1_deriv
                    self.W2 -= lr * w2_deriv
                    self.b2 -= lr * b2_deriv
    
                # compute error
                self.forward_pass(self.x_test)
                error = loss(self.ao, self.y_test)
                self.loss[epoch] = error
    
                t = 0
                for i, pred in enumerate(self.classification.T):
                    if np.argmax(self.y_test.T[i]) == np.argmax(pred):
                        t += 1
    
                acc = t / self.classification.shape[1]
                self.test_error[epoch] = 1 - acc
    
                # print progress
                if (epoch + 1) % 50 == 0:
                    print("Epoch {}/{} -> loss: {:.5f} -> val_acc: {:.5f}".format(epoch + 1, epochs, error, acc))
    
        def predict(self, x):
            self.forward_pass(x)
            return self.classification
    
        def visualize_prediction(self, expected, predicted):
            # plot loss progress
            plt.figure(2)
            plt.plot(self.loss)
            plt.xlabel("Epochs")
            plt.ylabel("Loss")
            plt.show()
    
            # plot loss progress
            plt.figure(3)
            plt.plot(self.test_error)
            plt.xlabel("Epochs")
            plt.ylabel("Test Error")
            plt.show()
    
    
    nn = NeuralNet(hidden_dim=784, output_dim=10)
    nn.train(epochs=500, lr=.05)
    
    y_hat = nn.predict(nn.x_test)
    t = 0
    for i, pred in enumerate(y_hat.T):
        if np.argmax(nn.y_test.T[i]) == np.argmax(pred):
            t += 1
    
    print("Accuracy:", t / y_hat.shape[1])
    nn.visualize_prediction(nn.y_test.T, y_hat.T)
    

    Show how the misclassification error (in percent) on the testing set evolves during learning:
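    For reference, the per-epoch misclassification rate stored in nn.test_error by the training loop above can be converted to percent and plotted as follows (a small sketch reusing the objects defined in the code above):

    plt.figure()
    plt.plot(100 * nn.test_error)
    plt.xlabel("Epochs")
    plt.ylabel("Misclassification error [%]")
    plt.show()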

    Task 3: Gaussian Processes

    3a) GP Regression
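    The code below implements standard GP regression with the squared-exponential kernel $k(x, z) = \exp(-(x - z)^2)$ and observation noise $\sigma_n^2 = 0.001$. For a query point $x_*$, the predictive mean and variance are

    $\mu(x_*) = k_*^\top C^{-1} y, \qquad \sigma^2(x_*) = k(x_*, x_*) + \sigma_n^2 - k_*^\top C^{-1} k_*,$

    where $k_*$ contains the kernel evaluations between $x_*$ and the sampled inputs, and $C$ is the kernel matrix of the sampled inputs with $\sigma_n^2$ added to the diagonal. In each iteration, the point with the largest predictive standard deviation is sampled next and added to the training set.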

    Full code is below:

    import numpy as np
    from matplotlib import pyplot as plt
    
    def target(x):
        s = np.sin(x)
        return s + np.square(s)
    
    
    def kernel(x, z):
        return np.exp(-(x - z) ** 2)
    
    
    def compute_c(x, noise):
        return kernel(x, x) + noise
    
    
    def predict(x, X, Y, C_inv, noise):
        k = np.array([kernel(x, val) for val in X])
        mean = k.T.dot(C_inv).dot(Y)
        std = np.sqrt(compute_c(x, noise) - k.T.dot(C_inv).dot(k))
        return mean, std
    
    
    def plot_gp(X, mean, std, iteration):
        y1 = mean - 2 * std
        y2 = mean + 2 * std
    
        plt.plot(X, target(X), "-", color="red", label=r"$\sin(x) + \sin^2(x)$")
        plt.plot(X, mean, color="black", label=r"$\mu$")
        plt.fill_between(X, y1, y2, facecolor='blue', interpolate=True, alpha=.5, label=r"$2\sigma$")
        plt.title(r"$\mu$ and $\sigma$ for iteration={}".format(iteration))
        plt.xlabel("x")
        plt.ylabel("y")
    
    
    def gpr():
        noise = 0.001
        step_size = 0.005
        X = np.arange(0, 2 * np.pi + step_size, step_size)
        iterations = 15
    
        std = np.array([np.sqrt(compute_c(x, noise)) for x in X])
        j = np.argmax(std)
    
        Xn = np.array([X[j]])
        Yn = np.array([target(X[j])])
    
        C = np.array(compute_c(X[j], noise)).reshape((1, 1))
        C_inv = np.linalg.solve(C, np.identity(C.shape[0]))
    
        for iteration in range(0, iterations):
            mean = np.zeros(X.shape[0])
            std = np.zeros(X.shape[0])
    
            for i, x in enumerate(X):
                mean[i], std[i] = predict(x, Xn, Yn, C_inv, noise)
    
            if iteration + 1 in [1, 2, 5, 10, 15]:
                plot_gp(X, mean, std, iteration + 1)
                plt.plot(Xn, Yn, "o", c="blue", label="sampled")
                plt.legend()
                plt.show()
    
            j = np.argmax(std)
            Xn = np.append(Xn, X[j])
            Yn = np.append(Yn, target(Xn[-1]))
    
            # update C matrix with the newly sampled point
            x_new = Xn[-1]
            k = np.array([kernel(x_new, val) for val in Xn])
            c = kernel(x_new, x_new) + noise

            dim = C.shape[0]
            C_new = np.empty((dim + 1, dim + 1))
            C_new[:-1, :-1] = C
            C_new[:, -1] = k
            C_new[-1, :] = k
            C_new[-1, -1] = c  # set the noisy diagonal entry last so it is not overwritten

            C = C_new
            C_inv = np.linalg.solve(C, np.identity(C.shape[0]))
    
    gpr()
    





