  • Andrew Ng's Deep Learning specialization: Course 2, Week 2 programming assignment

    Reference: https://blog.csdn.net/u013733326/article/details/79907419

    Code:

    # coding=utf-8
    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.io
    import math
    import sklearn
    import sklearn.datasets
    
    import opt_utils  # see the accompanying data package, or copy it from the bottom of the article
    import testCase   # see the accompanying data package, or copy it from the bottom of the article
    
    #%matplotlib inline  # uncomment this line if you are running in a Jupyter Notebook
    
    
    def update_parameters_with_gd(parameters, grads, learning_rate):
        L = len(parameters) // 2
    
        for l in range(L):
            parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
            parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]
    
        return parameters
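
    # Quick sanity check (an illustrative sketch, not part of the original assignment):
    # one gradient-descent step on a single-layer parameter set with learning_rate = 0.1,
    # so W1 goes from 1.0 to 1.0 - 0.1 * 2.0 = 0.8 and b1 from 0.5 to 0.5 - 0.1 * 1.0 = 0.4.
    # params = {"W1": np.array([[1.0]]), "b1": np.array([[0.5]])}
    # grads = {"dW1": np.array([[2.0]]), "db1": np.array([[1.0]])}
    # print(update_parameters_with_gd(params, grads, learning_rate=0.1))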
    
    
    def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
        np.random.seed(seed)
        m = X.shape[1]
        mini_batches = []
    
        # Step 1: shuffle the training examples (the same permutation is applied to the columns of X and Y)
        permutation = list(np.random.permutation(m))
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation].reshape(1, m)
    
        # Step 2: partition (shuffled_X, shuffled_Y) into complete mini-batches of size mini_batch_size
        num_complete_minibatches = math.floor(m / mini_batch_size)
        for k in range(0, num_complete_minibatches):
            mini_batches_X = shuffled_X[:, k * mini_batch_size: (k + 1) * mini_batch_size]
            mini_batches_Y = shuffled_Y[:, k * mini_batch_size: (k + 1) * mini_batch_size]
            mini_batch = (mini_batches_X, mini_batches_Y)
            mini_batches.append(mini_batch)
        # Step 3: handle the final, smaller mini-batch when m is not a multiple of mini_batch_size
        if m % mini_batch_size != 0:
            mini_batches_X = shuffled_X[:, mini_batch_size * num_complete_minibatches:]
            mini_batches_Y = shuffled_Y[:, mini_batch_size * num_complete_minibatches:]
    
            mini_batch = (mini_batches_X, mini_batches_Y)
            mini_batches.append(mini_batch)
        return mini_batches
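
    # Sanity check for random_mini_batches (an illustrative sketch, not from the original post):
    # with 148 training examples and mini_batch_size = 64 we expect two full batches of
    # 64 columns each and one final batch of 148 - 2 * 64 = 20 columns.
    # X_check = np.random.randn(12288, 148)
    # Y_check = (np.random.randn(1, 148) < 0.5).astype(int)
    # for bx, by in random_mini_batches(X_check, Y_check, mini_batch_size=64, seed=0):
    #     print(bx.shape, by.shape)  # (12288, 64) (1, 64), ..., then (12288, 20) (1, 20)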
    
    
    def init_velocity(parameters):
        L = len(parameters) // 2
        v = {}
    
        for l in range(L):
            v["dW" + str(l + 1)] = np.zeros_like(parameters["W" + str(l + 1)])
            v["db" + str(l + 1)] = np.zeros_like(parameters["b" + str(l + 1)])
    
        return v
    
    
    def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):
        L = len(parameters) // 2
        for l in range(L):
            # Compute the exponentially weighted average of the gradients (the "velocity")
            v["dW" + str(l + 1)] = beta * v["dW" + str(l + 1)] + (1 - beta) * grads["dW" + str(l + 1)]
            v["db" + str(l + 1)] = beta * v["db" + str(l + 1)] + (1 - beta) * grads["db" + str(l + 1)]

            # Update the parameters using the velocity instead of the raw gradient
            parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * v["dW" + str(l + 1)]
            parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * v["db" + str(l + 1)]
    
        return parameters, v
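
    # The momentum update implemented above, written out for layer l:
    #   v_dW = beta * v_dW + (1 - beta) * dW
    #   W    = W - learning_rate * v_dW
    # With beta = 0.9 the velocity is roughly an average over the last 1 / (1 - beta) = 10
    # gradients, which damps the oscillations of plain mini-batch gradient descent.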
    
    def init_adam(parameters):
        L = len(parameters) // 2
        v = {}
        s = {}
        for l in range(L):
            v["dW" + str(l + 1)] = np.zeros_like(parameters["W" + str(l + 1)])
            v["db" + str(l + 1)] = np.zeros_like(parameters["b" + str(l + 1)])
    
            s["dW" + str(l + 1)] = np.zeros_like(parameters["W" + str(l + 1)])
            s["db" + str(l + 1)] = np.zeros_like(parameters["b" + str(l + 1)])
    
        return v, s
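
    # Both moment estimates start at zero with the same shapes as the corresponding parameters;
    # the bias correction in update_parameters_with_adam compensates for this zero initialization.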
    
    def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate = 0.01, beta1 = 0.9, beta2 = 0.999, eps = 1e-8):
        L = len(parameters) // 2
        v_corrected = {}  # bias-corrected first-moment estimates
        s_corrected = {}  # bias-corrected second-moment estimates
    
        for l in range(L):
            # Momentum part: exponentially weighted average of the gradients
            v["dW" + str(l + 1)] = beta1 * v["dW" + str(l + 1)] + (1 - beta1) * grads["dW" + str(l + 1)]
            v["db" + str(l + 1)] = beta1 * v["db" + str(l + 1)] + (1 - beta1) * grads["db" + str(l + 1)]
            # Bias correction
            v_corrected["dW" + str(l + 1)] = v["dW" + str(l + 1)] / (1 - np.power(beta1, t))
            v_corrected["db" + str(l + 1)] = v["db" + str(l + 1)] / (1 - np.power(beta1, t))
            # RMSprop part: exponentially weighted average of the squared gradients
            s["dW" + str(l + 1)] = beta2 * s["dW" + str(l + 1)] + (1 - beta2) * np.square(grads["dW" + str(l + 1)])
            s["db" + str(l + 1)] = beta2 * s["db" + str(l + 1)] + (1 - beta2) * np.square(grads["db" + str(l + 1)])
            # Bias correction
            s_corrected["dW" + str(l + 1)] = s["dW" + str(l + 1)] / (1 - np.power(beta2, t))
            s_corrected["db" + str(l + 1)] = s["db" + str(l + 1)] / (1 - np.power(beta2, t))
            # Update the parameters; eps is added outside the square root, as in the standard Adam update
            parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * (v_corrected["dW" + str(l + 1)] / (np.sqrt(s_corrected["dW" + str(l + 1)]) + eps))
            parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * (v_corrected["db" + str(l + 1)] / (np.sqrt(s_corrected["db" + str(l + 1)]) + eps))
        return parameters, v, s
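
    # The Adam update implemented above, written out for layer l at step t:
    #   v_dW = beta1 * v_dW + (1 - beta1) * dW          (first moment, momentum-like)
    #   s_dW = beta2 * s_dW + (1 - beta2) * dW ** 2     (second moment, RMSprop-like)
    #   v_hat = v_dW / (1 - beta1 ** t),  s_hat = s_dW / (1 - beta2 ** t)   (bias correction)
    #   W = W - learning_rate * v_hat / (sqrt(s_hat) + eps)
    # The bias correction mainly matters for small t, while v and s are still close to zero.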
    
    
    def model(X, Y, layers_dims, optimizer, learning_rate=0.0007,
              mini_batch_size=64, beta=0.9, beta1=0.9, beta2=0.999,
              epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
        """
        可以运行在不同优化器模式下的3层神经网络模型。
    
        参数:
            X - 输入数据,维度为(2,输入的数据集里面样本数量)
            Y - 与X对应的标签
            layers_dims - 包含层数和节点数量的列表
            optimizer - 字符串类型的参数,用于选择优化类型,【 "gd" | "momentum" | "adam" 】
            learning_rate - 学习率
            mini_batch_size - 每个小批量数据集的大小
            beta - 用于动量优化的一个超参数
            beta1 - 用于计算梯度后的指数衰减的估计的超参数
            beta1 - 用于计算平方梯度后的指数衰减的估计的超参数
            epsilon - 用于在Adam中避免除零操作的超参数,一般不更改
            num_epochs - 整个训练集的遍历次数,(视频2.9学习率衰减,1分55秒处,视频中称作“代”),相当于之前的num_iteration
            print_cost - 是否打印误差值,每遍历1000次数据集打印一次,但是每100次记录一个误差值,又称每1000代打印一次
            is_plot - 是否绘制出曲线图
    
        返回:
            parameters - 包含了学习后的参数
    
        """
        L = len(layers_dims)
        costs = []
        t = 0  # Adam step counter, incremented after every mini-batch update
        seed = 10  # random seed used when shuffling the mini-batches
    
        # Initialize the parameters
        parameters = opt_utils.initialize_parameters(layers_dims)

        # Choose the optimizer
        if optimizer == "gd":
            pass  # plain mini-batch gradient descent: no extra state to initialize
        elif optimizer == "momentum":
            v = init_velocity(parameters)  # momentum
        elif optimizer == "adam":
            v, s = init_adam(parameters)  # Adam
        else:
            print("Invalid value for the optimizer argument, exiting.")
            exit(1)
    
        # Training loop
        for i in range(num_epochs):
            # Build the random mini-batches; the seed is incremented every epoch so the data is reshuffled differently each pass
            seed = seed + 1
            minibatches = random_mini_batches(X, Y, mini_batch_size, seed)
    
            for minibatch in minibatches:
                # Unpack one mini-batch
                (minibatch_X, minibatch_Y) = minibatch

                # Forward propagation
                A3, cache = opt_utils.forward_propagation(minibatch_X, parameters)

                # Compute the cost
                cost = opt_utils.compute_cost(A3, minibatch_Y)

                # Backward propagation
                grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)

                # Update the parameters
                if optimizer == "gd":
                    parameters = update_parameters_with_gd(parameters, grads, learning_rate)
                elif optimizer == "momentum":
                    parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
                elif optimizer == "adam":
                    t = t + 1
                    parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t, learning_rate, beta1, beta2,
                                                                   epsilon)
            # Record the cost every 100 epochs
            if i % 100 == 0:
                costs.append(cost)
                # Optionally print the cost every 1000 epochs
                if print_cost and i % 1000 == 0:
                    print("Cost after epoch " + str(i) + ": " + str(cost))
        # Optionally plot the cost curve
        if is_plot:
            plt.plot(costs)
            plt.ylabel('cost')
            plt.xlabel('epochs (per 100)')
            plt.title("Learning rate = " + str(learning_rate))
            plt.show()
    
        return parameters
    
    
    if __name__ == '__main__':
        # Train the 3-layer model with the Adam optimizer
        train_X, train_Y = opt_utils.load_dataset(is_plot=True)
        layers_dims = [train_X.shape[0], 5, 2, 1]
        parameters = model(train_X, train_Y, layers_dims, optimizer="adam", is_plot=True)
        predictions = opt_utils.predict(train_X, train_Y, parameters)
    
        # Plot the decision boundary
        plt.title("Model with Adam optimization")
        axes = plt.gca()
        axes.set_xlim([-1.5, 2.5])
        axes.set_ylim([-1, 1.5])
        opt_utils.plot_decision_boundary(lambda x: opt_utils.predict_dec(parameters, x.T), train_X, train_Y)
        plt.show()
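
        # To compare the optimizers on the same dataset (an illustrative sketch, not part of the
        # original post), the model can simply be re-run with a different `optimizer` string:
        # for opt in ("gd", "momentum"):
        #     params_opt = model(train_X, train_Y, layers_dims, optimizer=opt, is_plot=True)
        #     opt_utils.predict(train_X, train_Y, params_opt)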
        # plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
        # plt.rcParams['image.interpolation'] = 'nearest'
        # plt.rcParams['image.cmap'] = 'gray'
        # plt.show()
    
  • Original post: https://www.cnblogs.com/pkgunboat/p/14303667.html