  • Improving Deep Neural Networks - Week 2 Programming Assignment (Optimization Algorithms)

    (Batch) gradient descent

    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.io
    import math
    import sklearn
    import sklearn.datasets

    from opt_utils import load_params_and_grads, initialize_parameters, forward_propagation, backward_propagation
    from opt_utils import compute_cost, predict, predict_dec, plot_decision_boundary, load_dataset
    from testCases_v3 import *

    # %matplotlib inline
    plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'


    # GRADED FUNCTION: update_parameters_with_gd
    def update_parameters_with_gd(parameters, grads, learning_rate):
        """
        Update parameters using one step of gradient descent

        Arguments:
        parameters -- python dictionary containing your parameters to be updated:
                        parameters['W' + str(l)] = Wl
                        parameters['b' + str(l)] = bl
        grads -- python dictionary containing your gradients to update each parameter:
                        grads['dW' + str(l)] = dWl
                        grads['db' + str(l)] = dbl
        learning_rate -- the learning rate, scalar.

        Returns:
        parameters -- python dictionary containing your updated parameters
        """

        L = len(parameters) // 2  # number of layers in the neural network

        # Update rule for each parameter
        for l in range(L):
            ### START CODE HERE ### (approx. 2 lines)
            parameters['W' + str(l+1)] = parameters['W' + str(l+1)] - learning_rate * grads['dW' + str(l+1)]
            parameters['b' + str(l+1)] = parameters['b' + str(l+1)] - learning_rate * grads['db' + str(l+1)]
            ### END CODE HERE ###

        return parameters
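    For each layer $l$, the loop above performs the plain gradient-descent update, with $\alpha$ the learning rate:

    $$W^{[l]} = W^{[l]} - \alpha\, dW^{[l]}, \qquad b^{[l]} = b^{[l]} - \alpha\, db^{[l]}$$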

    Batch gradient descent (mini-batch size = m)

    Stochastic gradient descent (mini-batch size = 1)

    # (Batch) Gradient Descent:
    X = data_input
    Y = labels
    parameters = initialize_parameters(layers_dims)
    for i in range(0, num_iterations):
        # Forward propagation
        a, caches = forward_propagation(X, parameters)
        # Compute cost.
        cost = compute_cost(a, Y)
        # Backward propagation.
        grads = backward_propagation(a, caches, parameters)
        # Update parameters.
        parameters = update_parameters(parameters, grads)

    # Stochastic Gradient Descent:
    X = data_input
    Y = labels
    parameters = initialize_parameters(layers_dims)
    for i in range(0, num_iterations):
        for j in range(0, m):
            # Forward propagation
            a, caches = forward_propagation(X[:, j], parameters)
            # Compute cost
            cost = compute_cost(a, Y[:, j])
            # Backward propagation
            grads = backward_propagation(a, caches, parameters)
            # Update parameters.
            parameters = update_parameters(parameters, grads)

    Mini-batch gradient descent

    In the figure the training set is partitioned in its original order; in the code, the examples are first shuffled with np.random.permutation and then partitioned into mini-batches.
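    A toy illustration of the shuffle step (hypothetical 2×5 data, not part of the assignment): applying the same column permutation to X and Y keeps each example aligned with its label.

    np.random.seed(0)
    X_toy = np.arange(10).reshape(2, 5)    # 5 examples, 2 features each
    Y_toy = np.array([[0, 1, 0, 1, 1]])    # 5 labels
    perm = list(np.random.permutation(5))  # a random ordering of the column indices
    print(X_toy[:, perm])                  # columns reordered by perm
    print(Y_toy[:, perm])                  # labels reordered the same way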

    def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
        """
        Creates a list of random minibatches from (X, Y)

        Arguments:
        X -- input data, of shape (input size, number of examples)
        Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (1, number of examples)
        mini_batch_size -- size of the mini-batches, integer

        Returns:
        mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
        """

        np.random.seed(seed)            # To make your "random" minibatches the same as ours
        m = X.shape[1]                  # number of training examples
        mini_batches = []

        # Step 1: Shuffle (X, Y)
        permutation = list(np.random.permutation(m))
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation].reshape((1, m))

        # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
        num_complete_minibatches = math.floor(m / mini_batch_size)  # number of mini-batches of size mini_batch_size in your partitioning
        for k in range(0, num_complete_minibatches):
            ### START CODE HERE ### (approx. 2 lines)
            mini_batch_X = shuffled_X[:, k * mini_batch_size : (k+1) * mini_batch_size]
            mini_batch_Y = shuffled_Y[:, k * mini_batch_size : (k+1) * mini_batch_size]
            ### END CODE HERE ###

            mini_batch = (mini_batch_X, mini_batch_Y)
            mini_batches.append(mini_batch)

        # Handling the end case (last mini-batch < mini_batch_size)
        if m % mini_batch_size != 0:
            ### START CODE HERE ### (approx. 2 lines)
            mini_batch_X = shuffled_X[:, mini_batch_size * num_complete_minibatches:]
            mini_batch_Y = shuffled_Y[:, mini_batch_size * num_complete_minibatches:]
            ### END CODE HERE ###

            mini_batch = (mini_batch_X, mini_batch_Y)
            mini_batches.append(mini_batch)

        return mini_batches
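    A quick sanity check of the function (synthetic shapes, not the assignment's official test case): with m = 148 examples and mini_batch_size = 64 we expect two full mini-batches of 64 plus a final one of 20.

    X_check = np.random.randn(2, 148)            # hypothetical feature matrix
    Y_check = np.random.randint(0, 2, (1, 148))  # hypothetical 0/1 labels
    mini_batches = random_mini_batches(X_check, Y_check, mini_batch_size=64, seed=0)
    print(len(mini_batches))                               # 3
    print([mb_X.shape[1] for mb_X, mb_Y in mini_batches])  # [64, 64, 20]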

    Gradient descent with momentum

    Initialize the velocity

    def initialize_velocity(parameters):
        """
        Initializes the velocity as a python dictionary with:
                    - keys: "dW1", "db1", ..., "dWL", "dbL"
                    - values: numpy arrays of zeros of the same shape as the corresponding gradients/parameters.
        Arguments:
        parameters -- python dictionary containing your parameters.
                        parameters['W' + str(l)] = Wl
                        parameters['b' + str(l)] = bl

        Returns:
        v -- python dictionary containing the current velocity.
                        v['dW' + str(l)] = velocity of dWl
                        v['db' + str(l)] = velocity of dbl
        """

        L = len(parameters) // 2  # number of layers in the neural network
        v = {}

        # Initialize velocity
        for l in range(L):
            ### START CODE HERE ### (approx. 2 lines)
            v['dW' + str(l+1)] = np.zeros_like(parameters['W' + str(l+1)])
            v['db' + str(l+1)] = np.zeros_like(parameters['b' + str(l+1)])
            ### END CODE HERE ###

        return v

    Update the parameters

    def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):
        """
        Update parameters using Momentum

        Arguments:
        parameters -- python dictionary containing your parameters:
                        parameters['W' + str(l)] = Wl
                        parameters['b' + str(l)] = bl
        grads -- python dictionary containing your gradients for each parameter:
                        grads['dW' + str(l)] = dWl
                        grads['db' + str(l)] = dbl
        v -- python dictionary containing the current velocity:
                        v['dW' + str(l)] = ...
                        v['db' + str(l)] = ...
        beta -- the momentum hyperparameter, scalar
        learning_rate -- the learning rate, scalar

        Returns:
        parameters -- python dictionary containing your updated parameters
        v -- python dictionary containing your updated velocities
        """

        L = len(parameters) // 2  # number of layers in the neural network

        # Momentum update for each parameter
        for l in range(L):
            ### START CODE HERE ### (approx. 4 lines)
            # compute velocities
            v["dW" + str(l+1)] = beta * v["dW" + str(l+1)] + (1 - beta) * grads['dW' + str(l+1)]
            v["db" + str(l+1)] = beta * v["db" + str(l+1)] + (1 - beta) * grads['db' + str(l+1)]
            # update parameters
            parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * v["dW" + str(l+1)]
            parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * v["db" + str(l+1)]
            ### END CODE HERE ###

        return parameters, v
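    For each layer $l$, the loop implements the exponentially weighted velocity update followed by a step in the direction of the velocity:

    $$v_{dW^{[l]}} = \beta\, v_{dW^{[l]}} + (1-\beta)\, dW^{[l]}, \qquad W^{[l]} = W^{[l]} - \alpha\, v_{dW^{[l]}}$$
    $$v_{db^{[l]}} = \beta\, v_{db^{[l]}} + (1-\beta)\, db^{[l]}, \qquad b^{[l]} = b^{[l]} - \alpha\, v_{db^{[l]}}$$

    With β = 0 this reduces to plain gradient descent; a common choice is β around 0.9.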

    Adam optimization algorithm

    Initialize v and s

    def initialize_adam(parameters):
        """
        Initializes v and s as two python dictionaries with:
                    - keys: "dW1", "db1", ..., "dWL", "dbL"
                    - values: numpy arrays of zeros of the same shape as the corresponding gradients/parameters.

        Arguments:
        parameters -- python dictionary containing your parameters.
                        parameters["W" + str(l)] = Wl
                        parameters["b" + str(l)] = bl

        Returns:
        v -- python dictionary that will contain the exponentially weighted average of the gradient.
                        v["dW" + str(l)] = ...
                        v["db" + str(l)] = ...
        s -- python dictionary that will contain the exponentially weighted average of the squared gradient.
                        s["dW" + str(l)] = ...
                        s["db" + str(l)] = ...
        """

        L = len(parameters) // 2  # number of layers in the neural network
        v = {}
        s = {}

        # Initialize v, s. Input: "parameters". Outputs: "v, s".
        for l in range(L):
            ### START CODE HERE ### (approx. 4 lines)
            v["dW" + str(l+1)] = np.zeros_like(parameters['W' + str(l+1)])
            v["db" + str(l+1)] = np.zeros_like(parameters['b' + str(l+1)])

            s["dW" + str(l+1)] = np.zeros_like(parameters['W' + str(l+1)])
            s["db" + str(l+1)] = np.zeros_like(parameters['b' + str(l+1)])
            ### END CODE HERE ###

        return v, s

    Update the parameters

    def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate=0.01,
                                    beta1=0.9, beta2=0.999, epsilon=1e-8):
        """
        Update parameters using Adam

        Arguments:
        parameters -- python dictionary containing your parameters:
                        parameters['W' + str(l)] = Wl
                        parameters['b' + str(l)] = bl
        grads -- python dictionary containing your gradients for each parameter:
                        grads['dW' + str(l)] = dWl
                        grads['db' + str(l)] = dbl
        v -- Adam variable, moving average of the first gradient, python dictionary
        s -- Adam variable, moving average of the squared gradient, python dictionary
        t -- Adam update counter, used for bias correction
        learning_rate -- the learning rate, scalar.
        beta1 -- Exponential decay hyperparameter for the first moment estimates
        beta2 -- Exponential decay hyperparameter for the second moment estimates
        epsilon -- hyperparameter preventing division by zero in Adam updates

        Returns:
        parameters -- python dictionary containing your updated parameters
        v -- Adam variable, moving average of the first gradient, python dictionary
        s -- Adam variable, moving average of the squared gradient, python dictionary
        """

        L = len(parameters) // 2                 # number of layers in the neural network
        v_corrected = {}                         # Initializing first moment estimate, python dictionary
        s_corrected = {}                         # Initializing second moment estimate, python dictionary

        # Perform Adam update on all parameters
        for l in range(L):
            # Moving average of the gradients. Inputs: "v, grads, beta1". Output: "v".
            ### START CODE HERE ### (approx. 2 lines)
            v['dW' + str(l+1)] = beta1 * v["dW" + str(l+1)] + (1 - beta1) * grads['dW' + str(l+1)]
            v['db' + str(l+1)] = beta1 * v["db" + str(l+1)] + (1 - beta1) * grads['db' + str(l+1)]
            ### END CODE HERE ###

            # Compute bias-corrected first moment estimate. Inputs: "v, beta1, t". Output: "v_corrected".
            ### START CODE HERE ### (approx. 2 lines)
            v_corrected['dW' + str(l+1)] = v['dW' + str(l+1)] / (1 - beta1**t)
            v_corrected['db' + str(l+1)] = v['db' + str(l+1)] / (1 - beta1**t)
            ### END CODE HERE ###

            # Moving average of the squared gradients. Inputs: "s, grads, beta2". Output: "s".
            ### START CODE HERE ### (approx. 2 lines)
            s['dW' + str(l+1)] = beta2 * s["dW" + str(l+1)] + (1 - beta2) * (grads['dW' + str(l+1)])**2
            s['db' + str(l+1)] = beta2 * s["db" + str(l+1)] + (1 - beta2) * (grads['db' + str(l+1)])**2
            ### END CODE HERE ###

            # Compute bias-corrected second raw moment estimate. Inputs: "s, beta2, t". Output: "s_corrected".
            ### START CODE HERE ### (approx. 2 lines)
            s_corrected['dW' + str(l+1)] = s['dW' + str(l+1)] / (1 - beta2**t)
            s_corrected['db' + str(l+1)] = s['db' + str(l+1)] / (1 - beta2**t)
            ### END CODE HERE ###

            # Update parameters. Inputs: "parameters, learning_rate, v_corrected, s_corrected, epsilon". Output: "parameters".
            # Note: in the standard Adam update epsilon is added outside the square root.
            ### START CODE HERE ### (approx. 2 lines)
            parameters['W' + str(l+1)] = parameters['W' + str(l+1)] - learning_rate * (v_corrected['dW' + str(l+1)] / (np.sqrt(s_corrected['dW' + str(l+1)]) + epsilon))
            parameters['b' + str(l+1)] = parameters['b' + str(l+1)] - learning_rate * (v_corrected['db' + str(l+1)] / (np.sqrt(s_corrected['db' + str(l+1)]) + epsilon))
            ### END CODE HERE ###

        return parameters, v, s
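    For each layer $l$ and Adam step $t$, the loop above computes (the same formulas apply to $b^{[l]}$ with $db$; the squaring is element-wise):

    $$v_{dW^{[l]}} = \beta_1\, v_{dW^{[l]}} + (1-\beta_1)\, dW^{[l]}, \qquad v^{\text{corrected}}_{dW^{[l]}} = \frac{v_{dW^{[l]}}}{1-\beta_1^{\,t}}$$
    $$s_{dW^{[l]}} = \beta_2\, s_{dW^{[l]}} + (1-\beta_2)\, \big(dW^{[l]}\big)^{2}, \qquad s^{\text{corrected}}_{dW^{[l]}} = \frac{s_{dW^{[l]}}}{1-\beta_2^{\,t}}$$
    $$W^{[l]} = W^{[l]} - \alpha\, \frac{v^{\text{corrected}}_{dW^{[l]}}}{\sqrt{s^{\text{corrected}}_{dW^{[l]}}} + \varepsilon}$$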

    Testing

    Load the dataset

    train_X, train_Y = load_dataset()
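
    Optionally, to look at the data (a quick sketch, assuming train_X has shape (2, m) and train_Y has shape (1, m) as described in the model docstring below, and that load_dataset does not already plot it):

    plt.scatter(train_X[0, :], train_X[1, :], c=train_Y.ravel(), s=40, cmap=plt.cm.Spectral)
    plt.show()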


    Define the model

    def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
              beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True):
        """
        3-layer neural network model which can be run in different optimizer modes.

        Arguments:
        X -- input data, of shape (2, number of examples)
        Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (1, number of examples)
        layers_dims -- python list, containing the size of each layer
        optimizer -- the optimizer to use: "gd", "momentum" or "adam"
        learning_rate -- the learning rate, scalar.
        mini_batch_size -- the size of a mini batch
        beta -- Momentum hyperparameter
        beta1 -- Exponential decay hyperparameter for the past gradients estimates
        beta2 -- Exponential decay hyperparameter for the past squared gradients estimates
        epsilon -- hyperparameter preventing division by zero in Adam updates
        num_epochs -- number of epochs
        print_cost -- True to print the cost every 1000 epochs

        Returns:
        parameters -- python dictionary containing your updated parameters
        """

        L = len(layers_dims)             # number of layers in the neural network
        costs = []                       # to keep track of the cost
        t = 0                            # initializing the counter required for Adam update
        seed = 10                        # For grading purposes, so that your "random" minibatches are the same as ours

        # Initialize parameters
        parameters = initialize_parameters(layers_dims)

        # Initialize the optimizer
        if optimizer == "gd":
            pass  # no initialization required for gradient descent
        elif optimizer == "momentum":
            v = initialize_velocity(parameters)
        elif optimizer == "adam":
            v, s = initialize_adam(parameters)

        # Optimization loop
        for i in range(num_epochs):

            # Define the random minibatches. We increment the seed to reshuffle the dataset differently after each epoch
            seed = seed + 1
            minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # Forward propagation
                a3, caches = forward_propagation(minibatch_X, parameters)

                # Compute cost
                cost = compute_cost(a3, minibatch_Y)

                # Backward propagation
                grads = backward_propagation(minibatch_X, minibatch_Y, caches)

                # Update parameters
                if optimizer == "gd":
                    parameters = update_parameters_with_gd(parameters, grads, learning_rate)
                elif optimizer == "momentum":
                    parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
                elif optimizer == "adam":
                    t = t + 1  # Adam counter
                    parameters, v, s = update_parameters_with_adam(parameters, grads, v, s,
                                                                   t, learning_rate, beta1, beta2, epsilon)

            # Print the cost every 1000 epochs
            if print_cost and i % 1000 == 0:
                print("Cost after epoch %i: %f" % (i, cost))
            if print_cost and i % 100 == 0:
                costs.append(cost)

        # plot the cost
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('epochs (per 100)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

        return parameters

    Mini-batch gradient descent

    # train 3-layer model
    layers_dims = [train_X.shape[0], 5, 2, 1]
    parameters = model(train_X, train_Y, layers_dims, optimizer="gd")

    # Predict
    predictions = predict(train_X, train_Y, parameters)

    # Plot decision boundary
    plt.title("Model with Gradient Descent optimization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)

    Prediction accuracy: 0.797

    Mini-batch gradient descent with momentum

    # train 3-layer model
    layers_dims = [train_X.shape[0], 5, 2, 1]
    parameters = model(train_X, train_Y, layers_dims, beta=0.9, optimizer="momentum")

    # Predict
    predictions = predict(train_X, train_Y, parameters)

    # Plot decision boundary
    plt.title("Model with Momentum optimization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)

    Prediction accuracy: 0.797

    Mini-batch gradient descent with Adam

    # train 3-layer model
    layers_dims = [train_X.shape[0], 5, 2, 1]
    parameters = model(train_X, train_Y, layers_dims, optimizer="adam")

    # Predict
    predictions = predict(train_X, train_Y, parameters)

    # Plot decision boundary
    plt.title("Model with Adam optimization")
    axes = plt.gca()
    axes.set_xlim([-1.5, 2.5])
    axes.set_ylim([-1, 1.5])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)

    Prediction accuracy: 0.94

  • Original article: https://www.cnblogs.com/cxq1126/p/13110189.html