zoukankan      html  css  js  c++  java
  • 浅层神经网络

    1、神经网络概述:

    dW[L]=(1/m)*dZ[L]A[L-1].T

    db[L]=(1/m)*np.sum(dZ[L],axis=1,keepdims=True)

    dZ[L-1]=W[L].T dZ[L]*g'(Z[L-1])

    2. 激活函数:


     $\mathrm{sigmoid}(z)=\frac{1}{1+e^{-z}}$,    $\tanh(z)=\frac{e^{z}-e^{-z}}{e^{z}+e^{-z}}$ , $\mathrm{ReLU}(z)=\max(0,z)$ , $\mathrm{Leaky\ ReLU}(z)=\max(0.01z,z)$

    $\mathrm{sigmoid}'(z)=a(1-a)$,    $\tanh'(z)=1-a^{2}$, $\mathrm{ReLU}'(z)=1$ or $0$ , $\mathrm{Leaky\ ReLU}'(z)=1$ or $0.01$  (其中 $a=g(z)$)

    sigmoid激活函数:除了输出层是一个二分类问题基本不会用它;

    tanh激活函数:tanh是非常优秀的,几乎适合所有场合;

    ReLu激活函数:最常用的默认函数,如果不确定用哪个激活函数,就使用ReLu或者Leaky ReLu;

    3.随机初始化:


    W[L]=np.random.randn(n[L],n[L-1])*0.01

    b[L]=np.zeros((n[L],1))

    4.编程实践:


        

     

      1 #Defining the neural network structure:
      2 def layer_sizes(X, Y):
      3     """
      4     Arguments:
      5     X -- input dataset of shape (input size, number of examples)
      6     Y -- labels of shape (output size, number of examples)
      7     
      8     Returns:
      9     n_x -- the size of the input layer
     10     n_h -- the size of the hidden layer
     11     n_y -- the size of the output layer
     12     """
     13     n_x = X.shape[0] # size of input layer
     14     n_h = 4
     15     n_y =X.shape[0] # size of output layer
     16     
     17     return (n_x, n_h, n_y)
     18 
     19 #Initialize the model's parameters
     20 def initialize_parameters(n_x, n_h, n_y):
     21     """
     22     Argument:
     23     n_x -- size of the input layer
     24     n_h -- size of the hidden layer
     25     n_y -- size of the output layer
     26     
     27     Returns:
     28     params -- python dictionary containing your parameters:
     29                     W1 -- weight matrix of shape (n_h, n_x)
     30                     b1 -- bias vector of shape (n_h, 1)
     31                     W2 -- weight matrix of shape (n_y, n_h)
     32                     b2 -- bias vector of shape (n_y, 1)
     33     """
     34     
     35     np.random.seed(2) # we set up a seed so that your output matches ours although the initialization is random.
     36     
     37     ### START CODE HERE ### (≈ 4 lines of code)
     38     W1 = np.random.randn(n_h,n_x)*0.01
     39     b1 = np.zeros((n_h,1))
     40     W2 = np.random.randn(n_y,n_h)*0.01
     41     b2 = np.zeros((n_y,0))
     42 
     43     ### END CODE HERE ###
     44     
     45     assert (W1.shape == (n_h, n_x))
     46     assert (b1.shape == (n_h, 1))
     47     assert (W2.shape == (n_y, n_h))
     48     assert (b2.shape == (n_y, 1))
     49     
     50     parameters = {"W1": W1,
     51                   "b1": b1,
     52                   "W2": W2,
     53                   "b2": b2}
     54     
     55     return parameters 
     56 
     57 #Implement forward_propagation()
     58 def forward_propagation(X, parameters):
     59     """
     60     Argument:
     61     X -- input data of size (n_x, m)
     62     parameters -- python dictionary containing your parameters (output of initialization function)
     63     
     64     Returns:
     65     A2 -- The sigmoid output of the second activation
     66     cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
     67     """
     68     # Retrieve each parameter from the dictionary "parameters"
     69     ### START CODE HERE ### (≈ 4 lines of code)
     70     W1 = parameters['W1']
     71     b1 = parameters['b1']
     72     W2 = parameters['W2']
     73     b2 = parameters['b2']
     74     ### END CODE HERE ###
     75     
     76     # Implement Forward Propagation to calculate A2 (probabilities)
     77     ### START CODE HERE ### (≈ 4 lines of code)
     78     Z1 = np.dot(W1,X)+b1
     79     A1 = np.tanh(Z1)
     80     Z2 = np.dot(W2,A1)+b2
     81     A2 = sigmoid(Z2)
     82     ### END CODE HERE ###
     83     
     84     assert(A2.shape == (1, X.shape[1]))
     85     
     86     cache = {"Z1": Z1,
     87              "A1": A1,
     88              "Z2": Z2,
     89              "A2": A2}
     90     
     91     return A2, cache
     92 
     93 #implement  compute_cost
     94 def compute_cost(A2, Y, parameters):
     95     """
     96     Computes the cross-entropy cost given in equation (13)
     97     
     98     Arguments:
     99     A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
    100     Y -- "true" labels vector of shape (1, number of examples)
    101     parameters -- python dictionary containing your parameters W1, b1, W2 and b2
    102     
    103     Returns:
    104     cost -- cross-entropy cost given equation (13)
    105     """
    106     
    107     m = Y.shape[1] # number of example
    108 
    109     # Compute the cross-entropy cost
    110     ### START CODE HERE ### (≈ 2 lines of code)
    111     logprobs = np.multiply(np.log(A2),Y)+np.multiply((1-Y),np.log((1-A2)))
    112     cost =np.sum(logprobs)/m
    113     ### END CODE HERE ###
    114     
    115     cost = np.squeeze(cost)     # makes sure cost is the dimension we expect. 
    116                                 # E.g., turns [[17]] into 17 
    117     assert(isinstance(cost, float))
    118     
    119     return cost
    120 
    121 #implement backward_propagation:
    122 def backward_propagation(parameters, cache, X, Y):
    123     """
    124     Implement the backward propagation using the instructions above.
    125     
    126     Arguments:
    127     parameters -- python dictionary containing our parameters 
    128     cache -- a dictionary containing "Z1", "A1", "Z2" and "A2".
    129     X -- input data of shape (2, number of examples)
    130     Y -- "true" labels vector of shape (1, number of examples)
    131     
    132     Returns:
    133     grads -- python dictionary containing your gradients with respect to different parameters
    134     """
    135     m = X.shape[1]
    136     
    137     # First, retrieve W1 and W2 from the dictionary "parameters".
    138     ### START CODE HERE ### (≈ 2 lines of code)
    139     W1 = parameters['W1']
    140     W2 = parameters['W2']
    141     ### END CODE HERE ###
    142         
    143     # Retrieve also A1 and A2 from dictionary "cache".
    144     ### START CODE HERE ### (≈ 2 lines of code)
    145     A1 = cache['A1']
    146     A2 = cache['A2']
    147     ### END CODE HERE ###
    148     
    149     # Backward propagation: calculate dW1, db1, dW2, db2. 
    150     ### START CODE HERE ### (≈ 6 lines of code, corresponding to 6 equations on slide above)
    151     dZ2 = A2-Y
    152     dW2 = (1.0/m)*np.dot(dZ2,A1.T)
    153     db2 = (1.0/m)*np.sum(dZ2,axis=1,keepdims=True)
    154     dZ1 = np.multiply(np.dot(W2.T,dZ2),(1-np.power(A1,2)))
    155     dW1 = (1.0/m)*np.dot(dZ1,X.T)
    156     db1 = (1.0/m)*np.sum(dZ1,axis=1,keepdims=True)
    157     ### END CODE HERE ###
    158     
    159     grads = {"dW1": dW1,
    160              "db1": db1,
    161              "dW2": dW2,
    162              "db2": db2}
    163     
    164     return grads
    165 
    166 #update_parameters:
    167 def update_parameters(parameters, grads, learning_rate = 1.2):
    168     """
    169     Updates parameters using the gradient descent update rule given above
    170     
    171     Arguments:
    172     parameters -- python dictionary containing your parameters 
    173     grads -- python dictionary containing your gradients 
    174     
    175     Returns:
    176     parameters -- python dictionary containing your updated parameters 
    177     """
    178     # Retrieve each parameter from the dictionary "parameters"
    179     ### START CODE HERE ### (≈ 4 lines of code)
    180     W1 = parameters['W1']
    181     b1 = parameters['b1']
    182     W2 = parameters['W2']
    183     b2 = parameters['b2']
    184     ### END CODE HERE ###
    185     
    186     # Retrieve each gradient from the dictionary "grads"
    187     ### START CODE HERE ### (≈ 4 lines of code)
    188     dW1 = grads['dW1']
    189     db1 = grads['db1']
    190     dW2 = grads['dW2']
    191     db2 = grads['db2']
    192     ## END CODE HERE ###
    193     
    194     # Update rule for each parameter
    195     ### START CODE HERE ### (≈ 4 lines of code)
    196     W1 = W1-learning_rate*dW1
    197     b1 = b1-learning_rate*db1
    198     W2 = W2-learning_rate*dW2
    199     b2 = b2-learning_rate*db2
    200     ### END CODE HERE ###
    201     
    202     parameters = {"W1": W1,
    203                   "b1": b1,
    204                   "W2": W2,
    205                   "b2": b2}
    206     
    207     return parameters
    208 
    209 #Build your neural network model 
    210 def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False):
    211     """
    212     Arguments:
    213     X -- dataset of shape (2, number of examples)
    214     Y -- labels of shape (1, number of examples)
    215     n_h -- size of the hidden layer
    216     num_iterations -- Number of iterations in gradient descent loop
    217     print_cost -- if True, print the cost every 1000 iterations
    218     
    219     Returns:
    220     parameters -- parameters learnt by the model. They can then be used to predict.
    221     """
    222     
    223     np.random.seed(3)
    224     n_x = layer_sizes(X, Y)[0]
    225     n_y = layer_sizes(X, Y)[2]
    226     
    227     # Initialize parameters, then retrieve W1, b1, W2, b2. Inputs: "n_x, n_h, n_y". Outputs = "W1, b1, W2, b2, parameters".
    228     ### START CODE HERE ### (≈ 5 lines of code)
    229     parameters = initialize_parameters(n_x,n_h,n_y)
    230     W1 = parameters['W1']
    231     b1 = parameters['b1']
    232     W2 = parameters['W2']
    233     b2 = parameters['b2']
    234     ### END CODE HERE ###
    235     
    236     # Loop (gradient descent)
    237 
    238     for i in range(0, num_iterations):
    239          
    240         ### START CODE HERE ### (≈ 4 lines of code)
    241         # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
    242         A2, cache = forward_propagation(X,parameters)
    243         
    244         # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
    245         cost =compute_cost(A2,Y,parameters)
    246  
    247         # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
    248         grads =backward_propagation(parameters,cache,X,Y)
    249  
    250         # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
    251         parameters = update_parameters(parameters,grads)
    252         
    253         ### END CODE HERE ###
    254         
    255         # Print the cost every 1000 iterations
    256         if print_cost and i % 1000 == 0:
    257             print ("Cost after iteration %i: %f" %(i, cost))
    258 
    259     return parameters
    260 
    261 #Use your model to predict by building predict().Use forward propagation to predict results
    262 
    263 def predict(parameters, X):
    264     """
    265     Using the learned parameters, predicts a class for each example in X
    266     
    267     Arguments:
    268     parameters -- python dictionary containing your parameters 
    269     X -- input data of size (n_x, m)
    270     
    271     Returns
    272     predictions -- vector of predictions of our model (red: 0 / blue: 1)
    273     """
    274     
    275     # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    276     ### START CODE HERE ### (≈ 2 lines of code)
    277     A2, cache = forward_propagation(X,parameters)
    278     predictions =np.where(A2>0.5,1,0)
    279     ### END CODE HERE ###
    280     
    281     return predictions
  • 相关阅读:
    给定一个无序数组arr,求出需要排序的最短子数组长度。例如: arr = [1,5,3,4,2,6,7] 返回4,因为只有[5,3,4,2]需要排序。
    Given n pairs of parentheses, write a function to generate all combinations of well-formed parentheses. For example, given n = 3, a solution set is: "((()))", "(()())", "(())()", "()(())", "()()()"
    shell数组
    学习ansible(一)
    nginx搭建简单直播服务器
    rsync
    Linux运维最常用150个命令
    Linux 三剑客
    学习Python(一)
    学习k8s(三)
  • 原文地址:https://www.cnblogs.com/easy-wang/p/9969949.html
Copyright © 2011-2022 走看看