  • 卷积神经网络-week1编程题1(一步步搭建卷积神经网络)


    import numpy as np
    import h5py
    import matplotlib.pyplot as plt

    # Plotting defaults applied to every figure created after this point.
    plt.rcParams['figure.figsize'] = (5.0, 4.0)  # default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    # Seed NumPy's global random generator so random draws are repeatable.
    np.random.seed(1)


    函数解释:np.pad(array, pad_width, mode, **kwargs)

    • array——表示需要填充的数组;
    • pad_width——表示每个轴(axis)边缘需要填充的数值数目;
    • 参数输入方式为:((before_1, after_1), … (before_N, after_N)),其中(before_1, after_1)表示第1轴两边缘分别填充before_1个和after_1个数值。取值为:{sequence, array_like, int}
    • mode——表示填充的方式(取值:str字符串或用户提供的函数),总共有11种填充模式;
     def zero_pad(X, pad):
         """
         Pad the height and width of every image in the batch X with zeros.

         Arguments:
         X -- numpy array of shape (m, n_H, n_W, n_C) representing a batch of m images
         pad -- integer, number of zero rows/columns added on each side of the
                height and width axes

         Returns:
         X_pad -- padded batch of shape (m, n_H + 2*pad, n_W + 2*pad, n_C)
         """
         # One (before, after) pair per axis, in the order np.pad expects.
         pad_width = (
             (0, 0),      # batch axis: never padded
             (pad, pad),  # height: `pad` rows above and `pad` rows below
             (pad, pad),  # width: `pad` columns on the left and on the right
             (0, 0),      # channel axis: never padded
         )
         # 'constant' mode fills the new border with one repeated value (0 here).
         X_pad = np.pad(X, pad_width, mode='constant', constant_values=0)
         return X_pad



     def conv_single_step(a_slice_prev, W, b):
         """
         Apply one filter (W, b) to a single window of the previous layer's output.

         Arguments:
         a_slice_prev -- slice of input data of shape (f, f, n_C_prev)
         W -- weight parameters of the filter, array of shape (f, f, n_C_prev)
         b -- bias of the filter: a scalar or a single-element array such as shape (1, 1, 1)

         Returns:
         Z -- a scalar value: the sum of the element-wise product of the window
              and W, plus the bias
         """
         # Element-wise product of the window and the filter, summed over the whole volume.
         Z = np.sum(np.multiply(a_slice_prev, W))
         # Pull the single bias value out with .item(): it works for plain scalars and
         # for size-1 arrays of any rank, whereas calling float() directly on an array
         # with ndim > 0 is deprecated in recent NumPy releases.
         Z = Z + float(np.asarray(b).item())
         return Z


    例如,要在 a_prev(shape = (5,5,3))的左上角选取一个 2x2 的切片,可以这样写:a_slice_prev = a_prev[0:2,0:2,:]

     def conv_forward(A_prev, W, b, hparameters):
         """
         Implements the forward propagation for a convolution function

         Arguments:
         A_prev -- output activations of the previous layer, numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
         W -- Weights, numpy array of shape (f, f, n_C_prev, n_C)
         b -- Biases, numpy array of shape (1, 1, 1, n_C)
         hparameters -- python dictionary containing "stride" and "pad"

         Returns:
         Z -- conv output, numpy array of shape (m, n_H, n_W, n_C)
         cache -- tuple (A_prev, W, b, hparameters) needed for the conv_backward() function
         """
         # Retrieve dimensions and hyperparameters.
         (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
         (f, f, n_C_prev, n_C) = W.shape
         stride = hparameters['stride']
         pad = hparameters['pad']

         # Output size: floor((n_prev + 2*pad - f) / stride) + 1, done in integer
         # arithmetic so no float rounding is involved.
         n_H = (n_H_prev + 2 * pad - f) // stride + 1
         n_W = (n_W_prev + 2 * pad - f) // stride + 1

         Z = np.zeros((m, n_H, n_W, n_C))

         # Zero-pad only the height and width axes of the input batch.
         A_prev_pad = np.pad(A_prev, ((0, 0), (pad, pad), (pad, pad), (0, 0)),
                             mode='constant', constant_values=0)
         biases = b[0, 0, 0, :]  # one bias per filter, shape (n_C,)

         for h in range(n_H):          # vertical position in the output volume
             vert_start = h * stride
             vert_end = vert_start + f
             for w in range(n_W):      # horizontal position in the output volume
                 horiz_start = w * stride
                 horiz_end = horiz_start + f
                 # Window of every example at once: shape (m, f, f, n_C_prev).
                 window = A_prev_pad[:, vert_start:vert_end, horiz_start:horiz_end, :]
                 # Contract the (f, f, n_C_prev) axes of the window with those of W:
                 # entry [i, c] is sum(window[i] * W[:, :, :, c]), i.e. filter c applied
                 # to example i. Adding `biases` broadcasts over the batch axis.
                 Z[:, h, w, :] = np.tensordot(window, W, axes=([1, 2, 3], [0, 1, 2])) + biases

         # Save information in "cache" for the backprop
         cache = (A_prev, W, b, hparameters)
         return Z, cache


     def pool_forward(A_prev, hparameters, mode = "max"):
         """
         Implements the forward pass of the pooling layer

         Arguments:
         A_prev -- Input data, numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
         hparameters -- python dictionary containing "f" and "stride"
         mode -- the pooling mode you would like to use, defined as a string ("max" or "average")

         Returns:
         A -- output of the pool layer, a numpy array of shape (m, n_H, n_W, n_C)
         cache -- tuple (A_prev, hparameters) used in the backward pass of the pooling layer

         Raises:
         ValueError -- if mode is neither "max" nor "average"
         """
         # Pick the reduction once, up front. An unknown mode is rejected here
         # instead of silently producing an all-zero output.
         if mode == "max":
             reduce_window = np.max
         elif mode == "average":
             reduce_window = np.mean
         else:
             raise ValueError('mode must be "max" or "average", got %r' % (mode,))

         # Retrieve dimensions from the input shape
         (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape

         # Retrieve hyperparameters from "hparameters"
         f = hparameters["f"]
         stride = hparameters["stride"]

         # Output size: floor((n_prev - f) / stride) + 1; pooling keeps the channel count.
         n_H = (n_H_prev - f) // stride + 1
         n_W = (n_W_prev - f) // stride + 1
         n_C = n_C_prev

         A = np.zeros((m, n_H, n_W, n_C))

         for h in range(n_H):          # vertical position in the output volume
             vert_start = h * stride
             vert_end = vert_start + f
             for w in range(n_W):      # horizontal position in the output volume
                 horiz_start = w * stride
                 horiz_end = horiz_start + f
                 # Window for every example and channel at once: (m, f, f, n_C).
                 window = A_prev[:, vert_start:vert_end, horiz_start:horiz_end, :]
                 # Reduce over the two spatial axes only, leaving shape (m, n_C).
                 A[:, h, w, :] = reduce_window(window, axis=(1, 2))

         # Store the input and hparameters in "cache" for pool_backward()
         cache = (A_prev, hparameters)
         return A, cache


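    计算 dA 的公式为 $dA \mathrel{+}= \sum_{h} \sum_{w} W_c \times dZ_{hw}$,其中 $W_c$ 是过滤器,$dZ_{hw}$ 是一个标量,表示代价函数对卷积层输出 Z 在第 h 行、第 w 列处(即该位置点乘求和的结果)的梯度。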

    da_prev_pad[vert_start:vert_end,horiz_start:horiz_end,:] += W[:,:,:,c] * dZ[i,h,w,c]


    dW[:,:,:, c] += a_slice * dZ[i , h , w , c]

    db[:,:,:,c] += dZ[ i, h, w, c] 
     def conv_backward(dZ, cache):
         """
         Implement the backward propagation for a convolution function

         Arguments:
         dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, n_H, n_W, n_C)
         cache -- tuple (A_prev, W, b, hparameters), output of conv_forward()

         Returns:
         dA_prev -- gradient of the cost with respect to the input of the conv layer (A_prev),
                    numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
         dW -- gradient of the cost with respect to the weights of the conv layer (W)
               numpy array of shape (f, f, n_C_prev, n_C)
         db -- gradient of the cost with respect to the biases of the conv layer (b)
               numpy array of shape (1, 1, 1, n_C)
         """
         # Retrieve cached values, dimensions and hyperparameters.
         (A_prev, W, b, hparameters) = cache
         (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
         (f, f, n_C_prev, n_C) = W.shape
         stride = hparameters['stride']
         pad = hparameters['pad']
         (m, n_H, n_W, n_C) = dZ.shape

         dW = np.zeros((f, f, n_C_prev, n_C))
         # Each filter's bias contributes to every output position of every example,
         # so its gradient is dZ summed over the batch and both spatial axes.
         db = np.zeros((1, 1, 1, n_C))
         db[0, 0, 0, :] = dZ.sum(axis=(0, 1, 2))

         # Work on zero-padded copies so windows line up with the forward pass.
         pad_width = ((0, 0), (pad, pad), (pad, pad), (0, 0))
         A_prev_pad = np.pad(A_prev, pad_width, mode='constant', constant_values=0)
         dA_prev_pad = np.zeros(A_prev_pad.shape)

         for h in range(n_H):          # vertical position in the output volume
             # The corners must be scaled by the stride, exactly as in conv_forward;
             # using h and w directly is only correct when stride == 1.
             vert_start = h * stride
             vert_end = vert_start + f
             for w in range(n_W):      # horizontal position in the output volume
                 horiz_start = w * stride
                 horiz_end = horiz_start + f

                 a_slice = A_prev_pad[:, vert_start:vert_end, horiz_start:horiz_end, :]  # (m, f, f, n_C_prev)
                 dz = dZ[:, h, w, :]                                                     # (m, n_C)

                 # For each example i: sum over c of W[:, :, :, c] * dZ[i, h, w, c].
                 dA_prev_pad[:, vert_start:vert_end, horiz_start:horiz_end, :] += np.tensordot(
                     dz, W, axes=([1], [3]))
                 # For each filter c: sum over i of a_slice[i] * dZ[i, h, w, c].
                 dW += np.tensordot(a_slice, dz, axes=([0], [0]))

         # Strip the padding with explicit end indices: a `pad:-pad` slice would be
         # empty when pad == 0.
         dA_prev = dA_prev_pad[:, pad:pad + n_H_prev, pad:pad + n_W_prev, :].copy()

         # Making sure your output shape is correct
         assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
         return dA_prev, dW, db



    def create_mask_from_window(x):
        """
        Creates a mask from an input matrix x, to identify the max entry of x.

        Arguments:
        x -- Array of shape (f, f)

        Returns:
        mask -- Boolean array of the same shape as x, True at every position whose
                value equals the maximum of x (several positions if the maximum is tied)
        """
        # Comparing against the scalar maximum broadcasts over the whole window.
        mask = (x == np.max(x))
        return mask


     def distribute_value(dz, shape):
         """
         Spread the value dz evenly over a matrix of the given shape.

         Arguments:
         dz -- input scalar
         shape -- the shape (n_H, n_W) of the output matrix over which dz is distributed

         Returns:
         a -- Array of shape (n_H, n_W) whose every entry is dz / (n_H * n_W)
         """
         (n_H, n_W) = shape
         # Every cell gets an equal share, so the shares add back up to dz.
         share = dz / (n_H * n_W)
         a = np.ones(shape) * share
         return a


     def pool_backward(dA, cache, mode = "max"):
         """
         Implements the backward pass of the pooling layer

         Arguments:
         dA -- gradient of cost with respect to the output of the pooling layer, same shape as A
         cache -- tuple (A_prev, hparameters) output by the forward pass of the pooling layer
         mode -- the pooling mode you would like to use, defined as a string ("max" or "average")

         Returns:
         dA_prev -- gradient of cost with respect to the input of the pooling layer, same shape as A_prev

         Raises:
         ValueError -- if mode is neither "max" nor "average"
         """
         # Reject unknown modes up front instead of silently returning all zeros.
         if mode not in ("max", "average"):
             raise ValueError('mode must be "max" or "average", got %r' % (mode,))

         # Retrieve information from cache and the hyperparameters.
         (A_prev, hparameters) = cache
         stride = hparameters["stride"]
         f = hparameters["f"]

         m, n_H, n_W, n_C = dA.shape
         dA_prev = np.zeros(A_prev.shape)

         for h in range(n_H):          # vertical position in the pooled output
             # Window corners are scaled by the stride, mirroring pool_forward;
             # using h and w directly is only correct when stride == 1.
             vert_start = h * stride
             vert_end = vert_start + f
             for w in range(n_W):      # horizontal position in the pooled output
                 horiz_start = w * stride
                 horiz_end = horiz_start + f

                 # Upstream gradient for this output cell, reshaped to (m, 1, 1, n_C)
                 # so that it broadcasts over the f x f window.
                 da = dA[:, h, w, :][:, np.newaxis, np.newaxis, :]

                 if mode == "max":
                     window = A_prev[:, vert_start:vert_end, horiz_start:horiz_end, :]
                     # True where an entry equals the maximum of its own
                     # (example, channel) window; only those entries get gradient.
                     mask = (window == window.max(axis=(1, 2), keepdims=True))
                     dA_prev[:, vert_start:vert_end, horiz_start:horiz_end, :] += mask * da
                 else:  # mode == "average"
                     # Every entry of the f x f window gets an equal share of da.
                     dA_prev[:, vert_start:vert_end, horiz_start:horiz_end, :] += da / (f * f)

         # Making sure your output shape is correct
         assert(dA_prev.shape == A_prev.shape)
         return dA_prev
