Deep Learning for Beginners: My First Deep Neural Network

I've looked at pile after pile of frameworks, but as a total beginner I still prefer to start from a low-level implementation. I watched Andrew Ng's videos on Coursera and worked through the programming exercises on the site; both the course and the exercises are excellent and hit exactly the right points. This post is my summary of the first course.

Although it's called a deep neural network, it is really just a fully connected network with a few more layers, written to practice and review the course. The data set is one I made myself (it will be uploaded as an attachment): a (30000, 400) training set with 200 positive samples (cars) and 200 negative samples, and a (30000, 100) test set. Neither the number of samples nor the way they were chosen is really reasonable, so go easy on me. The code is below.
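For context, here is a minimal sketch of how sample matrices of that shape could be assembled and saved as the .npy files that the loader further down reads. The 100x100x3 image size (100*100*3 = 30000 features per column) and the [0, 1] scaling are my assumptions, not something stated in the post, and build_set is just an illustrative helper.

import numpy as np

# Assumption: each sample is a 100x100x3 image flattened into one column of the matrix.
def build_set(images, labels):
    X = np.stack([img.reshape(-1) for img in images], axis=1) / 255.0   # (30000, m)
    Y = np.array(labels).reshape(1, -1)                                  # (1, m), 1 = car
    return X, Y

# Hypothetical usage with your own image/label lists:
# train_set, train_label = build_set(train_images, train_labels)
# np.save('train_set.npy', train_set)
# np.save('train_label.npy', train_label)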

++++++++++++++++++++++++++ This part: the required functions, relu and sigmoid
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np

def sigmoid(Z):
    """
    Implements the sigmoid activation in numpy

    Arguments:
    Z -- numpy array of any shape

    Returns:
    A -- output of sigmoid(Z), same shape as Z
    cache -- returns Z as well, useful during backpropagation
    """
    A = 1 / (1 + np.exp(-Z))
    cache = Z

    return A, cache

def relu(Z):
    """
    Implement the RELU function.

    Arguments:
    Z -- Output of the linear layer, of any shape

    Returns:
    A -- Post-activation parameter, of the same shape as Z
    cache -- returns Z, stored for computing the backward pass efficiently
    """
    A = np.maximum(0, Z)

    assert (A.shape == Z.shape)

    cache = Z
    return A, cache


def sigmoid_backprob(dA, cache):
    """
    Implement the backward propagation for a single SIGMOID unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """
    Z = cache

    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)

    assert (dZ.shape == Z.shape)

    return dZ

def relu_backprob(dA, cache):
    """Backward propagation for a single RELU unit: dZ = dA where Z > 0, else 0."""
    Z = cache
    dZ = dA * (Z > 0)
    assert (dZ.shape == Z.shape)
    return dZ



++++++++++++++++++++++++++ This part: reading the data set ++++++++++++++++++++++++++++++++++++++

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np

def load_data_set():
    train_set = np.load('train_set.npy')
    train_label = np.load('train_label.npy')
    test_set = np.load('test_set.npy')
    test_label = np.load('test_label.npy')

    return train_set, train_label, test_set, test_label


+++++++++++++++++++++++ The network +++++++++++++++++++++++++++++
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import numpy as np
from activation_function import sigmoid, relu, sigmoid_backprob, relu_backprob
from read_data import load_data_set

train_set, train_label, test_set, test_label = load_data_set()

def init_parameters_deep(layer_items):
    parameters = {}
    L = len(layer_items)
    for l in range(1, L):
        W = np.random.randn(layer_items[l], layer_items[l-1]) * 0.01
        b = np.zeros((layer_items[l], 1))
        parameters['W' + str(l)] = W
        parameters['b' + str(l)] = b

    return parameters

def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache

def linear_activation_forward(A_prev, W, b, activation):
    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

def L_forward_model(X, parameters):
    caches = []
    L = len(parameters) // 2
    A = X
    # Hidden layers 1..L-1 use relu; the output layer uses sigmoid.
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation='relu')
        caches.append(cache)
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation='sigmoid')
    caches.append(cache)

    return AL, caches

def compute_cost(AL, Y):
    m = AL.shape[1]
    cost = -1/m * (Y * np.log(AL) + (1 - Y) * np.log(1 - AL)).sum()
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    return cost

def linear_back(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = 1/m * np.dot(dZ, A_prev.T)
    db = 1/m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

def linear_activation_back(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == 'sigmoid':
        dZ = sigmoid_backprob(dA, activation_cache)
        dA_prev, dW, db = linear_back(dZ, linear_cache)
    elif activation == 'relu':
        dZ = relu_backprob(dA, activation_cache)
        dA_prev, dW, db = linear_back(dZ, linear_cache)

    return dA_prev, dW, db

def L_backprob_model(AL, Y, caches):
    grads = {}
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    L = len(caches)
    # Output (sigmoid) layer.
    cache = caches[L-1]
    grads['dA' + str(L-1)], grads['dW' + str(L)], grads['db' + str(L)] = linear_activation_back(dAL, cache, activation='sigmoid')

    # Hidden (relu) layers: each layer uses the gradient dA flowing back from the layer above it.
    for l in reversed(range(L-1)):
        cache = caches[l]
        grads['dA' + str(l)], grads['dW' + str(l+1)], grads['db' + str(l+1)] = linear_activation_back(grads['dA' + str(l+1)], cache, activation='relu')

    return grads

def update_parameters(parameters, grads, learning_rate=0.01):
    L = len(parameters) // 2
    for l in range(1, L+1):
        parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)]
    return parameters

def L_nn_deep(layer_items, X, Y, num_iter, learning_rate, print_cost=False):
    costs = []
    parameters = init_parameters_deep(layer_items)

    for i in range(num_iter):
        AL, caches = L_forward_model(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_backprob_model(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print('Iteration %d, cost: %f' % (i, cost))

    return parameters

layers = [30000, 10000, 5000, 1000, 100, 1]

p = L_nn_deep(layers, train_set, train_label, 2000, 0.01, True)

These parameters, and the data set for that matter, are terrible; the run blew straight through my machine's memory, and I'm too lazy to fix it.
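If memory is the bottleneck, a much smaller network on the same data will at least run. Below is a minimal sketch: the hidden-layer sizes are arbitrary assumptions, and predict is a hypothetical helper (not part of the code above) that reuses L_forward_model to score the test set that load_data_set already returns.

# Sketch only: layer sizes are arbitrary, predict() is an added helper.
def predict(X, Y, parameters):
    AL, _ = L_forward_model(X, parameters)
    predictions = (AL > 0.5).astype(int)   # threshold the sigmoid output
    accuracy = np.mean(predictions == Y)
    return predictions, accuracy

small_layers = [30000, 20, 7, 5, 1]        # far fewer units per hidden layer
p_small = L_nn_deep(small_layers, train_set, train_label, 2000, 0.01, True)
_, train_acc = predict(train_set, train_label, p_small)
_, test_acc = predict(test_set, test_label, p_small)
print('train accuracy: %f, test accuracy: %f' % (train_acc, test_acc))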




Original post: https://www.cnblogs.com/zxxian/p/7857467.html