CNN1 Recursive Network

A previous post, Convolution and Pooling Layers, contained a rather messy CNN implementation; this post revises it.

Main changes:

1. Merge ConvLayer and PoolLayer.

In the Theano example (Deep Convolutional Neural Networks with Theano), the two layers share a single set of weights and biases; in Caffe (Caffe Study 1: Network Parameters and Custom Networks), the pooling layer has no weights or biases of its own.
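As a minimal, self-contained sketch of what the merged layer computes here (convolution with learned weights and a bias, then ReLU, then parameter-free 2x2 max pooling), assuming a single 2-D input and one kernel; convpool_forward is an illustrative name, not a class from the listing below:

    import numpy as np
    from scipy.signal import correlate2d

    def convpool_forward(x, w, b, p=2):
        # learned parameters (w, b) belong to the convolution only
        conved = np.maximum(correlate2d(x, w, mode='valid') + b, 0.0)  # conv + ReLU
        h, wd = conved.shape
        # parameter-free 2x2 max pooling via a reshape trick
        return conved[:h - h % p, :wd - wd % p].reshape(h // p, p, wd // p, p).max(axis=(1, 3))

    # e.g. a 28*28 input with a 5*5 kernel -> 24*24 feature map -> 12*12 pooled map
    out = convpool_forward(np.random.randn(28, 28), np.random.randn(5, 5), 0.1)
    print out.shape   # (12, 12)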

In the earlier implementation, where the PoolLayer had weights of its own, good results were obtained only when loc was near 1 and scale was small.

My understanding is that pooling subsamples to reduce the amount of data and add translation invariance while still preserving information; putting a weight on the pooled output discards part of what, for example, max pooling retains.
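A tiny demo of that intuition (the "pooling weight" here is just a hypothetical scalar factor applied to the pooled map): only when the factor is close to 1 does it reproduce plain max pooling, which matches the observation above about loc near 1 and a small scale.

    import numpy as np

    a = np.array([[1., 0., 2., 1.],
                  [0., 9., 1., 0.],
                  [3., 1., 0., 4.],
                  [1., 2., 2., 1.]])

    def max_pool(x, p=2):
        h, w = x.shape
        return x.reshape(h // p, p, w // p, p).max(axis=(1, 3))

    print max_pool(a)          # [[9. 2.] [3. 4.]] -- the strongest responses survive intact
    print 0.3 * max_pool(a)    # a pooling weight far from 1 rescales (distorts) them
    print 1.05 * max_pool(a)   # near 1 it is almost plain max pooling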

2. Separate the different layers.

In forward and backward propagation the per-layer methods are unchanged; only the inputs and outputs passed between layers differ.

Once the layers are separated, different network architectures are easy to configure.
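For example (using the classes from the listing below; net2 and the 720/200/100 sizes are just an arbitrary choice for illustration), a deeper fully connected stack is configured simply by listing more layers:

    net2 = Network([ConvPoolLayer(image_shape=[28, 28], filter_shape=[5, 5, 5], poolsize=(2, 2)),
                    FullLayer(in_num=720, out_num=200),
                    FullLayer(in_num=200, out_num=100),
                    SoftmaxLayer(in_num=100, out_num=10)])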

According to the formulas, the convolution kernel has to be rotated 180 degrees once between the forward and backward passes. In this example the kernel is not rotated in the forward pass and is rotated in the backward pass, which amounts to treating the stored kernel as an already-rotated weight. (scipy.signal's convolve2d flips the kernel by itself.)
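A quick check of that last point, showing that scipy's convolve2d is the same as correlating with a 180-degree-rotated kernel:

    import numpy as np
    from scipy.signal import convolve2d, correlate2d

    a = np.arange(16.).reshape(4, 4)
    k = np.array([[1., 2.], [3., 4.]])

    # convolve2d flips the kernel internally, so these two results are identical
    print np.allclose(convolve2d(a, k, mode='valid'),
                      correlate2d(a, np.rot90(k, 2), mode='valid'))   # True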

The next post will take the opposite approach.

Remaining problems:

1. The mini-batch is actually updated one sample at a time.

2. The convolution is too slow.

Both will be addressed in later posts; a vectorization sketch for the second point is given below.
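On the speed issue, a minimal sketch of one way the inner double loop of conv() could be vectorized (an im2col-style gather; conv_valid_fast is a hypothetical name, and like conv() it computes a valid cross-correlation without flipping the kernel):

    import numpy as np

    def conv_valid_fast(a, v):
        ah, aw = a.shape
        vh, vw = v.shape
        oh, ow = ah - vh + 1, aw - vw + 1
        # gather every vh*vw window into one matrix, then do a single dot product
        ii = np.arange(oh)[:, None, None, None] + np.arange(vh)[None, None, :, None]
        jj = np.arange(ow)[None, :, None, None] + np.arange(vw)[None, None, None, :]
        patches = a[ii, jj].reshape(oh * ow, vh * vw)   # (oh*ow, vh*vw)
        return patches.dot(v.ravel()).reshape(oh, ow)

    # quick check against a direct double loop on a small example
    a = np.random.randn(8, 8); v = np.random.randn(3, 3)
    naive = np.array([[np.sum(a[i:i+3, j:j+3]*v) for j in range(6)] for i in range(6)])
    print np.allclose(conv_valid_fast(a, v), naive)   # True

The full revised implementation follows.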

    # coding:utf8
    import cPickle
    import numpy as np
    from scipy.signal import convolve2d
    
    class ConvPoolLayer(object):  # layer init
        def __init__(self, image_shape,filter_shape,poolsize=(2,2)):
            self.filter_shape = filter_shape  # [5, 5, 5]: five 5*5 kernels
            self.image_shape = image_shape  # 28, 28
            self.w = np.random.normal(loc=0, scale=np.sqrt(1.0/np.prod(filter_shape[1:])),
                                      size=filter_shape)  # scale = sqrt(1/(5*5))
            self.b = np.random.normal(loc=0, scale=0.2, size=(filter_shape[0],))
            self.samp_shape=(image_shape[0] - filter_shape[1] + 1,image_shape[1] - filter_shape[2] + 1)   # 24*24 after the valid convolution
            self.poolsize = poolsize  # 2,2
            self.out_shape=(self.samp_shape[0]/poolsize[0],self.samp_shape[1]/poolsize[1])  # 12*12 after 2*2 pooling
    
        def conv(self,a, v, full=0):  # valid:0  full:1
            ah, aw = a.shape
            vh, vw = v.shape
            if full:
                temp = np.zeros((ah + 2 * vh - 2, aw + 2 * vw - 2))
                temp[vh - 1:vh - 1 + ah, vw - 1:vw - 1 + aw] = a
                a = temp
                ah, aw = np.shape(a)
            k=np.ones((ah - vh + 1,aw - vw + 1))
            # vt=np.mat(v.flatten()).T
            for i in range(ah - vh + 1):
                for j in range(aw - vw + 1):
                    #k[i, j] = np.dot(a[i:i + vh, j:j + vw].flatten(), vt)
                    k[i, j] = np.sum(np.multiply(a[i:i + vh, j:j + vw], v))
            return k
    
        def feedforward(self, a):  # input: 28*28
            #self.out = [self.relu(self.conv(a, self.rot180(w_))+b_)  for b_,w_ in zip(self.b,self.w)]
            # convolve2d flips the kernel internally, so rot180 cancels the flip and the
            # forward pass is effectively an unflipped (cross-correlation) pass
            self.out = np.array([self.relu(convolve2d(a, self.rot180(w_),mode='valid')+b_) for b_,w_ in zip(self.b,self.w)])
            return np.array([self.samp(a_) for a_ in self.out])
    
        def backprop(self, x, dnext,eta=0.001):
            if dnext.ndim<3:
                dnext = np.reshape(dnext, (self.filter_shape[0], self.out_shape[0], self.out_shape[1]))  # 5*12*12
            u = self.relu_prime(self.out)  # 5*24*24
            # upsample the 12*12 deltas back to 24*24; this spreads each pooled gradient
            # uniformly over its 2*2 window rather than routing it only to the max location
            delta = [np.multiply(u_, self.up(d_, 2))
                     for u_, d_ in zip(u, dnext)]
            b = np.array([np.sum(d_) for d_ in delta])
            w = [convolve2d(x, d_,mode='valid') for d_ in delta]
            w = np.array([np.rot90(i,2)  for i in w])
            self.w -= eta * w
            self.b -= eta * b
            return delta
    
        def samp(self,a):  # 24*24->12*12
            ah, aw = self.samp_shape  # 24,24
            vh, vw = self.poolsize  # 2,2
            k = [[np.max(a[i*vh:i*vh+vh,j*vw :j*vw+vw]) for j in range(aw / vw)] for i in range(ah / vh)]
            return np.array(k)
    
        def up(self,a,l):  # upsample by repeating each element into an l*l block (Kronecker product)
            b=np.ones((l,l))
            return np.kron(a,b)
    
        def rot180(self,w):  # rotate a kernel by 180 degrees
            return np.rot90(w,2)

        def relu(self,z):
            return np.maximum(z, 0.0)

        def relu_prime(self,z):  # z is the relu output, so entries are already >= 0
            z[z>0]=1
            return z
    
    class SoftmaxLayer(object):
        def __init__(self, in_num=100,out_num=10):
            self.weights = np.random.randn(in_num, out_num)/np.sqrt(out_num)
    
        def feedforward(self, input):
            self.out=self.softmax(np.dot(input, self.weights))
            return self.out
    
        def backprop(self, input, y,eta=0.001):
            o = self.out
            delta = o - y
            out_delta = np.dot(delta, self.weights.T)
            w = np.dot(input.T, delta)
            self.weights -= eta * (w)
            return out_delta
    
        def softmax(self,a):
            m = np.exp(a - np.max(a))  # subtract the max for numerical stability
            return m / np.sum(m,axis=1)
    
    class FullLayer(object):
        def __init__(self, in_num=720,out_num=100):
            self.in_num=in_num
            self.out_num=out_num
            self.biases = np.random.randn(out_num)
            self.weights = np.random.randn(in_num, out_num)/np.sqrt(out_num)
    
        def feedforward(self, x):
            if x.ndim>2:
                x = np.reshape(x, (1, self.in_num))
            self.out = self.sigmoid(np.dot(x, self.weights)+self.biases)
            return self.out
    
        def backprop(self, x,delta,eta=0.001):
            if x.ndim>2:
                x = np.reshape(x, (1, self.in_num))
            sp=self.sigmoid_prime(self.out)
            delta = delta * sp
            out_delta=np.dot(delta,self.weights.T)
            w = np.dot( x.T,delta)
            self.weights-=eta*w
            self.biases -= eta*delta[0]
            return out_delta
    
        def sigmoid(self,z):
            return 1.0/(1.0+np.exp(-z))
    
        def sigmoid_prime(self,z):  # z is the sigmoid output, not the pre-activation
            return z*(1-z)
    
    class Network(object):
        def __init__(self, layers):
            self.layers=layers
            self.num_layers = len(layers)
            self.a=[]
    
        def feedforward(self, x):
            self.a = [x]  # reset the stored activations each pass; backprop reads them
            for layer in self.layers:
                x=layer.feedforward(x)
                self.a.append(x)
            return x
    
        def SGD(self, training_data, test_data,epochs, mini_batch_size, eta=0.001):
            self.n = len(training_data[0])
            self.mini_batch_size=mini_batch_size
            self.eta=eta
            cx=range(epochs)
            for j in cx:
                for k in xrange(0, self.n , mini_batch_size):
                    batch_x = training_data[0][k:k + mini_batch_size]
                    batch_y = training_data[1][k:k + mini_batch_size]
                    self.update_mini_batch(batch_x,batch_y)
                    if k%1000==0:
                        print "Epoch {0}:{1}  train: {2}  cost={3}, test: {4}".format(j,k,
                        self.evaluate([training_data[0][:500],training_data[1][:500]]) ,self.cost,
                        self.evaluate([test_data[0],test_data[1]]))
    
        def update_mini_batch(self, batch_x,batch_y):
            # note: parameters are updated after every single sample, not once per batch
            for i in range(len(batch_x)):
                self.backprop(batch_x[i], batch_y[i])
    
        def backprop(self, x_in, y):
            self.feedforward(x_in)
            for i in range(self.num_layers):
                delta=self.layers[-i-1].backprop(self.a[-i-2],y,eta=self.eta)
                y=delta
    
        def evaluate(self, test_data):
            x,y=test_data
            x=[self.feedforward(i)[0] for i in x]
            xp = np.argmax(x, axis=1)
            yp= np.argmax(y, axis=1) if y[0].ndim else y
            self.cost = -np.mean(np.log(x)[np.arange(len(yp)),yp])  # cross-entropy over the evaluated samples
            return np.mean(yp == xp)*100
    
    if __name__ == '__main__':
            def get_data(data):
                return [np.reshape(x, (28,28)) for x in data[0]]
    
            def get_label(i):
                c = np.zeros((10))
                c[i] = 1
                return c
    
            f = open('data/mnist.pkl', 'rb')
            training_data, validation_data, test_data = cPickle.load(f)
            training_inputs = get_data(training_data)
            training_label=[get_label(y_) for y_ in training_data[1]]
            test_inputs = get_data(test_data)
            test = zip(test_inputs,test_data[1])
            net = Network([ConvPoolLayer(image_shape=[28,28],filter_shape=[5,5,5],poolsize=(2,2)),
                           FullLayer(in_num=720,out_num=100),
                           SoftmaxLayer(in_num=100,out_num=10)])
            net.SGD([training_inputs,training_label],[test_inputs[:500],test_data[1][:500]],
                    epochs=10,mini_batch_size=10, eta=0.005)
    
            # Epoch 0:27000  train: 94.6  cost=0.235302322005, test: 94.2
Original post: https://www.cnblogs.com/qw12/p/6363562.html