  • CNN1 Recursive Network

    An earlier post, 卷积层和池化层 (convolutional and pooling layers), contained a messy CNN implementation; this post cleans it up with a few changes.

    Main changes:

    1. Merge ConvLayer and PoolLayer.

    In the Theano example (the post 基于theano的深度卷积神经网络), those two layers share a single set of weights and biases; in Caffe (see Caffe学习 一 网络参数和自定义网络), the pooling layer is given no weights or biases at all.

    In the earlier code, when the PoolLayer was given weights of its own, good results came only with loc near 1 and a small scale (the parameters of np.random.normal).

    My understanding is that pooling subsamples to cut the data volume and gain translation invariance while still preserving information; putting weights on top of it throws away part of what, for example, max pooling extracts. The toy example below shows the pooling pair used in the code.
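
    A minimal illustration of the pooling and up-sampling pair (the 4x4 input is made up; samp and up refer to the methods in the code below):

    import numpy as np

    a = np.arange(16).reshape(4, 4)  # toy 4x4 feature map
    # 2x2 max pooling, as in ConvPoolLayer.samp
    pooled = np.array([[a[2*i:2*i+2, 2*j:2*j+2].max() for j in range(2)]
                       for i in range(2)])       # [[ 5,  7], [13, 15]]
    # gradient up-sampling, as in ConvPoolLayer.up: each delta is spread
    # uniformly over its 2x2 window by a Kronecker product
    delta = np.ones((2, 2))
    up = np.kron(delta, np.ones((2, 2)))         # a 4x4 array of ones

    Exact max-pool backprop would send each gradient only to the argmax position of its window; the uniform kron spread is a simplification, and is one place where the information picked out by max pooling gets blurred.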

    2. Separate the different layers.

    The forward- and backward-propagation methods are the same from layer to layer; only the inputs and outputs differ.

    Once separated, different network structures are easy to configure, as sketched below.
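
    Because every layer exposes the same feedforward/backprop interface, trying another structure is just a different layer list. A hypothetical configuration (the intermediate sizes are my own choice, not from the post):

    net = Network([ConvPoolLayer(image_shape=[28, 28], filter_shape=[5, 5, 5], poolsize=(2, 2)),
                   FullLayer(in_num=720, out_num=300),   # 720 = 5*12*12 pooled outputs
                   FullLayer(in_num=300, out_num=100),
                   SoftmaxLayer(in_num=100, out_num=10)])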

    According to the formulas, the convolution kernel needs exactly one 180-degree rotation across the forward and backward passes. In this example the forward pass does not rotate and the backward pass does, which amounts to storing the kernel as an already-rotated weight. (sg.convolve2d, i.e. scipy.signal.convolve2d, rotates the kernel internally.)

    The next post will take the opposite approach. The rotation identity itself is easy to check.
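
    A minimal sanity check (not from the original post) that true convolution equals cross-correlation with a kernel rotated 180 degrees:

    import numpy as np
    from scipy.signal import convolve2d, correlate2d

    a = np.random.randn(6, 6)
    k = np.random.randn(3, 3)
    # convolve2d flips the kernel internally; correlate2d slides it as-is
    assert np.allclose(convolve2d(a, k, mode='valid'),
                       correlate2d(a, np.rot90(k, 2), mode='valid'))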

    Remaining problems:

    1. The mini-batch is in fact applied one sample at a time (weights are updated after every sample).

    2. Convolution is far too slow; one possible speed-up is sketched after this list.

    Both will be addressed in later posts.
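
    For the speed problem, one common direction is im2col (my suggestion here, not necessarily what the follow-up posts do): extract every patch into a matrix once, then apply all kernels with a single matrix multiply instead of looping per kernel. A minimal sketch:

    import numpy as np

    def conv_valid_all(a, ws):
        # a: (H, W) image; ws: (n, vh, vw) stack of kernels; returns (n, oh, ow).
        # Computes the same sliding dot product as ConvPoolLayer.conv below,
        # but the patch matrix is built once and shared by all n kernels.
        vh, vw = ws.shape[1:]
        oh, ow = a.shape[0] - vh + 1, a.shape[1] - vw + 1
        cols = np.empty((oh * ow, vh * vw))
        for i in range(oh):
            for j in range(ow):
                cols[i * ow + j] = a[i:i + vh, j:j + vw].ravel()
        out = cols.dot(ws.reshape(len(ws), -1).T)  # one matmul for all kernels
        return out.T.reshape(-1, oh, ow)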

    # coding:utf8
    import cPickle
    import numpy as np
    from scipy.signal import convolve2d  # public import path; signaltools is private
    
    class ConvPoolLayer(object):  # convolution and max pooling fused into one layer
        def __init__(self, image_shape, filter_shape, poolsize=(2, 2)):
            self.filter_shape = filter_shape  # (5, 5, 5): five 5x5 kernels
            self.image_shape = image_shape  # (28, 28)
            self.w = np.random.normal(loc=0, scale=np.sqrt(1.0/np.prod(filter_shape[1:])),
                                      size=filter_shape)  # scale = sqrt(1/(5*5))
            self.b = np.random.normal(loc=0, scale=0.2, size=(filter_shape[0],))
            self.samp_shape = (image_shape[0] - filter_shape[1] + 1,
                               image_shape[1] - filter_shape[2] + 1)  # (24, 24) valid-mode output
            self.poolsize = poolsize  # (2, 2)
            self.out_shape = (self.samp_shape[0] // poolsize[0],
                              self.samp_shape[1] // poolsize[1])  # (12, 12) after pooling
    
        def rot180(self, w):  # rotate a kernel 180 degrees; used in feedforward
            return np.rot90(w, 2)

        def conv(self, a, v, full=0):  # hand-rolled sliding dot product; valid: 0, full: 1
            ah, aw = a.shape
            vh, vw = v.shape
            if full:  # zero-pad so every overlap position is produced
                temp = np.zeros((ah + 2 * vh - 2, aw + 2 * vw - 2))
                temp[vh - 1:vh - 1 + ah, vw - 1:vw - 1 + aw] = a
                a = temp
                ah, aw = a.shape
            k = np.ones((ah - vh + 1, aw - vw + 1))
            for i in range(ah - vh + 1):
                for j in range(aw - vw + 1):
                    # cross-correlation; pass a pre-rotated v to get true convolution
                    k[i, j] = np.sum(np.multiply(a[i:i + vh, j:j + vw], v))
            return k
    
        def feedforward(self, a):  # a: 28x28 input image
            # convolve2d rotates the kernel internally, so with rot180 applied first the
            # net effect is a plain sliding dot product (no rotation in the forward pass)
            self.out = np.array([self.relu(convolve2d(a, self.rot180(w_), mode='valid') + b_)
                                 for b_, w_ in zip(self.b, self.w)])  # five 24x24 maps
            return np.array([self.samp(a_) for a_ in self.out])  # pooled: 5x12x12
    
        def backprop(self, x, dnext, eta=0.001):
            if dnext.ndim < 3:  # the delta arrives flattened from the full layer
                dnext = np.reshape(dnext, (self.filter_shape[0], self.out_shape[0], self.out_shape[1]))  # 5x12x12
            u = self.relu_prime(self.out)  # 5x24x24
            # spread each pooled delta uniformly over its pooling window
            delta = [np.multiply(u_, self.up(d_, self.poolsize[0]))
                     for u_, d_ in zip(u, dnext)]
            b = np.array([np.sum(d_) for d_ in delta])
            w = [convolve2d(x, d_, mode='valid') for d_ in delta]  # dL/dw, one 5x5 per kernel
            w = np.array([np.rot90(i, 2) for i in w])  # the single 180-degree rotation
            self.w -= eta * w
            self.b -= eta * b
            return delta
    
        def samp(self, a):  # max pooling: 24x24 -> 12x12
            ah, aw = self.samp_shape  # (24, 24)
            vh, vw = self.poolsize  # (2, 2)
            k = [[np.max(a[i*vh:i*vh+vh, j*vw:j*vw+vw]) for j in range(aw // vw)]
                 for i in range(ah // vh)]
            return np.array(k)
    
        def up(self, a, l):  # up-sample by l: repeat each entry over an l x l block
            b = np.ones((l, l))
            return np.kron(a, b)

        def relu(self, z):
            return np.maximum(z, 0.0)

        def relu_prime(self, z):
            # z holds post-ReLU activations, so z > 0 marks the active units;
            # return a fresh 0/1 array rather than mutating z in place
            return (z > 0).astype(np.float64)
    
    class SoftmaxLayer(object):
        def __init__(self, in_num=100,out_num=10):
            self.weights = np.random.randn(in_num, out_num)/np.sqrt(out_num)
    
        def feedforward(self, input):
            self.out=self.softmax(np.dot(input, self.weights))
            return self.out
    
        def backprop(self, input, y, eta=0.001):
            o = self.out
            delta = o - y  # gradient of cross-entropy loss w.r.t. the softmax input
            out_delta = np.dot(delta, self.weights.T)  # delta handed to the previous layer
            w = np.dot(input.T, delta)
            self.weights -= eta * w
            return out_delta
    
        def softmax(self, a):
            m = np.exp(a - np.max(a))  # subtract the max for numerical stability
            return m / np.sum(m, axis=1, keepdims=True)
    
    class FullLayer(object):
        def __init__(self, in_num=720,out_num=100):
            self.in_num=in_num
            self.out_num=out_num
            self.biases = np.random.randn(out_num)
            self.weights = np.random.randn(in_num, out_num)/np.sqrt(out_num)
    
        def feedforward(self, x):
            if x.ndim > 2:  # flatten the 5x12x12 conv output into a row vector
                x = np.reshape(x, (1, self.in_num))
            self.out = self.sigmoid(np.dot(x, self.weights) + self.biases)
            return self.out
    
        def backprop(self, x, delta, eta=0.001):
            if x.ndim > 2:
                x = np.reshape(x, (1, self.in_num))
            sp = self.sigmoid_prime(self.out)
            delta = delta * sp
            out_delta = np.dot(delta, self.weights.T)
            w = np.dot(x.T, delta)
            self.weights -= eta * w
            self.biases -= eta * delta[0]
            return out_delta

        def sigmoid(self, z):
            return 1.0/(1.0 + np.exp(-z))

        def sigmoid_prime(self, z):
            # z is the sigmoid output itself, so the derivative is simply z*(1-z)
            return z*(1 - z)
    
    class Network(object):
        def __init__(self, layers):
            self.layers=layers
            self.num_layers = len(layers)
            self.a=[]
    
        def feedforward(self, x):
            self.a = [x]  # per-layer activations kept for backprop; reset on every pass
            for layer in self.layers:
                x = layer.feedforward(x)
                self.a.append(x)
            return x
    
        def SGD(self, training_data, test_data, epochs, mini_batch_size, eta=0.001):
            self.n = len(training_data[0])
            self.mini_batch_size = mini_batch_size
            self.eta = eta
            for j in range(epochs):
                for k in xrange(0, self.n, mini_batch_size):
                    batch_x = training_data[0][k:k + mini_batch_size]
                    batch_y = training_data[1][k:k + mini_batch_size]
                    self.update_mini_batch(batch_x, batch_y)
                    if k % 1000 == 0:
                        print "Epoch {0}:{1}  train: {2}  cost={3}, test: {4}".format(j, k,
                            self.evaluate([training_data[0][:500], training_data[1][:500]]), self.cost,
                            self.evaluate([test_data[0], test_data[1]]))
    
        def update_mini_batch(self, batch_x, batch_y):
            # note: weights are updated after every sample, not once per batch (problem 1 above)
            for i in range(self.mini_batch_size):
                self.backprop(batch_x[i], batch_y[i])
    
        def backprop(self, x_in, y):
            self.feedforward(x_in)
            for i in range(self.num_layers):
                # y doubles as the incoming delta once the top layer has consumed the label
                delta = self.layers[-i-1].backprop(self.a[-i-2], y, eta=self.eta)
                y = delta
    
        def evaluate(self, test_data):
            x, y = test_data
            x = np.array([self.feedforward(i)[0] for i in x])
            xp = np.argmax(x, axis=1)
            yp = np.argmax(y, axis=1) if y[0].ndim else y  # labels may be one-hot or plain ints
            self.cost = -np.mean(np.log(x)[np.arange(len(yp)), yp])  # mean cross-entropy
            return np.mean(yp == xp) * 100
    
    if __name__ == '__main__':
            def get_data(data):
                return [np.reshape(x, (28, 28)) for x in data[0]]

            def get_label(i):  # one-hot encode a digit label
                c = np.zeros((10,))
                c[i] = 1
                return c
    
            f = open('data/mnist.pkl', 'rb')
            training_data, validation_data, test_data = cPickle.load(f)
            f.close()
            training_inputs = get_data(training_data)
            training_label = [get_label(y_) for y_ in training_data[1]]
            test_inputs = get_data(test_data)
            net = Network([ConvPoolLayer(image_shape=[28,28], filter_shape=[5,5,5], poolsize=(2,2)),
                           FullLayer(in_num=720, out_num=100),
                           SoftmaxLayer(in_num=100, out_num=10)])
            net.SGD([training_inputs, training_label], [test_inputs[:500], test_data[1][:500]],
                    epochs=10, mini_batch_size=10, eta=0.005)
    
            # Epoch 0:27000  train: 94.6  cost=0.235302322005, test: 94.2