A deep convolutional neural network with Theano

The network uses two convolutional-pooling layers, one fully connected layer, and a softmax classifier.

It reaches 99.22% accuracy on the test set.

The code is based on neural-networks-and-deep-learning.
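
Note: load_data_shared below reads an uncompressed mnist.pkl, whereas the book's repository ships mnist.pkl.gz. A minimal one-off conversion, assuming you have that file (this helper is an addition, not part of the original code):

# Unpack the book's mnist.pkl.gz into the plain mnist.pkl expected below.
import cPickle, gzip
with gzip.open('mnist.pkl.gz', 'rb') as f:
    data = cPickle.load(f)
with open('mnist.pkl', 'wb') as f:
    cPickle.dump(data, f, protocol=cPickle.HIGHEST_PROTOCOL)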

#coding:utf8
import cPickle
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor.nnet import sigmoid
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

def ReLU(z): return T.maximum(0.0, z)

def load_data_shared():
    f = open('mnist.pkl', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    def shared(data):
        # Store the data in Theano shared variables so it can be copied to
        # GPU memory in one go; labels are cast to int32 for use as an ivector.
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]

class Network(object):
    def __init__(self, layers, mini_batch_size):
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]  # all w, b
        self.x = T.matrix("x")
        self.y = T.ivector("y")  # 1-dimensional label vector
        # Wire the layers together: each layer reads the previous layer's output.
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]  # layer[j-1] -> layer[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size
        # L2-regularized cost on the dropout path of the final layer.
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = (self.layers[-1].cost(self) +
                0.5*lmbda*l2_norm_squared/num_training_batches)
        grads = T.grad(cost, self.params)  # gradients derived symbolically from cost; no hand-written prime functions needed
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()  # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })

        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    # End of an epoch: evaluate on the validation set.
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%},cost={2}".format(
                        epoch, validation_accuracy, cost_ij))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))


class ConvPoolLayer(object):  # combined convolution + max-pooling layer
    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=ReLU):
        self.filter_shape = filter_shape  # e.g. (20, 1, 5, 5): 20 kernels of 5x5 over 1 input channel
        self.image_shape = image_shape  # e.g. (10, 1, 28, 28); channel count must match filter_shape[1]
        self.poolsize = poolsize  # e.g. (2, 2)
        self.activation_fn = activation_fn  # ReLU by default; sigmoid also works
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))  # 20*5*5/(2*2) = 125
        self.w = theano.shared(  # (20, 1, 5, 5)
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(  # (20,)
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)  # (batch, 1, 28, 28)
        conv_out = conv.conv2d(  # valid mode: 28-5+1=24 -> (batch, 20, 24, 24)
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(  # 24/2=12 -> (batch, 20, 12, 12)
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))  # bias broadcast as (1, 20, 1, 1)
        self.output_dropout = self.output  # no dropout in the convolutional layers

class FullyConnectedLayer(object):
    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        # Clean path used for evaluation: activations are scaled by
        # 1-p_dropout to compensate for units dropped during training.
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        # Dropout path used by the training cost.
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))

class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # Softmax weights and biases start at zero rather than Gaussian noise.
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)  # theano.tensor.nnet.softmax
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        # Mean negative log-likelihood of the true classes.
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))

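# A quick illustration (added note, not in the original post) of the fancy
# indexing in SoftmaxLayer.cost: [T.arange(n), y] selects, for each row of
# log-probabilities, the entry of that sample's true class. With numpy:
#     log_p = np.log([[0.7, 0.2, 0.1],
#                     [0.1, 0.8, 0.1]])
#     log_p[np.arange(2), [0, 1]]  # -> [log 0.7, log 0.8]
# The cost is the negative mean of these per-sample log-likelihoods.
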
def size(data):  # number of samples in a shared dataset
    return len(data[0].get_value())


def dropout_layer(layer, p_dropout):
    # Randomly zero out units: each unit is kept with probability 1-p_dropout.
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)


if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5),
                      poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5),
                      poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    net.SGD(training_data, 30, mini_batch_size, 0.1,
            validation_data, test_data)

# Sigmoid ConvPoolLayer
# Epoch 29: validation accuracy 98.96%,cost=9.70275432337e-05
# This is the best validation accuracy to date.
# The corresponding test accuracy is 98.86%

# ReLU ConvPoolLayer
# Epoch 29: validation accuracy 99.06%,cost=4.11269593315e-06
# This is the best validation accuracy to date.
# The corresponding test accuracy is 99.22%
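
Why n_in=40*4*4 in the fully connected layer: each valid-mode 5x5 convolution shrinks a feature map from n to n-4 per side, and each 2x2 max-pool halves it, so 28 -> 24 -> 12 -> 8 -> 4. A quick sanity check of that arithmetic (a standalone sketch, not part of the listing above):

# Feature-map side length through the two ConvPoolLayers.
side = 28
for _ in range(2):            # two ConvPoolLayers
    side = (side - 5 + 1)//2  # valid 5x5 conv, then 2x2 max-pool
print side  # -> 4, so the second layer emits 40 maps of 4x4 = 40*4*4 values

To experiment with dropout, one could pass, say, p_dropout=0.5 to FullyConnectedLayer and SoftmaxLayer; the runs reported above use the default of 0.0, so no units were dropped.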
Original post: https://www.cnblogs.com/qw12/p/6123707.html