zoukankan      html  css  js  c++  java
  • tensorflow bilstm官方示例

      1 '''
      2 A Bidirectional Recurrent Neural Network (LSTM) implementation example using TensorFlow library.
      3 This example is using the MNIST database of handwritten digits (http://yann.lecun.com/exdb/mnist/)
      4 Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
      5 
      6 Author: Aymeric Damien
      7 Project: https://github.com/aymericdamien/TensorFlow-Examples/
      8 '''
      9 
     10 from __future__ import print_function
     11 
     12 import tensorflow as tf
     13 from tensorflow.contrib import rnn
     14 import numpy as np
     15 
     16 # Import MNIST data
     17 from tensorflow.examples.tutorials.mnist import input_data
     # Load MNIST from /tmp/data/ with one-hot encoded labels.
     mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

     # To classify images with a bidirectional recurrent network, each 28x28
     # image is treated as one sequence of rows: 28 time steps, each carrying
     # the 28 pixels of a single row.

     # Training parameters
     learning_rate = 0.001
     # Roughly the total number of samples consumed during training.
     training_iters = 100000
     # Number of samples per training batch.
     batch_size = 128
     # Controls how often progress is displayed.
     display_step = 10

     # Network parameters: n_steps * n_input covers one whole image, with each
     # image row assigned to its own time step.
     n_steps = 28  # time steps (rows per image)
     n_input = 28  # features per time step (pixels per row; MNIST is 28*28)
     n_hidden = 128  # hidden layer number of features
     n_classes = 10  # MNIST classes (digits 0-9)

     # Graph inputs. `None` leaves the size of that dimension unspecified.
     x = tf.placeholder("float", shape=[None, n_steps, n_input])
     y = tf.placeholder("float", shape=[None, n_classes])

     # Output-layer parameters. The weight matrix has 2 * n_hidden rows because
     # the forward and backward cells each contribute n_hidden features.
     weights = {'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))}
     biases = {'out': tf.Variable(tf.random_normal([n_classes]))}
     60 
     61 
     def BiRNN(x, weights, biases):
         """Build a bidirectional LSTM over `x` and return the class logits.

         Args:
             x: Input tensor of shape (batch_size, n_steps, n_input).
             weights: Dict whose 'out' entry is the output projection matrix,
                 shaped (2 * n_hidden, n_classes).
             biases: Dict whose 'out' entry is the output bias, shaped
                 (n_classes,).

         Returns:
             The last time step's RNN output multiplied by weights['out'] plus
             biases['out'] (a linear activation; no softmax is applied here).
         """
         # `static_bidirectional_rnn` needs a list of n_steps tensors of shape
         # (batch_size, n_input), so split the input along the time axis.
         steps = tf.unstack(x, n_steps, 1)

         # One LSTM cell per direction.
         lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
         lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)

         # Build the RNN exactly once. Current releases return
         # (outputs, fw_state, bw_state); per the original comment, old
         # TensorFlow versions returned only the outputs list. Checking the
         # return type avoids a broad `except` that would hide genuine
         # graph-construction errors and construct the RNN a second time.
         result = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, steps,
                                               dtype=tf.float32)
         outputs = result[0] if isinstance(result, tuple) else result

         # Linear activation on the output of the last time step.
         return tf.matmul(outputs[-1], weights['out']) + biases['out']
     88 
     # Logits produced by the bidirectional LSTM for the input placeholder.
     pred = BiRNN(x, weights, biases)

     # Loss and optimizer.
     # softmax_cross_entropy_with_logits measures the probability error for
     # mutually exclusive classes and returns a 1-D tensor of length batch_size;
     # reduce_mean (no axis given) averages over all of its values.
     cost = tf.reduce_mean(
         tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
     optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

     # Accuracy: fraction of samples whose arg-max prediction matches the label.
     correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
     accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

     # Op that initializes all variables.
     init = tf.global_variables_initializer()

     # Launch the graph.
     with tf.Session() as sess:
         sess.run(init)
         step = 1
         # Keep training until the sample budget `training_iters` is reached.
         while step * batch_size < training_iters:
             batch_x, batch_y = mnist.train.next_batch(batch_size)
             # Reshape the batch to (batch_size, n_steps, n_input).
             batch_x = batch_x.reshape((batch_size, n_steps, n_input))
             feed = {x: batch_x, y: batch_y}
             # Run one optimization step (backprop).
             sess.run(optimizer, feed_dict=feed)
             if step % display_step == 0:
                 # Fetch batch accuracy and loss in a single run so the
                 # forward pass is not repeated for each metric.
                 acc, loss = sess.run([accuracy, cost], feed_dict=feed)
                 print("Iter " + str(step*batch_size) + ", Minibatch Loss= " +
                       "{:.6f}".format(loss) + ", Training Accuracy= " +
                       "{:.5f}".format(acc))
             step += 1
         print("Optimization Finished!")

         # Calculate accuracy for the first 128 MNIST test images.
         test_len = 128
         test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
         test_label = mnist.test.labels[:test_len]
         print("Testing Accuracy:",
               sess.run(accuracy, feed_dict={x: test_data, y: test_label}))

    官方关于 BiLSTM 的例子写得很清楚了。因为是第一次看,还是要查许多东西,尤其是数据处理方面。

    数据的处理(https://segmentfault.com/a/1190000008793389)

    拼接

    t1 = [[1, 2, 3], [4, 5, 6]]
    t2 = [[7, 8, 9], [10, 11, 12]]
    tf.concat([t1, t2], 0) ==> [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    tf.concat([t1, t2], 1) ==> [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]]
    tf.stack([t1, t2], 0)  ==> [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]
    tf.stack([t1, t2], 1)  ==> [[[1, 2, 3], [7, 8, 9]], [[4, 5, 6], [10, 11, 12]]]
    tf.stack([t1, t2], 2)  ==> [[[1, 7], [2, 8], [3, 9]], [[4, 10], [5, 11], [6, 12]]]

    从shape的角度看(带 * 的维度是 tf.stack 新增的那一维):

    t1 = [[1, 2, 3], [4, 5, 6]]
    t2 = [[7, 8, 9], [10, 11, 12]]
    tf.concat([t1, t2], 0)  # [2,3] + [2,3] ==> [4, 3]
    tf.concat([t1, t2], 1)  # [2,3] + [2,3] ==> [2, 6]
    tf.stack([t1, t2], 0)   # [2,3] + [2,3] ==> [2*,2,3]
    tf.stack([t1, t2], 1)   # [2,3] + [2,3] ==> [2,2*,3]
    tf.stack([t1, t2], 2)   # [2,3] + [2,3] ==> [2,3,2*]

    抽取:

    input = [[[1, 1, 1], [2, 2, 2]],
             [[3, 3, 3], [4, 4, 4]],
             [[5, 5, 5], [6, 6, 6]]]
    tf.slice(input, [1, 0, 0], [1, 1, 3]) ==> [[[3, 3, 3]]]
    tf.slice(input, [1, 0, 0], [1, 2, 3]) ==> [[[3, 3, 3],
                                                [4, 4, 4]]]
    tf.slice(input, [1, 0, 0], [2, 1, 3]) ==> [[[3, 3, 3]],
                                               [[5, 5, 5]]]
                                               
    tf.gather(input, [0, 2]) ==> [[[1, 1, 1], [2, 2, 2]],
                                  [[5, 5, 5], [6, 6, 6]]]
  • 相关阅读:
    eclipse安装Aptana 插件,支持Javascript
    C++字符串转换成uint64类型
    C语言字节对齐
    Windows版本Traceroute
    ubuntu下使用FireBug调试Javascript脚本
    TCP拥塞控制图
    nodejs点滴
    你应该知道的16个Linux服务器监控命令
    C语言运算符(转载)
    常用正则表达式
  • 原文地址:https://www.cnblogs.com/linyx/p/6979119.html
Copyright © 2011-2022 走看看