  • Dual-embedded LSTM for QA match: an LSTM chat-matching model with dual embeddings

    First, the model architecture diagram:

    [figure: model architecture]

    An LSTM is a kind of RNN; its most common use is to encode sequences, and there are plenty of papers on LSTMs that you can google yourself.

    The model below is one I arrived at through my own experiments. It works reasonably well and can be used for deep-learning training of a chatbot, as long as you have a corpus.

    It uses embedding + bidirectional LSTM + fully connected + max-pooling; none of it is difficult.
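
    As a quick orientation before the full listing, here is how the tensor shapes flow (a sketch using the names from the code below; B = batch size, T = sequence length, E = hparams.embedding_dim, R = hparams.rnn_dim):

    # embedding lookup (per channel)   -> [B, T, 2*E]
    # bidirectional LSTM               -> forward/backward outputs, each [B, T, R]
    # concat + max-pooling over time   -> [B, 2*R]
    # channel mixing + fully connected -> [B, 2*R]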

    Finally, here is the TensorFlow code for the implementation of the model. Note that it targets the pre-1.0 TensorFlow API (tf.concat(axis, values), tf.pack, tf.batch_matmul, tf.mul):

    import tensorflow as tf

    # hparams is assumed to be defined elsewhere with vocab_size, embedding_dim and rnn_dim fields
    def model_imp(ques, ques_len, ans, ans_len, ans_f, ans_f_len, batch_size):
        # two embedding "characters" (channels); a soft mixture over them is learned per question
        n_characters = 2
    
        # 1.1 --- the two embedding tables (the "dual embedding" of the title) ---
        w_embed = tf.get_variable('w_embed', shape = [hparams.vocab_size, n_characters*hparams.embedding_dim], initializer = tf.random_uniform_initializer(-1.0, 1.0))
        w_embed_2 = tf.get_variable('w_embed_2', shape = [hparams.vocab_size, n_characters*hparams.embedding_dim], initializer = tf.random_uniform_initializer(-1.0, 1.0))
    
        # 1.2 --- rnn for the question ---
        # look the question up in both embedding tables
        ques_1 = tf.nn.embedding_lookup(w_embed, ques, name = 'ques_1')
        ques_2 = tf.nn.embedding_lookup(w_embed_2, ques, name = 'ques_2')
    
        # 1.2.0 --- calculate the distribution over the two channels for the question ---
        with tf.variable_scope('character') as vs_latent_character:
            cell = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
            cell_r = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
            # bidirectional LSTM over both embedding channels concatenated together
            output, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, tf.concat(2, [ques_1, ques_2]), sequence_length = ques_len, dtype = tf.float32)
            # max-pool the forward/backward outputs over time: [batch, rnn_dim*2]
            character_information = tf.reduce_max(tf.concat(2, [output[0], output[1]]), 1)
            T = tf.get_variable('T', shape = [hparams.rnn_dim*2, n_characters])
            # softmax over the channels, shaped [batch, 1, n_characters] for batch_matmul below
            character_dist = tf.expand_dims(tf.nn.softmax(tf.matmul(character_information, T)), 1)
            #character = tf.argmax(tf.matmul(character_information, T), 1)
            #character_dist = tf.expand_dims(tf.one_hot(character, n_characters, on_value = 1.0, off_value = 0.0), 1)
            print(character_dist.get_shape())
    
        # 1.2.1 --- encode the question separately with each channel's own biLSTM ---
        # (the same two scopes are reused below for the answers, so all three sequences share encoders)
        with tf.variable_scope('rnn_ques') as vs_ques:
            cell = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
            cell_r = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
            output_ques, state_ques = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ques_1, sequence_length = ques_len, dtype = tf.float32)

        with tf.variable_scope('rnn_ques2') as vs_ques_2:
            cell_2 = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
            cell_r_2 = tf.nn.rnn_cell.LSTMCell(hparams.rnn_dim, forget_bias = 2.0, use_peepholes = True, state_is_tuple = True)
            output_ques_2, state_ques_2 = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ques_2, sequence_length = ques_len, dtype = tf.float32)
            
        ques_output_1 = tf.reduce_max(tf.concat(2, [output_ques[0], output_ques[1]]), 1)
        ques_output_2 = tf.reduce_max(tf.concat(2, [output_ques_2[0], output_ques_2[1]]), 1)
        # mix the two channel encodings with the learned distribution:
        # [batch, 1, 2] x [batch, 2, rnn_dim*2] -> [batch, 1, rnn_dim*2]
        ques_output = tf.batch_matmul(character_dist, tf.pack([ques_output_1, ques_output_2], axis = 1))
        ques_output = tf.squeeze(ques_output, [1])

        # fully connected layer mapping the question into the answer space
        M = tf.get_variable('M', shape = [hparams.rnn_dim*2, hparams.rnn_dim*2], initializer = tf.random_uniform_initializer(-1.0, 1.0))
        ques_output = tf.matmul(ques_output, M)
    
    
        # 1.3 --- rnn for the true answer (ans) and a sampled false answer (ans_f) ---
        ans_1 = tf.nn.embedding_lookup(w_embed, ans, name = 'ans_1')
        ans_f_1 = tf.nn.embedding_lookup(w_embed, ans_f, name = 'ans_f_1')
        ans_2 = tf.nn.embedding_lookup(w_embed_2, ans, name = 'ans_2')
        ans_f_2 = tf.nn.embedding_lookup(w_embed_2, ans_f, name = 'ans_f_2')
        # reuse the question encoders so questions and answers live in the same space
        with tf.variable_scope('rnn_ques', reuse = True) as vs_ans:
            output_1, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ans_1, sequence_length = ans_len, dtype = tf.float32)
            output_f1, state = tf.nn.bidirectional_dynamic_rnn(cell, cell_r, ans_f_1, sequence_length = ans_f_len, dtype = tf.float32)
        with tf.variable_scope('rnn_ques2', reuse = True) as vs_ans_2:
            output_2, state = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ans_2, sequence_length = ans_len, dtype = tf.float32)
            output_f2, state = tf.nn.bidirectional_dynamic_rnn(cell_2, cell_r_2, ans_f_2, sequence_length = ans_f_len, dtype = tf.float32)
    
        # max-pool and mix the true-answer encodings with the same channel distribution
        ans_output_1 = tf.reduce_max(tf.concat(2, [output_1[0], output_1[1]]), 1)
        ans_output_2 = tf.reduce_max(tf.concat(2, [output_2[0], output_2[1]]), 1)

        ans_output = tf.batch_matmul(character_dist, tf.pack([ans_output_1, ans_output_2], axis = 1))
        ans_output = tf.squeeze(ans_output, [1])

        # and likewise for the false answer
        ans_output_f1 = tf.reduce_max(tf.concat(2, [output_f1[0], output_f1[1]]), 1)
        ans_output_f2 = tf.reduce_max(tf.concat(2, [output_f2[0], output_f2[1]]), 1)

        ans_output_f = tf.batch_matmul(character_dist, tf.pack([ans_output_f1, ans_output_f2], axis = 1))
        ans_output_f = tf.squeeze(ans_output_f, [1])
    
    
        # 1.4 --- the prediction part: cosine similarity + margin loss ---

        ques_output = tf.nn.l2_normalize(ques_output, 1)
        ans_output = tf.nn.l2_normalize(ans_output, 1)
        ans_output_f = tf.nn.l2_normalize(ans_output_f, 1)

        # after l2-normalization the dot product is the cosine similarity
        prob = [ques_output, ans_output]
        simi = tf.reduce_sum(tf.mul(ques_output, ans_output), 1)
        simi_f = tf.reduce_sum(tf.mul(ques_output, ans_output_f), 1)

        # hinge loss: push the true answer at least 0.25 closer to the question than the false one
        loss = tf.maximum(0.0, 0.25 - simi + simi_f)

        loss_ = tf.reduce_mean(loss)
        return prob, loss_
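
    For completeness, here is a minimal sketch of how this function could be wired up for training. Everything in it (the HParams values, the placeholder names, the optimizer and learning rate) is my own illustrative assumption, not part of the original post:

    # hypothetical hyperparameters; the post does not give concrete values
    class HParams(object):
        vocab_size = 40000      # vocabulary size of the corpus
        embedding_dim = 100     # per-channel embedding size
        rnn_dim = 256           # LSTM hidden size
    hparams = HParams()

    # id sequences for the question, the true answer and a sampled false answer
    ques = tf.placeholder(tf.int32, [None, None])
    ans = tf.placeholder(tf.int32, [None, None])
    ans_f = tf.placeholder(tf.int32, [None, None])
    ques_len = tf.placeholder(tf.int32, [None])
    ans_len = tf.placeholder(tf.int32, [None])
    ans_f_len = tf.placeholder(tf.int32, [None])

    prob, loss = model_imp(ques, ques_len, ans, ans_len, ans_f, ans_f_len, batch_size = 64)
    train_op = tf.train.AdamOptimizer(0.001).minimize(loss)

    At serving time the false-answer branch is not needed: score each candidate answer by the dot product of the two l2-normalized vectors returned in prob and pick the highest.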