  • Learn Algorithms with Me: match-LSTM (following Mr. Tang's lead)

    In match-LSTM, each premise hidden state h_i is combined with an attention-weighted summary (computed from s_i, h_i, and q_i) and fed back into another LSTM, so the whole model trains end to end.
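
    The attention step can be written out with the equations from Wang & Jiang's match-LSTM paper ("Learning Natural Language Inference with LSTM"); the code below follows this recipe, with W^s, W^t, W^m and w_e as the learned attention parameters:

        e_kj  = w_e^T tanh(W^s h^s_j + W^t h^t_k + W^m h^m_{k-1})
        a_kj  = softmax_j(e_kj)
        m_k   = [ sum_j a_kj h^s_j ; h^t_k ]
        h^m_k = LSTM(m_k, h^m_{k-1})

    Here h^s_j are the premise LSTM states, h^t_k the hypothesis LSTM states, and h^m_k the match-LSTM state (the s_i mentioned above).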

    Reference blog posts:

    https://blog.csdn.net/laddie132/article/details/79159895   # how match-LSTM works
    https://blog.csdn.net/jdbc/article/details/80755576        # converting the SQuAD dataset to ids
    https://blog.csdn.net/xbinworld/article/details/54607525   # attention mechanism models
    https://blog.csdn.net/appleml/article/details/76607980     # pointer-network model
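
    The second link above covers turning raw text into the id matrices this model consumes. As a minimal sketch of the convention the code below assumes (0 = padding, -1 = out-of-vocabulary word, anything else indexes the embedding matrix), with a hypothetical vocab dict:

        def to_ids(tokens, vocab, sentence_size):
            # map known words to their ids, unknown words to -1
            ids = [vocab.get(t, -1) for t in tokens][:sentence_size]
            # pad with 0 up to the fixed sentence length
            ids += [0] * (sentence_size - len(ids))
            return ids

        # to_ids(['cat', 'sat'], {'cat': 3, 'sat': 2}, 5) -> [3, 2, 0, 0, 0]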

    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    
    import tensorflow as tf
    import numpy as np
    import tensorflow.contrib as contrib
    
    # from app.decorator import exe_time
    
    
    class MatchLstm:
        # @exe_time
        def __init__(self, vocab_size, sentence_size, embedding_size,
                     word_embedding, initializer=tf.truncated_normal_initializer(stddev=0.1),
                     session=tf.Session(), num_class=3,
                     window_size=4, name='MatchLstm', initial_lr=0.001):
            # vocabulary size
            self._vocab_size = vocab_size
            # maximum sentence length
            self._sentence_size = sentence_size
            # word-embedding / hidden-state size
            self._embedding_size = embedding_size
            # pre-trained word-embedding matrix
            self._we = word_embedding
            # weight initializer
            self._initializer = initializer
            # variable-scope prefix
            self._name = name
            # number of output classes
            self._num_class = num_class
            self._sess = session
            # context-window size used for unknown words
            self._window_size = window_size
            # initial learning rate
            self._initial_lr = initial_lr
            # build the input placeholders and embedding variables
            self._build_inputs_and_vars()
            # build the model graph
            self._inference()
            # set up the optimizer
            self._initial_optimizer()
    
        def _build_inputs_and_vars(self):
            # premise (passage) token ids
            self.premises = tf.placeholder(shape=[None, self._sentence_size], dtype=tf.int32,
                                           name='premises')
            # hypothesis (question) token ids
            self.hypotheses = tf.placeholder(shape=[None, self._sentence_size], dtype=tf.int32,
                                             name='hypotheses')
            # one-hot labels
            self.labels = tf.placeholder(shape=[None, self._num_class], dtype=tf.float32,
                                         name='labels')
            # infer the batch size from the input tensor
            self._batch_size = tf.shape(self.premises)[0]
            # learning-rate variable
            self.lr = tf.get_variable(shape=[], dtype=tf.float32, trainable=False,
                                      initializer=tf.constant_initializer(self._initial_lr), name='lr')
            # placeholder for a new learning rate
            self.new_lr = tf.placeholder(shape=[], dtype=tf.float32,
                                         name='new_lr')
            # op that assigns self.new_lr to self.lr
            self.lr_update_op = tf.assign(self.lr, self.new_lr)

            with tf.variable_scope(self._name):
                # frozen embedding matrix used to look up word vectors
                self._word_embedding = tf.get_variable(name='word_embedding',
                                                       shape=[self._vocab_size, self._embedding_size],
                                                       initializer=tf.constant_initializer(self._we),
                                                       trainable=False)
            # embed the premises; unknown words (id -1) get the average of their context vectors
            self._embed_pre = self._embed_inputs(self.premises, self._word_embedding)
            # embed the hypotheses the same way
            self._embed_hyp = self._embed_inputs(self.hypotheses, self._word_embedding)
    
        def _inference(self):
            with tf.variable_scope('{}_lstm_s'.format(self._name)):
                # run an LSTM over the embedded premise
                lstm_s = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size, forget_bias=0.0)
                pre_length = self._length(self.premises)
                h_s, _ = tf.nn.dynamic_rnn(lstm_s, self._embed_pre, sequence_length=pre_length,
                                           dtype=tf.float32)
                self.h_s = h_s

            with tf.variable_scope('{}_lstm_t'.format(self._name)):
                # run an LSTM over the embedded hypothesis
                lstm_t = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size, forget_bias=0.0)
                hyp_length = self._length(self.hypotheses)
                h_t, _ = tf.nn.dynamic_rnn(lstm_t, self._embed_hyp, sequence_length=hyp_length,
                                           dtype=tf.float32)
                self.h_t = h_t

            # the match-LSTM cell
            self.lstm_m = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size,
                                                    forget_bias=0.0)
            # TensorArray collecting the final match-LSTM state of every sample
            h_m_arr = tf.TensorArray(dtype=tf.float32, size=self._batch_size)

            i = tf.constant(0)
            # while_loop over the batch: c is the condition, b the loop body
            c = lambda x, y: tf.less(x, self._batch_size)
            b = lambda x, y: self._match_sent(x, y)
            res = tf.while_loop(cond=c, body=b, loop_vars=(i, h_m_arr))
            # stack the per-sample states into [batch_size, embedding_size]
            self.h_m_tensor = tf.squeeze(res[-1].stack(), axis=[1])
            # fully connected layer projecting onto the class logits
            with tf.variable_scope('{}_fully_connect'.format(self._name)):
                w_fc = tf.get_variable(shape=[self._embedding_size, self._num_class],
                                       initializer=self._initializer, name='w_fc')
                b_fc = tf.get_variable(shape=[self._num_class],
                                       initializer=self._initializer, name='b_fc')
                self.logits = tf.matmul(self.h_m_tensor, w_fc) + b_fc
            # softmax cross-entropy loss, one scalar per sample
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.labels,
                                                                    logits=self.logits,
                                                                    name='cross_entropy')
            # sum the per-sample losses over the batch
            cross_entropy_sum = tf.reduce_sum(cross_entropy, name='cross_entropy_sum')
            # divide by the batch size to get the mean loss
            self.loss_op = tf.div(cross_entropy_sum, tf.cast(self._batch_size, dtype=tf.float32))
            # tf.arg_max is deprecated; tf.argmax gives the predicted class per sample
            self.predict_op = tf.argmax(self.logits, axis=1)
    
        def _match_sent(self, i, h_m_arr):
            # process one (premise, hypothesis) pair
            h_s_i = self.h_s[i]
            h_t_i = self.h_t[i]
            # actual (non-padding) lengths of the two sentences
            length_s_i = self._length(self.premises[i])
            length_t_i = self._length(self.hypotheses[i])

            state = self.lstm_m.zero_state(batch_size=1, dtype=tf.float32)

            k = tf.constant(0)
            c = lambda a, x, y, z, s: tf.less(a, length_t_i)
            b = lambda a, x, y, z, s: self._match_attention(a, x, y, z, s)
            res = tf.while_loop(cond=c, body=b, loop_vars=(k, h_s_i, h_t_i, length_s_i, state))
            # keep only the hidden state after the last hypothesis token
            final_state_h = res[-1].h
            # write it into the TensorArray at position i
            h_m_arr = h_m_arr.write(i, final_state_h)
    
            i = tf.add(i, 1)
            return i, h_m_arr
    
        def _match_attention(self, k, h_s, h_t, length_s, state):
    
            h_t_k = tf.reshape(h_t[k], [1, -1])
            h_s_j = tf.slice(h_s, begin=[0, 0], size=[length_s, self._embedding_size])
    
            with tf.variable_scope('{}_attention_w'.format(self._name)):
                w_s = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_s')
                w_t = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_t')
                w_m = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_m')
                w_e = tf.get_variable(shape=[self._embedding_size, 1],
                                      initializer=self._initializer, name='w_e')
    
            last_m_h = state.h
            # attention pre-activation: combine each premise state with the current
            # hypothesis state and the previous match-LSTM state
            sum_h = tf.matmul(h_s_j, w_s) + tf.matmul(h_t_k, w_t) + tf.matmul(last_m_h, w_m)
            # tanh, then project to one scalar score per premise position
            e_kj = tf.matmul(tf.tanh(sum_h), w_e)
            # attention weights a_kj, normalized over the premise positions (axis 0);
            # e_kj has shape [length_s, 1], so the default axis=-1 would be a no-op
            a_kj = tf.nn.softmax(e_kj, axis=0)
            # attention-weighted sum of the premise states: the context vector
            alpha_k = tf.matmul(a_kj, h_s_j, transpose_a=True)

            alpha_k.set_shape([1, self._embedding_size])
            # concatenate the context vector with h_t_k as the match-LSTM input
            m_k = tf.concat([alpha_k, h_t_k], axis=1)

            with tf.variable_scope('{}_lstm_m'.format(self._name)):
                # one match-LSTM step; state corresponds to s_i in the paper
                _, new_state = self.lstm_m(inputs=m_k, state=state)
    
            k = tf.add(k, 1)
            return k, h_s, h_t, length_s, new_state
    
        def _embed_inputs(self, inputs, embeddings):
            ndim0_tensor_arr = tf.TensorArray(dtype=tf.float32, size=self._batch_size)
            i = tf.constant(0)
            # tf.less returns False once x reaches self._batch_size, ending the loop
            c = lambda x, y, z, n: tf.less(x, self._batch_size)
            b = lambda x, y, z, n: self._embed_line(x, y, z, n)
            # loop over every sample in the batch
            res = tf.while_loop(cond=c, body=b,
                                loop_vars=(i, inputs, embeddings, ndim0_tensor_arr))
            ndim0_tensor = res[-1].stack()
            ndim0_tensor = tf.reshape(ndim0_tensor, [-1, self._sentence_size, self._embedding_size])
            return ndim0_tensor
    
        def _embed_line(self, i, inputs, embeddings, ndim0_tensor_arr):
            ndim1_list = []
            # first pass: look up a vector for every position in the sentence
            for j in range(self._sentence_size):
                # token id at position j
                word = inputs[i][j]
                unk_word = tf.constant(-1)
                # look up the word vector; tf.squeeze drops the leading size-1 dimension
                f1 = lambda: tf.squeeze(tf.nn.embedding_lookup(params=embeddings, ids=word))
                # unknown words get a zero vector for now
                f2 = lambda: tf.zeros(shape=[self._embedding_size])
                # if word != unk_word run f1 (lookup), otherwise f2 (zeros)
                res_tensor = tf.case([(tf.not_equal(word, unk_word), f1)], default=f2)
                # collect the per-position vectors
                ndim1_list.append(res_tensor)
            # second pass: replace each unknown word (id -1) with the average of
            # the word vectors in its context window
            for j in range(self._sentence_size):
                word = inputs[i][j]
                unk_word = tf.constant(-1)
                f1 = lambda: self._ave_vec(ndim1_list, j)
                f2 = lambda: ndim1_list[j]
                ndim1_list[j] = tf.case([(tf.not_equal(word, unk_word), f2)],
                                        default=f1)
            # stack into a [sentence_size, embedding_size] tensor
            ndim1_tensor = tf.stack(ndim1_list)
            ndim0_tensor_arr = ndim0_tensor_arr.write(i, ndim1_tensor)
            i = tf.add(i, 1)
            return i, inputs, embeddings, ndim0_tensor_arr
    
        def _ave_vec(self, embed_list, cur_pos):
            """
            生词的词向量为词窗口的词向量平均值
            :param embed_list:
            :param cur_pos:
            :return:
            """
            # 根据句子的大小来获取当前词的上下文,self._window_size 表示提取词的大小
            left_pos = max(0, cur_pos - self._window_size)
            right_pos = min(cur_pos + self._window_size, self._sentence_size)
            # 获得上下文的词向量
            e_list = embed_list[left_pos:cur_pos] + embed_list[cur_pos + 1:right_pos + 1]
            # tf.stack合并词向量
            e_tensor = tf.stack(e_list)
            # 对上下文的内容使用reduce_mean来替代原来的位置的信息
            ave_tensor = tf.reduce_mean(e_tensor, axis=0)
            return ave_tensor
    
        @staticmethod
        def _length(sequence):
            # real tokens have non-zero ids (including -1 for unknown words), so
            # sign(|x|) is 1 for tokens and 0 for padding; summing gives the length
            mask = tf.sign(tf.abs(sequence))
            length = tf.reduce_sum(mask, axis=-1)
            return length
    
        def _initial_optimizer(self):
            with tf.variable_scope('{}_step'.format(self._name)):
                # non-trainable step counter, incremented once per training step
                self.global_step = tf.get_variable(shape=[],
                                                   initializer=tf.constant_initializer(0),
                                                   dtype=tf.int32,
                                                   name='global_step')
            # Adam updates the parameters using moving averages of the gradients
            self._optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.9, beta2=0.999)
            # minimize the mean cross-entropy loss
            self.train_op = self._optimizer.minimize(self.loss_op, global_step=self.global_step)
    
    
    if __name__ == '__main__':
        with tf.Session() as sess:
            # random word-embedding matrix for the toy run; it must have
            # vocab_size rows, and row 0 (the padding id) is zeroed out
            embedding = np.random.randn(7, 6)
            embedding[0] = 0.0
            model = MatchLstm(vocab_size=7, sentence_size=5, embedding_size=6,
                              word_embedding=embedding, session=sess)
            # note: unused; the batch size is inferred from the inputs in the graph
            model.batch_size = 1
            sent1 = [[3, -1, 2, 1, 0],
                     [4, 5, 1, 0, 0],
                     [2, 1, 0, 0, 0]]
    
            sent2 = [[2, 1, 0, 0, 0],
                     [3, -1, 2, 1, 0],
                     [4, 5, 1, 0, 0]]
    
            labels = [[1, 0, 0],
                      [0, 1, 0],
                      [0, 0, 1]]
    
            sess.run(tf.global_variables_initializer())
            # training loop: 300 optimization steps
            for temp in range(300):
                loss, _, step = sess.run([model.loss_op, model.train_op, model.global_step],
                                         feed_dict={model.premises: sent1, model.hypotheses: sent2,
                                                    model.labels: labels, model.lr: 0.001})
                print(step, loss)
                # swap premises and hypotheses so the toy data varies between steps
                sent1, sent2 = sent2, sent1
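
    Beyond the training loop, the graph also exposes lr_update_op / new_lr for changing the learning rate mid-training and predict_op for inference. A minimal sketch of how they might be called inside the same `with tf.Session()` block (the 0.9 decay factor is an arbitrary illustration, not from the original post):

            # hypothetical: decay the learning rate by 0.9 after the loop above
            sess.run(model.lr_update_op, feed_dict={model.new_lr: 0.001 * 0.9})
            # predicted class ids; labels are not needed for prediction
            pred = sess.run(model.predict_op,
                            feed_dict={model.premises: sent1, model.hypotheses: sent2})
            print(pred)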
  • Original post: https://www.cnblogs.com/my-love-is-python/p/10079876.html