  • CNN for QA

    Recently I have been working on a QA system and ran some experiments with TensorFlow; below is a CNN scoring network. It mainly follows the paper "APPLYING DEEP LEARNING TO ANSWER SELECTION: A STUDY AND AN OPEN TASK" and a post on the WildML blog.
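
    The training objective, following the paper, is a pairwise ranking hinge loss over cosine similarities: compute cosine(question, right answer) and cosine(question, wrong answer), then penalize max(0, margin - (cos_right - cos_wrong)), so the correct answer must outscore the wrong one by at least the margin. A minimal NumPy sketch of that loss (the function name is mine, for illustration only):

    import numpy as np

    def hinge_loss(cos_right, cos_wrong, margin=0.05):
        # per-example hinge loss; zero once the right answer outscores
        # the wrong one by at least the margin
        return np.maximum(0.0, margin - (cos_right - cos_wrong))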

    import tensorflow as tf
    import numpy as np
    class QaCNN():
        def __init__(self, batchsize, sequencesize, vecsize, outsize, filtersizes, num_filters):
            self.vecsize = vecsize
            self.outsize = outsize
            self.batchsize = batchsize
            self.sequencesize = sequencesize
            # each input is a flattened [sequencesize x vecsize] sentence matrix
            self.question = tf.placeholder(tf.float32, [None, vecsize * sequencesize], name='question')
            self.answer_right = tf.placeholder(tf.float32, [None, vecsize * sequencesize], name='answer_right')
            self.answer_wrong = tf.placeholder(tf.float32, [None, vecsize * sequencesize], name='answer_wrong')
    
            tenQ = tf.reshape(self.question, [-1, self.sequencesize, self.vecsize, 1])
            tenR = tf.reshape(self.answer_right, [-1, self.sequencesize, self.vecsize, 1])
            tenW = tf.reshape(self.answer_wrong, [-1, self.sequencesize, self.vecsize, 1])
            tensorResultQ = []
            tensorResultR = []
            tensorResultW = []
            # one conv + max-pool branch per filter size; the filter weights are
            # shared across question, right answer and wrong answer
            for i, filtersize in enumerate(filtersizes):
                with tf.name_scope("conv-maxpool-%s" % filtersize):
                    filter_shape = [filtersize, self.vecsize, 1, num_filters]
                    W = tf.get_variable(initializer=tf.truncated_normal(filter_shape, stddev=0.1),
                                        name="W-%s" % str(filtersize))
                    b = tf.get_variable(initializer=tf.constant(0.1, shape=[num_filters]),
                                        name="b-%s" % str(filtersize))
                    pooledQ = self.conv2dPool(tenQ, W, b, filtersize)
                    pooledR = self.conv2dPool(tenR, W, b, filtersize)
                    pooledW = self.conv2dPool(tenW, W, b, filtersize)
                    tensorResultQ.append(pooledQ)
                    tensorResultR.append(pooledR)
                    tensorResultW.append(pooledW)
    
            flat_length = len(filtersizes) * num_filters
            tenQ_flat = tf.reshape(tf.concat(tensorResultQ,3),[-1,flat_length])
            tenR_flat = tf.reshape(tf.concat(tensorResultR,3),[-1,flat_length])
            tenW_flat = tf.reshape(tf.concat(tensorResultW,3),[-1,flat_length])
    
            # cosine similarity between the question and the correct answer
            exy = tf.reduce_sum(tf.multiply(tenQ_flat, tenR_flat), 1)
            x = tf.sqrt(tf.reduce_sum(tf.multiply(tenQ_flat, tenQ_flat), 1))
            y = tf.sqrt(tf.reduce_sum(tf.multiply(tenR_flat, tenR_flat), 1))
            cosineQR = tf.div(exy, tf.multiply(x, y), name='cosineQR')

            # cosine similarity between the question and the wrong answer
            exy = tf.reduce_sum(tf.multiply(tenQ_flat, tenW_flat), 1)
            x = tf.sqrt(tf.reduce_sum(tf.multiply(tenQ_flat, tenQ_flat), 1))
            y = tf.sqrt(tf.reduce_sum(tf.multiply(tenW_flat, tenW_flat), 1))
            cosineQW = tf.div(exy, tf.multiply(x, y), name='cosineQW')
    
            with tf.name_scope('losses'):
                zero = tf.constant(0, shape=[self.batchsize], dtype=tf.float32)
                margin = tf.constant(0.05, shape=[self.batchsize], dtype=tf.float32)
                # pairwise hinge loss: max(0, margin - (cosineQR - cosineQW))
                self.losses = tf.maximum(zero, tf.subtract(margin, tf.subtract(cosineQR, cosineQW)), name='loss_tensor')
                self.loss = tf.reduce_sum(self.losses, name='loss')
            with tf.name_scope('acc'):
                # a pair counts as correct when its hinge loss is exactly zero
                self.correct = tf.equal(zero, self.losses)
                self.accuracy = tf.reduce_mean(tf.cast(self.correct, 'float'), name='accuracy')
            tf.summary.scalar('loss',self.loss)
            self.variable_summaries(self.accuracy)
            self.merged = tf.summary.merge_all()
    
        def variable_summaries(self , var):
            '''Attach a lot of summaries to a Tensor (for TensorBoard visualization).'''
            with tf.name_scope('summaries'):
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean',mean)
                with tf.name_scope('stddev'):
                    stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
                tf.summary.scalar('stddev',stddev)
                tf.summary.scalar('max',tf.reduce_max(var))
                tf.summary.scalar('min',tf.reduce_min(var))
                tf.summary.histogram('histogram',var)
    
    
        def conv2dPool(self, x, W, b, filtersize):
            # VALID convolution over the time axis, then max-pooling over all
            # remaining positions, leaving one value per filter
            conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID')
            h = tf.nn.relu(tf.nn.bias_add(conv, b))
            pooled = tf.nn.max_pool(h, ksize=[1, self.sequencesize - filtersize + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID')
            return pooled
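
    As a quick sanity check of the shape flow through conv2dPool (assuming the hyper-parameters used in the training script below), every branch collapses a sentence to a single num_filters-wide vector:

    # plain-Python shape walkthrough, not part of the original model code
    sequencesize, num_filters = 10, 500
    for filtersize in [1, 2, 3, 5]:
        conv_h = sequencesize - filtersize + 1                  # VALID conv over time
        pool_h = conv_h - (sequencesize - filtersize + 1) + 1   # VALID max-pool
        print(filtersize, conv_h, pool_h)                       # pool_h is always 1
    # each branch ends as [batch, 1, 1, 500]; concatenating the four branches
    # and flattening gives [batch, 4 * 500] = [batch, 2000] per sentence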
    
    
    
    import numpy as np
    import time
    import os
    import tensorflow as tf
    from qacnn_g import *
    from process import *
    batchsize = 100
    sequencesize = 10
    vecsize = 200
    outsize = 10
    root = './lib/'
    filtersize = [1,2,3,5]
    num_filter = 500
    if os.path.exists(root + 'corpus.seg.length.out'):
        os.remove(root + 'corpus.seg.length.out')
    logfolder = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    cnn = QaCNN(batchsize , sequencesize , vecsize , outsize , filtersize , num_filter)
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cnn.loss)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter('./LOGS/'+logfolder,sess.graph)
    saver = tf.train.Saver(tf.global_variables())
    print ('init...')
    dataprocess = DataProcess(root + 'word2vec.bin')
    dataprocess.normalize(root + 'corpus.seg.out', root + 'corpus.seg.length.out')
    dataprocess.initdata(root + 'corpus.seg.length.out')
    start = time.time()
    for i in range(120000):
        batch = dataprocess.nextbatch(batchsize)
        feed = {cnn.question: batch[0], cnn.answer_right: batch[1], cnn.answer_wrong: batch[2]}
        if i % 10 == 0:
            # every 10 steps, also fetch the summaries and metrics
            summary, loss, accuracy, _ = sess.run([cnn.merged, cnn.loss, cnn.accuracy, train_step], feed)
            train_writer.add_summary(summary, i)
            end = time.time()
            elapse = end - start
            print('iteration %d.\tloss=%f\taccuracy=%f\telapse=%f' % (i, loss, accuracy, elapse))
            start = time.time()
        else:
            sess.run(train_step, feed_dict=feed)
    train_writer.close()
    saver.save(sess , './model/qa.cnn')
    sess.close()
    print ('end...')
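
    process.py is not included in the post; from the way it is called, the two scripts assume roughly the interface below (a hypothetical stub returning random data, not the author's implementation):

    import numpy as np

    class DataProcess(object):
        def __init__(self, word2vec_path):
            # would load the word2vec vectors; fixed sizes assumed here
            self.sequencesize, self.vecsize = 10, 200

        def normalize(self, infile, outfile):
            pass  # would pad/truncate every sentence to sequencesize tokens

        def initdata(self, path):
            pass  # would load the (question, right, wrong) training triples

        def nextbatch(self, batchsize):
            # (questions, right answers, wrong answers, raw answer text);
            # the first three are [batchsize, sequencesize * vecsize] arrays
            dim = self.sequencesize * self.vecsize
            q, r, w = (np.random.rand(batchsize, dim).astype('float32') for _ in range(3))
            return q, r, w, ['placeholder answer'] * batchsize

        def getSentenceVec(self, sentence, seqlen, batchsize):
            # would embed one sentence and tile it batchsize times
            return np.random.rand(batchsize, seqlen * self.vecsize).astype('float32')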
    
    

    To score with the trained model, we use cosineQR, the cosine similarity between the question and the positive-answer branch, as the ranking metric.

    # -*- coding: utf-8 -*-
    import tensorflow as tf
    import numpy as np
    import os
    import time
    from process import *
    
    tf.flags.DEFINE_string('data_file', './lib/corpus.out', 'Data to predict')
    tf.flags.DEFINE_string('checkpoint_dir','./model/','checkpoint directory from training run')
    tf.flags.DEFINE_integer('batch_size',1000,'batch size')
    tf.flags.DEFINE_string('root','./lib','root dir')
    FLAGS = tf.flags.FLAGS
    FLAGS._parse_flags()
    print('\nParameters:')
    for attr , value in sorted(FLAGS.__flags.items()):
        print ('{}={}'.format(attr.upper() , value))
    
    print('')
    
    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    dataprocess = DataProcess('./lib/word2vec.bin')
    dataprocess.initdata('./lib/corpus.seg.length.out')
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
            saver.restore(sess , checkpoint_file)
    
            start = time.time()
            bestAnswer = None
            maxScore = -1.0
            # look up the input placeholders and the score tensor once, outside the loop
            question = graph.get_operation_by_name('question').outputs[0]
            answer_right = graph.get_operation_by_name('answer_right').outputs[0]
            answer_wrong = graph.get_operation_by_name('answer_wrong').outputs[0]
            cosineQR = graph.get_operation_by_name('cosineQR').outputs[0]
            # tile the fixed question so it is scored against every candidate in a batch
            questionbatch = dataprocess.getSentenceVec('你叫什么名字', 10, FLAGS.batch_size)
            for i in range(10000):
                batchs = dataprocess.nextbatch(FLAGS.batch_size)
                scores = sess.run(cosineQR, {question: questionbatch, answer_right: batchs[1], answer_wrong: batchs[2]})
                ndx = np.argmax(scores)
                score = scores[ndx]
                if maxScore < score:
                    maxScore = score
                    bestAnswer = batchs[3][ndx]
                print('iterate: %d\tscore:%f\tmaxscore:%f\tanswer:%s' % (i, score, maxScore, batchs[3][ndx].strip('\n')))
            end = time.time()
            print('time used:%f' % (end - start))
            print('maxScore:%f' % maxScore)
            print('best answer:%s' % bestAnswer)
    def find(cosineTensor):
        return  np.argmax(cosineTensor)
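
    For reference, the per-pair score the graph produces is plain cosine similarity, so it can be checked against a NumPy re-implementation (my own sanity check, not part of the original post):

    import numpy as np

    def cosine(q, a):
        # q, a: [batch, dim] row-wise feature matrices
        num = np.sum(q * a, axis=1)
        den = np.sqrt(np.sum(q * q, axis=1)) * np.sqrt(np.sum(a * a, axis=1))
        return num / den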
    
    
  • Original post: https://www.cnblogs.com/nocml/p/6773058.html