  • Using a VAE (variational autoencoder) for sentence embeddings/representations, or for representing any other structured data

    A VAE is a remarkable algorithm. In spirit it is a bit like word2vec, except with a Bayesian layer on top, which in turn makes it somewhat like LDA.

    Personally, I think the VAE has a lot of potential and plenty of applications if you dig into it, because it is unsupervised in the true sense: once a sentence is represented as a vector, you can do whatever you want with it.

    To introduce the VAE briefly: it uses variational inference to approximate the posterior over the latent variables, and its input and output are both the sentence itself.
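
    Concretely, instead of maximizing log p(x) directly, a VAE maximizes the evidence lower bound (ELBO), and the two loss terms in the code below correspond exactly to its two parts. In LaTeX:

    \log p(x) \ge \mathbb{E}_{q(h|x)}\left[\log p(x|h)\right] - \mathrm{KL}\left(q(h|x)\,\|\,p(h)\right)

    With q(h|x) = N(mu, diag(sigma^2)) and prior p(h) = N(0, I), the KL term has a closed form, which is what appears as e_loss in the code:

    \mathrm{KL} = -\frac{1}{2}\sum_{j=1}^{d}\left(1 + \log\sigma_j^2 - \mu_j^2 - \sigma_j^2\right)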

    Below is the simplest possible VAE implementation, using 1-grams: each sentence is represented as a 1 * vocab_size count vector.
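
    For example, here is a minimal sketch of that representation (the vocabulary and tokenizer are made up for illustration; the post's actual TextReader is not shown):

    vocab = {'i': 0, 'like': 1, 'deep': 2, 'learning': 3}

    def to_bow(sentence, vocab):
        # Build a 1 * vocab_size count vector for one sentence.
        vec = [0] * len(vocab)
        for tok in sentence.lower().split():
            if tok in vocab:
                vec[vocab[tok]] += 1
        return vec

    print to_bow('I like deep deep learning', vocab)  # [1, 1, 2, 1]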

    The implementation uses TensorFlow. I'm currently working on a seq2seq-based VAE; there are a few on GitHub, but none of the TensorFlow ones is written to my satisfaction.

    #encoding=utf-8
    import tensorflow as tf
    from reader import TextReader  # the post's own corpus reader (not shown here)

    
    embed_dim = 500  # width of the encoder hidden layers
    h_dim = 100      # dimensionality of the latent code, i.e. the sentence embedding


    data_path = './n_gram/'
    model_dir = './n_gram/model_dir/'
    reader = TextReader(data_path)
    def create_train_op(loss):
        # Adam with gradient clipping; optimize_loss also increments the global step.
        train_op = tf.contrib.layers.optimize_loss(loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=0.01,
            clip_gradients=10.0,
            optimizer="Adam")
        return train_op
    
    
    global_step = tf.Variable(0, name = 'global_step', trainable=False)
    
    # Bag-of-words input: one row of token counts per sentence (1 * vocab_size).
    tx = tf.placeholder(tf.int64, [None, reader.vocab_size])
    x = tf.to_float(tx)

    batch_size = tf.placeholder(tf.int32)  # int32, as required for the sampling shape below
    w = tf.placeholder(tf.float32)  # KL-annealing weight; fed in the loop but not yet wired into the loss
    
    with tf.variable_scope('encoder'):
        # Two tanh layers map the 1 * vocab_size input down to embed_dim features.
        w_1 = tf.get_variable('w_1', [reader.vocab_size, embed_dim], initializer = tf.truncated_normal_initializer())
        b_1 = tf.get_variable('b_1', [embed_dim], initializer = tf.truncated_normal_initializer())
    
        L1 = tf.nn.bias_add(tf.matmul(x, w_1), b_1)
        L1 = tf.nn.tanh(L1)
    
        w_2 = tf.get_variable('w_2', [embed_dim, embed_dim], initializer = tf.truncated_normal_initializer())
        b_2 = tf.get_variable('b_2', [embed_dim], initializer = tf.truncated_normal_initializer())
    
        L2 = tf.nn.bias_add(tf.matmul(L1, w_2), b_2)
        L2 = tf.nn.tanh(L2)
    
        w_encoder_mu = tf.get_variable('w_encoder_mu', [embed_dim, h_dim], initializer = tf.truncated_normal_initializer(0, 0.01))
        b_encoder_mu = tf.get_variable('b_encoder_mu', [h_dim], initializer = tf.truncated_normal_initializer(0, 0.001))
    
        w_encoder_var = tf.get_variable('w_encoder_var', [embed_dim, h_dim], initializer = tf.truncated_normal_initializer(0, 0.01))
        b_encoder_var = tf.get_variable('b_encoder_var', [h_dim], initializer = tf.truncated_normal_initializer(0, 0.01))
    
        # Mean and log-variance of the approximate posterior q(h|x).
        mu = tf.nn.bias_add(tf.matmul(L2, w_encoder_mu), b_encoder_mu)
        log_sigma_sq = tf.nn.bias_add(tf.matmul(L2, w_encoder_var), b_encoder_var)

        # Reparameterization trick: h = mu + sigma * eps with eps ~ N(0, I),
        # so gradients can flow back through the sampling step.
        eps = tf.random_normal(tf.stack([batch_size, h_dim]), 0, 1, dtype=tf.float32)
        sigma = tf.sqrt(tf.exp(log_sigma_sq))

        h = mu + sigma * eps
    
    with tf.variable_scope('decoder') as vs:
        R = tf.get_variable('R', [h_dim, reader.vocab_size], initializer = tf.truncated_normal_initializer(0, 0.0001))
        b = tf.get_variable('b', [reader.vocab_size], initializer = tf.truncated_normal_initializer(0, 0.0001))
    
        # Logits and per-word probabilities p(x_i | h): the decoder tries to
        # reconstruct the bag-of-words input from the latent code.
        e = -tf.matmul(h, R) + b
        p_x_i = tf.nn.softmax(e, -1)
    
    # Closed-form KL divergence between q(h|x) = N(mu, sigma^2) and the prior N(0, I).
    e_loss = -0.5 * tf.reduce_sum(1.0 + log_sigma_sq - tf.square(mu) - tf.exp(log_sigma_sq), 1)
    # Reconstruction term: negative log-likelihood of the observed word counts.
    g_loss = -tf.reduce_sum(tf.log(p_x_i + 1e-10)*x, 1)
    # NLL of a baseline that gives each observed token probability 1/N,
    # usable to normalize g_loss (disabled below).
    g_loss_stand = -tf.log(1.0/tf.reduce_sum(x, 1))*tf.reduce_sum(x, 1)
    #g_loss = g_loss/tf.maximum(g_loss_stand, 1.0)


    e_loss_mean = tf.reduce_mean(e_loss)
    g_loss_mean = tf.reduce_mean(g_loss)

    # Down-weight the KL term; the w placeholder above could replace the fixed 0.1
    # to implement proper KL annealing.
    loss = 0.1*e_loss + g_loss
    loss = tf.reduce_mean(loss)
    
    # Separate the encoder and decoder variables so the KL term and the
    # reconstruction term can also be optimized independently; the training
    # loop below only uses the joint train_op.
    encoder_var_list = []
    decoder_var_list = []
    for var in tf.trainable_variables():
        if 'encoder' in var.name:
            encoder_var_list.append(var)
        elif 'decoder' in var.name:
            decoder_var_list.append(var)


    optim_e = tf.train.AdamOptimizer(learning_rate=0.05).minimize(e_loss, global_step=global_step, var_list=encoder_var_list)
    optim_g = tf.train.AdamOptimizer(learning_rate=0.05).minimize(g_loss, global_step=global_step, var_list=decoder_var_list)
    train_op = create_train_op(loss)
    
    saver = tf.train.Saver()
    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        # Resume from the latest checkpoint if one exists.
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print 'restoring model from ' + ckpt.model_checkpoint_path
            saver.restore(sess, ckpt.model_checkpoint_path)
            print 'successfully restored the session'

        num_steps = 10000  # the original had range(0, 0), which never runs; any real step count works
        count = global_step.eval()
        for k in range(0, num_steps):
            data, length = reader.iterator()
            em, gm, lm, _ = sess.run([e_loss_mean, g_loss_mean, loss, train_op],
                feed_dict={tx: data,
                           batch_size: length,
                           w: k/1000.0})  # annealing schedule for the (currently unused) KL weight
            print 'After %d steps: loss %f, KL loss %f, decoder loss %f' % (global_step.eval(), lm, em, gm)
            # train_op advances global_step on every run; reset it so it counts
            # training iterations instead.
            global_step.assign(count).eval()
            if k % 10 == 0:
                saver.save(sess, model_dir + 'model.ckpt', global_step=global_step)
            count += 1
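
    Once training finishes, the sentence embedding is simply the posterior mean mu; sampling is only needed during training. A minimal sketch of how you might pull it out inside the same session, reusing the illustrative to_bow helper from above (and assuming its vocabulary matched the reader's):

        # mu depends only on tx, so batch_size does not need to be fed here.
        bow = [to_bow('I like deep learning', vocab)]   # shape 1 * vocab_size
        embedding = sess.run(mu, feed_dict={tx: bow})   # shape 1 * h_dim
        print embedding

    From there, cosine similarity or nearest-neighbour search over these vectors gives you sentence similarity essentially for free.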
  • Original post: https://www.cnblogs.com/LarryGates/p/6565851.html