zoukankan      html  css  js  c++  java
  • 词向量可视化--[tensorflow , python]

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    """
    ----------------------------------
    Version    : ??
    File Name :     visual_vec.py
    Description :   
    Author  :       xijun1
    Email   :
    Date    :       2018/12/25
    -----------------------------------
    Change Activity :   2018/12/25
    -----------------------------------
    
    """
    __author__ = 'xijun1'
    import codecs
    import io
    import os

    import numpy as np
    import tensorflow as tf
    from tensorflow.contrib.tensorboard.plugins import projector
    from tqdm import tqdm
    
    # Export word vectors stored in word2vec text format so that they can be
    # explored with the TensorBoard embedding projector.
    #
    # Outputs, all written under ``log_path``:
    #   * metadata.tsv          - one vocabulary word per line (row labels)
    #   * projector_config.pbtxt - written by projector.visualize_embeddings
    #   * model.ckpt*           - checkpoint holding the 'embedding' variable
    words, embeddings = [], []
    log_path = 'model'
    vec_path = '/Users/xxx/github/python_demo/vec.txt'

    # io.open gives the same explicit-encoding behaviour on Python 2 and 3, so
    # non-ASCII vocabularies do not depend on the platform's default encoding.
    with io.open(vec_path, 'r', encoding='utf-8') as f:
        # The first line is the header: "<vocab_size> <vector_size>".
        header = f.readline()
        vocab_size, vector_size = map(int, header.split())
        for _ in tqdm(range(vocab_size)):
            # Strip the line terminator (and an optional trailing space) before
            # splitting, so the last component is kept whether or not the
            # writer emitted a space before the newline.
            fields = f.readline().rstrip(' \r\n').split(' ')
            word = fields[0]
            if word == "":
                # Empty token or premature end of file: nothing to label.
                continue
            vector = [value for value in fields[1:] if value]
            if len(vector) != vector_size:
                raise ValueError(
                    'expected %d components for word %r, got %d'
                    % (vector_size, word, len(vector)))
            words.append(word)
            embeddings.append(np.array(vector, dtype=np.float32))
    print(len(words))

    # The log directory must exist before the metadata file can be created.
    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # Row i of the embedding matrix is labelled by line i of metadata.tsv.
    with io.open(os.path.join(log_path, 'metadata.tsv'), 'w',
                 encoding='utf-8') as f:
        for word in tqdm(words):
            f.write(word + '\n')

    # Explicit reshape keeps the matrix 2-D even when no word was loaded.
    embedding_matrix = np.asarray(embeddings, dtype=np.float32).reshape(
        len(words), vector_size)

    with tf.Session() as sess:
        # The variable starts as a dummy scalar list and is resized by the
        # assign below; validate_shape=False permits the shape change.
        X = tf.Variable([0.0], name='embedding')
        place = tf.placeholder(tf.float32, shape=[len(words), vector_size])
        set_x = tf.assign(X, place, validate_shape=False)
        sess.run(tf.global_variables_initializer())
        sess.run(set_x, feed_dict={place: embedding_matrix})

        # with summary
        summary_writer = tf.summary.FileWriter(log_path, sess.graph)
        config = projector.ProjectorConfig()
        embedding_conf = config.embeddings.add()
        embedding_conf.tensor_name = 'embedding:0'
        # Same value as before: a bare file name, matching the metadata.tsv
        # written into log_path above.
        embedding_conf.metadata_path = 'metadata.tsv'
        projector.visualize_embeddings(summary_writer, config)
        # Close the writer so the graph event is flushed to disk.
        summary_writer.close()

        # save
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(log_path, "model.ckpt"))
    
    

    结果:

  • 相关阅读:
    清理yum源
    XZ压缩
    Linux命令之dot
    calltree查看工程代码中的函数调用关系
    valgrind 打印程序调用树+进行多线程性能分析
    LINUX 性能 测试 优化工具
    TCP/IP(84) 详解
    perf---LINUX内核研究
    廖雪锋笔记3:类型转换
    廖雪锋笔记2:list,tuble
  • 原文地址:https://www.cnblogs.com/gongxijun/p/10175937.html
Copyright © 2011-2022 走看看