zoukankan      html  css  js  c++  java
  • pytorch-Embedding

    Embedding

    无初始化embedding

    import torch.nn as nn
    # Embedding layer created without any pretrained weights.
    # num_embeddings and embedding_dim are placeholders that the caller must define
    # (presumably vocabulary size and vector width -- confirm against the model code).
    emb=nn.Embedding(num_embeddings, embedding_dim)

    加载预训练模型(如glove)

    def build_embedding_matrix(word2idx, embed_dim, dat_fname):
        """Return the embedding matrix for a vocabulary, using a pickle cache.

        If ``dat_fname`` already exists, the matrix is unpickled from it and
        returned unchanged. Otherwise the matrix is built from a GloVe text
        file, written to ``dat_fname`` for later runs, and returned.

        Args:
            word2idx: Mapping from word to its row index in the matrix.
            embed_dim: Width of each word vector. It also selects the GloVe
                file: 300 uses ``./glove/glove.42B.300d.txt``, any other value
                uses ``./glove.twitter.27B/glove.twitter.27B.<embed_dim>d.txt``.
            dat_fname: Path of the pickle cache file.

        Returns:
            On a fresh build, a NumPy array of shape
            ``(len(word2idx) + 2, embed_dim)``; rows for words missing from
            the GloVe file stay all-zero. On a cache hit, whatever object was
            pickled into ``dat_fname``.
        """
        if os.path.exists(dat_fname):
            print('loading embedding_matrix:', dat_fname)
            # NOTE(security): unpickling can execute arbitrary code; only point
            # dat_fname at cache files this program wrote itself.
            with open(dat_fname, 'rb') as cache_file:
                return pickle.load(cache_file)

        print('loading word vectors...')
        # Two extra rows: idx 0 and len(word2idx)+1 are meant to stay all-zeros
        # (assumes word indices start at 1 -- confirm against the tokenizer).
        embedding_matrix = np.zeros((len(word2idx) + 2, embed_dim))
        if embed_dim != 300:
            fname = './glove.twitter.27B/glove.twitter.27B.' + str(embed_dim) + 'd.txt'
        else:
            fname = './glove/glove.42B.300d.txt'
        word_vec = _load_word_vec(fname, word2idx=word2idx)
        print('building embedding_matrix:', dat_fname)
        # Copy each known word's vector into the row given by its index.
        for word, i in word2idx.items():
            vec = word_vec.get(word)
            if vec is not None:
                # Words not found in the embedding index remain all-zeros.
                embedding_matrix[i] = vec
        with open(dat_fname, 'wb') as cache_file:
            pickle.dump(embedding_matrix, cache_file)

        return embedding_matrix
    
    def _load_word_vec(path, word2idx=None):
        """Read word vectors from a GloVe-style text file.

        Each non-blank line is split on whitespace; the first token is taken
        as the word and the remaining tokens as its vector components.

        Args:
            path: Path of the text file to read.
            word2idx: Optional mapping whose keys are the words to keep
                (word -> index). When ``None``, every word in the file is kept.

        Returns:
            Dict mapping each kept word to a ``float32`` NumPy array.
        """
        word_vec = {}
        # newline='\n' stops other line-break characters from splitting a row;
        # errors='ignore' drops bytes that are not valid UTF-8.
        with open(path, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
            for line in fin:
                tokens = line.rstrip().split()
                if not tokens:
                    # A blank line has no word token; skip it instead of crashing.
                    continue
                if word2idx is None or tokens[0] in word2idx:
                    # np.asarray converts the string components into an ndarray.
                    word_vec[tokens[0]] = np.asarray(tokens[1:], dtype='float32')
        return word_vec
    

    Model
    # embedding_matrix is the already-loaded matrix (e.g. the result of build_embedding_matrix).
    # from_pretrained freezes the weights by default; pass freeze=False to fine-tune them.
    emb = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))

  • 相关阅读:
    数据库连接池
    JDBC事务
    oracle 11g
    python自动化办公1-os模块学习
    python模块学习1
    requests-post请求
    linux学习二-目录文件相关命令
    Linux学习一常见的7个命令及命令的信息查看
    python-文件操作
    异常以及异常处理
  • 原文地址:https://www.cnblogs.com/ArdenWang/p/14798131.html
Copyright © 2011-2022 走看看