PyTorch RNN language models (LSTM, BiLSTM) -- "Recurrent neural network based language model"

The paper uses a recurrent neural network as a language model, i.e. predicting the next token from the preceding context; the examples below implement that idea in PyTorch.

Paper link: 88888888

Model architecture diagram (the figure is not reproduced in this copy):
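Since the figure is missing, here is a short summary of the computation the TextRNN code below performs. It describes PyTorch's default nn.RNN (tanh nonlinearity) plus the linear output layer used in the code, not necessarily the exact diagram from the original post:

    h_t  = tanh(W_ih · x_t + b_ih + W_hh · h_(t-1) + b_hh)   # one recurrent step per input word
    y    = h_T · W + b                                        # logits over the vocabulary from the last hidden state
    loss = CrossEntropy(y, index of the true next word)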

          

For the underlying theory, refer to the paper. Code and comments (adapted from https://github.com/graykode/nlp-tutorial):

# -*- coding: utf-8 -*-
# @time : 2019/11/9  15:12

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)

# TextRNN Parameters
batch_size = len(sentences)
n_step = 2  # number of time steps (input words per sample)
n_hidden = 5  # number of hidden units in one cell

def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]   # indices of the first two words
        target = word_dict[word[-1]]                # index of the word to predict

        input_batch.append(np.eye(n_class)[input])  # one-hot encode the input words
        target_batch.append(target)

    return input_batch, target_batch

# to torch.Tensor
input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.Tensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

class TextRNN(nn.Module):
    def __init__(self):
        super(TextRNN, self).__init__()

        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden, batch_first=True)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, hidden, X):
        if self.rnn.batch_first:
            # X : [batch_size, time_step, word_vector]
            outputs, hidden = self.rnn(X, hidden)

            # outputs : [batch_size, time_step, num_directions(=1) * n_hidden]
            output = outputs[:, -1, :]  # [batch_size, num_directions(=1) * n_hidden]
            model = torch.mm(output, self.W) + self.b  # model : [batch_size, n_class]
            return model
        else:
            X = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]
            outputs, hidden = self.rnn(X, hidden)
            # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]
            # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]

            output = outputs[-1, :, :]  # [batch_size, num_directions(=1) * n_hidden]
            model = torch.mm(output, self.W) + self.b  # model : [batch_size, n_class]
            return model

model = TextRNN()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(5000):
    optimizer.zero_grad()

    # hidden : [num_layers * num_directions, batch, hidden_size]
    hidden = Variable(torch.zeros(1, batch_size, n_hidden))
    # input_batch : [batch_size, n_step, n_class]
    output = model(hidden, input_batch)

    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

    loss.backward()
    optimizer.step()


# Predict
hidden_initial = Variable(torch.zeros(1, batch_size, n_hidden))
predict = model(hidden_initial, input_batch).data.max(1, keepdim=True)[1]
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])
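As a quick usage check, the next word for a new two-word prefix could be predicted as below. This is a hedged sketch of my own (predict_next is not part of the original post); it reuses word_dict, number_dict, n_class, n_hidden and the trained model from above, and the prefix words must already be in the tiny vocabulary:

def predict_next(prefix_words):
    # hypothetical helper: one-hot encode the prefix and run one forward pass
    one_hot = np.eye(n_class)[[word_dict[w] for w in prefix_words]]  # [n_step, n_class]
    x = torch.Tensor(one_hot).unsqueeze(0)                           # [1, n_step, n_class]
    h0 = torch.zeros(1, 1, n_hidden)                                 # [num_layers * num_directions, batch=1, n_hidden]
    logits = model(h0, x)                                            # [1, n_class]
    return number_dict[logits.argmax(dim=1).item()]

print(predict_next(["i", "hate"]))  # should print 'milk' once training has converged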

RNN model with an LSTM unit (character-level: predict the last letter of each word from its first three letters):

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
word_dict = {n: i for i, n in enumerate(char_arr)}
number_dict = {i: w for i, w in enumerate(char_arr)}
n_class = len(word_dict)  # number of classes (= number of vocab)

seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

# TextLSTM Parameters
n_step = 3
n_hidden = 128


def make_batch(seq_data):
    input_batch, target_batch = [], []

    for seq in seq_data:
        input = [word_dict[n] for n in seq[:-1]]  # 'm', 'a', 'k' is input
        target = word_dict[seq[-1]]               # 'e' is target
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)

    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))


class TextLSTM(nn.Module):
    def __init__(self):
        super(TextLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, X):
        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]

        hidden_state = Variable(
            torch.zeros(1, len(X), n_hidden))  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        cell_state = Variable(
            torch.zeros(1, len(X), n_hidden))  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]

        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden]
        model = torch.mm(outputs, self.W) + self.b  # model : [batch_size, n_class]
        return model


input_batch, target_batch = make_batch(seq_data)

model = TextLSTM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(1000):
    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

inputs = [sen[:3] for sen in seq_data]

predict = model(input_batch).data.max(1, keepdim=True)[1]
print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])
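A side note on the explicit hidden_state/cell_state above: nn.LSTM (like nn.RNN) falls back to zero-filled initial states when none are passed, so they can be omitted. Below is a minimal sketch of an equivalent module, my own simplification rather than the original code, using batch_first=True and nn.Linear in place of the hand-rolled W and b:

class TextLSTM2(nn.Module):
    def __init__(self):
        super(TextLSTM2, self).__init__()
        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, batch_first=True)
        self.fc = nn.Linear(n_hidden, n_class)  # replaces the manual W and b parameters

    def forward(self, X):                      # X : [batch_size, n_step, n_class]
        outputs, _ = self.lstm(X)              # zero initial states are supplied implicitly
        return self.fc(outputs[:, -1, :])      # logits from the last time step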

    BiLSTM RNN model:

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

dtype = torch.FloatTensor

sentence = (
    'Lorem ipsum dolor sit amet consectetur adipisicing elit '
    'sed do eiusmod tempor incididunt ut labore et dolore magna '
    'aliqua Ut enim ad minim veniam quis nostrud exercitation'
)

word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}
number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}
n_class = len(word_dict)
max_len = len(sentence.split())
n_hidden = 5

def make_batch(sentence):
    input_batch = []
    target_batch = []

    words = sentence.split()
    for i, word in enumerate(words[:-1]):
        input = [word_dict[n] for n in words[:(i + 1)]]
        input = input + [0] * (max_len - len(input))  # pad each prefix to max_len with index 0
        target = word_dict[words[i + 1]]
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)

    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))

class BiLSTM(nn.Module):
    def __init__(self):
        super(BiLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
        self.W = nn.Parameter(torch.randn([n_hidden * 2, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, X):
        input = X.transpose(0, 1)  # input : [max_len, batch_size, n_class]

        hidden_state = Variable(torch.zeros(1 * 2, len(X), n_hidden))  # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        cell_state = Variable(torch.zeros(1 * 2, len(X), n_hidden))    # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]

        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden * 2] (forward and backward states concatenated)
        model = torch.mm(outputs, self.W) + self.b  # model : [batch_size, n_class]
        return model

input_batch, target_batch = make_batch(sentence)

model = BiLSTM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(10000):
    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

predict = model(input_batch).data.max(1, keepdim=True)[1]
print(sentence)
print([number_dict[n.item()] for n in predict.squeeze()])
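In the bidirectional case, every time step of outputs holds the forward and backward hidden states concatenated along the last dimension, which is why W has n_hidden * 2 rows. A small sketch of my own (assuming the cell above has been run) showing where each direction's final state lives:

with torch.no_grad():
    out, _ = model.lstm(input_batch.transpose(0, 1))  # out : [max_len, batch_size, n_hidden * 2]
forward_last = out[-1, :, :n_hidden]   # forward direction after reading the whole padded sequence
backward_last = out[0, :, n_hidden:]   # backward direction finishes its pass at t = 0
print(forward_last.shape, backward_last.shape)  # both: [batch_size, n_hidden]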
Original post: https://www.cnblogs.com/dhName/p/11826541.html