zoukankan      html  css  js  c++  java
  • 深度学习之使用 seq2seq 进行英文到法文的翻译

    深度学习之使用 seq2seq 进行英文到法文的翻译

    import os
    import torch
    import random
    
    # Locations of the two parallel corpus files (English and French, judging by the file names).
    source_path = "data/small_vocab_en"
    target_path = "data/small_vocab_fr"
    # Number of encoder-output slots allocated by train()/evaluate() and the width
    # of the decoder's attention layer.
    MAX_LENGTH = 100
    # Reserved token ids; they match the '<SOS>' / '<EOS>' entries seeded in Dictionary.
    SOS_token = 0
    EOS_token = 1
    
    def load_data(path):
        """Return the entire contents of the UTF-8 text file at *path* as one string."""
        with open(os.path.join(path), mode='r', encoding='utf-8') as handle:
            return handle.read()
    
    # Read both corpus files completely into memory as single strings (runs at import time).
    source_text = load_data(source_path)
    target_text = load_data(target_path)
    
    
    class Dictionary(object):
        """Two-way mapping between words and integer ids.

        Ids 0 and 1 are reserved for the '<SOS>' and '<EOS>' markers. Every
        other word receives the next unused id, starting at 2, the first time
        it is added.
        """

        def __init__(self):
            self.word2idx = {'<SOS>': 0, '<EOS>': 1}
            self.idx2word = {0: '<SOS>', 1: '<EOS>'}
            # Next id to hand out; equals the number of entries stored so far.
            self.count = 2

        def add_word(self, word):
            """Register *word* if it is unknown and return its id.

            Adding a word that is already present (including the reserved
            markers) leaves the mapping unchanged and returns the existing id.
            """
            if word not in self.word2idx:
                # Use `count` itself rather than `count - 1`: the first new word
                # must get id 2, otherwise it overwrites the reserved '<EOS>'
                # slot at id 1 and becomes indistinguishable from end-of-sentence.
                idx = self.count
                self.word2idx[word] = idx
                self.idx2word[idx] = word
                self.count += 1
            return self.word2idx[word]

        def __len__(self):
            """Return the vocabulary size, reserved markers included."""
            return len(self.idx2word)
    
    class Lang(object):
        """A named language paired with the vocabulary collected from its sentences."""

        def __init__(self, name):
            self.dictionary = Dictionary()
            self.name = name

        def addSentence(self, sentence):
            """Split *sentence* on whitespace and return the ids of its words, in order.

            Words not seen before are added to the vocabulary as a side effect.
            """
            ids = []
            for token in sentence.split():
                ids.append(self.addWord(token))
            return ids

        def addWord(self, word):
            """Add a single word to the vocabulary and return its id."""
            vocab = self.dictionary
            return vocab.add_word(word)

        def __len__(self):
            """Return the size of the underlying vocabulary."""
            return len(self.dictionary)
    
    def readLangs(source_name, source_lang_text, target_name, target_lang_text):
        """Build both vocabularies and the id-encoded sentence pairs.

        Both texts are lower-cased and split into sentences on newline
        characters; each sentence is split into words on whitespace. Every
        target sentence gets an '<EOS>' marker appended before encoding.

        Args:
            source_name: name stored on the source Lang.
            source_lang_text: full source corpus, one sentence per line.
            target_name: name stored on the target Lang.
            target_lang_text: full target corpus, one sentence per line.

        Returns:
            (source_lang, target_lang, pairs), where pairs is a list of
            (source_ids, target_ids) tuples. zip() stops at the shorter
            corpus, so surplus lines in the longer one are dropped.
        """
        # The newline separator must be the escape sequence '\n'; a literal line
        # break inside the quotes is a syntax error.
        # NOTE: a corpus that ends with a newline yields one trailing empty
        # sentence (encoded as [] / ['<EOS>' id]).
        source_lang = Lang(source_name)
        source_data = [source_lang.addSentence(s) for s in source_lang_text.lower().split('\n')]

        target_lang = Lang(target_name)
        target_sentences = [s + ' <EOS>' for s in target_lang_text.lower().split('\n')]
        target_data = [target_lang.addSentence(s) for s in target_sentences]

        pairs = list(zip(source_data, target_data))
        return source_lang, target_lang, pairs
    
    # Build the vocabularies and the id-encoded training pairs at import time.
    # NOTE(review): 'fe' looks like a typo for 'fr'; the value is only stored on Lang.name.
    source_lang, target_lang, pairs_data = readLangs('en', source_text, 'fe', target_text)
    
    import torch.nn as nn
    from torch.autograd import Variable
    from torch import optim
    import torch.nn.functional as F
    
    class EncoderRNN(nn.Module):
        """GRU encoder that processes one token id per forward call."""

        def __init__(self, input_size, hidden_size):
            super().__init__()
            self.hidden_size = hidden_size

            # The embedding width equals the GRU width, so embeddings feed the GRU directly.
            self.embedding = nn.Embedding(input_size, hidden_size)
            self.gru = nn.GRU(hidden_size, hidden_size)

        def forward(self, input, hidden):
            """Embed *input*, run one GRU step and return (output, new_hidden)."""
            # Reshape the looked-up embedding to (1, 1, -1) before handing it to the GRU.
            step_input = self.embedding(input).view(1, 1, -1)
            return self.gru(step_input, hidden)

        def initHidden(self):
            """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
            return Variable(torch.zeros(1, 1, self.hidden_size))
    
    class DecoderRNN(nn.Module):
        """GRU decoder with attention over a fixed window of ``max_length`` encoder outputs."""

        def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
            super().__init__()

            self.hidden_size, self.output_size = hidden_size, output_size
            self.dropout_p, self.max_length = dropout_p, max_length

            self.embedding = nn.Embedding(output_size, hidden_size)
            # Attention scores are computed from [embedding ; hidden], one score per encoder slot.
            self.attn = nn.Linear(2 * hidden_size, max_length)
            # Projects [embedding ; attended context] back down to the GRU input width.
            self.attn_combine = nn.Linear(2 * hidden_size, hidden_size)
            self.dropout = nn.Dropout(dropout_p)
            self.gru = nn.GRU(hidden_size, hidden_size)
            self.out = nn.Linear(hidden_size, output_size)

        def forward(self, input, hidden, encoder_outputs):
            """Run one decoding step.

            Returns (log-probabilities over the output vocabulary, new hidden
            state, attention weights over the encoder slots).
            """
            token_vec = self.dropout(self.embedding(input).view(1, 1, -1))

            attn_scores = self.attn(torch.cat((token_vec[0], hidden[0]), 1))
            attn_weights = F.softmax(attn_scores, dim=1)
            # Weighted sum of the encoder outputs, computed as a batched matrix product.
            context = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

            merged = self.attn_combine(torch.cat((token_vec[0], context[0]), 1))
            gru_out, hidden = self.gru(F.relu(merged.unsqueeze(0)), hidden)

            log_probs = F.log_softmax(self.out(gru_out[0]), dim=1)
            return log_probs, hidden, attn_weights

        def initHidden(self):
            """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
            return Variable(torch.zeros(1, 1, self.hidden_size))
    
    # Hyperparameters.
    # NOTE(review): `epochs` and this module-level `print_every` are not read anywhere
    # in the visible code (trainIters has its own `print_every` parameter).
    epochs = 10
    print_every = 2
    hidden_size = 256
    # Probability with which train() feeds the ground-truth token back into the decoder.
    teacher_forcing_ratio = 0.5
    
    # Vocabulary sizes are taken from the Lang objects returned by readLangs.
    encoder_model = EncoderRNN(len(source_lang), hidden_size)
    att_decoder_model = DecoderRNN(hidden_size, len(target_lang), dropout_p=0.1)
    
    def variablesFromIds(ids):
        """Wrap a sequence of token ids as a LongTensor Variable viewed as (-1, 1)."""
        id_tensor = torch.LongTensor(ids)
        column = id_tensor.view(-1, 1)
        return Variable(column)
    
    def variablesFromPair(pair):
        """Convert an (input_ids, target_ids) pair into a tuple of two column Variables."""
        source_ids, target_ids = pair[0], pair[1]
        return variablesFromIds(source_ids), variablesFromIds(target_ids)
    
    def train(input, target, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
        """Run one optimisation step on a single sentence pair.

        The source sentence is encoded token by token, then the decoder is
        unrolled over the target. With probability ``teacher_forcing_ratio``
        the decoder is fed the ground-truth token at each step; otherwise it
        is fed its own most likely prediction and stops early once it emits
        EOS.

        Args:
            input: column of source token ids, one id per row.
            target: column of target token ids, one id per row.
            encoder, decoder: the models to update.
            encoder_optimizer, decoder_optimizer: their optimizers.
            criterion: loss applied to each decoder output / target id.
            max_length: number of encoder-output slots to allocate; the input
                must not be longer than this.

        Returns:
            The summed loss divided by the target length, as a Python float.
        """
        encoder_hidden = encoder.initHidden()

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        input_length = input.size()[0]
        target_length = target.size()[0]

        # Slots beyond input_length stay zero; the decoder attends over all max_length slots.
        encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))

        loss = 0

        for i in range(input_length):
            encoder_output, encoder_hidden = encoder(input[i], encoder_hidden)
            encoder_outputs[i] = encoder_output[0][0]

        decoder_input = Variable(torch.LongTensor([[SOS_token]]))

        # The decoder starts from the encoder's final hidden state.
        decoder_hidden = encoder_hidden

        use_teacher_forcing = random.random() < teacher_forcing_ratio

        if use_teacher_forcing:
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)

                loss += criterion(decoder_output, target[di])
                decoder_input = target[di]
        else:
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)

                topv, topi = decoder_output.data.topk(1)
                # Convert to a plain int so the comparison and the tensor
                # construction below do not depend on tensor-scalar semantics.
                ni = int(topi[0][0])

                decoder_input = Variable(torch.LongTensor([[ni]]))

                loss += criterion(decoder_output, target[di])

                if ni == EOS_token:
                    break
        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()

        # The loss is a 0-dim tensor on current PyTorch, where `loss.data[0]`
        # raises; `.item()` is the supported way to extract the Python number.
        return loss.item() / target_length
    
    def trainIters(encoder, decoder, n_iters, print_every=10, learning_rate=0.01):
        """Train on ``n_iters`` randomly sampled sentence pairs and print progress.

        Args:
            encoder, decoder: models to train; each gets its own SGD optimizer.
            n_iters: number of single-pair training steps to run.
            print_every: print a progress line every this many steps.
            learning_rate: learning rate for both optimizers.
        """
        encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
        decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

        # Pairs are sampled with replacement from the module-level pairs_data.
        training_pairs = [variablesFromPair(random.choice(pairs_data)) for _ in range(n_iters)]
        criterion = nn.NLLLoss()

        total_loss = 0.0
        # Loss accumulated since the last progress line; reset after each print
        # so the reported average covers exactly `print_every` steps.
        window_loss = 0.0

        for step, (input_variable, target_variable) in enumerate(training_pairs, start=1):
            loss = train(input_variable, target_variable, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
            total_loss += loss
            window_loss += loss

            if step % print_every == 0:
                # Losses are floats, so they are formatted with %.4f; %d would truncate them.
                print('(%d %d%%) loss %.4f total-loss %.4f avg-loss %.4f' % (step, step / n_iters * 100, loss, total_loss, window_loss / print_every))
                window_loss = 0.0
    
    # Run 5000 single-pair training steps with the default print interval and learning rate.
    trainIters(encoder_model, att_decoder_model, 5000)
    
    def evaluate(encoder, decoder, sentence, max_length = MAX_LENGTH):
        """Greedily decode a translation for one encoded source sentence.

        Args:
            encoder, decoder: trained models.
            sentence: sequence of source token ids (passed to variablesFromIds).
            max_length: number of encoder-output slots and the maximum number
                of decoding steps.

        Returns:
            (decoded_words, attentions): the predicted words, ending with
            '<EOS>' if the decoder emitted it, and the attention weights of
            each decoding step that was run (one row per step).
        """
        input_variable = variablesFromIds(sentence)

        input_length = input_variable.size()[0]

        # Decode in evaluation mode so the decoder's dropout layer is inactive,
        # then restore whatever mode each model was in before the call.
        encoder_was_training = encoder.training
        decoder_was_training = decoder.training
        encoder.eval()
        decoder.eval()
        try:
            encoder_hidden = encoder.initHidden()

            encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_variable[ei], encoder_hidden)
                encoder_outputs[ei] = encoder_output[0][0]

            decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS

            decoder_hidden = encoder_hidden

            decoded_words = []
            decoder_attentions = torch.zeros(max_length, max_length)

            for di in range(max_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
                decoder_attentions[di] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                # idx2word is keyed by plain ints; a tensor element used as a
                # key would never match, so convert before the lookup.
                ni = int(topi[0][0])

                if ni == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                else:
                    decoded_words.append(target_lang.dictionary.idx2word[ni])

                decoder_input = Variable(torch.LongTensor([[ni]]))
        finally:
            encoder.train(encoder_was_training)
            decoder.train(decoder_was_training)
        return decoded_words, decoder_attentions[:di + 1]
    
    # NOTE(review): evaluateRandomly is not defined anywhere in the visible source;
    # unless it is provided elsewhere, this call raises NameError.
    evaluateRandomly(encoder_model, att_decoder_model)
    

    结论

    由于训练次数较少,模型的翻译正确率较低;后续会再实现一个对话机器人。

  • 相关阅读:
    第三次个人作业
    第二次结对作业
    第一次结对作业
    第二次编程作业
    第一次编程
    第一次博客作业
    个人总结
    第二次结对作业
    软件工程-个人总结
    第三次个人作业
  • 原文地址:https://www.cnblogs.com/htoooth/p/8676156.html
Copyright © 2011-2022 走看看