zoukankan      html  css  js  c++  java
  • 深度学习情感分析(随机梯度下降代码实现)

    1.代码没有引入深度学习包,实现了简单的随机梯度下降算法。

    2.理论较简单。

    # coding:utf8
    # Author:Chaz
    import sys
    import time
    import numpy as np

    # Load one review per line; [:-1] strips the trailing newline.
    # `with` guarantees the handle is closed even if reading raises.
    with open("reviews.txt", "r") as g:
        reviews = [line[:-1] for line in g]

    # Load one label per line, uppercased so values compare equal to the
    # "POSITIVE"/"NEGATIVE" strings the network checks against.
    with open("labels.txt", "r") as f:
        labels = [line[:-1].upper() for line in f]
    
    class SentimentNetwork():
        """A minimal one-hidden-layer network for binary sentiment
        classification, trained by plain stochastic gradient descent
        (no deep-learning framework needed).

        Input layer: one-hot/bag-of-words over the review vocabulary.
        Hidden layer: linear (no nonlinearity). Output: single sigmoid unit.
        """

        def __init__(self, reviews, labels, hidden_nodes=10, learning_rate=0.1):
            """Build vocabularies from the corpus and initialise weights.

            reviews       -- list of review strings (space-separated words)
            labels        -- list of label strings ("POSITIVE"/"NEGATIVE")
            hidden_nodes  -- hidden layer width
            learning_rate -- SGD step size
            """
            np.random.seed(1)  # fixed seed so runs are reproducible
            self.pre_process_data(reviews, labels)
            self.init_network(len(self.review_vocab), hidden_nodes, 1, learning_rate)

        def pre_process_data(self, reviews, labels):
            """Collect word/label vocabularies and their index lookup tables."""
            review_vocab = set()
            for review in reviews:
                for word in review.split(" "):
                    review_vocab.add(word)
            self.review_vocab = list(review_vocab)

            label_vocab = set()
            for label in labels:
                label_vocab.add(label)
            self.label_vocab = list(label_vocab)

            self.review_vocab_size = len(self.review_vocab)
            self.label_vocab_size = len(self.label_vocab)

            # Enumerate the *lists* (not the sets) so the index mapping is
            # guaranteed to agree with self.review_vocab / self.label_vocab.
            self.word2index = {}
            for i, word in enumerate(self.review_vocab):
                self.word2index[word] = i

            self.label2index = {}
            for i, label in enumerate(self.label_vocab):
                self.label2index[label] = i

        def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
            """Allocate weight matrices and reusable layer buffers.

            weight_0_1 (input->hidden) starts at zero; weight_1_2
            (hidden->output) is drawn from a small normal distribution.
            """
            self.input_nodes = input_nodes
            self.hidden_nodes = hidden_nodes
            self.output_nodes = output_nodes

            self.learning_rate = learning_rate

            self.weight_0_1 = np.zeros((self.input_nodes, self.hidden_nodes))
            self.weight_1_2 = np.random.normal(0.0, self.output_nodes ** -0.5,
                                               (self.hidden_nodes, self.output_nodes))

            self.layer_0 = np.zeros((1, self.input_nodes))
            self.layer_1 = np.zeros((1, hidden_nodes))

        def update_input_layer(self, review):
            """Fill layer_0 with the binary bag-of-words for `review`.
            Unknown words are silently ignored."""
            self.layer_0 *= 0
            for word in review.split(" "):
                if word in self.word2index.keys():
                    self.layer_0[0][self.word2index[word]] = 1

        def get_target_for_label(self, label):
            """Map a label string to its training target (POSITIVE->1, else 0)."""
            if label == "POSITIVE":
                return 1
            else:
                return 0

        def sigmoid(self, x):
            """Logistic activation."""
            return 1 / (1 + np.exp(-x))

        def sigmoid_output_2_derivative(self, output):
            """Derivative of the sigmoid, expressed via its output value."""
            return output * (1 - output)

        def train(self, train_reviews_raw, train_labels):
            """One SGD pass over the training set, printing running accuracy.

            The forward pass exploits sparsity: instead of a full matrix
            multiply, the hidden activations are the sum of weight rows for
            the words present in the review.
            """
            # Pre-convert each review to the set of its known word indices.
            train_reviews = list()
            for review in train_reviews_raw:
                indices = set()
                for word in review.split(" "):
                    if (word in self.word2index.keys()):
                        indices.add(self.word2index[word])
                train_reviews.append(list(indices))
            assert (len(train_reviews) == len(train_labels))
            correct_so_far = 0
            start = time.time()

            for i in range(len(train_reviews)):
                review = train_reviews[i]
                label = train_labels[i]
                # Kept for state consistency with run(); the sparse sum below
                # is what actually feeds the forward pass.
                self.update_input_layer(train_reviews_raw[i])
                self.layer_1 *= 0
                for index in review:
                    self.layer_1 += self.weight_0_1[index]

                layer_2 = self.sigmoid(self.layer_1.dot(self.weight_1_2))

                # Output error is (prediction - target); its sign drives descent.
                layer_2_error = layer_2 - self.get_target_for_label(label)
                layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)

                # Backpropagate to the hidden layer; it is linear, so the
                # delta equals the propagated error.
                layer_1_error = layer_2_delta.dot(self.weight_1_2.T)
                layer_1_delta = layer_1_error

                # Gradient-descent updates (only rows for words present).
                self.weight_1_2 -= self.layer_1.T.dot(layer_2_delta) * self.learning_rate
                for index in review:
                    self.weight_0_1[index] -= layer_1_delta[0] * self.learning_rate

                # layer_2 is a (1,1) array; index explicitly rather than
                # relying on ndarray truth-testing.
                if layer_2[0][0] > 0.5 and label == "POSITIVE":
                    correct_so_far += 1
                elif layer_2[0][0] < 0.5 and label == "NEGATIVE":
                    correct_so_far += 1

                elapsed_time = float(time.time() - start)

                reviews_per_second = i / elapsed_time if elapsed_time > 0 else 0

                # "\r" rewrites the same console line in place.
                sys.stdout.write("\rProgress:" + str(100 * i / float(len(train_reviews)))[:4]
                                 + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5]
                                 + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i + 1)
                                 + " Training Accuracy:" + str(correct_so_far * 100 / float(i + 1))[:4] + "%")
                if i % 2500 == 0:
                    print("")

        def test(self, test_reviews, test_labels):
            """Score the model on held-out data, printing running accuracy."""
            correct = 0
            start = time.time()

            for i in range(len(test_reviews)):
                pred = self.run(test_reviews[i])
                if pred == test_labels[i]:
                    correct += 1
                elapsed_time = float(time.time() - start)

                reviews_per_second = i / elapsed_time if elapsed_time > 0 else 0
                # "\r" rewrites the same console line in place.
                sys.stdout.write("\rProgress:" + str(100 * i / float(len(test_reviews)))[:4]
                                 + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5]
                                 + " #Correct:" + str(correct) + " #Tested:" + str(i + 1)
                                 + " Test Accuracy:" + str(correct * 100 / float(i + 1))[:4] + "%")

        def run(self, review):
            """Classify a single review string as "POSITIVE" or "NEGATIVE"."""
            self.update_input_layer(review.lower())
            layer_1 = self.layer_0.dot(self.weight_0_1)
            layer_2 = self.sigmoid(layer_1.dot(self.weight_1_2))

            if layer_2[0][0] > 0.5:
                return "POSITIVE"
            else:
                return "NEGATIVE"
    
    # Hold out the final 1,000 examples for evaluation; train on the rest.
    train_reviews, train_labels = reviews[:-1000], labels[:-1000]
    test_reviews, test_labels = reviews[-1000:], labels[-1000:]

    mlp = SentimentNetwork(train_reviews, train_labels, learning_rate=0.001)
    mlp.train(train_reviews, train_labels)
    mlp.test(test_reviews, test_labels)
    View Code

    反向传播要点:某一层的误差 error = 后一层delta · 后一层权重.T;该层 delta = error * 该层激活函数导数;该层权重梯度 = 该层输入.T · 该层delta,更新时再乘以学习速率。

  • 相关阅读:
    (转)消息队列 Kafka 的基本知识及 .NET Core 客户端
    Neo4j学习笔记
    科技论文推荐系统
    下载pubmed数据
    杂项
    Scrapy 知乎验证码
    Scrapy 爬取网站文章
    爬虫基础知识
    Django linux uWsgi Nginx 部署
    DocumentSimilarity
  • 原文地址:https://www.cnblogs.com/jackzone/p/8413745.html
Copyright © 2011-2022 走看看