zoukankan      html  css  js  c++  java
  • RNN与情感分类问题实战-加载IMDB数据集

    Sentiment Analysis

    44-RNN与情感分类问题实战-加载IMDB数据集-情感分析.jpg

    Two approaches

    • SimpleRNNCell

      • single layer

      • multi-layers

    • RNNCell

    Single layer

    import os
    import tensorflow as tf
    import numpy as np
    from tensorflow import keras
    from tensorflow.keras import layers
    
    tf.random.set_seed(22)
    np.random.seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')
    
    batchsz = 128
    
    # the most frequest words
    total_words = 10000
    max_review_len = 80
    embedding_len = 100
    (x_train,
     y_train), (x_test,
                y_test) = keras.datasets.imdb.load_data(num_words=total_words)
    # x_train:[b, 80]
    # x_test: [b, 80]
    x_train = keras.preprocessing.sequence.pad_sequences(x_train,
                                                         maxlen=max_review_len)
    x_test = keras.preprocessing.sequence.pad_sequences(x_test,
                                                        maxlen=max_review_len)
    
    db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    db_test = db_test.batch(batchsz, drop_remainder=True)
    print('x_train shape:', x_train.shape, tf.reduce_max(y_train),
          tf.reduce_min(y_train))
    print('x_test shape:', x_test.shape)
    
    
    class MyRNN(keras.Model):
        def __init__(self, units):
            super(MyRNN, self).__init__()
    
            # [b, 64]
            self.state0 = [tf.zeros([batchsz, units])]
            self.state1 = [tf.zeros([batchsz, units])]
    
            # transform text to embedding representation
            # [b, 80] => [b, 80, 100]
            self.embedding = layers.Embedding(total_words,
                                              embedding_len,
                                              input_length=max_review_len)
    
            # [b, 80, 100] , h_dim: 64
            # RNN: cell1 ,cell2, cell3
            # SimpleRNN,units=64表示100个向量转成64个初始的状态
            self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
            self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.5)
    
            # fc, [b, 80, 100] => [b, 64] => [b, 1]
            self.outlayer = layers.Dense(1)
    
        def call(self, inputs, training=None):
            """
            net(x) net(x, training=True) :train mode
            net(x, training=False): test
            :param inputs: [b, 80]
            :param training:
            :return:
            """
            # [b, 80]
            x = inputs
            # embedding: [b, 80] => [b, 80, 100]
            x = self.embedding(x)
            # rnn cell compute
            # [b, 80, 100] => [b, 64]
            state0 = self.state0
            state1 = self.state1
            for word in tf.unstack(x, axis=1):  # word: [b, 100]
                # h1 = x*wxh+h0*whh
                # out0: [b, 64]
                out0, state0 = self.rnn_cell0(word, state0, training)
                # out1: [b, 64]
                out1, state1 = self.rnn_cell1(out0, state1, training)
    
            # out: [b, 64] => [b, 1]
            x = self.outlayer(out1)
            # p(y is pos|x)
            prob = tf.sigmoid(x)
    
            return prob
    
    
    def main():
        units = 64
        epochs = 4
    
        model = MyRNN(units)
        model.compile(optimizer=keras.optimizers.Adam(0.001),
                      loss=tf.losses.BinaryCrossentropy(),
                      metrics=['accuracy'])
        model.fit(db_train, epochs=epochs, validation_data=db_test)
    
        model.evaluate(db_test)
    
    
    if __name__ == '__main__':
        main()
    

    Multi-layers

    import os
    import tensorflow as tf
    import numpy as np
    from tensorflow import keras
    from tensorflow.keras import layers
    
    tf.random.set_seed(22)
    np.random.seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')
    
    batchsz = 128
    
    # the most frequest words
    total_words = 10000  # 编码10000个单词
    max_review_len = 80  # 句子长度80
    embedding_len = 100
    (x_train,
     y_train), (x_test,
                y_test) = keras.datasets.imdb.load_data(num_words=total_words)
    # x_train:[b, 80]
    # x_test: [b, 80]
    x_train = keras.preprocessing.sequence.pad_sequences(x_train,
                                                         maxlen=max_review_len)
    x_test = keras.preprocessing.sequence.pad_sequences(x_test,
                                                        maxlen=max_review_len)
    
    db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    # drop_remainder,丢弃最后一个大小不合适的batch
    db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    db_test = db_test.batch(batchsz, drop_remainder=True)
    print('x_train shape:', x_train.shape, tf.reduce_max(y_train),
          tf.reduce_min(y_train))
    print('x_test shape:', x_test.shape)
    
    
    class MyRNN(keras.Model):
        def __init__(self, units):
            super(MyRNN, self).__init__()
    
            # transform text to embedding representation
            # [b, 80] => [b, 80, 100]  # embedding_len=100表示一个单词为100的向量
            self.embedding = layers.Embedding(total_words,
                                              embedding_len,
                                              input_length=max_review_len)
    
            # [b, 80, 100] , h_dim: 64
            self.rnn = keras.Sequential([
                layers.SimpleRNN(units,
                                 dropout=0.5,
                                 return_sequences=True,
                                 unroll=True),
                layers.SimpleRNN(units, dropout=0.5, unroll=True)
            ])
    
            # fc, [b, 80, 100] => [b, 64] => [b, 1] # 得到分类结果
            self.outlayer = layers.Dense(1)
    
        def call(self, inputs, training=None):
            """
            net(x) net(x, training=True) :train mode
            net(x, training=False): test
            :param inputs: [b, 80]
            :param training: 计算过程是train还是test
            :return:
            """
            # [b, 80]
            x = inputs
            # embedding: [b, 80] => [b, 80, 100]
            x = self.embedding(x)
            # rnn cell compute
            # x: [b, 80, 100] => [b, 64]
            x = self.rnn(x)
    
            # out: [b, 64] => [b, 1]
            x = self.outlayer(x)
            # p(y is pos|x)
            prob = tf.sigmoid(x)
    
            return prob
    
    
    def main():
        units = 64
        epochs = 4
    
        model = MyRNN(units)
        model.compile(optimizer=keras.optimizers.Adam(0.001),
                      loss=tf.losses.BinaryCrossentropy(),
                      metrics=['accuracy'])
        model.fit(db_train, epochs=epochs, validation_data=db_test)
    
        model.evaluate(db_test)
    
    
    if __name__ == '__main__':
        main()
    
  • 相关阅读:
    gcc链接g++编译生成的静态库和动态库的makefile示例
    gcc将多个静态库链接成一个静态库
    linux c redirect 重定向
    iOS开发如何提高
    致PHP路上的“年轻人”
    显示系统时间--带有秒数
    在 Linux 中使用日志来排错
    程序员必备:技术面试准备手册
    你的Java代码对JIT编译友好么?
    悟空:用Go语言编写的全文搜索引擎
  • 原文地址:https://www.cnblogs.com/nickchen121/p/10963848.html
Copyright © 2011-2022 走看看