zoukankan      html  css  js  c++  java
  • 电影评论分类:二分类问题

     ## Data preprocessing
     # Load the IMDB dataset; num_words=10000 restricts the vocabulary to the
     # 10,000 most frequent words (see the Keras imdb.load_data documentation).
     from keras.datasets import imdb

     (train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

     # word_index maps word -> integer id; invert it so ids can be turned back
     # into words.
     word_index = imdb.get_word_index()
     reverse_word_index = {index: word for word, index in word_index.items()}

     # Decode the first training review back into text.
     # Ids are shifted down by 3 before lookup (this matches the default
     # index_from=3 offset of imdb.load_data); ids with no matching word are
     # rendered as '?'.
     # The words are joined with a single space so the result is readable;
     # joining with an empty string would glue all words together.
     decoded_review = ' '.join(
         reverse_word_index.get(i - 3, '?') for i in train_data[0]
     )
     decoded_review  # notebook-style echo of the decoded review
    11 import numpy as np
    def vectorize_sequences(sequences, dimension=10000):
        """Multi-hot encode integer sequences into a 2-D array.

        Args:
            sequences: Sized collection of integer-index sequences; each
                index is used directly as a column position in its row.
            dimension: Number of columns in the output array.

        Returns:
            A ``numpy.ndarray`` of shape ``(len(sequences), dimension)``
            (NumPy's default float dtype) in which ``result[i, j]`` is 1
            when index ``j`` appears in ``sequences[i]`` and 0 otherwise.
        """
        result = np.zeros((len(sequences), dimension))
        for i, sequence in enumerate(sequences):
            # Integer-array indexing sets every listed column of row i in one
            # assignment; an index that repeats just writes 1 again.
            result[i, sequence] = 1
        return result
    # Multi-hot encode the training and test reviews.
    x_train = vectorize_sequences(train_data)
    x_test = vectorize_sequences(test_data)
    ## Vectorize the labels: convert them to float32 arrays.
    y_train = np.asarray(train_labels).astype('float32')
    y_test = np.asarray(test_labels).astype('float32')
    22 
    from keras import models
    from keras import layers

    # Define the model: two 16-unit ReLU hidden layers followed by a single
    # sigmoid output unit; the first layer expects 10,000-dimensional inputs.
    model = models.Sequential()
    model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))

    # Compile the model, naming the optimizer, loss and metric by string.
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Configure the optimizer: pass an optimizer instance instead of a string
    # so its learning rate can be set explicitly. The keyword is
    # `learning_rate`; the older `lr` spelling is deprecated and is rejected
    # by current Keras releases.
    from keras import optimizers
    model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    ## Use custom losses and metrics: pass function objects rather than
    ## string names.
    from keras import losses
    from keras import metrics
    model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
                  loss=losses.binary_crossentropy,
                  metrics=[metrics.binary_accuracy])
    ## Set aside a validation set: the first 10,000 training samples are held
    ## out for validation and the remainder is used for fitting.
    x_val = x_train[:10000]
    partial_x_train = x_train[10000:]
    y_val = y_train[:10000]
    partial_y_train = y_train[10000:]
    50 
    # Train the model
    # The model is compiled once more here, this time tracking the 'acc'
    # metric, before fitting on the partial training set.
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['acc'])
    # 20 epochs in mini-batches of 512 samples; the held-out split is passed
    # as validation data.
    history = model.fit(partial_x_train,
                        partial_y_train,
                        epochs=20,
                        batch_size=512,
                        validation_data=(x_val, y_val))
    ## model.fit() returns a history object; its `history` attribute is the
    ## dictionary read by the plotting code ('loss', 'val_loss', ...).
    history_dict = history.history
    history_dict.keys()  # notebook-style echo of the recorded keys
    63 
    64 ##绘制训练损失和验证损失
    65 %matplotlib inline
    66 import matplotlib.pyplot as plt
    67 history_dict = history.history
    68 loss_values = history_dict['loss']
    69 val_loss_values = history_dict['val_loss']
    70 epochs = range(1, len(loss_values) + 1)
    71 plt.plot(epochs, loss_values, 'bo', label = 'Training loss')
    72 plt.plot(epochs, val_loss_values, 'b', label = 'Validation loss')
    73 plt.title('Training and Validation loss')
    74 plt.xlabel('Epochs')
    75 plt.ylabel('Loss')
    76 plt.legend()
  • 相关阅读:
    自学Linux Shell2.1-进入shell命令行
    自学Linux Shell1.3-Linux文件系统
    自学Linux Shell1.2-Linux目录结构
    自学Linux Shell1.1-Linux初识
    03 自学Aruba之2.4GHz及5GHz无线信道
    02 自学Aruba之无线频段---ISM频段及UNII频段
    01 自学Aruba之功率单位和相对单位
    1.Zabbix报错信息:It probably means that the systems requires more physical memory.
    自学Aruba5.3.4-Aruba安全认证-有PEFNG 许可证环境的认证配置802.1x
    自学Aruba5.3.3-Aruba安全认证-有PEFNG 许可证环境的认证配置Captive-Portal
  • 原文地址:https://www.cnblogs.com/wangmengzhu/p/10748343.html
Copyright © 2011-2022 走看看