  • Keras learning notes (4)

    1. GPU configuration

    import tensorflow as tf
    
    #GPUs available to the current program
    gpu=tf.config.experimental.list_physical_devices(device_type='GPU')
    cpu=tf.config.experimental.list_physical_devices(device_type='CPU')
    
    gpu
    
    cpu
    
    #Set which GPUs are visible to this program
    tf.config.experimental.set_visible_devices(devices=gpu[0:2],device_type='GPU')
    
    #Another way to expose only some GPUs to the current program (must be set before TensorFlow initializes the GPUs)
    import os
    os.environ['CUDA_VISIBLE_DEVICES']='2,3'
    
    #Let GPUs allocate memory on demand (memory growth)
    gpus=tf.config.experimental.list_physical_devices(device_type='GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(device=gpu,enable=True)
    
    #Limit a GPU to only part of its memory (here 1024 MB)
    gpus=tf.config.experimental.list_physical_devices(device_type='GPU')
    tf.config.experimental.set_virtual_device_configuration(gpus[0],[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)])
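    
    #After configuring devices it can be worth checking what the program actually sees.
    #A minimal sketch (list_logical_devices reflects the virtual-device split configured above):
    logical_gpus=tf.config.experimental.list_logical_devices(device_type='GPU')
    len(gpus),len(logical_gpus)#physical vs. logical GPUs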
    

     2. Image semantic segmentation with a fully convolutional network (FCN)

    #Fully convolutional network (FCN)
    import tensorflow as tf
    import matplotlib.pyplot as plt
    import numpy as np
    import os
    import glob
    
    os.listdir('./annotations/trimaps')[-5:]
    
    img=tf.io.read_file('./annotations/trimaps/Abyssinian_1.png')
    
    img=tf.image.decode_png(img)
    
    img.shape
    
    img=tf.squeeze(img)#squeeze out the size-1 dimensions to reduce the rank
    
    img.shape
    
    plt.imshow(img)
    
    #Check how many distinct pixel values the label image contains (in the Oxford-IIIT Pet trimaps: 1=pet, 2=background, 3=border)
    np.unique(img.numpy())
    
    img=tf.io.read_file('./images/Abyssinian_1.jpg')
    
    img=tf.image.decode_jpeg(img)
    
    plt.imshow(img)
    
    #Start organizing the data
    images=glob.glob('./images/*.jpg')
    
    images_count=len(images)
    
    images_count
    
    anno=glob.glob('./annotations/trimaps/*.png')
    
    len(anno)
    
    np.random.seed(2019)
    index=np.random.permutation(images_count)
    
    images=np.array(images)[index]
    anno=np.array(anno)[index]#the same permutation is applied, so images and annotations stay paired
    
    dataset=tf.data.Dataset.from_tensor_slices((images,anno))#the image and label file paths, passed in as a tuple
    
    test_count=int(images_count*0.2)
    train_count=images_count-test_count
    
    test_count,train_count
    
    data_train=dataset.skip(test_count)
    data_test=dataset.take(test_count)
    
    #Image preprocessing
    def read_jpg(path):
        img=tf.io.read_file(path)
        img=tf.image.decode_jpeg(img,channels=3)
        return img
    def read_png(path):
        img=tf.io.read_file(path)
        img=tf.image.decode_png(img,channels=1)
        return img
    
    def normal_img(input_image,input_anno):
        input_image=tf.cast(input_image,tf.float32)/127.5-1#scale pixels to [-1, 1]
        input_anno=input_anno-1#map the label values 1,2,3 to 0,1,2
        return input_image,input_anno
    
    def load_images(input_image_path,input_anno_path):
        input_image=read_jpg(input_image_path)
        input_anno=read_png(input_anno_path)
        input_image=tf.image.resize(input_image,(224,224))
        input_anno=tf.image.resize(input_anno,(224,224),method='nearest')#nearest-neighbour resizing keeps the label values integral
        return normal_img(input_image,input_anno)
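    
    #A quick check of the preprocessing on a single pair (it reuses the sample files read above):
    sample_img,sample_anno=load_images('./images/Abyssinian_1.jpg','./annotations/trimaps/Abyssinian_1.png')
    sample_img.shape,sample_anno.shape#(TensorShape([224, 224, 3]), TensorShape([224, 224, 1]))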
    
    data_train=data_train.map(load_images,num_parallel_calls=tf.data.experimental.AUTOTUNE)
    data_test=data_test.map(load_images,num_parallel_calls=tf.data.experimental.AUTOTUNE)
    
    BATCH_SIZE=8
    
    data_train=data_train.repeat().shuffle(100).batch(BATCH_SIZE)
    data_test=data_test.batch(BATCH_SIZE)
    
    data_train
    
    for img,anno in data_train.take(1):
        plt.subplot(1,2,1)
        plt.imshow(tf.keras.preprocessing.image.array_to_img(img[0]))
        plt.subplot(1,2,2)
        plt.imshow(tf.keras.preprocessing.image.array_to_img(anno[0]))
    
    #Use a pretrained network
    conv_base=tf.keras.applications.VGG16(weights='imagenet',input_shape=(224,224,3),include_top=False)
    
    conv_base.summary()
    
    conv_base.layers
    
    #Get a layer by name
    conv_base.get_layer('block5_conv3')
    
    conv_base.get_layer('block5_conv3').output
    
    submodel=tf.keras.models.Model(inputs=conv_base.input,outputs=conv_base.get_layer('block5_conv3').output)#drop the final pooling layer
    
    submodel.summary()
    
    layer_names=['block5_conv3','block4_conv3','block3_conv3','block5_pool']
    
    #Extract multiple output layers
    layers_output=[conv_base.get_layer(layer_name).output for layer_name in layer_names]
    
    multi_out_model=tf.keras.models.Model(inputs=conv_base.input,outputs=layers_output)
    
    multi_out_model.trainable=False
    
    multi_out_model.summary()
    
    inputs=tf.keras.layers.Input(shape=(224,224,3))
    out_block5_conv3,out_block4_conv3,out_block3_conv3,out=multi_out_model(inputs)
    
    
    out.shape,out_block5_conv3.shape,out_block4_conv3.shape,out_block3_conv3.shape
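    
    #For reference, with a 224x224 input the VGG16 feature maps used here should be:
    #block5_pool   (None, 7, 7, 512)
    #block5_conv3  (None, 14, 14, 512)
    #block4_conv3  (None, 28, 28, 512)
    #block3_conv3  (None, 56, 56, 256)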
    
    #Upsampling (transposed convolution)
    x1=tf.keras.layers.Conv2DTranspose(512,3,strides=2,padding='same',activation='relu')(out)
    
    
    x1.shape
    
    x1=tf.keras.layers.Conv2D(512,3,padding='same',activation='relu')(x1)
    
    x1.shape
    
    x2=tf.add(x1,out_block5_conv3)
    
    x2.shape
    
    x2=tf.keras.layers.Conv2DTranspose(512,3,strides=2,padding='same',activation='relu')(x2)
    x2=tf.keras.layers.Conv2D(512,3,padding='same',activation='relu')(x2)
    x3=tf.add(x2,out_block4_conv3)
    
    x3.shape
    
    x3=tf.keras.layers.Conv2DTranspose(256,3,strides=2,padding='same',activation='relu')(x3)
    x3=tf.keras.layers.Conv2D(256,3,padding='same',activation='relu')(x3)
    x4=tf.add(x3,out_block3_conv3)
    
    x4.shape
    
    #Upsampling
    x5=tf.keras.layers.Conv2DTranspose(128,3,strides=2,padding='same',activation='relu')(x4)
    x5=tf.keras.layers.Conv2D(128,3,padding='same',activation='relu')(x5)
    prediction=tf.keras.layers.Conv2DTranspose(3,3,strides=2,padding='same',activation='softmax')(x5)
    
    prediction.shape#(None, 224, 224, 3): one score per class for every pixel
    
    model=tf.keras.models.Model(inputs=inputs,outputs=prediction)
    
    model.summary()
    
    #Model configuration
    model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['acc'])
    
    #Model training
    history=model.fit(data_train,epochs=3,steps_per_epoch=train_count//BATCH_SIZE,validation_data=data_test,validation_steps=test_count//BATCH_SIZE)
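    
    #A minimal sketch for inspecting the training curves (assumes the 'acc'/'val_acc' and 'loss'/'val_loss' keys recorded by the compile step above):
    plt.plot(history.epoch,history.history['acc'],label='acc')
    plt.plot(history.epoch,history.history['val_acc'],label='val_acc')
    plt.legend()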
    
    num=3
    for image,mask in data_test.take(1):#take one batch
        pred_mask=model.predict(image)
        pred_mask=tf.argmax(pred_mask,axis=-1)
        pred_mask=pred_mask[...,tf.newaxis]
        plt.figure(figsize=(10,10))
        for i in range(num):
            plt.subplot(num,3,i*num+1)
            plt.imshow(tf.keras.preprocessing.image.array_to_img(image[i]))
            plt.subplot(num,3,i*num+2)
            plt.imshow(tf.keras.preprocessing.image.array_to_img(mask[i]))
            plt.subplot(num,3,i*num+3)
            plt.imshow(tf.keras.preprocessing.image.array_to_img(pred_mask[i]))
    

     3. RNN: airline tweet sentiment prediction

    #Airline tweets
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
    import numpy as np
    import pandas as pd
    
    data=pd.read_csv('Tweets.csv')
    
    data.head()
    
    data=data[['airline_sentiment','text']]#keep only these two columns
    
    data.head()
    
    data.airline_sentiment.unique()
    
    data.airline_sentiment.value_counts()
    
    data_p=data[data.airline_sentiment=='positive']
    
    data_n=data[data.airline_sentiment=='negative']
    
    data_n=data_n.iloc[:len(data_p)]#make the positive and negative samples equal in number
    
    data=pd.concat([data_n,data_p])#stack the two subsets vertically
    
    data
    
    #Shuffle the rows
    data=data.sample(len(data))
    
    #Preprocessing: turn the labels into numbers
    data['review']=(data.airline_sentiment=='positive').astype(int)#1 for positive, 0 for negative, stored in a new column 'review'
    
    
    del data['airline_sentiment']#drop the text label column once it is numeric
    
    data
    
    #Process the text: word vectorization with tf.keras.layers.Embedding
    import re 
    token=re.compile('[A-Za-z]+|[!?,.()]')#regex that extracts words (upper or lower case) and common punctuation
    
    
    def reg_text(text):
        new_text=token.findall(text)
        new_text=[word.lower() for word in new_text]#lowercase every word
        return new_text
    
    data['text']=data.text.apply(reg_text)
    
    word_set=set()#build the vocabulary by collecting every word from every text
    for text in data.text:
        for word in text:
            word_set.add(word)
    
    max_word=1+len(word_set)#vocabulary size plus 1, because index 0 is reserved for padding
    
    word_list=list(word_set)#convert the set to a list; list positions give each word an index
    
    word_index=dict((word,word_list.index(word)+1) for word in word_list)#word -> index dict; indices start at 1 because 0 is used for padding
    
    word_index
    
    #Convert each token list into a list of indices
    data_ok=data.text.apply(lambda x:[word_index.get(word,0) for word in x])
    
    maxlen=max(len(x) for x in data_ok)#length of the longest review
    
    #Pad every sequence to the same length (shorter reviews are padded with 0)
    data_ok=keras.preprocessing.sequence.pad_sequences(data_ok.values,maxlen)
    
    data_ok.shape#(4726, 40)
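    
    #A small illustration of what pad_sequences does (by default it pads with 0 at the front):
    keras.preprocessing.sequence.pad_sequences([[5,2],[7]],maxlen=4)
    #array([[0, 0, 5, 2],
    #       [0, 0, 0, 7]], dtype=int32)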
    
    #Build the model
    model=keras.Sequential()
    #Embedding maps the word indices to dense vectors
    model.add(layers.Embedding(max_word,50,input_length=maxlen))#vocabulary size, embedding dimension, sequence length
    
    model.add(layers.LSTM(64))#64 hidden units
    
    model.add(layers.Dense(1,activation='sigmoid'))
    
    model.summary()
    
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['acc'])
    
    model.fit(data_ok,data.review.values,epochs=5,batch_size=128,validation_split=0.2)#use 20% of the data for validation
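    
    #A minimal inference sketch (it reuses reg_text, word_index and maxlen from above; the sentence itself is made up):
    sample=reg_text('thank you for the great flight !')
    sample=[word_index.get(word,0) for word in sample]
    sample=keras.preprocessing.sequence.pad_sequences([sample],maxlen)
    model.predict(sample)#outputs close to 1 mean 'positive', close to 0 mean 'negative'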
    

     4. RNN: Beijing air-pollution time-series prediction

    from tensorflow import keras
    from tensorflow.keras import layers
    import pandas as pd 
    import numpy as np
    import matplotlib.pyplot as plt
    %matplotlib inline
    
    data=pd.read_csv('./PRSA_data_2010.1.1-2014.12.31.csv')
    
    data.head()
    
    data.info()#dataset info
    
    data.tail()#last 5 rows
    
    #Because this is sequential data, dropping NaN rows would break the sequence, so fill them instead
    data.columns
    
    data['pm2.5'].isna().sum()#count the NaN values in pm2.5
    
    data=data.iloc[24:].fillna(method='ffill')#the first 24 rows are all NaN and can be dropped; the rest are forward-filled
    
    data['pm2.5'].isna().sum()
    
    data.head()
    
    #Build a timestamp column and drop the redundant columns
    import datetime
    data['tm']=data.apply(lambda x: datetime.datetime(year=x['year'],month=x['month'],day=x['day'],hour=x['hour']),axis=1)#apply row-wise
    data.drop(columns=['year','month','day','hour','No'],inplace=True)
    
    data=data.set_index('tm')#use the timestamp as the index
    
    #cbwd is the wind direction; convert it to numbers via one-hot encoding
    data.cbwd.unique()
    
    data=data.join(pd.get_dummies(data.cbwd))
    del data['cbwd']
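    
    #A small illustration of what get_dummies does for a categorical column
    #(the values mirror the wind directions found in this dataset):
    pd.get_dummies(pd.Series(['NW','cv','NE','NW']))
    #-> one indicator column per category (NE, NW, cv), with a 1 (or True, depending on the pandas version) marking each row's direction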
    
    data
    
    seq_length=5*24#how much history each sample uses as input; here the previous 5 days of hourly data
    delay=24#how far ahead to predict; here 24 hours
    
    data_=[]
    for i in range(len(data)-seq_length-delay):#slide a window over the data; the tail that cannot form a full window is skipped
        data_.append(data.iloc[i:i+seq_length+delay])
    
    data_[0].shape#(144, 11)
    
    data_=np.array([df.values for df in data_])
    
    data_.shape
    
    #Shuffle the windows
    np.random.shuffle(data_)
    
    x=data_[:,:5*24,:]
    
    y=data_[:,-1,0]#target: pm2.5 (the first column) of the last row of each window
    
    x.shape
    
    y.shape
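    
    #Each window therefore covers 144 consecutive hours: the first 120 hours form the input x,
    #and the target y is the pm2.5 value of the final hour, i.e. 24 hours after the last input hour.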
    
    #Split into training and test sets
    split_b=int(data_.shape[0]*0.8)
    
    train_x=x[:split_b]
    train_y=y[:split_b]
    test_x=x[split_b:]
    test_y=y[split_b:]
    
    train_x.shape,train_y.shape,test_x.shape,test_y.shape#((34924, 120, 11), (34924,), (8732, 120, 11), (8732,))
    
    #Standardize the data with the training-set statistics
    mean=train_x.mean(axis=0)#mean over the samples (one value per time step and feature)
    std=train_x.std(axis=0)
    train_x=(train_x-mean)/std
    test_x=(test_x-mean)/std
    
    #Build a dense baseline model
    batch_size=128
    model=keras.Sequential()
    model.add(layers.Flatten(input_shape=(train_x.shape[1:])))#Flatten destroys the temporal order, so this dense baseline is not well suited to sequence prediction
    model.add(layers.Dense(32,activation='relu'))
    model.add(layers.Dense(1))
    
    model.compile(optimizer='adam',loss='mse',metrics=['mae'])
    
    history=model.fit(train_x,train_y,batch_size=batch_size,epochs=10,validation_data=(test_x,test_y))
    
    history.history.keys()
    
    import matplotlib.pyplot as plt
    %matplotlib inline
    plt.plot(history.epoch,history.history['val_mae'],c='r')
    plt.plot(history.epoch,history.history['mae'],c='b')
    
    #Sequence prediction with an LSTM network
    model=keras.Sequential()
    #With return_sequences=True an LSTM outputs all 120 time steps so another LSTM can be stacked on top; otherwise it returns only the last step's output
    model.add(layers.LSTM(32,input_shape=(120,11),return_sequences=True))#32 hidden units; input is (time steps, features per step); the default activation is tanh
    model.add(layers.LSTM(32,return_sequences=True))
    model.add(layers.LSTM(32))#the last LSTM only needs its final output
    
    model.add(layers.Dense(1))
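    
    #A quick way to see what return_sequences changes (tmp_in is a throwaway input tensor, not part of the model above):
    tmp_in=keras.Input(shape=(120,11))
    layers.LSTM(32,return_sequences=True)(tmp_in).shape#(None, 120, 32): one output per time step
    layers.LSTM(32)(tmp_in).shape#(None, 32): only the last step's output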
    
    #Use a callback to lower the learning rate during training
    lr_reduce=keras.callbacks.ReduceLROnPlateau('val_loss',patience=3,factor=0.5,min_lr=0.000001)#if 'val_loss' has not improved for 3 epochs, multiply the learning rate by 0.5, but never go below min_lr
    
    model.compile(optimizer='adam',loss='mse',metrics=['mae'])
    history=model.fit(train_x,train_y,batch_size=batch_size,epochs=10,callbacks=[lr_reduce],validation_data=(test_x,test_y))
    
    import matplotlib.pyplot as plt
    %matplotlib inline
    plt.plot(history.epoch,history.history['val_mae'],c='r')
    plt.plot(history.epoch,history.history['mae'],c='b')
    
    #Evaluate the model and make predictions
    model.evaluate(test_x,test_y,verbose=0)
    
    pre_test=model.predict(test_x)
    
    test_x.shape,pre_test.shape
    
    pre_test[:5]#first 5 predictions
    
    #Single-sample prediction from the most recent 120 hours of data
    data_test=data[-120:].values
    
    #the model was trained on standardized inputs, so apply the same training mean/std
    data_test=(data_test-mean)/std
    
    data_test.shape#(120, 11); a leading batch dimension still has to be added
    
    data_test=np.expand_dims(data_test,0)
    
    data_test.shape#(1, 120, 11)
    
    model.predict(data_test)
    

      
