zoukankan      html  css  js  c++  java
  • Transformer+CNN+Loss

    from tensorflow.keras.callbacks import EarlyStopping
    import tensorflow as tf
    import time
    import numpy as np
    import matplotlib.pyplot as plt
    import sys
    from tensorflow import keras
    import os
    from tensorflow import nn
    import math
    
    # Low-level device setup: require at least one GPU and let TensorFlow grow
    # its memory allocation on the first GPU on demand.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    assert len(physical_devices) >= 1, "Not enough GPU hardware devices available"
    tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)

    # Optional: pin the process to a specific GPU.
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    
    def my_loss(y_true, y_pred):
        '''
        Piecewise-weighted squared error.

        :param y_true: ground-truth tensor
        :param y_pred: predicted tensor
        :return: element-wise weighted squared error (no reduction is applied here)
        '''
        squared_err = tf.square(y_true - y_pred)

        # Squared errors below 25 contribute at half weight.
        weighted = tf.where(squared_err < 25., squared_err * 0.5, squared_err)
        # Elements whose target exceeds 30 are weighted twice as heavily.
        weighted = tf.where(y_true > 30., weighted * 2, weighted)

        return weighted
    def positional_encoding(pos, d_model):
        '''
        Build the sinusoidal positional-encoding table.

        :param pos: number of positions (sequence length) to encode
        :param d_model: size of the hidden dimension (i indexes this dimension)
        :return: float32 tensor of shape [1, pos, d_model]; the leading axis
                 exists so the table broadcasts over the batch dimension
        '''
        def get_angles(position, i):
            # i plays the role of 2i / 2i+1 in the formula.
            # Result shape: [pos, d_model].
            # Builtin float replaces np.float, which was removed in NumPy 1.24.
            return position / np.power(10000., 2. * (i // 2.) / float(d_model))

        angle_rates = get_angles(np.arange(pos)[:, np.newaxis],
                                 np.arange(d_model)[np.newaxis, :])
        # Even feature indices feed the sine, odd indices the cosine. The two
        # halves are concatenated (all sines first, then all cosines) rather
        # than interleaved.
        pe_sin = np.sin(angle_rates[:, 0::2])
        pe_cos = np.cos(angle_rates[:, 1::2])
        pos_encoding = np.concatenate([pe_sin, pe_cos], axis=-1)
        pos_encoding = tf.cast(pos_encoding[np.newaxis, ...], tf.float32)
        return pos_encoding
    
    '''*************** 第一部分: Scaled dot-product attention ***************'''
    def my_mask(inputs):
        '''
        Apply a causal (look-ahead) mask to attention scores.

        Positions above the main diagonal of the last two axes (key index greater
        than query index) are replaced by a large negative number so that they
        vanish after softmax. All other scores are returned unchanged.

        :param inputs: attention scores whose last two axes are (T_q, T_k)
        :return: tensor of the same shape and dtype as inputs
        '''
        # Lower-triangular matrix of ones (diagonal included) over the last two
        # axes: 1 = visible, 0 = masked.
        visible = tf.linalg.band_part(tf.ones_like(inputs), -1, 0)
        padding_num = -2 ** 32 + 1.1
        fill = tf.ones_like(inputs) * padding_num
        # Select on the mask itself instead of on "score * mask == 0"; otherwise a
        # visible score that happens to be exactly 0 would be masked as well.
        outputs = tf.where(tf.equal(visible, 0.), fill, inputs)
        return outputs
    
    def scaled_dot_product_attention(q, k, v, mask = None):
        '''
        attention(Q, K, V) = softmax(Q * K^T / sqrt(dk)) * V

        :param q: query tensor
        :param k: key tensor
        :param v: value tensor
        :param mask: any non-None value switches on the causal mask (my_mask)
        :return: (attended values, attention weights)
        '''
        # Scale the raw Q.K^T scores by sqrt(dk), dk being the last axis of q.
        dk = tf.cast(tf.shape(q)[-1], tf.float32)
        logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(dk)

        if mask is not None:
            print('有mask')
            logits = my_mask(logits)

        # Softmax over the key axis; masked positions end up with ~0 weight.
        attention_weights = tf.nn.softmax(logits)
        return tf.matmul(attention_weights, v), attention_weights
    
    '''*************** 第二部分: Multi-Head Attention ***************'''
    '''
    multi-head attention包含3部分: - 线性层与分头 - 缩放点积注意力 - 头连接 - 末尾线性层
    每个多头注意块有三个输入; Q(查询),K(密钥),V(值)。 它们通过第一层线性层并分成多个头。
    注意:点积注意力时需要使用mask, 多头输出需要使用tf.transpose调整各维度。
    Q,K和V不是一个单独的注意头,而是分成多个头,因为它允许模型共同参与来自不同表征空间的不同信息。
    在拆分之后,每个头部具有降低的维度,总计算成本与具有全维度的单个头部注意力相同。
    '''
    class MultiHeadAttention(tf.keras.layers.Layer):
        '''
        Multi-head attention: linear projections of Q/K/V, split into heads,
        scaled dot-product attention per head, concatenation of the heads and a
        final linear layer.
        '''
        def __init__(self, d_model, num_heads):
            '''
            :param d_model: model width; must be divisible by num_heads
            :param num_heads: number of attention heads
            '''
            super(MultiHeadAttention, self).__init__()
            self.num_heads = num_heads
            self.d_model = d_model
            # d_model must split evenly across the heads.
            assert d_model % num_heads == 0
            # Per-head width after the split.
            self.depth = d_model // num_heads
            self.wq = tf.keras.layers.Dense(d_model)
            self.wk = tf.keras.layers.Dense(d_model)
            self.wv = tf.keras.layers.Dense(d_model)
            self.dense = tf.keras.layers.Dense(d_model)

        def split_heads(self, x, batch_size):
            '''
            Split the last axis into heads and move the head axis before seq_len.

            :param x: tensor of shape [batch_size, seq_len, d_model]
            :param batch_size: (dynamic) batch size
            :return: tensor of shape [batch_size, num_heads, seq_len, depth]
            '''
            x = tf.reshape(x, [batch_size, -1, self.num_heads, self.depth])
            return tf.transpose(x, perm=[0, 2, 1, 3])

        def call(self, q, k, v, mask = None):
            '''
            :param q: queries, [batch_size, seq_len_q, features]
            :param k: keys, [batch_size, seq_len_k, features]
            :param v: values, [batch_size, seq_len_k, features]
            :param mask: any non-None value enables the causal mask
            :return: (output [batch_size, seq_len_q, d_model],
                      attention weights [batch_size, num_heads, seq_len_q, seq_len_k])
            '''
            batch_size = tf.shape(q)[0]
            # Each of Q, K and V goes through its OWN projection. Previously all
            # three were passed through self.wq, leaving wk/wv unused.
            q = self.wq(q)  # shape=[batch_size, seq_len_q, d_model]
            k = self.wk(k)
            v = self.wv(v)
            # Split into heads.
            q = self.split_heads(q, batch_size)  # shape=[batch_size, num_heads, seq_len_q, depth]
            k = self.split_heads(k, batch_size)
            v = self.split_heads(v, batch_size)
            # scaled_attention shape=[batch_size, num_heads, seq_len_q, depth]
            # attention_weights shape=[batch_size, num_heads, seq_len_q, seq_len_k]
            scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
            # Move the head axis back behind seq_len.
            scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3]) # shape=[batch_size, seq_len_q, num_heads, depth]
            # Merge the heads.
            concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model)) # shape=[batch_size, seq_len_q, d_model]
            # Final linear projection.
            output = self.dense(concat_attention)
            return output, attention_weights
    
    class LayerNormalization(tf.keras.layers.Layer):
        '''
        Layer normalisation over the last axis with a learnable scale (gamma)
        and shift (beta).
        '''
        def __init__(self, epsilon=1e-8, **kwargs):
            super().__init__(**kwargs)
            # Added to the standard deviation to avoid division by zero.
            self.epsilon = epsilon

        def build(self, input_shape):
            # One scale and one shift parameter per feature of the last axis.
            feature_shape = input_shape[-1:]
            self.gamma = self.add_weight(name='gamma', shape=feature_shape,
                                         initializer=tf.ones_initializer(),
                                         trainable=True)
            self.beta = self.add_weight(name='beta', shape=feature_shape,
                                        initializer=tf.zeros_initializer(),
                                        trainable=True)
            super().build(input_shape)

        def call(self, x): # x shape=[batch_size, seq_len, d_model]
            centered = x - tf.keras.backend.mean(x, axis=-1, keepdims=True)
            spread = tf.keras.backend.std(x, axis=-1, keepdims=True)
            return self.gamma * centered / (spread + self.epsilon) + self.beta
    
    def point_wise_feed_forward(d_model, diff):
        '''
        Two-layer position-wise feed-forward network.

        :param d_model: output width
        :param diff: width of the hidden (ReLU) layer
        :return: a tf.keras.Sequential of Dense(diff, relu) followed by Dense(d_model)
        '''
        hidden = tf.keras.layers.Dense(diff, activation=tf.nn.relu)
        projection = tf.keras.layers.Dense(d_model)
        return tf.keras.Sequential([hidden, projection])
    '''encoder layer:
    每个编码层包含以下子层 - Multi-head attention(带掩码) - Point wise feed forward networks
    每个子层中都有残差连接,并最后通过一个正则化层。残差连接有助于避免深度网络中的梯度消失问题。 
    每个子层输出是LayerNorm(x + Sublayer(x)),规范化是在d_model维的向量上。Transformer一共有n个编码层。
    '''
    class EncoderLayer(tf.keras.layers.Layer):
        '''
        One encoder block: self-attention and a feed-forward network, each
        followed by dropout, a residual connection and layer normalisation.
        '''
        def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
            super().__init__()
            self.mha = MultiHeadAttention(d_model, num_heads)
            self.ffn = point_wise_feed_forward(d_model, dff)
            self.layernorm1 = LayerNormalization()
            self.layernorm2 = LayerNormalization()
            self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
            self.dropout2 = tf.keras.layers.Dropout(dropout_rate)

        def call(self, inputs, training):
            # Self-attention sub-layer (Q = K = V = inputs); weights are discarded.
            attended, _ = self.mha(inputs, inputs, inputs)
            attended = self.dropout1(attended, training=training)
            normed_attention = self.layernorm1(inputs + attended)  # shape=[batch_size, seq_len, d_model]

            # Feed-forward sub-layer.
            transformed = self.dropout2(self.ffn(normed_attention), training=training)
            return self.layernorm2(normed_attention + transformed)  # shape=[batch_size, seq_len, d_model]
    
    class Encoder(tf.keras.layers.Layer):
        '''
        Stack of EncoderLayer blocks applied to already-embedded inputs that are
        scaled by sqrt(d_model) and offset by the positional encoding.
        '''
        def __init__(self, d_model, num_layers, num_heads, dff,
                   max_seq_len, dropout_rate=0.1):
            super().__init__()
            # NOTE: this Dense layer is created but not used by call().
            self.indata = tf.keras.layers.Dense(d_model)
            self.num_layers = num_layers
            self.d_model = d_model
            self.pos_encoding = positional_encoding(max_seq_len, d_model)  # shape=[1, max_seq_len, d_model]
            self.encoder_layer = [EncoderLayer(d_model, num_heads, dff, dropout_rate)
                                  for _ in range(num_layers)]
            self.dropout = tf.keras.layers.Dropout(dropout_rate)

        def call(self, inputs, training):
            # Actual sequence length of this batch.
            seq_len = inputs.shape[1]
            # Inputs are used directly as embeddings (no embedding lookup here).
            scaled = inputs * tf.math.sqrt(tf.cast(self.d_model, tf.float32))
            x = self.dropout(scaled + self.pos_encoding[:, :seq_len, :],
                             training=training)
            for layer in self.encoder_layer:
                x = layer(x, training)
            return x  # shape=[batch_size, seq_len, d_model]
    
    class DecoderLayer(tf.keras.layers.Layer):
        '''
        One decoder block: masked self-attention, encoder-decoder attention and a
        feed-forward network, each followed by dropout, a residual connection and
        layer normalisation.
        '''
        def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
            super().__init__()
            self.mha1 = MultiHeadAttention(d_model, num_heads)
            self.mha2 = MultiHeadAttention(d_model, num_heads)
            self.ffn = point_wise_feed_forward(d_model, dff)
            self.layernorm1 = LayerNormalization()
            self.layernorm2 = LayerNormalization()
            self.layernorm3 = LayerNormalization()
            self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
            self.dropout2 = tf.keras.layers.Dropout(dropout_rate)
            self.dropout3 = tf.keras.layers.Dropout(dropout_rate)

        def call(self, inputs, encoder_out, training):
            # Masked self-attention: Q = K = V = inputs.
            self_att, att_weight1 = self.mha1(inputs, inputs, inputs,mask = True)
            self_att = self.dropout1(self_att, training=training)
            self_att = self.layernorm1(inputs + self_att)

            # Encoder-decoder attention: Q from the decoder, K = V = encoder_out.
            cross_att, att_weight2 = self.mha2(self_att, encoder_out, encoder_out)
            cross_att = self.dropout2(cross_att, training=training)
            cross_att = self.layernorm2(self_att + cross_att)

            # Feed-forward sub-layer.
            ffn_out = self.dropout3(self.ffn(cross_att), training=training)
            return self.layernorm3(cross_att + ffn_out), att_weight1, att_weight2
    
    class Decoder(tf.keras.layers.Layer):
        '''
        Stack of DecoderLayer blocks. Inputs are projected to d_model by a Dense
        layer; the positional encoding is built but currently not added.
        '''
        def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate=0.1):
            super().__init__()
            # NOTE(review): stores the tf.shape function itself; nothing in this
            # class reads it.
            self.seq_len = tf.shape
            self.indata = tf.keras.layers.Dense(d_model)
            self.d_model = d_model
            self.num_layers = num_layers
            self.pos_encoding = positional_encoding(max_seq_len, d_model)
            self.decoder_layers = [DecoderLayer(d_model, num_heads, dff, dropout_rate)
                                   for _ in range(num_layers)]
            self.dropout = tf.keras.layers.Dropout(dropout_rate)

        def call(self, inputs, encoder_out, training):
            # Only needed by the (disabled) positional-encoding path.
            seq_len = inputs.shape[1]
            attention_weights = {}
            # Project raw inputs to the model width; scaling by sqrt(d_model) and
            # the positional encoding are intentionally switched off here.
            x = self.dropout(self.indata(inputs), training=training)
            for layer_no, layer in enumerate(self.decoder_layers[:self.num_layers], start=1):
                x, att1, att2 = layer(x, encoder_out, training)
                attention_weights['decoder_layer{}_att_w1'.format(layer_no)] = att1
                attention_weights['decoder_layer{}_att_w2'.format(layer_no)] = att2
            return x, attention_weights
    
    def deinput_padding(seq_len,dim,batch_size):
        '''
        Build a dummy decoder input: zeros everywhere except the first time step,
        which is all ones, repeated for every sample of the batch.

        :param seq_len: number of time steps
        :param dim: feature dimension
        :param batch_size: number of copies along the leading axis
        :return: ndarray of shape (batch_size, seq_len, dim)
        '''
        single = np.zeros((seq_len, dim))
        # Mark the first time step.
        single[0, :] = 1
        return np.tile(single, (batch_size, 1, 1))
    
    # Hyperparameters
    learn_rate = 2e-4
    epochs = 200
    bat = 10            # batch size
    pt = 120            # early-stopping patience

    # Dataset archive; alternative sizes kept for reference.
    # path = './8000_np_img.npz'
    # path = './2000_np_img.npz'
    # path = './500_np_img.npz'
    path = './5000_np_img.npz'

    # CNN / dense sizes
    base_dim = 8
    mid_dim = 10
    dense_dim = 100
    # Channel multipliers per CNN stage: 1, 2, 4, 8, 16, 32.
    time_list = [2 ** stage for stage in range(6)]

    unit = 40
    def c_b(chanel,kernel_size,stride = 1 ,padding ='valid'):
        '''
        Conv2D -> BatchNormalization -> LeakyReLU block.

        :param chanel: number of convolution filters
        :param kernel_size: convolution kernel size
        :param stride: convolution stride
        :param padding: convolution padding mode
        :return: tf.keras.Sequential containing the three layers
        '''
        conv = tf.keras.layers.Conv2D(
            chanel,
            kernel_size=kernel_size,
            strides=stride,
            padding=padding,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        )
        return keras.Sequential([
            conv,
            tf.keras.layers.BatchNormalization(),
            keras.layers.LeakyReLU(),
        ])
    class Transformer(tf.keras.Model):
        '''
        CNN + Transformer-encoder regressor.

        Each frame of the input sequence is encoded by a shared CNN and a small
        dense stack, the resulting per-frame vectors go through the Transformer
        encoder, and the encoder output is reduced to a single scalar.
        The decoder path is currently disabled.
        '''
        def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate=0.1):
            '''
            :param d_model: width of the per-frame feature vector / encoder
            :param num_layers: number of encoder layers
            :param num_heads: number of attention heads
            :param dff: hidden width of the encoder feed-forward network
            :param max_seq_len: maximum sequence length for the positional encoding
            :param dropout_rate: dropout rate used inside the encoder
            '''
            super(Transformer, self).__init__()
            self.layernorm1 = LayerNormalization()
            self.layernorm2 = LayerNormalization()

            # Per-frame CNN: one stage per entry of time_list, each stage being two
            # stride-1 conv blocks followed by a stride-2 conv block, then global
            # average pooling.
            conv_stack = []
            for multiplier in time_list:
                channels = base_dim * multiplier
                conv_stack.append(c_b(channels, [3, 3], stride=1, padding='SAME'))
                conv_stack.append(c_b(channels, [3, 3], stride=1, padding='SAME'))
                conv_stack.append(c_b(channels, [3, 3], stride=2, padding='SAME'))
            conv_stack.append(tf.keras.layers.GlobalAveragePooling2D())
            self.mycnn = tf.keras.Sequential(conv_stack)

            self.myDense1 = tf.keras.Sequential([
                tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),
                tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),

                tf.keras.layers.Dense(d_model)
            ])
            # Build the TimeDistributed wrappers once here rather than creating new
            # layer objects on every call().
            self.frame_cnn = tf.keras.layers.TimeDistributed(self.mycnn)
            self.frame_dense = tf.keras.layers.TimeDistributed(self.myDense1)

            self.encoder = Encoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
            self.emb = tf.keras.layers.Dense(d_model)
            # self.decoder = Decoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
            self.dim_dense = tf.keras.layers.Dense(1)
            self.final_layer = tf.keras.layers.Dense(1)
            self.flat = tf.keras.layers.Flatten()

        def call(self, inputs, training=None):
            '''
            :param inputs: batch of frame sequences; reshaped to (-1, 15, 101, 101, 1)
            :param training: Keras training flag, forwarded to the CNN (batch norm)
                             and to the encoder (dropout); None lets Keras decide
            :return: tensor of shape [batch_size, 1]
            '''
            inputs = tf.cast(inputs, dtype=tf.float32)
            inputs = tf.reshape(inputs, (-1, 15, 101, 101, 1))

            # Shared CNN and dense stack applied to every frame.
            inputs = self.frame_cnn(inputs, training=training)
            inputs = self.frame_dense(inputs, training=training)

            inputs = self.layernorm1(inputs)
            inputs = self.emb(inputs)
            print('trains_inputs:',inputs)
            inputs = self.layernorm2(inputs)
            print('layerhoutrains_inputs:', inputs)
            # Encoder output shape=[batch_size, seq_len_input, d_model]
            encoder_output = self.encoder(inputs, training=training)
            # Collapse the feature axis to 1, flatten over time, regress one value.
            encoder_output = self.dim_dense(encoder_output)
            print('encoder_output:',encoder_output.shape)
            encoder_output = self.flat(encoder_output)
            print('encoder_output:', encoder_output.shape)
            final_out = self.final_layer(encoder_output)
            return final_out
    
    # transformer测试
    # sample_transformer = Transformer(num_layers=2, d_model=8, num_heads=4, dff=200, max_seq_len=30)
    # temp_input = tf.random.uniform((10,30, 100))
    # trana_out = sample_transformer(temp_input)
    # print('trana_out.shape:',trana_out.shape)
    # print(trana_out)
    # sys.exit(2)
    
    
    #   加载数据
    
    def split_data(x_data,y_data,amount):
        '''
        Randomly split paired samples into a training and a test part.

        :param x_data: array of inputs, samples along axis 0
        :param y_data: array of targets, samples along axis 0
        :param amount: fraction of samples that goes into the training part
        :return: train_x, train_y, test_x, test_y
        '''
        sample_count = x_data.shape[0]
        indices = list(np.random.permutation(sample_count))
        print('indices:',indices,type(indices))
        cut = int(sample_count * amount)
        train_idx, test_idx = indices[:cut], indices[cut:]

        return x_data[train_idx], y_data[train_idx], x_data[test_idx], y_data[test_idx]
    
    timelength = 15
    # Archive keys: Img_data (images), Samid_data (sample ids), Rain_data (rainfall).
    # The archive is opened in a with-block so the underlying file handle is
    # closed once both arrays have been copied out (astype returns copies).
    with np.load(path) as data:
        train_imgs = data['Img_data'].astype(np.float32)     # e.g. imgs.shape: (500, 15, 4, 101, 101)
        train_rain = data['Rain_data'].astype(np.float32)    # e.g. rain.shape: (5000, 1)

    # Keep the last `timelength` frames of every sample.
    train_imgs = np.reshape(train_imgs[:,15-timelength:,:,:,:],(-1,timelength,4,101,101))
    print('train_imgs.shape:',train_imgs.shape)

    # Scale pixel values by 1/255 (to [0, 1] for 8-bit image data).
    train_imgs = train_imgs / 255.
    train_rain = np.reshape(train_rain,(-1,1))

    train_imgs,train_rain,test_imgs,test_rain = split_data(train_imgs,train_rain,0.8)
    
    # Bucket the rainfall targets into three classes:
    # 0 = below 5, 1 = 5..15, 2 = above 15.
    type_train = np.where(train_rain > 15,2,1)
    type_train = np.where(train_rain < 5 ,0,type_train)

    # Share of each class among the training samples.
    num_dict = {}
    for raintype in type_train[:, 0]:
        # Start from 0 and add 1 so the first occurrence is counted as well
        # (it used to be recorded as 0, under-counting every class by one).
        num_dict[raintype] = num_dict.get(raintype, 0) + 1

    for key,value in num_dict.items():
        value = value / train_imgs.shape[0]
        num_dict[key] = value
        print('key:{},value:{}'.format(key,value))
    # .get avoids a KeyError when no training sample falls into class 0.
    print('num_dict[0]:',num_dict.get(0, 0.0))


    onehot_train = tf.one_hot(type_train,depth=3)
    onehot_train = tf.reshape(onehot_train,(-1,3))
    # Split the images by height level. Returns a dict with keys
    # 'high0'..'high3'; each value is reshaped to (-1, timelength, 101, 101).
    def get_high_img(or_img):
        '''
        :param or_img: image array indexed as [sample, time, height, row, col]
        :return: dict mapping 'high<i>' to the images of height level i
        '''
        per_level = {}
        for level in range(4):
            name = 'high{}'.format(level)
            level_imgs = or_img[:, :, level, :, :]
            per_level[name] = np.reshape(level_imgs, (-1, timelength, 101, 101))
            print('high_key:',name)
        return per_level
    
    # Height level used for training and evaluation.
    which_high = 'high3'

    train_high_dic = get_high_img(train_imgs)
    train_high0_img = train_high_dic[which_high]
    print('train_high0_img.shape:',train_high0_img.shape)
    print('high0_img max:{},min:{}'.format(np.max(train_high0_img),np.min(train_high0_img)))

    test_high_dic = get_high_img(test_imgs)
    test_high0_img = test_high_dic[which_high]

    # Training data is shuffled (buffer of 500) and batched; test data is only batched.
    train_db = (
        tf.data.Dataset.from_tensor_slices((train_high0_img, train_rain))
        .shuffle(500)
        .batch(bat)
    )
    test_db = (
        tf.data.Dataset.from_tensor_slices((test_high0_img, test_rain))
        .batch(bat)
    )
    
    # Stop training when val_loss has not improved for `pt` epochs.
    early_stoping = EarlyStopping(monitor='val_loss',patience=pt)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras releases reject.
    opt = tf.keras.optimizers.Adam(learning_rate=learn_rate,clipnorm=0.1)
    my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)

    # To resume from a checkpoint, load the weights here:
    # model_name = './my_save_model/trans_model_LOSS1/transmodel_LOSS1.ckpt'
    # my_model.load_weights(model_name)
    my_model.compile(optimizer=opt,loss=my_loss)

    # Keras documents `callbacks` as a list of callback instances.
    my_model.fit(train_db,validation_data=test_db,epochs=epochs, validation_freq=1,callbacks=[early_stoping])


    # Save the trained weights.
    model_name = './my_save_model/trans_model_LOSS_3/transmodel_LOSS_3.ckpt'
    print('Save_model_name',model_name)
    my_model.save_weights(model_name)
    print('保存完成')
    del my_model
    # #   加载模型
    # my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)
    # model_name = model_name
    # my_model.load_weights(model_name)
    # my_model.compile(optimizer=opt,loss=tf.keras.losses.MSE)
    # print('加载完成')
    # my_model.evaluate(test_db)
  • 相关阅读:
    scikit-learn与数据预处理
    TensorFlow2.0(11):tf.keras建模三部曲
    机器学习回顾篇(11):支持向量机(SVM)
    机器学习回顾篇(10):感知机模型
    TensorFlow2.0(10):加载自定义图片数据集到Dataset
    机器学习回顾篇(9):K-means聚类算法
    FRP代理及其在数据库安全上的实践
    Elasticsearch系列---实战零停机重建索引
    Elasticsearch系列---索引管理
    Elasticsearch系列---搜索执行过程及scroll游标查询
  • 原文地址:https://www.cnblogs.com/cxhzy/p/14713972.html
Copyright © 2011-2022 走看看