zoukankan      html  css  js  c++  java
  • Transformer+CNN+Smote

    import tensorflow as tf
    import time
    import numpy as np
    import matplotlib.pyplot as plt
    import sys
    from tensorflow import keras
    import os
    from tensorflow import nn
    import math
    import random
    from sklearn.neighbors import NearestNeighbors
    from tensorflow.keras.callbacks import EarlyStopping
    
    # Low-level runtime configuration: the script refuses to start without a
    # GPU and turns on memory growth for the first GPU that is listed.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # Dataset archive to load; the commented alternatives are other archive
    # files kept for quick switching.
    # path = '../8000_np_img.npz'
    path = '../5000_np_img.npz'
    # path = '../2000_np_img.npz'
    # path = '../500_np_img.npz'
    def split_data(x_data,y_data,amount):
        """Shuffle paired arrays and cut them into a leading and a trailing part.

        :param x_data: array indexed by sample along axis 0
        :param y_data: array with the same number of samples as ``x_data``
        :param amount: fraction of the samples that goes into the first part;
                       the cut position is ``int(sample_count * amount)``
        :return: ``(first_x, first_y, rest_x, rest_y)``; x and y are selected
                 with the same shuffled indices so pairs stay aligned
        """
        sample_count = x_data.shape[0]
        # A plain Python list of shuffled row numbers drawn from NumPy's global RNG.
        indices = list(np.random.permutation(sample_count))
        print('indices:',indices,type(indices))

        cut = int(sample_count * amount)
        head, tail = indices[:cut], indices[cut:]
        return x_data[head], y_data[head], x_data[tail], y_data[tail]
    
    # Load the .npz archive and prepare image / rainfall arrays.
    data = np.load(path)
    # Number of trailing time steps kept from each sequence.
    timelength = 15
    #   Original note on archive contents: Img_data=result, Samid_data=result_id, Rain_data=result_rain
    train_imgs = data['Img_data'].astype(np.float32)         #   original note: imgs.shape (500, 15, 4, 101, 101)
    # train_imgs = np.reshape(train_imgs[:,14,:,:,:],(-1,4,101,101))      #   single-frame variant: (N, 4, 101, 101)
    # Keep the last `timelength` steps; the literal 15 is presumably the stored sequence length -- TODO confirm.
    train_imgs = np.reshape(train_imgs[:,15-timelength:,:,:,:],(-1,timelength,4,101,101))      #   result shape: (N, timelength, 4, 101, 101)
    print('train_imgs.shape:',train_imgs.shape)

    # sys.exit(2)
    #   Scale pixel values by 1/255 (intended to map the images into [0, 1]).
    train_imgs = train_imgs / 255.
    train_rain = data['Rain_data'].astype(np.float32)      #   original note: rain.shape (5000, 1)
    train_rain = np.reshape(train_rain,(-1,1))

    # train_imgs,train_rain,test_imgs,test_rain = split_data(train_imgs,train_rain,0.8)
    # NOTE(review): with amount=0.9999 almost nothing is left for the test part
    # (int(N * 0.9999) samples go to training) -- confirm this is intended.
    train_imgs,train_rain,test_imgs,test_rain = split_data(train_imgs,train_rain,0.9999)
    
    #   Bucket every training label into a rain class:
    #   0 -> label < 5, 1 -> 5 <= label <= 15, 2 -> label > 15.
    type_train = np.where(train_rain > 15,2,1)
    type_train = np.where(train_rain < 5 ,0,type_train)

    #   Count how many samples fall into each class. The first occurrence of
    #   a class counts as 1 (it used to be recorded as 0, which made every
    #   proportion one sample too small).
    num_dict = {}
    for i in type_train:
        raintype = i[0]
        num_dict[raintype] = num_dict.get(raintype, 0) + 1

    #   Turn the counts into proportions of the training set. A new dict is
    #   built instead of rewriting the one being iterated.
    total_samples = train_imgs.shape[0]
    num_dict = {key: count / total_samples for key, count in num_dict.items()}
    for key,value in num_dict.items():
        print('key:{},value:{}'.format(key,value))
    #   .get() keeps the script alive when the split contains no class-0 sample.
    print('num_dict[0]:',num_dict.get(0, 0.0))
    # sys.exit(2)

    #   One-hot encode the class ids; type_train is (N, 1), so the extra axis
    #   produced by one_hot is folded away by the reshape to (N, 3).
    onehot_train = tf.one_hot(type_train,depth=3)
    onehot_train = tf.reshape(onehot_train,(-1,3))
    #   Split the image stack by height level. The returned dict has the keys
    #   'high0' .. 'high3'; each value is reshaped to (b, timelength, 101, 101).
    def get_high_img(or_img):
        """Return one image sequence per height level.

        :param or_img: array indexed as [sample, time, height, row, col]
                       (axis 2 is sliced for levels 0..3)
        :return: dict mapping 'high{level}' to the slice of that level,
                 reshaped to (-1, timelength, 101, 101)
        """
        high_dic = {}
        for level in range(4):
            high_key = 'high{}'.format(level)
            level_slice = or_img[:, :, level, :, :]
            high_dic[high_key] = np.reshape(level_slice, (-1, timelength, 101, 101))
            print('high_key:',high_key)
        return high_dic
    
    # Height level fed to the model. NOTE: the variables below are still named
    # "...high0..." although they hold whichever level `which_high` selects.
    which_high = 'high3'
    train_high_dic = get_high_img(train_imgs)
    train_high0_img = train_high_dic[which_high]

    # Quick sanity output: shape and value range of the selected training level.
    print('train_high0_img.shape:',train_high0_img.shape)
    print('high0_img max:{},min:{}'.format(np.max(train_high0_img),np.min(train_high0_img)))

    # Same height-level selection for the held-out part.
    test_high_dic = get_high_img(test_imgs)
    test_high0_img = test_high_dic[which_high]

    print('train_high0_img:',train_high0_img[0].shape)
    
    def pure_smote(X,Y,K,Max_add=400):
        """Oversample strong-rain samples with SMOTE-style interpolation.

        Samples whose label is >= 30 form the minority ("strong rain") set.
        Each synthetic sample is a random convex combination of one minority
        sample and one of its nearest minority neighbours; the label is
        interpolated with the same coefficient.

        :param X: array of shape (N, ...). Samples are flattened for the
                  neighbour search and restored to ``X.shape[1:]`` afterwards
                  (the shape is no longer hard-coded to (15, 101, 101)).
        :param Y: array of shape (N, 1) holding the rain label of every sample
        :param K: neighbours requested per sample; the sample itself is the
                  first neighbour, so K must be >= 2. K is clamped to the
                  number of minority samples.
        :param Max_add: upper bound on the number of synthetic samples
        :return: ``(result_pic, result_label)``: X and Y with exactly
                 ``append_num`` synthetic rows appended. X and Y are returned
                 unchanged when fewer than two minority samples exist or when
                 no sample needs to be added.
        :raises ValueError: if K < 2
        """
        if K < 2:
            raise ValueError('K must be at least 2 (the first neighbour is the sample itself)')

        X = np.asarray(X)
        Y = np.asarray(Y)
        labels = Y[:, 0]
        sample_shape = X.shape[1:]
        feature_dim = int(np.prod(sample_shape))

        #   Select the strong-rain subset and flatten every sequence to a vector.
        strong_rain_index = np.flatnonzero(labels >= 30)
        strong_rain_label = labels[strong_rain_index]
        strong_count = strong_rain_index.shape[0]
        strong_rain_pic = X[strong_rain_index].reshape(strong_count, feature_dim)
        if strong_count > 0:
            print('初始化雷达图')
            print('pic.shape:', (1, feature_dim))
        print('strong_rain_pic:',strong_rain_pic.shape)

        train_size = len(Y)
        print('train_size:',train_size)
        if strong_count < 2:
            #   Nothing to interpolate between (this also avoids dividing by zero).
            return X, Y

        #   How many times the minority set fits into the rest of the data.
        append_size = (train_size - strong_count) // strong_count
        append_num = min(strong_count * (append_size - 1), Max_add)
        print('append_num:',append_num)
        if append_num <= 0:
            #   The minority set is already large enough.
            return X, Y

        k = min(K, strong_count)
        nbrs = NearestNeighbors(n_neighbors=k,algorithm='ball_tree').fit(strong_rain_pic)
        _, indices = nbrs.kneighbors(strong_rain_pic)

        #   Pre-allocate the output instead of re-stacking after every sample.
        new_pic = np.empty((append_num,) + sample_shape,
                           dtype=np.result_type(X.dtype, np.float32))
        new_label = np.empty((append_num, 1),
                             dtype=np.result_type(Y.dtype, np.float32))

        #   Cycle through the minority samples until exactly append_num new
        #   samples exist (the old loop produced one sample too many).
        for start in range(append_num):
            pic_index = start % strong_count
            if pic_index == 0:
                print('start:',start)
            cur_pic = strong_rain_pic[pic_index]
            cur_label = strong_rain_label[pic_index]

            alpha = random.uniform(0,1)
            #   Column 0 is the query sample itself, so draw from 1..k-1.
            select_rand = random.randint(1, k-1)
            nb_index = indices[pic_index][select_rand]
            nb_pic = strong_rain_pic[nb_index]
            nb_label = strong_rain_label[nb_index]

            x_new = cur_pic + alpha * (nb_pic - cur_pic)
            new_pic[start] = np.reshape(x_new, sample_shape)
            new_label[start, 0] = cur_label + alpha * (nb_label - cur_label)

        result_pic = np.vstack((X,new_pic))
        result_label = np.vstack((Y,new_label))
        print()
        return  result_pic,result_label
    
    # Augment the selected height-level training set through pure_smote,
    # using 6 neighbours, and report the resulting array shapes.
    deal_x,deal_y = pure_smote(train_high0_img,train_rain,K = 6)
    print(' deal_x:', deal_x.shape)
    print(' deal_y:', deal_y.shape)
    
    ####################################################   模型  ######################################################
    def positional_encoding(pos, d_model):
        '''
        Build the sinusoidal positional-encoding table.

        :param pos: number of positions (rows of the table)
        :param d_model: hidden size (number of columns of the table)
        :return: float32 tensor of shape [1, pos, d_model]; the leading axis
                 is there so the table broadcasts over the batch dimension.
                 The sine columns come first, followed by the cosine columns
                 (the halves are concatenated, not interleaved).
        '''
        def get_angles(position, i):
            # `i` is the column index; i // 2 gives the shared frequency index
            # of a sin/cos pair. Result shape: [pos, d_model].
            # float() replaces np.float, which was removed in NumPy 1.24.
            return position / np.power(10000., 2. * (i // 2.) / float(d_model))

        angle_rates = get_angles(np.arange(pos)[:, np.newaxis],
                                 np.arange(d_model)[np.newaxis, :])
        # Even columns feed the sine half, odd columns the cosine half.
        pe_sin = np.sin(angle_rates[:, 0::2])
        pe_cos = np.cos(angle_rates[:, 1::2])
        pos_encoding = np.concatenate([pe_sin, pe_cos], axis=-1)
        pos_encoding = tf.cast(pos_encoding[np.newaxis, ...], tf.float32)
        return pos_encoding
    
    '''*************** 第一部分: Scaled dot-product attention ***************'''
    def my_mask(inputs):
        """Apply a causal mask to attention scores.

        Query position i may only attend to key positions 0..i. Every score
        above the main diagonal is replaced with a large negative constant so
        that it vanishes after softmax.

        The mask is selected by position. The previous implementation
        multiplied the scores by the triangular matrix and then masked every
        entry equal to 0, which also masked legitimate zero-valued scores in
        the visible region.

        :param inputs: attention scores of shape [..., T_q, T_k]
        :return: tensor of the same shape with future positions masked
        """
        # Lower-triangular ones (main diagonal included) over the last two axes.
        visible = tf.linalg.band_part(tf.ones(tf.shape(inputs)[-2:]), -1, 0)
        padding_num = -2 ** 32 + 1.1
        # `visible` is [T_q, T_k] and broadcasts over the leading axes.
        outputs = tf.where(tf.equal(visible, 0.), padding_num, inputs)
        return outputs
    
    def scaled_dot_product_attention(q, k, v, mask = None):
        '''attention(Q, K, V) = softmax(Q * K^T / sqrt(dk)) * V

        :param q: query tensor, last axis is the depth dk
        :param k: key tensor
        :param v: value tensor
        :param mask: any value other than None switches on the causal mask of
                     ``my_mask``; the value itself is not inspected
        :return: (attention output, attention weights)
        '''
        # Scale the raw query/key similarities by sqrt(dk).
        depth = tf.cast(tf.shape(q)[-1], tf.float32)
        logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(depth)

        if mask is not None:
            print('有mask')
            logits = my_mask(logits)

        # Softmax over the key axis (the last one) yields the weights.
        attention_weights = tf.nn.softmax(logits)
        return tf.matmul(attention_weights, v), attention_weights
    
    '''*************** 第二部分: Multi-Head Attention ***************'''
    '''
    multi-head attention包含3部分: - 线性层与分头 - 缩放点积注意力 - 头连接 - 末尾线性层
    每个多头注意块有三个输入; Q(查询),K(密钥),V(值)。 它们通过第一层线性层并分成多个头。
    注意:点积注意力时需要使用mask, 多头输出需要使用tf.transpose调整各维度。
    Q,K和V不是一个单独的注意头,而是分成多个头,因为它允许模型共同参与来自不同表征空间的不同信息。
    在拆分之后,每个头部具有降低的维度,总计算成本与具有全维度的单个头部注意力相同。
    '''
    class MultiHeadAttention(tf.keras.layers.Layer):
        """Multi-head attention: project, split into heads, attend, merge.

        :param d_model: model width; must be divisible by ``num_heads``
        :param num_heads: number of attention heads
        """
        def __init__(self, d_model, num_heads):
            super(MultiHeadAttention, self).__init__()
            self.num_heads = num_heads
            self.d_model = d_model
            # d_model has to split evenly across the heads.
            assert d_model % num_heads == 0
            # Width of a single head.
            self.depth = d_model // num_heads
            self.wq = tf.keras.layers.Dense(d_model)
            self.wk = tf.keras.layers.Dense(d_model)
            self.wv = tf.keras.layers.Dense(d_model)
            self.dense = tf.keras.layers.Dense(d_model)

        def split_heads(self, x, batch_size):
            """Reshape [batch, seq_len, d_model] to [batch, num_heads, seq_len, depth]."""
            x = tf.reshape(x, [batch_size, -1, self.num_heads, self.depth])
            return tf.transpose(x, perm=[0, 2, 1, 3])

        def call(self, q, k, v, mask = None):
            """Run multi-head attention.

            :param q: queries, shape [batch, seq_len_q, features]
            :param k: keys, shape [batch, seq_len_k, features]
            :param v: values, shape [batch, seq_len_k, features]
            :param mask: forwarded to ``scaled_dot_product_attention``
            :return: (output [batch, seq_len_q, d_model],
                      attention weights [batch, num_heads, seq_len_q, seq_len_k])
            """
            batch_size = tf.shape(q)[0]
            # Each input gets its own projection. Keys and values used to be
            # passed through `wq` as well, which left `wk`/`wv` unused and tied
            # all three projections together.
            # NOTE: checkpoints written by the old code hold no wk/wv weights.
            q = self.wq(q)  # shape=[batch_size, seq_len_q, d_model]
            k = self.wk(k)
            v = self.wv(v)
            # Split into heads.
            q = self.split_heads(q, batch_size)  # shape=[batch_size, num_heads, seq_len_q, depth]
            k = self.split_heads(k, batch_size)
            v = self.split_heads(v, batch_size)
            # scaled_attention shape=[batch_size, num_heads, seq_len_q, depth]
            # attention_weights shape=[batch_size, num_heads, seq_len_q, seq_len_k]
            scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
            # Move the head axis back behind the sequence axis ...
            scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3]) # shape=[batch_size, seq_len_q, num_heads, depth]
            # ... and merge the heads again.
            concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model)) # shape=[batch_size, seq_len_q, d_model]
            # Final linear projection.
            output = self.dense(concat_attention)
            return output, attention_weights
    
    class LayerNormalization(tf.keras.layers.Layer):
        """Normalise the last axis and apply a learned scale (gamma) and shift (beta).

        :param epsilon: added to the standard deviation before dividing
        """
        def __init__(self, epsilon=1e-8, **kwargs):
            super().__init__(**kwargs)
            self.epsilon = epsilon

        def build(self, input_shape):
            # One scale and one shift value per feature of the last axis.
            feature_shape = input_shape[-1:]
            self.gamma = self.add_weight(
                name='gamma',
                shape=feature_shape,
                initializer=tf.ones_initializer(),
                trainable=True,
            )
            self.beta = self.add_weight(
                name='beta',
                shape=feature_shape,
                initializer=tf.zeros_initializer(),
                trainable=True,
            )
            super().build(input_shape)

        def call(self, x): # x shape=[batch_size, seq_len, d_model]
            centered = x - tf.keras.backend.mean(x, axis=-1, keepdims=True)
            spread = tf.keras.backend.std(x, axis=-1, keepdims=True)
            return self.gamma * centered / (spread + self.epsilon) + self.beta
    
    def point_wise_feed_forward(d_model, diff):
        """Two-layer feed-forward net: Dense(diff, relu) followed by Dense(d_model).

        :param d_model: output width
        :param diff: hidden width
        :return: a ``tf.keras.Sequential`` holding the two Dense layers
        """
        hidden = tf.keras.layers.Dense(diff, activation=tf.nn.relu)
        projection = tf.keras.layers.Dense(d_model)
        return tf.keras.Sequential([hidden, projection])
    '''encoder layer:
    每个编码层包含以下子层 - Multi-head attention(带掩码) - Point wise feed forward networks
    每个子层中都有残差连接,并最后通过一个正则化层。残差连接有助于避免深度网络中的梯度消失问题。 
    每个子层输出是LayerNorm(x + Sublayer(x)),规范化是在d_model维的向量上。Transformer一共有n个编码层。
    '''
    class EncoderLayer(tf.keras.layers.Layer):
        """One encoder block: self-attention and feed-forward sub-layers.

        Each sub-layer is followed by dropout, a residual connection and layer
        normalisation. No mask is passed to the attention here.
        """
        def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
            super().__init__()
            self.mha = MultiHeadAttention(d_model, num_heads)
            self.ffn = point_wise_feed_forward(d_model, dff)
            self.layernorm1 = LayerNormalization()
            self.layernorm2 = LayerNormalization()
            self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
            self.dropout2 = tf.keras.layers.Dropout(dropout_rate)

        def call(self, inputs, training):
            # Self-attention: query, key and value are all the layer input.
            attended, _ = self.mha(inputs, inputs, inputs)
            attended = self.dropout1(attended, training=training)
            attention_block = self.layernorm1(inputs + attended)  # shape=[batch_size, seq_len, d_model]

            # Position-wise feed-forward network with its own residual path.
            transformed = self.ffn(attention_block)
            transformed = self.dropout2(transformed, training=training)
            return self.layernorm2(attention_block + transformed)  # shape=[batch_size, seq_len, d_model]
    
    class Encoder(tf.keras.layers.Layer):
        """Stack of ``num_layers`` EncoderLayer blocks on position-encoded inputs.

        The inputs are used directly as embeddings; the ``indata`` Dense layer
        is created but not applied in ``call``.
        """
        def __init__(self, d_model, num_layers, num_heads, dff,
                   max_seq_len, dropout_rate=0.1):
            super().__init__()
            self.indata = tf.keras.layers.Dense(d_model)
            self.num_layers = num_layers
            self.d_model = d_model
            # Pre-computed table, shape=[1, max_seq_len, d_model].
            self.pos_encoding = positional_encoding(max_seq_len, d_model)
            self.encoder_layer = [
                EncoderLayer(d_model, num_heads, dff, dropout_rate)
                for _ in range(num_layers)
            ]
            self.dropout = tf.keras.layers.Dropout(dropout_rate)

        def call(self, inputs, training):
            # Static length of the sequence axis of the inputs.
            seq_len = inputs.shape[1]
            # Scale by sqrt(d_model), then add the matching slice of the table.
            scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
            word_embedding = inputs * scale
            emb = word_embedding + self.pos_encoding[:, :seq_len, :]

            x = self.dropout(emb, training=training)
            for layer_index in range(self.num_layers):
                layer = self.encoder_layer[layer_index]
                x = layer(x, training)
            return x  # shape=[batch_size, seq_len, d_model]
    
    class DecoderLayer(tf.keras.layers.Layer):
        """One decoder block: masked self-attention, encoder attention, feed-forward.

        Every sub-layer is followed by dropout, a residual connection and
        layer normalisation.
        """
        def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
            super().__init__()
            self.mha1 = MultiHeadAttention(d_model, num_heads)
            self.mha2 = MultiHeadAttention(d_model, num_heads)
            self.ffn = point_wise_feed_forward(d_model, dff)
            self.layernorm1 = LayerNormalization()
            self.layernorm2 = LayerNormalization()
            self.layernorm3 = LayerNormalization()
            self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
            self.dropout2 = tf.keras.layers.Dropout(dropout_rate)
            self.dropout3 = tf.keras.layers.Dropout(dropout_rate)

        def call(self, inputs, encoder_out, training):
            # 1) Masked self-attention over the decoder inputs (Q = K = V).
            self_att, att_weight1 = self.mha1(inputs, inputs, inputs,mask = True)
            self_att = self.dropout1(self_att, training=training)
            self_block = self.layernorm1(inputs + self_att)

            # 2) Attention over the encoder output: Q from the decoder,
            #    K = V = encoder_out.
            cross_att, att_weight2 = self.mha2(self_block, encoder_out, encoder_out)
            cross_att = self.dropout2(cross_att, training=training)
            cross_block = self.layernorm2(self_block + cross_att)

            # 3) Position-wise feed-forward network.
            ffn_out = self.dropout3(self.ffn(cross_block), training=training)
            output = self.layernorm3(cross_block + ffn_out)
            return output, att_weight1, att_weight2
    
    class Decoder(tf.keras.layers.Layer):
        """Stack of ``num_layers`` DecoderLayer blocks.

        The inputs are projected with a Dense layer; no positional encoding is
        added in ``call`` although the table is built in ``__init__``.
        """
        def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate=0.1):
            super().__init__()
            # NOTE(review): this stores the tf.shape function itself, not a
            # length, and nothing in this class reads it.
            self.seq_len = tf.shape
            self.indata = tf.keras.layers.Dense(d_model)
            self.d_model = d_model
            self.num_layers = num_layers
            self.pos_encoding = positional_encoding(max_seq_len, d_model)
            self.decoder_layers = [
                DecoderLayer(d_model, num_heads, dff, dropout_rate)
                for _ in range(num_layers)
            ]
            self.dropout = tf.keras.layers.Dropout(dropout_rate)

        def call(self, inputs, encoder_out, training):
            # Read but not used below (the positional-encoding step is disabled).
            seq_len = inputs.shape[1]
            attention_weights = {}

            emb = self.indata(inputs)
            x = self.dropout(emb, training=training)
            for layer_index in range(self.num_layers):
                number = layer_index + 1
                x, att1, att2 = self.decoder_layers[layer_index](x, encoder_out, training)
                attention_weights['decoder_layer{}_att_w1'.format(number)] = att1
                attention_weights['decoder_layer{}_att_w2'.format(number)] = att2
            return x, attention_weights
    
    def deinput_padding(seq_len,dim,batch_size):
        """Build a dummy decoder input: ones in the first step, zeros elsewhere.

        :param seq_len: number of time steps
        :param dim: feature size of every step
        :param batch_size: number of copies stacked along the first axis
        :return: float array of shape (batch_size, seq_len, dim)
        """
        template = np.zeros((seq_len, dim))
        # Only the first time step is filled with ones.
        template[0, :] = 1
        return np.tile(template, (batch_size, 1, 1))
    
    #   Hyper-parameters
    learn_rate = 2e-4        # optimizer learning rate
    # learn_rate = 2.0e-4
    epochs =5000             # only referenced by a commented-out fit() call; the live call passes epochs=200
    bat = 10                 # batch size of the training and test datasets

    #   CNN / dense sizing
    base_dim = 8             # base channel count; multiplied by the entries of time_list
    mid_dim = 10             # not referenced in the code visible here
    dense_dim = 100          # width of the hidden Dense layers after the CNN
    time_list = [1,2,4,8,16,32]   # channel multipliers, one per CNN stage

    unit = 40                # not referenced in the code visible here
    def c_b(chanel,kernel_size,stride = 1 ,padding ='valid'):
        """Build a Conv2D -> BatchNormalization -> LeakyReLU block.

        :param chanel: number of convolution filters
        :param kernel_size: convolution kernel size
        :param stride: convolution stride
        :param padding: padding mode passed to Conv2D
        :return: a ``keras.Sequential`` with the three layers
        """
        conv = tf.keras.layers.Conv2D(
            chanel,
            kernel_size=kernel_size,
            strides=stride,
            padding=padding,
            # Kernel weights start from a truncated normal with stddev 0.02.
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        )
        norm = tf.keras.layers.BatchNormalization()
        activation = keras.layers.LeakyReLU()
        return keras.Sequential([conv, norm, activation])
    class Transformer(tf.keras.Model):
        """CNN feature extractor per frame followed by a Transformer encoder.

        Every frame of the input sequence goes through ``mycnn`` and
        ``myDense1`` (both applied per time step), the resulting sequence is
        normalised, projected and encoded, and two Dense(1) layers reduce the
        encoder output to a single value per sample. The decoder path is
        disabled (kept below as commented-out code).
        """
        def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate=0.1):
            """
            :param d_model: width of the encoder inputs/outputs
            :param num_layers: number of encoder layers
            :param num_heads: attention heads per encoder layer
            :param dff: hidden width of the encoder feed-forward nets
            :param max_seq_len: length of the positional-encoding table
            :param dropout_rate: dropout rate used inside the encoder
            """
            super(Transformer, self).__init__()
            self.layernorm1 = LayerNormalization()
            self.layernorm2 = LayerNormalization()
            # Earlier, smaller CNN variant with max-pooling (disabled):
            # self.mycnn = tf.keras.Sequential([
            #     c_b(6, [3, 3], stride=1, padding='SAME'),
            #     tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),
            #
            #     c_b(12, [3, 3], stride=1, padding='SAME'),
            #     tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),
            #
            #     c_b(24, [3, 3], stride=1, padding='SAME'),
            #     tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),
            #
            #     c_b(48, [3, 3], stride=1, padding='SAME'),
            #     tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),
            #
            #     c_b(48, [3, 3], stride=1, padding='SAME'),
            #     tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),
            #     tf.keras.layers.Flatten()
            # ])
            # Six stages of three conv blocks each; the third block of every
            # stage uses stride 2, and the channel count of stage i is
            # base_dim * time_list[i]. Global average pooling ends the stack.
            self.mycnn = tf.keras.Sequential([
                c_b(base_dim * time_list[0], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[0], [3, 3], stride=1, padding='SAME'),
                # # tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2),
                c_b(base_dim * time_list[0], [3, 3], stride=2, padding='SAME'),

                c_b(base_dim * time_list[1], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[1], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[1], [3, 3], stride=2, padding='SAME'),

                c_b(base_dim * time_list[2], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[2], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[2], [3, 3], stride=2, padding='SAME'),

                c_b(base_dim * time_list[3], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[3], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[3], [3, 3], stride=2, padding='SAME'),

                c_b(base_dim * time_list[4], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[4], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[4], [3, 3], stride=2, padding='SAME'),

                c_b(base_dim * time_list[5], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[5], [3, 3], stride=1, padding='SAME'),
                c_b(base_dim * time_list[5], [3, 3], stride=2, padding='SAME'),

                # tf.keras.layers.Flatten()

                tf.keras.layers.GlobalAveragePooling2D()
            ])

            # Per-frame dense head: two hidden layers, then a projection to d_model.
            self.myDense1 = tf.keras.Sequential([
                # tf.keras.layers.Dense(10, activation='relu'),
                tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),
                tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),

                tf.keras.layers.Dense(d_model)
                # tf.keras.layers.Dense(3)
            ])
            self.encoder = Encoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
            self.emb = tf.keras.layers.Dense(d_model)
            # self.decoder = Decoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
            self.dim_dense = tf.keras.layers.Dense(1)
            self.final_layer = tf.keras.layers.Dense(1)
            self.flat = tf.keras.layers.Flatten()
        def call(self, inputs):
            """Map a batch of image sequences to one predicted value per sample.

            :param inputs: tensor that can be reshaped to (-1, 15, 101, 101, 1)
            :return: output of the final Dense(1) layer
            """
            inputs = tf.cast(inputs, dtype=tf.float32)
            # NOTE(review): sequence length 15 and image size 101 are
            # hard-coded here instead of following `timelength`.
            inputs = tf.reshape(inputs, (-1, 15, 101, 101, 1))

            # NOTE(review): a new TimeDistributed wrapper is constructed on
            # every invocation of call(); the wrapped sub-models are the
            # attributes created in __init__.
            inputs = tf.keras.layers.TimeDistributed(self.mycnn)(inputs)

            # print('out.shape:', out.shape,out[0,0,:5])
            out = tf.keras.layers.TimeDistributed(self.myDense1)(inputs)
            inputs = out
            # sys.exit(2)
            inputs = self.layernorm1(inputs)
            inputs = self.emb(inputs)
            print('trains_inputs:',inputs)
            # print('inputs.shape:', inputs.shape)
            # sys.exit(2)
            # Encoder pass; output shape=[batch_size, seq_len_input, d_model]
            inputs = self.layernorm2(inputs)
            print('layerhoutrains_inputs:', inputs)
            # `training` is not passed explicitly here.
            encoder_output = self.encoder(inputs)
            # Collapse the feature axis to 1, flatten, then map to a single value.
            encoder_output = self.dim_dense(encoder_output)
            print('encoder_output:',encoder_output.shape)
            encoder_output = self.flat(encoder_output)
            print('encoder_output:', encoder_output.shape)
            final_out = self.final_layer(encoder_output)
            # sys.exit(2)


            #######################################      Decode (disabled)      ################################
            # decode_input = deinput_padding(s_dim,in_dim,bat_size)
            #                                             #   (inputs, encoder_out, training)
            # # print(' decode_input ', decode_input )
            # # print('encoder_output:',encoder_output.shape)
            # # print('decode_input:',decode_input.shape)
            # decoder_output, att_weights = self.decoder(decode_input, encoder_output, True)
            # # print('decoder_output:',decoder_output)
            # # sys.exit(2)
            # # finally map to the output layer
            # final_out = self.final_layer(decoder_output) # shape=[batch_size, seq_len_target, target_vocab_size]
            # # print('final_out:',final_out.shape)
            # final_out = final_out[:,-2,:]
            # # print('final_out:', final_out.shape)
            # #######################################      Decode (disabled)      ################################
            return final_out
    
    # Training data comes from the SMOTE-augmented arrays, shuffled with a
    # 500-element buffer; the held-out part is only batched.
    train_db = tf.data.Dataset.from_tensor_slices((deal_x, deal_y)).shuffle(500).batch(bat)
    # train_db = tf.data.Dataset.from_tensor_slices((train_high0_img, train_rain)).shuffle(500).repeat()
    test_db = tf.data.Dataset.from_tensor_slices((test_high0_img, test_rain)).batch(bat)

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # rejected by current Keras releases.
    opt = tf.keras.optimizers.Adam(learning_rate=learn_rate,clipnorm=0.1)
    early_stoping = EarlyStopping(monitor='val_loss',patience=100)
    my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)
    # To resume from saved weights, call my_model.load_weights(<checkpoint path>) here.
    my_model.compile(optimizer=opt,loss=tf.keras.losses.MSE)
    # `callbacks` takes a list of callback instances.
    my_model.fit(train_db,validation_data=test_db,epochs=200, validation_freq=1,callbacks=[early_stoping])
    my_model.evaluate(test_db)

    #   Save the trained weights.
    model_name = './my_save_model/pure_smote_model_5/my_model_5.ckpt'
    # model_name = 'my_model1.ckpt'
    my_model.save_weights(model_name)
    print('保存完成')
    del (my_model)
    #   Rebuild the model and load the weights back as a round-trip check.
    my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)
    my_model.load_weights(model_name)
    # The reloaded model gets its own optimizer: the previous instance holds
    # state tied to the variables of the deleted model.
    opt = tf.keras.optimizers.Adam(learning_rate=learn_rate,clipnorm=0.1)
    my_model.compile(optimizer=opt,loss=tf.keras.losses.MSE)
    print('加载完成')
  • 相关阅读:
    LVS,NET,TUNL,DR实现负载均衡
    Redis的基本数据结构,基本操作
    mongoDB基本操作
    mysql高级操作(优化)
    sphix使用及即时索引配置
    java client 通过zookeeper连接hbase问题
    TCP/UDP load balance
    Hacking your way to enlightenment with Kafka and Node.js
    flume问题
    linux点滴
  • 原文地址:https://www.cnblogs.com/cxhzy/p/14713982.html
Copyright © 2011-2022 走看看