I. Purpose
Using ImageNet2012 (ILSVRC2012) as the dataset, extract image features with Inception-v3 and use them as the input to train an autoencoder. This autoencoder serves as the pretrained model; a self-expression layer is then inserted into the middle of the autoencoder, and the learned self-expression coefficients are used as a similarity matrix to cluster the 1000 ImageNet2012 classes.
II. Pretraining
1. Principle
Inception-v3 + autoencoder. Each image is passed through a frozen Inception-v3 network and its 2048-dimensional pool_3 feature is reshaped into a 32×64 single-channel map (scaled up by a factor of 100). A convolutional autoencoder with three stride-2 conv layers (16/32/64 channels, 3×3 kernels) mirrored by three deconv layers is then trained to reconstruct this map under a mean-squared-error loss.
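As a quick illustration of the feature-extraction step used below, here is a minimal sketch that loads the frozen inception-v3.pb graph, runs one JPEG through it, and reshapes the 2048-d pool_3/_reshape:0 output into the 32×64 map the autoencoder consumes. The model directory and tensor names match the full script below; the image name test.jpg is only a placeholder.

import os
import numpy as np
import tensorflow as tf

MODEL_DIR = 'model/inception'   # same directory as FLAGS.model_dir below

# load the frozen Inception-v3 graph once
with tf.gfile.FastGFile(os.path.join(MODEL_DIR, 'inception-v3.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
pool_3 = tf.import_graph_def(graph_def, name='', return_elements=['pool_3/_reshape:0'])[0]

with tf.Session() as sess:
    image_data = tf.gfile.FastGFile('test.jpg', 'rb').read()   # placeholder image name
    feature = sess.run(pool_3, feed_dict={'DecodeJpeg/contents:0': image_data})
    feature = np.squeeze(feature).reshape(32, -1)              # 2048 -> (32, 64)
    print(feature.shape)                                       # (32, 64)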
2. Code
import tensorflow as tf
import os
import numpy as np
import random
import tensorflow.contrib.slim as slim
import shutil

tf.app.flags.DEFINE_string('model_dir', 'model/inception', 'Inception-v3 pretrain model dir')
tf.app.flags.DEFINE_string('class_list', 'imagenet12/train_class_list.txt', 'ILSVRC2012 image class list')
tf.app.flags.DEFINE_string('img_path', '/media/gpu/bdc7606d-0e3c-4870-9a5d-4926fd9961c0/gpu/Works/imagenet/others/ILSVRC2012_img_train', 'ILSVRC2012 image train path')
tf.app.flags.DEFINE_integer('max_train_steps_pre', 200000, 'max train num')
tf.app.flags.DEFINE_boolean('restore', True, 'whether to restore model and variables from a previous checkpoint')
tf.app.flags.DEFINE_string('checkpoint_path', 'model/pre/', 'model saved path')
tf.app.flags.DEFINE_string('feature_train_path', 'feature_train', 'ILSVRC2012 train feature save path')
tf.app.flags.DEFINE_integer('large_multi', 100, 'enlarge the feature data')
tf.app.flags.DEFINE_integer('width', 32, 'the width of feature input')
tf.app.flags.DEFINE_integer('inception_out_size', 2048, 'the dim of feature input, inception out dim')
tf.app.flags.DEFINE_integer('train_num_of_every_batch', 2000, 'resample the feature batch every 2000 steps')
FLAGS = tf.app.flags.FLAGS

kernel_num_list = [16, 32, 64]               # channel numbers
kernel_size_list = [[3, 3], [3, 3], [3, 3]]  # kernel sizes
kernel_stride_list = [2, 2, 2]               # strides
batch_size = 500


def get_inception_graph():
    '''
    Load the frozen Inception-v3 graph; the returned tensor is used by
    get_inception_output to extract features.
    '''
    with tf.gfile.FastGFile(os.path.join(FLAGS.model_dir, 'inception-v3.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        inception_out = tf.import_graph_def(graph_def, name='', return_elements=['pool_3/_reshape:0'])
    return inception_out


def create_graph_pre():
    '''
    Build the convolutional autoencoder and its reconstruction loss.
    '''
    inception_input = tf.placeholder(tf.float32, [None, FLAGS.width, FLAGS.inception_out_size // FLAGS.width, 1], name='inception_holder')
    with tf.variable_scope('DSC'):
        with tf.variable_scope('encoder'):
            net = slim.conv2d(inception_input, kernel_num_list[0], kernel_size_list[0], stride=kernel_stride_list[0], scope='conv_0')
            net = slim.conv2d(net, kernel_num_list[1], kernel_size_list[1], stride=kernel_stride_list[1], scope='conv_1')
            net = slim.conv2d(net, kernel_num_list[2], kernel_size_list[2], stride=kernel_stride_list[2], scope='conv_2')

        with tf.variable_scope('decoder'):
            net = slim.conv2d_transpose(net, kernel_num_list[1], kernel_size_list[2], stride=kernel_stride_list[2], scope='deconv_2')
            net = slim.conv2d_transpose(net, kernel_num_list[0], kernel_size_list[1], stride=kernel_stride_list[1], scope='deconv_1')
            net = slim.conv2d_transpose(net, 1, kernel_size_list[0], stride=kernel_stride_list[0], scope='deconv_0')

    restruct_loss = tf.losses.mean_squared_error(net, inception_input)
    return restruct_loss, inception_input, net


def get_inception_output(sess, img, txt_name, inception_out, save):
    '''
    Extract the Inception-v3 feature of one image, reshape it to (width, -1)
    and optionally save it to txt_name.
    '''
    image_data = tf.gfile.FastGFile(img, 'rb').read()
    output = sess.run(inception_out, feed_dict={'DecodeJpeg/contents:0': image_data})
    output = np.squeeze(output)
    output = output.reshape(FLAGS.width, -1)
    if save:
        np.savetxt(txt_name, output, fmt='%.6f')
    return output


def get_inception_batch(sess, inception_out, save=True):
    '''
    Build one batch of Inception-v3 features (one random image per class)
    as input for the autoencoder graph built by create_graph_pre.
    '''
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)[0:batch_size]
    batch = []

    for i, item in enumerate(class_list):
        class_img_path = os.path.join(FLAGS.img_path, item)
        class_img_list = os.listdir(class_img_path)

        img_name = random.choice(class_img_list)
        txt_name = os.path.join(FLAGS.feature_train_path, item, img_name[:-4] + 'txt')
        img = os.path.join(class_img_path, img_name)

        if os.path.exists(txt_name):
            # the feature of this image was already extracted and cached
            print('%s Found!' % os.path.join(item, img_name[:-4] + 'txt'))
            batch_i = np.loadtxt(txt_name)
        else:
            # print('%s Extracting!' % os.path.join(item, img_name[:-4] + 'txt'))
            dir_name = os.path.join(FLAGS.feature_train_path, item)
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            batch_i = get_inception_output(sess, img, txt_name, inception_out, save=save)
        batch.append(batch_i)
    large_batch = np.array(batch) * FLAGS.large_multi

    return large_batch


def reconstruct(sess, net, img_inception):
    '''
    Run the autoencoder on img_inception and return the output together with
    its mean squared error, to verify the reconstruction quality.
    '''
    output = sess.run([net], feed_dict={'inception_holder:0': img_inception})
    img_inception = np.squeeze(img_inception)
    output = np.squeeze(np.array(output))
    test_loss = pow(img_inception - output, 2)

    return output, sum(sum(test_loss)) / (32 * 64)


def interface_pre():
    total_loss, inception_input, net = create_graph_pre()

    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(1e-3, global_step, decay_steps=100, decay_rate=0.98, staircase=True)
    # pass global_step to minimize() so the learning rate actually decays
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss, global_step=global_step)

    saver = tf.train.Saver(max_to_keep=3)

    with tf.Session() as sess:

        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            pre_step = int(ckpt.replace(FLAGS.checkpoint_path + '-', ''))
            saver.restore(sess, ckpt)
        else:
            # remove the previous model
            if os.path.exists(FLAGS.checkpoint_path):
                shutil.rmtree(FLAGS.checkpoint_path)
            os.makedirs(FLAGS.checkpoint_path)
            sess.run(tf.global_variables_initializer())
            pre_step = 0

        inception_out = get_inception_graph()

        for step in range(FLAGS.max_train_steps_pre):
            if step % FLAGS.train_num_of_every_batch == 0:
                # resample and shuffle a new feature batch every train_num_of_every_batch steps
                inception_output = get_inception_batch(sess, inception_out, save=False)
                inception_output = inception_output.reshape(-1, inception_output.shape[1], inception_output.shape[2], 1)
                perm = np.arange(batch_size)
                np.random.shuffle(perm)
                inception_output = inception_output[perm]

            _, loss_value = sess.run([train_op, total_loss], feed_dict={'inception_holder:0': inception_output})
            if step % 100 == 0:
                print("step %d : total_loss= %f" % (step, loss_value))
            if step % 500 == 0 and step > 0:
                # save the model; only write the meta graph on the first save
                write_meta_graph = (step == 500)
                all_step = pre_step + step
                saver.save(sess, FLAGS.checkpoint_path, global_step=all_step, write_meta_graph=write_meta_graph)
                # reconstruction check on a single test image
                img_inception = get_inception_output(sess, 'cropped_panda.jpg', 'cropped_panda.txt', inception_out, False)
                img_out, test_loss = reconstruct(sess, net, FLAGS.large_multi * img_inception.reshape(-1, 32, 64, 1))
                print("test loss= %.5f" % test_loss)


if __name__ == '__main__':
    interface_pre()
III. Training
1. Principle
The per-class average vectors of the ImageNet2012 training images in the Inception-v3 feature space are used as input (one 2048-d vector per class, 1000 vectors in total). The pretrained autoencoder is restored and a self-expression layer, a 1000×1000 coefficient matrix C between encoder and decoder, is trained; the total loss is the reconstruction loss plus the self-expression loss ‖Z − CZ‖² and a squared-norm regularizer on C (weighted by self_express_loss_weight and regularizer_loss_weight below). The learned coefficients are then used as a similarity matrix for spectral clustering, and the clustering result is visualized.
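The per-class average vectors are assumed to be precomputed and stored as avg_train_vector/<class>.txt (2048 values per class), which is what get_inception_batch_avg in the script below loads. The script that produced them is not included in the original, so the following is only a minimal sketch under that assumption; the directory names and the choice of averaging over every training image of a class are assumptions, while the frozen graph and the pool_3/_reshape:0 tensor are the same as in part II.

import os
import numpy as np
import tensorflow as tf

MODEL_DIR = 'model/inception'        # frozen inception-v3.pb, as in part II
IMG_PATH = 'ILSVRC2012_img_train'    # one sub-directory per class (assumed layout)
OUT_PATH = 'avg_train_vector'        # matches FLAGS.data_path below

# load the frozen Inception-v3 graph
with tf.gfile.FastGFile(os.path.join(MODEL_DIR, 'inception-v3.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
pool_3 = tf.import_graph_def(graph_def, name='', return_elements=['pool_3/_reshape:0'])[0]

if not os.path.exists(OUT_PATH):
    os.makedirs(OUT_PATH)

with tf.Session() as sess:
    for class_name in sorted(os.listdir(IMG_PATH)):
        class_dir = os.path.join(IMG_PATH, class_name)
        features = []
        for img_name in os.listdir(class_dir):
            image_data = tf.gfile.FastGFile(os.path.join(class_dir, img_name), 'rb').read()
            feat = sess.run(pool_3, feed_dict={'DecodeJpeg/contents:0': image_data})
            features.append(np.squeeze(feat))              # 2048-d vector per image
        avg = np.mean(features, axis=0)                    # class-average feature, 2048-d
        np.savetxt(os.path.join(OUT_PATH, class_name + '.txt'), avg, fmt='%.6f')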
2. Code
import tensorflow as tf
import os
import numpy as np
import random
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as nets
import shutil
from scipy.sparse import coo_matrix
from sklearn.cluster import spectral_clustering
from scipy.sparse.linalg import svds
from sklearn import cluster
from sklearn.preprocessing import normalize

tf.app.flags.DEFINE_string('class_list', '../imagenet12/train_class_list.txt', 'ILSVRC2012 image class list')
tf.app.flags.DEFINE_string('img_path', '/media/gpu/bdc7606d-0e3c-4870-9a5d-4926fd9961c0/gpu/Works/imagenet/others/ILSVRC2012_img_train', 'ILSVRC2012 image train path')
tf.app.flags.DEFINE_integer('max_train_steps', 200000, 'max train num')
tf.app.flags.DEFINE_boolean('restore', False, 'whether to restore model and variables from a previous checkpoint')
tf.app.flags.DEFINE_string('pretrain_path', '../model/pre/', 'pretrain model path')
tf.app.flags.DEFINE_string('train_path', 'model/train/', 'train model path')
tf.app.flags.DEFINE_string('Coef_path', 'Coef/', 'save path of the self-expression coefficients')
tf.app.flags.DEFINE_integer('large_multi', 100, 'enlarge the feature data')
tf.app.flags.DEFINE_integer('width', 32, 'the width of feature input')
tf.app.flags.DEFINE_integer('inception_out_size', 2048, 'the dim of feature input, inception out dim')
tf.app.flags.DEFINE_float('self_express_loss_weight', 1, 'weight of the self-expression loss')
tf.app.flags.DEFINE_float('regularizer_loss_weight', 0.01, 'weight of the Coef regularizer')
tf.app.flags.DEFINE_integer('train_num_of_every_batch', 5000, '')
tf.app.flags.DEFINE_string('cluster_path', 'cluster', 'cluster result path')
tf.app.flags.DEFINE_string('data_path', 'avg_train_vector', 'imagenet2012 average feature path')
FLAGS = tf.app.flags.FLAGS

kernel_num_list = [16, 32, 64]
kernel_size_list = [[3, 3], [3, 3], [3, 3]]
kernel_stride_list = [2, 2, 2]
batch_size = 1000
learn_rate = 0.001


def create_graph_pre():
    '''
    Rebuild the pretrained autoencoder and insert the self-expression layer
    (a batch_size x batch_size coefficient matrix Coef) between encoder and decoder.
    '''
    inception_input = tf.placeholder(tf.float32, [None, FLAGS.width, int(FLAGS.inception_out_size / FLAGS.width), 1], name='inception_holder')
    with tf.variable_scope('DSC'):
        with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(0.0005)):
            with tf.variable_scope('encoder'):
                net = slim.conv2d(inception_input, kernel_num_list[0], kernel_size_list[0], stride=kernel_stride_list[0], scope='conv_0')
                net = slim.conv2d(net, kernel_num_list[1], kernel_size_list[1], stride=kernel_stride_list[1], scope='conv_1')
                net = slim.conv2d(net, kernel_num_list[2], kernel_size_list[2], stride=kernel_stride_list[2], scope='conv_2')
                self_express_x = net
                net = tf.reshape(net, [batch_size, -1], name='reshape_to_flat')
                Coef = slim.model_variable('Coef',
                                           shape=[batch_size, batch_size],
                                           initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           regularizer=slim.l2_regularizer(0.0005),
                                           trainable=True)
                net = tf.matmul(Coef, net, name='matmul')

            with tf.variable_scope('decoder'):
                net = tf.reshape(net, [batch_size, int(FLAGS.width / 8), int(FLAGS.inception_out_size / FLAGS.width / 8), kernel_num_list[2]], name='reshape_to_normal')
                self_express_x_c = net
                net = slim.conv2d_transpose(net, kernel_num_list[1], kernel_size_list[2], stride=kernel_stride_list[2], scope='deconv_2')
                net = slim.conv2d_transpose(net, kernel_num_list[0], kernel_size_list[1], stride=kernel_stride_list[1], scope='deconv_1')
                net = slim.conv2d_transpose(net, 1, kernel_size_list[0], stride=kernel_stride_list[0], scope='deconv_0')

    reconstruct_loss = tf.losses.mean_squared_error(net, inception_input)
    self_express_loss = FLAGS.self_express_loss_weight * tf.losses.mean_squared_error(self_express_x, self_express_x_c)
    regularizer_loss = FLAGS.regularizer_loss_weight * tf.reduce_sum(tf.pow(Coef, 2.0))
    # regularizer_loss = tf.add_n(tf.losses.get_regularization_losses())

    loss = reconstruct_loss + self_express_loss + regularizer_loss
    # loss = self_express_loss
    return net, loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss


def get_inception_batch_avg():
    '''
    Load the precomputed per-class average Inception-v3 feature vectors
    (one 2048-d vector per class, saved as <class>.txt under FLAGS.data_path).
    '''
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)[0:batch_size]
    res = []
    for i in range(len(class_list)):
        data_path = os.path.join(FLAGS.data_path, class_list[i] + '.txt')
        data = np.loadtxt(data_path)
        data = data.reshape(32, 64)
        res.append(data * 100)
    return np.array(res)


def interface():
    net, total_loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss = create_graph_pre()

    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(1e-4, global_step, decay_steps=100, decay_rate=0.98, staircase=True)
    # pass global_step to minimize() so the learning rate actually decays
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss, global_step=global_step)
    saver = tf.train.Saver(max_to_keep=3)
    with tf.Session() as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.train_path)
            pre_step = int(ckpt.replace(FLAGS.train_path + '-', ''))
            saver.restore(sess, ckpt)
        else:
            # remove the previous model and Coef
            if os.path.exists(FLAGS.train_path):
                shutil.rmtree(FLAGS.train_path)
            if os.path.exists(FLAGS.Coef_path):
                shutil.rmtree(FLAGS.Coef_path)
            os.makedirs(FLAGS.train_path)
            os.makedirs(FLAGS.Coef_path)
            # restore the encoder/decoder weights from the pretrained autoencoder
            sess.run(tf.global_variables_initializer())
            pre_step = 0
            ckpt = tf.train.latest_checkpoint(FLAGS.pretrain_path)
            variable_restore_op = slim.assign_from_checkpoint_fn(ckpt, slim.get_variables_to_restore(), ignore_missing_vars=True)
            variable_restore_op(sess)

        inception_output = get_inception_batch_avg()
        inception_output = inception_output.reshape(-1, inception_output.shape[1], inception_output.shape[2], 1)
        for step in range(FLAGS.max_train_steps):
            _, loss_value, Coef_val, rec_val, see_val, reg_val = sess.run(
                [train_op, total_loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss],
                feed_dict={'inception_holder:0': inception_output})
            if step % 100 == 0:
                print("step %d : total_loss= %f, rec_loss= %f, see_val= %f, reg_val= %f"
                      % (step, loss_value, rec_val, see_val, reg_val))

            if step % 1000 == 0 and step > 0:
                # save the model; only write the meta graph on the first save
                write_meta_graph = (step == 1000)
                all_step = pre_step + step
                saver.save(sess, FLAGS.train_path, global_step=all_step, write_meta_graph=write_meta_graph)
                np.savetxt(FLAGS.Coef_path + str(all_step) + '.txt', Coef_val, fmt='%.6f')


def thrC(C):
    '''Take the element-wise absolute value of the coefficient matrix.'''
    return np.abs(C)


def post_proC(C, N):
    '''Symmetrize the coefficient matrix C and run spectral clustering into N clusters.'''
    C = 0.5 * (C + C.T)
    np.savetxt(os.path.join(FLAGS.cluster_path, 'C_abs.txt'), C, fmt='%.6f')
    graph = coo_matrix(C)
    labels = spectral_clustering(graph, n_clusters=N)
    return labels


def vis(N, labels):
    '''Save the class names of every cluster and copy one example image per class.'''
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)
    for i in range(N):
        print(i)
        index = [j for j in range(len(labels)) if labels[j] == i]
        sub_class_list = class_list[index]
        np.savetxt(os.path.join(FLAGS.cluster_path, str(i) + '.txt'), sub_class_list, fmt='%s')

        dir_path = os.path.join(FLAGS.cluster_path, str(i))
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
        os.makedirs(dir_path)
        # copy one random example image of every class in this cluster to dir_path
        for sub_class_item in sub_class_list:
            img_path = os.path.join(FLAGS.img_path, sub_class_item)
            random_img = random.choice(os.listdir(img_path))
            src = os.path.join(img_path, random_img)
            dst = os.path.join(dir_path, random_img)
            shutil.copyfile(src, dst)


if __name__ == '__main__':
    interface()

    if not os.path.exists(FLAGS.cluster_path):
        os.makedirs(FLAGS.cluster_path)
    C = np.loadtxt('Coef/199000.txt')   # learned self-expression coefficients, used as the similarity matrix
    C = thrC(C)
    N = 32
    grp = post_proC(C, N)

    vis(N, grp)
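As an optional sanity check that is not part of the original code, the learned coefficient matrix can be visualized as a heatmap with rows and columns sorted by cluster label; a good solution should look roughly block-diagonal. The following standalone sketch reuses the same Coef/199000.txt file and N=32 as above; matplotlib is an extra dependency and the output file name is only an example.

import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import coo_matrix
from sklearn.cluster import spectral_clustering

C = np.abs(np.loadtxt('Coef/199000.txt'))   # learned coefficients, as in the main block above
C = 0.5 * (C + C.T)                         # same symmetrization as post_proC
labels = spectral_clustering(coo_matrix(C), n_clusters=32)

order = np.argsort(labels)                  # group rows/columns by cluster label
plt.imshow(C[order][:, order], cmap='hot')
plt.colorbar()
plt.title('|C| sorted by cluster label')
plt.savefig('coef_heatmap.png', dpi=150)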