zoukankan html css js c++ java

【GAN】基础GAN代码解析

基础GAN代码解析

运行教程

使用Tensorflow 1.14.0版本可以直接运行。若Mnist数据集因为网络原因下载不下来，可以通过以下链接下载压缩包，解压到项目根目录即可。
Mnist数据集压缩包

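训练过程会创建两个文件夹：一个是【out】目录，存放生成的图片；另一个是【.mnist_gan】目录（注意代码中的目录名以“.”开头），存放 tf.summary.FileWriter 写出的 TensorBoard 日志（计算图以及各项损失的 summary）。注意：代码中没有使用 tf.train.Saver，因此并不会保存权重文件。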

代码中GAN网络结构：

网络没有采用卷积神经网络的结构，就是最最基础的神经网络结构。
生成器G输入的初始维度为128x100，输出维度为128x784。生成器网络一共有三层，分别是输入层、中间层、输出层。中间层的激活函数是relu函数，输出层则用的是sigmoid函数。
判别器D输入的初始维度是128x784，输出维度为128x1。中间层的激活函数同样是relu函数，输出层用的同样是sigmoid函数。
判别器D和生成器G的损失函数都采用交叉熵函数。
生成器G的目标是 max(D(fake))
判别器D的目标是 min(D(fake)) + max(D(real))

GAN的代码如下：

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from tensorflow.examples.tutorials.mnist import input_data

# Set TensorFlow's native-log threshold variable to '2' (intended to quiet
# its console output — NOTE(review): confirm it takes effect when set after
# the tensorflow import).
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

# Single session shared by every sess.run(...) call in this script.
sess = tf.InteractiveSession()

def weight_var(shape, name):
    """Create (or fetch) a weight variable that uses the Xavier initializer.

    Args:
        shape: Shape of the weight tensor.
        name: Variable name handed to ``tf.get_variable``.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    xavier_init = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name=name, shape=shape, initializer=xavier_init)

def bias_var(shape, name):
    """Create (or fetch) a bias variable initialized to the constant 0.

    Args:
        shape: Shape of the bias tensor.
        name: Variable name handed to ``tf.get_variable``.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    zero_init = tf.constant_initializer(0)
    return tf.get_variable(name=name, shape=shape, initializer=zero_init)


def generator(z):
    """Run the generator G on noise ``z`` and return its sigmoid output.

    Reads the module-level parameters ``G_W1``, ``G_b1``, ``G_W2`` and
    ``G_b2``.

    Args:
        z: Noise tensor that is matrix-multiplied with ``G_W1``.

    Returns:
        The output layer after the sigmoid activation.
    """
    # Hidden layer: affine transform followed by ReLU.
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    # Output layer before activation.
    pre_activation = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(pre_activation)

def discriminator(x):
    """Run the discriminator D on samples ``x``.

    Reads the module-level parameters ``D_W1``, ``D_b1``, ``D_W2`` and
    ``D_b2``.

    Args:
        x: Sample tensor that is matrix-multiplied with ``D_W1``.

    Returns:
        A ``(probability, logit)`` pair: the sigmoid-activated output and
        the raw output-layer value it was computed from.
    """
    # Hidden layer: affine transform followed by ReLU.
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    # Output layer before activation; callers use it for the logit-based loss.
    logit = tf.matmul(hidden, D_W2) + D_b2
    probability = tf.nn.sigmoid(logit)
    return probability, logit

def sample_Z(m, n):
    """Draw an ``m`` x ``n`` array of noise, uniformly distributed on [-1, 1).

    Args:
        m: Number of rows (noise vectors).
        n: Number of columns (entries per noise vector).

    Returns:
        A NumPy array of shape ``(m, n)``.
    """
    noise_shape = (m, n)
    return np.random.uniform(low=-1.0, high=1.0, size=noise_shape)

def plot(samples):
    """Draw samples as 28x28 grayscale tiles on a 4x4 grid.

    Args:
        samples: Iterable of arrays, each reshapeable to ``(28, 28)``. The
            grid only has 4 * 4 = 16 cells.

    Returns:
        The matplotlib figure that holds the tiles.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)  # thin gaps between tiles
    for cell, flat_image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        # Hide the axis decorations so only the image is visible.
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        tile = flat_image.reshape(28, 28)
        plt.imshow(tile, cmap='Greys_r')
    return figure

# Hyperparameters.
mb_size = 128  # mini-batch size used for every training step
Z_dim = 100    # length of each noise vector fed to the generator

# MNIST data read from the 'MNIST_data' directory. Labels are requested
# one-hot encoded, but the training loop discards them.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Discriminator net.
# Input samples: [batch size, 784].
X = tf.placeholder(tf.float32, shape=[None, 784], name='X')
D_W1 = weight_var([784, 128], 'D_W1')  # D hidden-layer weights
D_b1 = bias_var([128], 'D_b1')
D_W2 = weight_var([128, 1], 'D_W2')    # D output-layer weights
D_b2 = bias_var([1], 'D_b2')
theta_D = [D_W1, D_W2, D_b1, D_b2]     # variables updated by the D optimizer

# Generator net.
# Noise input: [batch size, Z_dim]. The noise dimension is taken from Z_dim
# (instead of a repeated literal 100) so the placeholder, the first weight
# matrix and sample_Z(..., Z_dim) cannot drift apart.
Z = tf.placeholder(tf.float32, shape=[None, Z_dim], name='Z')

G_W1 = weight_var([Z_dim, 128], 'G_W1')  # G hidden-layer weights
G_b1 = bias_var([128], 'G_B1')
G_W2 = weight_var([128, 784], 'G_W2')    # G output-layer weights
G_b2 = bias_var([784], 'G_B2')
theta_G = [G_W1, G_W2, G_b1, G_b2]       # variables updated by the G optimizer


def _mean_sigmoid_xent(logits, labels):
    """Return the batch mean of the sigmoid cross-entropy of ``logits``."""
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=logits, labels=labels)
    return tf.reduce_mean(per_example)


# Wire the networks together: one generator pass, and the discriminator
# applied to both the real input X and the generated samples.
G_sample = generator(Z)
D_real, D_logit_real = discriminator(X)
D_fake, D_logit_fake = discriminator(G_sample)

# The losses below are built from logits. The probability-based form of the
# same objectives, kept for reference:
#   D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
#   G_loss = -tf.reduce_mean(tf.log(D_fake))

# Discriminator on real samples: target label is 1.
D_loss_real = _mean_sigmoid_xent(D_logit_real, tf.ones_like(D_logit_real))
tf.summary.scalar("D_loss_real", D_loss_real)

# Discriminator on generated samples: target label is 0.
D_loss_fake = _mean_sigmoid_xent(D_logit_fake, tf.zeros_like(D_logit_fake))
tf.summary.scalar("D_loss_fake", D_loss_fake)

# Total discriminator loss: real part plus fake part.
D_loss = D_loss_real + D_loss_fake

# Generator: wants its samples to be classified as real, so target label is 1.
G_loss = _mean_sigmoid_xent(D_logit_fake, tf.ones_like(D_logit_fake))
tf.summary.scalar("G_loss", G_loss)

# Plain gradient descent for both players; each optimizer only touches its
# own network's variables.
D_optimizer = tf.train.GradientDescentOptimizer(0.002).minimize(
    D_loss, var_list=theta_D)
G_optimizer = tf.train.GradientDescentOptimizer(0.002).minimize(
    G_loss, var_list=theta_G)

# Directory that receives the periodically rendered sample grids.
if not os.path.exists('out/'):
    os.makedirs('out/')

# Run the initializer of every global variable before any training step.
sess.run(tf.global_variables_initializer())

# Counter incremented each time a sample grid is saved.
i = 0

# Merge all registered summaries into one op and open a writer for the
# event files (the session graph is passed to the writer as well).
summary_op = tf.summary.merge_all()
writer = tf.summary.FileWriter(".mnist_gan", sess.graph)

for it in range(1000000):
    # Every 1000 iterations, generate 16 samples from fresh noise
    # (a 16 x Z_dim array with values in [-1, 1)) and save them as an image.
    if it % 1000 == 0:
        samples = sess.run(G_sample, feed_dict={
                           Z: sample_Z(16, Z_dim)})
        fig = plot(samples)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)

    # Next mini-batch of real images; the labels are not used.
    X_mb, _ = mnist.train.next_batch(mb_size)

    # One discriminator step: feeds real samples X and noise Z, applies the
    # D optimizer and fetches the current D loss.
    _, D_loss_curr = sess.run([D_optimizer, D_loss], feed_dict={
                                  X: X_mb, Z: sample_Z(mb_size, Z_dim)})

    # One generator step: only noise Z is needed.
    _, G_loss_curr = sess.run([G_optimizer, G_loss], feed_dict={
                                  Z: sample_Z(mb_size, Z_dim)})

    # Record the scalar summaries against the training iteration `it`.
    # The image counter `i` only changes every 1000 iterations, so using it
    # as the step would stack 1000 summaries on the same step value.
    result = sess.run(summary_op, feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
    writer.add_summary(result, it)

    if it % 1000 == 0:
        print('Iter: {}'.format(it))
        print('D_loss: {:.4}'.format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()

# Flush any buffered events to disk once training has finished.
writer.close()

Tensorflow函数基础整理：

initializer变量初始化

tf.constant_initializer(value)

功能：将变量的所有元素初始化为给定的常量值 value。
tf.random_normal_initializer(mean,stddev)

功能：将变量初始化为满足正态分布的随机值，主要参数是正态分布的均值（mean）和标准差（stddev），即用所给的均值和标准差生成服从正态分布的初始值。

tf.truncated_normal_initializer(mean,stddev,seed,dtype)

mean:用于指定均值；stddev用于指定标准差；

seed:用于指定随机数种子；dtype：用于指定随机数的数据类型。

功能：将变量初始化为满足正态分布的随机值，但如果随机出来的值偏离平均值超过2个标准差，那么这个数将会被重新随机，通常只需要设定一个标准差stddev这一个参数就可以。
tf.random_uniform_initializer(a,b,seed,dtype)

功能：从a到b均匀初始化，将变量初始化为满足均匀分布的随机值，主要参数（最小值a，最大值b）

优化器构造

compute_gradients(loss,var_list=None,gate_gradients=GATE_OP,aggregation_method=None,colocate_gradients_with_ops=False,grad_loss=None)

作用：对于在变量列表（var_list）中的变量计算对于损失函数的梯度，这个函数返回一个（梯度，变量）对的列表，其中梯度就是相对应变量的梯度。这是minimize()函数的第一个步骤。

参数：
1. loss: 待减小的值；
2. var_list: 需要计算梯度的变量列表，默认是GraphKeys.TRAINABLE_VARIABLES集合中的变量。
apply_gradients(grads_and_vars,global_step=None,name=None)

作用：把梯度“应用”（Apply）到变量上面去。其实就是按照梯度下降的方式加到上面去。这是minimize()函数的第二个步骤。返回一个应用的操作。

参数:
1. grads_and_vars: compute_gradients()函数返回的(gradient, variable)对的列表
2. global_step: Optional Variable to increment by one after the variables have been updated.
minimize(loss,global_step=None,var_list=None,gate_gradients=GATE_OP,aggregation_method=None,colocate_gradients_with_ops=False,name=None,grad_loss=None)

TF初始化：

sess.run(tf.global_variables_initializer())

解析：函数中调用了 variables_initializer() 和 global_variables()

global_variables() 返回一个 Variable list ，里面保存的是 global variables。variables_initializer() 将 Variable list 中的所有 Variable 取出来，将其 variable.initializer 属性做成一个 op group。然后看 Variable 类的源码可以发现， variable.initializer 就是一个 assign op。

所以： sess.run(tf.global_variables_initializer()) 就是 run了所有global Variable 的 assign op，这就是初始化参数的本来面目。

查看全文

相关阅读:
Hadoop集群(三) Hbase搭建
 Hadoop集群(二) HDFS搭建
 Hadoop集群(一) Zookeeper搭建
 Redis Cluster 添加/删除完整折腾步骤
 Redis Cluster在线迁移
 Hadoop分布式HA的安装部署
 Describe the difference between repeater, bridge and router.
what is the “handover” and "soft handover" in mobile communication system?
The main roles of LTE eNodeB.
The architecture of LTE network.

原文地址：https://www.cnblogs.com/lwp-nicol/p/14656331.html