  • TensorFlow Usage Notes (5): Activation Functions and Initialization Methods

    In general ELU > leaky ReLU (and its variants) > ReLU > tanh > logistic. If you care a lot about runtime performance, you may prefer leaky ReLUs over ELUs. If you don't want to tweak yet another hyperparameter, just use the default $\alpha$ values suggested earlier (0.01 for the leaky ReLU, and 1 for the ELU). If you have spare time and computing power, you can use cross-validation to evaluate other activation functions, in particular RReLU if your network is overfitting, or PReLU if you have a huge training set.

    ReLU

    \begin{equation}
    \mathrm{ReLU}(z) = \max(0, z)
    \end{equation}

    tf.nn.relu
    import matplotlib.pyplot as plt
    import numpy as np
    
    def relu(z):
        return np.maximum(0, z)
    
    z = np.linspace(-5, 5, 200)
    plt.plot(z, relu(z), "r--", linewidth=2)
    props = dict(facecolor='black', shrink=0.1)
    plt.annotate('ReLU', xytext=(-3.5, 0.5), xy=(-5, 0.1), arrowprops=props, fontsize=14, ha="center")
    plt.title("ReLU activation function", fontsize=14)
    plt.plot([-5, 5], [0, 0], 'k-')
    plt.plot([0, 0], [-0.5, 4.2], 'k-')
    plt.grid(True)
    plt.axis([-5, 5, -0.5, 4.2])
    
    plt.tight_layout()
    plt.show()

    leaky ReLU

    \begin{equation}
    \mathrm{LeakyReLU}_{\alpha}(z) = \max(\alpha z, z)
    \end{equation}

    tf.nn.leaky_relu
    import matplotlib.pyplot as plt
    import numpy as np
    
    def leaky_relu(z, alpha=0.01):
        return np.maximum(alpha*z, z)
    
    z = np.linspace(-5, 5, 200)
    plt.plot(z, leaky_relu(z, 0.05), "b-", linewidth=2)
    plt.plot([-5, 5], [0, 0], 'k-')
    plt.plot([0, 0], [-0.5, 4.2], 'k-')
    plt.grid(True)
    props = dict(facecolor='black', shrink=0.1)
    plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha="center")
    plt.title("Leaky ReLU activation function", fontsize=14)
    plt.axis([-5, 5, -0.5, 4.2])
    
    plt.tight_layout()
    plt.show()

    ELU

    \begin{equation}
    \mathrm{ELU}(z)=
    \begin{cases}
    \alpha(e^{z} - 1) & \text{if } z < 0 \\
    z & \text{if } z \ge 0
    \end{cases}
    \end{equation}

    tf.nn.elu
    import matplotlib.pyplot as plt
    import numpy as np
    
    def elu(z, alpha=1):
        return np.where(z < 0, alpha * (np.exp(z) - 1), z)
    
    z = np.linspace(-5, 5, 200)
    plt.plot(z, elu(z), "g-", linewidth=2)
    plt.plot([-5, 5], [0, 0], 'k-')
    plt.plot([-5, 5], [-1, -1], 'k--')
    plt.plot([0, 0], [-2.2, 3.2], 'k-')
    plt.grid(True)
    plt.title(r"ELU activation function ($alpha=1$)", fontsize=14)
    plt.axis([-5, 5, -2.2, 3.2])
    
    plt.tight_layout()
    plt.show()

    ReLU6

    \begin{equation}
    \mathrm{ReLU6}(z) = \min(\max(z, 0), 6)
    \end{equation}

    tf.nn.relu6
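    A minimal NumPy sketch of ReLU6 (my own, in the same style as the plots above):
    import matplotlib.pyplot as plt
    import numpy as np
    
    def relu6(z):
        return np.minimum(np.maximum(z, 0), 6)
    
    z = np.linspace(-5, 8, 200)
    plt.plot(z, relu6(z), "m-", linewidth=2)
    plt.plot([-5, 8], [0, 0], 'k-')
    plt.plot([0, 0], [-0.5, 6.5], 'k-')
    plt.grid(True)
    plt.title("ReLU6 activation function", fontsize=14)
    plt.axis([-5, 8, -0.5, 6.5])
    
    plt.tight_layout()
    plt.show()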

     

    Swish

    \begin{equation}
    \mathrm{Swish}(z) = z \cdot \mathrm{sigmoid}(\beta z)
    \end{equation}

    import tensorflow as tf
    def swish(x, b=1):
        return x * tf.nn.sigmoid(b * x)
    import matplotlib.pyplot as plt
    import numpy as np
    
    def swish(z, b=1):
        return z/(1+np.exp(-b*z))
    
    z = np.linspace(-5, 5, 200)
    plt.plot(z, swish(z), "g--", linewidth=2)
    plt.plot([-5, 5], [0, 0], 'k-')
    plt.plot([0, 0], [-0.5, 5.2], 'k-')
    plt.grid(True)
    plt.title(r"Swish activation function", fontsize=14)
    plt.axis([-5, 5, -0.5, 5.2])
    
    plt.tight_layout()
    plt.show()

    Simple MNIST training

    import tensorflow as tf
    def Swish(features):
        return features*tf.nn.sigmoid(features)
    
    # 1. create data
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets('../MNIST_data', one_hot=True)
    
    X = tf.placeholder(tf.float32, shape=(None, 784), name='X')
    y = tf.placeholder(tf.float32, shape=(None, 10), name='y')  # one-hot labels
    
    # 2. define network
    with tf.name_scope('dnn'):
        hidden1 = tf.layers.dense(X, 300, activation=Swish, name='hidden1')
        hidden2 = tf.layers.dense(hidden1, 100, activation=Swish, name='hidden2')
        logits = tf.layers.dense(hidden2, 10, name='output')
        # prob = tf.layers.dense(hidden2, 10, tf.nn.softmax, name='prob')
    
    # 3. define loss
    with tf.name_scope('loss'):
        # tf.losses.sparse_softmax_cross_entropy() label is not one_hot and dtype is int*
        # xentropy = tf.losses.sparse_softmax_cross_entropy(labels=tf.argmax(y, axis=1), logits=logits)
        # tf.nn.sparse_softmax_cross_entropy_with_logits() label is not one_hot and dtype is int*
        # xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y, axis=1), logits=logits)
        # loss = tf.reduce_mean(xentropy)
        loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits) # label is one_hot
    
    # 4. define optimizer
    learning_rate = 0.01
    with tf.name_scope('train'):
        optimizer_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    
    with tf.name_scope('eval'):
        correct = tf.nn.in_top_k(logits, tf.argmax(y, axis=1), 1) # is the target among the top-k predictions? targets must be int*
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
    # 5. initialize
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    saver = tf.train.Saver()
    
    # 5. train & test
    n_epochs = 20
    n_batches = 50
    batch_size = 50
    
    with tf.Session() as sess:
        sess.run(init_op)
        for epoch in range(n_epochs):
            for iteration in range(mnist.train.num_examples // batch_size):
                X_batch, y_batch = mnist.train.next_batch(batch_size)
                sess.run(optimizer_op, feed_dict={X: X_batch, y: y_batch})
            acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch}) # accuracy on the last mini-batch
            acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
            loss_test = loss.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
            print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test, "Test loss:", loss_test)
        save_path = saver.save(sess, "./my_model_final.ckpt")
    
    # with tf.Session() as sess:
    #     sess.run(init_op)
    #     saver.restore(sess, "./my_model_final.ckpt")
    #     acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
    #     loss_test = loss.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
    #     print("Test accuracy:", acc_test, ", Test loss:", loss_test)
    
    """
    tf.sigmoid        0.9062
    tf.tanh           0.9611
    tf.relu           0.9713
    tf.nn.leaky_relu  0.9674
    tf.nn.elu         0.9613
    Swish             0.9605
    """

    Initialization Methods

    tensorflow/python/ops/init_ops.py

    zeros_initializer = Zeros
    ones_initializer = Ones
    constant_initializer = Constant
    random_uniform_initializer = RandomUniform
    random_normal_initializer = RandomNormal
    truncated_normal_initializer = TruncatedNormal
    uniform_unit_scaling_initializer = UniformUnitScaling
    variance_scaling_initializer = VarianceScaling
    glorot_uniform_initializer = GlorotUniform
    glorot_normal_initializer = GlorotNormal
    orthogonal_initializer = Orthogonal
    identity_initializer = Identity
    convolutional_delta_orthogonal = ConvolutionDeltaOrthogonal
    convolutional_orthogonal_1d = ConvolutionOrthogonal1D
    convolutional_orthogonal_2d = ConvolutionOrthogonal2D
    convolutional_orthogonal_3d = ConvolutionOrthogonal3D
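
    A quick sketch (TF 1.x API) of how these initializers are typically used: pass an instance as kernel_initializer to a layer, or as initializer to tf.get_variable. The names and shapes below are only for illustration.

    import tensorflow as tf
    
    X_demo = tf.placeholder(tf.float32, shape=(None, 784), name='X_init_demo')
    hidden_glorot = tf.layers.dense(X_demo, 300, activation=tf.nn.relu,
                                    kernel_initializer=tf.glorot_uniform_initializer(),
                                    name='hidden_glorot')
    w_truncated = tf.get_variable('w_truncated', shape=[300, 10],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))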

    random_uniform_initializer

    Generates random values from a uniform distribution.

    __init__(
        minval=0,
        maxval=None,
        seed=None,
        dtype=tf.dtypes.float32
    )

    random_normal_initializer

    Generates random values from a normal distribution (standard normal with the default parameters).

    __init__(
        mean=0.0,
        stddev=1.0,
        seed=None,
        dtype=tf.dtypes.float32
    )

    truncated_normal_initializer

    Generates random values from a truncated normal distribution; the parameters are the same as for random_normal_initializer.
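
    As a rough illustration (not the TensorFlow implementation itself), truncated-normal sampling redraws any value that falls more than two standard deviations from the mean:

    import numpy as np
    
    def truncated_normal(shape, mean=0.0, stddev=1.0, rng=np.random):
        samples = rng.normal(mean, stddev, size=shape)
        out_of_range = np.abs(samples - mean) > 2 * stddev
        while out_of_range.any():  # redraw until everything lies within 2 stddev of the mean
            samples[out_of_range] = rng.normal(mean, stddev, size=out_of_range.sum())
            out_of_range = np.abs(samples - mean) > 2 * stddev
        return samples
    
    print(truncated_normal((3, 3), stddev=0.1))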

    uniform_unit_scaling_initializer

    Similar to the uniform initializer, except that you do not specify the min and max values yourself; they are computed from the input size. The parameters are (factor=1.0, seed=None, dtype=dtypes.float32).

    max_val = math.sqrt(3 / input_size) * self.factor
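
    A small NumPy check of where that bound comes from: with weights drawn from U(-max_val, max_val) and factor=1.0, an output unit fed unit-variance inputs keeps roughly unit variance (the sizes below are made up for illustration).

    import numpy as np
    
    input_size, factor = 784, 1.0
    max_val = np.sqrt(3.0 / input_size) * factor
    
    rng = np.random.RandomState(0)
    W = rng.uniform(-max_val, max_val, size=(input_size, 200))  # 200 output units
    x = rng.randn(2000, input_size)                             # unit-variance inputs
    y = x.dot(W)
    print(y.var())  # close to factor ** 2 == 1.0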

    variance_scaling_initializer

    A composite of the above: depending on its parameters it can reproduce several of the other schemes.

    __init__(self,
             scale=1.0,
             mode="fan_in",
             distribution="truncated_normal",
             seed=None,
             dtype=dtypes.float32)
    scale = self.scale
    if self.mode == "fan_in":
      scale /= max(1., fan_in)
    elif self.mode == "fan_out":
      scale /= max(1., fan_out)
    else:
      scale /= max(1., (fan_in + fan_out) / 2.)
    if self.distribution == "normal" or self.distribution == "truncated_normal":
      # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
      stddev = math.sqrt(scale) / .87962566103423978
      return random_ops.truncated_normal(
          shape, 0.0, stddev, dtype, seed=self.seed)
    elif self.distribution == "untruncated_normal":
      stddev = math.sqrt(scale)
      return random_ops.random_normal(shape, 0.0, stddev, dtype, seed=self.seed)
    else:
      limit = math.sqrt(3.0 * scale)
      return random_ops.random_uniform(
          shape, -limit, limit, dtype, seed=self.seed)
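
    For example, with scale=2.0, mode="fan_in" and a truncated-normal distribution, this initializer corresponds to He initialization, which is commonly recommended for ReLU-family activations. A sketch using the TF 1.x API (layer sizes are arbitrary):

    import tensorflow as tf
    
    he_init = tf.variance_scaling_initializer(scale=2.0, mode="fan_in",
                                              distribution="truncated_normal")
    X_he = tf.placeholder(tf.float32, shape=(None, 784), name='X_he_demo')
    hidden_he = tf.layers.dense(X_he, 300, activation=tf.nn.relu,
                                kernel_initializer=he_init, name='hidden_he')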

    glorot_uniform_initializer / glorot_normal_initializer

    The standard Xavier initialization.

    limit = sqrt(6 / (fan_in + fan_out))
    stddev = sqrt(2 / (fan_in + fan_out))
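
    A small NumPy check (my own sketch, not TensorFlow source) that the two variants give the same weight variance of 2 / (fan_in + fan_out):

    import numpy as np
    
    fan_in, fan_out = 300, 100
    limit = np.sqrt(6.0 / (fan_in + fan_out))    # glorot_uniform bound
    stddev = np.sqrt(2.0 / (fan_in + fan_out))   # glorot_normal stddev
    
    rng = np.random.RandomState(0)
    w_uniform = rng.uniform(-limit, limit, size=100000)
    w_normal = rng.normal(0.0, stddev, size=100000)
    print(w_uniform.var(), w_normal.var(), 2.0 / (fan_in + fan_out))  # all roughly equal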

     
