zoukankan      html  css  js  c++  java
  • TensorFlow使用记录 (十二): ℓ1 and ℓ2 Regularization

    实现方式

    以 ℓ2 Regularization 为例,主要有两种实现方式

    1. 手动累加

    # Method 1: build the L2 penalty by hand from the trainable variables.
    with tf.name_scope('loss'):
        loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)  # label is one_hot
        # Name fragments that mark normalization-layer variables; any variable
        # whose name contains one of them is left out of the penalty.
        norm_markers = ('bn', 'batchnorm', 'batch_norm', 'batch_normalization', 'gn')
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            # The filter is name-based only, so every other trainable variable
            # (bias vectors as well as kernels) contributes to the sum.
            if any(marker in vv.name for marker in norm_markers):
                continue
            l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001  # regularization strength
        loss = loss + l2_reg_loss

    2. 借助于 kernel_regularizer

    with tf.name_scope('dnn'):
        hidden1 = tf.layers.dense(X, 300, kernel_initializer=he_init, name='hidden1',
                                  kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001))
        ......
    
    with tf.name_scope('loss'):
        loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits) # label is one_hot
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss = tf.add_n([loss] + reg_losses)

    实例验证

    import tensorflow as tf
    
    # 1. create data
    from tensorflow.examples.tutorials.mnist import input_data
    # Labels are loaded one-hot encoded (one_hot=True).
    mnist = input_data.read_data_sets('../MNIST_data', one_hot=True)

    # Flattened input images: 784 features per example.
    X = tf.placeholder(tf.float32, shape=(None, 784), name='X')
    # One-hot labels, one column per class. The original `shape=(None)` is just
    # `None` (not a tuple) and therefore declared no shape at all; stating
    # (None, 10) lets TensorFlow reject mis-shaped label feeds up front.
    y = tf.placeholder(tf.int32, shape=(None, 10), name='y')
    # Boolean switch fed at run time; consumed by the batch-normalization layers.
    is_training = tf.placeholder(tf.bool, None, name='is_training')
    
    # 2. define network
    # Two hidden layers (300 and 100 units), each followed by batch
    # normalization and ReLU, then a 10-unit linear output layer.
    he_init = tf.contrib.layers.variance_scaling_initializer()
    # One L2 regularizer (scale 0.001) shared by all dense kernels; each layer
    # that uses it adds its penalty to the REGULARIZATION_LOSSES collection.
    l2_regularizer = tf.contrib.layers.l2_regularizer(0.001)
    with tf.name_scope('dnn'):
        hidden1 = tf.layers.dense(X, 300, kernel_initializer=he_init, name='hidden1',
                                  kernel_regularizer=l2_regularizer)
        # `training` must be passed explicitly: it defaults to False, which would
        # keep this layer in inference mode (moving statistics) even while
        # training, unlike the second batch-normalization layer below.
        hidden1 = tf.layers.batch_normalization(hidden1, training=is_training, momentum=0.9)
        hidden1 = tf.nn.relu(hidden1)
        hidden2 = tf.layers.dense(hidden1, 100, kernel_initializer=he_init, name='hidden2',
                                  kernel_regularizer=l2_regularizer)
        hidden2 = tf.layers.batch_normalization(hidden2, training=is_training, momentum=0.9)
        hidden2 = tf.nn.relu(hidden2)
        # Raw class scores; softmax is applied inside the loss.
        logits = tf.layers.dense(hidden2, 10, kernel_initializer=he_init, name='output',
                                 kernel_regularizer=l2_regularizer)
    
    # 3. define loss
    # Builds the data loss plus BOTH forms of the L2 penalty so they can be
    # compared side by side. Neither penalty is added to `loss` unless one of
    # the commented-out lines below is enabled.
    with tf.name_scope('loss'):
        loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)  # label is one_hot
        # ================= variant A: penalties registered by kernel_regularizer
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        reg_loss = tf.reduce_sum(reg_losses)
        # loss = tf.add_n([loss] + reg_losses)
        # ================= variant B: manual accumulation over trainable variables
        # Name fragments that mark normalization-layer variables; any variable
        # whose name contains one of them is left out of the penalty.
        norm_markers = ('bn', 'batchnorm', 'batch_norm', 'batch_normalization', 'gn')
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            # The filter is name-based only, so bias vectors are summed here
            # too, whereas variant A covers kernels only.
            if any(marker in vv.name for marker in norm_markers):
                continue
            l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001  # same scale as the kernel regularizers
        # loss = loss + l2_reg_loss
        # =================
    
    # 4. define optimizer
    learning_rate_init = 0.01
    # Step counter; non-trainable so the optimizer never treats it as a weight.
    global_step = tf.Variable(0, trainable=False)
    with tf.name_scope('train'):
        # Polynomial decay from the initial rate down to a tenth of it.
        learning_rate = tf.train.polynomial_decay(
            learning_rate_init,                         # initial learning rate
            global_step,                                # current iteration count
            decay_steps=22000,                          # steps over which the rate decays to end_learning_rate
            end_learning_rate=learning_rate_init / 10,  # floor for the learning rate
            power=0.9,
            cycle=False,
        )
        # Batch normalization registers its moving-average updates in UPDATE_OPS;
        # making the train op depend on them runs them on every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            momentum_optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate, momentum=0.9)
            # global_step must be passed so that minimize() increments it;
            # otherwise the decayed learning rate would never change.
            optimizer_op = momentum_optimizer.minimize(
                loss,
                global_step=global_step,
                var_list=tf.trainable_variables(),
            )
    
    with tf.name_scope('eval'):
        # in_top_k needs integer class indices as targets, so the label rows
        # are reduced with argmax along the class axis first.
        true_class = tf.argmax(y, axis=1)
        # True where the target class is the top-1 prediction.
        correct = tf.nn.in_top_k(predictions=logits, targets=true_class, k=1)
        # Fraction of correct predictions in the fed batch.
        accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))
    
    # 5. initialize
    # A single op that initializes both global and local variables.
    global_init = tf.global_variables_initializer()
    local_init = tf.local_variables_initializer()
    init_op = tf.group(global_init, local_init)
    # Saver used to save / restore model checkpoints.
    saver = tf.train.Saver()
    
    # 6. restore & evaluate
    # Restores the weights from a checkpoint and only *measures* the losses and
    # accuracy: optimizer_op is never run here, so no weights are updated.
    n_epochs = 1
    batch_size = 55000  # presumably the whole training split in one batch -- confirm
    with tf.Session() as sess:
        saver.restore(sess, './my_model_final.ckpt')
        for epoch in range(n_epochs):
            for iteration in range(mnist.train.num_examples // batch_size):
                X_batch, y_batch = mnist.train.next_batch(batch_size)
                loss_, l2_reg_loss_, reg_loss_ = sess.run(
                    [loss, l2_reg_loss, reg_loss],
                    feed_dict={X: X_batch, y: y_batch, is_training: True})
            # Accuracy on the last training batch only.
            acc_train = sess.run(
                accuracy, feed_dict={X: X_batch, y: y_batch, is_training: False})
            # Fetch all test metrics in one run so the test set goes through the
            # network once instead of once per metric.
            test_feed = {X: mnist.test.images, y: mnist.test.labels, is_training: False}
            acc_test, loss_test, l2_reg_loss_test, reg_loss_test = sess.run(
                [accuracy, loss, l2_reg_loss, reg_loss], feed_dict=test_feed)
            print("Train loss:", loss_, "Train l2_reg_loss:", l2_reg_loss_, "Train reg_loss:", reg_loss_, "Train accuracy:", acc_train)
            print("Test loss:", loss_test, "Test l2_reg_loss:", l2_reg_loss_test, "Test reg_loss:", reg_loss_test, "Test accuracy:", acc_test)

    # Recorded output of one run. l2_reg_loss is slightly larger than reg_loss,
    # presumably because the manual sum also covers the dense-layer biases,
    # which kernel_regularizer does not touch.
    """
    # =================
    Train loss: 0.000636433 Train l2_reg_loss: 0.48696715 Train reg_loss: 0.48683384 Train accuracy: 1.0
    Test loss: 0.059231624 Test l2_reg_loss: 0.48696715 Test reg_loss: 0.48683384 Test accuracy: 0.983
    """
    View Code
  • 相关阅读:
    ASP.NET 文件下载
    Asp.net 加密解密类
    ASP.Net 获取服务器信息
    Visual Studio 2013 和 ASP.NET 预览
    Windows Server 2012安装时所需要的KEY
    WordPress主题模板层次和常用模板函数
    小meta的大作用
    《淘宝技术这十年》之LAMP架构的网站
    面试题(八)
    面试题(七)
  • 原文地址:https://www.cnblogs.com/xuanyuyt/p/11667395.html
Copyright © 2011-2022 走看看