zoukankan      html  css  js  c++  java
  • TensorFlow基础笔记(6) 图像风格化实验

    参考 http://blog.csdn.net/wspba/article/details/53994649

    https://www.ctolib.com/AdaIN-style.html

    Acknowledgement

    This project is inspired by many existing style transfer methods and their open-source implementations, including:

    注意点:

      初始化了一个TensorFlow的变量,即为我们需要训练的对象。

      image = tf.Variable(initial)

      注意这里我们训练的对象是一张图像,而不是weight和bias。

    #optimizer setup
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    train_step.run()
    执行后,开始不断进行改变变量值image使得loss变小,达到优化目标

    在minist分类实验中
    #这里的x和y并不是特定的值,相反,他们都只是一个占位符,可以在TensorFlow运行某一计算时根据该占位符输入具体的值。
    x = tf.placeholder("float", shape=[None, 784])
    y_ = tf.placeholder("float", shape=[None, 10])
    W = tf.Variable(tf.zeros([784,10]))#需要进行优化的变量
    b = tf.Variable(tf.zeros([10]))#需要进行优化的变量
    sess.run(tf.global_variables_initializer())
    y = tf.nn.softmax(tf.matmul(x,W) + b)
    cross_entropy = -tf.reduce_sum(y_*tf.log(y))
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
    执行后,开始不断进行改变变量值W和b使得cross_entropy变小,达到优化目标
     

    neural_style.py

    # Copyright (c) 2015-2016 Anish Athalye. Released under GPLv3.
    
    import os
    
    import numpy as np
    import scipy.misc
    
    from stylize import stylize
    
    import math
    from argparse import ArgumentParser
    
    #python neural_style.py --content gril.jpg --styles 3dtest.jpg --out output.jpg
    
    # default arguments
    CONTENT_WEIGHT = 5e0
    STYLE_WEIGHT = 1e2
    TV_WEIGHT = 1e2
    LEARNING_RATE = 1e2
    STYLE_SCALE = 1.0
    ITERATIONS = 1000
    VGG_PATH = './imagenet-vgg-verydeep-19.mat'
    content = './image/content.jpg'
    styles = './image/styles.jpg'
    out =  './image/output.jpg'
    
    def build_parser():
        parser = ArgumentParser()
        parser.add_argument('--content',
                dest='content', help='content image',
                metavar='CONTENT', default=content)
        parser.add_argument('--styles',
                dest='styles',
                nargs='+', help='one or more style images',
                metavar='STYLE', default=styles)
        parser.add_argument('--output',
                dest='output', help='output path',
                metavar='OUTPUT', default=out)
        parser.add_argument('--checkpoint-output',
                dest='checkpoint_output', help='checkpoint output format',
                metavar='OUTPUT')
        parser.add_argument('--iterations', type=int,
                dest='iterations', help='iterations (default %(default)s)',
                metavar='ITERATIONS', default=ITERATIONS)
        parser.add_argument('--width', type=int,
                dest='width', help='output width',
                metavar='WIDTH')
        parser.add_argument('--style-scales', type=float,
                dest='style_scales',
                nargs='+', help='one or more style scales',
                metavar='STYLE_SCALE')
        parser.add_argument('--network',
                dest='network', help='path to network parameters (default %(default)s)',
                metavar='VGG_PATH', default=VGG_PATH)
        parser.add_argument('--content-weight', type=float,
                dest='content_weight', help='content weight (default %(default)s)',
                metavar='CONTENT_WEIGHT', default=CONTENT_WEIGHT)
        parser.add_argument('--style-weight', type=float,
                dest='style_weight', help='style weight (default %(default)s)',
                metavar='STYLE_WEIGHT', default=STYLE_WEIGHT)
        parser.add_argument('--style-blend-weights', type=float,
                dest='style_blend_weights', help='style blending weights',
                nargs='+', metavar='STYLE_BLEND_WEIGHT')
        parser.add_argument('--tv-weight', type=float,
                dest='tv_weight', help='total variation regularization weight (default %(default)s)',
                metavar='TV_WEIGHT', default=TV_WEIGHT)
        parser.add_argument('--learning-rate', type=float,
                dest='learning_rate', help='learning rate (default %(default)s)',
                metavar='LEARNING_RATE', default=LEARNING_RATE)
        parser.add_argument('--initial',
                dest='initial', help='initial image',
                metavar='INITIAL')
        parser.add_argument('--print-iterations', type=int,
                dest='print_iterations', help='statistics printing frequency',
                metavar='PRINT_ITERATIONS')
        parser.add_argument('--checkpoint-iterations', type=int,
                dest='checkpoint_iterations', help='checkpoint frequency',
                metavar='CHECKPOINT_ITERATIONS')
        return parser
    
    
    def build_parser_src():
        parser = ArgumentParser()
        parser.add_argument('--content',
                dest='content', help='content image',
                metavar='CONTENT', required=True)
        parser.add_argument('--styles',
                dest='styles',
                nargs='+', help='one or more style images',
                metavar='STYLE', required=True)
        parser.add_argument('--output',
                dest='output', help='output path',
                metavar='OUTPUT', required=True)
        parser.add_argument('--checkpoint-output',
                dest='checkpoint_output', help='checkpoint output format',
                metavar='OUTPUT')
        parser.add_argument('--iterations', type=int,
                dest='iterations', help='iterations (default %(default)s)',
                metavar='ITERATIONS', default=ITERATIONS)
        parser.add_argument('--width', type=int,
                dest='width', help='output width',
                metavar='WIDTH')
        parser.add_argument('--style-scales', type=float,
                dest='style_scales',
                nargs='+', help='one or more style scales',
                metavar='STYLE_SCALE')
        parser.add_argument('--network',
                dest='network', help='path to network parameters (default %(default)s)',
                metavar='VGG_PATH', default=VGG_PATH)
        parser.add_argument('--content-weight', type=float,
                dest='content_weight', help='content weight (default %(default)s)',
                metavar='CONTENT_WEIGHT', default=CONTENT_WEIGHT)
        parser.add_argument('--style-weight', type=float,
                dest='style_weight', help='style weight (default %(default)s)',
                metavar='STYLE_WEIGHT', default=STYLE_WEIGHT)
        parser.add_argument('--style-blend-weights', type=float,
                dest='style_blend_weights', help='style blending weights',
                nargs='+', metavar='STYLE_BLEND_WEIGHT')
        parser.add_argument('--tv-weight', type=float,
                dest='tv_weight', help='total variation regularization weight (default %(default)s)',
                metavar='TV_WEIGHT', default=TV_WEIGHT)
        parser.add_argument('--learning-rate', type=float,
                dest='learning_rate', help='learning rate (default %(default)s)',
                metavar='LEARNING_RATE', default=LEARNING_RATE)
        parser.add_argument('--initial',
                dest='initial', help='initial image',
                metavar='INITIAL')
        parser.add_argument('--print-iterations', type=int,
                dest='print_iterations', help='statistics printing frequency',
                metavar='PRINT_ITERATIONS')
        parser.add_argument('--checkpoint-iterations', type=int,
                dest='checkpoint_iterations', help='checkpoint frequency',
                metavar='CHECKPOINT_ITERATIONS')
        return parser
    
    
    def main():
        parser = build_parser()
        options = parser.parse_args()
    
        if not os.path.isfile(options.network):
            parser.error("Network %s does not exist. (Did you forget to download it?)" % options.network)
    
        content_image = imread(options.content)
        #for style in options.styles:
        #    print(style)
        #style_images = [imread(style) for style in options.styles]
        style_images = [imread(options.styles)]
    
        width = options.width
        if width is not None:
            new_shape = (int(math.floor(float(content_image.shape[0]) / content_image.shape[1] * width)), width)
            content_image = scipy.misc.imresize(content_image, new_shape)
        target_shape = content_image.shape
        for i in range(len(style_images)):
            style_scale = STYLE_SCALE
            if options.style_scales is not None:
                style_scale = options.style_scales[i]
            style_images[i] = scipy.misc.imresize(style_images[i], style_scale * target_shape[1] / style_images[i].shape[1])
    
        style_blend_weights = options.style_blend_weights
        if style_blend_weights is None:
            # default is equal weights
            style_blend_weights = [1.0/len(style_images) for _ in style_images]
        else:
            total_blend_weight = sum(style_blend_weights)
            style_blend_weights = [weight/total_blend_weight
                                   for weight in style_blend_weights]
    
        initial = options.initial
        if initial is not None:
            initial = scipy.misc.imresize(imread(initial), content_image.shape[:2])
    
        if options.checkpoint_output and "%s" not in options.checkpoint_output:
            parser.error("To save intermediate images, the checkpoint output "
                         "parameter must contain `%s` (e.g. `foo%s.jpg`)")
    
        for iteration, image in stylize(
            network=options.network,
            initial=initial,
            content=content_image,
            styles=style_images,
            iterations=options.iterations,
            content_weight=options.content_weight,
            style_weight=options.style_weight,
            style_blend_weights=style_blend_weights,
            tv_weight=options.tv_weight,
            learning_rate=options.learning_rate,
            print_iterations=options.print_iterations,
            checkpoint_iterations=options.checkpoint_iterations
        ):
            output_file = None
            if iteration is not None:
                if options.checkpoint_output:
                    output_file = options.checkpoint_output % iteration
            else:
                output_file = options.output
            if output_file:
                imsave(output_file, image)
    
    
    def imread(path):
        return scipy.misc.imread(path).astype(np.float)
    
    
    def imsave(path, img):
        img = np.clip(img, 0, 255).astype(np.uint8)
        scipy.misc.imsave(path, img)
    
    
    if __name__ == '__main__':
        main()

    stylize.py

    # Copyright (c) 2015-2016 Anish Athalye. Released under GPLv3.
    
    import vgg
    
    import tensorflow as tf
    import numpy as np
    
    from sys import stderr
    
    CONTENT_LAYER = 'relu4_2'
    STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
    
    
    try:
        reduce
    except NameError:
        from functools import reduce
    
    
    def stylize(network, initial, content, styles, iterations,
            content_weight, style_weight, style_blend_weights, tv_weight,
            learning_rate, print_iterations=None, checkpoint_iterations=None):
        """
        Stylize images.
    
        This function yields tuples (iteration, image); `iteration` is None
        if this is the final image (the last iteration).  Other tuples are yielded
        every `checkpoint_iterations` iterations.
    
        :rtype: iterator[tuple[int|None,image]]
        """
        shape = (1,) + content.shape
        style_shapes = [(1,) + style.shape for style in styles]
        content_features = {}
        style_features = [{} for _ in styles]
    
        # compute content features in feedforward mode
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=shape)
            net, mean_pixel = vgg.net(network, image)
            content_pre = np.array([vgg.preprocess(content, mean_pixel)])
            content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                    feed_dict={image: content_pre})
    
        # compute style features in feedforward mode
        for i in range(len(styles)):
            g = tf.Graph()
            with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
                image = tf.placeholder('float', shape=style_shapes[i])
                net, _ = vgg.net(network, image)
                style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
                for layer in STYLE_LAYERS:
                    features = net[layer].eval(feed_dict={image: style_pre})
                    features = np.reshape(features, (-1, features.shape[3]))
                    gram = np.matmul(features.T, features) / features.size
                    style_features[i][layer] = gram
    
        # make stylized image using backpropogation
        with tf.Graph().as_default():
            if initial is None:
                noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
                initial = tf.random_normal(shape) * 0.256
            else:
                initial = np.array([vgg.preprocess(initial, mean_pixel)])
                initial = initial.astype('float32')
            image = tf.Variable(initial)
            net, _ = vgg.net(network, image)
    
            # content loss
            content_loss = content_weight * (2 * tf.nn.l2_loss(
                    net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                    content_features[CONTENT_LAYER].size)
            # style loss
            style_loss = 0
            for i in range(len(styles)):
                style_losses = []
                for style_layer in STYLE_LAYERS:
                    layer = net[style_layer]
                    _, height, width, number = map(lambda i: i.value, layer.get_shape())
                    size = height * width * number
                    feats = tf.reshape(layer, (-1, number))
                    gram = tf.matmul(tf.transpose(feats), feats) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)
            # total variation denoising
            tv_y_size = _tensor_size(image[:,1:,:,:])
            tv_x_size = _tensor_size(image[:,:,1:,:])
            tv_loss = tv_weight * 2 * (
                    (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                        tv_y_size) +
                    (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                        tv_x_size))
            # overall loss
            loss = content_loss + style_loss + tv_loss
    
            # optimizer setup
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    
            def print_progress(i, last=False):
                stderr.write('Iteration %d/%d
    ' % (i + 1, iterations))
                if last or (print_iterations and i % print_iterations == 0):
                    stderr.write('  content loss: %g
    ' % content_loss.eval())
                    stderr.write('    style loss: %g
    ' % style_loss.eval())
                    stderr.write('       tv loss: %g
    ' % tv_loss.eval())
                    stderr.write('    total loss: %g
    ' % loss.eval())
    
            # optimization
            best_loss = float('inf')
            best = None
            with tf.Session() as sess:
                sess.run(tf.initialize_all_variables())
                for i in range(iterations):
                    last_step = (i == iterations - 1)
                    print_progress(i, last=last_step)
                    train_step.run()
    
                    if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                        this_loss = loss.eval()
                        if this_loss < best_loss:
                            best_loss = this_loss
                            best = image.eval()
                        yield (
                            (None if last_step else i),
                            vgg.unprocess(best.reshape(shape[1:]), mean_pixel)
                        )
    
    
    def _tensor_size(tensor):
        from operator import mul
        return reduce(mul, (d.value for d in tensor.get_shape()), 1)

    vgg.py

    # Copyright (c) 2015-2016 Anish Athalye. Released under GPLv3.
    
    import tensorflow as tf
    import numpy as np
    import scipy.io
    
    
    def net(data_path, input_image):
        layers = (
            'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
    
            'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
    
            'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
            'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
    
            'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
            'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
    
            'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
            'relu5_3', 'conv5_4', 'relu5_4'
        )
    
        data = scipy.io.loadmat(data_path)
        mean = data['normalization'][0][0][0]
        mean_pixel = np.mean(mean, axis=(0, 1))
        weights = data['layers'][0]
    
        net = {}
        current = input_image
        for i, name in enumerate(layers):
            kind = name[:4]
            if kind == 'conv':
                kernels, bias = weights[i][0][0][0][0]
                # matconvnet: weights are [width, height, in_channels, out_channels]
                # tensorflow: weights are [height, width, in_channels, out_channels]
                kernels = np.transpose(kernels, (1, 0, 2, 3))
                bias = bias.reshape(-1)
                current = _conv_layer(current, kernels, bias)
            elif kind == 'relu':
                current = tf.nn.relu(current)
            elif kind == 'pool':
                current = _pool_layer(current)
            net[name] = current
    
        assert len(net) == len(layers)
        return net, mean_pixel
    
    
    def _conv_layer(input, weights, bias):
        conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1),
                padding='SAME')
        return tf.nn.bias_add(conv, bias)
    
    
    def _pool_layer(input):
        return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
                padding='SAME')
    
    
    def preprocess(image, mean_pixel):
        return image - mean_pixel
    
    
    def unprocess(image, mean_pixel):
        return image + mean_pixel
  • 相关阅读:
    YTU 2802: 判断字符串是否为回文
    YTU 2392: 求各位数字之和
    YTU 2391: 求素数
    HDU 2064:汉诺塔III
    HDU 2187:悼念512汶川大地震遇难同胞——老人是真饿了
    HDU 2036:改革春风吹满地
    HDU 1081:To The Max
    HDU 1257:最少拦截系统
    YTU 2425: C语言习题 输出月份
    YTU 2420: C语言习题 不等长字符串排序
  • 原文地址:https://www.cnblogs.com/adong7639/p/7652862.html
Copyright © 2011-2022 走看看