zoukankan      html  css  js  c++  java
  • 深度学习 ——style reconstruction

    多层神经网络的实质,是逐层提取更复杂、更内在的特征(features)。图像的 style 很难用显式的规则来描述,但人眼却可以轻易分辨。

    (参考论文 A Neural algorithm of artistic style)使用卷积神经网络来做  content, style reconstruction. 

    • loss = content loss + style loss(下文的实现中还额外加入了 total variation 正则项 tv loss)
    1. content reconstruction: 使用 pre-trained 的 VGG19 卷积网络(下文代码加载的是 imagenet-vgg-verydeep-19.mat)来做 content 的重构。用前三层(a、b、c)重构的效果比较好;较深的 d、e 两层丢失了部分细节信息,只保留了比较 high-level 的信息。(我做显著性检测时只使用高层信息,细节信息过少;考虑添加浅层信息后,加上 c 层的信息,检测效果就很好。)
    2. style reconstruction: 在5层cnn上计算,相当于在不同的尺度上匹配图像本身的style

    步骤:

    1. 将content经过CNN,取relu4_2和relu5_2 的content_feature_maps

    2. 将 style 经过CNN, 取5层relu 的style_feature_maps

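    3. 将最后的结果图定义为一个变量 tensor,经过 CNN 得到 feature_map_net,然后最小化 loss:$L = \alpha \, \lVert F_{net} - F_{content} \rVert_2^2 + \beta \, \lVert G(F_{net}) - G(F_{style}) \rVert_2^2 + \gamma \, L_{tv}$,其中 $G(\cdot)$ 表示特征图的 Gram 矩阵,$L_{tv}$ 为 total variation 正则项。被优化的是结果图变量本身,网络权重保持不变。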

    实现:

    1. 入口函数:

    import os
    
    import numpy as np
    import scipy.misc
    
    from stylize import stylize
    
    import math
    from argparse import ArgumentParser
    
    from PIL import Image
    
    # Default values for the command-line options defined in build_parser().
    CONTENT_WEIGHT = 5e0
    CONTENT_WEIGHT_BLEND = 1
    STYLE_WEIGHT = 5e2
    TV_WEIGHT = 1e2
    STYLE_LAYER_WEIGHT_EXP = 1
    LEARNING_RATE = 1e1
    BETA1 = 0.9
    BETA2 = 0.999
    EPSILON = 1e-08
    STYLE_SCALE = 1.0
    ITERATIONS = 1000
    VGG_PATH = 'imagenet-vgg-verydeep-19.mat'
    POOLING = 'max'


    def build_parser():
        """Build the command-line parser for the style-transfer script.

        ``--content``, ``--styles`` and ``--output`` are required; every other
        option is optional and, where a default exists, falls back to the
        module-level constant of the same name.

        :rtype: argparse.ArgumentParser
        """
        parser = ArgumentParser()
        # Image whose content is kept.
        parser.add_argument('--content',
                dest='content', help='content image',
                metavar='CONTENT', required=True)
        # One or more style images (nargs='+').
        parser.add_argument('--styles',
                dest='styles',
                nargs='+', help='one or more style images',
                metavar='STYLE', required=True)
        # Where the final image is written.
        parser.add_argument('--output',
                dest='output', help='output path',
                metavar='OUTPUT', required=True)
        # Number of optimizer steps.
        parser.add_argument('--iterations', type=int,
                dest='iterations', help='iterations (default %(default)s)',
                metavar='ITERATIONS', default=ITERATIONS)
        parser.add_argument('--print-iterations', type=int,
                dest='print_iterations', help='statistics printing frequency',
                metavar='PRINT_ITERATIONS')
        parser.add_argument('--checkpoint-output',
                dest='checkpoint_output', help='checkpoint output format, e.g. output%%s.jpg',
                metavar='OUTPUT')
        parser.add_argument('--checkpoint-iterations', type=int,
                dest='checkpoint_iterations', help='checkpoint frequency',
                metavar='CHECKPOINT_ITERATIONS')
        parser.add_argument('--width', type=int,
                dest='width', help='output width',
                metavar='WIDTH')
        parser.add_argument('--style-scales', type=float,
                dest='style_scales',
                nargs='+', help='one or more style scales',
                metavar='STYLE_SCALE')
        parser.add_argument('--network',
                dest='network', help='path to network parameters (default %(default)s)',
                metavar='VGG_PATH', default=VGG_PATH)
        parser.add_argument('--content-weight-blend', type=float,
                dest='content_weight_blend', help='content weight blend, conv4_2 * blend + conv5_2 * (1-blend) (default %(default)s)',
                metavar='CONTENT_WEIGHT_BLEND', default=CONTENT_WEIGHT_BLEND)
        parser.add_argument('--content-weight', type=float,
                dest='content_weight', help='content weight (default %(default)s)',
                metavar='CONTENT_WEIGHT', default=CONTENT_WEIGHT)
        parser.add_argument('--style-weight', type=float,
                dest='style_weight', help='style weight (default %(default)s)',
                metavar='STYLE_WEIGHT', default=STYLE_WEIGHT)
        parser.add_argument('--style-layer-weight-exp', type=float,
                dest='style_layer_weight_exp', help='style layer weight exponentional increase - weight(layer<n+1>) = weight_exp*weight(layer<n>) (default %(default)s)',
                metavar='STYLE_LAYER_WEIGHT_EXP', default=STYLE_LAYER_WEIGHT_EXP)
        parser.add_argument('--style-blend-weights', type=float,
                dest='style_blend_weights', help='style blending weights',
                nargs='+', metavar='STYLE_BLEND_WEIGHT')
        parser.add_argument('--tv-weight', type=float,
                dest='tv_weight', help='total variation regularization weight (default %(default)s)',
                metavar='TV_WEIGHT', default=TV_WEIGHT)
        parser.add_argument('--learning-rate', type=float,
                dest='learning_rate', help='learning rate (default %(default)s)',
                metavar='LEARNING_RATE', default=LEARNING_RATE)
        parser.add_argument('--beta1', type=float,
                dest='beta1', help='Adam: beta1 parameter (default %(default)s)',
                metavar='BETA1', default=BETA1)
        parser.add_argument('--beta2', type=float,
                dest='beta2', help='Adam: beta2 parameter (default %(default)s)',
                metavar='BETA2', default=BETA2)
        parser.add_argument('--eps', type=float,
                dest='epsilon', help='Adam: epsilon parameter (default %(default)s)',
                metavar='EPSILON', default=EPSILON)
        parser.add_argument('--initial',
                dest='initial', help='initial image',
                metavar='INITIAL')
        parser.add_argument('--initial-noiseblend', type=float,
                dest='initial_noiseblend', help='ratio of blending initial image with normalized noise (if no initial image specified, content image is used) (default %(default)s)',
                metavar='INITIAL_NOISEBLEND')
        parser.add_argument('--preserve-colors', action='store_true',
                dest='preserve_colors', help='style-only transfer (preserving colors) - if color transfer is not needed')
        parser.add_argument('--pooling',
                dest='pooling', help='pooling layer configuration: max or avg (default %(default)s)',
                metavar='POOLING', default=POOLING)
        return parser


    def main():
        """Parse the command line, run stylize() and save the images it yields.

        Checkpoint images are written to ``--checkpoint-output`` (formatted with
        the iteration number) when that option is given; the final image is
        written to ``--output``.
        """
        parser = build_parser()
        options = parser.parse_args()

        # Validate the cheap things first, before any image is loaded.
        if not os.path.isfile(options.network):
            parser.error("Network %s does not exist. (Did you forget to download it?)" % options.network)

        if options.checkpoint_output and "%s" not in options.checkpoint_output:
            parser.error("To save intermediate images, the checkpoint output "
                         "parameter must contain `%s` (e.g. `foo%s.jpg`)")

        content_image = imread(options.content)
        style_images = [imread(style) for style in options.styles]

        # stylize() and the loop below index these lists once per style image.
        if (options.style_scales is not None
                and len(options.style_scales) < len(style_images)):
            parser.error("--style-scales needs one value per style image")
        if (options.style_blend_weights is not None
                and len(options.style_blend_weights) < len(style_images)):
            parser.error("--style-blend-weights needs one value per style image")

        # Optionally resize the content image to the requested width, keeping
        # its aspect ratio.
        width = options.width
        if width is not None:
            new_shape = (int(math.floor(float(content_image.shape[0]) /
                    content_image.shape[1] * width)), width)
            content_image = _imresize(content_image, new_shape)
        target_shape = content_image.shape

        # Scale every style image relative to the content image's width.
        for i, style_image in enumerate(style_images):
            style_scale = STYLE_SCALE
            if options.style_scales is not None:
                style_scale = options.style_scales[i]
            style_images[i] = _imresize(style_image, style_scale *
                    target_shape[1] / style_image.shape[1])

        # Normalize the per-style blend weights so that they sum to 1.
        style_blend_weights = options.style_blend_weights
        if style_blend_weights is None:
            # default is equal weights
            style_blend_weights = [1.0/len(style_images) for _ in style_images]
        else:
            total_blend_weight = sum(style_blend_weights)
            if total_blend_weight == 0:
                parser.error("--style-blend-weights must not sum to zero")
            style_blend_weights = [weight/total_blend_weight
                                   for weight in style_blend_weights]

        initial = options.initial
        if initial is not None:
            initial = _imresize(imread(initial), content_image.shape[:2])
            # Initial guess is specified, but not noiseblend - no noise should be blended
            if options.initial_noiseblend is None:
                options.initial_noiseblend = 0.0
        else:
            # Neither initial nor noiseblend is provided: fall back to a
            # randomly generated initial guess.
            if options.initial_noiseblend is None:
                options.initial_noiseblend = 1.0
            if options.initial_noiseblend < 1.0:
                initial = content_image

        for iteration, image in stylize(
            network=options.network,
            initial=initial,
            initial_noiseblend=options.initial_noiseblend,
            content=content_image,
            styles=style_images,
            preserve_colors=options.preserve_colors,
            iterations=options.iterations,
            content_weight=options.content_weight,
            content_weight_blend=options.content_weight_blend,
            style_weight=options.style_weight,
            style_layer_weight_exp=options.style_layer_weight_exp,
            style_blend_weights=style_blend_weights,
            tv_weight=options.tv_weight,
            learning_rate=options.learning_rate,
            beta1=options.beta1,
            beta2=options.beta2,
            epsilon=options.epsilon,
            pooling=options.pooling,
            print_iterations=options.print_iterations,
            checkpoint_iterations=options.checkpoint_iterations
        ):
            # stylize() yields iteration=None for the final image and the
            # iteration index for intermediate checkpoints.
            output_file = None
            combined_rgb = image
            if iteration is not None:
                if options.checkpoint_output:
                    output_file = options.checkpoint_output % iteration
            else:
                output_file = options.output
            if output_file:
                imsave(output_file, combined_rgb)


    def imread(path):
        """Load the image at ``path`` as a float array.

        Grayscale images are expanded to three identical channels and the
        fourth (alpha) channel of four-channel images is dropped.

        Uses Pillow instead of ``scipy.misc.imread``, which newer SciPy releases
        no longer provide, and builtin ``float`` instead of the ``np.float``
        alias, which newer NumPy releases no longer provide.
        """
        with Image.open(path) as im:
            if im.mode == 'P':
                # Expand palette images to real colour values rather than
                # returning raw palette indices.
                im = im.convert('RGBA' if 'transparency' in im.info else 'RGB')
            img = np.array(im).astype(float)
        if len(img.shape) == 2:
            # grayscale
            img = np.dstack((img, img, img))
        elif img.shape[2] == 4:
            # PNG with alpha channel
            img = img[:, :, :3]
        return img


    def _imresize(arr, size):
        """Resize image array ``arr`` with bilinear interpolation.

        ``size`` is either a ``(height, width)`` tuple giving the output size
        or a scalar fraction of the current size. Pixel values are clipped to
        0..255 and the result is a uint8 array.

        Replaces ``scipy.misc.imresize``, which newer SciPy releases no longer
        provide.
        """
        img = Image.fromarray(np.clip(arr, 0, 255).astype(np.uint8))
        if isinstance(size, tuple):
            height, width = size
        else:
            # Never request a zero-sized image for very small fractions.
            width = max(1, int(img.width * size))
            height = max(1, int(img.height * size))
        return np.array(img.resize((width, height), Image.BILINEAR))


    def imsave(path, img):
        """Clip ``img`` to 0..255, convert it to uint8 and save it to ``path``."""
        img = np.clip(img, 0, 255).astype(np.uint8)
        Image.fromarray(img).save(path, quality=95)


    if __name__ == '__main__':
        main()

      2. style 生成函数:

    import vgg
    
    import tensorflow as tf
    import numpy as np
    
    from sys import stderr
    
    from PIL import Image
    
    # Layers whose activations feed the content loss and the style loss.
    CONTENT_LAYERS = ('relu4_2', 'relu5_2')
    STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')

    # `reduce` is a builtin only on Python 2; functools provides it on both
    # Python 2 and Python 3.
    from functools import reduce


    def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
            content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
            learning_rate, beta1, beta2, epsilon, pooling,
            print_iterations=None, checkpoint_iterations=None):
        """
        Stylize images.

        This function yields tuples (iteration, image); `iteration` is None
        if this is the final image (the last iteration).  Other tuples are yielded
        every `checkpoint_iterations` iterations.

        :param network: path handed to ``vgg.load_net``.
        :param initial: image to start the optimization from, or None to start
            from random noise.
        :param initial_noiseblend: fraction of random noise blended into
            ``initial``; ``initial`` itself is weighted by
            ``1 - initial_noiseblend``.
        :param content: content image array (assumed height x width x channels
            -- confirm against the caller).
        :param styles: sequence of style image arrays.
        :param preserve_colors: when true, keep the chroma of ``content`` and
            take only the luma from the stylized result.
        :param iterations: number of optimizer steps.
        :param content_weight: overall weight of the content loss.
        :param content_weight_blend: weight of relu4_2 in the content loss;
            relu5_2 gets ``1 - content_weight_blend``.
        :param style_weight: overall weight of the style loss.
        :param style_layer_weight_exp: factor by which the weight of each
            successive style layer grows; the layer weights are normalized to
            sum to 1 afterwards.
        :param style_blend_weights: one weight per entry of ``styles``.
        :param tv_weight: weight of the total-variation term.
        :param learning_rate: Adam learning rate.
        :param beta1: Adam beta1.
        :param beta2: Adam beta2.
        :param epsilon: Adam epsilon.
        :param pooling: pooling mode handed to ``vgg.net_preloaded``.
        :param print_iterations: print the losses every this many iterations
            (None or 0 prints them only after the last iteration).
        :param checkpoint_iterations: yield an intermediate image every this
            many iterations (None or 0 yields only the final image).

        :rtype: iterator[tuple[int|None,image]]
        """
        # Prepend a batch dimension of 1 to every image shape.
        shape = (1,) + content.shape
        style_shapes = [(1,) + style.shape for style in styles]
        content_features = {}
        style_features = [{} for _ in styles]

        vgg_weights, vgg_mean_pixel = vgg.load_net(network)

        # Per-layer style weights: geometric progression, normalized to sum to 1.
        layer_weight = 1.0
        style_layers_weights = {}
        for style_layer in STYLE_LAYERS:
            style_layers_weights[style_layer] = layer_weight
            layer_weight *= style_layer_weight_exp
        layer_weights_sum = sum(style_layers_weights[style_layer]
                                for style_layer in STYLE_LAYERS)
        for style_layer in STYLE_LAYERS:
            style_layers_weights[style_layer] /= layer_weights_sum

        # compute content features in feedforward mode
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session():
            image = tf.placeholder('float', shape=shape)
            net = vgg.net_preloaded(vgg_weights, image, pooling)
            content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
            for layer in CONTENT_LAYERS:
                content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

        # compute style features (Gram matrices) in feedforward mode, one
        # graph per style image because each may have a different shape
        for i, style in enumerate(styles):
            g = tf.Graph()
            with g.as_default(), g.device('/cpu:0'), tf.Session():
                image = tf.placeholder('float', shape=style_shapes[i])
                net = vgg.net_preloaded(vgg_weights, image, pooling)
                style_pre = np.array([vgg.preprocess(style, vgg_mean_pixel)])
                for layer in STYLE_LAYERS:
                    features = net[layer].eval(feed_dict={image: style_pre})
                    features = np.reshape(features, (-1, features.shape[3]))
                    gram = np.matmul(features.T, features) / features.size
                    style_features[i][layer] = gram

        initial_content_noise_coeff = 1.0 - initial_noiseblend

        # make stylized image using backpropagation: the image itself is the
        # only variable being optimized
        with tf.Graph().as_default():
            if initial is None:
                initial = tf.random_normal(shape) * 0.256
            else:
                initial = np.array([vgg.preprocess(initial, vgg_mean_pixel)])
                initial = initial.astype('float32')
                initial = (initial * initial_content_noise_coeff +
                           (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff))
            image = tf.Variable(initial)
            net = vgg.net_preloaded(vgg_weights, image, pooling)

            # content loss
            content_layers_weights = {}
            content_layers_weights['relu4_2'] = content_weight_blend
            content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

            content_losses = []
            for content_layer in CONTENT_LAYERS:
                content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                        net[content_layer] - content_features[content_layer]) /
                        content_features[content_layer].size))
            content_loss = reduce(tf.add, content_losses)

            # style loss
            style_loss = 0
            for i in range(len(styles)):
                style_losses = []
                for style_layer in STYLE_LAYERS:
                    layer = net[style_layer]
                    _, height, width, number = (d.value for d in layer.get_shape())
                    size = height * width * number
                    feats = tf.reshape(layer, (-1, number))
                    gram = tf.matmul(tf.transpose(feats), feats) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(style_layers_weights[style_layer] * 2 *
                            tf.nn.l2_loss(gram - style_gram) / style_gram.size)
                style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

            # total variation denoising
            tv_y_size = _tensor_size(image[:,1:,:,:])
            tv_x_size = _tensor_size(image[:,:,1:,:])
            tv_loss = tv_weight * 2 * (
                    (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                        tv_y_size) +
                    (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                        tv_x_size))

            # overall loss
            loss = content_loss + style_loss + tv_loss

            # optimizer setup
            train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

            def print_progress():
                # One loss per line so that the progress log stays readable.
                stderr.write(' content loss: %g\n' % content_loss.eval())
                stderr.write(' style loss: %g\n' % style_loss.eval())
                stderr.write(' tv loss: %g\n' % tv_loss.eval())
                stderr.write(' total loss: %g\n' % loss.eval())

            # optimization
            best_loss = float('inf')
            best = None
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                stderr.write('Optimization started...\n')
                if print_iterations:
                    print_progress()
                for i in range(iterations):
                    stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                    train_step.run()

                    last_step = (i == iterations - 1)
                    if last_step or (print_iterations and i % print_iterations == 0):
                        print_progress()

                    if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                        # Keep the lowest-loss image seen at any checkpoint;
                        # that image, not necessarily the current one, is
                        # what gets yielded.
                        this_loss = loss.eval()
                        if this_loss < best_loss:
                            best_loss = this_loss
                            best = image.eval()

                        img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                        if preserve_colors:
                            original_image = np.clip(content, 0, 255)
                            styled_image = np.clip(img_out, 0, 255)

                            # Luminosity transfer steps:
                            # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                            # 2. Convert stylized grayscale into YUV (YCbCr)
                            # 3. Convert original image into YUV (YCbCr)
                            # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                            # 5. Convert recombined image from YUV back to RGB

                            # 1
                            styled_grayscale = rgb2gray(styled_image)
                            styled_grayscale_rgb = gray2rgb(styled_grayscale)

                            # 2
                            styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                            # 3
                            original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                            # 4
                            rows, cols, _ = original_image.shape
                            combined_yuv = np.empty((rows, cols, 3), dtype=np.uint8)
                            combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                            combined_yuv[..., 1] = original_yuv[..., 1]
                            combined_yuv[..., 2] = original_yuv[..., 2]

                            # 5
                            img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))

                        yield (
                            (None if last_step else i),
                            img_out
                        )


    def _tensor_size(tensor):
        """Return the number of elements of ``tensor`` (product of its static dimensions)."""
        from operator import mul
        return reduce(mul, (d.value for d in tensor.get_shape()), 1)


    def rgb2gray(rgb):
        """Convert an RGB array to grayscale using the weights 0.299, 0.587, 0.114."""
        return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])


    def gray2rgb(gray):
        """Expand a 2-D grayscale array to a float32 array with three identical channels."""
        rows, cols = gray.shape
        rgb = np.empty((rows, cols, 3), dtype=np.float32)
        rgb[:, :, 2] = rgb[:, :, 1] = rgb[:, :, 0] = gray
        return rgb

      3. vgg 函数

    import tensorflow as tf
    import numpy as np
    import scipy.io
    
    # Names of the VGG-19 layers from conv1_1 through relu5_4, in forward order.
    # net_preloaded() walks this tuple and dispatches on the first four
    # characters of each name (conv / relu / pool).
    VGG19_LAYERS = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
    
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
    
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
    
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
    
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )
    
    def load_net(data_path):
        """Load network parameters from the .mat file at ``data_path``.

        :returns: ``(weights, mean_pixel)`` -- the ``layers`` entry of the file
            and the per-channel mean of its normalization image.
        :raises ValueError: if the file lacks any of the ``layers``,
            ``classes`` or ``normalization`` entries.
        """
        data = scipy.io.loadmat(data_path)
        if not all(i in data for i in ('layers', 'classes', 'normalization')):
            raise ValueError("You're using the wrong VGG19 data. Please follow the instructions in the README to download the correct data.")
        mean = data['normalization'][0][0][0]
        # Average over the two spatial axes to get one mean value per channel.
        mean_pixel = np.mean(mean, axis=(0, 1))
        weights = data['layers'][0]
        return weights, mean_pixel


    def net_preloaded(weights, input_image, pooling):
        """Build the network on top of ``input_image`` from preloaded weights.

        :param weights: per-layer parameters as returned by ``load_net``,
            indexed in the same order as ``VGG19_LAYERS``.
        :param input_image: tensor the first layer is applied to.
        :param pooling: pooling mode handed to ``_pool_layer``.
        :returns: dict mapping every name in ``VGG19_LAYERS`` to that layer's
            output tensor.
        """
        net = {}
        current = input_image
        for i, name in enumerate(VGG19_LAYERS):
            kind = name[:4]
            if kind == 'conv':
                kernels, bias = weights[i][0][0][0][0]
                # matconvnet: weights are [width, height, in_channels, out_channels]
                # tensorflow: weights are [height, width, in_channels, out_channels]
                kernels = np.transpose(kernels, (1, 0, 2, 3))
                bias = bias.reshape(-1)
                current = _conv_layer(current, kernels, bias)
            elif kind == 'relu':
                current = tf.nn.relu(current)
            elif kind == 'pool':
                current = _pool_layer(current, pooling)
            net[name] = current

        assert len(net) == len(VGG19_LAYERS)
        return net


    def _conv_layer(input, weights, bias):
        """Apply a stride-1, SAME-padded convolution with constant weights, then add ``bias``."""
        conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1),
                padding='SAME')
        return tf.nn.bias_add(conv, bias)


    def _pool_layer(input, pooling):
        """Apply 2x2, stride-2, SAME-padded pooling.

        ``pooling == 'avg'`` selects average pooling; any other value,
        including unrecognised ones, selects max pooling.
        """
        if pooling == 'avg':
            return tf.nn.avg_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
                    padding='SAME')
        else:
            return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
                    padding='SAME')


    def preprocess(image, mean_pixel):
        """Subtract ``mean_pixel`` from ``image``."""
        return image - mean_pixel


    def unprocess(image, mean_pixel):
        """Add ``mean_pixel`` back to ``image`` (inverse of ``preprocess``)."""
        return image + mean_pixel

      

  • 相关阅读:
    2020重新出发,NOSQL,MongoDB分布式集群架构
    2020重新出发,NOSQL,MongoDB的操作和索引
    2020重新出发,NOSQL,MongoDB是什么?
    2020重新出发,NOSQL,redis高并发系统的分析和设计
    2020重新出发,NOSQL,redis互联网架构分析
    2020重新出发,NOSQL,Redis和数据库结合
    2020重新出发,NOSQL,Redis主从复制
    collections模块
    常用模块
    python面向对象的内置函数和反射
  • 原文地址:https://www.cnblogs.com/fanhaha/p/7632992.html
Copyright © 2011-2022 走看看