zoukankan      html  css  js  c++  java
  • 02-人脸识别-基于MTCNN,框选人脸区域-detect_face

    (本系列随笔持续更新)

    这部分代码是基于参考中的链接,修改后适用于TensorFlow1.6.0版本的代码。由于TensorFlow的频繁更新,所以不一定支持后续新或者就版本,特此说明。

    程序的最初版,来自“山人7” [参考1,参考2],但是在新的TensorFlow下面不能直接运行。

    修改后版本,来自ShyBigBoy,[参考3,参考4],可以在TensorFlow1.6.0上运行。

    代码:detect_face.py

    """ Tensorflow implementation of the face detection / alignment algorithm found at
    https://github.com/kpzhang93/MTCNN_face_detection_alignment
    """
    # MIT License
    # 
    # Copyright (c) 2016 David Sandberg
    # 
    # Permission is hereby granted, free of charge, to any person obtaining a copy
    # of this software and associated documentation files (the "Software"), to deal
    # in the Software without restriction, including without limitation the rights
    # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    # copies of the Software, and to permit persons to whom the Software is
    # furnished to do so, subject to the following conditions:
    # 
    # The above copyright notice and this permission notice shall be included in all
    # copies or substantial portions of the Software.
    # 
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    # SOFTWARE.
    
    from __future__ import absolute_import
    from __future__ import division
    from __future__ import print_function
    from six import string_types, iteritems
    
    import numpy as np
    import tensorflow as tf
    #from math import floor
    import cv2
    import os
    
    def layer(op):
        """Decorator for composable network layers."""
    
        def layer_decorated(self, *args, **kwargs):
            # Automatically set a name if not provided.
            name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
            # Figure out the layer inputs.
            if len(self.terminals) == 0:
                raise RuntimeError('No input variables found for layer %s.' % name)
            elif len(self.terminals) == 1:
                layer_input = self.terminals[0]
            else:
                layer_input = list(self.terminals)
            # Perform the operation and get the output.
            layer_output = op(self, layer_input, *args, **kwargs)
            # Add to layer LUT.
            self.layers[name] = layer_output
            # This output is now the input for the next layer.
            self.feed(layer_output)
            # Return self for chained calls.
            return self
    
        return layer_decorated
    
    class Network(object):
    
        def __init__(self, inputs, trainable=True):
            # The input nodes for this network
            self.inputs = inputs
            # The current list of terminal nodes
            self.terminals = []
            # Mapping from layer names to layers
            self.layers = dict(inputs)
            # If true, the resulting variables are set as trainable
            self.trainable = trainable
    
            self.setup()
    
        def setup(self):
            """Construct the network. """
            raise NotImplementedError('Must be implemented by the subclass.')
    
        def load(self, data_path, session, ignore_missing=False):
            """Load network weights.
            data_path: The path to the numpy-serialized network weights
            session: The current TensorFlow session
            ignore_missing: If true, serialized weights for missing layers are ignored.
            """
            data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member
    
            for op_name in data_dict:
                with tf.variable_scope(op_name, reuse=True):
                    for param_name, data in iteritems(data_dict[op_name]):
                        try:
                            var = tf.get_variable(param_name)
                            session.run(var.assign(data))
                        except ValueError:
                            if not ignore_missing:
                                raise
    
        def feed(self, *args):
            """Set the input(s) for the next operation by replacing the terminal nodes.
            The arguments can be either layer names or the actual layers.
            """
            assert len(args) != 0
            self.terminals = []
            for fed_layer in args:
                if isinstance(fed_layer, string_types):
                    try:
                        fed_layer = self.layers[fed_layer]
                    except KeyError:
                        raise KeyError('Unknown layer name fed: %s' % fed_layer)
                self.terminals.append(fed_layer)
            return self
    
        def get_output(self):
            """Returns the current network output."""
            return self.terminals[-1]
    
        def get_unique_name(self, prefix):
            """Returns an index-suffixed unique name for the given prefix.
            This is used for auto-generating layer names based on the type-prefix.
            """
            ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
            return '%s_%d' % (prefix, ident)
    
        def make_var(self, name, shape):
            """Creates a new TensorFlow variable."""
            return tf.get_variable(name, shape, trainable=self.trainable)
    
        def validate_padding(self, padding):
            """Verifies that the padding is one of the supported ones."""
            assert padding in ('SAME', 'VALID')
    
        @layer
        def conv(self,
                 inp,
                 k_h,
                 k_w,
                 c_o,
                 s_h,
                 s_w,
                 name,
                 relu=True,
                 padding='SAME',
                 group=1,
                 biased=True):
            # Verify that the padding is acceptable
            self.validate_padding(padding)
            # Get the number of channels in the input
            c_i = int(inp.get_shape()[-1])
            # Verify that the grouping parameter is valid
            assert c_i % group == 0
            assert c_o % group == 0
            # Convolution for a given input and kernel
            convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
            with tf.variable_scope(name) as scope:
                kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
                # This is the common-case. Convolve the input without any further complications.
                output = convolve(inp, kernel)
                # Add the biases
                if biased:
                    biases = self.make_var('biases', [c_o])
                    output = tf.nn.bias_add(output, biases)
                if relu:
                    # ReLU non-linearity
                    output = tf.nn.relu(output, name=scope.name)
                return output
    
        @layer
        def prelu(self, inp, name):
            with tf.variable_scope(name):
                i = int(inp.get_shape()[-1])
                alpha = self.make_var('alpha', shape=(i,))
                output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))
            return output
    
        @layer
        def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
            self.validate_padding(padding)
            return tf.nn.max_pool(inp,
                                  ksize=[1, k_h, k_w, 1],
                                  strides=[1, s_h, s_w, 1],
                                  padding=padding,
                                  name=name)
    
        @layer
        def fc(self, inp, num_out, name, relu=True):
            with tf.variable_scope(name):
                input_shape = inp.get_shape()
                if input_shape.ndims == 4:
                    # The input is spatial. Vectorize it first.
                    dim = 1
                    for d in input_shape[1:].as_list():
                        dim *= int(d)
                    feed_in = tf.reshape(inp, [-1, dim])
                else:
                    feed_in, dim = (inp, input_shape[-1].value)
                weights = self.make_var('weights', shape=[dim, num_out])
                biases = self.make_var('biases', [num_out])
                op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
                fc = op(feed_in, weights, biases, name=name)
                return fc
    
    
        """
        Multi dimensional softmax,
        refer to https://github.com/tensorflow/tensorflow/issues/210
        compute softmax along the dimension of target
        the native softmax only supports batch_size x dimension
        """
        @layer
        def softmax(self, target, axis, name=None):
            max_axis = tf.reduce_max(target, axis, keepdims=True)
            target_exp = tf.exp(target-max_axis)
            normalize = tf.reduce_sum(target_exp, axis, keepdims=True)
            softmax = tf.div(target_exp, normalize, name)
            return softmax
        
    class PNet(Network):
        def setup(self):
            (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
                 .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
                 .prelu(name='PReLU1')
                 .max_pool(2, 2, 2, 2, name='pool1')
                 .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
                 .prelu(name='PReLU2')
                 .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
                 .prelu(name='PReLU3')
                 .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
                 .softmax(3,name='prob1'))
    
            (self.feed('PReLU3') #pylint: disable=no-value-for-parameter
                 .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))
            
    class RNet(Network):
        def setup(self):
            (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
                 .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
                 .prelu(name='prelu1')
                 .max_pool(3, 3, 2, 2, name='pool1')
                 .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
                 .prelu(name='prelu2')
                 .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
                 .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
                 .prelu(name='prelu3')
                 .fc(128, relu=False, name='conv4')
                 .prelu(name='prelu4')
                 .fc(2, relu=False, name='conv5-1')
                 .softmax(1,name='prob1'))
    
            (self.feed('prelu4') #pylint: disable=no-value-for-parameter
                 .fc(4, relu=False, name='conv5-2'))
    
    class ONet(Network):
        def setup(self):
            (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
                 .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
                 .prelu(name='prelu1')
                 .max_pool(3, 3, 2, 2, name='pool1')
                 .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
                 .prelu(name='prelu2')
                 .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
                 .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
                 .prelu(name='prelu3')
                 .max_pool(2, 2, 2, 2, name='pool3')
                 .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
                 .prelu(name='prelu4')
                 .fc(256, relu=False, name='conv5')
                 .prelu(name='prelu5')
                 .fc(2, relu=False, name='conv6-1')
                 .softmax(1, name='prob1'))
    
            (self.feed('prelu5') #pylint: disable=no-value-for-parameter
                 .fc(4, relu=False, name='conv6-2'))
    
            (self.feed('prelu5') #pylint: disable=no-value-for-parameter
                 .fc(10, relu=False, name='conv6-3'))
    
    def create_mtcnn(sess, model_path):
        if not model_path:
            model_path,_ = os.path.split(os.path.realpath(__file__))
    
        with tf.variable_scope('pnet'):
            data = tf.placeholder(tf.float32, (None,None,None,3), 'input')
            pnet = PNet({'data':data})
            pnet.load(os.path.join(model_path, 'det1.npy'), sess)
        with tf.variable_scope('rnet'):
            data = tf.placeholder(tf.float32, (None,24,24,3), 'input')
            rnet = RNet({'data':data})
            rnet.load(os.path.join(model_path, 'det2.npy'), sess)
        with tf.variable_scope('onet'):
            data = tf.placeholder(tf.float32, (None,48,48,3), 'input')
            onet = ONet({'data':data})
            onet.load(os.path.join(model_path, 'det3.npy'), sess)
            
        pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})
        rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})
        onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})
        return pnet_fun, rnet_fun, onet_fun
    
    def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
        """Detects faces in an image, and returns bounding boxes and points for them.
        img: input image
        minsize: minimum faces' size
        pnet, rnet, onet: caffemodel
        threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold
        factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
        """
        factor_count=0
        total_boxes=np.empty((0,9))
        points=np.empty(0)
        h=img.shape[0]
        w=img.shape[1]
        minl=np.amin([h, w])
        m=12.0/minsize
        minl=minl*m
        # create scale pyramid
        scales=[]
        while minl>=12:
            scales += [m*np.power(factor, factor_count)]
            minl = minl*factor
            factor_count += 1
    
        # first stage
        for scale in scales:
            hs=int(np.ceil(h*scale))
            ws=int(np.ceil(w*scale))
            im_data = imresample(img, (hs, ws))
            im_data = (im_data-127.5)*0.0078125
            img_x = np.expand_dims(im_data, 0)
            img_y = np.transpose(img_x, (0,2,1,3))
            out = pnet(img_y)
            out0 = np.transpose(out[0], (0,2,1,3))
            out1 = np.transpose(out[1], (0,2,1,3))
            
            boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])
            
            # inter-scale nms
            pick = nms(boxes.copy(), 0.5, 'Union')
            if boxes.size>0 and pick.size>0:
                boxes = boxes[pick,:]
                total_boxes = np.append(total_boxes, boxes, axis=0)
    
        numbox = total_boxes.shape[0]
        if numbox>0:
            pick = nms(total_boxes.copy(), 0.7, 'Union')
            total_boxes = total_boxes[pick,:]
            regw = total_boxes[:,2]-total_boxes[:,0]
            regh = total_boxes[:,3]-total_boxes[:,1]
            qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
            qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
            qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
            qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
            total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
            total_boxes = rerec(total_boxes.copy())
            total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
    
        numbox = total_boxes.shape[0]
        if numbox>0:
            # second stage
            tempimg = np.zeros((24,24,3,numbox))
            for k in range(0,numbox):
                tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
                tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
                if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                    tempimg[:,:,:,k] = imresample(tmp, (24, 24))
                else:
                    return np.empty()
            tempimg = (tempimg-127.5)*0.0078125
            tempimg1 = np.transpose(tempimg, (3,1,0,2))
            out = rnet(tempimg1)
            out0 = np.transpose(out[0])
            out1 = np.transpose(out[1])
            score = out1[1,:]
            ipass = np.where(score>threshold[1])
            total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
            mv = out0[:,ipass[0]]
            if total_boxes.shape[0]>0:
                pick = nms(total_boxes, 0.7, 'Union')
                total_boxes = total_boxes[pick,:]
                total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
                total_boxes = rerec(total_boxes.copy())
    
        numbox = total_boxes.shape[0]
        if numbox>0:
            # third stage
            total_boxes = np.fix(total_boxes).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
            tempimg = np.zeros((48,48,3,numbox))
            for k in range(0,numbox):
                tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
                tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
                if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                    tempimg[:,:,:,k] = imresample(tmp, (48, 48))
                else:
                    return np.empty()
            tempimg = (tempimg-127.5)*0.0078125
            tempimg1 = np.transpose(tempimg, (3,1,0,2))
            out = onet(tempimg1)
            out0 = np.transpose(out[0])
            out1 = np.transpose(out[1])
            out2 = np.transpose(out[2])
            score = out2[1,:]
            points = out1
            ipass = np.where(score>threshold[2])
            points = points[:,ipass[0]]
            total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
            mv = out0[:,ipass[0]]
    
            w = total_boxes[:,2]-total_boxes[:,0]+1
            h = total_boxes[:,3]-total_boxes[:,1]+1
            points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
            points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
            if total_boxes.shape[0]>0:
                total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
                pick = nms(total_boxes.copy(), 0.7, 'Min')
                total_boxes = total_boxes[pick,:]
                points = points[:,pick]
                    
        return total_boxes, points
    
    
    def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
        """Detects faces in a list of images
        images: list containing input images
        detection_window_size_ratio: ratio of minimum face size to smallest image dimension
        pnet, rnet, onet: caffemodel
        threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1]
        factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
        """
        all_scales = [None] * len(images)
        images_with_boxes = [None] * len(images)
    
        for i in range(len(images)):
            images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
    
        # create scale pyramid
        for index, img in enumerate(images):
            all_scales[index] = []
            h = img.shape[0]
            w = img.shape[1]
            minsize = int(detection_window_size_ratio * np.minimum(w, h))
            factor_count = 0
            minl = np.amin([h, w])
            if minsize <= 12:
                minsize = 12
    
            m = 12.0 / minsize
            minl = minl * m
            while minl >= 12:
                all_scales[index].append(m * np.power(factor, factor_count))
                minl = minl * factor
                factor_count += 1
    
        # # # # # # # # # # # # #
        # first stage - fast proposal network (pnet) to obtain face candidates
        # # # # # # # # # # # # #
    
        images_obj_per_resolution = {}
    
        # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images
    
        for index, scales in enumerate(all_scales):
            h = images[index].shape[0]
            w = images[index].shape[1]
    
            for scale in scales:
                hs = int(np.ceil(h * scale))
                ws = int(np.ceil(w * scale))
    
                if (ws, hs) not in images_obj_per_resolution:
                    images_obj_per_resolution[(ws, hs)] = []
    
                im_data = imresample(images[index], (hs, ws))
                im_data = (im_data - 127.5) * 0.0078125
                img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
                images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
    
        for resolution in images_obj_per_resolution:
            images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
            outs = pnet(images_per_resolution)
    
            for index in range(len(outs[0])):
                scale = images_obj_per_resolution[resolution][index]['scale']
                image_index = images_obj_per_resolution[resolution][index]['index']
                out0 = np.transpose(outs[0][index], (1, 0, 2))
                out1 = np.transpose(outs[1][index], (1, 0, 2))
    
                boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
    
                # inter-scale nms
                pick = nms(boxes.copy(), 0.5, 'Union')
                if boxes.size > 0 and pick.size > 0:
                    boxes = boxes[pick, :]
                    images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
                                                                              boxes,
                                                                              axis=0)
    
        for index, image_obj in enumerate(images_with_boxes):
            numbox = image_obj['total_boxes'].shape[0]
            if numbox > 0:
                h = images[index].shape[0]
                w = images[index].shape[1]
                pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
                image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
                regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
                regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
                qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
                qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
                qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
                qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
                image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
                image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
                image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
    
                numbox = image_obj['total_boxes'].shape[0]
                tempimg = np.zeros((24, 24, 3, numbox))
    
                if numbox > 0:
                    for k in range(0, numbox):
                        tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                        tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                        if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                            tempimg[:, :, :, k] = imresample(tmp, (24, 24))
                        else:
                            return np.empty()
    
                    tempimg = (tempimg - 127.5) * 0.0078125
                    image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
    
        # # # # # # # # # # # # #
        # second stage - refinement of face candidates with rnet
        # # # # # # # # # # # # #
    
        bulk_rnet_input = np.empty((0, 24, 24, 3))
        for index, image_obj in enumerate(images_with_boxes):
            if 'rnet_input' in image_obj:
                bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
    
        out = rnet(bulk_rnet_input)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1, :]
    
        i = 0
        for index, image_obj in enumerate(images_with_boxes):
            if 'rnet_input' not in image_obj:
                continue
    
            rnet_input_count = image_obj['rnet_input'].shape[0]
            score_per_image = score[i:i + rnet_input_count]
            out0_per_image = out0[:, i:i + rnet_input_count]
    
            ipass = np.where(score_per_image > threshold[1])
            image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                                  np.expand_dims(score_per_image[ipass].copy(), 1)])
    
            mv = out0_per_image[:, ipass[0]]
    
            if image_obj['total_boxes'].shape[0] > 0:
                h = images[index].shape[0]
                w = images[index].shape[1]
                pick = nms(image_obj['total_boxes'], 0.7, 'Union')
                image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
                image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
                image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
    
                numbox = image_obj['total_boxes'].shape[0]
    
                if numbox > 0:
                    tempimg = np.zeros((48, 48, 3, numbox))
                    image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
                    dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
    
                    for k in range(0, numbox):
                        tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                        tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                        if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                            tempimg[:, :, :, k] = imresample(tmp, (48, 48))
                        else:
                            return np.empty()
                    tempimg = (tempimg - 127.5) * 0.0078125
                    image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
    
            i += rnet_input_count
    
        # # # # # # # # # # # # #
        # third stage - further refinement and facial landmarks positions with onet
        # # # # # # # # # # # # #
    
        bulk_onet_input = np.empty((0, 48, 48, 3))
        for index, image_obj in enumerate(images_with_boxes):
            if 'onet_input' in image_obj:
                bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
    
        out = onet(bulk_onet_input)
    
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1, :]
        points = out1
    
        i = 0
        ret = []
        for index, image_obj in enumerate(images_with_boxes):
            if 'onet_input' not in image_obj:
                ret.append(None)
                continue
    
            onet_input_count = image_obj['onet_input'].shape[0]
    
            out0_per_image = out0[:, i:i + onet_input_count]
            score_per_image = score[i:i + onet_input_count]
            points_per_image = points[:, i:i + onet_input_count]
    
            ipass = np.where(score_per_image > threshold[2])
            points_per_image = points_per_image[:, ipass[0]]
    
            image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                                  np.expand_dims(score_per_image[ipass].copy(), 1)])
            mv = out0_per_image[:, ipass[0]]
    
            w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
            h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
            points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
                image_obj['total_boxes'][:, 0], (5, 1)) - 1
            points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
                image_obj['total_boxes'][:, 1], (5, 1)) - 1
    
            if image_obj['total_boxes'].shape[0] > 0:
                image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
                pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
                image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
                points_per_image = points_per_image[:, pick]
    
                ret.append((image_obj['total_boxes'], points_per_image))
            else:
                ret.append(None)
    
            i += onet_input_count
    
        return ret
    
    
    # function [boundingbox] = bbreg(boundingbox,reg)
    def bbreg(boundingbox,reg):
        """Calibrate bounding boxes"""
        if reg.shape[1]==1:
            reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
    
        w = boundingbox[:,2]-boundingbox[:,0]+1
        h = boundingbox[:,3]-boundingbox[:,1]+1
        b1 = boundingbox[:,0]+reg[:,0]*w
        b2 = boundingbox[:,1]+reg[:,1]*h
        b3 = boundingbox[:,2]+reg[:,2]*w
        b4 = boundingbox[:,3]+reg[:,3]*h
        boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
        return boundingbox
     
    def generateBoundingBox(imap, reg, scale, t):
        """Use heatmap to generate bounding boxes"""
        stride=2
        cellsize=12
    
        imap = np.transpose(imap)
        dx1 = np.transpose(reg[:,:,0])
        dy1 = np.transpose(reg[:,:,1])
        dx2 = np.transpose(reg[:,:,2])
        dy2 = np.transpose(reg[:,:,3])
        y, x = np.where(imap >= t)
        if y.shape[0]==1:
            dx1 = np.flipud(dx1)
            dy1 = np.flipud(dy1)
            dx2 = np.flipud(dx2)
            dy2 = np.flipud(dy2)
        score = imap[(y,x)]
        reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
        if reg.size==0:
            reg = np.empty((0,3))
        bb = np.transpose(np.vstack([y,x]))
        q1 = np.fix((stride*bb+1)/scale)
        q2 = np.fix((stride*bb+cellsize-1+1)/scale)
        boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
        return boundingbox, reg
     
    # function pick = nms(boxes,threshold,type)
    def nms(boxes, threshold, method):
        if boxes.size==0:
            return np.empty((0,3))
        x1 = boxes[:,0]
        y1 = boxes[:,1]
        x2 = boxes[:,2]
        y2 = boxes[:,3]
        s = boxes[:,4]
        area = (x2-x1+1) * (y2-y1+1)
        I = np.argsort(s)
        pick = np.zeros_like(s, dtype=np.int16)
        counter = 0
        while I.size>0:
            i = I[-1]
            pick[counter] = i
            counter += 1
            idx = I[0:-1]
            xx1 = np.maximum(x1[i], x1[idx])
            yy1 = np.maximum(y1[i], y1[idx])
            xx2 = np.minimum(x2[i], x2[idx])
            yy2 = np.minimum(y2[i], y2[idx])
            w = np.maximum(0.0, xx2-xx1+1)
            h = np.maximum(0.0, yy2-yy1+1)
            inter = w * h
            if method is 'Min':
                o = inter / np.minimum(area[i], area[idx])
            else:
                o = inter / (area[i] + area[idx] - inter)
            I = I[np.where(o<=threshold)]
        pick = pick[0:counter]
        return pick
    
    # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
    def pad(total_boxes, w, h):
        """Compute the padding coordinates (pad the bounding boxes to square)"""
        tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
        tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
        numbox = total_boxes.shape[0]
    
        dx = np.ones((numbox), dtype=np.int32)
        dy = np.ones((numbox), dtype=np.int32)
        edx = tmpw.copy().astype(np.int32)
        edy = tmph.copy().astype(np.int32)
    
        x = total_boxes[:,0].copy().astype(np.int32)
        y = total_boxes[:,1].copy().astype(np.int32)
        ex = total_boxes[:,2].copy().astype(np.int32)
        ey = total_boxes[:,3].copy().astype(np.int32)
    
        tmp = np.where(ex>w)
        edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
        ex[tmp] = w
        
        tmp = np.where(ey>h)
        edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
        ey[tmp] = h
    
        tmp = np.where(x<1)
        dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
        x[tmp] = 1
    
        tmp = np.where(y<1)
        dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
        y[tmp] = 1
        
        return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
    
    # function [bboxA] = rerec(bboxA)
    def rerec(bboxA):
        """Convert bboxA to square."""
        h = bboxA[:,3]-bboxA[:,1]
        w = bboxA[:,2]-bboxA[:,0]
        l = np.maximum(w, h)
        bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
        bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
        bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
        return bboxA
    
    def imresample(img, sz):
        im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
        return im_data
    
        # This method is kept for debugging purpose
    #     h=img.shape[0]
    #     w=img.shape[1]
    #     hs, ws = sz
    #     dx = float(w) / ws
    #     dy = float(h) / hs
    #     im_data = np.zeros((hs,ws,3))
    #     for a1 in range(0,hs):
    #         for a2 in range(0,ws):
    #             for a3 in range(0,3):
    #                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
    #     return im_data
    

      

    参考:

    1 https://zhuanlan.zhihu.com/p/25025596

    2 https://link.zhihu.com/?target=https%3A//github.com/shanren7/real_time_face_recognition

    3 https://blog.csdn.net/mr_evanchen/article/details/77650883

    4 https://github.com/ShyBigBoy/face-detection-mtcnn

  • 相关阅读:
    浏览器刷新缓存机制
    Asp.Net获取IP的方法
    c# 了解委托
    用什么方法来判断字符串是否存在的函数
    怎么样从地址中获得数据?
    新网站如何不被百度查封,请注意以下事项。
    搜索引擎如何抓取网页和如何索引网页?
    什么情况下include_path不起作用?
    用户注册演示程序操作案例
    用户提交的cookie提交时为什么传不到服务器
  • 原文地址:https://www.cnblogs.com/alexYuin/p/8820658.html
Copyright © 2011-2022 走看看