zoukankan      html  css  js  c++  java
  • 02-人脸识别-基于MTCNN,框选人脸区域-detect_face

    (本系列随笔持续更新)

    这部分代码是基于参考中的链接,修改后适用于TensorFlow1.6.0版本的代码。由于TensorFlow的频繁更新,所以不一定支持后续新或者就版本,特此说明。

    程序的最初版,来自“山人7” [参考1,参考2],但是在新的TensorFlow下面不能直接运行。

    修改后版本,来自ShyBigBoy,[参考3,参考4],可以在TensorFlow1.6.0上运行。

    代码:detect_face.py

    """ Tensorflow implementation of the face detection / alignment algorithm found at
    https://github.com/kpzhang93/MTCNN_face_detection_alignment
    """
    # MIT License
    # 
    # Copyright (c) 2016 David Sandberg
    # 
    # Permission is hereby granted, free of charge, to any person obtaining a copy
    # of this software and associated documentation files (the "Software"), to deal
    # in the Software without restriction, including without limitation the rights
    # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    # copies of the Software, and to permit persons to whom the Software is
    # furnished to do so, subject to the following conditions:
    # 
    # The above copyright notice and this permission notice shall be included in all
    # copies or substantial portions of the Software.
    # 
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    # SOFTWARE.
    
    from __future__ import absolute_import
    from __future__ import division
    from __future__ import print_function
    from six import string_types, iteritems
    
    import numpy as np
    import tensorflow as tf
    #from math import floor
    import cv2
    import os
    
    def layer(op):
        """Decorator for composable network layers."""
    
        def layer_decorated(self, *args, **kwargs):
            # Automatically set a name if not provided.
            name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
            # Figure out the layer inputs.
            if len(self.terminals) == 0:
                raise RuntimeError('No input variables found for layer %s.' % name)
            elif len(self.terminals) == 1:
                layer_input = self.terminals[0]
            else:
                layer_input = list(self.terminals)
            # Perform the operation and get the output.
            layer_output = op(self, layer_input, *args, **kwargs)
            # Add to layer LUT.
            self.layers[name] = layer_output
            # This output is now the input for the next layer.
            self.feed(layer_output)
            # Return self for chained calls.
            return self
    
        return layer_decorated
    
    class Network(object):
    
        def __init__(self, inputs, trainable=True):
            # The input nodes for this network
            self.inputs = inputs
            # The current list of terminal nodes
            self.terminals = []
            # Mapping from layer names to layers
            self.layers = dict(inputs)
            # If true, the resulting variables are set as trainable
            self.trainable = trainable
    
            self.setup()
    
        def setup(self):
            """Construct the network. """
            raise NotImplementedError('Must be implemented by the subclass.')
    
        def load(self, data_path, session, ignore_missing=False):
            """Load network weights.
            data_path: The path to the numpy-serialized network weights
            session: The current TensorFlow session
            ignore_missing: If true, serialized weights for missing layers are ignored.
            """
            data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member
    
            for op_name in data_dict:
                with tf.variable_scope(op_name, reuse=True):
                    for param_name, data in iteritems(data_dict[op_name]):
                        try:
                            var = tf.get_variable(param_name)
                            session.run(var.assign(data))
                        except ValueError:
                            if not ignore_missing:
                                raise
    
        def feed(self, *args):
            """Set the input(s) for the next operation by replacing the terminal nodes.
            The arguments can be either layer names or the actual layers.
            """
            assert len(args) != 0
            self.terminals = []
            for fed_layer in args:
                if isinstance(fed_layer, string_types):
                    try:
                        fed_layer = self.layers[fed_layer]
                    except KeyError:
                        raise KeyError('Unknown layer name fed: %s' % fed_layer)
                self.terminals.append(fed_layer)
            return self
    
        def get_output(self):
            """Returns the current network output."""
            return self.terminals[-1]
    
        def get_unique_name(self, prefix):
            """Returns an index-suffixed unique name for the given prefix.
            This is used for auto-generating layer names based on the type-prefix.
            """
            ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
            return '%s_%d' % (prefix, ident)
    
        def make_var(self, name, shape):
            """Creates a new TensorFlow variable."""
            return tf.get_variable(name, shape, trainable=self.trainable)
    
        def validate_padding(self, padding):
            """Verifies that the padding is one of the supported ones."""
            assert padding in ('SAME', 'VALID')
    
        @layer
        def conv(self,
                 inp,
                 k_h,
                 k_w,
                 c_o,
                 s_h,
                 s_w,
                 name,
                 relu=True,
                 padding='SAME',
                 group=1,
                 biased=True):
            # Verify that the padding is acceptable
            self.validate_padding(padding)
            # Get the number of channels in the input
            c_i = int(inp.get_shape()[-1])
            # Verify that the grouping parameter is valid
            assert c_i % group == 0
            assert c_o % group == 0
            # Convolution for a given input and kernel
            convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
            with tf.variable_scope(name) as scope:
                kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
                # This is the common-case. Convolve the input without any further complications.
                output = convolve(inp, kernel)
                # Add the biases
                if biased:
                    biases = self.make_var('biases', [c_o])
                    output = tf.nn.bias_add(output, biases)
                if relu:
                    # ReLU non-linearity
                    output = tf.nn.relu(output, name=scope.name)
                return output
    
        @layer
        def prelu(self, inp, name):
            with tf.variable_scope(name):
                i = int(inp.get_shape()[-1])
                alpha = self.make_var('alpha', shape=(i,))
                output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))
            return output
    
        @layer
        def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
            self.validate_padding(padding)
            return tf.nn.max_pool(inp,
                                  ksize=[1, k_h, k_w, 1],
                                  strides=[1, s_h, s_w, 1],
                                  padding=padding,
                                  name=name)
    
        @layer
        def fc(self, inp, num_out, name, relu=True):
            with tf.variable_scope(name):
                input_shape = inp.get_shape()
                if input_shape.ndims == 4:
                    # The input is spatial. Vectorize it first.
                    dim = 1
                    for d in input_shape[1:].as_list():
                        dim *= int(d)
                    feed_in = tf.reshape(inp, [-1, dim])
                else:
                    feed_in, dim = (inp, input_shape[-1].value)
                weights = self.make_var('weights', shape=[dim, num_out])
                biases = self.make_var('biases', [num_out])
                op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
                fc = op(feed_in, weights, biases, name=name)
                return fc
    
    
        """
        Multi dimensional softmax,
        refer to https://github.com/tensorflow/tensorflow/issues/210
        compute softmax along the dimension of target
        the native softmax only supports batch_size x dimension
        """
        @layer
        def softmax(self, target, axis, name=None):
            max_axis = tf.reduce_max(target, axis, keepdims=True)
            target_exp = tf.exp(target-max_axis)
            normalize = tf.reduce_sum(target_exp, axis, keepdims=True)
            softmax = tf.div(target_exp, normalize, name)
            return softmax
        
    class PNet(Network):
        def setup(self):
            (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
                 .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
                 .prelu(name='PReLU1')
                 .max_pool(2, 2, 2, 2, name='pool1')
                 .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
                 .prelu(name='PReLU2')
                 .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
                 .prelu(name='PReLU3')
                 .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
                 .softmax(3,name='prob1'))
    
            (self.feed('PReLU3') #pylint: disable=no-value-for-parameter
                 .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))
            
    class RNet(Network):
        def setup(self):
            (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
                 .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
                 .prelu(name='prelu1')
                 .max_pool(3, 3, 2, 2, name='pool1')
                 .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
                 .prelu(name='prelu2')
                 .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
                 .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
                 .prelu(name='prelu3')
                 .fc(128, relu=False, name='conv4')
                 .prelu(name='prelu4')
                 .fc(2, relu=False, name='conv5-1')
                 .softmax(1,name='prob1'))
    
            (self.feed('prelu4') #pylint: disable=no-value-for-parameter
                 .fc(4, relu=False, name='conv5-2'))
    
    class ONet(Network):
        def setup(self):
            (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
                 .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
                 .prelu(name='prelu1')
                 .max_pool(3, 3, 2, 2, name='pool1')
                 .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
                 .prelu(name='prelu2')
                 .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
                 .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
                 .prelu(name='prelu3')
                 .max_pool(2, 2, 2, 2, name='pool3')
                 .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
                 .prelu(name='prelu4')
                 .fc(256, relu=False, name='conv5')
                 .prelu(name='prelu5')
                 .fc(2, relu=False, name='conv6-1')
                 .softmax(1, name='prob1'))
    
            (self.feed('prelu5') #pylint: disable=no-value-for-parameter
                 .fc(4, relu=False, name='conv6-2'))
    
            (self.feed('prelu5') #pylint: disable=no-value-for-parameter
                 .fc(10, relu=False, name='conv6-3'))
    
    def create_mtcnn(sess, model_path):
        if not model_path:
            model_path,_ = os.path.split(os.path.realpath(__file__))
    
        with tf.variable_scope('pnet'):
            data = tf.placeholder(tf.float32, (None,None,None,3), 'input')
            pnet = PNet({'data':data})
            pnet.load(os.path.join(model_path, 'det1.npy'), sess)
        with tf.variable_scope('rnet'):
            data = tf.placeholder(tf.float32, (None,24,24,3), 'input')
            rnet = RNet({'data':data})
            rnet.load(os.path.join(model_path, 'det2.npy'), sess)
        with tf.variable_scope('onet'):
            data = tf.placeholder(tf.float32, (None,48,48,3), 'input')
            onet = ONet({'data':data})
            onet.load(os.path.join(model_path, 'det3.npy'), sess)
            
        pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})
        rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})
        onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})
        return pnet_fun, rnet_fun, onet_fun
    
    def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
        """Detects faces in an image, and returns bounding boxes and points for them.
        img: input image
        minsize: minimum faces' size
        pnet, rnet, onet: caffemodel
        threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold
        factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
        """
        factor_count=0
        total_boxes=np.empty((0,9))
        points=np.empty(0)
        h=img.shape[0]
        w=img.shape[1]
        minl=np.amin([h, w])
        m=12.0/minsize
        minl=minl*m
        # create scale pyramid
        scales=[]
        while minl>=12:
            scales += [m*np.power(factor, factor_count)]
            minl = minl*factor
            factor_count += 1
    
        # first stage
        for scale in scales:
            hs=int(np.ceil(h*scale))
            ws=int(np.ceil(w*scale))
            im_data = imresample(img, (hs, ws))
            im_data = (im_data-127.5)*0.0078125
            img_x = np.expand_dims(im_data, 0)
            img_y = np.transpose(img_x, (0,2,1,3))
            out = pnet(img_y)
            out0 = np.transpose(out[0], (0,2,1,3))
            out1 = np.transpose(out[1], (0,2,1,3))
            
            boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])
            
            # inter-scale nms
            pick = nms(boxes.copy(), 0.5, 'Union')
            if boxes.size>0 and pick.size>0:
                boxes = boxes[pick,:]
                total_boxes = np.append(total_boxes, boxes, axis=0)
    
        numbox = total_boxes.shape[0]
        if numbox>0:
            pick = nms(total_boxes.copy(), 0.7, 'Union')
            total_boxes = total_boxes[pick,:]
            regw = total_boxes[:,2]-total_boxes[:,0]
            regh = total_boxes[:,3]-total_boxes[:,1]
            qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
            qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
            qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
            qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
            total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
            total_boxes = rerec(total_boxes.copy())
            total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
    
        numbox = total_boxes.shape[0]
        if numbox>0:
            # second stage
            tempimg = np.zeros((24,24,3,numbox))
            for k in range(0,numbox):
                tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
                tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
                if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                    tempimg[:,:,:,k] = imresample(tmp, (24, 24))
                else:
                    return np.empty()
            tempimg = (tempimg-127.5)*0.0078125
            tempimg1 = np.transpose(tempimg, (3,1,0,2))
            out = rnet(tempimg1)
            out0 = np.transpose(out[0])
            out1 = np.transpose(out[1])
            score = out1[1,:]
            ipass = np.where(score>threshold[1])
            total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
            mv = out0[:,ipass[0]]
            if total_boxes.shape[0]>0:
                pick = nms(total_boxes, 0.7, 'Union')
                total_boxes = total_boxes[pick,:]
                total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
                total_boxes = rerec(total_boxes.copy())
    
        numbox = total_boxes.shape[0]
        if numbox>0:
            # third stage
            total_boxes = np.fix(total_boxes).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
            tempimg = np.zeros((48,48,3,numbox))
            for k in range(0,numbox):
                tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
                tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
                if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                    tempimg[:,:,:,k] = imresample(tmp, (48, 48))
                else:
                    return np.empty()
            tempimg = (tempimg-127.5)*0.0078125
            tempimg1 = np.transpose(tempimg, (3,1,0,2))
            out = onet(tempimg1)
            out0 = np.transpose(out[0])
            out1 = np.transpose(out[1])
            out2 = np.transpose(out[2])
            score = out2[1,:]
            points = out1
            ipass = np.where(score>threshold[2])
            points = points[:,ipass[0]]
            total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
            mv = out0[:,ipass[0]]
    
            w = total_boxes[:,2]-total_boxes[:,0]+1
            h = total_boxes[:,3]-total_boxes[:,1]+1
            points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
            points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
            if total_boxes.shape[0]>0:
                total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
                pick = nms(total_boxes.copy(), 0.7, 'Min')
                total_boxes = total_boxes[pick,:]
                points = points[:,pick]
                    
        return total_boxes, points
    
    
    def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
        """Detects faces in a list of images
        images: list containing input images
        detection_window_size_ratio: ratio of minimum face size to smallest image dimension
        pnet, rnet, onet: caffemodel
        threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1]
        factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
        """
        all_scales = [None] * len(images)
        images_with_boxes = [None] * len(images)
    
        for i in range(len(images)):
            images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
    
        # create scale pyramid
        for index, img in enumerate(images):
            all_scales[index] = []
            h = img.shape[0]
            w = img.shape[1]
            minsize = int(detection_window_size_ratio * np.minimum(w, h))
            factor_count = 0
            minl = np.amin([h, w])
            if minsize <= 12:
                minsize = 12
    
            m = 12.0 / minsize
            minl = minl * m
            while minl >= 12:
                all_scales[index].append(m * np.power(factor, factor_count))
                minl = minl * factor
                factor_count += 1
    
        # # # # # # # # # # # # #
        # first stage - fast proposal network (pnet) to obtain face candidates
        # # # # # # # # # # # # #
    
        images_obj_per_resolution = {}
    
        # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images
    
        for index, scales in enumerate(all_scales):
            h = images[index].shape[0]
            w = images[index].shape[1]
    
            for scale in scales:
                hs = int(np.ceil(h * scale))
                ws = int(np.ceil(w * scale))
    
                if (ws, hs) not in images_obj_per_resolution:
                    images_obj_per_resolution[(ws, hs)] = []
    
                im_data = imresample(images[index], (hs, ws))
                im_data = (im_data - 127.5) * 0.0078125
                img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
                images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
    
        for resolution in images_obj_per_resolution:
            images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
            outs = pnet(images_per_resolution)
    
            for index in range(len(outs[0])):
                scale = images_obj_per_resolution[resolution][index]['scale']
                image_index = images_obj_per_resolution[resolution][index]['index']
                out0 = np.transpose(outs[0][index], (1, 0, 2))
                out1 = np.transpose(outs[1][index], (1, 0, 2))
    
                boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
    
                # inter-scale nms
                pick = nms(boxes.copy(), 0.5, 'Union')
                if boxes.size > 0 and pick.size > 0:
                    boxes = boxes[pick, :]
                    images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
                                                                              boxes,
                                                                              axis=0)
    
        for index, image_obj in enumerate(images_with_boxes):
            numbox = image_obj['total_boxes'].shape[0]
            if numbox > 0:
                h = images[index].shape[0]
                w = images[index].shape[1]
                pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
                image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
                regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
                regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
                qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
                qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
                qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
                qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
                image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
                image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
                image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
    
                numbox = image_obj['total_boxes'].shape[0]
                tempimg = np.zeros((24, 24, 3, numbox))
    
                if numbox > 0:
                    for k in range(0, numbox):
                        tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                        tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                        if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                            tempimg[:, :, :, k] = imresample(tmp, (24, 24))
                        else:
                            return np.empty()
    
                    tempimg = (tempimg - 127.5) * 0.0078125
                    image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
    
        # # # # # # # # # # # # #
        # second stage - refinement of face candidates with rnet
        # # # # # # # # # # # # #
    
        bulk_rnet_input = np.empty((0, 24, 24, 3))
        for index, image_obj in enumerate(images_with_boxes):
            if 'rnet_input' in image_obj:
                bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
    
        out = rnet(bulk_rnet_input)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1, :]
    
        i = 0
        for index, image_obj in enumerate(images_with_boxes):
            if 'rnet_input' not in image_obj:
                continue
    
            rnet_input_count = image_obj['rnet_input'].shape[0]
            score_per_image = score[i:i + rnet_input_count]
            out0_per_image = out0[:, i:i + rnet_input_count]
    
            ipass = np.where(score_per_image > threshold[1])
            image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                                  np.expand_dims(score_per_image[ipass].copy(), 1)])
    
            mv = out0_per_image[:, ipass[0]]
    
            if image_obj['total_boxes'].shape[0] > 0:
                h = images[index].shape[0]
                w = images[index].shape[1]
                pick = nms(image_obj['total_boxes'], 0.7, 'Union')
                image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
                image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
                image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
    
                numbox = image_obj['total_boxes'].shape[0]
    
                if numbox > 0:
                    tempimg = np.zeros((48, 48, 3, numbox))
                    image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
                    dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
    
                    for k in range(0, numbox):
                        tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                        tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                        if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                            tempimg[:, :, :, k] = imresample(tmp, (48, 48))
                        else:
                            return np.empty()
                    tempimg = (tempimg - 127.5) * 0.0078125
                    image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
    
            i += rnet_input_count
    
        # # # # # # # # # # # # #
        # third stage - further refinement and facial landmarks positions with onet
        # # # # # # # # # # # # #
    
        bulk_onet_input = np.empty((0, 48, 48, 3))
        for index, image_obj in enumerate(images_with_boxes):
            if 'onet_input' in image_obj:
                bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
    
        out = onet(bulk_onet_input)
    
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1, :]
        points = out1
    
        i = 0
        ret = []
        for index, image_obj in enumerate(images_with_boxes):
            if 'onet_input' not in image_obj:
                ret.append(None)
                continue
    
            onet_input_count = image_obj['onet_input'].shape[0]
    
            out0_per_image = out0[:, i:i + onet_input_count]
            score_per_image = score[i:i + onet_input_count]
            points_per_image = points[:, i:i + onet_input_count]
    
            ipass = np.where(score_per_image > threshold[2])
            points_per_image = points_per_image[:, ipass[0]]
    
            image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                                  np.expand_dims(score_per_image[ipass].copy(), 1)])
            mv = out0_per_image[:, ipass[0]]
    
            w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
            h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
            points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
                image_obj['total_boxes'][:, 0], (5, 1)) - 1
            points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
                image_obj['total_boxes'][:, 1], (5, 1)) - 1
    
            if image_obj['total_boxes'].shape[0] > 0:
                image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
                pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
                image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
                points_per_image = points_per_image[:, pick]
    
                ret.append((image_obj['total_boxes'], points_per_image))
            else:
                ret.append(None)
    
            i += onet_input_count
    
        return ret
    
    
    # function [boundingbox] = bbreg(boundingbox,reg)
    def bbreg(boundingbox,reg):
        """Calibrate bounding boxes"""
        if reg.shape[1]==1:
            reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
    
        w = boundingbox[:,2]-boundingbox[:,0]+1
        h = boundingbox[:,3]-boundingbox[:,1]+1
        b1 = boundingbox[:,0]+reg[:,0]*w
        b2 = boundingbox[:,1]+reg[:,1]*h
        b3 = boundingbox[:,2]+reg[:,2]*w
        b4 = boundingbox[:,3]+reg[:,3]*h
        boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
        return boundingbox
     
    def generateBoundingBox(imap, reg, scale, t):
        """Use heatmap to generate bounding boxes"""
        stride=2
        cellsize=12
    
        imap = np.transpose(imap)
        dx1 = np.transpose(reg[:,:,0])
        dy1 = np.transpose(reg[:,:,1])
        dx2 = np.transpose(reg[:,:,2])
        dy2 = np.transpose(reg[:,:,3])
        y, x = np.where(imap >= t)
        if y.shape[0]==1:
            dx1 = np.flipud(dx1)
            dy1 = np.flipud(dy1)
            dx2 = np.flipud(dx2)
            dy2 = np.flipud(dy2)
        score = imap[(y,x)]
        reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
        if reg.size==0:
            reg = np.empty((0,3))
        bb = np.transpose(np.vstack([y,x]))
        q1 = np.fix((stride*bb+1)/scale)
        q2 = np.fix((stride*bb+cellsize-1+1)/scale)
        boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
        return boundingbox, reg
     
    # function pick = nms(boxes,threshold,type)
    def nms(boxes, threshold, method):
        if boxes.size==0:
            return np.empty((0,3))
        x1 = boxes[:,0]
        y1 = boxes[:,1]
        x2 = boxes[:,2]
        y2 = boxes[:,3]
        s = boxes[:,4]
        area = (x2-x1+1) * (y2-y1+1)
        I = np.argsort(s)
        pick = np.zeros_like(s, dtype=np.int16)
        counter = 0
        while I.size>0:
            i = I[-1]
            pick[counter] = i
            counter += 1
            idx = I[0:-1]
            xx1 = np.maximum(x1[i], x1[idx])
            yy1 = np.maximum(y1[i], y1[idx])
            xx2 = np.minimum(x2[i], x2[idx])
            yy2 = np.minimum(y2[i], y2[idx])
            w = np.maximum(0.0, xx2-xx1+1)
            h = np.maximum(0.0, yy2-yy1+1)
            inter = w * h
            if method is 'Min':
                o = inter / np.minimum(area[i], area[idx])
            else:
                o = inter / (area[i] + area[idx] - inter)
            I = I[np.where(o<=threshold)]
        pick = pick[0:counter]
        return pick
    
    # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
    def pad(total_boxes, w, h):
        """Compute the padding coordinates (pad the bounding boxes to square)"""
        tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
        tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
        numbox = total_boxes.shape[0]
    
        dx = np.ones((numbox), dtype=np.int32)
        dy = np.ones((numbox), dtype=np.int32)
        edx = tmpw.copy().astype(np.int32)
        edy = tmph.copy().astype(np.int32)
    
        x = total_boxes[:,0].copy().astype(np.int32)
        y = total_boxes[:,1].copy().astype(np.int32)
        ex = total_boxes[:,2].copy().astype(np.int32)
        ey = total_boxes[:,3].copy().astype(np.int32)
    
        tmp = np.where(ex>w)
        edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
        ex[tmp] = w
        
        tmp = np.where(ey>h)
        edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
        ey[tmp] = h
    
        tmp = np.where(x<1)
        dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
        x[tmp] = 1
    
        tmp = np.where(y<1)
        dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
        y[tmp] = 1
        
        return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
    
    # function [bboxA] = rerec(bboxA)
    def rerec(bboxA):
        """Convert bboxA to square."""
        h = bboxA[:,3]-bboxA[:,1]
        w = bboxA[:,2]-bboxA[:,0]
        l = np.maximum(w, h)
        bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
        bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
        bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
        return bboxA
    
    def imresample(img, sz):
        im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
        return im_data
    
        # This method is kept for debugging purpose
    #     h=img.shape[0]
    #     w=img.shape[1]
    #     hs, ws = sz
    #     dx = float(w) / ws
    #     dy = float(h) / hs
    #     im_data = np.zeros((hs,ws,3))
    #     for a1 in range(0,hs):
    #         for a2 in range(0,ws):
    #             for a3 in range(0,3):
    #                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
    #     return im_data
    

      

    参考:

    1 https://zhuanlan.zhihu.com/p/25025596

    2 https://link.zhihu.com/?target=https%3A//github.com/shanren7/real_time_face_recognition

    3 https://blog.csdn.net/mr_evanchen/article/details/77650883

    4 https://github.com/ShyBigBoy/face-detection-mtcnn

  • 相关阅读:
    sql2slack alash3al 开源的又个轻量级工具
    pgspider fetchq 扩展docker镜像
    godns 集成coredns 的demo
    godns 简单dnsmasq 的dns 替换方案
    aviary.sh 一个基于bash的分布式配置管理工具
    使用coredns 的template plugin实现一个xip 服务
    nginx 代理 coredns dns 服务
    基于nginx proxy dns server
    几个不错的geodns server
    spring boot rest api 最好添加servlet.context-path
  • 原文地址:https://www.cnblogs.com/alexYuin/p/8820658.html
Copyright © 2011-2022 走看看