  • 『计算机视觉_转载』可变形卷积实现逻辑


    可变形卷积的论文为:Deformable Convolutional Networks
    而之前google一篇论文对这篇论文有指导意义:Spatial Transformer Networks
    Deformable Convolutional 的 github 代码地址为:https://github.com/felixlaumon/deform-conv
    可变形卷积很好理解,Keras 中的接口为 ConvOffset2D,但如何实现呢?实现方面需要关注两个限制:










    3、例,取一个坐标值(a,b),将其转换为四个整数,floor(a), ceil(a), floor(b), ceil(b),将这四个整数进行整合,得到四对坐标 (floor(a),floor(b)), (floor(a),ceil(b)), (ceil(a),floor(b)), (ceil(a),ceil(b))。这四对坐标每个坐标都对应U中的一个像素值,而我们需要得到(a,b)的像素值,这里采用双线性插值的方式计算(一方面得到的像素准确,另一方面可以进行反向传播)。









    def __init__(self, filters, init_normal_stddev=0.01, **kwargs):
        """Set up the convolution that predicts the sampling offsets.

        Parameters
        ----------
        filters : int
            Stored on ``self.filters``. The parent constructor receives
            ``filters * 2`` as its first argument (presumably one pair of
            offsets per input channel -- confirm against the parent layer).
        init_normal_stddev : float
            Second argument given to ``RandomNormal`` for the kernel
            initializer (presumably the standard deviation, with 0 as mean).
        **kwargs
            Forwarded unchanged to the parent constructor.
        """
        self.filters = filters
        super(ConvOffset2D, self).__init__(
            self.filters * 2, (3, 3), padding='same', use_bias=False,
            kernel_initializer=RandomNormal(0, init_normal_stddev),
            # Pass caller options (e.g. a layer name) on to the parent.
            **kwargs
        )
    def call(self, x):
        """Resample the input feature map at the predicted offsets.

        The parent layer's ``call`` produces the offsets; every channel of
        ``x`` is then treated as an independent 2D map, resampled, and the
        result is folded back to the layout of the input.
        """
        input_shape = x.get_shape()

        # Offsets come out of the parent call, then go to (b*c, h, w, 2).
        predicted = super(ConvOffset2D, self).call(x)
        flat_offsets = self._to_bc_h_w_2(predicted, input_shape)

        # One 2D map per (batch, channel) pair: (b*c, h, w).
        flat_maps = self._to_bc_h_w(x, input_shape)

        # Sample every map at its offset grid positions.
        sampled = tf_batch_map_offsets(flat_maps, flat_offsets)

        # Back to channels-last: (b, h, w, c).
        return self._to_b_h_w_c(sampled, input_shape)
    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` unchanged.

        The output shape is the same as the input shape because this layer
        does only the deformation part.
        """
        return input_shape
    @staticmethod
    def _to_bc_h_w_2(x, x_shape):
        """Rearrange offsets: (b, h, w, 2c) -> (b*c, h, w, 2).

        Declared static because it takes no ``self`` yet is invoked as
        ``self._to_bc_h_w_2(offsets, x_shape)``.

        Parameters
        ----------
        x : tf.Tensor
            Offsets tensor to rearrange.
        x_shape : shape object
            Static shape of the layer input; only entries 1 and 2 (h and w)
            are read, each converted with ``int``.
        """
        # Move the channel axis ahead of the spatial axes: (b, 2c, h, w).
        x = tf.transpose(x, [0, 3, 1, 2])
        # Fold batch and channels together, keeping a trailing axis of 2.
        x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2]), 2))
        return x
    @staticmethod
    def _to_bc_h_w(x, x_shape):
        """Rearrange a feature map: (b, h, w, c) -> (b*c, h, w).

        Declared static because it takes no ``self`` yet is invoked as
        ``self._to_bc_h_w(x, x_shape)``.

        Parameters
        ----------
        x : tf.Tensor
            Feature map to rearrange.
        x_shape : shape object
            Static shape of the layer input; only entries 1 and 2 (h and w)
            are read, each converted with ``int``.
        """
        # Move the channel axis ahead of the spatial axes: (b, c, h, w).
        x = tf.transpose(x, [0, 3, 1, 2])
        # Fold batch and channels into one leading axis.
        x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2])))
        return x
    @staticmethod
    def _to_b_h_w_c(x, x_shape):
        """Restore the channels-last layout: (b*c, h, w) -> (b, h, w, c).

        Declared static because it takes no ``self`` yet is invoked as
        ``self._to_b_h_w_c(x_offset, x_shape)``.

        Parameters
        ----------
        x : tf.Tensor
            Tensor to rearrange.
        x_shape : shape object
            Static shape of the layer input; entries 1, 2 and 3 (h, w, c)
            are read, each converted with ``int``.
        """
        # Split the folded leading axis back into (b, c).
        x = tf.reshape(
            x, (-1, int(x_shape[3]), int(x_shape[1]), int(x_shape[2]))
        )
        # Move channels to the last axis: (b, h, w, c).
        x = tf.transpose(x, [0, 2, 3, 1])
        return x


    def tf_flatten(a):
        """Collapse a tensor into a single 1-D tensor."""
        flat = tf.reshape(a, [-1])
        return flat
    def tf_repeat(a, repeats, axis=0):
        """Repeat each element of a 1-D tensor, like np.repeat for 1D.

        Every element of ``a`` appears ``repeats`` times in a row in the
        flattened result. ``axis`` is accepted but not used by this body.
        """
        # https://github.com/tensorflow/tensorflow/issues/8521
        assert len(a.get_shape()) == 1
        # (n,) -> (n, 1) -> (n, repeats), then flatten row by row.
        column = tf.expand_dims(a, -1)
        tiled = tf.tile(column, [1, repeats])
        return tf_flatten(tiled)
    def tf_repeat_2d(a, repeats):
        """Stack ``repeats`` copies of a 2-D tensor along a new leading axis.

        The result has one more dimension than ``a``; each slice along
        axis 0 is a copy of ``a``.
        """
        assert len(a.get_shape()) == 2
        with_leading_axis = tf.expand_dims(a, 0)
        return tf.tile(with_leading_axis, [repeats, 1, 1])
    def tf_map_coordinates(input, coords, order=1):
        """Tensorflow version of scipy.ndimage.map_coordinates.

        Note that coords is transposed and only 2D is supported.

        Parameters
        ----------
        input : tf.Tensor. shape = (s, s)
        coords : tf.Tensor. shape = (n_points, 2)
        order : int
            Must be 1 (bilinear interpolation); anything else fails the
            assertion below.

        Returns
        -------
        tf.Tensor
            One interpolated value per row of ``coords``.

        Notes
        -----
        Coordinates are not clipped here, so callers are expected to keep
        them inside the bounds of ``input``.
        """
        assert order == 1

        # Integer corners surrounding each point: floor gives the
        # "left-top" corner, ceil the "right-bottom" one.
        coords_lt = tf.cast(tf.floor(coords), 'int32')
        coords_rb = tf.cast(tf.ceil(coords), 'int32')
        # The remaining two corners mix the components of the first two.
        coords_lb = tf.stack([coords_lt[:, 0], coords_rb[:, 1]], axis=1)
        coords_rt = tf.stack([coords_rb[:, 0], coords_lt[:, 1]], axis=1)

        # Values of the input at the four corners.
        vals_lt = tf.gather_nd(input, coords_lt)
        vals_rb = tf.gather_nd(input, coords_rb)
        vals_lb = tf.gather_nd(input, coords_lb)
        vals_rt = tf.gather_nd(input, coords_rt)

        # Fractional distance of each point from its floor corner.
        coords_offset_lt = coords - tf.cast(coords_lt, 'float32')
        # Interpolate along the first coordinate, then along the second.
        vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[:, 0]
        vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[:, 0]
        mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[:, 1]

        return mapped_vals
    def sp_batch_map_coordinates(inputs, coords):
        """Reference implementation for batch_map_coordinates.

        Parameters
        ----------
        inputs : np.ndarray
            Batch of 2D maps; iterated along axis 0, one map per item.
        coords : np.ndarray
            Batch of sampling locations, one ``(n_points, 2)`` array per map.
            Each is transposed before being handed to ``sp_map_coordinates``.

        Returns
        -------
        np.ndarray
            Array holding, for every map, the values interpolated with
            ``order=1`` at its sampling locations.
        """
        # Clamp sampling locations to [0, inputs.shape[1] - 1].
        coords = coords.clip(0, inputs.shape[1] - 1)
        mapped_vals = np.array([
            sp_map_coordinates(feature_map, coord.T, mode='nearest', order=1)
            for feature_map, coord in zip(inputs, coords)
        ])
        return mapped_vals
    def tf_batch_map_coordinates(input, coords, order=1):
        """Batch version of tf_map_coordinates.

        Only supports 2D feature maps.

        Parameters
        ----------
        input : tf.Tensor. shape = (b, s, s)
        coords : tf.Tensor. shape = (b, n_points, 2)
        order : int
            Not read by this body; interpolation is always bilinear.

        Returns
        -------
        tf.Tensor. shape = (b, n_points)
            Interpolated value for every sampling location.
        """
        input_shape = tf.shape(input)
        batch_size = input_shape[0]
        input_size = input_shape[1]
        n_coords = tf.shape(coords)[1]

        # Clamp both coordinate components to [0, input_size - 1]; the same
        # bound is used for both, which matches the square (s, s) maps.
        coords = tf.clip_by_value(coords, 0, tf.cast(input_size, 'float32') - 1)
        # Integer coordinates of the left-top corner of each target point.
        coords_lt = tf.cast(tf.floor(coords), 'int32')
        # Right-bottom corner, then the two mixed corners.
        coords_rb = tf.cast(tf.ceil(coords), 'int32')
        coords_lb = tf.stack([coords_lt[..., 0], coords_rb[..., 1]], axis=-1)
        coords_rt = tf.stack([coords_rb[..., 0], coords_lt[..., 1]], axis=-1)

        # Batch index of every point: each index in range(batch_size) is
        # repeated n_coords times.
        idx = tf_repeat(tf.range(batch_size), n_coords)

        def _get_vals_by_coords(input, coords):
            """Gather input values at integer coords, shaped (b, n_points)."""
            # Each gather index is (batch index, coord 0, coord 1).
            indices = tf.stack([
                idx, tf_flatten(coords[..., 0]), tf_flatten(coords[..., 1])
            ], axis=-1)
            vals = tf.gather_nd(input, indices)
            vals = tf.reshape(vals, (batch_size, n_coords))
            return vals

        vals_lt = _get_vals_by_coords(input, coords_lt)
        vals_rb = _get_vals_by_coords(input, coords_rb)
        vals_lb = _get_vals_by_coords(input, coords_lb)
        vals_rt = _get_vals_by_coords(input, coords_rt)

        # Fractional distance of each point from its floor corner.
        coords_offset_lt = coords - tf.cast(coords_lt, 'float32')
        # Interpolate along the first coordinate, then along the second.
        vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[..., 0]
        vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[..., 0]
        mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[..., 1]

        return mapped_vals
    def sp_batch_map_offsets(input, offsets):
        """NumPy reference for tf_batch_map_offsets.

        Adds ``offsets`` to a regular index grid built from the second
        dimension of ``input``, clamps the sum to the valid index range and
        samples ``input`` there through ``sp_batch_map_coordinates``.
        """
        n_maps, side = input.shape[0], input.shape[1]

        # One (n_points, 2) list of displacements per map.
        per_point = offsets.reshape(n_maps, -1, 2)

        # Regular grid of index pairs, copied once per map.
        axes = np.mgrid[:side, :side]
        base_grid = np.stack(axes, -1).reshape(-1, 2)
        base_grid = np.repeat([base_grid], n_maps, axis=0)

        sample_at = (per_point + base_grid).clip(0, side - 1)
        return sp_batch_map_coordinates(input, sample_at)
    def tf_batch_map_offsets(input, offsets, order=1):
        """Batch map offsets into input.

        Parameters
        ----------
        input : tf.Tensor. shape = (b, s, s)
        offsets : tf.Tensor. shape = (b, s, s, 2)
        order : int
            Not read by this body.

        Returns
        -------
        tf.Tensor
            Whatever ``tf_batch_map_coordinates`` returns for the shifted
            grid: one sampled value per grid position of every map.
        """
        input_shape = tf.shape(input)
        batch_size = input_shape[0]
        input_size = input_shape[1]

        # One (n_points, 2) list of displacements per map.
        offsets = tf.reshape(offsets, (batch_size, -1, 2))
        # Regular index grid; 'ij' indexing keeps matrix-style ordering.
        grid = tf.meshgrid(
            tf.range(input_size), tf.range(input_size), indexing='ij'
        )
        grid = tf.stack(grid, axis=-1)
        grid = tf.cast(grid, 'float32')
        grid = tf.reshape(grid, (-1, 2))
        # Same grid for every map in the batch.
        grid = tf_repeat_2d(grid, batch_size)
        # Absolute sampling locations = grid position + learned offset.
        coords = offsets + grid

        mapped_vals = tf_batch_map_coordinates(input, coords)
        return mapped_vals

