  • An intriguing failing of convolutional neural networks and the CoordConv solution

    An intriguing failing of convolutional neural networks and the CoordConv solution

    NeurIPS 2018

    2019-10-10 15:01:48


    Official TensorFlow Code: https://github.com/uber-research/coordconv

    Unofficial PyTorch Code: https://github.com/walsvid/CoordConv



    1. Given a feature map and a point coordinate (x, y), how do we generate the corresponding relative coordinate map?

    The following code is from the GitHub repository of [ICCV19] AdaptIS: Adaptive Instance Selection Network:

        def get_instances_maps(self, F, points, adaptive_input, controller_input):
            """Run the point-conditioned head and return its output for every point.

            Args:
                F: MXNet operator namespace (``mx.nd`` or ``mx.sym``) supplied by
                    the hybrid block machinery.
                points: Query points. ``points.shape[1]`` is cached as
                    ``self.num_points`` and the array is flattened to ``(-1, 2)``
                    before the coordinate features are built.
                adaptive_input: Features that are repeated ``self.num_points``
                    times along axis 0 and then extended with coordinate features.
                controller_input: Input from which the AdaIN weights ``w`` are
                    derived via ``self.eqf`` (and, unless the controller already
                    returned a map, ``self.controller_net``).

            Returns:
                The output of ``self.block1`` after AdaIN modulation.
            """
            if isinstance(points, mx.nd.NDArray):
                # Concrete shapes are only available for NDArrays; remember the
                # point count so the symbolic path can reuse it.
                self.num_points = points.shape[1]

            # Both controller variants start from the same ``eqf`` call on the
            # controller input and the points.
            w = self.eqf(controller_input, points)
            if not getattr(self.controller_net, 'return_map', False):
                # A controller flagged with ``return_map`` is not applied again
                # here (presumably it already ran upstream - confirm against the
                # caller); otherwise it runs on the ``eqf`` output.
                w = self.controller_net(w)

            points = F.reshape(points, shape=(-1, 2))
            x = F.repeat(adaptive_input, self.num_points, axis=0)
            x = self.add_coord_features(x, points)

            x = self.block0(x)
            x = self.adain(x, w)
            x = self.block1(x)
            return x
    class AppendCoordFeatures(gluon.HybridBlock):
        """Prepend point-relative coordinate channels to a feature map.

        For every sample the block builds a row-offset channel and a
        column-offset channel measured from a reference point, optionally adds a
        Euclidean-distance channel, and concatenates them in front of the input
        features along the channel axis.
        """

        def __init__(self, norm_radius, append_dist=True, spatial_scale=1.0):
            """Store the configuration.

            Args:
                norm_radius: Offsets are divided by
                    ``norm_radius * spatial_scale`` before clipping.
                append_dist: When true, a distance channel is appended after the
                    two offset channels.
                spatial_scale: Factor applied to the incoming point coordinates
                    and to ``norm_radius``.
            """
            super(AppendCoordFeatures, self).__init__()
            self.xs = None  # last concrete input shape seen in imperative mode
            self.spatial_scale = spatial_scale
            self.norm_radius = norm_radius
            self.append_dist = append_dist

        def _ctx_kwarg(self, x):
            """Return ``{"ctx": x.context}`` for NDArrays and ``{}`` otherwise."""
            if isinstance(x, mx.nd.NDArray):
                return {"ctx": x.context}
            return {}

        def get_coord_features(self, F, points, rows, cols, batch_size, **ctx_kwarg):
            """Build the clipped, normalised coordinate channels.

            Args:
                F: MXNet operator namespace (``mx.nd`` or ``mx.sym``).
                points: One reference point per sample, reshaped below to
                    ``(batch, 2, 1)``; channel 0 is subtracted from the row grid
                    and channel 1 from the column grid.
                rows: Height of the coordinate grid.
                cols: Width of the coordinate grid.
                batch_size: Number of times the grids are repeated along axis 0.
                **ctx_kwarg: Extra keyword arguments forwarded to ``F.arange``
                    (the device context in imperative mode).

            Returns:
                Features with 3 channels when ``self.append_dist`` is true and 2
                otherwise, clipped to ``[-1, 1]``.
            """
            row_array = F.arange(start=0, stop=rows, step=1, **ctx_kwarg)
            col_array = F.arange(start=0, stop=cols, step=1, **ctx_kwarg)

            # Expand the 1-D index vectors into full (1, 1, rows, cols) grids.
            coord_rows = F.repeat(F.reshape(row_array, (1, 1, rows, 1)), repeats=cols, axis=3)
            coord_cols = F.repeat(F.reshape(col_array, (1, 1, 1, cols)), repeats=rows, axis=2)

            coord_rows = F.repeat(coord_rows, repeats=batch_size, axis=0)
            coord_cols = F.repeat(coord_cols, repeats=batch_size, axis=0)
            coords = F.concat(coord_rows, coord_cols, dim=1)

            # In MXNet's reshape a 0 keeps the corresponding input dimension.
            add_xy = F.reshape(points * self.spatial_scale, shape=(0, 0, 1))
            add_xy = F.reshape(F.repeat(add_xy, rows * cols, axis=2),
                               shape=(0, 0, rows, cols))

            coords = (coords - add_xy) / (self.norm_radius * self.spatial_scale)
            if self.append_dist:
                dist = F.sqrt(F.sum(F.square(coords), axis=1, keepdims=1))
                coord_features = F.concat(coords, dist, dim=1)
            else:
                # Without this branch the distance channel was always discarded
                # and the name was unbound when append_dist was false.
                coord_features = coords

            coord_features = F.clip(coord_features, a_min=-1, a_max=1)
            return coord_features

        def hybrid_forward(self, F, x, coords):
            """Concatenate the coordinate features in front of ``x`` on axis 1.

            Args:
                F: MXNet operator namespace (``mx.nd`` or ``mx.sym``).
                x: Feature map; axes 0, 2 and 3 are read as batch, rows and
                    columns.
                coords: Reference points passed on to ``get_coord_features``.
            """
            if isinstance(x, mx.nd.NDArray):
                # Only NDArrays expose a concrete shape; cache it for later calls.
                self.xs = x.shape

            batch_size, rows, cols = self.xs[0], self.xs[2], self.xs[3]
            coord_features = self.get_coord_features(F, coords, rows, cols, batch_size, **self._ctx_kwarg(x))
            return F.concat(coord_features, x, dim=1)
        def get_coord_features(self, F, points, rows, cols, batch_size, **ctx_kwarg):
            """Build the clipped, normalised coordinate channels (annotated copy).

            Same computation as the method of the same name above, with the
            values observed in a pdb session recorded as comments.

            Args:
                F: MXNet operator namespace (``mx.nd`` or ``mx.sym``).
                points: One reference point per sample; channel 0 is subtracted
                    from the row grid and channel 1 from the column grid.
                rows: Height of the coordinate grid.
                cols: Width of the coordinate grid.
                batch_size: Number of times the grids are repeated along axis 0.
                **ctx_kwarg: Extra keyword arguments forwarded to ``F.arange``.

            Returns:
                Features with 3 channels when ``self.append_dist`` is true and 2
                otherwise, clipped to ``[-1, 1]``.
            """
            # Debugging trace for one call:
            # (Pdb) points, rows, cols, batch_size
            # ([[61. 71.]] <NDArray 1x2 @gpu(0)>, 96, 96, 1)
            # row_array and col_array:
            # [ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
            #  18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.
            #  36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53.
            #  54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71.
            #  72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89.
            #  90. 91. 92. 93. 94. 95.]
            # <NDArray 96 @gpu(0)>
            # (Pdb) coord_rows
            # [[[[ 0.  0.  0. ...  0.  0.  0.]
            #    [ 1.  1.  1. ...  1.  1.  1.]
            #    [ 2.  2.  2. ...  2.  2.  2.]
            #    ...
            #    [93. 93. 93. ... 93. 93. 93.]
            #    [94. 94. 94. ... 94. 94. 94.]
            #    [95. 95. 95. ... 95. 95. 95.]]]]
            # <NDArray 1x1x96x96 @gpu(0)>
            # (Pdb) coord_cols
            # [[[[ 0.  1.  2. ... 93. 94. 95.]
            #    [ 0.  1.  2. ... 93. 94. 95.]
            #    [ 0.  1.  2. ... 93. 94. 95.]
            #    ...
            #    [ 0.  1.  2. ... 93. 94. 95.]
            #    [ 0.  1.  2. ... 93. 94. 95.]
            #    [ 0.  1.  2. ... 93. 94. 95.]]]]
            # <NDArray 1x1x96x96 @gpu(0)>
            # (Pdb) add_xy
            # [[[[61. 61. 61. ... 61. 61. 61.]
            #    [61. 61. 61. ... 61. 61. 61.]
            #    [61. 61. 61. ... 61. 61. 61.]
            #    ...
            #    [61. 61. 61. ... 61. 61. 61.]
            #    [61. 61. 61. ... 61. 61. 61.]
            #    [61. 61. 61. ... 61. 61. 61.]]
            #   [[71. 71. 71. ... 71. 71. 71.]
            #    [71. 71. 71. ... 71. 71. 71.]
            #    [71. 71. 71. ... 71. 71. 71.]
            #    ...
            #    [71. 71. 71. ... 71. 71. 71.]
            #    [71. 71. 71. ... 71. 71. 71.]
            #    [71. 71. 71. ... 71. 71. 71.]]]]
            # <NDArray 1x2x96x96 @gpu(0)>
            # (Pdb) if self.append_dist, then coord_features is:
            # [[[[-1.         -1.         -1.         ... -1.         -1.
            #     -1.        ]
            #    [-1.         -1.         -1.         ... -1.         -1.
            #     -1.        ]
            #    [-1.         -1.         -1.         ... -1.         -1.
            #     -1.        ]
            #    ...
            #    [ 0.7619048   0.7619048   0.7619048  ...  0.7619048   0.7619048
            #      0.7619048 ]
            #    [ 0.78571427  0.78571427  0.78571427 ...  0.78571427  0.78571427
            #      0.78571427]
            #    [ 0.8095238   0.8095238   0.8095238  ...  0.8095238   0.8095238
            #      0.8095238 ]]
            #   [[-1.         -1.         -1.         ...  0.52380955  0.54761904
            #      0.5714286 ]
            #    [-1.         -1.         -1.         ...  0.52380955  0.54761904
            #      0.5714286 ]
            #    [-1.         -1.         -1.         ...  0.52380955  0.54761904
            #      0.5714286 ]
            #    ...
            #    [-1.         -1.         -1.         ...  0.52380955  0.54761904
            #      0.5714286 ]
            #    [-1.         -1.         -1.         ...  0.52380955  0.54761904
            #      0.5714286 ]
            #    [-1.         -1.         -1.         ...  0.52380955  0.54761904
            #      0.5714286 ]]
            #   [[ 1.          1.          1.         ...  1.          1.
            #      1.        ]
            #    [ 1.          1.          1.         ...  1.          1.
            #      1.        ]
            #    [ 1.          1.          1.         ...  1.          1.
            #      1.        ]
            #    ...
            #    [ 1.          1.          1.         ...  0.9245947   0.9382886
            #      0.95238096]
            #    [ 1.          1.          1.         ...  0.944311    0.9577231
            #      0.9715336 ]
            #    [ 1.          1.          1.         ...  0.96421224  0.97735125
            #      0.99088824]]]]
            # <NDArray 1x3x96x96 @gpu(0)>
            row_array = F.arange(start=0, stop=rows, step=1, **ctx_kwarg)   ## (96,)
            col_array = F.arange(start=0, stop=cols, step=1, **ctx_kwarg)   ## (96,)

            # Expand the 1-D index vectors into full (1, 1, rows, cols) grids.
            coord_rows = F.repeat(F.reshape(row_array, (1, 1, rows, 1)), repeats=cols, axis=3)
            coord_cols = F.repeat(F.reshape(col_array, (1, 1, 1, cols)), repeats=rows, axis=2)

            coord_rows = F.repeat(coord_rows, repeats=batch_size, axis=0)
            coord_cols = F.repeat(coord_cols, repeats=batch_size, axis=0)
            coords = F.concat(coord_rows, coord_cols, dim=1)    ## (1, 2, 96, 96)

            add_xy = F.reshape(points * self.spatial_scale, shape=(0, 0, 1))    ## [[[61.] [71.]]] <NDArray 1x2x1 @gpu(0)>
            add_xy = F.reshape(F.repeat(add_xy, rows * cols, axis=2), shape=(0, 0, rows, cols))

            ## self.norm_radius: 42
            coords = (coords - add_xy) / (self.norm_radius * self.spatial_scale)    ## <NDArray 1x2x96x96 @gpu(0)>
            if self.append_dist:
                dist = F.sqrt(F.sum(F.square(coords), axis=1, keepdims=1))  ## <NDArray 1x1x96x96 @gpu(0)>
                coord_features = F.concat(coords, dist, dim=1)
            else:
                # Without this branch the distance channel was always discarded,
                # contradicting the 1x3x96x96 result recorded above.
                coord_features = coords

            coord_features = F.clip(coord_features, a_min=-1, a_max=1)
            return coord_features

    I also wrote a PyTorch version based on the MXNet version:

    class AddCoords(nn.Module):
        """Build point-relative coordinate channels for a single feature map.

        Channel 0 holds the row offset and channel 1 the column offset of every
        spatial location from a reference point; both are divided by
        ``norm_radius`` and clamped to ``[-1, 1]``.
        """

        def __init__(self, norm_radius=1.0, device=None):
            """Store the configuration.

            Args:
                norm_radius: Divisor applied to the offsets before clamping.
                    The default of 1.0 leaves the raw offsets unchanged.
                device: Device for the returned tensor. ``None`` selects CUDA
                    when it is available and falls back to the CPU otherwise.
            """
            super().__init__()
            self.norm_radius = norm_radius
            self.device = device

        def forward(self, input_tensor, points):
            """Return the clamped offsets as a ``(1, 2, x_dim, y_dim)`` tensor.

            Args:
                input_tensor: 3-D tensor; only its last two sizes
                    ``(x_dim, y_dim)`` are used.
                points: Tensor with exactly two elements; the first is
                    subtracted from the row index, the second from the column
                    index.
            """
            _, x_dim, y_dim = input_tensor.size()

            if self.device is not None:
                device = torch.device(self.device)
            else:
                # Prefer CUDA when present, but keep working on CPU-only hosts.
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            grid_shape = (1, 1, x_dim, y_dim)
            row_index = torch.arange(x_dim, dtype=torch.float32, device=device)
            col_index = torch.arange(y_dim, dtype=torch.float32, device=device)
            row_grid = row_index.view(1, 1, x_dim, 1).expand(grid_shape)
            col_grid = col_index.view(1, 1, 1, y_dim).expand(grid_shape)
            coords = torch.cat((row_grid, col_grid), dim=1)     ## (1, 2, x_dim, y_dim)

            # The reference point broadcasts over both spatial axes.
            center = torch.reshape(points, (1, 2, 1, 1)).to(device=device, dtype=torch.float32)

            coords = (coords - center) / self.norm_radius
            coord_features = torch.clamp(coords, min=-1.0, max=1.0)
            return coord_features





