An intriguing failing of convolutional neural networks and the CoordConv solution
NeurIPS 2018
2019-10-10 15:01:48
Paper: https://arxiv.org/pdf/1807.03247.pdf
Official TensorFlow Code: https://github.com/uber-research/coordconv
Unofficial PyTorch Code: https://github.com/walsvid/CoordConv
机器之心:卷积神经网络「失陷」,CoordConv 来填坑: https://zhuanlan.zhihu.com/p/39665894
Uber提出CoordConv:解决普通CNN坐标变换问题: https://zhuanlan.zhihu.com/p/39919038
要拯救CNN的CoordConv受嘲讽,翻译个坐标还用训练? https://zhuanlan.zhihu.com/p/39841356
1. 给定 feature map 和坐标 (x, y),如何生成对应的 relative coordinate map?
The following code is from: [ICCV19] AdaptIS: Adaptive Instance Selection Network [Github]
def get_instances_maps(self, F, points, adaptive_input, controller_input):
    """Compute the point-conditioned output maps for a set of points.

    Args:
        F: MXNet API namespace handed in by Gluon; only ``F.reshape`` and
            ``F.repeat`` are used here.
        points: Point coordinates. When this is an NDArray its second
            dimension is stored as ``self.num_points``; it is flattened to
            ``(-1, 2)`` before coordinate features are added.
        adaptive_input: Features that are repeated ``self.num_points`` times
            along axis 0 and then run through ``block0`` -> ``adain`` ->
            ``block1``.
        controller_input: Features passed to ``self.eqf`` together with
            ``points`` to produce the modulation input for ``self.adain``.

    Returns:
        The output of ``self.block1``.
    """
    # The point count can only be read from a concrete NDArray; otherwise the
    # value remembered from an earlier call is reused.
    if isinstance(points, mx.nd.NDArray):
        self.num_points = points.shape[1]

    # Both controller variants start from the same eqf(...) call; only a
    # controller without a truthy `return_map` attribute is applied afterwards.
    controller_returns_map = getattr(self.controller_net, 'return_map', False)
    weights = self.eqf(controller_input, points)
    if not controller_returns_map:
        weights = self.controller_net(weights)

    flat_points = F.reshape(points, shape=(-1, 2))
    features = F.repeat(adaptive_input, self.num_points, axis=0)
    features = self.add_coord_features(features, flat_points)

    features = self.block0(features)
    features = self.adain(features, weights)
    return self.block1(features)
class AppendCoordFeatures(gluon.HybridBlock):
    """Concatenate point-relative coordinate channels in front of a feature map.

    For every sample the block builds a row-offset map and a column-offset map
    measured from a reference point, divides them by
    ``norm_radius * spatial_scale``, optionally adds a Euclidean-distance
    channel, clips everything to ``[-1, 1]`` and concatenates the result with
    the input along axis 1.
    """

    def __init__(self, norm_radius, append_dist=True, spatial_scale=1.0):
        """Store the configuration.

        Args:
            norm_radius: Divisor (together with ``spatial_scale``) applied to
                the coordinate offsets before clipping.
            append_dist: When true, a distance channel is appended after the
                two offset channels.
            spatial_scale: Factor the incoming points are multiplied by.
        """
        super(AppendCoordFeatures, self).__init__()
        self.norm_radius = norm_radius
        self.spatial_scale = spatial_scale
        self.append_dist = append_dist
        # Shape of the most recent NDArray input seen by hybrid_forward.
        self.xs = None

    def _ctx_kwarg(self, x):
        """Return ``{"ctx": x.context}`` for an NDArray, ``{}`` otherwise."""
        return {"ctx": x.context} if isinstance(x, mx.nd.NDArray) else {}

    def get_coord_features(self, F, points, rows, cols, batch_size, **ctx_kwarg):
        """Build the clipped coordinate feature maps.

        Args:
            F: MXNet API namespace (``mx.nd`` or ``mx.sym``).
            points: Reference points, one (row, col) pair per sample.
            rows: Height of the maps to generate.
            cols: Width of the maps to generate.
            batch_size: Number of times the index grids are repeated on axis 0.
            **ctx_kwarg: Extra keyword arguments forwarded to ``F.arange``.

        Returns:
            Two offset channels, plus a distance channel when
            ``self.append_dist`` is set, clipped to ``[-1, 1]``.
        """
        row_index = F.arange(start=0, stop=rows, step=1, **ctx_kwarg)
        col_index = F.arange(start=0, stop=cols, step=1, **ctx_kwarg)

        # Expand the 1-D index vectors into (1, 1, rows, cols) grids.
        row_grid = F.reshape(row_index, (1, 1, rows, 1))
        row_grid = F.repeat(row_grid, repeats=cols, axis=3)
        col_grid = F.reshape(col_index, (1, 1, 1, cols))
        col_grid = F.repeat(col_grid, repeats=rows, axis=2)

        row_grid = F.repeat(row_grid, repeats=batch_size, axis=0)
        col_grid = F.repeat(col_grid, repeats=batch_size, axis=0)
        grid = F.concat(row_grid, col_grid, dim=1)

        # In MXNet's reshape a 0 keeps the corresponding input dimension, so
        # the points gain a trailing axis that is tiled into a full map.
        centers = F.reshape(points * self.spatial_scale, shape=(0, 0, 1))
        centers = F.repeat(centers, rows * cols, axis=2)
        centers = F.reshape(centers, shape=(0, 0, rows, cols))

        offsets = (grid - centers) / (self.norm_radius * self.spatial_scale)

        if not self.append_dist:
            features = offsets
        else:
            distance = F.sqrt(F.sum(F.square(offsets), axis=1, keepdims=True))
            features = F.concat(offsets, distance, dim=1)

        return F.clip(features, a_min=-1, a_max=1)

    def hybrid_forward(self, F, x, coords):
        """Return ``x`` with the coordinate features concatenated in front.

        Args:
            F: MXNet API namespace.
            x: Feature map; dimensions 0, 2 and 3 are read as batch size, rows
                and columns.
            coords: Reference points passed on to ``get_coord_features``.
        """
        # A concrete shape is only available on NDArrays; for any other input
        # the shape remembered from the last NDArray call is reused.
        if isinstance(x, mx.nd.NDArray):
            self.xs = x.shape

        shape = self.xs
        batch_size = shape[0]
        rows, cols = shape[2], shape[3]

        coord_features = self.get_coord_features(
            F, coords, rows, cols, batch_size, **self._ctx_kwarg(x)
        )
        return F.concat(coord_features, x, dim=1)
def get_coord_features(self, F, points, rows, cols, batch_size, **ctx_kwarg):
    """Annotated walk-through of ``AppendCoordFeatures.get_coord_features``.

    The shapes and values quoted in the comments below were recorded in a pdb
    session with these inputs::

        points           = [[61. 71.]]  <NDArray 1x2 @gpu(0)>
        rows, cols       = 96, 96
        batch_size       = 1
        self.norm_radius = 42

    Args:
        F: MXNet API namespace (``mx.nd`` or ``mx.sym``).
        points: Reference points, one (row, col) pair per sample.
        rows: Height of the maps to generate.
        cols: Width of the maps to generate.
        batch_size: Number of times the index grids are repeated on axis 0.
        **ctx_kwarg: Extra keyword arguments forwarded to ``F.arange``.

    Returns:
        The coordinate offsets (plus a distance channel when
        ``self.append_dist`` is set), clipped to ``[-1, 1]``.
    """
    # NOTE: the original listing called pdb.set_trace() at this point to
    # capture the dumps below. The breakpoint is removed so the method runs
    # unattended; insert `import pdb; pdb.set_trace()` here to reproduce them.

    # row_array and col_array: [0. 1. 2. ... 94. 95.]  <NDArray 96 @gpu(0)>
    row_array = F.arange(start=0, stop=rows, step=1, **ctx_kwarg)  # (96,)
    col_array = F.arange(start=0, stop=cols, step=1, **ctx_kwarg)  # (96,)

    # coord_rows  <NDArray 1x1x96x96 @gpu(0)>
    # [[[[ 0.  0.  0. ...  0.  0.  0.]
    #    [ 1.  1.  1. ...  1.  1.  1.]
    #    [ 2.  2.  2. ...  2.  2.  2.]
    #    ...
    #    [93. 93. 93. ... 93. 93. 93.]
    #    [94. 94. 94. ... 94. 94. 94.]
    #    [95. 95. 95. ... 95. 95. 95.]]]]
    coord_rows = F.repeat(F.reshape(row_array, (1, 1, rows, 1)), repeats=cols, axis=3)

    # coord_cols  <NDArray 1x1x96x96 @gpu(0)>
    # [[[[ 0.  1.  2. ... 93. 94. 95.]
    #    [ 0.  1.  2. ... 93. 94. 95.]
    #    [ 0.  1.  2. ... 93. 94. 95.]
    #    ...
    #    [ 0.  1.  2. ... 93. 94. 95.]
    #    [ 0.  1.  2. ... 93. 94. 95.]
    #    [ 0.  1.  2. ... 93. 94. 95.]]]]
    coord_cols = F.repeat(F.reshape(col_array, (1, 1, 1, cols)), repeats=rows, axis=2)

    coord_rows = F.repeat(coord_rows, repeats=batch_size, axis=0)
    coord_cols = F.repeat(coord_cols, repeats=batch_size, axis=0)
    coords = F.concat(coord_rows, coord_cols, dim=1)  # (1, 2, 96, 96)

    # [[[61.] [71.]]]  <NDArray 1x2x1 @gpu(0)>
    add_xy = F.reshape(points * self.spatial_scale, shape=(0, 0, 1))

    # add_xy  <NDArray 1x2x96x96 @gpu(0)>
    # [[[[61. 61. 61. ... 61. 61. 61.]
    #    ...
    #    [61. 61. 61. ... 61. 61. 61.]]
    #   [[71. 71. 71. ... 71. 71. 71.]
    #    ...
    #    [71. 71. 71. ... 71. 71. 71.]]]]
    add_xy = F.reshape(F.repeat(add_xy, rows * cols, axis=2), shape=(0, 0, rows, cols))

    # self.norm_radius: 42
    coords = (coords - add_xy) / (self.norm_radius * self.spatial_scale)  # <NDArray 1x2x96x96 @gpu(0)>

    if self.append_dist:
        dist = F.sqrt(F.sum(F.square(coords), axis=1, keepdims=1))  # <NDArray 1x1x96x96 @gpu(0)>
        coord_features = F.concat(coords, dist, dim=1)
    else:
        coord_features = coords

    # With self.append_dist set, the clipped result is  <NDArray 1x3x96x96 @gpu(0)>
    # channel 0 (row offset):
    #   [[-1.         -1.         -1.         ... -1.         -1.         -1.        ]
    #    [-1.         -1.         -1.         ... -1.         -1.         -1.        ]
    #    [-1.         -1.         -1.         ... -1.         -1.         -1.        ]
    #    ...
    #    [ 0.7619048   0.7619048   0.7619048  ...  0.7619048   0.7619048   0.7619048 ]
    #    [ 0.78571427  0.78571427  0.78571427 ...  0.78571427  0.78571427  0.78571427]
    #    [ 0.8095238   0.8095238   0.8095238  ...  0.8095238   0.8095238   0.8095238 ]]
    # channel 1 (column offset), every displayed row:
    #    [-1.         -1.         -1.         ...  0.52380955  0.54761904  0.5714286 ]
    # channel 2 (distance):
    #   [[ 1.          1.          1.         ...  1.          1.          1.        ]
    #    [ 1.          1.          1.         ...  1.          1.          1.        ]
    #    [ 1.          1.          1.         ...  1.          1.          1.        ]
    #    ...
    #    [ 1.          1.          1.         ...  0.9245947   0.9382886   0.95238096]
    #    [ 1.          1.          1.         ...  0.944311    0.9577231   0.9715336 ]
    #    [ 1.          1.          1.         ...  0.96421224  0.97735125  0.99088824]]
    coord_features = F.clip(coord_features, a_min=-1, a_max=1)
    return coord_features
I also wrote a PyTorch version based on the MXNet version:
class AddCoords(nn.Module):
    """Build point-relative coordinate maps (PyTorch port of the MXNet block).

    For each reference point the module produces a row-offset channel and a
    column-offset channel covering the spatial size of ``input_tensor``,
    normalises them, optionally adds a Euclidean-distance channel, and clamps
    the result to ``[-1, 1]``.

    With the default arguments the output equals the earlier implementation:
    raw pixel offsets clamped to ``[-1, 1]``. Pass a ``norm_radius`` larger
    than 1 to keep graded offsets instead of a -1/0/1 map.
    """

    def __init__(self, norm_radius=1.0, spatial_scale=1.0, append_dist=False):
        """Store the configuration.

        Args:
            norm_radius: Offsets are divided by ``norm_radius * spatial_scale``
                before clamping.
            spatial_scale: Factor the points are multiplied by before they are
                subtracted from the pixel grid.
            append_dist: When true, a third channel holding the Euclidean norm
                of the two normalised offsets is appended.
        """
        super().__init__()
        self.norm_radius = norm_radius
        self.spatial_scale = spatial_scale
        self.append_dist = append_dist

    def forward(self, input_tensor, points):
        """Return the clamped coordinate features.

        Args:
            input_tensor: Feature map of shape ``(C, H, W)`` or
                ``(B, C, H, W)``; only its last two dimensions and its device
                are used.
            points: Tensor (or nested sequence) holding one ``(row, col)``
                pair per requested map, i.e. ``2 * N`` values in total.

        Returns:
            A float32 tensor of shape ``(N, 2, H, W)``, or ``(N, 3, H, W)``
            with ``append_dist``, on the same device as ``input_tensor``.
        """
        rows, cols = input_tensor.shape[-2:]
        # Follow the input's device instead of forcing CUDA, so the module
        # also runs on CPU-only hosts.
        device = input_tensor.device

        row_index = torch.arange(rows, dtype=torch.float32, device=device)
        col_index = torch.arange(cols, dtype=torch.float32, device=device)
        # Channel 0 holds the row index, channel 1 the column index.
        grid = torch.stack(
            (
                row_index.view(rows, 1).expand(rows, cols),
                col_index.view(1, cols).expand(rows, cols),
            ),
            dim=0,
        ).unsqueeze(0)  # (1, 2, H, W)

        # (N, 2, 1, 1) broadcasts against the grid, giving one map per point.
        centers = torch.as_tensor(points, dtype=torch.float32, device=device)
        centers = centers.reshape(-1, 2, 1, 1) * self.spatial_scale

        coords = (grid - centers) / (self.norm_radius * self.spatial_scale)

        if self.append_dist:
            dist = coords.pow(2).sum(dim=1, keepdim=True).sqrt()
            coords = torch.cat((coords, dist), dim=1)

        return torch.clamp(coords, min=-1.0, max=1.0)