原理参照Caffe学习 五 conv_layer与im2col。
以下是一个比较,使用自己写的conv2d的卷积速度非常慢。
im2col转换进行的卷积与sg.convolve2d(不旋转180°)速度接近。
除此之外,im2col还使得反向传播更为简单。
im2col将卷积操作变为矩阵乘法,反向传播便和全连接层一样。
import scipy.signal.signaltools as sg import numpy as np import timeit b=range(20*4*5*5) b=np.reshape(b,(20,4,5,5))*0.001 a=range(10*4*28*28) a=np.reshape(a,(10,4,28,28))*0.001 def imcol(a, v): n, m, h, w = np.shape(a) vn, vm, vh, vw = np.shape(v) t = h - vh + 1 kh = t ** 2 kw = vh * vh z = np.zeros((n, kh, kw * m)) for i in range(n): for j in range(m): kt = [] for it in range(t): for jt in range(t): kt.append((a[i, j, it:it + vh, jt:jt + vh]).flatten()) z[i, :,j*kw:j*kw+kw] = kt z=np.reshape(z,(n*kh,kw*m)) v=np.reshape(v,(-1,kw*m)) res = np.dot(z, v.T) res = np.reshape(res, (n, kh, -1)) res = np.rollaxis(res, 2, 1) return np.reshape(res, (n, -1, t, t)) def conv2d(a, v): ah, aw = np.shape(a) vh, vw = np.shape(v) k = [[np.sum(np.multiply(a[i:i + vh, j:j + vw], v)) for j in range(aw - vw + 1)] for i in range(ah - vh + 1)] return k def aconv(a, v): n, m, h, w = np.shape(a) vn, vm, vh, vw = np.shape(v) d, s = h -vh +1, w -vw +1 z = np.zeros((n, vn, d, s)) for i in range(n): for j in range(vn): for k in range(m): z[i][j] += conv2d(a[i][k], v[j][k]) return z def sconv(a, v): n, m, h, w = np.shape(a) vn, vm, vh, vw = np.shape(v) d, s = h -vh +1, w -vw +1 z = np.zeros((n, vn, d, s)) for i in range(n): for j in range(vn): for k in range(m): z[i][j] += sg.convolve2d(a[i][k], np.rot90(v[j][k],2), mode='valid') return z def agconv(): return aconv(a,b) def sgconv(): return sconv(a, b) def imconv(): return imcol(a, b) if __name__=='__main__': print sgconv()[0, 0, 0] print imconv()[0, 0, 0] print timeit.timeit('agconv()',setup='from __main__ import agconv',number=10) print timeit.timeit('sgconv()',setup='from __main__ import sgconv',number=10) print timeit.timeit('imconv()', setup='from __main__ import imconv', number=10) """ [ 8.5865 8.59145 8.5964 8.60135 8.6063 8.61125 8.6162 8.62115 8.6261 8.63105 8.636 8.64095 8.6459 8.65085 8.6558 8.66075 8.6657 8.67065 8.6756 8.68055 8.6855 8.69045 8.6954 8.70035] [ 8.5865 8.59145 8.5964 8.60135 8.6063 8.61125 8.6162 8.62115 8.6261 8.63105 8.636 8.64095 8.6459 8.65085 8.6558 8.66075 8.6657 8.67065 8.6756 8.68055 8.6855 8.69045 8.6954 8.70035] 24.7670379661 0.686509787089 0.523622603254 """
在反向传播的时候需要一个相应的col2im。
import numpy as np a=range(3*2*20*20) a=np.reshape(a,(3,2,20,20))*0.001 image_shape=a.shape filter_shape=[2,2,5,5] def im2col(a): n, m, h, w = image_shape vn, vm, vh, vw = filter_shape t = h - vh + 1 kh = t ** 2 kw = vh * vh z = np.zeros((n, kh, kw * m)) for i in range(n): for j in range(m): kt = [] for it in range(t): for jt in range(t): kt.append((a[i, j, it:it + vh, jt:jt + vh]).flatten()) z[i, :, j * kw:j * kw + kw] = kt z = np.reshape(z, (n * kh, kw * m)) return z def col2im(a): n, m, h, w = image_shape vn, vm, vh, vw = filter_shape t = h - vh + 1 kh = t ** 2 kw = vh * vh a = np.reshape(a, (n, kh, kw * m)) z=np.zeros((n,m,h,h)) for i in range(n): for k in range(m): for j in range(h): rh = vh * min(j, vh - 1) b = max(j + 1 - vh, 0) * t z[i,k,j] = np.append(a[i,b,rh:rh + vh], a[i, b + 1:b + t, rh + vh - 1]) return z x=im2col(a) t=col2im(x) print t[0][0]==a[0,0] """ [[ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True] [ True True True True True True True True True True True True True True True True True True True True]] """
完整的代码如下。
# coding:utf8 import cPickle import numpy as np class ConvPoolLayer(object): def __init__(self, image_shape,filter_shape,poolsize=(2,2)): self.filter_shape = filter_shape self.image_shape = image_shape self.w = np.random.normal(loc=0, scale=np.sqrt(1.0/np.prod(filter_shape[1:])), size=(np.prod(filter_shape[1:]),filter_shape[0])) self.b = np.random.normal(loc=0, scale=0.2, size=(1,filter_shape[0],1,1)) self.poolsize = poolsize self.samp_shape=(image_shape[-2] - filter_shape[-2] + 1,image_shape[-1] - filter_shape[-1] + 1) self.out_shape=(self.samp_shape[0]/poolsize[0],self.samp_shape[1]/poolsize[1]) def im2col(self,a): n, m, h, w = self.image_shape vn, vm, vh, vw = self.filter_shape t = h - vh + 1 kh = t ** 2 kw = vh * vh z = np.zeros((n, kh, kw * m)) for i in range(n): for j in range(m): kt = [] for it in range(t): for jt in range(t): kt.append((a[i, j, it:it + vh, jt:jt + vh]).flatten()) z[i, :, j * kw:j * kw + kw] = kt z = np.reshape(z, (n * kh, kw * m)) return z def col2im(self,a): n, m, h, w = self.image_shape vn, vm, vh, vw = self.filter_shape t = h - vh + 1 kh = t ** 2 kw = vh * vh a = np.reshape(a, (n, kh, kw * m)) z = np.zeros((n, m, h, h)) for i in range(n): for k in range(m): for j in range(h): rh = vh * min(j, vh - 1) b = max(j + 1 - vh, 0) * t z[i, k, j] = np.append(a[i, b, rh:rh + vh], a[i, b + 1:b + t, rh + vh - 1]) return z def fw_shape(self,a): res = np.reshape(a, (self.image_shape[0], -1, self.filter_shape[0])) res = np.rollaxis(res, 2, 1) res = np.reshape(res, (self.image_shape[0], -1, self.samp_shape[0], self.samp_shape[1])) return res def bp_shape(self,a): res = np.reshape(a, (self.image_shape[0], self.filter_shape[0], -1)) res = np.rollaxis(res, 1, 3) res = np.reshape(res, (-1, self.filter_shape[0])) return res def feedforward(self, a): z = self.im2col(a) res = np.dot(z, self.w) res = self.fw_shape(res) self.out = self.relu(res+self.b) return np.array(self.pool2d(self.out)) def backprop(self, x, dnext,eta=0.001): if dnext.ndim<3: dnext = np.reshape(dnext,(self.image_shape[0],self.filter_shape[0], self.out_shape[0], self.out_shape[1])) u = self.relu_prime(self.out) dnext = np.multiply(u,self.up(dnext,self.poolsize[0])) delta = self.bp_shape(dnext)/self.image_shape[0] x=self.im2col(x) out_delta = np.dot(delta, self.w.T) out_delta=self.col2im(out_delta) w = np.dot(x.T, delta) self.w -= eta * w self.b -= eta * np.reshape(np.sum(delta,0),(self.b.shape)) return out_delta def pool2d(self,input, ds=(2, 2), mode='max'): fun = np.max if mode == 'sum': fun = np.sum elif mode == 'average': fun = np.average n, m, h, w = np.shape(input) d, s = ds zh = h / d + h % d zw = w / s + w % s z = np.zeros((n, m, zh, zw)) for k in range(n): for o in range(m): for i in range(zh): for j in range(zw): z[k, o, i, j] = fun(input[k, o, d * i:min(d * i + d, h), s * j:min(s * j + s, w)]) return z def up(self,a,n): b=np.ones((n,n)) return np.kron(a,b) def relu(self,z): return np.maximum(z, 0.0) def relu_prime(self,z): z[z>0]=1 return z class SoftmaxLayer(object): def __init__(self, in_num=100,out_num=10): self.weights = np.random.randn(in_num, out_num)/np.sqrt(out_num) def feedforward(self, input): self.out=self.softmax(np.dot(input, self.weights)) return self.out def backprop(self, input, y,eta=0.001): o=self.out delta =o-y out_delta=np.dot(delta,self.weights.T) w = np.dot(input.T,delta) self.weights-=eta*(w) return out_delta def softmax(self,a): m = np.exp(a) return m / (np.sum(m,axis=1)[:,np.newaxis]) class FullLayer(object): def __init__(self, in_num=720,out_num=100): self.in_num=in_num self.out_num=out_num self.biases = np.random.randn(out_num) self.weights = np.random.randn(in_num, out_num)/np.sqrt(out_num) def feedforward(self, x): if x.ndim>2: x = np.reshape(x, (len(x), self.in_num)) self.out = self.sigmoid(np.dot(x, self.weights)+self.biases) return self.out def backprop(self, x,delta,eta=0.001): if x.ndim>2: x = np.reshape(x, (len(x), self.in_num)) sp=self.sigmoid_prime(self.out) delta = delta * sp out_delta = np.dot(delta, self.weights.T) w = np.dot( x.T,delta) self.weights-=eta*w self.biases -= eta*np.sum(delta,0) return out_delta def sigmoid(self,z): return 1.0/(1.0+np.exp(-z)) def sigmoid_prime(self,z): return z*(1-z) class Network(object): def __init__(self, layers): self.layers=layers self.num_layers = len(layers) self.a=[] def feedforward(self, x): self.a.append(x) for layer in self.layers: x=layer.feedforward(x) self.a.append(x) return x def SGD(self, training_data, test_data,epochs, mini_batch_size, eta=0.001): self.n = len(training_data[0]) self.mini_batch_size=mini_batch_size self.eta = eta cx=range(epochs) for j in cx: for k in xrange(0, self.n , mini_batch_size): batch_x = np.array(training_data[0][k:k + mini_batch_size]) batch_y = training_data[1][k:k + mini_batch_size] self.backprop(batch_x,batch_y) if k%500==0: print "Epoch {0}:{1} cost={3}, test:{2}".format(j,k, self.evaluate([test_data[0],test_data[1]]),self.cost) def backprop(self, x_in, y): self.feedforward(x_in) for i in range(self.num_layers): delta=self.layers[-i-1].backprop(self.a[-i-2],y,eta=self.eta) y=delta def evaluate(self, test_data): x,y=test_data num=len(x) size=self.mini_batch_size x=[self.feedforward(np.array(x[size*i:size*i+size])) for i in range((num/size))] x=np.reshape(x,(num,np.shape(x)[-1])) xp = np.argmax(x, axis=1) yp= np.argmax(y, axis=1) if y[0].ndim else y self.cost = -np.mean(np.log(x)[np.arange(num),yp]) return np.mean(yp == xp)*100 if __name__ == '__main__': def get_data(data): return [np.reshape(x, (1,28,28)) for x in data[0]] def get_label(i): c = np.zeros((10)) c[i] = 1 return c f = open('data/mnist.pkl', 'rb') training_data, validation_data, test_data = cPickle.load(f) training_inputs = get_data(training_data) training_label=[get_label(y_) for y_ in training_data[1]] test_inputs = get_data(test_data) test = zip(test_inputs,test_data[1]) size=10 net = Network([ConvPoolLayer(image_shape=[size,1,28,28],filter_shape=[8,1,5,5],poolsize=(2,2)), ConvPoolLayer(image_shape=[size,8,12,12],filter_shape=[16,8,5,5], poolsize=(2,2)), FullLayer(in_num=16*4*4,out_num=100), SoftmaxLayer(in_num=100,out_num=10)]) net.SGD([training_inputs,training_label],[test_inputs[:500],test_data[1][:500]], epochs=3,mini_batch_size=size, eta=0.05) # Epoch 0:14000 cost=0.134660777083, test:96.0