二话不说给代码:
"""Deep Belief Network on MNIST: stacked RBM pre-training plus a sigmoid MLP.

The script greedily trains a stack of Restricted Boltzmann Machines (RBMs)
on the MNIST training images, then copies the learned weights and hidden
biases into a fully connected sigmoid network that is trained with the
labels.

Written against the TensorFlow 1.x graph API (``tf.placeholder``,
``tf.Session``, ``tensorflow.examples.tutorials.mnist``).
"""

# urllib is used to download the utils file from deeplearning.net
import urllib.request

# Context managers make sure the HTTP response and the output file are
# closed even if the download or the write fails.
with urllib.request.urlopen(
        'http://deeplearning.net/tutorial/code/utils.py') as response:
    content = response.read()
with open('utils.py', 'wb') as target:
    target.write(content)

# Import the math function for calculations
import math
# Tensorflow library. Used to implement machine learning models
import tensorflow as tf
# Numpy contains helpful functions for efficient mathematical calculations
import numpy as np
# Image library for image manipulation
from PIL import Image
# import Image
# Utils file

# ---------------------------------------------------------------------------
# Import the MNIST data
# ---------------------------------------------------------------------------

# Getting the MNIST data provided by Tensorflow.
# Logging is lowered to ERROR while loading and restored afterwards.
old_v = tf.compat.v1.logging.get_verbosity()
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tensorflow.examples.tutorials.mnist import input_data

# Loading in the mnist data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
tf.compat.v1.logging.set_verbosity(old_v)
# Parenthesised so that all four values belong to one tuple; a bare line
# break after the trailing comma would unpack only three values.
trX, trY, teX, teY = (
    mnist.train.images,
    mnist.train.labels,
    mnist.test.images,
    mnist.test.labels,
)


# ---------------------------------------------------------------------------
# Build the RBM layer
# ---------------------------------------------------------------------------

class RBM(object):
    """A Restricted Boltzmann Machine trained with one Gibbs sampling step.

    Attributes:
        epochs: Number of passes over the training data.
        learning_rate: Step size applied to every parameter update.
        batchsize: Number of rows fed per update step.
        w: Weight matrix of shape ``[input_size, output_size]``.
        hb: Hidden bias vector of length ``output_size``.
        vb: Visible bias vector of length ``input_size``.
    """

    def __init__(self, input_size, output_size):
        """Store the layer sizes and zero-initialise all parameters.

        Args:
            input_size: Number of visible units.
            output_size: Number of hidden units.
        """
        # Defining the hyperparameters
        self._input_size = input_size    # Size of input
        self._output_size = output_size  # Size of output
        self.epochs = 5                  # Amount of training iterations
        self.learning_rate = 1.0         # The step used in gradient descent
        self.batchsize = 100             # Rows used per training sub iteration

        # Initializing weights and biases as matrices full of zeroes
        self.w = np.zeros([input_size, output_size], np.float32)
        self.hb = np.zeros([output_size], np.float32)
        self.vb = np.zeros([input_size], np.float32)

    def prob_h_given_v(self, visible, w, hb):
        """Return ``sigmoid(visible @ w + hb)``, the hidden activations."""
        return tf.nn.sigmoid(tf.matmul(visible, w) + hb)

    def prob_v_given_h(self, hidden, w, vb):
        """Return ``sigmoid(hidden @ w.T + vb)``, the visible activations."""
        return tf.nn.sigmoid(tf.matmul(hidden, tf.transpose(w)) + vb)

    def sample_prob(self, probs):
        """Draw a 0/1 sample per entry of ``probs``.

        An entry becomes 1 when it exceeds a fresh uniform random number
        and 0 otherwise (``relu(sign(...))`` maps -1 and 0 to 0).
        """
        return tf.nn.relu(
            tf.sign(probs - tf.random_uniform(tf.shape(probs))))

    def train(self, X):
        """Fit the RBM to ``X`` and store the result in ``w``, ``hb``, ``vb``.

        Training always starts from all-zero parameters. After every epoch
        the mean squared reconstruction error over the whole of ``X`` is
        printed.

        Args:
            X: 2-D array whose second dimension equals ``input_size``.
        """
        # Create the placeholders for our parameters
        _w = tf.placeholder("float", [self._input_size, self._output_size])
        _hb = tf.placeholder("float", [self._output_size])
        _vb = tf.placeholder("float", [self._input_size])
        v0 = tf.placeholder("float", [None, self._input_size])

        # Parameter values carried from one update step to the next
        prv_w = np.zeros([self._input_size, self._output_size], np.float32)
        prv_hb = np.zeros([self._output_size], np.float32)
        prv_vb = np.zeros([self._input_size], np.float32)

        # One Gibbs step: v0 -> h0 (sampled) -> v1 (sampled) -> h1 (probs)
        h0 = self.sample_prob(self.prob_h_given_v(v0, _w, _hb))
        v1 = self.sample_prob(self.prob_v_given_h(h0, _w, _vb))
        h1 = self.prob_h_given_v(v1, _w, _hb)

        # Create the Gradients
        positive_grad = tf.matmul(tf.transpose(v0), h0)
        negative_grad = tf.matmul(tf.transpose(v1), h1)

        # Updated parameter values, averaged over the rows of the batch
        batch_rows = tf.cast(tf.shape(v0)[0], tf.float32)
        update_w = _w + self.learning_rate * (
            positive_grad - negative_grad) / batch_rows
        update_vb = _vb + self.learning_rate * tf.reduce_mean(v0 - v1, 0)
        update_hb = _hb + self.learning_rate * tf.reduce_mean(h0 - h1, 0)

        # Find the error rate
        err = tf.reduce_mean(tf.square(v0 - v1))

        # Training loop
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # For each epoch
            for epoch in range(self.epochs):
                # Iterating over start offsets covers every row, including
                # the final batch.
                for start in range(0, len(X), self.batchsize):
                    batch = X[start:start + self.batchsize]
                    # Fetch all three updates in ONE run so they are
                    # computed from the same random samples h0 / v1;
                    # separate runs would redraw the samples each time.
                    prv_w, prv_hb, prv_vb = sess.run(
                        [update_w, update_hb, update_vb],
                        feed_dict={v0: batch, _w: prv_w,
                                   _hb: prv_hb, _vb: prv_vb})
                error = sess.run(
                    err,
                    feed_dict={v0: X, _w: prv_w, _vb: prv_vb, _hb: prv_hb})
                print('Epoch: %d' % epoch, 'reconstruction error: %f' % error)
            self.w = prv_w
            self.hb = prv_hb
            self.vb = prv_vb

    def rbm_outpt(self, X):
        """Return ``sigmoid(X @ w + hb)`` for the trained parameters.

        The result is used as the input of the next RBM in the stack.
        """
        input_X = tf.constant(X)
        _w = tf.constant(self.w)
        _hb = tf.constant(self.hb)
        out = tf.nn.sigmoid(tf.matmul(input_X, _w) + _hb)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            return sess.run(out)


# ---------------------------------------------------------------------------
# Build the DBN
# ---------------------------------------------------------------------------

# Three stacked RBMs with 500, 200 and 50 hidden units
RBM_hidden_sizes = [500, 200, 50]

# Since we are training, set input as training data
inpX = trX

# Create list to hold our RBMs
rbm_list = []

# Size of inputs is the number of inputs in the training set
input_size = inpX.shape[1]

# For each RBM we want to generate
for i, size in enumerate(RBM_hidden_sizes):
    print('RBM: ', i, ' ', input_size, '->', size)
    rbm_list.append(RBM(input_size, size))
    input_size = size


# ---------------------------------------------------------------------------
# Neural network
# ---------------------------------------------------------------------------

class NN(object):
    """Fully connected sigmoid network that can be seeded from RBMs.

    Attributes:
        w_list: One weight matrix per layer (hidden layers plus output).
        b_list: One bias vector per layer (hidden layers plus output).
    """

    def __init__(self, sizes, X, Y):
        """Store the data and randomly initialise every layer.

        Args:
            sizes: List with the width of each hidden layer.
            X: 2-D training inputs; ``X.shape[1]`` is the input width.
            Y: 2-D training targets; ``Y.shape[1]`` is the output width.
        """
        # Initialize hyperparameters
        self._sizes = sizes
        self._X = X
        self._Y = Y
        self.w_list = []
        self.b_list = []
        self._learning_rate = 1.0
        self._momentum = 0.0
        self._epoches = 10
        self._batchsize = 100
        input_size = X.shape[1]

        # initialization loop: hidden layers followed by the output layer
        for size in self._sizes + [Y.shape[1]]:
            # Define upper limit for the uniform distribution range
            max_range = 4 * math.sqrt(6. / (input_size + size))

            # Initialize weights through a random uniform distribution
            self.w_list.append(
                np.random.uniform(
                    -max_range, max_range,
                    [input_size, size]).astype(np.float32))

            # Initialize bias as zeroes
            self.b_list.append(np.zeros([size], np.float32))
            input_size = size

    def load_from_rbms(self, dbn_sizes, rbm_list):
        """Copy weights and hidden biases from trained RBMs.

        Only the hidden layers are overwritten; the output layer keeps its
        random initialisation.

        Args:
            dbn_sizes: Hidden sizes of the RBM stack; must equal the
                ``sizes`` this network was built with.
            rbm_list: Trained RBMs, one per hidden layer, in order.

        Raises:
            AssertionError: If ``dbn_sizes`` does not match the network.
        """
        # Check if expected sizes are correct
        assert len(dbn_sizes) == len(self._sizes)

        # Check if for each RBM the expected sizes are correct
        for expected, actual in zip(dbn_sizes, self._sizes):
            assert expected == actual

        # If everything is correct, bring over the weights and biases
        for i in range(len(self._sizes)):
            self.w_list[i] = rbm_list[i].w
            self.b_list[i] = rbm_list[i].hb

    def train(self):
        """Train the network on the stored data and print epoch accuracy.

        Minimises the mean squared error between the last sigmoid layer
        and the targets with a momentum optimizer. After each epoch the
        current weights are copied back into ``w_list`` / ``b_list`` and
        the accuracy on the training data is printed.
        """
        n_layers = len(self._sizes) + 1

        # Create placeholders for input, weights, biases, output
        _a = [None] * (n_layers + 1)
        _w = [None] * n_layers
        _b = [None] * n_layers
        _a[0] = tf.placeholder("float", [None, self._X.shape[1]])
        y = tf.placeholder("float", [None, self._Y.shape[1]])

        # Define variables and activation function
        for i in range(n_layers):
            _w[i] = tf.Variable(self.w_list[i])
            _b[i] = tf.Variable(self.b_list[i])
        for i in range(1, n_layers + 1):
            _a[i] = tf.nn.sigmoid(
                tf.matmul(_a[i - 1], _w[i - 1]) + _b[i - 1])

        # Define the cost function
        cost = tf.reduce_mean(tf.square(_a[-1] - y))

        # Define the training operation
        # (Momentum Optimizer minimizing the Cost function)
        train_op = tf.train.MomentumOptimizer(
            self._learning_rate, self._momentum).minimize(cost)

        # Prediction operation
        predict_op = tf.argmax(_a[-1], 1)

        # Training Loop
        with tf.Session() as sess:
            # Initialize Variables
            sess.run(tf.global_variables_initializer())

            # For each epoch
            for i in range(self._epoches):

                # Iterating over start offsets covers every row, including
                # the final batch.
                for start in range(0, len(self._X), self._batchsize):
                    end = start + self._batchsize
                    # Run the training operation on the input data
                    sess.run(train_op, feed_dict={
                        _a[0]: self._X[start:end], y: self._Y[start:end]})

                for j in range(n_layers):
                    # Retrieve weights and biases
                    self.w_list[j] = sess.run(_w[j])
                    self.b_list[j] = sess.run(_b[j])

                print("Accuracy rating for epoch " + str(i) + ": " + str(
                    np.mean(np.argmax(self._Y, axis=1) == sess.run(
                        predict_op,
                        feed_dict={_a[0]: self._X, y: self._Y}))))


if __name__ == '__main__':
    # Train on the data set
    # For each RBM in our list
    for rbm in rbm_list:
        print('New RBM:')
        # Train a new one
        rbm.train(inpX)
        # Return the output layer
        inpX = rbm.rbm_outpt(inpX)

    print("正在训练。。。。。。")
    nNet = NN(RBM_hidden_sizes, trX, trY)
    nNet.load_from_rbms(RBM_hidden_sizes, rbm_list)
    nNet.train()