参考资料:《500 Lines or Less》中的 OCR 一章,其中包括对神经网络算法的简单介绍;如果看不懂英文原文,可以借助谷歌翻译。
设计feedforward ANN(前馈神经网络,也称bp神经网络)时,我们需要考虑以下因素:
所以我们可以参看 常用激活函数列表
2. biases(偏移因子,又译成阈值)
"""
In order to decide how many hidden nodes the hidden layer should have,
split up the data set into training and testing data and create networks
with various hidden node counts (5, 10, 15, ... 45), testing the performance
for each.

The best-performing node count is used in the actual system. If multiple
counts perform similarly, choose the smallest count for a smaller network
with fewer computations.
"""
import numpy as np
from ocr import OCRNeuralNetwork

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module name.
    from sklearn.cross_validation import train_test_split


def test(data_matrix, data_labels, test_indices, nn):
    """Return the fraction of test samples that ``nn`` classifies correctly.

    Args:
        data_matrix: Sequence of samples; ``data_matrix[i]`` is passed to
            ``nn.predict``.
        data_labels: Sequence of expected labels, parallel to ``data_matrix``.
        test_indices: Indices into ``data_matrix`` / ``data_labels`` that
            make up the test set.
        nn: Object exposing ``predict(sample)``.

    Returns:
        The accuracy (between 0 and 1) averaged over 100 evaluation rounds.
    """
    rounds = 100
    avg_sum = 0
    # NOTE(review): every round evaluates the same samples with the same
    # network, so the rounds only matter if predict() is non-deterministic.
    for _ in range(rounds):
        correct_guess_count = 0
        for i in test_indices:
            prediction = nn.predict(data_matrix[i])
            if data_labels[i] == prediction:
                correct_guess_count += 1
        avg_sum += correct_guess_count / float(len(test_indices))
    return avg_sum / rounds


# Load data samples and labels into matrix. The files are opened in a
# ``with`` block so the handles are closed once the data has been parsed.
with open('data.csv', 'rb') as data_file:
    data_matrix = np.loadtxt(data_file, delimiter=',').tolist()
with open('dataLabels.csv', 'rb') as labels_file:
    data_labels = np.loadtxt(labels_file).tolist()

# Create training and testing sets.
train_indices, test_indices = train_test_split(list(range(5000)))

# Single-argument print(...) calls behave the same on Python 2 and 3.
print("PERFORMANCE")
print("-----------")

# Try various number of hidden nodes and see what performs best
for i in range(5, 50, 5):
    nn = OCRNeuralNetwork(i, data_matrix, data_labels, train_indices, False)
    performance = str(test(data_matrix, data_labels, test_indices, nn))
    print("{i} Hidden Nodes: {val}".format(i=i, val=performance))
2.forward propagation(前向传播)
第二步是前向传播,其本质是如[什么是anns]中所描述的那样,从输入节点开始逐层计算各节点的输出。这里,`y0`是我们用来训练ANN的大小为400的输入数组。我们将`theta1`乘以`y0`的转置,即两个大小分别为`(num_hidden_nodes×400)`和`(400×1)`的矩阵相乘,得到大小为`num_hidden_nodes`的隐藏层结果向量。然后,我们加上偏移因子,并应用矢量化的S形(sigmoid)激活函数,得到输出向量`y1`。`y1`就是隐藏层的输出向量。再次重复相同的过程,即可计算输出节点的`y2`。`y2`是输出层向量,其中每个值表示所绘制的数字等于该值下标的可能性。例如,如果有人绘制了一个8,并且ANN做出了正确的预测,那么`y2`在下标8处的值将是最大的。然而,6是所绘数字的似然性可能比1更高,因为6看起来更像8,并且与8有更多重叠的像素。随着用于训练的手绘数字越来越多,`y2`会变得越来越准确。
3.back propagation
import csv
import json
import math
import os
import random
from collections import namedtuple
from math import pow

import numpy as np
from numpy import matrix

try:
    import matplotlib.cm as cm
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is only used by the _draw() debugging helper, so the network
    # itself stays usable when matplotlib is not installed.
    cm = None
    plt = None


class OCRNeuralNetwork:
    """Feed-forward neural network that predicts hand-drawn digits.

    This class does some initial training of a neural network for predicting
    drawn digits based on a data set in data_matrix and data_labels. It can
    then be used to train the network further by calling train() with any
    array of data or to predict what a drawn digit is by calling predict().

    The weights that define the neural network can be saved to a file,
    NN_FILE_PATH, to be reloaded upon initialization.

    Weights and biases are stored as 2-D float ndarrays:
    ``theta1`` is (hidden, inputs), ``theta2`` is (outputs, hidden) and both
    bias attributes are column vectors.
    """

    LEARNING_RATE = 0.1
    WIDTH_IN_PIXELS = 20
    NN_FILE_PATH = 'nn.json'
    NUM_INPUTS = 400   # length of one flattened sample
    NUM_OUTPUTS = 10   # one output node per digit

    def __init__(self, num_hidden_nodes, data_matrix, data_labels,
                 training_indices, use_file=True):
        """Build the network, either by training it or by loading it.

        Args:
            num_hidden_nodes: Size of the hidden layer. Only used when the
                network is freshly initialised, not when it is loaded.
            data_matrix: Sequence of samples (each of length NUM_INPUTS).
            data_labels: Sequence of labels parallel to ``data_matrix``.
            training_indices: Indices of the samples used for the initial
                training.
            use_file: When true and NN_FILE_PATH exists, the weights are
                loaded from it instead of being trained; when true and the
                file is missing, the trained weights are saved to it.
        """
        self.sigmoid = np.vectorize(self._sigmoid_scalar)
        self.sigmoid_prime = np.vectorize(self._sigmoid_prime_scalar)
        self._use_file = use_file
        self.data_matrix = data_matrix
        self.data_labels = data_labels

        if not os.path.isfile(OCRNeuralNetwork.NN_FILE_PATH) or not use_file:
            # Step 1: Initialize weights to small numbers
            self.theta1 = self._rand_initialize_weights(
                self.NUM_INPUTS, num_hidden_nodes)
            self.theta2 = self._rand_initialize_weights(
                num_hidden_nodes, self.NUM_OUTPUTS)
            self.input_layer_bias = self._rand_initialize_weights(
                1, num_hidden_nodes)
            self.hidden_layer_bias = self._rand_initialize_weights(
                1, self.NUM_OUTPUTS)

            # Train using sample data
            TrainData = namedtuple('TrainData', ['y0', 'label'])
            self.train([
                TrainData(self.data_matrix[i], int(self.data_labels[i]))
                for i in training_indices
            ])
            self.save()
        else:
            self._load()

    def _rand_initialize_weights(self, size_in, size_out):
        """Return a (size_out, size_in) array of values in [-0.06, 0.06)."""
        return (np.random.rand(size_out, size_in) * 0.12) - 0.06

    def _sigmoid_scalar(self, z):
        """The sigmoid activation function. Operates on scalars.

        The two branches are algebraically identical; picking the one whose
        exponent is non-positive avoids overflowing for large ``|z|``.
        """
        if z >= 0:
            return 1 / (1 + math.exp(-z))
        exp_z = math.exp(z)
        return exp_z / (1 + exp_z)

    def _sigmoid_prime_scalar(self, z):
        """Derivative of the sigmoid at the scalar ``z``."""
        s = self._sigmoid_scalar(z)
        return s * (1 - s)

    def _draw(self, sample):
        """Debugging helper: show ``sample`` as an image with matplotlib.

        Raises:
            ImportError: If matplotlib is not installed.
        """
        if plt is None:
            raise ImportError("matplotlib is required to draw a sample")
        width = self.WIDTH_IN_PIXELS
        pixel_rows = [sample[j:j + width]
                      for j in range(0, len(sample), width)]
        # zip() is lazy on Python 3, so materialise the transposed rows.
        plt.imshow(list(zip(*pixel_rows)), cmap=cm.Greys_r,
                   interpolation="nearest")
        plt.show()

    @staticmethod
    def _unpack_sample(data):
        """Return ``(y0, label)`` from a mapping or an attribute-style record.

        Training data arrives both as mappings with 'y0'/'label' keys and as
        the TrainData namedtuple built in __init__; string-indexing a tuple
        raises TypeError, which selects the attribute form.
        """
        try:
            return data['y0'], data['label']
        except TypeError:
            return data.y0, data.label

    def _forward(self, sample):
        """Run forward propagation for one sample.

        Returns:
            ``(y0, sum1, y1, y2)`` where ``y0`` is the input as a row vector,
            ``sum1`` the hidden layer's pre-activation column, ``y1`` the
            hidden layer's output column and ``y2`` the output layer's column.
        """
        y0 = np.asarray(sample, dtype=float).reshape(1, -1)
        sum1 = np.dot(self.theta1, y0.T) + self.input_layer_bias  # Add the bias
        y1 = self.sigmoid(sum1)
        y2 = self.sigmoid(np.dot(self.theta2, y1) + self.hidden_layer_bias)
        return y0, sum1, y1, y2

    def train(self, training_data_array):
        """Update the weights with one back-propagation step per sample.

        Args:
            training_data_array: Iterable of samples, each either a mapping
                with 'y0' and 'label' keys or an object with ``y0`` and
                ``label`` attributes. ``label`` is the index of the output
                node that should fire.
        """
        for data in training_data_array:
            sample, label = self._unpack_sample(data)

            # Step 2: Forward propagation
            y0, sum1, y1, y2 = self._forward(sample)

            # Step 3: Back propagation
            actual_vals = np.zeros((self.theta2.shape[0], 1))
            actual_vals[int(label), 0] = 1
            output_errors = actual_vals - y2
            # Uses theta2 as it was before this sample's update.
            hidden_errors = np.multiply(
                np.dot(self.theta2.T, output_errors),
                self.sigmoid_prime(sum1))

            # Step 4: Update weights
            self.theta1 += self.LEARNING_RATE * np.dot(hidden_errors, y0)
            self.theta2 += self.LEARNING_RATE * np.dot(output_errors, y1.T)
            self.hidden_layer_bias += self.LEARNING_RATE * output_errors
            self.input_layer_bias += self.LEARNING_RATE * hidden_errors

    def predict(self, test):
        """Return the index of the output node with the largest activation."""
        _, _, _, y2 = self._forward(test)
        results = y2.T.tolist()[0]
        return results.index(max(results))

    def save(self):
        """Write the weights and all biases to NN_FILE_PATH as JSON.

        Does nothing when the instance was created with ``use_file=False``.
        """
        if not self._use_file:
            return

        json_neural_network = {
            "theta1": np.asarray(self.theta1).tolist(),
            "theta2": np.asarray(self.theta2).tolist(),
            # Flat lists holding every bias of the layer, not just the first.
            "b1": np.asarray(self.input_layer_bias).ravel().tolist(),
            "b2": np.asarray(self.hidden_layer_bias).ravel().tolist(),
        }
        with open(OCRNeuralNetwork.NN_FILE_PATH, 'w') as nn_file:
            json.dump(json_neural_network, nn_file)

    @staticmethod
    def _bias_column(values, size):
        """Return ``values`` as a (size, 1) float column vector.

        Files written by earlier versions of save() contain only a single
        bias per layer; that value is repeated for every unit, which matches
        how it was previously broadcast.
        """
        column = np.array(values, dtype=float).reshape(-1, 1)
        if column.shape[0] == 1 and size > 1:
            column = np.repeat(column, size, axis=0)
        return column

    def _load(self):
        """Read the weights and biases back from NN_FILE_PATH.

        Does nothing when the instance was created with ``use_file=False``.
        """
        if not self._use_file:
            return

        with open(OCRNeuralNetwork.NN_FILE_PATH) as nn_file:
            nn = json.load(nn_file)
        self.theta1 = np.array(nn['theta1'], dtype=float)
        self.theta2 = np.array(nn['theta2'], dtype=float)
        self.input_layer_bias = self._bias_column(
            nn['b1'], self.theta1.shape[0])
        self.hidden_layer_bias = self._bias_column(
            nn['b2'], self.theta2.shape[0])
(至于算法证明,这锅我就不背了,orz)