一.环境配置
需要安装 Python 的数值计算库 NumPy，以及用于画出图形的 Matplotlib：
sudo apt-get install python-numpy
sudo apt-get install python-matplotlib
为了方便还安装了ipython
sudo apt-get install ipython
二.编辑test.py
"""Logistic regression trained by batch gradient ascent, plus a 2-D plot helper.

The data file is whitespace separated with three columns per row:
``x1 x2 label`` where ``label`` is 0 or 1.
"""
import numpy as np


def loadDataSet(filename='testSet.txt'):
    """Read the training samples from *filename*.

    Each non-blank line must hold two float features followed by an integer
    class label.  A constant 1.0 is prepended to every feature row so that
    the first weight acts as the intercept term.

    Returns:
        (dataMat, labelMat): a list of ``[1.0, x1, x2]`` rows and the list
        of integer labels, in file order.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError, IndexError: if a non-blank line is malformed.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if a line fails to parse.
    with open(filename) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat


def sigmoid(inX):
    """Return the logistic function 1 / (1 + e^-x), element-wise for arrays."""
    return 1.0 / (1 + np.exp(-inX))


def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: m rows of n features each (any 2-D array-like).
        classLabels: m class labels (0 or 1).
        alpha: step size of each update.
        maxCycles: number of full-batch updates to perform.

    Returns:
        An ``n x 1`` ``numpy.matrix`` of weights (kept as a matrix so that
        existing callers can still use ``weights.getA()``).
    """
    features = np.asarray(dataMatIn, dtype=float)
    labels = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    n = features.shape[1]
    weights = np.ones((n, 1))
    for _ in range(maxCycles):
        h = sigmoid(features.dot(weights))
        error = labels - h
        # Gradient of the log-likelihood is X^T (y - h).
        weights = weights + alpha * features.T.dot(error)
    # np.asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    return np.asmatrix(weights)


def plotBestFit(weights):
    """Scatter the samples from ``loadDataSet()`` and draw the decision line.

    Args:
        weights: the three fitted weights ``[w0, w1, w2]``.  Any shape that
            flattens to three values is accepted (the matrix returned by
            ``gradAscent``, its ``.getA()`` array, or a plain list).

    The line drawn is ``w0 + w1*x1 + w2*x2 = 0`` for x1 in [-3, 3).
    Opens a Matplotlib window and blocks until it is closed.
    """
    # Imported here so the rest of the module works without Matplotlib.
    import matplotlib.pyplot as plt

    # Flatten so that y below is always 1-D; a raw (3, 1) matrix would make
    # y a 1 x 60 matrix, which ax.plot cannot pair with x.
    w = np.asarray(weights, dtype=float).ravel()

    dataMat, labelMat = loadDataSet()
    points = np.asarray(dataMat, dtype=float).reshape(-1, 3)
    positive = np.asarray(labelMat, dtype=int) == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(points[positive, 1], points[positive, 2],
               s=30, c='red', marker='s')
    ax.scatter(points[~positive, 1], points[~positive, 2],
               s=30, c='green')

    x = np.arange(-3.0, 3.0, 0.1)
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    plt.show()
三.运行命令（在 ipython 中执行，testSet.txt 需位于当前目录）
In [13]: test.plotBestFit(weights.getA())
In [14]: import test
In [15]: dataArr,labelMat=test.loadDataSet()
In [17]: test.gradAscent(dataArr,labelMat)
Out[17]:
matrix([[ 4.12414349],
[ 0.48007329],
[-0.6168482 ]])
In [18]: weights=test.gradAscent(dataArr,labelMat)
In [19]: test.plotBestFit(weights.getA())
画出的分类图形还不错，只有两个点被分错了。