zoukankan      html  css  js  c++  java
  • KNN 分类程序

    # coding: utf-8
    import numpy as np
    import operator
    import matplotlib
    from numpy import *
    import matplotlib.pyplot as plt
    import os
    
    
    def CreateDataSet():
      """Return a tiny hard-coded training set for trying out the classifier.

      Returns:
        A ``(group, label)`` tuple: ``group`` is a 4x2 NumPy array of sample
        points and ``label`` is the list of class names, one per row.
      """
      samples = [[1.0, 1.1], [1.0, 1.0], [0.0, 0.0], [0.0, 0.1]]
      classes = list('aabb')  # first two rows are class 'a', last two 'b'
      return np.array(samples), classes
    
    
    def Classify(intx, datax, label, k):
      """Classify one sample with a k-nearest-neighbours majority vote.

      Args:
        intx: Feature vector of the sample to classify, one value per column
          of ``datax``.
        datax: 2-D NumPy array of training samples, one row per sample.
        label: Sequence of class labels; ``label[i]`` belongs to row ``i`` of
          ``datax``.
        k: Number of nearest neighbours that vote. A value larger than the
          number of training rows is clamped to that number.

      Returns:
        The label with the most votes among the k nearest rows (Euclidean
        distance). Ties keep the order in which labels were first seen while
        walking the neighbours from nearest to farthest.

      Raises:
        ValueError: If ``datax`` has no rows or ``k`` is smaller than 1.
      """
      datasize = datax.shape[0]
      if datasize == 0:
        raise ValueError("datax must contain at least one training sample")
      if k < 1:
        raise ValueError("k must be at least 1")
      # Clamp with a plain comparison rather than min(): the file star-imports
      # numpy, which can (depending on the NumPy version) shadow built-ins.
      if k > datasize:
        k = datasize

      # Broadcasting subtracts intx from every row; no need to tile it first.
      diffmat = np.asarray(intx) - datax
      sqdistence = (diffmat ** 2).sum(axis=1)  # axis=1: sum across each row
      distence = sqdistence ** 0.5
      nearest = distence.argsort()[:k]

      # Tally the votes of the k closest rows.
      classcount = {}
      for idx in nearest:
        vote = label[idx]
        classcount[vote] = classcount.get(vote, 0) + 1
      # sorted() is stable, so equal counts keep their insertion order.
      ranked = sorted(classcount.items(), key=operator.itemgetter(1), reverse=True)
      return ranked[0][0]
    def file2matrix(filename) :
      """Load a tab-separated data file into a feature matrix and label list.

      Each non-blank line supplies one sample: its first three tab-separated
      fields are the features and its last field is the integer class label.
      Blank lines (for example a trailing empty line) are skipped.

      Args:
        filename: Path of the text file to read.

      Returns:
        A ``(returnmat, labels)`` tuple: ``returnmat`` is a float NumPy array
        with one row of three features per sample, ``labels`` is a list of
        ints in the same order.

      Raises:
        ValueError: If a feature or label field cannot be parsed as a number,
          or a line does not provide three feature fields.
      """
      features = []
      labels = []
      # The with-statement closes the file even if parsing raises.
      with open(filename, mode = "r") as fr :
        for line in fr :
          if not line.strip() :
            continue  # ignore empty lines instead of failing on them
          # Fields are separated by tabs; the line ends with a newline.
          listfromline = line.split("\t")
          features.append([float(value) for value in listfromline[0: 3]])
          # int() tolerates the trailing newline on the last field.
          labels.append(int(listfromline[-1]))
      # reshape keeps the (n, 3) shape even when no rows were read.
      returnmat = np.array(features, dtype=float).reshape(len(features), 3)
      return returnmat, labels
    
    def autonorm(datax) :
      """Min-max scale every column of ``datax`` into the range [0, 1].

      Args:
        datax: 2-D NumPy array, one row per sample and one column per feature.

      Returns:
        A ``(newval, ranges, minval)`` tuple: ``newval`` is the scaled array,
        ``ranges`` is the per-column ``max - min`` and ``minval`` is the
        per-column minimum. A column whose values are all equal has range 0
        and is scaled to 0 rather than NaN.
      """
      minval = datax.min(0)  # min(0): minimum of every column
      maxval = datax.max(0)
      ranges = maxval - minval
      # Dividing by a zero range would give 0/0 = NaN; use 1 for such columns
      # while still returning the real ranges to the caller.
      divisor = np.where(ranges == 0, 1, ranges)
      # Broadcasting applies the per-column values to every row.
      newval = (datax - minval) / divisor
      return newval, ranges, minval
    
    def datingClassTest(hoRatio=0.1, filename="datingTestSet2.txt", k=5):
      """Run a hold-out evaluation of the KNN classifier and print the error rate.

      The first ``hoRatio`` share of the rows in ``filename`` is used as the
      test set; the remaining rows are the training set.

      Args:
        hoRatio: Fraction of the rows held out for testing.
        filename: Tab-separated data file passed to ``file2matrix``.
        k: Number of neighbours passed to ``Classify``.

      Returns:
        None. The error rate is printed; if the split leaves no test rows or
        no training rows, a message is printed instead.
      """
      dataX, labels = file2matrix(filename)  # read the data
      m = dataX.shape[0]  # number of rows
      numTestVecs = int(m * hoRatio)
      # Without this guard a small file ends in a division by zero below.
      if numTestVecs == 0 or numTestVecs >= m:
        print("cannot evaluate: %d rows leave no test or no training samples" % m)
        return
      normMat, ranges, minVals = autonorm(dataX)  # normalise the features
      trainMat = normMat[numTestVecs:m, :]
      trainLabels = labels[numTestVecs:m]
      errorcount = 0.0  # number of misclassified test rows
      for i in range(numTestVecs):
        classifierResult = Classify(normMat[i, :], trainMat, trainLabels, k)
        if classifierResult != labels[i]:
          errorcount = errorcount + 1.0
      print("error rate :%f" % ((errorcount) / (numTestVecs)))
    
    def plot():  # plot the datingTestSet2.txt data
      """Show scatter plots of every pair of the three features.

      Reads ``datingTestSet2.txt`` with ``file2matrix`` and draws three scatter
      plots side by side (feature 0 vs 1, 0 vs 2 and 1 vs 2). Marker colour and
      size are both 15 times the sample's label.
      """
      filename = "datingTestSet2.txt"
      dataX, labels = file2matrix(filename)
      scaled = 15 * np.array(labels)
      fig = plt.figure()  # create the figure
      # Use one 1x3 grid with slots 1..3; the codes 111/121/131 all address
      # slot 1 of different grids, which makes the axes overlap.
      for position, (xcol, ycol) in enumerate([(0, 1), (0, 2), (1, 2)], start=1):
        ax = fig.add_subplot(1, 3, position)
        ax.scatter(dataX[:, xcol], dataX[:, ycol], c=scaled, s=scaled)
      plt.show()
    
    if __name__ == "__main__":
        # Script entry point: run the hold-out evaluation.
        # Call plot() here instead to view the scatter plots.
        datingClassTest()

     搬运门

  • 相关阅读:
    C# 进制转换
    使用Struts1完成用户登录功能
    【Maven实战】仓库介绍和Nexus的安装
    【Maven实战】依赖的聚合和版本管理
    【Maven实战】传递性依赖的问题
    【Maven实战】依赖的范围
    【Maven实战】archetype的使用和eclipse的配置
    【Maven实战】Maven开发环境的搭建和案例展示
    【手机安全卫士02】连接服务器获取更新信息
    【手机安全卫士01】项目Splash页面的开发与设计
  • 原文地址:https://www.cnblogs.com/lalalatianlalu/p/11241152.html
Copyright © 2011-2022 走看看