zoukankan      html  css  js  c++  java
  • 重写轮子之 kNN

    # !/usr/bin/python
    # -*- coding:utf-8 -*-
    
    
    """
    Re-implement the kNN algorithm as a practice exercise.

    Prerequisite for using this kNN re-implementation:
        the training labels must be encoded as integers 0, 1, 2, ...
    """
    
    # Author: 相忠良(Zhong-Liang Xiang) <ugoood@163.com>
    # Finished at July 11th, 2017
    
    import sys

    import matplotlib.pyplot as plt
    import numpy as np
    from numpy import array
    from sklearn import neighbors

    try:
        from sklearn import datasets, cross_validation
    except ImportError:
        # sklearn.cross_validation was removed in scikit-learn 0.20; keep the
        # name defined so the module still imports on current releases.
        from sklearn import datasets
        cross_validation = None
    
    
    ## Euclidean Distance
    def euclidean(v1, v2):
        """Return the Euclidean (L2) distance between two vectors.

        Args:
            v1: first vector (any array-like of numbers).
            v2: second vector, with the same number of elements as ``v1``.

        Returns:
            The distance as a NumPy float scalar.
        """
        # Plain ndarrays instead of np.mat: the matrix helper was removed from
        # the NumPy 2.0 namespace. Inputs are flattened so that 1 x N rows and
        # flat sequences are treated the same way.
        diff = np.asarray(v1, dtype=float).ravel() - np.asarray(v2, dtype=float).ravel()
        return np.sqrt(np.dot(diff, diff))
    
    
    ## Cosine similarity (despite the name "cosdis": 1 means same direction)
    def cosdis(v1, v2):
        """Return the cosine of the angle between ``v1`` and ``v2``.

        The value is the dot product divided by the product of the two vector
        norms, i.e. a similarity measure rather than a distance.
        """
        norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
        dot_product = np.dot(v1, v2)
        return dot_product / norm_product
    
    
    ## load data
    def load_data():
        """Load the iris dataset and split it into train and test parts.

        Returns:
            The ``(X_train, X_test, y_train, y_test)`` sequence produced by
            ``train_test_split`` with 25% of the samples held out for testing
            and a fixed ``random_state`` of 0.
        """
        # train_test_split lives in sklearn.model_selection since 0.18; the old
        # sklearn.cross_validation module was removed in 0.20.
        try:
            from sklearn.model_selection import train_test_split
        except ImportError:
            from sklearn.cross_validation import train_test_split

        iris = datasets.load_iris()
        return train_test_split(iris.data, iris.target, test_size=0.25, random_state=0)
    
    
    class MyKNeighborsClassifier:
        """Brute-force k-nearest-neighbours classifier using Euclidean distance.

        Training labels must be non-negative integers (0, 1, 2, ...) because the
        majority vote is taken with ``np.bincount``.

        Attributes:
            n_neighbors: number of neighbours that vote for each query sample.
            X_train: training samples stored by ``fit``.
            y_train: training labels stored by ``fit``.
            predict_label: labels produced by the most recent ``predict`` call.
        """

        def __init__(self, n_neighbors=20):
            """Create a classifier that lets ``n_neighbors`` neighbours vote."""
            self.n_neighbors = n_neighbors
            # State is per instance: class-level lists would be shared by every
            # classifier and keep growing across predict() calls.
            self.X_train = []
            self.y_train = []
            self.predict_label = []

        def fit(self, X, y):
            """Store the training samples ``X`` and their labels ``y``."""
            self.X_train = X
            self.y_train = y

        def predict(self, X):
            """Predict a label for every sample in ``X``.

            Args:
                X: iterable of query samples with the same number of features
                    as the training samples.

            Returns:
                A list with one predicted label per query sample. When several
                labels get the same number of votes the smallest label wins.

            Raises:
                ValueError: if no training data was stored or ``n_neighbors``
                    is smaller than 1.
            """
            train_X = np.asarray(self.X_train, dtype=float)
            train_y = np.asarray(self.y_train)
            if train_X.size == 0:
                raise ValueError("fit() must be called with training data before predict()")
            if self.n_neighbors < 1:
                raise ValueError("n_neighbors must be at least 1")

            # One row per training sample, also for single-feature data.
            train_X = train_X.reshape(train_X.shape[0], -1)
            # Never let more neighbours vote than there are training samples.
            k = min(self.n_neighbors, train_X.shape[0])

            labels = []
            for item in np.asarray(X, dtype=float):
                dists = np.sqrt(((train_X - item) ** 2).sum(axis=1))
                # A stable sort keeps the lowest index first among equal
                # distances, matching repeated argmin selection.
                nearest = np.argsort(dists, kind="mergesort")[:k]
                # bincount counts votes per label; argmax picks the winner.
                labels.append(np.argmax(np.bincount(train_y[nearest])))

            self.predict_label = labels
            return labels

        def score(self, X, y):
            """Return the fraction of samples in ``X`` whose prediction equals ``y``."""
            predictions = np.asarray(self.predict(X))
            return float(np.mean(predictions == np.asarray(y)))
    
    ## 测试用例
    # Demo: compare this implementation with scikit-learn on the iris split.
    X_train, X_test, y_train, y_test = load_data()

    cls = MyKNeighborsClassifier()
    cls.fit(X_train, y_train)
    mine = cls.predict(X_test)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('my kNN:  %s' % (mine,))

    cls1 = neighbors.KNeighborsClassifier(n_neighbors=20, p=2)
    cls1.fit(X_train, y_train)
    sklearnkNN = cls1.predict(X_test)
    print('sklearn kNN:  %s' % (sklearnkNN,))
    # Element-wise agreement with scikit-learn and with the true labels.
    print(mine == sklearnkNN)
    print(mine == y_test)
    
    
    
    '''
    下面是编程过程中留下的经验
    '''
    
    # 重要1: np.bincount(list)
    # >>> a=[1,1,2,2,4]
    # >>> print np.bincount(a)
    # 结果为 [0 2 2 0 1]
    
    # 重要2: np.argmax(list)
    # 返回最大值索引
    
    # 重要3: 标识整数最大值 (仅 Python 2)
    # >>> import sys
    # >>> sys.maxint
    # Note: Python 3 removed sys.maxint; use sys.maxsize, or float('inf') as a sentinel.
    
    
    ## kNN 小示例
    # def createDataset():
    #     group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    #     labels = ['A', 'A', 'B', 'B']
    #     return group, labels
    #
    #
    # dataset, labels = createDataset()
    # fig = plt.figure()
    # ax = fig.add_subplot(111)
    # index = 0
    # testdata = [0.2, 0.2]
    #
    # for point in dataset:
    #     if labels[index] == 'A':
    #         ax.scatter(point[0], point[1], c='blue', marker='o', s=300)
    #     else:
    #         ax.scatter(point[0], point[1], c='red', marker='^', s=300)
    #     index += 1
    #
    # ax.scatter(testdata[0], testdata[1], c='green', marker='^', s=300)
    # plt.show()
    
  • 相关阅读:
    网络流二十四题之魔术球问题
    网络流二十四题之P2764 最小路径覆盖问题
    网络二十四题 之 P2756 飞行员配对方案问题
    网络流 之 dinic算法
    网络流 之 增广路
    中南
    2249: Altruistic Amphibians 01背包的应用 + lh的简单图论 图转树求lca
    今日训练 搜索
    AD-logon workstation
    Centos7-docker安装
  • 原文地址:https://www.cnblogs.com/ZhongliangXiang/p/7357173.html
Copyright © 2011-2022 走看看