zoukankan      html  css  js  c++  java
  • DBSCAN——python实现

    # -*- coding: utf-8 -*-  
    from matplotlib.pyplot import *
    from collections import defaultdict
    import random
    import json
    """
        Function that computes the Euclidean distance between two points
    """
    def dist(p1, p2):
        """Return the Euclidean distance between two 2-D points.

        Only indices 0 and 1 of each argument are read, so any extra
        trailing entries on a point are ignored.
        """
        delta_x = p1[0] - p2[0]
        delta_y = p1[1] - p2[1]
        squared_length = delta_x ** 2 + delta_y ** 2
        return squared_length ** 0.5
    
    
    all_points = []
    index = 1000  # maximum number of points taken from the data file
    # Load the point data with the standard-library json module.
    # open() inside a with-block replaces the Python-2-only file() builtin
    # and guarantees the handle is closed as soon as parsing is done.
    with open("Paris_points.json") as points_file:
        flickr_data = json.load(points_file)
    # Pair latitudes and longitudes by position. Slicing (instead of indexing
    # 0..index-1) keeps the script working when the file holds fewer than
    # `index` entries.
    latitudes = flickr_data['latitudes'][:index]
    longitudes = flickr_data['longitudes'][:index]
    for latitude, longitude in zip(latitudes, longitudes):
        # Each point is a fresh mutable list: later stages append a label to it.
        all_points.append([latitude, longitude])
    
    """
        Set the values of E and minPts
    """    
    # E: neighbourhood radius, in the same units as the point coordinates.
    # Two points count as neighbours when dist(a, b) <= E.
    E = 0.001
    # minPts: density threshold. A point is treated as a core point when the
    # number of points within E of it (the point itself included) is
    # strictly greater than minPts.
    minPts = 7
    
    
    """
        Randomly generate 100 Cartesian coordinates for testing; when testing, use E = 8, minPts = 8
    """
    #all_points = []
    # for i in range(100):
    #     randCoord = [random.randint(1,50),random.randint(1,50)]
    #     if not randCoord in all_points:
    #         all_points.append(randCoord)
    
    
    """
        Find the core points
    """
    # Split the input into core points and everything else.
    other_points = []     # points that are not core (border or noise)
    core_points = []      # points with a sufficiently dense E-neighbourhood
    plotted_points = []   # points that will appear in the cluster figure
    for candidate in all_points:
        # Third slot holds the cluster label; 0 means "not assigned yet".
        candidate.append(0)
        # Count every point within E of the candidate. The candidate itself
        # is part of all_points, so it is included in the count.
        neighbour_count = sum(
            1 for neighbour in all_points if dist(neighbour, candidate) <= E
        )
        if neighbour_count <= minPts:
            other_points.append(candidate)
            continue
        core_points.append(candidate)
        plotted_points.append(candidate)
        
    
    
    """
        Find the border points
    """
    # A border point is a non-core point lying within E of at least one
    # core point.
    border_points = []
    for other in other_points:
        # Iterating over the non-core points (rather than over the core
        # points) and stopping at the first core point in range records each
        # border point exactly once. Looping core-first appended the same
        # point once per nearby core point, duplicating it in border_points
        # and in the plotted cluster data.
        if any(dist(core, other) <= E for core in core_points):
            border_points.append(other)
            plotted_points.append(other)
    
    
    
    """
        Clustering step: assign a cluster label to every core point and to the points within E of it
    """
    # Assign cluster labels (stored in slot 2 of every point, 0 = unlabelled).
    #
    # Each unlabelled core point seeds a new cluster, which is then grown to
    # completion before the next seed is considered. Growing to completion is
    # what makes the result independent of the order of core_points: with a
    # single pass over the core points, a chain A - B - C visited in the
    # order A, C, B would put C in a second cluster before B could reach it.
    cluster_label = 0
    # Identity set so that membership does not depend on list equality
    # (labels are mutated while this loop runs).
    core_ids = set(id(core) for core in core_points)

    for seed in core_points:
        if seed[2] != 0:
            continue  # already absorbed into an earlier cluster
        cluster_label += 1
        seed[2] = cluster_label
        pending = [seed]  # core points whose neighbourhood is still to be scanned
        while pending:
            current = pending.pop()
            for neighbour in plotted_points:
                if neighbour[2] == 0 and dist(neighbour, current) <= E:
                    neighbour[2] = cluster_label
                    # Only core points propagate the cluster further; border
                    # points receive the label but do not expand it.
                    if id(neighbour) in core_ids:
                        pending.append(neighbour)
    
    
    """
        Once every point has been assigned its label, group the points of each cluster together
    """
    # Map each cluster label to a pair of parallel lists:
    # [first coordinates, second coordinates] of the points carrying it.
    cluster_list = defaultdict(lambda: [[], []])
    for member in plotted_points:
        first_coords, second_coords = cluster_list[member[2]]
        first_coords.append(member[0])
        second_coords.append(member[1])

    # Marker styles used when drawing the clusters.
    markers = ['+', '*', '.', 'd', '^', 'v', '>', '<', 'p']
    #markers = ['b.','g.','r.','c.','m.','y.','k.']
    
    
    """
        Plot all of the points
    """
    # Figure 1: every input point as a red dot, slot 0 against slot 1.
    figure(1)
    allx = [plot_point[0] for plot_point in all_points]
    ally = [plot_point[1] for plot_point in all_points]
    plot(allx, ally, "r.")
    # The title reports the point count and the two DBSCAN parameters.
    title_parts = [
        "total points=", str(len(all_points)),
        " E =", str(E),
        " Min Points=", str(minPts),
    ]
    title("".join(title_parts))
    
    
    """
        Plot the clusters (core points and border points)
    """
    # Figure 2: the clustered points, one marker style per cluster.
    figure(2)
    # Single-argument print(...) produces the same output on Python 2 and is
    # valid syntax on Python 3, unlike the bare print statement.
    print(cluster_list)
    for cluster_index, value in enumerate(cluster_list):
        cluster = cluster_list[value]
        # Cycle through every marker, using the list's real length rather than
        # a hard-coded constant, so marker 0 is reused after wrap-around and
        # the cycle stays correct if the marker list is edited.
        marker = markers[cluster_index % len(markers)]
        plot(cluster[0], cluster[1], marker)
    title(str(len(cluster_list)) + " clusters created with E = "+ str(E) + " Min Points=" + str(minPts))
    
    """
        Plot the noise points
    """
    # Figure 3: the noise points, i.e. the points that are neither core
    # points nor border points.
    figure(3)
    noise_points = [
        point for point in all_points
        if not (point in core_points or point in border_points)
    ]
    noisex = [point[0] for point in noise_points]
    noisey = [point[1] for point in noise_points]
    plot(noisex, noisey, "x")

    title("noise Points = "+ str(len(noise_points)) + " E ="+str(E)+" Min Points="+str(minPts))
    #axis((0,60,0,60))
    # Display all figures created so far.
    show()
  • 相关阅读:
    数据库中的LEFT JOIN 个人理解
    C++ 类的继承方式
    KMP字符串匹配算法
    C++ 运算符重载_加号
    Pin API INS
    Python 爬虫爬取多页数据
    Pin
    NO.12 13 章 字符串&扩展(哈希、KMP、分块、BIT)
    NO.11章 DP(递归递推、最大连续子序列和、LIS、LCS、最长回文子串、DAG、背包)
    NO.10章 图(遍历、最短路、生成树、拓扑、关键路径)
  • 原文地址:https://www.cnblogs.com/GDUT-xiang/p/5714110.html
Copyright © 2011-2022 走看看