# -*- coding: utf-8 -*-
"""DBSCAN-style clustering of 2-D points with three diagnostic plots.

Run as a script, this module reads up to ``index`` coordinate pairs from
``Paris_points.json``, labels every point as belonging to a cluster or
being noise (using the radius ``E`` and the density threshold ``minPts``),
prints the clusters and shows three matplotlib figures: all points, the
clusters, and the noise points.
"""
from collections import defaultdict
import json
import random

# Neighbourhood radius, expressed in the raw units of the input coordinates.
E = 0.001
# Density threshold: a point is a core point when MORE than this many points
# (the point itself included) lie within E of it.
minPts = 7
# Maximum number of points read from the input file.
index = 1000


def dist(p1, p2):
    """Return the Euclidean distance between two points.

    Only the first two components of each point are used.
    """
    return ((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** 0.5


def load_points(path, limit=None):
    """Load ``[latitude, longitude]`` pairs from a JSON file.

    The file must contain a JSON object with the parallel arrays
    ``latitudes`` and ``longitudes``.

    Args:
        path: Path of the JSON file.
        limit: Maximum number of points to return; ``None`` returns all.

    Returns:
        A list of ``[latitude, longitude]`` lists. Fewer than ``limit``
        points are returned when the file is shorter, instead of failing.
    """
    # The context manager closes the file even if parsing fails.
    with open(path) as handle:
        data = json.load(handle)
    points = [
        [lat, lon]
        for lat, lon in zip(data['latitudes'], data['longitudes'])
    ]
    if limit is None:
        return points
    return points[:limit]


def random_test_points(count=100, low=1, high=50):
    """Return distinct random integer points for manual experiments.

    ``count`` candidates are drawn with both coordinates in
    ``[low, high]``; duplicates are dropped, so fewer than ``count`` points
    may come back. Suggested settings for this data: E = 8, minPts = 8.
    """
    points = []
    for _ in range(count):
        candidate = [random.randint(low, high), random.randint(low, high)]
        if candidate not in points:
            points.append(candidate)
    return points


def find_neighbours(points, eps):
    """Return, for every point, the indices of the points within ``eps``.

    Each point is its own neighbour, so every returned list is non-empty.
    """
    neighbours = []
    for point in points:
        neighbours.append(
            [j for j, other in enumerate(points) if dist(other, point) <= eps]
        )
    return neighbours


def dbscan(points, eps, min_pts):
    """Assign a cluster label to every point.

    A point is a core point when more than ``min_pts`` points (itself
    included) lie within ``eps`` of it. Clusters are grown from each
    still-unlabelled core point by repeatedly absorbing the neighbours of
    every core point already in the cluster, so cores that are connected
    through a chain of cores always share one label regardless of their
    order in ``points``. Non-core points reached this way (border points)
    join the first cluster that reaches them and do not grow it further.

    Args:
        points: Sequence of points; only components 0 and 1 are used.
        eps: Neighbourhood radius.
        min_pts: Density threshold (see above).

    Returns:
        A list parallel to ``points`` holding ``0`` for noise and
        ``1..k`` for members of the k clusters found.
    """
    neighbours = find_neighbours(points, eps)
    is_core = [len(found) > min_pts for found in neighbours]
    labels = [0] * len(points)
    cluster_label = 0
    for seed in range(len(points)):
        if not is_core[seed] or labels[seed] != 0:
            continue
        cluster_label += 1
        labels[seed] = cluster_label
        pending = [seed]
        while pending:
            current = pending.pop()
            for reached in neighbours[current]:
                if labels[reached] != 0:
                    continue
                labels[reached] = cluster_label
                # Only core points extend the cluster; border points are
                # labelled but never expanded.
                if is_core[reached]:
                    pending.append(reached)
    return labels


def group_clusters(points, labels):
    """Collect the coordinates of each cluster for plotting.

    Returns:
        A ``defaultdict`` mapping each non-zero label to ``[xs, ys]``.
        Every clustered point appears exactly once; noise is left out.
    """
    cluster_list = defaultdict(lambda: [[], []])
    for point, label in zip(points, labels):
        if label == 0:
            continue
        cluster_list[label][0].append(point[0])
        cluster_list[label][1].append(point[1])
    return cluster_list


def plot_results(all_points, labels, cluster_list):
    """Show three figures: all points, the clusters, and the noise points.

    Component 0 of each point is drawn on the x axis and component 1 on
    the y axis.
    """
    # Imported here so the clustering helpers above stay usable on machines
    # without matplotlib or a display.
    import matplotlib.pyplot as plt

    markers = ['+', '*', '.', 'd', '^', 'v', '>', '<', 'p']

    # Figure 1: every input point.
    plt.figure(1)
    allx = [point[0] for point in all_points]
    ally = [point[1] for point in all_points]
    plt.plot(allx, ally, "r.")
    plt.title("total points=" + str(len(all_points)) + " E =" + str(E)
              + " Min Points=" + str(minPts))

    # Figure 2: one marker style per cluster, cycling through all markers.
    plt.figure(2)
    for position, label in enumerate(sorted(cluster_list)):
        cluster = cluster_list[label]
        plt.plot(cluster[0], cluster[1], markers[position % len(markers)])
    plt.title(str(len(cluster_list)) + " clusters created with E = "
              + str(E) + " Min Points=" + str(minPts))

    # Figure 3: points that belong to no cluster.
    plt.figure(3)
    noise_points = [
        point for point, label in zip(all_points, labels) if label == 0
    ]
    noisex = [point[0] for point in noise_points]
    noisey = [point[1] for point in noise_points]
    plt.plot(noisex, noisey, "x")
    plt.title("noise Points = " + str(len(noise_points)) + " E =" + str(E)
              + " Min Points=" + str(minPts))

    plt.show()


def main():
    """Cluster the points in ``Paris_points.json`` and plot the result."""
    all_points = load_points("Paris_points.json", index)
    labels = dbscan(all_points, E, minPts)
    cluster_list = group_clusters(all_points, labels)
    print(cluster_list)
    plot_results(all_points, labels, cluster_list)


if __name__ == "__main__":
    main()