# -*- coding: utf-8 -*-
"""Group attacker IPs that target similar sets of domains.

Reads whitespace-separated "ip domain" records, links every pair of IPs whose
targeted-domain sets have a Jaccard similarity of at least ``N``, and reports
each connected component of the resulting graph that contains at least ``M``
IPs.  The full graph is then drawn with matplotlib.
"""
import itertools

# Similarity threshold: two IPs are linked when their Jaccard score is >= N.
N = 0.5
# Minimum number of IPs a connected component needs to be reported as a group.
M = 3
# Minimum number of distinct domains an IP must have targeted to be considered.
R = 2

# Input file: one "ip domain" record per line.
filename = "../data/etl-ip-domain-train.txt"


def get_len(d1, d2):
    """Return the Jaccard coefficient of the key sets of ``d1`` and ``d2``.

    Args:
        d1: Mapping (or any iterable of hashable keys) for the first IP.
        d2: Mapping (or any iterable of hashable keys) for the second IP.

    Returns:
        ``len(keys1 & keys2) / len(keys1 | keys2)`` as a float in [0.0, 1.0];
        0.0 when both inputs are empty.
    """
    keys1 = set(d1)
    keys2 = set(d2)
    union = keys1 | keys2
    if not union:
        # Two empty sets share nothing; avoid dividing by zero.
        return 0.0
    # float() forces true division on Python 2 as well, where int / int
    # would truncate every score to 0 or 1.
    return float(len(keys1 & keys2)) / len(union)


def _load_ip_domains(path):
    """Parse ``path`` into a dict mapping each IP to a dict of its domains."""
    ip_domains = {}
    with open(path) as handle:
        for line in handle:
            # split() with no argument also drops the trailing newline, so the
            # same domain always yields the same key.
            fields = line.split()
            if not fields:
                continue  # blank line
            ip, domain = fields  # still raises ValueError on malformed records
            if ip == "0.0.0.0":
                continue
            ip_domains.setdefault(ip, {})[domain] = 1
    return ip_domains


def _similar_pairs(ip_domains, min_targets, threshold):
    """Yield ``(ip1, ip2, weight)`` for eligible IP pairs scoring >= threshold."""
    candidates = [ip for ip in ip_domains if len(ip_domains[ip]) >= min_targets]
    # The graph is undirected and the score symmetric, so each unordered pair
    # only needs to be examined once.
    for ip1, ip2 in itertools.combinations(candidates, 2):
        weight = get_len(ip_domains[ip1], ip_domains[ip2])
        if weight >= threshold:
            yield ip1, ip2, weight


def main():
    """Build the similarity graph, print the large groups, and draw the graph."""
    # Imported here so the pure helpers above stay importable without the
    # graph/plotting stack installed.
    import networkx as nx
    import matplotlib.pyplot as plt

    ip_domains = _load_ip_domains(filename)

    graph = nx.Graph()
    for ip1, ip2, weight in _similar_pairs(ip_domains, R, N):
        # add_edge creates missing nodes automatically.
        graph.add_edge(ip1, ip2, weight=weight)

    # connected_component_subgraphs() was removed in networkx 2.4; build each
    # subgraph from the node sets returned by connected_components().
    for i, component in enumerate(nx.connected_components(graph)):
        n_nodes = len(component)
        if n_nodes >= M:
            sub_graph = graph.subgraph(component)
            print("Subgraph {0} has {1} nodes {2}".format(i, n_nodes, sub_graph.nodes()))

    nx.draw(graph)
    plt.show()


if __name__ == "__main__":
    main()