# -*- coding: utf-8 -*- import os import sys import jieba import codecs import math import jieba.posseg as pseg names = {} relationships = {} lineNames = [] jieba.load_userdict("dict.txt") with codecs.open("busan.txt", "r", "utf8") as f: for line in f.readlines(): poss = pseg.cut(line) lineNames.append([]) for w in poss: if w.flag != "nr" or len(w.word) < 2: continue lineNames[-1].append(w.word) if names.get(w.word) is None: names[w.word] = 0 relationships[w.word] = {} names[w.word] += 1 for name, times in names.items(): print(name, times) for line in lineNames: for name1 in line: for name2 in line: if name1 == name2: continue if relationships[name1].get(name2) is None: relationships[name1][name2] = 1 else: relationships[name1][name2] = relationships[name1][name2] + 1 with codecs.open("busan_node.txt", "w", "gbk") as f: f.write("Id Label Weight ") for name, times in names.items(): f.write(name + " " + name + " " + str(times) + " ") with codecs.open("busan_edge.txt", "w", "gbk") as f: f.write("Source Target Weight ") for name, edges in relationships.items(): for v, w in edges.items(): if w > 3: f.write(name + " " + v + " " + str(w) + " ")