zoukankan      html  css  js  c++  java
  • 获取釜山行人物关系

    # -*- coding: utf-8 -*-
    import os
    import sys
    import jieba
    import codecs
    import math
    import jieba.posseg as pseg
    
    # Build a character co-occurrence graph from "busan.txt" and export it as
    # two space-separated text files (nodes and edges).
    #
    # Module-level results:
    #   names          name -> number of times the name was seen in the text
    #   relationships  name -> {other name -> co-occurrence count}
    #   lineNames      one list per input line holding the names found on it
    names = {}
    relationships = {}
    lineNames = []

    # Edges are exported only when their weight is strictly greater than this.
    MIN_EDGE_WEIGHT = 3

    # NOTE(review): the newline escape inside the original write() literals was
    # lost when the page was extracted (the literals were split across lines,
    # which is a syntax error). "\n" is used here; switch to "\r\n" if the
    # consumer of the output files needs CRLF line endings -- TODO confirm.
    LINE_END = "\n"

    # Load the custom dictionary before segmenting the text.
    jieba.load_userdict("dict.txt")

    with codecs.open("busan.txt", "r", "utf8") as f:
        # Iterate the file lazily instead of materialising it with readlines().
        for line in f:
            found = []
            lineNames.append(found)
            for w in pseg.cut(line):
                # Keep only tokens tagged "nr" that are at least 2 characters
                # long (presumably jieba's person-name tag -- see jieba docs).
                if w.flag != "nr" or len(w.word) < 2:
                    continue
                found.append(w.word)
                if w.word not in names:
                    names[w.word] = 0
                    relationships[w.word] = {}
                names[w.word] += 1

    for name, times in names.items():
        print(name, times)

    # Two names are related once for every ordered pair of (different) names
    # that appear on the same line; repeated mentions on a line count again.
    for line in lineNames:
        for name1 in line:
            edges = relationships[name1]
            for name2 in line:
                if name1 == name2:
                    continue
                edges[name2] = edges.get(name2, 0) + 1

    # Node table: one row per name, weighted by its occurrence count.
    with codecs.open("busan_node.txt", "w", "gbk") as f:
        f.write("Id Label Weight" + LINE_END)
        for name, times in names.items():
            f.write(name + " " + name + " " + str(times) + LINE_END)

    # Edge table: one row per directed pair whose weight passes the threshold.
    with codecs.open("busan_edge.txt", "w", "gbk") as f:
        f.write("Source Target Weight" + LINE_END)
        for name, edges in relationships.items():
            for v, w in edges.items():
                if w > MIN_EDGE_WEIGHT:
                    f.write(name + " " + v + " " + str(w) + LINE_END)
  • 相关阅读:
    python——(os, shutil)
    python-(subprocess, commands)
    PHP设计模式二:单例模式
    PHP设计模式一:工厂方法设计模式
    PHP垃圾回收机制
    PHP异常处理机制
    超文本传送协议HTTP
    IP地址
    Linux系统网络基本配置
    Linux系统LVM基本使用
  • 原文地址:https://www.cnblogs.com/jestin/p/12911477.html
Copyright © 2011-2022 走看看