zoukankan      html  css  js  c++  java
  • 【知识图谱】知识图谱的构建-python-neo4j

    环境依赖

    jdk、neo4j图数据库
    neo4j具体的安装过程可以参考这里:https://cloud.tencent.com/developer/article/1387732

    json数据(数据文件中每条记录占一行,下面的示例为便于阅读做了格式化)

    {
    	"_id": {
    		"$oid": "5bb578b6831b973a137e3ee6"
    	},
    	"name": "肺泡蛋白质沉积症",
    	"desc": "肺泡蛋白质沉积症(简称PAP),又称Rosen-Castle-man-Liebow综合征,是一种罕见疾病。该病以肺泡和细支气管腔内充满PAS染色阳性,来自肺的富磷脂蛋白质物质为其特征,好发于青中年,男性发病约3倍于女性。",
    	"category": ["疾病百科", "内科", "呼吸内科"],
    	"prevent": "1、避免感染分支杆菌病,卡氏肺囊肿肺炎,巨细胞病毒等。\n2、注意锻炼身体,提高免疫力。",
    	"cause": "病因未明,推测与几方面因素有关:如大量粉尘吸入(铝,二氧化硅等),机体免疫功能下降(尤其婴幼儿),遗传因素,酗酒,微生物感染等,而对于感染,有时很难确认是原发致病因素还是继发于肺泡蛋白沉着症,例如巨细胞病毒,卡氏肺孢子虫,组织胞浆菌感染等均发现有肺泡内高蛋白沉着。\n虽然启动因素尚不明确,但基本上同意发病过程为脂质代谢障碍所致,即由于机体内,外因素作用引起肺泡表面活性物质的代谢异常,到目前为止,研究较多的有肺泡巨噬细胞活力,动物实验证明巨噬细胞吞噬粉尘后其活力明显下降,而病员灌洗液中的巨噬细胞内颗粒可使正常细胞活力下降,经支气管肺泡灌洗治疗后,其肺泡巨噬细胞活力可上升,而研究未发现Ⅱ型细胞生成蛋白增加,全身脂代谢也无异常,因此目前一般认为本病与清除能力下降有关。",
    	"symptom": ["紫绀", "胸痛", "呼吸困难", "乏力", "毓卓"],
    	"yibao_status": "否",
    	"get_prob": "0.00002%",
    	"get_way": "无传染性",
    	"acompany": ["多重肺部感染"],
    	"cure_department": ["内科", "呼吸内科"],
    	"cure_way": ["支气管肺泡灌洗"],
    	"cure_lasttime": "约3个月",
    	"cured_prob": "约40%",
    	"cost_money": "根据不同医院,收费标准不一致,省市三甲医院约( 8000——15000 元)",
    	"check": ["胸部CT检查", "肺活检", "支气管镜检查"],
    	"recommand_drug": [],
    	"drug_detail": []
    } ......
    

    实例

    import os
    import json
    from py2neo import Graph, Node
    
    class MedicalGraph:
        """Load disease records from a JSON-lines file and push them into Neo4j.

        Each line of the data file is parsed as one JSON object.  From every
        record the disease name, its ``cure_department`` list and its
        ``recommand_drug`` list are extracted and turned into ``Disease``,
        ``Department`` and ``Drug`` nodes plus the relationships between them.
        """

        def __init__(self):
            """Locate the data file next to this script and connect to Neo4j."""
            # Use os.path helpers instead of splitting on a literal backslash so
            # the path resolves on every platform, not only on Windows.
            cur_dir = os.path.dirname(os.path.abspath(__file__))
            self.data_path = os.path.join(cur_dir, 'data', 'medical2.json')
            # NOTE(review): the connection URL and credentials are hard-coded;
            # consider reading them from configuration or the environment.
            self.g = Graph("http://localhost:7474", username="neo4j", password="rhino1qaz@wsx")

        def read_nodes(self):
            """Parse the data file into entity names and relationship pairs.

            Returns:
                A 7-tuple ``(diseases, drugs, departments, disease_infos,
                rels_disease_drug, rels_disease_department,
                rels_department_department)``.  The first three items are sets
                of names, ``disease_infos`` is a list with one dict per record,
                and the three ``rels_*`` items are lists of ``[start, end]``
                name pairs (duplicates are not removed here).
            """
            diseases = []  # disease names
            drugs = []  # drug names
            departments = []  # department names

            disease_infos = []  # one property dict per disease record

            rels_disease_drug = []  # disease -> recommended drug
            rels_disease_department = []  # disease -> treating department
            rels_department_department = []  # sub-department -> parent department

            count = 0
            # Explicit UTF-8: the records contain Chinese text and the platform
            # default encoding is not guaranteed to decode it.  The context
            # manager also closes the handle, which the bare open() never did.
            with open(self.data_path, encoding='utf-8') as data_file:
                for data in data_file:
                    if not data.strip():
                        # A blank (e.g. trailing) line is not a record.
                        continue
                    disease_dict = {}
                    count += 1
                    print(count)
                    # One JSON object per line.
                    data_json = json.loads(data)
                    print(data_json)
                    disease = data_json['name']
                    disease_dict['name'] = disease
                    diseases.append(disease)
                    if 'cure_department' in data_json:
                        cure_department = data_json['cure_department']
                        if len(cure_department) == 1:
                            rels_disease_department.append([disease, cure_department[0]])
                        if len(cure_department) == 2:
                            # Two entries are read as [parent, child]: the child
                            # belongs to the parent and the disease is attached
                            # to the child.
                            big = cure_department[0]
                            small = cure_department[1]
                            rels_department_department.append([small, big])
                            rels_disease_department.append([disease, small])
                        disease_dict['cure_department'] = cure_department
                        departments += cure_department
                    if 'recommand_drug' in data_json:
                        recommand_drug = data_json['recommand_drug']
                        drugs += recommand_drug
                        for drug in recommand_drug:
                            rels_disease_drug.append([disease, drug])
                        disease_dict['recommand_drug'] = recommand_drug
                    disease_infos.append(disease_dict)
            return (set(diseases), set(drugs), set(departments), disease_infos,
                    rels_disease_drug, rels_disease_department, rels_department_department)

        def create_node(self, label, nodes):
            """Create one node labelled ``label`` for every name in ``nodes``.

            Args:
                label: Node label to apply.
                nodes: Sized collection of names; each becomes the ``name``
                    property of a new node.
            """
            total = len(nodes)
            for count, node_name in enumerate(nodes, start=1):
                self.g.create(Node(label, name=node_name))
                print(count, total)

        def create_diseases_nodes(self, disease_infos):
            """Create the central ``Disease`` nodes of the graph.

            Args:
                disease_infos: Iterable of dicts as produced by ``read_nodes``.
                    ``recommand_drug`` and ``cure_department`` are optional
                    because ``read_nodes`` only sets them when the source
                    record has them; a missing key is stored as an empty list
                    instead of raising ``KeyError``.
            """
            for count, disease_dict in enumerate(disease_infos, start=1):
                node = Node("Disease", name=disease_dict['name'],
                            recommand_drug=disease_dict.get('recommand_drug', []),
                            cure_department=disease_dict.get('cure_department', []))
                self.g.create(node)
                print(count)

        def create_graphnodes(self):
            """Create all entity nodes: diseases, drugs and departments."""
            (diseases, Drugs, Departments, disease_infos,
             rels_disease_drug, rels_disease_department, rels_department_department) = self.read_nodes()
            self.create_diseases_nodes(disease_infos)
            self.create_node('Drug', Drugs)
            print(len(Drugs))
            self.create_node('Department', Departments)
            print(len(Departments))

        def create_graphrels(self):
            """Create all relationship edges between the entity nodes."""
            (diseases, Drugs, Departments, disease_infos,
             rels_disease_drug, rels_disease_department, rels_department_department) = self.read_nodes()
            self.create_relationship('Disease', 'Drug', rels_disease_drug, 'recommand_eat', '宜吃')
            self.create_relationship('Disease', 'Department', rels_disease_department, 'belongs_to', '所属科室')
            self.create_relationship('Department', 'Department', rels_department_department, 'belongs_to', '属于')

        def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
            """Create ``(start)-[rel_type {name: rel_name}]->(end)`` edges.

            Args:
                start_node: Label of the start nodes.
                end_node: Label of the end nodes.
                edges: Iterable of ``[start_name, end_name]`` pairs; duplicate
                    pairs are collapsed before any query is sent.
                rel_type: Relationship type used in the Cypher pattern.
                rel_name: Value stored in the relationship's ``name`` property.

            A failing query is printed and skipped so one bad edge does not
            abort the rest of the import.
            """
            # De-duplicate on the pair itself; joining with a '###' separator
            # would mis-split any name that happens to contain it.
            unique_edges = {(edge[0], edge[1]) for edge in edges}
            total = len(unique_edges)
            # Labels and relationship types cannot be Cypher parameters, so they
            # are still interpolated; the name values are passed as parameters
            # so quotes inside a name can no longer break or alter the query.
            query = (
                "match(p:%s),(q:%s) where p.name=$p_name and q.name=$q_name "
                "create (p)-[rel:%s{name:$rel_name}]->(q)" % (start_node, end_node, rel_type)
            )
            count = 0
            for p_name, q_name in unique_edges:
                try:
                    self.g.run(query, {'p_name': p_name, 'q_name': q_name, 'rel_name': rel_name})
                except Exception as e:
                    # Deliberate best-effort: report and continue with the next edge.
                    print(e)
                else:
                    count += 1
                    print(rel_type, count, total)

        def export_data(self):
            """Write disease, drug and department names to text files.

            Produces ``disease.txt``, ``drug.txt`` and ``department.txt`` in
            the current working directory, one name per line.  Names are sorted
            so repeated exports produce identical files.
            """
            (diseases, Drugs, Departments, disease_infos,
             rels_disease_drug, rels_disease_department, rels_department_department) = self.read_nodes()
            exports = (
                ('disease.txt', diseases),
                ('drug.txt', Drugs),
                ('department.txt', Departments),
            )
            for file_name, names in exports:
                # UTF-8 is explicit because the names are Chinese text.
                with open(file_name, 'w', encoding='utf-8') as out_file:
                    out_file.write('\n'.join(sorted(names)))
    
    if __name__ == '__main__':
        # Script entry point: create the nodes first, then the relationships
        # (which are matched against existing nodes), then dump the name lists.
        builder = MedicalGraph()
        for step in (builder.create_graphnodes, builder.create_graphrels, builder.export_data):
            step()
    

    无非就是连接图数据库,然后创建节点、创建关系,当做模板来看就行了,最后结果:
    image

  • 相关阅读:
    看到一篇好文章 和大家分享《别让灵魂赶不上自己的脚步!》
    第二次结对编程作业
    css的用法
    MicrosoftWord2013基本用法
    使用winshark分析三次握手,四次挥手
    取石子(博弈游戏)
    java.lang.NullPointerException的可能原因及处理
    Git的用法
    C++ 各种基本类型间的转换
    Servlet技术(使用myeclipse)
  • 原文地址:https://www.cnblogs.com/xiximayou/p/14594798.html
Copyright © 2011-2022 走看看