zoukankan      html  css  js  c++  java
  • 1120地点实体与事件实体导入

    地点实体

    预处理

    对于每个诗人的地点进行处理,只要我们能够识别出经纬度的古今地点,保存已下信息到地点的属性中

    代码

    import pandas as pd
    import numpy as np
    import re
    from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
    
    # 创建节点
    def CreateNode(m_graph,m_label,m_attrs):
        #根绝节点name属性,查找节点
        m_n="_.name="+"\'"+m_attrs['name']+"\'"
        matcher = NodeMatcher(m_graph)
        re_value = matcher.match(m_label).where(m_n).first()
        #print(re_value)
        if re_value is None:
            m_mode = Node(m_label,**m_attrs)
            n = graph.create(m_mode)
            return n
        return None
    # 查询节点
    def MatchNode(m_graph,m_label,m_attrs):
        m_n="_.name="+"\'"+m_attrs['name']+"\'"
        matcher = NodeMatcher(m_graph)
        re_value = matcher.match(m_label).where(m_n).first()
        return re_value
    # 创建关系
    def CreateRelationship(m_graph,m_label1,m_attrs1,m_label2,m_attrs2,m_r_name):
        reValue1 = MatchNode(m_graph,m_label1,m_attrs1)
        reValue2 = MatchNode(m_graph,m_label2,m_attrs2)
        if reValue1 is None or reValue2 is None:
            return False
        m_r = Relationship(reValue1,m_r_name,reValue2)
        n = graph.create(m_r)
        return n
    
    #查找关系
    def findRelationship(m_graph,m_label1,m_attrs1,m_label2,m_attrs2,m_r_name):
        reValue1 = MatchNode(m_graph, m_label1, m_attrs1)
        reValue2 = MatchNode(m_graph, m_label2, m_attrs2)
        if reValue1 is None or reValue2 is None:
            return False
        m_r = Relationship(reValue1, m_r_name['name'], reValue2)
        return m_r
    
    def updateRelation(m_graph,m_label1,m_attrs1,m_label2,m_attrs2,m_r_name):
        reValue1 = MatchNode(m_graph, m_label1, m_attrs1)
        reValue2 = MatchNode(m_graph, m_label2, m_attrs2)
        if reValue1 is None or reValue2 is None:
            return False
        print(m_r_name)
        propertyes={'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
        m_r = Relationship(reValue1, m_r_name['name'], reValue2,**propertyes)
        graph.merge(m_r)
    
    #修改节点属性
    def updateNode(m_graph,m_label1,m_attrs1,new_attrs):
        reValue1 = MatchNode(m_graph, m_label1, m_attrs1)
        if reValue1 is None:
            return False
        reValue1.update(new_attrs)
        graph.push(reValue1)
    
    
    
    graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
    
    #获取指定文件夹下的excel
    import os
    def get_filename(path,filetype):  # 输入路径、文件类型例如'.xlsx'
        name = []
        for root,dirs,files in os.walk(path):
            for i in files:
                if os.path.splitext(i)[1]==filetype:
                    name.append(i)
        return name            # 输出由有后缀的文件名组成的列表
    
    def read_real_where_name():
        file='data2/gu_jin_lng_lat.xlsx'
        data=pd.read_excel(file)
        gu_name=list(data.gu_name)
        return gu_name
    
    def read_gu_dict():
        file = 'data2/gu_jin_lng_lat.xlsx'
        data = pd.read_excel(file)
        gu_name = list(data.gu_name)
        jin_name=list(data.jin_name)
        lng=list(data.lng)
        lat=list(data.lat)
        gu_dict={}
        for i in range(len(gu_name)):
            gu=gu_name[i]
            gu_dict[gu]={"jin_name":jin_name[i],"lng":lng[i],"lat":lat[i]}
        return gu_dict
    
    def read_where(author,file,gu_name):
        data=pd.read_excel(file)
        wheres=data.wheres
        real_where=[]
        for i in range(len(wheres)):
            where_name=wheres[i]
            where_list=where_name.split(',')
            for it in where_list:
                if it in gu_name and it!='':
                    real_where.append(it)
        real_where=list(set(real_where))
        if len(real_where)!=0:
            for it in real_where:
                jin=gu_dict[it]['jin_name']
                lat=gu_dict[it]['lat']
                lng=gu_dict[it]['lng']
                attr1 = {"name":it,"jin_name": jin,"lng":lng,"lat":lat}
                CreateNode(graph, where_name_label, attr1)
                print("创建地点:" + it + "成功!!")
    
                attr2 = {"name": author}
                # 创建关系
                m_r_name1 = "轨迹"
                reValue1 = CreateRelationship(graph, author_label, attr2, where_name_label, attr1, m_r_name1)
                print("创建关系:" + author + "-轨迹-" + it + "成功")
    
    
    
    if __name__ == '__main__':
        file = 'author/'
        lists = get_filename(file, '.xlsx')
        gu_name = read_real_where_name()
        gu_dict = read_gu_dict()
        author_label='author'
        where_name_label='where_name'
        for it in lists:
            newfile = file + it
            print(newfile)
            author = it.split('.')[0]
            read_where(author,newfile, gu_name)

    展示效果

     事件实体

    属性

    事件属性包括:name为事件全名,时间,地点

    其中对于地点要进行处理,只要包含我们有经纬度的真实可标记的地点

    代码

    import pandas as pd
    import numpy as np
    import re
    from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
    
    # 创建节点
    def CreateNode(m_graph,m_label,m_attrs):
        #根绝节点name属性,查找节点
        m_n="_.name="+"\'"+m_attrs['name']+"\'"
        matcher = NodeMatcher(m_graph)
        re_value = matcher.match(m_label).where(m_n).first()
        #print(re_value)
        if re_value is None:
            m_mode = Node(m_label,**m_attrs)
            n = graph.create(m_mode)
            return n
        return None
    # 查询节点
    def MatchNode(m_graph,m_label,m_attrs):
        m_n="_.name="+"\'"+m_attrs['name']+"\'"
        matcher = NodeMatcher(m_graph)
        re_value = matcher.match(m_label).where(m_n).first()
        return re_value
    # 创建关系
    def CreateRelationship(m_graph,m_label1,m_attrs1,m_label2,m_attrs2,m_r_name):
        reValue1 = MatchNode(m_graph,m_label1,m_attrs1)
        reValue2 = MatchNode(m_graph,m_label2,m_attrs2)
        if reValue1 is None or reValue2 is None:
            return False
        m_r = Relationship(reValue1,m_r_name,reValue2)
        n = graph.create(m_r)
        return n
    
    #查找关系
    def findRelationship(m_graph,m_label1,m_attrs1,m_label2,m_attrs2,m_r_name):
        reValue1 = MatchNode(m_graph, m_label1, m_attrs1)
        reValue2 = MatchNode(m_graph, m_label2, m_attrs2)
        if reValue1 is None or reValue2 is None:
            return False
        m_r = Relationship(reValue1, m_r_name['name'], reValue2)
        return m_r
    
    def updateRelation(m_graph,m_label1,m_attrs1,m_label2,m_attrs2,m_r_name):
        reValue1 = MatchNode(m_graph, m_label1, m_attrs1)
        reValue2 = MatchNode(m_graph, m_label2, m_attrs2)
        if reValue1 is None or reValue2 is None:
            return False
        print(m_r_name)
        propertyes={'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
        m_r = Relationship(reValue1, m_r_name['name'], reValue2,**propertyes)
        graph.merge(m_r)
    
    #修改节点属性
    def updateNode(m_graph,m_label1,m_attrs1,new_attrs):
        reValue1 = MatchNode(m_graph, m_label1, m_attrs1)
        if reValue1 is None:
            return False
        reValue1.update(new_attrs)
        graph.push(reValue1)
    
    
    
    graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
    
    #获取指定文件夹下的excel
    import os
    def get_filename(path,filetype):  # 输入路径、文件类型例如'.xlsx'
        name = []
        for root,dirs,files in os.walk(path):
            for i in files:
                if os.path.splitext(i)[1]==filetype:
                    name.append(i)
        return name            # 输出由有后缀的文件名组成的列表
    
    def read_real_where_name():
        file='data2/gu_jin_lng_lat.xlsx'
        data=pd.read_excel(file)
        gu_name=list(data.gu_name)
        return gu_name
    
    def read_where(author,file,gu_name):
        data=pd.read_excel(file)
        date=list(data.data)
        where_name=list(data.wheres)
        things=list(data.things)
        for i in range(len(date)):
            #处理地区,满足我们需要的地区条件
            where_list=where_name[i].split(',')
            for it in where_list:
                if it in gu_name and it!='':
                    attr1={"name":things[i],"date":date[i],"where_name":where_name[i]}
                    CreateNode(graph, things_label, attr1)
                    print("创建事件:" + things[i] + "-成功!!")
    
                    attr2 = {"name": author}
                    # 创建关系
                    m_r_name1 = "事迹"
                    reValue1 = CreateRelationship(graph, author_label, attr2, things_label, attr1, m_r_name1)
                    print("创建关系:" + author + "-事迹-" + things[i] + "-成功")
                    break
    
    
    
    if __name__ == '__main__':
        file = 'author/'
        lists = get_filename(file, '.xlsx')
        gu_name = read_real_where_name()
        author_label='author'
        things_label='things'
        for it in lists:
            newfile = file + it
            print(newfile)
            author = it.split('.')[0]
            print(author)
            read_where(author,newfile,gu_name)

    展示效果

     

  • 相关阅读:
    Hadoop源代码分析(五)
    使用Solr Data Import的deltaimport功能
    hbasewriter
    Hadoop源代码分析(四)
    lucene .NET 搜索图片 功能实现
    char类型与string类型的区别
    Windows程序设计:'SM_ MOUSEWHEELPRESENT' : undeclared identifier解决办法
    汇编里的栈空间
    在汇编源程序中,数据不能以字母开头
    中值
  • 原文地址:https://www.cnblogs.com/xiaofengzai/p/15582741.html
Copyright © 2011-2022 走看看