一.Neo4j简介
1.数据构成
Neo4j使用图相关的概念来描述数据模型,把数据保存为图中的节点以及节点之间的关系。数据主要由三部分构成:
- 节点。节点表示对象实例,每个节点有唯一的ID区别其它节点,节点带有属性;
- 关系。即图中的边,用于连接两个节点;Neo4j中的关系是有向的,并且同样可以带有属性;
- 属性。key-value对,存在于节点和关系中,如图1所示。
2.索引
Neo4j使用遍历操作进行查询。为了加速查询,Neo4j会建立索引,并根据索引找到遍历所用的起始节点。
3.特点
查询的高性能
利用图结构进行查询,因此效率较高
设计的灵活性
开发的敏捷性
二.导入数据
# Account / password settings
class MedicalGraph:
    """Read the disease CSV and turn it into entities and relations for Neo4j.

    ``__init__`` opens the graph connection and resolves the CSV location;
    ``read_file`` parses the CSV into entity sets, relation lists and one
    attribute dict per disease.
    """

    # Placeholder stored when a cell that has a documented fallback is missing.
    _UNKNOWN = "未知"

    # Separators accepted between aliases; compiled once instead of per row.
    # NOTE(review): ',' and ';' each appear twice in the class -- the
    # duplicates were presumably full-width variants; confirm against the
    # original script.
    _ALIAS_SEPARATORS = re.compile("[,、;,.;]")

    def __init__(self):
        ...  # placeholder statement kept from the original listing
        self.graph = Graph("http://localhost:7474", username="neo4j", password="自己的")
        # Data path: DATA/disease.csv next to this source file.
        # os.path.dirname works with both '/' and '\\' separators, unlike
        # splitting the absolute path on '/'.
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        self.data_path = os.path.join(cur_dir, 'DATA/disease.csv')

    @staticmethod
    def _is_missing(value):
        """Return True when a CSV cell is NaN/None or an empty string.

        pandas reads empty cells as NaN, and ``str(nan)`` is the truthy
        string ``"nan"``, so a plain ``if str(value)`` check never detects
        a missing cell.
        """
        return bool(pd.isna(value)) or str(value) == ""

    # Read the file
    def read_file(self):
        """Read the CSV at ``self.data_path`` and collect entities and relations.

        Columns are accessed by position, in this order: name, alias, part,
        age, infection, insurance, department, checklist, symptom,
        complication, treatment, drug, period, rate, money.

        :return: a 14-tuple
            ``(diseases, symptoms, aliases, parts, departments,
            complications, drugs, disease_to_alias, disease_to_symptom,
            diseases_to_part, disease_to_department,
            disease_to_complication, disease_to_drug, diseases_infos)``.
            The first seven items are sets of entity names, the next six are
            lists of ``[disease, other]`` pairs, and the last is a list of
            per-disease attribute dicts with keys name, age, infection,
            insurance, checklist, treatment, period, rate, money.
        """
        # Entities
        diseases = []  # diseases
        aliases = []  # aliases
        symptoms = []  # symptoms
        parts = []  # body parts
        departments = []  # hospital departments
        complications = []  # complications
        drugs = []  # drugs
        # Disease attributes: age, infection, insurance, checklist,
        # treatment, period, rate, money
        diseases_infos = []
        # Relations
        disease_to_symptom = []  # disease -> symptom
        disease_to_alias = []  # disease -> alias
        diseases_to_part = []  # disease -> body part
        disease_to_department = []  # disease -> department
        disease_to_complication = []  # disease -> complication
        disease_to_drug = []  # disease -> drug

        all_data = pd.read_csv(self.data_path, encoding='gb18030').values
        for data in all_data:
            disease_dict = {}  # attributes of the current disease

            # Disease name; it is also recorded as an entity so that the
            # returned disease set is not left empty.
            disease = str(data[0]).replace("...", " ").strip()
            disease_dict["name"] = disease
            diseases.append(disease)

            # Aliases
            if self._is_missing(data[1]):
                alias_text = self._UNKNOWN
            else:
                alias_text = self._ALIAS_SEPARATORS.sub(" ", str(data[1]))
            for alias in alias_text.strip().split():
                aliases.append(alias)
                disease_to_alias.append([disease, alias])

            # Body parts; the fallback is a one-element list so the loop does
            # not iterate over the characters of the placeholder string.
            if self._is_missing(data[2]):
                part_list = [self._UNKNOWN]
            else:
                part_list = str(data[2]).strip().split()
            for part in part_list:
                parts.append(part)
                diseases_to_part.append([disease, part])

            # Age group
            disease_dict["age"] = str(data[3]).strip()
            # Infectiousness
            disease_dict["infection"] = str(data[4]).strip()
            # Medical insurance
            disease_dict["insurance"] = str(data[5]).strip()

            # Departments
            for department in str(data[6]).strip().split():
                departments.append(department)
                disease_to_department.append([disease, department])

            # Checklist
            disease_dict["checklist"] = str(data[7]).strip()

            # Symptoms; the last token is dropped, as in the original
            # (presumably a truncated / "more" marker -- confirm against data).
            symptom_list = str(data[8]).replace("...", " ").strip().split()[:-1]
            for symptom in symptom_list:
                symptoms.append(symptom)
                disease_to_symptom.append([disease, symptom])

            # Complications; last token dropped as for symptoms.
            if self._is_missing(data[9]):
                complication_list = [self._UNKNOWN]
            else:
                complication_list = str(data[9]).strip().split()[:-1]
            for complication in complication_list:
                complications.append(complication)
                disease_to_complication.append([disease, complication])

            # Treatment; the trailing four characters are cut off, as in the
            # original (presumably a fixed suffix in the data -- confirm).
            disease_dict["treatment"] = str(data[10]).strip()[:-4]

            # Drugs; last token dropped as for symptoms.
            drug_string = str(data[11]).replace("...", " ").strip()
            for drug in drug_string.split()[:-1]:
                drugs.append(drug)
                disease_to_drug.append([disease, drug])

            # Cure period
            disease_dict["period"] = str(data[12]).strip()
            # Cure rate
            disease_dict["rate"] = str(data[13]).strip()
            # Cost
            if self._is_missing(data[14]):
                disease_dict["money"] = self._UNKNOWN
            else:
                disease_dict["money"] = str(data[14]).strip()

            diseases_infos.append(disease_dict)

        return (
            set(diseases),
            set(symptoms),
            set(aliases),
            set(parts),
            set(departments),
            set(complications),
            set(drugs),
            disease_to_alias,
            disease_to_symptom,
            diseases_to_part,
            disease_to_department,
            disease_to_complication,
            disease_to_drug,
            diseases_infos,
        )