python机器学习-乳腺癌细胞挖掘(博主亲自录制视频)
https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share
脚本名称
censor_relation_xwgj2_functions.py
运行前需要准备中药表 Excel 文件(中药表.xlsx,工作表名为 Sheet1),并与脚本放在同一目录下。
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 25 10:06:52 2016

Audit the relationship between nature/flavour/meridian (xing-wei-gui-jing)
keyword combinations and herb functions, for combinations of four or more
elements.

For every ``[keywords, function]`` pair supplied by
``list_xwgj_function_multiElements`` the script builds a 2x2 contingency
table over the rows of the herb workbook, runs the chi-square independence
test from ``chi_square`` and writes all results to a CSV file.

@author: toby
qq:231469242
"""
import csv

import xlrd

import chi_square
import list_xwgj_function_multiElements

# Multi-element combinations paired with a function keyword.
thelist_xwgj_function = list_xwgj_function_multiElements.list_xwgj2_function
# Turn ['寒,苦,肝,胃', '清热'] into [['寒', '苦', '肝', '胃'], '清热'].
thelist_xwgj_function = [
    [pair[0].split(","), pair[1]] for pair in thelist_xwgj_function
]

fileName = "性味归经_功能_关系_多元素.csv"
excelFilename = "中药表.xlsx"
sheetName = "Sheet1"

# Open the workbook.
# NOTE: xlrd >= 2.0 reads only .xls files; opening this .xlsx workbook
# needs xlrd < 2.0.
excelFile = xlrd.open_workbook(excelFilename)
sheet = excelFile.sheet_by_name(sheetName)
# Sheet dimensions.
number_rows = sheet.nrows
number_columns = sheet.ncols
# Data rows of the sheet (filled by Get_sheetData).
list_sheetData = []
# Accumulated results (filled by Get_all_chiSquare).
list_xingWeiFunction_relation = []


def Get_sheetData():
    """Append every sheet row except the first to ``list_sheetData``.

    Row 0 is skipped (treated as the header).

    Returns:
        The module-level ``list_sheetData`` list.
    """
    list_sheetData.extend(
        sheet.row_values(row) for row in range(1, number_rows)
    )
    return list_sheetData


list_sheetData = Get_sheetData()


def _has_all_keys(keyWord1, i):
    """Return True when every element of ``keyWord1`` occurs in ``i[0]``."""
    return all(key in i[0] for key in keyWord1)


# Example arguments for the four predicates below:
#   keyWord1 = ['寒', '苦', '肝', '胃'], keyWord2 = '清热'
#   i = ['苦,寒归胃经 ', '清热解毒,消痈,下乳,舒筋通脉。']
# Membership is tested with ``in``; presumably i[0] and i[1] are strings,
# which makes these substring tests -- verify against the workbook.

def True_a(keyWord1, keyWord2, i):
    """Cell a: all keywords are in ``i[0]`` AND ``keyWord2`` is in ``i[1]``."""
    return _has_all_keys(keyWord1, i) and keyWord2 in i[1]


def True_b(keyWord1, keyWord2, i):
    """Cell b: all keywords are in ``i[0]`` AND ``keyWord2`` is NOT in ``i[1]``."""
    return _has_all_keys(keyWord1, i) and keyWord2 not in i[1]


def True_c(keyWord1, keyWord2, i):
    """Cell c: some keyword is missing from ``i[0]`` AND ``keyWord2`` is in ``i[1]``."""
    return not _has_all_keys(keyWord1, i) and keyWord2 in i[1]


def True_d(keyWord1, keyWord2, i):
    """Cell d: some keyword is missing from ``i[0]`` AND ``keyWord2`` is NOT in ``i[1]``."""
    return not _has_all_keys(keyWord1, i) and keyWord2 not in i[1]


def Get_single_chiSquare(keyWord1, keyWord2):
    """Run the chi-square test for one keyword combination and one function.

    The original author notes that the three-element and multi-element
    scripts differ and that this function has to be adapted between them.

    Args:
        keyWord1: list of keywords that must all occur in column 0 of a row.
        keyWord2: function keyword looked up in column 1 of a row.

    Returns:
        ``[[a, b, c, d], k2, relation, list_a]`` where ``k2`` and
        ``relation`` come from ``chi_square.value_independence`` and
        ``chi_square.judge_independence``, and ``list_a`` holds the
        spreadsheet row numbers of the rows counted in cell ``a`` (kept so
        a result can be traced back to the workbook).
    """
    list_a = []
    a = b = c = d = 0
    # One pass over the data: each row lands in exactly one of the 4 cells.
    for index, row in enumerate(list_sheetData):
        has_keys = _has_all_keys(keyWord1, row)
        has_function = keyWord2 in row[1]
        if has_keys and has_function:
            a += 1
            # +2: list index 0 is sheet row index 1 (header skipped), which
            # is row 2 in 1-based spreadsheet numbering.
            list_a.append(index + 2)
        elif has_keys:
            b += 1
        elif has_function:
            c += 1
        else:
            d += 1

    k2 = chi_square.value_independence(a, b, c, d)
    relation = chi_square.judge_independence(a, b, c, d)
    return [[a, b, c, d], k2, relation, list_a]


def Get_all_chiSquare(thelist_xwgj_function):
    """Compute the chi-square result for every [keywords, function] pair.

    Results are appended to the module-level
    ``list_xingWeiFunction_relation``. A pair whose evaluation raises is
    reported on stdout and skipped, so one bad row does not abort the run.

    Args:
        thelist_xwgj_function: iterable of ``[keyword_list, function]``.

    Returns:
        The module-level ``list_xingWeiFunction_relation`` list; each entry
        is ``[pair, result, result[2]]``.
    """
    for pair in thelist_xwgj_function:
        try:
            keyWord1, keyWord2 = pair[0], pair[1]
            result = Get_single_chiSquare(keyWord1, keyWord2)
        except Exception as err:  # best-effort batch: report and carry on
            print("wrong at:", pair, repr(err))
            continue
        list_xingWeiFunction_relation.append([pair, result, result[2]])
    return list_xingWeiFunction_relation


def Write_table_to_csv(list1, fileName):
    """Write the rows of ``list1`` to the CSV file ``fileName``.

    The file is opened with the platform default encoding and is closed
    even if writing fails.
    """
    with open(fileName, 'w', newline='') as file:
        csv.writer(file).writerows(list1)


list_xingWeiFunction_relation = Get_all_chiSquare(thelist_xwgj_function)
Write_table_to_csv(list_xingWeiFunction_relation, fileName)

# Sample data for manual testing:
#   i = ['苦,寒,胃经 ', '清热,解毒,消痈,下乳,舒筋通脉。']
#   b = thelist_xwgj_function[0]
#   keyWord1, keyWord2 = b[0], b[1]
chi_square脚本
# coding=utf-8
"""Chi-square test of independence for a 2x2 contingency table.

Precondition: each of the four cell counts must be at least 5.
The larger the statistic, the stronger the evidence of a relationship.
"""

# Decision threshold; per the original author's note 2.706 corresponds to a
# 90% probability level.
_CRITICAL_VALUE = 2.706
# Smallest cell count for which the statistic is computed.
_MIN_CELL_COUNT = 5


def value_independence(a, b, c, d):
    """Return the chi-square statistic of the 2x2 table ``[[a, b], [c, d]]``.

    Computes ``n * (a*d - b*c)**2 / ((a+b) * (c+d) * (a+c) * (b+d))`` with
    ``n = a + b + c + d``.

    Returns:
        The statistic as a float, or ``None`` when any cell count is below 5.
    """
    if (a < _MIN_CELL_COUNT or b < _MIN_CELL_COUNT
            or c < _MIN_CELL_COUNT or d < _MIN_CELL_COUNT):
        return None
    total = a + b + c + d
    denominator = float((a + b) * (c + d) * (a + c) * (b + d))
    return (total * (a * d - b * c) ** 2) / denominator


def judge_independence(a, b, c, d):
    """Decide whether the two factors of the 2x2 table are related.

    Prints the statistic and the verdict to stdout.

    Returns:
        ``True`` when the statistic exceeds 2.706 (related), ``False`` when
        it does not (unrelated), or the string ``"wrong"`` when the
        statistic cannot be computed because a cell count is below 5.
    """
    num_independence = value_independence(a, b, c, d)
    print("chi_square:", num_independence)
    if num_independence is None:
        print("not suit for chi_square statistics")
        return "wrong"
    if num_independence > _CRITICAL_VALUE:
        print("there is relationship")
        return True
    print("there is no relationship")
    return False
list_xwgj_function_multiElements 脚本
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 23 11:38:35 2016

Generate multi-element nature/flavour/meridian (xing-wei-gui-jing)
combinations (four and five elements) and pair them with function keywords.
Algorithm: chi-square.

@author: Administrator
"""
import csv

import xlrd

# Base vocabulary of natures, flavours and meridians.
list_xing = ["寒", "凉", "温", "热"]
list_wei = ["酸", "苦", "甘", "辛", "咸"]
list_guiJing = ["胃", "肝", "胆", "心", "肺", "肾", "脾", "膀胱", "大肠", "小肠"]

# Workbooks holding the combinations of four or more elements and functions.
excelFilename = "性味归经清理3.xlsx"
excelFilename1 = "性味归经清理4.xlsx"
sheetName = "Sheet1"

# Open the workbook.
# NOTE: xlrd >= 2.0 reads only .xls files; opening this .xlsx workbook
# needs xlrd < 2.0.
excelFile = xlrd.open_workbook(excelFilename)
sheet = excelFile.sheet_by_name(sheetName)
# Sheet dimensions.
number_rows = sheet.nrows
number_columns = sheet.ncols
# Column 0 without its first (header) cell: the combinations.
list_xwg2j = sheet.col_values(0)[1:]
# Column 1 without its first (header) cell: the functions, blanks removed.
list_function = sheet.col_values(1)[1:]
list_function1 = [i for i in list_function if i != ""]
# [combination, function] pairs (filled by Get_list_xwgj2_function).
list_xwgj2_function = []


def Get_list_xwg2j():
    """Extend ``list_xwg2j`` with combinations that gain one more meridian.

    For every combination present when the function is called, and every
    meridian in ``list_guiJing`` that does not already occur in it,
    ``combination + "," + meridian`` is appended.

    The loop iterates over a snapshot of the list: iterating the list that
    is being appended to would re-expand the new entries over and over.

    Returns:
        The module-level ``list_xwg2j`` list.
    """
    for combination in list(list_xwg2j):
        for meridian in list_guiJing:
            if meridian not in combination:
                list_xwg2j.append(combination + "," + meridian)
    return list_xwg2j


def add_some_elements(list_xwg2j):
    """Append a fixed set of hand-picked combinations to ``list_xwg2j``.

    The list is modified in place and also returned.
    """
    list_xwg2j.extend([
        '甘,温,肝,肾',
        '甘,温,脾,胃',
        '苦,寒,肝,胃,大肠',
        '辛,温,脾,胃,肺',
        '苦,寒,大肠,肺',
        '苦,寒,心,肝',
    ])
    return list_xwg2j


def add_uid(list_xwgj_clean):
    """Return ``[[index, value], ...]`` rows, ready to be written to CSV."""
    return [[uid, value] for uid, value in enumerate(list_xwgj_clean)]


def Write_table_to_csv(list1, fileName):
    """Write the rows of ``list1`` to the CSV file ``fileName``.

    The file is opened with the platform default encoding and is closed
    even if writing fails.
    """
    with open(fileName, 'w', newline='') as file:
        csv.writer(file).writerows(list1)


def Get_list_xwgj2_function(list1):
    """Pair every combination in ``list1`` with every function keyword.

    The ``[combination, function]`` pairs are appended to the module-level
    ``list_xwgj2_function``, which is returned.
    """
    list_xwgj2_function.extend(
        [combination, function]
        for combination in list1
        for function in list_function1
    )
    return list_xwgj2_function


# One-off preparation steps that produced the cleaned workbook (disabled):
# list_xwg2j = Get_list_xwg2j()
# list_xwg2j_add = add_some_elements(list_xwg2j)
# list_write = add_uid(list_xwg2j_add)   # add serial numbers before writing
# Write_table_to_csv(list_write, "性味归经清理3.csv")

list_xwgj2_function = Get_list_xwgj2_function(list_xwg2j)
list_xwgj_function_multiElements 脚本运行时需要读取 Excel 文件“性味归经清理3.xlsx”(工作表名为 Sheet1)。
结果
计算出细化的性味归经组合:从三万多种组合中筛选后,得到 472 种组合。
https://study.163.com/provider/400000000398149/index.htm?share=2&shareId=400000000398149( 欢迎关注博主主页,学习python视频资源,还有大量免费python经典文章)