zoukankan      html  css  js  c++  java
  • 统计中药性味归经与功能的脚本(超过四元素)

     python机器学习-乳腺癌细胞挖掘(博主亲自录制视频)

    https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share

     

    脚本名称

    censor_relation_xwgj2_functions.py

    需要导入中药表excel

    # -*- coding: utf-8 -*-
    """
    Created on Thu Aug 25 10:06:52 2016
    
    审核性味归经与功能的脚本(超过四元素)
    
    @author: toby qq:231469242
    """
    
    import xlrd,csv
    import chi_square,list_xwgj_function_multiElements
    
    # Multi-element combinations: [combination string, function] pairs produced
    # by the list_xwgj_function_multiElements module.
    thelist_xwgj_function=list_xwgj_function_multiElements.list_xwgj2_function
    # Pre-processing: turn ['寒,苦,肝,胃', '清热'] into [['寒','苦','肝','胃'], '清热']
    thelist_xwgj_function=[[i[0].split(","),i[1]] for i in thelist_xwgj_function]
    
    
    # Output CSV, input workbook and the sheet read from it.
    fileName="性味归经_功能_关系_多元素.csv"
    excelFilename="中药表.xlsx"
    sheetName="Sheet1"
    # Open the Excel workbook.
    # NOTE(review): recent xlrd releases reportedly no longer read .xlsx files -
    # confirm the installed xlrd version supports this workbook.
    excelFile=xlrd.open_workbook(excelFilename)
      
    sheet=excelFile.sheet_by_name(sheetName)
    
    # Row and column counts of the sheet.
    number_rows=sheet.nrows
    number_columns=sheet.ncols
    # Rows of the sheet; filled by Get_sheetData().
    list_sheetData=[]  
    # Result entries; Get_all_chiSquare() appends one per tested pair.
    list_xingWeiFunction_relation=[]
    
    
    #获取表格数据
    def Get_sheetData():
        """Load the sheet's data rows into ``list_sheetData`` and return that list.

        Rows are read with ``sheet.row_values`` for row indexes 1 up to
        ``number_rows - 1``; row 0 is skipped (presumably the header row).
        The module-level list is extended in place.
        """
        list_sheetData.extend(
            sheet.row_values(row_index) for row_index in range(1,number_rows)
        )
        return list_sheetData
    
    
    # Read all data rows once at import time; the chi-square helpers below
    # iterate over this list.
    list_sheetData=Get_sheetData()
    
    #,卡方统计的a是否成立,实验组True,对照组True
    #例如keyWord1=['寒', '苦', '肝', '胃'],keyWord2='清热'
    #i为['苦,寒归胃经\t', '清热解毒,消痈,下乳,舒筋通脉。']
    def True_a(keyWord1,keyWord2,i):
        """Return True when row ``i`` belongs to cell *a* of the 2x2 table.

        Cell *a* means: every element of ``keyWord1`` occurs in ``i[0]``
        (substring test) AND ``keyWord2`` occurs in ``i[1]``.

        keyWord1 -- list of elements, e.g. ['寒', '苦', '肝', '胃']
        keyWord2 -- function keyword, e.g. '清热'
        i        -- data row; i[0] and i[1] are the two texts searched
        """
        every_key_present=all(key in i[0] for key in keyWord1)
        # ``and`` short-circuits, so i[1] is only inspected when all keys match.
        return every_key_present and keyWord2 in i[1]
        
        
    #,卡方统计的b是否成立,实验组True,对照组False
    #例如keyWord1=['寒', '苦', '肝', '胃'],keyWord2='清热'
    #i为['苦,寒归胃经\t', '清热解毒,消痈,下乳,舒筋通脉。']
    def True_b(keyWord1,keyWord2,i):
        """Return True when row ``i`` belongs to cell *b* of the 2x2 table.

        Cell *b* means: every element of ``keyWord1`` occurs in ``i[0]``
        (substring test) but ``keyWord2`` does NOT occur in ``i[1]``.

        keyWord1 -- list of elements, e.g. ['寒', '苦', '肝', '胃']
        keyWord2 -- function keyword, e.g. '清热'
        i        -- data row; i[0] and i[1] are the two texts searched
        """
        every_key_present=all(key in i[0] for key in keyWord1)
        # ``and`` short-circuits, so i[1] is only inspected when all keys match.
        return every_key_present and keyWord2 not in i[1]
    
    
    #,卡方统计的c是否成立,实验组False,对照组True
    #例如keyWord1=['寒', '苦', '肝', '胃'],keyWord2='清热'
    #i为['苦,寒归胃经\t', '清热解毒,消痈,下乳,舒筋通脉。']
    def True_c(keyWord1,keyWord2,i):
        """Return True when row ``i`` belongs to cell *c* of the 2x2 table.

        Cell *c* means: at least one element of ``keyWord1`` is missing from
        ``i[0]`` (substring test) AND ``keyWord2`` occurs in ``i[1]``.
        An empty ``keyWord1`` has no missing element, so the result is False.

        keyWord1 -- list of elements, e.g. ['寒', '苦', '肝', '胃']
        keyWord2 -- function keyword, e.g. '清热'
        i        -- data row; i[0] and i[1] are the two texts searched
        """
        some_key_missing=any(key not in i[0] for key in keyWord1)
        # ``and`` short-circuits, so i[1] is only inspected when a key is missing.
        return some_key_missing and keyWord2 in i[1]
        
        
    #,卡方统计的d是否成立,实验组False,对照组False
    #例如keyWord1=['寒', '苦', '肝', '胃'],keyWord2='清热'
    #i为['苦,寒归胃经\t', '清热解毒,消痈,下乳,舒筋通脉。']
    def True_d(keyWord1,keyWord2,i):
        """Return True when row ``i`` belongs to cell *d* of the 2x2 table.

        Cell *d* means: at least one element of ``keyWord1`` is missing from
        ``i[0]`` (substring test) and ``keyWord2`` does NOT occur in ``i[1]``.
        An empty ``keyWord1`` has no missing element, so the result is False.

        keyWord1 -- list of elements, e.g. ['寒', '苦', '肝', '胃']
        keyWord2 -- function keyword, e.g. '清热'
        i        -- data row; i[0] and i[1] are the two texts searched
        """
        some_key_missing=any(key not in i[0] for key in keyWord1)
        # ``and`` short-circuits, so i[1] is only inspected when a key is missing.
        return some_key_missing and keyWord2 not in i[1]
                
    #卡方统计的keyWord1与keyWord2的关系
    #三元素和多元素的脚本不一样,此函数需要修改
    def Get_single_chiSquare(keyWord1,keyWord2):
        """Build the 2x2 contingency table for one pair and run the chi-square test.

        Every row of ``list_sheetData`` is classified with True_a/True_b/
        True_c/True_d. The four cells are mutually exclusive, so one pass with
        an if/elif chain yields the same counts as four separate passes.

        keyWord1 -- list of elements, e.g. ['寒', '苦', '肝', '胃']
        keyWord2 -- function keyword, e.g. '清热'

        Returns ``[[a, b, c, d], k2, relation, list_a]`` where ``k2`` comes from
        ``chi_square.value_independence``, ``relation`` from
        ``chi_square.judge_independence`` and ``list_a`` holds ``index + 2`` for
        every row counted in cell *a*, kept for tracing a result back to the data.
        """
        list_a=[]
        a=b=c=d=0
        for index,row in enumerate(list_sheetData):
            if True_a(keyWord1,keyWord2,row):
                a+=1
                # list_sheetData starts at sheet row index 1, so index+2 is the
                # 1-based row number in the spreadsheet.
                list_a.append(index+2)
            elif True_b(keyWord1,keyWord2,row):
                b+=1
            elif True_c(keyWord1,keyWord2,row):
                c+=1
            elif True_d(keyWord1,keyWord2,row):
                d+=1
        k2=chi_square.value_independence(a,b,c,d)
        relation=chi_square.judge_independence(a,b,c,d)
        return [[a,b,c,d],k2,relation,list_a]
        
    #计算所有性味的相关性    
    def Get_all_chiSquare(thelist_xwgj_function):
        """Run the chi-square test for every [elements, function] pair.

        thelist_xwgj_function -- iterable of ``[keyWord1, keyWord2]`` items.

        For each item, ``[item, result, result[2]]`` is appended to the
        module-level ``list_xingWeiFunction_relation``, where ``result`` is the
        list returned by ``Get_single_chiSquare``. Items that fail are reported
        on stdout and skipped (best effort). Returns the module-level list.
        """
        for i in thelist_xwgj_function:
            try:
                keyWord1,keyWord2=i[0],i[1]
                relation=Get_single_chiSquare(keyWord1,keyWord2)
            except Exception as error:
                # Catch Exception rather than everything, so that Ctrl-C and
                # SystemExit still stop the run; show the cause for debugging.
                print("wrong at:",i,error)
                continue
            list_xingWeiFunction_relation.append([i,relation,relation[2]])
        return list_xingWeiFunction_relation
        
        
        
    def Write_table_to_csv(list1,fileName):
        """Write the rows in ``list1`` to the CSV file ``fileName``.

        list1    -- iterable of rows, each row an iterable of cell values
        fileName -- path of the file to create or overwrite

        The file is opened with ``newline=''`` as the csv module requires, and
        through ``with`` so the handle is closed even if writing a row fails.
        """
        with open(fileName,'w',newline='') as file:
            csv.writer(file).writerows(list1)
    
      
    # Script entry: test every combination/function pair, then dump all
    # results to the CSV file named by fileName.
    list_xingWeiFunction_relation=Get_all_chiSquare(thelist_xwgj_function)  
    Write_table_to_csv(list_xingWeiFunction_relation,fileName)
    
    
    '''
    #测试数据
    i=['苦,寒,胃经
    	', '清热,解毒,消痈,下乳,舒筋通脉。']
    b=thelist_xwgj_function[0]
    keyWord1,keyWord2=b[0],b[1]
    '''
    
    chi_square脚本
    #coding=utf-8
    
    #独立性检验test for independence,也是卡方检验chi_square
    #前提条件:a,b,c,d 必须大于5
    
    #2.706是判断标准(90概率),值越大,越有关,值越小,越无关
    def value_independence(a,b,c,d):
        """Return the chi-square statistic of the 2x2 table ``a, b, c, d``.

        The value is n*(a*d-b*c)**2 / ((a+b)*(c+d)*(a+c)*(b+d)) with
        n = a+b+c+d. When any cell is below 5 the test is not applied and
        None is returned.
        """
        cells=(a,b,c,d)
        if not all(cell>=5 for cell in cells):
            return None
        numerator=(a+b+c+d)*(a*d-b*c)**2
        denominator=(a+b)*(c+d)*(a+c)*(b+d)
        return numerator/float(denominator)
    
    #返回True表示有关
    #返回False表示无关
    #2.706表示有90%概率
    def judge_independence(a,b,c,d):
        """Judge whether the 2x2 table ``a, b, c, d`` shows a relationship.

        Computes the statistic with ``value_independence`` and compares it with
        2.706, the threshold this script uses for the 90% level.

        Returns True when the statistic is above 2.706, False when it is at or
        below 2.706, and the string "wrong" when the statistic is None (the
        test is not applicable). The statistic and the verdict are printed.
        """
        num_independence=value_independence(a,b,c,d)
        print("chi_square:",num_independence)
        # None is a singleton marker, so test identity rather than equality.
        if num_independence is None:
            print("not suit for chi_square statistics")
            return "wrong"
        if num_independence>2.706:
            print("there is relationship")
            return True
        if num_independence<=2.706:
            print("there is no relationship")
            return False
        # Only reached when neither comparison holds (e.g. a NaN statistic);
        # the original fell through here and returned None implicitly.
        return None
        
        
    
    list_xwgj_function_multiElements  脚本
    # -*- coding: utf-8 -*-
    """
    Created on Tue Aug 23 11:38:35 2016
    生成多个因素的性味归经组合,四位,五位
    
    算法:卡方
    
    @author: Administrator
    """
    
    import xlrd,csv
    # Base vocabularies: natures (xing), flavours (wei) and meridians (guiJing).
    list_xing=["寒","凉","温","热"]
    list_wei=["酸","苦","甘","辛","咸"]
    list_guiJing=["胃","肝","胆","心","肺","肾","脾","膀胱","大肠","小肠"]
    
    
    # Workbooks holding the combinations of four or more elements.
    # excelFilename1 is not used by the code shown in this script.
    excelFilename="性味归经清理3.xlsx"
    excelFilename1="性味归经清理4.xlsx"
    sheetName="Sheet1"
    # Open the Excel workbook.
    excelFile=xlrd.open_workbook(excelFilename)
      
    sheet=excelFile.sheet_by_name(sheetName)
    
    # Row and column counts of the sheet.
    number_rows=sheet.nrows
    number_columns=sheet.ncols
    
    # Combination strings: column 0 without its first row.
    list_xwg2j=sheet.col_values(0)[1:]
    # Functions: column 1 without its first row, then with empty cells dropped.
    list_function=sheet.col_values(1)[1:]
    list_function1=[i for i in list_function if i!=""]
    # [combination, function] pairs; filled by Get_list_xwgj2_function().
    list_xwgj2_function=[]
    
    #生成两个归经的组合
    def Get_list_xwg2j():
        """Extend ``list_xwg2j`` with each entry plus one additional meridian.

        For every combination string present when the function is called, and
        every meridian in ``list_guiJing`` that is not already a substring of
        it, ``"<combination>,<meridian>"`` is appended to the module-level
        ``list_xwg2j``. Returns that list.
        """
        # Iterate over a snapshot: looping over the list while appending to it
        # would also expand the newly added entries again and again.
        for combination in list(list_xwg2j):
            for meridian in list_guiJing:
                if meridian not in combination:
                    list_xwg2j.append(combination+","+meridian)
        return list_xwg2j
    
    #添加一些元素
    def add_some_elements(list_xwg2j):
        """Append six hand-picked combinations to ``list_xwg2j`` and return it.

        The list passed in is modified in place; the same object is returned.
        """
        list_xwg2j.extend([
            '甘,温,肝,肾',
            '甘,温,脾,胃',
            '苦,寒,肝,胃,大肠',
            '辛,温,脾,胃,肺',
            '苦,寒,大肠,肺',
            '苦,寒,心,肝',
        ])
        return list_xwg2j
        
    #写入csv前准备,添加序列号
    def add_uid(list_xwgj_clean):
        """Pair every value with its position, ready to be written as CSV rows.

        Returns a new list of ``[index, value]`` lists, with indexes starting
        at 0; the input list is not modified.
        """
        return [[uid,value] for uid,value in enumerate(list_xwgj_clean)]
        
    def Write_table_to_csv(list1,fileName):
        """Write the rows in ``list1`` to the CSV file ``fileName``.

        list1    -- iterable of rows, each row an iterable of cell values
        fileName -- path of the file to create or overwrite

        The file is opened with ``newline=''`` as the csv module requires, and
        through ``with`` so the handle is closed even if writing a row fails.
        """
        with open(fileName,'w',newline='') as file:
            csv.writer(file).writerows(list1)
    
    #性味归经大于四的多元素组合    
    def Get_list_xwgj2_function(list1):
        """Pair every combination in ``list1`` with every function.

        For each item of ``list1`` and each entry of ``list_function1`` the
        pair ``[item, function]`` is appended to the module-level
        ``list_xwgj2_function``, grouped by item. Returns that list.
        """
        list_xwgj2_function.extend(
            [combination,function]
            for combination in list1
            for function in list_function1
        )
        return list_xwgj2_function    
        
    # Disabled one-off preparation steps (left commented out by the author):
    # expand the combinations and add the hand-picked ones ...
    #list_xwg2j=Get_list_xwg2j()
    #list_xwg2j_add=add_some_elements(list_xwg2j)
    
    # ... then number the entries and write them to a CSV file.
    #list_write=add_uid(list_xwg2j_add)
    #Write_table_to_csv(list_write,"性味归经清理3.csv")
    # Active step: build every [combination, function] pair at import time.
    list_xwgj2_function=Get_list_xwgj2_function(list_xwg2j)
    

    list_xwgj_function_multiElements 脚本  需要性味归经清理3excel

    结果

    计算出细化的性味归经组合,从三万多组合中,得到472种组合

     https://study.163.com/provider/400000000398149/index.htm?share=2&shareId=400000000398149( 欢迎关注博主主页,学习python视频资源,还有大量免费python经典文章)


     
  • 相关阅读:
    基于VLC的视频播放器
    IOS开发之新浪微博OAuth2
    Android之官方导航栏ActionBar
    IOS中键盘隐藏几种方式
    在Android中使用Android Ksoap2调用WebService
    Android之属性动画(二)
    IOS 内存管理
    利用scp 远程上传下载文件/文件夹和ssh远程执行命令
    Centos 检查磁盘读写性能
    JPA, JNDI, OSGi
  • 原文地址:https://www.cnblogs.com/webRobot/p/5841815.html
Copyright © 2011-2022 走看看