zoukankan      html  css  js  c++  java
  • 单因素测试综合法

     python机器学习-乳腺癌细胞挖掘(博主亲自录制视频)

    https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share

    单因素方差+kruskalwallis

    onewayTest.py

    # -*- coding: utf-8 -*-
    import numpy as np,variance_check
    # additional packages
    from scipy.stats.mstats import kruskalwallis
    from scipy import stats
    
    # Three sample groups to compare; list_groups holds the very same list objects.
    list_groups=[
        [27,2,4,18,7,9],
        [20,8,14,36,21,22],
        [34,31,3,23,30,6],
    ]
    group1,group2,group3=list_groups

    # Preliminary checks: normality, homogeneity of variance, equal group sizes.
    normality=variance_check.NormalTest(list_groups)
    leveneResult=variance_check.Levene_test(group1,group2,group3)
    equal_lenth=variance_check.Equal_lenth(list_groups)
    
    
    def Choose_mode(normality,leveneResult,group1,group2,group3):
        """Pick and run a one-way test for three groups, reporting the outcome.

        A one-way ANOVA is used only when both preconditions hold (normal
        data and equal variances). In every other case the function falls
        back to the Kruskal-Wallis test, so a result is always returned.

        Parameters:
            normality: truthy if all groups passed the normality check.
            leveneResult: truthy if the groups passed the Levene test.
            group1, group2, group3: the three samples to compare.

        Returns:
            True if the chosen test gives p < 0.05, otherwise False.
        """
        if normality and leveneResult:
            print("Use anova test:")
            statistic, p = stats.f_oneway(group1, group2, group3)
            print("statistic,p: %s %s" % (statistic, p))
            if p < 0.05:
                print("There is significant difference")
                return True
            print("There is no significant difference")
            return False

        # Either precondition failed: use the rank-based test on the groups
        # that were passed in (not on a module-level variable).
        print("Use kruskawallis test:")
        h, p = kruskalwallis(group1, group2, group3)
        print("H value: %s" % (h,))
        print("p %s" % (p,))

        if p < 0.05:
            print('There is a significant difference between the groups.')
            return True
        print('No significant difference between the groups.')
        return False
    
    
    # Run the selected test on the three sample groups; the returned flag is not stored.
    Choose_mode(normality,leveneResult,group1,group2,group3)
    作者toby,qq:231469242

    variance_check.py

    # -*- coding: utf-8 -*-
    '''
    Helpers for checking homogeneity of variance,
    normality,
    and equal group sizes (needed for pairwise comparisons).
    '''
    import scipy,math
    from scipy.stats import f
    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.stats as stats
    # additional packages
    from statsmodels.stats.diagnostic import lillifors
    #多重比较
    from statsmodels.sandbox.stats.multicomp import multipletests
    #用于排列组合
    import itertools
    '''
    #测试数据
    group1=[2,3,7,2,6]
    group2=[10,8,7,5,10]
    group3=[10,13,14,13,15]
    list_groups=[group1,group2,group3]
    list_total=group1+group2+group3
    '''
    a=0.05
    
    def check_normality(testData):
        """Test one sample for normality and print which test was used.

        The test is chosen by sample size:
            20 < n < 50   -> scipy.stats.normaltest
            n <= 20       -> Shapiro-Wilk
            50 <= n <= 300 -> Lilliefors
            n > 300       -> Kolmogorov-Smirnov

        Parameters:
            testData: sequence of numeric observations.

        Returns:
            False if the chosen test gives p < 0.05, otherwise True.
        """
        sample_size = len(testData)

        if 20 < sample_size < 50:
            label = "use normaltest"
            p_value = stats.normaltest(testData)[1]
        elif sample_size < 50:
            label = "use shapiro:"
            p_value = stats.shapiro(testData)[1]
        elif sample_size <= 300:
            label = "use lillifors:"
            p_value = lillifors(testData)[1]
        else:
            # kstest(..., 'norm') alone compares against N(0, 1); supply the
            # sample's own location and scale so that the shape is tested
            # rather than the mean/variance. Estimating the parameters from
            # the data makes the resulting p-value conservative.
            values = np.asarray(testData, dtype=float)
            label = "use kstest:"
            p_value = stats.kstest(
                values, 'norm', args=(values.mean(), values.std(ddof=1)))[1]

        print(label)
        print("p of normal: %s" % (p_value,))
        if p_value < 0.05:
            print("data are not normal distributed")
            return False
        print("data are normal distributed")
        return True
     
     
    def NormalTest(list_groups):
        """Run the normality check on every group.

        Groups are checked in order and checking stops at the first group
        that is reported as not normal.

        Returns:
            False if any group fails the check, otherwise True.
        """
        return not any(
            check_normality(sample) == False for sample in list_groups)
                 
    def Combination(list_groups):
        """Return every unordered pair of groups.

        Parameters:
            list_groups: sequence of groups.

        Returns:
            A list of 2-tuples, one per pair, in the order produced by
            itertools.combinations. Fewer than two groups gives an empty list.
        """
        # Only the pairs are needed, so build just the size-2 combinations
        # instead of every size and slicing the pairs out afterwards.
        return list(itertools.combinations(list_groups, 2))
    '''
    Out[57]:
    [[([2, 3, 7, 2, 6], [10, 8, 7, 5, 10]),
      ([2, 3, 7, 2, 6], [10, 13, 14, 13, 15]),
      ([10, 8, 7, 5, 10], [10, 13, 14, 13, 15])]]
    '''       
    
    
    def Levene_test(group1,group2,group3):
        """Check homogeneity of variance across three groups with Levene's test.

        Parameters:
            group1, group2, group3: the samples to compare.

        Returns:
            False if the Levene p-value is below 0.05 (variances differ),
            otherwise True.
        """
        p = scipy.stats.levene(group1, group2, group3)[1]
        # Parenthesised single-argument prints behave the same on Python 2 and 3.
        print("levene test:")
        if p < 0.05:
            print("variances of groups are not equal")
            return False
        print("variances of groups are equal")
        return True
              
    '''
    H0成立,三组数据方差无显著差异
    Out[9]: LeveneResult(statistic=0.24561403508771934, pvalue=0.7860617221429711)
    '''
    
    def Equal_lenth(list_groups):
        """Tell whether all groups have the same number of usable values.

        Unequal group sizes make the data unsuitable for methods such as
        Tukey's test. NaN and infinite values (of either sign) are ignored
        when counting; values that cannot be read as a number are counted.

        Parameters:
            list_groups: sequence of groups (any number of groups).

        Returns:
            True if every group has the same count of usable values
            (also for an empty sequence), otherwise False.
        """
        def _is_usable(value):
            # Numeric check instead of comparing str(value) to 'nan'/'-inf',
            # so that +inf is treated the same way as -inf.
            try:
                number = float(value)
            except (TypeError, ValueError, OverflowError):
                return True
            return not (math.isnan(number) or math.isinf(number))

        sizes = set(
            sum(1 for value in group if _is_usable(value))
            for group in list_groups)
        return len(sizes) <= 1
    
    
    '''
    #返回True or false 
    normality=NormalTest(list_groups)   
    leveneResult=Levene_test(list_groups[0],list_groups[1],list_groups[2])  
    '''
    作者toby,qq:231469242

    练习例题

    8.2 Multiple Groups
    The following example is taken from the really good, but somewhat advanced book
    by A.J. Dobson: “An Introduction to Generalized Linear Models”:
    • Get the data
    The file Data/data_others/Table 6.6 Plant experiment.xls, which can also
    be found on https://github.com/thomas-haslwanter/statsintro/tree/master/Data/
    data_others, contains data from an experiment with plants in three different
    growing conditions. Read the data into Python. Hint: use the module xlrd.
    • Perform an ANOVA
    Are the three groups different? (Correct answer: yes, they are.)
    • Multiple Comparisons
    Using the Tukey test, which of the pairs are different? (Correct answer: only
    TreatmentA and TreatmentB differ.)
    • Kruskal–Wallis
    Would a nonparametric comparison lead to a different result? (Correct answer:
    no.)

    # -*- coding: utf-8 -*-
    
    # Import standard packages
    import variance_check
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy import stats
    import pandas as pd
    from scipy.stats.mstats import kruskalwallis
    
    # Data from an experiment with plants in three different growing conditions,
    # so paired t-tests are used below for the post-hoc multiple comparisons.
    # NOTE(review): stats.ttest_rel is the related-samples (paired) t-test, while
    # the three growing conditions look like independent groups - confirm whether
    # an independent-samples test or Tukey's HSD was intended.
    list_Control=[4.17,5.58,5.18,6.11,4.5,4.61,5.17,4.53,5.33,5.14]
    list_treatmentA=[4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69]
    list_treatmentB=[6.31,5.12,5.54,5.5,5.37,5.29,4.92,6.15,5.8,5.26]
    
    list_groups=[list_Control,list_treatmentA,list_treatmentB]
    # Preliminary checks: normality of each group and homogeneity of variance.
    normality=variance_check.NormalTest(list_groups) 
    leveneResult=variance_check.Levene_test(list_groups[0],list_groups[1],list_groups[2])     
    
        
    # One-way ANOVA across the three groups.
    print(stats.f_oneway(list_Control,list_treatmentA,list_treatmentB))
    
    # Pairwise comparisons: Control vs A, Control vs B, A vs B.
    print(stats.ttest_rel(list_Control,list_treatmentA))
    
    print(stats.ttest_rel(list_Control,list_treatmentB))
    
    print(stats.ttest_rel(list_treatmentA,list_treatmentB))
    
    
    '''
    #解读:三组数据正态分布,方差剂型符合,只有treatmentA和treatmentB有显著区别
    use shapiro:
    p of normal: 0.747474491596
    data are normal distributed
    use shapiro:
    p of normal: 0.451944738626
    data are normal distributed
    use shapiro:
    p of normal: 0.564250946045
    data are normal distributed
    levene test:
    variances of groups are equal
    F_onewayResult(statistic=4.846087862380136, pvalue=0.015909958325622899)
    Ttest_relResult(statistic=0.99384151305794055, pvalue=0.3462672871440382)
    Ttest_relResult(statistic=-1.772083360883858, pvalue=0.11014394200586315)
    Ttest_relResult(statistic=-2.8463513880802855, pvalue=0.0192031388472628)
    '''
    
    
    # Nonparametric comparison of the same three groups; this line neither stores nor prints the result.
    kruskalwallis(list_groups)
    '''
    #与方差检验结果一致
    KruskalResult(statistic=7.9882287494437154, pvalue=0.018423755731471966)
    '''
    作者toby,qq:231469242

    spss检验与python统计结果一致,group与weight有显著关系;多重检验:treatmentA和treatmentB有显著差异。

    此样本tukey,lsd,bonferroni结果一致

     https://study.163.com/provider/400000000398149/index.htm?share=2&shareId=400000000398149( 欢迎关注博主主页,学习python视频资源,还有大量免费python经典文章)

  • 相关阅读:
    RxSwift 核心
    用 @media 控制图片显示大小
    关于媒体查询 @media 的用法
    再次搞懂弹性盒模型
    由淘宝想起,在css无法加载的情况下 依旧可以点击链接调整
    nth-child()和nth-of-type 用法
    如何消除img间的默认间隙
    由淘宝鼠标经过显示头像想起的 定位分析
    水平居中和垂直居中
    position 和 transform【鼠标经过显示一个div滑过】&导航效果应用 以及定位自己的总结
  • 原文地址:https://www.cnblogs.com/webRobot/p/6921453.html
Copyright © 2011-2022 走看看