zoukankan html css js c++ java

单因素测试综合法

python机器学习-乳腺癌细胞挖掘（博主亲自录制视频）

https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share

单因素方差+kruskalwallis

onewayTest.py

# -*- coding: utf-8 -*-
import numpy as np,variance_check
# additional packages
from scipy.stats.mstats import kruskalwallis
from scipy import stats

# Three sample groups, six observations each.
list_groups=[
    [27,2,4,18,7,9],
    [20,8,14,36,21,22],
    [34,31,3,23,30,6],
]
group1,group2,group3=list_groups

# Preliminary checks: normality of every group, homogeneity of variance,
# and whether the groups hold the same number of observations.
normality=variance_check.NormalTest(list_groups)
leveneResult=variance_check.Levene_test(*list_groups)
equal_lenth=variance_check.Equal_lenth(list_groups)


def Choose_mode(normality,leveneResult,group1,group2,group3,alpha=0.05):
    """Compare three groups with ANOVA or Kruskal-Wallis and report the verdict.

    One-way ANOVA (``stats.f_oneway``) is used only when both preliminary
    checks passed. In every other case the rank-based Kruskal-Wallis test
    is run on the three groups passed in, so the function always returns a
    boolean.

    Args:
        normality: truthy if all groups passed the normality check.
        leveneResult: truthy if the groups passed the equal-variance check.
        group1, group2, group3: sequences of numeric observations.
        alpha: significance level; p-values below it count as significant.

    Returns:
        True if the chosen test gives p < alpha, otherwise False.
    """
    if normality and leveneResult:
        print("Use anova test:")
        statistic, p = stats.f_oneway(group1, group2, group3)
        print("statistic,p: %s %s" % (statistic, p))
        if p < alpha:
            print("There is significant difference")
            return True
        print("There is no significant difference")
        return False

    # At least one ANOVA assumption failed: use the non-parametric test on
    # the groups given as arguments (not on a module-level variable).
    print("Use kruskawallis test:")
    h, p = kruskalwallis(group1, group2, group3)
    print("H value: %s" % h)
    print("p %s" % p)

    # Print the results
    if p < alpha:
        print('There is a significant difference between the cities.')
        return True
    print('No significant difference between the cities.')
    return False


# Run the comparison on the three sample groups using the preliminary results.
Choose_mode(normality, leveneResult, *list_groups)

作者toby，qq:231469242

variance_check.py

# -*- coding: utf-8 -*-
'''
用于方差齐性检验
正态性检验
配对相等检验
'''
import scipy,math
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# additional packages
from statsmodels.stats.diagnostic import lillifors
#多重比较
from statsmodels.sandbox.stats.multicomp import multipletests
#用于排列组合
import itertools
'''
#测试数据
group1=[2,3,7,2,6]
group2=[10,8,7,5,10]
group3=[10,13,14,13,15]
list_groups=[group1,group2,group3]
list_total=group1+group2+group3
'''
# Significance level (alpha).
# NOTE(review): none of the functions visible in this file read this name.
a=0.05

#Normality test
def _report_normality(method_label, p_value, alpha):
    """Print which test ran, its p-value and the verdict; return the verdict.

    The sample is treated as normal (True) unless p_value < alpha.
    """
    looks_normal = not p_value < alpha
    print(method_label)
    print("p of normal: %s" % p_value)
    if looks_normal:
        print("data are normal distributed")
    else:
        print("data are not normal distributed")
    return looks_normal


def check_normality(testData, alpha=0.05):
    """Test one sample for normality, choosing the test by sample size.

    Test selection by ``len(testData)``:
        * up to 20      -> Shapiro-Wilk (``stats.shapiro``)
        * 21 to 49      -> D'Agostino-Pearson (``stats.normaltest``)
        * 50 to 300     -> Lilliefors (``lillifors``)
        * more than 300 -> Kolmogorov-Smirnov (``stats.kstest``)

    Args:
        testData: sequence of numeric observations.
        alpha: significance level; p-values below it reject normality.

    Returns:
        True if normality is not rejected, otherwise False.
    """
    sample_size = len(testData)

    if 20 < sample_size < 50:
        p_value = stats.normaltest(testData)[1]
        return _report_normality("use normaltest", p_value, alpha)

    if sample_size <= 20:
        p_value = stats.shapiro(testData)[1]
        return _report_normality("use shapiro:", p_value, alpha)

    if sample_size <= 300:
        p_value = lillifors(testData)[1]
        return _report_normality("use lillifors:", p_value, alpha)

    # kstest(..., 'norm') with no args tests against the standard normal
    # N(0, 1), which rejects any sample whose mean/scale differ from 0/1.
    # Compare against a normal with the sample's own mean and standard
    # deviation instead. The parameters are estimated from the data, so the
    # p-value is approximate.
    location = np.mean(testData)
    spread = np.std(testData, ddof=1)
    p_value = stats.kstest(testData, 'norm', args=(location, spread))[1]
    return _report_normality("use kstest:", p_value, alpha)
 
 
#Run the normality check on every sample group
def NormalTest(list_groups):
    """Return True unless some group is judged non-normal.

    Groups are checked in order with check_normality; checking stops at the
    first group for which it returns False, so later groups are not tested.
    An empty list_groups yields True.
    """
    return not any(check_normality(sample) == False for sample in list_groups)
             
#Pairwise combinations of groups
def Combination(list_groups):
    """Return every unordered pair of groups as a list of 2-tuples.

    Pairs keep the input order, e.g. for groups [a, b, c] the result is
    [(a, b), (a, c), (b, c)]. Fewer than two groups yields an empty list.

    Args:
        list_groups: sequence of sample groups.

    Returns:
        List of (group_i, group_j) tuples with i < j.
    """
    # Only the size-2 combinations are needed, so ask for them directly
    # instead of enumerating every subset size and discarding the rest.
    return list(itertools.combinations(list_groups, 2))
'''
Out[57]:
[[([2, 3, 7, 2, 6], [10, 8, 7, 5, 10]),
  ([2, 3, 7, 2, 6], [10, 13, 14, 13, 15]),
  ([10, 8, 7, 5, 10], [10, 13, 14, 13, 15])]]
'''       


#Homogeneity-of-variance check
def Levene_test(group1,group2,group3,alpha=0.05):
    """Run Levene's test on three groups and report whether variances match.

    Args:
        group1, group2, group3: sequences of numeric observations.
        alpha: significance level; p-values below it reject equal variances.

    Returns:
        True if equal variances are not rejected, otherwise False.
    """
    p_value = scipy.stats.levene(group1, group2, group3)[1]
    variances_equal = not p_value < alpha
    print("levene test:")
    if variances_equal:
        print("variances of groups are equal")
    else:
        print("variances of groups are not equal")
    return variances_equal
          
'''
H0成立，三组数据方差无显著差异
Out[9]: LeveneResult(statistic=0.24561403508771934, pvalue=0.7860617221429711)
'''

#Check whether all groups hold the same number of usable observations.
#Per the original author's note, unequal group sizes are unsuitable for
#Tukey-style post-hoc methods.
def _count_finite(values):
    """Count the entries of values that are neither NaN nor infinite."""
    kept = 0
    for value in values:
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            # Entries that cannot be read as a float were counted by the
            # string-based filter this replaces; keep counting them.
            kept += 1
            continue
        if not (math.isnan(number) or math.isinf(number)):
            kept += 1
    return kept


def Equal_lenth(list_groups):
    """Return True if every group has the same count of finite values.

    NaN and infinite entries are ignored when counting. Any number of
    groups is accepted; zero or one group trivially yields True.

    Args:
        list_groups: sequence of sample groups.

    Returns:
        True if all groups have equal usable length, otherwise False.
    """
    usable_lengths = set(_count_finite(group) for group in list_groups)
    return len(usable_lengths) <= 1


'''
#返回True or false 
normality=NormalTest(list_groups)   
leveneResult=Levene_test(list_groups[0],list_groups[1],list_groups[2])  
'''

作者toby，qq:231469242

练习例题

8.2 Multiple Groups
The following example is taken from the really good, but somewhat advanced book
by A.J. Dobson: “An Introduction to Generalized Linear Models”:
• Get the data
The file Data/data_others/Table 6.6 Plant experiment.xls, which can also
be found on https://github.com/thomas-haslwanter/statsintro/tree/master/Data/data_others,
contains data from an experiment with plants in three different
growing conditions. Read the data into Python. Hint: use the module xlrd.
• Perform an ANOVA
Are the three groups different? (Correct answer: yes, they are.)
• Multiple Comparisons
Using the Tukey test, which of the pairs are different? (Correct answer: only
TreatmentA and TreatmentB differ.)
• Kruskal–Wallis
Would a nonparametric comparison lead to a different result? (Correct answer:
no.)

# -*- coding: utf-8 -*-

# Import standard packages
import variance_check
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
from scipy.stats.mstats import kruskalwallis

# Data from an experiment with plants in three different growing conditions.
# The original author follows the ANOVA with paired t-tests as the post-hoc
# multiple comparison.
# NOTE(review): ttest_rel treats the two samples as paired; if the three
# conditions used independent plants, an independent-samples comparison
# would be the usual choice -- confirm before relying on these p-values.
list_groups=[
    [4.17,5.58,5.18,6.11,4.5,4.61,5.17,4.53,5.33,5.14],    # control
    [4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69],   # treatment A
    [6.31,5.12,5.54,5.5,5.37,5.29,4.92,6.15,5.8,5.26],     # treatment B
]
list_Control,list_treatmentA,list_treatmentB=list_groups

# Preliminary checks: normality of each group and homogeneity of variance.
normality=variance_check.NormalTest(list_groups)
leveneResult=variance_check.Levene_test(*list_groups)

# Omnibus one-way ANOVA across the three conditions.
print(stats.f_oneway(*list_groups))

# Pairwise follow-up tests, in the order control/A, control/B, A/B.
for first,second in (
        (list_Control,list_treatmentA),
        (list_Control,list_treatmentB),
        (list_treatmentA,list_treatmentB)):
    print(stats.ttest_rel(first,second))


'''
#解读：三组数据符合正态分布，方差齐性成立，只有treatmentA和treatmentB有显著区别
use shapiro:
p of normal: 0.747474491596
data are normal distributed
use shapiro:
p of normal: 0.451944738626
data are normal distributed
use shapiro:
p of normal: 0.564250946045
data are normal distributed
levene test:
variances of groups are equal
F_onewayResult(statistic=4.846087862380136, pvalue=0.015909958325622899)
Ttest_relResult(statistic=0.99384151305794055, pvalue=0.3462672871440382)
Ttest_relResult(statistic=-1.772083360883858, pvalue=0.11014394200586315)
Ttest_relResult(statistic=-2.8463513880802855, pvalue=0.0192031388472628)
'''


# Non-parametric cross-check of the ANOVA result. The result is printed so it
# is visible when the file runs as a script, not only in an interactive
# console; each group is passed as its own argument.
print(kruskalwallis(*list_groups))
'''
#与方差检验结果一致
KruskalResult(statistic=7.9882287494437154, pvalue=0.018423755731471966)
'''

作者toby，qq:231469242

SPSS检验与Python统计结果一致：group与weight有显著关系；多重检验显示treatmentA和treatmentB之间有显著差异。

此样本tukey，lsd,bonferroni结果一致

https://study.163.com/provider/400000000398149/index.htm?share=2&shareId=400000000398149（欢迎关注博主主页，学习python视频资源，还有大量免费python经典文章）

查看全文

相关阅读:
数据库之联合查询和连接查询
 数据库要素 ER
数据库事务
 关系数据库常用名词及解释
 数据库索引
 关于数据库主键和外键（终于弄懂啦）
coredata 删除与更新
 Predicate Programming Guide
NSPredicate
coreData-Fetching Managed Objects

原文地址：https://www.cnblogs.com/webRobot/p/6921453.html