zoukankan      html  css  js  c++  java
  • Python for Data Science

    Chapter 5 - Basic Math and Statistics

    Segment 6 - Delving into non-parametric methods using pandas and scipy

    import numpy as np
    import pandas as pd
    
    import matplotlib.pyplot as plt
    import seaborn as sb
    from pylab import rcParams
    
    import scipy
    from scipy.stats import spearmanr
    
    %matplotlib inline
    rcParams['figure.figsize'] = 14, 7
    plt.style.use('seaborn-whitegrid')
    

    The Spearman Rank Correlation

    # Path of the CSV file that is read into the cars DataFrame.
    address = '~/Data/mtcars.csv'

    # Short labels that replace whatever header the CSV file carries.
    column_names = [
        'car_names', 'mpg', 'cyl', 'disp', 'hp', 'drat',
        'wt', 'qsec', 'vs', 'am', 'gear', 'carb',
    ]

    cars = pd.read_csv(address)
    cars.columns = column_names

    # Show the first rows as a quick sanity check of the load.
    cars.head()
    
               car_names   mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  carb
    0          Mazda RX4  21.0    6  160.0  110  3.90  2.620  16.46   0   1     4     4
    1      Mazda RX4 Wag  21.0    6  160.0  110  3.90  2.875  17.02   0   1     4     4
    2         Datsun 710  22.8    4  108.0   93  3.85  2.320  18.61   1   1     4     1
    3     Hornet 4 Drive  21.4    6  258.0  110  3.08  3.215  19.44   1   0     3     1
    4  Hornet Sportabout  18.7    8  360.0  175  3.15  3.440  17.02   0   0     3     2
    # Draw seaborn's pair plot for the cars DataFrame loaded above.
    sb.pairplot(cars)
    
    <seaborn.axisgrid.PairGrid at 0x7f1891238e80>
    

    [Figure output_6_1: pair plot produced by sb.pairplot(cars)]

    # Restrict the pair plot to the four columns analysed in the rest of
    # this section.
    selected_columns = ['cyl', 'vs', 'am', 'gear']
    X = cars[selected_columns]
    sb.pairplot(X)
    
    <seaborn.axisgrid.PairGrid at 0x7f188b9b8ba8>
    

    [Figure output_7_1: pair plot produced by sb.pairplot(X) for cyl, vs, am and gear]

    # Pull out the four columns that are compared pairwise below.
    cyl, vs, am, gear = (cars[name] for name in ('cyl', 'vs', 'am', 'gear'))

    # spearmanr yields the rank correlation coefficient and its p-value;
    # only the coefficient is reported here.
    (spearmanr_coefficient, p_value) = spearmanr(cyl, vs)

    print('Spearman Rank Correlation Coefficient %0.3f' % spearmanr_coefficient)
    
    Spearman Rank Correlation Coefficient -0.814
    
    # Rank correlation between the cyl and am columns.
    (spearmanr_coefficient, p_value) = spearmanr(cyl, am)

    print('Spearman Rank Correlation Coefficient %0.3f' % spearmanr_coefficient)
    
    Spearman Rank Correlation Coefficient -0.522
    
    # Rank correlation between the cyl and gear columns.
    (spearmanr_coefficient, p_value) = spearmanr(cyl, gear)

    print('Spearman Rank Correlation Coefficient %0.3f' % spearmanr_coefficient)
    
    Spearman Rank Correlation Coefficient -0.564
    

    Chi-square test for independence

    from scipy.stats import chi2_contingency

    # Cross-tabulate cyl against am and test the two for independence.
    table = pd.crosstab(cyl, am)

    # chi2_contingency returns the statistic, the p-value, the degrees of
    # freedom and the expected frequencies; only the first two are printed.
    (chi2, p, dof, expected) = chi2_contingency(table.values)
    print('Chi-square statistic %0.3f p_value %0.3f' % (chi2, p))
    
    Chi-square statistic 8.741 p_value 0.013
    
    # Importing here keeps this cell runnable by itself.
    from scipy.stats import chi2_contingency

    # Cross-tabulate cyl against vs and test the two for independence.
    table = pd.crosstab(cyl, vs)

    # Only the statistic and the p-value are reported; dof and expected are
    # unpacked but not used further.
    (chi2, p, dof, expected) = chi2_contingency(table.values)
    print('Chi-square statistic %0.3f p_value %0.3f' % (chi2, p))
    
    Chi-square statistic 21.340 p_value 0.000
    
    # Importing here keeps this cell runnable by itself.
    from scipy.stats import chi2_contingency

    # Cross-tabulate cyl against gear and test the two for independence.
    table = pd.crosstab(cyl, gear)

    # Only the statistic and the p-value are reported; dof and expected are
    # unpacked but not used further.
    (chi2, p, dof, expected) = chi2_contingency(table.values)
    print('Chi-square statistic %0.3f p_value %0.3f' % (chi2, p))
    
    Chi-square statistic 18.036 p_value 0.001
  • 相关阅读:
    团队冲刺第四天
    团队冲刺第三天
    找1的个数
    寻找最水之王
    最优价格买书
    团队冲刺第二天
    团队冲刺第一天
    团队开发项目特点
    第一阶段冲刺站立会议报告——4
    第一阶段冲刺站立会议报告——3
  • 原文地址:https://www.cnblogs.com/keepmoving1113/p/14285316.html
Copyright © 2011-2022 走看看