zoukankan      html  css  js  c++  java
  • 001_python实现数据分析

    一、

    # coding:utf8
    # !/usr/bin/python
    # import numpy as np
    import pandas as pd
    import np
    
    def example2():
        """Print the ``describe()`` summary of a numeric ``Series``.

        Builds ``pd.Series([1, 2, 3])`` and writes its summary statistics to
        standard output.

        :return: None
        """
        s = pd.Series([1, 2, 3])
        # Single-argument call form: valid on both Python 2 and Python 3,
        # unlike the bare ``print x`` statement.
        print(s.describe())
        # Output recorded in the original write-up:
        # count    3.0
        # mean     2.0
        # std      1.0
        # min      1.0
        # 25%      1.5
        # 50%      2.0
        # 75%      2.5
        # max      3.0
        # dtype: float64
    def example3():
        """Print the ``describe()`` summary of a categorical (string) ``Series``.

        Builds ``pd.Series(['a', 'a', 'b', 'c'])`` and writes its summary to
        standard output.

        :return: None
        """
        s = pd.Series(['a', 'a', 'b', 'c'])
        # Single-argument call form: valid on both Python 2 and Python 3.
        print(s.describe())
        # Output recorded in the original write-up:
        # count     4
        # unique    3
        # top       a
        # freq      2
        # dtype: object
    def example4():
        """Print the ``describe()`` summary of a timestamp ``Series``.

        Builds a three-element ``Series`` of ``np.datetime64`` values (one date
        repeated twice) and writes its summary to standard output.

        :return: None
        """
        s = pd.Series([
            np.datetime64("2000-01-01"),
            np.datetime64("2010-01-01"),
            np.datetime64("2010-01-01"),
        ])
        # Single-argument call form: valid on both Python 2 and Python 3.
        print(s.describe())
        # Output recorded in the original write-up:
        # count                       3
        # unique                      2
        # top       2010-01-01 00:00:00
        # freq                        2
        # first     2000-01-01 00:00:00
        # last      2010-01-01 00:00:00
        # dtype: object
        #
        # NOTE(review): the layout above comes from the pandas version used when
        # this was written; newer pandas releases appear to summarise datetime
        # data with mean/min/quantiles/max instead -- confirm against the
        # installed version.
    def example5():
        """Print the default ``describe()`` summary of a mixed-type ``DataFrame``.

        The frame has one categorical, one numeric and one object column; the
        call below passes no ``include``/``exclude`` argument, so it relies on
        the default column selection of ``describe()`` (numeric fields only,
        per the original note).

        :return: None
        """
        df = pd.DataFrame({'categorical': pd.Categorical(['d', 'e', 'f']),
                           'numeric': [1, 2, 3],
                           'object': ['a', 'b', 'c']})
        # Single-argument call form: valid on both Python 2 and Python 3.
        print(df.describe())
        # Further variations, kept disabled as in the original:
        #
        # Describing all columns of a ``DataFrame`` regardless of data type.
        #   print(df.describe(include='all'))
        # Describing a column from a ``DataFrame`` by accessing it as an attribute.
        #   print(df.numeric.describe())
        # Including only numeric columns in a ``DataFrame`` description.
        #   print(df.describe(include=[np.number]))
        # Including only string columns in a ``DataFrame`` description.
        #   print(df.describe(include=[object]))
        # Including only categorical columns from a ``DataFrame`` description.
        #   print(df.describe(include=['category']))
        # Excluding numeric columns from a ``DataFrame`` description.
        #   print(df.describe(exclude=[np.number]))
        # Excluding object columns from a ``DataFrame`` description.
        #   print(df.describe(exclude=[object]))
        #
        # NOTE(review): the original spelled the object selector ``np.object``;
        # that alias is no longer available in recent NumPy releases, so the
        # builtin ``object`` is shown here -- confirm against the installed
        # NumPy before enabling these lines.
    def example1():
        """Print aggregate statistics computed over a ``describe()`` summary.

        Builds a two-column ``DataFrame`` from a nested dict, takes its
        ``describe()`` table and prints, for that table, the results of
        ``count()``, ``median()``, ``mode()``, ``std()``, ``var()`` and
        ``skew()``, each preceded by a one-line label.

        Note that every statistic is taken over the rows of the ``describe()``
        table (count/mean/std/min/25%/50%/75%/max), not over the raw data,
        which is why ``count()`` reports 8 per column.

        :return: None
        """
        dic1 = {'000': {'a': 1, 'b': 2, 'c': 3}, '001': {'d': 4, 'e': 5, 'f': 6}}
        df2 = pd.DataFrame(dic1)
        # The summary table is identical for every statistic below, so build it
        # once instead of recomputing it per print.
        summary = df2.describe()
        # ``summary`` as recorded in the original write-up:
        #        000  001
        # count  3.0  3.0
        # mean   2.0  5.0
        # std    1.0  1.0
        # min    1.0  4.0
        # 25%    1.5  4.5
        # 50%    2.0  5.0
        # 75%    2.5  5.5
        # max    3.0  6.0

        # The labels use explicit ``\n`` escapes: a plain double-quoted literal
        # cannot span physical lines. The single-argument print call form runs
        # on both Python 2 and Python 3.
        print("返回非NAN数据项数量=>count()\n{count}\n".format(count=summary.count()))
        print("返回中位数,等价第50位百分位数的值=>median()\n{median}\n".format(median=summary.median()))
        print("返回数据的众值=>mode()\n{mode}\n".format(mode=summary.mode()))
        print("返回数据的标准差(描述离散度)=>std()\n{std}\n".format(std=summary.std()))
        print("返回方差=>var()\n{var}\n".format(var=summary.var()))
        print("偏态系数(skewness,表示数据分布的对称程度)=>skew()\n{skew}\n".format(skew=summary.skew()))
    
    def main():
        """Script entry point: run the ``example1`` demonstration."""
        example1()


    # Run the demonstration only when executed as a script, not on import.
    if __name__ == "__main__":
        main()
    

    输出=>

    返回非NAN数据项数量=>count()
    000    8
    001    8
    dtype: int64
    返回中位数,等价第50位百分位数的值=>median()
    000    2.00
    001    4.75
    dtype: float64
    返回数据的众值=>mode()
       000  001
    0  1.0  5.0
    1  2.0  NaN
    2  3.0  NaN
    返回数据的标准差(描述离散度)=>std()
    000    0.801784
    001    1.603567
    dtype: float64
    返回方差=>var()
    000    0.642857
    001    2.571429
    dtype: float64
    偏态系数(skewness,表示数据分布的对称程度)=>skew()
    000    0.000000
    001   -1.299187
    dtype: float64
    

      

  • 相关阅读:
    BrowserSync,自动刷新,解放F5,去掉更新提示
    js获取手机系统语言
    块元素,行内元素,行内块区别
    原生js判断某个元素是否有指定的class名的几种方法
    如何实现调用console.log('good'.repeat(3))时输出goodgoodgood?
    数组如何去除重复数据,只保留一条
    Sentinel笔记-Flow流控规则
    sentinel笔记 NodeSelectorSlot,ClusterBuilderSlot
    Sentinel笔记--Slotchain
    Sentinel笔记-核心类
  • 原文地址:https://www.cnblogs.com/arun-python/p/10423077.html
Copyright © 2011-2022 走看看