zoukankan      html  css  js  c++  java
  • 001_python实现数据分析

    一、

    # coding:utf8
    # !/usr/bin/python
    # import numpy as np
    import pandas as pd
    import np
    
    def example2():
        """Print summary statistics for a numeric ``Series``.

        :return: None; the ``describe()`` result is written to stdout.
        """
        s = pd.Series([1, 2, 3])
        # A parenthesised single-argument print is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print(s.describe())
        # Sample output recorded by the original author:
        #   count    3.0
        #   mean     2.0
        #   std      1.0
        #   min      1.0
        #   25%      1.5
        #   50%      2.0
        #   75%      2.5
        #   max      3.0
        #   dtype: float64
    def example3():
        """Print summary statistics for a categorical (string) ``Series``.

        :return: None; the ``describe()`` result is written to stdout.
        """
        s = pd.Series(['a', 'a', 'b', 'c'])
        # A parenthesised single-argument print is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print(s.describe())
        # Sample output recorded by the original author:
        #   count     4
        #   unique    3
        #   top       a
        #   freq      2
        #   dtype: object
    def example4():
        """Print summary statistics for a timestamp ``Series``.

        :return: None; the ``describe()`` result is written to stdout.
        """
        # Imported locally so this example does not depend on how the module
        # header binds the name ``np``.
        import numpy as np

        s = pd.Series([
            np.datetime64("2000-01-01"),
            np.datetime64("2010-01-01"),
            np.datetime64("2010-01-01"),
        ])
        # A parenthesised single-argument print is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print(s.describe())
        # Sample output recorded by the original author (the exact rows
        # shown for datetime data may differ between pandas versions):
        #   count                       3
        #   unique                      2
        #   top       2010-01-01 00:00:00
        #   freq                        2
        #   first     2000-01-01 00:00:00
        #   last      2010-01-01 00:00:00
        #   dtype: object
    def example5():
        """Print the default ``describe()`` summary of a mixed-type ``DataFrame``.

        The frame has one categorical, one numeric and one object column.
        By default only numeric fields are returned.

        :return: None; the ``describe()`` result is written to stdout.
        """
        df = pd.DataFrame({'categorical': pd.Categorical(['d', 'e', 'f']),
                           'numeric': [1, 2, 3],
                           'object': ['a', 'b', 'c']})
        # A parenthesised single-argument print is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print(df.describe())
        # Further variations noted by the original author (not executed here).
        # The original notes spelled the object dtype as ``np.object``; the
        # builtin ``object`` is used below because newer NumPy releases no
        # longer provide that alias.
        #
        # Describing all columns of a ``DataFrame`` regardless of data type:
        #   print(df.describe(include='all'))
        # Describing a column from a ``DataFrame`` by accessing it as an attribute:
        #   print(df.numeric.describe())
        # Including only numeric columns in a ``DataFrame`` description:
        #   print(df.describe(include=[np.number]))
        # Including only string columns in a ``DataFrame`` description:
        #   print(df.describe(include=[object]))
        # Including only categorical columns from a ``DataFrame`` description:
        #   print(df.describe(include=['category']))
        # Excluding numeric columns from a ``DataFrame`` description:
        #   print(df.describe(exclude=[np.number]))
        # Excluding object columns from a ``DataFrame`` description:
        #   print(df.describe(exclude=[object]))
    def example1():
        """Print several aggregate statistics of a ``describe()`` summary table.

        Builds a two-column ``DataFrame`` from a nested dict, takes its
        ``describe()`` summary, and prints the count, median, mode, standard
        deviation, variance and skewness of that summary, each under a
        heading.

        NOTE(review): every statistic is computed over the rows of the
        ``describe()`` table, not over the raw values in ``df2``. This matches
        the output recorded alongside the script; if statistics of the raw
        data were intended, call the methods on ``df2`` directly -- confirm.

        :return: None; all results are written to stdout.
        """
        dic1 = {'000': {'a': 1, 'b': 2, 'c': 3}, '001': {'d': 4, 'e': 5, 'f': 6}}
        df2 = pd.DataFrame(dic1)
        # df2.describe() yields (recorded by the original author):
        #          000  001
        #   count  3.0  3.0
        #   mean   2.0  5.0
        #   std    1.0  1.0
        #   min    1.0  4.0
        #   25%    1.5  4.5
        #   50%    2.0  5.0
        #   75%    2.5  5.5
        #   max    3.0  6.0

        # The summary table is identical for every statistic below, so it is
        # computed once instead of once per print.
        summary = df2.describe()

        # Each heading and its value are separated by explicit "\n" escapes;
        # a plain quoted string cannot span physical source lines.
        print("返回非NAN数据项数量=>count()\n{count}\n".format(count=summary.count()))
        print("返回中位数,等价第50位百分位数的值=>median()\n{median}\n".format(median=summary.median()))
        print("返回数据的众值=>mode()\n{mode}\n".format(mode=summary.mode()))
        print("返回数据的标准差(描述离散度)=>std()\n{std}\n".format(std=summary.std()))
        print("返回方差=>var()\n{var}\n".format(var=summary.var()))
        print("偏态系数(skewness,表示数据分布的对称程度)=>skew()\n{skew}\n".format(skew=summary.skew()))
    
    def main():
        """Script entry point: run the ``example1`` demonstration."""
        example1()


    if __name__ == '__main__':
        # Only run the demo when executed as a script, not when imported.
        main()
    

    输出=>

    返回非NAN数据项数量=>count()
    000    8
    001    8
    dtype: int64
    返回中位数,等价第50位百分位数的值=>median()
    000    2.00
    001    4.75
    dtype: float64
    返回数据的众值=>mode()
       000  001
    0  1.0  5.0
    1  2.0  NaN
    2  3.0  NaN
    返回数据的标准差(描述离散度)=>std()
    000    0.801784
    001    1.603567
    dtype: float64
    返回方差=>var()
    000    0.642857
    001    2.571429
    dtype: float64
    偏态系数(skewness,表示数据分布的对称程度)=>skew()
    000    0.000000
    001   -1.299187
    dtype: float64
    

      

  • 相关阅读:
    区别@ControllerAdvice 和@RestControllerAdvice
    Cannot determine embedded database driver class for database type NONE
    使用HttpClient 发送 GET、POST、PUT、Delete请求及文件上传
    Markdown语法笔记
    Property 'sqlSessionFactory' or 'sqlSessionTemplate' are required
    Mysql 查看连接数,状态 最大并发数(赞)
    OncePerRequestFilter的作用
    java连接MySql数据库 zeroDateTimeBehavior
    Intellij IDEA 安装lombok及使用详解
    ps -ef |grep xxx 输出的具体含义
  • 原文地址:https://www.cnblogs.com/arun-python/p/10423077.html
Copyright © 2011-2022 走看看