zoukankan      html  css  js  c++  java
  • python pandas笔记1

    1.pandas的Series使用介绍

    1.1 Series数据结构

    import pandas as pd
    #pd.Series?
    animals = ['Tiger', 'Bear', 'Moose']
    pd.Series(animals)
    #output: 
    """
    0    Tiger
    1     Bear
    2    Moose
    dtype: object
    """
    numbers = [1, 2, None]
    pd.Series(numbers)
    #output:
    """
    0    1.0
    1    2.0
    2    NaN
    dtype: float64
    """

    1.2  numpy使用

    import numpy as np
    np.nan == None
    #output: False
    np.nan == np.nan
    #output: False
    np.isnan(np.nan)
    #output: True

    1.3 字典和Series使用

    sports = {'Archery': 'Bhutan',
              'Golf': 'Scotland',
              'Sumo': 'Japan',
              'Taekwondo': 'South Korea'}
    s = pd.Series(sports)
    s
    #output:
    """
    Archery           Bhutan
    Golf            Scotland
    Sumo               Japan
    Taekwondo    South Korea
    dtype: object
    """
    #索引
    s.index
    """
    Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')
    """
    s = pd.Series(['Tiger', 'Bear', 'Moose'], index=['India', 'America', 'Canada'])
    s
    #output:
    """
    India      Tiger
    America     Bear
    Canada     Moose
    dtype: object
    """
    sports = {'Archery': 'Bhutan',
              'Golf': 'Scotland',
              'Sumo': 'Japan',
              'Taekwondo': 'South Korea'}
    s = pd.Series(sports, index=['Golf', 'Sumo', 'Hockey'])
    s
    #output:
    """
    Golf      Scotland
    Sumo         Japan
    Hockey         NaN
    dtype: object
    """

    2. 对Series的索引操作

    sports = {'Archery': 'Bhutan',
              'Golf': 'Scotland',
              'Sumo': 'Japan',
              'Taekwondo': 'South Korea'}
    s = pd.Series(sports)
    s
    # output:
    """
    Archery           Bhutan
    Golf            Scotland
    Sumo               Japan
    Taekwondo    South Korea
    dtype: object
    """

    2.1 iloc 和 loc 索引器(使用方括号 [] 取值,而不是函数调用)

    s.iloc[3] #数字索引
    #output: 'South Korea'
    s.loc['Golf'] #键值
    #output : 'Scotland'

    2.2 关于向量化操作

    #this creates a big series of random numbers
    s = pd.Series(np.random.randint(0,1000,10000))
    s.head()
    #output: 
    """
    0    396
    1    779
    2    752
    3     30
    4    493
    dtype: int64
    """

    时间对比 :

    import numpy as np
    %%timeit -n 100
    summary = 0
    for item in s:
        summary+=item
    # 100 loops, best of 3: 1.87 ms per loop
    
    #向量化操作
    %%timeit -n 100
    summary = np.sum(s)
    # 100 loops, best of 3: 100 µs per loop
    #broadcasting操作
    s+=2 #adds two to each item in s using broadcasting
    s.head()
    """
    0    398
    1    781
    2    754
    3     32
    4    495
    dtype: int64
    """

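    迭代(注意:Series.iteritems() 和 Series.set_value() 在新版 pandas 中已被移除,应分别改用 Series.items() 和 s.at[label] = value):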

    for label, value in s.iteritems():
        s.set_value(label, value+2)
    s.head()
    #output:
    """
    0    400
    1    783
    2    756
    3     34
    4    497
    dtype: int64
    """

    时间对比:

    #迭代的方法
    %%timeit -n 10
    s = pd.Series(np.random.randint(0,1000,10000))
    for label, value in s.iteritems():
        s.loc[label]= value+2
    #时间: 10 loops, best of 3: 1.62 s per loop
    # broadcasting 
    %%timeit -n 10
    s = pd.Series(np.random.randint(0,1000,10000))
    s+=2
    # 10 loops, best of 3: 472 µs per loop

    2.3 元素操作

    s = pd.Series([1, 2, 3])
    s.loc['Animal'] = 'Bears'
    s
    #output
    """
    0             1
    1             2
    2             3
    Animal    Bears
    dtype: object
    """

    2.4 Series的append()方法(注意:pandas 2.0 已移除 Series.append(),新版本请改用 pd.concat([original_sports, cricket_loving_countries]))

    original_sports = pd.Series({'Archery': 'Bhutan',
                                 'Golf': 'Scotland',
                                 'Sumo': 'Japan',
                                 'Taekwondo': 'South Korea'})
    cricket_loving_countries = pd.Series(['Australia',
                                          'Barbados',
                                          'Pakistan',
                                          'England'], 
                                       index=['Cricket',
                                              'Cricket',
                                              'Cricket',
                                              'Cricket'])
    all_countries = original_sports.append(cricket_loving_countries)

    original_sports未发生改变

    #未发生变化
    print(original_sports)
    """
    Archery           Bhutan
    Golf            Scotland
    Sumo               Japan
    Taekwondo    South Korea
    dtype: object
    """

    cricket_loving_countries的值:

    print(cricket_loving_countries)
    """
    Cricket    Australia
    Cricket     Barbados
    Cricket     Pakistan
    Cricket      England
    dtype: object
    """

    all_countries的值

    print(all_countries)
    """
    Archery           Bhutan
    Golf            Scotland
    Sumo               Japan
    Taekwondo    South Korea
    Cricket        Australia
    Cricket         Barbados
    Cricket         Pakistan
    Cricket          England
    dtype: object
    """
    print(all_countries.loc['Cricket'])
    """
    Cricket    Australia
    Cricket     Barbados
    Cricket     Pakistan
    Cricket      England
    dtype: object
    """

    3. DataFrame数据结构

    3.1 可以看做是多维的Series.

    import pandas as pd
    purchase_1 = pd.Series({'Name': 'Chris',
                            'Item Purchased': 'Dog Food',
                            'Cost': 22.50})
    purchase_2 = pd.Series({'Name': 'Kevyn',
                            'Item Purchased': 'Kitty Litter',
                            'Cost': 2.50})
    purchase_3 = pd.Series({'Name': 'Vinod',
                            'Item Purchased': 'Bird Seed',
                            'Cost': 5.00})
    df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])
    print(df.head())
    """
             Cost Item Purchased   Name
    Store 1  22.5       Dog Food  Chris
    Store 1   2.5   Kitty Litter  Kevyn
    Store 2   5.0      Bird Seed  Vinod
    """

    3.2 loc()操作

    print(df.loc['Store 2'])
    """
    Cost                      5
    Item Purchased    Bird Seed
    Name                  Vinod
    Name: Store 2, dtype: object
    """
    print(df.loc['Store 1'])
    """
             Cost Item Purchased   Name
    Store 1  22.5       Dog Food  Chris
    Store 1   2.5   Kitty Litter  Kevyn
    """
    df.loc['Store 1', 'Cost']
    """
    Store 1    22.5
    Store 1     2.5
    Name: Cost, dtype: float64
    """

    3.3 转置

    print(df.T)
    """
                     Store 1       Store 1    Store 2
    Cost                22.5           2.5          5
    Item Purchased  Dog Food  Kitty Litter  Bird Seed
    Name               Chris         Kevyn      Vinod
    """
    print(df.T.loc['Cost'])
    """
    Store 1    22.5
    Store 1     2.5
    Store 2       5
    Name: Cost, dtype: object
    """
    print(df['Cost'])
    """
    Store 1    22.5
    Store 1     2.5
    Store 2     5.0
    Name: Cost, dtype: float64
    """
    print(df.loc['Store 1']['Cost'])
    """
    Store 1    22.5
    Store 1     2.5
    Name: Cost, dtype: float64
    """
    print(df.loc[:,['Name', 'Cost']])
    """
              Name  Cost
    Store 1  Chris  22.5
    Store 1  Kevyn   2.5
    Store 2  Vinod   5.0
    """

    3.4 关于drop()方法

    print(df.drop('Store 1'))
    """
             Cost Item Purchased   Name
    Store 2   5.0      Bird Seed  Vinod
    """
    #但是原来的df没有发生变化
    print(df)
    """
             Cost Item Purchased   Name
    Store 1  22.5       Dog Food  Chris
    Store 1   2.5   Kitty Litter  Kevyn
    Store 2   5.0      Bird Seed  Vinod
    """

    3.5 copy()方法

    copy_df = df.copy()
    copy_df = copy_df.drop('Store 1')
    print(copy_df)
    """
             Cost Item Purchased   Name Location
    Store 2   5.0      Bird Seed  Vinod     None
    """
    # copy_df.drop?

    3.6 del 操作和加列操作

    del copy_df['Name']
    print(copy_df) 
    """
             Cost Item Purchased Location
    Store 2   5.0      Bird Seed     None
    """
    df['Location'] = None
    print(df)
    """
             Cost Item Purchased   Name Location
    Store 1  22.5       Dog Food  Chris     None
    Store 1   2.5   Kitty Litter  Kevyn     None
    Store 2   5.0      Bird Seed  Vinod     None
    """
    The Safest Way to Get what you Want is to Try and Deserve What you Want.
  • 相关阅读:
    FreeMarker的<#if></#if>标签
    ubuntu的dpkg命令安装和卸载软件
    ubuntu建立软链接注意事项
    halo的工作目录,有一个是在代码里配置的,硬编码了
    Springboot的多环境配置
    idea中的springboot+gradle项目报错springboot configuration annotation processor not found in classpath
    maven中的pom.xml中的scope的作用
    设置idea的快捷键组合 设置为默认
    springboot无法查询到后台的数据
    ssh互信条件下的多机拷贝脚本和执行远程命令
  • 原文地址:https://www.cnblogs.com/Shinered/p/9213520.html
Copyright © 2011-2022 走看看