zoukankan      html  css  js  c++  java
  • pandas --index ,reindex, set_index, reset_index , reindex_like函数 之 reindex

     
    reindex(
        labels=None,
        index=None,
        columns=None,
        axis=None,
        method=None,
        copy=True,
        level=None,
        fill_value=nan,
        limit=None,
        tolerance=None,
    )
    Docstring:
    Conform DataFrame to new index with optional filling logic.
    or
    Conform Series to new index with optional filling logic.
    Parameters
    ----------
    
    index : array-like, optional
        New labels / index to conform to, should be specified using
        keywords. Preferably an Index object to avoid duplicating data.
    
    method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
        Method to use for filling holes in reindexed DataFrame.
        Please note: this is only applicable to DataFrames/Series with a
        monotonically increasing/decreasing index.
    
        * None (default): don't fill gaps
        * pad / ffill: Propagate last valid observation forward to next
          valid.
        * backfill / bfill: Use next valid observation to fill gap.
        * nearest: Use nearest valid observations to fill gap.
    
    copy : bool, default True
        Return a new object, even if the passed indexes are the same.
    level : int or name
        Broadcast across a level, matching Index values on the
        passed MultiIndex level.
    fill_value : scalar, default np.NaN
        Value to use for missing values. Defaults to NaN, but can be any
        "compatible" value.
    limit : int, default None
        Maximum number of consecutive elements to forward or backward fill.
    tolerance : optional
        Maximum distance between original and new labels for inexact
        matches. The values of the index at the matching locations must
        satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
    
        Tolerance may be a scalar value, which applies the same tolerance
        to all values, or list-like, which applies variable tolerance per
        element. List-like includes list, tuple, array, Series, and must be
        the same size as the index and its dtype must exactly match the
        index's type.

      

    import numpy as np
    import pandas as pd
    from pandas import Series, DataFrame
    
    # Demo script: Series/DataFrame ``reindex`` and ``drop``.
    # Every "Expected output" comment below is derived from the objects
    # built in this script; the random data is seeded so the numbers are
    # reproducible from run to run.
    np.random.seed(666)

    # --- Series.reindex ---------------------------------------------------
    s1 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
    print(s1)
    # Expected output:
    #   A    1
    #   B    2
    #   C    3
    #   D    4
    #   dtype: int64

    # Re-specify the index; labels that did not exist before ('E') are
    # filled with ``fill_value`` instead of NaN.
    print(s1.reindex(index=['A', 'B', 'C', 'D', 'E'], fill_value=10))
    # Expected output:
    #   A     1
    #   B     2
    #   C     3
    #   D     4
    #   E    10
    #   dtype: int64

    s2 = Series(['A', 'B', 'C'], index=[1, 5, 10])
    print(s2)
    # Expected output:
    #   1     A
    #   5     B
    #   10    C
    #   dtype: object

    # Grow the index of s2 to 15 labels (0..14).
    # Labels that are not present in the original index default to NaN.
    print(s2.reindex(index=range(15)))
    # Expected output:
    #   0     NaN
    #   1       A
    #   2     NaN
    #   3     NaN
    #   4     NaN
    #   5       B
    #   6     NaN
    #   7     NaN
    #   8     NaN
    #   9     NaN
    #   10      C
    #   11    NaN
    #   12    NaN
    #   13    NaN
    #   14    NaN
    #   dtype: object

    # method='ffill' (forward fill): a new label takes the value of the
    # closest preceding label of the original index. Label 0 has no
    # preceding label, so it stays NaN.
    print(s2.reindex(index=range(15), method='ffill'))
    # Expected output:
    #   0     NaN
    #   1       A
    #   2       A
    #   3       A
    #   4       A
    #   5       B
    #   6       B
    #   7       B
    #   8       B
    #   9       B
    #   10      C
    #   11      C
    #   12      C
    #   13      C
    #   14      C
    #   dtype: object

    # --- DataFrame.reindex ------------------------------------------------
    # Note that row label 'C' is deliberately missing from the index.
    df1 = DataFrame(
        np.random.rand(25).reshape([5, 5]),
        index=['A', 'B', 'D', 'E', 'F'],
        columns=['c1', 'c2', 'c3', 'c4', 'c5'],
    )
    print(df1)
    # Expected output:
    #            c1        c2        c3        c4        c5
    #   A  0.700437  0.844187  0.676514  0.727858  0.951458
    #   B  0.012703  0.413588  0.048813  0.099929  0.508066
    #   D  0.200248  0.744154  0.192892  0.700845  0.293228
    #   E  0.774479  0.005109  0.112858  0.110954  0.247668
    #   F  0.023236  0.727321  0.340035  0.197503  0.909180

    # Add a new row label ('C') to the DataFrame.
    # The new row is automatically filled with NaN.
    print(df1.reindex(index=['A', 'B', 'C', 'D', 'E', 'F']))
    # Expected output (row C is all NaN):
    #            c1        c2        c3        c4        c5
    #   A  0.700437  0.844187  0.676514  0.727858  0.951458
    #   B  0.012703  0.413588  0.048813  0.099929  0.508066
    #   C       NaN       NaN       NaN       NaN       NaN
    #   D  0.200248  0.744154  0.192892  0.700845  0.293228
    #   E  0.774479  0.005109  0.112858  0.110954  0.247668
    #   F  0.023236  0.727321  0.340035  0.197503  0.909180

    # Adding a column works the same way.
    print(df1.reindex(columns=['c1', 'c2', 'c3', 'c4', 'c5', 'c6']))
    # Expected output:
    #            c1        c2        c3        c4        c5  c6
    #   A  0.700437  0.844187  0.676514  0.727858  0.951458 NaN
    #   B  0.012703  0.413588  0.048813  0.099929  0.508066 NaN
    #   D  0.200248  0.744154  0.192892  0.700845  0.293228 NaN
    #   E  0.774479  0.005109  0.112858  0.110954  0.247668 NaN
    #   F  0.023236  0.727321  0.340035  0.197503  0.909180 NaN

    # --- Shrinking the index ----------------------------------------------
    # Reindexing with a subset of the labels acts like a selection.
    print(s1.reindex(['A', 'B']))
    # Expected output:
    #   A    1
    #   B    2
    #   dtype: int64

    print(df1.reindex(index=['A', 'B']))
    # Expected output (the same rows A and B as in df1 above):
    #            c1        c2        c3        c4        c5
    #   A  0.700437  0.844187  0.676514  0.727858  0.951458
    #   B  0.012703  0.413588  0.048813  0.099929  0.508066

    # --- drop -------------------------------------------------------------
    # For a Series, drop() discards the given index label(s).
    print(s1.drop('A'))
    # Expected output (only three entries remain):
    #   B    2
    #   C    3
    #   D    4
    #   dtype: int64

    # DataFrame.drop('A') removes a whole row; axis=0 (rows) is the default.
    print(df1.drop('A', axis=0))
    # Expected output:
    #            c1        c2        c3        c4        c5
    #   B  0.012703  0.413588  0.048813  0.099929  0.508066
    #   D  0.200248  0.744154  0.192892  0.700845  0.293228
    #   E  0.774479  0.005109  0.112858  0.110954  0.247668
    #   F  0.023236  0.727321  0.340035  0.197503  0.909180

    # axis=1 removes a column instead: here column c1 is dropped.
    print(df1.drop('c1', axis=1))
    # Expected output:
    #            c2        c3        c4        c5
    #   A  0.844187  0.676514  0.727858  0.951458
    #   B  0.413588  0.048813  0.099929  0.508066
    #   D  0.744154  0.192892  0.700845  0.293228
    #   E  0.005109  0.112858  0.110954  0.247668
    #   F  0.727321  0.340035  0.197503  0.909180
  • 相关阅读:
    The Mac Application Environment 不及格的程序员
    Xcode Plugin: Change Code In Running App Without Restart 不及格的程序员
    The property delegate of CALayer cause Crash. 不及格的程序员
    nil localizedTitle in SKProduct 不及格的程序员
    InApp Purchase 不及格的程序员
    Safari Web Content Guide 不及格的程序员
    在Mac OS X Lion 安装 XCode 3.2 不及格的程序员
    illustrate ARC with graphs 不及格的程序员
    Viewing iPhoneOptimized PNGs 不及格的程序员
    What is the dSYM? 不及格的程序员
  • 原文地址:https://www.cnblogs.com/vincent-sh/p/12861591.html
Copyright © 2011-2022 走看看