zoukankan      html  css  js  c++  java
  • pandas --index ,reindex, set_index, reset_index , reindex_like函数 之 reindex

     
    reindex(
        labels=None,
        index=None,
        columns=None,
        axis=None,
        method=None,
        copy=True,
        level=None,
        fill_value=nan,
        limit=None,
        tolerance=None,
    )
    Docstring:
    Conform DataFrame to new index with optional filling logic.
    or
    Conform Series to new index with optional filling logic.
    Parameters
    ----------
    
    index : array-like, optional
        New labels / index to conform to, should be specified using
        keywords. Preferably an Index object to avoid duplicating data.
    
    method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
        Method to use for filling holes in reindexed DataFrame.
        Please note: this is only applicable to DataFrames/Series with a
        monotonically increasing/decreasing index.
    
        * None (default): don't fill gaps
        * pad / ffill: Propagate last valid observation forward to next
          valid.
        * backfill / bfill: Use next valid observation to fill gap.
        * nearest: Use nearest valid observations to fill gap.
    
    copy : bool, default True
        Return a new object, even if the passed indexes are the same.
    level : int or name
        Broadcast across a level, matching Index values on the
        passed MultiIndex level.
    fill_value : scalar, default np.NaN
        Value to use for missing values. Defaults to NaN, but can be any
        "compatible" value.
    limit : int, default None
        Maximum number of consecutive elements to forward or backward fill.
    tolerance : optional
        Maximum distance between original and new labels for inexact
        matches. The values of the index at the matching locations must
        satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
    
        Tolerance may be a scalar value, which applies the same tolerance
        to all values, or list-like, which applies variable tolerance per
        element. List-like includes list, tuple, array, Series, and must be
        the same size as the index and its dtype must exactly match the
        index's type.

      

    # Walk-through of Series/DataFrame ``reindex`` and ``drop``.
    # Every ``print`` is followed by a comment block with its expected output.
    import numpy as np
    import pandas as pd
    from pandas import Series, DataFrame

    # Fixed seed so that the random DataFrame below (and therefore every
    # expected-output block derived from it) is reproducible.
    np.random.seed(666)

    # ---- Series.reindex ----
    s1 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
    print(s1)
    # Expected output:
    # A    1
    # B    2
    # C    3
    # D    4
    # dtype: int64

    # Re-specify the index. Labels that did not exist before ('E' here) can be
    # given a value through fill_value instead of the default NaN.
    print(s1.reindex(index=['A', 'B', 'C', 'D', 'E'], fill_value=10))
    # Expected output:
    # A     1
    # B     2
    # C     3
    # D     4
    # E    10
    # dtype: int64

    s2 = Series(['A', 'B', 'C'], index=[1, 5, 10])
    print(s2)
    # Expected output:
    # 1     A
    # 5     B
    # 10    C
    # dtype: object

    # Change the index: extend s2 to the 15 labels 0..14.
    # Labels that are missing from the original index get NaN by default.
    print(s2.reindex(index=range(15)))
    # Expected output:
    # 0     NaN
    # 1       A
    # 2     NaN
    # 3     NaN
    # 4     NaN
    # 5       B
    # 6     NaN
    # 7     NaN
    # 8     NaN
    # 9     NaN
    # 10      C
    # 11    NaN
    # 12    NaN
    # 13    NaN
    # 14    NaN
    # dtype: object

    # ffill = forward fill: a new label without a value takes the value of the
    # closest preceding original label. Label 0 stays NaN because nothing
    # precedes it.
    print(s2.reindex(index=range(15), method='ffill'))
    # Expected output:
    # 0     NaN
    # 1       A
    # 2       A
    # 3       A
    # 4       A
    # 5       B
    # 6       B
    # 7       B
    # 8       B
    # 9       B
    # 10      C
    # 11      C
    # 12      C
    # 13      C
    # 14      C
    # dtype: object

    # ---- DataFrame.reindex ----
    # rand(5, 5) draws the same 25 values, in the same row-major order, as
    # rand(25).reshape([5, 5]). Row label 'C' is left out on purpose.
    df1 = DataFrame(
        np.random.rand(5, 5),
        index=['A', 'B', 'D', 'E', 'F'],
        columns=['c1', 'c2', 'c3', 'c4', 'c5'],
    )
    print(df1)
    # Expected output:
    #          c1        c2        c3        c4        c5
    # A  0.700437  0.844187  0.676514  0.727858  0.951458
    # B  0.012703  0.413588  0.048813  0.099929  0.508066
    # D  0.200248  0.744154  0.192892  0.700845  0.293228
    # E  0.774479  0.005109  0.112858  0.110954  0.247668
    # F  0.023236  0.727321  0.340035  0.197503  0.909180

    # Add a new row label to the DataFrame; the new row 'C' is automatically
    # filled with NaN.
    print(df1.reindex(index=['A', 'B', 'C', 'D', 'E', 'F']))
    # Expected output (row 'C' is all NaN):
    #          c1        c2        c3        c4        c5
    # A  0.700437  0.844187  0.676514  0.727858  0.951458
    # B  0.012703  0.413588  0.048813  0.099929  0.508066
    # C       NaN       NaN       NaN       NaN       NaN
    # D  0.200248  0.744154  0.192892  0.700845  0.293228
    # E  0.774479  0.005109  0.112858  0.110954  0.247668
    # F  0.023236  0.727321  0.340035  0.197503  0.909180

    # Extending the columns works the same way.
    print(df1.reindex(columns=['c1', 'c2', 'c3', 'c4', 'c5', 'c6']))
    # Expected output:
    #          c1        c2        c3        c4        c5  c6
    # A  0.700437  0.844187  0.676514  0.727858  0.951458 NaN
    # B  0.012703  0.413588  0.048813  0.099929  0.508066 NaN
    # D  0.200248  0.744154  0.192892  0.700845  0.293228 NaN
    # E  0.774479  0.005109  0.112858  0.110954  0.247668 NaN
    # F  0.023236  0.727321  0.340035  0.197503  0.909180 NaN

    # Shrinking the index: reindexing with fewer labels keeps only those
    # labels, which has the effect of a selection.
    print(s1.reindex(['A', 'B']))
    # Expected output:
    # A    1
    # B    2
    # dtype: int64

    print(df1.reindex(index=['A', 'B']))
    # Expected output (again a selection of rows; df1 is unchanged, so the
    # values are the same as rows A and B printed above):
    #          c1        c2        c3        c4        c5
    # A  0.700437  0.844187  0.676514  0.727858  0.951458
    # B  0.012703  0.413588  0.048813  0.099929  0.508066

    # ---- drop ----
    # For a Series, drop discards the given index label(s).
    print(s1.drop('A'))
    # Expected output (only three entries remain):
    # B    2
    # C    3
    # D    4
    # dtype: int64

    # DataFrame.drop('A') removes a whole row; axis=0 (rows) is the default.
    print(df1.drop('A', axis=0))
    # Expected output:
    #          c1        c2        c3        c4        c5
    # B  0.012703  0.413588  0.048813  0.099929  0.508066
    # D  0.200248  0.744154  0.192892  0.700845  0.293228
    # E  0.774479  0.005109  0.112858  0.110954  0.247668
    # F  0.023236  0.727321  0.340035  0.197503  0.909180

    # axis=1 drops a column instead.
    print(df1.drop('c1', axis=1))
    # Expected output (column c1 removed):
    #          c2        c3        c4        c5
    # A  0.844187  0.676514  0.727858  0.951458
    # B  0.413588  0.048813  0.099929  0.508066
    # D  0.744154  0.192892  0.700845  0.293228
    # E  0.005109  0.112858  0.110954  0.247668
    # F  0.727321  0.340035  0.197503  0.909180
  • 相关阅读:
    JZOJ5809 数羊
    P3313 [SDOI2014]旅行
    2019.10.22 NOIP模拟测试 day2
    P4322 [JSOI2016]最佳团体
    P1850 换教室
    P3225 [HNOI2012]矿场搭建
    P2607 [ZJOI2008]骑士
    2019.10.21 NOIP模拟测试 day1
    AFO
    禁止加载浏览器图片
  • 原文地址:https://www.cnblogs.com/vincent-sh/p/12861591.html
Copyright © 2011-2022 走看看