zoukankan      html  css  js  c++  java
  • Series对象与DataFrame对象

    import pandas as pd
    import numpy as np
    
    # Ways to build a Series object.

    # An explicit index does not have to be ordered or contiguous:
    # pd.Series(data, index=index)
    x = pd.Series(data=[1, 2, 3, 4], index=[3, 4, 5, 6])
    print(x)

    # A scalar is repeated for every index label (similar to broadcasting).
    x = pd.Series(data="Hanks", index=[1, 2, 4, 5])
    print(x)

    # With a dict the keys act as labels; when an index is also given, only
    # the key/value pairs whose keys appear in that index are kept.
    x = pd.Series(data={3: 'c', 2: 'b', 1: 'a'}, index=[2, 3])
    print(x)
    
    
    # Ways to build a DataFrame object.
    population = pd.Series({'henan': 1000, 'shandong': 200, 'hubei': 400})
    area = {'henan': 98, 'shandong': 900, 'hubei': 4000}

    # Every entry of the outer dict becomes one column; here one column is
    # supplied as a Series and the other as a plain dict.
    province = pd.DataFrame({'population': population, 'area': area})

    print(province)

    # Unlike a plain multi-dimensional array, indexing by key returns a column.
    print(province['area'])

    # Build a DataFrame from a 2-D array with explicit row and column labels.
    abc = pd.DataFrame(
        np.random.rand(3, 2),
        index=['a', 'b', 'c'],
        columns=['foo', 'bar'],
    )

    print(abc)
    
    
    # Index objects are immutable arrays.
    x = pd.Index([3, 2, 5, 9])
    try:
        x[4] = 5
    except TypeError as err:
        # Item assignment is rejected ("Index does not support mutable
        # operations"); catch it so the rest of the script still runs.
        print(err)
    print(x)
    y = pd.Index([4, 6, 9, 23, 3])
    # Use the explicit set methods: the &, | and ^ operators on Index are
    # element-wise logical operations in current pandas, not set operations.
    print(x.intersection(y))  # intersection
    print(x.union(y))  # union
    print(x.symmetric_difference(y))  # symmetric difference
    
    # Indexers: loc, iloc and ix
    data = pd.Series(data=list('abc'), index=[1, 3, 5])
    # Plain [] with a single key looks the value up by label (explicit index).
    print(data[3])
    # Plain [] with a slice works on positions (implicit index).
    print(data[1:3])

    # loc always uses the explicit labels; the end label is included.
    print(data.loc[1:3])
    # iloc always uses the implicit positions; the end position is excluded.
    print(data.iloc[1:3])
    
    # Selecting data from a DataFrame.
    area = pd.Series({'California': 423967, 'Texas': 695662,
                      'New York': 141297, 'Florida': 170312,
                      'Illinois': 149995})
    pop = pd.Series({'California': 38332521, 'Texas': 26448193,
                     'New York': 19651127, 'Florida': 19552860,
                     'Illinois': 12882135})
    data = pd.DataFrame({'area': area, 'pop': pop})

    # Attribute-style access is not always usable: it fails when a column name
    # collides with a DataFrame method or is not a string. `data.pop` is the
    # DataFrame.pop method, so the 'pop' column must be read with [].
    print(data.area, data['pop'])
    print(data.area is data['area'])

    data['density'] = data['pop'] / data['area']
    print(data.values)
    print(data.T)

    # iloc: positional selection
    print(data.iloc[:3, :2])
    # loc: label-based selection
    print(data.loc[:'Illinois', :'pop'])
    # The mixed .ix indexer was removed from pandas; chain iloc (rows by
    # position) with loc (columns by label) to get the same selection.
    print(data.iloc[:3].loc[:, :'pop'])
    
    
    # Seeded generator so the random matrix below is reproducible between runs.
    rng = np.random.RandomState(43)
    print(rng)
    # pandas arithmetic: unary operations preserve the index and column labels;
    # binary operations align the operands on their indexes first.

    area = pd.Series({'Alaska': 1723337, 'Texas': 695662, 'California': 423967}, name='area')
    population = pd.Series({'California': 38332521, 'Texas': 26448193, 'New York': 19651127}, name='population')

    # Index alignment: labels present in only one operand produce NaN.
    print(area / population)
    x = area / population

    # Draw from the seeded generator rather than the global, unseeded one.
    A = rng.randint(10, size=(3, 4))
    print(A)
    # NumPy broadcasting: subtract the first row from every row.
    print(A - A[0])

    # Detecting and dropping missing values.
    print(x.isnull())
    print(x[x.notnull()])
    print(x.dropna())
    # dropna returned a new Series; x itself still holds the NaN entries.
    print(x)
    
    
    df = pd.DataFrame([[1, np.nan, 2],
                       [2, 3, 5],
                       [np.nan, 4, 6]])
    # By default dropna removes every row that contains a missing value.
    print(df.dropna())
    # axis='columns' removes every column that contains a missing value.
    print(df.dropna(axis='columns'))

    # Add a column consisting only of missing values.
    df[3] = np.nan
    print(df)
    # how='all' drops only the columns whose values are all missing.
    print(df.dropna(axis='columns', how='all'))
    # thresh=3 keeps only the rows with at least three non-missing values.
    print(df.dropna(axis='index', thresh=3))

    # Fill every missing value with a constant.
    print(df.fillna(9999))
    # Forward fill along each row. fillna(method=...) is deprecated, so the
    # dedicated ffill/bfill methods are used instead.
    print(df.ffill(axis=1))
    # Backward fill along each row.
    print(df.bfill(axis=1))
     
  • 相关阅读:
    C++ sort()函数的用法
    对C++里面 的知识积累:
    codevs 1160
    hdu 1020 Encoding
    poj 2591 Set Definition
    hdu 1505,1506
    hdu 1284 钱币兑换
    hdu 1231 最大连续子序列 ,1003 Max Sum;
    尺取法
    android OTA package packing and extract to partition
  • 原文地址:https://www.cnblogs.com/maggie94/p/14157265.html
Copyright © 2011-2022 走看看