zoukankan      html  css  js  c++  java
  • pandas 模块

    In [46]:
     
    import numpy as np
    import pandas as pd
    np.add()/df.add() 加法运算    +
    np.subtract()/df.subtract()  减法运算  -
    np.negative() 负数运算 -
    np.multiply()  乘法运算 *
    np.divide()    除法运算 /
    np.floor_divide()  向下整除 //
    np.power()     指数运算 **
    np.mod()      求余数 %
    np.abs()    求绝对值
    np.sin() ,cos(), tan()  求正弦  余弦 正切
    np.exp(x) e为底的x次方  np.exp2(x),2为底的x次方
    np.log(x) 以e为底x的对数(自然对数)  np.log2(x) 以2为底x的对数
    np.sum() min()  max() 求和   求最小值  求最大值   axis=0 按列聚合(每列得到一个结果)   axis=1 按行聚合(每行得到一个结果)
    np.prod() 计算元素的积
    np.mean() 计算元素的平均值
    np.std()  计算元素的标准差
    np.var() 计算元素的方差
    np.argmin()找出最小值的索引
    np.median()计算元素的中位数
    np.any()  验证是否存在元素为真
    np.all()    验证所有元素是否为真

    Out[46]:

    3.0
    In [2]:
    data=pd.Series([0.25,0.5,0.75,1.0])#Series是一个带索引的一维数组
    data
    Out[2]:
    0    0.25
    1    0.50
    2    0.75
    3    1.00
    dtype: float64
    In [3]:
    data.index#可获取索引与值
    Out[3]:
    RangeIndex(start=0, stop=4, step=1)
    In [4]:
    data.values
    Out[4]:
    array([0.25, 0.5 , 0.75, 1.  ])
    In [5]:
    data[0]#可按索引取值(也支持切片)
    Out[5]:
    0.25
    In [6]:
    data.index=["a","b","c","d"]#可定义索引
    data
    Out[6]:
    a    0.25
    b    0.50
    c    0.75
    d    1.00
    dtype: float64
    In [7]:
    population_dic={"california":423967,"texas":695663,"new york":141297,"florida":170312,"tllinois":149995}
    population=pd.Series(population_dic)
    population
     
    Out[7]:
    california    423967
    florida       170312
    new york      141297
    texas         695663
    tllinois      149995
    dtype: int64
    In [8]:
    area_dict={"california":423967,"texas":695663,"new york":141297,"florida":170312,"tllinois":149995}
    aeea=pd.Series(area_dict)
    aeea
    Out[8]:
    california    423967
    florida       170312
    new york      141297
    texas         695663
    tllinois      149995
    dtype: int64
    In [9]:
    states=pd.DataFrame({"population":population,"aeea":aeea})#DataFrame是特殊的二维字典,一个标题行对应一列数据,创建时用字典格式创建
    states
     
    Out[9]:
     aeea population
    california 423967 423967
    florida 170312 170312
    new york 141297 141297
    texas 695663 695663
    tllinois 149995 149995
    In [10]:
    states.index
    Out[10]:
    Index(['california', 'florida', 'new york', 'texas', 'tllinois'], dtype='object')
    In [11]:
    states.columns
    Out[11]:
    Index(['aeea', 'population'], dtype='object')
    In [12]:
    states["aeea"]
    Out[12]:
    california    423967
    florida       170312
    new york      141297
    texas         695663
    tllinois      149995
    Name: aeea, dtype: int64
    In [13]:
    pd.DataFrame(population,columns=["population"])#创建单列

    Out[13]:

     population
    california 423967
    florida 170312
    new york 141297
    texas 695663
    tllinois 149995
    In [14]:
    data=[{"a":i,"b":i*2}for i in range(5)]
    pd.DataFrame(data)
    Out[14]:
     a b
    0 0 0
    1 1 2
    2 2 4
    3 3 6
    4 4 8
    In [15]:
    pd.DataFrame(np.random.rand(3,2),index=(["a","b","c"]),columns=["foo","bar"])
    Out[15]:
     foo bar
    a 0.196654 0.866753
    b 0.175826 0.056842
    c 0.356369 0.272122
    In [16]:
    data=pd.Series([0.25,0.5,0.75,1.0],index=["a","b","c","d"])#Series是一个带索引的一维数组
    data
     
    Out[16]:
    a    0.25
    b    0.50
    c    0.75
    d    1.00
    dtype: float64
    In [17]:
    data["e"]=1.25#添加数据
    data
    Out[17]:
    a    0.25
    b    0.50
    c    0.75
    d    1.00
    e    1.25
    dtype: float64
    In [18]:
    #切片应用
    data["a":"c"]
    Out[18]:
    a    0.25
    b    0.50
    c    0.75
    dtype: float64
    In [19]:
    data[0:2]
    Out[19]:
    a    0.25
    b    0.50
    dtype: float64
    In [20]:
    data[(data>0.3)&(data<0.8)]

    Out[20]:

    b    0.50
    c    0.75
    dtype: float64
    In [21]:
    data=pd.Series(["a","b","c"],index=[1,3,5])
    data
    Out[21]:
    1    a
    3    b
    5    c
    dtype: object
    In [22]:
    data.loc[1]#loc使用显式索引(即定义的索引标签)
    Out[22]:
    'a'
    In [23]:
    data.loc[1:3]
    Out[23]:
    1    a
    3    b
    dtype: object
    In [24]:
    data.iloc[1]#iloc使用隐式索引(从0开始的整数位置)

    Out[24]:

    'b'
    In [25]:
    data.iloc[1:3]
    Out[25]:
    3    b
    5    c
    dtype: object
    In [26]:
    #DataFrame的数据选择方法
    area=pd.Series({"california":423967,"texas":695663,"new york":141297,"florida":170312,"tllinois":149995})
    pop=pd.Series({"california":33323967,"texas":33395663,"new york":11111297,"florida":22220312,"tllinois":22229995})
    data=pd.DataFrame({"area":area,"pop":pop})
    data
    Out[26]:
     area pop
    california 423967 33323967
    florida 170312 22220312
    new york 141297 11111297
    texas 695663 33395663
    tllinois 149995 22229995
    In [27]:
    data["area"]

    Out[27]:

    california    423967
    florida       170312
    new york      141297
    texas         695663
    tllinois      149995
    Name: area, dtype: int64
    In [28]:
    data.area
    Out[28]:
    california    423967
    florida       170312
    new york      141297
    texas         695663
    tllinois      149995
    Name: area, dtype: int64
    In [29]:
    data["density"]=data["pop"]/data["area"]#增加一列

    data
    Out[29]:
     area pop density
    california 423967 33323967 78.600379
    florida 170312 22220312 130.468270
    new york 141297 11111297 78.637883
    texas 695663 33395663 48.005518
    tllinois 149995 22229995 148.204907
    In [30]:
    data.values#查看数据
    Out[30]:
    array([[4.23967000e+05, 3.33239670e+07, 7.86003793e+01],
           [1.70312000e+05, 2.22203120e+07, 1.30468270e+02],
           [1.41297000e+05, 1.11112970e+07, 7.86378833e+01],
           [6.95663000e+05, 3.33956630e+07, 4.80055185e+01],
           [1.49995000e+05, 2.22299950e+07, 1.48204907e+02]])
    In [31]:
    data.T#转置

    Out[31]:

     california florida new york texas tllinois
    area 4.239670e+05 1.703120e+05 1.412970e+05 6.956630e+05 1.499950e+05
    pop 3.332397e+07 2.222031e+07 1.111130e+07 3.339566e+07 2.223000e+07
    density 7.860038e+01 1.304683e+02 7.863788e+01 4.800552e+01 1.482049e+02
    In [32]:
    data.loc[data.density>100,["pop","density"]]
    Out[32]:
     pop density
    florida 22220312 130.468270
    tllinois 22229995 148.204907
    In [33]:
    data.iloc[0,2]=90#修改数据
    data
    Out[33]:
     area pop density
    california 423967 33323967 90.000000
    florida 170312 22220312 130.468270
    new york 141297 11111297 78.637883
    texas 695663 33395663 48.005518
    tllinois 149995 22229995 148.204907
    In [34]:
    data[data.density>100]#掩码过滤

    Out[34]:

     area pop density
    florida 170312 22220312 130.468270
    tllinois 149995 22229995 148.204907
    In [35]:
    #pandas 的数值运算方法
    import numpy as np
    import pandas as pd
    In [36]:
    rng=np.random.RandomState(42)
    ser=pd.Series(rng.randint(0,10,4))
    ser
     

    Out[36]:

    0    6
    1    3
    2    7
    3    4
    dtype: int32
    In [38]:
    df=pd.DataFrame(rng.randint(0,10,(3,4)),columns=["A","B","C","D"])
    df
     
    Out[38]:
     A B C D
    0 1 7 5 1
    1 4 0 9 5
    2 8 0 9 2
    In [39]:
    np.exp(ser)
    Out[39]:
    0     403.428793
    1      20.085537
    2    1096.633158
    3      54.598150
    dtype: float64
    In [40]:
    np.sin(df*np.pi/4)
    Out[40]:
     A B C D
    0 7.071068e-01 -0.707107 -0.707107 0.707107
    1 1.224647e-16 0.000000 0.707107 -0.707107
    2 -2.449294e-16 0.000000 0.707107 1.000000
    In [45]:
    ser//2
    Out[45]:
    0    3
    1    1
    2    3
    3    2
    dtype: int32
  • 相关阅读:
    EasyBPM进销存之物料管理
    水厂流程三维场景可视化解决方案
    构造器
    可变形参
    重写
    拥塞处理(一)——拥塞处理的历史概述
    idea的各种乱码问题
    MySQL的主键也想使用uuid
    使用spring security明文密码校验时报错-BadCredentialsException: Bad credentials
    ValueError: check_hostname requires server_hostname
  • 原文地址:https://www.cnblogs.com/Koi504330/p/11909272.html
Copyright © 2011-2022 走看看