zoukankan      html  css  js  c++  java
  • numpy and pandas

    import numpy as np

    # Basic ndarray attributes.
    # The np.int alias was removed in NumPy 1.24; the builtin int is what it
    # pointed to, so dtype=int keeps the original meaning.
    array = np.array([[1, 2, 3],
                      [4, 5, 6]], dtype=int)
    print(array)
    print(array.ndim)   # number of dimensions
    print(array.size)   # total number of elements
    print(array.shape)  # size along each axis: array.shape[0] is 2, array.shape[1] is 3
    print(array.dtype)  # element data type
    print(len(array))   # number of rows (length of the first axis)
    print(array.reshape(1, 6))  # same elements arranged as 1 row x 6 columns
    a = np.arange(4)    # half-open range: 0 up to, but not including, 4
    print(a)
    print(a < 2)        # element-wise comparison produces a boolean mask

    输出:

    [[1 2 3]
     [4 5 6]]
    2
    6
    (2, 3)
    int32
    2
    [[1 2 3 4 5 6]]
    [0 1 2 3]
    [ True  True False False]
    # NumPy: element-wise multiplication versus matrix multiplication
    a = np.ones((2, 2))
    b = np.array([[1, 2],
                  [3, 4]])
    elementwise = a * b            # multiplies corresponding elements
    print(elementwise)
    matrix_product = np.dot(a, b)  # matrix multiplication, function form
    print(matrix_product)
    print(a.dot(b))                # matrix multiplication, method form

    输出

    [[1. 2.]
     [3. 4.]]
    [[4. 6.]
     [4. 6.]]
    [[4. 6.]
     [4. 6.]]
    # Reductions along an axis of a 2x4 array of random floats.
    a = np.random.random((2, 4))
    print(a)
    # axis=0 reduces down each column (vertical direction);
    # axis=1 reduces across each row (horizontal direction).
    row_sums = a.sum(axis=1)
    print(row_sums)
    column_minima = a.min(axis=0)
    print(column_minima)
    row_maxima = a.max(axis=1)
    print(row_maxima)

    输出

    [[0.64231375 0.18094835 0.1069843  0.99325735]
     [0.58826183 0.45672993 0.31294667 0.56790276]]
    [1.92350376 1.92584119]
    [0.58826183 0.18094835 0.1069843  0.56790276]
    [0.99325735 0.58826183]
    # Common statistics and rearrangements on a 3x4 array holding 2..13.
    A = np.arange(2, 14).reshape(3, 4)
    print(A)
    print(A.mean())        # mean of all elements
    print(np.median(A))    # median of all elements
    print(A.argmin())      # index of the smallest element in the flattened array
    print(A.argmax())      # index of the largest element in the flattened array
    print(A.cumsum())      # running total over the flattened array
    print(np.diff(A))      # difference between neighbouring elements in each row
    print(np.sort(A))      # sorts each row independently
    print(A.transpose())   # transpose
    print(A.T)             # shorthand for the transpose
    print(A.clip(3, 5))    # values below 3 become 3, values above 5 become 5

    # Iterating over the transpose walks the columns of A.
    for column in A.T:
        print(column)
    print(A.flatten())
    # A.flat is an iterator over every element of A.
    for value in A.flat:
        print(value)

    输出

    [[ 2  3  4  5]
     [ 6  7  8  9]
     [10 11 12 13]]
    7.5
    7.5
    0
    11
    [ 2  5  9 14 20 27 35 44 54 65 77 90]
    [[1 1 1]
     [1 1 1]
     [1 1 1]]
    [[ 2  3  4  5]
     [ 6  7  8  9]
     [10 11 12 13]]
    [[ 2  6 10]
     [ 3  7 11]
     [ 4  8 12]
     [ 5  9 13]]
    [[ 2  6 10]
     [ 3  7 11]
     [ 4  8 12]
     [ 5  9 13]]
    [[3 3 4 5]
     [5 5 5 5]
     [5 5 5 5]]
    [ 2  6 10]
    [ 3  7 11]
    [ 4  8 12]
    [ 5  9 13]
    [ 2  3  4  5  6  7  8  9 10 11 12 13]
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    # pandas: selecting, modifying, and cleaning data in a DataFrame
    import pandas as pd
    import numpy as np

    dates = pd.date_range('20201004', periods=6)
    df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])

    # print(df['A'], df.A)  # select a column by its name
    # print(df[0:3])        # slice rows by position

    # loc selects by label
    print(df.loc['2020-10-05'])
    print(df.loc[:, ['A', 'D']])

    # iloc selects by integer position
    print(df.iloc[[1, 3, 5], 1:3])

    # Change individual values
    df.iloc[2, 2] = 555
    df.loc['2020-10-05', 'A'] = 999
    print(df)

    # Set B to 0 wherever A > 16. A single .loc call is used instead of the
    # chained form df.B[mask] = 0, which assigns through an intermediate object
    # and does not modify df when pandas Copy-on-Write is enabled.
    df.loc[df.A > 16, 'B'] = 0
    print(df)

    # Handling NaN values.
    # Integer columns cannot hold NaN, so cast C and D to float explicitly
    # instead of relying on silent upcasting during assignment.
    df = df.astype({'C': float, 'D': float})
    df.iloc[2, 3] = np.nan
    df.iloc[1, 2] = np.nan
    print(df)

    print(df.dropna(axis=0, how='any'))  # how = {'any', 'all'}; drops rows containing NaN
    print(df.fillna(value=0))            # replace NaN with 0
    print(df.isnull())                   # boolean frame marking NaN cells
    print(df.isnull().values.any())      # True if any cell is NaN

    输出

    A    4
    B    5
    C    6
    D    7
    Name: 2020-10-05 00:00:00, dtype: int32
                 A   D
    2020-10-04   0   3
    2020-10-05   4   7
    2020-10-06   8  11
    2020-10-07  12  15
    2020-10-08  16  19
    2020-10-09  20  23
                 B   C
    2020-10-05   5   6
    2020-10-07  13  14
    2020-10-09  21  22
                  A   B    C   D
    2020-10-04    0   1    2   3
    2020-10-05  999   5    6   7
    2020-10-06    8   9  555  11
    2020-10-07   12  13   14  15
    2020-10-08   16  17   18  19
    2020-10-09   20  21   22  23
                  A   B    C   D
    2020-10-04    0   1    2   3
    2020-10-05  999   0    6   7
    2020-10-06    8   9  555  11
    2020-10-07   12  13   14  15
    2020-10-08   16  17   18  19
    2020-10-09   20   0   22  23
                  A   B      C     D
    2020-10-04    0   1    2.0   3.0
    2020-10-05  999   0    NaN   7.0
    2020-10-06    8   9  555.0   NaN
    2020-10-07   12  13   14.0  15.0
    2020-10-08   16  17   18.0  19.0
    2020-10-09   20   0   22.0  23.0
                 A   B     C     D
    2020-10-04   0   1   2.0   3.0
    2020-10-07  12  13  14.0  15.0
    2020-10-08  16  17  18.0  19.0
    2020-10-09  20   0  22.0  23.0
                  A   B      C     D
    2020-10-04    0   1    2.0   3.0
    2020-10-05  999   0    0.0   7.0
    2020-10-06    8   9  555.0   0.0
    2020-10-07   12  13   14.0  15.0
    2020-10-08   16  17   18.0  19.0
    2020-10-09   20   0   22.0  23.0
                    A      B      C      D
    2020-10-04  False  False  False  False
    2020-10-05  False  False   True  False
    2020-10-06  False  False  False   True
    2020-10-07  False  False  False  False
    2020-10-08  False  False  False  False
    2020-10-09  False  False  False  False
    True
  • 相关阅读:
    对我影响最大的老师
    介绍自己
    JavaScript 时间特效 显示当前时间
    js 获取函数的所有参数名
    node.js 在函数内获取当前函数
    js 实现二叉排序树
    命令行下mysql的部分操作
    浅析js的函数的按值传递参数
    返回上一页时,保存恢复浏览记录(模拟返回不刷新)
    让mongodb执行js文件
  • 原文地址:https://www.cnblogs.com/lemonzhang/p/13781569.html
Copyright © 2011-2022 走看看