zoukankan      html  css  js  c++  java
  • Numpy数据处理函数

    Numpy函数介绍

    import numpy as np
    #sqrt 计算各元素的平方根
    arr = np.arange(10)
    np.sqrt(arr)
    array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
           2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])
    
    # square 计算各元素的平方
    arr1 = np.arange(10)
    np.square(arr1)
    array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81], dtype=int32)
    
    
    
    # modf将数组的小数和整数部分以两个独立数组的形式返回
    arr2 = np.array([1.22,3.55])
    np.modf(arr2)
    (array([0.22, 0.55]), array([1., 3.]))
    
    

    利用数组进行数据处理

    用NumPy数组表达式代替显式循环的做法通常称为矢量化。矢量化的数组运算一般比等价的纯Python循环快一两个数量级,这正是矢量化计算的强大之处。

    points = np.arange(-5,5,0.01) # 1000个间隔相等的点
    xs,ys = np.meshgrid(points,points)
    xs
    array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
           [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
           [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
           ...,
           [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
           [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
           [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]])
    ys
    array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
           [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
           [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
           ...,
           [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
           [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
           [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])
    
    
    z = np.sqrt(xs ** 2 + ys ** 2)
    z
    array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
            7.06400028],
           [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
            7.05692568],
           [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
            7.04985815],
           ...,
           [7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
            7.04279774],
           [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
            7.04985815],
           [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
            7.05692568]])
    
    import matplotlib.pyplot as plt
    plt.imshow(z, cmap = plt.cm.gray);plt.colorbar()
    plt.title("Image plot")
    

    阴影

    将条件逻辑表述为数组运算

    xarr = np.array([1.1,1.2,1.3,1.4,1.5])
    yarr = np.array([2.1,2.2,2.3,2.4,2.5])
    cond = np.array([True,False,True,True,False])
    # cond为True取xarr 否则取yarr
    # 传统写法
    result = [(x if c else y) for x,y,c in zip(xarr,yarr,cond)]
    result
    [1.1, 2.2, 1.3, 1.4, 2.5]
    
    
    # numpy做法 
    result1 = np.where(cond,xarr,yarr)
    result1
    array([1.1, 2.2, 1.3, 1.4, 2.5])
    

    where闪亮登场

    • np.where(条件,真值,假值)
    • 传递给where的第二、三个参数(真值、假值)不必是与条件同样大小的数组,也可以是标量值,只要它们能与条件数组广播即可
    arr3 = np.random.randn(4, 4)
    arr3
    array([[ 0.6498161 ,  0.35784392, -1.47023858,  1.09367264],
           [-0.62756846,  0.23898718,  1.41371883,  0.48955242],
           [-0.10017446,  0.24327529,  0.04354429,  0.80346031],
           [-0.74234979, -0.11921036, -0.11432723, -0.37912988]])
    
    
    result2 = np.where(arr3>0,2,-2)
    result2
    array([[ 2,  2, -2,  2],
           [-2,  2,  2,  2],
           [-2,  2,  2,  2],
           [-2, -2, -2, -2]])
    result3 = np.where(arr3>0,2,arr3)
    result3
    array([[ 2.        ,  2.        , -1.47023858,  2.        ],
           [-0.62756846,  2.        ,  2.        ,  2.        ],
           [-0.10017446,  2.        ,  2.        ,  2.        ],
           [-0.74234979, -0.11921036, -0.11432723, -0.37912988]])
    
    cond1 = np.array([True,True,False,False])
    cond2 = np.array([True,False,True,False])
    
    # 如果cond1和cond2都为真,则输出0,如果cond1为真,则输出1,如果cond2为真,则输出2,如果都是为假,则输出3
    result4 = np.where(cond1 & cond2, 0, np.where(cond1, 1, np.where(cond2, 2, 3)))
    result4
    array([0, 1, 2, 3])
    

    练习

    数学和统计方法

    • sum/mean/std等聚合计算(和,平均值,标准差)
    test1 = np.array([[2,2,3,4,5],[6,7,8,9,10]])
    test1
    
    array([[ 2,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10]])
    np.mean(test1)
    5.6
    
    test1.sum()
    56
    
    test1.mean(axis=1)
    array([3.2, 8. ])
    
    test1.mean(axis=0)
    array([4. , 4.5, 5.5, 6.5, 7.5])
    
    test1.cumsum(0) # cumsum 沿指定轴(此处为轴0)计算累计和;cumprod 则计算累计积
    array([[ 2,  2,  3,  4,  5],
           [ 8,  9, 11, 13, 15]], dtype=int32)
    - axis轴,指的是维度
    
    test1.std(axis=0) # 标准差
    array([2. , 2.5, 2.5, 2.5, 2.5])
    
    test1.var(axis=0) # 方差
    array([4.  , 6.25, 6.25, 6.25, 6.25])
    
    test1.argmin(axis=0)
    array([0, 0, 0, 0, 0], dtype=int64)
    
    test1.argmax(axis=0)
    array([1, 1, 1, 1, 1], dtype=int64)
    

    用于布尔型数组的方法

    test2 = np.random.randn(100)
    test2
    array([ 0.25903273,  0.13939567, -0.10597059,  0.77790221, -0.76341781,
           -0.61086865,  0.36002937, -0.1423488 ,  1.69071728,  1.63576044,
            0.39950296,  1.37198449,  0.454591  , -1.67927663,  0.73649581,
            0.88126353, -1.82159175, -0.46323513, -0.30399076,  2.16435963,
           -0.79892847,  0.05767935,  0.45429729,  0.96934967, -0.78818112,
           -0.8438922 ,  0.31373184,  0.05242094,  0.2332054 ,  0.26647064,
            1.56850088,  0.41425585, -1.62452194, -1.17165311,  0.23586585,
            0.45476575, -0.57501697,  1.42377017,  0.00666962,  1.53916711,
            0.508553  , -1.37573917,  0.51378532,  1.72682708, -0.76148258,
           -1.19819233, -1.05367328,  1.0792924 ,  0.80229908,  1.03273504,
            0.71938515,  0.28893472, -0.08472809,  1.02170717,  0.03897593,
           -0.0693723 , -0.60612239, -0.35538122, -1.09975843,  0.23485432,
           -0.4513678 , -0.8119979 , -0.53072714,  1.02247374,  0.52980399,
           -1.17365366, -0.4948684 , -0.81596822,  1.10386231, -1.10894077,
            1.33491691,  0.21015349, -0.32206128, -0.33041407, -0.06815369,
            2.27874416, -0.26642346, -0.95616127, -1.38222481, -0.89619146,
            2.70433   , -1.8758817 , -1.61408998, -0.70112051,  0.63143197,
           -0.5937125 , -0.82650637,  1.24456287, -0.61903984, -0.45140393,
            0.25139079, -0.18882441, -0.61667939,  0.84566077, -1.08506887,
           -0.45491845, -1.68915454,  0.58872177, -0.30961048, -0.43431663])
    
    (test2 > 0).sum()
    49
    
    # any用于检测数组中是否存在True
    bools = np.array([False,False,False,False])
    bools.any()
    False
    
    # all用于检测数组中所有值是否都是True
    bools.all()
    False
    

    排序

    sort()方法直接修改数组本身

    test2 = np.array([11,55,33,44,88])
    test2
    array([11, 55, 33, 44, 88])
    
    test2.sort()
    test2
    array([11, 33, 44, 55, 88])
    
    test4 = np.array([[82,222,1,4,5],[62,72,8,93,10]])
    test4
    array([[ 82, 222,   1,   4,   5],
           [ 62,  72,   8,  93,  10]])
    
    
    # 沿轴1排序:每一行内部从小到大排序
    test4.sort(1)
    test4
    array([[  1,   4,   5,  82, 222],
           [  8,  10,  62,  72,  93]])
    
    # 沿轴0排序:每一列内部从小到大排序
    test4.sort(0)
    test4
    
    array([[  1,   4,   5,  72,  93],
           [  8,  10,  62,  82, 222]])
    

    唯一化以及其他的集合逻辑

    np.unique找出唯一值并返回已排序的结果

    names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
    np.unique(names)
    array(['Bob', 'Joe', 'Will'], dtype='<U4')
    

    注意:下列函数名(intersect1d、union1d、in1d、setdiff1d、setxor1d)中的"1d"是数字1,不是字母l

    方法 说明
    unique(x) 计算x中的唯一元素,并返回有序结果
    intersect1d(x,y) x和y的公共元素
    union1d(x,y) 计算x和y的并集
    in1d(x,y) 得到一个表示"x的元素是否包含于y"的布尔型数组
    setdiff1d(x,y) 集合的差,即元素在x中且不在y中
    setxor1d(x,y) 集合的对称差,即只存在于其中一个数组、而不同时存在于两个数组中的元素
    values = np.array([6,0,0,3,2,5,6])
    np.in1d(values,[2,3,6])
    array([ True, False, False,  True,  True, False,  True])
    
  • 相关阅读:
    NAT基本原理及应用
    端口转发和端口映射的区别
    Xshell不能连接Kali系统SSH的解决
    PowerSploit
    powertool
    Windows/Linux 下反弹shell
    Apache Shiro 反序列化漏洞复现(CVE-2016-4437)
    渗透测试神器Cobalt Strike使用教程
    Notepad++ 小技巧
    Linux:Day44(上)
  • 原文地址:https://www.cnblogs.com/lishi-jie/p/9851568.html
Copyright © 2011-2022 走看看