zoukankan      html  css  js  c++  java
  • 科学计算库numpy

    老唐数据分析机器学习
    numpy1
    import numpy
    
    world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype=str)
    print(type(world_alcohol))
    print (world_alcohol) 
    #print (help(numpy.genfromtxt)) #帮助文档
    '''
    <class 'numpy.ndarray'>
    [['Year' 'WHO region' 'Country' 'Beverage Types' 'Display Value']
     ['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
     ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
     ...
     ['1987' 'Africa' 'Malawi' 'Other' '0.75']
     ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
     ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]
    '''
    
    #The numpy.array() function can take a list or list of lists as input. When we input a list, we get a one-dimensional array as a result:
    vector = numpy.array([5, 10, 15, 20])
    #When we input a list of lists, we get a matrix as a result:
    matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
    print (vector)
    print (matrix)
    '''
    [ 5 10 15 20]
    [[ 5 10 15]
     [20 25 30]
     [35 40 45]]
    '''
    
    #We can use the ndarray.shape property to figure out how many elements are in the array
    vector = numpy.array([1, 2, 3, 4])
    print(vector.shape)
    #For matrices, the shape property contains a tuple with 2 elements.
    matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
    print(matrix.shape)
    '''
    (4,)
    (2, 3)
    '''
    
    #Each value in a NumPy array has to have the same data type
    #NumPy will automatically figure out an appropriate data type when reading in data or converting lists to arrays. 
    #You can check the data type of a NumPy array using the dtype property.
    numbers = numpy.array([1, 2, 3, 4])
    numbers.dtype
    '''
    dtype('int32')
    '''
    
    # NOTE: world_alcohol was loaded earlier with dtype=str, so every field below is
    # kept as text (dtype '<U52') and no nan values appear in this output.
    # When NumPy is asked for a numeric data type like float or integer and can't
    # convert a value, it uses a special nan value that stands for Not a Number.
    # nan marks missing data.
    # In float output, 1.98600000e+03 is scientific notation for 1.986 * 10 ^ 3.
    world_alcohol
    '''
    array([['Year', 'WHO region', 'Country', 'Beverage Types',
            'Display Value'],
           ['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
           ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
           ...,
           ['1987', 'Africa', 'Malawi', 'Other', '0.75'],
           ['1989', 'Americas', 'Bahamas', 'Wine', '1.5'],
           ['1985', 'Africa', 'Malawi', 'Spirits', '0.31']], dtype='<U52')
    '''
    
    world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype="U75", skip_header=1)
    print(world_alcohol)
    '''
    [['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
     ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
     ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
     ...
     ['1987' 'Africa' 'Malawi' 'Other' '0.75']
     ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
     ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]
    '''
    
    uruguay_other_1986 = world_alcohol[1,4]
    third_country = world_alcohol[2,2]
    print (uruguay_other_1986)
    print (third_country)
    '''
    0.5
    Cte d'Ivoire
    '''
    
    vector = numpy.array([5, 10, 15, 20])
    print(vector[0:3])  
    '''
    [ 5 10 15]
    '''
    
    matrix = numpy.array([
                        [5, 10, 15], 
                        [20, 25, 30],
                        [35, 40, 45]
                     ])
    print(matrix[:,1])
    '''
    [10 25 40]
    '''
    
    matrix = numpy.array([
                        [5, 10, 15], 
                        [20, 25, 30],
                        [35, 40, 45]
                     ])
    print(matrix[:,0:2])
    '''
    [[ 5 10]
     [20 25]
     [35 40]]
    '''
    
    matrix = numpy.array([
                        [5, 10, 15], 
                        [20, 25, 30],
                        [35, 40, 45]
                     ])
    print(matrix[1:3,0:2])
    '''
    [[20 25]
     [35 40]]
    '''
    numpy2
    
    import numpy
    #it will compare the second value to each element in the vector
    # If the values are equal, the Python interpreter returns True; otherwise, it returns False
    vector = numpy.array([5, 10, 15, 20])
    vector == 10
    '''
    array([False,  True, False, False])
    '''
    
    matrix = numpy.array([
                        [5, 10, 15], 
                        [20, 25, 30],
                        [35, 40, 45]
                     ])
    matrix == 25
    '''
    array([[False, False, False],
           [False,  True, False],
           [False, False, False]])
    '''
    
    #Compares vector to the value 10, which generates a new Boolean vector [False, True, False, False]. It assigns this result to equal_to_ten
    vector = numpy.array([5, 10, 15, 20])
    equal_to_ten = (vector == 10)
    print (equal_to_ten)
    print(vector[equal_to_ten])
    '''
    [False  True False False]
    [10]
    '''
    
    matrix = numpy.array([
                    [5, 10, 15], 
                    [20, 25, 30],
                    [35, 40, 45]
                 ])
    second_column_25 = (matrix[:,1] == 25)
    print (second_column_25)
    print(matrix[second_column_25, :])
    '''
    [False  True False]
    [[20 25 30]]
    '''
    
    #We can also perform comparisons with multiple conditions
    vector = numpy.array([5, 10, 15, 20])
    equal_to_ten_and_five = (vector == 10) & (vector == 5)
    print (equal_to_ten_and_five)
    '''
    [False False False False]
    '''
    
    vector = numpy.array([5, 10, 15, 20])
    equal_to_ten_or_five = (vector == 10) | (vector == 5)
    print (equal_to_ten_or_five)
    '''
    [ True  True False False]
    '''
    
    # Overwrite every element that equals 10 or 5 with 50, using a Boolean mask.
    vector = numpy.array([5, 10, 15, 20])
    is_ten = vector == 10
    is_five = vector == 5
    # Elementwise OR of the two masks selects positions matching either value.
    equal_to_ten_or_five = is_ten | is_five
    vector[equal_to_ten_or_five] = 50
    print(vector)
    '''
    [50 50 15 20]
    '''
    
    matrix = numpy.array([
                [5, 10, 15], 
                [20, 25, 30],
                [35, 40, 45]
             ])
    second_column_25 = matrix[:,1] == 25
    print (second_column_25)
    matrix[second_column_25, 1] = 10
    print (matrix)
    '''
    [False  True False]
    [[ 5 10 15]
     [20 10 30]
     [35 40 45]]
    '''
    
    #We can convert the data type of an array with the ndarray.astype() method.
    vector = numpy.array(["1", "2", "3"])
    print (vector.dtype)
    print (vector)
    vector = vector.astype(float)
    print (vector.dtype)
    print (vector)
    '''
    <U1
    ['1' '2' '3']
    float64
    [1. 2. 3.]
    '''
    
    vector = numpy.array([5, 10, 15, 20])
    vector.sum()
    '''
    50
    '''
    
    # The axis dictates which dimension we perform the operation on
    #1 means that we want to perform the operation on each row, and 0 means on each column
    matrix = numpy.array([
                    [5, 10, 15], 
                    [20, 25, 30],
                    [35, 40, 45]
                 ])
    matrix.sum(axis=1)
    '''
    array([ 30,  75, 120])
    '''
    
    matrix = numpy.array([
                    [5, 10, 15], 
                    [20, 25, 30],
                    [35, 40, 45]
                 ])
    matrix.sum(axis=0)
    '''
    array([60, 75, 90])
    '''
    
    # Replace missing (nan) consumption values with 0, then total and average them.
    # skip_header=1 drops the column-title row: read as float it parses as nan,
    # would be zero-filled below, and would then be counted as an extra zero
    # observation when computing the mean.
    world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", skip_header=1)
    # Text columns (region, country, beverage type) cannot be parsed as floats and
    # come back as nan; only the year and "Display Value" columns hold numbers.
    is_value_empty = numpy.isnan(world_alcohol[:, 4])
    # Assign a numeric zero; the array is float, so no string conversion is needed.
    world_alcohol[is_value_empty, 4] = 0.0
    # The column is already float, so no astype(float) call is required.
    alcohol_consumption = world_alcohol[:, 4]
    total_alcohol = alcohol_consumption.sum()
    average_alcohol = alcohol_consumption.mean()
    print(total_alcohol)
    print(average_alcohol)
    # Expected: the total is still 1137.78 (the header row only ever contributed a 0).
    # The mean is now total / number of data rows, so it comes out slightly above
    # the 1.140060120240481 obtained when the header row was counted as well.
    numpy3
    import numpy as np
    print(np.arange(15))
    a = np.arange(15).reshape(3, 5)
    a
    '''
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
    array([[ 0,  1,  2,  3,  4],
           [ 5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14]])
    '''
    
    a.shape
    '''
    (3, 5)
    '''
    
    #the number of axes (dimensions) of the array
    a.ndim
    '''
    2
    '''
    
    a.dtype.name
    '''
    'int32'
    '''
    
    #the total number of elements of the array
    a.size
    '''
    15
    '''
    
    np.zeros ((3,4)) 
    '''
    array([[0., 0., 0., 0.],
           [0., 0., 0., 0.],
           [0., 0., 0., 0.]])
    '''
    
    np.ones( (2,3,4), dtype=np.int32 )
    '''
    array([[[1, 1, 1, 1],
            [1, 1, 1, 1],
            [1, 1, 1, 1]],
    
           [[1, 1, 1, 1],
            [1, 1, 1, 1],
            [1, 1, 1, 1]]])
    '''
    
    #To create sequences of numbers
    np.arange( 10, 30, 5 )
    '''
    array([10, 15, 20, 25])
    '''
    
    np.arange( 0, 2, 0.3 )
    '''
    array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])
    '''
    
    np.arange(12).reshape(4,3)
    '''
    array([[ 0,  1,  2],
           [ 3,  4,  5],
           [ 6,  7,  8],
           [ 9, 10, 11]])
    '''
    
    np.random.random((2,3))  # uniform random floats drawn from the half-open interval [0.0, 1.0), not (-1, 1)
    '''
    array([[0.06665873, 0.92526157, 0.42866618],
           [0.19151176, 0.79870056, 0.32145198]])
    '''
    
    from numpy import pi 
    # linspace returns 100 evenly spaced values from 0 to 2*pi (both endpoints included)
    np.linspace( 0, 2*pi, 100 )
    '''
    array([0.        , 0.06346652, 0.12693304, 0.19039955, 0.25386607,
           0.31733259, 0.38079911, 0.44426563, 0.50773215, 0.57119866,
           0.63466518, 0.6981317 , 0.76159822, 0.82506474, 0.88853126,
           0.95199777, 1.01546429, 1.07893081, 1.14239733, 1.20586385,
           1.26933037, 1.33279688, 1.3962634 , 1.45972992, 1.52319644,
           1.58666296, 1.65012947, 1.71359599, 1.77706251, 1.84052903,
           1.90399555, 1.96746207, 2.03092858, 2.0943951 , 2.15786162,
           2.22132814, 2.28479466, 2.34826118, 2.41172769, 2.47519421,
           2.53866073, 2.60212725, 2.66559377, 2.72906028, 2.7925268 ,
           2.85599332, 2.91945984, 2.98292636, 3.04639288, 3.10985939,
           3.17332591, 3.23679243, 3.30025895, 3.36372547, 3.42719199,
           3.4906585 , 3.55412502, 3.61759154, 3.68105806, 3.74452458,
           3.8079911 , 3.87145761, 3.93492413, 3.99839065, 4.06185717,
           4.12532369, 4.1887902 , 4.25225672, 4.31572324, 4.37918976,
           4.44265628, 4.5061228 , 4.56958931, 4.63305583, 4.69652235,
           4.75998887, 4.82345539, 4.88692191, 4.95038842, 5.01385494,
           5.07732146, 5.14078798, 5.2042545 , 5.26772102, 5.33118753,
           5.39465405, 5.45812057, 5.52158709, 5.58505361, 5.64852012,
           5.71198664, 5.77545316, 5.83891968, 5.9023862 , 5.96585272,
           6.02931923, 6.09278575, 6.15625227, 6.21971879, 6.28318531])
    '''

    # Apply sin elementwise to the same 100 sample points
    np.sin(np.linspace( 0, 2*pi, 100 ))
    '''
    array([ 0.00000000e+00,  6.34239197e-02,  1.26592454e-01,  1.89251244e-01,
            2.51147987e-01,  3.12033446e-01,  3.71662456e-01,  4.29794912e-01,
            4.86196736e-01,  5.40640817e-01,  5.92907929e-01,  6.42787610e-01,
            6.90079011e-01,  7.34591709e-01,  7.76146464e-01,  8.14575952e-01,
            8.49725430e-01,  8.81453363e-01,  9.09631995e-01,  9.34147860e-01,
            9.54902241e-01,  9.71811568e-01,  9.84807753e-01,  9.93838464e-01,
            9.98867339e-01,  9.99874128e-01,  9.96854776e-01,  9.89821442e-01,
            9.78802446e-01,  9.63842159e-01,  9.45000819e-01,  9.22354294e-01,
            8.95993774e-01,  8.66025404e-01,  8.32569855e-01,  7.95761841e-01,
            7.55749574e-01,  7.12694171e-01,  6.66769001e-01,  6.18158986e-01,
            5.67059864e-01,  5.13677392e-01,  4.58226522e-01,  4.00930535e-01,
            3.42020143e-01,  2.81732557e-01,  2.20310533e-01,  1.58001396e-01,
            9.50560433e-02,  3.17279335e-02, -3.17279335e-02, -9.50560433e-02,
           -1.58001396e-01, -2.20310533e-01, -2.81732557e-01, -3.42020143e-01,
           -4.00930535e-01, -4.58226522e-01, -5.13677392e-01, -5.67059864e-01,
           -6.18158986e-01, -6.66769001e-01, -7.12694171e-01, -7.55749574e-01,
           -7.95761841e-01, -8.32569855e-01, -8.66025404e-01, -8.95993774e-01,
           -9.22354294e-01, -9.45000819e-01, -9.63842159e-01, -9.78802446e-01,
           -9.89821442e-01, -9.96854776e-01, -9.99874128e-01, -9.98867339e-01,
           -9.93838464e-01, -9.84807753e-01, -9.71811568e-01, -9.54902241e-01,
           -9.34147860e-01, -9.09631995e-01, -8.81453363e-01, -8.49725430e-01,
           -8.14575952e-01, -7.76146464e-01, -7.34591709e-01, -6.90079011e-01,
           -6.42787610e-01, -5.92907929e-01, -5.40640817e-01, -4.86196736e-01,
           -4.29794912e-01, -3.71662456e-01, -3.12033446e-01, -2.51147987e-01,
           -1.89251244e-01, -1.26592454e-01, -6.34239197e-02, -2.44929360e-16])
    '''

    # Arithmetic and comparison operators (including the product operator *)
    # operate elementwise on NumPy arrays
    a = np.array( [20,30,40,50] )
    b = np.arange( 4 )
    c = a-b      # elementwise difference
    b**2         # elementwise square (result is discarded when run as a script)
    print (a<35)
    '''
    [ True  True False False]
    '''

    # The matrix product can be performed using the dot function or method;
    # A*B would instead multiply the values at matching positions.
    A = np.array( [[1,1],
                   [0,1]] )
    B = np.array( [[2,0],
                   [3,4]] )
    print (A)
    print (B)
    print (A.dot(B))       # mathematical matrix product (method form)
    print (np.dot(A, B))   # mathematical matrix product (function form)
    '''
    [[1 1]
     [0 1]]
    [[2 0]
     [3 4]]
    [[5 4]
     [3 4]]
    [[5 4]
     [3 4]]
    '''
    numpy4
    
    import numpy as np
    B = np.arange(3)
    print(B)
    print(np.exp(B))
    print(np.sqrt(B))
    '''
    [0 1 2]
    [1.         2.71828183 7.3890561 ]
    [0.         1.         1.41421356]
    '''
    
    # Build a 3x4 array of random whole numbers 0..9: floor() rounds each scaled
    # sample down to the nearest integer (the values stay floats).
    a = np.floor(10*np.random.random((3,4)))
    print(a)
    print('--------------')
    print(a.shape)
    print('--------------')
    # ravel() flattens the array to one dimension
    print(a.ravel())
    print('--------------')
    # Assigning to .shape reshapes in place; the element count (12) must not change
    a.shape = (6, 2)
    print(a)
    print('--------------')
    print(a.T)  # transpose
    # resize() changes the array in place and returns None, so call it first and
    # print the array afterwards rather than printing the call's return value.
    a.resize((2,6))
    print(a)

    # If a dimension is given as -1 in a reshaping operation, the other dimensions
    # are automatically calculated, e.g. a.reshape(3,-1) gives shape (3, 4) here.
    '''
    [[1. 6. 7. 4.]
     [5. 4. 1. 0.]
     [2. 3. 9. 7.]]
    --------------
    (3, 4)
    --------------
    [1. 6. 7. 4. 5. 4. 1. 0. 2. 3. 9. 7.]
    --------------
    [[1. 6.]
     [7. 4.]
     [5. 4.]
     [1. 0.]
     [2. 3.]
     [9. 7.]]
    --------------
    [[1. 7. 5. 1. 2. 9.]
     [6. 4. 4. 0. 3. 7.]]
    [[1. 6. 7. 4. 5. 4.]
     [1. 0. 2. 3. 9. 7.]]
    '''
    
    # Two random 2x2 arrays of whole numbers, then joined in both directions.
    divider = '---'
    a = np.floor(np.random.random((2, 2)) * 10)
    b = np.floor(np.random.random((2, 2)) * 10)
    print(a)
    print(divider)
    print(b)
    print(divider)
    # hstack joins the arrays side by side (the result has more columns)
    side_by_side = np.hstack([a, b])
    print(side_by_side)
    # vstack stacks them on top of each other (the result has more rows)
    stacked = np.vstack([a, b])
    print(stacked)
    '''
    [[7. 5.]
     [9. 1.]]
    ---
    [[6. 2.]
     [4. 7.]]
    ---
    [[7. 5. 6. 2.]
     [9. 1. 4. 7.]]
    [[7. 5.]
     [9. 1.]
     [6. 2.]
     [4. 7.]]
    '''
    
    a = np.floor(10*np.random.random((2,12)))
    print(a)
    print('-------------')
    print(np.hsplit(a,3)) #横向平均切分三份
    print('-------------')
    print(np.hsplit(a,(3,4)))   # Split a after the third and the fourth column
    a = np.floor(10*np.random.random((12,2)))
    print('-------------')
    print(a)
    np.vsplit(a,3) #纵向平均切分三份
    '''
    
    [[0. 8. 1. 3. 4. 7. 7. 1. 9. 8. 7. 2.]
     [4. 2. 7. 3. 9. 6. 9. 1. 7. 8. 3. 8.]]
    -------------
    [array([[0., 8., 1., 3.],
           [4., 2., 7., 3.]]), array([[4., 7., 7., 1.],
           [9., 6., 9., 1.]]), array([[9., 8., 7., 2.],
           [7., 8., 3., 8.]])]
    -------------
    [array([[0., 8., 1.],
           [4., 2., 7.]]), array([[3.],
           [3.]]), array([[4., 7., 7., 1., 9., 8., 7., 2.],
           [9., 6., 9., 1., 7., 8., 3., 8.]])]
    -------------
    [[9. 3.]
     [3. 5.]
     [1. 1.]
     [0. 3.]
     [6. 4.]
     [5. 6.]
     [9. 4.]
     [1. 7.]
     [6. 2.]
     [1. 6.]
     [1. 1.]
     [8. 9.]]
    [array([[9., 3.],
            [3., 5.],
            [1., 1.],
            [0., 3.]]), array([[6., 4.],
            [5., 6.],
            [9., 4.],
            [1., 7.]]), array([[6., 2.],
            [1., 6.],
            [1., 1.],
            [8., 9.]])]
    '''
    
    
    python三种复制
    
    #Simple assignments make no copy of array objects or of their data.
    a = np.arange(12)
    b = a
    # a and b are two names for the same ndarray object
    print(b is a)
    b.shape = (3,4)
    print(a.shape)
    print(id(a))
    print(id(b))
    '''
    True
    (3, 4)
    1229965715056
    1229965715056
    '''
    
    #The view method creates a new array object that looks at the same data.
    c = a.view()
    print(c is a)
    c.shape = 2,6
    print(a.shape)
    c[0,4] = 1234
    print(a)
    print(id(a))
    print(id(c))
    '''
    False
    (3, 4)
    [[   0    1    2    3]
     [1234    5    6    7]
     [   8    9   10   11]]
    1229965715056
    1229965716336
    '''
    
    #The copy method makes a complete copy of the array and its data.
    d = a.copy() 
    print(d is a)
    d[0,0] = 9999
    print(d)
    print(a)
    '''
    False
    [[9999    1    2    3]
     [1234    5    6    7]
     [   8    9   10   11]]
    [[   0    1    2    3]
     [1234    5    6    7]
     [   8    9   10   11]]
    '''
    numpy5
    
    import numpy as np
    # Find the maximum of every column via argmax and confirm it matches max().
    data = np.sin(np.arange(20)).reshape(5,4)
    print(data)
    # Row index of the largest value in each column
    ind = data.argmax(axis=0)
    print(ind)
    print(data.shape)
    print(data.shape[1])
    # Pair each column number with the row index of its maximum to pick those values
    data_max = data[ind, np.arange(data.shape[1])]
    print(data_max)
    # Vectorised check, printed so the script actually shows the result
    # (a bare expression is only echoed inside an interactive session).
    print((data_max == data.max(axis=0)).all())
    '''
    [[ 0.          0.84147098  0.90929743  0.14112001]
     [-0.7568025  -0.95892427 -0.2794155   0.6569866 ]
     [ 0.98935825  0.41211849 -0.54402111 -0.99999021]
     [-0.53657292  0.42016704  0.99060736  0.65028784]
     [-0.28790332 -0.96139749 -0.75098725  0.14987721]]
    [2 0 3 1]
    (5, 4)
    4
    [0.98935825 0.84147098 0.99060736 0.6569866 ]
    True
    '''
    
    a = np.arange(0, 40, 10)
    print(a)
    b = np.tile(a, (3, 5)) # tile a into 3 rows, each row repeating a 5 times
    print(b)
    '''
    [ 0 10 20 30]
    [[ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]
     [ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]
     [ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]]
    '''
    
    separator = '------------'
    a = np.array([[4, 3, 5], [1, 2, 1]])
    print(a)
    print(separator)
    # Sorted copy: every row ordered ascending while `a` itself stays untouched
    b = a.copy()
    b.sort(axis=1)
    print(b)
    # Now sort `a` itself in place along the same axis
    a.sort(axis=1)
    print(separator)
    print(a)
    a = np.array([4, 3, 1, 2])
    # argsort yields the indices that would put the array in ascending order
    j = a.argsort()
    print(separator)
    print(j)
    print(separator)
    # Indexing with those indices produces the sorted values
    print(a[j])
    '''
    [[4 3 5]
     [1 2 1]]
    ------------
    [[3 4 5]
     [1 1 2]]
    ------------
    [[3 4 5]
     [1 1 2]]
    ------------
    [2 3 1 0]
    ------------
    [1 2 3 4]
    '''
  • 相关阅读:
    IO模式和IO多路复用详解
    消息队列RabbitMQ、缓存数据库Redis
    rest framework认证组件和django自带csrf组件区别详解
    django进阶之缓存
    关于CSRF攻击详解
    Linux学习常用命令大全
    .NET 开源工作流: Slickflow流程引擎基础介绍(四) -- 多数据库支持实现
    .NET 开源工作流: Slickflow流程引擎基础介绍(三) -- 基于HTML5/Bootstrap的Web流程设计器
    .NET 开源工作流: Slickflow流程引擎基础介绍(二) -- 引擎组件和业务系统的集成
    .NET开源敏捷开发框架: SlickOne介绍(一) -- 基于Dapper, Mvc和WebAPI 的快速开发框架
  • 原文地址:https://www.cnblogs.com/LXL616/p/11722448.html
Copyright © 2011-2022 走看看