zoukankan      html  css  js  c++  java
  • Numpy 学习

    I'm so hard (=;

    Time flies so fast ~ there is no time left for you to be sad.
    
    (#~# my poor english)
    

    understand AXIS in NUMPY ~

    import numpy as np
    import torch
    # Warm-up: replace every even number in 0..9 with 0, keep the odd ones.
    arr = np.arange(10)
    out = np.where(arr % 2 != 0, arr, 0)

    # 1. repeat vs. tile: repeat duplicates each element in place, tile repeats
    #    the whole array; the two results are joined end to end.
    arr = np.array([1, 2, 3])
    out1 = np.concatenate((np.repeat(arr, 3), np.tile(arr, 3)))

    a = np.arange(6).reshape(2, 3)
    b = np.full(shape=(2, 3), fill_value=1)
    # Try: print(np.r_[a, b])  -> b stacked below a
    # Try: print(np.c_[a, b])  -> b placed to the right of a

    # 2. np.r_ joins top-to-bottom, np.c_ joins left-to-right.
    a = np.arange(3)
    b = -np.arange(3)
    # Try: print(np.r_[a, b])
    # Try: print(np.c_[a, b])

    # 3. Values that occur in both a and b.
    a = np.array([1, 2, 3, 2, 3, 4, 3, 4, 5, 6])
    b = np.array([7, 2, 10, 2, 7, 4, 9, 4, 9, 8])
    # Try: print(np.intersect1d(a, b))

    # 4. Values that are in a but not in b.
    a = np.array([1, 2, 3, 4, 5])
    b = np.array([5, 6, 7, 8, 9])
    # Try: print(np.setdiff1d(a, b))

    # Positions at which a and b hold the same value.
    a = np.array([1, 2, 3, 2, 3, 4, 3, 4, 5, 6])
    b = np.array([7, 2, 10, 2, 7, 4, 9, 4, 9, 8])
    # Try: print(np.where(a == b))
    # Try: print(np.where([True, True, False, False]))

    # 5. Select by condition: elements between 5 and 10 inclusive.
    a = np.array([2, 6, 1, 9, 10, 3, 27])
    # Try: a[np.where((5 <= a) & (a <= 10))]
    # Try: print(a[(5 <= a) & (a <= 10)])

    # 6. Swap columns; reverse rows; reverse columns.
    arr = np.arange(9).reshape((3, 3))
    # Try: print(arr[:, [1, 0, 2]])   # columns 0 and 1 exchanged
    # Try: print(arr[::-1])           # rows reversed
    # Try: print(arr[:, ::-1])        # columns reversed

    # 7. Random 2-D arrays with values between 5 and 10 (the exercise asks for
    #    shape 5x3; the samples below use (2, 2)).
    # Try: print(np.random.uniform(5, 10, (2, 2)))
    # Try: print(np.random.random((2, 2)))         # floats in [0, 1)
    # Try: print(np.random.randint(5, 10, (2, 2))) # integers

    # 8. Set the print precision.
    # Try: np.set_printoptions(3)

    # 10. Toggle scientific notation when printing small numbers.
    X = np.random.random(size=(3, 3)) / 1e3
    # Try: np.set_printoptions(suppress=False)
    # Try: print(X)

    # 11. Limit how many elements are printed before the output is summarised.
    a = np.arange(10)
    # Try: print(a)
    # Try: np.set_printoptions(threshold=6)
    # Try: print(a)
    
    # 12. Download the iris data as an object array and copy out column 4,
    #     the species labels.
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
    names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
    iris = np.genfromtxt(url, dtype='object', delimiter=',')
    # Try: print(iris[:3])
    out = iris[:, 4].copy()
    # Try: print(out[:5])

    # Alternative: with dtype=None the result is 1-D; rebuild a 2-D array from
    # the first four fields of every row.
    # iris_1D = np.genfromtxt(url, delimiter=',', dtype=None)
    # out = np.array([row.tolist()[:4] for row in iris_1D])
    # print(out[:3])
    
    # 13. Understanding `axis`: each mean collapses one axis of a (2, 2, 3) array.
    A = np.arange(12).reshape((2, 2, 3))
    # Try: print(A)
    # Try: input(">>>")
    mean = A.mean(axis=0, dtype=int)
    # axis=0 -> 2x3 result: [[3, 4, 5], [6, 7, 8]]
    # Try: print(mean)
    mean = A.mean(axis=1, dtype=int)
    # axis=1 -> 2x3 result: [[1, 2, 3], [7, 8, 9]]
    # Try: print(mean)
    mean = A.mean(axis=2, dtype=int)
    # axis=2 -> 2x2 result: [[1, 4], [7, 10]]
    # Try: print(mean)
    
    # 14. Min-max normalisation of the first column (sepal length).
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
    sepallength = np.genfromtxt(url, usecols=[0], dtype='float', delimiter=',')
    Smin = sepallength.min()
    Smax = sepallength.max()
    out = (sepallength - Smin) / (Smax - Smin)
    # Try: print(out)

    # 15. Softmax of the same column: first via torch, then by hand in NumPy.
    iris = np.genfromtxt(url, dtype='object', delimiter=',')
    sepallength = iris[:, 0].astype(float)
    out = torch.from_numpy(sepallength).softmax(dim=0)
    # Try: print(out[:5])
    x = np.exp(sepallength)
    out = x / x.sum()
    # Try: print(out[:5])

    # 15b. 5th and 95th percentiles of sepal length.
    sepallength = np.genfromtxt(url, usecols=[0], dtype='float', delimiter=',')
    out = np.percentile(sepallength, q=[5, 95])
    # Try: print(out)
    
    # 16. np.random: draw a sample, then put one NaN into each of the 150 rows.
    np.random.seed(233)
    x = np.arange(10)
    y = np.random.choice(x, size=5)
    # Try: print(y)
    iris_2d = np.genfromtxt(url, usecols=[0, 1, 2, 3], dtype='float', delimiter=',')
    # Row/column indices of the non-zero entries; j is the pool of column
    # indices that the random draw below samples from.
    i, j = np.nonzero(iris_2d)
    iris_2d[np.arange(150), np.random.choice(j, size=150)] = np.nan
    # Find the NaNs with np.isnan; comparing with `iris_2d == np.nan` does not work.
    x = np.nonzero(np.isnan(iris_2d))
    # Merge the row and column index arrays into (row, col) pairs.
    x = np.column_stack(x)
    # Try: print(x)
    
    # 17. Row filter: column 2 greater than 1.5 and column 0 less than 5.0.
    iris_2d = np.genfromtxt(url, usecols=[0, 1, 2, 3], dtype='float', delimiter=',')
    condition = (iris_2d[:, 0] < 5.0) & (iris_2d[:, 2] > 1.5)
    # Try: print(iris_2d[condition])

    # 18. Drop rows that contain a missing value (10 random cells are set to NaN).
    iris_2d[
        np.random.randint(150, size=10),
        np.random.randint(4, size=10)
    ] = np.nan
    # Approach 1: check the rows one at a time.
    pos = np.array([not np.isnan(row).any() for row in iris_2d])
    # Try: print(iris_2d[pos].shape)
    # Approach 2: a single reduction along axis 1.
    pos = ~np.isnan(iris_2d).any(axis=1)
    # Try: print(iris_2d[pos].shape)

    # 19. Correlation coefficient between column 0 and column 2.
    iris = np.genfromtxt(url, usecols=[0, 1, 2, 3], dtype='float', delimiter=',')
    out = np.corrcoef(iris[:, 0], iris[:, 2])[1, 0]
    # Try: print(out)
    # Approach 2: mean of the centred products over the product of the std devs.
    l1 = iris[:, 0]
    l2 = iris[:, 2]
    out = ((l1 - l1.mean()) * (l2 - l2.mean())).mean() / (l1.std() * l2.std())
    # Try: print(out)

    # 20. Replace: set 20 random cells to NaN, then overwrite every NaN with 0.
    iris_2d = np.genfromtxt(url, usecols=[0, 1, 2, 3], dtype='float', delimiter=',')
    iris_2d[
        np.random.randint(150, size=20),
        np.random.randint(4, size=20)
    ] = np.nan

    # Try: print(iris_2d[:30])
    np.putmask(iris_2d, np.isnan(iris_2d), 0)
    # Try: print(iris_2d[:30])
    
    # 21. np.unique (very useful): count how many rows belong to each species.
    names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
    iris = np.genfromtxt(url, dtype='object', delimiter=',')

    # Column 4 holds the species label of every row.
    species = np.array(iris[:, 4].tolist())
    # `out` is the pair (distinct labels, occurrences of each label).
    out = np.unique(species, return_counts=True)
    # Try: print(out)
    
    # 22. Mapping: bin column 2 (petal length) and turn each bin number into a label.
    # np.digitize returns i when bins[i-1] <= value < bins[i], so with the edges
    # [0, 3, 5, 10]: 1 -> [0, 3), 2 -> [3, 5), 3 -> [5, 10), 4 -> 10 and above.
    petal_length_bin = np.digitize(iris[:, 2].astype('float'), [0, 3, 5, 10])
    # Try: print(petal_length_bin)
    # Bin number -> label (the 'medium' label was previously misspelled 'meidum').
    mp = {1: 'small', 2: 'medium', 3: 'large', 4: np.nan}
    petal_length_bin_cat = [mp[x] for x in petal_length_bin]
    # Try: print(petal_length_bin_cat)
    
    # 23. Append a computed column: pi * row[0] * row[2]**2 / 3.
    iris_2d = np.genfromtxt(url, dtype='object', delimiter=',')
    r0 = iris_2d[:, 0].astype(float)
    r2 = iris_2d[:, 2].astype(float)
    x = np.pi * r0 * (r2 ** 2) / 3
    # Keep the four measurement columns and attach x as a fifth column.
    iris_2d = np.column_stack((iris_2d[:, :4], x))
    # Try: print(iris_2d)
    
    # 24. Weighted sampling: draw 2000 values from {0, 1, 2} with the given
    #     probabilities and count how often each value was drawn.
    np.random.seed(None)
    x = np.arange(3)
    y = np.random.choice(x, size=2000, p=[0.1, 0.3, 0.6])
    count = np.unique(y, return_counts=True)
    # Try: print(count[1] / 2000)   # observed frequencies

    # 25. searchsorted: index at which a value can be inserted into a sorted array.
    x = np.linspace(0, 1, 11)
    y = x.searchsorted(0.21)
    # Try: print(x, y)
    
    # 26. Second-longest petal length (column 2) among the Iris-setosa rows.
    iris_2d = np.genfromtxt(url, dtype='object', delimiter=',')
    setosa_r2 = iris_2d[iris_2d[:, 4] == b'Iris-setosa', 2].astype(float)
    # np.unique returns the distinct values in ascending order, so [-2] is the
    # second-largest distinct value.
    x = np.unique(setosa_r2)[-2]
    # Try: print(x)

    # 27. Most frequent value in column 2 (petal length).
    x = np.unique(iris_2d[:, 2], return_counts=True)
    val = x[0][x[1].argmax()]
    # Try: print(val)

    # 28. Position of the first value greater than 1.0 in column 3 (petal width).
    x = np.argwhere(iris_2d[:, 3].astype(float) > 1.0)
    # Try: print(x[0])   # first matching position
    
    # 29. np.clip: limit every value of `a` to the range [10, 30].
    np.random.seed(100)
    a = np.random.uniform(1, 50, 20)
    # Approach 1 (in place, modifies `a` itself):
    # a[a>30] = 30
    # a[a<10] = 10
    # Approach 2: nested np.where. It returns a new array, so the result has to
    # be stored; previously it was computed and thrown away.
    out = np.where(a < 10, 10, np.where(a > 30, 30, a))
    # Approach 3: np.clip also returns a new array and leaves `a` untouched.
    out = np.clip(a, a_min=10, a_max=30)
    # Try: print(out)
    
    # 30. np.argsort: positions of the five largest values.
    np.random.seed(100)
    a = np.random.randint(1, 50, 20)
    pos = np.argsort(a)[-5:]
    # Try: print(pos)

    # 31. np.partition: a partial sort in the spirit of quicksort. The element
    #     that belongs at index kth in sorted order (the (k+1)-th smallest) is
    #     put there, smaller values go before it and larger values after it.
    x = np.array([1, 4, 2, 7, 5, 3])
    # Try: print(np.partition(x, kth=1))
    # e.g. [1, 2, 4, 7, 5, 3] -- split around the value 2
    
    # 32. Count, for every row, how often each distinct value of the array occurs.
    np.random.seed(100)
    arr = np.random.randint(1,11,size=(6, 10))
    def f(arr):
        """Return per-row occurrence counts of every distinct value in ``arr``.

        Args:
            arr: 2-D array-like whose rows are counted independently.

        Returns:
            A list with one entry per row. Each entry is a list of Python ints
            aligned with ``np.unique(arr)``: element ``k`` is the number of
            times the k-th smallest distinct value of ``arr`` occurs in that
            row (0 when it does not occur).
        """
        # Distinct values of the whole array, computed once instead of once
        # per row inside the loop.
        values = np.unique(arr)
        result = []
        for row in arr:
            present, counts = np.unique(row, return_counts=True)
            row_counts = np.zeros(len(values), dtype=int)
            # Every value in a row is also in `values`, and both arrays are
            # sorted, so searchsorted returns the exact slot of each one.
            row_counts[np.searchsorted(values, present)] = counts
            # tolist() yields plain Python ints without converting a
            # 1-element array to a scalar, which NumPy has deprecated.
            result.append(row_counts.tolist())
        return result
    # Try: print(f(arr))
    
    # 33. Convert array_of_arrays into a flat, linear 1-D array.
    arr1 = np.arange(3)
    arr2 = np.arange(3,7)
    arr3 = np.arange(7,10)
    # The three pieces have lengths 3, 4 and 3. Recent NumPy versions raise
    # ValueError when asked to build such a ragged array implicitly, so the
    # container is created explicitly as a 1-D object array.
    array_of_arrays = np.array([arr1, arr2, arr3], dtype=object)
    # Note: array_of_arrays.flatten() would not flatten the inner arrays; the
    # container itself is already 1-D.
    x = np.concatenate(array_of_arrays)
    # Approach 2:
    # x = np.array([a for arr in array_of_arrays for a in arr])
    print(x)
    
  • 相关阅读:
    2013,爱上暗色调
    [转]Sublime Text 使用介绍、全套快捷键及插件推荐
    委托、事件
    ASP.NET中常用的三十三种代码 .NET技术 / ASP.NET
    台湾综艺节目
    C# 中的委托和事件
    ASP.net 静态化页面之的URL重写
    源码
    毕业5年决定你的一生
    常见错误 不能打开注册表关键字
  • 原文地址:https://www.cnblogs.com/xidian-mao/p/11853396.html
Copyright © 2011-2022 走看看