import sys

import numpy as np

# Show only three digits after the decimal point.
np.set_printoptions(precision=3)

np.random.seed(100)
rand_arr = np.random.random([2, 2])

# suppress=True turns scientific notation OFF: small floats print in
# fixed-point form.
np.set_printoptions(suppress=True, precision=3)
print(rand_arr)  # [[0.543 0.278] [0.425 0.845]]

# suppress=False (the default) lets NumPy use scientific notation again.
np.set_printoptions(suppress=False)
rand_arr = rand_arr / 1e10  # shrink the values so scientific notation kicks in
print(rand_arr)  # [[5.434e-11 2.784e-11] [4.245e-11 8.448e-11]]

# Summarize (elide with "...") any array holding more than 6 elements.
np.set_printoptions(threshold=6)
# Print every element. NumPy rejects NaN as a threshold; sys.maxsize is the
# supported way to request an untruncated representation.
np.set_printoptions(threshold=sys.maxsize)
import numpy as np
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

# Load the mixed numeric/text dataset twice:
#   - dtype='object' yields a 2D array where every cell is a Python object
#   - dtype=None lets genfromtxt infer per-column types, yielding a 1D
#     array of records
iris = np.genfromtxt(url, delimiter=',', dtype='object')
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None)
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

# Preview the first three rows and the shape of each variant.
# Expected shapes: (150, 5) for iris, (150,) for iris_1d.
for dataset in (iris, iris_1d):
    print(dataset[:3])
    print(dataset.shape)
# How to extract a particular column from a 1D array of tuples?
# Collect the fifth field (index 4) of every record into its own array.
species = np.array([record[4] for record in iris_1d])
print(species[:2])  # [b'Iris-setosa' b'Iris-setosa']
# How to convert a 1D array of tuples to a 2D numpy array?
# Method 1: convert each row to a list and keep only the first 4 items.
# Slicing with [:4] (not [:]) drops the fifth field so that only the four
# leading columns end up in the 2D array.
iris_2d = np.array([row.tolist()[:4] for row in iris_1d])
print(iris_2d[:4])

# Alt method 2: import only the first 4 columns from the source URL.
iris_2d = np.genfromtxt(url, delimiter=',', usecols=[0, 1, 2, 3])
print(iris_2d[:4])
# Load only the first column as floats, then report its mean, median and
# standard deviation.
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
mu = np.mean(sepallength)
med = np.median(sepallength)
sd = np.std(sepallength)
print(mu, med, sd)
# How to normalize an array so the values range exactly between 0 and 1?
# Min-max scaling: subtract the minimum, then divide by the range (max - min).
Smax, Smin = sepallength.max(), sepallength.min()
S = (sepallength - Smin) / (Smax - Smin)
# or, equivalently, let NumPy compute the range. The function form np.ptp is
# used because the ndarray.ptp() method was removed in NumPy 2.0.
S = (sepallength - Smin) / np.ptp(sepallength)
print(S[:4])
# 30. How to compute the softmax score?
def softmax(x):
    """Compute softmax scores for the values in x.

    Args:
        x: Array-like of numbers.

    Returns:
        An array of exp(x) values normalized by their sum along axis 0.
    """
    # Subtract the overall maximum before exponentiating so the largest
    # exponent is 0 and np.exp cannot overflow; softmax is unchanged by a
    # constant shift of its input.
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)
# Softmax scores of the first three sepal lengths.
first_three_scores = softmax(sepallength[:3])
print(first_three_scores)
# How to find the percentile scores of a numpy array?
# 5th and 95th percentiles of the sepal lengths.
percentile_scores = np.percentile(sepallength, q=[5, 95])
print(percentile_scores)
# How to insert values at random positions in an array?
iris_2d = np.genfromtxt(url, delimiter=',', dtype='object')
print(iris_2d.shape)  # (150, 5)

# Method 1: np.where with a single argument returns the row and column
# indices of the truthy cells; sample 20 of each (with replacement) and
# overwrite the addressed cells with NaN.
i, j = np.where(iris_2d)
np.random.seed(200)
nan_rows = np.random.choice(i, 20)
nan_cols = np.random.choice(j, 20)
iris_2d[nan_rows, nan_cols] = np.nan
print(iris_2d[:4])
# [[b'5.1' b'3.5' b'1.4' b'0.2' b'Iris-setosa']
# [b'4.9' b'3.0' nan b'0.2' b'Iris-setosa']
# [b'4.7' b'3.2' b'1.3' b'0.2' b'Iris-setosa']
# [b'4.6' b'3.1' b'1.5' b'0.2' b'Iris-setosa']]
# Method 2: draw 20 random row indices in [0, 150) and 20 random column
# indices in [0, 4) directly, then set the addressed cells to NaN.
np.random.seed(100)
nan_rows = np.random.randint(150, size=20)
nan_cols = np.random.randint(4, size=20)
iris_2d[nan_rows, nan_cols] = np.nan
print(iris_2d[:4])
# [[b'5.1' b'3.5' b'1.4' b'0.2' b'Iris-setosa']
# [b'4.9' b'3.0' nan b'0.2' b'Iris-setosa']
# [b'4.7' b'3.2' b'1.3' b'0.2' b'Iris-setosa']
# [b'4.6' b'3.1' b'1.5' b'0.2' b'Iris-setosa']]
# How to find the number and position of missing values in a numpy array?
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0, 1, 2, 3])

# Blank out 20 randomly addressed cells (row index first, then column index).
nan_rows = np.random.randint(150, size=20)
nan_cols = np.random.randint(4, size=20)
iris_2d[nan_rows, nan_cols] = np.nan

# Count and locate the missing values in the first column.
missing_first_col = np.isnan(iris_2d[:, 0])
print(missing_first_col.sum())  # 5
print(np.where(missing_first_col))  # (array([ 38, 80, 106, 113, 121]),)
# How to filter a numpy array based on two or more conditions?
# Q. Filter the rows of iris_2d that have petallength (3rd column) > 1.5 and sepallength (1st column) < 5.0
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0, 1, 2, 3])
# Keep rows where petallength (3rd column) > 1.5 AND sepallength (1st column)
# < 5.0. The variable name keeps its original spelling in case code outside
# this section refers to it.
conditon = (iris_2d[:, 2] > 1.5) & (iris_2d[:, 0] < 5.0)
print(iris_2d[conditon][:4])
# 35. How to drop rows that contain a missing value from a numpy array?
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0, 1, 2, 3])
# Set 20 randomly addressed cells to NaN so there are rows to drop.
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan

# Method 1: boolean mask built in one vectorized step instead of a per-row
# Python loop.
# NOTE: despite its name, any_nan_in_row is True for rows WITHOUT any NaN;
# the name is kept in case code outside this section refers to it.
any_nan_in_row = ~np.isnan(iris_2d).any(axis=1)
print(iris_2d[any_nan_in_row][:5])

# Method 2: keep the rows whose NaN count is zero.
print(iris_2d[np.sum(np.isnan(iris_2d), axis=1) == 0][:5])