import numpy as np

# Build a 2x3 integer array. The builtin `int` is used for the dtype because
# the `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24.
# `int` maps to NumPy's default integer type, whose width is platform-dependent.
array = np.array([[1, 2, 3], [4, 5, 6]], dtype=int)
print(array)
print(array.ndim)    # number of dimensions (axes)
print(array.size)    # total number of elements
print(array.shape)   # length of each axis: array.shape[0] is 2, array.shape[1] is 3
print(array.dtype)   # element data type
print(len(array))    # length of the first axis, i.e. the number of rows
print(array.reshape(1, 6))  # reshaped result; `array` itself is left unchanged

a = np.arange(4)     # half-open range: 0 is included, 4 is excluded
print(a)
print(a < 2)         # element-wise comparison yields a boolean mask
输出:
[[1 2 3] [4 5 6]] 2 6 (2, 3) int32 2 [[1 2 3 4 5 6]] [0 1 2 3] [ True True False False]
# NumPy: element-wise multiplication versus matrix multiplication.
a = np.ones((2, 2))
b = np.array([[1, 2], [3, 4]])
print(a * b)         # element-wise product: each entry of a times the matching entry of b
print(np.dot(a, b))  # matrix product, function form
print(a.dot(b))      # matrix product, method form (same result as np.dot)
输出
[[1. 2.] [3. 4.]] [[4. 6.] [4. 6.]] [[4. 6.] [4. 6.]]
# Reductions along an axis. The matrix is random and unseeded, so the printed
# numbers differ from run to run.
a = np.random.random((2, 4))  # 2x4 floats drawn from the half-open interval [0, 1)
print(a)
# axis=0 reduces down each column (vertical direction);
# axis=1 reduces across each row (horizontal direction).
print(np.sum(a, axis=1))  # one sum per row
print(np.min(a, axis=0))  # one minimum per column
print(np.max(a, axis=1))  # one maximum per row
输出
[[0.64231375 0.18094835 0.1069843 0.99325735] [0.58826183 0.45672993 0.31294667 0.56790276]] [1.92350376 1.92584119] [0.58826183 0.18094835 0.1069843 0.56790276] [0.99325735 0.58826183]
# Common statistics, ordering and iteration helpers on a 3x4 matrix holding 2..13.
A = np.arange(2, 14).reshape((3, 4))
print(A)
print(np.mean(A))        # mean of all elements
print(np.median(A))      # median of all elements
print(np.argmin(A))      # position of the smallest value in the flattened array
print(np.argmax(A))      # position of the largest value in the flattened array
print(np.cumsum(A))      # running total over the flattened array
print(np.diff(A))        # differences between neighbouring elements within each row
print(np.sort(A))        # sorts every row independently
print(np.transpose(A))   # transpose
print(A.T)               # transpose, attribute form
print(np.clip(A, 3, 5))  # clamp values into [3, 5]: below 3 becomes 3, above 5 becomes 5

# Iterating over an array walks its rows, so iterate the transpose to visit columns.
for col in A.T:
    print(col)

print(A.flatten())       # 1-D copy of all elements
for item in A.flat:      # A.flat is an iterator over the individual elements
    print(item)
输出
[[ 2 3 4 5] [ 6 7 8 9] [10 11 12 13]] 7.5 7.5 0 11 [ 2 5 9 14 20 27 35 44 54 65 77 90] [[1 1 1] [1 1 1] [1 1 1]] [[ 2 3 4 5] [ 6 7 8 9] [10 11 12 13]] [[ 2 6 10] [ 3 7 11] [ 4 8 12] [ 5 9 13]] [[ 2 6 10] [ 3 7 11] [ 4 8 12] [ 5 9 13]] [[3 3 4 5] [5 5 5 5] [5 5 5 5]] [ 2 6 10] [ 3 7 11] [ 4 8 12] [ 5 9 13] [ 2 3 4 5 6 7 8 9 10 11 12 13] 2 3 4 5 6 7 8 9 10 11 12 13
# pandas: label-based and position-based selection, assignment, and NaN handling.
import pandas as pd
import numpy as np

dates = pd.date_range('20201004', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
# A single column can also be selected with df['A'] or df.A,
# and a run of rows with a slice such as df[0:3].

# loc: select by label.
print(df.loc['2020-10-05'])
print(df.loc[:, ['A', 'D']])

# iloc: select by integer position.
print(df.iloc[[1, 3, 5], 1:3])

# Overwrite individual elements.
df.iloc[2, 2] = 555
df.loc['2020-10-05', 'A'] = 999
print(df)

# Conditional assignment: set B to 0 wherever A > 16. One .loc call is used
# instead of the chained form df.B[mask] = 0, which assigns into an
# intermediate object and does not update df under pandas copy-on-write.
df.loc[df.A > 16, 'B'] = 0
print(df)

# Handling missing (NaN) data.
# NaN cannot be stored in an integer column, so C and D are cast to float
# explicitly first; newer pandas deprecates silent upcasting on assignment.
df = df.astype({'C': float, 'D': float})
df.iloc[2, 3] = np.nan
df.iloc[1, 2] = np.nan
print(df)
print(df.dropna(axis=0, how='any'))  # drop rows; how is 'any' or 'all'
print(df.fillna(value=0))            # replace every NaN with 0
print(df.isnull())                   # boolean frame marking the NaN cells
print(df.isnull().values.any())      # True if at least one cell is NaN
输出
A 4 B 5 C 6 D 7 Name: 2020-10-05 00:00:00, dtype: int32 A D 2020-10-04 0 3 2020-10-05 4 7 2020-10-06 8 11 2020-10-07 12 15 2020-10-08 16 19 2020-10-09 20 23 B C 2020-10-05 5 6 2020-10-07 13 14 2020-10-09 21 22 A B C D 2020-10-04 0 1 2 3 2020-10-05 999 5 6 7 2020-10-06 8 9 555 11 2020-10-07 12 13 14 15 2020-10-08 16 17 18 19 2020-10-09 20 21 22 23 A B C D 2020-10-04 0 1 2 3 2020-10-05 999 0 6 7 2020-10-06 8 9 555 11 2020-10-07 12 13 14 15 2020-10-08 16 17 18 19 2020-10-09 20 0 22 23 A B C D 2020-10-04 0 1 2.0 3.0 2020-10-05 999 0 NaN 7.0 2020-10-06 8 9 555.0 NaN 2020-10-07 12 13 14.0 15.0 2020-10-08 16 17 18.0 19.0 2020-10-09 20 0 22.0 23.0 A B C D 2020-10-04 0 1 2.0 3.0 2020-10-07 12 13 14.0 15.0 2020-10-08 16 17 18.0 19.0 2020-10-09 20 0 22.0 23.0 A B C D 2020-10-04 0 1 2.0 3.0 2020-10-05 999 0 0.0 7.0 2020-10-06 8 9 555.0 0.0 2020-10-07 12 13 14.0 15.0 2020-10-08 16 17 18.0 19.0 2020-10-09 20 0 22.0 23.0 A B C D 2020-10-04 False False False False 2020-10-05 False False True False 2020-10-06 False False False True 2020-10-07 False False False False 2020-10-08 False False False False 2020-10-09 False False False False True