我们做模型时,经常需要把不同类别的数据点用不同的颜色标记出来,这时就需要用到numpy的布尔值索引,比如说:
# Import the plotting and clustering toolkits.
import matplotlib.pyplot as plt
# %matplotlib inline   <- IPython/Jupyter magic, not Python syntax; uncomment in a notebook
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
import numpy as np
import pandas as pd

# Cluster the iris measurements into three groups.
X = load_iris().data
clf = KMeans(n_clusters=3, random_state=0)
clf.fit(X)
label = clf.predict(X)

# Colour and legend label for each cluster, in cluster-index order.
colors_list = ['red', 'blue', 'green']
labels_list = ['1', '2', '3']
x = X
for i, (color, name) in enumerate(zip(colors_list, labels_list)):
    # Boolean indexing: `label == i` is a mask that keeps only the rows
    # assigned to cluster i; columns 0 and 1 are the two plotted features.
    in_cluster = label == i
    plt.scatter(x[in_cluster, 0], x[in_cluster, 1], s=100, c=color, label=name)

# Cluster centres, drawn larger and in black on top of the points.
plt.scatter(clf.cluster_centers_[:, 0], clf.cluster_centers_[:, 1],
            s=300, c='black', label='Centroids')
plt.legend()
# Columns 0 and 1 of the iris data are sepal length and sepal width; the
# previous income/spending labels belonged to a different dataset.
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.show()
下面介绍一下布尔值索引的用法以及例子:
# Build the sample data: seven names (with repeats) and a 7x4 array of
# random values, one row per name.
import numpy as np

names = np.array(["Bob", "Joe", "Will", "Bob", "Will", "Joe", "Joe"])
np1 = np.random.randn(7, 4)  # unseeded, so the values differ on every run
print(names)
print(np1)
['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
[[-0.48639127 0.65433213 -0.55487863 -1.41199409]
[ 0.63196795 0.8091702 0.52156802 -0.7897757 ]
[-0.2681137 1.58992163 -0.55409051 1.08141762]
[-0.358811 -0.89604778 1.3655091 0.56664758]
[ 1.22746199 0.47409502 -0.90998782 0.57441837]
[-0.23616238 -1.18461867 0.68114549 1.25545332]
[ 0.04767722 0.9783612 -0.70834461 -0.30132274]]
# Boolean indexing: comparing the array with a string gives one True/False
# per element; indexing np1 with that mask keeps the rows where it is True.
print(names == "Bob")
print(np1[names == "Bob"])
[ True False False True False False False]
[[-0.48639127 0.65433213 -0.55487863 -1.41199409]
[-0.358811 -0.89604778 1.3655091 0.56664758]]
# A boolean mask can be combined with a slice: the mask picks the rows,
# and 2: keeps the columns from index 2 onward.
print(np1[names == "Bob", 2:])
[[-0.55487863 -1.41199409]
[ 1.3655091 0.56664758]]
# A condition can be inverted with the inequality operator (!=) or by
# applying ~ to the mask, e.g. np1[~(names == "Bob")]. Do not use the minus
# sign: unary - on a boolean array raises TypeError in current NumPy.
print(np1[names != "Bob"])
[[ 0.63196795 0.8091702 0.52156802 -0.7897757 ]
[-0.2681137 1.58992163 -0.55409051 1.08141762]
[ 1.22746199 0.47409502 -0.90998782 0.57441837]
[-0.23616238 -1.18461867 0.68114549 1.25545332]
[ 0.04767722 0.9783612 -0.70834461 -0.30132274]]
# Combine several conditions with & (and) and | (or). Each comparison needs
# its own parentheses because & and | bind tighter than ==, and the Python
# keywords and/or do not work element-wise on arrays.
# Left as a bare expression so a notebook cell displays the result.
np1[(names == "Bob") | (names == "Will")]
array([[-0.48639127, 0.65433213, -0.55487863, -1.41199409],
[-0.2681137 , 1.58992163, -0.55409051, 1.08141762],
[-0.358811 , -0.89604778, 1.3655091 , 0.56664758],
[ 1.22746199, 0.47409502, -0.90998782, 0.57441837]])
# Set every negative value in np1 to 0. Assigning through a boolean mask
# modifies np1 in place, so the original negative values are lost.
np1[np1 < 0] = 0
print(np1)
[[0. 0.65433213 0. 0. ]
[0.63196795 0.8091702 0.52156802 0. ]
[0. 1.58992163 0. 1.08141762]
[0. 0. 1.3655091 0.56664758]
[1.22746199 0.47409502 0. 0.57441837]
[0. 0. 0.68114549 1.25545332]
[0.04767722 0.9783612 0. 0. ]]