参考博客:
1.https://blog.csdn.net/Dhane/article/details/86661208
2.https://www.cnblogs.com/txx120/p/11487674.html
以及
3.微信公众号:《科普:目标检测Anchor是什么?怎么科学设置?[附代码]》
源码链接:使用K-means聚类合理设置anchor
https://github.com/AIZOOTech/object-detection-anchors
以及
4.【白话机器学习】算法理论+实战之K-Means聚类算法
为3中的源码添加了一些注释,kmeans.py
import numpy as np


def iou(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.

    The box and the clusters are described by (width, height) only, so the
    intersection is computed as min(width) * min(height).

    :param box: tuple or array, shifted to the origin (i. e. width and height)
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) where k is the number of clusters
    :raises ValueError: if any element-wise minimum width or height is zero,
        i.e. the box or one of the clusters has no area
    """
    x = np.minimum(clusters[:, 0], box[0])  # element-wise smaller width
    y = np.minimum(clusters[:, 1], box[1])  # element-wise smaller height
    # A zero minimum means a zero-sized side somewhere; the IoU would be
    # meaningless (and could divide by zero), so fail loudly instead.
    if np.any(x == 0) or np.any(y == 0):
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    # union = area(box) + area(cluster) - intersection
    iou_ = intersection / (box_area + cluster_area - intersection)

    return iou_


def avg_iou(boxes, clusters):
    """
    Calculates the average Intersection over Union (IoU) between a numpy array of boxes and k clusters.

    For every box the best (maximum) IoU over all clusters is taken, and the
    mean of those maxima is returned.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    return np.mean([np.max(iou(box, clusters)) for box in boxes])


def translate_boxes(boxes):
    """
    Translates all the boxes to the origin.

    Columns 2 and 3 are replaced by the absolute differences to columns 0
    and 1 respectively, then columns 0 and 1 are dropped. The input array
    is not modified.

    :param boxes: numpy array of shape (r, 4)
    :return: numpy array of shape (r, 2)
    """
    new_boxes = boxes.copy()
    # Whole-column arithmetic replaces the former per-row Python loop.
    new_boxes[:, 2] = np.abs(new_boxes[:, 2] - new_boxes[:, 0])
    new_boxes[:, 3] = np.abs(new_boxes[:, 3] - new_boxes[:, 1])
    return np.delete(new_boxes, [0, 1], axis=1)


def kmeans(boxes, k, dist=np.median, verbose=True):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.

    The distance between a box and a cluster centre is ``1 - IoU``. Centres
    are initialised with k distinct, randomly chosen rows of ``boxes`` and are
    re-estimated with ``dist`` until the box-to-cluster assignment stops
    changing.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: function used to recompute a centre from its member boxes;
        called as ``dist(members, axis=0)``
    :param verbose: when True (default) print progress/debug information
    :return: numpy array of shape (k, 2) with a floating-point dtype
    :raises ValueError: propagated from ``iou`` when a box or centre has a
        zero-sized side
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # There is no previous assignment before the first pass. Using None
    # (instead of an all-zero array) prevents a premature exit when every box
    # happens to be closest to cluster 0 in the very first iteration, which
    # would otherwise return the un-refined random seeds.
    last_clusters = None

    # Re-seeds NumPy's global RNG without a fixed seed, so results differ
    # between runs.
    np.random.seed()

    # Pick k distinct rows as the initial centres (fancy indexing copies, so
    # ``boxes`` itself is never modified below).
    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]
    # Centres must be able to hold fractional medians/means; with integer
    # boxes the assignment below would silently truncate them.
    if not np.issubdtype(clusters.dtype, np.floating):
        clusters = clusters.astype(np.float64)
    if verbose:
        print("clusters id {}".format(clusters))

    iter_num = 1
    while True:
        if verbose:
            print("Iteration: %d" % iter_num)
        iter_num += 1

        # Distance of every box to every current centre.
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)
        if verbose:
            print('{}'.format(distances.shape))

        # Index of the closest centre for each box.
        nearest_clusters = np.argmin(distances, axis=1)
        if verbose:
            print('nearest_clusters', nearest_clusters)
            print('{}'.format(type(nearest_clusters)))

        # Converged: no box changed its cluster since the previous pass.
        if last_clusters is not None and (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            if verbose:
                print('len(boxes[nearest_clusters == cluster]):{}'.format(len(members)))
            if len(members) == 0:
                if verbose:
                    print("Cluster %d is zero size" % cluster)
                # to avoid empty cluster: re-seed this centre with a random box
                clusters[cluster] = boxes[np.random.choice(rows)]
                continue

            # New centre = column-wise ``dist`` (median by default) of members.
            clusters[cluster] = dist(members, axis=0)
            if verbose:
                print('clusters[cluster]:{}'.format(clusters[cluster]))

        last_clusters = nearest_clusters

    return clusters
examples.py
import glob
import xml.etree.ElementTree as ET

import numpy as np
import matplotlib.pyplot as plt
from kmeans import kmeans, avg_iou

# ANNOTATIONS_PATH = "./data/pascalvoc07-annotations"
ANNOTATIONS_PATH = "./data/widerface-annotations"
CLUSTERS = 25
BBOX_NORMALIZE = False


def show_cluster(data, cluster, max_points=2000):
    '''
    Display the bounding-box size distribution and the generated anchors as a
    scatter plot, save it to "cluster.png" and show it.

    :param data: array whose columns 0 and 1 are box width and height
    :param cluster: array whose columns 0 and 1 are anchor width and height
    :param max_points: maximum number of boxes drawn; larger inputs are
        randomly subsampled
    '''
    if len(data) > max_points:
        # Sample without replacement so no box is plotted twice.
        idx = np.random.choice(len(data), max_points, replace=False)
        data = data[idx]
    # Boxes as small pale points, anchors as large red triangles.
    plt.scatter(data[:, 0], data[:, 1], s=5, c='lavender')
    plt.scatter(cluster[:, 0], cluster[:, 1], c='red', s=100, marker="^")
    plt.xlabel("Width")
    plt.ylabel("Height")
    plt.title("Bounding and anchor distribution")
    plt.savefig("cluster.png")
    plt.show()


def show_width_height(data, cluster, bins=50):
    '''
    Display histograms of box width, height and aspect ratio (height / width),
    save the figure to "shape-distribution.png" and show it.

    :param data: array whose columns 0 and 1 are box width and height
    :param cluster: not used by this function; kept for interface compatibility
    :param bins: number of histogram bins
    '''
    if data.dtype != np.float32:
        data = data.astype(np.float32)
    width = data[:, 0]
    print('width_in show_width_height)', len(width))
    height = data[:, 1]
    print('height in show_width_height', height)
    ratio = height / width

    # One figure, 1 row x 3 columns of sub-plots.
    plt.figure(1, figsize=(20, 6))
    plt.subplot(131)
    plt.hist(width, bins=bins, color='green')
    plt.xlabel('width')
    plt.ylabel('number')
    plt.title('Distribution of Width')

    plt.subplot(132)
    plt.hist(height, bins=bins, color='blue')
    plt.xlabel('Height')
    plt.ylabel('Number')
    plt.title('Distribution of Height')

    plt.subplot(133)
    plt.hist(ratio, bins=bins, color='magenta')
    plt.xlabel('Height / Width')
    plt.ylabel('number')
    plt.title('Distribution of aspect ratio(Height / Width)')
    plt.savefig("shape-distribution.png")
    plt.show()


def sort_cluster(cluster):
    '''
    Sort the clusters by area, small to big, and append the aspect ratio.

    :param cluster: array of shape (k, 2) holding (width, height)
    :return: float32 array of shape (k, 3): width, height, height / width
    '''
    if cluster.dtype != np.float32:
        cluster = cluster.astype(np.float32)
    print('cluster', cluster)
    area = cluster[:, 0] * cluster[:, 1]
    # argsort yields the indices that order the areas ascending.
    cluster = cluster[area.argsort()]
    print('sorted cluster', cluster)
    # Slices (not plain indices) keep the ratio as a (k, 1) column.
    ratio = cluster[:, 1:2] / cluster[:, 0:1]
    print('ratio', ratio)
    return np.concatenate([cluster, ratio], axis=-1)


def load_dataset(path, normalized=True):
    '''
    Load box sizes from Pascal VOC format xml files.

    :param path: directory that is searched (non-recursively) for xml files
    :param normalized: when True, divide coordinates by the image width /
        height read from the same file
    :return: numpy array with one (width, height) row per kept object
    '''
    dataset = []
    for xml_file in glob.glob("{}/*xml".format(path)):
        tree = ET.parse(xml_file)

        height = int(tree.findtext("./size/height"))
        width = int(tree.findtext("./size/width"))

        for obj in tree.iter("object"):
            xmin = int(obj.findtext("bndbox/xmin"))
            ymin = int(obj.findtext("bndbox/ymin"))
            xmax = int(obj.findtext("bndbox/xmax"))
            ymax = int(obj.findtext("bndbox/ymax"))
            if normalized:
                xmin = xmin / float(width)
                ymin = ymin / float(height)
                xmax = xmax / float(width)
                ymax = ymax / float(height)

            box_width = xmax - xmin
            box_height = ymax - ymin
            # Skip degenerate boxes: zero size would cause a divide-by-zero
            # error later, and inverted corners (negative size) would yield
            # meaningless IoU values without raising.
            if box_width <= 0 or box_height <= 0:
                continue
            dataset.append([box_width, box_height])

    return np.array(dataset)


print("Start to load data annotations on: %s" % ANNOTATIONS_PATH)
data = load_dataset(ANNOTATIONS_PATH, normalized=BBOX_NORMALIZE)
print('{}'.format(type(data)))
print("Start to do kmeans, please wait for a moment.")
out = kmeans(data, k=CLUSTERS)  # cluster centres found by k-means

out_sorted = sort_cluster(out)
print('out_sorted', out_sorted)
# Mean of each box's best IoU with any centre.
print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))

show_cluster(data, out, max_points=2000)

if out.dtype != np.float32:
    out = out.astype(np.float32)

print("Recommanded aspect ratios(width/height)")
print("Width Height Height/Width")
for anchor_width, anchor_height, anchor_ratio in out_sorted:
    print("%.3f %.3f %.1f" % (anchor_width, anchor_height, anchor_ratio))
show_width_height(data, out, bins=50)