zoukankan      html  css  js  c++  java
  • voc定位数据xml转coco数据集格式json

    代码里面集合了python处理xml的精髓

    1. voc定位数据xml转coco数据集格式json

    import xml.etree.ElementTree as ET
    import os
    import json
    
    # Skeleton of the COCO document; the helper functions below append to the
    # three lists while the XML files are parsed.
    coco = {
        'images': [],
        'type': 'instances',
        'annotations': [],
        'categories': [],
    }

    # category name -> category id, filled by addCatItem()
    category_set = {}
    # file names of the images registered so far, filled by addImgItem()
    image_set = set()

    # Running id counters. Each helper increments its counter *before* using
    # it, so the first ids handed out are 1, 20180000001 and 1 respectively.
    category_item_id = 0
    image_id = 20180000000
    annotation_id = 0
    
    def addCatItem(name):
        """Register a new category and return its id.

        Bumps the global ``category_item_id`` counter, appends a category
        record (with supercategory 'none') to ``coco['categories']`` and
        remembers the ``name -> id`` mapping in ``category_set``.
        """
        global category_item_id
        category_item_id += 1
        new_id = category_item_id
        coco['categories'].append({
            'supercategory': 'none',
            'id': new_id,
            'name': name,
        })
        category_set[name] = new_id
        return new_id
    
    def addImgItem(file_name, size):
        """Register an image and return its id.

        ``size`` is a mapping that must provide non-None 'width' and 'height'
        entries. On success the global ``image_id`` counter is incremented, an
        image record is appended to ``coco['images']`` and ``file_name`` is
        added to ``image_set``.

        Raises:
            Exception: if ``file_name``, the width or the height is None.
        """
        global image_id
        if file_name is None:
            raise Exception('Could not find filename tag in xml file.')
        width = size['width']
        if width is None:
            raise Exception('Could not find width tag in xml file.')
        height = size['height']
        if height is None:
            raise Exception('Could not find height tag in xml file.')

        image_id += 1
        new_id = image_id
        coco['images'].append({
            'id': new_id,
            'file_name': file_name,
            'width': width,
            'height': height,
        })
        image_set.add(file_name)
        return new_id
    
    def addAnnoItem(object_name, image_id, category_id, bbox):
        """Append one annotation record to ``coco['annotations']``.

        ``bbox`` is ``[x, y, w, h]``. The segmentation is the rectangle outline
        of that box, listed as left-top, left-bottom, right-bottom, right-top.
        ``object_name`` is accepted but not stored in the record. The global
        ``annotation_id`` counter is incremented and used as the record id.
        """
        global annotation_id
        left, top = bbox[0], bbox[1]
        right = left + bbox[2]
        bottom = top + bbox[3]
        outline = [
            left, top,       # left_top
            left, bottom,    # left_bottom
            right, bottom,   # right_bottom
            right, top,      # right_top
        ]
        box_area = bbox[2] * bbox[3]

        annotation_id += 1
        coco['annotations'].append({
            'segmentation': [outline],
            'area': box_area,
            'iscrowd': 0,
            'ignore': 0,
            'image_id': image_id,
            'bbox': bbox,
            'category_id': category_id,
            'id': annotation_id,
        })
    
    def parseXmlFiles(xml_path):
        """Convert every Pascal VOC ``.xml`` file in ``xml_path`` into COCO records.

        For each file the image is registered through ``addImgItem``, each
        ``<object>``'s ``<name>`` is mapped to a category id (creating the
        category through ``addCatItem`` on first sight) and each ``<bndbox>``
        directly under an ``<object>`` becomes one annotation through
        ``addAnnoItem`` with bbox ``[xmin, ymin, xmax - xmin, ymax - ymin]``.

        Elements are looked up by tag rather than by document position, so the
        result does not depend on the order of ``<filename>``, ``<size>`` and
        ``<object>`` (or of ``<name>`` and ``<bndbox>`` inside an object), and
        an image without any object is still registered. Files are processed
        in sorted name order so the generated ids are reproducible.

        Args:
            xml_path: directory containing the VOC annotation files; entries
                not ending in '.xml' are skipped.

        Raises:
            Exception: if the root element is not ``<annotation>``, the image
                file name was already registered, the filename/width/height is
                missing (raised by ``addImgItem``), a size or bndbox value is
                given twice, a bndbox is incomplete, or an object with a
                bndbox has no name.
        """
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # image/category/annotation ids stable between runs.
        for f in sorted(os.listdir(xml_path)):
            if not f.endswith('.xml'):
                continue

            xml_file = os.path.join(xml_path, f)
            print(xml_file)

            root = ET.parse(xml_file).getroot()
            if root.tag != 'annotation':
                raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))

            filename_elem = root.find('filename')
            file_name = filename_elem.text if filename_elem is not None else None

            size = {'width': None, 'height': None, 'depth': None}
            for size_elem in root.findall('size'):
                for dim in size_elem:
                    if dim.tag not in size:
                        # tolerate unknown children instead of failing with KeyError
                        continue
                    if size[dim.tag] is not None:
                        raise Exception('xml structure broken at size tag.')
                    size[dim.tag] = int(dim.text)

            # Duplicate detection must look at the registered image names
            # (image_set), not at the category map.
            if file_name in image_set:
                raise Exception('duplicated image: {}'.format(file_name))
            current_image_id = addImgItem(file_name, size)
            print('add image with {} and {}'.format(file_name, size))

            for obj in root.findall('object'):
                name_elem = obj.find('name')
                object_name = name_elem.text if name_elem is not None else None

                current_category_id = None
                if object_name is not None:
                    if object_name not in category_set:
                        current_category_id = addCatItem(object_name)
                    else:
                        current_category_id = category_set[object_name]

                for box_elem in obj.findall('bndbox'):
                    bndbox = dict.fromkeys(('xmin', 'ymin', 'xmax', 'ymax'))
                    for option in box_elem:
                        if option.tag not in bndbox:
                            continue
                        if bndbox[option.tag] is not None:
                            raise Exception('xml structure corrupted at bndbox tag.')
                        bndbox[option.tag] = int(option.text)

                    # A bndbox without <xmin> produces no annotation.
                    if bndbox['xmin'] is None:
                        continue
                    # A partially filled box would otherwise fail with an
                    # opaque TypeError in the subtractions below.
                    if None in bndbox.values():
                        raise Exception('xml structure corrupted at bndbox tag.')
                    if object_name is None:
                        raise Exception('xml structure broken at bndbox tag')

                    bbox = [
                        bndbox['xmin'],                    # x
                        bndbox['ymin'],                    # y
                        bndbox['xmax'] - bndbox['xmin'],   # w
                        bndbox['ymax'] - bndbox['ymin'],   # h
                    ]
                    print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id, bbox))
                    addAnnoItem(object_name, current_image_id, current_category_id, bbox)
    
    if __name__ == '__main__':
        # Convert every VOC xml file under xml_path and dump the accumulated
        # COCO document to json_file.
        xml_path = '/media/data_2/everyday/0709/maskrcnn/data/ret/xml_test'
        json_file = './instances_test.json'
        parseXmlFiles(xml_path)
        # Context manager so the output file is flushed and closed
        # deterministically instead of leaking the handle.
        with open(json_file, 'w') as fp:
            json.dump(coco, fp)
    

    我自己xml数据示例:

    <annotation>
       <folder>VOC2007</folder>
       <filename>mingpai_104.jpg</filename>
       <source>
           <database>The VOC2007 Database</database>
           <annotation>PASCAL VOC2007</annotation>
           <image>flickr</image>
           <flickrid>329145082</flickrid>
       </source>
       <owner>
           <flickrid>hiromori2</flickrid>
           <name>Hiroyuki Mori</name>
       </owner>
       <size>
           <width>997</width>
           <height>1000</height>
           <depth>3</depth>
       </size>
       <segmented>0</segmented>
       <object>
           <name>ZhiZaoNianYue</name>
           <pose>Unspecified</pose>
           <truncated>0</truncated>
           <difficult>0</difficult>
           <bndbox>
               <xmin>444</xmin>
               <ymin>679</ymin>
               <xmax>575</xmax>
               <ymax>720</ymax>
           </bndbox>
       </object>
       <object>
           <name>FDJ_PaiL</name>
           <pose>Unspecified</pose>
           <truncated>0</truncated>
           <difficult>0</difficult>
           <bndbox>
               <xmin>747</xmin>
               <ymin>545</ymin>
               <xmax>806</xmax>
               <ymax>586</ymax>
           </bndbox>
       </object>
       <object>
           <name>FDJ_Hao</name>
           <pose>Unspecified</pose>
           <truncated>0</truncated>
           <difficult>0</difficult>
           <bndbox>
               <xmin>448</xmin>
               <ymin>536</ymin>
               <xmax>562</xmax>
               <ymax>576</ymax>
           </bndbox>
       </object>
       <object>
           <name>CJHao</name>
           <pose>Unspecified</pose>
           <truncated>0</truncated>
           <difficult>0</difficult>
           <bndbox>
               <xmin>445</xmin>
               <ymin>364</ymin>
               <xmax>724</xmax>
               <ymax>432</ymax>
           </bndbox>
       </object>
    </annotation>
    

    生成的coco json格式示例:

    {
      "images": [
        {
          "file_name": "mingpai_1083.jpg",
          "height": 747,
          "id": 20180000001,
          "width": 843
        },
        {
          "file_name": "mingpai_1079.jpg",
          "height": 544,
          "id": 20180000002,
          "width": 826
        }
      ],
      "type": "instances",
      "annotations": [
        {
          "segmentation": [
            [
              526,
              315,
              526,
              356,
              701,
              356,
              701,
              315
            ]
          ],
          "area": 7175,
          "iscrowd": 0,
          "ignore": 0,
          "image_id": 20180000001,
          "bbox": [
            526,
            315,
            175,
            41
          ],
          "category_id": 1,
          "id": 1
        },
        {
          "segmentation": [
            [
              580,
              286,
              580,
              320,
              702,
              320,
              702,
              286
            ]
          ],
          "area": 4148,
          "iscrowd": 0,
          "ignore": 0,
          "image_id": 20180000001,
          "bbox": [
            580,
            286,
            122,
            34
          ],
          "category_id": 2,
          "id": 2
        },
        {
          "segmentation": [
            [
              299,
              292,
              299,
              331,
              384,
              331,
              384,
              292
            ]
          ],
          "area": 3315,
          "iscrowd": 0,
          "ignore": 0,
          "image_id": 20180000001,
          "bbox": [
            299,
            292,
            85,
            39
          ],
          "category_id": 3,
          "id": 3
        },
        {
          "segmentation": [
            [
              292,
              256,
              292,
              292,
              683,
              292,
              683,
              256
            ]
          ],
          "area": 14076,
          "iscrowd": 0,
          "ignore": 0,
          "image_id": 20180000001,
          "bbox": [
            292,
            256,
            391,
            36
          ],
          "category_id": 4,
          "id": 4
        },
        {
          "segmentation": [
            [
              171,
              335,
              171,
              377,
              318,
              377,
              318,
              335
            ]
          ],
          "area": 6174,
          "iscrowd": 0,
          "ignore": 0,
          "image_id": 20180000002,
          "bbox": [
            171,
            335,
            147,
            42
          ],
          "category_id": 1,
          "id": 5
        },
        {
          "segmentation": [
            [
              622,
              400,
              622,
              444,
              733,
              444,
              733,
              400
            ]
          ],
          "area": 4884,
          "iscrowd": 0,
          "ignore": 0,
          "image_id": 20180000002,
          "bbox": [
            622,
            400,
            111,
            44
          ],
          "category_id": 2,
          "id": 6
        },
        {
          "segmentation": [
            [
              188,
              385,
              188,
              418,
              273,
              418,
              273,
              385
            ]
          ],
          "area": 2805,
          "iscrowd": 0,
          "ignore": 0,
          "image_id": 20180000002,
          "bbox": [
            188,
            385,
            85,
            33
          ],
          "category_id": 3,
          "id": 7
        },
        {
          "segmentation": [
            [
              196,
              258,
              196,
              327,
              725,
              327,
              725,
              258
            ]
          ],
          "area": 36501,
          "iscrowd": 0,
          "ignore": 0,
          "image_id": 20180000002,
          "bbox": [
            196,
            258,
            529,
            69
          ],
          "category_id": 4,
          "id": 8
        }
      ],
      "categories": [
        {
          "supercategory": "none",
          "id": 1,
          "name": "ZhiZaoNianYue"
        },
        {
          "supercategory": "none",
          "id": 2,
          "name": "FDJ_PaiL"
        },
        {
          "supercategory": "none",
          "id": 3,
          "name": "FDJ_Hao"
        },
        {
          "supercategory": "none",
          "id": 4,
          "name": "CJHao"
        }
      ]
    }
    

    2.抽取voc里面的坐标信息到txt

    import xml.etree.ElementTree as ET
    import os
    import json
    # Directory holding the VOC xml files to convert. The trailing slash
    # matters: the script derives the output directory from this value with
    # two os.path.dirname() calls.
    xml_dir = "/media/data_1/bigdata_tmp/20190820chejiahao/deal/1_xml/"
    
    def xml2txt(txt_dir,xml_name):
        """Write the bounding boxes of one VOC xml file as corner coordinates.

        Reads ``xml_name`` from the module-level ``xml_dir`` and writes a text
        file with the same base name and a '.txt' extension into ``txt_dir``.
        Each ``<bndbox>`` found directly under an ``<object>`` produces one
        line of eight comma-separated values, the corners in the order
        top-left, top-right, bottom-right, bottom-left:

            xmin,ymin,xmax,ymin,xmax,ymax,xmin,ymax

        Every box is written (previously only the last one survived because a
        single dict was overwritten for each object). A file without objects
        yields an empty text file. A coordinate tag missing from a box is
        written as ``None``.

        Args:
            txt_dir: output directory; it must already exist.
            xml_name: file name of the annotation inside ``xml_dir``.
        """
        xml_file = os.path.join(xml_dir, xml_name)
        tree = ET.parse(xml_file)
        root = tree.getroot() #annotation
        print (root.tag)

        lines = []
        for elem in root: #folder filename source owner size segmented object
            if elem.tag != 'object':
                continue
            for sub_elem in elem:
                if sub_elem.tag != 'bndbox':
                    continue
                # fresh dict per box so values never leak between objects
                bndbox = dict.fromkeys(('xmin', 'ymin', 'xmax', 'ymax'))
                for x_y_sub_elem in sub_elem:
                    bndbox[x_y_sub_elem.tag] = int(x_y_sub_elem.text)

                corners = (
                    bndbox['xmin'], bndbox['ymin'],   # top-left
                    bndbox['xmax'], bndbox['ymin'],   # top-right
                    bndbox['xmax'], bndbox['ymax'],   # bottom-right
                    bndbox['xmin'], bndbox['ymax'],   # bottom-left
                )
                lines.append(','.join(str(value) for value in corners) + '\n')

        txt_name = os.path.splitext(xml_name)[0] + '.txt'
        with open(os.path.join(txt_dir, txt_name), 'w') as f:
            f.writelines(lines)
    
    
    
    # Output goes to a 'txt' directory that is a sibling of the xml directory.
    txt_dir = os.path.dirname(os.path.dirname(xml_dir)) + '/txt/'
    if not os.path.exists(txt_dir):
        os.makedirs(txt_dir)

    # Only hand real annotation files to the XML parser (the same filter
    # parseXmlFiles applies); any other entry in the directory would make
    # ET.parse fail. Sorted so the cnt numbering is stable between runs.
    list_xml = sorted(name for name in os.listdir(xml_dir) if name.endswith('.xml'))
    for cnt,xml_name in enumerate(list_xml):
        print("cnt=%d,xml=%s"%(cnt,xml_name))
        xml2txt(txt_dir, xml_name)
    
  • 相关阅读:
    经典面试题目C语言
    论C语言中二级指针和二维数组之间的区别
    判断单链表中是否有环找到环的入口节点
    论decltype和auto的区别
    在ubuntu下安装opencv
    C中有关引用和指针的异同
    (四)关于读文件的结束的判别方法(EOF和feof)以及区别
    (三)论sizeof与strlen之间的区别
    (二)C语言文本流和二进制流的区别
    (一)C的编译,printf,规范化
  • 原文地址:https://www.cnblogs.com/yanghailin/p/11189871.html
Copyright © 2011-2022 走看看