zoukankan      html  css  js  c++  java
  • python 解析XML文件

    用 Python 比较高效地解析 XML 文件:对比 ElementTree 整体加载与 iterparse 增量解析两种方式

    参考 http://codingpy.com/article/parsing-xml-using-python/

    try:
        import xml.etree.cElementTree as ET
    except ImportError:
        import xml.etree.ElementTree as ET
    import time
    
    
    def parse_poi_by_elementTree(filepath):
        """Count POI elements by loading the whole document into an ElementTree.

        The elapsed wall-clock time is printed before returning.

        Args:
            filepath: Path of the XML file to parse.

        Returns:
            A ``(pois_element_num, vde_poi)`` tuple. ``pois_element_num`` is
            the raw ``Num`` attribute of the first ``Pois`` element (a string,
            or ``None`` when the attribute is absent), or ``0`` when the
            document has no ``Pois`` element at all. ``vde_poi`` is the number
            of ``Poi`` elements found in the document.
        """
        t0 = time.time()

        tree = ET.ElementTree(file=filepath)

        # The builtin next() with a default works on Python 2 and 3 alike and
        # avoids a StopIteration when the document has no Pois element.
        pois_element = next(tree.iter(tag='Pois'), None)
        # Compare against None explicitly: a childless Element is falsy.
        if pois_element is not None:
            pois_element_num = pois_element.get('Num')
        else:
            pois_element_num = 0

        # Count lazily instead of materialising a list of every Poi element.
        vde_poi = sum(1 for _ in tree.iter(tag='Poi'))

        cost_time = time.time() - t0
        print('parse_poi_by_elementTree time cost is %s' % cost_time)
        return pois_element_num, vde_poi
    
    
    def parse_poi_by_iterparse(filepath):
        """Count POI elements incrementally with ``ET.iterparse``.

        Elements are cleared as soon as they have been inspected. The elapsed
        wall-clock time is printed before returning.

        Args:
            filepath: Path of the XML file to parse.

        Returns:
            A ``(pois_element_num, vde_poi)`` tuple. ``pois_element_num`` is
            the ``Num`` attribute of the last ``Pois`` element seen, converted
            to ``int``; it stays ``0`` when there is no ``Pois`` element or
            the attribute is missing. ``vde_poi`` is the number of ``Poi``
            elements found in the document.

        Raises:
            ValueError: If a ``Num`` attribute is present but not an integer.
        """
        t0 = time.time()

        pois_element_num = 0
        vde_poi = 0
        # With no ``events`` argument iterparse reports only 'end' events, so
        # every element is complete by the time it is seen here.
        for _event, elem in ET.iterparse(filepath):
            if elem.tag == 'Poi':
                vde_poi += 1
            elif elem.tag == 'Pois':
                declared = elem.get('Num')
                # Keep the default instead of failing in int(None) when the
                # attribute is absent.
                if declared is not None:
                    pois_element_num = int(declared)

            # Drop the element's children, text and attributes once handled.
            elem.clear()

        cost_time = time.time() - t0
        print('parse_poi_by_iterparse time cost is %s' % cost_time)
        return pois_element_num, vde_poi
    
    
    from statistic import StatisticItem, XML_STREET, XML_POI
    import os
    
    
    def parse_street_xml_by_ET(street_file):
        """Collect street counts from an XML file using ``ET.iterparse``.

        Args:
            street_file: Path of the street XML file.

        Returns:
            ``StatisticItem(XML_STREET, [vde_streets, street_num])`` where
            ``vde_streets`` is the number of ``Street`` elements actually
            found and ``street_num`` is the integer ``Num`` attribute of the
            last ``Streets`` element seen (``0`` when there is none or the
            attribute is missing). When ``street_file`` does not exist the
            counts are ``[0, 0]``.

        Raises:
            ValueError: If a ``Num`` attribute is present but not an integer.
        """
        if not os.path.exists(street_file):
            return StatisticItem(XML_STREET, [0, 0])

        street_num = 0
        vde_streets = 0  # actual count

        for _event, elem in ET.iterparse(street_file):
            if elem.tag == 'Street':
                vde_streets += 1
            elif elem.tag == 'Streets':
                declared = elem.get('Num')
                # Keep the default instead of failing in int(None) when the
                # attribute is absent.
                if declared is not None:
                    street_num = int(declared)

            # Drop the element's children, text and attributes once handled.
            elem.clear()
        return StatisticItem(XML_STREET, [vde_streets, street_num])
    
    
    def parse_poi_xml_by_ET(poi_file):
        """Collect POI counts from an XML file using ``ET.iterparse``.

        Args:
            poi_file: Path of the POI XML file.

        Returns:
            ``StatisticItem(XML_POI, [vde_pois, poi_num])`` where ``vde_pois``
            is the number of ``Poi`` elements actually found and ``poi_num``
            is the integer ``Num`` attribute of the last ``Pois`` element seen
            (``0`` when there is none or the attribute is missing). When
            ``poi_file`` does not exist the counts are ``[0, 0]``.

        Raises:
            ValueError: If a ``Num`` attribute is present but not an integer.
        """
        if not os.path.exists(poi_file):
            return StatisticItem(XML_POI, [0, 0])

        poi_num = 0
        vde_pois = 0  # actual count

        for _event, elem in ET.iterparse(poi_file):
            if elem.tag == 'Poi':
                vde_pois += 1
            elif elem.tag == 'Pois':
                declared = elem.get('Num')
                # Keep the default instead of failing in int(None) when the
                # attribute is absent.
                if declared is not None:
                    poi_num = int(declared)

            # Drop the element's children, text and attributes once handled.
            elem.clear()
        return StatisticItem(XML_POI, [vde_pois, poi_num])
    
    
    if __name__ == '__main__':
        # Manual comparison of the two POI parsers on one sample file.
        # NOTE(review): the path separators appear to have been lost when this
        # snippet was published -- restore them before running.
        # C:UsersshchshanDesktopvdeState_14120002POI_1414000018.xml
        # C:UsersshchshanDesktopvdeState_14120001POI_1414000001.xml
        sample_poi_file = r'C:UsersshchshanDesktopvdeState_14120001POI_1414000001.xml'
        # A parenthesised single argument prints the same on Python 2 and 3.
        print(parse_poi_by_elementTree(sample_poi_file))
        print(parse_poi_by_iterparse(sample_poi_file))
  • 相关阅读:
    2.2、Dstreams数据源之高级数据源
    配置git 环境变量
    AngularJS的 $resource服务 关于CRUD操作
    如何安装和使用Karma-Jasmine
    ui-router 留存
    angular的service与factory
    留存- angularjs 弹出框 $modal
    js 的eval()方法 计算某个字符串,并执行其中的的 JavaScript 代码;
    javascript函数作用域和提前声明
    npm 全局环境变量配置
  • 原文地址:https://www.cnblogs.com/dasheng-maritime/p/7491171.html
Copyright © 2011-2022 走看看