zoukankan      html  css  js  c++  java
  • 快速解析超大XML不占用太大内存

     1 import xml.etree.ElementTree as ET
     2 
     3 def parse_res(xml_file):
     4     res_dic = {}
     5     tmp_lst_lev1 = []
     6     tmp_lst_lev2 = []
     7     add_flag = False
     8     for event, elem in ET.iterparse(xml_file):
     9         if event == 'end':
    10             if elem.tag == 'Item':
    11                 tmp_lst_lev1.append(dict(elem.attrib))
    12             elif elem.tag == 'Enum':
    13                 enum_str = ''.join([ ''.join(['[', item['value'], '=', item['name'], ']']) for item in tmp_lst_lev1])
    14                 res_enum = {}
    15                 res_enum['id'] = elem.attrib['id']
    16                 res_enum['name'] = enum_str
    17                 tmp_lst_lev2.append(res_enum)
    18                 tmp_lst_lev1 = []
    19             elif elem.tag == 'EnumRes':
    20                 res_dic['EnumRes'] = {}
    21                 tmp_dic = res_dic['EnumRes']
    22                 for item in tmp_lst_lev2:
    23                     tmp_dic[ item['id'].split('.')[1] ] = item['name']
    24                 tmp_lst_lev2 = []
    25             elif elem.tag == 'MeasUnitRes' or elem.tag == 'CounterNameRes' or elem.tag == 'CounterUnitRes':
    26                 res_dic[elem.tag] = {}
    27                 tmp_dic = res_dic[elem.tag]
    28                 for item in tmp_lst_lev1:
    29                     tmp_dic[ item['id'].split('.')[1] ] = item['name']
    30                 tmp_lst_lev1 = []
    31             #CommonInfo.Resource.xml
    32             elif elem.tag == 'DevTypeNameRes' or elem.tag == 'VendorRes' or elem.tag == 'MocRes':
    33                 res_dic[elem.tag] = {}
    34                 tmp_dic = res_dic[elem.tag]
    35                 for item in tmp_lst_lev1:
    36                     tmp_dic['id'] = item['id'].split('.')[1]
    37                     tmp_dic['name'] = item['name']
    38                 tmp_lst_lev1 = []
    39             #StaticList.xml
    40             elif elem.tag == 'param':
    41                 if 'alarmId' == elem.attrib['name']:
    42                     id = elem.text
    43             elif elem.tag == 'alarm':
    44                 tmp_lst_lev1.append([id, elem.attrib['name']])
    45             elif elem.tag == 'alarms':
    46                 res_dic[elem.tag] = {}
    47                 tmp_dic = res_dic[elem.tag]
    48                 for item in tmp_lst_lev1:
    49                     tmp_dic[ item[0] ] = item[1]
    50                 tmp_lst_lev1 = []
    51         elem.clear()   #关键在这一名,处理完节点及时清理内存
    52     return res_dic
  • 相关阅读:
    【题解】P2569 [SCOI2010]股票交易
    【题解】P3354 [IOI2005]Riv 河流
    入职阿里蚂蚁三个月有感
    搞懂G1垃圾收集器
    MySql分库分表与分区的区别和思考
    Kafka源码分析及图解原理之Broker端
    Kafka源码分析及图解原理之Producer端
    Oracle GoldenGate mysql To Kafka上车记录
    从动态代理到Spring AOP(中)
    从动态代理到Spring AOP(上)
  • 原文地址:https://www.cnblogs.com/bongem/p/6128606.html
Copyright © 2011-2022 走看看