zoukankan      html  css  js  c++  java
  • Python解析生成XML-ElementTree VS minidom

    OS:Windows 7

    关键字:Python3.4,XML,ElementTree,minidom

    本文介绍用Python解析生成以下XML:

    <Persons>
        <Person>
            <Name>LDL</Name>
            <Description Language='English'><![CDATA[cdata text]]></Description>
        </Person>
        <Person>
            <Name>China</Name>
            <Description Language='Chinese'><![CDATA[cdata text]]></Description>
        </Person>
    </Persons>

    1.创建一个名为src.xml的XML文件,内容如上,放到C:\temp目录下。

    2.使用ElementTree读取src.xml,并创建一个内容相同的xml名为target-tree.xml。

    ElementTreeSample.py如下:

    # -*- coding: utf-8 -*-
    """
    Sample of xml.etree.ElementTree
    
    @author: ldlchina
    """
    
    import os
    import sys
    import logging
    import traceback
    import xml.etree.ElementTree as ET
    import time
    
    def copy_node(src_node, target_node):
        """Recursively mirror ``src_node`` onto ``target_node``.

        Every attribute of ``src_node`` is set on ``target_node``. If the
        source has child elements, a new element with the same tag is appended
        to the target for each child and filled in recursively; the source's
        own ``text`` is not copied in that case. A source without children has
        its ``text`` copied instead. ``tail`` text is never copied.
        """
        for attr_name, attr_value in src_node.items():
            target_node.set(attr_name, attr_value)

        children = list(src_node)
        if not children:
            # Leaf element: only the text content is left to carry over.
            target_node.text = src_node.text
            return

        for src_child in children:
            dst_child = ET.Element(src_child.tag)
            target_node.append(dst_child)
            copy_node(src_child, dst_child)
        
    def read_write_xml(src, target):
        """Parse ``src``, rebuild its tree with ``copy_node`` and write ``target``.

        The time spent in ``copy_node`` is printed in milliseconds, and the
        target path is logged once the file has been written.

        Args:
            src: Path of the XML file to read.
            target: Path of the XML file to create.
        """
        tree = ET.parse(src)
        root = tree.getroot()

        target_root = ET.Element(root.tag)
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring short intervals; time.time() is wall-clock time and can
        # jump or be too coarse for a copy this small.
        start_time = time.perf_counter()
        copy_node(root, target_root)
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        print('copy_node:' + str(elapsed_ms))

        target_tree = ET.ElementTree(target_root)
        target_tree.write(target)
        logging.info(target)
    
    def main():
        """Run the ElementTree copy sample and print how long it took.

        The source and target paths are taken from the first two command-line
        arguments when both are given; otherwise the debugging defaults under
        ``C:/temp`` are used. Log records go to a ``.log`` file next to this
        script and to the console. Any error is logged with its traceback
        instead of being raised, and the function then waits for input so a
        console window stays open.
        """
        # Defaults used when no paths are passed on the command line.
        src = 'C:/temp/src.xml'
        target = 'C:/temp/target-tree.xml'

        try:
            current_file = os.path.realpath(__file__)

            # Configure logger. splitext() swaps only the final extension;
            # str.replace('.py', '.log') would rewrite every '.py' in the path
            # and, for a script without '.py', would make the truncating log
            # file the script itself.
            log_file = os.path.splitext(current_file)[0] + '.log'
            logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

            # Create console handler so records are also shown on the console.
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            logging.getLogger('').addHandler(ch)

            if len(sys.argv) > 2:
                src, target = sys.argv[1], sys.argv[2]

            # Generate results
            start_time = time.perf_counter()
            read_write_xml(src, target)
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            print('read_write_xml:' + str(elapsed_ms))
        except Exception:
            # logging.exception() appends the active traceback itself; the
            # message only needs to say what was being attempted.
            logging.exception('Failed to copy %s to %s', src, target)

        input('Press any key to exit...')
    
    # Run the sample only when executed as a script, so importing this module
    # does not start the copy or block on input().
    if __name__ == '__main__':
        main()

     3.使用minidom读取src.xml,并创建一个内容相同的xml名为target-dom.xml。

    MinidomSample.py如下:

    # -*- coding: utf-8 -*-
    """
    Sample of xml.dom.minidom
    
    @author: ldlchina
    """
    
    import os
    import sys
    import logging
    import traceback
    import xml.dom.minidom as MD
    import time
    
    def get_text(n):
        """Return the joined data of the text and CDATA children of ``n``.

        Only the direct children of ``n`` are examined; children of any other
        node type are skipped. The result is an empty string when ``n`` has no
        text or CDATA children.
        """
        return "".join(
            child.data
            for child in n.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )
    
    def copy_node(target_doc, src_node, target_node):
        """Recursively copy ``src_node``'s attributes and children to ``target_node``.

        New nodes are created through ``target_doc``. Attributes are copied
        unless ``src_node`` is a ``Document``. Of the children, only element,
        text and CDATA-section nodes are reproduced; children of any other
        node type are skipped.
        """
        is_document = isinstance(src_node, MD.Document)
        if not is_document and src_node.hasAttributes():
            for attr_name, attr_value in src_node.attributes.items():
                target_node.setAttribute(attr_name, attr_value)

        for child in src_node.childNodes:
            kind = child.nodeType
            if kind == child.ELEMENT_NODE:
                clone = target_doc.createElement(child.nodeName)
                target_node.appendChild(clone)
                # Descend only into elements; text and CDATA are leaves.
                copy_node(target_doc, child, clone)
            elif kind == child.TEXT_NODE:
                target_node.appendChild(target_doc.createTextNode(child.nodeValue))
            elif kind == child.CDATA_SECTION_NODE:
                target_node.appendChild(target_doc.createCDATASection(child.nodeValue))
        
    def read_write_xml(src, target):
        """Parse ``src`` with minidom, copy it node by node and write ``target``.

        The time spent in ``copy_node`` is printed in milliseconds, and the
        target path is logged once the file has been written.

        Args:
            src: Path of the XML file to read.
            target: Path of the XML file to create.
        """
        doc = MD.parse(src)
        target_doc = MD.Document()

        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring short intervals; time.time() is wall-clock time and can
        # jump or be too coarse for a copy this small.
        start_time = time.perf_counter()
        copy_node(target_doc, doc, target_doc)
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        print('copy_node: ' + str(elapsed_ms))

        # Write to file. The output has no XML declaration, so readers will
        # assume UTF-8; encode explicitly instead of relying on the locale
        # default. The with-block closes the file even if the write fails.
        with open(target, 'w', encoding='utf-8') as f:
            f.write(target_doc.documentElement.toxml())
        logging.info(target)
    
    def main():
        """Run the minidom copy sample and print how long it took.

        The source and target paths are taken from the first two command-line
        arguments when both are given; otherwise the debugging defaults under
        ``C:/temp`` are used. Log records go to a ``.log`` file next to this
        script and to the console. Any error is logged with its traceback
        instead of being raised, and the function then waits for input so a
        console window stays open.
        """
        # Defaults used when no paths are passed on the command line.
        src = 'C:/temp/src.xml'
        target = 'C:/temp/target-dom.xml'

        try:
            current_file = os.path.realpath(__file__)

            # Configure logger. splitext() swaps only the final extension;
            # str.replace('.py', '.log') would rewrite every '.py' in the path
            # and, for a script without '.py', would make the truncating log
            # file the script itself.
            log_file = os.path.splitext(current_file)[0] + '.log'
            logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

            # Create console handler so records are also shown on the console.
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            logging.getLogger('').addHandler(ch)

            if len(sys.argv) > 2:
                src, target = sys.argv[1], sys.argv[2]

            # Generate results
            start_time = time.perf_counter()
            read_write_xml(src, target)
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            print('read_write_xml: ' + str(elapsed_ms))
        except Exception:
            # logging.exception() appends the active traceback itself; the
            # message only needs to say what was being attempted.
            logging.exception('Failed to copy %s to %s', src, target)

        input('Press any key to exit...')
    
    # Run the sample only when executed as a script, so importing this module
    # does not start the copy or block on input().
    if __name__ == '__main__':
        main()

    4.运行ElementTreeSample.py,得到XML如下:

    <Persons><Person><Name>LDL</Name><Description Language="English">cdata text</Description></Person><Person><Name>China</Name><Description Language="Chinese">cdata text</Description></Person></Persons>

    5.运行MinidomSample.py,得到XML如下:

    <Persons>
        <Person>
            <Name>LDL</Name>
            <Description Language="English"><![CDATA[cdata text]]></Description>
        </Person>
        <Person>
            <Name>China</Name>
            <Description Language="Chinese"><![CDATA[cdata text]]></Description>
        </Person>
    </Persons>

    ElementTree VS minidom:

    1.ElementTree执行速度会比minidom快一些。

    2.ElementTree不能分析XML的换行和缩进。minidom可以。

    3.ElementTree不支持CDATA,minidom可以。

  • 相关阅读:
    P3391 【模板】文艺平衡树(Splay)
    P4198 楼房重建
    P1491 集合位置
    P3957 跳房子
    P4016 负载平衡问题
    bzoj1077: [SCOI2008]天平 差分约束
    bzoj1151: [CTSC2007]动物园zoo 状压dp
    bzoj1076: [SCOI2008]奖励关 状压dp
    bzoj1226: [SDOI2009]学校食堂Dining 状压dp
    bzoj1879: [Sdoi2009]Bill的挑战 状压dp
  • 原文地址:https://www.cnblogs.com/ldlchina/p/4469026.html
Copyright © 2011-2022 走看看