zoukankan      html  css  js  c++  java
  • Python解析生成XML-ElementTree VS minidom

    OS:Windows 7

    关键字:Python3.4,XML,ElementTree,minidom

    本文介绍用Python解析生成以下XML:

    <Persons>
        <Person>
            <Name>LDL</Name>
            <Description Language='English'><![CDATA[cdata text]]></Description>
        </Person>
        <Person>
            <Name>China</Name>
            <Description Language='Chinese'><![CDATA[cdata text]]></Description>
        </Person>
    </Persons>

    1.创建一个xml文件名为src.xml,内容如上,放到C:\temp

    2.使用ElementTree读取src.xml,并创建一个内容相同的xml名为target-tree.xml。

    ElementTreeSample.py如下:

    # -*- coding: utf-8 -*-
    """
    Sample of xml.etree.ElementTree
    
    @author: ldlchina
    """
    
    import os
    import sys
    import logging
    import traceback
    import xml.etree.ElementTree as ET
    import time
    
    def copy_node(src_node, target_node):
        """Recursively copy an ElementTree element into ``target_node``.

        Every attribute of ``src_node`` is set on ``target_node``. If
        ``src_node`` has child elements, a new child with the same tag is
        appended to ``target_node`` for each of them and filled in
        recursively. Otherwise (leaf element) the ``text`` of ``src_node``
        is copied.

        Note: ``text`` is only copied for leaf elements and ``tail`` is never
        copied, so whitespace between child elements is not carried over.
        """
        for name, value in src_node.items():
            target_node.set(name, value)

        if len(src_node) == 0:
            # Leaf element: the text content is the only thing left to copy.
            target_node.text = src_node.text
            return

        for src_child in src_node:
            # SubElement creates the new element and appends it in one step.
            copy_node(src_child, ET.SubElement(target_node, src_child.tag))
        
    def read_write_xml(src, target):
        """Parse ``src``, rebuild its tree via ``copy_node`` and write it to ``target``.

        Args:
            src: Path of the XML file to read.
            target: Path of the XML file to create.

        Prints the time spent in ``copy_node`` in milliseconds and logs the
        target path once the file has been written.
        """
        root = ET.parse(src).getroot()
        target_root = ET.Element(root.tag)

        # perf_counter() is a monotonic, high-resolution clock; time.time()
        # is too coarse to measure a copy this small.
        start_time = time.perf_counter()
        copy_node(root, target_root)
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        print('copy_node:' + str(elapsed_ms))

        ET.ElementTree(target_root).write(target)
        logging.info(target)
    
    def main():
        """Configure logging, run ``read_write_xml`` and report the elapsed time.

        The source and target paths are taken from the first two command-line
        arguments when both are given; otherwise the debugging defaults under
        ``C:/temp`` are used. Any ``Exception`` is logged with its traceback
        instead of being propagated. The function then waits for the user to
        press Enter.
        """
        try:
            current_file = os.path.realpath(__file__)

            # Configure logger. splitext() swaps only the extension, so a
            # '.py' elsewhere in the path is left alone and the log file can
            # never end up being the script itself.
            log_file = os.path.splitext(current_file)[0] + '.log'
            logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

            # Create console handler so log records also show up on screen
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            logging.getLogger('').addHandler(ch)

            if len(sys.argv) >= 3:
                src, target = sys.argv[1], sys.argv[2]
            else:
                # For debugging
                src = 'C:/temp/src.xml'
                target = 'C:/temp/target-tree.xml'

            # Generate results
            start_time = time.perf_counter()
            read_write_xml(src, target)
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            print('read_write_xml:' + str(elapsed_ms))
        except Exception:
            # logging.exception() appends the active traceback by itself.
            logging.exception('Unhandled error in main')

        input('Press any key to exit...')
    
    # Run only when executed as a script, not when the module is imported.
    if __name__ == '__main__':
        main()

     3.使用minidom读取src.xml,并创建一个内容相同的xml名为target-dom.xml。

    MinidomSample.py如下:

    # -*- coding: utf-8 -*-
    """
    Sample of xml.dom.minidom
    
    @author: ldlchina
    """
    
    import os
    import sys
    import logging
    import traceback
    import xml.dom.minidom as MD
    import time
    
    def get_text(n):
        """Return the joined ``data`` of the direct text and CDATA children of ``n``.

        Only immediate children are inspected; other node types are skipped.
        An empty string is returned when there is no matching child.
        """
        return "".join(
            child.data
            for child in n.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )
    
    def copy_node(target_doc, src_node, target_node):
        """Recursively copy a minidom node's attributes and children.

        Args:
            target_doc: Document used as the factory for the new nodes.
            src_node: Node (or Document) to copy from.
            target_node: Node (or Document) that receives the copies.

        Text, CDATA and element children are recreated in order under
        ``target_node``; children of any other node type are skipped.
        """
        # The attribute check is skipped for Document nodes.
        is_document = isinstance(src_node, MD.Document)
        if not is_document and src_node.hasAttributes():
            for attr_name, attr_value in src_node.attributes.items():
                target_node.setAttribute(attr_name, attr_value)

        for child in src_node.childNodes:
            kind = child.nodeType

            if kind == child.ELEMENT_NODE:
                clone = target_doc.createElement(child.nodeName)
                target_node.appendChild(clone)
                copy_node(target_doc, child, clone)
                continue

            if kind == child.TEXT_NODE:
                clone = target_doc.createTextNode(child.nodeValue)
            elif kind == child.CDATA_SECTION_NODE:
                clone = target_doc.createCDATASection(child.nodeValue)
            else:
                # Any other node type is not copied.
                continue
            target_node.appendChild(clone)
        
    def read_write_xml(src, target):
        """Parse ``src``, rebuild its DOM via ``copy_node`` and write it to ``target``.

        Args:
            src: Path of the XML file to read.
            target: Path of the XML file to create.

        Prints the time spent in ``copy_node`` in milliseconds and logs the
        target path once the file has been written.
        """
        doc = MD.parse(src)
        target_doc = MD.Document()

        # perf_counter() is a monotonic, high-resolution clock; time.time()
        # is too coarse to measure a copy this small.
        start_time = time.perf_counter()
        copy_node(target_doc, doc, target_doc)
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        print('copy_node: ' + str(elapsed_ms))

        # Write to file. The element's toxml() emits no XML declaration, so
        # readers will assume UTF-8; encode explicitly instead of relying on
        # the platform default. The with-block closes the file even if
        # serialisation or writing fails.
        with open(target, 'w', encoding='utf-8') as f:
            f.write(target_doc.documentElement.toxml())
        logging.info(target)
    
    def main():
        """Configure logging, run ``read_write_xml`` and report the elapsed time.

        The source and target paths are taken from the first two command-line
        arguments when both are given; otherwise the debugging defaults under
        ``C:/temp`` are used. Any ``Exception`` is logged with its traceback
        instead of being propagated. The function then waits for the user to
        press Enter.
        """
        try:
            current_file = os.path.realpath(__file__)

            # Configure logger. splitext() swaps only the extension, so a
            # '.py' elsewhere in the path is left alone and the log file can
            # never end up being the script itself.
            log_file = os.path.splitext(current_file)[0] + '.log'
            logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

            # Create console handler so log records also show up on screen
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            logging.getLogger('').addHandler(ch)

            if len(sys.argv) >= 3:
                src, target = sys.argv[1], sys.argv[2]
            else:
                # For debugging
                src = 'C:/temp/src.xml'
                target = 'C:/temp/target-dom.xml'

            # Generate results
            start_time = time.perf_counter()
            read_write_xml(src, target)
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            print('read_write_xml: ' + str(elapsed_ms))
        except Exception:
            # logging.exception() appends the active traceback by itself.
            logging.exception('Unhandled error in main')

        input('Press any key to exit...')
    
    # Run only when executed as a script, not when the module is imported.
    if __name__ == '__main__':
        main()

    4.运行ElementTreeSample.py,得到XML如下:

    <Persons><Person><Name>LDL</Name><Description Language="English">cdata text</Description></Person><Person><Name>China</Name><Description Language="Chinese">cdata text</Description></Person></Persons>

    5.运行MinidomSample.py,得到XML如下:

    <Persons>
        <Person>
            <Name>LDL</Name>
            <Description Language="English"><![CDATA[cdata text]]></Description>
        </Person>
        <Person>
            <Name>China</Name>
            <Description Language="Chinese"><![CDATA[cdata text]]></Description>
        </Person>
    </Persons>

    ElementTree VS minidom:

    1.ElementTree执行速度会比minidom快一些。

    2.ElementTree不能分析XML的换行和缩进。minidom可以。

    3.ElementTree不支持CDATA,minidom可以。

  • 相关阅读:
    图书管理系统---基于form组件和modelform改造添加和编辑
    Keepalived和Heartbeat
    SCAN IP 解释
    Configure Active DataGuard and DG BROKER
    Oracle 11gR2
    我在管理工作中積累的九種最重要的領導力 (李開復)
    公募基金公司超融合基础架构与同城灾备建设实践
    Oracle 11g RAC for LINUX rhel 6.X silent install(静默安装)
    11gR2 静默安装RAC 集群和数据库软件
    Setting Up Oracle GoldenGate 12
  • 原文地址:https://www.cnblogs.com/ldlchina/p/4469026.html
Copyright © 2011-2022 走看看