zoukankan      html  css  js  c++  java
  • python解析xml模块封装代码

    python中解析xml文件的模块用法,以及对模块封装的方法。
    原文转自:http://www.jbxue.com/article/16586.html

    有如下的xml文件:
    <?xml version="1.0" encoding="utf-8" ?> 

    <root>
    <childs>
    <child name='first' >1</child>
    <child value="2">2</child>
    </childs>
    </root>
    下面介绍python解析xml文件的几种方法,使用python模块实现。
    方式1,python模块实现自动遍历所有节点:
    #!/usr/bin/env python 

    # -*- coding: utf-8 -*-
    from xml.sax.handler import ContentHandler
    from xml.sax import parse
    class TestHandle(ContentHandler):
        """SAX handler that traces parse events and collects character data.

        Each start tag, end tag and run of character data is echoed to
        stdout, and every run of character data is also appended to the
        list supplied by the caller.
        """

        def __init__(self, inlist):
            """Remember ``inlist``, the caller-owned list that receives text."""
            # Call the base initializer explicitly (not via super()) so the
            # handler is fully set up on both Python 2 and Python 3.
            ContentHandler.__init__(self)
            self.inlist = inlist

        def startElement(self, name, attrs):
            """Print the element name and the names of its attributes."""
            # A single pre-formatted argument prints identically whether
            # print is a statement (Python 2) or a function (Python 3).
            print('name: %s attrs: %s' % (name, attrs.keys()))

        def endElement(self, name):
            """Print the name of the element that just closed."""
            print('endname %s' % name)

        def characters(self, chars):
            """Print a run of character data and append it to ``inlist``."""
            print('chars %s' % chars)
            self.inlist.append(chars)


    if __name__ == '__main__':
        # Parse test.xml with the tracing handler, then show every text
        # chunk that the handler collected.
        lt = []
        parse('test.xml', TestHandle(lt))
        print(lt)
    结果:
    [html] view plaincopy
    name: root attrs: []
    chars

    name: childs attrs: []
    chars

    name: child attrs: [u'name']
    chars 1
    endname child
    chars

    name: child attrs: [u'value']
    chars 2
    endname child
    chars

    endname childs
    chars

    endname root
    [u' ', u' ', u'1', u' ', u'2', u' ', u' ']
    方式2,python模块实现获取根节点,按需查找指定节点:
    #!/usr/bin/env python 

    # -*- coding: utf-8 -*-
    from xml.dom import minidom
    # Sample document passed to doxml(). The XML declaration has to be the
    # very first characters of the string, so the literal opens with exactly
    # three quotes and nothing else before "<?xml".
    xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
    <hash>
    <request name='first'>/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
    </hash>
    '''
    def doxml(xmlstr, show_help=True):
        """Parse ``xmlstr`` with minidom and print a walk over the root's children.

        Prints the whole document, then the root element, then every direct
        child of the root. For element children it also prints the ``name``
        attribute, the node name, the number of child nodes and the character
        data of the first child node.

        Args:
            xmlstr: XML document text to parse.
            show_help: when true (the default), follow each element with the
                pydoc help of every callable member of that node.
        """
        dom = minidom.parseString(xmlstr)
        print('Dom:')
        print(dom.toxml())

        # documentElement is the root element; firstChild may instead be a
        # comment or doctype that precedes the root.
        root = dom.documentElement
        print('root:')
        print(root.toxml())

        for child in root.childNodes:
            print(child.toxml())
            # Only elements carry attributes; whitespace text and comments
            # between elements are echoed above and otherwise skipped.
            if child.nodeType != child.ELEMENT_NODE:
                continue
            print('child node attribute name: %s' % child.getAttribute('name'))
            print('child node name: %s' % child.nodeName)
            print('child node len: %s' % len(child.childNodes))
            # An empty element has no first child, and a nested element has
            # no ``data``; both cases print an empty value instead of raising.
            print('child data: %s' % getattr(child.firstChild, 'data', ''))
            print('=======================================')
            if show_help:
                print('more help info to see:')
                for med in dir(child):
                    # Pass the member itself to help(): a bare name string
                    # would be treated as a pydoc topic lookup.
                    member = getattr(child, med, None)
                    if callable(member):
                        help(member)


    if __name__ == '__main__':
        # Run the walk-through on the sample document defined above.
        doxml(xmlstr)
    结果:
    [html] view plaincopy
    Dom:
    <?xml version="1.0" ?><hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
    </hash>
    root:
    <hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
    </hash>

    <request name="first">/2/photos/square/type.xml</request>
    child node attribute name: first
    child node name: request
    child node len: 1
    child data: /2/photos/square/type.xml
    =======================================
    more help info to see:
    两种方法各有其优点,python的xml处理模块太多,目前只用到这2个。
    =====补充分割线================
    实际工作中发现python的minidom无法解析其它编码的xml,只能解析utf-8的编码,而且xml文件的头部声明也必须是utf-8,为其它编码会报错误。
    网上的解决办法都是替换xml文件头部的编码声明,然后转换编码为utf-8再用minidom解析,实际测试为可行,不过有点累赘的感觉。
    本节是 python解析xml模块封装代码 的第二部分。
    ====写xml内容的分割线=========
    #!/usr/bin/env python 

    #encoding: utf-8
    from xml.dom import minidom

    class xmlwrite:
        """Build an XML result document with minidom and save it to a file.

        The document root is an ``api`` element. Nodes are added below a
        parent that is addressed by a simple slash-separated path relative to
        the root, for example ``/Case[1]/``. A path segment may carry a
        zero-based ``[index]`` that selects among same-named siblings; a
        segment without an index selects the first match.
        """

        def __init__(self, resultfile):
            """Create an empty document that save_xml() writes to ``resultfile``."""
            self.resultfile = resultfile
            self.rootname = 'api'
            self.__create_xml_dom()

        def __create_xml_dom(self):
            """Create the DOM document and keep a reference to its root element."""
            xmlimpl = minidom.getDOMImplementation()
            self.dom = xmlimpl.createDocument(None, self.rootname, None)
            self.root = self.dom.documentElement

        def __get_spec_node(self, xpath):
            """Return the node addressed by ``xpath``, or None if it is absent.

            Empty segments (leading, trailing or doubled slashes) are ignored,
            so ``'/'`` addresses the root element itself.

            Raises:
                ValueError: if a bracketed index is not an integer.
            """
            current = self.root
            for segment in xpath.split('/'):
                segment = segment.strip()
                if not segment:
                    continue
                bracket = segment.find('[')
                if bracket > -1:
                    tag = segment[:bracket]
                    index = int(segment[bracket + 1:].rstrip(']'))
                else:
                    # No index given: use the whole segment as the tag name.
                    # Slicing with the -1 returned by find() would drop the
                    # last character of the name.
                    tag = segment
                    index = 0
                matches = [node for node in current.childNodes
                           if node.nodeName == tag]
                # Report a missing segment right away, so the caller never
                # receives an unrelated ancestor as the "found" node.
                if not 0 <= index < len(matches):
                    return None
                current = matches[index]
            return current

        def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
            """Append a new ``nodename`` element below the node at path ``parent``.

            Args:
                parent: path of the parent node, relative to the root.
                nodename: tag name of the element to create.
                value: text content; a false value creates an empty element.
                attribute: optional dict of attribute names to values.
                CDATA: store ``value`` in a CDATA section instead of a text node.

            If the parent path cannot be resolved, a notice is printed and the
            element is appended to the root instead.
            """
            node = self.dom.createElement(nodename)
            if value:
                if CDATA:
                    content = self.dom.createCDATASection(value)
                else:
                    content = self.dom.createTextNode(value)
                node.appendChild(content)
            if attribute and isinstance(attribute, dict):
                for attr_name, attr_value in attribute.items():
                    node.setAttribute(attr_name, attr_value)
            try:
                parentnode = self.__get_spec_node(parent)
            except (AttributeError, ValueError):
                # Non-string path or malformed [index]: handled like a miss.
                parentnode = None
            if parentnode is None:
                print('Get parent Node Fail, Use the Root as parent Node')
                parentnode = self.root
            parentnode.appendChild(node)

        def write_start_time(self, time):
            """Add a ``StartTime`` element below the root."""
            self.write_node('/', 'StartTime', time)

        def write_end_time(self, time):
            """Add an ``EndTime`` element below the root."""
            self.write_node('/', 'EndTime', time)

        def write_pass_count(self, count):
            """Add a ``PassCount`` element below the root."""
            self.write_node('/', 'PassCount', count)

        def write_fail_count(self, count):
            """Add a ``FailCount`` element below the root."""
            self.write_node('/', 'FailCount', count)

        def write_case(self):
            """Add an empty ``Case`` element below the root."""
            self.write_node('/', 'Case', None)

        def write_case_no(self, index, value):
            """Add a ``No`` element to the ``Case`` at zero-based ``index``."""
            self.write_node('/Case[%s]/' % index, 'No', value)

        def write_case_url(self, index, value):
            """Add a ``URL`` element to the ``Case`` at zero-based ``index``."""
            self.write_node('/Case[%s]/' % index, 'URL', value)

        def write_case_dbdata(self, index, value):
            """Add a ``DBData`` element to the ``Case`` at zero-based ``index``."""
            self.write_node('/Case[%s]/' % index, 'DBData', value)

        def write_case_apidata(self, index, value):
            """Add an ``APIData`` element to the ``Case`` at zero-based ``index``."""
            self.write_node('/Case[%s]/' % index, 'APIData', value)

        def write_case_dbsql(self, index, value):
            """Add a CDATA ``DBSQL`` element to the ``Case`` at zero-based ``index``."""
            self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

        def write_case_apixpath(self, index, value):
            """Add an ``APIXPath`` element to the ``Case`` at zero-based ``index``."""
            self.write_node('/Case[%s]/' % index, 'APIXPath', value)

        def save_xml(self):
            """Write the document to ``self.resultfile`` encoded as UTF-8."""
            # toxml(encoding=...) returns the encoded document, so writing it
            # in binary mode keeps the bytes on disk consistent with the
            # encoding named in the XML declaration. The with-block closes
            # the file even if serialization fails.
            with open(self.resultfile, 'wb') as myfile:
                myfile.write(self.dom.toxml(encoding='utf-8'))

    if __name__ == '__main__':
        # Demo: build a result document with two cases and save it.
        # The raw string keeps the backslash in the Windows path literal.
        xr = xmlwrite(r'D:\test.xml')
        xr.write_start_time('2223')
        xr.write_end_time('444')
        xr.write_pass_count('22')
        xr.write_fail_count('33')
        # Create both Case elements first so the indexed paths below resolve.
        xr.write_case()
        xr.write_case()
        xr.write_case_no(0, '0')
        xr.write_case_url(0, 'http://www.google.com')
        xr.write_case_url(0, 'http://www.google.com')
        xr.write_case_dbsql(0, 'select * from ')
        xr.write_case_dbdata(0, 'dbtata')
        xr.write_case_apixpath(0, '/xpath')
        xr.write_case_apidata(0, 'apidata')
        xr.write_case_no(1, '1')
        xr.write_case_url(1, 'http://www.baidu.com')
        xr.write_case_url(1, 'http://www.baidu.com')
        xr.write_case_dbsql(1, 'select 1 from ')
        xr.write_case_dbdata(1, 'dbtata1')
        xr.write_case_apixpath(1, '/xpath1')
        xr.write_case_apidata(1, 'apidata1')
        xr.save_xml()
    以上封装了minidom,支持通过xpath来写节点,不支持xpath带属性的匹配,但支持带索引的匹配。
    比如:/root/child[1], 表示root的第2个child节点。

  • 相关阅读:
    java面试总结之框架问题
    数据库设计三大范式
    js 一些技巧
    el 和 fmt 常用
    iframe自适应高度
    MySQL基础
    任意精度整数算法 (BigInteger) 和任意精度小数算法 (BigDecimal)
    hibernate
    Struts2
    Servlet、Cookie、Session
  • 原文地址:https://www.cnblogs.com/study100/p/3539521.html
Copyright © 2011-2022 走看看