python 解析XML expat方式
将xml转换成python对象树
今天项目组的一位同事要写这样的代码,他之前一直写 Java,没用过 Python。我回来翻了一下《Python Cookbook》这本书,
第 449 页有讲。
代码如下:
# -*- coding: utf-8 -*-
'''
Created on 2011-12-7
@author: simon
'''
from xml.parsers import expat
class Element(object):
    """A single XML element in the parsed object tree.

    Attributes:
        name: The element's tag name.
        attributes: Mapping of attribute names to values, stored as passed in.
        cdata: Character data collected for this element (initially ``''``).
        children: Child ``Element`` objects in the order they were added.
    """

    def __init__(self, name, attributes):
        self.name = name
        self.attributes = attributes
        self.cdata = ''
        self.children = []

    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.attributes)

    def addChild(self, element):
        """Append *element* to this element's list of children."""
        self.children.append(element)

    def getAttribute(self, key):
        """Return the value of attribute *key*, or ``None`` if it is absent."""
        return self.attributes.get(key)

    # Original (misspelled) name, kept so existing callers keep working.
    getAttribut = getAttribute

    def getData(self):
        """Return the character data collected for this element."""
        return self.cdata

    def getElements(self, name=''):
        """Return child elements as a new list.

        With a non-empty *name*, only children whose ``name`` equals it are
        returned; otherwise all children are returned. The result is always
        a fresh list, so mutating it does not affect ``self.children``.
        """
        if name:
            return [c for c in self.children if c.name == name]
        return list(self.children)
class xml2obj(object):
    """Convert an XML file into a tree of ``Element`` objects using expat.

    Attributes:
        root: The root ``Element`` of the most recent parse, or ``None``.
        nodeStack: Stack of elements that are currently open; the last item
            is the element receiving children and character data.
    """

    def __init__(self):
        self.root = None
        self.nodeStack = []

    def StartElement(self, name, attributes):
        """Expat start-element event handler.

        Creates an ``Element``, attaches it to the currently open element
        (or makes it the root when none is open), and pushes it on the stack.
        """
        # The name is stored exactly as expat delivers it. Calling
        # ``.encode()`` on it yields bytes on Python 3 and, on Python 2,
        # fails for names that are not pure ASCII.
        element = Element(name, attributes)
        if self.nodeStack:
            self.nodeStack[-1].addChild(element)
        else:
            self.root = element
        self.nodeStack.append(element)

    def EndElement(self, name):
        """Expat end-element event handler: close the innermost open element."""
        self.nodeStack.pop()

    def CharacterData(self, data):
        """Expat character-data event handler.

        Appends *data* to the ``cdata`` of the innermost open element.
        Chunks consisting only of whitespace are ignored.
        """
        if data.strip():
            # Kept as text (no ``.encode()``) so it can be concatenated
            # with the ``''`` that ``cdata`` starts out as.
            self.nodeStack[-1].cdata += data

    def Parse(self, filename):
        """Parse the XML file at *filename* and return its root ``Element``.

        Raises:
            IOError/OSError: if the file cannot be opened or read.
            xml.parsers.expat.ExpatError: if the document is not well-formed.
        """
        # Start from a clean slate so that an earlier (possibly failed)
        # parse cannot leak open elements into this one.
        self.root = None
        self.nodeStack = []

        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElement
        parser.EndElementHandler = self.EndElement
        parser.CharacterDataHandler = self.CharacterData

        # Read raw bytes and let expat do the decoding; the ``with`` block
        # guarantees the file is closed even if reading fails.
        with open(filename, 'rb') as xml_file:
            parser.Parse(xml_file.read(), True)
        return self.root
# Demo: parse "sample.xml" (looked up relative to the current working
# directory) and print the resulting root element.
parser = xml2obj()
rootElement = parser.Parse("sample.xml")
# Parenthesised so the line is valid both as a Python 2 print statement
# and as a Python 3 function call.
print(rootElement)
但是《Python Cookbook》书上有个错误,上面的代码已经修正。勘误页
http://oreilly.com/catalog/errataunconfirmed.csp?isbn=9780596007973
上有说明:
BOOK: Python Cookbook by Alex Martelli; Anna Martelli Ravenscroft; David
Ascher
PUBLISHER: O'Reilly Media, Inc.
DATE OF PUBLICATION: March 18, 2005
Chapter 12. Processing XML
Section: Converting an XML Document into a tree of Python Objects
12.6.2 Solution
[Code... snippet]
def EndElement(self, name):
'Expat end element event handler'
self.nodeStack[-1].pop( )
The code above attempts to execute the method pop on an Element object
contained in the nodeStack (a list). However, there is no method pop defined
in the Element class. I believe that it was the authors' intention to write:
[Code... snippet]
def EndElement(self, name):
'Expat end element event handler'
self.nodeStack.pop( )
Which pops an element off the stack.