zoukankan      html  css  js  c++  java
  • python模块:xml.etree.ElementTree

       1 """Lightweight XML support for Python.
       2 
       3  XML is an inherently hierarchical data format, and the most natural way to
       4  represent it is with a tree.  This module has two classes for this purpose:
       5 
       6     1. ElementTree represents the whole XML document as a tree and
       7 
       8     2. Element represents a single node in this tree.
       9 
      10  Interactions with the whole document (reading and writing to/from files) are
      11  usually done on the ElementTree level.  Interactions with a single XML element
      12  and its sub-elements are done on the Element level.
      13 
      14  Element is a flexible container object designed to store hierarchical data
      15  structures in memory. It can be described as a cross between a list and a
      16  dictionary.  Each Element has a number of properties associated with it:
      17 
      18     'tag' - a string containing the element's name.
      19 
      20     'attributes' - a Python dictionary storing the element's attributes.
      21 
      22     'text' - a string containing the element's text content.
      23 
      24     'tail' - an optional string containing text after the element's end tag.
      25 
      26     And a number of child elements stored in a Python sequence.
      27 
      28  To create an element instance, use the Element constructor,
      29  or the SubElement factory function.
      30 
      31  You can also use the ElementTree class to wrap an element structure
      32  and convert it to and from XML.
      33 
      34 """
      35 
      36 #---------------------------------------------------------------------
      37 # Licensed to PSF under a Contributor Agreement.
      38 # See http://www.python.org/psf/license for licensing details.
      39 #
      40 # ElementTree
      41 # Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
      42 #
      43 # fredrik@pythonware.com
      44 # http://www.pythonware.com
      45 # --------------------------------------------------------------------
      46 # The ElementTree toolkit is
      47 #
      48 # Copyright (c) 1999-2008 by Fredrik Lundh
      49 #
      50 # By obtaining, using, and/or copying this software and/or its
      51 # associated documentation, you agree that you have read, understood,
      52 # and will comply with the following terms and conditions:
      53 #
      54 # Permission to use, copy, modify, and distribute this software and
      55 # its associated documentation for any purpose and without fee is
      56 # hereby granted, provided that the above copyright notice appears in
      57 # all copies, and that both that copyright notice and this permission
      58 # notice appear in supporting documentation, and that the name of
      59 # Secret Labs AB or the author not be used in advertising or publicity
      60 # pertaining to distribution of the software without specific, written
      61 # prior permission.
      62 #
      63 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
      64 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
      65 # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
      66 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
      67 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
      68 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
      69 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
      70 # OF THIS SOFTWARE.
      71 # --------------------------------------------------------------------
      72 
      73 __all__ = [
      74     # public symbols
      75     "Comment",
      76     "dump",
      77     "Element", "ElementTree",
      78     "fromstring", "fromstringlist",
      79     "iselement", "iterparse",
      80     "parse", "ParseError",
      81     "PI", "ProcessingInstruction",
      82     "QName",
      83     "SubElement",
      84     "tostring", "tostringlist",
      85     "TreeBuilder",
      86     "VERSION",
      87     "XML", "XMLID",
      88     "XMLParser", "XMLPullParser",
      89     "register_namespace",
      90     ]
      91 
      92 VERSION = "1.3.0"
      93 
      94 import sys
      95 import re
      96 import warnings
      97 import io
      98 import collections
      99 import contextlib
     100 
     101 from . import ElementPath
     102 
     103 
     104 class ParseError(SyntaxError):
     105     """An error when parsing an XML document.
     106 
     107     In addition to its exception value, a ParseError contains
     108     two extra attributes:
     109         'code'     - the specific exception code
     110         'position' - the line and column of the error
     111 
     112     """
     113     pass
     114 
     115 # --------------------------------------------------------------------
     116 
     117 
     118 def iselement(element):
     119     """Return True if *element* appears to be an Element."""
     120     return hasattr(element, 'tag')
     121 
     122 
     123 class Element:
     124     """An XML element.
     125 
     126     This class is the reference implementation of the Element interface.
     127 
     128     An element's length is its number of subelements.  That means if you
     129     want to check if an element is truly empty, you should check BOTH
     130     its length AND its text attribute.
     131 
     132     The element tag, attribute names, and attribute values can be either
     133     bytes or strings.
     134 
     135     *tag* is the element name.  *attrib* is an optional dictionary containing
     136     element attributes. *extra* are additional element attributes given as
     137     keyword arguments.
     138 
     139     Example form:
     140         <tag attrib>text<child/>...</tag>tail
     141 
     142     """
     143 
     144     tag = None
     145     """The element's name."""
     146 
     147     attrib = None
     148     """Dictionary of the element's attributes."""
     149 
     150     text = None
     151     """
     152     Text before first subelement. This is either a string or the value None.
     153     Note that if there is no text, this attribute may be either
     154     None or the empty string, depending on the parser.
     155 
     156     """
     157 
     158     tail = None
     159     """
     160     Text after this element's end tag, but before the next sibling element's
     161     start tag.  This is either a string or the value None.  Note that if there
     162     was no text, this attribute may be either None or an empty string,
     163     depending on the parser.
     164 
     165     """
     166 
     167     def __init__(self, tag, attrib={}, **extra):
     168         if not isinstance(attrib, dict):
     169             raise TypeError("attrib must be dict, not %s" % (
     170                 attrib.__class__.__name__,))
     171         attrib = attrib.copy()
     172         attrib.update(extra)
     173         self.tag = tag
     174         self.attrib = attrib
     175         self._children = []
     176 
     177     def __repr__(self):
     178         return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))
     179 
     180     def makeelement(self, tag, attrib):
     181         """Create a new element with the same type.
     182 
     183         *tag* is a string containing the element name.
     184         *attrib* is a dictionary containing the element attributes.
     185 
     186         Do not call this method, use the SubElement factory function instead.
     187 
     188         """
     189         return self.__class__(tag, attrib)
     190 
     191     def copy(self):
     192         """Return copy of current element.
     193 
     194         This creates a shallow copy. Subelements will be shared with the
     195         original tree.
     196 
     197         """
     198         elem = self.makeelement(self.tag, self.attrib)
     199         elem.text = self.text
     200         elem.tail = self.tail
     201         elem[:] = self
     202         return elem
     203 
     204     def __len__(self):
     205         return len(self._children)
     206 
     207     def __bool__(self):
     208         warnings.warn(
     209             "The behavior of this method will change in future versions.  "
     210             "Use specific 'len(elem)' or 'elem is not None' test instead.",
     211             FutureWarning, stacklevel=2
     212             )
     213         return len(self._children) != 0 # emulate old behaviour, for now
     214 
     215     def __getitem__(self, index):
     216         return self._children[index]
     217 
     218     def __setitem__(self, index, element):
     219         # if isinstance(index, slice):
     220         #     for elt in element:
     221         #         assert iselement(elt)
     222         # else:
     223         #     assert iselement(element)
     224         self._children[index] = element
     225 
     226     def __delitem__(self, index):
     227         del self._children[index]
     228 
     229     def append(self, subelement):
     230         """Add *subelement* to the end of this element.
     231 
     232         The new element will appear in document order after the last existing
     233         subelement (or directly after the text, if it's the first subelement),
     234         but before the end tag for this element.
     235 
     236         """
     237         self._assert_is_element(subelement)
     238         self._children.append(subelement)
     239 
     240     def extend(self, elements):
     241         """Append subelements from a sequence.
     242 
     243         *elements* is a sequence with zero or more elements.
     244 
     245         """
     246         for element in elements:
     247             self._assert_is_element(element)
     248         self._children.extend(elements)
     249 
     250     def insert(self, index, subelement):
     251         """Insert *subelement* at position *index*."""
     252         self._assert_is_element(subelement)
     253         self._children.insert(index, subelement)
     254 
     255     def _assert_is_element(self, e):
     256         # Need to refer to the actual Python implementation, not the
     257         # shadowing C implementation.
     258         if not isinstance(e, _Element_Py):
     259             raise TypeError('expected an Element, not %s' % type(e).__name__)
     260 
     261     def remove(self, subelement):
     262         """Remove matching subelement.
     263 
     264         Unlike the find methods, this method compares elements based on
     265         identity, NOT ON tag value or contents.  To remove subelements by
     266         other means, the easiest way is to use a list comprehension to
     267         select what elements to keep, and then use slice assignment to update
     268         the parent element.
     269 
     270         ValueError is raised if a matching element could not be found.
     271 
     272         """
     273         # assert iselement(element)
     274         self._children.remove(subelement)
     275 
     276     def getchildren(self):
     277         """(Deprecated) Return all subelements.
     278 
     279         Elements are returned in document order.
     280 
     281         """
     282         warnings.warn(
     283             "This method will be removed in future versions.  "
     284             "Use 'list(elem)' or iteration over elem instead.",
     285             DeprecationWarning, stacklevel=2
     286             )
     287         return self._children
     288 
     289     def find(self, path, namespaces=None):
     290         """Find first matching element by tag name or path.
     291 
     292         *path* is a string having either an element tag or an XPath,
     293         *namespaces* is an optional mapping from namespace prefix to full name.
     294 
     295         Return the first matching element, or None if no element was found.
     296 
     297         """
     298         return ElementPath.find(self, path, namespaces)
     299 
     300     def findtext(self, path, default=None, namespaces=None):
     301         """Find text for first matching element by tag name or path.
     302 
     303         *path* is a string having either an element tag or an XPath,
     304         *default* is the value to return if the element was not found,
     305         *namespaces* is an optional mapping from namespace prefix to full name.
     306 
     307         Return text content of first matching element, or default value if
     308         none was found.  Note that if an element is found having no text
     309         content, the empty string is returned.
     310 
     311         """
     312         return ElementPath.findtext(self, path, default, namespaces)
     313 
     314     def findall(self, path, namespaces=None):
     315         """Find all matching subelements by tag name or path.
     316 
     317         *path* is a string having either an element tag or an XPath,
     318         *namespaces* is an optional mapping from namespace prefix to full name.
     319 
     320         Returns list containing all matching elements in document order.
     321 
     322         """
     323         return ElementPath.findall(self, path, namespaces)
     324 
     325     def iterfind(self, path, namespaces=None):
     326         """Find all matching subelements by tag name or path.
     327 
     328         *path* is a string having either an element tag or an XPath,
     329         *namespaces* is an optional mapping from namespace prefix to full name.
     330 
     331         Return an iterable yielding all matching elements in document order.
     332 
     333         """
     334         return ElementPath.iterfind(self, path, namespaces)
     335 
     336     def clear(self):
     337         """Reset element.
     338 
     339         This function removes all subelements, clears all attributes, and sets
     340         the text and tail attributes to None.
     341 
     342         """
     343         self.attrib.clear()
     344         self._children = []
     345         self.text = self.tail = None
     346 
     347     def get(self, key, default=None):
     348         """Get element attribute.
     349 
     350         Equivalent to attrib.get, but some implementations may handle this a
     351         bit more efficiently.  *key* is what attribute to look for, and
     352         *default* is what to return if the attribute was not found.
     353 
     354         Returns a string containing the attribute value, or the default if
     355         attribute was not found.
     356 
     357         """
     358         return self.attrib.get(key, default)
     359 
     360     def set(self, key, value):
     361         """Set element attribute.
     362 
     363         Equivalent to attrib[key] = value, but some implementations may handle
     364         this a bit more efficiently.  *key* is what attribute to set, and
     365         *value* is the attribute value to set it to.
     366 
     367         """
     368         self.attrib[key] = value
     369 
     370     def keys(self):
     371         """Get list of attribute names.
     372 
     373         Names are returned in an arbitrary order, just like an ordinary
     374         Python dict.  Equivalent to attrib.keys()
     375 
     376         """
     377         return self.attrib.keys()
     378 
     379     def items(self):
     380         """Get element attributes as a sequence.
     381 
     382         The attributes are returned in arbitrary order.  Equivalent to
     383         attrib.items().
     384 
     385         Return a list of (name, value) tuples.
     386 
     387         """
     388         return self.attrib.items()
     389 
     390     def iter(self, tag=None):
     391         """Create tree iterator.
     392 
     393         The iterator loops over the element and all subelements in document
     394         order, returning all elements with a matching tag.
     395 
     396         If the tree structure is modified during iteration, new or removed
     397         elements may or may not be included.  To get a stable set, use the
     398         list() function on the iterator, and loop over the resulting list.
     399 
     400         *tag* is what tags to look for (default is to return all elements)
     401 
     402         Return an iterator containing all the matching elements.
     403 
     404         """
     405         if tag == "*":
     406             tag = None
     407         if tag is None or self.tag == tag:
     408             yield self
     409         for e in self._children:
     410             yield from e.iter(tag)
     411 
     412     # compatibility
     413     def getiterator(self, tag=None):
     414         # Change for a DeprecationWarning in 1.4
     415         warnings.warn(
     416             "This method will be removed in future versions.  "
     417             "Use 'elem.iter()' or 'list(elem.iter())' instead.",
     418             PendingDeprecationWarning, stacklevel=2
     419         )
     420         return list(self.iter(tag))
     421 
     422     def itertext(self):
     423         """Create text iterator.
     424 
     425         The iterator loops over the element and all subelements in document
     426         order, returning all inner text.
     427 
     428         """
     429         tag = self.tag
     430         if not isinstance(tag, str) and tag is not None:
     431             return
     432         t = self.text
     433         if t:
     434             yield t
     435         for e in self:
     436             yield from e.itertext()
     437             t = e.tail
     438             if t:
     439                 yield t
     440 
     441 
     442 def SubElement(parent, tag, attrib={}, **extra):
     443     """Subelement factory which creates an element instance, and appends it
     444     to an existing parent.
     445 
     446     The element tag, attribute names, and attribute values can be either
     447     bytes or Unicode strings.
     448 
     449     *parent* is the parent element, *tag* is the subelements name, *attrib* is
     450     an optional directory containing element attributes, *extra* are
     451     additional attributes given as keyword arguments.
     452 
     453     """
     454     attrib = attrib.copy()
     455     attrib.update(extra)
     456     element = parent.makeelement(tag, attrib)
     457     parent.append(element)
     458     return element
     459 
     460 
     461 def Comment(text=None):
     462     """Comment element factory.
     463 
     464     This function creates a special element which the standard serializer
     465     serializes as an XML comment.
     466 
     467     *text* is a string containing the comment string.
     468 
     469     """
     470     element = Element(Comment)
     471     element.text = text
     472     return element
     473 
     474 
     475 def ProcessingInstruction(target, text=None):
     476     """Processing Instruction element factory.
     477 
     478     This function creates a special element which the standard serializer
     479     serializes as an XML comment.
     480 
     481     *target* is a string containing the processing instruction, *text* is a
     482     string containing the processing instruction contents, if any.
     483 
     484     """
     485     element = Element(ProcessingInstruction)
     486     element.text = target
     487     if text:
     488         element.text = element.text + " " + text
     489     return element
     490 
     491 PI = ProcessingInstruction
     492 
     493 
     494 class QName:
     495     """Qualified name wrapper.
     496 
     497     This class can be used to wrap a QName attribute value in order to get
     498     proper namespace handing on output.
     499 
     500     *text_or_uri* is a string containing the QName value either in the form
     501     {uri}local, or if the tag argument is given, the URI part of a QName.
     502 
     503     *tag* is an optional argument which if given, will make the first
     504     argument (text_or_uri) be interpreted as a URI, and this argument (tag)
     505     be interpreted as a local name.
     506 
     507     """
     508     def __init__(self, text_or_uri, tag=None):
     509         if tag:
     510             text_or_uri = "{%s}%s" % (text_or_uri, tag)
     511         self.text = text_or_uri
     512     def __str__(self):
     513         return self.text
     514     def __repr__(self):
     515         return '<%s %r>' % (self.__class__.__name__, self.text)
     516     def __hash__(self):
     517         return hash(self.text)
     518     def __le__(self, other):
     519         if isinstance(other, QName):
     520             return self.text <= other.text
     521         return self.text <= other
     522     def __lt__(self, other):
     523         if isinstance(other, QName):
     524             return self.text < other.text
     525         return self.text < other
     526     def __ge__(self, other):
     527         if isinstance(other, QName):
     528             return self.text >= other.text
     529         return self.text >= other
     530     def __gt__(self, other):
     531         if isinstance(other, QName):
     532             return self.text > other.text
     533         return self.text > other
     534     def __eq__(self, other):
     535         if isinstance(other, QName):
     536             return self.text == other.text
     537         return self.text == other
     538 
     539 # --------------------------------------------------------------------
     540 
     541 
     542 class ElementTree:
     543     """An XML element hierarchy.
     544 
     545     This class also provides support for serialization to and from
     546     standard XML.
     547 
     548     *element* is an optional root element node,
     549     *file* is an optional file handle or file name of an XML file whose
     550     contents will be used to initialize the tree with.
     551 
     552     """
     553     def __init__(self, element=None, file=None):
     554         # assert element is None or iselement(element)
     555         self._root = element # first node
     556         if file:
     557             self.parse(file)
     558 
     559     def getroot(self):
     560         """Return root element of this tree."""
     561         return self._root
     562 
     563     def _setroot(self, element):
     564         """Replace root element of this tree.
     565 
     566         This will discard the current contents of the tree and replace it
     567         with the given element.  Use with care!
     568 
     569         """
     570         # assert iselement(element)
     571         self._root = element
     572 
     573     def parse(self, source, parser=None):
     574         """Load external XML document into element tree.
     575 
     576         *source* is a file name or file object, *parser* is an optional parser
     577         instance that defaults to XMLParser.
     578 
     579         ParseError is raised if the parser fails to parse the document.
     580 
     581         Returns the root element of the given source document.
     582 
     583         """
     584         close_source = False
     585         if not hasattr(source, "read"):
     586             source = open(source, "rb")
     587             close_source = True
     588         try:
     589             if parser is None:
     590                 # If no parser was specified, create a default XMLParser
     591                 parser = XMLParser()
     592                 if hasattr(parser, '_parse_whole'):
     593                     # The default XMLParser, when it comes from an accelerator,
     594                     # can define an internal _parse_whole API for efficiency.
     595                     # It can be used to parse the whole source without feeding
     596                     # it with chunks.
     597                     self._root = parser._parse_whole(source)
     598                     return self._root
     599             while True:
     600                 data = source.read(65536)
     601                 if not data:
     602                     break
     603                 parser.feed(data)
     604             self._root = parser.close()
     605             return self._root
     606         finally:
     607             if close_source:
     608                 source.close()
     609 
     610     def iter(self, tag=None):
     611         """Create and return tree iterator for the root element.
     612 
     613         The iterator loops over all elements in this tree, in document order.
     614 
     615         *tag* is a string with the tag name to iterate over
     616         (default is to return all elements).
     617 
     618         """
     619         # assert self._root is not None
     620         return self._root.iter(tag)
     621 
     622     # compatibility
     623     def getiterator(self, tag=None):
     624         # Change for a DeprecationWarning in 1.4
     625         warnings.warn(
     626             "This method will be removed in future versions.  "
     627             "Use 'tree.iter()' or 'list(tree.iter())' instead.",
     628             PendingDeprecationWarning, stacklevel=2
     629         )
     630         return list(self.iter(tag))
     631 
     632     def find(self, path, namespaces=None):
     633         """Find first matching element by tag name or path.
     634 
     635         Same as getroot().find(path), which is Element.find()
     636 
     637         *path* is a string having either an element tag or an XPath,
     638         *namespaces* is an optional mapping from namespace prefix to full name.
     639 
     640         Return the first matching element, or None if no element was found.
     641 
     642         """
     643         # assert self._root is not None
     644         if path[:1] == "/":
     645             path = "." + path
     646             warnings.warn(
     647                 "This search is broken in 1.3 and earlier, and will be "
     648                 "fixed in a future version.  If you rely on the current "
     649                 "behaviour, change it to %r" % path,
     650                 FutureWarning, stacklevel=2
     651                 )
     652         return self._root.find(path, namespaces)
     653 
     654     def findtext(self, path, default=None, namespaces=None):
     655         """Find first matching element by tag name or path.
     656 
     657         Same as getroot().findtext(path),  which is Element.findtext()
     658 
     659         *path* is a string having either an element tag or an XPath,
     660         *namespaces* is an optional mapping from namespace prefix to full name.
     661 
     662         Return the first matching element, or None if no element was found.
     663 
     664         """
     665         # assert self._root is not None
     666         if path[:1] == "/":
     667             path = "." + path
     668             warnings.warn(
     669                 "This search is broken in 1.3 and earlier, and will be "
     670                 "fixed in a future version.  If you rely on the current "
     671                 "behaviour, change it to %r" % path,
     672                 FutureWarning, stacklevel=2
     673                 )
     674         return self._root.findtext(path, default, namespaces)
     675 
     676     def findall(self, path, namespaces=None):
     677         """Find all matching subelements by tag name or path.
     678 
     679         Same as getroot().findall(path), which is Element.findall().
     680 
     681         *path* is a string having either an element tag or an XPath,
     682         *namespaces* is an optional mapping from namespace prefix to full name.
     683 
     684         Return list containing all matching elements in document order.
     685 
     686         """
     687         # assert self._root is not None
     688         if path[:1] == "/":
     689             path = "." + path
     690             warnings.warn(
     691                 "This search is broken in 1.3 and earlier, and will be "
     692                 "fixed in a future version.  If you rely on the current "
     693                 "behaviour, change it to %r" % path,
     694                 FutureWarning, stacklevel=2
     695                 )
     696         return self._root.findall(path, namespaces)
     697 
     698     def iterfind(self, path, namespaces=None):
     699         """Find all matching subelements by tag name or path.
     700 
     701         Same as getroot().iterfind(path), which is element.iterfind()
     702 
     703         *path* is a string having either an element tag or an XPath,
     704         *namespaces* is an optional mapping from namespace prefix to full name.
     705 
     706         Return an iterable yielding all matching elements in document order.
     707 
     708         """
     709         # assert self._root is not None
     710         if path[:1] == "/":
     711             path = "." + path
     712             warnings.warn(
     713                 "This search is broken in 1.3 and earlier, and will be "
     714                 "fixed in a future version.  If you rely on the current "
     715                 "behaviour, change it to %r" % path,
     716                 FutureWarning, stacklevel=2
     717                 )
     718         return self._root.iterfind(path, namespaces)
     719 
     720     def write(self, file_or_filename,
     721               encoding=None,
     722               xml_declaration=None,
     723               default_namespace=None,
     724               method=None, *,
     725               short_empty_elements=True):
     726         """Write element tree to a file as XML.
     727 
     728         Arguments:
     729           *file_or_filename* -- file name or a file object opened for writing
     730 
     731           *encoding* -- the output encoding (default: US-ASCII)
     732 
     733           *xml_declaration* -- bool indicating if an XML declaration should be
     734                                added to the output. If None, an XML declaration
     735                                is added if encoding IS NOT either of:
     736                                US-ASCII, UTF-8, or Unicode
     737 
     738           *default_namespace* -- sets the default XML namespace (for "xmlns")
     739 
     740           *method* -- either "xml" (default), "html, "text", or "c14n"
     741 
     742           *short_empty_elements* -- controls the formatting of elements
     743                                     that contain no content. If True (default)
     744                                     they are emitted as a single self-closed
     745                                     tag, otherwise they are emitted as a pair
     746                                     of start/end tags
     747 
     748         """
     749         if not method:
     750             method = "xml"
     751         elif method not in _serialize:
     752             raise ValueError("unknown method %r" % method)
     753         if not encoding:
     754             if method == "c14n":
     755                 encoding = "utf-8"
     756             else:
     757                 encoding = "us-ascii"
     758         enc_lower = encoding.lower()
     759         with _get_writer(file_or_filename, enc_lower) as write:
     760             if method == "xml" and (xml_declaration or
     761                     (xml_declaration is None and
     762                      enc_lower not in ("utf-8", "us-ascii", "unicode"))):
     763                 declared_encoding = encoding
     764                 if enc_lower == "unicode":
     765                     # Retrieve the default encoding for the xml declaration
     766                     import locale
     767                     declared_encoding = locale.getpreferredencoding()
     768                 write("<?xml version='1.0' encoding='%s'?>
    " % (
     769                     declared_encoding,))
     770             if method == "text":
     771                 _serialize_text(write, self._root)
     772             else:
     773                 qnames, namespaces = _namespaces(self._root, default_namespace)
     774                 serialize = _serialize[method]
     775                 serialize(write, self._root, qnames, namespaces,
     776                           short_empty_elements=short_empty_elements)
     777 
     778     def write_c14n(self, file):
     779         # lxml.etree compatibility.  use output method instead
     780         return self.write(file, method="c14n")
     781 
     782 # --------------------------------------------------------------------
     783 # serialization support
     784 
     785 @contextlib.contextmanager
     786 def _get_writer(file_or_filename, encoding):
     787     # returns text write method and release all resources after using
     788     try:
     789         write = file_or_filename.write
     790     except AttributeError:
     791         # file_or_filename is a file name
     792         if encoding == "unicode":
     793             file = open(file_or_filename, "w")
     794         else:
     795             file = open(file_or_filename, "w", encoding=encoding,
     796                         errors="xmlcharrefreplace")
     797         with file:
     798             yield file.write
     799     else:
     800         # file_or_filename is a file-like object
     801         # encoding determines if it is a text or binary writer
     802         if encoding == "unicode":
     803             # use a text writer as is
     804             yield write
     805         else:
     806             # wrap a binary writer with TextIOWrapper
     807             with contextlib.ExitStack() as stack:
     808                 if isinstance(file_or_filename, io.BufferedIOBase):
     809                     file = file_or_filename
     810                 elif isinstance(file_or_filename, io.RawIOBase):
     811                     file = io.BufferedWriter(file_or_filename)
     812                     # Keep the original file open when the BufferedWriter is
     813                     # destroyed
     814                     stack.callback(file.detach)
     815                 else:
     816                     # This is to handle passed objects that aren't in the
     817                     # IOBase hierarchy, but just have a write method
     818                     file = io.BufferedIOBase()
     819                     file.writable = lambda: True
     820                     file.write = write
     821                     try:
     822                         # TextIOWrapper uses this methods to determine
     823                         # if BOM (for UTF-16, etc) should be added
     824                         file.seekable = file_or_filename.seekable
     825                         file.tell = file_or_filename.tell
     826                     except AttributeError:
     827                         pass
     828                 file = io.TextIOWrapper(file,
     829                                         encoding=encoding,
     830                                         errors="xmlcharrefreplace",
     831                                         newline="
    ")
     832                 # Keep the original file open when the TextIOWrapper is
     833                 # destroyed
     834                 stack.callback(file.detach)
     835                 yield file.write
     836 
     837 def _namespaces(elem, default_namespace=None):
     838     # identify namespaces used in this tree
     839 
     840     # maps qnames to *encoded* prefix:local names
     841     qnames = {None: None}
     842 
     843     # maps uri:s to prefixes
     844     namespaces = {}
     845     if default_namespace:
     846         namespaces[default_namespace] = ""
     847 
     848     def add_qname(qname):
     849         # calculate serialized qname representation
     850         try:
     851             if qname[:1] == "{":
     852                 uri, tag = qname[1:].rsplit("}", 1)
     853                 prefix = namespaces.get(uri)
     854                 if prefix is None:
     855                     prefix = _namespace_map.get(uri)
     856                     if prefix is None:
     857                         prefix = "ns%d" % len(namespaces)
     858                     if prefix != "xml":
     859                         namespaces[uri] = prefix
     860                 if prefix:
     861                     qnames[qname] = "%s:%s" % (prefix, tag)
     862                 else:
     863                     qnames[qname] = tag # default element
     864             else:
     865                 if default_namespace:
     866                     # FIXME: can this be handled in XML 1.0?
     867                     raise ValueError(
     868                         "cannot use non-qualified names with "
     869                         "default_namespace option"
     870                         )
     871                 qnames[qname] = qname
     872         except TypeError:
     873             _raise_serialization_error(qname)
     874 
     875     # populate qname and namespaces table
     876     for elem in elem.iter():
     877         tag = elem.tag
     878         if isinstance(tag, QName):
     879             if tag.text not in qnames:
     880                 add_qname(tag.text)
     881         elif isinstance(tag, str):
     882             if tag not in qnames:
     883                 add_qname(tag)
     884         elif tag is not None and tag is not Comment and tag is not PI:
     885             _raise_serialization_error(tag)
     886         for key, value in elem.items():
     887             if isinstance(key, QName):
     888                 key = key.text
     889             if key not in qnames:
     890                 add_qname(key)
     891             if isinstance(value, QName) and value.text not in qnames:
     892                 add_qname(value.text)
     893         text = elem.text
     894         if isinstance(text, QName) and text.text not in qnames:
     895             add_qname(text.text)
     896     return qnames, namespaces
     897 
     898 def _serialize_xml(write, elem, qnames, namespaces,
     899                    short_empty_elements, **kwargs):
     900     tag = elem.tag
     901     text = elem.text
     902     if tag is Comment:
     903         write("<!--%s-->" % text)
     904     elif tag is ProcessingInstruction:
     905         write("<?%s?>" % text)
     906     else:
     907         tag = qnames[tag]
     908         if tag is None:
     909             if text:
     910                 write(_escape_cdata(text))
     911             for e in elem:
     912                 _serialize_xml(write, e, qnames, None,
     913                                short_empty_elements=short_empty_elements)
     914         else:
     915             write("<" + tag)
     916             items = list(elem.items())
     917             if items or namespaces:
     918                 if namespaces:
     919                     for v, k in sorted(namespaces.items(),
     920                                        key=lambda x: x[1]):  # sort on prefix
     921                         if k:
     922                             k = ":" + k
     923                         write(" xmlns%s="%s"" % (
     924                             k,
     925                             _escape_attrib(v)
     926                             ))
     927                 for k, v in sorted(items):  # lexical order
     928                     if isinstance(k, QName):
     929                         k = k.text
     930                     if isinstance(v, QName):
     931                         v = qnames[v.text]
     932                     else:
     933                         v = _escape_attrib(v)
     934                     write(" %s="%s"" % (qnames[k], v))
     935             if text or len(elem) or not short_empty_elements:
     936                 write(">")
     937                 if text:
     938                     write(_escape_cdata(text))
     939                 for e in elem:
     940                     _serialize_xml(write, e, qnames, None,
     941                                    short_empty_elements=short_empty_elements)
     942                 write("</" + tag + ">")
     943             else:
     944                 write(" />")
     945     if elem.tail:
     946         write(_escape_cdata(elem.tail))
     947 
     948 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
     949               "img", "input", "isindex", "link", "meta", "param")
     950 
     951 try:
     952     HTML_EMPTY = set(HTML_EMPTY)
     953 except NameError:
     954     pass
     955 
     956 def _serialize_html(write, elem, qnames, namespaces, **kwargs):
     957     tag = elem.tag
     958     text = elem.text
     959     if tag is Comment:
     960         write("<!--%s-->" % _escape_cdata(text))
     961     elif tag is ProcessingInstruction:
     962         write("<?%s?>" % _escape_cdata(text))
     963     else:
     964         tag = qnames[tag]
     965         if tag is None:
     966             if text:
     967                 write(_escape_cdata(text))
     968             for e in elem:
     969                 _serialize_html(write, e, qnames, None)
     970         else:
     971             write("<" + tag)
     972             items = list(elem.items())
     973             if items or namespaces:
     974                 if namespaces:
     975                     for v, k in sorted(namespaces.items(),
     976                                        key=lambda x: x[1]):  # sort on prefix
     977                         if k:
     978                             k = ":" + k
     979                         write(" xmlns%s="%s"" % (
     980                             k,
     981                             _escape_attrib(v)
     982                             ))
     983                 for k, v in sorted(items):  # lexical order
     984                     if isinstance(k, QName):
     985                         k = k.text
     986                     if isinstance(v, QName):
     987                         v = qnames[v.text]
     988                     else:
     989                         v = _escape_attrib_html(v)
     990                     # FIXME: handle boolean attributes
     991                     write(" %s="%s"" % (qnames[k], v))
     992             write(">")
     993             ltag = tag.lower()
     994             if text:
     995                 if ltag == "script" or ltag == "style":
     996                     write(text)
     997                 else:
     998                     write(_escape_cdata(text))
     999             for e in elem:
    1000                 _serialize_html(write, e, qnames, None)
    1001             if ltag not in HTML_EMPTY:
    1002                 write("</" + tag + ">")
    1003     if elem.tail:
    1004         write(_escape_cdata(elem.tail))
    1005 
    1006 def _serialize_text(write, elem):
    1007     for part in elem.itertext():
    1008         write(part)
    1009     if elem.tail:
    1010         write(elem.tail)
    1011 
    1012 _serialize = {
    1013     "xml": _serialize_xml,
    1014     "html": _serialize_html,
    1015     "text": _serialize_text,
    1016 # this optional method is imported at the end of the module
    1017 #   "c14n": _serialize_c14n,
    1018 }
    1019 
    1020 
    1021 def register_namespace(prefix, uri):
    1022     """Register a namespace prefix.
    1023 
    1024     The registry is global, and any existing mapping for either the
    1025     given prefix or the namespace URI will be removed.
    1026 
    1027     *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    1028     attributes in this namespace will be serialized with prefix if possible.
    1029 
    1030     ValueError is raised if prefix is reserved or is invalid.
    1031 
    1032     """
    1033     if re.match(r"nsd+$", prefix):
    1034         raise ValueError("Prefix format reserved for internal use")
    1035     for k, v in list(_namespace_map.items()):
    1036         if k == uri or v == prefix:
    1037             del _namespace_map[k]
    1038     _namespace_map[uri] = prefix
    1039 
    1040 _namespace_map = {
    1041     # "well-known" namespace prefixes
    1042     "http://www.w3.org/XML/1998/namespace": "xml",
    1043     "http://www.w3.org/1999/xhtml": "html",
    1044     "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    1045     "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    1046     # xml schema
    1047     "http://www.w3.org/2001/XMLSchema": "xs",
    1048     "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    1049     # dublin core
    1050     "http://purl.org/dc/elements/1.1/": "dc",
    1051 }
    1052 # For tests and troubleshooting
    1053 register_namespace._namespace_map = _namespace_map
    1054 
    1055 def _raise_serialization_error(text):
    1056     raise TypeError(
    1057         "cannot serialize %r (type %s)" % (text, type(text).__name__)
    1058         )
    1059 
    1060 def _escape_cdata(text):
    1061     # escape character data
    1062     try:
    1063         # it's worth avoiding do-nothing calls for strings that are
    1064         # shorter than 500 character, or so.  assume that's, by far,
    1065         # the most common case in most applications.
    1066         if "&" in text:
    1067             text = text.replace("&", "&amp;")
    1068         if "<" in text:
    1069             text = text.replace("<", "&lt;")
    1070         if ">" in text:
    1071             text = text.replace(">", "&gt;")
    1072         return text
    1073     except (TypeError, AttributeError):
    1074         _raise_serialization_error(text)
    1075 
    1076 def _escape_attrib(text):
    1077     # escape attribute value
    1078     try:
    1079         if "&" in text:
    1080             text = text.replace("&", "&amp;")
    1081         if "<" in text:
    1082             text = text.replace("<", "&lt;")
    1083         if ">" in text:
    1084             text = text.replace(">", "&gt;")
    1085         if """ in text:
    1086             text = text.replace(""", "&quot;")
    1087         # The following business with carriage returns is to satisfy
    1088         # Section 2.11 of the XML specification, stating that
    1089         # CR or CR LN should be replaced with just LN
    1090         # http://www.w3.org/TR/REC-xml/#sec-line-ends
    1091         if "
    " in text:
    1092             text = text.replace("
    ", "
    ")
    1093         if "
    " in text:
    1094             text = text.replace("
    ", "
    ")
    1095         #The following four lines are issue 17582
    1096         if "
    " in text:
    1097             text = text.replace("
    ", "&#10;")
    1098         if "	" in text:
    1099             text = text.replace("	", "&#09;")
    1100         return text
    1101     except (TypeError, AttributeError):
    1102         _raise_serialization_error(text)
    1103 
    1104 def _escape_attrib_html(text):
    1105     # escape attribute value
    1106     try:
    1107         if "&" in text:
    1108             text = text.replace("&", "&amp;")
    1109         if ">" in text:
    1110             text = text.replace(">", "&gt;")
    1111         if """ in text:
    1112             text = text.replace(""", "&quot;")
    1113         return text
    1114     except (TypeError, AttributeError):
    1115         _raise_serialization_error(text)
    1116 
    1117 # --------------------------------------------------------------------
    1118 
    1119 def tostring(element, encoding=None, method=None, *,
    1120              short_empty_elements=True):
    1121     """Generate string representation of XML element.
    1122 
    1123     All subelements are included.  If encoding is "unicode", a string
    1124     is returned. Otherwise a bytestring is returned.
    1125 
    1126     *element* is an Element instance, *encoding* is an optional output
    1127     encoding defaulting to US-ASCII, *method* is an optional output which can
    1128     be one of "xml" (default), "html", "text" or "c14n".
    1129 
    1130     Returns an (optionally) encoded string containing the XML data.
    1131 
    1132     """
    1133     stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    1134     ElementTree(element).write(stream, encoding, method=method,
    1135                                short_empty_elements=short_empty_elements)
    1136     return stream.getvalue()
    1137 
    1138 class _ListDataStream(io.BufferedIOBase):
    1139     """An auxiliary stream accumulating into a list reference."""
    1140     def __init__(self, lst):
    1141         self.lst = lst
    1142 
    1143     def writable(self):
    1144         return True
    1145 
    1146     def seekable(self):
    1147         return True
    1148 
    1149     def write(self, b):
    1150         self.lst.append(b)
    1151 
    1152     def tell(self):
    1153         return len(self.lst)
    1154 
    1155 def tostringlist(element, encoding=None, method=None, *,
    1156                  short_empty_elements=True):
    1157     lst = []
    1158     stream = _ListDataStream(lst)
    1159     ElementTree(element).write(stream, encoding, method=method,
    1160                                short_empty_elements=short_empty_elements)
    1161     return lst
    1162 
    1163 
    1164 def dump(elem):
    1165     """Write element tree or element structure to sys.stdout.
    1166 
    1167     This function should be used for debugging only.
    1168 
    1169     *elem* is either an ElementTree, or a single Element.  The exact output
    1170     format is implementation dependent.  In this version, it's written as an
    1171     ordinary XML file.
    1172 
    1173     """
    1174     # debugging
    1175     if not isinstance(elem, ElementTree):
    1176         elem = ElementTree(elem)
    1177     elem.write(sys.stdout, encoding="unicode")
    1178     tail = elem.getroot().tail
    1179     if not tail or tail[-1] != "
    ":
    1180         sys.stdout.write("
    ")
    1181 
    1182 # --------------------------------------------------------------------
    1183 # parsing
    1184 
    1185 
    1186 def parse(source, parser=None):
    1187     """Parse XML document into element tree.
    1188 
    1189     *source* is a filename or file object containing XML data,
    1190     *parser* is an optional parser instance defaulting to XMLParser.
    1191 
    1192     Return an ElementTree instance.
    1193 
    1194     """
    1195     tree = ElementTree()
    1196     tree.parse(source, parser)
    1197     return tree
    1198 
    1199 
    1200 def iterparse(source, events=None, parser=None):
    1201     """Incrementally parse XML document into ElementTree.
    1202 
    1203     This class also reports what's going on to the user based on the
    1204     *events* it is initialized with.  The supported events are the strings
    1205     "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    1206     detailed namespace information).  If *events* is omitted, only
    1207     "end" events are reported.
    1208 
    1209     *source* is a filename or file object containing XML data, *events* is
    1210     a list of events to report back, *parser* is an optional parser instance.
    1211 
    1212     Returns an iterator providing (event, elem) pairs.
    1213 
    1214     """
    1215     # Use the internal, undocumented _parser argument for now; When the
    1216     # parser argument of iterparse is removed, this can be killed.
    1217     pullparser = XMLPullParser(events=events, _parser=parser)
    1218     def iterator():
    1219         try:
    1220             while True:
    1221                 yield from pullparser.read_events()
    1222                 # load event buffer
    1223                 data = source.read(16 * 1024)
    1224                 if not data:
    1225                     break
    1226                 pullparser.feed(data)
    1227             root = pullparser._close_and_return_root()
    1228             yield from pullparser.read_events()
    1229             it.root = root
    1230         finally:
    1231             if close_source:
    1232                 source.close()
    1233 
    1234     class IterParseIterator(collections.Iterator):
    1235         __next__ = iterator().__next__
    1236     it = IterParseIterator()
    1237     it.root = None
    1238     del iterator, IterParseIterator
    1239 
    1240     close_source = False
    1241     if not hasattr(source, "read"):
    1242         source = open(source, "rb")
    1243         close_source = True
    1244 
    1245     return it
    1246 
    1247 
    1248 class XMLPullParser:
    1249 
    1250     def __init__(self, events=None, *, _parser=None):
    1251         # The _parser argument is for internal use only and must not be relied
    1252         # upon in user code. It will be removed in a future release.
    1253         # See http://bugs.python.org/issue17741 for more details.
    1254 
    1255         self._events_queue = collections.deque()
    1256         self._parser = _parser or XMLParser(target=TreeBuilder())
    1257         # wire up the parser for event reporting
    1258         if events is None:
    1259             events = ("end",)
    1260         self._parser._setevents(self._events_queue, events)
    1261 
    1262     def feed(self, data):
    1263         """Feed encoded data to parser."""
    1264         if self._parser is None:
    1265             raise ValueError("feed() called after end of stream")
    1266         if data:
    1267             try:
    1268                 self._parser.feed(data)
    1269             except SyntaxError as exc:
    1270                 self._events_queue.append(exc)
    1271 
    1272     def _close_and_return_root(self):
    1273         # iterparse needs this to set its root attribute properly :(
    1274         root = self._parser.close()
    1275         self._parser = None
    1276         return root
    1277 
    1278     def close(self):
    1279         """Finish feeding data to parser.
    1280 
    1281         Unlike XMLParser, does not return the root element. Use
    1282         read_events() to consume elements from XMLPullParser.
    1283         """
    1284         self._close_and_return_root()
    1285 
    1286     def read_events(self):
    1287         """Return an iterator over currently available (event, elem) pairs.
    1288 
    1289         Events are consumed from the internal event queue as they are
    1290         retrieved from the iterator.
    1291         """
    1292         events = self._events_queue
    1293         while events:
    1294             event = events.popleft()
    1295             if isinstance(event, Exception):
    1296                 raise event
    1297             else:
    1298                 yield event
    1299 
    1300 
    1301 def XML(text, parser=None):
    1302     """Parse XML document from string constant.
    1303 
    1304     This function can be used to embed "XML Literals" in Python code.
    1305 
    1306     *text* is a string containing XML data, *parser* is an
    1307     optional parser instance, defaulting to the standard XMLParser.
    1308 
    1309     Returns an Element instance.
    1310 
    1311     """
    1312     if not parser:
    1313         parser = XMLParser(target=TreeBuilder())
    1314     parser.feed(text)
    1315     return parser.close()
    1316 
    1317 
    1318 def XMLID(text, parser=None):
    1319     """Parse XML document from string constant for its IDs.
    1320 
    1321     *text* is a string containing XML data, *parser* is an
    1322     optional parser instance, defaulting to the standard XMLParser.
    1323 
    1324     Returns an (Element, dict) tuple, in which the
    1325     dict maps element id:s to elements.
    1326 
    1327     """
    1328     if not parser:
    1329         parser = XMLParser(target=TreeBuilder())
    1330     parser.feed(text)
    1331     tree = parser.close()
    1332     ids = {}
    1333     for elem in tree.iter():
    1334         id = elem.get("id")
    1335         if id:
    1336             ids[id] = elem
    1337     return tree, ids
    1338 
    1339 # Parse XML document from string constant.  Alias for XML().
    1340 fromstring = XML
    1341 
    1342 def fromstringlist(sequence, parser=None):
    1343     """Parse XML document from sequence of string fragments.
    1344 
    1345     *sequence* is a list of other sequence, *parser* is an optional parser
    1346     instance, defaulting to the standard XMLParser.
    1347 
    1348     Returns an Element instance.
    1349 
    1350     """
    1351     if not parser:
    1352         parser = XMLParser(target=TreeBuilder())
    1353     for text in sequence:
    1354         parser.feed(text)
    1355     return parser.close()
    1356 
    1357 # --------------------------------------------------------------------
    1358 
    1359 
    1360 class TreeBuilder:
    1361     """Generic element structure builder.
    1362 
    1363     This builder converts a sequence of start, data, and end method
    1364     calls to a well-formed element structure.
    1365 
    1366     You can use this class to build an element structure using a custom XML
    1367     parser, or a parser for some other XML-like format.
    1368 
    1369     *element_factory* is an optional element factory which is called
    1370     to create new Element instances, as necessary.
    1371 
    1372     """
    1373     def __init__(self, element_factory=None):
    1374         self._data = [] # data collector
    1375         self._elem = [] # element stack
    1376         self._last = None # last element
    1377         self._tail = None # true if we're after an end tag
    1378         if element_factory is None:
    1379             element_factory = Element
    1380         self._factory = element_factory
    1381 
    1382     def close(self):
    1383         """Flush builder buffers and return toplevel document Element."""
    1384         assert len(self._elem) == 0, "missing end tags"
    1385         assert self._last is not None, "missing toplevel element"
    1386         return self._last
    1387 
    1388     def _flush(self):
    1389         if self._data:
    1390             if self._last is not None:
    1391                 text = "".join(self._data)
    1392                 if self._tail:
    1393                     assert self._last.tail is None, "internal error (tail)"
    1394                     self._last.tail = text
    1395                 else:
    1396                     assert self._last.text is None, "internal error (text)"
    1397                     self._last.text = text
    1398             self._data = []
    1399 
    1400     def data(self, data):
    1401         """Add text to current element."""
    1402         self._data.append(data)
    1403 
    1404     def start(self, tag, attrs):
    1405         """Open new element and return it.
    1406 
    1407         *tag* is the element name, *attrs* is a dict containing element
    1408         attributes.
    1409 
    1410         """
    1411         self._flush()
    1412         self._last = elem = self._factory(tag, attrs)
    1413         if self._elem:
    1414             self._elem[-1].append(elem)
    1415         self._elem.append(elem)
    1416         self._tail = 0
    1417         return elem
    1418 
    1419     def end(self, tag):
    1420         """Close and return current Element.
    1421 
    1422         *tag* is the element name.
    1423 
    1424         """
    1425         self._flush()
    1426         self._last = self._elem.pop()
    1427         assert self._last.tag == tag,
    1428                "end tag mismatch (expected %s, got %s)" % (
    1429                    self._last.tag, tag)
    1430         self._tail = 1
    1431         return self._last
    1432 
    1433 
    1434 # also see ElementTree and TreeBuilder
    1435 class XMLParser:
    1436     """Element structure builder for XML source data based on the expat parser.
    1437 
    1438     *html* are predefined HTML entities (deprecated and not supported),
    1439     *target* is an optional target object which defaults to an instance of the
    1440     standard TreeBuilder class, *encoding* is an optional encoding string
    1441     which if given, overrides the encoding specified in the XML file:
    1442     http://www.iana.org/assignments/character-sets
    1443 
    1444     """
    1445 
    def __init__(self, html=0, target=None, encoding=None):
        """Create the expat parser and connect it to *target*.

        *target* defaults to a new TreeBuilder.  *encoding* is handed to
        expat.ParserCreate().  *html* is accepted but not used here.

        Raises ImportError if no expat module can be imported.
        """
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # Callback wiring: only the default handler is always installed;
        # every other expat slot is filled only if the target provides the
        # matching method.  The lambdas delay the attribute lookup until
        # the hasattr() check has passed.
        parser.DefaultHandlerExpand = self._default
        wiring = (
            # main callbacks
            ('start', 'StartElementHandler', lambda: self._start),
            ('end', 'EndElementHandler', lambda: self._end),
            ('data', 'CharacterDataHandler', lambda: target.data),
            # miscellaneous callbacks
            ('comment', 'CommentHandler', lambda: target.comment),
            ('pi', 'ProcessingInstructionHandler', lambda: target.pi),
        )
        for method_name, slot, get_callback in wiring:
            if hasattr(target, method_name):
                setattr(parser, slot, get_callback())
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown
    1487 
    1488     def _setevents(self, events_queue, events_to_report):
    1489         # Internal API for XMLPullParser
    1490         # events_to_report: a list of events to report during parsing (same as
    1491         # the *events* of XMLPullParser's constructor.
    1492         # events_queue: a list of actual parsing events that will be populated
    1493         # by the underlying parser.
    1494         #
    1495         parser = self._parser
    1496         append = events_queue.append
    1497         for event_name in events_to_report:
    1498             if event_name == "start":
    1499                 parser.ordered_attributes = 1
    1500                 parser.specified_attributes = 1
    1501                 def handler(tag, attrib_in, event=event_name, append=append,
    1502                             start=self._start):
    1503                     append((event, start(tag, attrib_in)))
    1504                 parser.StartElementHandler = handler
    1505             elif event_name == "end":
    1506                 def handler(tag, event=event_name, append=append,
    1507                             end=self._end):
    1508                     append((event, end(tag)))
    1509                 parser.EndElementHandler = handler
    1510             elif event_name == "start-ns":
    1511                 def handler(prefix, uri, event=event_name, append=append):
    1512                     append((event, (prefix or "", uri or "")))
    1513                 parser.StartNamespaceDeclHandler = handler
    1514             elif event_name == "end-ns":
    1515                 def handler(prefix, event=event_name, append=append):
    1516                     append((event, None))
    1517                 parser.EndNamespaceDeclHandler = handler
    1518             else:
    1519                 raise ValueError("unknown event %r" % event_name)
    1520 
    def _raiseerror(self, value):
        """Raise a ParseError built from the low-level parser error *value*.

        *value* must provide ``code``, ``lineno`` and ``offset``; they are
        copied to the new exception as ``code`` and ``position``
        (a ``(lineno, offset)`` tuple).  This method never returns.
        """
        wrapped = ParseError(value)
        wrapped.code = value.code
        location = (value.lineno, value.offset)
        wrapped.position = location
        raise wrapped
    1526 
    1527     def _fixname(self, key):
    1528         # expand qname, and convert name string to ascii, if possible
    1529         try:
    1530             name = self._names[key]
    1531         except KeyError:
    1532             name = key
    1533             if "}" in name:
    1534                 name = "{" + name
    1535             self._names[key] = name
    1536         return name
    1537 
    1538     def _start(self, tag, attr_list):
    1539         # Handler for expat's StartElementHandler. Since ordered_attributes
    1540         # is set, the attributes are reported as a list of alternating
    1541         # attribute name,value.
    1542         fixname = self._fixname
    1543         tag = fixname(tag)
    1544         attrib = {}
    1545         if attr_list:
    1546             for i in range(0, len(attr_list), 2):
    1547                 attrib[fixname(attr_list[i])] = attr_list[i+1]
    1548         return self.target.start(tag, attrib)
    1549 
    1550     def _end(self, tag):
    1551         return self.target.end(self._fixname(tag))
    1552 
    1553     def _default(self, text):
    1554         prefix = text[:1]
    1555         if prefix == "&":
    1556             # deal with undefined entities
    1557             try:
    1558                 data_handler = self.target.data
    1559             except AttributeError:
    1560                 return
    1561             try:
    1562                 data_handler(self.entity[text[1:-1]])
    1563             except KeyError:
    1564                 from xml.parsers import expat
    1565                 err = expat.error(
    1566                     "undefined entity %s: line %d, column %d" %
    1567                     (text, self.parser.ErrorLineNumber,
    1568                     self.parser.ErrorColumnNumber)
    1569                     )
    1570                 err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
    1571                 err.lineno = self.parser.ErrorLineNumber
    1572                 err.offset = self.parser.ErrorColumnNumber
    1573                 raise err
    1574         elif prefix == "<" and text[:9] == "<!DOCTYPE":
    1575             self._doctype = [] # inside a doctype declaration
    1576         elif self._doctype is not None:
    1577             # parse doctype contents
    1578             if prefix == ">":
    1579                 self._doctype = None
    1580                 return
    1581             text = text.strip()
    1582             if not text:
    1583                 return
    1584             self._doctype.append(text)
    1585             n = len(self._doctype)
    1586             if n > 2:
    1587                 type = self._doctype[1]
    1588                 if type == "PUBLIC" and n == 4:
    1589                     name, type, pubid, system = self._doctype
    1590                     if pubid:
    1591                         pubid = pubid[1:-1]
    1592                 elif type == "SYSTEM" and n == 3:
    1593                     name, type, system = self._doctype
    1594                     pubid = None
    1595                 else:
    1596                     return
    1597                 if hasattr(self.target, "doctype"):
    1598                     self.target.doctype(name, pubid, system[1:-1])
    1599                 elif self.doctype != self._XMLParser__doctype:
    1600                     # warn about deprecated call
    1601                     self._XMLParser__doctype(name, pubid, system[1:-1])
    1602                     self.doctype(name, pubid, system[1:-1])
    1603                 self._doctype = None
    1604 
    def doctype(self, name, pubid, system):
        """(Deprecated)  Handle a doctype declaration.

        *name* is the doctype name, *pubid* is the public identifier and
        *system* is the system identifier.  This implementation ignores all
        three and only issues a DeprecationWarning.

        """
        message = (
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target."
        )
        warnings.warn(message, DeprecationWarning)
    1617 
    # Sentinel: a private reference to the doctype() defined just above.
    # _default() compares self.doctype against it (as _XMLParser__doctype)
    # to detect that doctype has been redefined in a subclass, and calls it
    # to emit the deprecation warning before invoking the override.
    __doctype = doctype
    1620 
    1621     def feed(self, data):
    1622         """Feed encoded data to parser."""
    1623         try:
    1624             self.parser.Parse(data, 0)
    1625         except self._error as v:
    1626             self._raiseerror(v)
    1627 
    1628     def close(self):
    1629         """Finish feeding data to parser and return element structure."""
    1630         try:
    1631             self.parser.Parse("", 1) # end of data
    1632         except self._error as v:
    1633             self._raiseerror(v)
    1634         try:
    1635             close_handler = self.target.close
    1636         except AttributeError:
    1637             pass
    1638         else:
    1639             return close_handler()
    1640         finally:
    1641             # get rid of circular references
    1642             del self.parser, self._parser
    1643             del self.target, self._target
    1644 
    1645 
# Import the C accelerators
try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" use by external
    # code (see tests).  This alias must be bound before the star import
    # below rebinds the name.
    _Element_Py = Element

    # Names expected to be replaced by the accelerator module:
    # Element, SubElement, ParseError, TreeBuilder, XMLParser
    from _elementtree import *
except ImportError:
    # No accelerator module available: keep the pure-Python definitions.
    pass
    xml.etree.ElementTree
    每天更新一点点,温习一点点,进步一点点
  • 相关阅读:
    HelloCSS-Border
    开发你的第一个NCS(Zephyr)应用程序
    NanoPi R4S (RK3399) openssl speed 硬件加解密性能测试结果
    Flink-状态
    Flink-时间语义和Watermark
    Flink-Window
    Flink-运行时架构
    Flink-流处理wordcount
    Flink-批处理wordcount
    设计模式七大原则-合成复用原则
  • 原文地址:https://www.cnblogs.com/lmgsanm/p/8379791.html
Copyright © 2011-2022 走看看