zoukankan      html  css  js  c++  java
  • python模块:xml.dom.minidom

       1 """Simple implementation of the Level 1 DOM.
       2 
       3 Namespaces and other minor Level 2 features are also supported.
       4 
       5 parse("foo.xml")
       6 
       7 parseString("<foo><bar/></foo>")
       8 
       9 Todo:
      10 =====
      11  * convenience methods for getting elements and text.
      12  * more testing
      13  * bring some of the writer and linearizer code into conformance with this
      14         interface
      15  * SAX 2 namespaces
      16 """
      17 
      18 import io
      19 import xml.dom
      20 
      21 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
      22 from xml.dom.minicompat import *
      23 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
      24 
# Node types that may themselves have children.  This is used by the
# ID-cache invalidation checks; the list isn't actually complete, since
# the nodes being checked will never be the DOCUMENT_NODE or
# DOCUMENT_FRAGMENT_NODE.  (The node being checked is the node being
# added or removed, not the node being modified.)
#
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
      32 
      33 
      34 class Node(xml.dom.Node):
      35     namespaceURI = None # this is non-null only for elements and attributes
      36     parentNode = None
      37     ownerDocument = None
      38     nextSibling = None
      39     previousSibling = None
      40 
      41     prefix = EMPTY_PREFIX # non-null only for NS elements and attributes
      42 
      43     def __bool__(self):
      44         return True
      45 
      46     def toxml(self, encoding=None):
      47         return self.toprettyxml("", "", encoding)
      48 
      49     def toprettyxml(self, indent="	", newl="
    ", encoding=None):
      50         if encoding is None:
      51             writer = io.StringIO()
      52         else:
      53             writer = io.TextIOWrapper(io.BytesIO(),
      54                                       encoding=encoding,
      55                                       errors="xmlcharrefreplace",
      56                                       newline='
    ')
      57         if self.nodeType == Node.DOCUMENT_NODE:
      58             # Can pass encoding only to document, to put it into XML header
      59             self.writexml(writer, "", indent, newl, encoding)
      60         else:
      61             self.writexml(writer, "", indent, newl)
      62         if encoding is None:
      63             return writer.getvalue()
      64         else:
      65             return writer.detach().getvalue()
      66 
      67     def hasChildNodes(self):
      68         return bool(self.childNodes)
      69 
      70     def _get_childNodes(self):
      71         return self.childNodes
      72 
      73     def _get_firstChild(self):
      74         if self.childNodes:
      75             return self.childNodes[0]
      76 
      77     def _get_lastChild(self):
      78         if self.childNodes:
      79             return self.childNodes[-1]
      80 
      81     def insertBefore(self, newChild, refChild):
      82         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
      83             for c in tuple(newChild.childNodes):
      84                 self.insertBefore(c, refChild)
      85             ### The DOM does not clearly specify what to return in this case
      86             return newChild
      87         if newChild.nodeType not in self._child_node_types:
      88             raise xml.dom.HierarchyRequestErr(
      89                 "%s cannot be child of %s" % (repr(newChild), repr(self)))
      90         if newChild.parentNode is not None:
      91             newChild.parentNode.removeChild(newChild)
      92         if refChild is None:
      93             self.appendChild(newChild)
      94         else:
      95             try:
      96                 index = self.childNodes.index(refChild)
      97             except ValueError:
      98                 raise xml.dom.NotFoundErr()
      99             if newChild.nodeType in _nodeTypes_with_children:
     100                 _clear_id_cache(self)
     101             self.childNodes.insert(index, newChild)
     102             newChild.nextSibling = refChild
     103             refChild.previousSibling = newChild
     104             if index:
     105                 node = self.childNodes[index-1]
     106                 node.nextSibling = newChild
     107                 newChild.previousSibling = node
     108             else:
     109                 newChild.previousSibling = None
     110             newChild.parentNode = self
     111         return newChild
     112 
     113     def appendChild(self, node):
     114         if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
     115             for c in tuple(node.childNodes):
     116                 self.appendChild(c)
     117             ### The DOM does not clearly specify what to return in this case
     118             return node
     119         if node.nodeType not in self._child_node_types:
     120             raise xml.dom.HierarchyRequestErr(
     121                 "%s cannot be child of %s" % (repr(node), repr(self)))
     122         elif node.nodeType in _nodeTypes_with_children:
     123             _clear_id_cache(self)
     124         if node.parentNode is not None:
     125             node.parentNode.removeChild(node)
     126         _append_child(self, node)
     127         node.nextSibling = None
     128         return node
     129 
     130     def replaceChild(self, newChild, oldChild):
     131         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
     132             refChild = oldChild.nextSibling
     133             self.removeChild(oldChild)
     134             return self.insertBefore(newChild, refChild)
     135         if newChild.nodeType not in self._child_node_types:
     136             raise xml.dom.HierarchyRequestErr(
     137                 "%s cannot be child of %s" % (repr(newChild), repr(self)))
     138         if newChild is oldChild:
     139             return
     140         if newChild.parentNode is not None:
     141             newChild.parentNode.removeChild(newChild)
     142         try:
     143             index = self.childNodes.index(oldChild)
     144         except ValueError:
     145             raise xml.dom.NotFoundErr()
     146         self.childNodes[index] = newChild
     147         newChild.parentNode = self
     148         oldChild.parentNode = None
     149         if (newChild.nodeType in _nodeTypes_with_children
     150             or oldChild.nodeType in _nodeTypes_with_children):
     151             _clear_id_cache(self)
     152         newChild.nextSibling = oldChild.nextSibling
     153         newChild.previousSibling = oldChild.previousSibling
     154         oldChild.nextSibling = None
     155         oldChild.previousSibling = None
     156         if newChild.previousSibling:
     157             newChild.previousSibling.nextSibling = newChild
     158         if newChild.nextSibling:
     159             newChild.nextSibling.previousSibling = newChild
     160         return oldChild
     161 
     162     def removeChild(self, oldChild):
     163         try:
     164             self.childNodes.remove(oldChild)
     165         except ValueError:
     166             raise xml.dom.NotFoundErr()
     167         if oldChild.nextSibling is not None:
     168             oldChild.nextSibling.previousSibling = oldChild.previousSibling
     169         if oldChild.previousSibling is not None:
     170             oldChild.previousSibling.nextSibling = oldChild.nextSibling
     171         oldChild.nextSibling = oldChild.previousSibling = None
     172         if oldChild.nodeType in _nodeTypes_with_children:
     173             _clear_id_cache(self)
     174 
     175         oldChild.parentNode = None
     176         return oldChild
     177 
     178     def normalize(self):
     179         L = []
     180         for child in self.childNodes:
     181             if child.nodeType == Node.TEXT_NODE:
     182                 if not child.data:
     183                     # empty text node; discard
     184                     if L:
     185                         L[-1].nextSibling = child.nextSibling
     186                     if child.nextSibling:
     187                         child.nextSibling.previousSibling = child.previousSibling
     188                     child.unlink()
     189                 elif L and L[-1].nodeType == child.nodeType:
     190                     # collapse text node
     191                     node = L[-1]
     192                     node.data = node.data + child.data
     193                     node.nextSibling = child.nextSibling
     194                     if child.nextSibling:
     195                         child.nextSibling.previousSibling = node
     196                     child.unlink()
     197                 else:
     198                     L.append(child)
     199             else:
     200                 L.append(child)
     201                 if child.nodeType == Node.ELEMENT_NODE:
     202                     child.normalize()
     203         self.childNodes[:] = L
     204 
     205     def cloneNode(self, deep):
     206         return _clone_node(self, deep, self.ownerDocument or self)
     207 
     208     def isSupported(self, feature, version):
     209         return self.ownerDocument.implementation.hasFeature(feature, version)
     210 
     211     def _get_localName(self):
     212         # Overridden in Element and Attr where localName can be Non-Null
     213         return None
     214 
     215     # Node interfaces from Level 3 (WD 9 April 2002)
     216 
     217     def isSameNode(self, other):
     218         return self is other
     219 
     220     def getInterface(self, feature):
     221         if self.isSupported(feature, None):
     222             return self
     223         else:
     224             return None
     225 
     226     # The "user data" functions use a dictionary that is only present
     227     # if some user data has been set, so be careful not to assume it
     228     # exists.
     229 
     230     def getUserData(self, key):
     231         try:
     232             return self._user_data[key][0]
     233         except (AttributeError, KeyError):
     234             return None
     235 
     236     def setUserData(self, key, data, handler):
     237         old = None
     238         try:
     239             d = self._user_data
     240         except AttributeError:
     241             d = {}
     242             self._user_data = d
     243         if key in d:
     244             old = d[key][0]
     245         if data is None:
     246             # ignore handlers passed for None
     247             handler = None
     248             if old is not None:
     249                 del d[key]
     250         else:
     251             d[key] = (data, handler)
     252         return old
     253 
     254     def _call_user_data_handler(self, operation, src, dst):
     255         if hasattr(self, "_user_data"):
     256             for key, (data, handler) in list(self._user_data.items()):
     257                 if handler is not None:
     258                     handler.handle(operation, key, data, src, dst)
     259 
     260     # minidom-specific API:
     261 
     262     def unlink(self):
     263         self.parentNode = self.ownerDocument = None
     264         if self.childNodes:
     265             for child in self.childNodes:
     266                 child.unlink()
     267             self.childNodes = NodeList()
     268         self.previousSibling = None
     269         self.nextSibling = None
     270 
     271     # A Node is its own context manager, to ensure that an unlink() call occurs.
     272     # This is similar to how a file object works.
     273     def __enter__(self):
     274         return self
     275 
     276     def __exit__(self, et, ev, tb):
     277         self.unlink()
     278 
     279 defproperty(Node, "firstChild", doc="First child node, or None.")
     280 defproperty(Node, "lastChild",  doc="Last child node, or None.")
     281 defproperty(Node, "localName",  doc="Namespace-local name of this node.")
     282 
     283 
     284 def _append_child(self, node):
     285     # fast path with less checks; usable by DOM builders if careful
     286     childNodes = self.childNodes
     287     if childNodes:
     288         last = childNodes[-1]
     289         node.previousSibling = last
     290         last.nextSibling = node
     291     childNodes.append(node)
     292     node.parentNode = self
     293 
     294 def _in_document(node):
     295     # return True iff node is part of a document tree
     296     while node is not None:
     297         if node.nodeType == Node.DOCUMENT_NODE:
     298             return True
     299         node = node.parentNode
     300     return False
     301 
     302 def _write_data(writer, data):
     303     "Writes datachars to writer."
     304     if data:
     305         data = data.replace("&", "&amp;").replace("<", "&lt;"). 
     306                     replace(""", "&quot;").replace(">", "&gt;")
     307         writer.write(data)
     308 
     309 def _get_elements_by_tagName_helper(parent, name, rc):
     310     for node in parent.childNodes:
     311         if node.nodeType == Node.ELEMENT_NODE and 
     312             (name == "*" or node.tagName == name):
     313             rc.append(node)
     314         _get_elements_by_tagName_helper(node, name, rc)
     315     return rc
     316 
     317 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
     318     for node in parent.childNodes:
     319         if node.nodeType == Node.ELEMENT_NODE:
     320             if ((localName == "*" or node.localName == localName) and
     321                 (nsURI == "*" or node.namespaceURI == nsURI)):
     322                 rc.append(node)
     323             _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
     324     return rc
     325 
     326 class DocumentFragment(Node):
     327     nodeType = Node.DOCUMENT_FRAGMENT_NODE
     328     nodeName = "#document-fragment"
     329     nodeValue = None
     330     attributes = None
     331     parentNode = None
     332     _child_node_types = (Node.ELEMENT_NODE,
     333                          Node.TEXT_NODE,
     334                          Node.CDATA_SECTION_NODE,
     335                          Node.ENTITY_REFERENCE_NODE,
     336                          Node.PROCESSING_INSTRUCTION_NODE,
     337                          Node.COMMENT_NODE,
     338                          Node.NOTATION_NODE)
     339 
     340     def __init__(self):
     341         self.childNodes = NodeList()
     342 
     343 
     344 class Attr(Node):
     345     __slots__=('_name', '_value', 'namespaceURI',
     346                '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
     347     nodeType = Node.ATTRIBUTE_NODE
     348     attributes = None
     349     specified = False
     350     _is_id = False
     351 
     352     _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
     353 
     354     def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
     355                  prefix=None):
     356         self.ownerElement = None
     357         self._name = qName
     358         self.namespaceURI = namespaceURI
     359         self._prefix = prefix
     360         self.childNodes = NodeList()
     361 
     362         # Add the single child node that represents the value of the attr
     363         self.childNodes.append(Text())
     364 
     365         # nodeValue and value are set elsewhere
     366 
     367     def _get_localName(self):
     368         try:
     369             return self._localName
     370         except AttributeError:
     371             return self.nodeName.split(":", 1)[-1]
     372 
     373     def _get_specified(self):
     374         return self.specified
     375 
     376     def _get_name(self):
     377         return self._name
     378 
     379     def _set_name(self, value):
     380         self._name = value
     381         if self.ownerElement is not None:
     382             _clear_id_cache(self.ownerElement)
     383 
     384     nodeName = name = property(_get_name, _set_name)
     385 
     386     def _get_value(self):
     387         return self._value
     388 
     389     def _set_value(self, value):
     390         self._value = value
     391         self.childNodes[0].data = value
     392         if self.ownerElement is not None:
     393             _clear_id_cache(self.ownerElement)
     394         self.childNodes[0].data = value
     395 
     396     nodeValue = value = property(_get_value, _set_value)
     397 
     398     def _get_prefix(self):
     399         return self._prefix
     400 
     401     def _set_prefix(self, prefix):
     402         nsuri = self.namespaceURI
     403         if prefix == "xmlns":
     404             if nsuri and nsuri != XMLNS_NAMESPACE:
     405                 raise xml.dom.NamespaceErr(
     406                     "illegal use of 'xmlns' prefix for the wrong namespace")
     407         self._prefix = prefix
     408         if prefix is None:
     409             newName = self.localName
     410         else:
     411             newName = "%s:%s" % (prefix, self.localName)
     412         if self.ownerElement:
     413             _clear_id_cache(self.ownerElement)
     414         self.name = newName
     415 
     416     prefix = property(_get_prefix, _set_prefix)
     417 
     418     def unlink(self):
     419         # This implementation does not call the base implementation
     420         # since most of that is not needed, and the expense of the
     421         # method call is not warranted.  We duplicate the removal of
     422         # children, but that's all we needed from the base class.
     423         elem = self.ownerElement
     424         if elem is not None:
     425             del elem._attrs[self.nodeName]
     426             del elem._attrsNS[(self.namespaceURI, self.localName)]
     427             if self._is_id:
     428                 self._is_id = False
     429                 elem._magic_id_nodes -= 1
     430                 self.ownerDocument._magic_id_count -= 1
     431         for child in self.childNodes:
     432             child.unlink()
     433         del self.childNodes[:]
     434 
     435     def _get_isId(self):
     436         if self._is_id:
     437             return True
     438         doc = self.ownerDocument
     439         elem = self.ownerElement
     440         if doc is None or elem is None:
     441             return False
     442 
     443         info = doc._get_elem_info(elem)
     444         if info is None:
     445             return False
     446         if self.namespaceURI:
     447             return info.isIdNS(self.namespaceURI, self.localName)
     448         else:
     449             return info.isId(self.nodeName)
     450 
     451     def _get_schemaType(self):
     452         doc = self.ownerDocument
     453         elem = self.ownerElement
     454         if doc is None or elem is None:
     455             return _no_type
     456 
     457         info = doc._get_elem_info(elem)
     458         if info is None:
     459             return _no_type
     460         if self.namespaceURI:
     461             return info.getAttributeTypeNS(self.namespaceURI, self.localName)
     462         else:
     463             return info.getAttributeType(self.nodeName)
     464 
     465 defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
     466 defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
     467 defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
     468 
     469 
     470 class NamedNodeMap(object):
     471     """The attribute list is a transient interface to the underlying
     472     dictionaries.  Mutations here will change the underlying element's
     473     dictionary.
     474 
     475     Ordering is imposed artificially and does not reflect the order of
     476     attributes as found in an input document.
     477     """
     478 
     479     __slots__ = ('_attrs', '_attrsNS', '_ownerElement')
     480 
     481     def __init__(self, attrs, attrsNS, ownerElement):
     482         self._attrs = attrs
     483         self._attrsNS = attrsNS
     484         self._ownerElement = ownerElement
     485 
     486     def _get_length(self):
     487         return len(self._attrs)
     488 
     489     def item(self, index):
     490         try:
     491             return self[list(self._attrs.keys())[index]]
     492         except IndexError:
     493             return None
     494 
     495     def items(self):
     496         L = []
     497         for node in self._attrs.values():
     498             L.append((node.nodeName, node.value))
     499         return L
     500 
     501     def itemsNS(self):
     502         L = []
     503         for node in self._attrs.values():
     504             L.append(((node.namespaceURI, node.localName), node.value))
     505         return L
     506 
     507     def __contains__(self, key):
     508         if isinstance(key, str):
     509             return key in self._attrs
     510         else:
     511             return key in self._attrsNS
     512 
     513     def keys(self):
     514         return self._attrs.keys()
     515 
     516     def keysNS(self):
     517         return self._attrsNS.keys()
     518 
     519     def values(self):
     520         return self._attrs.values()
     521 
     522     def get(self, name, value=None):
     523         return self._attrs.get(name, value)
     524 
     525     __len__ = _get_length
     526 
     527     def _cmp(self, other):
     528         if self._attrs is getattr(other, "_attrs", None):
     529             return 0
     530         else:
     531             return (id(self) > id(other)) - (id(self) < id(other))
     532 
     533     def __eq__(self, other):
     534         return self._cmp(other) == 0
     535 
     536     def __ge__(self, other):
     537         return self._cmp(other) >= 0
     538 
     539     def __gt__(self, other):
     540         return self._cmp(other) > 0
     541 
     542     def __le__(self, other):
     543         return self._cmp(other) <= 0
     544 
     545     def __lt__(self, other):
     546         return self._cmp(other) < 0
     547 
     548     def __getitem__(self, attname_or_tuple):
     549         if isinstance(attname_or_tuple, tuple):
     550             return self._attrsNS[attname_or_tuple]
     551         else:
     552             return self._attrs[attname_or_tuple]
     553 
     554     # same as set
     555     def __setitem__(self, attname, value):
     556         if isinstance(value, str):
     557             try:
     558                 node = self._attrs[attname]
     559             except KeyError:
     560                 node = Attr(attname)
     561                 node.ownerDocument = self._ownerElement.ownerDocument
     562                 self.setNamedItem(node)
     563             node.value = value
     564         else:
     565             if not isinstance(value, Attr):
     566                 raise TypeError("value must be a string or Attr object")
     567             node = value
     568             self.setNamedItem(node)
     569 
     570     def getNamedItem(self, name):
     571         try:
     572             return self._attrs[name]
     573         except KeyError:
     574             return None
     575 
     576     def getNamedItemNS(self, namespaceURI, localName):
     577         try:
     578             return self._attrsNS[(namespaceURI, localName)]
     579         except KeyError:
     580             return None
     581 
     582     def removeNamedItem(self, name):
     583         n = self.getNamedItem(name)
     584         if n is not None:
     585             _clear_id_cache(self._ownerElement)
     586             del self._attrs[n.nodeName]
     587             del self._attrsNS[(n.namespaceURI, n.localName)]
     588             if hasattr(n, 'ownerElement'):
     589                 n.ownerElement = None
     590             return n
     591         else:
     592             raise xml.dom.NotFoundErr()
     593 
     594     def removeNamedItemNS(self, namespaceURI, localName):
     595         n = self.getNamedItemNS(namespaceURI, localName)
     596         if n is not None:
     597             _clear_id_cache(self._ownerElement)
     598             del self._attrsNS[(n.namespaceURI, n.localName)]
     599             del self._attrs[n.nodeName]
     600             if hasattr(n, 'ownerElement'):
     601                 n.ownerElement = None
     602             return n
     603         else:
     604             raise xml.dom.NotFoundErr()
     605 
     606     def setNamedItem(self, node):
     607         if not isinstance(node, Attr):
     608             raise xml.dom.HierarchyRequestErr(
     609                 "%s cannot be child of %s" % (repr(node), repr(self)))
     610         old = self._attrs.get(node.name)
     611         if old:
     612             old.unlink()
     613         self._attrs[node.name] = node
     614         self._attrsNS[(node.namespaceURI, node.localName)] = node
     615         node.ownerElement = self._ownerElement
     616         _clear_id_cache(node.ownerElement)
     617         return old
     618 
     619     def setNamedItemNS(self, node):
     620         return self.setNamedItem(node)
     621 
     622     def __delitem__(self, attname_or_tuple):
     623         node = self[attname_or_tuple]
     624         _clear_id_cache(node.ownerElement)
     625         node.unlink()
     626 
     627     def __getstate__(self):
     628         return self._attrs, self._attrsNS, self._ownerElement
     629 
     630     def __setstate__(self, state):
     631         self._attrs, self._attrsNS, self._ownerElement = state
     632 
     633 defproperty(NamedNodeMap, "length",
     634             doc="Number of nodes in the NamedNodeMap.")
     635 
     636 AttributeList = NamedNodeMap
     637 
     638 
     639 class TypeInfo(object):
     640     __slots__ = 'namespace', 'name'
     641 
     642     def __init__(self, namespace, name):
     643         self.namespace = namespace
     644         self.name = name
     645 
     646     def __repr__(self):
     647         if self.namespace:
     648             return "<%s %r (from %r)>" % (self.__class__.__name__, self.name,
     649                                           self.namespace)
     650         else:
     651             return "<%s %r>" % (self.__class__.__name__, self.name)
     652 
     653     def _get_name(self):
     654         return self.name
     655 
     656     def _get_namespace(self):
     657         return self.namespace
     658 
     659 _no_type = TypeInfo(None, None)
     660 
     661 class Element(Node):
     662     __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
     663                'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
     664                'nextSibling', 'previousSibling')
     665     nodeType = Node.ELEMENT_NODE
     666     nodeValue = None
     667     schemaType = _no_type
     668 
     669     _magic_id_nodes = 0
     670 
     671     _child_node_types = (Node.ELEMENT_NODE,
     672                          Node.PROCESSING_INSTRUCTION_NODE,
     673                          Node.COMMENT_NODE,
     674                          Node.TEXT_NODE,
     675                          Node.CDATA_SECTION_NODE,
     676                          Node.ENTITY_REFERENCE_NODE)
     677 
     678     def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
     679                  localName=None):
     680         self.parentNode = None
     681         self.tagName = self.nodeName = tagName
     682         self.prefix = prefix
     683         self.namespaceURI = namespaceURI
     684         self.childNodes = NodeList()
     685         self.nextSibling = self.previousSibling = None
     686 
     687         # Attribute dictionaries are lazily created
     688         # attributes are double-indexed:
     689         #    tagName -> Attribute
     690         #    URI,localName -> Attribute
     691         # in the future: consider lazy generation
     692         # of attribute objects this is too tricky
     693         # for now because of headaches with
     694         # namespaces.
     695         self._attrs = None
     696         self._attrsNS = None
     697 
     698     def _ensure_attributes(self):
     699         if self._attrs is None:
     700             self._attrs = {}
     701             self._attrsNS = {}
     702 
     703     def _get_localName(self):
     704         try:
     705             return self._localName
     706         except AttributeError:
     707             return self.tagName.split(":", 1)[-1]
     708 
     709     def _get_tagName(self):
     710         return self.tagName
     711 
     712     def unlink(self):
     713         if self._attrs is not None:
     714             for attr in list(self._attrs.values()):
     715                 attr.unlink()
     716         self._attrs = None
     717         self._attrsNS = None
     718         Node.unlink(self)
     719 
     720     def getAttribute(self, attname):
     721         if self._attrs is None:
     722             return ""
     723         try:
     724             return self._attrs[attname].value
     725         except KeyError:
     726             return ""
     727 
     728     def getAttributeNS(self, namespaceURI, localName):
     729         if self._attrsNS is None:
     730             return ""
     731         try:
     732             return self._attrsNS[(namespaceURI, localName)].value
     733         except KeyError:
     734             return ""
     735 
     736     def setAttribute(self, attname, value):
     737         attr = self.getAttributeNode(attname)
     738         if attr is None:
     739             attr = Attr(attname)
     740             attr.value = value # also sets nodeValue
     741             attr.ownerDocument = self.ownerDocument
     742             self.setAttributeNode(attr)
     743         elif value != attr.value:
     744             attr.value = value
     745             if attr.isId:
     746                 _clear_id_cache(self)
     747 
     748     def setAttributeNS(self, namespaceURI, qualifiedName, value):
     749         prefix, localname = _nssplit(qualifiedName)
     750         attr = self.getAttributeNodeNS(namespaceURI, localname)
     751         if attr is None:
     752             attr = Attr(qualifiedName, namespaceURI, localname, prefix)
     753             attr.value = value
     754             attr.ownerDocument = self.ownerDocument
     755             self.setAttributeNode(attr)
     756         else:
     757             if value != attr.value:
     758                 attr.value = value
     759                 if attr.isId:
     760                     _clear_id_cache(self)
     761             if attr.prefix != prefix:
     762                 attr.prefix = prefix
     763                 attr.nodeName = qualifiedName
     764 
     765     def getAttributeNode(self, attrname):
     766         if self._attrs is None:
     767             return None
     768         return self._attrs.get(attrname)
     769 
     770     def getAttributeNodeNS(self, namespaceURI, localName):
     771         if self._attrsNS is None:
     772             return None
     773         return self._attrsNS.get((namespaceURI, localName))
     774 
     775     def setAttributeNode(self, attr):
     776         if attr.ownerElement not in (None, self):
     777             raise xml.dom.InuseAttributeErr("attribute node already owned")
     778         self._ensure_attributes()
     779         old1 = self._attrs.get(attr.name, None)
     780         if old1 is not None:
     781             self.removeAttributeNode(old1)
     782         old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
     783         if old2 is not None and old2 is not old1:
     784             self.removeAttributeNode(old2)
     785         _set_attribute_node(self, attr)
     786 
     787         if old1 is not attr:
     788             # It might have already been part of this node, in which case
     789             # it doesn't represent a change, and should not be returned.
     790             return old1
     791         if old2 is not attr:
     792             return old2
     793 
     794     setAttributeNodeNS = setAttributeNode
     795 
     796     def removeAttribute(self, name):
     797         if self._attrsNS is None:
     798             raise xml.dom.NotFoundErr()
     799         try:
     800             attr = self._attrs[name]
     801         except KeyError:
     802             raise xml.dom.NotFoundErr()
     803         self.removeAttributeNode(attr)
     804 
     805     def removeAttributeNS(self, namespaceURI, localName):
     806         if self._attrsNS is None:
     807             raise xml.dom.NotFoundErr()
     808         try:
     809             attr = self._attrsNS[(namespaceURI, localName)]
     810         except KeyError:
     811             raise xml.dom.NotFoundErr()
     812         self.removeAttributeNode(attr)
     813 
     814     def removeAttributeNode(self, node):
     815         if node is None:
     816             raise xml.dom.NotFoundErr()
     817         try:
     818             self._attrs[node.name]
     819         except KeyError:
     820             raise xml.dom.NotFoundErr()
     821         _clear_id_cache(self)
     822         node.unlink()
     823         # Restore this since the node is still useful and otherwise
     824         # unlinked
     825         node.ownerDocument = self.ownerDocument
     826 
     827     removeAttributeNodeNS = removeAttributeNode
     828 
     829     def hasAttribute(self, name):
     830         if self._attrs is None:
     831             return False
     832         return name in self._attrs
     833 
     834     def hasAttributeNS(self, namespaceURI, localName):
     835         if self._attrsNS is None:
     836             return False
     837         return (namespaceURI, localName) in self._attrsNS
     838 
     839     def getElementsByTagName(self, name):
     840         return _get_elements_by_tagName_helper(self, name, NodeList())
     841 
     842     def getElementsByTagNameNS(self, namespaceURI, localName):
     843         return _get_elements_by_tagName_ns_helper(
     844             self, namespaceURI, localName, NodeList())
     845 
     846     def __repr__(self):
     847         return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
     848 
     849     def writexml(self, writer, indent="", addindent="", newl=""):
     850         # indent = current indentation
     851         # addindent = indentation to add to higher levels
     852         # newl = newline string
     853         writer.write(indent+"<" + self.tagName)
     854 
     855         attrs = self._get_attributes()
     856         a_names = sorted(attrs.keys())
     857 
     858         for a_name in a_names:
     859             writer.write(" %s="" % a_name)
     860             _write_data(writer, attrs[a_name].value)
     861             writer.write(""")
     862         if self.childNodes:
     863             writer.write(">")
     864             if (len(self.childNodes) == 1 and
     865                 self.childNodes[0].nodeType == Node.TEXT_NODE):
     866                 self.childNodes[0].writexml(writer, '', '', '')
     867             else:
     868                 writer.write(newl)
     869                 for node in self.childNodes:
     870                     node.writexml(writer, indent+addindent, addindent, newl)
     871                 writer.write(indent)
     872             writer.write("</%s>%s" % (self.tagName, newl))
     873         else:
     874             writer.write("/>%s"%(newl))
     875 
     876     def _get_attributes(self):
     877         self._ensure_attributes()
     878         return NamedNodeMap(self._attrs, self._attrsNS, self)
     879 
     880     def hasAttributes(self):
     881         if self._attrs:
     882             return True
     883         else:
     884             return False
     885 
     886     # DOM Level 3 attributes, based on the 22 Oct 2002 draft
     887 
     888     def setIdAttribute(self, name):
     889         idAttr = self.getAttributeNode(name)
     890         self.setIdAttributeNode(idAttr)
     891 
     892     def setIdAttributeNS(self, namespaceURI, localName):
     893         idAttr = self.getAttributeNodeNS(namespaceURI, localName)
     894         self.setIdAttributeNode(idAttr)
     895 
     896     def setIdAttributeNode(self, idAttr):
     897         if idAttr is None or not self.isSameNode(idAttr.ownerElement):
     898             raise xml.dom.NotFoundErr()
     899         if _get_containing_entref(self) is not None:
     900             raise xml.dom.NoModificationAllowedErr()
     901         if not idAttr._is_id:
     902             idAttr._is_id = True
     903             self._magic_id_nodes += 1
     904             self.ownerDocument._magic_id_count += 1
     905             _clear_id_cache(self)
     906 
     907 defproperty(Element, "attributes",
     908             doc="NamedNodeMap of attributes on the element.")
     909 defproperty(Element, "localName",
     910             doc="Namespace-local name of this element.")
     911 
     912 
     913 def _set_attribute_node(element, attr):
     914     _clear_id_cache(element)
     915     element._ensure_attributes()
     916     element._attrs[attr.name] = attr
     917     element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
     918 
     919     # This creates a circular reference, but Element.unlink()
     920     # breaks the cycle since the references to the attribute
     921     # dictionaries are tossed.
     922     attr.ownerElement = element
     923 
     924 class Childless:
     925     """Mixin that makes childless-ness easy to implement and avoids
     926     the complexity of the Node methods that deal with children.
     927     """
     928     __slots__ = ()
     929 
     930     attributes = None
     931     childNodes = EmptyNodeList()
     932     firstChild = None
     933     lastChild = None
     934 
     935     def _get_firstChild(self):
     936         return None
     937 
     938     def _get_lastChild(self):
     939         return None
     940 
     941     def appendChild(self, node):
     942         raise xml.dom.HierarchyRequestErr(
     943             self.nodeName + " nodes cannot have children")
     944 
     945     def hasChildNodes(self):
     946         return False
     947 
     948     def insertBefore(self, newChild, refChild):
     949         raise xml.dom.HierarchyRequestErr(
     950             self.nodeName + " nodes do not have children")
     951 
     952     def removeChild(self, oldChild):
     953         raise xml.dom.NotFoundErr(
     954             self.nodeName + " nodes do not have children")
     955 
     956     def normalize(self):
     957         # For childless nodes, normalize() has nothing to do.
     958         pass
     959 
     960     def replaceChild(self, newChild, oldChild):
     961         raise xml.dom.HierarchyRequestErr(
     962             self.nodeName + " nodes do not have children")
     963 
     964 
     965 class ProcessingInstruction(Childless, Node):
     966     nodeType = Node.PROCESSING_INSTRUCTION_NODE
     967     __slots__ = ('target', 'data')
     968 
     969     def __init__(self, target, data):
     970         self.target = target
     971         self.data = data
     972 
     973     # nodeValue is an alias for data
     974     def _get_nodeValue(self):
     975         return self.data
     976     def _set_nodeValue(self, value):
     977         self.data = value
     978     nodeValue = property(_get_nodeValue, _set_nodeValue)
     979 
     980     # nodeName is an alias for target
     981     def _get_nodeName(self):
     982         return self.target
     983     def _set_nodeName(self, value):
     984         self.target = value
     985     nodeName = property(_get_nodeName, _set_nodeName)
     986 
     987     def writexml(self, writer, indent="", addindent="", newl=""):
     988         writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
     989 
     990 
     991 class CharacterData(Childless, Node):
     992     __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')
     993 
     994     def __init__(self):
     995         self.ownerDocument = self.parentNode = None
     996         self.previousSibling = self.nextSibling = None
     997         self._data = ''
     998         Node.__init__(self)
     999 
    1000     def _get_length(self):
    1001         return len(self.data)
    1002     __len__ = _get_length
    1003 
    1004     def _get_data(self):
    1005         return self._data
    1006     def _set_data(self, data):
    1007         self._data = data
    1008 
    1009     data = nodeValue = property(_get_data, _set_data)
    1010 
    1011     def __repr__(self):
    1012         data = self.data
    1013         if len(data) > 10:
    1014             dotdotdot = "..."
    1015         else:
    1016             dotdotdot = ""
    1017         return '<DOM %s node "%r%s">' % (
    1018             self.__class__.__name__, data[0:10], dotdotdot)
    1019 
    1020     def substringData(self, offset, count):
    1021         if offset < 0:
    1022             raise xml.dom.IndexSizeErr("offset cannot be negative")
    1023         if offset >= len(self.data):
    1024             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    1025         if count < 0:
    1026             raise xml.dom.IndexSizeErr("count cannot be negative")
    1027         return self.data[offset:offset+count]
    1028 
    1029     def appendData(self, arg):
    1030         self.data = self.data + arg
    1031 
    1032     def insertData(self, offset, arg):
    1033         if offset < 0:
    1034             raise xml.dom.IndexSizeErr("offset cannot be negative")
    1035         if offset >= len(self.data):
    1036             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    1037         if arg:
    1038             self.data = "%s%s%s" % (
    1039                 self.data[:offset], arg, self.data[offset:])
    1040 
    1041     def deleteData(self, offset, count):
    1042         if offset < 0:
    1043             raise xml.dom.IndexSizeErr("offset cannot be negative")
    1044         if offset >= len(self.data):
    1045             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    1046         if count < 0:
    1047             raise xml.dom.IndexSizeErr("count cannot be negative")
    1048         if count:
    1049             self.data = self.data[:offset] + self.data[offset+count:]
    1050 
    1051     def replaceData(self, offset, count, arg):
    1052         if offset < 0:
    1053             raise xml.dom.IndexSizeErr("offset cannot be negative")
    1054         if offset >= len(self.data):
    1055             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    1056         if count < 0:
    1057             raise xml.dom.IndexSizeErr("count cannot be negative")
    1058         if count:
    1059             self.data = "%s%s%s" % (
    1060                 self.data[:offset], arg, self.data[offset+count:])
    1061 
    1062 defproperty(CharacterData, "length", doc="Length of the string data.")
    1063 
    1064 
    1065 class Text(CharacterData):
    1066     __slots__ = ()
    1067 
    1068     nodeType = Node.TEXT_NODE
    1069     nodeName = "#text"
    1070     attributes = None
    1071 
    1072     def splitText(self, offset):
    1073         if offset < 0 or offset > len(self.data):
    1074             raise xml.dom.IndexSizeErr("illegal offset value")
    1075         newText = self.__class__()
    1076         newText.data = self.data[offset:]
    1077         newText.ownerDocument = self.ownerDocument
    1078         next = self.nextSibling
    1079         if self.parentNode and self in self.parentNode.childNodes:
    1080             if next is None:
    1081                 self.parentNode.appendChild(newText)
    1082             else:
    1083                 self.parentNode.insertBefore(newText, next)
    1084         self.data = self.data[:offset]
    1085         return newText
    1086 
    1087     def writexml(self, writer, indent="", addindent="", newl=""):
    1088         _write_data(writer, "%s%s%s" % (indent, self.data, newl))
    1089 
    1090     # DOM Level 3 (WD 9 April 2002)
    1091 
    1092     def _get_wholeText(self):
    1093         L = [self.data]
    1094         n = self.previousSibling
    1095         while n is not None:
    1096             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
    1097                 L.insert(0, n.data)
    1098                 n = n.previousSibling
    1099             else:
    1100                 break
    1101         n = self.nextSibling
    1102         while n is not None:
    1103             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
    1104                 L.append(n.data)
    1105                 n = n.nextSibling
    1106             else:
    1107                 break
    1108         return ''.join(L)
    1109 
    1110     def replaceWholeText(self, content):
    1111         # XXX This needs to be seriously changed if minidom ever
    1112         # supports EntityReference nodes.
    1113         parent = self.parentNode
    1114         n = self.previousSibling
    1115         while n is not None:
    1116             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
    1117                 next = n.previousSibling
    1118                 parent.removeChild(n)
    1119                 n = next
    1120             else:
    1121                 break
    1122         n = self.nextSibling
    1123         if not content:
    1124             parent.removeChild(self)
    1125         while n is not None:
    1126             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
    1127                 next = n.nextSibling
    1128                 parent.removeChild(n)
    1129                 n = next
    1130             else:
    1131                 break
    1132         if content:
    1133             self.data = content
    1134             return self
    1135         else:
    1136             return None
    1137 
    1138     def _get_isWhitespaceInElementContent(self):
    1139         if self.data.strip():
    1140             return False
    1141         elem = _get_containing_element(self)
    1142         if elem is None:
    1143             return False
    1144         info = self.ownerDocument._get_elem_info(elem)
    1145         if info is None:
    1146             return False
    1147         else:
    1148             return info.isElementContent()
    1149 
    1150 defproperty(Text, "isWhitespaceInElementContent",
    1151             doc="True iff this text node contains only whitespace"
    1152                 " and is in element content.")
    1153 defproperty(Text, "wholeText",
    1154             doc="The text of all logically-adjacent text nodes.")
    1155 
    1156 
    1157 def _get_containing_element(node):
    1158     c = node.parentNode
    1159     while c is not None:
    1160         if c.nodeType == Node.ELEMENT_NODE:
    1161             return c
    1162         c = c.parentNode
    1163     return None
    1164 
    1165 def _get_containing_entref(node):
    1166     c = node.parentNode
    1167     while c is not None:
    1168         if c.nodeType == Node.ENTITY_REFERENCE_NODE:
    1169             return c
    1170         c = c.parentNode
    1171     return None
    1172 
    1173 
    1174 class Comment(CharacterData):
    1175     nodeType = Node.COMMENT_NODE
    1176     nodeName = "#comment"
    1177 
    1178     def __init__(self, data):
    1179         CharacterData.__init__(self)
    1180         self._data = data
    1181 
    1182     def writexml(self, writer, indent="", addindent="", newl=""):
    1183         if "--" in self.data:
    1184             raise ValueError("'--' is not allowed in a comment node")
    1185         writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
    1186 
    1187 
    1188 class CDATASection(Text):
    1189     __slots__ = ()
    1190 
    1191     nodeType = Node.CDATA_SECTION_NODE
    1192     nodeName = "#cdata-section"
    1193 
    1194     def writexml(self, writer, indent="", addindent="", newl=""):
    1195         if self.data.find("]]>") >= 0:
    1196             raise ValueError("']]>' not allowed in a CDATA section")
    1197         writer.write("<![CDATA[%s]]>" % self.data)
    1198 
    1199 
    1200 class ReadOnlySequentialNamedNodeMap(object):
    1201     __slots__ = '_seq',
    1202 
    1203     def __init__(self, seq=()):
    1204         # seq should be a list or tuple
    1205         self._seq = seq
    1206 
    1207     def __len__(self):
    1208         return len(self._seq)
    1209 
    1210     def _get_length(self):
    1211         return len(self._seq)
    1212 
    1213     def getNamedItem(self, name):
    1214         for n in self._seq:
    1215             if n.nodeName == name:
    1216                 return n
    1217 
    1218     def getNamedItemNS(self, namespaceURI, localName):
    1219         for n in self._seq:
    1220             if n.namespaceURI == namespaceURI and n.localName == localName:
    1221                 return n
    1222 
    1223     def __getitem__(self, name_or_tuple):
    1224         if isinstance(name_or_tuple, tuple):
    1225             node = self.getNamedItemNS(*name_or_tuple)
    1226         else:
    1227             node = self.getNamedItem(name_or_tuple)
    1228         if node is None:
    1229             raise KeyError(name_or_tuple)
    1230         return node
    1231 
    1232     def item(self, index):
    1233         if index < 0:
    1234             return None
    1235         try:
    1236             return self._seq[index]
    1237         except IndexError:
    1238             return None
    1239 
    1240     def removeNamedItem(self, name):
    1241         raise xml.dom.NoModificationAllowedErr(
    1242             "NamedNodeMap instance is read-only")
    1243 
    1244     def removeNamedItemNS(self, namespaceURI, localName):
    1245         raise xml.dom.NoModificationAllowedErr(
    1246             "NamedNodeMap instance is read-only")
    1247 
    1248     def setNamedItem(self, node):
    1249         raise xml.dom.NoModificationAllowedErr(
    1250             "NamedNodeMap instance is read-only")
    1251 
    1252     def setNamedItemNS(self, node):
    1253         raise xml.dom.NoModificationAllowedErr(
    1254             "NamedNodeMap instance is read-only")
    1255 
    1256     def __getstate__(self):
    1257         return [self._seq]
    1258 
    1259     def __setstate__(self, state):
    1260         self._seq = state[0]
    1261 
    1262 defproperty(ReadOnlySequentialNamedNodeMap, "length",
    1263             doc="Number of entries in the NamedNodeMap.")
    1264 
    1265 
    1266 class Identified:
    1267     """Mix-in class that supports the publicId and systemId attributes."""
    1268 
    1269     __slots__ = 'publicId', 'systemId'
    1270 
    1271     def _identified_mixin_init(self, publicId, systemId):
    1272         self.publicId = publicId
    1273         self.systemId = systemId
    1274 
    1275     def _get_publicId(self):
    1276         return self.publicId
    1277 
    1278     def _get_systemId(self):
    1279         return self.systemId
    1280 
    1281 class DocumentType(Identified, Childless, Node):
    1282     nodeType = Node.DOCUMENT_TYPE_NODE
    1283     nodeValue = None
    1284     name = None
    1285     publicId = None
    1286     systemId = None
    1287     internalSubset = None
    1288 
    1289     def __init__(self, qualifiedName):
    1290         self.entities = ReadOnlySequentialNamedNodeMap()
    1291         self.notations = ReadOnlySequentialNamedNodeMap()
    1292         if qualifiedName:
    1293             prefix, localname = _nssplit(qualifiedName)
    1294             self.name = localname
    1295         self.nodeName = self.name
    1296 
    1297     def _get_internalSubset(self):
    1298         return self.internalSubset
    1299 
    1300     def cloneNode(self, deep):
    1301         if self.ownerDocument is None:
    1302             # it's ok
    1303             clone = DocumentType(None)
    1304             clone.name = self.name
    1305             clone.nodeName = self.name
    1306             operation = xml.dom.UserDataHandler.NODE_CLONED
    1307             if deep:
    1308                 clone.entities._seq = []
    1309                 clone.notations._seq = []
    1310                 for n in self.notations._seq:
    1311                     notation = Notation(n.nodeName, n.publicId, n.systemId)
    1312                     clone.notations._seq.append(notation)
    1313                     n._call_user_data_handler(operation, n, notation)
    1314                 for e in self.entities._seq:
    1315                     entity = Entity(e.nodeName, e.publicId, e.systemId,
    1316                                     e.notationName)
    1317                     entity.actualEncoding = e.actualEncoding
    1318                     entity.encoding = e.encoding
    1319                     entity.version = e.version
    1320                     clone.entities._seq.append(entity)
    1321                     e._call_user_data_handler(operation, n, entity)
    1322             self._call_user_data_handler(operation, self, clone)
    1323             return clone
    1324         else:
    1325             return None
    1326 
    1327     def writexml(self, writer, indent="", addindent="", newl=""):
    1328         writer.write("<!DOCTYPE ")
    1329         writer.write(self.name)
    1330         if self.publicId:
    1331             writer.write("%s  PUBLIC '%s'%s  '%s'"
    1332                          % (newl, self.publicId, newl, self.systemId))
    1333         elif self.systemId:
    1334             writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
    1335         if self.internalSubset is not None:
    1336             writer.write(" [")
    1337             writer.write(self.internalSubset)
    1338             writer.write("]")
    1339         writer.write(">"+newl)
    1340 
    1341 class Entity(Identified, Node):
    1342     attributes = None
    1343     nodeType = Node.ENTITY_NODE
    1344     nodeValue = None
    1345 
    1346     actualEncoding = None
    1347     encoding = None
    1348     version = None
    1349 
    1350     def __init__(self, name, publicId, systemId, notation):
    1351         self.nodeName = name
    1352         self.notationName = notation
    1353         self.childNodes = NodeList()
    1354         self._identified_mixin_init(publicId, systemId)
    1355 
    1356     def _get_actualEncoding(self):
    1357         return self.actualEncoding
    1358 
    1359     def _get_encoding(self):
    1360         return self.encoding
    1361 
    1362     def _get_version(self):
    1363         return self.version
    1364 
    1365     def appendChild(self, newChild):
    1366         raise xml.dom.HierarchyRequestErr(
    1367             "cannot append children to an entity node")
    1368 
    1369     def insertBefore(self, newChild, refChild):
    1370         raise xml.dom.HierarchyRequestErr(
    1371             "cannot insert children below an entity node")
    1372 
    1373     def removeChild(self, oldChild):
    1374         raise xml.dom.HierarchyRequestErr(
    1375             "cannot remove children from an entity node")
    1376 
    1377     def replaceChild(self, newChild, oldChild):
    1378         raise xml.dom.HierarchyRequestErr(
    1379             "cannot replace children of an entity node")
    1380 
    1381 class Notation(Identified, Childless, Node):
    1382     nodeType = Node.NOTATION_NODE
    1383     nodeValue = None
    1384 
    1385     def __init__(self, name, publicId, systemId):
    1386         self.nodeName = name
    1387         self._identified_mixin_init(publicId, systemId)
    1388 
    1389 
    1390 class DOMImplementation(DOMImplementationLS):
    1391     _features = [("core", "1.0"),
    1392                  ("core", "2.0"),
    1393                  ("core", None),
    1394                  ("xml", "1.0"),
    1395                  ("xml", "2.0"),
    1396                  ("xml", None),
    1397                  ("ls-load", "3.0"),
    1398                  ("ls-load", None),
    1399                  ]
    1400 
    1401     def hasFeature(self, feature, version):
    1402         if version == "":
    1403             version = None
    1404         return (feature.lower(), version) in self._features
    1405 
    1406     def createDocument(self, namespaceURI, qualifiedName, doctype):
    1407         if doctype and doctype.parentNode is not None:
    1408             raise xml.dom.WrongDocumentErr(
    1409                 "doctype object owned by another DOM tree")
    1410         doc = self._create_document()
    1411 
    1412         add_root_element = not (namespaceURI is None
    1413                                 and qualifiedName is None
    1414                                 and doctype is None)
    1415 
    1416         if not qualifiedName and add_root_element:
    1417             # The spec is unclear what to raise here; SyntaxErr
    1418             # would be the other obvious candidate. Since Xerces raises
    1419             # InvalidCharacterErr, and since SyntaxErr is not listed
    1420             # for createDocument, that seems to be the better choice.
    1421             # XXX: need to check for illegal characters here and in
    1422             # createElement.
    1423 
    1424             # DOM Level III clears this up when talking about the return value
    1425             # of this function.  If namespaceURI, qName and DocType are
    1426             # Null the document is returned without a document element
    1427             # Otherwise if doctype or namespaceURI are not None
    1428             # Then we go back to the above problem
    1429             raise xml.dom.InvalidCharacterErr("Element with no name")
    1430 
    1431         if add_root_element:
    1432             prefix, localname = _nssplit(qualifiedName)
    1433             if prefix == "xml" 
    1434                and namespaceURI != "http://www.w3.org/XML/1998/namespace":
    1435                 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
    1436             if prefix and not namespaceURI:
    1437                 raise xml.dom.NamespaceErr(
    1438                     "illegal use of prefix without namespaces")
    1439             element = doc.createElementNS(namespaceURI, qualifiedName)
    1440             if doctype:
    1441                 doc.appendChild(doctype)
    1442             doc.appendChild(element)
    1443 
    1444         if doctype:
    1445             doctype.parentNode = doctype.ownerDocument = doc
    1446 
    1447         doc.doctype = doctype
    1448         doc.implementation = self
    1449         return doc
    1450 
    1451     def createDocumentType(self, qualifiedName, publicId, systemId):
    1452         doctype = DocumentType(qualifiedName)
    1453         doctype.publicId = publicId
    1454         doctype.systemId = systemId
    1455         return doctype
    1456 
    1457     # DOM Level 3 (WD 9 April 2002)
    1458 
    1459     def getInterface(self, feature):
    1460         if self.hasFeature(feature, None):
    1461             return self
    1462         else:
    1463             return None
    1464 
    1465     # internal
    1466     def _create_document(self):
    1467         return Document()
    1468 
    1469 class ElementInfo(object):
    1470     """Object that represents content-model information for an element.
    1471 
    1472     This implementation is not expected to be used in practice; DOM
    1473     builders should provide implementations which do the right thing
    1474     using information available to it.
    1475 
    1476     """
    1477 
    1478     __slots__ = 'tagName',
    1479 
    1480     def __init__(self, name):
    1481         self.tagName = name
    1482 
    1483     def getAttributeType(self, aname):
    1484         return _no_type
    1485 
    1486     def getAttributeTypeNS(self, namespaceURI, localName):
    1487         return _no_type
    1488 
    1489     def isElementContent(self):
    1490         return False
    1491 
    1492     def isEmpty(self):
    1493         """Returns true iff this element is declared to have an EMPTY
    1494         content model."""
    1495         return False
    1496 
    1497     def isId(self, aname):
    1498         """Returns true iff the named attribute is a DTD-style ID."""
    1499         return False
    1500 
    1501     def isIdNS(self, namespaceURI, localName):
    1502         """Returns true iff the identified attribute is a DTD-style ID."""
    1503         return False
    1504 
    1505     def __getstate__(self):
    1506         return self.tagName
    1507 
    1508     def __setstate__(self, state):
    1509         self.tagName = state
    1510 
    1511 def _clear_id_cache(node):
    1512     if node.nodeType == Node.DOCUMENT_NODE:
    1513         node._id_cache.clear()
    1514         node._id_search_stack = None
    1515     elif _in_document(node):
    1516         node.ownerDocument._id_cache.clear()
    1517         node.ownerDocument._id_search_stack= None
    1518 
    1519 class Document(Node, DocumentLS):
    1520     __slots__ = ('_elem_info', 'doctype',
    1521                  '_id_search_stack', 'childNodes', '_id_cache')
    1522     _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
    1523                          Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
    1524 
    1525     implementation = DOMImplementation()
    1526     nodeType = Node.DOCUMENT_NODE
    1527     nodeName = "#document"
    1528     nodeValue = None
    1529     attributes = None
    1530     parentNode = None
    1531     previousSibling = nextSibling = None
    1532 
    1533 
    1534     # Document attributes from Level 3 (WD 9 April 2002)
    1535 
    1536     actualEncoding = None
    1537     encoding = None
    1538     standalone = None
    1539     version = None
    1540     strictErrorChecking = False
    1541     errorHandler = None
    1542     documentURI = None
    1543 
    1544     _magic_id_count = 0
    1545 
    1546     def __init__(self):
    1547         self.doctype = None
    1548         self.childNodes = NodeList()
    1549         # mapping of (namespaceURI, localName) -> ElementInfo
    1550         #        and tagName -> ElementInfo
    1551         self._elem_info = {}
    1552         self._id_cache = {}
    1553         self._id_search_stack = None
    1554 
    1555     def _get_elem_info(self, element):
    1556         if element.namespaceURI:
    1557             key = element.namespaceURI, element.localName
    1558         else:
    1559             key = element.tagName
    1560         return self._elem_info.get(key)
    1561 
    1562     def _get_actualEncoding(self):
    1563         return self.actualEncoding
    1564 
    1565     def _get_doctype(self):
    1566         return self.doctype
    1567 
    1568     def _get_documentURI(self):
    1569         return self.documentURI
    1570 
    1571     def _get_encoding(self):
    1572         return self.encoding
    1573 
    1574     def _get_errorHandler(self):
    1575         return self.errorHandler
    1576 
    1577     def _get_standalone(self):
    1578         return self.standalone
    1579 
    1580     def _get_strictErrorChecking(self):
    1581         return self.strictErrorChecking
    1582 
    1583     def _get_version(self):
    1584         return self.version
    1585 
    1586     def appendChild(self, node):
    1587         if node.nodeType not in self._child_node_types:
    1588             raise xml.dom.HierarchyRequestErr(
    1589                 "%s cannot be child of %s" % (repr(node), repr(self)))
    1590         if node.parentNode is not None:
    1591             # This needs to be done before the next test since this
    1592             # may *be* the document element, in which case it should
    1593             # end up re-ordered to the end.
    1594             node.parentNode.removeChild(node)
    1595 
    1596         if node.nodeType == Node.ELEMENT_NODE 
    1597            and self._get_documentElement():
    1598             raise xml.dom.HierarchyRequestErr(
    1599                 "two document elements disallowed")
    1600         return Node.appendChild(self, node)
    1601 
    1602     def removeChild(self, oldChild):
    1603         try:
    1604             self.childNodes.remove(oldChild)
    1605         except ValueError:
    1606             raise xml.dom.NotFoundErr()
    1607         oldChild.nextSibling = oldChild.previousSibling = None
    1608         oldChild.parentNode = None
    1609         if self.documentElement is oldChild:
    1610             self.documentElement = None
    1611 
    1612         return oldChild
    1613 
    1614     def _get_documentElement(self):
    1615         for node in self.childNodes:
    1616             if node.nodeType == Node.ELEMENT_NODE:
    1617                 return node
    1618 
    1619     def unlink(self):
    1620         if self.doctype is not None:
    1621             self.doctype.unlink()
    1622             self.doctype = None
    1623         Node.unlink(self)
    1624 
    1625     def cloneNode(self, deep):
    1626         if not deep:
    1627             return None
    1628         clone = self.implementation.createDocument(None, None, None)
    1629         clone.encoding = self.encoding
    1630         clone.standalone = self.standalone
    1631         clone.version = self.version
    1632         for n in self.childNodes:
    1633             childclone = _clone_node(n, deep, clone)
    1634             assert childclone.ownerDocument.isSameNode(clone)
    1635             clone.childNodes.append(childclone)
    1636             if childclone.nodeType == Node.DOCUMENT_NODE:
    1637                 assert clone.documentElement is None
    1638             elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
    1639                 assert clone.doctype is None
    1640                 clone.doctype = childclone
    1641             childclone.parentNode = clone
    1642         self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
    1643                                      self, clone)
    1644         return clone
    1645 
    1646     def createDocumentFragment(self):
    1647         d = DocumentFragment()
    1648         d.ownerDocument = self
    1649         return d
    1650 
    1651     def createElement(self, tagName):
    1652         e = Element(tagName)
    1653         e.ownerDocument = self
    1654         return e
    1655 
    1656     def createTextNode(self, data):
    1657         if not isinstance(data, str):
    1658             raise TypeError("node contents must be a string")
    1659         t = Text()
    1660         t.data = data
    1661         t.ownerDocument = self
    1662         return t
    1663 
    1664     def createCDATASection(self, data):
    1665         if not isinstance(data, str):
    1666             raise TypeError("node contents must be a string")
    1667         c = CDATASection()
    1668         c.data = data
    1669         c.ownerDocument = self
    1670         return c
    1671 
    1672     def createComment(self, data):
    1673         c = Comment(data)
    1674         c.ownerDocument = self
    1675         return c
    1676 
    1677     def createProcessingInstruction(self, target, data):
    1678         p = ProcessingInstruction(target, data)
    1679         p.ownerDocument = self
    1680         return p
    1681 
    1682     def createAttribute(self, qName):
    1683         a = Attr(qName)
    1684         a.ownerDocument = self
    1685         a.value = ""
    1686         return a
    1687 
    1688     def createElementNS(self, namespaceURI, qualifiedName):
    1689         prefix, localName = _nssplit(qualifiedName)
    1690         e = Element(qualifiedName, namespaceURI, prefix)
    1691         e.ownerDocument = self
    1692         return e
    1693 
    1694     def createAttributeNS(self, namespaceURI, qualifiedName):
    1695         prefix, localName = _nssplit(qualifiedName)
    1696         a = Attr(qualifiedName, namespaceURI, localName, prefix)
    1697         a.ownerDocument = self
    1698         a.value = ""
    1699         return a
    1700 
    1701     # A couple of implementation-specific helpers to create node types
    1702     # not supported by the W3C DOM specs:
    1703 
    1704     def _create_entity(self, name, publicId, systemId, notationName):
    1705         e = Entity(name, publicId, systemId, notationName)
    1706         e.ownerDocument = self
    1707         return e
    1708 
    1709     def _create_notation(self, name, publicId, systemId):
    1710         n = Notation(name, publicId, systemId)
    1711         n.ownerDocument = self
    1712         return n
    1713 
    1714     def getElementById(self, id):
    1715         if id in self._id_cache:
    1716             return self._id_cache[id]
    1717         if not (self._elem_info or self._magic_id_count):
    1718             return None
    1719 
    1720         stack = self._id_search_stack
    1721         if stack is None:
    1722             # we never searched before, or the cache has been cleared
    1723             stack = [self.documentElement]
    1724             self._id_search_stack = stack
    1725         elif not stack:
    1726             # Previous search was completed and cache is still valid;
    1727             # no matching node.
    1728             return None
    1729 
    1730         result = None
    1731         while stack:
    1732             node = stack.pop()
    1733             # add child elements to stack for continued searching
    1734             stack.extend([child for child in node.childNodes
    1735                           if child.nodeType in _nodeTypes_with_children])
    1736             # check this node
    1737             info = self._get_elem_info(node)
    1738             if info:
    1739                 # We have to process all ID attributes before
    1740                 # returning in order to get all the attributes set to
    1741                 # be IDs using Element.setIdAttribute*().
    1742                 for attr in node.attributes.values():
    1743                     if attr.namespaceURI:
    1744                         if info.isIdNS(attr.namespaceURI, attr.localName):
    1745                             self._id_cache[attr.value] = node
    1746                             if attr.value == id:
    1747                                 result = node
    1748                             elif not node._magic_id_nodes:
    1749                                 break
    1750                     elif info.isId(attr.name):
    1751                         self._id_cache[attr.value] = node
    1752                         if attr.value == id:
    1753                             result = node
    1754                         elif not node._magic_id_nodes:
    1755                             break
    1756                     elif attr._is_id:
    1757                         self._id_cache[attr.value] = node
    1758                         if attr.value == id:
    1759                             result = node
    1760                         elif node._magic_id_nodes == 1:
    1761                             break
    1762             elif node._magic_id_nodes:
    1763                 for attr in node.attributes.values():
    1764                     if attr._is_id:
    1765                         self._id_cache[attr.value] = node
    1766                         if attr.value == id:
    1767                             result = node
    1768             if result is not None:
    1769                 break
    1770         return result
    1771 
    1772     def getElementsByTagName(self, name):
    1773         return _get_elements_by_tagName_helper(self, name, NodeList())
    1774 
    1775     def getElementsByTagNameNS(self, namespaceURI, localName):
    1776         return _get_elements_by_tagName_ns_helper(
    1777             self, namespaceURI, localName, NodeList())
    1778 
    1779     def isSupported(self, feature, version):
    1780         return self.implementation.hasFeature(feature, version)
    1781 
    1782     def importNode(self, node, deep):
    1783         if node.nodeType == Node.DOCUMENT_NODE:
    1784             raise xml.dom.NotSupportedErr("cannot import document nodes")
    1785         elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
    1786             raise xml.dom.NotSupportedErr("cannot import document type nodes")
    1787         return _clone_node(node, deep, self)
    1788 
    1789     def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
    1790         if encoding is None:
    1791             writer.write('<?xml version="1.0" ?>'+newl)
    1792         else:
    1793             writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
    1794                 encoding, newl))
    1795         for node in self.childNodes:
    1796             node.writexml(writer, indent, addindent, newl)
    1797 
    1798     # DOM Level 3 (WD 9 April 2002)
    1799 
    1800     def renameNode(self, n, namespaceURI, name):
    1801         if n.ownerDocument is not self:
    1802             raise xml.dom.WrongDocumentErr(
    1803                 "cannot rename nodes from other documents;
    "
    1804                 "expected %s,
    found %s" % (self, n.ownerDocument))
    1805         if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
    1806             raise xml.dom.NotSupportedErr(
    1807                 "renameNode() only applies to element and attribute nodes")
    1808         if namespaceURI != EMPTY_NAMESPACE:
    1809             if ':' in name:
    1810                 prefix, localName = name.split(':', 1)
    1811                 if (  prefix == "xmlns"
    1812                       and namespaceURI != xml.dom.XMLNS_NAMESPACE):
    1813                     raise xml.dom.NamespaceErr(
    1814                         "illegal use of 'xmlns' prefix")
    1815             else:
    1816                 if (  name == "xmlns"
    1817                       and namespaceURI != xml.dom.XMLNS_NAMESPACE
    1818                       and n.nodeType == Node.ATTRIBUTE_NODE):
    1819                     raise xml.dom.NamespaceErr(
    1820                         "illegal use of the 'xmlns' attribute")
    1821                 prefix = None
    1822                 localName = name
    1823         else:
    1824             prefix = None
    1825             localName = None
    1826         if n.nodeType == Node.ATTRIBUTE_NODE:
    1827             element = n.ownerElement
    1828             if element is not None:
    1829                 is_id = n._is_id
    1830                 element.removeAttributeNode(n)
    1831         else:
    1832             element = None
    1833         n.prefix = prefix
    1834         n._localName = localName
    1835         n.namespaceURI = namespaceURI
    1836         n.nodeName = name
    1837         if n.nodeType == Node.ELEMENT_NODE:
    1838             n.tagName = name
    1839         else:
    1840             # attribute node
    1841             n.name = name
    1842             if element is not None:
    1843                 element.setAttributeNode(n)
    1844                 if is_id:
    1845                     element.setIdAttributeNode(n)
    1846         # It's not clear from a semantic perspective whether we should
    1847         # call the user data handlers for the NODE_RENAMED event since
    1848         # we're re-using the existing node.  The draft spec has been
    1849         # interpreted as meaning "no, don't call the handler unless a
    1850         # new node is created."
    1851         return n
    1852 
    1853 defproperty(Document, "documentElement",
    1854             doc="Top-level element of this document.")
    1855 
    1856 
    1857 def _clone_node(node, deep, newOwnerDocument):
    1858     """
    1859     Clone a node and give it the new owner document.
    1860     Called by Node.cloneNode and Document.importNode
    1861     """
    1862     if node.ownerDocument.isSameNode(newOwnerDocument):
    1863         operation = xml.dom.UserDataHandler.NODE_CLONED
    1864     else:
    1865         operation = xml.dom.UserDataHandler.NODE_IMPORTED
    1866     if node.nodeType == Node.ELEMENT_NODE:
    1867         clone = newOwnerDocument.createElementNS(node.namespaceURI,
    1868                                                  node.nodeName)
    1869         for attr in node.attributes.values():
    1870             clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
    1871             a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
    1872             a.specified = attr.specified
    1873 
    1874         if deep:
    1875             for child in node.childNodes:
    1876                 c = _clone_node(child, deep, newOwnerDocument)
    1877                 clone.appendChild(c)
    1878 
    1879     elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
    1880         clone = newOwnerDocument.createDocumentFragment()
    1881         if deep:
    1882             for child in node.childNodes:
    1883                 c = _clone_node(child, deep, newOwnerDocument)
    1884                 clone.appendChild(c)
    1885 
    1886     elif node.nodeType == Node.TEXT_NODE:
    1887         clone = newOwnerDocument.createTextNode(node.data)
    1888     elif node.nodeType == Node.CDATA_SECTION_NODE:
    1889         clone = newOwnerDocument.createCDATASection(node.data)
    1890     elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
    1891         clone = newOwnerDocument.createProcessingInstruction(node.target,
    1892                                                              node.data)
    1893     elif node.nodeType == Node.COMMENT_NODE:
    1894         clone = newOwnerDocument.createComment(node.data)
    1895     elif node.nodeType == Node.ATTRIBUTE_NODE:
    1896         clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
    1897                                                    node.nodeName)
    1898         clone.specified = True
    1899         clone.value = node.value
    1900     elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
    1901         assert node.ownerDocument is not newOwnerDocument
    1902         operation = xml.dom.UserDataHandler.NODE_IMPORTED
    1903         clone = newOwnerDocument.implementation.createDocumentType(
    1904             node.name, node.publicId, node.systemId)
    1905         clone.ownerDocument = newOwnerDocument
    1906         if deep:
    1907             clone.entities._seq = []
    1908             clone.notations._seq = []
    1909             for n in node.notations._seq:
    1910                 notation = Notation(n.nodeName, n.publicId, n.systemId)
    1911                 notation.ownerDocument = newOwnerDocument
    1912                 clone.notations._seq.append(notation)
    1913                 if hasattr(n, '_call_user_data_handler'):
    1914                     n._call_user_data_handler(operation, n, notation)
    1915             for e in node.entities._seq:
    1916                 entity = Entity(e.nodeName, e.publicId, e.systemId,
    1917                                 e.notationName)
    1918                 entity.actualEncoding = e.actualEncoding
    1919                 entity.encoding = e.encoding
    1920                 entity.version = e.version
    1921                 entity.ownerDocument = newOwnerDocument
    1922                 clone.entities._seq.append(entity)
    1923                 if hasattr(e, '_call_user_data_handler'):
    1924                     e._call_user_data_handler(operation, n, entity)
    1925     else:
    1926         # Note the cloning of Document and DocumentType nodes is
    1927         # implementation specific.  minidom handles those cases
    1928         # directly in the cloneNode() methods.
    1929         raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
    1930 
    1931     # Check for _call_user_data_handler() since this could conceivably
    1932     # used with other DOM implementations (one of the FourThought
    1933     # DOMs, perhaps?).
    1934     if hasattr(node, '_call_user_data_handler'):
    1935         node._call_user_data_handler(operation, node, clone)
    1936     return clone
    1937 
    1938 
    1939 def _nssplit(qualifiedName):
    1940     fields = qualifiedName.split(':', 1)
    1941     if len(fields) == 2:
    1942         return fields
    1943     else:
    1944         return (None, fields[0])
    1945 
    1946 
    1947 def _do_pulldom_parse(func, args, kwargs):
    1948     events = func(*args, **kwargs)
    1949     toktype, rootNode = events.getEvent()
    1950     events.expandNode(rootNode)
    1951     events.clear()
    1952     return rootNode
    1953 
    1954 def parse(file, parser=None, bufsize=None):
    1955     """Parse a file into a DOM by filename or file object."""
    1956     if parser is None and not bufsize:
    1957         from xml.dom import expatbuilder
    1958         return expatbuilder.parse(file)
    1959     else:
    1960         from xml.dom import pulldom
    1961         return _do_pulldom_parse(pulldom.parse, (file,),
    1962             {'parser': parser, 'bufsize': bufsize})
    1963 
    1964 def parseString(string, parser=None):
    1965     """Parse a file into a DOM from a string."""
    1966     if parser is None:
    1967         from xml.dom import expatbuilder
    1968         return expatbuilder.parseString(string)
    1969     else:
    1970         from xml.dom import pulldom
    1971         return _do_pulldom_parse(pulldom.parseString, (string,),
    1972                                  {'parser': parser})
    1973 
    1974 def getDOMImplementation(features=None):
    1975     if features:
    1976         if isinstance(features, str):
    1977             features = domreg._parse_feature_string(features)
    1978         for f, v in features:
    1979             if not Document.implementation.hasFeature(f, v):
    1980                 return None
    1981     return Document.implementation
    xml.dom.minidom
    每天更新一点点,温习一点点点,进步一点点
  • 相关阅读:
    Shiro-身份验证
    ORA-12514: TNS: no listener 解决方案
    Oracle创建用户、角色、授权、建表
    ORA-28547:connection to server failed, probable Oracle Net admin error错误,解决方法
    Python学习笔记
    Python学习笔记
    Python学习笔记
    winrm service
    C#动态编译并执行代码
    TypeScript的4种编译方式
  • 原文地址:https://www.cnblogs.com/lmgsanm/p/8379800.html
Copyright © 2011-2022 走看看