zoukankan      html  css  js  c++  java
  • 通过xsd schema结构来验证xml是否合法

     1 import sys
     2 import StringIO
     3 import lxml
     4 
     5 from lxml import etree
     6 from StringIO import StringIO
     7 
     # Sample request document for the schema we validate against. Editing this
     # string (adding/removing elements, changing values) forces different
     # errors to occur when validating.
     # NOTE(review): the trailing "x" in MadeUpDate looks like a deliberate
     # error to provoke DocumentInvalid - confirm against the schema.
     xml = StringIO('''
     <CompanyDataRequest xmlns="http://xmlgw.companieshouse.gov.uk" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://xmlgw.companieshouse.gov.uk http://xmlgw.companieshouse.gov.uk/v2-1/schema/CompanyData-v2-2.xsd">
         <CompanyNumber>06937730</CompanyNumber>
         <CompanyAuthenticationCode>123456</CompanyAuthenticationCode>
         <MadeUpDate>2010-06-30x</MadeUpDate>
     </CompanyDataRequest>
     ''')

     # Clear any previous errors
     lxml.etree.clear_error_log()

     try:
         # Get the XML schema to validate against
         schema = lxml.etree.XMLSchema(file='http://xmlgw.companieshouse.gov.uk/v2-1/schema/CompanyData-v2-2.xsd')
         # Parse the XML held in the file-like object
         xml_doc = lxml.etree.parse(xml)
         # Validate against the schema; raises DocumentInvalid on failure
         schema.assertValid(xml_doc)
         # Validate again, this time getting a boolean success/failure value
         print('schema.validate() returns "%s".' % schema.validate(xml_doc))

     except lxml.etree.XMLSchemaParseError as xspe:
         # Something wrong with the schema (getting from URL/parsing)
         print("XMLSchemaParseError occurred!")
         print(xspe)

     except lxml.etree.XMLSyntaxError as xse:
         # XML not well formed
         print("XMLSyntaxError occurred!")
         print(xse)

     except lxml.etree.DocumentInvalid:
         # XML failed to validate against schema
         print("DocumentInvalid occurred!")

         error = schema.error_log.last_error
         if error:
             # All the error properties (from libxml2) describing what went
             # wrong, printed as "name: value" in a fixed order.
             # domain, level, line and type are numeric; the rest are text.
             # %s formatting is used so a None value cannot raise TypeError.
             for field in ('domain_name', 'domain', 'filename', 'level',
                           'level_name', 'line', 'message', 'type',
                           'type_name'):
                 print('%s: %s' % (field, getattr(error, field)))

    封装类

     1 #!/usr/bin/env python
     2 # -*- coding:utf-8 -*-
     3 # Author:Eric.yue
     4 
     5 import os
     6 import lxml.etree as ET
     7 from StringIO import StringIO
     8 import chardet
     9 
    10 
    class R3xmlCheck(object):
        """Check that an XML file is well formed and valid against an XSD schema.

        Args:
            element_xml: Path of the XML file to check.
            xsd_path: Optional path of the XSD schema file. When omitted,
                DEFAULT_XSD_PATH is used.
        """

        # Schema used when no explicit xsd_path is given.
        DEFAULT_XSD_PATH = "./xsd/multicacheschemas/MCCI_IN200100UV01.xsd"

        # Attributes of an lxml error-log entry, in the order they are printed.
        _ERROR_FIELDS = ('domain_name', 'domain', 'filename', 'level',
                         'level_name', 'line', 'message', 'type', 'type_name')

        def __init__(self, element_xml, xsd_path=None):
            self.elem_xml = element_xml
            self.xsd_path = xsd_path or self.DEFAULT_XSD_PATH

        @staticmethod
        def _as_bytes(data):
            """Return data as bytes, encoding text as UTF-8.

            Bytes are passed through untouched so that the XML parser can
            honour the encoding named in the document's own declaration.
            NOTE(review): text whose XML declaration names a non-UTF-8
            encoding should be supplied as bytes instead - confirm callers.
            """
            if isinstance(data, bytes):
                return data
            return data.encode('utf-8')

        @classmethod
        def _print_error(cls, error):
            """Print every field of an error-log entry as "name: value"."""
            for field in cls._ERROR_FIELDS:
                # %s formatting copes with numeric and None values alike.
                print('%s: %s' % (field, getattr(error, field)))

        def validate_xsd_xml(self, f_xml, elem_xsd):
            """Validate XML content against XSD content and print the outcome.

            Args:
                f_xml: The XML document, as bytes or text.
                elem_xsd: The XSD schema document, as bytes or text.

            Returns:
                True if the document is valid against the schema, False if the
                schema could not be compiled, a document was not well formed,
                or validation failed. Details are printed in each case.
            """
            # Imported locally so this method does not depend on the
            # Python 2-only StringIO module.
            from io import BytesIO

            try:
                xmlschema_doc = ET.parse(BytesIO(self._as_bytes(elem_xsd)))
                xmlschema = ET.XMLSchema(xmlschema_doc)
                xml = ET.parse(BytesIO(self._as_bytes(f_xml)))
                # Raises DocumentInvalid on failure
                xmlschema.assertValid(xml)
                print('schema.validate() returns "%s".' % xmlschema.validate(xml))
                return True

            except ET.XMLSchemaParseError as xspe:
                # Something wrong with the schema (getting from URL/parsing)
                print("XMLSchemaParseError occurred!")
                print(xspe)

            except ET.XMLSyntaxError as xse:
                # XML not well formed
                print("XMLSyntaxError occurred!")
                print(xse)

            except ET.DocumentInvalid:
                # XML failed to validate against schema
                print("DocumentInvalid occurred!")

                error = xmlschema.error_log.last_error
                if error:
                    # All the error properties (from libxml2) describing what
                    # went wrong
                    self._print_error(error)

            return False

        def run(self):
            """Check well-formedness, then validate the file against the schema.

            Returns:
                The error description string when the file is missing or not
                well formed; otherwise None (the validation outcome is
                printed by validate_xsd_xml).
            """
            res = self.validate_xml(self.elem_xml)
            if res["result"] is not True:
                return res["info"]

            elem_xsd = self.get_xsd()

            # Read raw bytes: no decoding or re-encoding is needed before
            # handing the document to the parser.
            with open(self.elem_xml, 'rb') as f:
                f_xml = f.read()
            # Strip surrounding whitespace before parsing.
            self.validate_xsd_xml(f_xml.strip(), elem_xsd)

        # matching schemaLocation url
        def get_xsd(self):
            """Return the raw content of the schema file at self.xsd_path."""
            with open(self.xsd_path, 'rb') as f:
                return f.read()

        def validate_xml(self, exml):
            """Check that the file at exml exists and parses as XML.

            Args:
                exml: Path of the XML file.

            Returns:
                A dict with 'result' (True or False) and, when 'result' is
                False, 'info' describing the problem.
            """
            rinfo = {}
            if not os.path.exists(exml):
                # Always report a result so callers can index rinfo safely.
                rinfo['result'] = False
                rinfo['info'] = 'File does not exist:{0}'.format(exml)
                return rinfo
            try:
                ET.parse(exml)
                rinfo['result'] = True
            except (ET.LxmlError, IOError, OSError) as err:
                rinfo['result'] = False
                rinfo['info'] = 'Parsing error info:{0}'.format(err)
            return rinfo
    83 
    if __name__ == "__main__":
        # Script entry point: check the sample document.
        aa = R3xmlCheck("./xsd/aa.xml")
        # run() returns a message only when the initial parse check fails;
        # print it so that failure is not silently discarded.
        outcome = aa.run()
        if outcome is not None:
            print(outcome)
  • 相关阅读:
    用wamp配置的环境,想用CMD连接mysql怎么连
    Mysql删除表
    MySQL创建表
    Leetcode 130. Surrounded Regions
    Leetcode 111. Minimum Depth of Binary Tree
    Leetcode 110. Balanced Binary Tree
    Leetcode 98. Validate Binary Search Tree
    Leetcode 99. Recover Binary Search Tree
    Leetcode 108. Convert Sorted Array to Binary Search Tree
    Leetcode 105. Construct Binary Tree from Preorder and Inorder Traversal
  • 原文地址:https://www.cnblogs.com/gide/p/9933949.html
Copyright © 2011-2022 走看看