zoukankan      html  css  js  c++  java
  • 通过xsd schema结构来验证xml是否合法

     1 import sys
     2 import StringIO
     3 import lxml
     4 
     5 from lxml import etree
     6 from StringIO import StringIO
     7 
     8 # Construct XML relevant to the XML schema we're validating against. By altering the string, adding/removing elements
     9 # we can force different errors to occur when validating.
    10 xml = StringIO('''
    11 <CompanyDataRequest xmlns="http://xmlgw.companieshouse.gov.uk" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://xmlgw.companieshouse.gov.uk http://xmlgw.companieshouse.gov.uk/v2-1/schema/CompanyData-v2-2.xsd">
    12     <CompanyNumber>06937730</CompanyNumber>
    13     <CompanyAuthenticationCode>123456</CompanyAuthenticationCode>
    14     <MadeUpDate>2010-06-30x</MadeUpDate>
    15 </CompanyDataRequest>
    16 ''')
    17 
    18 # Clear any previous errors
    19 lxml.etree.clear_error_log()
    20 
    21 try:
    22     # Get the XML schema to validate against
    23     schema = lxml.etree.XMLSchema(file = 'http://xmlgw.companieshouse.gov.uk/v2-1/schema/CompanyData-v2-2.xsd')
    24     # Parse string of XML
    25     xml_doc = lxml.etree.parse(xml)
    26     # Validate parsed XML against schema returning a readable message on failure
    27     schema.assertValid(xml_doc)
    28     # Validate parsed XML against schema returning boolean value indicating success/failure
    29     print 'schema.validate() returns "%s".' % schema.validate(xml_doc)
    30 
    31 except lxml.etree.XMLSchemaParseError, xspe:
    32     # Something wrong with the schema (getting from URL/parsing)
    33     print "XMLSchemaParseError occurred!"
    34     print xspe
    35 
    36 except lxml.etree.XMLSyntaxError, xse:
    37     # XML not well formed
    38     print "XMLSyntaxError occurred!"
    39     print xse
    40     
    41 except lxml.etree.DocumentInvalid, di:
    42     # XML failed to validate against schema
    43     print "DocumentInvalid occurred!"
    44 
    45     error = schema.error_log.last_error
    46     if error:
    47         # All the error properties (from libxml2) describing what went wrong
    48         print 'domain_name: ' + error.domain_name
    49         print 'domain: ' + str(error.domain)
    50         print 'filename: ' + error.filename # '<string>' cos var is a string of xml
    51         print 'level: ' + str(error.level)
    52         print 'level_name: ' + error.level_name # an integer
    53         print 'line: ' + str(error.line) # a unicode string that identifies the line where the error occurred.
    54         print 'message: ' + error.message # a unicode string that lists the message.
    55         print 'type: ' + str(error.type) # an integer
    56         print 'type_name: ' + error.type_name

    封装类

     1 #!/usr/bin/env python
     2 # -*- coding:utf-8 -*-
     3 # Author:Eric.yue
     4 
     5 import os
     6 import lxml.etree as ET
     7 from StringIO import StringIO
     8 import chardet
     9 
    10 
    11 class R3xmlCheck(object):
    12     def __init__(self, element_xml):
    13         self.elem_xml = element_xml
    14 
    15     def validate_xsd_xml(self, f_xml, elem_xsd):
    16         try:
    17             elem_xsd = elem_xsd.encode('utf-8')
    18             xsd_doc = StringIO(elem_xsd)
    19             xml_doc = StringIO(f_xml)
    20             xmlschema_doc = ET.parse(xsd_doc)
    21             xmlschema = ET.XMLSchema(xmlschema_doc)
    22             xml = ET.parse(xml_doc)
    23             xmlschema.assertValid(xml)
    24             print 'schema.validate() returns "%s".' % xmlschema.validate(xml)
    25 
    26         except ET.XMLSchemaParseError, xspe:
    27             # Something wrong with the schema (getting from URL/parsing)
    28             print "XMLSchemaParseError occurred!"
    29             print xspe
    30 
    31         except ET.XMLSyntaxError, xse:
    32             # XML not well formed
    33             print "XMLSyntaxError occurred!"
    34             print xse
    35 
    36         except ET.DocumentInvalid, di:
    37             # XML failed to validate against schema
    38             print "DocumentInvalid occurred!"
    39 
    40             error = xmlschema.error_log.last_error
    41             if error:
    42                 # All the error properties (from libxml2) describing what went wrong
    43                 print 'domain_name: ' + error.domain_name
    44                 print 'domain: ' + str(error.domain)
    45                 print 'filename: ' + error.filename  # '<string>' cos var is a string of xml
    46                 print 'level: ' + str(error.level)
    47                 print 'level_name: ' + error.level_name  # an integer
    48                 print 'line: ' + str(error.line)  # a unicode string that identifies the line where the error occurred.
    49                 print 'message: ' + error.message  # a unicode string that lists the message.
    50                 print 'type: ' + str(error.type)  # an integer
    51                 print 'type_name: ' + error.type_name
    52 
    53     def run(self):
    54         res = self.validate_xml(self.elem_xml)
    55         if res["result"] is not True:
    56             return res["info"]
    57 
    58         elem_xsd = self.get_xsd()
    59 
    60         with open(self.elem_xml) as f:
    61             f_xml = f.read()
    62             chardet_info = chardet.detect(f_xml)
    63             if chardet_info['encoding'] == 'ascii':
    64                 f_xml = f_xml.encode('utf-8')
    65             self.validate_xsd_xml(f_xml.strip(),elem_xsd)
    66 
    67     # matching schemaLocation url
    68     def get_xsd(self):
    69         with open("./xsd/multicacheschemas/MCCI_IN200100UV01.xsd") as f:
    70             elem_xsd = f.read()
    71             return elem_xsd
    72 
    73     def validate_xml(self, exml):
    74         rinfo = {}
    75         if os.path.exists(exml):
    76             try:
    77                 ET.parse(exml)
    78                 rinfo['result'] = True
    79             except Exception as err:
    80                 rinfo['result'] = False
    81                 rinfo['info'] = 'Parsing error info:{0}'.format(err)
    82         return rinfo
    83 
    84 if __name__ == "__main__":
    85     aa = R3xmlCheck("./xsd/aa.xml")
    86     aa.run()
  • 相关阅读:
    数组方法之find
    检查数组中是否有NaN
    数组方法之includes
    数组方法之lastIndexOf
    数组方法之indexOf
    数组方法之forEach
    col-md-push-*和col-md-offset的区别
    35个jQuery小技巧!
    35个jQuery小技巧!
    VS 2013插件
  • 原文地址:https://www.cnblogs.com/gide/p/9933949.html
Copyright © 2011-2022 走看看