zoukankan      html  css  js  c++  java
  • python判断一个字符是否是xml合法字符

    项目中碰到的问题,记录如下,期望能对他人有用。

    def valid_XML_char_ordinal(c):
        """
        @summary:
                check if a code point is a valid XML 1.0 character
        @param c: the code point to be checked, given either as an integer
                ordinal or as a single character (converted with ord())
        @see: # http://www.w3.org/TR/2008/REC-xml-20081126/#charsets
        @result: True/False
        """
        if not isinstance(c, int):
            # Iterating over a string yields characters, not ordinals; comparing
            # a character with the integer ranges below would silently be False
            # on Python 2 and raise TypeError on Python 3.
            c = ord(c)
        return ( # conditions ordered by presumed frequency
            0x20 <= c <= 0xD7FF
            or c in (0x09, 0x0A, 0x0D)
            or 0xE000 <= c <= 0xFFFD
            or 0x10000 <= c <= 0x10FFFF
            )

    考虑对含有非法xml字符的数据整体进行base64编码处理,具体代码如下:

    try:
        import xml.sax.saxutils
    except ImportError:
        raise ImportError("requires xml.sax.saxutils package, pleas check if xml.sax.saxutils is installed!")
    import base64
    import logging
    
    logger = logging.getLogger(__name__)
    
    __all__ = ["escape", "unescape"]
    
    def escape(data):
        """
        @summary:
                Escape '&', '<', and '>' in a string of data.
                if the data contains any character that is not a valid XML
                character, the whole data is base64 encoded instead
        @param data: the data to be processed (None is treated as "")
        @return
            {"base64": True | False,
             "data": data}
            if processing fails the error is logged and "data" is ""
        """

        is_base64 = False
        escaped_data = ""
        try:
            if data is None:
                data = ""

            # The validator works on integer code points. Iterating text
            # yields characters (need ord()), iterating Python 3 bytes
            # already yields ints.
            is_base64 = not all(
                valid_XML_char_ordinal(c if isinstance(c, int) else ord(c))
                for c in data)

            if is_base64:
                logger.debug("%s is not ascii-encoded string, so i will encode it in base64", data)
                # b64encode needs bytes; text is serialised as UTF-8 first
                raw = data if isinstance(data, bytes) else data.encode("utf-8")
                escaped_data = base64.b64encode(raw)
                if not isinstance(escaped_data, str):
                    # Python 3 returns bytes; the base64 alphabet is pure
                    # ASCII, so hand back text that can be stored in XML.
                    escaped_data = escaped_data.decode("ascii")
            else:
                # only valid XML characters: escape the markup characters
                escaped_data = xml.sax.saxutils.escape(data)

        except Exception as e:
            # best effort: never raise to the caller, but record the failure
            logger.exception(e)

        return {"base64": is_base64,
                "data": escaped_data}
    
    def unescape(data, is_base64 = False):
        """
        @summary:
                Unescape '&amp;', '&lt;', and '&gt;' in a string of data.
                if is_base64 is True, the data is base64 decoded instead
                (no XML unescaping is applied to the decoded result)
        @param data: the data to be processed
        @param is_base64: specify if the data is encoded by base64
        @result: unescaped data; when base64 decoding fails the error is
                logged and the input data is returned unchanged
        """
        # fall back to the input if base64 decoding fails
        unescaped_data = data
        if is_base64:
            try:
                unescaped_data = base64.b64decode(data)
            except Exception as ex:
                logger.debug("some excpetion occured when invoke b64decode")
                # logged with traceback; the former stdout print duplicated this
                logger.exception(ex)
        else:
            # unescape it
            unescaped_data = xml.sax.saxutils.unescape(data)

        return unescaped_data
  • 相关阅读:
    Item 16: 让const成员函数做到线程安全
    学习张鑫旭大神元素抛物线运动插件
    js根据浏览器对css3移动的支持,选择元素移动方式
    如何在图片加载完成前获取到图片宽高
    JavaScript和SVG实现点击连线
    多层级叠加问题
    闭包应用
    展示触摸屏网页打包成桌面应用(nw.js)
    获取鼠标坐标
    常用文档
  • 原文地址:https://www.cnblogs.com/Jerryshome/p/2490394.html
Copyright © 2011-2022 走看看