zoukankan      html  css  js  c++  java
  • xss过滤代码

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    from bs4 import BeautifulSoup
    
    
    class XSSFilter(object):
        __instance = None
    
        def __init__(self):
            # XSS白名单
            self.valid_tags = {
                "font": ['color', 'size', 'face', 'style'],
                'b': [],
                'div': [],
                "span": [],
                "table": [
                    'border', 'cellspacing', 'cellpadding'
                ],
                'th': [
                    'colspan', 'rowspan'
                ],
                'td': [
                    'colspan', 'rowspan'
                ],
                "a": ['href', 'target', 'name'],
                "img": ['src', 'alt', 'title'],
                'p': [
                    'align'
                ],
                "pre": ['class'],
                "hr": ['class'],
                'strong': []
            }
    
        def __new__(cls, *args, **kwargs):
            """
            单例模式
            :param cls:
            :param args:
            :param kwargs:
            :return:
            """
            if not cls.__instance:
                obj = object.__new__(cls, *args, **kwargs)
                cls.__instance = obj
            return cls.__instance
    
        def process(self, content):
            soup = BeautifulSoup(content, 'html.parser')
            # 遍历所有HTML标签
            for tag in soup.find_all(recursive=True):
                # 判断标签名是否在白名单中
                if tag.name not in self.valid_tags:
                    tag.hidden = True
                    if tag.name not in ['html', 'body']:
                        tag.hidden = True
                        tag.clear()
                    continue
                # 当前标签的所有属性白名单
                attr_rules = self.valid_tags[tag.name]
                keys = list(tag.attrs.keys())
                for key in keys:
                    if key not in attr_rules:
                        del tag[key]
    
            return soup.decode()
    
    
    if __name__ == '__main__':
        html = """<p class="title">
                            <b>The Dormouse's story</b>
                        </p>
                        <p class="story">
                            <div name='root'>
                                Once upon a time there were three little sisters; and their names were
                                <a href="http://example.com/elsie" class="sister c1" style='color:red;background-color:green;' id="link1"><!-- Elsie --></a>
                                <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
                                <a href="http://example.com/tillie" class="sister" id="link3">Tilffffffffffffflie</a>;
                                and they lived at the bottom of a well.
                                <script>alert(123)</script>
                            </div>
                        </p>
                        <p class="story">...</p>"""
    
        obj = XSSFilter()
        v = obj.process(html)
        print(v)
  • 相关阅读:
    [apue] FIFO:不是文件的文件
    [apue] 等待子进程的那些事儿
    [apue] popen/pclose 疑点解惑
    [apue] 使用 popen/pclose 的一点疑问
    [apue] 使用 poll 检测管道断开
    [apue] dup2的正确打开方式
    [apue] 管道原子写入量的一个疑问
    [apue] 测试管道容量的一些疑问
    【新阁教育】再也不用担心我的PLC通信不上了
    【新阁教育】三菱PLC的这个功能,真的很强大
  • 原文地址:https://www.cnblogs.com/sunhao96/p/9041795.html
Copyright © 2011-2022 走看看