zoukankan      html  css  js  c++  java
  • xss过滤

    # Sample markup used by the demos below; it contains a <script> element
    # that must not survive filtering.
    content="""
    <p class='c1' id='i1'>
    asdfaa<span class='c2' style="font-family:NSimSun;">sdf<a>a</a>sdf</span>sdf
    </p>
    <p>
    <strong class='c2' id='i2'>asdf</strong>
    <script>alert(123)</script>
    </p>
    <h2>
    asdf
    </h2>
    <div>
    <p>
    </div>
    """
    # Requires: pip3 install beautifulsoup4
    # Operating on tags
    from bs4 import BeautifulSoup

    # Parse the markup with the 'html.parser' builder.
    soup = BeautifulSoup(content, 'html.parser')

    # Remove every <script> element rather than only the first one. Looping
    # over find_all() also copes with markup that has no <script> at all,
    # where soup.find() would return None and the attribute access would fail.
    for script_tag in soup.find_all('script'):
        script_tag.decompose()

    print(type(soup))
    content = soup.decode()  # serialize the tree back into a string
    print(type(content))
    print(content)


    # Operating on the style attribute
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(content, 'html.parser')
    span = soup.find('span')  # first <span>, or None when the markup has none
    if span is not None:
        print(span.attrs)  # the attributes parsed for this tag, e.g. style=...
        # pop() with a default tolerates a <span> that carries no style
        # attribute; `del span.attrs['style']` would raise KeyError there.
        span.attrs.pop('style', None)

    content = soup.decode()
    print(content)

    # Remove every tag that is NOT on the whitelist
    allow_tags = ['p', 'strong']  # tags that are allowed to remain
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(content, 'html.parser')
    for tag in soup.find_all():
        print(tag.name)
        if tag.name not in allow_tags:
            # Not whitelisted: suppress the tag's own markup and drop
            # everything nested inside it.
            tag.hidden = True
            tag.clear()
    content = soup.decode()
    print(content)

    # Remove attributes that are not on the whitelist
    allow_tags = {
        'p': ['class'],
        'strong': ['id'],
    }  # allowed tag name -> attribute names that tag may keep
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(content, 'html.parser')
    for tag in soup.find_all():
        if tag.name not in allow_tags:
            # Tag itself is not whitelisted: hide its markup and empty it.
            tag.hidden = True
            tag.clear()
            continue

        valid_attrs = allow_tags[tag.name]  # e.g. ['class']
        # Walk a snapshot of the keys: entries cannot be deleted from a dict
        # while that same dict is being iterated.
        for k in list(tag.attrs):
            if k not in valid_attrs:
                del tag.attrs[k]  # attribute is not whitelisted for this tag
    content = soup.decode()
    print(content)

    # Object-oriented version
    class XSSFilter(object):
        """Whitelist-based HTML sanitizer built on BeautifulSoup.

        The class is a singleton: every ``XSSFilter()`` call returns the same
        object. ``__init__`` still runs on each call, so ``valid_tags`` is
        reset to the default whitelist every time the class is called.
        """

        __instance = None

        # Whitelisted attributes whose value is a URL and therefore needs a
        # scheme check in addition to the name check.
        _URL_ATTRS = ('href', 'src')
        # URL schemes accepted in those attributes; scheme-less (relative)
        # URLs are always accepted.
        _SAFE_SCHEMES = ('http', 'https', 'mailto', 'ftp')
        # Tags that are unwrapped (markup hidden, children kept) rather than
        # emptied when they are not whitelisted.
        _UNWRAPPED_TAGS = ('html', 'body')

        def __init__(self):
            # XSS whitelist: tag name -> attribute names that tag may keep
            self.valid_tags = {
                "font": ['color', 'size', 'face', 'style'],
                'b': [],
                'div': [],
                "span": [],
                "table": [
                    'border', 'cellspacing', 'cellpadding'
                ],
                'th': [
                    'colspan', 'rowspan'
                ],
                'td': [
                    'colspan', 'rowspan'
                ],
                "a": ['href', 'target', 'name'],
                "img": ['src', 'alt', 'title'],
                'p': [
                    'align'
                ],
                "pre": ['class'],
                "hr": ['class'],
                'strong': []
            }

        def __new__(cls, *args, **kwargs):
            """
            Singleton pattern: create the instance once and reuse it.

            :param cls: the class being instantiated
            :param args: accepted for signature compatibility, not forwarded
            :param kwargs: accepted for signature compatibility, not forwarded
            :return: the single shared instance
            """
            if cls.__instance is None:
                # object.__new__ rejects extra arguments when __new__ is
                # overridden, so only the class itself is passed on.
                cls.__instance = super().__new__(cls)
            return cls.__instance

        @classmethod
        def _is_safe_url(cls, value):
            """Return True when *value* is a relative URL or uses a safe scheme.

            Whitespace and control characters are stripped before the scheme
            is inspected, because browsers ignore them when parsing a URL and
            they are a common way to disguise ``javascript:``.
            """
            compact = ''.join(ch for ch in str(value) if ch > ' ')
            scheme, colon, _rest = compact.partition(':')
            # No colon, or a path/query/fragment delimiter before the first
            # colon, means the value has no scheme and is a relative URL.
            if not colon or any(ch in scheme for ch in '/?#'):
                return True
            return scheme.lower() in cls._SAFE_SCHEMES

        def process(self, content):
            """Sanitize an HTML string and return the cleaned markup.

            Tags missing from ``self.valid_tags`` have their markup hidden;
            apart from ``html`` and ``body`` they are also emptied. Whitelisted
            tags keep only their whitelisted attributes, and ``href``/``src``
            values with an unsafe URL scheme are dropped.

            :param content: HTML markup to sanitize
            :return: the sanitized markup as a string
            """
            soup = BeautifulSoup(content, 'html.parser')
            # Visit every tag in the document
            for tag in soup.find_all(recursive=True):
                attr_rules = self.valid_tags.get(tag.name)
                if attr_rules is None:
                    # Tag is not whitelisted
                    tag.hidden = True
                    if tag.name not in self._UNWRAPPED_TAGS:
                        tag.clear()
                    continue
                # Snapshot the keys: attributes are deleted inside the loop.
                for key in list(tag.attrs):
                    if key not in attr_rules:
                        del tag[key]
                    elif key in self._URL_ATTRS and not self._is_safe_url(tag[key]):
                        # e.g. href="javascript:..." on an otherwise allowed tag
                        del tag[key]

            return soup.decode()


    if __name__ == '__main__':
        # Demo input: allowed tags carrying disallowed attributes, plus a
        # <script> element.
        html = """<p class="title">
    <b>The Dormouse's story</b>
    </p>
    <p class="story">
    <div name='root'>
    Once upon a time there were three little sisters; and their names were
    <a href="http://example.com/elsie" class="sister c1" style='color:red;' id="link1"><!-- Elsie --></a>
    <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
    <a href="http://example.com/tillie" class="sister" id="link3">Tilffffffffffffflie</a>;
    and they lived at the bottom of a well.
    <script>alert(123)</script>
    </div>
    </p>
    <p class="story">...</p>"""

        # Run the sample through the filter and print the sanitized markup.
        obj = XSSFilter()
        v = obj.process(html)
        print(v)



    # Singleton pattern
    class Foo(object):
        """Singleton obtained through the ``get_instance`` class method.

        Calling ``Foo()`` directly still builds a new object every time; only
        ``get_instance()`` returns the shared one.
        """

        instance = None  # shared instance, created on the first get_instance() call

        def __init__(self):
            self.name = 'alex'

        @classmethod
        def get_instance(cls):
            """Return the shared instance, creating it on first use."""
            # Go through ``cls`` instead of the global name ``Foo``: the global
            # can be rebound by a later class definition with the same name.
            if cls.instance is None:
                cls.instance = cls()
            return cls.instance

        def process(self):
            """Return the fixed string ``'123'``."""
            return '123'

    # Calling the class directly builds a separate object each time.
    obj1, obj2 = Foo(), Foo()
    print(id(obj1), id(obj2))

    # Singleton pattern: get_instance() hands back the same object each time.
    obj1, obj2 = Foo.get_instance(), Foo.get_instance()
    print(id(obj1), id(obj2))


    # Advanced singleton pattern
    class Foo(object):
        """Singleton enforced in ``__new__``, so plain ``Foo()`` calls share one object.

        ``__init__`` still runs after ``__new__`` on every ``Foo()`` call, so
        ``'init'`` is printed and ``name`` is reassigned each time.
        """

        instance = None  # shared instance, created by the first Foo() call

        def __init__(self):
            print('init')
            self.name = 'alex'

        def __new__(cls, *args, **kwargs):
            """Return the shared instance, allocating it on the first call."""
            print('new')
            # __new__ runs before __init__
            if cls.instance is None:
                # object.__new__ rejects extra arguments when __new__ is
                # overridden, so only the class itself is passed on.
                cls.instance = super().__new__(cls)
            return cls.instance

    # Both names end up bound to the one shared instance.
    obj1, obj2 = Foo(), Foo()
    print(obj1, obj2)


  • 相关阅读:
    如何编写测试用例
    bug的合规描述
    Linux常用命令学习
    测试用列设计
    软件质量管理
    测试的分类
    软件工程模型
    软件测试核心概念
    Thinking in C++ 第十三章 动态对象创建
    python urllib
  • 原文地址:https://www.cnblogs.com/leiwenbin627/p/11156726.html
Copyright © 2011-2022 走看看