zoukankan html css js c++ java

xss过滤

content = """
<p class='c1' id='i1'>
   asdfaa<span class='c2' style="font-family:NSimSun;">sdf<a>a</a>sdf</span>sdf
</p>
<p>
   <strong class='c2' id='i2'>asdf</strong>
   <script>alert(123)</script>
</p>
<h2>
   asdf
</h2>
<div>
    <p>
</div>
"""
# Install the parsing library first: pip3 install beautifulsoup4
# Step 1: operate on a tag -- neutralise the <script> element.
from bs4 import BeautifulSoup

# Parse the markup with Python's built-in 'html.parser' backend.
soup = BeautifulSoup(content, 'html.parser')

# find() returns only the first <script> element in the document.
script_tag = soup.find('script')
# A hidden tag is omitted from the serialised output (its children are not).
script_tag.hidden = True
# clear() drops everything inside the tag, so nothing of it is rendered.
script_tag.clear()

print(type(soup))
# Serialise the tree back into a plain string for the next step.
content = soup.decode()
print(type(content))
print(content)


# Step 2: operate on the style attribute.
from bs4 import BeautifulSoup

soup = BeautifulSoup(content, 'html.parser')

# Locate the first <span> and show the attributes it carries (e.g. style=...).
span_tag = soup.find('span')
print(span_tag.attrs)

# Remove the inline style; a missing 'style' key raises KeyError.
span_tag.attrs.pop('style')

content = soup.decode()
print(content)

# Step 3: remove every tag that is NOT on the whitelist.
allow_tags = ['p', 'strong']  # tag names that are allowed to stay
from bs4 import BeautifulSoup

soup = BeautifulSoup(content, 'html.parser')
for element in soup.find_all():
    print(element.name)
    if element.name not in allow_tags:
        # Not whitelisted: hide the tag itself and throw away its contents.
        element.hidden = True
        element.clear()
    # Whitelisted tags are left untouched.
content = soup.decode()
print(content)

# Step 4: remove every attribute that is NOT on the per-tag whitelist.
# Maps an allowed tag name to the attribute names it may keep.
allow_tags = {
    'p': ['class'],
    'strong': ['id']
}
from bs4 import BeautifulSoup

soup = BeautifulSoup(content, 'html.parser')
for element in soup.find_all():
    if element.name not in allow_tags:
        # Tag is not whitelisted: hide it and discard its contents.
        element.hidden = True
        element.clear()
        continue

    # Attribute names this tag is allowed to keep, e.g. ['class'].
    permitted = allow_tags[element.name]
    # Collect the offenders first: a dict must not shrink while it is
    # being iterated.
    rejected = [attr for attr in element.attrs if attr not in permitted]
    for attr in rejected:
        del element.attrs[attr]
content = soup.decode()
print(content)

#对象化
class XSSFilter(object):
    """Whitelist-based HTML sanitiser handed out as a single shared instance.

    ``XSSFilter()`` always returns the same object. Because ``__new__``
    returns that shared object, ``__init__`` runs again on every
    ``XSSFilter()`` call and resets ``valid_tags`` to the defaults below.
    """
    __instance = None

    # Attributes whose value is a URL and therefore needs a scheme check.
    _URL_ATTRS = ('href', 'src')
    # URL schemes allowed to remain in the output. Anything else
    # (javascript:, data:, vbscript:, ...) causes the attribute to be dropped.
    _SAFE_SCHEMES = ('http', 'https', 'ftp', 'mailto', 'tel')

    def __init__(self):
        # XSS whitelist: allowed tag name -> attribute names the tag may keep.
        self.valid_tags = {
            "font": ['color', 'size', 'face', 'style'],
            'b': [],
            'div': [],
            "span": [],
            "table": [
                'border', 'cellspacing', 'cellpadding'
            ],
            'th': [
                'colspan', 'rowspan'
            ],
            'td': [
                'colspan', 'rowspan'
            ],
            "a": ['href', 'target', 'name'],
            "img": ['src', 'alt', 'title'],
            'p': [
                'align'
            ],
            "pre": ['class'],
            "hr": ['class'],
            'strong': []
        }

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use.

        :param cls: class being instantiated
        :param args: accepted for signature compatibility; not forwarded
        :param kwargs: accepted for signature compatibility; not forwarded
        :return: the one shared instance
        """
        if cls.__instance is None:
            # object.__new__ rejects extra arguments when __new__ is
            # overridden, so only the class itself is passed on.
            cls.__instance = object.__new__(cls)
        return cls.__instance

    @classmethod
    def _is_safe_url(cls, value):
        """Tell whether a URL attribute value uses an allowed scheme.

        Relative URLs (no scheme) are accepted. Whitespace and control
        characters are removed before the scheme is inspected, so that
        obfuscated values such as ``"java\\tscript:..."`` are still rejected.

        :param value: attribute value (a string, or a list of strings)
        :return: True when the value may stay in the output
        """
        if isinstance(value, (list, tuple)):
            value = ' '.join(value)
        cleaned = ''.join(ch for ch in str(value) if ch > ' ')
        colon = cleaned.find(':')
        if colon == -1:
            return True  # no scheme at all: relative URL
        prefix = cleaned[:colon]
        if any(sep in prefix for sep in '/?#'):
            # The colon belongs to the path, query or fragment.
            return True
        return prefix.lower() in cls._SAFE_SCHEMES

    def process(self, content):
        """Sanitise an HTML fragment against the whitelist.

        Tags that are not whitelisted are hidden and their contents removed
        (``html``/``body`` wrappers are hidden but keep their contents).
        On whitelisted tags, attributes that are not allowed, and URL
        attributes with a disallowed scheme, are deleted.

        :param content: HTML markup to clean
        :return: the cleaned markup as a string
        """
        soup = BeautifulSoup(content, 'html.parser')
        # Visit every tag in the document.
        for tag in soup.find_all(recursive=True):
            attr_rules = self.valid_tags.get(tag.name)
            if attr_rules is None:
                # Tag is not whitelisted: never render the tag itself.
                tag.hidden = True
                if tag.name not in ('html', 'body'):
                    tag.clear()
                continue
            # Iterate over a snapshot because attributes are deleted below.
            for key in list(tag.attrs):
                if key not in attr_rules:
                    del tag[key]
                elif key in self._URL_ATTRS and not self._is_safe_url(tag[key]):
                    # Allowed attribute, but the URL could execute script.
                    del tag[key]

        return soup.decode()


if __name__ == '__main__':
    # Sample document: nested tags, extra attributes and an inline script.
    html = """<p class="title">
                        <b>The Dormouse's story</b>
                    </p>
                    <p class="story">
                        <div name='root'>
                            Once upon a time there were three little sisters; and their names were
                            <a href="http://example.com/elsie" class="sister c1" style='color:red;' id="link1"><!-- Elsie --></a>
                            <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
                            <a href="http://example.com/tillie" class="sister" id="link3">Tilffffffffffffflie</a>;
                            and they lived at the bottom of a well.
                            <script>alert(123)</script>
                        </div>
                    </p>
                    <p class="story">...</p>"""

    # Run the sample through the filter and show the cleaned markup.
    print(XSSFilter().process(html))



#单例模式

class Foo(object):
    """Singleton demo that shares one object through an explicit accessor.

    Calling ``Foo()`` directly still builds a new object every time; only
    ``get_instance()`` returns the shared one.
    """
    # Shared object; stays None until get_instance() is first called.
    instance = None

    def __init__(self):
        self.name = 'alex'

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating it on the first call.

        The lookup goes through ``cls`` rather than the global name ``Foo``,
        so it keeps working if that name is later rebound and it builds an
        object of the class it was called on.

        :return: the shared instance
        """
        if cls.instance is None:
            cls.instance = cls()
        return cls.instance

    def process(self):
        """Return the fixed demo value ``'123'``."""
        return '123'

# Direct construction: each call yields a separate object, so the ids differ.
obj1, obj2 = Foo(), Foo()
print(id(obj1), id(obj2))

# Singleton access: both names refer to the same object, so the ids match.
obj1, obj2 = Foo.get_instance(), Foo.get_instance()
print(id(obj1), id(obj2))


#高级版单例模式
class Foo(object):
    """Singleton demo enforced in ``__new__``: ``Foo()`` always returns one object.

    ``__init__`` still runs on every ``Foo()`` call, because ``__new__``
    returns an instance of the class each time.
    """
    # Shared object; stays None until the first instantiation.
    instance = None

    def __init__(self):
        print('init')
        self.name = 'alex'

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, allocating it on the first call.

        Constructor arguments are accepted but not forwarded:
        ``object.__new__`` raises TypeError when it is given extra arguments
        by a class that overrides ``__new__``.
        """
        print('new')
        # __new__ runs before __init__.
        if cls.instance is None:
            cls.instance = object.__new__(cls)
        return cls.instance

# Both constructions return the same shared object.
obj1, obj2 = Foo(), Foo()
print(obj1, obj2)

查看全文

相关阅读:
数据结构_队列和滑动窗口
 数据结构_栈和单调栈
 数据结构_链表及邻接表
 JavaSE多线程
 AppExtension总结
 FlutterBloc 2.1.1迁移至6.0.6
iOS通知总结
 Provider 4.3.2+2 f
Fish-Redux 研究
 王道考研复习-操作系统-内存管理(三)

原文地址：https://www.cnblogs.com/leiwenbin627/p/11156726.html