使用 KindEditor 富文本编辑器时，用户可以在内容中输入 script 代码；后台不应允许这些 script 代码被保存并运行，因此需要对提交的内容进行过滤。
这里主要使用 BeautifulSoup 进行过滤。
示例1
# Package: beautifulsoup4
# Tutorial snippets: `content` is the HTML string submitted by the user and
# must be defined by the surrounding code before these lines run.
from bs4 import BeautifulSoup

soup = BeautifulSoup(content, "html.parser")  # html.parser: Python's built-in parser

# --- Remove a <script> tag -------------------------------------------------
tag = soup.find("script")
if tag is not None:  # find() returns None when the tag is absent
    tag.hidden = True  # do not render the tag itself
    tag.clear()        # drop everything inside the tag

# --- Remove the style attribute of a <span> --------------------------------
span = soup.find("span")
if span is not None:
    span.attrs.pop("style", None)  # no error if the attribute is missing

content = soup.decode()  # serialize the parsed tree back to a string

# --- Keep only a fixed set of tags ------------------------------------------
tags = ["p", "span"]
for tag in soup.find_all():
    if tag.name not in tags:
        tag.hidden = True
        tag.clear()

# --- Keep only a fixed set of attributes per tag ----------------------------
tags = {
    "p": ["class"],
    "span": ["id"],
}
for tag in soup.find_all():
    if tag.name not in tags:
        tag.hidden = True
        tag.clear()
        continue

    # All attributes the user submitted for this tag,
    # e.g. {"class": "c1", "id": "i1"}.
    input_attrs = tag.attrs
    valid_attrs = tags[tag.name]
    # Iterate over a copy of the keys because the dict is mutated in the loop.
    for k in list(input_attrs.keys()):
        if k not in valid_attrs:
            del input_attrs[k]
实例
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Whitelist-based HTML filter for rich-text input, built on BeautifulSoup."""
import re

from bs4 import BeautifulSoup


class XSSFilter(object):
    """Strip non-whitelisted tags and attributes from an HTML fragment.

    The class is a singleton: every ``XSSFilter()`` call returns the same
    object.  Note that Python still runs ``__init__`` on each call, so
    ``valid_tags`` is reset to the default whitelist every time.
    """

    __instance = None

    # Whitelisted attributes whose value is a URL and therefore needs a
    # scheme check in addition to the name check.
    URL_ATTRS = ('href', 'src')
    # URL schemes accepted in URL attributes; scheme-less (relative) URLs are
    # always accepted.
    SAFE_SCHEMES = ('http', 'https', 'ftp', 'mailto')

    # Control characters and spaces are removed before looking for a scheme
    # so that obfuscated values such as "java\tscript:" are still recognised.
    _IGNORED_URL_CHARS = re.compile(r'[\x00-\x20\x7f]+')
    _URL_SCHEME = re.compile(r'^([a-z][a-z0-9+.\-]*):', re.IGNORECASE)

    def __init__(self):
        # XSS whitelist: tag name -> list of attribute names allowed on it.
        self.valid_tags = {
            "font": ['color', 'size', 'face', 'style'],
            'b': [],
            'div': [],
            "span": [],
            "table": [
                'border', 'cellspacing', 'cellpadding'
            ],
            'th': [
                'colspan', 'rowspan'
            ],
            'td': [
                'colspan', 'rowspan'
            ],
            "a": ['href', 'target', 'name'],
            "img": ['src', 'alt', 'title'],
            'p': [
                'align'
            ],
            "pre": ['class'],
            "hr": ['class'],
            'strong': []
        }

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use.

        :param cls: class being instantiated
        :param args: accepted for signature compatibility; not used
        :param kwargs: accepted for signature compatibility; not used
        :return: the single XSSFilter instance
        """
        if cls.__instance is None:
            # object.__new__ must receive only ``cls``: forwarding extra
            # arguments raises TypeError on Python 3.
            cls.__instance = object.__new__(cls)
        return cls.__instance

    def _is_safe_url(self, value):
        """Return True if *value* is a relative URL or uses a safe scheme."""
        if value is None:
            return True
        if isinstance(value, (list, tuple)):
            value = ' '.join(value)
        compact = self._IGNORED_URL_CHARS.sub('', value)
        match = self._URL_SCHEME.match(compact)
        if match is None:
            # No scheme at all: relative path, fragment, query, ...
            return True
        return match.group(1).lower() in self.SAFE_SCHEMES

    def process(self, content):
        """Return *content* with everything outside the whitelist removed.

        Tags that are not in ``valid_tags`` are hidden and emptied (``html``
        and ``body`` are hidden but keep their children).  On whitelisted
        tags, attributes that are not listed for that tag are deleted, and
        ``href``/``src`` values with a scheme outside ``SAFE_SCHEMES``
        (e.g. ``javascript:``) are deleted as well.

        :param content: HTML markup to filter
        :return: the filtered markup as a string
        """
        soup = BeautifulSoup(content, 'html.parser')
        # Walk every tag in the document.
        for tag in soup.find_all(recursive=True):
            # Tag not whitelisted: drop its markup and, unless it is a mere
            # document wrapper, everything inside it.
            if tag.name not in self.valid_tags:
                tag.hidden = True
                if tag.name not in ('html', 'body'):
                    tag.clear()
                continue

            # Attribute whitelist for the current tag.
            attr_rules = self.valid_tags[tag.name]
            # Copy the keys: attributes are deleted while looping.
            for key in list(tag.attrs.keys()):
                if key not in attr_rules:
                    del tag[key]
                elif key in self.URL_ATTRS and not self._is_safe_url(tag[key]):
                    # A whitelisted URL attribute can still carry script,
                    # e.g. href="javascript:...".
                    del tag[key]

        return soup.decode()


if __name__ == '__main__':
    html = """<p class="title">
                        <b>The Dormouse's story</b>
                    </p>
                    <p class="story">
                        <div name='root'>
                            Once upon a time there were three little sisters; and their names were
                            <a href="http://example.com/elsie" class="sister c1" style='color:red;background-color:green;' id="link1"><!-- Elsie --></a>
                            <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
                            <a href="http://example.com/tillie" class="sister" id="link3">Tilffffffffffffflie</a>;
                            and they lived at the bottom of a well.
                            <script>alert(123)</script>
                        </div>
                    </p>
                    <p class="story">...</p>"""

    obj = XSSFilter()
    v = obj.process(html)
    print(v)