/**
 * Sensitive-word filter backed by a character trie.
 *
 * The keyword list is a ';'-separated string fetched once from `file`,
 * turned into a trie (`tblRoot`) and then used to locate and mark keywords
 * inside arbitrary text.
 */
var filterWord = {
    // Raw keyword list (';'-separated). Stays "" until a load succeeds.
    words: "",
    // Trie root: every node maps one character (UTF-16 code unit) to a child
    // node; a node carrying `end: true` terminates a keyword.
    tblRoot: {},
    // Keyword list the current trie was built from (null = never built).
    treeSource: null,
    // Sensitive-word file
    file: "sensitiveWords.txt",

    /**
     * Fetch the keyword list and cache it in `this.words`.
     *
     * @param {string} [file] URL of the word file; defaults to `this.file`.
     * @param {function(string)} callback Receives the file text, or "" when
     *        the request failed (so an error page is never parsed as keywords).
     */
    load: function (file, callback) {
        // Inside the XHR handler `this` is the request object, so keep an
        // explicit reference to the filter.
        var self = this;
        var objHttp;
        file = file || self.file;
        if (typeof window !== "undefined" && window.ActiveXObject) {
            objHttp = new window.ActiveXObject("Microsoft.XMLHTTP");
        } else {
            objHttp = new XMLHttpRequest();
            // NOTE(review): the MIME override is kept from the original;
            // "text/xml" looks odd for a .txt word list - confirm it is wanted.
            if (objHttp.overrideMimeType) {
                objHttp.overrideMimeType("text/xml");
            }
        }
        objHttp.onreadystatechange = function () {
            if (objHttp.readyState !== 4) {
                return;
            }
            var status = objHttp.status;
            // Status 0 is what non-HTTP schemes (e.g. file://) report.
            var ok = status === 0 || (status >= 200 && status < 300) || status === 304;
            var text = ok ? objHttp.responseText : "";
            if (!ok && typeof console !== "undefined" && console.error) {
                console.error("filterWord: failed to load " + file + " (HTTP " + status + ")");
            }
            self.words = text;
            if (typeof callback === "function") {
                callback(text);
            }
        };
        objHttp.open("GET", file, true);
        objHttp.send(null);
    },

    /**
     * Build the keyword trie from a ';'-separated list and store it in
     * `this.tblRoot`.
     *
     * Whitespace (including line breaks) around each keyword is stripped and
     * empty keywords are skipped, so a trailing ';' or newline in the file
     * does not create bogus entries.
     *
     * @param {string} words ';'-separated keywords.
     * @returns {Object} The trie root.
     */
    buildTree: function (words) {
        var root = {};
        var keywords = (words == null ? "" : String(words)).split(";");
        for (var k = 0; k < keywords.length; k++) {
            var keyword = keywords[k].replace(/^\s+|\s+$/g, "");
            if (keyword === "") {
                continue;
            }
            var node = root;
            for (var c = 0; c < keyword.length; c++) {
                var ch = keyword.charAt(c);
                if (!Object.prototype.hasOwnProperty.call(node, ch)) {
                    node[ch] = {};
                }
                node = node[ch];
            }
            node.end = true;
        }
        this.tblRoot = root;
        this.treeSource = words;
        return root;
    },

    /**
     * Provide the keyword trie, loading the word file first when no keyword
     * list is cached yet.
     *
     * @param {function(Object)} callback Receives the trie root.
     */
    makeTree: function (callback) {
        var self = this;
        if (!self.words) {
            self.load(self.file, function (words) {
                callback(self.buildTree(words));
            });
            return;
        }
        // Rebuild only when the cached trie does not belong to the current list.
        if (self.treeSource !== self.words) {
            self.buildTree(self.words);
        }
        callback(self.tblRoot);
    },

    /**
     * Locate keywords in `content` using longest match at each position.
     *
     * @param {string} content Text to scan.
     * @param {Object} root Trie root as produced by `buildTree`.
     * @returns {number[]} Flat list of [start, end) index pairs:
     *          [start0, end0, start1, end1, ...].
     */
    searchWords: function (content, root) {
        var arrMatch = [];
        if (content == null || !root) {
            return arrMatch;
        }
        var text = String(content);
        var n = text.length;
        var start = 0;
        while (start < n) {
            var node = root;
            var pos = start;
            var matchEnd = 0; // 0 = nothing matched from `start`
            while (pos < n && (node = node[text.charAt(pos)])) {
                pos++;
                // Keep walking after a hit so the longest keyword wins.
                if (node.end) {
                    matchEnd = pos;
                }
            }
            if (matchEnd) {
                arrMatch.push(start, matchEnd);
                start = matchEnd;
            } else {
                start++;
            }
        }
        return arrMatch;
    },

    /**
     * Wrap every keyword found in `strContent` in <em>...</em>.
     *
     * The text is NOT HTML-escaped; escape untrusted input before inserting
     * the result into a page.
     *
     * @param {string} strContent Text to mark up.
     * @param {function(string)} [callback] Receives the marked-up string.
     *        When omitted the result is written to the console, as before.
     */
    handle: function (strContent, callback) {
        var self = this;
        var text = strContent == null ? "" : String(strContent);
        self.makeTree(function (root) {
            var arrMatch = self.searchWords(text, root);
            var arrHTML = [];
            var p = 0;
            for (var i = 0, n = arrMatch.length; i < n; i += 2) {
                var start = arrMatch[i];
                var end = arrMatch[i + 1];
                arrHTML.push(text.substring(p, start), "<em>", text.substring(start, end), "</em>");
                p = end;
            }
            arrHTML.push(text.substring(p));
            // NOTE(review): this turns every space into <br>; possibly the
            // pattern was meant to be a line break - confirm.
            var strHTML = arrHTML.join("").replace(/ /g, "<br>");
            if (typeof callback === "function") {
                callback(strHTML);
            } else {
                console.log(strHTML);
            }
        });
    }
};
// 使用方式 (Usage):
// Example call: marks each sensitive word found in the sample text; the
// marked-up string is written to the console once the word list is available.
filterWord.handle("徐航撒飒飒阿萨飒飒刘孝朋啊啊撒飒飒");