zoukankan      html  css  js  c++  java
  • JavaScript实战笔记(三) 文本搜索

    借鉴 pdf.js 源码,实现文本搜索功能,包含大小写敏感和全字匹配选项,话不多说,直接上码

    /**
     * Numeric codes for the character classes returned by getCharacterType.
     * Two neighbouring characters with the same code are treated as belonging
     * to the same word by isEntireWord.
     *
     * The object is frozen so that the codes cannot be reassigned or extended
     * by accident at runtime.
     */
    var CharacterType = Object.freeze({
        SPACE: 0,
        ALPHA_LETTER: 1,
        PUNCT: 2,
        HAN_LETTER: 3,
        KATAKANA_LETTER: 4,
        HIRAGANA_LETTER: 5,
        HALFWIDTH_KATAKANA_LETTER: 6,
        THAI_LETTER: 7
    })
    
    /**
     * Tell whether a code unit lies below U+2E80.
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true when charCode is strictly less than 0x2E80
     */
    function isAlphabeticalScript(charCode) {
        var firstExcludedCode = 0x2E80
        return charCode < firstExcludedCode
    }
    /**
     * Tell whether a code unit fits in seven bits.
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true when none of the bits selected by 0xFF80 are set
     */
    function isAscii(charCode) {
        // 0xFF80 selects bits 7 through 15; all of them must be clear.
        var highBits = charCode & 0xFF80
        return highBits === 0
    }
    /**
     * Tell whether a code unit is an ASCII letter (a-z or A-Z).
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true for 0x61-0x7A and 0x41-0x5A inclusive
     */
    function isAsciiAlpha(charCode) {
        var isLowerCase = charCode >= 0x61 && charCode <= 0x7A
        var isUpperCase = charCode >= 0x41 && charCode <= 0x5A
        return isLowerCase || isUpperCase
    }
    /**
     * Tell whether a code unit is an ASCII decimal digit (0-9).
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true for 0x30-0x39 inclusive
     */
    function isAsciiDigit(charCode) {
        var codeOfZero = 0x30
        var codeOfNine = 0x39
        return codeOfZero <= charCode && charCode <= codeOfNine
    }
    /**
     * Tell whether a code unit is one of the four ASCII whitespace characters
     * recognised here: space, tab, carriage return or line feed.
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true for 0x20, 0x09, 0x0D and 0x0A
     */
    function isAsciiSpace(charCode) {
        switch (charCode) {
            case 0x20: // space
            case 0x09: // horizontal tab
            case 0x0D: // carriage return
            case 0x0A: // line feed
                return true
            default:
                return false
        }
    }
    /**
     * Tell whether a code unit falls in the 128-code block starting at U+0E00.
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true when the bits selected by 0xFF80 equal 0x0E00
     */
    function isThai(charCode) {
        // Dropping the low seven bits leaves the start of the 128-code block.
        var blockStart = charCode & 0xFF80
        return blockStart === 0x0E00
    }
    /**
     * Tell whether a code unit is treated as a Han ideograph.
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true for 0x3400-0x9FFF and 0xF900-0xFAFF inclusive
     */
    function isHan(charCode) {
        var inMainRange = charCode >= 0x3400 && charCode <= 0x9FFF
        var inCompatibilityRange = charCode >= 0xF900 && charCode <= 0xFAFF
        return inMainRange || inCompatibilityRange
    }
    /**
     * Tell whether a code unit is in the range 0x30A0-0x30FF.
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true for 0x30A0-0x30FF inclusive
     */
    function isKatakana(charCode) {
        if (charCode < 0x30A0) {
            return false
        }
        return charCode <= 0x30FF
    }
    /**
     * Tell whether a code unit is in the range 0x3040-0x309F.
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true for 0x3040-0x309F inclusive
     */
    function isHiragana(charCode) {
        return charCode >= 0x3040 ? charCode <= 0x309F : false
    }
    /**
     * Tell whether a code unit is in the range 0xFF60-0xFF9F.
     * @param  {Number}  charCode code unit to test
     * @return {Boolean} true for 0xFF60-0xFF9F inclusive
     */
    function isHalfwidthKatakana(charCode) {
        var rangeStart = 0xFF60
        var rangeEnd = 0xFF9F
        return rangeStart <= charCode && charCode <= rangeEnd
    }
    
    /**
     * Classify a code unit into one of the CharacterType codes.
     *
     * ASCII letters, digits and the underscore count as ALPHA_LETTER; other
     * ASCII non-space characters are PUNCT. U+00A0 counts as SPACE. Anything
     * that matches no specific class falls back to ALPHA_LETTER.
     *
     * @param  {Number} charCode code unit to classify
     * @return {Number}          one of the CharacterType values
     */
    function getCharacterType(charCode) {
        // At or above U+2E80: only Han and the kana families get their own class.
        if (!isAlphabeticalScript(charCode)) {
            if (isHan(charCode)) { return CharacterType.HAN_LETTER }
            if (isKatakana(charCode)) { return CharacterType.KATAKANA_LETTER }
            if (isHiragana(charCode)) { return CharacterType.HIRAGANA_LETTER }
            if (isHalfwidthKatakana(charCode)) { return CharacterType.HALFWIDTH_KATAKANA_LETTER }
            return CharacterType.ALPHA_LETTER
        }

        // Below U+2E80 but outside ASCII: Thai and the no-break space are special-cased.
        if (!isAscii(charCode)) {
            if (isThai(charCode)) { return CharacterType.THAI_LETTER }
            return charCode === 0xA0 ? CharacterType.SPACE : CharacterType.ALPHA_LETTER
        }

        // Plain ASCII.
        if (isAsciiSpace(charCode)) { return CharacterType.SPACE }
        var isWordCharacter = isAsciiAlpha(charCode) || isAsciiDigit(charCode) || charCode === 0x5F
        return isWordCharacter ? CharacterType.ALPHA_LETTER : CharacterType.PUNCT
    }
    
    /**
     * Check that a match is not glued to a neighbouring character of the same
     * class, i.e. that it starts and ends on a word boundary.
     *
     * A boundary exists when the match touches the start/end of the text or
     * when the character just outside the match has a different
     * getCharacterType result than the adjacent character inside the match.
     *
     * @param  {String}  content  text that contains the match
     * @param  {Number}  matchIdx index of the first character of the match
     * @param  {Number}  length   number of characters in the match
     * @return {Boolean}          true when both ends sit on a boundary
     */
    function isEntireWord(content, matchIdx, length) {
        var lastIdx = matchIdx + length - 1

        // Left edge: compare the first matched character with its predecessor.
        var gluedOnLeft = matchIdx > 0 &&
            getCharacterType(content.charCodeAt(matchIdx)) ===
            getCharacterType(content.charCodeAt(matchIdx - 1))
        if (gluedOnLeft) {
            return false
        }

        // Right edge: compare the last matched character with its successor.
        var gluedOnRight = lastIdx < content.length - 1 &&
            getCharacterType(content.charCodeAt(lastIdx)) ===
            getCharacterType(content.charCodeAt(lastIdx + 1))
        return !gluedOnRight
    }
    
    /**
     * Search a text for every occurrence of a query and return the start
     * index of each hit.
     *
     * Hits are found left to right; after each candidate the scan resumes
     * just past it, so reported hits never overlap. When entireWord is set,
     * candidates rejected by isEntireWord are skipped the same way.
     *
     * NOTE: with caseSensitive false, both strings are lower-cased before
     * searching and the indices refer to the lower-cased text. For the rare
     * characters whose lower-case form has a different length (e.g. U+0130)
     * those indices may not line up with the original content.
     *
     * @param  {String}   query         text to look for
     * @param  {String}   content       text to search in
     * @param  {Boolean}  caseSensitive match case exactly when true
     * @param  {Boolean}  entireWord    only accept whole-word matches when true
     * @return {Number[]}               start indices of all matches; an empty
     *                                  array when there are none or when the
     *                                  query is empty
     */
    function search(query, content, caseSensitive, entireWord) {
        var matchRst = []
        // An empty query matches nothing; always hand back an array so that
        // callers can rely on the documented return type.
        if (query.length === 0) {
            return matchRst
        }

        var needle = caseSensitive ? query : query.toLowerCase()
        var haystack = caseSensitive ? content : content.toLowerCase()
        // Measured after lower-casing, because that is the text actually searched.
        var needleLen = needle.length

        var matchIdx = haystack.indexOf(needle)
        while (matchIdx !== -1) {
            if (!entireWord || isEntireWord(haystack, matchIdx, needleLen)) {
                matchRst.push(matchIdx)
            }
            matchIdx = haystack.indexOf(needle, matchIdx + needleLen)
        }
        return matchRst
    }
    

    一个用于测试的例子

    // Sample sentence searched by every demo call below.
    var content = 'Say Hello To Tomorrow. Say Goodbye To Yesterday.'
    var query
    var result

    // One row per call: [query, caseSensitive, entireWord].
    // The expected console output is noted to the right of each row.
    var demoCases = [
        ['say', true, false],   // []      - lower-case "say" never appears verbatim
        ['say', false, false],  // [0, 23]
        ['Good', true, false],  // [27]
        ['Good', true, true]    // []      - "Good" is only the start of "Goodbye"
    ]

    demoCases.forEach(function (demo) {
        query = demo[0]
        result = search(query, content, demo[1], demo[2])
        console.log(result)
    })
    

    【 阅读更多 JavaScript 系列文章,请看 JavaScript学习笔记 】

  • 相关阅读:
    2017-2018-1 20155226《信息安全系统设计基础》第5周学习总结
    2017-2018-1 20155226 《信息安全系统设计基础》第四周学习总结
    2017-2018-1 20155226《信息安全系统设计基础》第2周学习总结
    2017-2018-1 20155226 《信息安全系统设计基础》第四周课堂实践
    2017-2018-1 20155226《信息安全系统设计基础》第1周学习总结
    20155226 2016-2017-2 《Java程序设计》课程总结
    20155226 实验五 网络编程与安全
    20155226 实验四 Android开发基础
    2017-2018-20155220 《信息安全系统设计基础》第九周学习总结
    2017-2018-1 20155220 《信息安全系统设计基础》第八周学习总结
  • 原文地址:https://www.cnblogs.com/wsmrzx/p/12433671.html
Copyright © 2011-2022 走看看