zoukankan      html  css  js  c++  java
  • lucene.net 4.8 中文分词 高亮显示

    使用 SmartChineseAnalyzer 分词，并通过 Highlighter 对命中结果的 description 字段做关键词高亮，示例代码如下：

    /// <summary>
            /// Searches the on-disk Lucene index in "Index_Data" and returns highlighted text blocks.
            /// Each keyword term is matched (as an optional clause) against the "title",
            /// "keyword" and "description" fields; for every hit the "description" text is
            /// run through a <c>Highlighter</c> that wraps matched terms in a red span.
            /// </summary>
            /// <param name="keywords">Raw keyword text; null or blank input yields an empty list.</param>
            /// <returns>
            /// Up to 16 HTML fragments, one per hit. When a hit's description contains no
            /// highlightable term, the plain description text is returned for that hit.
            /// If the search fails, a message box is shown and the results gathered so far are returned.
            /// </returns>
            static List<string> Search(string keywords)
            {
                List<string> results = new List<string>();

                // Nothing to search for: skip opening the index altogether.
                if (string.IsNullOrWhiteSpace(keywords))
                {
                    return results;
                }

                try
                {
                    // The directory, the reader and the analyzer all hold resources (file handles,
                    // dictionaries) and must be released once the search is done.
                    using (var directory = FSDirectory.Open("Index_Data"))
                    using (IndexReader reader = DirectoryReader.Open(directory))
                    using (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_48))
                    {
                        var searcher = new IndexSearcher(reader);
                        var keyWordQuery = new BooleanQuery();
                        foreach (var item in GetKeyWords(keywords))
                        {
                            keyWordQuery.Add(new TermQuery(new Term("title", item)), Occur.SHOULD);
                            keyWordQuery.Add(new TermQuery(new Term("keyword", item)), Occur.SHOULD);
                            keyWordQuery.Add(new TermQuery(new Term("description", item)), Occur.SHOULD);
                        }
                        var hits = searcher.Search(keyWordQuery, 16).ScoreDocs; // return at most 16 results

                        /* Highlighting setup */
                        // CreateBooleanQuery returns null when analysis yields no tokens;
                        // fall back to the search query so the scorer never receives null.
                        Query query = new QueryBuilder(analyzer).CreateBooleanQuery("description", keywords, Occur.SHOULD)
                                      ?? keyWordQuery;
                        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
                        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                        highlighter.TextFragmenter = new SimpleFragmenter(256); // size of each text block shown to the user

                        foreach (var hit in hits)
                        {
                            var document = searcher.Doc(hit.Doc);
                            string descriptionText = document.Get("description");
                            if (descriptionText == null)
                            {
                                // The hit matched on another field and has no stored description.
                                continue;
                            }

                            // NOTE(review): the token stream is handed to the highlighter, which is
                            // expected to consume and close it - confirm against the Lucene.NET version in use.
                            TokenStream tokenStream = analyzer.GetTokenStream("description", new System.IO.StringReader(descriptionText));
                            string result = highlighter.GetBestFragments(tokenStream, descriptionText, 16, "...");

                            // No fragment scored (e.g. the match was in "title" only): show the plain text
                            // instead of an empty entry.
                            results.Add(string.IsNullOrEmpty(result) ? descriptionText : result);
                        }
                    }
                    return results;
                }
                catch (Exception)
                {
                    // Deliberate best-effort: any failure (typically a missing or unreadable index)
                    // is reported to the user and whatever was collected so far is returned.
                    MessageBox.Show("使用前请初始化资料库");
                    return results;
                }

            }
  • 相关阅读:
    CF833B The Bakery (线段树+DP)
    NOIP 2017 时间复杂度 (模拟)
    NOI 2018 屠龙勇士 (拓展中国剩余定理excrt+拓展欧几里得exgcd)
    中国剩余定理(excrt) 模板
    后缀自动机 模板
    luogu P4248 [AHOI2013]差异
    luogu P3975 [TJOI2015]弦论
    luogu P4770 [NOI2018]你的名字
    luogu P3726 [AH2017/HNOI2017]抛硬币
    luogu P3722 [AH2017/HNOI2017]影魔
  • 原文地址:https://www.cnblogs.com/kqw/p/13585553.html
Copyright © 2011-2022 走看看