zoukankan      html  css  js  c++  java
  • Lucene.NET 4.8 中文分词 高亮显示

    使用 SmartChineseAnalyzer 分词

    /// <summary>
            /// Searches the "Index_Data" index for the given keywords and returns one
            /// text block per hit, taken from the hit's "description" field with the
            /// matched terms wrapped in a red <c>span</c>.
            /// </summary>
            /// <param name="keywords">Raw search text entered by the user.</param>
            /// <returns>
            /// Up to 16 text blocks. Empty when <paramref name="keywords"/> is blank,
            /// when nothing matches, or when the index cannot be read (in which case a
            /// message box is shown).
            /// </returns>
            static List<string> Search(string keywords)
            {
                const int maxHits = 16;        // upper bound on returned documents
                const int fragmentSize = 256;  // size of each highlighted text fragment shown to the user
                const int maxFragments = 16;   // fragments joined per document

                List<string> results = new List<string>();

                // Nothing to search for; avoid touching the index at all.
                if (string.IsNullOrWhiteSpace(keywords))
                {
                    return results;
                }

                try
                {
                    // The directory, reader and analyzer all hold resources; dispose them
                    // deterministically (in reverse order) once the search is finished.
                    using (var directory = FSDirectory.Open("Index_Data"))
                    using (IndexReader reader = DirectoryReader.Open(directory))
                    using (var analyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_48))
                    {
                        var searcher = new IndexSearcher(reader);

                        // A document matches if any keyword occurs in any of the three fields.
                        var keyWordQuery = new BooleanQuery();
                        foreach (var item in GetKeyWords(keywords))
                        {
                            keyWordQuery.Add(new TermQuery(new Term("title", item)), Occur.SHOULD);
                            keyWordQuery.Add(new TermQuery(new Term("keyword", item)), Occur.SHOULD);
                            keyWordQuery.Add(new TermQuery(new Term("description", item)), Occur.SHOULD);
                        }

                        var hits = searcher.Search(keyWordQuery, maxHits).ScoreDocs;

                        // Highlighting setup. The builder can yield null when the text
                        // analyses to no terms; in that case highlighting is skipped.
                        Query highlightQuery = new QueryBuilder(analyzer).CreateBooleanQuery("description", keywords, Occur.SHOULD);
                        Highlighter highlighter = null;
                        if (highlightQuery != null)
                        {
                            var htmlFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
                            highlighter = new Highlighter(htmlFormatter, new QueryScorer(highlightQuery));
                            highlighter.TextFragmenter = new SimpleFragmenter(fragmentSize);
                        }

                        foreach (var hit in hits)
                        {
                            string descriptionText = searcher.Doc(hit.Doc).Get("description");
                            if (descriptionText == null)
                            {
                                // No stored description for this document: nothing to display.
                                continue;
                            }

                            string fragment = string.Empty;
                            if (highlighter != null)
                            {
                                TokenStream tokenStream = analyzer.GetTokenStream("description", new System.IO.StringReader(descriptionText));
                                fragment = highlighter.GetBestFragments(tokenStream, descriptionText, maxFragments, "...");
                            }

                            // A hit that matched only on title/keyword yields no highlighted
                            // fragment; show the plain description instead of an empty entry.
                            results.Add(string.IsNullOrEmpty(fragment) ? descriptionText : fragment);
                        }
                    }

                    return results;
                }
                catch (Exception ex)
                {
                    // Best effort: the usual cause is a missing or unbuilt index, so the user
                    // is asked to initialise it. Keep the details available when debugging.
                    System.Diagnostics.Debug.WriteLine(ex);
                    MessageBox.Show("使用前请初始化资料库");
                    return results;
                }
            }
  • 相关阅读:
    boost库
    DISALLOW_COPY_AND_ASSIGN
    汇编语言入门
    gflags
    Segmentation Fault
    ubuntu16.04_cuda9.0_opencv3.4_cudnn_v7_caffe
    make: aarch64-himix100-linux-gcc: Command not found
    gtest
    glog
    [Go]go语言实战-go版本的supervisord编译安装与运行
  • 原文地址:https://www.cnblogs.com/kqw/p/13585553.html
Copyright © 2011-2022 走看看