zoukankan      html  css  js  c++  java
  • lucene.net 4.8 中文分词 高亮显示

    使用 SmartChineseAnalyzer 分词

    /// <summary>
    /// Searches the Lucene index stored in the "Index_Data" directory and returns
    /// one highlighted text block per matching document.
    /// </summary>
    /// <remarks>
    /// Documents are matched when any term produced by <c>GetKeyWords</c> occurs in the
    /// "title", "keyword" or "description" field. Highlighting is applied to the
    /// "description" field only: matched terms are wrapped in a red <c>span</c> element.
    /// Hits without a stored description are skipped, and a description in which no
    /// term could be highlighted is returned as plain text.
    /// Any failure (for example, the index has not been built yet) is reported to the
    /// user through a message box, and whatever was collected so far is returned.
    /// </remarks>
    /// <param name="keywords">The raw keyword text entered by the user.</param>
    /// <returns>
    /// Up to 16 text blocks, one per hit, in the order returned by the searcher.
    /// The list is empty when nothing matched or the search failed.
    /// </returns>
    static List<string> Search(string keywords)
    {
        List<string> results = new List<string>();
        try
        {
            // The directory, reader and analyzer all hold resources (file handles,
            // dictionaries), so release them deterministically once the search is done.
            using (var directory = FSDirectory.Open("Index_Data"))
            using (IndexReader reader = DirectoryReader.Open(directory))
            using (var analyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_48))
            {
                var searcher = new IndexSearcher(reader);

                // Match a document when any keyword appears in any of the three fields.
                var keyWordQuery = new BooleanQuery();
                foreach (var item in GetKeyWords(keywords))
                {
                    keyWordQuery.Add(new TermQuery(new Term("title", item)), Occur.SHOULD);
                    keyWordQuery.Add(new TermQuery(new Term("keyword", item)), Occur.SHOULD);
                    keyWordQuery.Add(new TermQuery(new Term("description", item)), Occur.SHOULD);
                }

                // Return at most 16 results.
                var hits = searcher.Search(keyWordQuery, 16).ScoreDocs;

                // Highlighting setup. CreateBooleanQuery returns null when analysing the
                // keywords yields no terms; in that case highlighting is skipped entirely.
                Query query = new QueryBuilder(analyzer).CreateBooleanQuery("description", keywords, Occur.SHOULD);
                Highlighter highlighter = null;
                if (query != null)
                {
                    var htmlFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
                    highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                    // Size of each keyword-bearing text fragment shown to the user.
                    highlighter.TextFragmenter = new SimpleFragmenter(256);
                }

                foreach (var hit in hits)
                {
                    string descriptionText = searcher.Doc(hit.Doc).Get("description");
                    if (string.IsNullOrEmpty(descriptionText))
                    {
                        // The hit matched on title/keyword only and has nothing to display.
                        continue;
                    }

                    string highlighted = null;
                    if (highlighter != null)
                    {
                        TokenStream tokenStream = analyzer.GetTokenStream("description", new System.IO.StringReader(descriptionText));
                        highlighted = highlighter.GetBestFragments(tokenStream, descriptionText, 16, "...");
                    }

                    // No fragment contained a highlightable term: fall back to the
                    // plain description rather than adding a blank entry.
                    results.Add(string.IsNullOrEmpty(highlighted) ? descriptionText : highlighted);
                }
            }

            return results;
        }
        catch (Exception)
        {
            // Deliberate best effort: tell the user to initialise the data store
            // and return whatever was collected before the failure.
            MessageBox.Show("使用前请初始化资料库");
            return results;
        }
    }
  • 相关阅读:
    hadoop之 hadoop日志存放路径
    grpc的数据包监控
    HTTP2 概述
    gRPC的简单Go例子
    win下环境变量的设置
    Go的pprof使用
    graphviz
    学习Golang的步骤建议
    golang 的 sync.WaitGroup
    【转】golang的channel的几种用法
  • 原文地址:https://www.cnblogs.com/kqw/p/13585553.html
Copyright © 2011-2022 走看看