zoukankan      html  css  js  c++  java
  • Lucene.NET 4.8 中文分词与高亮显示

    使用 SmartChineseAnalyzer 分词

    /// <summary>
            /// Searches the Lucene index stored in the "Index_Data" directory and returns one
            /// highlighted snippet of the "description" field for each matching document.
            /// </summary>
            /// <param name="keywords">Raw search text; it is split into terms by GetKeyWords.</param>
            /// <returns>
            /// At most 16 snippets in hit order, with query terms wrapped in a red
            /// <c>&lt;span&gt;</c>. The list is empty when <paramref name="keywords"/> is blank,
            /// and may be empty or partial when the search fails (a message box is shown then).
            /// </returns>
            static List<string> Search(string keywords)
            {
                const int MaxHits = 16;            // upper bound on returned documents and fragments
                const int FragmentSize = 256;      // size of each text fragment shown to the user
                const string HighlightField = "description";

                List<string> results = new List<string>();

                // Nothing to search for: skip opening the index altogether.
                if (string.IsNullOrWhiteSpace(keywords))
                {
                    return results;
                }

                try
                {
                    // The directory, reader and analyzer all hold resources, so they are
                    // released deterministically instead of leaking on every call.
                    using (var directory = FSDirectory.Open("Index_Data"))
                    using (IndexReader reader = DirectoryReader.Open(directory))
                    using (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(LuceneVersion.LUCENE_48))
                    {
                        var searcher = new IndexSearcher(reader);

                        // A document matches when any term occurs in any of the three fields.
                        var keyWordQuery = new BooleanQuery();
                        foreach (var item in GetKeyWords(keywords))
                        {
                            keyWordQuery.Add(new TermQuery(new Term("title", item)), Occur.SHOULD);
                            keyWordQuery.Add(new TermQuery(new Term("keyword", item)), Occur.SHOULD);
                            keyWordQuery.Add(new TermQuery(new Term(HighlightField, item)), Occur.SHOULD);
                        }
                        var hits = searcher.Search(keyWordQuery, MaxHits).ScoreDocs;

                        /* Highlighter setup */
                        // CreateBooleanQuery yields null when the analyzer produces no terms;
                        // in that case no highlighter is built and the plain text is returned.
                        Query query = new QueryBuilder(analyzer).CreateBooleanQuery(HighlightField, keywords, Occur.SHOULD);
                        Highlighter highlighter = null;
                        if (query != null)
                        {
                            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
                            highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                            highlighter.TextFragmenter = new SimpleFragmenter(FragmentSize);
                        }

                        foreach (var hit in hits)
                        {
                            var document = searcher.Doc(hit.Doc);
                            string descriptionText = document.Get(HighlightField);
                            if (descriptionText == null)
                            {
                                // No stored description for this document: nothing to show,
                                // and a null here must not abort the remaining hits.
                                continue;
                            }

                            string snippet = null;
                            if (highlighter != null)
                            {
                                TokenStream tokenStream = analyzer.GetTokenStream(HighlightField, new System.IO.StringReader(descriptionText));
                                snippet = highlighter.GetBestFragments(tokenStream, descriptionText, MaxHits, "...");
                            }

                            // The hit may have matched only on title/keyword, leaving nothing to
                            // highlight in the description; show the plain text rather than a blank entry.
                            results.Add(string.IsNullOrEmpty(snippet) ? descriptionText : snippet);
                        }
                    }
                    return results;
                }
                catch (Exception)
                {
                    // Deliberate best-effort: any failure is reported to the user as
                    // "initialize the database first" and whatever was collected so far is returned.
                    MessageBox.Show("使用前请初始化资料库");
                    return results;
                }
            }
  • 相关阅读:
    简洁又漂亮的单网页404页源码(html格式404源码)
    运行bee run之后出现的错误以及解决方法
    window beego 安装出现的错误
    golang gin框架 使用swagger生成api文档
    go语言切片作为函数参数
    Go中函数接收器不能改变接收者的地址
    docker 删除none镜像
    redis下载安装
    git切换分支
    angular自定义验证器添加入模板驱动表单
  • 原文地址:https://www.cnblogs.com/kqw/p/13585553.html
Copyright © 2011-2022 走看看