zoukankan      html  css  js  c++  java
  • Lucene查询结果高亮

    检索结果高亮

    实现效果:

    这里写图片描述

    核心代码

    package ucas.ir.lucene;
    
    import java.io.File;
    import java.io.IOException;
    
    import javax.print.Doc;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordAnalyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.highlight.Fragmenter;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
    import org.apache.lucene.search.highlight.TokenSources;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    
    public class IndexSearch {
    
        public static void main(String[] args) {
            Directory directory = null;
            try {
                File indexpath = new File("/Users/yp/Documents/workspace/UCASIR/WebContent/index");
                if (indexpath.exists() != true) {
                    indexpath.mkdirs();
                }
                // 设置要查询的索引目录
                directory = FSDirectory.open(indexpath);
                // 创建indexSearcher
                DirectoryReader dReader = DirectoryReader.open(directory);
                IndexSearcher searcher = new IndexSearcher(dReader);
                // 设置分词方式
                Analyzer analyze2 = new StandardAnalyzer(Version.LUCENE_43);// 标准分词
                Analyzer analyzer = new IKAnalyzer();
    
                // 设置查询域
                String field="news_title";
                QueryParser parser = new QueryParser(Version.LUCENE_43, field, analyzer);
                // 查询字符串
                Query query = parser.parse("阿法狗");
    
                QueryScorer scorer=new QueryScorer(query,field);
                SimpleHTMLFormatter fors=new SimpleHTMLFormatter("<span style="color:red;">", "</span>");
                Highlighter highlighter=new Highlighter(fors, scorer);
    
                System.out.println("query:" + query.toString());
                // 返回前10条
                TopDocs topDocs = searcher.search(query, 10);
                if (topDocs != null) {
                    System.out.println("符合条件第文档总数:" + topDocs.totalHits);
    
                    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                        Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
                        TokenStream tokenStream=TokenSources.getAnyTokenStream(searcher.getIndexReader(), topDocs.scoreDocs[i].doc, field, analyzer);
                        Fragmenter  fragment=new SimpleSpanFragmenter(scorer);
                        highlighter.setTextFragmenter(fragment); 
                        //高亮news_title域
                        String str=highlighter.getBestFragment(tokenStream, doc.get("news_title"));//获取高亮的片段,可以对其数量进行限制  
                        System.out.println("高亮title:"+str);
                        tokenStream=TokenSources.getAnyTokenStream(searcher.getIndexReader(), topDocs.scoreDocs[i].doc, "news_summary", analyzer);
                        str=highlighter.getBestFragment(tokenStream, doc.get("news_summary"));//获取高亮的片段,可以对其数量进行限制  
                        System.out.println("高亮summary:"+str);
                    }
                }
                directory.close();
                dReader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
    }
    

    结果:
    这里写图片描述

  • 相关阅读:
    裸眼 3D 技术是什么原理?
    裸眼3D全攻略3:拍摄3D—瞳距、镜距、视角偏转与空间感
    JFreeChart DateAxis用法
    remount issue on android 7.0
    获取WebView加载的网页内容并进行动态修改
    android自定义Activity窗口大小(theme运用)
    Android5.0免Root截屏,录屏
    coursera上的软件安全课程的课后阅读补充
    java,C#接口与C++的虚基类
    单元测试之C/C++
  • 原文地址:https://www.cnblogs.com/hainange/p/6153791.html
Copyright © 2011-2022 走看看