【Lucene】三个高亮显示模块的简单示例-Highlighter

zoukankan html css js c++ java

【Lucene】三个高亮显示模块的简单示例-Highlighter
Lucene针对高亮显示功能提供了两种实现方式，分别是Highlighter和FastVectorHighlighter。

这里的三个示例都使用Highlighter。

示例代码：
1. package com.tan.code;
3. import java.io.File;
4. import java.io.IOException;
5. import java.io.StringReader;
7. import org.apache.lucene.analysis.TokenStream;
8. import org.apache.lucene.analysis.core.SimpleAnalyzer;
9. import org.apache.lucene.document.Document;
10. import org.apache.lucene.index.DirectoryReader;
11. import org.apache.lucene.index.IndexReader;
12. import org.apache.lucene.index.Term;
13. import org.apache.lucene.queryparser.classic.ParseException;
14. import org.apache.lucene.queryparser.classic.QueryParser;
15. import org.apache.lucene.search.IndexSearcher;
16. import org.apache.lucene.search.Query;
17. import org.apache.lucene.search.ScoreDoc;
18. import org.apache.lucene.search.TermQuery;
19. import org.apache.lucene.search.TopDocs;
20. import org.apache.lucene.search.highlight.Highlighter;
21. import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
22. import org.apache.lucene.search.highlight.QueryScorer;
23. import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
24. import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
25. import org.apache.lucene.search.highlight.TokenSources;
26. import org.apache.lucene.store.Directory;
27. import org.apache.lucene.store.SimpleFSDirectory;
28. import org.apache.lucene.util.Version;
29. import org.wltea.analyzer.lucene.IKAnalyzer;
31. public class HighlighterTest {
33. // 高亮處理文本（以下内容纯属虚构）
34. private String text = "China has lots of people,most of them are very poor.China is very big.China become strong now,but the poor people is also poor than other controry";
36. // 原文高亮
37. public void highlighter() throws IOException, InvalidTokenOffsetsException {
39. TermQuery termQuery = new TermQuery(new Term("field", "china"));
40. TokenStream tokenStream = new SimpleAnalyzer(Version.LUCENE_43)
41. .tokenStream("field", new StringReader(text));
43. QueryScorer queryScorer = new QueryScorer(termQuery);
44. Highlighter highlighter = new Highlighter(queryScorer);
45. highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
46. System.out.println(highlighter.getBestFragment(tokenStream, text));
47. }
49. // 使用CSS進行高亮顯示處理
50. public void highlighter_CSS(String searchText) throws ParseException,
51. IOException, InvalidTokenOffsetsException {
53. // 創建查詢
54. QueryParser queryParser = new QueryParser(Version.LUCENE_43, "field",
55. new SimpleAnalyzer(Version.LUCENE_43));
56. Query query = queryParser.parse(searchText);
58. // 自定义标注高亮文本标签
59. SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(
60. "", "");
61. // 语汇单元化
62. TokenStream tokenStream = new SimpleAnalyzer(Version.LUCENE_43)
63. .tokenStream("field", new StringReader(text));
65. // 創建QueryScoer
66. QueryScorer queryScorer = new QueryScorer(query, "field");
68. Highlighter highlighter = new Highlighter(htmlFormatter, queryScorer);
69. highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
71. System.out.println(highlighter.getBestFragments(tokenStream, text, 4,
72. "..."));
73. }
75. // 高亮顯示搜索結果
76. public void highlighter_SR(String field, String searchText)
77. throws IOException, ParseException, InvalidTokenOffsetsException {
79. //本次示例为了简便直接使用之前实验建立的索引
80. Directory directory = new SimpleFSDirectory(new File("E://MyIndex"));
81. IndexReader reader = DirectoryReader.open(directory);// 读取目录
82. IndexSearcher search = new IndexSearcher(reader);// 初始化查询组件
83. QueryParser parser = new QueryParser(Version.LUCENE_43, field,
84. new IKAnalyzer(true));
86. Query query = parser.parse(searchText);
88. TopDocs td = search.search(query, 10000);// 获取匹配上元素的一个docid
89. ScoreDoc[] sd = td.scoreDocs;// 加载所有的Documnet文档
91. System.out.println("本次命中数据:" + sd.length);
92. QueryScorer scorer = new QueryScorer(query, "content");
94. Highlighter highlighter = new Highlighter(scorer);
95. highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
97. for (ScoreDoc scoreDoc : sd) {
98. Document document = search.doc(scoreDoc.doc);
99. String content = document.get("content");
100. TokenStream tokenStream = TokenSources.getAnyTokenStream(
101. search.getIndexReader(), scoreDoc.doc, "content", document,
102. new IKAnalyzer(true));
103. System.out.println(highlighter
104. .getBestFragment(tokenStream, content));
105. }
106. }
107. }
测试代码：
1. @Test
2. public void test() throws IOException, InvalidTokenOffsetsException,
3. ParseException {
4. // fail("Not yet implemented");
5. HighlighterTest highlighterTest = new HighlighterTest();
6. highlighterTest.highlighter();
7. highlighterTest.highlighter_CSS("china");
8. highlighterTest.highlighter_CSS("poor");
9. highlighterTest.highlighter_SR("content", "床前明月光");
10. }
测试结果：
1. China has lots of people,most of them are very poor.China is very big.China become strong now,but the poor people is also poor than other controry
2. China has lots of people,most of them are very poor.China is very big.China become strong now,but the poor people is also poor than other controry
3. China has lots of people,most of them are very poor.China is very big.China become strong now,but the poor people is also poor than other controry
4. 本次命中数据:1
5. 床前明月光，疑是地上霜
查看全文

相关阅读:
Dynamics 365 CRM large instance copy
Dynamics CRM Plug-in
Dynamics CRM Publisher
Dynamics 365 CRM Free up storage 清理Dynamics 365 CRM的空间
 账户和联系人 Accounts and Contacts 译
 Dynamics CRM Instances
Dynamics CRM Solution
微软Azure通知中心 (Azure Notification Hubs)
CLR(Common Language Runtime) 公共语言运行库
 构建Apache Web服务器

原文地址：https://www.cnblogs.com/dingjiaoyang/p/6115292.html