用lucene3实现搜索多字段并排序功能（设置权重）

zoukankan html css js c++ java

用lucene3实现搜索多字段并排序功能（设置权重）

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;
import org.apache.lucene.util.Version;public class TestLucene {
public static void main(String args[]) throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
try {
TestLucene.doSearch("上", 1, 100);
} catch(Exception ex) {
// TODO
}
}@SuppressWarnings("deprecation")
public static void doSearch(String keyword, int page, int pageSize) throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);// Store the index in memory:
Directory directory = new RAMDirectory();
// To store an index on disk, use this instead:
//Directory directory = FSDirectory.open("/tmp/testindex");
IndexWriter iwriter = new IndexWriter(directory, analyzer, true,
new IndexWriter.MaxFieldLength(25000));Post post = new Post();
post.setPostId(10001);
post.setTitle("校内八大俗");
post.setContent(" 有网友总结校内网类网站的八大俗：生活常识化妆技，十二星座小秘密；不看后悔成功录，论文大全雅思题。恋爱金句传送门，男默女泪蛋疼文；读到哪句心痛了？不顶不是中国人。相关日志推荐不成熟男人的十个标志中国大学排行榜2010版金融危机十项注意2008十大网络公敌将从我们生活里 ");
iwriter.addDocument(TestLucene.buildDocument(post));post.setPostId(10002);
post.setTitle("天使的眼神：一个摄影家镜头中的孩子（上）");
post.setContent(" 校内：之前已经有很多关于“天使”的文章了，这些小家伙一举一动往往最能打动我们。今天又收集了一些“天使的眼神”与大家分享，那清澈的眼神是否会直达你的心底，让你的心也一片清澈？另外，由于图片数量较多，就作两期发布，希望大家喜欢……");
iwriter.addDocument(TestLucene.buildDocument(post));post.setPostId(10003);
post.setTitle("冷组总是能出这么伟大的冷笑话");
post.setContent(" 鹅李卡|蘑菇蘑菇分享我的某位友邻说：据说大地震前有三个明显征兆： 1.井水异常；2.牲畜反应异常；3.专家出来辟谣。但是细心网友指出，第二条和第三条重复了。然后底下有人回应说：可能是喝了异常的井水。。。其实专家的嘴就像屁股一样,有图有真相!!!!");
iwriter.addDocument(TestLucene.buildDocument(post));iwriter.close();// Now search the index:
IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
// Parse a simple query that searches for "text":
//lucene的排序方式：默认是按照相关度排序，可以修改boosts的值来影响排序结果。
//1.使用QueryParser的第四个参数，设置字段的权重，影响相关度的值，按相关度排序是默认排序方式
//2.在给文档建索引的时候可以手动设置文档和field的boosts的值，这种也是影响相关度的值
//3.在使用indexSearcher.search()的时候使用第四个参数，设置排序的方式，当有了指定的排序方式之后，相关度排序将不起作用
// 设置查询字段的权重，影响相关度的值，进而影响排序结果
Map<String, Float> boosts = new HashMap<String, Float>();
boosts.put("name", 3.0f);
boosts.put("content", 1.1f);
QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, new String[]{"title","content"}, analyzer,boosts);
Query query = parser.parse(keyword);
Sort sort = new Sort();
sort.setSort(new SortField("postId", SortField.INT, false));
ScoreDoc[] hits = isearcher.search(query, null, 1000, sort).scoreDocs;
System.out.println(hits.length);
// Iterate through the results:
for (int i = 0; i < hits.length; i++) {
Document hitDoc = isearcher.doc(hits.doc);
System.out.println(hitDoc.get("postId") + ":" + hitDoc.get("title"));
}
isearcher.close();
directory.close();
}private static Document buildDocument(Post post) {
Document doc = new Document();
doc.add(new Field("postId", String.valueOf(post.getPostId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("title", String.valueOf(post.getTitle()), Field.Store.YES,Field.Index.ANALYZED ));
doc.add(new Field("content", String.valueOf(post.getContent()), Field.Store.YES,Field.Index.ANALYZED ));
return doc;
}
}
//Post.java
public class Post {
private int postId;
private String title;
private String content;public int getPostId() {
return postId;
}
public void setPostId(int postId) {
this.postId = postId;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}

查看全文

相关阅读:
杂记：Linux下gcc升级
 杂记：OSX下编译安装最新版RedisDesktopManager
查漏补缺：Vector中去重
 Mac下使用VScode进行C/C++开发
 添砖加瓦：几种常见的数据摘要算法（MD5、CRC32、SHA1和SHA256）
添砖加瓦：snappy无损压缩算法
 杂记：OSX 安装openssl
码海拾遗：内存四区
 Luogu 4284 [SHOI2014]概率充电器
 Luogu 4473 [国家集训队]飞飞侠

原文地址：https://www.cnblogs.com/weipeng/p/2455158.html