Lucene:
1. 举例: Eclipse的帮助;
2. 建立索引:只能为文本类型的数据建立索引
html(去掉标签), pdf等可以使用相应的工具转换为文本;
3. 原理:字典(倒排索引),由关键词映射到包含该关键词的文档编号列表
key ---> recordNum
ab ---> 5,7,10
Document;
5
7
10
4. 使用:
4.1 建立java Project;
4.2 添加*.jar;[没有配置文件]目前为2.4版;
核心;分词器;高亮器;
4.3 写代码测试;
FirstTest
StringindexPath = "c:/luceneDemoIndex/";
Analyzeranalyzer = new StandardAnalyer();
@Test
testCreateIndex(){
Stringtitle = "xxx";
Stringcontent = "yyy";
//1.建立索引
MaxFieldLengthmaxFieldLength = MaxFieldLength.LIMITED;
//如果索引库不存在,则创建;
IndexWriterindexWriter = new IndexWriter(indexPath, analyzer,maxFieldLength);
//文档,字段
Documentdoc = new Document();
doc.add(newField("title", title, Store.YES, Index.ANALYZED));
doc.add(newField("content", content, Store.YES, Index.ANALYZED));
indexWriter.addDocument(doc);
//使用完一定要关闭
indexWriter.close();
}
voidtestSearch() {
StringqueryString ="document";
IndexSearcherindexSearcher = new IndexSearcher(indexPath);
StringdefaultFieldName = "content";
QueryParserqueryParser = new QueryParser(defaultFieldName, analyzer);
Query query =queryParser.parse(queryString); //查询条件
Filter filter =null; //过滤条件
int nDocs =100; //返回匹配的数目
//返回结果
TopDocstopDocs = indexSearcher.search(query, filter, nDocs);
System.out.println("共有【"+topDocs.totalHits+"条匹配记录");
List<Document>docs = newArrayList<Document>();
for( ScoreDoc scoreDoc : topDocs.scoreDocs ) {
intdocNum = scoreDoc.doc; //文档在索引库中的编号
Documentdoc = indexSearcher.doc(docNum); // 通过编号取出相应的文档
doc.add(doc);
}