package cn.tz.lucene;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Example searches against an existing Lucene index on the local disk.
 *
 * <p>Every test opens the index located at {@link #INDEX_DIR}, runs one kind of
 * query, and prints the total hit count followed by the stored {@code fileName}
 * and {@code fileSize} values of each returned document. The tests only print;
 * they contain no assertions.
 */
public class IndexSearchTest {

    /**
     * Directory that holds the index. The backslash must be escaped: a bare
     * {@code "d:\lucene"} contains the illegal escape sequence {@code \l} and
     * does not compile.
     */
    private static final String INDEX_DIR = "d:\\lucene";

    /**
     * Parses a query string with {@link QueryParser} and prints up to five hits.
     *
     * @throws Exception if the index cannot be opened or read, or the query
     *                   string cannot be parsed
     */
    @Test
    public void testIndexSearch() throws Exception {
        // Analyzer used to tokenize the query text (a StandardAnalyzer could be
        // substituted here).
        Analyzer analyzer = new IKAnalyzer();

        // try-with-resources closes the reader and then the directory even when
        // parsing or searching throws.
        try (FSDirectory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);

            // First argument: the default field, used only for query terms that
            // do not name a field themselves.
            QueryParser queryParser = new QueryParser("fileName", analyzer);

            // Query syntax is "field:keyword", e.g. "fileName:apache".
            Query query = queryParser.parse("fileName:not exit");

            // Second argument: maximum number of hits to return.
            TopDocs topDocs = indexSearcher.search(query, 5);
            System.out.println("***** 一共有"+topDocs.totalHits+" 条记录 *****");

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored document by its internal document id.
                Document doc = reader.document(scoreDoc.doc);
                System.out.println("fileName:"+doc.get("fileName"));
                System.out.println("fileSize:"+doc.get("fileSize"));
                System.out.println("==================================");
            }
        }
    }

    /**
     * Looks up documents with a {@link TermQuery} and prints up to ten hits.
     *
     * <p>No analyzer is involved: the query is built directly from a
     * {@link Term} instead of being parsed from text.
     *
     * @throws Exception if the index cannot be opened or read
     */
    @Test
    public void testTermQuery() throws Exception {
        try (FSDirectory dir = FSDirectory.open(new File(INDEX_DIR));
             IndexReader reader = IndexReader.open(dir)) {
            Query query = new TermQuery(new Term("fileName", "apache"));

            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, 10);
            System.out.println("总条数: "+topDocs.totalHits);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored document by its internal document id.
                Document document = reader.document(scoreDoc.doc);
                System.out.println("文件名: "+document.get("fileName"));
                System.out.println("文件大小 : "+document.get("fileSize"));
                System.out.println("======================================");
            }
        }
    }

    /**
     * Runs a {@link NumericRangeQuery} on {@code fileSize} and prints up to ten
     * hits.
     *
     * <p>Note: numeric range queries only apply to fields indexed as numeric
     * fields (for example {@code LongField} or {@code FloatField}).
     *
     * @throws Exception if the index cannot be opened or read
     */
    @Test
    public void testNumericRangeQuery() throws Exception {
        try (FSDirectory dir = FSDirectory.open(new File(INDEX_DIR));
             IndexReader reader = IndexReader.open(dir)) {
            IndexSearcher search = new IndexSearcher(reader);

            // Arguments: field name, min, max, include min, include max.
            NumericRangeQuery<Long> query =
                    NumericRangeQuery.newLongRange("fileSize", 100L, 1000L, true, true);

            TopDocs topDocs = search.search(query, 10);
            System.out.println("文件数量: "+topDocs.totalHits);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored document by its internal document id.
                Document doc = reader.document(scoreDoc.doc);
                System.out.println("文件名称: "+doc.get("fileName"));
                System.out.println("文件大小: "+doc.get("fileSize"));
                System.out.println("=========================");
            }
        }
    }

    /**
     * Combines a term query and a numeric range query with a
     * {@link BooleanQuery} and prints up to ten hits.
     *
     * @throws Exception if the index cannot be opened or read
     */
    @Test
    public void testBooleanQuery() throws Exception {
        // The reader was previously never closed in this test.
        try (FSDirectory dir = FSDirectory.open(new File(INDEX_DIR));
             IndexReader reader = IndexReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            TermQuery termQuery = new TermQuery(new Term("fileName", "apache"));
            NumericRangeQuery<Long> numericRangeQuery =
                    NumericRangeQuery.newLongRange("fileSize", 100L, 1000L, true, true);

            // Occur.MUST     -> AND
            // Occur.MUST_NOT -> NOT
            // Occur.SHOULD   -> OR
            // Both clauses are MUST: fileName has the term "apache" AND fileSize
            // lies in [100, 1000] (unit of fileSize is not visible here --
            // presumably bytes; confirm against the indexing code).
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(termQuery, Occur.MUST);
            booleanQuery.add(numericRangeQuery, Occur.MUST);

            TopDocs topDocs = searcher.search(booleanQuery, 10);
            System.out.println("文件数量 : "+topDocs.totalHits);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = reader.document(scoreDoc.doc);
                System.out.println("文件名称: "+document.get("fileName"));
                System.out.println("文件大小: "+document.get("fileSize"));
                System.out.println("==============================");
            }
        }
    }

    /**
     * Searches several fields at once with {@link MultiFieldQueryParser} and
     * prints up to five hits.
     *
     * @throws Exception if the index cannot be opened or read, or the query
     *                   string cannot be parsed
     */
    @Test
    public void testMultiFieldQueryParser() throws Exception {
        Analyzer analyzer = new IKAnalyzer();

        // The reader was previously never closed in this test.
        try (FSDirectory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // Goal: find documents whose file name or file content contains
            // "apache", so the parser is given both fields.
            String[] fields = {"fileName", "fileContent"};
            MultiFieldQueryParser multiQueryParser = new MultiFieldQueryParser(fields, analyzer);
            Query query = multiQueryParser.parse("apache");

            TopDocs topDocs = searcher.search(query, 5);
            System.out.println("总记录数: "+topDocs.totalHits);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = reader.document(scoreDoc.doc);
                System.out.println("文档名称:"+doc.get("fileName"));
                System.out.println("文档大小:"+doc.get("fileSize"));
                System.out.println("===============================");
            }
        }
    }
}