- package com.fpi.lucene.studying.myfirstlucene;
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.LockObtainFailedException;
- import org.apache.lucene.util.Version;
- public class Indexer {
- public static void createIndex() throws CorruptIndexException, LockObtainFailedException, IOException{
- Document doc = null;
- //创建索引。定义索引生成位置
- IndexWriter writer = new IndexWriter(FSDirectory.open(new File("d://test//myindex")),
- new StandardAnalyzer(Version.LUCENE_30),
- true,
- IndexWriter.MaxFieldLength.LIMITED);
- File dir = new File("d://test//mysmall");
- File[] fileList = dir.listFiles();
- for (int i = 0; i < fileList.length; i++) {
- //对于每一个要查找的文件都要新建一个document,理解为一个文件对应着一个document
- doc = new Document();
- //前边的contents随便起 只是一个代号而已。后边的代表把这篇文章做成索引,以供后续查找...
- doc.add(new Field("contents",new FileReader(fileList[i])));
- doc.add(new Field("filename",fileList[i].getCanonicalPath(),Field.Store.YES, Field.Index.ANALYZED));
- //将文档写入索引
- writer.addDocument(doc);
- }
- // .optimize() Requests an "optimize" operation on an index, priming the index for the fastest available search.
- // 请求一个“optimize”上的索引操作,启动了最快的搜索索引。
- writer.optimize();
- writer.close();
- }
- public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException {
- createIndex();
- }
- }
建立索引部分。
- package com.fpi.lucene.studying.myfirstlucene;
- import java.io.File;
- import java.io.IOException;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopScoreDocCollector;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- public class Searcher {
- // 关键字,要搜查的对象
- public static String key_word = "all";
- public static String field = "contents";
- public static void search() throws CorruptIndexException, IOException, ParseException{
- //打开索引所在地
- IndexSearcher sr = new IndexSearcher(FSDirectory.open(new File("d://test//myindex")),true);
- //词法分析器
- Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
- //解析器
- QueryParser parser = new QueryParser(Version.LUCENE_30,field,analyzer);
- //根据关键字查询
- Query query = parser.parse(key_word);
- TopScoreDocCollector collector = TopScoreDocCollector.create(100, false);
- //将所搜寻出来的结果以特定的形式放在collector中
- sr.search(query, collector);
- /**
- * topDocs():Returns the top docs that were collected by this collector.
- * 返回的是由这个collector收集的顶级文档。
- * .scoreDocs():The top hits for the query.
- * 用于查询的最高命中。
- */
- ScoreDoc[] hits = collector.topDocs().scoreDocs;
- System.out.println("搜索到符合标准的文档数目:"+hits.length);
- for (int i = 0; i < hits.length; i++) {
- Document doc = sr.doc(hits[i].doc); //依此遍历符合标准的文档
- System.out.println(doc.getField("filename")+" ---- "+hits[i].toString()+" ---- ");
- }
- System.out.println("you have " + collector.getTotalHits() +
- " file matched query '" + key_word + "':");
- }
- public static void main(String[] args) {
- try {
- search();
- } catch (CorruptIndexException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (ParseException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
查找部分。