在这里给大家分享一些关于《Lucene in Action》的内容。
你可以到 http://lucene.apache.org/ 了解更新、更全的关于 Lucene 的信息。
下面是我做的 demo,分享给大家:
项目结构:
运行Index时,Run Configurations:
[两个参数之间有空格]
args[0] = "C:\Users\hongjie\Desktop\hongten_temp\hongten_index";
args[1] = "C:\Users\hongjie\Desktop\hongten_temp\lucene-3.5.0";
运行效果:
运行Searcher时,Run Configurations:
[两个参数之间有空格]
args[0] = "C:\Users\hongjie\Desktop\hongten_temp\hongten_index";
args[1] = "cnhances";
运行效果:
============================================================
代码部分:
============================================================
/lucene/src/com/b510/index/Indexer.java
/**
 * Lucene indexing demo.
 */
package com.b510.index;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the files located directly inside a data directory.
 * <p>
 * Each accepted file becomes one document with three fields: {@code contents}
 * (the tokenized file body), {@code filename} and {@code fullpath} (both stored
 * verbatim and not analyzed).
 *
 * @author hongten(hongtenzone@foxmail.com)<br>
 * @date 2013-4-5
 */
public class Indexer {

    /**
     * Command-line entry point.
     *
     * @param args
     *            {@code args[0]} is the directory the index is written to,
     *            {@code args[1]} is the directory whose {@code .txt} files are indexed
     * @throws IllegalArgumentException
     *             if the number of arguments is not exactly two
     * @throws Exception
     *             if opening the index or indexing a file fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new IllegalArgumentException("Usage: java " + Indexer.class.getName() + " <index dir><data dir>");
        }
        // Create the index in the given directory.
        String indexDir = args[0];
        String dataDir = args[1];

        long start = System.currentTimeMillis();
        Indexer index = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = index.index(dataDir, new TextFilesFilter());
        } finally {
            // Always release the writer so its lock on the index directory is freed.
            index.close();
        }
        long end = System.currentTimeMillis();

        System.out.println("indexing " + numIndexed + " files took " + (end - start) + " millseconds!");
    }

    /** Writer used for every document added by this indexer. */
    private final IndexWriter writer;

    /**
     * Opens an index writer on the given directory.
     * <p>
     * The writer is opened with {@code create == true}, so an index that already
     * exists in {@code indexDir} is replaced rather than appended to.
     *
     * @param indexDir
     *            path of the directory that will hold the index
     * @throws Exception
     *             if the directory or the writer cannot be opened
     */
    @SuppressWarnings("deprecation")
    public Indexer(String indexDir) throws Exception {
        Directory dir = FSDirectory.open(new File(indexDir));
        // create lucene index writer
        writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_35), true, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying {@link IndexWriter}.
     *
     * @throws Exception
     *             if closing the writer fails
     */
    public void close() throws Exception {
        writer.close();
    }

    /**
     * Indexes every eligible file found directly inside {@code dataDir}
     * (sub-directories are not descended into).
     *
     * @param dataDir
     *            directory containing the files to index
     * @param filter
     *            additional filter a file must pass, or {@code null} to accept every
     *            readable, non-hidden regular file
     * @return the number of documents in the index after indexing
     * @throws IllegalArgumentException
     *             if {@code dataDir} does not exist, is not a directory, or cannot be listed
     * @throws Exception
     *             if reading a file or adding it to the index fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null instead of throwing when the path is not a
            // listable directory; fail with a message that names the offending path.
            throw new IllegalArgumentException("Cannot list files in data directory: " + dataDir);
        }

        for (File f : files) {
            if (isIndexable(f, filter)) {
                indexFile(f);
            }
        }
        // Report the number of documents now held by the index.
        return writer.numDocs();
    }

    /**
     * Tells whether a directory entry should be indexed: it must be an existing,
     * readable, non-hidden, non-directory entry that passes the optional filter.
     */
    private static boolean isIndexable(File f, FileFilter filter) {
        return !f.isDirectory()
                && !f.isHidden()
                && f.exists()
                && f.canRead()
                && (filter == null || filter.accept(f));
    }

    /**
     * Adds a single file to the index as one document.
     *
     * @param f
     *            file to index
     * @throws Exception
     *             if the file cannot be read or added to the index
     */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Builds the Lucene document that represents a file.
     *
     * @param f
     *            file being indexed
     * @return a document with the fields {@code contents}, {@code filename} and {@code fullpath}
     * @throws Exception
     *             if the file cannot be opened or its canonical path cannot be resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset, and the
        // reader is not closed here - presumably Lucene closes it once the field has
        // been tokenized; confirm against the Lucene version in use.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * {@link FileFilter} that accepts only files whose name ends with {@code .txt}
     * (case-insensitive).
     *
     * @author hongten(hongtenzone@foxmail.com)<br>
     * @date 2013-4-5
     */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }

}
/lucene/src/com/b510/search/Searcher.java
/**
 * Lucene search demo.
 */
package com.b510.search;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.b510.index.Indexer;

/**
 * Runs a query against the {@code contents} field of an existing Lucene index and
 * prints the {@code fullpath} field of the best-matching documents.
 *
 * @author hongten(hongtenzone@foxmail.com)<br>
 * @date 2013-4-5
 */
public class Searcher {

    /**
     * Command-line entry point.
     *
     * @param args
     *            {@code args[0]} is the index directory, {@code args[1]} is the query string
     * @throws IllegalArgumentException
     *             if the number of arguments is not exactly two
     * @throws IOException
     *             if the index cannot be opened or read
     * @throws ParseException
     *             if the query string cannot be parsed
     */
    public static void main(String[] args) throws IllegalArgumentException, IOException, ParseException {
        if (args.length != 2) {
            // Report this class and its real arguments (index directory + query).
            throw new IllegalArgumentException("Usage: java " + Searcher.class.getName() + " <index dir> <query>");
        }
        // Search the index stored in the given directory.
        String indexDir = args[0];
        String q = args[1];

        search(indexDir, q);
    }

    /**
     * Searches the index for {@code q} and prints the total hit count, the elapsed
     * search time, and the full path of up to ten top-scoring documents.
     *
     * @param indexDir
     *            directory containing the index to search
     * @param q
     *            query string, parsed with {@link QueryParser} against the {@code contents} field
     * @throws IOException
     *             if the index cannot be opened or read
     * @throws ParseException
     *             if {@code q} is not a valid query
     */
    public static void search(String indexDir, String q) throws IOException, ParseException {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_35, "contents", new StandardAnalyzer(Version.LUCENE_35));
                Query query = parser.parse(q);

                // Time only the search itself, not index opening or query parsing.
                long start = System.currentTimeMillis();
                TopDocs hits = is.search(query, 10);
                long end = System.currentTimeMillis();

                System.out.println("Found "+hits.totalHits + " document(s) (in " + (end - start) +" millsecondes) that matched query '"+ q+"':");

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fullpath"));
                }
            } finally {
                // Release the searcher even when parsing or searching fails.
                is.close();
            }
        } finally {
            dir.close();
        }
    }
}