欢迎转载http://www.cnblogs.com/shizhongtao/p/3440479.html
接着上一篇,这里继续讲搜索。搜索和创建索引一样,首先要确定索引所在的位置,然后用规定的类来读取。还要注意一点:要确定分词器,因为不同的分词器分词规则不同,搜索时使用的分词器应与创建索引时保持一致。上篇我使用的是默认的分词器,这里我也先不管分词器。为了方便阅读,代码就全部贴上。
package com.bing.test;

import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 4.5 example: builds an on-disk index from the files of a
 * directory and runs a query against the indexed file contents.
 *
 * @author bingyulei
 */
public class HelloLucene
{

    /** Directory currently in use; only non-null while an operation is running. */
    Directory directory = null;
    /** Document most recently built by {@link #addFile(IndexWriter, String)}. */
    Document doc;
    /** Writer currently in use; only non-null while {@link #createIndex} is running. */
    IndexWriter writer = null;

    /**
     * Indexes every regular file found directly inside {@code filePath} and
     * stores the index under {@code indexWriterPath}.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and are not
     * propagated. The writer and the directory are always closed before this
     * method returns, so the index write lock is released and the method can
     * be called again in the same JVM.
     *
     * @param indexWriterPath
     *            path of the directory in which the index is created
     * @param filePath
     *            path of the directory whose files are indexed
     */
    public void createIndex(String indexWriterPath, String filePath)
    {
        // Standard analyzer; the original author notes that by default it
        // produces single-character (unigram) tokens.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45,
                analyzer);

        try
        {
            // On-disk index; a RAMDirectory could be used for an in-memory one.
            directory = FSDirectory.open(new File(indexWriterPath));
            writer = new IndexWriter(directory, iwc);

            addFile(writer, filePath);

            // Commit once for the whole batch instead of once per file, and
            // only announce success after the commit went through.
            writer.commit();
            System.out.println("添加成功");
        } catch (IOException e)
        {
            e.printStackTrace();
        } finally
        {
            // Without closing the writer the index stays write-locked.
            closeQuietly(writer);
            writer = null;
            closeQuietly(directory);
            directory = null;
        }
    }

    /**
     * Adds one document per regular file directly inside {@code filePath}.
     * Each document gets a tokenized, unstored {@code content} field read from
     * the file, a tokenized stored {@code filename} field and an untokenized
     * stored {@code path} field. A failure on a single file is reported and
     * the remaining files are still processed.
     *
     * @param writer
     *            open index writer that receives the documents
     * @param filePath
     *            path of the directory whose files are indexed
     * @throws IOException
     *             if {@code filePath} is not a directory that can be listed
     */
    private void addFile(IndexWriter writer, String filePath)
            throws IOException
    {
        File[] files = new File(filePath).listFiles();
        if (files == null)
        {
            // listFiles() returns null for a missing path or a plain file.
            throw new FileNotFoundException(
                    "Not a readable directory: " + filePath);
        }

        for (File file : files)
        {
            if (!file.isFile())
            {
                // Sub-directories cannot be opened with a FileReader.
                continue;
            }

            FileReader content = null;
            try
            {
                content = new FileReader(file);
                doc = new Document();
                doc.add(new TextField("content", content));
                doc.add(new TextField("filename", file.getName(), Store.YES));
                doc.add(new StringField("path", file.getPath(), Store.YES));
                writer.addDocument(doc);
            } catch (FileNotFoundException e)
            {
                e.printStackTrace();
            } catch (IOException e)
            {
                e.printStackTrace();
            } finally
            {
                // Make sure the file handle is released even if indexing
                // failed before the reader was fully consumed.
                closeQuietly(content);
            }
        }
    }

    /**
     * Searches the {@code content} field of the index and prints
     * {@code filename:path} for each of the top 10 hits.
     *
     * <p>I/O and query-syntax failures are reported with
     * {@code printStackTrace()} and are not propagated. The reader and the
     * directory are always closed before this method returns.
     *
     * @param indexReaderPath
     *            path of the directory that holds the index
     * @param searthText
     *            query text, parsed with the classic {@link QueryParser}
     */
    public void seacher(String indexReaderPath, String searthText)
    {
        IndexReader reader = null;
        try
        {
            directory = FSDirectory.open(new File(indexReaderPath));
            reader = DirectoryReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Second argument is the default field to search, third is the
            // analyzer applied to the query text.
            QueryParser parser = new QueryParser(Version.LUCENE_45, "content",
                    new StandardAnalyzer(Version.LUCENE_45));
            Query query = parser.parse(searthText);

            // Fetch at most 10 hits.
            TopDocs tds = searcher.search(query, 10);
            for (ScoreDoc sd : tds.scoreDocs)
            {
                Document hit = searcher.doc(sd.doc);
                System.out.println(hit.get("filename") + ":" + hit.get("path"));
            }
        } catch (IOException e)
        {
            e.printStackTrace();
        } catch (ParseException e)
        {
            e.printStackTrace();
        } finally
        {
            closeQuietly(reader);
            closeQuietly(directory);
            directory = null;
        }
    }

    /**
     * Closes {@code resource} if it is non-null, reporting (not propagating)
     * any failure so that cleanup of the remaining resources can continue.
     */
    private static void closeQuietly(Closeable resource)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        } catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}
说明:"D:\lucene\file"目录下文件的内容,是我从lucene官方文档上复制的两段话。不过当你创建完索引之后,再去修改文件内容,新加的内容并不能搜索出来,因为索引没有随之更新,需要重新创建索引。这个应该很好理解。
然后进行测试:运行searchTest,就可以得到哪个文本文件中含有"Changing Similarity"这段字符。
package com.bing.test; import org.junit.Test; public class HelloLuceneTest { @Test public void writertest(){ HelloLucene test=new HelloLucene(); test.createIndex("D:\lucene\index","D:\lucene\file"); } @Test public void searchTest(){ HelloLucene test=new HelloLucene(); test.seacher("D:\lucene\index", "Changing Similarity"); } }