zoukankan html css js c++ java

lucene4入门（2）搜索

欢迎转载http://www.cnblogs.com/shizhongtao/p/3440479.html

接着上一篇，这里继续讲搜索。搜索和创建索引一样，首先要确定索引所在的位置，然后用规定的类来读取。还要注意一点：搜索时要确定分词器，并且应当与创建索引时使用的分词器保持一致，因为不同分词器的分词规则不同。上篇我使用的是默认的分词器，这里也先不管分词器。为了方便阅读，代码就全部贴上。

  1 package com.bing.test;
  2 
  3 import java.io.File;
  4 import java.io.FileNotFoundException;
  5 import java.io.FileReader;
  6 import java.io.IOException;
  7 
  8 import org.apache.lucene.analysis.Analyzer;
  9 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 10 import org.apache.lucene.document.Document;
 11 import org.apache.lucene.document.Field.Store;
 12 import org.apache.lucene.document.FieldType;
 13 import org.apache.lucene.document.StringField;
 14 import org.apache.lucene.document.TextField;
 15 import org.apache.lucene.index.DirectoryReader;
 16 import org.apache.lucene.index.IndexReader;
 17 import org.apache.lucene.index.IndexWriter;
 18 import org.apache.lucene.index.IndexWriterConfig;
 19 import org.apache.lucene.queryparser.classic.ParseException;
 20 import org.apache.lucene.queryparser.classic.QueryParser;
 21 import org.apache.lucene.search.IndexSearcher;
 22 import org.apache.lucene.search.Query;
 23 import org.apache.lucene.search.ScoreDoc;
 24 import org.apache.lucene.search.TopDocs;
 25 import org.apache.lucene.store.Directory;
 26 import org.apache.lucene.store.FSDirectory;
 27 import org.apache.lucene.util.Version;
 28 
 29 /**
 30  * @author bingyulei
 31  * 
 32  */
 33 public class HelloLucene
 34 {
 35 
 36     Directory directory = null;
 37     Document doc;
 38     IndexWriter writer = null;
 39 
 40     /**
 41      * 
 42      * @param indexWriterPath
 43      *            索引创建路径
 44      * @param filePath
 45      *            读取文件路径
 46      */
 47     public void createIndex(String indexWriterPath, String filePath)
 48     {
 49 
 50         // 创建indexwriter
 51         Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);// 设置标准分词器
 52                                                                     // ,默认是一元分词
 53         IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45,
 54                 analyzer);// 设置IndexWriterConfig
 55 
 56         try
 57         {
 58             // 创建directory
 59             // directory=RAMDirectory();//创建在内存中
 60             // 创建在硬盘上
 61             directory = FSDirectory.open(new File(indexWriterPath));// 打开存放索引的路径
 62             writer = new IndexWriter(directory, iwc);
 63 
 64             // 为document添加field
 65             addFile(writer, filePath);
 66 
 67             System.out.println("添加成功");
 68         } catch (IOException e)
 69         {
 70             // TODO Auto-generated catch block
 71             e.printStackTrace();
 72         }
 73 
 74     }
 75 
 76     private void addFile(IndexWriter writer, String filePath)
 77     {
 78         File f = new File(filePath);
 79         FieldType ft = new FieldType();
 80         ft.setIndexed(true);// 索引
 81         ft.setStored(true);// 存储，数据量比较大，一般都是不鼓励存储，放在索引文件中会把索引文件撑大
 82         ft.setTokenized(true);
 83         for (File file : f.listFiles())
 84         {
 85             try
 86             {
 87                 // 创建Document对象
 88                 doc = new Document();
 89                 // doc.add(new Field("content", new FileReader(file), ft));
 90                 doc.add(new TextField("content", new FileReader(file)));
 91                 doc.add(new TextField("filename", file.getName(), Store.YES));
 92                 doc.add(new StringField("path", file.getPath(), Store.YES));
 93                 // 添加文档
 94                 writer.addDocument(doc);
 95                 writer.commit();// 提交数据
 96             } catch (FileNotFoundException e)
 97             {
 98                 // TODO Auto-generated catch block
 99                 e.printStackTrace();
100             } catch (IOException e)
101             {
102                 // TODO Auto-generated catch block
103                 e.printStackTrace();
104             }
105 
106         }
107     }
108 
109     /**
110      * 搜索
111      * 
112      * @param path
113      *            搜索路径
114      * @param indexReaderPath
115      *            索引存放路径
116      */
117     public void seacher(String indexReaderPath, String searthText)
118     {
119         IndexReader reader=null;
120         try
121         {
122             directory = FSDirectory.open(new File(indexReaderPath));
123             // 创建读取索引的reader
124              reader = DirectoryReader.open(directory);
125             // 根据reader创建search
126             IndexSearcher searcher = new IndexSearcher(reader);
127             // 创建查询,第二个参数表示查询的字段名，第三个是分词器
128             QueryParser parser = new QueryParser(Version.LUCENE_45, "content",
129                     new StandardAnalyzer(Version.LUCENE_45));
130             // 搜索包含searthText的内容
131             Query query = parser.parse(searthText);
132             // 搜索返回10条记录
133             TopDocs tds = searcher.search(query, 10);
134             
135              //获取scoredoc对象组，
136              ScoreDoc[] sds=tds.scoreDocs;
137              for(ScoreDoc sd:sds){
138                  //获取具体的doc
139                  Document doc=searcher.doc(sd.doc);
140                  System.out.println(doc.get("filename")+":"+doc.get("path"));
141              }
142         } catch (IOException e)
143         {
144             // TODO Auto-generated catch block
145             e.printStackTrace();
146         }// 打开存放索引的路径
147         catch (ParseException e)
148         {
149             // TODO Auto-generated catch block
150             e.printStackTrace();
151         }finally{
152             if (reader!=null)
153             {
154                 try
155                 {
156                     reader.close();
157                 } catch (IOException e)
158                 {
159                     // TODO Auto-generated catch block
160                     e.printStackTrace();
161                 }
162             }
163         }
164     }
165 }

View Code

说明："D:\lucene\file"目录下存放的文本文件，内容是我从lucene官方文档上复制的两段话。不过要注意，创建完索引之后再修改文件内容，新加的内容并不能搜索出来，因为索引不会随文件自动更新，需要重新创建索引。这个应该很好理解。

然后进行测试：运行searchTest，就可以得到哪个文本文件中含有"Changing Similarity"这段字符。

package com.bing.test;

import org.junit.Test;

public class HelloLuceneTest
{
    @Test
    public void writertest(){
        HelloLucene test=new HelloLucene();
        test.createIndex("D:\lucene\index","D:\lucene\file");
    }
    @Test
    public void searchTest(){
        HelloLucene test=new HelloLucene();
        test.seacher("D:\lucene\index", "Changing Similarity");
    }
}

查看全文

相关阅读:
正则表达式在NLP中应用
 spring boot中异常：Error resolving template "xxx", template might not exist or might not be accessible...解决办法
 毕业设计6
毕业设计5
毕业设计4
毕业设计3
毕业设计2
毕业设计1
支付宝架构
 Javaee应用架构

原文地址：https://www.cnblogs.com/shizhongtao/p/3440479.html

最新文章
bernoulli数
 斯特林数学习
 kubernetes安装
 kubernetes基础
 shell基本使用
 协同过滤算法预测和推荐
 Elasticsearch为啥这么快
 Elasticsearch基础
 vue.js安装
 DOM 实例