zoukankan html css js c++ java

lucene4入门（1）

欢迎转载http://www.cnblogs.com/shizhongtao/p/3440325.html

Lucene 是一个全文检索引擎工具包；它生成的索引，你可以粗略地理解为一种专门用于搜索的数据库。

1.首先去官网下载最新的jar包，我下载的是4.5版本的，当然你也可以使用maven来下载。

2.新建项目，并把lucene-core-4.5.1.jar加入到项目中，其他需要的分词器等jar包，可以用的时候加入就可以。因为是入门创建java project就可以了。

3.lucene中主要分为三部分，分别是索引部分、分词部分、搜索部分。

索引部分：可以理解像字典中前面的查找索引
分词部分：就是将内容进行拆分，比如“我是好人”，这个词我们怎么去分词。“我”，“好人”，“人”等。
搜索部分：就是如何去查找了。

4.创建索引，因为lucene的最近的升级都是不兼容升级，编写代码时候一定写清版本号。

 1 import java.io.File;
 2 import java.io.IOException;
 3 
 4 import org.apache.lucene.analysis.Analyzer;
 5 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 6 import org.apache.lucene.document.Document;
 7 import org.apache.lucene.document.Field.Store;
 8 import org.apache.lucene.document.StringField;
 9 import org.apache.lucene.index.IndexWriter;
10 import org.apache.lucene.index.IndexWriterConfig;
11 import org.apache.lucene.store.Directory;
12 import org.apache.lucene.store.FSDirectory;
13 import org.apache.lucene.util.Version;
14 
15 /**
16  * @author bingyulei
17  *
18  */
19 public class HelloLucene
20 {
21     /**
22      *  建立索引
23      */
24     public void createIndex(String indexWriterPath){
25         // 创建directory
26         Directory directory=null;
27         // 创建indexwriter
28          Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_45);//设置标准分词器 ,默认是一元分词  
29          IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_45, analyzer);//设置IndexWriterConfig  
30          IndexWriter writer=null;
31         
32              try
33             {
34                  directory=    FSDirectory.open(new File(indexWriterPath));//打开存放索引的路径 
35                 writer=new IndexWriter(directory, iwc);
36                 // 创建Document对象
37                  Document doc=new Document(); 
38                 //为document添加field
39                     doc.add(new StringField("id", "1", Store.YES));//存储  
40                     doc.add(new StringField("name", "hello", Store.YES));//存储  
41                     doc.add(new StringField("content", "hello world!", Store.YES));//存储  
42                     //通过IndexWriter添加文档
43                     writer.addDocument(doc);
44                     writer.commit();//提交数据  
45                     System.out.println("添加成功");
46             } catch (IOException e)
47             {
48                 // TODO Auto-generated catch block
49                 e.printStackTrace();
50             }  
51         
52     }
53 }

View Code

5.然后测试代码

1 public class HelloLuceneTest
2 {
3     @Test
4     public void test(){
5         HelloLucene test=new HelloLucene();
6         test.createIndex("D:\lucene\index");
7     }
8 }

6.如果想要把电脑上的文件加入索引，简单文档的话可以这样写。下图是文件

java代码：

  1 package com.bing.test;
  2 
  3 import java.io.File;
  4 import java.io.FileNotFoundException;
  5 import java.io.FileReader;
  6 import java.io.IOException;
  7 
  8 import org.apache.lucene.analysis.Analyzer;
  9 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 10 import org.apache.lucene.document.Document;
 11 import org.apache.lucene.document.Field.Store;
 12 import org.apache.lucene.document.FieldType;
 13 import org.apache.lucene.document.StringField;
 14 import org.apache.lucene.document.TextField;
 15 import org.apache.lucene.index.IndexWriter;
 16 import org.apache.lucene.index.IndexWriterConfig;
 17 import org.apache.lucene.store.Directory;
 18 import org.apache.lucene.store.FSDirectory;
 19 import org.apache.lucene.store.RAMDirectory;
 20 import org.apache.lucene.util.Version;
 21 
 22 
 23 /**
 24  * @author bingyulei
 25  * 
 26  */
 27 public class HelloLucene
 28 {
 29 
 30     Directory directory = null;
 31     Document doc;
 32     IndexWriter writer = null;
 33 
 34     /**
 35      * 
 36      * @param indexWriterPath 索引创建路径
 37      * @param filePath 读取文件路径
 38      */
 39     public void createIndex(String indexWriterPath, String filePath)
 40     {
 41 
 42         // 创建indexwriter
 43         Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);// 设置标准分词器
 44                                                                     // ,默认是一元分词
 45         IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45,
 46                 analyzer);// 设置IndexWriterConfig
 47 
 48         try
 49         {
 50             // 创建directory
 51             //directory=RAMDirectory();//创建在内存中
 52             //创建在硬盘上
 53             directory = FSDirectory.open(new File(indexWriterPath));// 打开存放索引的路径
 54             writer = new IndexWriter(directory, iwc);
 55             
 56             // 为document添加field
 57             addFile(writer,filePath);
 58             
 59             System.out.println("添加成功");
 60         } catch (IOException e)
 61         {
 62             // TODO Auto-generated catch block
 63             e.printStackTrace();
 64         }
 65 
 66     }
 67 
 68     private void addFile(IndexWriter writer,String filePath)
 69     {
 70         File f = new File(filePath);
 71         FieldType ft = new FieldType();
 72         ft.setIndexed(true);//索引
 73         ft.setStored(true);//存储，数据量比较大，一般都是不鼓励存储，放在索引文件中会把索引文件撑大
 74         ft.setTokenized(true);
 75         for (File file : f.listFiles())
 76         {
 77             try
 78             {
 79                 // 创建Document对象
 80                 doc = new Document();
 81                 //doc.add(new Field("content", new FileReader(file), ft));
 82                 doc.add(new TextField("content",new FileReader(file)));// 这个方法默认的Store的属性是NO
 83                 doc.add(new TextField("filename",file.getName(),Store.YES));
 84                 doc.add(new StringField("path", file.getPath(), Store.YES));
 85                 //添加文档
 86                 writer.addDocument(doc);
 87                 writer.commit();// 提交数据
 88             } catch (FileNotFoundException e)
 89             {
 90                 // TODO Auto-generated catch block
 91                 e.printStackTrace();
 92             } catch (IOException e)
 93             {
 94                 // TODO Auto-generated catch block
 95                 e.printStackTrace();
 96             }
 97             
 98         }
 99     }
100 }

View Code

测试代码：

 1 package com.bing.test;
 2 
 3 import org.junit.Test;
 4 
 5 public class HelloLuceneTest
 6 {
 7     @Test
 8     public void test(){
 9         HelloLucene test=new HelloLucene();
10         test.createIndex("D:\lucene\index","D:\lucene\file");
11     }
12 }

查看全文

相关阅读:
Codeforces Round #353 (Div. 2)
Codeforces Round #304 (Div. 2)
Codeforces Round #250 (Div. 2)D
Codeforces Round #368 (Div. 2)
hdu4348区间更新的主席树+标记永久化
 poj3468线段树标记永久化
 Educational Codeforces Round 35 (Rated for Div. 2)
一维数组取一部分
 序列化数组。
禅道常识

原文地址：https://www.cnblogs.com/shizhongtao/p/3440325.html