zoukankan      html  css  js  c++  java
  • lucene(全文搜索)_根据内容建立索引_源码下载

    在我们的开发过程中,会遇到这样的情况:

    给出下面的信息,要求我们为这些信息建立索引,并能够对其进行搜索。

    这个时候,我们应该怎样处理呢?

    要实现这样的功能,使用 Lucene 会简单很多!

    ========================================

    项目结构:

    运行

    1 @Test
    2     public void testIndex(){
    3         LuceneUtil util = new LuceneUtil();
    4         util.index();
    5     }

    效果:

    运行

    1 @Test
    2     public void testQuery(){
    3         LuceneUtil util = new LuceneUtil();
    4         util.query();
    5     }

    效果:

    =========================================================

    代码部分:

    =========================================================

    /lucene_0200_index/src/com/b510/lucene/util/LuceneUtil.java

      1 /**
      2  * 
      3  */
      4 package com.b510.lucene.util;
      5 
      6 import java.io.File;
      7 import java.io.IOException;
      8 
      9 import org.apache.lucene.analysis.standard.StandardAnalyzer;
     10 import org.apache.lucene.document.Document;
     11 import org.apache.lucene.document.Field;
     12 import org.apache.lucene.index.CorruptIndexException;
     13 import org.apache.lucene.index.IndexReader;
     14 import org.apache.lucene.index.IndexWriter;
     15 import org.apache.lucene.index.IndexWriterConfig;
     16 import org.apache.lucene.store.Directory;
     17 import org.apache.lucene.store.FSDirectory;
     18 import org.apache.lucene.store.LockObtainFailedException;
     19 import org.apache.lucene.util.Version;
     20 
     21 /**
     22  * @author Hongten <br />
     23  * @date 2013-1-31
     24  */
     25 public class LuceneUtil {
     26 
     27     /**
     28      * 邮件id
     29      */
     30     private String[] ids = { "1", "2", "3", "4", "5", "6" };
     31     /**
     32      * 邮箱
     33      */
     34     private String[] emails = { "aa@sina.com", "bb@foxmail.com", "cc@qq.com",
     35             "dd@163.com", "ee@gmail.com", "ff@sina.com" };
     36     /**
     37      * 邮件内容
     38      */
     39     private String[] contents = { "hello,aa", "hello,bb", "hello,cc",
     40             "hello,dd", "hello,ee", "hello,ff" };
     41     /**
     42      * 邮件的附件
     43      */
     44     private int[] attachs = { 1, 5, 3, 4, 2, 6 };
     45     /**
     46      * 收件人的名称
     47      */
     48     private String[] names = { "hongten", "hanyuan", "Devide", "Tom", "Steven",
     49             "Shala" };
     50 
     51     private Directory directory = null;
     52 
     53     public LuceneUtil() {
     54         try {
     55             directory = FSDirectory.open(new File(
     56                     "D:/WordPlace/lucene/lucene_0200_index/lucene/index"));
     57         } catch (IOException e) {
     58             e.printStackTrace();
     59         }
     60     }
     61 
     62     /**
     63      * 创建索引
     64      */
     65     public void index() {
     66         IndexWriter writer = null;
     67         try {
     68             writer = new IndexWriter(directory, new IndexWriterConfig(
     69                     Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
     70             // 创建文档
     71             Document document = null;
     72             for (int i = 0; i < ids.length; i++) {
     73                 // Field.Store.YES:将会存储域值,原始字符串的值会保存在索引,以此可以进行相应的回复操作,对于主键,标题可以是这种方式存储
     74                 // Field.Store.NO:不会存储域值,通常与Index.ANAYLIZED和起来使用,索引一些如文章正文等不需要恢复的文档
     75                 // ==============================
     76                 // Field.Index.ANALYZED:进行分词和索引,适用于标题,内容等
     77                 // Field.Index.NOT_ANALYZED:进行索引,但是不进行分词,如身份证号码,姓名,ID等,适用于精确搜索
     78                 // Field.Index.ANALYZED_NOT_NORMS:进行分词,但是不进行存储norms信息,这个norms中包括了创建索引的时间和权值等信息
     79                 // Field.Index.NOT_ANALYZED_NOT_NORMS:不进行分词也不进行存储norms信息(不推荐)
     80                 // Field.Index.NO:不进行分词
     81                 document = new Document();
     82                 document.add(new Field("id", ids[i], Field.Store.YES,
     83                         Field.Index.NOT_ANALYZED_NO_NORMS));
     84                 document.add(new Field("email", emails[i], Field.Store.YES,
     85                         Field.Index.NOT_ANALYZED));
     86                 document.add(new Field("content", contents[i], Field.Store.YES,
     87                         Field.Index.ANALYZED));
     88                 // document.add(new
     89                 // Field("attach",attachs[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
     90                 document.add(new Field("name", names[i], Field.Store.YES,
     91                         Field.Index.NOT_ANALYZED_NO_NORMS));
     92                 writer.addDocument(document);
     93             }
     94         } catch (CorruptIndexException e) {
     95             e.printStackTrace();
     96         } catch (LockObtainFailedException e) {
     97             e.printStackTrace();
     98         } catch (IOException e) {
     99             e.printStackTrace();
    100         } finally {
    101             if (writer != null) {
    102                 try {
    103                     writer.close();
    104                 } catch (CorruptIndexException e) {
    105                     e.printStackTrace();
    106                 } catch (IOException e) {
    107                     e.printStackTrace();
    108                 }
    109             }
    110         }
    111     }
    112 
    113     /**
    114      * 查询索引
    115      */
    116     public void query() {
    117         try {
    118             IndexReader reader = IndexReader.open(directory);
    119             System.out.println("文档数目:" + reader.numDocs());
    120             System.out.println("文档总数:" + reader.maxDoc());
    121         } catch (CorruptIndexException e) {
    122             e.printStackTrace();
    123         } catch (IOException e) {
    124             e.printStackTrace();
    125         }
    126     }
    127 }

    /lucene_0200_index/src/com/b510/lucene/test/IndexTest.java

    /**
     * 
     */
    package com.b510.lucene.test;
    
    import org.junit.Test;
    
    import com.b510.lucene.util.LuceneUtil;
    
    /**
     * JUnit entry points that drive {@link LuceneUtil}: one builds the index,
     * the other reads it back. Neither test makes assertions; both only invoke
     * the utility.
     *
     * @author Hongten <br />
     * @date 2013-1-31
     */
    public class IndexTest {

        /**
         * Runs {@code LuceneUtil.index()} on a fresh utility instance.
         */
        @Test
        public void testIndex() {
            new LuceneUtil().index();
        }

        /**
         * Runs {@code LuceneUtil.query()} on a fresh utility instance.
         */
        @Test
        public void testQuery() {
            new LuceneUtil().query();
        }
    }

     项目源码:https://files.cnblogs.com/hongten/lucene_0200_index.zip

    I'm Hongten

  • 相关阅读:
    PHP加速器
    sublime text3-代码片段配置
    CI源码引用使用--php引用demo,静态变量和引用关系
    配置nginx1.7.8支持pathinfo模式
    php多线程即时通讯
    linux上配置subversion服务器端安装配置并使用svn,windows本地检出,设置同步更新服务器的钩子
    time返回当前的 Unix 时间戳而$_SERVER["REQUEST_TIME"]得到请求开始时的时间戳
    yum命令学习
    linux自定义开机启动服务
    闲与忙
  • 原文地址:https://www.cnblogs.com/hongten/p/hongten_lucene_index.html
Copyright © 2011-2022 走看看