zoukankan      html  css  js  c++  java
  • lucene对日期(date)和整型(int)处理

    项目结构:

    运行效果:

    ==========================================

    代码部分:

    ==========================================

    /lucene_0400_dateAndInt/src/com/b510/lucene/util/LuceneUtil.java

      1 /**
      2  * 
      3  */
      4 package com.b510.lucene.util;
      5 
      6 import java.io.File;
      7 import java.io.IOException;
      8 import java.text.ParseException;
      9 import java.text.SimpleDateFormat;
     10 import java.util.Date;
     11 import java.util.HashMap;
     12 import java.util.Map;
     13 
     14 import org.apache.lucene.analysis.standard.StandardAnalyzer;
     15 import org.apache.lucene.document.Document;
     16 import org.apache.lucene.document.Field;
     17 import org.apache.lucene.document.NumericField;
     18 import org.apache.lucene.index.CorruptIndexException;
     19 import org.apache.lucene.index.IndexReader;
     20 import org.apache.lucene.index.IndexWriter;
     21 import org.apache.lucene.index.IndexWriterConfig;
     22 import org.apache.lucene.index.Term;
     23 import org.apache.lucene.search.IndexSearcher;
     24 import org.apache.lucene.search.ScoreDoc;
     25 import org.apache.lucene.search.TermQuery;
     26 import org.apache.lucene.search.TopDocs;
     27 import org.apache.lucene.store.Directory;
     28 import org.apache.lucene.store.FSDirectory;
     29 import org.apache.lucene.store.LockObtainFailedException;
     30 import org.apache.lucene.util.Version;
     31 
     32 /**
     33  * @author Hongten <br />
     34  * @date 2013-1-31
     35  */
     36 public class LuceneUtil {
     37 
     38     /**
     39      * 邮件id
     40      */
     41     private String[] ids = { "1", "2", "3", "4", "5", "6" };
     42     /**
     43      * 邮箱
     44      */
     45     private String[] emails = { "aa@sina.com", "bb@foxmail.com", "cc@qq.com",
     46             "dd@163.com", "ee@gmail.com", "ff@sina.com" };
     47     /**
     48      * 邮件内容
     49      */
     50     private String[] contents = { "hello,aa,hi,hell world!!", 
     51                                   "hello,bb,i'm a boy", 
     52                                   "hello,cc",
     53                                   "hello,dd,welcome to my zone,this is a test hello", 
     54                                   "hello,ee,haha,xixi,hello world!!", 
     55                                   "hello,ff" };
     56     /**
     57      * 附件数
     58      */
     59     private int[] attachs = {1,5,3,2,1,6};
     60     /**
     61      * 日期
     62      */
     63     private Date[] dates = null;
     64     /**
     65      * 收件人的名称
     66      */
     67     private String[] names = { "hongten", "hanyuan", "Devide", "Tom", "Steven",
     68             "Shala" };
     69 
     70     private Directory directory = null;
     71     /**
     72      * 评分
     73      */
     74     private Map<String, Float> scores = new HashMap<String, Float>();
     75     
     76     public LuceneUtil() {
     77         try {
     78             setDates();
     79             scores.put("sina.com", 1.0f);
     80             scores.put("foxmail.com", 1.1f);
     81             directory = FSDirectory.open(new File(
     82                     "D:/WordPlace/lucene/lucene_0400_dateAndInt/lucene/index"));
     83         } catch (IOException e) {
     84             e.printStackTrace();
     85         }
     86     }
     87 
     88     /**
     89      * 创建日期
     90      */
     91     public void setDates(){
     92         SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
     93         try {
     94             dates = new Date[ids.length];
     95             dates[0] = sdf.parse("2012-11-18");
     96             dates[1] = sdf.parse("2010-01-28");
     97             dates[2] = sdf.parse("2011-11-21");
     98             dates[3] = sdf.parse("2012-12-12");
     99             dates[4] = sdf.parse("2011-06-23");
    100             dates[5] = sdf.parse("2012-03-15");
    101         } catch (ParseException e) {
    102             e.printStackTrace();
    103         }
    104     }
    105     
    106     /**
    107      * 创建索引
    108      */
    109     public void index() {
    110         IndexWriter writer = null;
    111         try {
    112             writer = new IndexWriter(directory, new IndexWriterConfig(
    113                     Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    114             //删除之前所建立的全部索引
    115             writer.deleteAll();
    116             // 创建文档
    117             Document document = null;
    118             for (int i = 0; i < ids.length; i++) {
    119                 // Field.Store.YES:将会存储域值,原始字符串的值会保存在索引,以此可以进行相应的回复操作,对于主键,标题可以是这种方式存储
    120                 // Field.Store.NO:不会存储域值,通常与Index.ANAYLIZED和起来使用,索引一些如文章正文等不需要恢复的文档
    121                 // ==============================
    122                 // Field.Index.ANALYZED:进行分词和索引,适用于标题,内容等
    123                 // Field.Index.NOT_ANALYZED:进行索引,但是不进行分词,如身份证号码,姓名,ID等,适用于精确搜索
    124                 // Field.Index.ANALYZED_NOT_NORMS:进行分词,但是不进行存储norms信息,这个norms中包括了创建索引的时间和权值等信息
    125                 // Field.Index.NOT_ANALYZED_NOT_NORMS:不进行分词也不进行存储norms信息(不推荐)
    126                 // Field.Index.NO:不进行分词
    127                 document = new Document();
    128                 document.add(new Field("id", ids[i], Field.Store.YES,
    129                         Field.Index.NOT_ANALYZED_NO_NORMS));
    130                 document.add(new Field("email", emails[i], Field.Store.YES,
    131                         Field.Index.NOT_ANALYZED));
    132                 document.add(new Field("content", contents[i], Field.Store.YES,
    133                         Field.Index.ANALYZED));
    134                 document.add(new Field("name", names[i], Field.Store.YES,
    135                         Field.Index.NOT_ANALYZED_NO_NORMS));
    136                 document.add(new NumericField("attach", Field.Store.YES,true).setIntValue(attachs[i]));
    137                 document.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));
    138                 
    139                 //这里进行加权处理
    140                 String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
    141                 System.out.println(et);
    142                 if(scores.containsKey(et)){
    143                     document.setBoost(scores.get(et));
    144                 }else{
    145                     document.setBoost(0.6f);
    146                 }
    147                 writer.addDocument(document);
    148             }
    149         } catch (CorruptIndexException e) {
    150             e.printStackTrace();
    151         } catch (LockObtainFailedException e) {
    152             e.printStackTrace();
    153         } catch (IOException e) {
    154             e.printStackTrace();
    155         } finally {
    156             if (writer != null) {
    157                 try {
    158                     writer.close();
    159                 } catch (CorruptIndexException e) {
    160                     e.printStackTrace();
    161                 } catch (IOException e) {
    162                     e.printStackTrace();
    163                 }
    164             }
    165         }
    166     }
    167     
    168     /**
    169      * 搜索
    170      */
    171     public void search(){
    172         try {
    173             IndexReader reader = IndexReader.open(directory);
    174             IndexSearcher searcher = new IndexSearcher(reader);
    175             TermQuery query = new TermQuery(new Term("content","hello"));
    176             TopDocs tds =searcher.search(query, 10);
    177             for(ScoreDoc sd : tds.scoreDocs){
    178                 Document doc = searcher.doc(sd.doc);
    179                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    180                 Date date = null;
    181                 /*try {
    182                     date = sdf.parse(doc.get("date"));
    183                 } catch (ParseException e) {
    184                     e.printStackTrace();
    185                 }*/
    186                 System.out.println("文档序号:["+sd.doc+"] 得分:["+sd.score+"] 邮件名称:["+doc.get("email")+"] 邮件人:["+doc.get("name")+"] 附件数:["+doc.get("attach")+"] 日期:["+doc.get("date")+"] 内容 : ["+doc.get("content")+"]");
    187             }
    188         } catch (CorruptIndexException e) {
    189             e.printStackTrace();
    190         } catch (IOException e) {
    191             e.printStackTrace();
    192         }
    193     }
    194 }

    /lucene_0400_dateAndInt/src/com/b510/lucene/test/IndexTest.java

     1 /**
     2  * 
     3  */
     4 package com.b510.lucene.test;
     5 
     6 import org.junit.Test;
     7 
     8 import com.b510.lucene.util.LuceneUtil;
     9 
    10 /**
    11  * JUnit entry points that exercise {@link LuceneUtil}.
    12  *
    13  * @author Hongten <br />
    14  * @date 2013-1-31
    15  */
    16 public class IndexTest {
    17 
    18     /** Rebuilds the sample index. */
    19     @Test
    20     public void testIndex() {
    21         LuceneUtil util = new LuceneUtil();
    22         util.index();
    23     }
    24 
    25     /**
    26      * Builds the sample index and then searches it. The index is built here
    27      * because JUnit does not guarantee that testIndex runs first.
    28      */
    29     @Test
    30     public void testSearch() {
    31         LuceneUtil util = new LuceneUtil();
    32         util.index();
    33         util.search();
    34     }
    35 
    36 }

    I'm Hongten

  • 相关阅读:
    axios
    es6
    $route监听路由变化
    容易挂
    自定义全局组件/插件
    eslintrc.js
    dev-server.js
    webpack-dev-server.js
    怎么消除间隔间的空白字符
    es6语法
  • 原文地址:https://www.cnblogs.com/hongten/p/hongten_lucene_date_int.html
Copyright © 2011-2022 走看看