zoukankan      html  css  js  c++  java
  • lucene 建立CRUD操作

    IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库
    这句会引发线程安全问题,在全局中 IndexSearcher 只能有一个对象才可以,所以在ArticleDocumentUtils中保存一个并且引用它。
    IndexWriter为了提高效率,会先把更改缓存在内存中,所以需要调用commit()才能把更改写入索引库文件中


    数据库优化
    每次添加数据在索引文件夹下有很多小文件,为了合并小文件提高效率

    //优化,合并多个小文件为一个大文件
    LuceneUtils.getIndexWriter().optimize();


    //配置当小文件的数量达到多少个后就自动合并为一个大文件,最小2,默认10
    LuceneUtils.getIndexWriter().setMergeFactor(3);
    当增加数据的时候自动触发。

    LuceneUtils.java

     1 package cn.itcast._util;
     2 
     3 import java.io.File;
     4 import java.io.IOException;
     5 
     6 import org.apache.lucene.analysis.Analyzer;
     7 import org.apache.lucene.analysis.standard.StandardAnalyzer;
     8 import org.apache.lucene.index.CorruptIndexException;
     9 import org.apache.lucene.index.IndexWriter;
    10 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    11 import org.apache.lucene.store.Directory;
    12 import org.apache.lucene.store.FSDirectory;
    13 import org.apache.lucene.store.LockObtainFailedException;
    14 import org.apache.lucene.util.Version;
    15 
    16 public class LuceneUtils {
    17 
    18     private static Directory directory; // 索引库目录
    19     private static Analyzer analyzer; // 分词器
    20 
    21     private static IndexWriter indexWriter;
    22 
    23     static {
    24         try {
    25             // 这里应是读取配置文件得到的索引库目录
    26             directory = FSDirectory.open(new File("./indexDir"));
    27             analyzer = new StandardAnalyzer(Version.LUCENE_30);
    28         } catch (IOException e) {
    29             throw new RuntimeException(e);
    30         }
    31     }
    32 
    33     /**
    34      * 获取全局唯一的IndexWriter对象
    35      * 
    36      * @return
    37      */
    38     public static IndexWriter getIndexWriter() {
    39         // 在第一次使用IndexWriter是进行初始化
    40         if (indexWriter == null) {
    41             synchronized (LuceneUtils.class) { // 注意线程安全问题
    42                 if (indexWriter == null) {
    43                     try {
    44                         indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
    45                         System.out.println("=== 已经初始化 IndexWriter ===");
    46                     } catch (Exception e) {
    47                         throw new RuntimeException(e);
    48                     }
    49                 }
    50             }
    51 
    52             // 指定一段代码,会在JVM退出之前执行。
    53             Runtime.getRuntime().addShutdownHook(new Thread() {
    54                 public void run() {
    55                     try {
    56                         indexWriter.close();
    57                         System.out.println("=== 已经关闭 IndexWriter ===");
    58                     } catch (Exception e) {
    59                         throw new RuntimeException(e);
    60                     }
    61                 }
    62             });
    63         }
    64 
    65         return indexWriter;
    66     }
    67 
    68     public static Directory getDirectory() {
    69         return directory;
    70     }
    71 
    72     public static Analyzer getAnalyzer() {
    73         return analyzer;
    74     }
    75 
    76 }

    ArticleDocumentUtils.java

     1 package cn.itcast._util;
     2 
     3 import org.apache.lucene.document.Document;
     4 import org.apache.lucene.document.Field;
     5 import org.apache.lucene.document.Field.Index;
     6 import org.apache.lucene.document.Field.Store;
     7 import org.apache.lucene.util.NumericUtils;
     8 
     9 import cn.itcast._domain.Article;
    10 
    11 public class ArticleDocumentUtils {
    12 
    13     /**
    14      * 把Article转为Document
    15      * 
    16      * @param article
    17      * @return
    18      */
    19     public static Document articleToDocument(Article article) {
    20         Document doc = new Document();
    21 
    22         String idStr = NumericUtils.intToPrefixCoded(article.getId()); // 一定要使用Lucene的工具类把数字转为字符串!
    23         
    24         doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED)); // 注意:唯一标示符一般选择Index.NOT_ANALYZED
    25         doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
    26         doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
    27 
    28         return doc;
    29     }
    30 
    31     /**
    32      * 把Document转为Article
    33      * 
    34      * @param doc
    35      * @return
    36      */
    37     public static Article documentToArticle(Document doc) {
    38         Article article = new Article();
    39         
    40         Integer id = NumericUtils.prefixCodedToInt(doc.get("id")); // 一定要使用Lucene的工具类把字符串转为数字!
    41         
    42         article.setId(id);
    43         article.setTitle(doc.get("title"));
    44         article.setContent(doc.get("content"));
    45         
    46         return article;
    47     }
    48 
    49 }
    View Code

    QueryResult.java

     1 package cn.itcast._domain;
     2 
     3 import java.util.List;
     4 
     /**
      * One page of search results together with the total number of matches.
      *
      * <p>The element type is a type parameter so that callers no longer need an
      * unchecked cast on {@link #getList()}. Existing code that uses the raw type
      * {@code QueryResult} keeps compiling.
      *
      * @param <T> type of the elements in the page
      */
     public class QueryResult<T> {
         private List<T> list; // the page of data
         private int count; // total number of matching records

         /**
          * @param list  the page of data
          * @param count total number of matching records
          */
         public QueryResult(List<T> list, int count) {
             this.list = list;
             this.count = count;
         }

         /**
          * @return the page of data
          */
         public List<T> getList() {
             return list;
         }

         /**
          * @param list the page of data
          */
         public void setList(List<T> list) {
             this.list = list;
         }

         /**
          * @return total number of matching records
          */
         public int getCount() {
             return count;
         }

         /**
          * @param count total number of matching records
          */
         public void setCount(int count) {
             this.count = count;
         }

     }
    View Code

    ArticleIndexDao.java

      1 package cn.itcast.b_indexdao;
      2 
      3 import java.io.IOException;
      4 import java.util.ArrayList;
      5 import java.util.List;
      6 
      7 import org.apache.lucene.document.Document;
      8 import org.apache.lucene.index.Term;
      9 import org.apache.lucene.queryParser.MultiFieldQueryParser;
     10 import org.apache.lucene.queryParser.QueryParser;
     11 import org.apache.lucene.search.IndexSearcher;
     12 import org.apache.lucene.search.Query;
     13 import org.apache.lucene.search.TopDocs;
     14 import org.apache.lucene.util.NumericUtils;
     15 import org.apache.lucene.util.Version;
     16 
     17 import cn.itcast._domain.Article;
     18 import cn.itcast._domain.QueryResult;
     19 import cn.itcast._util.ArticleDocumentUtils;
     20 import cn.itcast._util.LuceneUtils;
     21 
     22 public class ArticleIndexDao {
     23 
     24     /**
     25      * 保存到索引库(建立索引)
     26      * 
     27      * @param article
     28      */
     29     public void save(Article article) {
     30         // 1,把Article转为Document
     31         Document doc = ArticleDocumentUtils.articleToDocument(article);
     32 
     33         // 2,添加到索引库中
     34         try {
     35             LuceneUtils.getIndexWriter().addDocument(doc); // 添加
     36             LuceneUtils.getIndexWriter().commit(); // 提交更改
     37         } catch (Exception e) {
     38             throw new RuntimeException(e);
     39         }
     40     }
     41 
     42     /**
     43      * 删除索引
     44      * 
     45      * Term :某字段中出现的某一个关键词(在索引库的目录中)
     46      * 
     47      * @param id
     48      */
     49     public void delete(Integer id) {
     50         try {
     51             String idStr = NumericUtils.intToPrefixCoded(id); // 一定要使用Lucene的工具类把数字转为字符串!
     52             Term term = new Term("id", idStr);
     53 
     54             LuceneUtils.getIndexWriter().deleteDocuments(term); // 删除所有含有这个Term的Document
     55             LuceneUtils.getIndexWriter().commit(); // 提交更改
     56         } catch (Exception e) {
     57             throw new RuntimeException(e);
     58         }
     59     }
     60 
     61     /**
     62      * 更新索引
     63      * 
     64      * @param article
     65      */
     66     public void update(Article article) {
     67         try {
     68             Term term = new Term("id", NumericUtils.intToPrefixCoded(article.getId())); // 一定要使用Lucene的工具类把数字转为字符串!
     69             Document doc = ArticleDocumentUtils.articleToDocument(article);
     70 
     71             LuceneUtils.getIndexWriter().updateDocument(term, doc); // 更新就是先删除再添加
     72             LuceneUtils.getIndexWriter().commit(); // 提交更改
     73 
     74             // indexWriter.deleteDocuments(term);
     75             // indexWriter.addDocument(doc);
     76         } catch (Exception e) {
     77             throw new RuntimeException(e);
     78         }
     79     }
     80 
     81     /**
     82      * * 搜索   用于分页的
     83      * 
     84      * @param queryString
     85      *            查询条件
     86      * @param first
     87      *            从结果列表的哪个索引开始获取数据
     88      * @param max
     89      *            最多获取多少条数据(如果没有这么多,就把剩余的都返回)
     90      * 
     91      * @return 一段数据列表 + 符合条件的总记录数
     92      */
     93     public QueryResult search(String queryString, int first, int max) {
     94         IndexSearcher indexSearcher = null;
     95         try {
     96             // 1,把查询字符串转为Query对象(在title与content中查询)
     97             QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "title", "content" }, LuceneUtils.getAnalyzer());
     98             Query query = queryParser.parse(queryString);
     99 
    100             // 2,执行查询,得到中间结果
    101             indexSearcher = new IndexSearcher(LuceneUtils.getDirectory());
    102             TopDocs topDocs = indexSearcher.search(query, first + max); // 最多返回前n条数据,这里要计算好,要返回足够数量的数据
    103             int count = topDocs.totalHits; // 符合条件的总记录数
    104 
    105             // 3,处理数据
    106             List<Article> list = new ArrayList<Article>();
    107             int endIndex = Math.min(first + max, topDocs.scoreDocs.length); // 计算结束的边界
    108 
    109             for (int i = first; i < endIndex; i++) { // 应只取一段数据
    110                 // 根据内部编号获取真正的Document数据
    111                 int docId = topDocs.scoreDocs[i].doc;
    112                 Document doc = indexSearcher.doc(docId);
    113                 // 把Document转换为Article
    114                 Article article = ArticleDocumentUtils.documentToArticle(doc);
    115                 list.add(article);
    116             }
    117 
    118             // 4,封装结果并返回
    119             return new QueryResult(list, count);
    120 
    121         } catch (Exception e) {
    122             throw new RuntimeException(e);
    123         } finally {
    124             // 关闭IndexSearcher
    125             if (indexSearcher != null) {
    126                 try {
    127                     indexSearcher.close();
    128                 } catch (IOException e) {
    129                     throw new RuntimeException(e);
    130                 }
    131             }
    132         }
    133     }
    134 }

    不分页的查询

    LuceneUtils.getIndexWriter()
     1     public List<Article> searchArticle(String condition) {
     2         // 执行搜索
     3         List<Article> list = new ArrayList<Article>();
     4         IndexSearcher indexSearcher = null;
     5         try {
     6             // 1,把查询字符串转为Query对象(默认只从title中查询)
     7             QueryParser queryParser = new MultiFieldQueryParser(
     8                     Version.LUCENE_30, new String[] { "title", "content" },
     9                     LuceneUtils.getAnalyzer());
    10             Query query = queryParser.parse(condition);
    11 
    12             // 2,执行查询,得到中间结果
    13             //indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库,会引发线程安全问题
             indexSearcher=LuceneUtils.getIndexWriter();
    14 TopDocs topDocs = indexSearcher.search(query, 1000); // 最多返回前n条结果 15 int count = topDocs.totalHits; 16 System.out.println("scoreDocs.length"+topDocs.scoreDocs.length); //一样 17 System.out.println("count"+count); //一样 18 ScoreDoc[] scoreDocs = topDocs.scoreDocs; 19 20 // 3,处理结果 21 for (int i = 0; i < scoreDocs.length; i++) { 22 ScoreDoc scoreDoc = scoreDocs[i]; 23 float score = scoreDoc.score; // 相关度得分 24 int docId = scoreDoc.doc; // Document的内部编号 25 26 // 根据编号拿到Document数据 27 Document document = indexSearcher.doc(docId); 28 29 // 把Document转为Article 30 Article article=ArticleDocumentUtils.documentToArticle(document); 31 32 list.add(article); 33 } 34 } catch (Exception e) { 35 throw new RuntimeException(); 36 } finally { 37 try { 38 if (null != indexSearcher) 39 indexSearcher.close(); 40 } catch (Exception e) { 41 e.printStackTrace(); 42 } 43 } 44 return list; 45 }

    ArticleIndexDaoTest.java

     1 package cn.itcast.b_indexdao;
     2 
     3 import java.util.List;
     4 
     5 import org.junit.Test;
     6 
     7 import cn.itcast._domain.Article;
     8 import cn.itcast._domain.QueryResult;
     9 
    10 public class ArticleIndexDaoTest {
    11 
    12     private ArticleIndexDao indexDao = new ArticleIndexDao();
    13 
    14     @Test
    15     public void testSave() {
    16         // 准备数据
    17         Article article = new Article();
    18         article.setId(1);
    19         article.setTitle("准备Lucene的开发环境");
    20         article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");
    21 
    22         // 放到索引库中
    23         indexDao.save(article);
    24     }
    25 
    26     @Test
    27     public void testSave_25() {
    28         for (int i = 1; i <= 25; i++) {
    29             // 准备数据
    30             Article article = new Article();
    31             article.setId(i);
    32             article.setTitle("准备Lucene的开发环境");
    33             article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");
    34 
    35             // 放到索引库中
    36             indexDao.save(article);
    37         }
    38     }
    39 
    40     @Test
    41     public void testDelete() {
    42         indexDao.delete(1);
    43     }
    44 
    45     @Test
    46     public void testUpdate() {
    47         // 准备数据
    48         Article article = new Article();
    49         article.setId(1);
    50         article.setTitle("准备Lucene的开发环境");
    51         article.setContent("这是更新后的内容");
    52 
    53         // 更新到索引库中
    54         indexDao.update(article);
    55     }
    56     //用于分页的
    57     @Test
    58     public void testSearch() {
    59         // 准备查询条件
    60         String queryString = "lucene";
    61         // String queryString = "hibernate";
    62 
    63         // 执行搜索
    64         // QueryResult qr = indexDao.search(queryString, 0, 10000);
    65 
    66         // QueryResult qr = indexDao.search(queryString, 0, 10); // 第1页,每页10条
    67         // QueryResult qr = indexDao.search(queryString, 10, 10); // 第2页,每页10条
    68         QueryResult qr = indexDao.search(queryString, 20, 10); // 第3页,每页10条
    69 
    70         // 显示结果
    71         System.out.println("总结果数:" + qr.getCount());
    72         for (Article a : (List<Article>) qr.getList()) {
    73             System.out.println("------------------------------");
    74             System.out.println("id = " + a.getId());
    75             System.out.println("title = " + a.getTitle());
    76             System.out.println("content = " + a.getContent());
    77         }
    78     }
    79 
    80 }

     不分页查询测试

     1 @Test
     2     public void testSearchArticle() {
     3         // 准备查询条件
     4         String queryString = "lucene的";
     5         // String queryString = "hibernate";
     6 
     7         // 执行搜索
     8         List<Article> list =dao.searchArticle(queryString);
     9         
    10         // 显示结果
    11         System.out.println("总结果数:" + list.size());
    12         for (Article a : list) {
    13             System.out.println("------------------------------");
    14             System.out.println("id = " + a.getId());
    15             System.out.println("title = " + a.getTitle());
    16             System.out.println("content = " + a.getContent());
    17         }
    18     }
  • 相关阅读:
    Codeforces Round #107 (Div. 1) D Mission Impassable
    Codeforces Round #107 (Div. 1) C Smart Cheater
    Codeforces Round #104 (Div. 1) D Lucky Pair
    Codeforces Round #104 (Div. 1) C Lucky Subsequence
    拓扑排序&&欧拉(回)路
    复习笔记之矩阵快速幂(不定时更新)
    复习笔记之母函数
    树链剖分来一发
    最短路算法略解
    题目记录
  • 原文地址:https://www.cnblogs.com/friends-wf/p/3795299.html
Copyright © 2011-2022 走看看