zoukankan      html  css  js  c++  java
  • Lucene 简单使用

      1 package com.kite.luncene.helloworld;
      2 
      3 import java.io.File;
      4 import java.util.ArrayList;
      5 import java.util.List;
      6 
      7 import org.apache.lucene.analysis.Analyzer;
      8 import org.apache.lucene.analysis.standard.StandardAnalyzer;
      9 import org.apache.lucene.document.Document;
     10 import org.apache.lucene.document.Field;
     11 import org.apache.lucene.document.Field.Index;
     12 import org.apache.lucene.document.Field.Store;
     13 import org.apache.lucene.index.IndexWriter;
     14 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
     15 import org.apache.lucene.queryParser.QueryParser;
     16 import org.apache.lucene.search.IndexSearcher;
     17 import org.apache.lucene.search.Query;
     18 import org.apache.lucene.search.ScoreDoc;
     19 import org.apache.lucene.search.TopDocs;
     20 import org.apache.lucene.store.Directory;
     21 import org.apache.lucene.store.FSDirectory;
     22 import org.apache.lucene.util.Version;
     23 import org.junit.Test;
     24 
     25 import com.kite.bean.Article;
     26 
     27 public class HelloWorld
     28 {
     29     @Test
     30     public void testCreate() throws Exception
     31     {
     32         /**
     33          * 1、创建一个article对象,并且把信息存放进去
     34          * 2、调用indexWriter的API把数据存放在索引库中
     35          * 3、关闭indexWriter
     36          */
     37         //创建一个article对象,并且把信息存放进去
     38         Article article = new Article();
     39         article.setId(1L);
     40         article.setTitle("java goodnice");
     41         article .setContent("多年来就是这么吊");
     42         
     43         //2、调用indexWriter的API把数据存放在索引库中
     44          /**
     45             * 创建一个IndexWriter
     46             *    参数三个
     47             *       1、索引库   指向索引库的位置
     48             *       2、分词器
     49             */
     50             //创建索引库
     51             Directory directory = FSDirectory.open(new File("./indexDir"));
     52             //创建分词器
     53             Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
     54         IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
     55         
     56         //把一个对象转换成document
     57         Document document = new Document();
     58         Field idField = new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED);
     59         Field titleField = new Field("title", article.getTitle(), Store.YES, Index.ANALYZED);
     60         Field contentField = new Field("content", article.getContent(), Store.YES, Index.ANALYZED);
     61         document.add(idField);
     62         document.add(titleField);
     63         document.add(contentField);
     64         indexWriter.addDocument(document);
     65         
     66         //3、关闭indexWriter
     67         indexWriter.close();
     68     }
     69     
     70     @Test
     71     public  void testSearchIndex() throws Exception
     72     {
     73         /**
     74          * 1.创建一个 indexSerach对象
     75          * 2.调用search方法进行检索
     76          * 3.输出内容
     77          */
     78         
     79         // 1.创建一个 indexSerach对象
     80         //--索引库
     81         Directory directory = FSDirectory.open(new File("./indexDir"));
     82         IndexSearcher searcher = new IndexSearcher(directory);
     83         //2..调用search方法进行检索
     84         //--
     85         Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
     86         QueryParser queryParser = new QueryParser(Version.LUCENE_30, "id", analyzer);
     87         //设置搜索的关键字
     88         Query query = queryParser.parse("1");
     89         TopDocs topDocs = searcher.search(query, 10);
     90         //获得根据关键字查询出来的总的记录数
     91         int count = topDocs.totalHits;
     92         List<Article> articles = new ArrayList<Article>();
     93         //获得数组
     94         ScoreDoc[] scoreDocs = topDocs.scoreDocs;
     95         for(ScoreDoc scoreDoc : scoreDocs)
     96         {
     97             //关键字得分
     98             float score = scoreDoc.score;
     99             //索引的下标
    100             int index = scoreDoc.doc;
    101             //根据索引获得document对象
    102             Document document = searcher.doc(index);
    103             //把document转化成article
    104             Article article = new Article();
    105             article.setId(Long.parseLong(document.get("id")));
    106             article.setTitle(document.get("title"));
    107             article.setContent(document.get("content"));
    108             articles.add(article);
    109         }
    110         for(Article article : articles)
    111         {
    112             System.out.println(article.getId());
    113             System.out.println(article.getTitle());
    114             System.out.println(article.getContent());
    115         }
    116     }
    117 }


    两个工具类
    package com.kite.luncene.utils;
    
    import java.io.File;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    /**
     * Holds the Lucene index directory and analyzer shared by the index examples.
     * Both are created once, when this class is initialized.
     */
    public class LunceneUtils
    {
        /**
         * Index directory, opened on {@code ./indexDor}.
         * NOTE(review): the path is spelled "indexDor", not "indexDir" -- confirm that a
         * separate index directory is intended.
         */
        public static Directory directory;
        /** Analyzer (StandardAnalyzer for Lucene 3.0). */
        public static Analyzer analyzer;
        static
        {
            try
            {
                directory = FSDirectory.open(new File("./indexDor"));
                analyzer = new StandardAnalyzer(Version.LUCENE_30);
            } catch (Exception e)
            {
                // Fail fast with the real cause instead of printing it and leaving the
                // fields null, which would only surface later as an unrelated NPE.
                throw new ExceptionInInitializerError(e);
            }
        }
    }

    package com.kite.luncene.utils;
    
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Field.Store;
    
    import com.kite.bean.Article;
    
    /**
     * Conversions between {@link Article} beans and Lucene {@link Document}s.
     */
    public class DocumentUtils
    {
        /**
         * Builds a document from an article.
         * The id is stored and indexed without analysis; title and content are stored
         * and analyzed.
         *
         * @param article the article to convert
         * @return a document with the fields "id", "title" and "content"
         */
        public static Document articleToDocument(Article article)
        {
            Document doc = new Document();
            doc.add(new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
            doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
            return doc;
        }

        /**
         * Builds an article from the stored "id", "title" and "content" fields of a document.
         *
         * @param document the document to convert
         * @return an article populated from the document's stored fields
         */
        public static Article documentToArticle(Document document)
        {
            long id = Long.parseLong(document.get("id"));
            String title = document.get("title");
            String content = document.get("content");

            Article result = new Article();
            result.setId(id);
            result.setTitle(title);
            result.setContent(content);
            return result;
        }
    }


    使用工具类实现简单的增删改查功能
    package com.kite.luncene.index;
    
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.util.Version;
    import org.junit.Test;
    
    import com.kite.bean.Article;
    import com.kite.luncene.utils.DocumentUtils;
    import com.kite.luncene.utils.LunceneUtils;
    
    /**
     * Create / search / delete / update examples against the index held by
     * {@link LunceneUtils}, using {@link DocumentUtils} for the bean conversions.
     * The shared directory itself is never closed here; only the writers and the
     * searcher opened by each test are.
     */
    public class ArticleIndex
    {
        /**
         * Adds one article to the index.
         *
         * @throws Exception if the index cannot be opened or written
         */
        @Test
        public void testCreateIndex() throws Exception
        {
            IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
            try
            {
                Article article = new Article();
                article.setId(1L);
                article.setTitle("luncenes是一个好难写的东西");
                article.setContent("百度,谷歌是很好的搜索引擎");
                // Convert to a document through the utility class.
                Document document = DocumentUtils.articleToDocument(article);
                indexWriter.addDocument(document);
            }
            finally
            {
                // Close on every path so the writer does not stay open after a failure.
                indexWriter.close();
            }
        }

        /**
         * Searches the "title" field for the keyword "luncene" and prints each hit.
         *
         * @throws Exception if the index cannot be opened or the query cannot be parsed
         */
        @Test
        public void testSearchIndex() throws Exception
        {
            IndexSearcher indexSearcher = new IndexSearcher(LunceneUtils.directory);
            try
            {
                /*
                 * Version.LUCENE_30      Lucene version
                 * "title"                field to search
                 * LunceneUtils.analyzer  analyzer
                 */
                QueryParser queryParser = new QueryParser(Version.LUCENE_30, "title", LunceneUtils.analyzer);
                // "luncene" is the search keyword.
                Query query = queryParser.parse("luncene");
                // At most 2 hits are returned.
                TopDocs topDocs = indexSearcher.search(query, 2);

                List<Article> articles = new ArrayList<Article>();
                for (ScoreDoc scoreDoc : topDocs.scoreDocs)
                {
                    // scoreDoc.doc is the internal document number; load the stored
                    // document by that number and convert it back to an article.
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    articles.add(DocumentUtils.documentToArticle(document));
                }

                // Print every hit. All three fields go to System.out so the lines
                // cannot be reordered between two different streams.
                for (Article article : articles)
                {
                    System.out.println(article.getId().toString());
                    System.out.println(article.getTitle());
                    System.out.println(article.getContent());
                }
            }
            finally
            {
                indexSearcher.close();
            }
        }

        /**
         * Deletes every document whose "title" field contains the term "luncenes".
         * (An update is a delete followed by an add.)
         *
         * @throws Exception if the index cannot be opened or written
         */
        @Test
        public void testDeleteIndex() throws Exception
        {
            IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
            try
            {
                // indexWriter.deleteAll() would delete everything instead.
                /*
                 * Term: keyword object
                 *   "title"     field name
                 *   "luncenes"  keyword text
                 */
                Term term = new Term("title", "luncenes");
                // Delete by term; per the original author this adds a ".del" file to the index folder.
                indexWriter.deleteDocuments(term);
            }
            finally
            {
                indexWriter.close();
            }
        }

        /**
         * Replaces the documents matching the term title:"luncenes" with a new version
         * of the article.
         *
         * @throws Exception if the index cannot be opened or written
         */
        @Test
        public void testUpdateIndex() throws Exception
        {
            IndexWriter indexWriter = new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
            try
            {
                Term term = new Term("title", "luncenes");
                Article article = new Article();
                article.setId(1L);
                article.setTitle("luncene是一个好难写的东西,少个s不解释");
                article.setContent("百度,谷歌是很好的搜索引擎");
                /*
                 * term  selects the documents to replace
                 * doc   the replacement content
                 */
                indexWriter.updateDocument(term, DocumentUtils.articleToDocument(article));
            }
            finally
            {
                indexWriter.close();
            }
        }
    }
    
    
    
     
    
    
    
     
  • 相关阅读:
    supervisor 配置
    单链表
    二叉排序树
    python 排序
    64 位 Ubuntu 下 android adb 不可用解决方法
    python 获取文件夹大小
    Ubuntu mongodb 安装和配置
    关于c3p0配置详细说明
    dwr消息推送
    关于如果修改 ie 浏览器 文本模式
  • 原文地址:https://www.cnblogs.com/kite/p/3645571.html
Copyright © 2011-2022 走看看