zoukankan      html  css  js  c++  java
  • lucene 简单实用

      package com.kite.luncene.helloworld;

      import java.io.File;
      import java.util.ArrayList;
      import java.util.List;

      import org.apache.lucene.analysis.Analyzer;
      import org.apache.lucene.analysis.standard.StandardAnalyzer;
      import org.apache.lucene.document.Document;
      import org.apache.lucene.document.Field;
      import org.apache.lucene.document.Field.Index;
      import org.apache.lucene.document.Field.Store;
      import org.apache.lucene.index.IndexWriter;
      import org.apache.lucene.index.IndexWriter.MaxFieldLength;
      import org.apache.lucene.queryParser.QueryParser;
      import org.apache.lucene.search.IndexSearcher;
      import org.apache.lucene.search.Query;
      import org.apache.lucene.search.ScoreDoc;
      import org.apache.lucene.search.TopDocs;
      import org.apache.lucene.store.Directory;
      import org.apache.lucene.store.FSDirectory;
      import org.apache.lucene.util.Version;
      import org.junit.Test;

      import com.kite.bean.Article;

      /**
       * Minimal Lucene walk-through: one test writes a single {@link Article}
       * into an on-disk index, the other searches that index and prints the hits.
       */
      public class HelloWorld
      {
          /** Location of the on-disk index used by both tests. */
          private static final String INDEX_PATH = "./indexDir";

          /**
           * Builds an article, converts it to a Lucene document and adds it to the index.
           *
           * Steps:
           * 1. create an Article and fill in its data
           * 2. store it in the index through the IndexWriter API
           * 3. close the IndexWriter
           *
           * @throws Exception if the index cannot be opened or written
           */
          @Test
          public void testCreate() throws Exception
          {
              // 1. Create the article and populate it.
              Article article = new Article();
              article.setId(1L);
              article.setTitle("java goodnice");
              article.setContent("多年来就是这么吊");

              // 2. Open the index directory and a writer on top of it.
              Directory directory = FSDirectory.open(new File(INDEX_PATH));
              try
              {
                  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                  IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
                  try
                  {
                      // Convert the article into a document: the id is indexed as-is,
                      // title and content are run through the analyzer.
                      Document document = new Document();
                      document.add(new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED));
                      document.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
                      document.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
                      indexWriter.addDocument(document);
                  }
                  finally
                  {
                      // 3. Always close the writer, even when adding the document fails,
                      // so the writer is not left open after an error.
                      indexWriter.close();
                  }
              }
              finally
              {
                  directory.close();
              }
          }

          /**
           * Searches the index for the document whose id is "1" and prints every hit.
           *
           * Steps:
           * 1. create an IndexSearcher
           * 2. call search to run the query
           * 3. print the results
           *
           * @throws Exception if the index cannot be opened or the query cannot be parsed
           */
          @Test
          public void testSearchIndex() throws Exception
          {
              // 1. Open the index directory and a searcher on top of it.
              Directory directory = FSDirectory.open(new File(INDEX_PATH));
              try
              {
                  IndexSearcher searcher = new IndexSearcher(directory);
                  try
                  {
                      // 2. Parse the keyword against the "id" field and fetch at most 10 hits.
                      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                      QueryParser queryParser = new QueryParser(Version.LUCENE_30, "id", analyzer);
                      Query query = queryParser.parse("1");
                      TopDocs topDocs = searcher.search(query, 10);

                      // Turn every hit back into an Article.
                      List<Article> articles = new ArrayList<Article>();
                      for (ScoreDoc scoreDoc : topDocs.scoreDocs)
                      {
                          // scoreDoc.doc is the document number used to load the stored fields.
                          Document document = searcher.doc(scoreDoc.doc);
                          Article article = new Article();
                          article.setId(Long.parseLong(document.get("id")));
                          article.setTitle(document.get("title"));
                          article.setContent(document.get("content"));
                          articles.add(article);
                      }

                      // 3. Print the results.
                      for (Article article : articles)
                      {
                          System.out.println(article.getId());
                          System.out.println(article.getTitle());
                          System.out.println(article.getContent());
                      }
                  }
                  finally
                  {
                      searcher.close();
                  }
              }
              finally
              {
                  directory.close();
              }
          }
      }


    两个工具类
    package com.kite.luncene.utils;
    
    import java.io.File;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    /**
     * Holds the index directory and analyzer shared by the indexing examples.
     * Both are created once, when this class is first loaded.
     */
    public class LunceneUtils
    {
        /** The index directory. */
        public static Directory directory;
        /** The analyzer (tokenizer) used for indexing and for query parsing. */
        public static Analyzer analyzer;
        static
        {
            try
            {
                // NOTE(review): this path is "./indexDor" while HelloWorld uses "./indexDir";
                // possibly a typo. Left unchanged because renaming it would make an
                // already-built index at the old location invisible - confirm before changing.
                directory = FSDirectory.open(new File("./indexDor"));
                analyzer = new StandardAnalyzer(Version.LUCENE_30);
            } catch (Exception e)
            {
                // Fail fast: previously the error was only printed, leaving the fields
                // null so that callers failed later with an unrelated NullPointerException.
                throw new ExceptionInInitializerError(e);
            }

        }
    }

    package com.kite.luncene.utils;
    
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Field.Store;
    
    import com.kite.bean.Article;
    
    /**
     * Static helpers that convert between {@link Article} beans and Lucene
     * {@link Document}s using the fields "id", "title" and "content".
     */
    public class DocumentUtils
    {
        /** Not instantiable: this class only offers static helpers. */
        private DocumentUtils()
        {
        }

        /**
         * Builds a document from an article.
         * The id is stored and indexed without analysis; title and content are
         * stored and indexed through the analyzer.
         *
         * @param article the article to convert; must not be null and must have an id
         * @return a new document carrying the article's id, title and content
         * @throws NullPointerException if the article or its id is null
         */
        public static Document articleToDocument(Article article)
        {
            if (article == null)
            {
                throw new NullPointerException("article must not be null");
            }
            if (article.getId() == null)
            {
                throw new NullPointerException("article id must not be null");
            }
            Document document = new Document();
            document.add(new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED));
            document.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
            document.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
            return document;
        }

        /**
         * Builds an article from a document.
         *
         * @param document the document to convert; must not be null
         * @return a new article filled from the document's "id", "title" and "content" values
         * @throws NullPointerException if the document is null
         * @throws NumberFormatException if the "id" value is missing or is not a valid long
         */
        public static Article documentToArticle(Document document)
        {
            if (document == null)
            {
                throw new NullPointerException("document must not be null");
            }
            String id = document.get("id");
            if (id == null)
            {
                // Same exception type Long.parseLong(null) raised before, with a usable message.
                throw new NumberFormatException("document has no \"id\" value");
            }
            Article article = new Article();
            article.setId(Long.parseLong(id));
            article.setTitle(document.get("title"));
            article.setContent(document.get("content"));
            return article;
        }
    }


    使用工具类实现简单的增删改查功能
    package com.kite.luncene.index;
    
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.util.Version;
    import org.junit.Test;
    
    import com.kite.bean.Article;
    import com.kite.luncene.utils.DocumentUtils;
    import com.kite.luncene.utils.LunceneUtils;
    
    /**
     * Create / search / delete / update examples built on the shared directory
     * and analyzer from {@link LunceneUtils} and the converters in {@link DocumentUtils}.
     */
    public class ArticleIndex
    {
        /**
         * Opens a writer on the shared index directory with the shared analyzer.
         * Callers are responsible for closing it.
         */
        private static IndexWriter openWriter() throws Exception
        {
            return new IndexWriter(LunceneUtils.directory, LunceneUtils.analyzer, MaxFieldLength.LIMITED);
        }

        /**
         * Adds one article to the index.
         *
         * @throws Exception if the index cannot be opened or written
         */
        @Test
        public void testCreateIndex() throws Exception
        {
            IndexWriter indexWriter = openWriter();
            try
            {
                Article article = new Article();
                article.setId(1L);
                article.setTitle("luncenes是一个好难写的东西");
                article.setContent("百度,谷歌是很好的搜索引擎");
                // Convert to a document through the utility class.
                Document document = DocumentUtils.articleToDocument(article);
                indexWriter.addDocument(document);
            }
            finally
            {
                // Close even on failure so the writer is not left open for the other tests.
                indexWriter.close();
            }
        }

        /**
         * Searches the "title" field for the keyword "luncene" and prints up to two hits.
         *
         * @throws Exception if the index cannot be read or the query cannot be parsed
         */
        @Test
        public void testSearchIndex() throws Exception
        {
            IndexSearcher indexSearcher = new IndexSearcher(LunceneUtils.directory);
            try
            {
                /*
                 * Version.LUCENE_30     the version
                 * "title"               the field to search on
                 * LunceneUtils.analyzer the analyzer
                 */
                QueryParser queryParser = new QueryParser(Version.LUCENE_30, "title", LunceneUtils.analyzer);
                // "luncene" is the search keyword.
                Query query = queryParser.parse("luncene");
                TopDocs topDocs = indexSearcher.search(query, 2);

                List<Article> articles = new ArrayList<Article>();
                for (ScoreDoc scoreDoc : topDocs.scoreDocs)
                {
                    // scoreDoc.doc is the document number used to load the stored document.
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    articles.add(DocumentUtils.documentToArticle(document));
                }

                // Print every hit. All three values go to System.out so the lines stay in order
                // (the title used to go to System.err, whose output may interleave differently).
                for (Article article : articles)
                {
                    System.out.println(article.getId().toString());
                    System.out.println(article.getTitle());
                    System.out.println(article.getContent());
                }
            }
            finally
            {
                indexSearcher.close();
            }
        }

        /**
         * Deletes the documents matching the term "luncenes" in the "title" field.
         *
         * @throws Exception if the index cannot be opened or written
         */
        @Test
        public void testDeleteIndex() throws Exception
        {
            IndexWriter indexWriter = openWriter();
            try
            {
                // indexWriter.deleteAll() would remove every document instead.
                /*
                 * Term: the keyword object
                 *       "title"    field name
                 *       "luncenes" keyword text
                 */
                Term term = new Term("title", "luncenes");
                // Delete by term; per the original author's note this adds a file
                // ending in .del to the index folder.
                indexWriter.deleteDocuments(term);
            }
            finally
            {
                indexWriter.close();
            }
        }

        /**
         * Replaces the documents matching the term "luncenes" in the "title" field
         * with a corrected article. An update is a delete followed by an add.
         *
         * @throws Exception if the index cannot be opened or written
         */
        @Test
        public void testUpdateIndex() throws Exception
        {
            IndexWriter indexWriter = openWriter();
            try
            {
                Term term = new Term("title", "luncenes");
                Article article = new Article();
                article.setId(1L);
                article.setTitle("luncene是一个好难写的东西,少个s不解释");
                article.setContent("百度,谷歌是很好的搜索引擎");
                /*
                 * term: selects the documents to replace
                 * doc:  the content after the update
                 */
                indexWriter.updateDocument(term, DocumentUtils.articleToDocument(article));
            }
            finally
            {
                indexWriter.close();
            }
        }
    }
    
    
    
     
    
    
    
     
  • 相关阅读:
    Head first javascript(七)
    Python Fundamental for Django
    Head first javascript(六)
    Head first javascript(五)
    Head first javascript(四)
    Head first javascript(三)
    Head first javascript(二)
    Head first javascript(一)
    Sicily 1090. Highways 解题报告
    Python GUI programming(tkinter)
  • 原文地址:https://www.cnblogs.com/kite/p/3645571.html
Copyright © 2011-2022 走看看