  • Lucene 4.4: adding, deleting, updating, and querying an index

    package com.lucene.test;

    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.Date;

    import org.apache.log4j.Logger;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.IntField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.DocsAndPositionsEnum;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.Version;

    public class IndexUtil {

        private static final Logger LOGGER = Logger.getLogger(IndexUtil.class);

        private Directory directory = null;
        private DirectoryReader reader = null;
        private IndexWriterConfig config = null;
        private IndexWriter writer = null;

        public static final IndexUtil Instance = new IndexUtil();

        private IndexUtil() {
            try {
                directory = FSDirectory.open(new File("D:/lucene/index"));
                config = new IndexWriterConfig(Version.LUCENE_44,
                        new StandardAnalyzer(Version.LUCENE_44));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * Add documents to the index.
         * @throws IOException
         */
        public void index() throws IOException {
            writer = new IndexWriter(directory, config);
            File file = new File("D:/lucene/example");
            Document document = null;
            int id = 0;
            long start = new Date().getTime();
            LOGGER.info("Building index...");
            for (File f : file.listFiles()) {
                document = new Document();
                document.add(new StringField("name", f.getName(), Store.YES));
                document.add(new IntField("id", id++, Store.YES));
                document.add(new StringField("path", f.getAbsolutePath(), Store.YES));
                document.add(new TextField("context", new FileReader(f)));
                writer.addDocument(document);
            }
            long end = new Date().getTime();
            LOGGER.info("Indexing finished, took " + (end - start) / 1000.0 + "s");
            writer.close();
        }

        /**
         * Query the index.
         * @throws IOException
         * @throws ParseException
         */
        public void search() throws IOException, ParseException {
            reader = DirectoryReader.open(directory);
            QueryParser parser = new QueryParser(Version.LUCENE_44, "context",
                    new StandardAnalyzer(Version.LUCENE_44));
            Query query = parser.parse("lucene");
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs docs = searcher.search(query, 100);
            /**
             * reader.maxDoc()         total number of documents in the index, including deleted ones
             * reader.numDocs()        number of live documents, excluding deleted ones
             * reader.numDeletedDocs() number of deleted documents
             */
            LOGGER.info("totalHits: " + docs.totalHits + " returned docs: " + docs.scoreDocs.length
                    + " maxDoc: " + reader.maxDoc() + " numDeletedDocs: "
                    + reader.numDeletedDocs() + " numDocs: " + reader.numDocs());
            for (ScoreDoc doc : docs.scoreDocs) {
                Document document = reader.document(doc.doc);
                LOGGER.info("id:" + document.get("id") + " name:"
                        + document.get("name") + " path:" + document.get("path"));
            }
            reader.close();
        }

        /**
         * Update the index: updateDocument() deletes every document matching the
         * given term and then adds the new document. Note that "id" is indexed as an
         * IntField (numeric encoding), so the plain text term Term("id", "2") will
         * not match it; a string key field is needed for term-based updates.
         * @throws IOException
         */
        public void update() throws IOException {
            writer = new IndexWriter(directory, config);
            Document document = new Document();
            document.add(new StringField("name", "newfile", Store.YES));
            document.add(new IntField("id", 12, Store.YES));
            document.add(new StringField("path", "D:/lucene/example/newfile.txt", Store.YES));
            writer.updateDocument(new Term("id", "2"), document);
            writer.commit();
            writer.close();
        }

        /**
         * Delete documents from the index. Deletions are recorded in a separate
         * file (ending in .del), so the documents are only marked as deleted,
         * much like moving them to a recycle bin.
         * @throws IOException
         */
        public void delete() throws IOException {
            writer = new IndexWriter(directory, config);
            writer.deleteDocuments(new Term("name", "11.txt"));
            writer.close();
        }

        /**
         * Delete all documents from the index. As with delete(), the deleted
         * documents are kept in the "recycle bin" until they are merged away.
         * @throws IOException
         */
        public void deleteAll() throws IOException {
            writer = new IndexWriter(directory, config);
            writer.deleteAll();
            writer.close();
        }

        /**
         * Physically remove documents that have already been marked as deleted,
         * i.e. empty the "recycle bin" left by the delete methods above.
         * @throws IOException
         */
        public void forceMergeDeletes() throws IOException {
            writer = new IndexWriter(directory, config);
            writer.forceMergeDeletes(); // expunge deleted documents
            writer.close();
        }

        /**
         * Dump the contents of the index.
         * @throws IOException
         */
        public void showIndex() throws IOException {
            reader = DirectoryReader.open(directory);
            Fields fields = MultiFields.getFields(reader); // all fields in the index
            for (String field : fields) {
                LOGGER.info(field);
            }
            // show all terms of the "context" field
            Terms terms = fields.terms("context");
            TermsEnum termsEnum = terms.iterator(null);
            BytesRef term = null;
            while ((term = termsEnum.next()) != null) {
                System.out.print(term.utf8ToString() + " ");       // the term text
                System.out.print(termsEnum.docFreq() + " ");        // number of documents containing the term
                System.out.print(termsEnum.totalTermFreq() + " ");  // total number of occurrences of the term
                DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
                // if the field was not indexed with positions, docsAndPositionsEnum is null; skip it
                if (docsAndPositionsEnum == null) {
                    continue;
                }
                int docId;
                while ((docId = docsAndPositionsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    Document document = reader.document(docId);   // fetch the Document
                    System.out.print(docId + " ");                // the document id
                    System.out.print(document.get("name") + " "); // a stored field value of the document
                    int freq = docsAndPositionsEnum.freq();       // how often the term occurs in this document
                    for (int i = 0; i < freq; i++) {
                        System.out.print(docsAndPositionsEnum.nextPosition() + ":");     // position of the term
                        System.out.print("[" + docsAndPositionsEnum.startOffset());      // start offset of the term
                        System.out.print("," + docsAndPositionsEnum.endOffset() + "],"); // end offset of the term
                        System.out.print(docsAndPositionsEnum.getPayload() + " ");       // payload, if any
                    }
                }
                System.out.println();
            }
            reader.close();
        }

    }
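
    Since IndexUtil is a singleton, everything goes through IndexUtil.Instance. Below is a minimal driver sketch, assuming the D:/lucene/index and D:/lucene/example directories used above exist and log4j is configured; the class name IndexDemo is made up for this example.

    // Minimal usage sketch for the IndexUtil singleton above (hypothetical class name).
    // Assumes D:/lucene/index and D:/lucene/example exist and log4j is configured.
    package com.lucene.test;

    import java.io.IOException;

    import org.apache.lucene.queryparser.classic.ParseException;

    public class IndexDemo {
        public static void main(String[] args) throws IOException, ParseException {
            IndexUtil util = IndexUtil.Instance;
            util.index();             // build the index from the files in D:/lucene/example
            util.search();            // query the "context" field for "lucene"
            util.delete();            // mark the document named 11.txt as deleted
            util.forceMergeDeletes(); // physically remove the deleted documents
            util.showIndex();         // dump fields, terms, positions and offsets
        }
    }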

  • Original post: https://www.cnblogs.com/pangblog/p/3297230.html