zoukankan      html  css  js  c++  java
  • Lucene 中 FSDirectory、RAMDirectory 的用法

    package com.ljq.one;
    
    
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.InputStreamReader;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.NumberTools;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    import org.apache.lucene.queryParser.MultiFieldQueryParser;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.RAMDirectory;
    import org.junit.Test;
    
    public class DirectoryTest {
        // 数据源路径
        String dspath = "E:/workspace/mylucene/lucenes/IndexWriter addDocument's a javadoc .txt";
        //存放索引文件的位置,即索引库
        String indexpath = "E:/workspace/mylucene/luceneIndex";
        //分词器
        Analyzer analyzer = new StandardAnalyzer();
        
        /**
         * 创建索引,会抛异常,因为没对索引库进行保存
         * 
         * IndexWriter 用来操作(增、删、改)索引库的
         */
        @Test
        public void createIndex() throws Exception {
            //Directory dir=FSDirectory.getDirectory(indexpath);
            //内存存储:优点速度快,缺点程序退出数据就没了,所以记得程序退出时保存索引库,已FSDirectory结合使用
            //由于此处只暂时保存在内存中,程序退出时没进行索引库保存,因此在搜索时程序会报错
            Directory dir=new RAMDirectory();
            File file = new File(dspath);
            //Document存放经过组织后的数据源,只有转换为Document对象才可以被索引和搜索到
            Document doc = new Document();
            //文件名称
            doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
            //检索到的内容
            doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
            //文件大小
            doc.add(new Field("size", NumberTools.longToString(file.length()),
                    Store.YES, Index.NOT_ANALYZED));
            //检索到的文件位置
            doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
    
            // 建立索引
            //第一种方式
            //IndexWriter indexWriter = new IndexWriter(indexpath, analyzer, MaxFieldLength.LIMITED);
            //第二种方式
            IndexWriter indexWriter = new IndexWriter(dir, analyzer, MaxFieldLength.LIMITED);
            indexWriter.addDocument(doc);
            indexWriter.close();
        }
        
        /**
         * 创建索引(推荐)
         * 
         * IndexWriter 用来操作(增、删、改)索引库的
         */
        @Test
        public void createIndex2() throws Exception {
            Directory fsDir = FSDirectory.getDirectory(indexpath);
            //1、启动时读取
            Directory ramDir = new RAMDirectory(fsDir);
            
            // 运行程序时操作ramDir
            IndexWriter ramIndexWriter = new IndexWriter(ramDir, analyzer, MaxFieldLength.LIMITED);
            
            //数据源
            File file = new File(dspath);
            // 添加 Document
            Document doc = new Document();
            //文件名称
            doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
            //检索到的内容
            doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
            //文件大小
            doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
            //检索到的文件位置
            doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
            ramIndexWriter.addDocument(doc);
            ramIndexWriter.close();
            
            //2、退出时保存
            IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, true, MaxFieldLength.LIMITED);
            fsIndexWriter.addIndexesNoOptimize(new Directory[]{ramDir});
            
            // 优化操作
            fsIndexWriter.commit();
            fsIndexWriter.optimize();
            
            fsIndexWriter.close();
        }
        
        /**
         * 优化操作
         * 
         * @throws Exception
         */
        @Test
        public void createIndex3() throws Exception{
            Directory fsDir = FSDirectory.getDirectory(indexpath);
            IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, MaxFieldLength.LIMITED);
            
            fsIndexWriter.optimize();
            fsIndexWriter.close();
        }
        
        /**
         * 搜索
         * 
         * IndexSearcher 用来在索引库中进行查询
         */
        @Test
        public void search() throws Exception {
            //请求字段
            //String queryString = "document";
            String queryString = "adddocument";
    
            // 1,把要搜索的文本解析为 Query
            String[] fields = { "name", "content" };
            QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
            Query query = queryParser.parse(queryString);
    
            // 2,进行查询,从索引库中查找
            IndexSearcher indexSearcher = new IndexSearcher(indexpath);
            Filter filter = null;
            TopDocs topDocs = indexSearcher.search(query, filter, 10000);
            System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果");
    
            // 3,打印结果
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // 文档内部编号
                int index = scoreDoc.doc; 
                // 根据编号取出相应的文档
                Document doc = indexSearcher.doc(index);
                System.out.println("------------------------------");
                System.out.println("name = " + doc.get("name"));
                System.out.println("content = " + doc.get("content"));
                System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));
                System.out.println("path = " + doc.get("path"));
            }
        }
    
        /**
         * 读取文件内容
         */
        public static String readFileContent(File file) {
            try {
                BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
                StringBuffer content = new StringBuffer();
                for (String line = null; (line = reader.readLine()) != null;) {
                    content.append(line).append("\n");
                }
                reader.close();
                return content.toString();
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
        
    
    }
  • 相关阅读:
    【Linux】【jenkins】自动化部署一 安装jenkins及Jenkins工作目录迁移
    【Linux】【docker】docker私服安装
    【Linux】【docker】docker及docker-compose安装
    【Linux】【tomcat】tomcat8.5安装
    【Linux】【jdk】jdk8.0安装
    【Linux】【mysql】mysql8.0开启远程访问及常见问题
    【Linux】记录一个yum update和upgrade的区别
    【Linux】【gitlab】gitlab安装、备份、恢复、升级、内存消耗问题
    Python序列——列表
    Python序列——元组
  • 原文地址:https://www.cnblogs.com/linjiqin/p/2001574.html
Copyright © 2011-2022 走看看