zoukankan      html  css  js  c++  java
  • lucene中FSDirectory、RAMDirectory的用法

    package com.ljq.one;

    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.NumberTools;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    import org.apache.lucene.queryParser.MultiFieldQueryParser;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.RAMDirectory;
    import org.junit.Test;

    public class DirectoryTest {
    // Path of the source text file; its name, content, size and absolute path are what gets indexed
    String dspath = "E:/workspace/mylucene/lucenes/IndexWriter addDocument's a javadoc .txt";
    // Directory that holds the on-disk index files (the index store)
    String indexpath = "E:/workspace/mylucene/luceneIndex";
    // Analyzer (tokenizer) shared by the index writers and by the query parser in search()
    Analyzer analyzer = new StandardAnalyzer();

    /**
     * Indexes the file at {@code dspath} into a {@link RAMDirectory} only.
     *
     * <p>The index is built purely in memory and is never written to
     * {@code indexpath}, so it is lost as soon as this method returns.
     * {@link #search()} opens the index at {@code indexpath} and therefore
     * cannot see the document added here. Use {@link #createIndex2()} for the
     * variant that persists the in-memory index to disk.
     *
     * <p>{@link IndexWriter} is the class used to add, delete and update
     * documents in an index.
     *
     * @throws Exception if the source file cannot be read or the index cannot be written
     */
    @Test
    public void createIndex() throws Exception {
        // In-memory storage: fast, but the data disappears with the directory object.
        // To write straight to disk instead, use FSDirectory.getDirectory(indexpath)
        // (or pass indexpath to the IndexWriter constructor).
        Directory dir = new RAMDirectory();
        File file = new File(dspath);

        // Only data wrapped in a Document can be indexed and searched.
        Document doc = new Document();
        // File name, tokenized so individual words are searchable
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        // Full text of the file, tokenized
        doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
        // File size, encoded as a string by NumberTools and indexed as a single term
        doc.add(new Field("size", NumberTools.longToString(file.length()),
                Store.YES, Index.NOT_ANALYZED));
        // Absolute path of the file, indexed as a single term
        doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

        IndexWriter indexWriter = new IndexWriter(dir, analyzer, MaxFieldLength.LIMITED);
        try {
            indexWriter.addDocument(doc);
        } finally {
            // Close even when addDocument fails so the writer is not left open.
            indexWriter.close();
        }
    }

    /**
     * Indexes the file at {@code dspath} in memory and then persists the
     * result to disk (the recommended approach).
     *
     * <ol>
     *   <li>Load the existing on-disk index at {@code indexpath} into a
     *       {@link RAMDirectory}.</li>
     *   <li>Add the new document through a writer on the in-memory directory.</li>
     *   <li>Write the in-memory index back to the on-disk directory, then
     *       commit and optimize it.</li>
     * </ol>
     *
     * <p>{@link IndexWriter} is the class used to add, delete and update
     * documents in an index.
     *
     * @throws Exception if the source file cannot be read or either index cannot be written
     */
    @Test
    public void createIndex2() throws Exception {
        Directory fsDir = FSDirectory.getDirectory(indexpath);
        // 1. On start-up: copy the on-disk index into memory.
        Directory ramDir = new RAMDirectory(fsDir);

        // Build the document before opening any writer, so a failure while
        // reading the source file cannot leave a writer open.
        File file = new File(dspath);
        Document doc = new Document();
        // File name, tokenized so individual words are searchable
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        // Full text of the file, tokenized
        doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
        // File size, encoded as a string by NumberTools and indexed as a single term
        doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
        // Absolute path of the file, indexed as a single term
        doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

        // While the program runs, all updates go to the in-memory directory.
        IndexWriter ramIndexWriter = new IndexWriter(ramDir, analyzer, MaxFieldLength.LIMITED);
        try {
            ramIndexWriter.addDocument(doc);
        } finally {
            // Must be closed before ramDir is merged into the on-disk index below.
            ramIndexWriter.close();
        }

        // 2. On exit: save the in-memory index to disk. The writer is opened with
        // create == true, i.e. it starts a fresh on-disk index; ramDir already
        // contains whatever was loaded from disk in step 1.
        IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, true, MaxFieldLength.LIMITED);
        try {
            fsIndexWriter.addIndexesNoOptimize(new Directory[]{ramDir});

            // Flush the merged index, then optimize it.
            fsIndexWriter.commit();
            fsIndexWriter.optimize();
        } finally {
            // Close even on failure so the writer on the on-disk directory is released.
            fsIndexWriter.close();
        }
    }

    /**
     * Optimizes the existing on-disk index at {@code indexpath} without
     * adding any documents.
     *
     * @throws Exception if the index cannot be opened or optimized
     */
    @Test
    public void createIndex3() throws Exception {
        Directory fsDir = FSDirectory.getDirectory(indexpath);
        IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, MaxFieldLength.LIMITED);
        try {
            fsIndexWriter.optimize();
        } finally {
            // Close even when optimize() fails so the writer on the on-disk directory is released.
            fsIndexWriter.close();
        }
    }

    /**
     * Searches the on-disk index at {@code indexpath} and prints the hits.
     *
     * <p>The query string is parsed against the {@code name} and
     * {@code content} fields with the shared analyzer. The total hit count is
     * printed first, followed by the stored {@code name}, {@code content},
     * {@code size} and {@code path} of each returned document (at most 10000).
     *
     * <p>{@link IndexSearcher} is the class used to run queries against an index.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void search() throws Exception {
        // Text to search for (another sample query: "document")
        String queryString = "adddocument";

        // 1. Parse the search text into a Query over both fields.
        String[] fields = { "name", "content" };
        QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
        Query query = queryParser.parse(queryString);

        // 2. Run the query against the index store.
        IndexSearcher indexSearcher = new IndexSearcher(indexpath);
        try {
            Filter filter = null; // no filtering
            TopDocs topDocs = indexSearcher.search(query, filter, 10000);
            System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果");

            // 3. Print each hit.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Internal document number assigned by the index
                int index = scoreDoc.doc;
                // Load the stored document for that number
                Document doc = indexSearcher.doc(index);
                System.out.println("------------------------------");
                System.out.println("name = " + doc.get("name"));
                System.out.println("content = " + doc.get("content"));
                // size was stored via NumberTools.longToString, so decode it back to a long
                System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));
                System.out.println("path = " + doc.get("path"));
            }
        } finally {
            // The searcher was opened from a path, so it must be closed here to
            // release the underlying index files.
            indexSearcher.close();
        }
    }

    /**
     * Reads a text file and returns its content as a single string.
     *
     * <p>Lines are read one by one and each line is followed by a single space
     * in the result (line terminators are dropped). An empty file yields an
     * empty string.
     *
     * <p>NOTE(review): the file is decoded with the platform default charset,
     * as before; pass an explicit charset to {@code InputStreamReader} if the
     * source files have a known encoding.
     *
     * @param file the file to read
     * @return the file's lines, each followed by one space
     * @throws RuntimeException wrapping the {@link IOException} if the file
     *         cannot be opened or read
     */
    public static String readFileContent(File file) {
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            try {
                StringBuilder content = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append(" ");
                }
                return content.toString();
            } finally {
                // Close the reader even when readLine() fails, so the file handle is not leaked.
                reader.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to read file: " + file, e);
        }
    }

    }

  • 相关阅读:
    SQL Server 查看正在运行的事务信息的 2 种方法。
    SQL Server 查看正在运行的事务信息的 2 种方法。
    js防抖和限流
    js防抖和限流
    CSS cursor 属性
    CSS cursor 属性
    JS-中使用Math.round(x)保留1位小数点
    I/O系列教材 (一)- Java 的File类,以及常用方法
    异常处理系列教材 (五)- Java 自定义异常
    异常处理系列教材 (四)- java Throwable接口
  • 原文地址:https://www.cnblogs.com/adrianlamo/p/4305777.html
Copyright © 2011-2022 走看看