zoukankan html css js c++ java

Lucene的简单用法

1.创建索引

package com.DingYu.Test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * 我们的目标是把索引和文档存入索引库中， 所以首先我们需要创建一个索引库 然后创建一个IndexWrite对象把索引，和文档对象写入，
 * 文档对象中需要自己设置域，索引是通过分词器对域进行分词产生的， 所以我们需要分词器
 * 
 * @author 丁宇
 *
 */
public class LuceneTest {
    /**
     * 创建索引
     * @throws IOException
     */
    @Test
    public void createIndex() throws IOException {
        // 标准分词器
        Analyzer analyzer = new StandardAnalyzer();
        // 创建一个索引
        Directory directory = FSDirectory.open(Paths.get("D:\LuceneIndex"));
        // 创建一个IndexWriteConfig对象
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        // 创建一个IndexWrite对象
        IndexWriter write = new IndexWriter(directory, config);
        // 获得所有文件下的文件
        File[] files = new File("D:\LuceneTest").listFiles();
        for (File file : files) {
            // 创建一个文档对象
            Document document = new Document();
            // 增加一个filepath域，不分析 不索引 但会存储在索引库里 把文件路径放到域中
            Field field1 = new StoredField("filepath", file.getPath());
            // 增加一个filename域，会分词，会索引，
            Field field2 = new org.apache.lucene.document.TextField("filename", file.getName(), Store.YES);
            // 增加一个fileContent域，会分词，会索引，只放文件内容的索引
            Field field3 = new org.apache.lucene.document.TextField("filecontent", fileContent(file), Store.NO);
            // 增加一个filesize域，不分析 不索引 但会存储在索引库里 把文件路径放到域中
            Field field4 = new StoredField("filesize", file.length());
            document.add(field1);
            document.add(field2);
            document.add(field3);
            document.add(field4);
            write.addDocument(document);
        }
        write.close();
    }
    /**
     * 获得文件内容
     * @param file
     * @return
     */
    public String fileContent(File file)  {
        byte[] fileContent = new byte[(int) file.length()];
        FileInputStream in = null;
        try {
            in = new FileInputStream(file);
        } catch (FileNotFoundException e2) {
            e2.printStackTrace();
        }
        try {
            in.read(fileContent);
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        try {
            in.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            return new String(fileContent, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return null;
    }
}

2.查询索引

package com.DingYu.Test;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * 查询索引
 * 
 * @author 丁宇
 *
 */
public class LuceneTest1 {
    // 获得IndexSearcher对象
    private IndexSearcher getIndexSearcher() throws IOException {
        // 指定索引库
        Directory directory = FSDirectory.open(Paths.get("D:\LuceneIndex"));
        // 打开索引库
        IndexReader reader = DirectoryReader.open(directory);
        // 创建查询的对象
        IndexSearcher searcher = new IndexSearcher(reader);
        return searcher;
    }

    // 输出查到的内容
    private void printIndex(TopDocs docs,IndexSearcher searcher) throws IOException {
        // 获得顶部匹配记录
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        // 获得在索引库中存着的文档的id,利用id去寻找文档
        for (ScoreDoc scoreDoc : scoreDocs) {
            // 获得id
            int doc = scoreDoc.doc;
            // 获得文档
            Document document = searcher.doc(doc);
            // 获得这个文档的域
            System.out.println(document.get("filename"));
            System.out.println(document.get("filecontent"));
            System.out.println(document.get("filepath"));
            System.out.println(document.get("filesize"));
            System.out.println("------------------------");
        }
    }
    /**
     * 精准查询
     * @throws IOException
     */
    @Test
    public void termQueryIndex() throws IOException {

        IndexSearcher searcher = getIndexSearcher();
        // 选择合适的查询方法，这里用最简单的，具体的看下图
        Query query = new TermQuery(new Term("filename", "txt"));
        // 执行查询
        TopDocs docs = searcher.search(query, 2);
        //输出查询内容
        printIndex(docs, searcher);
        // 关闭索引库
        searcher.getIndexReader().close();
    }
    /**
     * 范围查询 五个参数 第一个域名，第二个第三个表示范围，第四个第五个表示是否包含最小值和最大值。
     * @throws IOException
     */
    @Test
    public void numRangeQueryIndex() throws IOException {
        IndexSearcher searcher = getIndexSearcher();
        // 选择合适的查询方法，这里用最简单的，具体的看下图
        Query query = NumericRangeQuery.newLongRange("filesize", 0L, 1000L, true, true);
        // 执行查询
        TopDocs docs = searcher.search(query, 2);
        //输出查询内容
        printIndex(docs, searcher);
        // 关闭索引库
        searcher.getIndexReader().close();
    }
    /**
     * 组合查询
     * @throws IOException 
     */
    @Test
    public void booleanQueryIndex() throws IOException {
        IndexSearcher searcher = getIndexSearcher();
        BooleanQuery booleanQuery = new BooleanQuery();
        Query query = new TermQuery(new Term("filename","txt"));
        Query query2 = NumericRangeQuery.newLongRange("filesize", 0L, 1000L, true, true);
        //表示query是必须的 query2也是必须 相当于并集
        booleanQuery.add(query,Occur.MUST);
        booleanQuery.add(query2, Occur.MUST);
        // 执行查询
        TopDocs docs = searcher.search(query, 2);
        //输出查询内容
        printIndex(docs, searcher);
        // 关闭索引库
        searcher.getIndexReader().close();
    }
}

3.删除索引

package com.DingYu.Test;

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * 删除索引 一般增删改都是同一个操作对象 这里使用IndexWriter对象
 * 
 * @author 丁宇
 *
 */
public class LuceneTest3 {
    /**
     * 获得IndexWrite对象
     * @return
     * @throws IOException
     */
    public IndexWriter getIndexWrite() throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        Directory directory = FSDirectory.open(Paths.get("D:\LuceneIndex"));
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, config);
    }

    /**
     * 删除所有的索引
     * 
     * @throws IOException
     */
    @Test
    public void deleteAllIndex() throws IOException {
        IndexWriter indexWrite = getIndexWrite();
        indexWrite.deleteAll();
        indexWrite.close();
    }
    /**
     * 根据条件删除索引，同时删除文档
     * @throws IOException
     */
    @Test
    public void deleteSomeIndex() throws IOException {
        IndexWriter indexWrite = getIndexWrite();
        Query query = new TermQuery(new Term("filename","txt"));
        indexWrite.deleteDocuments(query);
        indexWrite.close();        
    }
}

4.修改索引

package com.DingYu.Test;

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * 索引的修改
 * @author 丁宇
 *
 */
public class LuceneTest2 {
    
    private IndexWriter getIndexWriter() throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        Directory directory = FSDirectory.open(Paths.get("D:\LuceneIndex"));
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, config);
    }
    
    @Test
    public void updateIndex() throws IOException {
        IndexWriter indexWriter = getIndexWriter();
        Document document = new Document();
        document.add(new StringField("filename", "think in java", Store.YES));
        //update 就是删除一个你指定的 创建一个你想要的 。
        indexWriter.updateDocument(new Term("filecontent","txt"), document);
        indexWriter.close();
    }
}

查看全文

相关阅读:
友好城市, 美团笔试题
 字符串计数，美团笔试题
 公交车，美团笔试题
 交错序列，美团笔试题
 题目列表，美团笔试题，字符串数组比较
 图的遍历，美团笔试题
 最长全1串，美团笔试题
 外卖满减, 美团笔试题
 种花，美团笔试题
 考试策略，美团笔试题

原文地址：https://www.cnblogs.com/dddyyy/p/9842760.html