zoukankan html css js c++ java

Lucene学习

创建索引的例子：

package com.test;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal example that builds a Lucene index containing a single document.
 */
public class TestIndex {
    public static void main(String[] args) {
        TestIndex testIndex = new TestIndex();
        testIndex.creatIndex();
    }

    /**
     * Creates a brand-new index (any existing index at the target path is
     * overwritten because of {@code OpenMode.CREATE}) and adds one document
     * with a stored, analyzed "title" and "content" field.
     *
     * <p>I/O failures are reported via {@code printStackTrace()}; the writer
     * is always closed so the index write lock is not left behind.
     */
    public void creatIndex() {
        // Location where the index is stored.
        String indexPath = "f://index//";
        IndexWriter writer = null;
        try {
            Directory dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                    analyzer);
            iwc.setOpenMode(OpenMode.CREATE);
            writer = new IndexWriter(dir, iwc);

            Document doc = new Document();
            doc.add(new Field("title", "我的Lucene", Field.Store.YES,
                    Field.Index.ANALYZED));
            doc.add(new Field("content", "Lucene 是一个简单的开源包", Field.Store.YES,
                    Field.Index.ANALYZED));
            writer.addDocument(doc);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path: an unclosed writer keeps the write lock.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

在创建索引的时候

一般情况下目录都用 FSDirectory.open() 打开；如果把索引放在内存中（RAMDirectory），内存必须足够大，所以只有对速度要求特别高的项目才适合这样做

分析器的话，建议使用自己编写的分词器，Lucene自己带的分词器毕竟不能满足自己的需要（我自己曾写过一个分词器，前面提到过的）

查询索引的代码：

/**
 * Searches the "content" field of the index at {@code f://index//} for the
 * query text "Lucene" and loads the stored "content" value of each of the
 * top 10 hits.
 *
 * <p>Any failure is reported via {@code printStackTrace()}. The searcher and
 * reader are closed on every path.
 */
private void search() {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        String queryText = "Lucene";
        String indexPath = "f://index//";
        reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
        searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        QueryParser parser = new QueryParser(Version.LUCENE_31, "content", analyzer);
        Query query = parser.parse(queryText);

        // Only fetch the 10 best-ranked results. (The query is executed once;
        // running it a second time and discarding the result is wasted work.)
        TopDocs results = searcher.search(query, 10);
        ScoreDoc[] hits = results.scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            // Stored value of the "content" field for this hit; the example
            // does nothing further with it.
            String contents = doc.get("content");
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the searcher first, then the underlying reader.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

更新索引的例子：（更新是删除和添加的结合）

/**
 * Replaces the document whose "id" term is "1" with a new document
 * (an update in Lucene is a delete followed by an add).
 *
 * <p>The index is opened with {@code OpenMode.CREATE_OR_APPEND}: opening it
 * with {@code OpenMode.CREATE} would erase the existing index first, so there
 * would be nothing left to update and all other documents would be lost.
 *
 * <p>I/O failures are reported via {@code printStackTrace()}; the writer is
 * always closed so the index write lock is not left behind.
 */
public void updateIndex() {
    IndexWriter writer = null;
    try {
        Directory dir = FSDirectory.open(new File("D:/index"));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                analyzer);
        // Keep existing documents; create the index only if it is missing.
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);

        Document doc = new Document();
        // "id" is indexed un-analyzed so it matches the exact Term below.
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("content", "www", Field.Store.YES, Field.Index.ANALYZED));
        writer.updateDocument(new Term("id", "1"), doc);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

删除索引的例子：

/**
 * Deletes every document whose "id" term is "1" from the index at
 * {@code D:/index}.
 *
 * <p>The index is opened with {@code OpenMode.CREATE_OR_APPEND}: opening it
 * with {@code OpenMode.CREATE} would erase the whole existing index before
 * the delete runs, removing all documents rather than only the matching ones.
 *
 * <p>I/O failures are reported via {@code printStackTrace()}; the writer is
 * always closed so the index write lock is not left behind.
 */
public void testDelete() {
    IndexWriter writer = null;
    try {
        Directory dir = FSDirectory.open(new File("D:/index"));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                analyzer);
        // Keep existing documents; create the index only if it is missing.
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);

        writer.deleteDocuments(new Term("id", "1"));
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

删除索引的时候既可以用IndexWriter 也可以用IndexReader，

没有进行段合并的时候

IndexWriter 的删除并不是真正的删除，只是把它标记为已删除

可以用 maxDoc() 和 numDocs() 方法来查看（maxDoc 包含已标记删除的文档，numDocs 不包含）

查看全文

相关阅读:
No module named scrapy 成功安装scrapy，却无法import的解决方法
 linux装sqlite3
linux python3 安装scrapy 后提示 -bash: scrapy: 未找到命令
 使用splash爬去JavaScript动态请求的内容
 python之auto鼠标/键盘事件
 python分割txt文件
 [WPF]使用WindowChrome自定义Window Style
[UWP]浅谈按钮设计
 [UWP]用Shape做动画(2):使用与扩展PointAnimation
[UWP]用Shape做动画

原文地址：https://www.cnblogs.com/tomcattd/p/2842254.html