zoukankan      html  css  js  c++  java
  • Lucene学习

    创建索引的例子:

    package com.test;
    
    import java.io.File;
    import java.io.IOException;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    /**
     * Minimal example that builds a Lucene index containing a single document.
     */
    public class TestIndex {
        /**
         * Entry point: creates a {@code TestIndex} and builds the index once.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            TestIndex testIndex = new TestIndex();
            testIndex.creatIndex();
        }

        /**
         * Creates the index under {@code f://index//} and adds one document with
         * two stored, analyzed fields ({@code title} and {@code content}).
         *
         * <p>The writer and the directory are closed in {@code finally} blocks so
         * they are released even when indexing fails. Any {@link IOException} is
         * printed to standard error and not propagated.
         */
        public void creatIndex() {
            // Location where the index is stored.
            String indexPath = "f://index//";
            try {
                Directory dir = FSDirectory.open(new File(indexPath));
                try {
                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
                    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                            analyzer);
                    // CREATE builds a new index, overwriting one that already exists.
                    iwc.setOpenMode(OpenMode.CREATE);
                    IndexWriter writer = new IndexWriter(dir, iwc);
                    try {
                        Document doc = new Document();
                        doc.add(new Field("title", "我的Lucene", Field.Store.YES,
                                Field.Index.ANALYZED));
                        doc.add(new Field("content", "Lucene 是一个简单的开源包",
                                Field.Store.YES, Field.Index.ANALYZED));
                        writer.addDocument(doc);
                    } finally {
                        // Always close the writer, even if addDocument failed.
                        writer.close();
                    }
                } finally {
                    dir.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    在创建索引的时候

    目录一般使用 FSDirectory.open() 打开;如果把索引放在内存中(RAMDirectory),则内存必须足够大,通常只有对速度要求特别高的项目才这样做

    分析器的话,建议使用自己编写的分词器,Lucene自己带的分词器毕竟不能满足自己的需要(我自己曾写过一个分词器,前面提到过的)

    查询索引的代码:

    /**
     * Searches the {@code content} field of the index under {@code f://index//}
     * for the query text {@code Lucene} and loads the stored {@code content}
     * value of each of the top 10 hits.
     *
     * <p>The searcher and reader are closed in {@code finally} blocks so they
     * are released even when parsing or searching fails. Any exception is
     * printed to standard error and not propagated.
     */
    private void search() {
        String queryText = "Lucene";
        String indexPath = "f://index//";
        try {
            IndexReader reader = IndexReader
                    .open(FSDirectory.open(new File(indexPath)));
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
                    QueryParser parser = new QueryParser(Version.LUCENE_31,
                            "content", analyzer);
                    Query query = parser.parse(queryText);
                    // Only the 10 best-ranked hits are fetched; the query is run once.
                    TopDocs results = searcher.search(query, 10);
                    ScoreDoc[] hits = results.scoreDocs;
                    for (int i = 0; i < hits.length; i++) {
                        Document doc = searcher.doc(hits[i].doc);
                        // Stored field value of the hit; this example does not use it further.
                        String contents = doc.get("content");
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    更新索引的例子:(更新是删除和添加的结合)

    /**
     * Replaces the document whose {@code id} term is {@code "1"} in the index
     * under {@code D:/index} with a new document (delete followed by add).
     *
     * <p>The index is opened with {@code OpenMode.CREATE_OR_APPEND} so existing
     * documents are kept; {@code OpenMode.CREATE} would overwrite the whole
     * index before the update runs. The writer and the directory are closed in
     * {@code finally} blocks. Any {@link IOException} is printed to standard
     * error and not propagated.
     */
    public void updateIndex() {
        try {
            Directory dir = FSDirectory.open(new File("D:/index"));
            try {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                        analyzer);
                // Append to the existing index instead of overwriting it.
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
                IndexWriter writer = new IndexWriter(dir, iwc);
                try {
                    Document doc = new Document();
                    // NOT_ANALYZED keeps the id as a single term so Term("id", "1") matches it.
                    doc.add(new Field("id", "1", Field.Store.YES,
                            Field.Index.NOT_ANALYZED));
                    doc.add(new Field("content", "www", Field.Store.YES,
                            Field.Index.ANALYZED));
                    writer.updateDocument(new Term("id", "1"), doc);
                } finally {
                    // Always close the writer, even if the update failed.
                    writer.close();
                }
            } finally {
                dir.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    删除索引的例子:

    /**
     * Deletes every document whose {@code id} term is {@code "1"} from the
     * index under {@code D:/index}.
     *
     * <p>The index is opened with {@code OpenMode.CREATE_OR_APPEND} so that only
     * the matching documents are removed; {@code OpenMode.CREATE} would
     * overwrite the whole index before the delete runs. The writer and the
     * directory are closed in {@code finally} blocks. Any {@link IOException}
     * is printed to standard error and not propagated.
     */
    public void testDelete() {
        try {
            Directory dir = FSDirectory.open(new File("D:/index"));
            try {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31,
                        analyzer);
                // Open the existing index instead of overwriting it.
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
                IndexWriter writer = new IndexWriter(dir, iwc);
                try {
                    writer.deleteDocuments(new Term("id", "1"));
                } finally {
                    // Always close the writer, even if the delete failed.
                    writer.close();
                }
            } finally {
                dir.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    删除索引的时候既可以用IndexWriter 也可以用IndexReader,

    没有进行段合并的时候

    IndexWriter 的删除并不是真正的删除,只是把它标记为已删除,等到段合并时才会被真正清除

    可以用 maxDoc() 和 numDocs() 方法来查看(maxDoc 包含已标记删除的文档,numDocs 不包含)

  • 相关阅读:
    静态和伪静态
    数据库优化
    C#数组的排序(正序逆序)
    C# for和 foreach 的数组遍历 比较
    Python识别璇玑图中诗的数量
    Linux环境下配置matplotlib库使用中文绘图
    manjaro配置记录
    ubuntu环境下测试cache大小并校验
    ubuntu 单机配置hadoop
    cachestat 安装文档
  • 原文地址:https://www.cnblogs.com/tomcattd/p/2842254.html
Copyright © 2011-2022 走看看