zoukankan      html  css  js  c++  java
  • Lucene的学习及使用实验

    实验一下Lucene是怎么使用的。

    参考:http://www.importnew.com/12715.html (例子比较简单)

    http://www.yiibai.com/lucene/lucene_first_application.html (例子比较复杂)

    这里也有一个例子:http://www.tuicool.com/articles/aqIZNnE

    我用的版本比较高,是6.2.1版本,文档查阅:

    http://lucene.apache.org/core/6_2_1/core/index.html

    首先在Intellij里面创建一个Maven项目。名字为lucene-demo。(主要参考 http://www.importnew.com/12715.html )

    其中pom.xml如下:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Maven build descriptor for the lucene-demo sample project. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.myapp</groupId>
        <artifactId>lucene-demo</artifactId>
        <version>1.0-SNAPSHOT</version>
    
        <properties>
            <!-- Single place to bump Lucene; all Lucene artifacts must share one version. -->
            <lucene.version>6.2.1</lucene.version>
            <!-- Lucene 6.x requires Java 8; pin the level instead of relying on the
                 compiler plugin's default, which varies between plugin versions. -->
            <maven.compiler.source>1.8</maven.compiler.source>
            <maven.compiler.target>1.8</maven.compiler.target>
            <!-- Avoids the platform-dependent-encoding build warning. -->
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        </properties>
    
        <dependencies>
            <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>${lucene.version}</version>
            </dependency>
            <!-- Provides org.apache.lucene.queryparser.classic.QueryParser used by the demo. -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-queryparser</artifactId>
                <version>${lucene.version}</version>
            </dependency>
        </dependencies>
    
    
    </project>

    建了一个package:com.myapp.lucene,里面有一个class LuceneDemo,内容如下:

    package com.myapp.lucene;
    
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopScoreDocCollector;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.store.Directory;
    
    import java.io.IOException;
    
    /**
     * Created by baidu on 16/10/20.
     */
    public class LuceneDemo {
        // 0. Specify the analyzer for tokenizing text.
        // The same analyzer should be used for indexing and searching
        static StandardAnalyzer analyzer;
        static Directory index;
    
        static void prepareDoc() throws IOException{
            // 0. init analyzer
            analyzer = new StandardAnalyzer();
    
            // 1. create index
            index = new RAMDirectory();
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
    
            IndexWriter w = new IndexWriter(index, config);
    
            addDoc(w, "lucence tutorial", "123456");
            addDoc(w, "hi hi hi", "222");
            addDoc(w, "ok LUCENCE", "123");
            w.close();
        }
    
        static void addDoc(IndexWriter w, String text, String more) throws IOException{
            Document doc = new Document();
            doc.add(new TextField("text", text, Field.Store.YES));
            doc.add(new StringField("more", more, Field.Store.YES));
            w.addDocument(doc);
        }
    
        static void search(String str) throws ParseException, IOException {
            // 2. query
            Query q = new QueryParser("text", analyzer).parse(str);
    
            // 3. search
            int listNum = 10;
            IndexReader reader = DirectoryReader.open(index);
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(listNum);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
    
            // 4. display
            System.out.printf("Found %d docs.
    ", hits.length);
            for (int i=0; i<hits.length; i++) {
                int docId = hits[i].doc;
                Document doc = searcher.doc(docId);
                System.out.printf("Doc %d: text: %s, more: %s
    ", i+1, doc.get("text"), doc.get("more"));
            }
            reader.close();
    
        }
    
        public static void main(String[] args) {
            try {
                prepareDoc();
                search("Lucence");
            } catch (IOException e) {
                e.printStackTrace();
            } catch (ParseException e) {
                e.printStackTrace();
            }
    
        }
    }

    然后运行,能够成功:

    Found 2 docs.
    Doc 1: text: lucence tutorial, more: 123456
    Doc 2: text: ok LUCENCE, more: 123
    
    Process finished with exit code 0

    因为用的是RAMDirectory,索引只保存在内存中,所以不会在磁盘上创建实际的目录和文件,程序退出后索引也就不存在了。

    另外,代码和逻辑中有几点需要注意的地方:

    注意,对于需要分词的内容我们使用TextField,对于像id这样不需要分词的内容我们使用StringField。
    编码过程中,编译器报过好几次错,都是受检异常(如IOException、ParseException)没有处理的情况:要么用try/catch捕获,要么在方法签名上用throws声明。
    有些API的版本升级了,参数和以前不一样。在实际的代码中根据实际要求有所修改。一般都是简化了。
  • 相关阅读:
    Linux的常用用法
    docker入门实践01
    airflow安装rest api插件发现airflow webserver服务不能启动的解决办法
    27.Spark中transformation的介绍
    1.Cloudera Manager安装
    win10系统不能ping通vmware虚假机解决办法
    在airflow的BashOperator中执行docker容器中的脚本容易忽略的问题
    AirFlow后台运行调度程序
    Airflow怎么删除系统自带的DAG任务
    airflow删除dag不在页面显示
  • 原文地址:https://www.cnblogs.com/charlesblc/p/5980525.html
Copyright © 2011-2022 走看看