zoukankan html css js c++ java

Lucene 快速入门

Lucene大大简化了在应用中集成全文搜索的功能。但实际上Lucene十分简单，我可以在五分钟之内向你展示如何使用Lucene。

1. 建立索引

为了简单起见，我们下面为一些字符串创建内存索引：

// Analyzer used to tokenize text; the same instance is passed to the query parser later on.
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

// Keep the index in memory (RAMDirectory) to keep the example simple.
Directory index = new RAMDirectory();

IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);

IndexWriter w = new IndexWriter(index, config);

// Each call adds one document built from a (title, isbn) pair.
addDoc(w, "Lucene in Action", "193398817");

addDoc(w, "Lucene for Dummies", "55320055Z");

addDoc(w, "Managing Gigabytes", "55063554A");

addDoc(w, "The Art of Computer Science", "9900333X");

// All documents have been added; close the writer.
w.close();

addDoc()方法把文档（译者注：这里的文档是Lucene中的Document类的实例）添加到索引中。

/**
 * Builds a document from a book's title and ISBN and adds it to the index.
 *
 * @param w     writer the new document is added to
 * @param title book title; kept in a {@code TextField} because it should be tokenized
 * @param isbn  book ISBN; kept in a {@code StringField} because it should not be tokenized
 * @throws IOException propagated from {@code w.addDocument}
 */
private static void addDoc(IndexWriter w, String title, String isbn) throws IOException {
  Document book = new Document();

  TextField titleField = new TextField("title", title, Field.Store.YES);
  book.add(titleField);

  StringField isbnField = new StringField("isbn", isbn, Field.Store.YES);
  book.add(isbnField);

  w.addDocument(book);
}

注意，对于需要分词的内容我们使用TextField，对于像id这样不需要分词的内容我们使用StringField。

2.搜索请求

我们从命令行参数中读入搜索请求（如果没有提供参数，则默认使用“lucene”），然后对它进行解析，最后创建一个Lucene中的Query对象。

String querystr = args.length > 0 ? args[0] : "lucene";

Query q = new QueryParser(Version.LUCENE_40, "title", analyzer).parse(querystr);

3.搜索

我们创建一个Searcher对象并且使用上面创建的Query对象来进行搜索，匹配到的前10个结果封装在TopScoreDocCollector对象里返回。

// Maximum number of top-scoring hits to collect.
int hitsPerPage = 10;

// Open the reader through DirectoryReader, the same call the complete listing uses.
IndexReader reader = DirectoryReader.open(index);

IndexSearcher searcher = new IndexSearcher(reader);

TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

// Run the query; the collector keeps at most hitsPerPage matches.
searcher.search(q, collector);

ScoreDoc[] hits = collector.topDocs().scoreDocs;

4.展示

现在我们得到了搜索结果，我们需要向用户展示它。

// Report how many hits were collected, then list them one per line.
System.out.println("Found " + hits.length + " hits.");

int rank = 0;
for (ScoreDoc hit : hits) {
    rank++;
    // Load the stored fields of the matching document by its document id.
    Document match = searcher.doc(hit.doc);
    String isbn = match.get("isbn");
    String title = match.get("title");
    System.out.println(rank + ". " + isbn + "	" + title);
}

完整代码如下：

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.StringField;

import org.apache.lucene.document.TextField;

import org.apache.lucene.index.DirectoryReader;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.queryparser.classic.ParseException;

import org.apache.lucene.queryparser.classic.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopScoreDocCollector;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.RAMDirectory;

import org.apache.lucene.util.Version;

 

import java.io.IOException;

 

/**
 * Minimal end-to-end Lucene example: indexes four book titles in memory,
 * parses a query, searches the index and prints the matching books.
 */
public class HelloLucene {

  /**
   * Builds the in-memory index, runs one query against it and prints the hits.
   *
   * @param args optional; {@code args[0]} is the query string, defaulting to
   *             {@code "lucene"} when no argument is given
   * @throws IOException    if indexing, searching or closing a resource fails
   * @throws ParseException if the query string cannot be parsed
   */
  public static void main(String[] args) throws IOException, ParseException {
    // 0. Specify the analyzer for tokenizing text.
    //    The same analyzer should be used for indexing and searching
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

    // 1. create the index
    Directory index = new RAMDirectory();

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);

    IndexWriter w = new IndexWriter(index, config);
    try {
      addDoc(w, "Lucene in Action", "193398817");
      addDoc(w, "Lucene for Dummies", "55320055Z");
      addDoc(w, "Managing Gigabytes", "55063554A");
      addDoc(w, "The Art of Computer Science", "9900333X");
    } finally {
      // Close the writer even when adding a document fails, so it is never leaked.
      w.close();
    }

    // 2. query
    String querystr = args.length > 0 ? args[0] : "lucene";

    // the "title" arg specifies the default field to use
    // when no field is explicitly specified in the query.
    Query q = new QueryParser(Version.LUCENE_40, "title", analyzer).parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
      searcher.search(q, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;

      // 4. display results
      System.out.println("Found " + hits.length + " hits.");
      for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        System.out.println((i + 1) + ". " + d.get("isbn") + "	" + d.get("title"));
      }
    } finally {
      // reader can only be closed when there
      // is no need to access the documents any more.
      // Closing in finally also releases it when searching or printing fails.
      reader.close();
    }
  }

  /**
   * Builds a document from a book's title and ISBN and adds it to the index.
   *
   * @param w     writer the new document is added to
   * @param title book title, indexed as a tokenized text field
   * @param isbn  book ISBN, indexed as a single untokenized string field
   * @throws IOException propagated from {@code w.addDocument}
   */
  private static void addDoc(IndexWriter w, String title, String isbn) throws IOException {
    Document doc = new Document();
    doc.add(new TextField("title", title, Field.Store.YES));

    // use a string field for isbn because we don't want it tokenized
    doc.add(new StringField("isbn", isbn, Field.Store.YES));
    w.addDocument(doc);
  }

}

查看全文

相关阅读:
sqlserver 批量删除所有表语句
 C# 中的委托和事件
 Oracle建立用户
 C# Linq获取两个List或数组的差集交集
 Linux下Redis安装与配置操作说明
 word缩印
 centos7上的postgresql10安装和配置
 numpy技巧
 发票二维码扫描增强_06_持续优化
 发票二维码扫描增强_05_构建目标二维码

原文地址：https://www.cnblogs.com/taich-flute/p/7883123.html