zoukankan html css js c++ java

lucene 5.2.0学习笔记

package com.bc.cas.manager;

import com.bc.cas.dao.BookDao;

import com.bc.cas.model.entity.Book;

import com.google.common.base.Objects;

import com.google.common.collect.Lists;

import org.apache.log4j.helpers.LogLog;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;

import org.apache.lucene.analysis.util.CharArraySet;

import org.apache.lucene.document.*;

import org.apache.lucene.index.*;

import org.apache.lucene.queryparser.classic.ParseException;

import org.apache.lucene.queryparser.classic.QueryParser;

import org.apache.lucene.search.*;

import org.apache.lucene.store.FSDirectory;

import org.junit.Test;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.stereotype.Repository;

import org.springframework.util.CollectionUtils;

import java.io.IOException;

import java.nio.file.Paths;

import java.util.Arrays;

import java.util.Iterator;

import java.util.List;

/**

* @Author Created by Administrator on 2016/11/29.

* @Version V 1.0.0

* @Desc 索引管理器

@Repository

public class IndexManager {

@Autowired

private BookDao bookDao;

static FSDirectory dir;

static {

try {

dir = FSDirectory.open(Paths.get("d:/lucene/lucene052_index05"));

} catch (IOException e) {

e.printStackTrace();

}

/**

* 创建索引 -common

@Test

public void createIndex(List<Document> docs) throws IOException {

if (docs == null) {

return;

}

// 自定义停用词

List<String> strings = Arrays.asList("的", "在", "了", "呢", "，", "0", "：", ",", "是", "这", "那", "么");

CharArraySet unUsefulWorlds = new CharArraySet(strings, true);

// 加入系统默认停用词

Iterator<Object> itor = SmartChineseAnalyzer.getDefaultStopSet().iterator();

while (itor.hasNext()) unUsefulWorlds.add(itor.next());

//指定了停用词的分词器

Analyzer analyzer = new SmartChineseAnalyzer(unUsefulWorlds);

IndexWriterConfig config = new IndexWriterConfig(analyzer);

IndexWriter writer = new IndexWriter(dir, config);

writer.addDocuments(docs);

writer.commit();

writer.close();

}

/**

* 执行查询 -common

* @param query

* @throws IOException

* @throws ParseException

private static void doQuery(Query query) throws IOException, ParseException {

IndexReader reader = DirectoryReader.open(dir);

IndexSearcher searcher = new IndexSearcher(reader);

//执行query,获取指定条数的顶行记录

TopDocs topDocs = searcher.search(query, 10);

System.out.println("搜索出的总记录数为: " + topDocs.totalHits);

//评分文档集合

ScoreDoc[] docs = topDocs.scoreDocs;

for (ScoreDoc doc : docs) {

//获取文档id

int id = doc.doc;

//根据文档id查询文档对象

Document document = searcher.doc(id);

//打印信息

System.out.println(

Objects.toStringHelper("docuemnt")

.add("文档id", document.get("id"))

.add("文档名称", document.get("name"))

.add("文档图片", document.get("pic"))

.add("文档描述", document.get("description"))

.toString());

}

reader.close();

}

/**

* 测试新建索引

public void testCreateIndex() {

List<Book> bookList = bookDao.findAll();

if (CollectionUtils.isEmpty(bookList)) return;

List<Document> docList = Lists.newArrayList();

Document doc;

for (Book book : bookList) {

doc = new Document();

doc.add(new StoredField("id", book.getId()));

doc.add(new StringField("name", book.getName(), Field.Store.YES));

doc.add(new TextField("pic", book.getPic(), Field.Store.YES));

doc.add(new TextField("description", book.getDescription(), Field.Store.YES));

docList.add(doc);

}

try {

createIndex(docList);

} catch (IOException e) {

LogLog.error(e.getMessage(), e);

}

/**

* 查询

* @throws IOException

@Test

public static void testQuery() throws IOException, ParseException {

QueryParser parser = new QueryParser("description", new SmartChineseAnalyzer());

Query query = parser.parse("description:java AND lucene");

doQuery(query);

}

Field类的子类和说明, 以及用法 (各列依次为: 构造方法、数据类型、是否分词、是否索引、示例):

StoredField(FieldName, FieldValue)	重载,支持多种类型	不分词	不索引	示例: 图片路径、文件路径等只需存储而无需检索的数据
StringField(FieldName, String FieldValue, Store.YES)	字符串类型数据	不分词	索引	示例: 订单号, id, 手机号等
LongField(FieldName, Long FieldValue, Store.YES)	Long型数据	分词	索引	示例: 价格
TextField(FieldName, FieldValue, Store.YES) 或 TextField(FieldName, Reader)	字符串或者流	分词	索引

来自为知笔记(Wiz)

查看全文

相关阅读:
JavaScript学习总结(八)——JavaScript数组
 oracle数据库优化学习笔记
 把连续日期组织起来的算法
 转：andriod的盈利模式分析
 ASP.NET 页生命周期
 .NET垃圾回收机制[引用]
IIS 7.0 的 ASP.NET 应用程序生命周期
 table滑动选择行及从表记录对应js代码
 hdu 3594 Cactus
Java 计算器

原文地址：https://www.cnblogs.com/duenboa/p/6665473.html