zoukankan html css js c++ java

Lucene學習日志

1.概念

从全文数据中进行检索就叫全文检索(全文搜索)。是基于文本数据的搜索。

- 结构化数据：指具有“固定格式”或“有限长度”的数据，如数据库，元数据等;

- 非结构化数据：指不定长或无固定格式的数据，如邮件，word文档等;

- 半结构化数据：如XML，HTML等，根据需要可按结构化数据来处理，也可抽取出纯文本按非结构化数据来处理。

2.特點

相关度最高的排在最前面，官网中相关的网页排在最前面；

对摘要进行了截取；

关键词的高亮。

只关注文本,不考虑语义。

3.使用場景

--替换数据库模糊查询，提高查询速度

--全文索引是搜索引擎的基础

--只对“指定领域”的网站进行索引与搜索

4.核心

索引创建，索引搜索

5.入門

Apache Lucene是一个用Java写的高性能、可伸缩的全文检索引擎工具包，它可以方便的嵌入到各种应用（java应用）中实现针对应用的全文索引/检索功能。Lucene的目标是为各种中小型应用程序加入全文检索功能。

ElasticSearch 全文检索服务器，底层还是lucene，后面会讲

Lucene的核心作者：Doug Cutting是一位资深全文索引/检索专家。

--helloworld

　　Lucene的索引库和数据库一样，都提供相应的API来便捷操作。

　　Lucene中的索引维护使用IndexWriter，由这个类提供添删改相关的操作；索引的搜索则是使用IndexSearcher进行索引的搜索。HelloWorld代码如下,导入三个jar包：lucene-analyzers-common-5.5.0.jar，lucene-core-5.5.0.jar，lucene-queryparser-5.5.0.jar

6.項目實戰（ssm）

　　修改比较少，查询比较多，大量字符串的检索--lucene

　　步驟：　

　　1、前台，在页面中添加一个“重建索引”按钮。（以一个http请求代替）

　　2、前台，点击“重建索引”后，调用后台controller方法。

　　3、后台，查询需要创建索引的对象

　　4、后台，遍历这些对象，封装成Document对象

　　5、后台，添加索引

　　　1导包

<!-- Lucene core library. ${lucene.version} must be defined in the POM's <properties>;
     presumably 5.5.0 to match the jars named earlier in this article (verify). -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>${lucene.version}</version>
</dependency>
<!-- Lucene query parser module, same version property as lucene-core. -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>${lucene.version}</version>
</dependency>
<!-- IK Analyzer. NOTE(review): the cn.itsource group id looks like a privately repackaged
     build; confirm which repository serves this artifact. -->
<dependency>
    <groupId>cn.itsource</groupId>
    <artifactId>IKAnalyzer</artifactId>
    <version>2012.V5</version>
</dependency>
2.抽工具类

/**
 * Enum-based singleton that hands out Lucene {@link IndexWriter} and {@link IndexReader}
 * instances for the index directory configured in {@code lucene.properties}.
 *
 * <p>The index path is read once, when the class is initialized, from the key
 * {@code lucenen.indexpath}. If the file or the key is missing, the problem is reported as an
 * {@link IOException} the first time a writer or reader is requested.
 */
public enum LuceneUtils {

    INSTANCE;

    private LuceneUtils(){}

    /** Index directory path from the properties file, or {@code null} if it could not be read. */
    private static final String indexPath;

    /** Failure raised while reading the properties file, kept as the cause for later errors. */
    private static final IOException loadFailure;

    /** Analyzer shared by the writer configuration and by query parsing. */
    public final Analyzer analyzer = new IKAnalyzer();

    // Static block: initializes the static fields from lucene.properties on the classpath.
    static {
        String configuredPath = null;
        IOException failure = null;
        // try-with-resources so the properties stream is always closed.
        try (java.io.InputStream in =
                     LuceneUtils.class.getClassLoader().getResourceAsStream("lucene.properties")) {
            // getResourceAsStream returns null (it does not throw) when the resource is absent.
            if (in != null) {
                Properties properties = new Properties();
                properties.load(in);
                // NOTE(review): the key is spelled "lucenen.indexpath" (extra 'n'); kept as-is
                // because it must match the properties file - confirm the intended spelling.
                configuredPath = properties.getProperty("lucenen.indexpath");
            }
        } catch (IOException e) {
            failure = e;
        }
        indexPath = configuredPath;
        loadFailure = failure;
    }

    /**
     * Opens a new {@link IndexWriter} on the configured index directory.
     *
     * <p>A fresh {@link IndexWriterConfig} is created for every writer. The caller owns the
     * returned writer and must close it (or roll it back).
     *
     * @return a new writer using the shared analyzer
     * @throws IOException if the index path is not configured or the index cannot be opened
     */
    public IndexWriter getIndexWriter() throws IOException {
        Directory directory = getDirectory();
        try {
            return new IndexWriter(directory, new IndexWriterConfig(analyzer));
        } catch (IOException | RuntimeException e) {
            // Do not leak the directory handle when the writer cannot be created.
            closeAfterFailure(directory, e);
            throw e;
        }
    }

    /**
     * Opens a new {@link IndexReader} on the configured index directory.
     *
     * <p>The caller owns the returned reader and must close it.
     *
     * @return a new reader over the index
     * @throws IOException if the index path is not configured or the index cannot be opened
     */
    public IndexReader getIndexReader() throws IOException {
        Directory directory = getDirectory();
        try {
            return DirectoryReader.open(directory);
        } catch (IOException | RuntimeException e) {
            // Do not leak the directory handle when the reader cannot be created.
            closeAfterFailure(directory, e);
            throw e;
        }
    }

    /**
     * Opens the file-system directory that holds the index.
     *
     * @return the opened directory
     * @throws IOException if the index path is not configured or the directory cannot be opened
     */
    private Directory getDirectory() throws IOException {
        if (indexPath == null || indexPath.isEmpty()) {
            throw new IOException(
                    "Lucene index path is not configured: expected key 'lucenen.indexpath' "
                            + "in lucene.properties on the classpath",
                    loadFailure);
        }
        Path path = Paths.get(indexPath);
        return FSDirectory.open(path);
    }

    /** Closes {@code directory}, attaching any close error to {@code failure} as suppressed. */
    private static void closeAfterFailure(Directory directory, Exception failure) {
        try {
            directory.close();
        } catch (IOException closeFailure) {
            failure.addSuppressed(closeFailure);
        }
    }

}
3.接口

/**
 * Operations for maintaining and searching the index of {@code Doc} records.
 */
public interface IDocIndexHelper {

    /**
     * Updates the index with the given documents.
     *
     * @param docs the documents to index
     * @throws Exception if the index update fails
     */
    void updateIndex(List<Doc> docs) throws Exception;

    /**
     * Searches the index.
     *
     * @param query the search criteria
     * @return a page of matching documents
     * @throws Exception if the search fails
     */
    PageList<Doc> searchIndex(BaseQuery query) throws Exception ;
}

4.实现类

public class DocIndexHelperImpl implements IDocIndexHelper {


    /**
     * 更新索引
     * @param docs
     */
    @Override
    public void updateIndex(List<Doc> docs) throws Exception {
        IndexWriter indexWriter = LuceneUtils.INSTANCE.getIndexWriter();
        //先删除
        indexWriter.deleteAll();
        //再添加
        for (Doc doc : docs) {
            Document document = parseDoc(doc);
            indexWriter.addDocument(document);
        }
        indexWriter.commit();
        indexWriter.close();
    }

    /**
     * 索引搜索
     * @param query
     * @return
     */
    @Override
    public PageList<Doc> searchIndex(BaseQuery query) throws Exception {
        PageList<Doc> pageList = new PageList<>();

        //获取IndexReader
        IndexReader indexReader = LuceneUtils.INSTANCE.getIndexReader();
        //创建查询对象Query
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(new TermQuery(new Term("recordtypenumber",query.getKeyword())), BooleanClause.Occur.SHOULD);
        builder.add(new QueryParser("recordtitle",LuceneUtils.INSTANCE.analyzer).parse(query.getKeyword()), BooleanClause.Occur.SHOULD);
        //创建IndexSearch
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        //执行查询
        int n = query.getPage()*query.getRows();
        TopDocs topDocs = indexSearcher.search(builder.build(), n);
        //遍历结果,封装到Doc
        Integer total = topDocs.totalHits;
        pageList.setTotal(total.longValue());

        //拿到检索的结果
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        int begin = n-query.getRows();//有数学的逻辑思维
        for(int i=begin;i<scoreDocs.length;i++){
            int docID = scoreDocs[i].doc;
            Document document = indexSearcher.doc(docID);
            Doc doc = parseDocument(document);
            pageList.getRows().add(doc);
        }
        return pageList;
    }

    /**
     * 类型转换  Document转Doc
     * @param document
     * @return
     */
    private Doc parseDocument(Document document) {
        Doc doc = new Doc();
        doc.setId(Long.parseLong(document.get("id")));
        doc.setRecordtypenumber(document.get("recordtypenumber"));
        doc.setRecordtypeId(StringUtils.isEmpty(document.get("recordtypeId"))?null:Long.parseLong(document.get("recordtypeId")));
        doc.setRecordstoragesiteId(StringUtils.isEmpty(document.get("recordstoragesiteId"))?null:Long.parseLong(document.get("recordstoragesiteId")));
        doc.setRecordtitle(document.get("recordtitle"))；
        return doc;
    }

    /**
     * 类型转换
     * @param doc
     * @return
     */
    private Document parseDoc(Doc doc) {

        //这里为了节省时间，将所有的字段都存入到了所以库并进行了分词+索引


        //思考：那些Field需要在前台展示，哪些Field需要作为查询条件
        //需要展示的则要存储，需要查询的则需要分词+索引
        Document document = new Document();
        Class clazz = doc.getClass();
        java.lang.reflect.Field[] fields = clazz.getDeclaredFields();
        try {
            for (java.lang.reflect.Field field : fields) {
                field.setAccessible(true);
                String name = field.getName();
                String value = field.get(doc)!=null?field.get(doc).toString():"";
                document.add(new TextField(name,value, Field.Store.YES));
            }
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        }
        return document;
    }


}
--controller

/**
 * Handles {@code GET /updateIndex} by asking the doc service to update the index.
 *
 * @return a default-constructed {@code AjaxResult} when the update succeeds, otherwise an
 *         {@code AjaxResult} built from the failure message
 */
@RequestMapping(value = "/updateIndex",method = RequestMethod.GET)
@ResponseBody
public AjaxResult updateIndex(){
    AjaxResult result;
    try {
        docService.updateIndex();
        result = new AjaxResult();
    } catch (Exception failure) {
        failure.printStackTrace();
        result = new AjaxResult("更新失败!"+failure.getMessage());
    }
    return result;
}

--service

/**
 * Service for {@code Doc} records; adds an index update operation to the base service.
 */
public interface IDocService extends IBaseService<Doc> {

    /**
     * Updates the index.
     *
     * @throws Exception if the update fails
     */
    void updateIndex() throws Exception ;
}
--service实现类

/**
 * Updates the index with every record returned by {@code docMapper.selectAll()}.
 *
 * @throws Exception if loading the records or updating the index fails
 */
@Override
public void updateIndex() throws Exception {
    // Load all records and hand them straight to the index helper.
    docIndexHelper.updateIndex(docMapper.selectAll());
}
--mapper中自己写sql

查看全文

相关阅读:
案例分析：从一则笑话分析需求的陷阱
 2019寒假培训第二天
 2019寒假培训第一天
 牛客网国庆集训派对Day6 题目 2018年
 unique STL讲解和模板
 SPFA 模板
 ACM Shenyang Onsite 2016 题目
 牛客网国庆集训派对Day5 题目 2018年
 The North American Invitational Programming Contest 2017 题目
 牛客网国庆集训派对Day4题目 2018年

原文地址：https://www.cnblogs.com/wgyi140724-/p/10575068.html