全文检索lucene6.1的检索方式

zoukankan html css js c++ java

全文检索lucene6.1的检索方式
背景：

工作任务完成后，闲暇之际给自己充充电！

Lucene是一个纯java全文检索工具包，采用倒排索引原理。

全文检索：指的是计算机索引程序通过扫描文章的每一个词，对每一个词建立一个索引，并指明该词在文章中出现的次数和位置。

索引的类型分为：1：唯一索引、2：主键索引、3：聚集索引。索引就是加快检索表中数据的方法。

搜索：
一：按被搜索的资源类型
1、可以转为文本的
2、多媒体类型的
二：按照搜索方式：
1、不处理语义，只是找出现了指定词语的所有文本。（指对词语进行匹配）
基本概念：
1、使用流程：先建立索引（索引库），再进行搜索。
2、使用Lucene的数据结构，document、field。
建立索引的过程：
1、定义一个语法分词器
2、确定索引存储的位置
3、创建IndexWriter，进行索引的写入
4、内容提取，进行索引文件的写入
5、关闭indexWriter
从索引库中搜索的过程：
1、打开存储位置
2、创建搜索器
3、类似SQL进行查询
4、处理结果
5、关闭DirectoryReader

-----------------------------------------------------------------------------------------------------------------
/**

* @项目名称：lucene

* @类名称：Article

* @类描述：这是一个文章实体类

* @创建人：YangChao

* @创建时间：2016年8月30日下午3:11:38

* @version 1.0.0

*/

public class Article {

    private Integer id;

    private String title;

    private String content;

}

/**
 * Conversion helpers between {@link Article} entities and Lucene {@link Document}s.
 *
 * <p>Project: lucene. Created 2016-08-31 10:15:22.
 *
 * @author YangChao
 * @version 1.0.0
 */
public class DocumentUtils {

    /**
     * Builds a Lucene document carrying the article's id, title and content.
     *
     * <p>All three values are added as fields of type {@code TextField.TYPE_STORED},
     * under the names "id", "title" and "content".
     *
     * @param article the article to convert; its id is rendered with {@code toString()}
     * @return a newly created document holding the three fields
     */
    public static Document article2Document(Article article) {
        Document result = new Document();
        result.add(storedText("id", article.getId().toString()));
        result.add(storedText("title", article.getTitle()));
        result.add(storedText("content", article.getContent()));
        return result;
    }

    /**
     * Rebuilds an article from the "id", "title" and "content" values of a document.
     *
     * @param doc the document to read; its "id" value is parsed with {@code Integer.parseInt}
     * @return a new article populated from the document
     */
    public static Article document2Ariticle(Document doc) {
        int id = Integer.parseInt(doc.get("id"));
        String title = doc.get("title");
        String content = doc.get("content");

        Article restored = new Article();
        restored.setId(id);
        restored.setTitle(title);
        restored.setContent(content);
        return restored;
    }

    /** Creates a field of type {@code TextField.TYPE_STORED} for the given name and value. */
    private static Field storedText(String name, String value) {
        return new Field(name, value, TextField.TYPE_STORED);
    }

}

/**
 * Provides the shared analyzer and index location used by the rest of the project.
 *
 * <p>Project: lucene. Created 2016-08-31 09:48:06.
 *
 * @author YangChao
 * @version 1.0.0
 */
public class LuceneUtils {

    private static Logger logger = Logger.getLogger(LuceneUtils.class);

    /** Index storage, opened once when the class is loaded. */
    private static Directory directory;

    /** Analyzer instance, created once when the class is loaded. */
    private static Analyzer analyzer;

    static {
        initialise();
    }

    /**
     * Opens the index directory and creates the analyzer.
     *
     * <p>A failure is logged rather than rethrown, so whichever field had not been
     * assigned yet stays {@code null}.
     */
    private static void initialise() {
        try {
            directory = FSDirectory.open(Paths.get("./tmp/testindex"));
            // Alternative analyzer left by the author: new StandardAnalyzer()
            analyzer = new SmartChineseAnalyzer();
        } catch (Exception initFailure) {
            logger.error("LuceneUtils error!", initFailure);
        }
    }

    /**
     * @return the directory opened during class initialisation
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * @return the analyzer created during class initialisation
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Closes the given writer, logging (not rethrowing) any failure.
     *
     * @param indexWriter the writer to close; {@code null} is ignored
     */
    public static void closeIndexWriter(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (Exception closeFailure) {
            logger.error("indexWriter.close error", closeFailure);
        }
    }

}

/**
 * Result set: a count together with a list of result items.
 *
 * <p>Project: lucene. Created 2016-08-31 16:56:24.
 *
 * <p>{@code IndexDao.search} fills {@code count} with the total number of hits and
 * {@code list} with the articles of the requested page.
 *
 * @author YangChao
 * @version 1.0.0
 */
public class QueryResult {

    /** Count associated with this result. */
    private int count;

    /** Result items; {@code null} until assigned. */
    private List<?> list;

    /** Creates an empty result with a count of 0 and no list. */
    public QueryResult() {
        super();
    }

    /**
     * Creates a result with the given count and items.
     *
     * @param count the count to store
     * @param list the result items to store (kept by reference, not copied)
     */
    public QueryResult(int count, List<?> list) {
        super();
        this.count = count;
        this.list = list;
    }

    /**
     * @return the stored count
     */
    public int getCount() {
        return count;
    }

    /**
     * @param count the count to store
     */
    public void setCount(int count) {
        this.count = count;
    }

    /**
     * @return the stored result items, or {@code null} if none were set
     */
    public List<?> getList() {
        return list;
    }

    /**
     * @param list the result items to store (kept by reference, not copied)
     */
    public void setList(List<?> list) {
        this.list = list;
    }

}

/**
 * Data-access object that writes articles to the Lucene index and searches it.
 *
 * <p>Project: lucene. Created 2016-08-31 10:12:05.
 *
 * <p>Every write operation opens its own {@link IndexWriter} on
 * {@code LuceneUtils.getDirectory()} and closes it again through
 * {@code LuceneUtils.closeIndexWriter}. Failures are logged, not rethrown.
 *
 * @author YangChao
 * @version 1.0.0
 */
public class IndexDao {

    private static Logger logger = Logger.getLogger(IndexDao.class);

    /**
     * Adds the given article to the index.
     *
     * @param article the article to index
     */
    public void save(Article article) {
        Document doc = DocumentUtils.article2Document(article);
        IndexWriter indexWriter = null;
        try {
            IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.getAnalyzer());
            indexWriter = new IndexWriter(LuceneUtils.getDirectory(), config);
            indexWriter.addDocument(doc);
        } catch (Exception e) {
            logger.error("IndexDao.save error", e);
        } finally {
            LuceneUtils.closeIndexWriter(indexWriter);
        }
    }

    /**
     * Deletes every document whose "id" field contains the given term.
     *
     * @param id the id value to match
     */
    public void delete(String id) {
        IndexWriter indexWriter = null;
        try {
            // NOTE(review): DocumentUtils indexes "id" as an analyzed text field, so this
            // exact-term match presumably relies on the analyzer keeping the id as a
            // single token - confirm for the ids in use.
            Term term = new Term("id", id);
            IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.getAnalyzer());
            indexWriter = new IndexWriter(LuceneUtils.getDirectory(), config);
            indexWriter.deleteDocuments(term);
        } catch (Exception e) {
            // Previously logged as "IndexDao.save error" (copy-paste slip).
            logger.error("IndexDao.delete error", e);
        } finally {
            LuceneUtils.closeIndexWriter(indexWriter);
        }
    }

    /**
     * Replaces the indexed document(s) matching the article's id with a fresh
     * document built from the article (delete first, then add).
     *
     * @param article the article whose index entry should be replaced
     */
    public void update(Article article) {
        Document doc = DocumentUtils.article2Document(article);
        IndexWriter indexWriter = null;
        try {
            Term term = new Term("id", article.getId().toString());
            IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.getAnalyzer());
            indexWriter = new IndexWriter(LuceneUtils.getDirectory(), config);
            indexWriter.updateDocument(term, doc);
        } catch (Exception e) {
            // Previously logged as "IndexDao.save error" (copy-paste slip).
            logger.error("IndexDao.update error", e);
        } finally {
            LuceneUtils.closeIndexWriter(indexWriter);
        }
    }

    /**
     * Searches the "title" and "content" fields and returns one page of results.
     *
     * <p>For each hit on the page, the article content is replaced by the best
     * highlighted fragment when the highlighter produces one.
     *
     * @param queryString the query text, parsed with {@link MultiFieldQueryParser}
     * @param firstResult zero-based index of the first hit to return
     * @param maxResult maximum number of hits to return
     * @return the total hit count together with the articles of the requested page,
     *         or {@code null} if the search failed (the failure is logged)
     */
    public QueryResult search(String queryString, int firstResult, int maxResult) {
        List<Article> list = new ArrayList<Article>();
        // Step 1: open the index. try-with-resources closes the reader even when
        // parsing, searching or highlighting throws.
        try (DirectoryReader ireader = DirectoryReader.open(LuceneUtils.getDirectory())) {
            // Step 2: create the searcher.
            IndexSearcher isearcher = new IndexSearcher(ireader);

            // Step 3: build the keyword query from the caller's input.
            String[] fields = { "title", "content" };
            QueryParser parser = new MultiFieldQueryParser(fields, LuceneUtils.getAnalyzer());
            Query query = parser.parse(queryString);

            // The second argument caps how many top hits are collected; everything up
            // to the end of the requested page is needed.
            TopDocs topDocs = isearcher.search(query, firstResult + maxResult);
            int count = topDocs.totalHits; // total number of matching documents
            logger.debug("总记录数为：" + count);
            ScoreDoc[] hits = topDocs.scoreDocs;

            // Highlighting: wrap matched terms in a red <font> tag.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer source = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, source);
            // Optional summary: the author left a disabled
            // highlighter.setTextFragmenter(new SimpleFragmenter(5)) call here.

            // Step 4: convert the hits of the requested page.
            int endIndex = Math.min(firstResult + maxResult, hits.length);
            for (int i = firstResult; i < endIndex; i++) {
                Document hitDoc = isearcher.doc(hits[i].doc);
                Article article = DocumentUtils.document2Ariticle(hitDoc);
                String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "content", hitDoc.get("content"));
                // Keep the stored content when no fragment was produced.
                if (text != null) {
                    article.setContent(text);
                }
                list.add(article);
            }
            return new QueryResult(count, list);
        } catch (Exception e) {
            logger.error("IndexDao.search error", e);
        }
        return null;
    }

}

Lucene详细学习地址:http://www.cnblogs.com/zhuxiaojie/p/5277219.html
查看全文

相关阅读:
泛型
 内部类及匿名内部类
 BigDecimal
JodaTime简介
 Java中IO流
 Spring的ApplicationEvent的使用
 swagger文档使用(springboot项目)
http连接过程遇到的各种性能瓶颈
 http网络连接过程
 python中的TypeError: 'NavigableString' object is not callable错误

原文地址：https://www.cnblogs.com/poilk/p/6600186.html

全文检索lucene6.1的检索方式

背景：