zoukankan      html  css  js  c++  java
  • lucene案例-blog

      本demo功能:

      1)新增博客,并添加lucene索引;以及更新、删除博客(同时维护索引);以及通过lucene索引搜索博客;

      2)添加lucene索引时使用lucene-analyzers-smartcn中文分词,搜索的结果进行高亮显示。

      demo结构

      pom.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Maven build for the blog demo: Spring Boot web app + MyBatis/MySQL + Lucene search. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <!-- Dependencies below that omit <version> rely on this parent for it. -->
        <parent>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-parent</artifactId>
            <version>2.1.1.RELEASE</version>
            <relativePath /> <!-- lookup parent from repository -->
        </parent>
        <groupId>com.oy</groupId>
        <artifactId>blog</artifactId>
        <version>1.0.0</version>
        <packaging>jar</packaging>
        <name>blog-demo</name>
        <description>Demo project for Spring Boot</description>
    
        <properties>
            <java.version>1.8</java.version>
        </properties>
    
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-web</artifactId>
            </dependency>
            <!-- NOTE(review): "provided" scope is usually paired with war packaging, but this
                 project is packaged as a jar; confirm the scope is intended. -->
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-tomcat</artifactId>
                <scope>provided</scope>
            </dependency>
    
            <!-- Thymeleaf view templates -->
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-thymeleaf</artifactId>
            </dependency>
    
            <!-- Persistence: MyBatis on MySQL -->
            <dependency>
                <groupId>org.mybatis.spring.boot</groupId>
                <artifactId>mybatis-spring-boot-starter</artifactId>
                <version>1.3.2</version>
            </dependency>
            <dependency>
                <groupId>mysql</groupId>
                <artifactId>mysql-connector-java</artifactId>
                <version>5.1.36</version>
            </dependency>
    
            <dependency>
                <groupId>commons-lang</groupId>
                <artifactId>commons-lang</artifactId>
                <version>2.5</version>
            </dependency>
    
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.75</version>
            </dependency>
    
            <!-- Baidu rich-text editor (UEditor) -->
            <dependency>
                <groupId>commons-fileupload</groupId>
                <artifactId>commons-fileupload</artifactId>
                <version>1.3.1</version>
            </dependency>
    
            <!-- Lucene: all modules are pinned to the same version (5.3.1). -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>5.3.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-common</artifactId>
                <version>5.3.1</version>
            </dependency>
            <!-- Chinese word segmentation (SmartChineseAnalyzer) used for indexing and searching -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-smartcn</artifactId>
                <version>5.3.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-queryparser</artifactId>
                <version>5.3.1</version>
            </dependency>
            <!-- Highlighting of matched terms in search results -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-highlighter</artifactId>
                <version>5.3.1</version>
            </dependency>
    
        </dependencies>
    
        <build>
            <plugins>
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                </plugin>
            </plugins>
        </build>
    
    </project>

      application.properties

    # HTTP listener
    server.port=80
    server.servlet.context-path=/
    
    # Logging
    logging.level.root=info
    logging.file=d:/logs/boot-demo.log
    
    # Datasource
    spring.datasource.driver-class-name=com.mysql.jdbc.Driver
    spring.datasource.url=jdbc:mysql://127.0.0.1:3306/db_blog?useUnicode=true&characterEncoding=utf8&serverTimezone=GMT%2B8
    spring.datasource.username=root
    spring.datasource.password=
    # NOTE(review): this key configures the Tomcat JDBC pool; confirm that pool is the one
    # actually in use, otherwise the setting has no effect.
    spring.datasource.tomcat.min-idle=5
    
    ##################### MyBatis configuration [start] #####################
    # MyBatis mapper XML files
    mybatis.mapper-locations=classpath:com/oy/mapping/*.xml
    # Package scanned for entity type aliases; used together with the @Alias annotation
    mybatis.type-aliases-package=com.oy.entity
    # MyBatis configuration file; can be used when the configuration becomes more complex
    #mybatis.config-location=
    # Lazy loading of associations. true: lazy loading enabled
    mybatis.configuration.lazy-loading-enabled=true
    # Aggressive lazy loading. false: load on demand
    mybatis.configuration.aggressive-lazy-loading=false
    ##################### MyBatis configuration [end]  ######################
    
    # Directory of the blog Lucene index
    indexDir=D:/blogLuceneIndexDir
    # Number of records per page when paging keyword-search results from the blog index
    blogLuceneIndexShowRows=10

      sql.txt

    -- Blog posts. BlogIndex copies id, title, releaseDate and contentNoTag into the Lucene index.
    CREATE TABLE `blog` (
      `id` int(11) NOT NULL AUTO_INCREMENT,
      `title` varchar(200) DEFAULT NULL,
      `summary` varchar(400) DEFAULT NULL,
      `releaseDate` datetime DEFAULT NULL,
      `content` text,
      `keyWord` varchar(200) DEFAULT NULL,
      -- Post body with HTML tags removed; this is what gets indexed as the "content" field.
      `contentNoTag` text,
      PRIMARY KEY (`id`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;

      BlogController:新增博客,并添加lucene索引;以及更新、删除博客(同时维护索引);以及通过lucene索引搜索博客

    package com.oy.controller;
    
    import java.util.Date;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.beans.factory.annotation.Value;
    import org.springframework.stereotype.Controller;
    import org.springframework.ui.Model;
    import org.springframework.web.bind.annotation.GetMapping;
    import org.springframework.web.bind.annotation.PathVariable;
    import org.springframework.web.bind.annotation.PostMapping;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.RequestParam;
    import org.springframework.web.bind.annotation.ResponseBody;
    import org.springframework.web.servlet.config.annotation.ViewControllerRegistry;
    import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
    
    import com.oy.entity.Blog;
    import com.oy.lucene.BlogIndex;
    import com.oy.service.BlogService;
    import com.oy.util.StringUtil;
    
    /**
     * Web controller for the blog pages: list, create/edit, delete, and keyword search.
     *
     * <p>Each database write (add, update, delete) is followed by the matching change to the
     * Lucene index located at {@code indexDir}; searches are answered from that index.
     *
     * @author oy
     * @version 1.0
     * @date 2021-01-24
     * @time 17:04:21
     */
    @Controller
    @RequestMapping("/blog")
    public class BlogController implements WebMvcConfigurer {

        /** Directory of the blog Lucene index, injected from the {@code indexDir} property. */
        @Value("${indexDir}")
        private String indexDir;

        /** Records per page for search results, injected from {@code blogLuceneIndexShowRows}. */
        @Value("${blogLuceneIndexShowRows}")
        private Integer rows;

        @Autowired
        private BlogService blogService;

        /** Maps {@code /page/blog/save} straight to the {@code blog/save} view (empty form). */
        @Override
        public void addViewControllers(ViewControllerRegistry registry) {
            registry.addViewController("/page/blog/save").setViewName("blog/save");
        }

        /**
         * Shows all blogs.
         *
         * @param model receives the result under {@code blogList}
         * @return the {@code blog/list} view name
         */
        @GetMapping("/list")
        public String list(Model model) {
            // An empty filter map is passed, i.e. no query conditions are supplied.
            Map<String, Object> queryInfo = new HashMap<>();
            model.addAttribute("blogList", blogService.findBlog(queryInfo));
            return "blog/list";
        }

        /**
         * Loads a blog and opens the save form pre-filled with it.
         *
         * @param id    id of the blog to edit
         * @param model receives the loaded blog under {@code blog}
         * @return the {@code blog/save} view name
         */
        @GetMapping("/preEdit/{id}")
        public String preEdit(@PathVariable Integer id, Model model) {
            Blog blog = blogService.findById(id);
            model.addAttribute("blog", blog);
            return "blog/save";
        }

        /**
         * Creates or updates a blog and keeps the Lucene index in step.
         *
         * <p>A blog with an id is treated as an update; one without an id is a new blog and gets
         * the current time as its release date.
         *
         * @param blog  blog bound from the submitted form
         * @param model unused
         * @throws Exception if the index cannot be written
         */
        @PostMapping("/save")
        public void save(Blog blog, Model model) throws Exception {
            BlogIndex blogIndex = new BlogIndex(indexDir);

            if (blog.getId() != null) { // update
                blogService.update(blog);
                // NOTE(review): on this path releaseDate comes only from the submitted form; if the
                // form does not carry it, the index entry is rebuilt from a null date - confirm.
                blogIndex.updateIndex(blog);
            } else { // add
                blog.setReleaseDate(new Date());
                blogService.add(blog);
                blogIndex.addIndex(blog);
            }
        }

        /**
         * Deletes a blog and its index entry, then returns to the list.
         *
         * @param id    id of the blog to delete
         * @param model unused
         * @return redirect to {@code /blog/list}
         * @throws Exception if the index cannot be written
         */
        @GetMapping("/del/{id}")
        public String del(@PathVariable Integer id, Model model) throws Exception {
            blogService.deleteById(id);
            BlogIndex blogIndex = new BlogIndex(indexDir);
            blogIndex.deleteIndex("" + id);
            return "redirect:/blog/list";
        }

        /**
         * Returns one blog as the response body.
         *
         * @param id    id of the blog
         * @param model unused
         * @return the blog returned by the service for that id
         */
        @GetMapping("/{id}")
        @ResponseBody
        public Blog findById(@PathVariable Integer id, Model model) {
            Blog blog = blogService.findById(id);
            return blog;
        }

        // ==================================================================
        /**
         * Searches the blog index by keyword and shows one page of the hits.
         *
         * <p>The page size is the configured {@code rows} value. A missing, non-numeric or
         * non-positive {@code page} is treated as page 1, and a page beyond the last one yields an
         * empty result page instead of an error.
         *
         * @param q     search keywords; when empty the request is redirected to the blog list
         * @param page  1-based page number, optional
         * @param model receives {@code blogList} (current page), {@code q} and {@code resultTotal}
         * @return the {@code blog/query} view name, or a redirect to {@code /blog/list}
         * @throws Exception if the index cannot be read or the query cannot be parsed
         */
        @RequestMapping("/q")
        public String search(@RequestParam(value = "q", required = true) String q,
                @RequestParam(value = "page", required = false) String page, Model model) throws Exception {
            if (StringUtil.isEmpty(q)) {
                return "redirect:/blog/list";
            }

            BlogIndex blogIndex = new BlogIndex(indexDir);
            List<Blog> blogList = blogIndex.searchBlog(q);
            System.out.println("据关键字查询相关博客信息, blogList:" + blogList);

            // Slice out the requested page. Both bounds are clamped to the hit count so that
            // subList never sees a negative index or start > end; long arithmetic avoids int
            // overflow for very large page numbers.
            int total = blogList.size();
            int pageSize = Math.max(1, rows);
            long offset = (long) (parsePageNumber(page) - 1) * pageSize;
            int start = (int) Math.min(offset, total);
            int end = (int) Math.min(offset + pageSize, total);
            // subList returns the half-open range [start, end)
            List<Blog> pageItems = blogList.subList(start, end);

            model.addAttribute("blogList", pageItems);
            model.addAttribute("q", q);
            model.addAttribute("resultTotal", total);
            return "blog/query";
        }

        /** Parses a 1-based page number, falling back to 1 for empty, invalid or non-positive input. */
        private static int parsePageNumber(String page) {
            if (StringUtil.isEmpty(page)) {
                return 1;
            }
            try {
                return Math.max(1, Integer.parseInt(page.trim()));
            } catch (NumberFormatException ignored) {
                // A malformed page parameter is user input, not a server error: show the first page.
                return 1;
            }
        }
    }

      BlogIndex:操作lucene索引

    package com.oy.lucene;
    
    import java.io.StringReader;
    import java.nio.file.Paths;
    import java.util.LinkedList;
    import java.util.List;
    
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.BooleanClause;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.highlight.Fragmenter;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    
    import com.oy.entity.Blog;
    import com.oy.util.DateUtil;
    import com.oy.util.StringUtil;
    
    /**
     * 给博客添加索引
     * 
     * @author oy
     * @version 1.0
     * @date 2018年12月5日
     * @time 下午4:22:55
     */
    public class BlogIndex {
        private String indexDir; // 索引库目录
    
        /**
         * 构造方法
         * 
         * @param indexDir
         *            索引库目录
         * @throws Exception
         */
        public BlogIndex(String indexDir) throws Exception {
            this.indexDir = indexDir;
        }
    
        /**
         * 获取IndexWriter实例
         * 
         * @return
         * @throws Exception
         */
        public IndexWriter getIndexWriter() throws Exception {
            // 索引库目录
            Directory dir = FSDirectory.open(Paths.get(indexDir));
    
            // 使用中文分词器SmartChineseAnalyzer
            SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
    
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
            IndexWriter writer = new IndexWriter(dir, config);
            return writer;
        }
    
        /**
         * 添加索引
         * 
         * @param dataDir
         *            数据源目录
         * @throws Exception
         */
        public void addIndex(Blog blog) throws Exception {
            IndexWriter writer = getIndexWriter();
            Document doc = new Document();
            doc.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));
            doc.add(new TextField("title", blog.getTitle(), Field.Store.YES));
            doc.add(new StringField("releaseDate", DateUtil.formatDate(blog.getReleaseDate(), "yyyy-MM-dd HH:mm:ss"),
                    Field.Store.YES));
            // content实际存储的是contentNoTag,即去除html标签后的内容
            doc.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES));
            writer.addDocument(doc);
            writer.close();
        }
    
        /**
         * 删除指定博客的索引
         * 
         * @param blogId
         * @throws Exception
         */
        public void deleteIndex(String blogId) throws Exception {
            IndexWriter writer = getIndexWriter();
            writer.deleteDocuments(new Term("id", blogId));
            writer.forceMergeDeletes(); // 强制删除
            writer.commit();
            writer.close();
        }
    
        /**
         * 更新博客索引
         * 
         * @param blog
         * @throws Exception
         */
        public void updateIndex(Blog blog) throws Exception {
            IndexWriter writer = getIndexWriter();
            Document doc = new Document();
            doc.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));
            doc.add(new TextField("title", blog.getTitle(), Field.Store.YES));
            doc.add(new StringField("releaseDate", DateUtil.formatDate(blog.getReleaseDate(), "yyyy-MM-dd HH:mm:ss"),
                    Field.Store.YES));
            doc.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES));
            writer.updateDocument(new Term("id", String.valueOf(blog.getId())), doc);
            writer.close();
        }
    
        /**
         * 通过关键字搜索博客
         * 
         * @param queryStr
         *            搜索关键字
         * @return
         * @throws Exception
         */
        public List<Blog> searchBlog(String queryStr) throws Exception {
            // 创建IndexSearch对象
            Directory dir = FSDirectory.open(Paths.get(indexDir));
            IndexReader reader = DirectoryReader.open(dir);
            IndexSearcher indexSearcher = new IndexSearcher(reader);
    
            // 组合查询BooleanQuery
            BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    
            // 中文分词器smartcn
            SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
    
            // 第一个查询条件:查询title
            QueryParser parser = new QueryParser("title", analyzer);
            Query query = parser.parse(queryStr);
    
            // 第二个查询条件:查询content
            QueryParser parser2 = new QueryParser("content", analyzer);
            Query query2 = parser2.parse(queryStr);
    
            booleanQuery.add(query, BooleanClause.Occur.SHOULD);
            booleanQuery.add(query2, BooleanClause.Occur.SHOULD);
    
            // 执行搜索
            TopDocs hits = indexSearcher.search(booleanQuery.build(), 100);
    
            // 对搜索结果进行高亮设置
            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='blue'>", "</font></b>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
            highlighter.setTextFragmenter(fragmenter);
    
            List<Blog> blogList = new LinkedList<Blog>();
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = indexSearcher.doc(scoreDoc.doc);
                Blog blog = new Blog();
                blog.setId(Integer.parseInt(doc.get("id")));
                blog.setReleaseDate(DateUtil.formatString(doc.get("releaseDate"), "yyyy-MM-dd HH:mm:ss"));
    
                // 先获取title文本
                String title = doc.get("title");
    
                // 先获取content文本,并对文本中特殊字符进行转义
                // String content = StringEscapeUtils.escapeHtml(doc.get("content"));
                String content = doc.get("content");
                System.out.println("索引库存储的content:" + content);
    
                // 然后,对title文本中"命中率最高的部分"进行高亮显示
                if (title != null) {
                    TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(title));
                    // 获取title文本中"命中率最高的部分"
                    String hTitle = highlighter.getBestFragment(tokenStream, title);
                    if (StringUtil.isEmpty(hTitle)) {
                        // 如果没有命中,将整个title文本设置给blog对象
                        blog.setTitle(title);
                    } else {
                        blog.setTitle(hTitle);
                    }
                }
    
                // 然后,对content文本中"命中率最高的部分"进行高亮显示
                if (content != null) {
                    TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content));
                    // 获取content文本中"命中率最高的部分"
                    String hContent = highlighter.getBestFragment(tokenStream, content);
                    if (StringUtil.isEmpty(hContent)) {
                        // 如果没有命中,将content文本前200个字符设置给blog对象
                        if (content.length() <= 2000) {
                            blog.setContent(content);
                        } else {
                            blog.setContent(content.substring(0, 2000));
                        }
                    } else {
                        blog.setContent(hContent);
                    }
                }
                blogList.add(blog);
            }
    
            return blogList;
        }
    }

      列表

      添加博客

      搜索

    ---

  • 相关阅读:
    svn出现黄色感叹号怎么办
    数据库设计三大范式
    windows server2008R2 64位 配置 mysql-8.0.15-winx64
    sqlquerystress
    锁表操作
    微软专用消息队列msmq的简单使用
    数据库上移和下移
    mvc全局时间输出格式化处理
    webapi jsonp处理
    泛型处理ToEntity
  • 原文地址:https://www.cnblogs.com/xy-ouyang/p/14322676.html
Copyright © 2011-2022 走看看