zoukankan      html  css  js  c++  java
  • 【lucene】一个简单的招聘网站的建立

    1.建立索引库: 核心代码如下

    package com.tabchanj.job.index;
    
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.highlight.Formatter;
    import org.apache.lucene.search.highlight.Fragmenter;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.Scorer;
    import org.apache.lucene.search.highlight.SimpleFragmenter;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    
    import com.tabchanj.job.domain.JobApply;
    import com.tabchanj.job.util.LuceneUtils;
    
    @SuppressWarnings("deprecation")
    public class JobApplyIndexHelper {
    
        /**
         * Adds the given job postings to the Lucene index and commits them.
         *
         * @param jobApplies postings to index; each one is converted to a document by
         *                   {@code obj2Doc} and added through the shared writer
         * @param rebuild    when {@code true}, every existing document is deleted and that
         *                   deletion is committed before the new documents are added
         * @throws RuntimeException if anything fails; the original exception is kept as the cause
         */
        public void createIndex(List<JobApply> jobApplies, boolean rebuild) {
            try {
                // The whole application shares the single writer handed out by LuceneUtils;
                // several writers writing to the same index at once would fail.
                IndexWriter indexWriter = LuceneUtils.getIndexWriter();
                if (rebuild) {
                    indexWriter.deleteAll();
                    indexWriter.commit();
                }
                for (JobApply jobApply : jobApplies) {
                    // Wrap each record as a document and add it to the index.
                    indexWriter.addDocument(obj2Doc(jobApply));
                }
                indexWriter.commit();
            } catch (Exception e) {
                // Same message as before, but chain the cause so the stack trace is not lost.
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    
        /**
         * Builds the Lucene document for one job posting.
         *
         * <p>The id and title are stored but not indexed; the content is analyzed but not
         * stored. For company, city, trade and salary level the numeric id is stored and
         * indexed as a single un-analyzed term, while the name is stored and analyzed.
         *
         * @param jobApply the posting to convert
         * @return a new document carrying the posting's fields
         */
        private Document obj2Doc(JobApply jobApply) {
            Document doc = new Document();

            // The posting itself.
            doc.add(new Field("id", String.valueOf(jobApply.getId()), Store.YES, Index.NO));
            doc.add(new Field("title", jobApply.getTitle(), Store.YES, Index.NO));
            doc.add(new Field("content", jobApply.getContent(), Store.NO, Index.ANALYZED));

            // Company.
            doc.add(new Field("companyId", String.valueOf(jobApply.getCompany().getId()), Store.YES,
                    Index.NOT_ANALYZED));
            doc.add(new Field("companyName", jobApply.getCompany().getName(), Store.YES, Index.ANALYZED));

            // City (name first, then id).
            doc.add(new Field("cityName", jobApply.getCity().getName(), Store.YES, Index.ANALYZED));
            doc.add(new Field("cityId", String.valueOf(jobApply.getCity().getId()), Store.YES,
                    Index.NOT_ANALYZED));

            // Trade.
            doc.add(new Field("tradeId", String.valueOf(jobApply.getTrade().getId()), Store.YES,
                    Index.NOT_ANALYZED));
            doc.add(new Field("tradeName", jobApply.getTrade().getName(), Store.YES, Index.ANALYZED));

            // Salary level (the level's name goes into "salaryScope").
            doc.add(new Field("salaryScope", jobApply.getSalaryLevel().getName(), Store.YES, Index.ANALYZED));
            doc.add(new Field("salaryId", String.valueOf(jobApply.getSalaryLevel().getId()), Store.YES,
                    Index.NOT_ANALYZED));

            return doc;
        }
    //上面使用的LuceneUtils工具类代码如下:
    package com.tabchanj.job.util;

    import java.io.File;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.wltea.analyzer.lucene.IKAnalyzer;

    /**
     * General-purpose Lucene helpers: a shared analyzer, the index directory and a single
     * shared {@link IndexWriter}.
     *
     * @author tab
     */
    public class LuceneUtils {

        /** Index directory, opened lazily by {@link #getDirectory()}. */
        private static Directory directory = null;

        /** Analyzer used for indexing (and exposed to callers through {@link #getAnalyzer()}). */
        private static Analyzer analyzer = new IKAnalyzer();

        /**
         * The shared index writer. Declared {@code volatile} because {@link #getIndexWriter()}
         * reads it outside the lock before re-checking it inside the lock.
         */
        private static volatile IndexWriter indexWriter = null;

        /**
         * @return the shared analyzer instance
         */
        public static Analyzer getAnalyzer() {
            return analyzer;
        }

        /**
         * Builds the path of the index directory: {@code Global.webAppPath}, followed by
         * {@code WEB-INF/index/} using the platform separator.
         *
         * @return the index directory path, ending with a separator
         */
        public static String getIndexPath() {
            String root = Global.webAppPath;
            StringBuilder pathname = new StringBuilder(root);
            // Guard against a web-app root without a trailing separator, which would otherwise
            // produce ".../appWEB-INF/index/". An empty root is left alone (relative path).
            if (root.length() > 0 && !root.endsWith("/") && !root.endsWith(File.separator)) {
                pathname.append(File.separator);
            }
            pathname.append("WEB-INF").append(File.separator).append("index").append(File.separator);
            return pathname.toString();
        }

        /**
         * Returns the index {@link Directory}, opening it on first use.
         *
         * @return the directory located at {@link #getIndexPath()}
         * @throws RuntimeException if the directory cannot be opened; the cause is preserved
         */
        public static Directory getDirectory() {
            try {
                if (directory == null) {
                    directory = FSDirectory.open(new File(getIndexPath()));
                }
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
            return directory;
        }

        /**
         * Returns the shared {@link IndexWriter}, creating it on first use.
         *
         * @return the shared writer
         * @throws RuntimeException if the writer cannot be created; the cause is preserved
         */
        public static IndexWriter getIndexWriter() {
            try {
                if (indexWriter == null) {
                    synchronized (LuceneUtils.class) {
                        // Re-check under the lock so that only one writer is ever created.
                        if (indexWriter == null) {
                            Version version = Version.LUCENE_4_10_4;
                            indexWriter = new IndexWriter(getDirectory(),
                                    new IndexWriterConfig(version, getAnalyzer()));
                        }
                    }
                }
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
            return indexWriter;
        }

        /**
         * Closes the shared {@link IndexWriter} if one has been created.
         *
         * <p>After a successful close the cached reference is cleared, so that a later
         * {@link #getIndexWriter()} call creates a fresh writer instead of handing out the
         * closed one.
         *
         * @throws RuntimeException if closing fails; the cause is preserved
         */
        public static void closeIndexWriter() {
            synchronized (LuceneUtils.class) {
                try {
                    if (indexWriter != null) {
                        indexWriter.close();
                        // Only forget the writer once it has really been closed.
                        indexWriter = null;
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e.getMessage(), e);
                }
            }
        }
    }
    //上面用到的Global.webAppPath静态变量如下
    package com.tabchanj.job.util;
    
    /**
     * Holder for application-wide values.
     */
    public class Global {

        /**
         * Path of the web application root. Starts out as the empty string and is
         * assigned by other code at runtime.
         */
        public static String webAppPath = "";
    }
    //下面的监听器用于在容器启动时给Global.webAppPath静态变量赋值
    package com.tabchanj.job.listener;
    
    import javax.servlet.ServletContextEvent;
    import javax.servlet.ServletContextListener;
    
    import com.tabchanj.job.util.Global;
    import com.tabchanj.job.util.LuceneUtils;
    
    /**
     * Servlet context listener that records the web application's root path on start-up
     * and closes the shared Lucene index writer on shutdown.
     */
    public class WebAppListener implements ServletContextListener {

        /**
         * On container start-up, stores the real filesystem path of the context root
         * ({@code "/"}) in {@code Global.webAppPath}.
         */
        @Override
        public void contextInitialized(ServletContextEvent event) {
            final String rootPath = event.getServletContext().getRealPath("/");
            Global.webAppPath = rootPath;
        }

        /**
         * On container shutdown, closes the index writer held by {@code LuceneUtils}.
         */
        @Override
        public void contextDestroyed(ServletContextEvent event) {
            LuceneUtils.closeIndexWriter();
        }
    }
    
    
    
     
     

    2.查询索引库: 核心代码如下

    /**
         * Searches the index and returns the hits as display-ready maps.
         *
         * <p>At most 1000 hits are returned. Each map carries the keys {@code title},
         * {@code city}, {@code company}, {@code trade} and {@code salaryScope}. The title is
         * run through a highlighter that wraps matched terms in
         * {@code <font color='red'>…</font>}; when the highlighter yields nothing for a
         * title, the stored title is used unchanged.
         *
         * <p>Failures are reported on standard error and do not propagate: the method
         * returns whatever results were collected before the failure (possibly none).
         *
         * @param query the query to run
         * @return the list of result maps, never {@code null}
         */
        public List<Map<String, Object>> search(Query query) {
            List<Map<String, Object>> lists = new ArrayList<Map<String, Object>>();
            IndexReader reader = null;
            try {
                // Open the index and wrap it in a searcher.
                reader = IndexReader.open(LuceneUtils.getDirectory());
                IndexSearcher searcher = new IndexSearcher(reader);
                TopDocs docs = searcher.search(query, 1000);
                ScoreDoc[] scoreDocs = docs.scoreDocs;
                Highlighter highlighter = createHighlighter(query);

                for (ScoreDoc scoreDoc : scoreDocs) {
                    // Load the stored fields of the hit by its document number.
                    Document document = searcher.doc(scoreDoc.doc);
                    String storedTitle = document.get("title");

                    // Highlight the title; the highlighter returns null when the title
                    // contains none of the query terms, so fall back to the plain title.
                    String title = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", storedTitle);
                    if (title == null) {
                        title = storedTitle;
                    }

                    Map<String, Object> map = new HashMap<String, Object>();
                    map.put("title", title);
                    map.put("city", document.get("cityName"));
                    map.put("company", document.get("companyName"));
                    map.put("trade", document.get("tradeName"));
                    map.put("salaryScope", document.get("salaryScope"));
                    lists.add(map);
                }
            } catch (Exception e) {
                // Best effort by design: report the problem and return what was collected.
                e.printStackTrace();
            } finally {
                // The reader is opened per call, so it must be released per call as well.
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
            return lists;
        }

        /**
         * Creates the highlighter used for result titles: matched terms are wrapped in a
         * red {@code <font>} tag and fragments are 200 characters long.
         */
        private Highlighter createHighlighter(Query query) {
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer fragmentScorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
            Fragmenter fragmenter = new SimpleFragmenter(200);
            highlighter.setTextFragmenter(fragmenter);
            return highlighter;
        }

    3.关键字高亮: 代码在第二步中

  • 相关阅读:
    阿里云盘上线了,使用中,1T空间,不限速(似乎)!
    在互联网大厂实习,虽然转正了,但编程能力很差,要主动离职吗?
    软件外包商都是黑心的吗?
    招聘信息薪资范围是12-20K,能否要20K的薪资?
    为何没有中文编程?
    为什么银行的IT部门都远离市区?
    想花钱速学互联网行业,大概花两三个月的时间,出来好找工作吗
    Java基础总结,超级全的面试题
    一套基于java的开源车牌识别算法
    Java 最常见的 208 道面试题(第八模块答案)网络
  • 原文地址:https://www.cnblogs.com/tabchanj/p/5756602.html
Copyright © 2011-2022 走看看