zoukankan      html  css  js  c++  java
  • java基础之----elasticsearch(Java客户端搜索实例)

    概述

    es是使用Java编写的,对Java有较好的原生支持。下面是使用Java写的一个demo:根据关键字进行搜索,对搜索结果重排序,并对部分字段进行高亮处理。

    public class EsIndexService { 
        // Instance logger bound to the runtime class via getClass().
        protected Logger logger = LoggerFactory.getLogger(getClass());
    
        // Elasticsearch index targeted by the search, index and delete requests in this class.
        public static final String SCS_WEB_INDEX = "scs_web";
        // Document type used for question searches and for bulk-indexing questions.
        public static final String QUESTION_TYPE = "question";
        // Document type used when bulk-indexing question pairs.
        public static final String QUESTION_PAIR_TYPE = "question_pair";
        // Value of an entity's delFlag that makes bulk() issue a delete instead of an index request.
        public static final String DELETED_FLAG = "1";
        // Field that is matched against the search text and highlighted.
        public static final String FIELD_CONTENT = "content";
        // Field that is highlighted alongside the content field.
        public static final String FIELD_ANSWER = "answer";
        // The six field-name constants below are not referenced in the visible part of this class;
        // presumably they are used by callers or by code outside this excerpt.
        public static final String FIELD_HRELATION_TYPE = "hRelation.relationType";
        public static final String FIELD_MRELATION_TYPE = "mRelation.relationType";
        public static final String FIELD_QUESTION1_ID = "question1.id";
        public static final String FIELD_QUESTION2_ID = "question2.id";
        public static final String FIELD_HRELATION_START_DATE = "hRelation.startDate";
        public static final String FIELD_HRELATION_EXPIRE_DATE = "hRelation.expireDate";
        // Not referenced in the visible part of this class.
        public static final String FIELD_SCORE = "score";
        // Field used both as the input of the field-value-factor score function and as the sort key of topByCount().
        public static final String FIELD_COUNT = "count";
        // Multiplier applied to the count field before the LOG1P modifier in the score function.
        public static final Float COUNT_FACTOR = 0.1f;
    
        // Source of un-indexed questions; also used to flag them as indexed (see cronIndex()).
        @Autowired
        private QuestionDao questionDao;
    
        // Source of un-indexed question pairs; also used to flag them as indexed (see cronIndex()).
        @Autowired
        private QuestionPairDao questionPairDao;
    
        // Used by cronIndex() to open, commit and roll back one transaction per import batch.
        @Autowired
        private DataSourceTransactionManager transactionManager;
    
        // Receives one record (search text, total hit count) per search that returned at least one hit.
        @Autowired
        private SearchLogDao searchLogDao;
    
        /**
         * Searches questions by keyword with fuzzy matching and re-ranks the hits by click count.
         *
         * <p>The match score on the content field is combined with a field-value-factor
         * function over the count field (factor {@code COUNT_FACTOR}, modifier LOG1P). The boost
         * mode is SUM, so the function value is added to the match score rather than multiplied
         * with it, which moves frequently clicked questions up the list.
         *
         * @param question holder of the search text and paging information
         * @return the result page; the result of {@code emptyResult(question)} when the search text is empty
         * @throws IOException propagated from the underlying search call
         */
        public Page<QuestionSearchResult> searchQuestion(Question question) throws IOException {
            final String keyword = question.getContent();

            // Nothing to search for: hand back the empty result straight away.
            if (StringUtils.isEmpty(keyword)) {
                return emptyResult(question);
            }

            // Plain match query on the content field, with automatic fuzziness enabled.
            MatchQueryBuilder contentMatch = QueryBuilders.matchQuery(FIELD_CONTENT, keyword);
            contentMatch.fuzziness(Fuzziness.AUTO);

            // Wrap the match query so that the click count contributes to the final score.
            FunctionScoreQueryBuilder rescoredQuery = QueryBuilders.functionScoreQuery(
                    contentMatch,
                    ScoreFunctionBuilders.fieldValueFactorFunction(FIELD_COUNT)
                            .factor(COUNT_FACTOR)
                            .modifier(FieldValueFactorFunction.Modifier.LOG1P));

            // SUM adds the function value to the query score; the default mode would multiply them.
            rescoredQuery.boostMode(CombineFunction.SUM);

            return searchQuestion(question, rescoredQuery, null, null);
        }
    
        /**
         * Lists questions ordered by the count field, highest first.
         *
         * @param question holder of the paging information
         * @return the result page produced by the four-argument {@code searchQuestion}
         * @throws IOException propagated from the underlying search call
         */
        public Page<QuestionSearchResult> topByCount(Question question) throws IOException {
            // No query is supplied; only the sort on FIELD_COUNT (descending) is applied.
            final QueryBuilder noQuery = null;
            return searchQuestion(question, noQuery, FIELD_COUNT, SortOrder.DESC);
        }
    
        /**
         * Runs a paged search against the {@code scs_web} index (type {@code question}) and
         * converts the hits into {@link QuestionSearchResult} entries.
         *
         * <p>Each hit's score is reported relative to the best score of the response
         * ({@code hitScore / maxScore}, 4 decimals, half-even rounding). Content and answer
         * fields are highlighted. Searches that return at least one hit are recorded through
         * {@code searchLogDao}.
         *
         * @param question     supplies the paging information and the search text used for logging
         * @param queryBuilder query to execute; when {@code null} no query is set on the request
         * @param orderBy      field to sort on; no sort is added when it is empty
         * @param order        sort direction used together with {@code orderBy}
         * @return the result page; its count is capped by the {@code max_result_count} setting
         *         (default 30) and its list stays empty when there are no hits
         * @throws IOException propagated from the Elasticsearch client call
         */
        public Page<QuestionSearchResult> searchQuestion(Question question, QueryBuilder queryBuilder, String orderBy, SortOrder order) throws IOException {

            Page<QuestionSearchResult> resultPage = emptyResult(question);
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            // Keep scores available even when an explicit sort is applied.
            searchSourceBuilder.trackScores(true);
            // Paging: page size first, then the offset for every page after the first one.
            searchSourceBuilder.size(question.getPage().getPageSize());
            if (question.getPage().getPageNo() > 1) {
                searchSourceBuilder.from((question.getPage().getPageNo() - 1) * question.getPage().getPageSize());
            }

            if (queryBuilder != null) {
                searchSourceBuilder.query(queryBuilder);
            }
            if (StringUtils.isNotEmpty(orderBy)) {
                searchSourceBuilder.sort(orderBy, order);
            }

            // Highlight content and answer; requireFieldMatch(false) lets a field be highlighted
            // even when the query did not target that field.
            HighlightBuilder highlightBuilder = new HighlightBuilder();
            highlightBuilder.field(FIELD_CONTENT).field(FIELD_ANSWER).requireFieldMatch(false);
            searchSourceBuilder.highlighter(highlightBuilder);

            SearchRequest searchRequest = new SearchRequest(SCS_WEB_INDEX);
            searchRequest.types(QUESTION_TYPE);
            searchRequest.source(searchSourceBuilder);

            SearchResponse response = EsUtil.client.search(searchRequest);
            long totalHits = response.getHits().getTotalHits();
            float maxScore = response.getHits().getMaxScore();
            logger.info("搜索问题[{}],结果:{}条,最高score:{},耗时:{}ms",
                    question.getContent(), totalHits,
                    maxScore, response.getTookInMillis());

            // The reported count never exceeds the configured maximum.
            int maxResultCount = Integer.parseInt(DictUtils.getDictValue("max_result_count", "scs_config", "30"));
            resultPage.setCount(Math.min(totalHits, maxResultCount));
            if (totalHits < 1) {
                return resultPage;
            }

            for (SearchHit hit : response.getHits()) {
                QuestionSearchResult tmp = new QuestionSearchResult();
                tmp.setSimilarQuestion((Question) JsonMapper.fromJsonString(hit.getSourceAsString(), Question.class));
                tmp.setEsScore(relativeScore(hit.getScore(), maxScore));
                logger.debug("搜索结果 score: {}, question: {}, count:{}, id: {}",
                        hit.getScore(), tmp.getSimilarQuestion().getContent(), tmp.getSimilarQuestion().getCount(), hit.getId());
                List<StringHighlightField> highlightList = new ArrayList<>();
                for (String key : hit.getHighlightFields().keySet()) {
                    highlightList.add(StringHighlightField.fromHighlightField(hit.getHighlightFields().get(key)));
                }
                tmp.setHighlightList(highlightList);
                resultPage.getList().add(tmp);
            }
            searchLogDao.insert(question.getContent(), totalHits);
            return resultPage;

        }

        /**
         * Expresses {@code score} as a fraction of {@code maxScore}, rounded half-even to 4 decimals.
         *
         * <p>Returns {@link BigDecimal#ZERO} when either value is NaN or infinite, or when
         * {@code maxScore} is zero. Those inputs would otherwise make
         * {@code BigDecimal.valueOf} or {@code divide} throw.
         */
        private static BigDecimal relativeScore(float score, float maxScore) {
            boolean scoreUsable = !Float.isNaN(score) && !Float.isInfinite(score);
            boolean maxUsable = !Float.isNaN(maxScore) && !Float.isInfinite(maxScore) && maxScore != 0f;
            if (!scoreUsable || !maxUsable) {
                return BigDecimal.ZERO;
            }
            return BigDecimal.valueOf(score)
                    .divide(BigDecimal.valueOf(maxScore), 4, java.math.RoundingMode.HALF_EVEN);
        }
    public void bulkQuestion(List<Question> questions) throws IOException { bulk(questions, QUESTION_TYPE); } public void bulkQuestionPairs(List<QuestionPair> questionPairs) throws IOException { bulk(questionPairs, QUESTION_PAIR_TYPE); } public void bulk(List<?> list, String type) throws IOException { BulkRequest bulkRequest = new BulkRequest(); for (Object obj : list) { DataEntity entity = (DataEntity) obj; if (DELETED_FLAG.equals(entity.getDelFlag())) {
    //删除es中的数据,通过id,es的索引是scs_web,id只是type中的一个字段 DeleteRequest request
    = new DeleteRequest(SCS_WEB_INDEX, type, entity.getId()); bulkRequest.add(request); } else {
    // 设置一个查询的条件,使用id查询,如果查找不到,则添加文档数据
              //这个IndexRequest中有个参数,OpType.INDEX,默认是INDEX,意思就是如果es已经存在这条记录,会强制覆盖,而不是更新 IndexRequest request
    = new IndexRequest(SCS_WEB_INDEX, type, entity.getId()); request.source(JsonMapper.toJsonString(entity), XContentType.JSON); bulkRequest.add(request); } } EsUtil.client.bulk(bulkRequest); } @Scheduled(cron = "0/10 * * * * ?") public void cronIndex() throws IOException { logger.info("定时索引更新开始"); boolean continueFlag = true; while (continueFlag) { DefaultTransactionDefinition trans = new DefaultTransactionDefinition(); trans.setTimeout(10); trans.setPropagationBehavior(DefaultTransactionDefinition.PROPAGATION_REQUIRES_NEW); TransactionStatus transStatus = transactionManager.getTransaction(trans); try { List<Question> questionList = questionDao.findUnIndexed(Integer.valueOf(DictUtils.getDictValue("import_size", "scs_config", "100"))); if (!questionList.isEmpty()) { this.bulkQuestion(questionList); questionDao.updateIndexFlag(questionList); } else { continueFlag = false; } transactionManager.commit(transStatus); } catch (Throwable e) { transactionManager.rollback(transStatus); throw e; } } continueFlag = true; while (continueFlag) { DefaultTransactionDefinition trans = new DefaultTransactionDefinition(); trans.setPropagationBehavior(DefaultTransactionDefinition.PROPAGATION_REQUIRES_NEW); trans.setTimeout(10); TransactionStatus transStatus = transactionManager.getTransaction(trans); try { List<QuestionPair> questionPairList = questionPairDao.findUnIndexed( Integer.valueOf(DictUtils.getDictValue("import_size", "scs_config", "100")), new BigDecimal(DictUtils.getDictValue("mark_score_min", "scs_config", "0.6"))); if (!questionPairList.isEmpty()) { this.bulkQuestionPairs(questionPairList); questionPairDao.updateIndexFlag(questionPairList); } else { continueFlag = false; } transactionManager.commit(transStatus); } catch (Throwable e) { transactionManager.rollback(transStatus); throw e; } } logger.info("定时索引更新结束"); }

     参考:https://blog.csdn.net/prestigeding/article/details/83351064

  • 相关阅读:
    引用赋值的问题
    mysql的笔记
    输入法失败
    eclipse的快捷键
    c++/c在两个文件公用一个变量
    用c++ sttring检测名字是否有空格
    QLineEdit的信号函数
    c++博客转载
    qt-博客
    QT聊天室--重大bug
  • 原文地址:https://www.cnblogs.com/gunduzi/p/12507688.html
Copyright © 2011-2022 走看看