zoukankan      html  css  js  c++  java
  • 初试ElasticSearch做菜谱搜索 整理思路

    1.docker部署elasticSearch集群及kibana服务

      借鉴https://blog.csdn.net/ctwy291314/article/details/111313419这位博主的部署方式。

    2.学习es的DSL语法

      推荐【慕课】ElasticSearch+Spark 构建高匹配度搜索服务+千人千面推荐系统

    3.logstash的logstash-input-jdbc插件对数据初始化全量索引构建

    4.阿里canal中间件完成准实时增量索引构建

    5.业务功能开发(中文IK分词器插件安装、定制化分词、同义词扩展、相关性重塑)

    6.总结一些东西

    # Recipe search: full-text match over several fields, non-scoring filters,
    # a popularity-based score adjustment, and a terms aggregation over tags.
    GET cookbook/_search
    {
      "explain": true,
      "query": {
        "function_score": {
          "query": {
            "bool": {
              "must": [
                {
                  "multi_match": {
                    "query": "国庆佳节",
                    "fields": [
                      "name^10",   # field boost (weight)
                      "introduction",
                      "description",
                      "materials",
                      "seasons",
                      "categories",
                      "platforms",
                      "themes",
                      "tags"
                    ],
                    "type": "most_fields"  # other multi_match types are available as well
                  } # a term clause placed here (under must) would also filter, but it would contribute to the score
                }
              ],
              "filter": [ # filter clauses do not take part in scoring; tags, status flags, etc. are best placed here
                {
                  "term": {
                    "verified": {
                      "value": "true"
                    }
                  }
                },
                {
                  "term": {
                    "grounding": {
                      "value": "true"
                    }
                  }
                },
                {
                  "term": {
                    "tags": "家常菜"
                  }
                },
                {
                  "term": {
                    "tags": "夜宵"
                  }
                }
              ]
            }
          },
          "functions": [ # additional custom scoring
            {
              "field_value_factor": {
                "field": "collect_count"
              },
              "weight": 0.00002  # weight
            },
            {
              "field_value_factor": {
                "field": "view_count"
              },
              "weight": 0.00002  # weight
            }
          ],
          "score_mode": "sum",
          "boost_mode": "sum"     # with "replace", the function score replaces the query score
        }
      },
      "sort": [ # sorting by _score keeps the computed score; sorting by a regular document field skips scoring
        {
          "_score": {
            "order": "desc"
          }
        }
      ],
      "aggs": {
        "group_by_tags": {
          "terms": {
            "field": "tags"
          }
        }
      }
    }
    
    
    App 内的排序既可以使用 sort 按字段排序(此时不计算相关性得分),也可以将 boost_mode 设为 replace,用自定义分数来排序。
    
    
    
    # Inspect index-time tokenization, using the analyzer configured for the "tags" field
    GET cookbook/_analyze
    {
      "text": [
        "创意菜 甜 西餐 甜品 电烤箱"
      ],
      "field": "tags"
    }

    # Inspect search-time tokenization with an explicitly named analyzer
    GET _analyze?pretty
    {
      "analyzer": "ik_max_word",
      "text": [
        "ROKI"
      ]
    }

    GET _analyze?pretty
    {
      "analyzer": "ik_smart",
      "text": [
        "创意菜 甜 西餐 甜品 电烤箱"
      ]
    }

     2020-12-23 新增:name 字段支持中文拼音搜索

    # Create the cookbook index with explicit shard and replica counts
    PUT /cookbook/
    {
      "settings": {
        "index": {
          "number_of_shards": 10,
          "number_of_replicas": 3
        }
      }
    }
    
    # Close the index before changing its analysis settings
    POST cookbook/_close

    # Register a custom analyzer: ik_max_word tokenizer followed by a pinyin token filter
    PUT cookbook/_settings
    {
      "settings": {
        "index": {
          "analysis": {
            "filter": {
              "my_pinyin": {
                "type": "pinyin",
                "keep_full_pinyin": true,
                "keep_original": false,
                "keep_separate_first_letter": false,
                "limit_first_letter_length": 10,
                "lowercase": true,
                "remove_duplicated_term": true
              }
            },
            "analyzer": {
              "ik_pinyin_analyzer": {
                "type": "custom",
                "tokenizer": "ik_max_word",
                "filter": [
                  "my_pinyin"
                ]
              }
            }
          }
        }
      }
    }
    
    # Field mappings for the cookbook index ("dynamic": false, so only the fields listed here are mapped)
    PUT cookbook/_mappings
    {
      "dynamic": false,
      "properties": {
        "id": { "type": "integer" },
        "name": { "type": "text", "analyzer": "ik_pinyin_analyzer", "search_analyzer": "ik_pinyin_analyzer" },
        "introduction": { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word" },
        "description": { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word" },
        "collect_count": { "type": "integer" },
        "view_count": { "type": "integer" },
        "difficulty": { "type": "integer" },
        "need_time": { "type": "integer" },
        "prepare_desc": { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word" },
        "type": { "type": "integer" },
        "cookbook_type": { "type": "integer" },
        "recommend": { "type": "boolean" },
        "verified": { "type": "boolean" },
        "grounding": { "type": "boolean" },
        "allow_distribution": { "type": "boolean" },
        "tags": { "type": "text", "analyzer": "whitespace", "fielddata": true },
        "materials": { "type": "text", "analyzer": "ik_smart", "search_analyzer": "ik_smart" },
        "seasons": { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word" },
        "category_tags": { "type": "text", "analyzer": "whitespace", "fielddata": true },
        "categories": { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word" },
        "platforms": { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word" },
        "themes": { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word" },
        "pub_time": { "type": "date" }
      }
    }

    # Reopen the index
    POST cookbook/_open
    

     2020-12-24 记elasticsearch-rest-client 下的httpclient jar包冲突。查看7.9.3下引用的是 

    直接覆盖同版本的引用

     

    2020-12-28

    IK 分词可以同时支持拼音和同义词:在自定义分析器中把多个 filter 依次串联即可。

    # First define the analyzers that chain a synonym filter and a pinyin filter after the IK tokenizers.
    # Analysis settings are static: the index has to be closed before they can be updated.
    POST cookbook/_close

    PUT cookbook/_settings
    {
      "settings": {
        "index": {
          "analysis": {
            "analyzer": {
              "ik_synonym_pinyin_max_word": {
                "type": "custom",
                "tokenizer": "ik_max_word",
                "filter": [
                  "my_synonym_filter",
                  "my_pinyin_filter"
                ]
              },
              "ik_synonym_pinyin_smart": {
                "type": "custom",
                "tokenizer": "ik_smart",
                "filter": [
                  "my_synonym_filter",
                  "my_pinyin_filter"
                ]
              }
            },
            "filter": {
              "my_synonym_filter": {
                "type": "synonym",
                "synonyms_path": "analysis-ik/synonyms.txt"
              },
              "my_pinyin_filter": {
                "type": "pinyin",
                "keep_separate_first_letter": false,
                "keep_full_pinyin": true,
                "keep_original": false,
                "limit_first_letter_length": 10,
                "lowercase": true,
                "remove_duplicated_term": true
              }
            }
          }
        }
      }
    }

    # Reopen the index once the new analyzers are registered
    POST cookbook/_open
    

      

  • 相关阅读:
    嵌入式Linux系统移植(二)——交叉编译工具集
    嵌入式linux系统移植(一)
    C语言常用关键语法精华总结
    ARM汇编常用指令
    嵌入式Linux系统移植——uboot常用命令
    VHDL的参数写在一个vhd文件里
    [PAT] 1077 Kuchiguse (20 分)Java
    [PAT] 1073 Scientific Notation (20 分)Java
    [PAT] 1069 The Black Hole of Numbers (20 分)Java
    [PAT] 1065 A+B and C (64bit) (20 分)Java
  • 原文地址:https://www.cnblogs.com/xuetieqi/p/14168694.html
Copyright © 2011-2022 走看看