zoukankan      html  css  js  c++  java
  • Elasticsearch拼音和ik分词器的结合应用

    一、创建索引时,自定义拼音分词和ik分词

    PUT /my_index
    {
        "index": {
            "analysis": {
                "analyzer": {
                    "ik_pinyin_analyzer": {  自定义分词器名称
                        "type": "custom",
                        "tokenizer": "ik_smart",
                        "filter": ["my_pinyin", "word_delimiter"]
                    },
                    "pinyin_analyzer": {
                        "type": "custom",
                        "tokenizer": "ik_max_word",
                        "filter": ["my_pinyin", "word_delimiter"]
                    }
                },
                "filter": {
                    "my_pinyin": {
                        "type" : "pinyin",
                        "keep_separate_first_letter" : false, 启用该选项时,将单独保留每个字的拼音首字母,例如:刘德华 > l, d, h,默认值:false。注意:由于词项(term)出现过于频繁,查询结果可能过于模糊
                        "keep_full_pinyin" : true,  启用该选项时,保留每个字的全拼,例如:刘德华 > [liu, de, hua],默认值:true
                        "keep_original" : true, 启用此选项时,也将保留原始输入,默认值:false
                        "limit_first_letter_length" : 16, 设置first_letter结果的最大长度,默认值:16
                        "lowercase" : true, 小写非中文字母,默认值:true
                        "remove_duplicated_term" : true 启用此选项后,将删除重复的词项以节省索引空间,例如:de的 > de,默认值:false。注意:与位置相关的查询可能会受到影响
                    }
                }
            }
        }
    }

    二、创建mapping时,设置字段分词(注:相同索引下建不同的type时,相同字段名属性必须设一样)

    POST /my_index/user/_mapping
    {
        "user": {
            "properties": {
              "id":{
                "type":"integer"
              },
                "userName": {
                  "type": "text",
                  "store": "no",
                  "term_vector": "with_positions_offsets",
                  "analyzer": "ik_pinyin_analyzer",   自定义分词器name
                  "boost": 10,
                  "fielddata" : true,
                  "fields": {
                        "raw": {
                            "type": "keyword"    设置keyword时,对该字段不进行分析
                        }
                    }
                },
                "reason":{
                  "type": "text",
                  "store": "no",  store 为 no(false)时,字段值不单独存储,查询时从 _source 中加载;若字段 store 为 true(yes),则意味着这个 field 的数据将会被单独存储。这时候,如果你要求返回该字段,es 会分辨出它已经被存储了,因此不会从 _source 中加载,而是从该字段的存储块中加载。
                  "term_vector": "with_positions_offsets",
                  "analyzer": "ik_pinyin_analyzer",
                  "boost": 10
                }
            }
        }
    }

    测试

    PUT /my_index/user/1
    {
      "id":1,
      "userName":"刘德华",
      "reason":"大帅哥"
    }
    
    PUT /my_index/user/2
    {
      "id":2,
      "userName":"刘德华",
      "reason":"中华人民"
    }

    不分词查询

    GET /my_index/user/_search
    {
      "query": {
        "match": {
          "userName.raw": "刘德华"
        }
      }
    }
    
    
    {
      "took": 0,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 2,
        "max_score": 0.2876821,
        "hits": [
          {
            "_index": "my_index",
            "_type": "user",
            "_id": "2",
            "_score": 0.2876821,
            "_source": {
              "id": 2,
              "userName": "刘德华",
              "reason": "中华人民"
            }
          },
          {
            "_index": "my_index",
            "_type": "user",
            "_id": "1",
            "_score": 0.2876821,
            "_source": {
              "id": 1,
              "userName": "刘德华",
              "reason": "大帅哥"
            }
          }
        ]
      }
    }

    分词查询

    GET /my_index/user/_search
    {
      "query": {
        "match": {
          "userName": "刘"
        }
      }
    }
    
    {
      "took": 0,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 2,
        "max_score": 0.31331712,
        "hits": [
          {
            "_index": "my_index",
            "_type": "user",
            "_id": "2",
            "_score": 0.31331712,
            "_source": {
              "id": 2,
              "userName": "刘德华",
              "reason": "中华人民"
            }
          },
          {
            "_index": "my_index",
            "_type": "user",
            "_id": "1",
            "_score": 0.31331712,
            "_source": {
              "id": 1,
              "userName": "刘德华",
              "reason": "大帅哥"
            }
          }
        ]
      }
    }

    拼音分词

    GET /my_index/user/_search
    {
      "query": {
        "match": {
          "reason": "shuai"
        }
      }
    }
    
    
    {
      "took": 0,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1,
        "max_score": 3.4884284,
        "hits": [
          {
            "_index": "my_index",
            "_type": "user",
            "_id": "1",
            "_score": 3.4884284,
            "_source": {
              "id": 1,
              "userName": "刘德华",
              "reason": "大帅哥"
            }
          }
        ]
      }
    }

    分组聚合

    GET /my_index/user/_search
    { 
      "size":2,
      "query": {
        "match": {
          "userName": "liu"
        }
      },
      "aggs": {
        "group_by_meetingType": {
          "terms": {
            "field": "userName.raw"
          }
        }
      }
    }
    
    {
      "took": 1,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 2,
        "max_score": 3.133171,
        "hits": [
          {
            "_index": "my_index",
            "_type": "user",
            "_id": "2",
            "_score": 3.133171,
            "_source": {
              "id": 2,
              "userName": "刘德华",
              "reason": "中华人民"
            }
          },
          {
            "_index": "my_index",
            "_type": "user",
            "_id": "1",
            "_score": 3.133171,
            "_source": {
              "id": 1,
              "userName": "刘德华",
              "reason": "大帅哥"
            }
          }
        ]
      },
      "aggregations": {
        "group_by_meetingType": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "刘德华",
              "doc_count": 2
            }
          ]
        }
      }
    }

    大神们,这些都是个人理解,哪里有不一样的想法或建议,欢迎评论!

    如果一个人没有梦想,和咸鱼有什么区别?
  • 相关阅读:
    JAVA DBUTils和连接池
    JAVA jsp技术
    java cookie学习
    grub引导
    更改网卡顺序及名称
    ambari 警告信息
    redhat7.2 救援模式
    redhat7.2 系统引导修复
    shell编程入门
    HBase安装部署
  • 原文地址:https://www.cnblogs.com/dashuaiguo/p/9884064.html
Copyright © 2011-2022 走看看