zoukankan      html  css  js  c++  java
  • Search Api

    语法 范围
    /_search 集群上所有的索引
    /index1/_search index1
    /index1,index2/_search index1,index2
    /index*/_search 以index开头的索引

    Term :Beautiful Mind 等效于 Beautiful OR Mind。使用括号括起来:(Beautiful Mind)

    Phrase:"Beautiful Mind" 等效于 Beautiful AND Mind 。Phrase查询还要求前后顺序保持一致。使用引号

    一、Url Search 

    在url中使用查询参数

    //查询title字段包含2012的
    GET movies/_search?q=2012&df=title
    {
      "profile": "true"
    }
    //查询title字段包含2012的,按year倒序,返回前10条
    GET movies/_search?q=title:2012&sort=year:desc&from=0&size=10&timeout=1m
    {
      "profile": "true"
    }
    
    //查询所有字段包含2012的
    GET movies/_search?q=2012
    {
      "profile": "true"
    }
    
    //PhraseQuery
    GET movies/_search?q=title:"Beautiful Mind"
    {
      "profile": "true"
    }
    
    //TermQuery。两个Term在一起默认是 OR 的关系
    GET movies/_search?q=title:(Beautiful Mind)
    {
      "profile": "true"
    }
    
    //title 必须包括Beautiful 和 Mind
    GET movies/_search?q=title:(Beautiful AND Mind)
    {
      "profile": "true"
    }
    //title 必须包括Mind,Beautiful可选(%2B是+的URL编码,表示该词必须出现)
    GET movies/_search?q=title:(Beautiful %2BMind)
    {
      "profile": "true"
    }
    
    //title 必须包括Beautiful 不能包括Mind
    GET movies/_search?q=title:(Beautiful NOT Mind)
    {
      "profile": "true"
    }
    
    //查询1980以后的电影
    GET movies/_search?q=year:>=1980
    {
      "profile": "true"
    }
    
    //title包含b开头的
    GET movies/_search?q=title:b*
    {
      "profile": "true"
    }
    
    //模糊匹配&近似匹配
    GET movies/_search?q=title:beautifl~1
    {
      "profile": "true"
    }
    GET movies/_search?q=title:"lord rings"~2
    {
      "profile": "true"
    }

    二、Request Body Search 

    使用elasticsearch提供的,基于json格式的更加完备的DSL

    // 分页查询第一页,每页1条数据
    GET kibana_sample_data_ecommerce/_search
    {
      "from": 0,
      "size": 1,
      "query": {
        "match_all": {}
      }
    }
    
    //根据order_date倒序
    GET kibana_sample_data_ecommerce/_search
    {
      "sort":[{"order_date":"desc"}], 
      "query": {
        "match_all": {}
      }
    }
    
    //只返回order_date字段
    GET kibana_sample_data_ecommerce/_search
    {
      "_source": ["order_date"], 
      "query": {"match_all": {}}
    }
    
    //脚本字段,新增一个new_field字段
    GET kibana_sample_data_ecommerce/_search
    {
      "script_fields": {
        "new_field": {
          "script": {
            "lang": "painless",
            "source": "doc['order_date'].value+'_hello'"
          }
        }
      }, 
      "query": {"match_all": {}}
    }
    
    //查询包含Last或者包含Christmas
    GET movies/_search
    {
      "query": {
        "match": {
          "title": "Last Christmas"
        }
      }
      , "profile": "true"
    }
    
    //查询既包含Last又包含Christmas
    GET movies/_search
    {
      "query": {
        "match": {
          "title": {
            "query": "Christmas Last",
            "operator": "and"
          }
        }
      }
    }
    
    //slop 指定中间忽略匹配的数量。搜索出的结果:One I Love, The
    GET movies/_search
    {
      "query": {
        "match_phrase": {
          "title":{
            "query":"one love",
            "slop": 1
          }
        }
      }
      
    }

    1,Query String&Simple  Query String

    //title包含Homeward和Bound
    GET movies/_search
    {
      "query": {
        "query_string": {
          "default_field": "title",
          "query": "Homeward AND Bound"
        }
      }
    }
    
    //title包含Homeward和Bound 或者 包含Lost和in(注意:query_string中的布尔操作符必须大写,即 AND、OR、NOT;小写的and/or会被当作普通词项)
    GET movies/_search
    {
      "query": {
        "query_string": {
          "default_field": "title",
          "query": "(Homeward AND Bound) or (Lost and in)"
        }
      }
    }
    
    //title包含Homeward和Bound 
    GET movies/_search
    {
      "query": {
        "simple_query_string": {
          "query": "Homeward Bound",
          "fields": ["title"],
          "default_operator": "and"
        }
      }
    }

    2,term查询

    term查询是用于结构化数据的查询。全文用match查询。而bool属于一种复合查询。可以结合term查询和match查询

    GET movies/_search
    {
      "query": {
            "term": {
              "title.keyword": {
                "value": "Homeward Bound: The Incredible Journey"
              }
         }
      }
    }
    //跳过算分,提高性能
    GET movies/_search
    {
      "query": {
        "constant_score": {
          "filter": {
            "term": {
              "title.keyword": {
                "value": "Homeward Bound: The Incredible Journey"
              }
         }}
        }
      }
    }
    GET request_audit_logs/_search
    {
       "query": {
         "bool": {
           "filter": [
             {"term": {
               "url": "http://localhost:18908/api/User/Login"
             }},
             {
               "match":{
                 "request_content":"15607172222"
               }
             },
             {
               "range": {
                 "request_time": {
                   "gte": "2020-01-01 00:00:00"
                 }
               }
             }
           ]
         }
       }
    }

    3,Query&Filtering与多字符串多字段查询

    gte:大于等于
    lte:小于等于
    gt:大于
    lt:小于

    must 必须匹配。贡献算分
    should 选择性匹配。贡献算分
    must_not Filter Context。查询子句,必须不能匹配
    filter Filter Context。必须匹配,但不贡献算分

    GET movies/_search
    {
      "query": {
        "bool": {
          "must": {"term": {"year":"1960"}},
          "filter": {"term":{"title.keyword":"Pollyanna"}},
          "must_not":{"range":{"year":{"lte":1961}}},
          "should": [
            {"term":{"genre.keyword": "Children"}},
            {"term":{"genre.keyword": "Comedy"}}
          ]
        }
      }
    }
    //title中包含The并且不包含Good
    GET movies/_search
    {
      
      "query": {
        "bool": {
          "must": [{"match": {"title": "The"}}],
          "must_not": [{"match": {"title": "Good"}}]
        }
      }
    }
    //将title中包含The的语句排在靠前,包含Grifters排在靠后
    GET movies/_search
    {
      "query": {
        "boosting": {
          "positive": {"match": {
            "title": "The"
          }},
          "negative": {"match": {
            "title": "Grifters"
          }},
          "negative_boost": 0.5
        }
        
      }
    }

    4,单字符串多字段查询:Dis Max Query

    //1,获取最佳匹配语句的评分_score
    //2,将其他匹配语句的评分与tie_breaker相乘
    //3,对以上评分求和并规范化
    //tie_breaker是一个介于0-1之间的浮点数。0代表使用最佳匹配;1代表所有语句同等重要
    POST blogs/_search
    {
        "query": {
            "dis_max": {
                "queries": [
                    { "match": { "title": "Quick pets" }},
                    { "match": { "body":  "Quick pets" }}
                ],
                "tie_breaker": 0
            }
        }
    }

    5,单字符串多字段查询:Multi Match

    最佳字段(Best Fields):当字段之间相互竞争,有相互关联。例如title和body这样的字段。评分来自最匹配字段

    多数字段(Most Fields):处理英文内容时:一种常见的手段是,在主字段(English Analyzer),抽取词干,加入同义词,以匹配更多的文档。相同的文本,加入子字段(Standard Analyzer),以提供更加精确的匹配。其他字段作为匹配文档提高相关度的信号。匹配字段越多则越好

    混合字段(Cross Fields):对于某些实体,例如人名、地址、图书信息。需要在多个字段中确定信息,单个字段只能作为整体的一部分。希望在任何这些列出的字段中找到尽可能多的词

    POST blogs/_search
    {
      "query": {
        "multi_match": {
          "type": "best_fields",
          "query": "Quick pets",
          "fields": ["title","body"],
          "tie_breaker": 0.2,
          "minimum_should_match": "20%"
        }
      }
    }
    //英文分词器可以提高算分值,标准分词器可以提高精度
    POST titles/_bulk
    { "index": { "_id": 1 }}
    { "title": "My dog barks" }
    { "index": { "_id": 2 }}
    { "title": "I see a lot of barking dogs on the road " }
    PUT /titles
    {
      "mappings": {
        "properties": {
          "title": {
            "type": "text",
            "analyzer": "english",
            "fields": {"std": {"type": "text","analyzer": "standard"}}
          }
        }
      } 
    }
    GET /titles/_search
    {
       "query": {
            "multi_match": {
                "query":  "barking dogs",
                "type":   "most_fields",
                "fields": [ "title", "title.std" ]
            }
        }
    }
    GET /titles/_search
    {
       "query": {
            "multi_match": {
                "query":  "barking dogs",
                "type":   "cross_fields",
                "operator": "and", 
                "fields": [ "title", "title.std" ]
            }
        }
    }

     6,Search Template与 Index Alias

    //删除搜索模版
    DELETE _scripts/tmdb
    //设置搜索模版
    POST _scripts/tmdb
    {
      "script":{
        "lang": "mustache",
        "source": {
          "_source":["title"],
        "size":20,
         "query":{
           "bool": {
             "must": [
               {"term": {
                  "title.keyword":"{{q}}"
              }}
             ]
           }
         }
        }
      }
      
    }
    //使用搜索模版
    POST movies/_search/template
    {
      "id":"tmdb",
      "params": {
        "q":"Lamerica"
      }
    }
    
    //删除别名
    POST _aliases
    {
      "actions": [
        {
          "remove": {
            "index": "movies",
            "alias": "movies2"
          }
        }
      ]
      
    }
    //设置别名
    POST _aliases
    {
      "actions": [
        {
          "add": {
            "index": "movies",
            "alias": "movies2"
          }
        }
      ]
    }
    //使用别名查询
    GET movies2/_search

     7,Function Score Query优化算分

    //fields算分度 * votes
    POST /blogs/_search
    {
      "query": {
        "function_score": {
          "query": {
            "multi_match": {
              "query":    "popularity",
              "fields": [ "title", "content" ]
            }
          },
          "field_value_factor": {
            "field": "votes"
          }
        }
      }
    }
    
    //fields算分度 * log(1 + votes)
    POST /blogs/_search
    {
      "query": {
        "function_score": {
          "query": {
            "multi_match": {
              "query":    "popularity",
              "fields": [ "title", "content" ]
            }
          },
          "field_value_factor": {
            "field": "votes",
            "modifier": "log1p"
          }
        }
      }
    }

     8,Term Suggester与Phrase Suggester

    missing 仅当词项在索引中不存在时才提供建议(如索引中已经存在,就不提供建议)

    popular 推荐出现频率更加高的词

    always 无论是否存在,都提供建议

    POST /articles/_search
    {
      "size": 1,
      "query": {
        "match": {
          "body": "lucen rock"
        }
      },
      "suggest": {
        "term": {
          "text": "lucen rock",
          "term": {
            "suggest_mode": "missing",
            "field": "body"
          }
        }
      }
    }

    phrase多增加了几个参数

    max_errors 最多可以拼错的terms数

    confidence 置信度阈值:只返回得分高于"输入短语得分 × confidence"的候选,默认为1;设为0时返回得分最高的候选

    POST /articles/_search
    {
      "suggest": {
        "my-suggestion": {
          "text": "lucne and elasticsear rock hello world ",
          "phrase": {
            "field": "body",
            "max_errors":2,
            "confidence":0,
            "direct_generator":[{
              "field":"body",
              "suggest_mode":"always"
            }],
            "highlight": {
              "pre_tag": "<em>",
              "post_tag": "</em>"
            }
          }
        }
      }
    }

    9,自动补全与基于上下文的提示

    DELETE articles
    //设置mapper
    PUT articles
    {
      "mappings": {
        "properties": {
          "title_completion":{
            "type": "completion"
          }
        }
      }
    }
    
    POST articles/_bulk
    { "index" : { } }
    { "title_completion": "lucene is very cool"}
    { "index" : { } }
    { "title_completion": "Elasticsearch builds on top of lucene"}
    { "index" : { } }
    { "title_completion": "Elasticsearch rocks"}
    { "index" : { } }
    { "title_completion": "elastic is the company behind ELK stack"}
    { "index" : { } }
    { "title_completion": "Elk stack rocks"}
    { "index" : {} }
    
    
    POST articles/_search?pretty
    {
      "size": 0,
      "suggest": {
        "article-suggester": {
          "prefix": "elk",
          "completion": {
            "field": "title_completion"
          }
        }
      }
    }
    
    GET comments/_search
    DELETE comments
    PUT comments
    //设置mapper,多了contexts
    PUT comments/_mapping
    {
      "properties": {
        "comment_autocomplete":{
          "type": "completion",
          "contexts":[{
            "type":"category",
            "name":"comment_category"
          }]
        }
      }
    }
    
    POST comments/_doc
    {
      "comment":"I love the star war movies",
      "comment_autocomplete":{
        "input":["star wars"],
        "contexts":{
          "comment_category":"movies"
        }
      }
    }
    
    POST comments/_doc
    {
      "comment":"Where can I find a Starbucks",
      "comment_autocomplete":{
        "input":["starbucks"],
        "contexts":{
          "comment_category":"coffee"
        }
      }
    }
    
    
    POST comments/_search
    {
      "suggest": {
        "MY_SUGGESTION": {
          "prefix": "sta",
          "completion":{
            "field":"comment_autocomplete",
            "contexts":{
              "comment_category":"coffee"
            }
          }
        }
      }
    }
    View Code

    10,Search After与Scroll Api解决分页大于10000条数据问题

    //search_after:order_id为740002后面下一条数据
    GET kibana_sample_data_ecommerce/_search
    {
      "size": 1,
      "query": {"match_all": {}},
      "search_after":[740002],
      "sort": [{"order_id":"desc"}]
    }
    
    //先创建快照
    POST kibana_sample_data_ecommerce/_search?scroll=5m
    {
      "size": 1, 
      "query": {"match_all": {}}
    }
    //根据上一个scroll_id查询下一页数据
    POST _search/scroll
    {
      "scroll":"1m",
      "scroll_id":"DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAEuEWVHlEU0NNSFFSd2VQVElQX3Vza2Zfdw=="
    }

    11,使用乐观锁解决并发写入问题

    ①内部版本控制: if_seq_no+if_primary_term

    ②使用外部版本(使用其他数据库作为主要数据存储):version+version_type=external

    PUT products/_doc/1?if_seq_no=1&if_primary_term=1
    {
      "title":"iphone",
      "count":100
    }
    
    PUT products/_doc/1?version=30000&version_type=external
    {
      "title":"iphone",
      "count":100
    }

    三、Mapping

    1,设置Dynamic Mapping

      "true" "false" "strict"
    新增字段是否可保存 yes yes no
    新增字段是否可被搜索 yes no no
    Mapping会不会被更新 yes no no
    PUT user/_mapping
    {
      "dynamic":"true"
    }
    PUT user/_mapping
    {
      "dynamic":"false"
    }
    PUT user/_mapping
    {
      "dynamic":"strict"
    }

    2,定义mapping

    ①index控制字段是否可以被搜索

    ②null_value设置一个默认值"NULL",方便搜索null值字段

    ③text类型和keyword类型区别:text类型会使用默认分词器分词,当然你也可以为他指定特定的分词器。如果定义成keyword类型,那么默认就不会对其进行分词

    //查询user的mapping
    GET user/_mapping
    PUT employee
    {
      "mappings": {
        "properties": {
          "firstName":{
            "type": "text",
            "copy_to": "fullname"
          },
          "lastName":{
            "type": "text",
            "copy_to": "fullname"
          },
          "mobile":{
            "type": "text",
            "index": false
          },
          "age":{
            "type": "integer"
          },
          "cardNo":{
            "type": "keyword",
            "null_value": "NULL"
          }
        }
      }
    }
    
    //添加值
    POST employee/_doc
    {
      "firstName":"zhang",
      "lastName":"san",
      "mobile":"1300000000",
      "age":20,
      "cardNo":null
    }
    //查询cardNo是null的值
    GET employee/_search
    {
      "query": {
        "match": {
          "cardNo": "NULL"
        }
      }
    }
    //搜索报错:mobile字段的index设置为false,不能被搜索
    GET employee/_search
    {
      "query": {
        "match": {
          "mobile": "1300000000"
        }
      }
    }

    四、Index Template与Dynamic Template

    1,Index Template

    应用在所有的index上面。当一个索引被创建的时候

    ①应用elasticsearch默认的settings和mappings

    ②应用order数值低的index template中的设定

    ③应用order高的index template中的设定,之前的设定会被覆盖

    ④应用创建索引时,用户指定的settings和mappings,并覆盖之前模版中的设定

    //order数值:控制“merging”的过程。多个模版会merge在一起
    PUT _template/template_default
    {
      "index_patterns": ["*"],
      "order": 0,
      "version": 1,
      "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 1
      }
    }
    
    //创建test开头的索引时,主分片设置1,副本分片设置2,开启数值检测
    PUT _template/template_test
    {
      "index_patterns": ["test*"],
      "order": 1,
      "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 2
      },
      "mappings": {
        "date_detection": false,
        "numeric_detection": true
      }
    }
    
    //创建索引时显式指定settings,会覆盖模版中的设定(number_of_replicas最终为5)
    PUT testmy
    {
      "settings": {
        "number_of_replicas": 5
      }
    }
    
    //查看template信息
    GET _template/template_default
    GET _template/template_test
    
    //删除
    DELETE testmy
    DELETE _template/template_default
    DELETE _template/template_test
    PUT request_audit_logs_v2
    {
      "mappings": {
        "properties": {
          "key":{
            "type": "keyword"
          },
          "post_type":{
            "type": "keyword"
          },
          "url":{
            "type": "keyword"
          },
          "api":{
            "type": "keyword"
          },
          "request_content":{
            "type": "object"
          },
          "hander":{
            "type": "text",
            "index": false
          },
          "status":{
            "type": "keyword"
          },
          "response_content":{
            "type": "object"
          },
          "request_time":{
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss"
          },
          "response_time":{
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss"
          },
          "exception":{
            "type": "text"
          },
          "run_time":{
            "type": "float"
          },
          "thread_num":{
            "type": "integer"
          },
          "create_time":{
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss"
          },
          "modify_time":{
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss"
          },
          "user_name":{
            "type": "keyword"
          }
        },
        "dynamic":"strict"
    
      }
    }

    2,Dynamic Template

    设置在具体的index上面

    GET myindex/_search?q=full_name:zhang
    //将name.fitst和name.last映射到full_name字段上
    PUT myindex
    {
      "mappings": {
         "dynamic_templates":[
            { 
              "full_name":{
                "path_match":"name.*",
                "path_unmatch":"*.middle",
                "mapping":{
                  "type":"text",
                  "copy_to":"full_name"
                }
              }
            }
           ]
      }
    }
    
    POST myindex/_doc
    {
      "name":{
        "fitst":"zhang",
        "middle":"123",
        "last":"san"
      }
    }

    五、Aggregation

    Bucket Aggregation:一系列满足特定条件的文档集合

    Metric Aggregation:一些数学运算,可以对文档字段进行统计分析

    Pipeline Aggregation:对其他聚合结果进行二次聚合

    Matrix Aggregation:支持对多个字段的操作并提供一个结果矩阵

    //统计去往目的地的天气情况、价格情况
    GET kibana_sample_data_flights/_search
    {
      "size": 0,
      "aggs": {
        "flights_dest": {
          "terms": {
            "field": "DestCountry"
          },
          "aggs": {
            "stats_price": {
              "stats": {
                "field": "AvgTicketPrice"
              }
            },
            "wather":{
              "terms": {
                "field": "DestWeather"
              }
            }
          }
        }
      }
    }
    PUT /employees/
    {
      "mappings" : {
          "properties" : {
            "age" : {
              "type" : "integer"
            },
            "gender" : {
              "type" : "keyword"
            },
            "job" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 50
                }
              }
            },
            "name" : {
              "type" : "keyword"
            },
            "salary" : {
              "type" : "integer"
            }
          }
        }
    }
    
    PUT /employees/_bulk
    { "index" : {  "_id" : "1" } }
    { "name" : "Emma","age":32,"job":"Product Manager","gender":"female","salary":35000 }
    { "index" : {  "_id" : "2" } }
    { "name" : "Underwood","age":41,"job":"Dev Manager","gender":"male","salary": 50000}
    { "index" : {  "_id" : "3" } }
    { "name" : "Tran","age":25,"job":"Web Designer","gender":"male","salary":18000 }
    { "index" : {  "_id" : "4" } }
    { "name" : "Rivera","age":26,"job":"Web Designer","gender":"female","salary": 22000}
    { "index" : {  "_id" : "5" } }
    { "name" : "Rose","age":25,"job":"QA","gender":"female","salary":18000 }
    { "index" : {  "_id" : "6" } }
    { "name" : "Lucy","age":31,"job":"QA","gender":"female","salary": 25000}
    { "index" : {  "_id" : "7" } }
    { "name" : "Byrd","age":27,"job":"QA","gender":"male","salary":20000 }
    { "index" : {  "_id" : "8" } }
    { "name" : "Foster","age":27,"job":"Java Programmer","gender":"male","salary": 20000}
    { "index" : {  "_id" : "9" } }
    { "name" : "Gregory","age":32,"job":"Java Programmer","gender":"male","salary":22000 }
    { "index" : {  "_id" : "10" } }
    { "name" : "Bryant","age":20,"job":"Java Programmer","gender":"male","salary": 9000}
    { "index" : {  "_id" : "11" } }
    { "name" : "Jenny","age":36,"job":"Java Programmer","gender":"female","salary":38000 }
    { "index" : {  "_id" : "12" } }
    { "name" : "Mcdonald","age":31,"job":"Java Programmer","gender":"male","salary": 32000}
    { "index" : {  "_id" : "13" } }
    { "name" : "Jonthna","age":30,"job":"Java Programmer","gender":"female","salary":30000 }
    { "index" : {  "_id" : "14" } }
    { "name" : "Marshall","age":32,"job":"Javascript Programmer","gender":"male","salary": 25000}
    { "index" : {  "_id" : "15" } }
    { "name" : "King","age":33,"job":"Java Programmer","gender":"male","salary":28000 }
    { "index" : {  "_id" : "16" } }
    { "name" : "Mccarthy","age":21,"job":"Javascript Programmer","gender":"male","salary": 16000}
    { "index" : {  "_id" : "17" } }
    { "name" : "Goodwin","age":25,"job":"Javascript Programmer","gender":"male","salary": 16000}
    { "index" : {  "_id" : "18" } }
    { "name" : "Catherine","age":29,"job":"Javascript Programmer","gender":"female","salary": 20000}
    { "index" : {  "_id" : "19" } }
    { "name" : "Boone","age":30,"job":"DBA","gender":"male","salary": 30000}
    { "index" : {  "_id" : "20" } }
    { "name" : "Kathy","age":29,"job":"DBA","gender":"female","salary": 20000}
    
    
    # Metric 聚合,找到最低的工资
    GET employees/_search
    {
      "size": 0,
      "aggs": {
        "min_salary": {
          "min": {
            "field": "salary"
          }
        }
      }
    }
    
    # Metric 聚合,找到最高的工资
    GET employees/_search
    {
      "size": 0,
      "aggs": {
        "max_salary": {
          "max": {
            "field": "salary"
          }
        }
      }
    }
    
    
    # 多个 Metric 聚合,找到最低最高和平均工资
    GET employees/_search
    {
      "size": 0,
      "aggs": {
        "max_salary": {
          "max": {
            "field": "salary"
          }
        },
        "min_salary": {
          "min": {
            "field": "salary"
          }
        },
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    }
    
    # 一个聚合,输出多值
    GET employees/_search
    {
      "size": 0,
      "aggs": {
        "stats_salary": {
          "stats": {
            "field": "salary"
          }
        }
      }
    }
    
    # 对keyword 进行聚合
    GET employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword"
          }
        }
      }
    }
    
    # 对 Text 字段进行 terms 聚合查询
    #对 Text 字段打开 fielddata,支持terms aggregation
    PUT employees/_mapping
    {
      "properties" : {
        "job":{
           "type":     "text",
           "fielddata": true
        }
      }
    }
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field":"job"
          }
        }
      }
    }
    
    # cardinality 相当于distinct count
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "cardinate": {
          "cardinality": {
            "field": "job.keyword"
          }
        }
      }
    }
    
    
    # 对 性别的 keyword 进行聚合
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "gender": {
          "terms": {
            "field": "gender"
          }
        }
      }
    }
    
    #指定 bucket 的 size
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "ages_5": {
          "terms": {
            "field":"age",
            "size":3
          }
        }
      }
    }
    
    # 指定size,不同工种中,年纪最大的3个员工的具体信息
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field":"job.keyword"
          },
          "aggs": {
            "old_employee": {
              "top_hits": {
                "size": 3,
                "sort": [{
                  "age": "desc"
                }]
              }
            }
          }
        }
      }
    }
    
    #自定义工资区间分桶
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "salary_range": {
          "range": {
            "field": "salary",
            "ranges": [
              {
                "to": 10000
              },
              {
                "from": 10000, 
                "to": 20000
              }
              ,
              {
                "key": ">=20000", 
                "from": 20000
              }
            ]
          }
        }
      }
    }
    
    
    #Salary Histogram,工资0到10万,以 5000一个区间进行分桶
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "salary_histrogram": {
          "histogram": {
            "field": "salary",
            "interval": 5000,
            "extended_bounds": {
              "min": 0,
              "max": 100000
            }
          }
        }
      }
    }
    
    
    # 嵌套聚合1,按照工作类型分桶,并统计工资信息
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "Job_salary_stats": {
          "terms": {
            "field": "job.keyword"
          },
          "aggs": {
            "salary": {
              "stats": {
                "field": "salary"
              }
            }
          }
        }
      }
    }
    
    # 多次嵌套。根据工作类型分桶,然后按照性别分桶,计算工资的统计信息
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "Job_gender_stats": {
          "terms": {
            "field": "job.keyword"
          },
          "aggs": {
            "gender_stats": {
              "terms": {
                "field": "gender"
              },
              "aggs": {
                "salary_stats": {
                  "stats": {
                    "field": "salary"
                  }
                }
              }
            }
          }
        }
      }
    }
    # 平均工资最低的工作类型
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "order": {
              "avg_salary": "desc"
            }
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            }
          }
        },
        "min_salary_by_job":{
          "min_bucket": {
            "buckets_path": "jobs>avg_salary"
          }
        }
      }
    }
    
    
    # 平均工资最高的工作类型
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "order": {
              "avg_salary": "desc"
            }
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            }
          }
        },
        "max_salary_by_job":
        {
          "max_bucket": {
            "buckets_path": "jobs>avg_salary"
          }
        }
      }
    }
    
    # 平均工资的平均工资
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "order": {
              "avg_salary": "desc"
            }
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            }
          }
        },
        "avg_salary_by_job":
        {
          "avg_bucket": {
            "buckets_path": "jobs>avg_salary"
          }
        }
      }
    }
    
    # 平均工资的统计分析
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "size": 10, 
            "order": {
              "avg_salary": "desc"
            }
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            }
          }
        },
        "stats_salary_by_job":
        {
          "stats_bucket": {
            "buckets_path": "jobs>avg_salary"
          }
        }
      }
    }
    
    # 平均工资的百分位数
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "size": 10
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            }
          }
        },
        "percentiles_salary_by_job":{
          "percentiles_bucket": {
            "buckets_path": "jobs>avg_salary"
          }
        }
      }
    }
    
    #按照年龄对平均工资求导
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "age": {
          "histogram": {
            "field": "age",
            "min_doc_count": 1,
            "interval": 1
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            },
            "derivative_avg_salary":{
              "derivative": {
                "buckets_path": "avg_salary"
              }
            }
          }
        }
      }
    }

    作用范围

    # Query 年龄大于等于20岁的员工,根据job分桶
    POST employees/_search
    {
      "size": 0,
      "query": {
        "range": {
          "age": {
            "gte": 20
          }
        }
      },
      "aggs": {
        "jobs": {
          "terms": {
            "field":"job.keyword"
            
          }
        }
      }
    }
    
    
    #Filter 年长的员工job分桶,和所有的员工job分桶
    POST employees/_search
    {
      "size": 0, 
      "aggs": {
        "older_person": {
          "filter": {"range": {
            "age": {
              "gte": 35
            }
          }},
          "aggs": {
            "jobs": {
              "terms": {
                "field": "job.keyword"
              }
            }
          }
        },
        "all_jobs":{
          "terms": {
            "field": "job.keyword"
          }
        }
      }
    }
    
    #Post Filter. 一条语句,找出所有的job类型。还能找到聚合后符合条件的结果
    #将分完桶的job为Javascript Programmer显示出来
    POST employees/_search
    {
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword"
          }
        }
      },
      "post_filter": {
        "match": {
          "job.keyword": "Javascript Programmer"
        }
      }
    }
    
    # Global aggregation: "all" ignores the restriction imposed by "query",
    # so "salary_avg" covers every employee while "jobs" only covers age >= 40.
    POST employees/_search
    {
      "size": 0,
      "query": {
        "range": { "age": { "gte": 40 } }
      },
      "aggs": {
        "jobs": {
          "terms": { "field": "job.keyword" }
        },
        "all": {
          "global": {},
          "aggs": {
            "salary_avg": {
              "avg": { "field": "salary" }
            }
          }
        }
      }
    }

    排序

    # Ordering: sort the job buckets by document count ascending,
    # then by bucket key descending.
    POST employees/_search
    {
      "size": 0,
      "query": {
        "range": { "age": { "gte": 20 } }
      },
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "order": [
              { "_count": "asc" },
              { "_key": "desc" }
            ]
          }
        }
      }
    }
    
    
    # Ordering: sort the job buckets by the "avg_salary" sub-aggregation, descending.
    # "aggs" is a child of "jobs" (a sibling of "terms"), which is what lets "order" refer to it.
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "order": [
              { "avg_salary": "desc" }
            ]
          },
          "aggs": {
            "avg_salary": {
              "avg": { "field": "salary" }
            }
          }
        }
      }
    }
    
    # Ordering: sort the job buckets by the "min" value of the "stats_salary"
    # sub-aggregation, descending.
    POST employees/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "order": [
              { "stats_salary.min": "desc" }
            ]
          },
          "aggs": {
            "stats_salary": {
              "stats": { "field": "salary" }
            }
          }
        }
      }
    }

    聚合分析精准度问题:

    doc_count_error_upper_bound:被遗漏的term分桶所包含的文档数,可能的最大值

    sum_other_doc_count:除了返回结果bucket的terms以外,其他terms的文档总数(总数-返回的总数)

    size和shard_size的区别?
    size是最终返回的bucket的数量。
    shard_size是每个shard上取回的bucket的数量。然后,每个shard上的结果,会在Coordinating节点上再做一次汇总,返回总数。

    ①如何解决Terms不准的问题:

      terms聚合分析不准的原因,数据分散在多个分片上,Coordinating Node无法获取数据全貌

      解决方案1:当数据量不大时,设置Primary Shard为1;实现准确性

      解决方案2:在分布式数据上,设置shard_size参数,提高精确度(原理:每次从shard上额外多获取数据,提升准确率)

    六、重建索引

    一般在以下几种情况下,需要重建索引

      索引的mappings发生变更:字段类型更改,分词器及字典更新

      索引的settings发生更改:索引的主分片数发生改变

      集群内,集群间需要做数据迁移

    Elasticsearch内置提供的API

      Update By Query:在现有索引上重建

      Reindex:在其他索引上重建索引

    # Update the mapping: add an "english" sub-field on "content" that uses the english analyzer.
    PUT blogs/_mapping
    {
      "properties": {
        "content": {
          "type": "text",
          "fields": {
            "english": { "type": "text", "analyzer": "english" }
          }
        }
      }
    }
    
    # Run update-by-query over every document in blogs (empty body: no query, no script).
    POST blogs/_update_by_query
    {}
    # Create a new index with the desired mapping defined up front.
    PUT blogs_fix/
    {
      "mappings": {
        "properties": {
          "content": {
            "type": "text",
            "fields": {
              "english": { "type": "text", "analyzer": "english" }
            }
          },
          "keyword": { "type": "keyword" }
        }
      }
    }
    
    # Reindex API: copy documents from "blogs" into "blogs_fix".
    POST _reindex
    {
      "source": { "index": "blogs" },
      "dest": { "index": "blogs_fix" }
    }

    七、Ingest Pipeline 

    1,测试

    # Simulate an inline pipeline that splits "tags" on commas, run against two sample documents.
    POST _ingest/pipeline/_simulate
    {
      "pipeline": {
        "description": "to split blog tags",
        "processors": [
          { "split": { "field": "tags", "separator": "," } }
        ]
      },
      "docs": [
        {
          "_index": "index",
          "_id": "id",
          "_source": {
            "title": "Introducing big data......",
            "tags": "hadoop,elasticsearch,spark",
            "content": "You konw, for big data"
          }
        },
        {
          "_index": "index",
          "_id": "idxx",
          "_source": {
            "title": "Introducing cloud computering",
            "tags": "openstack,k8s",
            "content": "You konw, for cloud"
          }
        }
      ]
    }

    2,创建pipeline

    # Register "blog_pipeline": split "tags" on commas, then set "views" to 0.
    PUT _ingest/pipeline/blog_pipeline
    {
      "description": "a blog pipeline",
      "processors": [
        { "split": { "field": "tags", "separator": "," } },
        { "set": { "field": "views", "value": 0 } }
      ]
    }
    
    # Fetch the stored definition of the "blog_pipeline" ingest pipeline.
    GET _ingest/pipeline/blog_pipeline
    
    
    # Simulate the stored "blog_pipeline" against one sample document
    # (no pipeline definition in the body; the pipeline is named in the URL).
    POST _ingest/pipeline/blog_pipeline/_simulate
    {
      "docs": [
        {
          "_source": {
            "title": "Introducing cloud computering",
            "tags": "openstack,k8s",
            "content": "You konw, for cloud"
          }
        }
      ]
    }

    3,修复之前的数据

    # Run blog_pipeline through update-by-query, restricted to documents that
    # do not yet have a "views" field.
    # NOTE(review): presumably this skips documents the pipeline already processed,
    # since blog_pipeline sets "views" - confirm against the pipeline definition.
    POST tech_blogs/_update_by_query?pipeline=blog_pipeline
    {
      "query": {
        "bool": {
          "must_not": {
            "exists": { "field": "views" }
          }
        }
      }
    }

    4,使用pipeline添加或更新文档

    # Index a document with an auto-generated id, routed through blog_pipeline.
    POST tech_blogs/_doc?pipeline=blog_pipeline
    {
      "title": "Introducing big data......",
      "tags": "hadoop,elasticsearch,spark",
      "content": "You konw, for big data"
    }

    # Index a document under the explicit id 2, routed through blog_pipeline.
    PUT tech_blogs/_doc/2?pipeline=blog_pipeline
    {
      "title": "Introducing cloud computering",
      "tags": "openstack,k8s",
      "content": "You konw, for cloud"
    }
    # Register "stackoverflow_pipeline": each listed survey field is split on ";".
    # Processors run in the order listed.
    PUT _ingest/pipeline/stackoverflow_pipeline
    {
      "description": "Pipeline for stackoverflow survey",
      "processors": [
        { "split": { "field": "DatabaseDesireNextYear", "separator": ";" } },
        { "split": { "field": "DatabaseWorkedWith", "separator": ";" } },
        { "split": { "field": "DevEnviron", "separator": ";" } },
        { "split": { "field": "MiscTechDesireNextYear", "separator": ";" } },
        { "split": { "field": "PlatformDesireNextYear", "separator": ";" } },
        { "split": { "field": "WebFrameDesireNextYear", "separator": ";" } },
        { "split": { "field": "Containers", "separator": ";" } }
      ]
    }
    案例
  • 相关阅读:
    记事本02
    助人快乐:笔记本连网
    高性能 架构实例 学习笔记
    食.运动.阅读
    The server name ... address could not be resolved
    Mysql 远程访问
    CSS布局 UI 学习笔记
    MySql 修改root密码
    C#:String类型中的CharAt 方法
    La_Lb_Lc
  • 原文地址:https://www.cnblogs.com/zd1994/p/12650357.html
Copyright © 2011-2022 走看看