zoukankan      html  css  js  c++  java
  • Elasticsearch没看文档之前,整理的一些知识

    1 基础

    index -> 数据库
    type -> 表
    document -> 行
    field -> 列
    -------------------------------------------------------------------
    字段属性
    type
    	String
    		text -> 可分词,不能聚合
    		keyword -> 可聚合,不能分词
    	数值类型
    		long, integer, short, byte, double, float, half_float, scaled_float
    	
    index
    	true -> 会被索引,默认true
    	false -> 不会被索引,不能用来搜索,例如图片地址这类不需要搜索的字段
    -------------------------------------------------------------------
    创建数据库 a1 分片5 副本1
    PUT a1
    {
      "settings": {
        "number_of_shards": 5,
        "number_of_replicas": 1
      }
    }
    
    查看数据库 a1
    GET a1
    
    查看所有数据库
    GET *
    
    删除数据库 a1
    DELETE a1
    

    2 基础

    //在 a1 数据库中建表 student
    //建立字段 name(类型text 指定分词) age(类型integer)
    PUT a1/_mapping/student
    {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "ik_max_word"
        },
        "age": {
          "type": "integer"
        }
      }
    }
    
    查看 a1 数据库中建立的表结构
    GET a1/_mapping
    
    往表中添加数据
    POST a1/student
    {
      "name":"小米手机",
      "age":11
    }
    
    查看 a1 数据库的全部数据
    _source -> 查询到的数据
    _id -> 文档的唯一标识
    GET a1/_search
    {
        "query":{
            "match_all": {}
        }
    }
    
    自定义 _id 为2 的数据
    POST a1/student/2
    {
      "name":"大米手机",
      "age":22
    }
    
    如果添加的字段 address 在表中不存在,就会自动创建该字段
    POST a1/student/3
    {
      "name":"小米电视4K",
      "age":33,
      "address":"安徽阜阳小米酒店101"
    }
    
    修改数据如果对应 id 存在则删除之前的再添加 不存在则添加
    PUT a1/student/4
    {
      "name":"小米电脑",
      "age":44
    }
    
    删除id=4的文档
    DELETE a1/student/4
    

    3 基本查询

    基本查询
    
    match_all
    查询数据库 a1 所有数据
    GET a1/_search
    {
      "query": {
    	"match_all": {}
      }
    }
    ---
    {
      "took": 4,//查询耗时4毫秒
      "timed_out": false,//没有超时
      "_shards": {//分片
    	"total": 5,//一共5个
    	"successful": 5,//成功5个
    	"skipped": 0,//跳过0个
    	"failed": 0//失败0个
      },
      "hits": {//查询到的数据
    	"total": 3,//查询总条数
    	"max_score": 1,//最大得分1
    	"hits": [//查询到的数据所有文档
    	  {//一个文档
    		"_index": "a1",//数据库
    		"_type": "student",//表
    		"_id": "2",//主键 每个文档的唯一标识
    		"_score": 1,//得分是1 满分是最大得分
    		"_source": {//查询到的数据 包括 字段 字段值 -> k:v
    		  "name": "大米手机",
    		  "age": 22
    		}
    	  },
    	  {
    		"_index": "a1",
    		"_type": "student",
    		"_id": "CA2Yqm0Bmr19jrNQ7nRL",
    		"_score": 1,
    		"_source": {
    		  "name": "小米手机",
    		  "age": 11
    		}
    	  },
    	  {
    		"_index": "a1",
    		"_type": "student",
    		"_id": "3",
    		"_score": 1,
    		"_source": {
    		  "name": "小米电视4K",
    		  "age": 33,
    		  "address": "安徽阜阳小米酒店101"
    		}
    	  }
    	]
      }
    }
    ---
    match
    查询 name=小米电视
    因为使用了分词,默认是or 所以可匹配 -> 小米 or 电视
    GET a1/_search
    {
      "query": {
        "match": {
          "name": "小米电视"
        }
      }
    }
    
    查询分词,指定and可匹配 -> 小米 and 电视
    GET a1/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米电视",
            "operator": "and"
          }
        }
      }
    }
    
    可以指定分词的个数,
    1 -> 匹配任意一个词
    2 -> 匹配任意两个词
    3 -> 因为超过了分词量,所以匹配不到
    GET a1/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米电视",
            "minimum_should_match": 1
          }
        }
      }
    }
    
    3x0.66=1.98,因为1.98<2 所以匹配任意一个
    GET a1/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米智能电视",
            "minimum_should_match": "66%"
          }
        }
      }
    }
    
    3x0.67=2.01,因为2.01>2 所以匹配任意两个
    GET a1/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米智能电视",
            "minimum_should_match": "67%"
          }
        }
      }
    }
    
    查询 name 或 address 含有"小米"这个词
    GET a1/_search
    {
      "query": {
        "multi_match": {
          "query": "小米",
          "fields": ["name","address"]
        }
      }
    }
    
    精准匹配 age=11
    GET a1/_search
    {
      "query": {
        "term": {
          "age": {
            "value": 11
          }
        }
      }
    }
    
    多条精准匹配 age in [11,22,77]
    GET a1/_search
    {
      "query": {
        "terms": {
          "age": [11,22,77]
        }
      }
    }
    

    4 结果过滤

    结果过滤
    
    查询字段只显示 name age
    并且查询 age in [11,22,77]
    GET a1/_search
    {
      "_source": ["name","age"],
      "query": {
        "terms": {
          "age": [11,22,77]
        }
      }
    }
    
    查询所有 只显示"address"字段,没有此字段的显示空
    GET a1/_search
    {
      "_source": {
        "includes": ["address"]
      }
    }
    
    查询所有只除了"address"字段,其他全显示
    GET a1/_search
    {
      "_source": {
        "excludes": ["address"]
      }
    }
    

    5 高级查询

    高级查询
    
    与 must[{1},{2}] 满足所有
    查询"name"分词有"小米"并且"age"等于11或者22
    GET a1/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "name": "小米"
              }
            },
            {
              "terms": {
                "age": [11,22]
              }
            }
          ]
        }
      }
    }
    
    非 must_not[{1},{2}] 不满足所有
    查询"name"分词没有"小米"并且"age"不在[11,22]中
    GET a1/_search
    {
      "query": {
        "bool": {
          "must_not": [
            {
              "match": {
                "name": "小米"
              }
            },
            {
              "terms": {
                "age": [11,22]
              }
            }
          ]
        }
      }
    }
    
    或 should[{1},{2}] 满足任意一个
    GET a1/_search
    {
      "query": {
        "bool": {
          "should": [
            {
              "match": {
                "name": "小米"
              }
            },
            {
              "terms": {
                "age": [11,22]
              }
            }
          ]
        }
      }
    }
    
    >  >=  <  <=
    gt gte lt lte
    查询 10<=age<30
    GET a1/_search
    {
      "query": {
        "range": {
          "age": {
            "gte": 10,
            "lt": 30
          }
        }
      }
    }
    

    6 过滤

    过滤
    
    查询"name"=小米并且10<=age<=20
    GET a1/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "name": "小米"
              }
            }
          ],
          "filter": {
            "range": {
              "age": {
                "gte": 10,
                "lte": 20
              }
            }
          }
        }
      }
    }
    
    排序
    
    查询"name"=小米并按照年龄降序
    GET a1/_search
    {
      "query": {
        "match": {
          "name": "小米"
        }
      },
      "sort": [
        {
          "age": {
            "order": "desc"
          }
        }
      ]
    }
    

    7 聚合

    聚合
    
    select count(color)//指标 -> 对桶计算
    from table 
    group by color//桶 -> 满足条件的文档集合
    
    1.通过国家划分文档(桶)
    2.然后通过性别划分每个国家(桶)
    3.然后通过年龄区间划分每种性别(桶)
    4.最后,为每个年龄区间计算平均薪酬(指标)
    所有的这些都在一个请求内完成并且只遍历一次数据!
    
    PUT /cars
    {
      "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
      },
      "mappings": {
        "transactions": {
          "properties": {
            "color": {
              "type": "keyword"
            },
            "make": {
              "type": "keyword"
            }
          }
        }
      }
    }
    
    执行这一条之前,先执行上一条(创建 cars 数据库)
    POST /cars/transactions/_bulk
    { "index": {}}
    { "price" : 10000, "color" : "red", "make" : "honda", "sold" : "2014-10-28" }
    { "index": {}}
    { "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2014-11-05" }
    { "index": {}}
    { "price" : 30000, "color" : "green", "make" : "ford", "sold" : "2014-05-18" }
    { "index": {}}
    { "price" : 15000, "color" : "blue", "make" : "toyota", "sold" : "2014-07-02" }
    { "index": {}}
    { "price" : 12000, "color" : "green", "make" : "toyota", "sold" : "2014-08-19" }
    { "index": {}}
    { "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2014-11-05" }
    { "index": {}}
    { "price" : 80000, "color" : "red", "make" : "bmw", "sold" : "2014-01-01" }
    { "index": {}}
    { "price" : 25000, "color" : "blue", "make" : "ford", "sold" : "2014-02-12" }
    
    ---1 -> 尝试聚合 terms
    按照"color"聚合 聚合名称为"popular_colors"
    GET /cars/_search
    {
        "size" : 0,
        "aggs" : { 
            "popular_colors" : { 
                "terms" : {"field" : "color"}
            }
        }
    }
    
    聚合结果 -> 红色汽车卖得最好
    hits 结果为空,因为我们设置size:0
    aggregations 聚合结果
    popular_colors 聚合名称
    buckets 桶的集合
    	key 这个桶对应"color"的值
    	doc_count 这个桶中文档数量
    	
    {
      "took": 3,
      "timed_out": false,
      "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 8,//查询到全部8条
        "max_score": 0,
        "hits": []//显示0条 设置了 size=0
      },
      "aggregations": {
        "popular_colors": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "red",
              "doc_count": 4
            },
            {
              "key": "blue",
              "doc_count": 2
            },
            {
              "key": "green",
              "doc_count": 2
            }
          ]
        }
      }
    }
    
    ---2 -> 添加度量指标 avg
    对聚合后的桶再进行度量算平均值
    GET cars/_search
    {
      "size": 0,
      "aggs": {
        "popular_colors": {
          "terms": {"field": "color"},
          "aggs": {
            "avg_price": {
              "avg": {"field": "price"}
            }
          }
        }
      }
    }
    聚合结果 -> 红色汽车平均价格最贵
    ...
      "aggregations": {
        "popular_colors": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "red",
              "doc_count": 4,
              "avg_price": {
                "value": 32500
              }
            },
            {
              "key": "blue",
              "doc_count": 2,
              "avg_price": {
                "value": 20000
              }
            },
            {
              "key": "green",
              "doc_count": 2,
              "avg_price": {
                "value": 21000
              }
            }
          ]
        }
      }
    ...
    ---3 -> 嵌套桶 terms avg terms
    对聚合的结果再次进行聚合,桶嵌套桶
    GET /cars/_search
    {
        "size" : 0,
        "aggs" : { 
            "popular_colors" : { 
                "terms" : {"field" : "color"},
                "aggs":{
                    "avg_price": { 
                       "avg": {"field": "price"}
                    },
                    "maker":{
                        "terms":{"field":"make"}
                    }
                }
            }
        }
    }
    
    聚合结果 -> 每种颜色下各个品牌(make)分别卖出去多少辆
    + 原来的color桶和avg计算我们不变
    + maker:在嵌套的aggs下新添一个桶,叫做maker
    + terms:桶的划分类型依然是词条
    + field:这里根据make字段进行划分
    ...
      "aggregations": {
        "popular_colors": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "red",
              "doc_count": 4,
              "maker": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": "honda",
                    "doc_count": 3
                  },
                  {
                    "key": "bmw",
                    "doc_count": 1
                  }
                ]
              },
              "avg_price": {
                "value": 32500
              }
            },
            {
              "key": "blue",
              ...
            },
            {
              "key": "green",
              ...
            }
          ]
        }
      }
    ...
    
    ---4 -> 最后的修改 terms avg min max terms terms
    1>
    	一共有多少种颜色
    	每种颜色卖多少辆车
    	每种颜色平均价格是多少
    2>
    	每种颜色都是什么车
    	每种车有多少辆
    3>
    	每种车价格是多少
    GET /cars/_search
    {
        "size" : 0,
        "aggs" : { 
            "popular_colors" : { 
                "terms" : {  "field" : "color"},
                "aggs":{
                    "avg_price": { 
                       "avg": { "field": "price" }
                    },
                    "min_price":{
                      "min": {"field": "price"}
                    },
                    "max_price":{
                      "max": {"field": "price"}
                    },
                    "maker":{
                        "terms":{"field":"make"},
                        "aggs": {
                          "a_price": {
                            "terms": {"field": "price" }
                          }
                        }
                    }
                }
            }
        }
    }
    
    聚合结果
    	其中红色汽车4辆
    	红色最贵80000 最便宜10000
    	其中3辆honda本田 1辆bmw
    	其中本田20000 卖2辆 本田10000卖1辆 宝马bmw80000 卖一辆
    ...
      "aggregations": {
        "popular_colors": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "red",
              "doc_count": 4,
              "max_price": {
                "value": 80000
              },
              "min_price": {
                "value": 10000
              },
              "maker": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": "honda",
                    "doc_count": 3,
                    "a_price": {
                      "doc_count_error_upper_bound": 0,
                      "sum_other_doc_count": 0,
                      "buckets": [
                        {
                          "key": 20000,
                          "doc_count": 2
                        },
                        {
                          "key": 10000,
                          "doc_count": 1
                        }
                      ]
                    }
                  },
                  {
                    "key": "bmw",
                    "doc_count": 1,
                    "a_price": {
                      "doc_count_error_upper_bound": 0,
                      "sum_other_doc_count": 0,
                      "buckets": [
                        {
                          "key": 80000,
                          "doc_count": 1
                        }
                      ]
                    }
                  }
                ]
              },
              "avg_price": {
                "value": 32500
              }
            },
            {
              "key": "blue",
              ...
            },
            {
              "key": "green",
              ...
            }
          ]
        }
      }
    ...
    
    ---5 -> 条形图 histogram interval
    聚合名称"a_price"
    根据字段"price"进行5000一次分割
    从最小到最大,例如
    [-1,1,4999,5000,5001,10000,10001] -> 
    
    [-5000,0) -> -1	
    [0,5000) -> 1 4999
    [5000,10000) -> 5000 5001
    [10000,15000) -> 10000 10001
    
    key=0 [0,5000)
    key=5000 [5000,10000)
    GET /cars/_search
    {
      "size":0,
      "aggs":{
        "a_price":{
          "histogram": {
            "field": "price",
            "interval": 5000
          }
        }
      }
    }
    
    聚合结果
    ...
      "aggregations": {
        "a_price": {
          "buckets": [
            {
              "key": 10000,
              "doc_count": 2
            },
            {
              "key": 15000,
              "doc_count": 1
            },
            {
              "key": 20000,
              "doc_count": 2
            },
            {
              "key": 25000,
              "doc_count": 1
            },
            {
              "key": 30000,
              "doc_count": 1
            },
            {
              "key": 35000,
              "doc_count": 0
            },
            {
              "key": 40000,
              "doc_count": 0
            },
            {
              "key": 45000,
              "doc_count": 0
            },
            {
              "key": 50000,
              "doc_count": 0
            },
            {
              "key": 55000,
              "doc_count": 0
            },
            {
              "key": 60000,
              "doc_count": 0
            },
            {
              "key": 65000,
              "doc_count": 0
            },
            {
              "key": 70000,
              "doc_count": 0
            },
            {
              "key": 75000,
              "doc_count": 0
            },
            {
              "key": 80000,
              "doc_count": 1
            }
          ]
        }
      }
    ...
    ---6 -> 条形图 histogram interval min_doc_count
    最少有一条记录
    GET /cars/_search
    {
      "size":0,
      "aggs":{
        "a_price":{
          "histogram": {
            "field": "price",
            "interval": 5000,
            "min_doc_count": 1
          }
        }
      }
    }
    
    聚合结果
    ...
      "aggregations": {
        "a_price": {
          "buckets": [
            {
              "key": 10000,
              "doc_count": 2
            },
            {
              "key": 15000,
              "doc_count": 1
            },
            {
              "key": 20000,
              "doc_count": 2
            },
            {
              "key": 25000,
              "doc_count": 1
            },
            {
              "key": 30000,
              "doc_count": 1
            },
            {
              "key": 80000,
              "doc_count": 1
            }
          ]
        }
      }
    ...
    
    ---7 -> 按时间统计 date_histogram interval format min_doc_count
    按月分割 至少匹配一条
    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs": {
          "sales": {
             "date_histogram": {
                "field": "sold",
                "interval": "month", 
                "format": "yyyy-MM-dd",
                "min_doc_count": 1
             }
          }
       }
    }
    聚合结果
    ...
      "aggregations": {
        "sales": {
          "buckets": [
            {
              "key_as_string": "2014-01-01",
              "key": 1388534400000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-02-01",
              "key": 1391212800000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-05-01",
              "key": 1398902400000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-07-01",
              "key": 1404172800000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-08-01",
              "key": 1406851200000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-10-01",
              "key": 1412121600000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-11-01",
              "key": 1414800000000,
              "doc_count": 2
            }
          ]
        }
      }
    ...
    ---8 -> 返回空桶 date_histogram interval format min_doc_count extended_bounds min max
    按月分割查询整年的数据 查询一共12条 用于做统计图之类的
    如果数据只到11月 则不会显示12月,就需要加extended_bounds用来限制整年
    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs": {
          "sales": {
             "date_histogram": {
                "field": "sold",
                "interval": "month",
                "format": "yyyy-MM-dd",
                "min_doc_count" : 0, 
                "extended_bounds" : { 
                    "min" : "2014-01-01",
                    "max" : "2014-12-31"
                }
             }
          }
       }
    }
    聚合结果
    ...
      "aggregations": {
        "sales": {
          "buckets": [
            {
              "key_as_string": "2014-01-01",
              "key": 1388534400000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-02-01",
              "key": 1391212800000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-03-01",
              "key": 1393632000000,
              "doc_count": 0
            },
            {
              "key_as_string": "2014-04-01",
              "key": 1396310400000,
              "doc_count": 0
            },
            {
              "key_as_string": "2014-05-01",
              "key": 1398902400000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-06-01",
              "key": 1401580800000,
              "doc_count": 0
            },
            {
              "key_as_string": "2014-07-01",
              "key": 1404172800000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-08-01",
              "key": 1406851200000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-09-01",
              "key": 1409529600000,
              "doc_count": 0
            },
            {
              "key_as_string": "2014-10-01",
              "key": 1412121600000,
              "doc_count": 1
            },
            {
              "key_as_string": "2014-11-01",
              "key": 1414800000000,
              "doc_count": 2
            },
            {
              "key_as_string": "2014-12-01",
              "key": 1417392000000,
              "doc_count": 0
            }
          ]
        }
      }
    ...
    ---9 -> 时间统计 扩展案例 date_histogram interval format min_doc_count extended_bounds min max terms sum
    按照季度进行划分全年 分为4个季度
    每个季度卖了多少钱
    每个季度每种车卖了多少辆 每种车卖多少钱
    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs": {
          "sales": {
             "date_histogram": {
                "field": "sold",
                "interval": "quarter", 
                "format": "yyyy-MM-dd",
                "min_doc_count" : 0,
                "extended_bounds" : {
                    "min" : "2014-01-01",
                    "max" : "2014-12-31"
                }
             },
             "aggs": {
                "per_make_sum": {
                   "terms": {"field": "make"},
                   "aggs": {
                      "sum_price": {"sum": { "field": "price" }}
                   }
                },
                "total_sum": {"sum": { "field": "price" }}
             }
          }
       }
    }
    
    聚合结果
    第一季度[2014-01-01,2014-04-01) 共卖了2辆车 卖了105000
    其中1辆bmw宝马 80000 1辆ford福特 25000
    ...
      "aggregations": {
        "sales": {
          "buckets": [
            {
              "key_as_string": "2014-01-01",
              "key": 1388534400000,
              "doc_count": 2,
              "per_make_sum": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": "bmw",
                    "doc_count": 1,
                    "sum_price": {
                      "value": 80000
                    }
                  },
                  {
                    "key": "ford",
                    "doc_count": 1,
                    "sum_price": {
                      "value": 25000
                    }
                  }
                ]
              },
              "total_sum": {
                "value": 105000
              }
            },
            {
              "key_as_string": "2014-04-01",
    		  ...
            },
            {
              "key_as_string": "2014-07-01",
              ...
            },
            {
              "key_as_string": "2014-10-01",
              ...
            }
          ]
        }
      }
    ...
    ---10 -> stats 包括 count min max avg sum
    GET cars/_search
    {
      "size": 0,
      "aggs": {
        "price": {
          "stats": {"field": "price"}
        }
      }
    }
    聚合结果
    ...
      "aggregations": {
        "price": {
          "count": 8,
          "min": 10000,
          "max": 80000,
          "avg": 26500,
          "sum": 212000
        }
      }
    ...
    

    8 聚合

    	GET /cars/transactions/_search
    	{
    		"size" : 0,
    		"aggs" : {
    			"colors" : {
    				"terms" : {"field" : "color"}
    			}
    		}
    	}
    上等价于下
    	GET /cars/transactions/_search
    	{
    		"size" : 0,
    		"query" : {
    			"match_all" : {}
    		},
    		"aggs" : {
    			"colors" : {
    				"terms" : {"field" : "color"}
    			}
    		}
    	}
    
    ---1 -> 范围限定的聚合 query match aggs avg
    对查询的结果进行聚合
    GET cars/_search
    {
      "size": 0, 
      "query": {
        "match": {"make": "ford"}
      }, 
      "aggs": {
        "avg_1": {
          "avg": {"field": "price"}
        }
      }
    }
    聚合结果
    ...
      "aggregations": {
        "avg_1": {
          "value": 27500//对查询到的结果进行求平均值
        }
      }
    ...
    ---2 -> 范围限定的聚合 query match aggs avg global aggs avg
    对查询的结果进行聚合 并对全部数据也进行聚合
    GET cars/_search
    {
      "size": 0, 
      "query": {
        "match": {"make": "ford"}
      }, 
      "aggs": {
        "avg_1": {//对查询的结果进行聚合
          "avg": {"field": "price"}
        },
        "all":{//聚合桶名称
          "global": {},
           "aggs": {
            "avg_2": {//对全部数据求平均值的桶
              "avg": {"field": "price"}
            }
          }
        }
      }
    }
    聚合结果
    ...
      "aggregations": {
        "all": {
          "doc_count": 8,
          "avg_2": {
            "value": 26500
          }
        },
        "avg_1": {
          "value": 27500
        }
      }
    ...
    ---3 -> 过滤 query constant_score filter range aggs avg
    使用"constant_score"的"filter"进行过滤,再对过滤的结果进行聚合
    GET /cars/transactions/_search
    {
        "size" : 0,
        "query" : {
            "constant_score": {
                "filter": {
                    "range": {
                        "price": {"gte": 30000}
                    }
                }
            }
        },
        "aggs" : {
            "single_avg_price": {
                "avg" : { "field" : "price" }
            }
        }
    }
    聚合结果
    ...
      "aggregations": {
        "single_avg_price": {
          "value": 55000
        }
      }
    ...
    ---4 -> 过滤桶 query match aggs filter range aggs avg
    对查询结果进行过滤(只保留最近一个月内售出的,即 sold >= now-1M)再进行计算平均值
    GET /cars/transactions/_search
    {
       "size" : 0,
       "query":{
          "match": {"make": "ford"}
       },
       "aggs":{
          "recent_sales": {
             "filter": { 
                "range": {"sold": {"from": "now-1M"}}
             },
             "aggs": {
                "average_price":{"avg": {"field": "price"}}
             }
          }
       }
    }
    聚合结果
    ...
      "aggregations": {
        "recent_sales": {
          "meta": {},
          "doc_count": 0,
          "average_price": {
            "value": null
          }
        }
      }
    ...
    ---5 -> 后过滤器 query match post_filter term aggs terms
    "post_filter"只影响搜索结果"hits",不影响聚合结果"aggregations"
    GET /cars/transactions/_search
    {
        "size" : 0,
        "query": {
            "match": {"make": "ford"}
        },
        "post_filter": {    
            "term" : {"color" : "green"}
        },
        "aggs" : {
            "all_colors": {
                "terms" : { "field" : "color" }
            }
        }
    }
    聚合结果
    ...
      "hits": {
        "total": 1,//只有一条
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "all_colors": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "blue",
              "doc_count": 1
            },
            {
              "key": "green",
              "doc_count": 1
            }
          ]
        }
      }
    ...
    ---6 -> 内置排序 aggs terms order _count _term
    桶默认按照"doc_count"降序
    我们可以使用内置"order"排序
    _count
    	按"doc_count"大小排序。对 terms 、 histogram 、 date_histogram 有效。
    _term
    	按照"key"字母顺序排序。只在 terms 内使用。
    _key
    	按每个桶的键值数值排序(理论上与 _term 类似)。 只在 histogram 和 date_histogram 内使用。
    GET cars/_search
    {
      "size": 0,
      "aggs": {
        "colors": {
          "terms": {
            "field": "color",
            "order": {
              "_count": "asc"
            }
          }
        }
      }
    }
    聚合结果
    ...
      "aggregations": {
        "colors": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "blue",
              "doc_count": 2
            },
            {
              "key": "green",
              "doc_count": 2
            },
            {
              "key": "red",
              "doc_count": 4
            }
          ]
        }
      }
    ...
    ---7 -> 按度量排序 aggs terms order aggs avg
    按照子聚合"avg_price"计算出的平均价格,对桶进行升序排列
    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "makes" : {
                "terms" : {
                  "field" : "make",
                  "order": {"avg_price" : "asc" }
                },
                "aggs": {
                    "avg_price": {"avg": {"field": "price"}}
                }
            }
        }
    }
    聚合结果
    ...
      "aggregations": {
        "makes": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "toyota",
              "doc_count": 2,
              "avg_price": {
                "value": 13500
              }
            },
            {
              "key": "honda",
              "doc_count": 3,
              "avg_price": {
                "value": 16666.666666666668
              }
            },
            {
              "key": "ford",
              "doc_count": 2,
              "avg_price": {
                "value": 27500
              }
            },
            {
              "key": "bmw",
              "doc_count": 1,
              "avg_price": {
                "value": 80000
              }
            }
          ]
        }
      }
    ...
    ---8 -> 按度量排序 aggs terms order aggs extended_stats
    按照扩展统计的方差进行升序
    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "colors" : {
                "terms" : {
                  "field" : "color",
                  "order": {"stats.variance" : "asc" }
                },
                "aggs": {
                    "stats": {"extended_stats": {"field": "price"}}
                }
            }
        }
    }
    聚合结果
    ...
      "aggregations": {
        "colors": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "blue",
              "doc_count": 2,
              "stats": {
                "count": 2,
                "min": 15000,
                "max": 25000,
                "avg": 20000,
                "sum": 40000,
                "sum_of_squares": 850000000,
                "variance": 25000000,
                "std_deviation": 5000,
                "std_deviation_bounds": {
                  "upper": 30000,
                  "lower": 10000
                }
              }
            },
            {
              "key": "green",
              ...
            },
            {
              "key": "red",
              ...
            }
          ]
        }
      }
    ...
    ---9 -> 基于"深度"度量排序 aggs histogram interval order aggs filter terms aggs extended_stats
    a2>a3.variance 表示"a2"中的"a3"的"variance"属性
    按照价格两万一次分割,过滤了只取"red","green"一共6个文档,并且根据分割块进行价格计算扩展统计,
    根据分割每一块的扩展统计的方差来升序排列,并且只保留文档数量至少为1的分割块(min_doc_count 为 1)
    这里"a1"//多值桶 "a2"//单值桶 "a3"//度量指标
    GET cars/_search
    {
      "size": 0,
      "aggs": {
        "a1": {
          "histogram": {
            "field": "price",
            "interval": 20000,
            "min_doc_count": 1, 
            "order": {"a2>a3.variance": "asc"}
          },
        "aggs": {
          "a2": {
            "filter": {
              "terms": {"color": ["red","green"]}
            },
          "aggs": {
            "a3": {
              "extended_stats": {"field": "price"}
            }
          }
          }
        }
        }
      }
    }
    聚合结果
    ...
      "aggregations": {
        "a1": {//多值桶
          "buckets": [
            {
              "key": 80000,//[80000,100000)有1条
              "doc_count": 1,
              "a2": {//单值桶
                "doc_count": 1,//[80000,100000) 并且属于["red","green"]有1条
                "a3": {
                  "count": 1,
                  "min": 80000,
                  "max": 80000,
                  "avg": 80000,
                  "sum": 80000,
                  "sum_of_squares": 6400000000,
                  "variance": 0,//属于["red","green"]1条的方差
                  "std_deviation": 0,
                  "std_deviation_bounds": {
                    "upper": 80000,
                    "lower": 80000
                  }
                }
              }
            },
            {
              "key": 0,//[0,20000)有3条
              "doc_count": 3,
              "a2": {
                "doc_count": 2,//[0,20000) 并且属于["red","green"]有2条
                "a3": {
                 ...
                  "variance": 1000000,//属于["red","green"]2条的方差
                 ...
                }
              }
            },
            {
              "key": 20000,//[20000,40000)有4条
              "doc_count": 4,
              "a2": {
                "doc_count": 3,//[20000,40000) 并且属于["red","green"]有3条
                "a3": {
                 ...
                  "variance": 22222222.22222225,//属于["red","green"]3条的方差
                 ...
                }
              }
            }
          ]
        }
      }
    ...
    ---10 -> 统计去重后的数量 aggs cardinality
    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "distinct_colors" : {
                "cardinality" : {"field" : "color"}
            }
        }
    }
    聚合结果
    ...
      "aggregations": {
        "distinct_colors": {
          "value": 3
        }
      }
    ...
    ---11 -> 统计去重后的数量 aggs date_histogram interval min_doc_count aggs cardinality
    按日期每个月分割一次,至少一条才显示,并统计每个月内去重后的颜色数量
    GET /cars/transactions/_search
    {
      "size" : 0,
      "aggs" : {
          "months" : {
            "date_histogram": {
              "field": "sold",
              "interval": "month",
              "min_doc_count": 1
            },
            "aggs": {
              "distinct_colors" : {
                  "cardinality" : {"field" : "color"}
              }
            }
          }
      }
    }
    聚合结果
    ...
      "aggregations": {
        "months": {
          "buckets": [
            ...
            {
              "key_as_string": "2014-08-01T00:00:00.000Z",
              "key": 1406851200000,
              "doc_count": 1,
              "distinct_colors": {
                "value": 1
              }
            },
            {
              "key_as_string": "2014-10-01T00:00:00.000Z",
              "key": 1412121600000,
              "doc_count": 1,
              "distinct_colors": {
                "value": 1
              }
            },
            {
              "key_as_string": "2014-11-01T00:00:00.000Z",
              "key": 1414800000000,
              "doc_count": 2,
              "distinct_colors": {
                "value": 1
              }
            }
          ]
        }
      }
    ...
    ---12 -> 统计去重后的数量 aggs cardinality precision_threshold
    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "distinct_colors" : {
                "cardinality" : {
                  "field" : "color",
                  "precision_threshold" : 100 //指定精度,范围[0,40000]超过四万也会为四万
                }
            }
        }
    }
    聚合结果
    ...
      "aggregations": {
        "distinct_colors": {
          "value": 3
        }
      }
    ...
    
  • 相关阅读:
    day9习题
    生产者消费者模型(吃包子例子)
    map 函数----filter函数
    #返回值包含函数
    #把函数当作参数传给另一个函数
    异常和错误!
    递归调用
    局部和全局案例!!
    全局变量与局部变量2
    全局变量与局部变量
  • 原文地址:https://www.cnblogs.com/taopanfeng/p/11684557.html
Copyright © 2011-2022 走看看