zoukankan      html  css  js  c++  java
  • ElasticSearch-聚合bucket学习

    DELETE cars
    PUT cars
    {
      "mappings": {
        "transactions": {
          "properties": {
            "price": {
              "type":"long"
            },
            "color": {
              "type":"keyword"
            },
            "make": {
              "type":"keyword"
            },
            "sold": {
              "type":"date"
            }
          }
        }
      }
    }
    
    POST /cars/transactions/_bulk
    { "index": {}}
    { "price" : 10000, "color" : "red", "make" : "honda", "sold" : "2014-10-28" }
    { "index": {}}
    { "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2014-11-05" }
    { "index": {}}
    { "price" : 30000, "color" : "green", "make" : "ford", "sold" : "2014-05-18" }
    { "index": {}}
    { "price" : 15000, "color" : "blue", "make" : "toyota", "sold" : "2014-07-02" }
    { "index": {}}
    { "price" : 12000, "color" : "green", "make" : "toyota", "sold" : "2014-08-19" }
    { "index": {}}
    { "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2014-11-05" }
    { "index": {}}
    { "price" : 80000, "color" : "red", "make" : "bmw", "sold" : "2014-01-01" }
    { "index": {}}
    { "price" : 25000, "color" : "blue", "make" : "ford", "sold" : "2014-02-12" }
    

    ----------Filter Aggregation-------

    红色车的数量

    POST /cars/transactions/_search?size=0
    {
        "aggs" : {
            "red_cars" : {
                "filter" : { "term": { "color": "red" } }
            }
        }
    }
    

    ----------Filters Aggregation-------

    统计红色车、蓝色车各多少个

    POST /cars/transactions/_search
    {
      "size": 0,
      "aggs" : {
        "cars" : {
          "filters" : {
            "filters" : {
              "red_cars" :   { "match" : { "color" : "red"   }},
              "blue_cars" : { "match" : { "color" : "blue" }}
            }
          }
        }
      }
    }
    

    统计红色车、蓝色车各多少个,并计算两种颜色车的平均价格

    POST /cars/transactions/_search
    {
      "size": 0,
      "aggs" : {
        "cars" : {
          "filters" : {
            "filters" : {
              "red_cars" :   { "match" : { "color" : "red"   }},
              "blue_cars" : { "match" : { "color" : "blue" }}
            }
          },
          "aggs" : {
              "avg_price" : { "avg" : { "field" : "price" } }
          }
        }
      }
    }
    

    ----------Date Histogram Aggregation-------

    每月销售多少台汽车

    interval参数: year, quarter, month, week, day, hour, minute, second
    interval参数:还可以写具体的时间,比如24h,90m

    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs": {
          "sales_over_time": {
             "date_histogram": {
                "field": "sold",
                "interval": "month", 
                "format": "yyyy-MM-dd" 
             }
          }
       }
    }
    

    指定周期90分钟

    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs": {
          "sales_over_time": {
             "date_histogram": {
                "field": "sold",
                "interval": "90m", 
                "format": "yyyy-MM-dd HH:mm:ss" 
             }
          }
       }
    }
    

    加入keyed参数,使返回的buckets不再是数组,而是一个以桶的key(这里是格式化后的日期字符串)为键的对象

    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs": {
          "sales_over_time": {
             "date_histogram": {
                "field": "sold",
                "interval": "month", 
                "format": "yyyy-MM-dd",
                "keyed":true
             }
          }
       }
    }
    

    ----------Date Range Aggregation-------

    按照售卖日期范围统计车辆数量

    GET /cars/transactions/_search
    {
      "size": 0,
      "aggs": {
        "range": {
          "date_range": {
            "field": "sold",
            "format": "yyyy-MM-dd",
            "ranges": [
              {"from": "now-36M/M"},
              {"to": "now-24M/M"},
              {"from": "now-36M/M","to": "now-12M/M"}
            ]
          }
        }
      }
    }
    

    加入keyed参数,使返回的buckets不再是数组,而是以key为键的对象;并通过每个range的key属性自定义桶的key值(未指定key的range使用默认生成的key)

    GET /cars/transactions/_search
    {
      "size": 0,
      "aggs": {
        "range": {
          "date_range": {
            "field": "sold",
            "format": "yyyy-MM-dd",
            "ranges": [
              {"from": "now-36M/M","key":"36months"},
              {"to": "now-24M/M","key":"2years_ago"},
              {"from": "now-36M/M","to": "now-12M/M"}
            ],
            "keyed":true
          }
        }
      }
    }
    

    按照售卖日期范围统计车辆数量,并计算该周期内的平均售卖价格

    GET /cars/transactions/_search
    {
      "size": 0,
      "aggs": {
        "range": {
          "date_range": {
            "field": "sold",
            "format": "yyyy-MM-dd",
            "ranges": [
              {"from": "now-36M/M","key":"36months"},
              {"to": "now-24M/M","key":"2years_ago"},
              {"from": "now-36M/M","to": "now-12M/M"}
            ],
            "keyed":true
          },
          "aggs" : {
              "avg_price" : { "avg" : { "field" : "price" } }
          }
        }
      }
    }
    

    ----------Histogram Aggregation-------

    直方图,按照20000为区间进行分桶

    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "colors" : {
                "histogram" : {
                  "field" : "price",
                  "interval": 20000
                }
            }
        }
    }
    

    min_doc_count参数,限制桶内的文档数至少达到该值时才返回该桶(设为1即可过滤掉空桶)

    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "colors" : {
                "histogram" : {
                  "field" : "price",
                  "interval": 20000,
                  "min_doc_count": 1
                }
            }
        }
    }
    

    extended_bounds参数,扩展显示范围

    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "colors" : {
                "histogram" : {
                  "field" : "price",
                  "interval": 20000,
                  "extended_bounds": {
                        "min" : 0,
                        "max" : 200000
                  }
                }
            }
        }
    }
    

    增加排序,按照桶名降序

    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "colors" : {
                "histogram" : {
                  "field" : "price",
                  "interval": 20000,
                  "order" : { "_key" : "desc" }
                }
            }
        }
    }
    

    增加排序,按照桶内文档数量(_count)降序

    GET /cars/transactions/_search
    {
        "size" : 0,
        "aggs" : {
            "colors" : {
                "histogram" : {
                  "field" : "price",
                  "interval": 20000,
                  "order" : { "_count" : "desc" }
                }
            }
        }
    }
    

    直方图,按照20000为区间进行分桶,并通过子聚合sum对每个桶内的价格求和

    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs":{
          "price":{
             "histogram":{ 
                "field": "price",
                "interval": 20000
             },
             "aggs":{
                "price_sum": {
                   "sum": { 
                     "field" : "price"
                   }
                 }
             }
          }
       }
    }
    

    直方图,按照20000为区间进行分桶,并进行汇总

    按照子聚合的指标进行排序

    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs":{
          "price":{
             "histogram":{ 
                "field": "price",
                "interval": 20000,
                "order":{ "price_sum.value" : "desc" }
             },
             "aggs":{
                "price_sum": {
                   "sum": { 
                     "field" : "price"
                   }
                 }
             }
          }
       }
    }
    

    增加keyed参数

    GET /cars/transactions/_search
    {
       "size" : 0,
       "aggs":{
          "price":{
             "histogram":{ 
                "field": "price",
                "interval": 20000,
                "order":{ "price_sum.value" : "desc" }
                ,"keyed":true
             },
             "aggs":{
                "price_sum": {
                   "sum": { 
                     "field" : "price"
                   }
                 }
             }
          }
       }
    }
    

    ----------Terms Aggregation-------

    按照某个字段的词条进行分桶

    在每个分片上先获取前几个数量最多的词条,然后再整体二次重排,所以可能会有误差

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "make_terms" : {
                "terms" : {
                    "field" : "make"
                }
            }
        }
    }
    

    按照词条的字母顺序排序

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "make_terms" : {
                "terms" : {
                    "field" : "make",
                    "order" : { "_term" : "asc" }
                }
            }
        }
    }
    

    min_doc_count:用于限制只提取出现次数大于等于指定值的词条(下例中为至少出现3次)

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "make_terms" : {
                "terms" : {
                    "field" : "make",
                    "min_doc_count": 3
                }
            }
        }
    }
    

    使用脚本,对field的值进行加工后再作为词条分桶(不会修改索引中的原始数据)

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "make_terms" : {
                "terms" : {
                    "script" : {
                        "inline": "'make:'+doc['make'].value",
                        "lang": "painless"
                    }
                }
            }
        }
    }
    

    同上,改用field配合value script的写法,在脚本中通过_value引用字段值

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "make_terms" : {
                "terms" : {
                    "field" : "make",
                    "script" : {
                        "inline" : "'make: ' +_value",
                        "lang" : "painless"
                    }
                }
            }
        }
    }
    

    使用正则表达式过滤词条

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "make_terms" : {
                "terms" : {
                    "field" : "make",
                    "include" : ".*o.*",
                    "exclude" : "f.*"
                }
            }
        }
    }
    

    使用精确指定的词条进行分桶

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "make_terms" : {
                "terms" : {
                    "field" : "make",
                     "include" : ["mazda", "honda"]
                }
            }
        }
    }
    

    ----------Range Aggregation-------

    按照指定的范围区间分桶,并计算数量

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "price_ranges" : {
                "range" : {
                    "field" : "price",
                    "ranges" : [
                        { "to" : 20000 },
                        { "from" : 20000, "to" : 50000 },
                        { "from" : 50000 }
                    ]
                }
            }
        }
    }
    

    用script脚本指定field

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "price_ranges" : {
                "range" : {
                    "script" : {
                        "lang": "painless",
                        "inline": "doc['price'].value"
                    },
                    "ranges" : [
                        { "to" : 20000 },
                        { "from" : 20000, "to" : 50000 },
                        { "from" : 50000 }
                    ]
                }
            }
        }
    }
    

    在分桶前,通过脚本更改值

    GET /cars/transactions/_search
    {
        "size": 0, 
        "aggs" : {
            "price_ranges" : {
                "range" : {
                    "field" :"price",
                    "script" : {
                        "lang": "painless",
                        "inline": "_value * params.rate",
                        "params" : {
                            "rate" : 2.5
                        }
                    },
                    "ranges" : [
                        { "to" : 20000 },
                        { "from" : 20000, "to" : 50000 },
                        { "from" : 50000 }
                    ]
                }
            }
        }
    }
    

    ----------Global Aggregation-------

    用global聚合忽略query的过滤范围,对索引中的所有文档进行计算(下例中all_makes统计全部车辆,honda_make只统计query命中的honda)

    GET /cars/transactions/_search?size=0
    {
        "query" : {
            "match" : { "make" : "honda" }
        },
        "aggs" : {
            "all_makes" : {
                "global" : {}, 
                "aggs" : { 
                    "avg_price" : { "avg" : { "field" : "price" } }
                }
            },
            "honda_make": { "avg" : { "field" : "price" } }
        }
    }
    

    验证一下global计算是否正确

    GET /cars/transactions/_search?size=0
    {
        "query" : {
            "match_all" : { }
        },
        "aggs" : {
            "all_make": { "avg" : { "field" : "price" } }
        }
    }
    

    ----------IP Range Aggregation-------

    DELETE ips
    PUT ips
    {
      "mappings": {
        "doc": {
          "properties": {
            "ip": {
              "type":"ip"
            }
          }
        }
      }
    }
    
    POST /ips/doc/_bulk
    { "index": {}}
    { "ip" : "192.168.1.1"}
    { "index": {}}
    { "ip" : "192.168.1.10"}
    { "index": {}}
    { "ip" : "192.168.1.102"}
    { "index": {}}
    { "ip" : "192.168.1.150"}
    { "index": {}}
    { "ip" : "192.168.1.160"}
    { "index": {}}
    { "ip" : "192.168.1.250"}
    

    按照指定的ip范围分桶,并统计数量

    GET /ips/doc/_search
    {
        "size": 0, 
        "aggs" : {
            "ip_ranges" : {
                "ip_range" : {
                    "field" : "ip",
                    "ranges" : [
                        {"from" : "192.168.1.1" },
                        {"to" : "192.168.2.1" },
                        {"from" : "192.168.1.1","to" : "192.168.3.200" }
                    ]
                }
            }
        }
    }
    

    通过子网掩码范围分桶

    192.168.1.0/24表示:192.168.1.0至192.168.1.255(对应的桶为 from 192.168.1.0,to 192.168.2.0,不含to)

    192.168.2.0/25表示:192.168.2.0至192.168.2.127(对应的桶为 from 192.168.2.0,to 192.168.2.128,不含to)

    GET /ips/doc/_search
    {
        "size": 0, 
        "aggs" : {
            "ip_ranges" : {
                "ip_range" : {
                    "field" : "ip",
                    "ranges" : [
                        { "mask" : "192.168.1.0/24" },
                        { "mask" : "192.168.2.0/25" }
                    ]
                }
            }
        }
    }
    

    加入keyed参数

    GET /ips/doc/_search
    {
        "size": 0, 
        "aggs" : {
            "ip_ranges" : {
                "ip_range" : {
                    "field" : "ip",
                    "ranges" : [
                        { "mask" : "192.168.1.0/24" },
                        { "mask" : "192.168.2.0/25" }
                    ],
                    "keyed": true
                }
            }
        }
    }
    

    ----------Geo Distance Aggregation-------

    DELETE /museums
    PUT /museums
    {
        "mappings": {
            "doc": {
                "properties": {
                    "location": {
                        "type": "geo_point"
                    }
                }
            }
        }
    }
    
    POST /museums/doc/_bulk?refresh
    {"index":{"_id":1}}
    {"location": "52.374081,4.912350", "name": "NEMO Science Museum"}
    {"index":{"_id":2}}
    {"location": "52.369219,4.901618", "name": "Museum Het Rembrandthuis"}
    {"index":{"_id":3}}
    {"location": "52.371667,4.914722", "name": "Nederlands Scheepvaartmuseum"}
    {"index":{"_id":4}}
    {"location": "51.222900,4.405200", "name": "Letterenhuis"}
    {"index":{"_id":5}}
    {"location": "48.861111,2.336389", "name": "Musée du Louvre"}
    {"index":{"_id":6}}
    {"location": "48.860000,2.327000", "name": "Musée d'Orsay"}
    

    按照文档与指定坐标点(origin)之间的距离范围进行分桶,默认单位:m(米)

    POST /museums/_search?size=0
    {
        "aggs" : {
            "rings_around_amsterdam" : {
                "geo_distance" : {
                    "field" : "location",
                    "origin" : "52.3760, 4.894",
                    "ranges" : [
                        { "to" : 100000 },
                        { "from" : 100000, "to" : 300000 },
                        { "from" : 300000 }
                    ]
                }
            }
        }
    }
    

    指定单位为公里

    可以使用:mi(miles,英里)、yd(yards,码)、ft(feet,英尺)、in(inches,英寸)、km(kilometers,千米)、m(meters,米,默认)、cm(centimeters,厘米)、mm(millimeters,毫米)。注意:指定单位后,ranges中的数值也要按该单位填写。

    POST /museums/_search?size=0
    {
        "aggs" : {
            "rings_around_amsterdam" : {
                "geo_distance" : {
                    "field" : "location",
                    "origin" : "52.3760, 4.894",
                    "unit" : "km",
                    "ranges" : [
                        { "to" : 100 },
                        { "from" : 100, "to" : 300 },
                        { "from" : 300 }
                    ]
                }
            }
        }
    }
    

    指定距离模式

    distance_type:arc(默认,按球面弧线距离计算,精度高),plane(按平面距离计算,速度更快,但精度稍差,跨度大或靠近两极时误差更明显)

    POST /museums/_search?size=0
    {
        "aggs" : {
            "rings" : {
                "geo_distance" : {
                    "field" : "location",
                    "origin" : "52.3760, 4.894",
                    "unit" : "km",
                    "distance_type" : "plane",
                    "ranges" : [
                        { "to" : 100 },
                        { "from" : 100, "to" : 300 },
                        { "from" : 300 }
                    ]
                }
            }
        }
    }
    

    使用keyed

    POST /museums/_search?size=0
    {
        "aggs" : {
            "rings_around_amsterdam" : {
                "geo_distance" : {
                    "field" : "location",
                    "origin" : "52.3760, 4.894",
                    "ranges" : [
                        { "to" : 100000 },
                        { "from" : 100000, "to" : 300000 },
                        { "from" : 300000 }
                    ],
                    "keyed": true
                }
            }
        }
    }


    作者:壹点零
    链接:https://www.jianshu.com/p/f79309adb63b
    来源:简书
    著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
  • 相关阅读:
    python-进程池实例
    python-进程通过队列模拟数据的下载
    python-多进程模板
    python-多线程同步中创建互斥锁解决资源竞争的问题
    CentOS6.5配置网络
    解决CentOS系统Yum出现"Cannot find a valid baseurl for repo"问题
    CentOS 6.5安装图形界面
    Centos安装git
    Web前端优化,提高加载速度
    谁说写代码的不懂生活
  • 原文地址:https://www.cnblogs.com/cdchencw/p/12470746.html
Copyright © 2011-2022 走看看