zoukankan      html  css  js  c++  java
  • elasticsearch系列六:聚合分析(聚合分析简介、指标聚合、桶聚合)

    一、聚合分析简介

     1. ES聚合分析是什么?

    聚合分析是数据库中重要的功能特性,完成对一个查询的数据集中数据的聚合计算,如:找出某字段(或计算表达式的结果)的最大值、最小值,计算和、平均值等。ES作为搜索引擎兼数据库,同样提供了强大的聚合分析能力。

    对一个数据集求最大、最小、和、平均值等指标的聚合,在ES中称为指标聚合   metric

    而关系型数据库中除了有聚合函数外,还可以对查询出的数据进行分组(group by),再在组上进行指标聚合。在 ES 中 group by 称为分桶,即桶聚合 bucketing

    ES中还提供了矩阵聚合(matrix)、管道聚合(pipeline),但还在完善中。

     2. ES聚合分析查询的写法

     在查询请求体中以aggregations节点按如下语法定义聚合分析:

    "aggregations" : {
        "<aggregation_name>" : { <!--聚合的名字 -->
            "<aggregation_type>" : { <!--聚合的类型 -->
                <aggregation_body> <!--聚合体:对哪些字段进行聚合 -->
            }
            [,"meta" : {  [<meta_data_body>] } ]? <!--聚合的元数据(可选) -->
            [,"aggregations" : { [<sub_aggregation>]+ } ]? <!--在聚合里面再定义子聚合 -->
        }
        [,"<aggregation_name_2>" : { ... } ]*<!--聚合的名字 -->
    }

     说明:

    aggregations 也可简写为 aggs

     3. 聚合分析的值来源

    聚合计算的值可以取字段的值,也可是脚本计算的结果

    二、指标聚合

    1. max min sum avg

    示例1:查询所有客户中余额的最大值

    POST /bank/_search?
    {
      "size": 0, 
      "aggs": {
        "masssbalance": {
          "max": {
            "field": "balance"
          }
        }
      }
    }

     结果1:

    {
      "took": 2080,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "masssbalance": {
          "value": 49989
        }
      }
    }

    示例2:查询年龄为24岁的客户中的余额最大值

    POST /bank/_search?
    {
      "size": 2, 
      "query": {
        "match": {
          "age": 24
        }
      },
      "sort": [
        {
          "balance": {
            "order": "desc"
          }
        }
      ],
      "aggs": {
        "max_balance": {
          "max": {
            "field": "balance"
          }
        }
      }
    }

     结果2:

    {
      "took": 5,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 42,
        "max_score": null,
        "hits": [
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "697",
            "_score": null,
            "_source": {
              "account_number": 697,
              "balance": 48745,
              "firstname": "Mallory",
              "lastname": "Emerson",
              "age": 24,
              "gender": "F",
              "address": "318 Dunne Court",
              "employer": "Exoplode",
              "email": "malloryemerson@exoplode.com",
              "city": "Montura",
              "state": "LA"
            },
            "sort": [
              48745
            ]
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "917",
            "_score": null,
            "_source": {
              "account_number": 917,
              "balance": 47782,
              "firstname": "Parks",
              "lastname": "Hurst",
              "age": 24,
              "gender": "M",
              "address": "933 Cozine Avenue",
              "employer": "Pyramis",
              "email": "parkshurst@pyramis.com",
              "city": "Lindcove",
              "state": "GA"
            },
            "sort": [
              47782
            ]
          }
        ]
      },
      "aggregations": {
        "max_balance": {
          "value": 48745
        }
      }
    }

     示例3:值来源于脚本,查询所有客户的平均年龄是多少,并对平均年龄加10

    POST /bank/_search?size=0
    {
      "aggs": {
        "avg_age": {
          "avg": {
            "script": {
              "source": "doc.age.value"
            }
          }
        },
        "avg_age10": {
          "avg": {
            "script": {
              "source": "doc.age.value + 10"
            }
          }
        }
      }
    }

     结果3:

    {
      "took": 86,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "avg_age": {
          "value": 30.171
        },
        "avg_age10": {
          "value": 40.171
        }
      }
    }

     示例4:指定field,在脚本中用_value 取字段的值

    POST /bank/_search?size=0
    {
      "aggs": {
        "sum_balance": {
          "sum": {
            "field": "balance",
            "script": {
                "source": "_value * 1.03"
            }
          }
        }
      }
    }

     结果4:

    {
      "took": 165,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "sum_balance": {
          "value": 26486282.11
        }
      }
    }

     示例5:为缺失值的字段指定默认值(missing)。如未指定,缺失该字段值的文档将被忽略。

    POST /bank/_search?size=0
    {
      "aggs": {
        "avg_age": {
          "avg": {
            "field": "age",
            "missing": 18
          }
        }
      }
    }

     2. 文档计数 count

     示例1:统计银行索引bank下年龄为24的文档数量

    POST /bank/_doc/_count
    {
      "query": {
        "match": {
          "age" : 24
        }
      }
    }

     结果1:

    {
      "count": 42,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      }
    }

     3. Value count 统计某字段有值的文档数

    示例1:

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_count": {
          "value_count": {
            "field": "age"
          }
        }
      }
    }

     结果1:

    {
      "took": 2022,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_count": {
          "value": 1000
        }
      }
    }

     4. cardinality  值去重计数

    示例1:

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_count": {
          "cardinality": {
            "field": "age"
          }
        },
        "state_count": {
          "cardinality": {
            "field": "state.keyword"
          }
        }
      }
    }

     说明:state 字段需使用它的 keyword 版本(state.keyword)进行聚合

     结果1:

    {
      "took": 2074,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "state_count": {
          "value": 51
        },
        "age_count": {
          "value": 21
        }
      }
    }

     5. stats 统计 count max min avg sum 5个值

     示例1:

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_stats": {
          "stats": {
            "field": "age"
          }
        }
      }
    }

     结果1:

    {
      "took": 7,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_stats": {
          "count": 1000,
          "min": 20,
          "max": 40,
          "avg": 30.171,
          "sum": 30171
        }
      }
    }

     6. Extended stats

    高级统计,比stats多4个统计结果: 平方和、方差、标准差、平均值加/减两个标准差的区间

     示例1:

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_stats": {
          "extended_stats": {
            "field": "age"
          }
        }
      }
    }

     结果1:

    {
      "took": 7,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_stats": {
          "count": 1000,
          "min": 20,
          "max": 40,
          "avg": 30.171,
          "sum": 30171,
          "sum_of_squares": 946393,
          "variance": 36.10375899999996,
          "std_deviation": 6.008640362012022,
          "std_deviation_bounds": {
            "upper": 42.18828072402404,
            "lower": 18.153719275975956
          }
        }
      }
    }

     7. Percentiles 占比百分位对应的值统计

    对指定字段(脚本)的值按从小到大累计每个值对应的文档数的占比(占所有命中文档数的百分比),返回指定占比比例对应的值。默认返回[ 1, 5, 25, 50, 75, 95, 99 ]分位上的值。如下中间的结果,可以理解为:占比为50%的文档的age值 <= 31,或反过来:age<=31的文档数占总命中文档数的50%

     示例1:

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_percents": {
          "percentiles": {
            "field": "age"
          }
        }
      }
    }

    结果1:

    {
      "took": 87,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_percents": {
          "values": {
            "1.0": 20,
            "5.0": 21,
            "25.0": 25,
            "50.0": 31,
            "75.0": 35.00000000000001,
            "95.0": 39,
            "99.0": 40
          }
        }
      }
    }

     结果说明:

    占比为50%的文档的age值 <= 31,或反过来:age<=31的文档数占总命中文档数的50%

     示例2:指定分位值

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_percents": {
          "percentiles": {
            "field": "age",
            "percents" : [95, 99, 99.9] 
          }
        }
      }
    }

     结果2:

    {
      "took": 8,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_percents": {
          "values": {
            "95.0": 39,
            "99.0": 40,
            "99.9": 40
          }
        }
      }
    }

     8. Percentile Ranks 统计值小于等于指定值的文档占比

     示例1:分别统计年龄小于等于25、小于等于30的文档占比(与第7项 Percentiles 的计算方向相反)

    POST /bank/_search?size=0
    {
      "aggs": {
        "gge_perc_rank": {
          "percentile_ranks": {
            "field": "age",
            "values": [
              25,
              30
            ]
          }
        }
      }
    }

    结果1:

    {
      "took": 8,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "gge_perc_rank": {
          "values": {
            "25.0": 26.1,
            "30.0": 49.2
          }
        }
      }
    }

     结果说明:年龄小于等于25的文档占比为26.1%,年龄小于等于30的文档占比为49.2%。

     9. Geo Bounds aggregation 求文档集中的地理位置坐标点的范围

    参考官网链接:

    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-geobounds-aggregation.html

    10. Geo Centroid aggregation  求地理位置中心点坐标值

    参考官网链接:

    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-geocentroid-aggregation.html

    三、桶聚合

    1. Terms Aggregation  根据字段值项分组聚合 

     示例1:

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_terms": {
          "terms": {
            "field": "age"
          }
        }
      }
    }

     结果1:

    {
      "took": 2000,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_terms": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 463,
          "buckets": [
            {
              "key": 31,
              "doc_count": 61
            },
            {
              "key": 39,
              "doc_count": 60
            },
            {
              "key": 26,
              "doc_count": 59
            },
            {
              "key": 32,
              "doc_count": 52
            },
            {
              "key": 35,
              "doc_count": 52
            },
            {
              "key": 36,
              "doc_count": 52
            },
            {
              "key": 22,
              "doc_count": 51
            },
            {
              "key": 28,
              "doc_count": 51
            },
            {
              "key": 33,
              "doc_count": 50
            },
            {
              "key": 34,
              "doc_count": 49
            }
          ]
        }
      }
    }

     结果说明:

    "doc_count_error_upper_bound": 0:文档计数的最大偏差值

    "sum_other_doc_count": 463:未返回的其他项的文档数

    默认情况下返回按文档计数从高到低的前10个分组:

     "buckets": [
            {
              "key": 31,
              "doc_count": 61
            },
            {
              "key": 39,
              "doc_count": 60
            },
        .............
    ]

     年龄为31的文档有61个,年龄为39的文档有60个

     size 指定返回多少个分组:

    示例2:指定返回20个分组

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_terms": {
          "terms": {
            "field": "age",
            "size": 20
          }
        }
      }
    }

     结果2:

    {
      "took": 9,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_terms": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 35,
          "buckets": [
            {
              "key": 31,
              "doc_count": 61
            },
            {
              "key": 39,
              "doc_count": 60
            },
            {
              "key": 26,
              "doc_count": 59
            },
            {
              "key": 32,
              "doc_count": 52
            },
            {
              "key": 35,
              "doc_count": 52
            },
            {
              "key": 36,
              "doc_count": 52
            },
            {
              "key": 22,
              "doc_count": 51
            },
            {
              "key": 28,
              "doc_count": 51
            },
            {
              "key": 33,
              "doc_count": 50
            },
            {
              "key": 34,
              "doc_count": 49
            },
            {
              "key": 30,
              "doc_count": 47
            },
            {
              "key": 21,
              "doc_count": 46
            },
            {
              "key": 40,
              "doc_count": 45
            },
            {
              "key": 20,
              "doc_count": 44
            },
            {
              "key": 23,
              "doc_count": 42
            },
            {
              "key": 24,
              "doc_count": 42
            },
            {
              "key": 25,
              "doc_count": 42
            },
            {
              "key": 37,
              "doc_count": 42
            },
            {
              "key": 27,
              "doc_count": 39
            },
            {
              "key": 38,
              "doc_count": 39
            }
          ]
        }
      }
    }
    View Code

     示例3:每个分组上显示偏差值

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_terms": {
          "terms": {
            "field": "age",
            "size": 5,
            "shard_size": 20,
            "show_term_doc_count_error": true
          }
        }
      }
    }

     结果3:

    {
      "took": 8,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_terms": {
          "doc_count_error_upper_bound": 25,
          "sum_other_doc_count": 716,
          "buckets": [
            {
              "key": 31,
              "doc_count": 61,
              "doc_count_error_upper_bound": 0
            },
            {
              "key": 39,
              "doc_count": 60,
              "doc_count_error_upper_bound": 0
            },
            {
              "key": 26,
              "doc_count": 59,
              "doc_count_error_upper_bound": 0
            },
            {
              "key": 32,
              "doc_count": 52,
              "doc_count_error_upper_bound": 0
            },
            {
              "key": 36,
              "doc_count": 52,
              "doc_count_error_upper_bound": 0
            }
          ]
        }
      }
    }

     示例4:shard_size 指定每个分片上返回多少个分组

    shard_size 的默认值为:
    索引只有一个分片:= size
    多分片:= size * 1.5 + 10

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_terms": {
          "terms": {
            "field": "age",
            "size": 5,
            "shard_size": 20
          }
        }
      }
    }

     结果4:

    {
      "took": 8,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_terms": {
          "doc_count_error_upper_bound": 25,
          "sum_other_doc_count": 716,
          "buckets": [
            {
              "key": 31,
              "doc_count": 61
            },
            {
              "key": 39,
              "doc_count": 60
            },
            {
              "key": 26,
              "doc_count": 59
            },
            {
              "key": 32,
              "doc_count": 52
            },
            {
              "key": 36,
              "doc_count": 52
            }
          ]
        }
      }
    }

     order  指定分组的排序

     示例5:根据文档计数排序

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_terms": {
          "terms": {
            "field": "age",
            "order" : { "_count" : "asc" }
          }
        }
      }
    }

     结果5:

    {
      "took": 3,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_terms": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 584,
          "buckets": [
            {
              "key": 29,
              "doc_count": 35
            },
            {
              "key": 27,
              "doc_count": 39
            },
            {
              "key": 38,
              "doc_count": 39
            },
            {
              "key": 23,
              "doc_count": 42
            },
            {
              "key": 24,
              "doc_count": 42
            },
            {
              "key": 25,
              "doc_count": 42
            },
            {
              "key": 37,
              "doc_count": 42
            },
            {
              "key": 20,
              "doc_count": 44
            },
            {
              "key": 40,
              "doc_count": 45
            },
            {
              "key": 21,
              "doc_count": 46
            }
          ]
        }
      }
    }

     示例6:根据分组值排序

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_terms": {
          "terms": {
            "field": "age",
            "order" : { "_key" : "asc" }
          }
        }
      }
    }

     结果6:

    {
      "took": 10,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_terms": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 549,
          "buckets": [
            {
              "key": 20,
              "doc_count": 44
            },
            {
              "key": 21,
              "doc_count": 46
            },
            {
              "key": 22,
              "doc_count": 51
            },
            {
              "key": 23,
              "doc_count": 42
            },
            {
              "key": 24,
              "doc_count": 42
            },
            {
              "key": 25,
              "doc_count": 42
            },
            {
              "key": 26,
              "doc_count": 59
            },
            {
              "key": 27,
              "doc_count": 39
            },
            {
              "key": 28,
              "doc_count": 51
            },
            {
              "key": 29,
              "doc_count": 35
            }
          ]
        }
      }
    }

    示例7:根据分组内指标聚合的值排序

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_terms": {
          "terms": {
            "field": "age",
            "order": {
              "max_balance": "asc"
            }
          },
          "aggs": {
            "max_balance": {
              "max": {
                "field": "balance"
              }
            },
            "min_balance": {
              "min": {
                "field": "balance"
              }
            }
          }
        }
      }
    }

     结果7:

    {
      "took": 28,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_terms": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 511,
          "buckets": [
            {
              "key": 27,
              "doc_count": 39,
              "min_balance": {
                "value": 1110
              },
              "max_balance": {
                "value": 46868
              }
            },
            {
              "key": 39,
              "doc_count": 60,
              "min_balance": {
                "value": 3589
              },
              "max_balance": {
                "value": 47257
              }
            },
            {
              "key": 37,
              "doc_count": 42,
              "min_balance": {
                "value": 1360
              },
              "max_balance": {
                "value": 47546
              }
            },
            {
              "key": 32,
              "doc_count": 52,
              "min_balance": {
                "value": 1031
              },
              "max_balance": {
                "value": 48294
              }
            },
            {
              "key": 26,
              "doc_count": 59,
              "min_balance": {
                "value": 1447
              },
              "max_balance": {
                "value": 48466
              }
            },
            {
              "key": 33,
              "doc_count": 50,
              "min_balance": {
                "value": 1314
              },
              "max_balance": {
                "value": 48734
              }
            },
            {
              "key": 24,
              "doc_count": 42,
              "min_balance": {
                "value": 1011
              },
              "max_balance": {
                "value": 48745
              }
            },
            {
              "key": 31,
              "doc_count": 61,
              "min_balance": {
                "value": 2384
              },
              "max_balance": {
                "value": 48758
              }
            },
            {
              "key": 34,
              "doc_count": 49,
              "min_balance": {
                "value": 3001
              },
              "max_balance": {
                "value": 48997
              }
            },
            {
              "key": 29,
              "doc_count": 35,
              "min_balance": {
                "value": 3596
              },
              "max_balance": {
                "value": 49119
              }
            }
          ]
        }
      }
    }
    View Code

     示例8:筛选分组-正则表达式匹配值

    GET /_search
    {
        "aggs" : {
            "tags" : {
                "terms" : {
                    "field" : "tags",
                    "include" : ".*sport.*",
                    "exclude" : "water_.*"
                }
            }
        }
    }

     示例9:筛选分组-指定值列表

    GET /_search
    {
        "aggs" : {
            "JapaneseCars" : {
                 "terms" : {
                     "field" : "make",
                     "include" : ["mazda", "honda"]
                 }
             },
            "ActiveCarManufacturers" : {
                 "terms" : {
                     "field" : "make",
                     "exclude" : ["rover", "jensen"]
                 }
             }
        }
    }

     示例10:根据脚本计算值分组

    GET /_search
    {
        "aggs" : {
            "genres" : {
                "terms" : {
                    "script" : {
                        "source": "doc['genre'].value",
                        "lang": "painless"
                    }
                }
            }
        }
    }

     示例11:缺失值处理

    GET /_search
    {
        "aggs" : {
            "tags" : {
                 "terms" : {
                     "field" : "tags",
                     "missing": "N/A" 
                 }
             }
        }
    }

     结果11:

    {
      "took": 2059,
      "timed_out": false,
      "_shards": {
        "total": 58,
        "successful": 58,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1015,
        "max_score": 1,
        "hits": [
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "25",
            "_score": 1,
            "_source": {
              "account_number": 25,
              "balance": 40540,
              "firstname": "Virginia",
              "lastname": "Ayala",
              "age": 39,
              "gender": "F",
              "address": "171 Putnam Avenue",
              "employer": "Filodyne",
              "email": "virginiaayala@filodyne.com",
              "city": "Nicholson",
              "state": "PA"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "44",
            "_score": 1,
            "_source": {
              "account_number": 44,
              "balance": 34487,
              "firstname": "Aurelia",
              "lastname": "Harding",
              "age": 37,
              "gender": "M",
              "address": "502 Baycliff Terrace",
              "employer": "Orbalix",
              "email": "aureliaharding@orbalix.com",
              "city": "Yardville",
              "state": "DE"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "99",
            "_score": 1,
            "_source": {
              "account_number": 99,
              "balance": 47159,
              "firstname": "Ratliff",
              "lastname": "Heath",
              "age": 39,
              "gender": "F",
              "address": "806 Rockwell Place",
              "employer": "Zappix",
              "email": "ratliffheath@zappix.com",
              "city": "Shaft",
              "state": "ND"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "119",
            "_score": 1,
            "_source": {
              "account_number": 119,
              "balance": 49222,
              "firstname": "Laverne",
              "lastname": "Johnson",
              "age": 28,
              "gender": "F",
              "address": "302 Howard Place",
              "employer": "Senmei",
              "email": "lavernejohnson@senmei.com",
              "city": "Herlong",
              "state": "DC"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "126",
            "_score": 1,
            "_source": {
              "account_number": 126,
              "balance": 3607,
              "firstname": "Effie",
              "lastname": "Gates",
              "age": 39,
              "gender": "F",
              "address": "620 National Drive",
              "employer": "Digitalus",
              "email": "effiegates@digitalus.com",
              "city": "Blodgett",
              "state": "MD"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "145",
            "_score": 1,
            "_source": {
              "account_number": 145,
              "balance": 47406,
              "firstname": "Rowena",
              "lastname": "Wilkinson",
              "age": 32,
              "gender": "M",
              "address": "891 Elton Street",
              "employer": "Asimiline",
              "email": "rowenawilkinson@asimiline.com",
              "city": "Ripley",
              "state": "NH"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "183",
            "_score": 1,
            "_source": {
              "account_number": 183,
              "balance": 14223,
              "firstname": "Hudson",
              "lastname": "English",
              "age": 26,
              "gender": "F",
              "address": "823 Herkimer Place",
              "employer": "Xinware",
              "email": "hudsonenglish@xinware.com",
              "city": "Robbins",
              "state": "ND"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "190",
            "_score": 1,
            "_source": {
              "account_number": 190,
              "balance": 3150,
              "firstname": "Blake",
              "lastname": "Davidson",
              "age": 30,
              "gender": "F",
              "address": "636 Diamond Street",
              "employer": "Quantasis",
              "email": "blakedavidson@quantasis.com",
              "city": "Crumpler",
              "state": "KY"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "208",
            "_score": 1,
            "_source": {
              "account_number": 208,
              "balance": 40760,
              "firstname": "Garcia",
              "lastname": "Hess",
              "age": 26,
              "gender": "F",
              "address": "810 Nostrand Avenue",
              "employer": "Quiltigen",
              "email": "garciahess@quiltigen.com",
              "city": "Brooktrails",
              "state": "GA"
            }
          },
          {
            "_index": "bank",
            "_type": "_doc",
            "_id": "222",
            "_score": 1,
            "_source": {
              "account_number": 222,
              "balance": 14764,
              "firstname": "Rachelle",
              "lastname": "Rice",
              "age": 36,
              "gender": "M",
              "address": "333 Narrows Avenue",
              "employer": "Enaut",
              "email": "rachellerice@enaut.com",
              "city": "Wright",
              "state": "AZ"
            }
          }
        ]
      },
      "aggregations": {
        "tags": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "N/A",
              "doc_count": 1014
            },
            {
              "key": "red",
              "doc_count": 1
            }
          ]
        }
      }
    }
    View Code

    2.  Filter Aggregation  对满足过滤查询的文档进行聚合计算

     在查询命中的文档中选取符合过滤条件的文档进行聚合,先过滤再聚合

    示例1:在性别(gender)为 F 的客户中计算平均年龄

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_terms": {
          "filter": {"match":{"gender":"F"}},
          "aggs": {
            "avg_age": {
              "avg": {
                "field": "age"
              }
            }
          }
        }
      }
    }

     结果1:

    {
      "took": 163,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_terms": {
          "doc_count": 493,
          "avg_age": {
            "value": 30.3184584178499
          }
        }
      }
    }

     3. Filters Aggregation  多个过滤组聚合计算

    示例1:

     准备数据:

    PUT /logs/_doc/_bulk?refresh
    {"index":{"_id":1}}
    {"body":"warning: page could not be rendered"}
    {"index":{"_id":2}}
    {"body":"authentication error"}
    {"index":{"_id":3}}
    {"body":"warning: connection timed out"}

    获取组合过滤后聚合的结果:

    GET logs/_search
    {
      "size": 0,
      "aggs": {
        "messages": {
          "filters": {
            "filters": {
              "errors": {
                "match": {
                  "body": "error"
                }
              },
              "warnings": {
                "match": {
                  "body": "warning"
                }
              }
            }
          }
        }
      }
    }

     结果1:

    {
      "took": 18,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 3,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "messages": {
          "buckets": {
            "errors": {
              "doc_count": 1
            },
            "warnings": {
              "doc_count": 2
            }
          }
        }
      }
    }

     示例2:通过 other_bucket_key 为未匹配任何过滤条件的文档所在的桶指定key

    GET logs/_search
    {
      "size": 0,
      "aggs": {
        "messages": {
          "filters": {
            "other_bucket_key": "other_messages",
            "filters": {
              "errors": {
                "match": {
                  "body": "error"
                }
              },
              "warnings": {
                "match": {
                  "body": "warning"
                }
              }
            }
          }
        }
      }
    }

     结果2:

    {
      "took": 5,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 3,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "messages": {
          "buckets": {
            "errors": {
              "doc_count": 1
            },
            "warnings": {
              "doc_count": 2
            },
            "other_messages": {
              "doc_count": 0
            }
          }
        }
      }
    }

     4. Range Aggregation 范围分组聚合

     示例1:按年龄范围分组,并计算每组中余额的最大值

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_range": {
          "range": {
            "field": "age",
            "ranges": [
              {
                "to": 25
              },
              {
                "from": 25,
                "to": 35
              },
              {
                "from": 35
              }
            ]
          },
          "aggs": {
            "bmax": {
              "max": {
                "field": "balance"
              }
            }
          }
        }
      }
    }

     结果1:

    {
      "took": 7,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_range": {
          "buckets": [
            {
              "key": "*-25.0",
              "to": 25,
              "doc_count": 225,
              "bmax": {
                "value": 49587
              }
            },
            {
              "key": "25.0-35.0",
              "from": 25,
              "to": 35,
              "doc_count": 485,
              "bmax": {
                "value": 49795
              }
            },
            {
              "key": "35.0-*",
              "from": 35,
              "doc_count": 290,
              "bmax": {
                "value": 49989
              }
            }
          ]
        }
      }
    }

    示例2:为每个范围分组指定key(keyed 设为 true 时,结果中的 buckets 以 key 为属性名的对象形式返回)

    POST /bank/_search?size=0
    {
      "aggs": {
        "age_range": {
          "range": {
            "field": "age",
            "keyed": true,
            "ranges": [
              {
                "to": 25,
                "key": "Ld"
              },
              {
                "from": 25,
                "to": 35,
                "key": "Md"
              },
              {
                "from": 35,
                "key": "Od"
              }
            ]
          }
        }
      }
    }

    结果2:

    {
      "took": 2,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "age_range": {
          "buckets": {
            "Ld": {
              "to": 25,
              "doc_count": 225
            },
            "Md": {
              "from": 25,
              "to": 35,
              "doc_count": 485
            },
            "Od": {
              "from": 35,
              "doc_count": 290
            }
          }
        }
      }
    }

    5. Date Range Aggregation  时间范围分组聚合

    示例1:

    POST /bank/_search?size=0
    {
      "aggs": {
        "range": {
          "date_range": {
            "field": "date",
            "format": "MM-yyy",
            "ranges": [
              {
                "to": "now-10M/M"
              },
              {
                "from": "now-10M/M"
              }
            ]
          }
        }
      }
    }

    结果1(bank 索引的文档中没有 date 字段,所以两个桶的 doc_count 都是 0):

    {
      "took": 115,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "range": {
          "buckets": [
            {
              "key": "*-2017-08-01T00:00:00.000Z",
              "to": 1501545600000,
              "to_as_string": "2017-08-01T00:00:00.000Z",
              "doc_count": 0
            },
            {
              "key": "2017-08-01T00:00:00.000Z-*",
              "from": 1501545600000,
              "from_as_string": "2017-08-01T00:00:00.000Z",
              "doc_count": 0
            }
          ]
        }
      }
    }

    6. Date Histogram Aggregation  时间直方图(柱状)聚合

    就是按天、月、年等时间间隔对文档进行分桶统计。间隔可取 year (1y), quarter (1q), month (1M), week (1w), day (1d), hour (1h), minute (1m), second (1s),也可以指定其他的时间间隔。

    示例1:

    POST /bank/_search?size=0
    {
      "aggs": {
        "sales_over_time": {
          "date_histogram": {
            "field": "date",
            "interval": "month"
          }
        }
      }
    }

    结果1(bank 索引的文档中没有 date 字段,所以返回的 buckets 为空):

    {
      "took": 9,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 1000,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "sales_over_time": {
          "buckets": []
        }
      }
    }

    7. Missing Aggregation  缺失值的桶聚合(把缺少指定字段值的文档归入一个桶并统计其数量)

    POST /bank/_search?size=0
    {
        "aggs" : {
            "account_without_a_age" : {
                "missing" : { "field" : "age" }
            }
        }
    }

    8. Geo Distance Aggregation  地理距离分区聚合

    参考官网链接:

    https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-geodistance-aggregation.html

  • 相关阅读:
    P2155 [SDOI2008]沙拉公主的困惑
    P4345 [SHOI2015]超能粒子炮·改
    乘法逆元
    P1608 路径统计
    P1342 请柬
    一些网址
    20/08/02测试
    ivqBlog 开源博客 (angularjs + express + mongodb)
    angularjs, nodejs, express, gulp, karma, jasmine 前端方案整合
    参照nopCommerce框架开发(NextCMS)
  • 原文地址:https://www.cnblogs.com/leeSmall/p/9215909.html
Copyright © 2011-2022 走看看