zoukankan      html  css  js  c++  java
  • ElasticSearch6.x版本聚合分析整理

    ElasticSearch6.x版本聚合分析整理

    ES将聚合分析主要分为如下4类

    1. Bucket,分桶类型,类似SQL中的GROUP BY语法
    2. Metric,指标分析类型,如计算最大值,最小值,平均值等
    3. Pipeline,管道分析类型,基于上一级的聚合分析结果进行再分析
    4. Matrix,矩阵分析类型

    Metric聚合分析

    主要分如下两类:

    1.单值分析,只输出一个分析结果

      min,max,avg,sum

      cardinality

    2.多值分析,输出多个分析结果

      stats,extended stats

      percentile,percentile rank

      top hits

    需要使用到的数据:

    POST test_search_index/doc/_bulk
    {"index":{"_id":"1"}}
    {"username":"alfred way","job":"java engineer","age":18,"birth":"1990-01-02","isMarried":false,"salary":10000}
    {"index":{"_id":"2"}}
    {"username":"tom","job":"java senior engineer","age":28,"birth":"1980-05-07","isMarried":true,"salary":30000}
    {"index":{"_id":"3"}}
    {"username":"lee","job":"ruby engineer","age":22,"birth":"1985-08-07","isMarried":false,"salary":15000}
    {"index":{"_id":"4"}}
    {"username":"Nick","job":"web engineer","age":23,"birth":"1989-08-07","isMarried":false,"salary":8000}
    {"index":{"_id":"5"}}
    {"username":"Niko","job":"web engineer","age":18,"birth":"1994-08-07","isMarried":false,"salary":5000}
    {"index":{"_id":"6"}}
    {"username":"Michell","job":"ruby engineer","age":26,"birth":"1987-08-07","isMarried":false,"salary":12000}
    
    Metric聚合分析

    返回数值类字段的最小值

    GET  test_search_index/_search
    {
    	#不需要返回文档列表
        "size":0, 
        "aggs":{
      		#名字
            "min_age":{
      			#关键词
                "min":{ 
                    "field":"age"
                }
            }
        }
    }
    
    #返回结果:
    {
        "took": 5,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "min_age": {
                "value": 18
            }
        }
    }
    
    #返回数值类字段的最大值
    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
            "max_age":{
                "max":{
                    "field":"age"
                }
            }
        }
    }
    
    #返回结果
    {
        "took": 7,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "max_age": {
                "value": 28
            }
        }
    }
    
    
    #返回数值类字段的平均值
    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
            "avg_age":{
                "avg":{
                    "field":"age"
                }
            }
        }
    }
    
    #返回结果
    {
        "took": 2,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "avg_age": {
                "value": 22.5
            }
        }
    }
    
    #返回数值字段的总和
    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
            "sum_age":{
                "sum":{
                    "field":"age"
                }
            }
        }
    }
    
    {
        "took": 3,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "sum_age": {
                "value": 135
            }
        }
    }
    
    #一次返回多个结果
    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
        	"min_age":{
        		"min":{
        			"field":"age"
        		}
        	},
        	"max_age":{
        		"max":{
        			"field":"age"
        		}
        	},
            "sum_age":{
                "sum":{
                    "field":"age"
                }
            }
        }
    }
    
    #返回结果
    {
        "took": 2,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "max_age": {
                "value": 28
            },
            "sum_age": {
                "value": 135
            },
            "min_age": {
                "value": 18
            }
        }
    }
    

    Metric聚合分析--Cardinality

    Cardinality,意为集合的势,或者基数,是指不同数值的个数,类似SQL中的distinct count概念

    GET  test_search_index/_search
    {
        "size":10,
        "aggs":{
        	"count_of_job":{
        		"cardinality":{
        			"field":"job.keyword"
        		}
        	}
        }
    }
    
    #返回
    {
        "took": 4,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 1,
            "hits": [
                {
                    "_index": "test_search_index",
                    "_type": "doc",
                    "_id": "5",
                    "_score": 1,
                    "_source": {
                        "username": "Niko",
                        "job": "web engineer",
                        "age": 18,
                        "birth": "1994-08-07",
                        "isMarried": false,
                        "salary": 5000
                    }
                },
                {
                    "_index": "test_search_index",
                    "_type": "doc",
                    "_id": "2",
                    "_score": 1,
                    "_source": {
                        "username": "tom",
                        "job": "java senior engineer",
                        "age": 28,
                        "birth": "1980-05-07",
                        "isMarried": true,
                        "salary": 30000
                    }
                },
                {
                    "_index": "test_search_index",
                    "_type": "doc",
                    "_id": "4",
                    "_score": 1,
                    "_source": {
                        "username": "Nick",
                        "job": "web engineer",
                        "age": 23,
                        "birth": "1989-08-07",
                        "isMarried": false,
                        "salary": 8000
                    }
                },
                {
                    "_index": "test_search_index",
                    "_type": "doc",
                    "_id": "6",
                    "_score": 1,
                    "_source": {
                        "username": "Michell",
                        "job": "ruby engineer",
                        "age": 26,
                        "birth": "1987-08-07",
                        "isMarried": false,
                        "salary": 12000
                    }
                },
                {
                    "_index": "test_search_index",
                    "_type": "doc",
                    "_id": "1",
                    "_score": 1,
                    "_source": {
                        "username": "alfred way",
                        "job": "java engineer",
                        "age": 18,
                        "birth": "1990-01-02",
                        "isMarried": false,
                        "salary": 10000
                    }
                },
                {
                    "_index": "test_search_index",
                    "_type": "doc",
                    "_id": "3",
                    "_score": 1,
                    "_source": {
                        "username": "lee",
                        "job": "ruby engineer",
                        "age": 22,
                        "birth": "1985-08-07",
                        "isMarried": false,
                        "salary": 15000
                    }
                }
            ]
        },
        "aggregations": {
            "count_of_job": {
                "value": 4
            }
        }
    }
    

    Metric聚合分析-Stats

    返回一系列数值类型的统计值,包含min,max,avg,sum和count

    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
        	"stats_age":{
        		"stats":{
        			"field":"age"
        		}
        	}
        }
    }
    
    #返回
    {
        "took": 1,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "stats_age": {
                "count": 6,
                "min": 18,
                "max": 28,
                "avg": 22.5,
                "sum": 135
            }
        }
    }
    

    Metric聚合分析-Extended Stats

    对stats的扩展,包含了更多的统计数据,如方差,标准差等

    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
        	"stats_age":{
        		"extended_stats":{
        			"field":"age"
        		}
        	}
        }
    }
    
    #返回
    {
        "took": 2,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "stats_age": {
                "count": 6,
                "min": 18,
                "max": 28,
                "avg": 22.5,
                "sum": 135,
                "sum_of_squares": 3121,
                "variance": 13.916666666666666,
                "std_deviation": 3.730504880933232,
                "std_deviation_bounds": {
                    "upper": 29.961009761866464,
                    "lower": 15.038990238133536
                }
            }
        }
    }
    

    Metric聚合分析-Percentile

    百分位数统计

    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
        	"per_age":{
        		"percentiles":{
        			"field":"salary"
        		}
        	}
        }
    }
    
    #返回
    {
        "took": 6,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "per_age": {
              	#代表有百分之一的人工资在5000以下,百分之二十五的人工资在8000以下....
                "values": {
                    "1.0": 5000,
                    "5.0": 5000,
                    "25.0": 8000,
                    "50.0": 11000,
                    "75.0": 15000,
                    "95.0": 30000,
                    "99.0": 30000
                }
            }
        }
    }
    
    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
        	"per_age":{
        		"percentile_ranks":{
        			"field":"salary",
        			"values":[
        				11000,
        				30000
        			]
        		}
        	}
        }
    }
    
    #返回
    {
        "took": 2,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "per_age": {
                "values": {
                    "11000.0": 50,
                    "30000.0": 100
                }
            }
        }
    }
    
    

    Metric聚合分析-Top Hits

    一般用于分桶后获取该桶内最匹配的顶部文档列表,即详情数据

    #先按照job分桶, 然后在桶内做年龄的排序
    GET  test_search_index/_search
    {
        "size":0,
        "aggs":{
        	"jobs":{
        		"terms":{
        			"field":"job.keyword",
        			"size":10
        		},
        		"aggs":{
        			"top_employee":{
        				"top_hits":{
        					"size":10,
        					"sort":[
        					{
        						"age":{
        							"order":"desc"
        						}
        					}
        					]
        				}
        			}
        		}
        	}
        }
    }
    
    #返回
    {
        "took": 42,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "jobs": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "ruby engineer",
                        "doc_count": 2,
                        "top_employee": {
                            "hits": {
                                "total": 2,
                                "max_score": null,
                                "hits": [
                                    {
                                        "_index": "test_search_index",
                                        "_type": "doc",
                                        "_id": "6",
                                        "_score": null,
                                        "_source": {
                                            "username": "Michell",
                                            "job": "ruby engineer",
                                            "age": 26,
                                            "birth": "1987-08-07",
                                            "isMarried": false,
                                            "salary": 12000
                                        },
                                        "sort": [
                                            26
                                        ]
                                    },
                                    {
                                        "_index": "test_search_index",
                                        "_type": "doc",
                                        "_id": "3",
                                        "_score": null,
                                        "_source": {
                                            "username": "lee",
                                            "job": "ruby engineer",
                                            "age": 22,
                                            "birth": "1985-08-07",
                                            "isMarried": false,
                                            "salary": 15000
                                        },
                                        "sort": [
                                            22
                                        ]
                                    }
                                ]
                            }
                        }
                    },
                    {
                        "key": "web engineer",
                        "doc_count": 2,
                        "top_employee": {
                            "hits": {
                                "total": 2,
                                "max_score": null,
                                "hits": [
                                    {
                                        "_index": "test_search_index",
                                        "_type": "doc",
                                        "_id": "4",
                                        "_score": null,
                                        "_source": {
                                            "username": "Nick",
                                            "job": "web engineer",
                                            "age": 23,
                                            "birth": "1989-08-07",
                                            "isMarried": false,
                                            "salary": 8000
                                        },
                                        "sort": [
                                            23
                                        ]
                                    },
                                    {
                                        "_index": "test_search_index",
                                        "_type": "doc",
                                        "_id": "5",
                                        "_score": null,
                                        "_source": {
                                            "username": "Niko",
                                            "job": "web engineer",
                                            "age": 18,
                                            "birth": "1994-08-07",
                                            "isMarried": false,
                                            "salary": 5000
                                        },
                                        "sort": [
                                            18
                                        ]
                                    }
                                ]
                            }
                        }
                    },
                    {
                        "key": "java engineer",
                        "doc_count": 1,
                        "top_employee": {
                            "hits": {
                                "total": 1,
                                "max_score": null,
                                "hits": [
                                    {
                                        "_index": "test_search_index",
                                        "_type": "doc",
                                        "_id": "1",
                                        "_score": null,
                                        "_source": {
                                            "username": "alfred way",
                                            "job": "java engineer",
                                            "age": 18,
                                            "birth": "1990-01-02",
                                            "isMarried": false,
                                            "salary": 10000
                                        },
                                        "sort": [
                                            18
                                        ]
                                    }
                                ]
                            }
                        }
                    },
                    {
                        "key": "java senior engineer",
                        "doc_count": 1,
                        "top_employee": {
                            "hits": {
                                "total": 1,
                                "max_score": null,
                                "hits": [
                                    {
                                        "_index": "test_search_index",
                                        "_type": "doc",
                                        "_id": "2",
                                        "_score": null,
                                        "_source": {
                                            "username": "tom",
                                            "job": "java senior engineer",
                                            "age": 28,
                                            "birth": "1980-05-07",
                                            "isMarried": true,
                                            "salary": 30000
                                        },
                                        "sort": [
                                            28
                                        ]
                                    }
                                ]
                            }
                        }
                    }
                ]
            }
        }
    }
    
    Bucket聚合分析

    Terms

    该分桶策略最简单,直接按照term来分桶,如果是text类型,则按照分词后的结果分桶

    GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"jobs":{
    			"terms":{
    				"field":"job.keyword",
    				"size":10
    			}
    		}
    	}
    }
    
    #返回
    {
        "took": 2,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "jobs": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "ruby engineer",
                        "doc_count": 2
                    },
                    {
                        "key": "web engineer",
                        "doc_count": 2
                    },
                    {
                        "key": "java engineer",
                        "doc_count": 1
                    },
                    {
                        "key": "java senior engineer",
                        "doc_count": 1
                    }
                ]
            }
        }
    }
    

    Range

    通过指定数值的范围来设定分桶规则

    GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"salary_range":{
    			"range":{
    				"field":"salary",
    				"ranges":[
    				{
    					"to":10000
    				},
    				{
    					"from":10000,
    					"to":20000
    				},
    				{
    					"from":20000
    				}
    				]
    			}
    		}
    	}
    }
    
    #返回
    {
        "took": 3,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "salary_range": {
                "buckets": [
                    {
                        "key": "*-10000.0",
                        "to": 10000,
                        "doc_count": 2
                    },
                    {
                        "key": "10000.0-20000.0",
                        "from": 10000,
                        "to": 20000,
                        "doc_count": 3
                    },
                    {
                        "key": "20000.0-*",
                        "from": 20000,
                        "doc_count": 1
                    }
                ]
            }
        }
    }
    

    Date Range

    通过指定日期的范围来设定分桶规则

    GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"date_range":{
    			"range":{
    				"field":"birth",
    				"format":"yyyy",
    				"ranges":[
    				{
    					"from":"1980",
    					"to":"1990"
    				},
    				{
    					"from":"1990",
    					"to":"2000"
    				},
    				{	
    					"from":"2000"
    				}
    				]
    			}
    		}
    	}
    }
    
    #返回
    {
        "took": 3,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "date_range": {
                "buckets": [
                    {
                        "key": "1980-1990",
                        "from": 315532800000,
                        "from_as_string": "1980",
                        "to": 631152000000,
                        "to_as_string": "1990",
                        "doc_count": 4
                    },
                    {
                        "key": "1990-2000",
                        "from": 631152000000,
                        "from_as_string": "1990",
                        "to": 946684800000,
                        "to_as_string": "2000",
                        "doc_count": 2
                    },
                    {
                        "key": "2000-*",
                        "from": 946684800000,
                        "from_as_string": "2000",
                        "doc_count": 0
                    }
                ]
            }
        }
    }
    

    Histogram

    直方图,以固定间隔的策略来分隔数据

    #表示间隔5000分隔工资的分布情况, 最小0,最大40000
    GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"salary_hist":{
    			"histogram":{
    				"field":"salary",
    				 "interval":5000,
    				 "extended_bounds":{
    				 	"min":0,
    				 	"max":40000
    				 }
    			}
    		}
    	}
    }
    
    #返回
    {
        "took": 2,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "salary_hist": {
                "buckets": [
                    {
                        "key": 0,
                        "doc_count": 0
                    },
                    {
                        "key": 5000,
                        "doc_count": 2
                    },
                    {
                        "key": 10000,
                        "doc_count": 2
                    },
                    {
                        "key": 15000,
                        "doc_count": 1
                    },
                    {
                        "key": 20000,
                        "doc_count": 0
                    },
                    {
                        "key": 25000,
                        "doc_count": 0
                    },
                    {
                        "key": 30000,
                        "doc_count": 1
                    },
                    {
                        "key": 35000,
                        "doc_count": 0
                    },
                    {
                        "key": 40000,
                        "doc_count": 0
                    }
                ]
            }
        }
    }
    

    Date Histogram

    针对日期的直方图或者柱状图,是时序数据分析中常用的聚合分析类型

    GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"salary_hist":{
    			"date_histogram":{
    				"field":"birth",
    				 "interval":"year",
    				 "format":"yyyy"
    			}
    		}
    	}
    }
    
    #返回
    {
        "took": 4,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "salary_hist": {
                "buckets": [
                    {
                        "key_as_string": "1980",
                        "key": 315532800000,
                        "doc_count": 1
                    },
                    {
                        "key_as_string": "1981",
                        "key": 347155200000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1982",
                        "key": 378691200000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1983",
                        "key": 410227200000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1984",
                        "key": 441763200000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1985",
                        "key": 473385600000,
                        "doc_count": 1
                    },
                    {
                        "key_as_string": "1986",
                        "key": 504921600000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1987",
                        "key": 536457600000,
                        "doc_count": 1
                    },
                    {
                        "key_as_string": "1988",
                        "key": 567993600000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1989",
                        "key": 599616000000,
                        "doc_count": 1
                    },
                    {
                        "key_as_string": "1990",
                        "key": 631152000000,
                        "doc_count": 1
                    },
                    {
                        "key_as_string": "1991",
                        "key": 662688000000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1992",
                        "key": 694224000000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1993",
                        "key": 725846400000,
                        "doc_count": 0
                    },
                    {
                        "key_as_string": "1994",
                        "key": 757382400000,
                        "doc_count": 1
                    }
                ]
            }
        }
    }
    
    Bucket+Metric聚合分析

    分桶后再分桶

    1. GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"jobs":{
    			"terms":{
    				"field":"job.keyword",
    				"size":10
    			},
    			"aggs":{
    				"age_range":{
    					"range":{
    						"field":"age",
    						"ranges":[
    						{"to":20},
    						{"from":20,"to":30},
    						{"from":30}
    						]
    					}
    				}
    			}
    		}
    	}
    }
    
    #返回
    {
        "took": 2,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "jobs": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "ruby engineer",
                        "doc_count": 2,
                        "age_range": {
                            "buckets": [
                                {
                                    "key": "*-20.0",
                                    "to": 20,
                                    "doc_count": 0
                                },
                                {
                                    "key": "20.0-30.0",
                                    "from": 20,
                                    "to": 30,
                                    "doc_count": 2
                                },
                                {
                                    "key": "30.0-*",
                                    "from": 30,
                                    "doc_count": 0
                                }
                            ]
                        }
                    },
                    {
                        "key": "web engineer",
                        "doc_count": 2,
                        "age_range": {
                            "buckets": [
                                {
                                    "key": "*-20.0",
                                    "to": 20,
                                    "doc_count": 1
                                },
                                {
                                    "key": "20.0-30.0",
                                    "from": 20,
                                    "to": 30,
                                    "doc_count": 1
                                },
                                {
                                    "key": "30.0-*",
                                    "from": 30,
                                    "doc_count": 0
                                }
                            ]
                        }
                    },
                    {
                        "key": "java engineer",
                        "doc_count": 1,
                        "age_range": {
                            "buckets": [
                                {
                                    "key": "*-20.0",
                                    "to": 20,
                                    "doc_count": 1
                                },
                                {
                                    "key": "20.0-30.0",
                                    "from": 20,
                                    "to": 30,
                                    "doc_count": 0
                                },
                                {
                                    "key": "30.0-*",
                                    "from": 30,
                                    "doc_count": 0
                                }
                            ]
                        }
                    },
                    {
                        "key": "java senior engineer",
                        "doc_count": 1,
                        "age_range": {
                            "buckets": [
                                {
                                    "key": "*-20.0",
                                    "to": 20,
                                    "doc_count": 0
                                },
                                {
                                    "key": "20.0-30.0",
                                    "from": 20,
                                    "to": 30,
                                    "doc_count": 1
                                },
                                {
                                    "key": "30.0-*",
                                    "from": 30,
                                    "doc_count": 0
                                }
                            ]
                        }
                    }
                ]
            }
        }
    }
    
    2.分桶后进行数据分析
    GET  test_search_index/_search
    
    {
    	"size":0,
    	"aggs":{
    		"jobs":{
    			"terms":{
    				"field":"job.keyword",
    				"size":10
    			},
    			"aggs":{
    				"salary":{
    					"stats":{
    						"field":"salary"
    					}
    				}
    			}
    		}
    	}
    }
    
    {
        "took": 7,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "jobs": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "ruby engineer",
                        "doc_count": 2,
                        "salary": {
                            "count": 2,
                            "min": 12000,
                            "max": 15000,
                            "avg": 13500,
                            "sum": 27000
                        }
                    },
                    {
                        "key": "web engineer",
                        "doc_count": 2,
                        "salary": {
                            "count": 2,
                            "min": 5000,
                            "max": 8000,
                            "avg": 6500,
                            "sum": 13000
                        }
                    },
                    {
                        "key": "java engineer",
                        "doc_count": 1,
                        "salary": {
                            "count": 1,
                            "min": 10000,
                            "max": 10000,
                            "avg": 10000,
                            "sum": 10000
                        }
                    },
                    {
                        "key": "java senior engineer",
                        "doc_count": 1,
                        "salary": {
                            "count": 1,
                            "min": 30000,
                            "max": 30000,
                            "avg": 30000,
                            "sum": 30000
                        }
                    }
                ]
            }
        }
    }
    
    Pipeline聚合分析

    针对聚合分析的结果再次进行聚合分析,而且支持链式调用

    Pipeline的分析结果会输出到原结果中,根据输出位置的不同,分为以下两类:

    1.Parent:结果内嵌到现有的聚合分析结果中

    ​ Derivative

    ​ Moving Average

    ​ Cumulative Sum

    2.Sibling:结果与现有聚合分析结果同级

    ​ Max/Min/Avg/Sum Bucket

    ​ Stats/Extended Stats Bucket

    ​ Percentiles Bucket

    Sibling - Min Bucket

    找出所有Bucket中值最小的Bucket名称和值

    1.先用terms聚合按job进行分桶
    2.在每个job分桶内嵌套一个avg聚合,求平均salary
    3.然后用一个同级的 Min Bucket求上面平均工资里面最小的那个
    
    GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"jobs":{
    			"terms":{
    				"field":"job.keyword",
    				"size":10
    			},
    			"aggs":{
    				"avg_salary":{
    					"avg":{
    						"field":"salary"
    					}
    				}
    			}
    		},
    		"min_salary_by_job":{
    			"min_bucket":{
    				"buckets_path":"jobs>avg_salary"
    			}
    		}
    	}
    }
    
    {
        "took": 5,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "jobs": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "ruby engineer",
                        "doc_count": 2,
                        "avg_salary": {
                            "value": 13500
                        }
                    },
                    {
                        "key": "web engineer",
                        "doc_count": 2,
                        "avg_salary": {
                            "value": 6500
                        }
                    },
                    {
                        "key": "java engineer",
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 10000
                        }
                    },
                    {
                        "key": "java senior engineer",
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 30000
                        }
                    }
                ]
            },
            "min_salary_by_job": {
                "value": 6500,
                "keys": [
                    "web engineer"
                ]
            }
        }
    }
    
    找出所有Bucket中值最大的Bucket名称和值
    GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"jobs":{
    			"terms":{
    				"field":"job.keyword",
    				"size":10
    			},
    			"aggs":{
    				"avg_salary":{
    					"avg":{
    						"field":"salary"
    					}
    				}
    			}
    		},
    		"max_salary_by_job":{
    			"max_bucket":{
    				"buckets_path":"jobs>avg_salary"
    			}
    		}
    	}
    }
    
    计算所有Bucket值的平均值
    GET  test_search_index/_search
    {
    	"size":0,
    	"aggs":{
    		"jobs":{
    			"terms":{
    				"field":"job.keyword",
    				"size":10
    			},
    			"aggs":{
    				"avg_salary":{
    					"avg":{
    						"field":"salary"
    					}
    				}
    			}
    		},
    		"avg_salary_by_job":{
    			"avg_bucket":{
    				"buckets_path":"jobs>avg_salary"
    			}
    		}
    	}
    }
    
    计算所有Bucket值的Stats分析
    GET  test_search_index/_search
    
    {
    	"size":0,
    	"aggs":{
    		"jobs":{
    			"terms":{
    				"field":"job.keyword",
    				"size":10
    			},
    			"aggs":{
    				"avg_salary":{
    					"avg":{
    						"field":"salary"
    					}
    				}
    			}
    		},
    		"stats_salary_by_job":{
    			"stats_bucket":{
    				"buckets_path":"jobs>avg_salary"
    			}
    		}
    	}
    }
    
    #返回
    {
        "took": 3,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "jobs": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "ruby engineer",
                        "doc_count": 2,
                        "avg_salary": {
                            "value": 13500
                        }
                    },
                    {
                        "key": "web engineer",
                        "doc_count": 2,
                        "avg_salary": {
                            "value": 6500
                        }
                    },
                    {
                        "key": "java engineer",
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 10000
                        }
                    },
                    {
                        "key": "java senior engineer",
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 30000
                        }
                    }
                ]
            },
            "stats_salary_by_job": {
                "count": 4,
                "min": 6500,
                "max": 30000,
                "avg": 15000,
                "sum": 60000
            }
        }
    }
    
    计算所有Bucket值的百分位数
    GET  test_search_index/_search
    
    {
    	"size":0,
    	"aggs":{
    		"jobs":{
    			"terms":{
    				"field":"job.keyword",
    				"size":10
    			},
    			"aggs":{
    				"avg_salary":{
    					"avg":{
    						"field":"salary"
    					}
    				}
    			}
    		},
    		"percentiles_salary_by_job":{
    			"percentiles_bucket":{
    				"buckets_path":"jobs>avg_salary"
    			}
    		}
    	}
    }
    
    {
        "took": 1,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "jobs": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "ruby engineer",
                        "doc_count": 2,
                        "avg_salary": {
                            "value": 13500
                        }
                    },
                    {
                        "key": "web engineer",
                        "doc_count": 2,
                        "avg_salary": {
                            "value": 6500
                        }
                    },
                    {
                        "key": "java engineer",
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 10000
                        }
                    },
                    {
                        "key": "java senior engineer",
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 30000
                        }
                    }
                ]
            },
            "percentiles_salary_by_job": {
                "values": {
                    "1.0": 6500,
                    "5.0": 6500,
                    "25.0": 10000,
                    "50.0": 13500,
                    "75.0": 13500,
                    "95.0": 30000,
                    "99.0": 30000
                }
            }
        }
    }
    
    Parent - Derivative

    计算Bucket值的导数

    GET  test_search_index/_search
    
    {
    	"size":0,
    	"aggs":{
    		"birth":{
    			"date_histogram":{
    				"field":"birth",
    				"interval":"year",
    				"min_doc_count":0
    			},
    			"aggs":{
    				"avg_salary":{
    					"avg":{
    						"field":"salary"
    					}
    				},
    				"derivative_avg_salary":{
    					"derivative":{
    						"buckets_path":"avg_salary"
    					}
    				}
    			}
    		}
    	}
    }
    
    #返回
    {
        "took": 2,
        "timed_out": false,
        "_shards": {
            "total": 5,
            "successful": 5,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 6,
            "max_score": 0,
            "hits": []
        },
        "aggregations": {
            "birth": {
                "buckets": [
                    {
                        "key_as_string": "1980-01-01T00:00:00.000Z",
                        "key": 315532800000,
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 30000
                        }
                    },
                    {
                        "key_as_string": "1981-01-01T00:00:00.000Z",
                        "key": 347155200000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1982-01-01T00:00:00.000Z",
                        "key": 378691200000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1983-01-01T00:00:00.000Z",
                        "key": 410227200000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1984-01-01T00:00:00.000Z",
                        "key": 441763200000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1985-01-01T00:00:00.000Z",
                        "key": 473385600000,
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 15000
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1986-01-01T00:00:00.000Z",
                        "key": 504921600000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1987-01-01T00:00:00.000Z",
                        "key": 536457600000,
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 12000
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1988-01-01T00:00:00.000Z",
                        "key": 567993600000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1989-01-01T00:00:00.000Z",
                        "key": 599616000000,
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 8000
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1990-01-01T00:00:00.000Z",
                        "key": 631152000000,
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 10000
                        },
                        "derivative_avg_salary": {
                            "value": 2000
                        }
                    },
                    {
                        "key_as_string": "1991-01-01T00:00:00.000Z",
                        "key": 662688000000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1992-01-01T00:00:00.000Z",
                        "key": 694224000000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1993-01-01T00:00:00.000Z",
                        "key": 725846400000,
                        "doc_count": 0,
                        "avg_salary": {
                            "value": null
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    },
                    {
                        "key_as_string": "1994-01-01T00:00:00.000Z",
                        "key": 757382400000,
                        "doc_count": 1,
                        "avg_salary": {
                            "value": 5000
                        },
                        "derivative_avg_salary": {
                            "value": null
                        }
                    }
                ]
            }
        }
    }
    
  • 相关阅读:
    管理者的主要任务是什么?
    管理者的最基本职责是什么?
    管理者的两大职责:“管事”和“理人”
    最近的思考
    刨根问底Objective-C Runtime
    Objective-C 中的类和对象
    ios动态添加属性的几种方法
    多态 oc c++ 与oc category
    利用Objective-C运行时hook函数的三种方法
    Hook技术
  • 原文地址:https://www.cnblogs.com/luozhiyun/p/9354813.html
Copyright © 2011-2022 走看看