zoukankan      html  css  js  c++  java
  • search(15)- elastic4s-sorting buckets

      terms聚合结果的buckets默认按doc_count降序排列(相当于 _count desc),也可以用order明确指定排序方式,例如 _count asc。除 _count 外,还有 _term, _key 为排序控制元素。_key适用于histogram,date_histogram。下面是一个 _count asc 的例子:

    POST /cartxns/_search
    {
      "aggs": {
        "colors": {
          "terms": {
            "field": "color.keyword",
            "order": {
              "_count": "asc"
            }
          }
        }
      }
    }
    
    ...
    
      "aggregations" : {
        "colors" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "blue",
              "doc_count" : 2
            },
            {
              "key" : "green",
              "doc_count" : 2
            },
            {
              "key" : "red",
              "doc_count" : 4
            }
          ]
        }
      }

    elastic4s表达式如下(注意这里TermsOrder的第二个参数为false,即按 _count 降序):

     // Terms aggregation over color.keyword. The `false` flag on TermsOrder requests
     // descending order (the rendered request contains "_count":"desc").
     val colorsByCount = termsAggregation("colors")
       .field("color.keyword")
       .order(TermsOrder("_count", false))
     val aggTerms = search("cartxns").aggregations(colorsByCount)

     // Dump the generated request before running it.
     println(aggTerms.show)

     val termsResult = client.execute(aggTerms).await

     // Report the failure cause first, otherwise print one "key,docCount" line per bucket.
     if (!termsResult.isSuccess) {
       println(s"error: ${termsResult.error.causedBy.getOrElse("unknown")}")
     } else {
       for (bucket <- termsResult.result.aggregations.terms("colors").buckets)
         println(s"${bucket.key},${bucket.docCount}")
     }
    
    ...
    
    POST:/cartxns/_search?
    StringEntity({"aggs":{"colors":{"terms":{"field":"color.keyword","order":{"_count":"desc"}}}}},Some(application/json))
    red,4
    blue,2
    green,2

    再来一个date_histogram聚合例子: 

    POST /cartxns/_search
    {
      "aggs": {
        "monthly_sales": {
          "date_histogram": {
            "field": "sold",
            "calendar_interval": "1M",
            "format": "yyyy-MM-dd"
            , "order": {
              "_count": "desc"
            }
          }
        }
      }
    }
    
    ...
    
      "aggregations" : {
        "monthly_sales" : {
          "buckets" : [
            {
              "key_as_string" : "2014-11-01",
              "key" : 1414800000000,
              "doc_count" : 2
            },
            {
              "key_as_string" : "2014-01-01",
              "key" : 1388534400000,
              "doc_count" : 1
            },
            {
              "key_as_string" : "2014-02-01",
              "key" : 1391212800000,
              "doc_count" : 1
            },
            {
              "key_as_string" : "2014-05-01",
              "key" : 1398902400000,
              "doc_count" : 1
            },
            {
              "key_as_string" : "2014-07-01",
              "key" : 1404172800000,
              "doc_count" : 1
            },
            {
              "key_as_string" : "2014-08-01",
              "key" : 1406851200000,
              "doc_count" : 1
            },
            {
              "key_as_string" : "2014-10-01",
              "key" : 1412121600000,
              "doc_count" : 1
            },
            {
              "key_as_string" : "2014-03-01",
              "key" : 1393632000000,
              "doc_count" : 0
            },
            {
              "key_as_string" : "2014-04-01",
              "key" : 1396310400000,
              "doc_count" : 0
            },
            {
              "key_as_string" : "2014-06-01",
              "key" : 1401580800000,
              "doc_count" : 0
            },
            {
              "key_as_string" : "2014-09-01",
              "key" : 1409529600000,
              "doc_count" : 0
            }
          ]
        }
      }

    elastic4s示例(这里改为按 _key 降序排序,并用 minDocCount(1) 过滤掉doc_count为0的桶):

     // Monthly date_histogram on the "sold" field: bucket keys are formatted as
     // yyyy-MM-dd, buckets with fewer than one document are dropped (minDocCount(1)),
     // and buckets are ordered by _key with the ascending flag set to false.
     val aggHist = search("cartxns").aggregations(
       dateHistogramAggregation("monthly_sales")
         .field("sold")
         .calendarInterval(DateHistogramInterval.Month)
         .format("yyyy-MM-dd")
         .minDocCount(1)
         .order(HistogramOrder("_key", false))
     )

     // Print the request that is actually executed below. Printing aggTerms here
     // would display the terms request from the earlier example instead.
     println(aggHist.show)

     val histResult = client.execute(aggHist).await

     // On success print one "date,docCount" line per bucket; otherwise print the cause.
     if (histResult.isSuccess) {
       histResult.result.aggregations.dateHistogram("monthly_sales").buckets
         .foreach(b => println(s"${b.date},${b.docCount}"))
     } else println(s"error: ${histResult.error.causedBy.getOrElse("unknown")}")
    
    ...
    
    POST:/cartxns/_search?
    StringEntity({"aggs":{"colors":{"terms":{"field":"color.keyword","order":{"_count":"desc"}}}}},Some(application/json))
    2014-11-01,2
    2014-10-01,1
    2014-08-01,1
    2014-07-01,1
    2014-05-01,1
    2014-02-01,1
    2014-01-01,1

    _count,_term,_key三种固定排序当然是不足以表达实际的聚合结果。以度量结果进行排序才真正能够做到有针对性的,灵活的,广覆盖面的聚合结果排序,如:

    POST /cartxns/_search
    {
      "aggs": {
        "makes": {
          "terms": {
            "field": "make.keyword",
            "size": 10
            , "order": {
              "avg_price": "desc"
            }
          },
          "aggs": {
            "avg_price": {
              "avg": {"field": "price"}
            }
          }
        }
      }
    }
    
    ...
    
      "aggregations" : {
        "makes" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "bmw",
              "doc_count" : 1,
              "avg_price" : {
                "value" : 80000.0
              }
            },
            {
              "key" : "ford",
              "doc_count" : 2,
              "avg_price" : {
                "value" : 27500.0
              }
            },
            {
              "key" : "honda",
              "doc_count" : 3,
              "avg_price" : {
                "value" : 16666.666666666668
              }
            },
            {
              "key" : "toyota",
              "doc_count" : 2,
              "avg_price" : {
                "value" : 13500.0
              }
            }
          ]
        }
      }

    以上是个以avg_price倒排序的例子。elastic4s示范:

     // Order the "makes" term buckets by the value of their "avg_price" sub-aggregation.
     // The `false` flag requests descending order (rendered as "avg_price":"desc").
     val avgPrice = avgAggregation("avg_price").field("price")
     val makesByAvgPrice = termsAggregation("makes")
       .field("make.keyword")
       .order(TermsOrder("avg_price", false))
       .subaggs(avgPrice)
     val aggAvg = search("cartxns").aggregations(makesByAvgPrice)

     // Dump the generated request before running it.
     println(aggAvg.show)

     val avgResult = client.execute(aggAvg).await

     // Report the failure cause first, otherwise print "key,docCount,avg" per bucket.
     if (!avgResult.isSuccess) {
       println(s"error: ${avgResult.error.causedBy.getOrElse("unknown")}")
     } else {
       for (bucket <- avgResult.result.aggregations.terms("makes").buckets)
         println(s"${bucket.key},${bucket.docCount},${bucket.avg("avg_price").value}")
     }
    
    ...
    
    POST:/cartxns/_search?
    StringEntity({"aggs":{"makes":{"terms":{"field":"make.keyword","order":{"avg_price":"desc"}},"aggs":{"avg_price":{"avg":{"field":"price"}}}}}},Some(application/json))
    bmw,1,80000.0
    ford,2,27500.0
    honda,3,16666.666666666668
    toyota,2,13500.0

    可以用 .path 方式来实现对多值度量结果的排序,如:

    POST /cartxns/_search
    {
      "aggs": {
        "colors": {
          "terms": {
            "field": "color.keyword",
            "size": 10,
            "order": {
              "stats.sum": "desc"
            }
          },
          "aggs": {
            "stats": {
              "extended_stats": {
                "field": "price"
              }
            }
          }
        }
      }
    }
    
    ...
    
    "aggregations" : {
        "colors" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "red",
              "doc_count" : 4,
              "stats" : {
                "count" : 4,
                "min" : 10000.0,
                "max" : 80000.0,
                "avg" : 32500.0,
                "sum" : 130000.0,
                "sum_of_squares" : 7.3E9,
                "variance" : 7.6875E8,
                "std_deviation" : 27726.341266023544,
                "std_deviation_bounds" : {
                  "upper" : 87952.6825320471,
                  "lower" : -22952.68253204709
                }
              }
            },
            {
              "key" : "green",
              "doc_count" : 2,
              "stats" : {
                "count" : 2,
                "min" : 12000.0,
                "max" : 30000.0,
                "avg" : 21000.0,
                "sum" : 42000.0,
                "sum_of_squares" : 1.044E9,
                "variance" : 8.1E7,
                "std_deviation" : 9000.0,
                "std_deviation_bounds" : {
                  "upper" : 39000.0,
                  "lower" : 3000.0
                }
              }
            },
            {
              "key" : "blue",
              "doc_count" : 2,
              "stats" : {
                "count" : 2,
                "min" : 15000.0,
                "max" : 25000.0,
                "avg" : 20000.0,
                "sum" : 40000.0,
                "sum_of_squares" : 8.5E8,
                "variance" : 2.5E7,
                "std_deviation" : 5000.0,
                "std_deviation_bounds" : {
                  "upper" : 30000.0,
                  "lower" : 10000.0
                }
              }
            }
          ]
        }
      }

    extended_stats返回多值。我们可以用stats.sum来选择sum值。elastic4s示范:

    // Order the "makes" term buckets by the `sum` value of the multi-valued
    // extended_stats sub-aggregation, addressed as "price_stats.sum".
    // The `false` flag requests descending order (rendered as "price_stats.sum":"desc").
    val priceStats = extendedStatsAggregation("price_stats").field("price")
    val makesByPriceSum = termsAggregation("makes")
      .field("make.keyword")
      .order(TermsOrder("price_stats.sum", false))
      .subaggs(priceStats)
    val aggStats = search("cartxns").aggregations(makesByPriceSum)

    // Dump the generated request before running it.
    println(aggStats.show)

    val sumResult = client.execute(aggStats).await

    // Report the failure cause first, otherwise print "key,docCount,sum" per bucket.
    if (!sumResult.isSuccess) {
      println(s"error: ${sumResult.error.causedBy.getOrElse("unknown")}")
    } else {
      for (bucket <- sumResult.result.aggregations.terms("makes").buckets)
        println(s"${bucket.key},${bucket.docCount},${bucket.extendedStats("price_stats").sum}")
    }
    
    ...
    
    POST:/cartxns/_search?
    StringEntity({"aggs":{"makes":{"terms":{"field":"make.keyword","order":{"price_stats.sum":"desc"}},"aggs":{"price_stats":{"extended_stats":{"field":"price"}}}}}},Some(application/json))
    bmw,1,80000.0
    ford,2,55000.0
    honda,3,50000.0
    toyota,2,27000.0

    最后,用作排序的度量结果可能在聚合结构的内层。我们可以用>符号来表示聚合的层级路径(>左边是上一层聚合,右边是它内层的聚合):

    POST /cartxns/_search
    {
      "aggs": {
        "sales": {
          "histogram": {
            "field": "price",
            "interval": 20000,
            "min_doc_count": 1, 
            "order": {
              "red_green>stats.avg": "desc"
            }
          },
          "aggs": {
            "red_green": {
              "filter": {"terms": {"color.keyword" : ["red","green"]}
              },
              "aggs": {
                "stats": {
                  "extended_stats": {
                    "field": "price"
                  }
                }
              }
            }
          }
        }
      }
    }
    
    ...
    
      "aggregations" : {
        "sales" : {
          "buckets" : [
            {
              "key" : 80000.0,
              "doc_count" : 1,
              "red_green" : {
                "doc_count" : 1,
                "stats" : {
                  "count" : 1,
                  "min" : 80000.0,
                  "max" : 80000.0,
                  "avg" : 80000.0,
                  "sum" : 80000.0,
                  "sum_of_squares" : 6.4E9,
                  "variance" : 0.0,
                  "std_deviation" : 0.0,
                  "std_deviation_bounds" : {
                    "upper" : 80000.0,
                    "lower" : 80000.0
                  }
                }
              }
            },
            {
              "key" : 20000.0,
              "doc_count" : 4,
              "red_green" : {
                "doc_count" : 3,
                "stats" : {
                  "count" : 3,
                  "min" : 20000.0,
                  "max" : 30000.0,
                  "avg" : 23333.333333333332,
                  "sum" : 70000.0,
                  "sum_of_squares" : 1.7E9,
                  "variance" : 2.222222222222225E7,
                  "std_deviation" : 4714.04520791032,
                  "std_deviation_bounds" : {
                    "upper" : 32761.42374915397,
                    "lower" : 13905.242917512693
                  }
                }
              }
            },
            {
              "key" : 0.0,
              "doc_count" : 3,
              "red_green" : {
                "doc_count" : 2,
                "stats" : {
                  "count" : 2,
                  "min" : 10000.0,
                  "max" : 12000.0,
                  "avg" : 11000.0,
                  "sum" : 22000.0,
                  "sum_of_squares" : 2.44E8,
                  "variance" : 1000000.0,
                  "std_deviation" : 1000.0,
                  "std_deviation_bounds" : {
                    "upper" : 13000.0,
                    "lower" : 9000.0
                  }
                }
              }
            }
          ]
        }
      }

    elastic4s:

     // Filter sub-aggregation keeping only red/green documents, with extended stats
     // on price nested inside it.
     val redGreenStats = filterAggregation("red_green")
       .query(termsQuery("color.keyword", "red", "green"))
       .subaggs(extendedStatsAggregation("stats").field("price"))

     // Price histogram (interval 20000, buckets with fewer than one document dropped) ordered by a
     // metric nested one level down: "red_green>stats.sum" walks from the filter
     // aggregation into its "stats" child and picks the sum. `false` = descending.
     val salesByRedGreenSum = histogramAggregation("sales")
       .field("price")
       .interval(20000)
       .minDocCount(1)
       .order(HistogramOrder("red_green>stats.sum", false))
       .subaggs(redGreenStats)
     val innerStats = search("cartxns").aggregations(salesByRedGreenSum)

     // Dump the generated request before running it.
     println(innerStats.show)

     val innerResult = client.execute(innerStats).await

     // Report the failure cause first, otherwise print "key,docCount,sum" per bucket,
     // where sum comes from the nested red_green > stats aggregation.
     if (!innerResult.isSuccess) {
       println(s"error: ${innerResult.error.causedBy.getOrElse("unknown")}")
     } else {
       for (bucket <- innerResult.result.aggregations.histogram("sales").buckets)
         println(s"${bucket.key},${bucket.docCount},${bucket.filter("red_green").extendedStats("stats").sum}")
     }
    
    ...
    
    POST:/cartxns/_search?
    StringEntity({"aggs":{"sales":{"histogram":{"interval":20000.0,"min_doc_count":1,"order":{"red_green>stats.sum":"desc"},"field":"price"},"aggs":{"red_green":{"filter":{"terms":{"color.keyword":["red","green"]}},"aggs":{"stats":{"extended_stats":{"field":"price"}}}}}}}},Some(application/json))
    80000.0,1,80000.0
    20000.0,4,70000.0
    0.0,3,22000.0
  • 相关阅读:
    NGINX
    nginx修改上传文件大小限制
    Mysql主从复制机制原理
    MongoDB系列---用户及权限管理02
    MongoDB系列---入门安装操作01
    浅谈原理--hashCode方法
    ActiveMQ学习总结------原生实战操作(下)03
    dubbo配置负载均衡、集群环境
    ActiveMQ学习总结------入门篇01
    vsftpd上传文件大小为0(主动模式)
  • 原文地址:https://www.cnblogs.com/tiger-xc/p/12910945.html
Copyright © 2011-2022 走看看