zoukankan      html  css  js  c++  java
  • 把 Elasticsearch 当数据库使:聚合后排序

    使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。有的时候分桶聚合之后会产生很多的桶,我们只对其中部分的桶关心。最简单的办法就是排序之后然后取前几位的结果。

    ORDER BY _term

    SQL

    $ cat << EOF | ./es_query.py http://127.0.0.1:9200
    SELECT ipo_year, COUNT(*) FROM symbol GROUP BY ipo_year ORDER BY ipo_year LIMIT 2
    EOF
    {"COUNT(*)": 4, "ipo_year": 1972}
    {"COUNT(*)": 1, "ipo_year": 1973}

    Elasticsearch

    {
      "aggs": {
        "ipo_year": {
          "terms": {
            "field": "ipo_year", 
            "order": [
              {
                "_term": "asc"
              }
            ], 
            "size": 2
          }, 
          "aggs": {}
        }
      }, 
      "size": 0
    }

    因为 ipo_year 是 GROUP BY 的字段,所以按这个排序用_term指代。

    {
      "hits": {
        "hits": [], 
        "total": 6714, 
        "max_score": 0.0
      }, 
      "_shards": {
        "successful": 1, 
        "failed": 0, 
        "total": 1
      }, 
      "took": 3, 
      "aggregations": {
        "ipo_year": {
          "buckets": [
            {
              "key": 1972, 
              "doc_count": 4
            }, 
            {
              "key": 1973, 
              "doc_count": 1
            }
          ], 
          "sum_other_doc_count": 2893, 
          "doc_count_error_upper_bound": 0
        }
      }, 
      "timed_out": false
    }

    ORDER BY _count

    SQL

    $ cat << EOF | ./es_query.py http://127.0.0.1:9200
    SELECT ipo_year, COUNT(*) AS ipo_count FROM symbol GROUP BY ipo_year ORDER BY ipo_count LIMIT 2
    EOF
    {"ipo_count": 1, "ipo_year": 1973}
    {"ipo_count": 2, "ipo_year": 1980}

    Elasticsearch

    {
      "aggs": {
        "ipo_year": {
          "terms": {
            "field": "ipo_year", 
            "order": [
              {
                "_count": "asc"
              }
            ], 
            "size": 2
          }, 
          "aggs": {}
        }
      }, 
      "size": 0
    }
    {
      "hits": {
        "hits": [], 
        "total": 6714, 
        "max_score": 0.0
      }, 
      "_shards": {
        "successful": 1, 
        "failed": 0, 
        "total": 1
      }, 
      "took": 2, 
      "aggregations": {
        "ipo_year": {
          "buckets": [
            {
              "key": 1973, 
              "doc_count": 1
            }, 
            {
              "key": 1980, 
              "doc_count": 2
            }
          ], 
          "sum_other_doc_count": 2895, 
          "doc_count_error_upper_bound": -1
        }
      }, 
      "timed_out": false
    }

    ORDER BY 指标

    SQL

    $ cat << EOF | ./es_query.py http://127.0.0.1:9200
        SELECT ipo_year, MAX(market_cap) AS max_market_cap FROM symbol 
        GROUP BY ipo_year ORDER BY max_market_cap LIMIT 2
    EOF
    {"max_market_cap": 826830000.0, "ipo_year": 1982}
    {"max_market_cap": 847180000.0, "ipo_year": 2016}

    Elasticsearch

    {
      "aggs": {
        "ipo_year": {
          "terms": {
            "field": "ipo_year", 
            "order": [
              {
                "max_market_cap": "asc"
              }
            ], 
            "size": 2
          }, 
          "aggs": {
            "max_market_cap": {
              "max": {
                "field": "market_cap"
              }
            }
          }
        }
      }, 
      "size": 0
    }
    {
      "hits": {
        "hits": [], 
        "total": 6714, 
        "max_score": 0.0
      }, 
      "_shards": {
        "successful": 1, 
        "failed": 0, 
        "total": 1
      }, 
      "took": 20, 
      "aggregations": {
        "ipo_year": {
          "buckets": [
            {
              "max_market_cap": {
                "value": 826830000.0
              }, 
              "key": 1982, 
              "doc_count": 4
            }, 
            {
              "max_market_cap": {
                "value": 847180000.0
              }, 
              "key": 2016, 
              "doc_count": 6
            }
          ], 
          "sum_other_doc_count": 2888, 
          "doc_count_error_upper_bound": -1
        }
      }, 
      "timed_out": false
    }

    HISTOGRAM 和 ORDER BY

    除了 terms aggregation,其他 aggregation 也支持 order by 但是并不完善。比如 histogram aggregation 支持 sort 但是并不支持 size (也就是可以ORDER BY 但是不能 LIMIT)。官方有计划增加一个通用的支持 LIMIT 的方式,不过还没有实现:https://github.com/elastic/elasticsearch/issues/14928

    SQL

    $ cat << EOF | ./es_query.py http://127.0.0.1:9200
        SELECT ipo_year_range, MAX(market_cap) AS max_market_cap FROM symbol 
        GROUP BY histogram(ipo_year, 10) AS ipo_year_range ORDER BY ipo_year_range 
    EOF
    {"ipo_year_range": 1970, "max_market_cap": 18370000000.0}
    {"ipo_year_range": 1980, "max_market_cap": 522690000000.0}
    {"ipo_year_range": 1990, "max_market_cap": 230940000000.0}
    {"ipo_year_range": 2000, "max_market_cap": 470490000000.0}
    {"ipo_year_range": 2010, "max_market_cap": 287470000000.0}

    Elasticsearch

    {
      "aggs": {
        "ipo_year_range": {
          "aggs": {
            "max_market_cap": {
              "max": {
                "field": "market_cap"
              }
            }
          }, 
          "histogram": {
            "field": "ipo_year", 
            "interval": 10, 
            "order": {
              "_key": "asc"
            }
          }
        }
      }, 
      "size": 0
    }
    {
      "hits": {
        "hits": [], 
        "total": 6714, 
        "max_score": 0.0
      }, 
      "_shards": {
        "successful": 1, 
        "failed": 0, 
        "total": 1
      }, 
      "took": 2, 
      "aggregations": {
        "ipo_year_range": {
          "buckets": [
            {
              "max_market_cap": {
                "value": 18370000000.0
              }, 
              "key": 1970, 
              "doc_count": 5
            }, 
            {
              "max_market_cap": {
                "value": 522690000000.0
              }, 
              "key": 1980, 
              "doc_count": 155
            }, 
            {
              "max_market_cap": {
                "value": 230940000000.0
              }, 
              "key": 1990, 
              "doc_count": 598
            }, 
            {
              "max_market_cap": {
                "value": 470490000000.0
              }, 
              "key": 2000, 
              "doc_count": 745
            }, 
            {
              "max_market_cap": {
                "value": 287470000000.0
              }, 
              "key": 2010, 
              "doc_count": 1395
            }
          ]
        }
      }, 
      "timed_out": false
    }
  • 相关阅读:
    C++ char和string的区别
    解读机器学习基础概念:VC维的来龙去脉 | 数盟
    链接集锦
    MSSQL数据库日志满的快速解决办法
    oracle执行update时卡死问题的解决办法
    正则表达式中/i,/g,/ig,/gi,/m的区别和含义
    windows下sqlplus怎么连接远程oracle
    C#中TransactionScope的使用方法和原理
    C#设置Cookies .
    IIS7及以上伪静态报错404
  • 原文地址:https://www.cnblogs.com/sha0830/p/5553863.html
Copyright © 2011-2022 走看看