zoukankan      html  css  js  c++  java
  • es聚合操作

    GET /jzt_study_detail/content/_search
    {
      "size": 0, 
      "aggs": {
        "group_by_channelids": {
          "terms": {
            "field": "channelIds",
            "size": 5,
            "order": {
              "sum_views": "desc"
            }
          },
          "aggs": {
            "avg_views": {
              "avg": {
                "field": "views"
              }
            },
            "sum_views": {
              "sum": {
                "field": "views"
              }
            }
          }
        }
      }
    }

    使用terms进行分组操作,用于分组的字段应该设置成不分词(keyword类型),然后通过内部嵌套的aggs中的sum以及avg进行数据聚合,并可以按照聚合结果(如上面的sum_views)对分组进行排序。

     ----------------------------------------------------

    bucket aggs方式这里先介绍两种,一种是range(按照数据区间分组),另一种是terms(按照指定的字段分组,多个字段可嵌套分组),上面的json为按照terms分组

    按照range分组如下:

    POST /index_test_1/type_test_1/_search
    {
      "size": 0, 
      "aggs": {
        "group_by_views": {
          "range": {
            "field": "views",
            "ranges": [
              {
                "key": "<2", 
                "to": 2
              },
              {
                "to": 3,
                "from": 2
              },
              {
                "to": 4,
                "from": 3
              }
              ,
              {
                "key": ">4", 
                "from": 4
              }
            ]
          },
          "aggs":{
            "sum_views":{
             "sum":{
               "field": "views"
              }
            },
            "avg_views":{
              "avg": {
                "field": "views"
              }
            }
          }
        }
      }
    }

     分组操作后,可以使用top_hits方式求组内按某一指标排序的topN,如下json:

    POST /index_test_1/type_test_1/_search
    {
      "size": 0,
      "aggs": {
        "group_view": {
          "terms": {
            "field": "content_type",
            "size": 10
          },
          "aggs": {
            "top_title": {
             "top_hits": {
               "size": 10,
               "sort": [
                 {
                   "views":{
                     "order": "desc"
                   }
                 }
                ]
             }
            }
          }
        }
        
      }
    }

     ================================

    PUT /index_test_1
    {
      "mappings": {
        "type_test_1":{
          "properties":{
            "title":{
              "type":"text",
              "fielddata":true,
              "fields": {"raw": {"type": "keyword"}}
            },
             "views":{
               "type":"long"
            },
            "content_type":{
               "type":"keyword"
            }
          }
        }
      }
    }
    POST /index_test_1/type_test_1/a1
    {
      "title":"我的名字",
      "views":1,
      "content_type":"viedo"
    }
    
    
    POST /index_test_1/type_test_1/a2
    {
      "title":"中国人的名字",
      "views":2,
      "content_type":"viedo"
    }
    
    
    POST /index_test_1/type_test_1/a3
    {
      "title":"中国人是最牛的",
      "views":3,
      "content_type":"word"
    }
    
    
    POST /index_test_1/type_test_1/a4
    {
      "title":"我爱你中国",
      "views":4,
      "content_type":"word"
    }
    
    POST /index_test_1/type_test_1/a5
    {
      "title":"在水中的国家",
      "views":5,
      "content_type":"word"
    }
    GET /index_test_1/_search
    {
      "size": 0, 
      "aggs": {
        "views_histogram": {
          "histogram": {
            "field": "views",
            "interval": 2,
            "min_doc_count": 0,
            "extended_bounds": {
              "min": 0,
              "max": 6
            },
            "order": {
              "sum_views": "desc"
            }
          },
          "aggs": {
            "sum_views": {
              "sum": {
                "field": "views"
              }
            }
          }
        }
      }
    }

    以上是数据示例,按照区间段进行分组后聚合views并排序如下:

    POST /index_test_1/type_test_1/_search
    {
      "size": 0, 
      "aggs": {
        "histogram_views": {
          "histogram": {
            "field": "views",  # 按照该字段进行分区间汇总,比如柱状图
            "interval": 2,  # 区间窗口间隔为2
            "min_doc_count": 0,  # 空的区间段返回0
            "extended_bounds": {
              "min": 0,  # 分区间的开始位置
              "max": 6  # 分区间的结束位置
            }
          },
          "aggs": {
            "group_by_title": {
              "terms": {
                "field": "title.raw",  # 不能使用分词后的聚合方式,raw已经指定了keyword不分词。
                "order": {
                  "sum_views": "asc"
                }
              },
              "aggs": {
                "sum_views": {
                  "sum": {
                    "field": "views"
                  }
                }
              }
            }
          }
        }
      }
    }

     如果涉及按照月份或者季度聚合应该使用如下形式:

    GET /tvs/sales/_search
    {
      "size": 0,
      "aggs": {
        "group_by_sold_date": {
          "date_histogram": {
            "field": "sold_date",
            "interval": "quarter", 也可以是month
            "format": "yyyy-MM-dd", 
            "min_doc_count": 0, 即使为空的区间,也要返回
            "extended_bounds" : {
              "min" : "2016-01-01",
              "max" : "2017-01-01"
            }
          },
          "aggs": {
            "group_by_brand": {
              "terms": {
                "field": "brand"
              },
              "aggs": {
                "sum_price": {
                  "sum": {
                    "field": "price"
                  }
                }
              }
            },
            "total_sum_price" : {
              "sum": {
                "field": "price"
              }
            }
          }
        }
      }
    }

     检索自定义分数计算函数:

    GET /index_test_1/type_test_1/_search
    {
      "query": {
        "function_score": {
          "query": {
            "bool": {
              "must": [
               {
                "match": {
                  "title": {
                    "query":"中国",
                    "boost": 0.1
                  }
                }
              }
            ]
            }
          } ,
          "functions": [
             {
                "field_value_factor": {
                  "field": "views"
                  #views字段参与自定义分数计算,获取views的值后乘以weight
                },
             "weight":10
            }
          ]
             ,
        "score_mode": "sum",
        "boost_mode": "sum" #搜索分词分值与自定义分值函数计算使用加法,这样整体的分值计算不会出现突增突降问题
        }
      },
      "sort": {
        "_score": {
          "order": "desc"
        }
      }
    }
  • 相关阅读:
    Java 集合 — ArrayList
    Java 线程 — ScheduledThreadPoolExecutor
    Java 线程 — ThreadPoolExecutor
    Java 线程 — ThreadLocal
    Java 线程 — ConcurrentLinkedQueue
    Java 线程 — ConcurrentHashMap
    Java 线程 — AbstractQueuedSynchronizer
    Java 线程 — JMM Java内存模型
    Java 线程 — synchronized、volatile、锁
    spring源码 — 三、AOP代理生成
  • 原文地址:https://www.cnblogs.com/zzq-include/p/13564993.html
Copyright © 2011-2022 走看看