zoukankan      html  css  js  c++  java
  • es界面的分组,求平均值的操作

    第一个分析需求:计算每个tag下的商品数量

    默认情况下,大部分字段都是被索引的(有个倒排索引),以使得他们可以被搜索。
    然而,排序、聚合以及在脚本中访问字段的值,需要与搜索不同的数据访问模式。

    搜索需要回答的问题是“哪些文档包含这些搜索的内容?”,而排序和聚合需要回答的问题是“这个文档中这个字段的值是什么?”

    大部分字段都可以使用在索引时(index-time)生成、存储在磁盘上的doc_values来支持这种数据访问模式;
    然而,text字段不支持doc_values。

    取而代之的是,text字段使用一种叫做fielddata的查询时(query-time)内存数据结构。该数据结构会在字段首次被用于聚合、排序或脚本时按需构建。
    它的构建过程是:从磁盘读取每个段(segment)的整个倒排索引,把“词项<->文档”的关系反转过来,并将结果存储在JVM堆内存中。

    默认情况下,text字段的fielddata是关闭的。
    对text字段做聚合之前,需要先为相应的字段开启fielddata(见下方的mapping设置),否则会报如下错误:
    Fielddata is disabled on text fields by default.
    Set fielddata=true on [your_field_name] in order to load fielddata
    in memory by uninverting the inverted index. Note that this can however use significant memory.

    PUT my_index/_mapping/my_type
    {
    "properties": {
    "my_field": {
    "type": "text",
    "fielddata": true
    }
    }
    }

    GET /ecommerce/product/_search
    {
    "aggs": {
    "group_by_tags": {
    "terms": { "field": "tags" }
    }
    }
    }

    将文本field的fielddata属性设置为true

    PUT /ecommerce/_mapping/product
    {
    "properties": {
    "tags": {
    "type": "text",
    "fielddata": true
    }
    }
    }

    GET /ecommerce/product/_search
    {
    "aggs": {
    "group_by_aggs": {
    "terms": {
    "field": "tags"
    }
    }
    }
    }

    {
    "took": 2,
    "timed_out": false,
    "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
    },
    "hits": {
    "total": 3,
    "max_score": 1,
    "hits": [
    {
    "_index": "ecommerce",
    "_type": "product",
    "_id": "2",
    "_score": 1,
    "_source": {
    "name": "jiajieshi yagao",
    "desc": "youxiao fangzhu",
    "price": 25,
    "producer": "jiajieshi producer",
    "tags": [
    "fangzhu"
    ]
    }
    },
    {
    "_index": "ecommerce",
    "_type": "product",
    "_id": "1",
    "_score": 1,
    "_source": {
    "name": "gaolujie yagao",
    "desc": "gaoxiao meibai",
    "price": 30,
    "producer": "gaolujie producer",
    "tags": [
    "meibai",
    "fangzhu"
    ]
    }
    },
    {
    "_index": "ecommerce",
    "_type": "product",
    "_id": "3",
    "_score": 1,
    "_source": {
    "name": "zhonghua yagao",
    "desc": "caoben zhiwu",
    "price": 40,
    "producer": "zhonghua producer",
    "tags": [
    "qingxin"
    ]
    }
    }
    ]
    },
    "aggregations": {
    "group_by_aggs": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "fangzhu",
    "doc_count": 2
    },
    {
    "key": "meibai",
    "doc_count": 1
    },
    {
    "key": "qingxin",
    "doc_count": 1
    }
    ]
    }
    }
    }

    ----------------------------------------------------------------------------------------------------------------

    第二个聚合分析的需求:对名称中包含yagao的商品,计算每个tag下的商品数量

    GET /ecommerce/product/_search
    {
    "size": 0,
    "query": {
    "match": {
    "name": "yagao"
    }
    },
    "aggs": {
    "all_tags": {
    "terms": {
    "field": "tags"
    }
    }
    }
    }

    ----------------------------------------------------------------------------------------------------------------

    第三个聚合分析的需求:先分组,再算每组的平均值,计算每个tag下的商品的平均价格

    GET /ecommerce/product/_search
    {
    "size": 0,
    "aggs": {
    "group_by_tags": {
    "terms": {
    "field": "tags"
    },
    "aggs": {
    "avg_price": {
    "avg": {
    "field": "price"
    }
    }
    }
    }
    }
    }

    {
    "took": 62,
    "timed_out": false,
    "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
    },
    "hits": {
    "total": 3,
    "max_score": 0,
    "hits": []
    },
    "aggregations": {
    "group_by_tags": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "fangzhu",
    "doc_count": 2,
    "avg_price": {
    "value": 27.5
    }
    },
    {
    "key": "meibai",
    "doc_count": 1,
    "avg_price": {
    "value": 30
    }
    },
    {
    "key": "qingxin",
    "doc_count": 1,
    "avg_price": {
    "value": 40
    }
    }
    ]
    }
    }
    }
    ----------------------------------------------------------------------------------------------------------------

    第四个数据分析需求:计算每个tag下的商品的平均价格,并且按照平均价格降序排序(通过terms聚合的order参数指定排序条件)

    GET /ecommerce/product/_search
    {
    "size": 0,
    "aggs" : {
    "all_tags" : {
    "terms" : { "field" : "tags", "order": { "avg_price": "desc" } },
    "aggs" : {
    "avg_price" : {
    "avg" : { "field" : "price" }
    }
    }
    }
    }
    }

    ----------------------------------------------------------------------------------------------------------------

    第五个数据分析需求:按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格

    GET /ecommerce/product/_search
    {
    "size": 0,
    "aggs": {
    "goup_by_price": {
    "range": {
    "field": "price",
    "ranges": [
    {
    "from": 0,
    "to": 20
    },{
    "from": 20,
    "to": 40
    },{
    "from": 40,
    "to": 50
    }
    ]
    },
    "aggs": {
    "group_tags": {
    "terms": {
    "field": "tags"
    },
    "aggs": {
    "avg_price": {
    "avg": {
    "field": "price"
    }
    }
    }
    }
    }
    }
    }
    }

    结果:

    {
    "took": 72,
    "timed_out": false,
    "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
    },
    "hits": {
    "total": 3,
    "max_score": 0,
    "hits": []
    },
    "aggregations": {
    "goup_by_price": {
    "buckets": [
    {
    "key": "0.0-20.0",
    "from": 0,
    "to": 20,
    "doc_count": 0,
    "group_tags": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": []
    }
    },
    {
    "key": "20.0-40.0",
    "from": 20,
    "to": 40,
    "doc_count": 2,
    "group_tags": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "fangzhu",
    "doc_count": 2,
    "avg_price": {
    "value": 27.5
    }
    },
    {
    "key": "meibai",
    "doc_count": 1,
    "avg_price": {
    "value": 30
    }
    }
    ]
    }
    },
    {
    "key": "40.0-50.0",
    "from": 40,
    "to": 50,
    "doc_count": 1,
    "group_tags": {
    "doc_count_error_upper_bound": 0,
    "sum_other_doc_count": 0,
    "buckets": [
    {
    "key": "qingxin",
    "doc_count": 1,
    "avg_price": {
    "value": 40
    }
    }
    ]
    }
    }
    ]
    }
    }
    }

  • 相关阅读:
    APP测试
    Pycharm+Rf框架的自动化
    Robot Framework框架做UI自动化测试的介绍
    python-元组
    python-list一些用法
    [Python]之列表list
    接口测试用例(安全测试)
    cookie与session机制
    接口测试与网络通讯原理
    简单常用的SQL命令
  • 原文地址:https://www.cnblogs.com/siye1989/p/11559093.html
Copyright © 2011-2022 走看看