zoukankan      html  css  js  c++  java
  • Elasticsearch中关于transform的一个问题分析

    背景:现在有一个派件业务:业务员今天去派件(扫描产生一条派件记录);同一单可能会被重复派送,例如第二天再次派送(记录被更新,以最新的派件操作为准)。现在需要按业务员、按天统计每天的派件数量。
    es版本:7.15.1
    1、创建索引(注意:下面的mapping中没有声明后文实际用到的 order_no、employee 字段;employee 需要映射为 keyword 类型,才能直接用于后面的 terms 聚合):

    PUT t_test_001
    {
      "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 1
      },
      "mappings": {
        "properties": {
          "city_id": {
            "type": "long"
          },
          "city_name": {
            "type": "keyword"
          },
          "create_time": {
            "type": "date"
          },
          "push_date": {
            "type": "date"
          },
          "update_time": {
            "type": "date"
          }
        }
      }
    }

    2、插入测试数据

    POST /t_test_001/_bulk
    { "index": {}}
    { "order_no" : 1,"employee":"张三",  "create_time" : "2021-12-06T08:00:00.000Z", "push_date" : "2021-12-06T08:00:00.000Z", "update_time" : "2021-12-06T08:00:00.000Z"}
    { "index": {}}
    { "order_no" : 2,"employee":"张三",  "create_time" : "2021-12-06T08:00:00.000Z", "push_date" : "2021-12-06T08:00:00.000Z", "update_time" : "2021-12-06T08:00:00.000Z"}
    { "index": {}}
    { "order_no" : 3,"employee":"张三",  "create_time" : "2021-12-07T00:00:00.000Z", "push_date" : "2021-12-07T00:00:00.000Z", "update_time" : "2021-12-07T00:00:00.000Z"}
    { "index": {}}
    { "order_no" : 4,"employee":"张三",  "create_time" : "2021-12-07T00:00:00.000Z", "push_date" : "2021-12-07T00:00:00.000Z", "update_time" : "2021-12-07T00:00:00.000Z"}
    { "index": {}}
    { "order_no" : 5,"employee":"王五",  "create_time" : "2021-12-06T08:00:00.000Z", "push_date" : "2021-12-06T08:00:00.000Z", "update_time" : "2021-12-06T08:00:00.000Z"}
    { "index": {}}
    { "order_no" : 6,"employee":"王五",  "create_time" : "2021-12-06T08:00:00.000Z", "push_date" : "2021-12-06T08:00:00.000Z", "update_time" : "2021-12-06T08:00:00.000Z"}
    { "index": {}}
    { "order_no" : 7,"employee":"王五",  "create_time" : "2021-12-07T00:00:00.000Z", "push_date" : "2021-12-07T00:00:00.000Z", "update_time" : "2021-12-07T00:00:00.000Z"}
    { "index": {}}
    { "order_no" : 8,"employee":"王五",  "create_time" : "2021-12-07T00:00:00.000Z", "push_date" : "2021-12-07T00:00:00.000Z", "update_time" : "2021-12-07T00:00:00.000Z"}

    3、查询一下看看

    GET /t_test_001/_search
    {
      "size": 10
    }

    结果:

    {
      "took" : 0,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 8,
          "relation" : "eq"
        },
        "max_score" : 1.0,
        "hits" : [
          {
            "_index" : "t_test_001",
            "_type" : "_doc",
            "_id" : "GLztkn0BDKE3xmcewwIG",
            "_score" : 1.0,
            "_source" : {
              "order_no" : 1,
              "employee" : "张三",
              "create_time" : "2021-12-06T08:00:00.000Z",
              "push_date" : "2021-12-06T08:00:00.000Z",
              "update_time" : "2021-12-06T08:00:00.000Z"
            }
          },
          {
            "_index" : "t_test_001",
            "_type" : "_doc",
            "_id" : "Gbztkn0BDKE3xmcewwIG",
            "_score" : 1.0,
            "_source" : {
              "order_no" : 2,
              "employee" : "张三",
              "create_time" : "2021-12-06T08:00:00.000Z",
              "push_date" : "2021-12-06T08:00:00.000Z",
              "update_time" : "2021-12-06T08:00:00.000Z"
            }
          },
          {
            "_index" : "t_test_001",
            "_type" : "_doc",
            "_id" : "Grztkn0BDKE3xmcewwIG",
            "_score" : 1.0,
            "_source" : {
              "order_no" : 3,
              "employee" : "张三",
              "create_time" : "2021-12-07T00:00:00.000Z",
              "push_date" : "2021-12-07T00:00:00.000Z",
              "update_time" : "2021-12-07T00:00:00.000Z"
            }
          },
          {
            "_index" : "t_test_001",
            "_type" : "_doc",
            "_id" : "G7ztkn0BDKE3xmcewwIG",
            "_score" : 1.0,
            "_source" : {
              "order_no" : 4,
              "employee" : "张三",
              "create_time" : "2021-12-07T00:00:00.000Z",
              "push_date" : "2021-12-07T00:00:00.000Z",
              "update_time" : "2021-12-07T00:00:00.000Z"
            }
          },
          {
            "_index" : "t_test_001",
            "_type" : "_doc",
            "_id" : "HLztkn0BDKE3xmcewwIG",
            "_score" : 1.0,
            "_source" : {
              "order_no" : 5,
              "employee" : "王五",
              "create_time" : "2021-12-06T08:00:00.000Z",
              "push_date" : "2021-12-06T08:00:00.000Z",
              "update_time" : "2021-12-06T08:00:00.000Z"
            }
          },
          {
            "_index" : "t_test_001",
            "_type" : "_doc",
            "_id" : "Hbztkn0BDKE3xmcewwIG",
            "_score" : 1.0,
            "_source" : {
              "order_no" : 6,
              "employee" : "王五",
              "create_time" : "2021-12-06T08:00:00.000Z",
              "push_date" : "2021-12-06T08:00:00.000Z",
              "update_time" : "2021-12-06T08:00:00.000Z"
            }
          },
          {
            "_index" : "t_test_001",
            "_type" : "_doc",
            "_id" : "Hrztkn0BDKE3xmcewwIG",
            "_score" : 1.0,
            "_source" : {
              "order_no" : 7,
              "employee" : "王五",
              "create_time" : "2021-12-07T00:00:00.000Z",
              "push_date" : "2021-12-07T00:00:00.000Z",
              "update_time" : "2021-12-07T00:00:00.000Z"
            }
          },
          {
            "_index" : "t_test_001",
            "_type" : "_doc",
            "_id" : "H7ztkn0BDKE3xmcewwIG",
            "_score" : 1.0,
            "_source" : {
              "order_no" : 8,
              "employee" : "王五",
              "create_time" : "2021-12-07T00:00:00.000Z",
              "push_date" : "2021-12-07T00:00:00.000Z",
              "update_time" : "2021-12-07T00:00:00.000Z"
            }
          }
        ]
      }
    }

    4、创建一个transform,将数据按天、业务员聚合

    PUT _transform/t_test_transform
    {
      "id": "t_test_transform",
      "source": {
        "index": [
          "t_test_001"
        ]
      },
      "dest": {
        "index": "t_test_x"
      },
      "frequency": "60s",
      "sync": {
        "time": {
          "field": "update_time",
          "delay": "60s"
        }
      },
      "pivot": {
        "group_by": {
          "employee": {
            "terms": {
              "field": "employee"
            }
          },
          "push_date": {
            "date_histogram": {
              "field": "push_date",
              "calendar_interval": "1d"
            }
          }
        },
        "aggregations": {
          "sum_all": {
            "value_count": {
              "field": "_id"
            }
          }
        }
      }
    }

    5、开启transform

    POST _transform/t_test_transform/_start

    6、查看transform转换的索引结果

    GET /t_test_x/_search
    {}

    结果:如图,张三2021-12-06和07号各派送两单:

    7、12月7号,order_no = 1 的订单再次被张三派送,数据被更新:

    POST /t_test_001/_update/GLztkn0BDKE3xmcewwIG
    {
      "doc": {
        "push_date": "2021-12-07T03:27:12.000Z",
        "update_time": "2021-12-07T03:27:12.000Z"
      }
    }

    注意模拟操作数据的真实性,更新时间在上一个检查点之后!【截图中的检查点时间是北京时间】

    8、预期transform转换的结果是张三12-6号的派单统计数据由2减少为1;12-7号的派单数据从2增加到3。


    9、查询transform转换的索引结果

    GET /t_test_x/_search
    {}

    结果:张三12-6号的派单统计数据为2没有减少,不符合预期;12-7号的派单数据为3,符合预期。

    10、再查询一下原始数据:

    GET /t_test_001/_search
    {}

    11、再统计一下数据:

    GET /t_test_001/_search
    {
      "size": 0,
      "aggs": {
        "employee": {
          "terms": {
            "field": "employee"
          },
          "aggs": {
            "push_date": {
              "date_histogram": {
                "field": "push_date",
                "calendar_interval": "1d"
              }
            }
          }
        }
      }
    }

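    结果很显然:张三 12-06号派送量为1,12-07号派送量为3!!!而transform统计的结果,此时就错了!!!这个怎么理解呢?是es的transform不支持这种数据变化场景下的聚合,还是说这是一个bug呢?我理解,持续型transform在每个检查点只会根据update_time落在本次检查范围内的文档来确定需要重新计算的分组,而被更新的文档此时只属于(张三, 12-07)这个分组,所以它原先所在的(张三, 12-06)分组不会被重新计算;可能是因为考虑到性能的原因,es的transform在这种场景下是有这种问题的。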

    若有错误之处,望大家指正。谢谢。

  • 相关阅读:
    Redis五种数据结构解析
    深入理解JAVA线程池
    数据看板superset在Windows环境下安装
    为什么要用docker
    非常完善的两个微服务框架比较(SpringCloud与Dubbo)
    Nginx配置之负载均衡、限流、缓存、黑名单和灰度发布
    jq
    22 道高频 JavaScript 手写面试题及答案
    input 限制 中文输入
    vue(js)点击目标div以外区域将目标div隐藏
  • 原文地址:https://www.cnblogs.com/hbuuid/p/15651476.html
Copyright © 2011-2022 走看看