zoukankan      html  css  js  c++  java
  • druid json格式数据源,多层嵌套官方样例测试,发现两处语法错误bug

    官方提供了下面的样例进行嵌套json扁平化:

    {
     "timestamp": "2015-09-12T12:10:53.155Z",
     "dim1": "qwerty",
     "dim2": "asdf",
     "dim3": "zxcv",
     "ignore_me": "ignore this",
     "metrica": 9999,
     "foo": {"bar": "abc"},
     "foo.bar": "def",
     "nestmet": {"val": 42},
     "hello": [1.0, 2.0, 3.0, 4.0, 5.0],
     "mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],
     "world": [{"hey": "there"}, {"tree": "apple"}],
     "thing": {"food": ["sandwich", "pizza"]}
    }
    我对这个样例进行了批量生成,并传输至kafka中,截取一小段:
    {"timestamp": "2018-12-20T14:12:39","dim1": "qwerty","dim2": "asdf","dim3": "zxcv","ignore_me": "ignore this","metrica": 9999,"foo": {"bar": "abc"},"foo.bar": "def","nestmet":{"val": 42},"hello": [1.0, 2.0, 3.0, 4.0, 5.0],"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],"world": [{"hey": "there"}, {"tree": "apple"}],"thing": {"food": ["sandwich", "pizza"]}}
    {"timestamp": "2018-12-20T14:12:39","dim1": "qwerty","dim2": "asdf","dim3": "zxcv","ignore_me": "ignore this","metrica": 9999,"foo": {"bar": "abc"},"foo.bar": "def","nestmet":{"val": 42},"hello": [1.0, 2.0, 3.0, 4.0, 5.0],"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],"world": [{"hey": "there"}, {"tree": "apple"}],"thing": {"food": ["sandwich", "pizza"]}}
    {"timestamp": "2018-12-20T14:12:40","dim1": "qwerty","dim2": "asdf","dim3": "zxcv","ignore_me": "ignore this","metrica": 9999,"foo": {"bar": "abc"},"foo.bar": "def","nestmet":{"val": 42},"hello": [1.0, 2.0, 3.0, 4.0, 5.0],"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],"world": [{"hey": "there"}, {"tree": "apple"}],"thing": {"food": ["sandwich", "pizza"]}}
    {"timestamp": "2018-12-20T14:12:40","dim1": "qwerty","dim2": "asdf","dim3": "zxcv","ignore_me": "ignore this","metrica": 9999,"foo": {"bar": "abc"},"foo.bar": "def","nestmet":{"val": 42},"hello": [1.0, 2.0, 3.0, 4.0, 5.0],"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],"world": [{"hey": "there"}, {"tree": "apple"}],"thing": {"food": ["sandwich", "pizza"]}}
    {"timestamp": "2018-12-20T14:12:40","dim1": "qwerty","dim2": "asdf","dim3": "zxcv","ignore_me": "ignore this","metrica": 9999,"foo": {"bar": "abc"},"foo.bar": "def","nestmet":{"val": 42},"hello": [1.0, 2.0, 3.0, 4.0, 5.0],"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],"world": [{"hey": "there"}, {"tree": "apple"}],"thing": {"food": ["sandwich", "pizza"]}}
    {"timestamp": "2018-12-20T14:12:41","dim1": "qwerty","dim2": "asdf","dim3": "zxcv","ignore_me": "ignore this","metrica": 9999,"foo": {"bar": "abc"},"foo.bar": "def","nestmet":{"val": 42},"hello": [1.0, 2.0, 3.0, 4.0, 5.0],"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],"world": [{"hey": "there"}, {"tree": "apple"}],"thing": {"food": ["sandwich", "pizza"]}}
    {"timestamp": "2018-12-20T14:12:41","dim1": "qwerty","dim2": "asdf","dim3": "zxcv","ignore_me": "ignore this","metrica": 9999,"foo": {"bar": "abc"},"foo.bar": "def","nestmet":{"val": 42},"hello": [1.0, 2.0, 3.0, 4.0, 5.0],"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],"world": [{"hey": "there"}, {"tree": "apple"}],"thing": {"food": ["sandwich", "pizza"]}}
    {"timestamp": "2018-12-20T14:12:41","dim1": "qwerty","dim2": "asdf","dim3": "zxcv","ignore_me": "ignore this","metrica": 9999,"foo": {"bar": "abc"},"foo.bar": "def","nestmet":{"val": 42},"hello": [1.0, 2.0, 3.0, 4.0, 5.0],"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],"world": [{"hey": "there"}, {"tree": "apple"}],"thing": {"food": ["sandwich", "pizza"]}}
    官方给的解析方式:
    "parseSpec": {
      "format": "json",
      "flattenSpec": {
        "useFieldDiscovery": true,
        "fields": [
          {
            "type": "root",
            "name": "dim1"
          },
          "dim2",
          {
            "type": "path",
            "name": "foo.bar",
            "expr": "$.foo.bar"
          },
          {
            "type": "root",
            "name": "foo.bar"
          },
          {
            "type": "path",
            "name": "path-metric",
            "expr": "$.nestmet.val"
          },
          {
            "type": "path",
            "name": "hello-0",
            "expr": "$.hello[0]"
          },
          {
            "type": "path",
            "name": "hello-4",
            "expr": "$.hello[4]"
          },
          {
            "type": "path",
            "name": "world-hey",
            "expr": "$.world[0].hey"
          },
          {
            "type": "path",
            "name": "worldtree",
            "expr": "$.world[1].tree"
          },
          {
            "type": "path",
            "name": "first-food",
            "expr": "$.thing.food[0]"
          },
          {
            "type": "path",
            "name": "second-food",
            "expr": "$.thing.food[1]"
          },
          {
            "type": "jq",
            "name": "first-food-by-jq",
            "expr": ".thing.food[1]"
          },
          {
            "type": "jq",
            "name": "hello-total",
            "expr": ".hello | sum"
          }
        ]
      },
      "dimensionsSpec" : {
       "dimensions" : [],
       "dimensionsExclusions": ["ignore_me"]
      },
      "timestampSpec" : {
       "format" : "auto",
       "column" : "timestamp"
      }
    }

    生成数据源后,发现不读取kafka数据,检查发现拉取数据的进程失败了,原因是flattenSpec的fields中出现了两个同名(name相同)的字段:

          {
            "type": "path",
            "name": "foo.bar",
            "expr": "$.foo.bar"
          },
          {
            "type": "root",
            "name": "foo.bar"
          },

    修改为下面这样(把path类型字段的name改为foo-bar,避免重名)后重启:

          {
            "type": "path",
            "name": "foo-bar",
            "expr": "$.foo.bar"
          },
          {
            "type": "root",
            "name": "foo.bar"
          },

    重启后日志仍然报错,原因是jq没有sum函数:

          {
            "type": "jq",
            "name": "hello-total",
            "expr": ".hello | sum"
          }

    去掉后重启恢复正常。

    找到原因后,又测试了3层、4层嵌套,都能正常扁平化(flatten),但对于长度不确定的数组,没找到如何为其中的元素添加field key。

    当时没有找到jq中对数组求和(sum)的函数。(注:jq内置的数组求和函数是add,例如 ".hello | add"。)

    jackson-jq  github: https://github.com/eiiches/jackson-jq

    jq  官网:https://stedolan.github.io/jq/

    json-path github:https://github.com/json-path/JsonPath

    官方路径:http://druid.io/docs/latest/ingestion/flatten-json.html

      

  • 相关阅读:
    第三次冲刺
    [操作系统]实验四
    第二个冲刺5.0
    第二个冲刺
    学术诚信与职业道德--个人感想
    软件工程——sprint 1回顾总结
    [读书笔记]
    sprint5.0
    [操作系统]3.0
    学习进度条
  • 原文地址:https://www.cnblogs.com/kcrist/p/10153982.html
Copyright © 2011-2022 走看看