zoukankan      html  css  js  c++  java
  • hive解析json数组数据

    /*
    样本数据
    60862 2227577 CN020004021585629448991 oe8GejqY8gkJwP8FX0149SltKi94 1585584000 [{"key":"key1","value":"营养健康"},{"key":"key2","value":null},{"key":"key3","value":"十分满意"},{"key":"key4","value":"十分满意"},{"key":"key5","value":null},{"key":"key6","value":null},{"key":"key7","value":"十分满意"},{"key":"key8","value":null},{"key":"key9","value":null},{"key":"key10","value":"十分满意"},{"key":"key11","value":null},{"key":"key12","value":null},{"key":"key13","value":null},{"key":"key14","value":null},{"key":"key15","value":null},{"key":"key16","value":null},{"key":"key17","value":null},{"key":"key18","value":null},{"key":"key19","value":null},{"key":"key20","value":null},{"key":"key21","value":null},{"key":"key22","value":null},{"key":"key23","value":null},{"key":"key24","value":null},{"key":"key25","value":null},{"key":"key26","value":null},{"key":"key27","value":null},{"key":"key28","value":null},{"key":"key29","value":"十分满意"},{"key":"key30","value":null},{"key":"key31","value":null},{"key":"key32","value":"十分满意"},{"key":"key33","value":null},{"key":"key34","value":null},{"key":"key35","value":"是"},{"key":"key36","value":null},{"key":"key37","value":null},{"key":"key38","value":null},{"key":"key39","value":null},{"key":"key40","value":null},{"key":"key41","value":null},{"key":"key42","value":null},{"key":"key43","value":null},{"key":"key44","value":null},{"key":"key45","value":null},{"key":"key46","value":"10 – 10, 一定会"},{"key":"key47","value":null},{"key":"key48","value":null},{"key":"key49","value":null},{"key":"key50","value":null},{"key":"key51","value":null},{"key":"key52","value":null},{"key":"key53","value":null},{"key":"key54","value":"否"},{"key":"key55","value":null},{"key":"key56","value":null},{"key":"key57","value":null},{"key":"key58","value":null},{"key":"key59","value":"1人"},{"key":"key60","value":null},{"key":"key61","value":"上班族"},{"key":"key62","value":null},{"key":"key63","value":"健康控脂"}] 2020-04-01 07:43:35 2020-04-02 15:05:33 MrTang 2020-04-01
    */
    -- Hive demo: parsing a JSON array that is stored in a string column.
    -- (Re)create the external staging table over the raw survey-answer files.
    -- The `answer` column holds the whole JSON array as one string.
    -- `date` is a reserved keyword from Hive 1.2 on, so the partition column is back-quoted.
    -- NOTE(review): the sample row contains spaces inside the timestamp fields and
    -- inside one answer value, so a single-space field delimiter would split them.
    -- Confirm the real files are not tab-delimited ('\t') before relying on this DDL.
    drop table if exists test_json;
    CREATE external TABLE if not exists test_json
    ( id string ,
    surveyId string COMMENT '评价ID',
    tradeNo string COMMENT '订单号',
    openId string ,
    identifier string COMMENT '查询时间标识,表示同步何日的数据',
    answer string COMMENT '问卷答案详情',
    created string ,
    extract_time string,
    operator string
    )
    partitioned by (`date` string)
    row format delimited
    fields terminated by ' '
    stored as textfile
    ;

    -- Load data: register the 2020-04-01 partition on top of its HDFS directory.
    -- `if not exists` keeps the script re-runnable; `date` is back-quoted because it is
    -- a reserved keyword from Hive 1.2 on.
    alter table test_json add if not exists partition (`date`='2020-04-01')
    location '/user/gbw/voucher_answer/2020/202004/2020-04-01'
    ;
    -- Split the JSON array in `answer` into one JSON object per row and store the
    -- result in the work table test_json_tmp.
    --   1. regexp_replace removes only the array's leading '[' and trailing ']'
    --      (brackets inside answer values are left alone).
    --   2. split cuts at every comma that sits between '}' and '{'. The lookbehind and
    --      lookahead do not consume the braces, so each piece stays a complete object
    --      and no intermediate ';' delimiter is needed.
    --   3. lateral view explode emits one output row per array element.
    -- Backslashes are doubled because Hive string literals consume one escape level
    -- before the pattern reaches the Java regex engine.
    drop table if exists test_json_tmp;
    create table test_json_tmp
    as
    SELECT a.id
    ,a.surveyId -- review ID
    ,a.tradeNo -- order number
    ,a.openId
    ,a.identifier -- query-time marker: which day's data was synced
    ,b.answer -- one JSON object ({"key":...,"value":...}) per row
    ,a.created
    -- extract_time is not carried over into the work table
    ,'MrTang' as operator
    from test_json a
    lateral view explode(split(regexp_replace(a.answer, '^\\s*\\[|\\]\\s*$', ''), '(?<=\\})\\s*,\\s*(?=\\{)')) b as answer
    ;
    -- Parse the single JSON objects produced by the split step and keep the rows
    -- whose key is 'key4' (friendliness rating) and whose value is '十分满意'
    -- ("very satisfied"). The key/value pair is extracted once in the subquery and
    -- reused for both the output columns and the filter.
    select t.tradeNo -- order number
    ,t.answer
    ,t.key
    ,t.value
    from (
    select tradeNo
    ,answer
    ,get_json_object(answer,'$.key') as key
    ,get_json_object(answer,'$.value') as value
    from test_json_tmp
    ) t
    where t.key='key4'
    and t.value='十分满意'
    ;

  • 相关阅读:
    JVM 综述
    看 Netty 在 Dubbo 中如何应用
    Netty 心跳服务之 IdleStateHandler 源码分析
    Netty 高性能之道
    Netty 解码器抽象父类 ByteToMessageDecoder 源码解析
    Netty 源码剖析之 unSafe.write 方法
    Netty 出站缓冲区 ChannelOutboundBuffer 源码解析(isWritable 属性的重要性)
    Netty 源码剖析之 unSafe.read 方法
    Netty 内存回收之 noCleaner 策略
    Netty 源码阅读的思考------耗时业务到底该如何处理
  • 原文地址:https://www.cnblogs.com/guweiwei/p/14976831.html
Copyright © 2011-2022 走看看