zoukankan      html  css  js  c++  java
  • hive-sql

    #原始示例数据
    #1,k1|k2|k3|k1
    #2,k1|k1|k5|k3|k6
    #3,k3|k6|k7|k8

    #建表语句
    CREATE TABLE IF NOT EXISTS words(id INT,kw STRING)
    ROW FORMAT delimited
    FIELDS TERMINATED BY ','
    collection items TERMINATED BY '|';

    #加载数据(local从本地,否则从hdfs;overwrite覆盖原有数据)
    load data [local] inpath 'J:/test.txt' [overwrite] into table words;

    #字段转行(拼接成1个字段),结果如下
    1 k1
    1 k2
    1 k3
    1 k1
    2 k1
    2 k1
    2 k5
    2 k3
    2 k6
    3 k3
    3 k6
    3 k7
    3 k8
    SELECT t1.id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w;


    #字段转行
    SELECT t1.id ,t1.k_w ,COUNT(1) FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w;


    #相同id数据分组后,将多行转1个字段类型为集合字符串,结果如下
    1 k1:2,k2:1,k3:1
    2 k1:2,k3:1,k5:1,k6:1
    3 k3:1,k6:1,k7:1,k8:1
    SELECT t2.id AS id,CONCAT_WS(',',collect_set(t2.kw_time)) AS kw_time_set FROM (
    SELECT t1.id AS id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w
    ) AS t2
    GROUP BY t2.id;

    #相同id数据分组后,将多行转1个字段类型为集合,结果如下
    1 ["k1:2","k2:1","k3:1"]
    2 ["k1:2","k3:1","k5:1","k6:1"]
    3 ["k3:1","k6:1","k7:1","k8:1"]
    SELECT t2.id AS id,collect_set(t2.kw_time) AS kw_time_set FROM (
    SELECT t1.id AS id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w
    ) AS t2
    GROUP BY t2.id;
  • 相关阅读:
    阻塞队列之LinkedTransferQueue
    BlockingQueue drainTo()
    jedis常用API
    在Redis集群中使用pipeline批量插入
    序列化
    springmvc单文件上传
    hibernateValidate
    springmvc使用spring自带日期类型验证
    springmvc自定义日期编辑器
    springmvc__SimpleUrlHandlerMapping(对访问地址进行加工,以键值对的形式)
  • 原文地址:https://www.cnblogs.com/zzq-include/p/10836877.html
Copyright © 2011-2022 走看看