zoukankan      html  css  js  c++  java
  • hive-sql

    #原始示例数据
    #1,k1|k2|k3|k1
    #2,k1|k1|k5|k3|k6
    #3,k3|k6|k7|k8

    #建表语句
    CREATE TABLE IF NOT EXISTS words(id INT,kw STRING)
    ROW FORMAT delimited
    FIELDS TERMINATED BY ','
    collection items TERMINATED BY '|';

    #加载数据(local从本地,否则从hdfs;overwrite覆盖原有数据)
    load data [local] inpath 'J:/test.txt' [overwrite] into table words;

    #字段转行(拼接成1个字段),结果如下
    1 k1
    1 k2
    1 k3
    1 k1
    2 k1
    2 k1
    2 k5
    2 k3
    2 k6
    3 k3
    3 k6
    3 k7
    3 k8
    SELECT t1.id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w;


    #字段转行
    SELECT t1.id ,t1.k_w ,COUNT(1) FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w;


    #相同id数据分组后,将多行转1个字段类型为集合字符串,结果如下
    1 k1:2,k2:1,k3:1
    2 k1:2,k3:1,k5:1,k6:1
    3 k3:1,k6:1,k7:1,k8:1
    SELECT t2.id AS id,CONCAT_WS(',',collect_set(t2.kw_time)) AS kw_time_set FROM (
    SELECT t1.id AS id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w
    ) AS t2
    GROUP BY t2.id;

    #相同id数据分组后,将多行转1个字段类型为集合,结果如下
    1 ["k1:2","k2:1","k3:1"]
    2 ["k1:2","k3:1","k5:1","k6:1"]
    3 ["k3:1","k6:1","k7:1","k8:1"]
    SELECT t2.id AS id,collect_set(t2.kw_time) AS kw_time_set FROM (
    SELECT t1.id AS id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w
    ) AS t2
    GROUP BY t2.id;
  • 相关阅读:
    动态规划--数字三角形问题
    C#操作Office.word(一)
    【算法参考】动态规划
    数据结构--图的定义和存储结构
    重定向输入输出流--freopen
    C++内联函数、函数模板之于头文件
    C++静态局部对象
    C++函数模板
    条款05:了解C++默默编写并调用哪些函数
    设计模式 之 装饰器模式
  • 原文地址:https://www.cnblogs.com/zzq-include/p/10836877.html
Copyright © 2011-2022 走看看