zoukankan      html  css  js  c++  java
  • hive-sql

    #原始示例数据
    #1,k1|k2|k3|k1
    #2,k1|k1|k5|k3|k6
    #3,k3|k6|k7|k8

    #建表语句
    CREATE TABLE IF NOT EXISTS words(id INT,kw STRING)
    ROW FORMAT delimited
    FIELDS TERMINATED BY ','
    collection items TERMINATED BY '|';

    #加载数据(local从本地,否则从hdfs;overwrite覆盖原有数据)
    load data [local] inpath 'J:/test.txt' [overwrite] into table words;

    #字段转行(拼接成1个字段),结果如下
    1 k1
    1 k2
    1 k3
    1 k1
    2 k1
    2 k1
    2 k5
    2 k3
    2 k6
    3 k3
    3 k6
    3 k7
    3 k8
    SELECT t1.id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w;


    #字段转行
    SELECT t1.id ,t1.k_w ,COUNT(1) FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w;


    #相同id数据分组后,将多行转1个字段类型为集合字符串,结果如下
    1 k1:2,k2:1,k3:1
    2 k1:2,k3:1,k5:1,k6:1
    3 k3:1,k6:1,k7:1,k8:1
    SELECT t2.id AS id,CONCAT_WS(',',collect_set(t2.kw_time)) AS kw_time_set FROM (
    SELECT t1.id AS id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w
    ) AS t2
    GROUP BY t2.id;

    #相同id数据分组后,将多行转1个字段类型为集合,结果如下
    1 ["k1:2","k2:1","k3:1"]
    2 ["k1:2","k3:1","k5:1","k6:1"]
    3 ["k3:1","k6:1","k7:1","k8:1"]
    SELECT t2.id AS id,collect_set(t2.kw_time) AS kw_time_set FROM (
    SELECT t1.id AS id,CONCAT_WS(':',t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
    SELECT id,k_w FROM words lateral VIEW explode(split(kw,'\|')) f AS k_w
    ) AS t1
    GROUP BY t1.id,t1.k_w
    ) AS t2
    GROUP BY t2.id;
  • 相关阅读:
    nsstring -->nsdictionary
    卫辉市,
    iPhone 手机内存,
    连接错误,等的好漫长,
    BZOJ 1631==USACO 2007== POJ 3268 Cow Party奶牛派对
    POJ 2837 Til the Cows Come Home
    POJ 1285 确定比赛名次
    Codevs 2833 奇怪的梦境
    Codevs 1063 合并果子
    Codevs 1231 最优布线问题
  • 原文地址:https://www.cnblogs.com/zzq-include/p/10836877.html
Copyright © 2011-2022 走看看