zoukankan      html  css  js  c++  java
  • hadoop之hive高级操作

    在输出结果较多,需要输出到文件中时,可以在hive CLI之外执行hive -e "sql" > output.txt操作

    但当SQL语句太长或太多时,这种方式不是很方便,可以考虑将SQL语句存为sql.hql文件中,然后执行 hive -f sql.hql >output.txt操作

    如果是多个语句,且要输出到多个文件,只好把SQL写在shell脚本中,下面附一个例子

    start_day=$1
    end_day=$2
    start_date=`date  +"%Y-%m-%d" -d  "${start_day}"`
    end_date=`date  +"%Y-%m-%d" -d  "${end_day}"`
    
    
    active="
    use ycappdata;
    select ctl_dt,'active' ,count(distinct dvid) from sa_daydau_detail 
    where ctl_dt between '${start_date}' and '${end_date}'
    group by ctl_dt,'active' ;"
    
    
    
    loss="
    use ycappdata;
    select date_add(from_unixtime(unix_timestamp(lastactivedate,'yyyy/MM/dd hh:mm:ss'),'yyyy-MM-dd'),15),'loss' ,count(distinct deviceid) from ext_db_apploginstats
    where from_unixtime(unix_timestamp(lastactivedate,'yyyy/MM/dd hh:mm:ss'),'yyyy-MM-dd') between date_sub('${start_date}',15) and date_sub('${end_date}',15)
    group by date_add(from_unixtime(unix_timestamp(lastactivedate,'yyyy/MM/dd hh:mm:ss'),'yyyy-MM-dd'),15),'loss';"
    
    
    active_month_distribute="
    use ycappdata;
    select a.ctl_dt,'active_month_distribute',concat('m',month(start_dt)),count(distinct b.dvid) from 
    (select ctl_dt,dvid from sa_daydau_detail where ctl_dt between '${start_date}' and '${end_date}')a 
    left outer join 
    (select start_dt,dvid from sa_firststartdate_dvid where start_dt between '2017-01-01' and '${end_date}')b 
    on lower(a.dvid)=lower(b.dvid)
    group by a.ctl_dt,'active_month_distribute',concat('m',month(start_dt)) ;"
    
    active_date_distribute="
    use ycappdata;
    select a.ctl_dt,'active_date_distribute',
    case when datediff(a.ctl_dt,b.start_dt)=0 then 'd0' when datediff(a.ctl_dt,b.start_dt)<=30 then 'd30'
    when datediff(a.ctl_dt,b.start_dt)<=60 then 'd60'  when datediff(a.ctl_dt,b.start_dt)<=90 then 'd90'
    when datediff(a.ctl_dt,b.start_dt)<=120 then 'd120' when datediff(a.ctl_dt,b.start_dt)<=150 then 'd150'
    when datediff(a.ctl_dt,b.start_dt)<=180 then 'd180' else 'd181' end,count(distinct b.dvid) from 
    (select ctl_dt,dvid from sa_daydau_detail where ctl_dt between '${start_date}' and '${end_date}')a 
    left outer join 
    (select start_dt,dvid from sa_firststartdate_dvid where start_dt between '2017-01-01' and '${end_date}')b 
    on lower(a.dvid)=lower(b.dvid)
    group by a.ctl_dt,'active_date_distribute',case when datediff(a.ctl_dt,b.start_dt)=0 then 'd0'  when datediff(a.ctl_dt,b.start_dt)<=30 then 'd30'
    when datediff(a.ctl_dt,b.start_dt)<=60 then 'd60'   when datediff(a.ctl_dt,b.start_dt)<=90 then 'd90'
    when datediff(a.ctl_dt,b.start_dt)<=120 then 'd120' when datediff(a.ctl_dt,b.start_dt)<=150 then 'd150'
    when datediff(a.ctl_dt,b.start_dt)<=180 then 'd180' else 'd181' end ;"
    
    
    
    hive -e "${active}"                  >> app_operate.txt
    hive -e "${loss}"                    >> app_operate.txt
    hive -e "${active_month_distribute}" >> app_operate.txt
    hive -e "${active_date_distribute}"  >> app_operate.txt
    
    
    while [ ${start_day} -le ${end_day} ]
    do
    current_date=`date  +"%Y-%m-%d" -d  "${start_day}"`
    
    week_active="
    use ycappdata;
    select '${current_date}','week_active',count(distinct dvid)  from sa_daydau_detail 
    where ctl_dt between date_sub('${current_date}',pmod(datediff('${current_date}', '2017-01-02'), 7)) and '${current_date}'
    group by '${current_date}','week_active';  "
    
    month_active="
    use ycappdata;
    select '${current_date}','month_active',count(distinct dvid)  from sa_daydau_detail 
    where ctl_dt between trunc('${current_date}','MM') and '${current_date}'
    group by '${current_date}','month_active';  "
    
    active_active_distribute="
    use ycappdata;
    select '${current_date}','active_active_distribute',concat('d',days),count(distinct ab.dvid) from 
    (select b.dvid,count(distinct b.ctl_dt) as days from 
    (select ctl_dt,dvid from sa_daydau_detail
    where ctl_dt='${current_date}')a 
    join 
    (select ctl_dt,dvid from sa_daydau_detail
    where ctl_dt between date_sub('${current_date}',14) and '${current_date}')b 
    on  lower(a.dvid)=lower(b.dvid)
    group by b.dvid )ab 
    group by '${current_date}','active_active_distribute',concat('d',days);"
    
    newuser_retain="
    use ycappdata;
    select a.start_dt,'newuser_retain',concat('d',datediff(b.ctl_dt,a.start_dt)),count(distinct b.dvid) from 
    (select start_dt,dvid from sa_firststartdate_dvid
    where start_dt between date_sub('${current_date}',30) and '${current_date}')a 
    left outer join
    (select ctl_dt,dvid from sa_daydau_detail 
    where ctl_dt between date_sub('${current_date}',30) and '${current_date}')b 
    on lower(a.dvid)=lower(b.dvid)
    group by a.start_dt,'newuser_retain',concat('d',datediff(b.ctl_dt,a.start_dt)); "
    
    active_retain="
    use ycappdata;
    select a.ctl_dt,'active_retain',concat('d',datediff(b.ctl_dt,a.ctl_dt)),count(distinct b.dvid) from 
    (select ctl_dt,dvid from sa_daydau_detail 
    where ctl_dt between date_sub('${current_date}',30) and '${current_date}')a 
    left outer join
    (select ctl_dt,dvid from sa_daydau_detail 
    where ctl_dt between date_sub('${current_date}',30) and '${current_date}')b 
    on lower(a.dvid)=lower(b.dvid)
    where a.ctl_dt<=b.ctl_dt
    group by a.ctl_dt,'active_retain',concat('d',datediff(b.ctl_dt,a.ctl_dt)); "
    
    
    
    
    
    echo "${week_active}" 
    echo "${month_active}" 
    echo "${active_active_distribute}" 
    echo "${newuser_retain}" 
    echo "${active_retain}" 
    
    hive -e "${week_active}"                 >> app_operate.txt
    hive -e "${month_active}"                >> app_operate.txt
    hive -e "${active_active_distribute}"    >> app_operate.txt
    hive -e "${newuser_retain}"              >> app_operate.txt
    hive -e "${active_retain}"               >> app_operate.txt
    start_day=`date  +"%Y%m%d" -d  "${start_day} 1 days" `
    done
  • 相关阅读:
    ll command not found 当ll无法识别的解决办法
    idea控制台全屏
    查看centos版本号
    java Error: 无法访问org.apache.http.annotation.ThreadSafe 找不到org.apache.http.annotation.ThreadSafe的类文件
    DigestUtils.md5Hex()加密
    JAVA 8 '::' 关键字
    CVE-2020-1472 NetLogon特权提升漏洞
    OpenSSH的scp命令注入漏洞(CVE-2020-15778)
    redis未授权访问漏洞&简单利用&总结
    常见web信息泄露
  • 原文地址:https://www.cnblogs.com/wangbin2188/p/8252293.html
Copyright © 2011-2022 走看看