zoukankan      html  css  js  c++  java
  • hive分析脚本p_fact_bi_browser_t.sql


    -- Session-level Hive/MapReduce tuning applied before the ETL statements below.
    -- The notes describe the documented purpose of each option. The actual effect
    -- depends on the Hive version and execution engine in use (TODO confirm).

    -- Print column names as a header row in CLI query output.
    set hive.cli.print.header=true;
    -- Input split sizing, all set to 256000000 (presumably bytes, i.e. ~256 MB).
    set mapred.max.split.size=256000000;
    set mapred.min.split.size.per.node=256000000;
    set mapred.min.split.size.per.rack=256000000;
    -- Reducer sizing: cap of 200 reducers, 1000000000 input bytes per reducer.
    set hive.exec.reducers.max=200;
    set hive.exec.reducers.bytes.per.reducer=1000000000;
    -- Final output is left uncompressed, intermediate data is compressed.
    set hive.exec.compress.output=false;
    set hive.exec.compress.intermediate=true;
    -- Allow independent stages to run in parallel, with up to 8 threads.
    set hive.exec.parallel=true;
    set hive.exec.parallel.thread.number=8;
    -- Map-side aggregation for GROUP BY, plus skew handling for GROUP BY keys.
    set hive.map.aggr=true;
    set hive.groupby.mapaggr.checkinterval=100000;
    set hive.groupby.skewindata=true;
    -- Do not inherit parent directory permissions for new warehouse subdirectories.
    set hive.warehouse.subdir.inherit.perms=false;
    -- Skew-join handling, with 100000 as the skew key threshold.
    -- NOTE(review): no join appears in the visible statements, so these two
    -- options may have no effect here - confirm before relying on them.
    set hive.skewjoin.key=100000;
    set hive.optimize.skewjoin=true;
    -- Automatic map-join conversion and vectorized execution are both turned off.
    set hive.auto.convert.join=false;
    set hive.vectorized.execution.enabled=false;

    -- Unqualified table names in the statements below resolve against parsedb.
    use parsedb;

    -- Stage 0: flatten raw o2o page-view JSON records into FACT_BI_BROWSER_T_00.
    -- Each JSON field is extracted once in the inline view, and the outer query
    -- applies the row filters on the extracted values:
    --   * session_id must be present and non-empty
    --   * records carrying an extend field are excluded
    --   * log_time must fall in the half-open window [v_fm_tm, v_to_tm)
    -- The channel column is the constant o2o, and the JSON user_id field is
    -- stored in the app_id column.
    insert overwrite table FACT_BI_BROWSER_T_00
    select
        'o2o' as channel,
        pv.session_id,
        pv.app_id,
        pv.ip,
        pv.ref_url,
        pv.url,
        pv.stay_time,
        pv.log_time,
        pv.browser
    from (
        select
            get_json_object(src.json, '$.session_id') as session_id,
            get_json_object(src.json, '$.user_id')    as app_id,
            get_json_object(src.json, '$.ip')         as ip,
            get_json_object(src.json, '$.ref_url')    as ref_url,
            get_json_object(src.json, '$.url')        as url,
            get_json_object(src.json, '$.stay_time')  as stay_time,
            get_json_object(src.json, '$.log_time')   as log_time,
            get_json_object(src.json, '$.browser')    as browser,
            get_json_object(src.json, '$.extend')     as extend_val
        from ods_db.ber_o2o_pv src
    ) pv
    where pv.session_id is not null
      and pv.session_id <> ''
      and pv.extend_val is null
      and pv.log_time >= '${v_fm_tm}'
      and pv.log_time < '${v_to_tm}';


    --insert overwrite table FACT_BI_BROWSER_T_00
    --select
    --t.channel,
    --t.session_id,
    --t.app_id,
    --t.ip,
    --t.ref_url,
    --t.url,
    --t.stay_time,
    --t.log_time,
    --t.browser
    --from FACT_BI_BROWSER_T_00 t;

    -- 2. Page views (PV)
    -- Counts rows with a non-null url per channel, browser and calendar day of
    -- log_time, and writes the result to FACT_BI_BROWSER_T_01.
    -- The day is derived once in the inline view and reused for grouping.
    insert overwrite table FACT_BI_BROWSER_T_01
    select
        pv.browser    as BR_NAME,
        count(pv.url) as BI_VALUE,
        pv.log_day    as BI_DATETIME,
        pv.channel    as BI_CHANNEL
    from (
        select
            src.channel,
            src.browser,
            src.url,
            to_date(src.log_time) as log_day
        from FACT_BI_BROWSER_T_00 src
    ) pv
    group by
        pv.channel,
        pv.browser,
        pv.log_day;

    -- Publish the aggregated PV rows into the target year/month/day partition of
    -- FACT_BI_BROWSER_T, replacing whatever that partition held before.
    --   ID          : random UUID with the dashes removed, generated per row
    --   BI_TYPE     : constant 1 (presumably the code for the PV metric - TODO confirm)
    --   BI_DATETIME : aggregated day with a midnight time suffix appended
    insert overwrite table FACT_BI_BROWSER_T
        partition (year = '${year}', month = '${month}', day = '${day}')
    select
        regexp_replace(reflect("java.util.UUID", "randomUUID"), "-", "") as ID,
        1                                                                as BI_TYPE,
        agg.BR_NAME,
        agg.BI_VALUE,
        concat(agg.BI_DATETIME, ' 00:00:00')                             as BI_DATETIME,
        agg.BI_CHANNEL
    from FACT_BI_BROWSER_T_01 agg;

  • 相关阅读:
    asp.net大文件(视频)上传技术
    asp.net大文件(视频)上传实例解析
    asp.net大文件(视频)上传示例
    asp.net大文件(视频)上传实例
    ubuntu电脑查看显卡型号
    ERROR: After October 2020 you may experience errors when installing or updating packages
    tensorflow各个版本需要的CUDA版本以及Cudnn的对应关系
    pycharm设置多GPU运行
    conda常用命令
    ubuntu更改.condarc文件
  • 原文地址:https://www.cnblogs.com/heguoxiu/p/10120644.html
Copyright © 2011-2022 走看看