zoukankan      html  css  js  c++  java
  • AJK_HIVE_SQL

    -- 功能描述: TEST_SPIDER建表语句集合
    -- 涉及集市表: ajk_hse_detail_crawl,ajk_hse_detail_crawl_fq,ajk_hse_list_pr_crawl,ajk_hse_list_pr_crawl_fq,ajk_cmnt_detail_crawl,ajk_cmnt_detail_crawl_fq,ajk_cmnt_list_pr_crawl,ajk_cmnt_list_pr_crawl_fq
    -- 颗粒度: 月
    -- 创建人: 周钊 20210609
    -- 最后修改人: 周钊 20210609
    -- 审批人:
    -- 备注:工作交接


    -- 一些设置的语句
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set spark.executor.instances=6;
    set spark.serializer=org.apache.spark.serializer.KryoSerializer;
    set hive.exec.max.dynamic.partitions.pernode=99999;
    set hive.exec.max.dynamic.partitions=999999;
    set hive.exec.max.created.files=999999;
    set hive.map.aggr=true;
    set hive.groupby.skewindata=true;
    set hive.log.explain.output=false;


    -- test_spider 把csv文件的数据导入到数据表中
    load data inpath '/user/XXX/test.csv' into test_spider.XXX;


    -- test_spider 把导入的二手挂牌详情数据去重并插入分区表中
    INSERT INTO test_spider.ajk_hse_detail_crawl_fq SELECT
    uuid(),
    T1.ajk_area_orig,
    T1.ajk_cmnt_id_orig,
    T1.ajk_cmnt_name_orig,
    T1.ajk_alias_name_orig,
    T1.ajk_cmnt_addr_orig,
    T1.get_method,
    T1.ajk_srh_vle,
    T1.ajk_hse_title,
    T1.ajk_hse_title_url,
    T1.ajk_hse_list_pr,
    T1.ajk_hse_cmnt_nm,
    T1.ajk_hse_dtl_url,
    T1.ajk_hse_loc_dist,
    T1.ajk_hse_biz_dist,
    T1.ajk_hse_loc_add,
    T1.ajk_hse_bld_year,
    T1.ajk_hse_tpcd,
    T1.ajk_hse_prop_year,
    T1.ajk_hse_prop_type,
    T1.ajk_hse_is_first,
    T1.ajk_hse_type,
    T1.ajk_hse_blg_area,
    T1.ajk_hse_twd,
    T1.ajk_hse_ttl_flr,
    T1.ajk_hse_flr,
    T1.ajk_hse_is_elvt,
    T1.ajk_hse_is_heat,
    T1.ajk_hse_per_pr,
    T1.ajk_ref_hse_dwn_pmt,
    T1.ajk_ref_hse_mthly_pmt,
    T1.ajk_hse_deg_dect,
    T1.ajk_hse_year_per,
    T1.ajk_hse_is_unq,
    T1.ajk_hse_put_tm,
    T1.ajk_hse_shk,
    T1.ajk_hse_bad,
    T1.ajk_hse_crawl_tm,
    T1.batch_id,
    T1.is_avl,
    T1.crt_tm,
    T1.crt_psn,
    T1.upt_tm,
    T1.upt_psn,
    T1.ajk_prov,
    T1.ajk_city_orig
    FROM
    (
    SELECT
    T.*,
    row_number() over (
    PARTITION BY T.ajk_prov,
    T.ajk_city_orig,
    T.ajk_city_orig,
    T.ajk_area_orig,
    T.ajk_hse_cmnt_nm,
    T.ajk_hse_title,
    T.ajk_hse_title_url,
    T.ajk_hse_per_pr
    ) RN
    FROM
    test_spider.ajk_hse_detail_crawl T
    ) T1
    WHERE
    T1.RN = 1
    AND T1.BATCH_ID = "202104011000";


    -- test_spider 把导入的二手挂牌均价数据去重并插入分区表中
    INSERT INTO test_spider.ajk_hse_list_pr_crawl_fq SELECT
    uuid(),
    AJK_AREA_ORIG,
    AJK_CMNT_ID_ORIG,
    AJK_CMNT_NM_ORIG,
    AJK_ALIAS_NM_ORIG,
    AJK_CMNT_ADDR_ORIG,
    GET_METHOD,
    SRH_VAL,
    AJK_TITLE,
    AJK_HSE_URL,
    AJK_HSE_LIST_PR,
    AJK_HSE_SQ_LIST_PR,
    AJK_HSE_TYPE,
    AJK_HSE_AREA,
    AJK_HSE_TWD,
    AJK_HSE_DEG_DECT,
    AJK_HSE_FLR_TYPE,
    AJK_HSE_TTL_FLR,
    AJK_HSE_BLD_YEAR,
    AJK_HSE_BLD_STRU,
    AJK_HSE_POS,
    AJK_HSE_CRAWL_TM,
    BATCH_ID,
    IS_AVL,
    CRT_TM,
    CRT_PSN,
    UPT_TM,
    UPT_PSN,
    AJK_PROV,
    AJK_CITY_ORIG
    FROM
    (
    SELECT
    T.*,
    row_number() over (
    PARTITION BY T.AJK_PROV,
    T.AJK_CITY_ORIG,
    T.AJK_AREA_ORIG,
    T.AJK_CMNT_ID_ORIG,
    T.AJK_CMNT_NM_ORIG,
    T.AJK_ALIAS_NM_ORIG,
    T.AJK_CMNT_ADDR_ORIG,
    T.GET_METHOD,
    T.SRH_VAL,
    T.AJK_TITLE,
    T.AJK_HSE_URL,
    T.AJK_HSE_LIST_PR,
    T.AJK_HSE_SQ_LIST_PR,
    T.AJK_HSE_TYPE,
    T.AJK_HSE_AREA,
    T.AJK_HSE_TWD,
    T.AJK_HSE_DEG_DECT,
    T.AJK_HSE_FLR_TYPE,
    T.AJK_HSE_TTL_FLR,
    T.AJK_HSE_BLD_YEAR,
    T.AJK_HSE_BLD_STRU,
    T.AJK_HSE_POS,
    T.BATCH_ID
    ) RN
    FROM
    test_spider.ajk_hse_list_pr_crawl T
    ) T1
    WHERE
    T1.RN = 1
    AND T1.BATCH_ID = "202104011000";


    -- test_spider 把导入的二手小区详情数据去重并插入分区表中
    INSERT INTO test_spider.ajk_cmnt_detail_crawl_fq SELECT
    uuid(),
    T1.AJK_AREA_ORIG,
    T1.AJK_CMNT_ID_ORIG,
    T1.AJK_CMNT_NAME_ORIG,
    T1.AJK_ALIAS_NAME_ORIG,
    T1.AJK_CMNT_ADDR_ORIG,
    T1.GET_METHOD,
    T1.SRH_VAL,
    T1.AJK_CMNT,
    T1.AJK_CMNT_URL,
    T1.AJK_CMNT_LOC_DIST,
    T1.AJK_CMNT_LOC_ADD,
    T1.AJK_CMNT_PER_PR,
    T1.AJK_CMNT_HOA_TYPE,
    T1.AJK_CMNT_TOT_AREA,
    T1.AJK_CMNT_BLD_YEAR,
    T1.AJK_CMNT_VOL_RTO,
    T1.AJK_CMNT_DEV_NM,
    T1.AJK_CMNT_HOA_NM,
    T1.AJK_CMNT_RLT_SHL,
    T1.AJK_CMNT_BIZ_DIST,
    T1.AJK_CMNT_HOA_FEE,
    T1.AJK_CMNT_HSE_TTL,
    T1.AJK_CMNT_TTL_PARK,
    T1.AJK_CMNT_GRN_RTO,
    T1.AJK_CMNT_SEC_TTL,
    T1.AJK_CMNT_RENT_TTL,
    T1.AJK_CMNT_3YEAR_PR,
    T1.AJK_CMNT_3YEAR_BIZ_PR,
    T1.AJK_CMNT_LOT,
    T1.AJK_CMNT_LAT,
    T1.AJK_CMNT_GP_NUM,
    T1.AJK_CMNT_TAG,
    T1.AJK_CMNT_CRAWL_TM,
    T1.BATCH_ID,
    T1.IS_AVL,
    T1.CRT_TM,
    T1.CRT_PSN,
    T1.UPT_TM,
    T1.UPT_PSN,
    T1.ajk_prov,
    T1.ajk_city_orig
    FROM
    (
    SELECT
    T.*,
    row_number() over (
    PARTITION BY T.ajk_prov,
    T.ajk_city_orig,
    T.AJK_AREA_ORIG,
    T.AJK_CMNT_ID_ORIG,
    T.AJK_CMNT_NAME_ORIG,
    T.AJK_ALIAS_NAME_ORIG,
    T.AJK_CMNT_ADDR_ORIG,
    T.GET_METHOD,
    T.SRH_VAL,
    T.AJK_CMNT,
    T.AJK_CMNT_URL,
    T.AJK_CMNT_LOC_DIST,
    T.AJK_CMNT_LOC_ADD,
    T.AJK_CMNT_PER_PR,
    T.AJK_CMNT_HOA_TYPE,
    T.AJK_CMNT_TOT_AREA,
    T.AJK_CMNT_BLD_YEAR,
    T.AJK_CMNT_VOL_RTO,
    T.AJK_CMNT_DEV_NM,
    T.AJK_CMNT_HOA_NM,
    T.AJK_CMNT_RLT_SHL,
    T.AJK_CMNT_BIZ_DIST,
    T.AJK_CMNT_HOA_FEE,
    T.AJK_CMNT_HSE_TTL,
    T.AJK_CMNT_TTL_PARK,
    T.AJK_CMNT_GRN_RTO,
    T.AJK_CMNT_SEC_TTL,
    T.AJK_CMNT_RENT_TTL,
    T.AJK_CMNT_3YEAR_PR,
    T.AJK_CMNT_3YEAR_BIZ_PR,
    T.AJK_CMNT_LOT,
    T.AJK_CMNT_LAT,
    T.AJK_CMNT_GP_NUM,
    T1.AJK_CMNT_TAG,
    T.BATCH_ID
    ) RN
    FROM
    test_spider.ajk_cmnt_detail_crawl T
    ) T1
    WHERE
    T1.RN = 1
    AND T1.BATCH_ID = "202104011000";


    -- test_spider 把导入的二手小区均价数据去重并插入分区表中
    INSERT INTO test_spider.ajk_cmnt_list_pr_crawl_fq SELECT
    uuid(),
    T1.AJK_AREA_ORIG,
    T1.AJK_CMNT_ID_ORIG,
    T1.AJK_CMNT_NM_ORIG,
    T1.AJK_ALIAS_NM_ORIG,
    T1.AJK_COMM_ADDR_ORIG,
    T1.GET_METHOD,
    T1.SRH_VAL,
    T1.AJK_CMNT_NM,
    T1.AJK_CMNT_URL,
    T1.AJK_CMNT_PER_PR,
    T1.AJK_CMNT_AREA,
    T1.AJK_CMNT_LOC,
    T1.AJK_CMNT_CRAWL_TM,
    T1.BATCH_ID,
    T1.IS_AVL,
    T1.CRT_TM,
    T1.CRT_PSN,
    T1.UPT_TM,
    T1.UPT_PSN,
    T1.AJK_PROV,
    T1.AJK_CITY_ORIG
    FROM
    (
    SELECT
    T.*,
    row_number() over (
    PARTITION BY T.AJK_PROV,
    T.AJK_CITY_ORIG,
    T.AJK_AREA_ORIG,
    T.AJK_CMNT_NM,
    T.AJK_CMNT_URL,
    T.AJK_CMNT_PER_PR,
    T.AJK_CMNT_AREA,
    T.AJK_CMNT_LOC
    ) RN
    FROM
    test_spider.ajk_cmnt_list_pr_crawl T
    ) T1
    WHERE
    T1.RN = 1
    AND T1.BATCH_ID = "202104011000";


    -- ODS_DATA_COLLECTION - 插入ODS数据表
    insert into table ods_data_collection.XXX select * from test_spider.XXX_fq;

  • 相关阅读:
    javascript Math.random()随机数函数
    asp.net 前台获取后台c#代码信息
    关于C#网站一般处理程序(ashx)中session的问题
    怎样才能在一般处理文件中创建新的Session和访问已经存在的Session?
    使用SqlParameter向数据库中插入数据
    C#串口编程学习简单实例
    认识nodejs
    01.Javascript中的接口Interface [转载]
    动态添加脚本,并触发回调函数 初步实现按需加载
    JS正则表达式 收藏
  • 原文地址:https://www.cnblogs.com/bigriverx/p/15061266.html
Copyright © 2011-2022 走看看