zoukankan      html  css  js  c++  java
  • 胖子哥的大数据之路(10)- 基于Hive构建数据仓库实例

    一、引言

      基于Hive+Hadoop模式构建数据仓库,是大数据时代的一个不错的选择。本文以郑商所每日交易行情数据为案例,探讨Hive数据导入的操作实例。

    二、源数据-每日行情数据

    三、建表脚本

    -- Daily quote detail table, one row per contract per trading day.
    -- Partitioned by load date (dt) and product code (product); both partition
    -- values are supplied by the LOAD DATA statement, not read from the file.
    CREATE TABLE IF NOT EXISTS t_day_detail (
        id           STRING,  -- contract code, e.g. CF405 in the sample rows
        lastday      FLOAT,
        today        FLOAT,
        highest      FLOAT,
        lowest       FLOAT,
        today_end    FLOAT,
        today_jisuan FLOAT,
        updown1      FLOAT,
        updown2      FLOAT,
        -- NOTE(review): the sample rows carry values such as 72.0 and 1090.0 in
        -- the sum/empity positions; confirm INT is the intended type for them.
        sum          INT,
        empity       INT,
        rise         INT,
        turnover     FLOAT,
        delivery     FLOAT
    )
    PARTITIONED BY (dt STRING, product STRING)
    -- The source files are comma-separated. Without this clause Hive falls back
    -- to its default Ctrl-A (\001) field delimiter, so each input line lands
    -- whole in `id` and every other column reads NULL.
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;
    

     四、数据导入1

    -- Load the files under the client-side (LOCAL) directory into the
    -- dt='2014-04-22', product='1' partition. OVERWRITE replaces whatever that
    -- partition already contains, so re-running the load does not duplicate rows.
    LOAD DATA LOCAL INPATH '/home/hadoop/source/in'
    OVERWRITE INTO TABLE t_day_detail
    PARTITION (dt = '2014-04-22', product = '1');

    五、数据导入2

    -- Load the files under the client-side (LOCAL) directory into the
    -- dt='2014-04-23', product='1' partition, replacing its previous content.
    -- NOTE(review): this reads the same local path as the 2014-04-22 load; unless
    -- the files under it were swapped in between, both partitions receive
    -- identical rows.
    LOAD DATA LOCAL INPATH '/home/hadoop/source/in'
    OVERWRITE INTO TABLE t_day_detail
    PARTITION (dt = '2014-04-23', product = '1');

    六、执行结果

    hive> select * from t_day_detail
        > ;
    OK
    CF405,17365.0,17390.0,17390.0,17360.0,17380.0,17380.0,15,15,72.0,1090.0,-36,625.66,0.0  NULL    NULL    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-22      1
    CF407,17275.0,17370.0,17415.0,17320.0,17320.0,17365.0,45,90,22.0,52.0,2,191.01,0.0      NULL    NULL    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-22      1
    CF409,17450.0,17380.0,17395.0,17310.0,17320.0,17330.0,-130,-120,7860.0,34584.0,-940,68099.08,0.0        NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-22      1
    CF411,16370.0,16315.0,16350.0,16220.0,16255.0,16240.0,-115,-130,984.0,17436.0,-380,7990.01,0.0  NULL    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-22      1
    CF501,16130.0,16030.0,16085.0,15920.0,15995.0,15970.0,-135,-160,26210.0,115120.0,-1906,209311.56,0.0    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-22      1
    CF503,16195.0,16030.0,16065.0,16000.0,16065.0,16045.0,-130,-150,60.0,526.0,12,481.42,0.0        NULL    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-22      1
    CF405,17365.0,17390.0,17390.0,17360.0,17380.0,17380.0,15,15,72.0,1090.0,-36,625.66,0.0  NULL    NULL    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-23      1
    CF407,17275.0,17370.0,17415.0,17320.0,17320.0,17365.0,45,90,22.0,52.0,2,191.01,0.0      NULL    NULL    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-23      1
    CF409,17450.0,17380.0,17395.0,17310.0,17320.0,17330.0,-130,-120,7860.0,34584.0,-940,68099.08,0.0        NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-23      1
    CF411,16370.0,16315.0,16350.0,16220.0,16255.0,16240.0,-115,-130,984.0,17436.0,-380,7990.01,0.0  NULL    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-23      1
    CF501,16130.0,16030.0,16085.0,15920.0,15995.0,15970.0,-135,-160,26210.0,115120.0,-1906,209311.56,0.0    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-23      1
    CF503,16195.0,16030.0,16065.0,16000.0,16065.0,16045.0,-130,-150,60.0,526.0,12,481.42,0.0        NULL    NULL    NULL    NULLNULL     NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL    2014-04-23      1
    Time taken: 0.391 seconds
    hive> 

    七、问题聚焦

    (1)Hive分区设置的原则和技巧?

    (2)Hive分区内行记录完全一致问题如何解决?

    (3)Hive中文乱码问题如何解决?

  • 相关阅读:
    Find the Smallest K Elements in an Array
    Count of Smaller Number
    Number of Inversion Couple
    Delete False Elements
    Sort Array
    Tree Diameter
    Segment Tree Implementation
    Java Programming Mock Tests
    zz Morris Traversal方法遍历二叉树(非递归,不用栈,O(1)空间)
    Algorithm about SubArrays & SubStrings
  • 原文地址:https://www.cnblogs.com/hadoopdev/p/3683583.html
Copyright © 2011-2022 走看看