zoukankan      html  css  js  c++  java
  • kylin2.4.1订单案例详细构建流程

    一、Hive订单数据仓库构建:

    hive表创建可以在命令行中直接完成,也可以在Hue中完成,本文在Hue中的完成,如下图:

     下文的样例文本文件下载地址:https://files-cdn.cnblogs.com/files/qqflying/KylinData.zip

    1. 创建事实表并插入数据

    执行1: DROP TABLE IF EXISTS default.fact_order ;

    执行2:

    create table default.fact_order (
    time_key string,
    product_key string,
    salesperson_key string,
    custom_key string,
    quantity_ordered bigint,
    order_dollars bigint,
    cost_dollars bigint
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;

    执行3:load data local inpath '/data/fact_order.txt' overwrite into table default.fact_order;

    fact_order.txt

    2016-05-01,pd001,sp001,ct001,100,2000,1000
    2016-05-01,pd001,sp002,ct002,100,2000,1000
    2016-05-01,pd001,sp003,ct002,100,2000,1000
    2016-05-01,pd002,sp002,ct002,100,2000,1000
    2016-05-01,pd003,sp003,ct001,100,2000,1000
    2016-05-01,pd001,sp003,ct001,100,2000,1000
    2016-05-01,pd001,sp002,ct001,100,2000,1000
    2016-05-01,pd001,sp003,ct002,100,2000,1000
    2016-05-01,pd002,sp001,ct001,100,2000,1000
    2016-05-01,pd003,sp001,ct001,100,2000,1000
    2016-05-01,pd004,sp001,ct001,50,1000,600
    2016-05-02,pd001,sp001,ct001,50,1000,600
    2016-05-02,pd001,sp002,ct002,100,2000,1000
    2016-05-02,pd001,sp003,ct002,100,2000,1000
    2016-05-02,pd002,sp001,ct001,50,1000,600
    2016-05-02,pd003,sp001,ct001,50,1000,600
    2016-05-02,pd004,sp001,ct001,50,1000,600
    2016-05-03,pd001,sp001,ct001,50,1000,600
    2016-05-03,pd001,sp002,ct002,100,2000,1000
    2016-05-03,pd001,sp003,ct002,100,2000,1000
    2016-05-04,pd002,sp001,ct001,700,14000,10000
    2016-05-04,pd003,sp001,ct001,700,14000,10000
    2016-05-04,pd004,sp001,ct001,100,2000,1000
    2016-05-05,pd001,sp001,ct001,100,2000,1000
    2016-05-05,pd001,sp002,ct002,700,14000,10000
    2016-05-05,pd001,sp003,ct002,700,14000,10000
    2016-05-05,pd002,sp001,ct001,100,2000,1000
    2016-05-05,pd003,sp001,ct001,100,2000,1000
    2016-05-05,pd004,sp001,ct001,100,2000,1000
    2016-05-06,pd001,sp001,ct001,100,2000,1000
    2016-05-06,pd001,sp002,ct002,100,2000,1000
    2016-05-06,pd001,sp003,ct002,100,2000,1000
    2016-05-07,pd002,sp001,ct001,100,2000,1000
    2016-05-07,pd003,sp001,ct001,100,2000,1000
    2016-05-07,pd004,sp001,ct001,50,1000,600
    2016-05-07,pd002,sp001,ct001,100,2000,1000
    2016-05-07,pd003,sp001,ct001,100,2000,1000
    2016-05-07,pd004,sp001,ct001,50,1000,600
    2016-05-08,pd001,sp001,ct001,50,1000,600
    2016-05-08,pd001,sp002,ct002,100,2000,1000
    2016-05-08,pd001,sp003,ct002,100,2000,1000
    2016-05-08,pd001,sp001,ct001,50,1000,600
    2016-05-08,pd001,sp002,ct002,100,2000,1000
    2016-05-08,pd001,sp003,ct002,100,2000,1000
    2016-05-08,pd001,sp001,ct001,50,1000,600
    2016-05-08,pd001,sp002,ct002,100,2000,1000
    2016-05-08,pd001,sp003,ct002,100,2000,1000
    2016-05-09,pd002,sp001,ct001,50,1000,600
    2016-05-09,pd003,sp001,ct001,50,1000,600
    2016-05-09,pd004,sp001,ct001,50,1000,600
    2016-05-09,pd001,sp001,ct001,50,1000,600
    2016-05-09,pd002,sp001,ct001,50,1000,600
    2016-05-09,pd003,sp001,ct001,50,1000,600
    2016-05-09,pd004,sp001,ct001,50,1000,600
    2016-05-09,pd001,sp001,ct001,50,1000,600
    2016-05-09,pd001,sp002,ct002,100,2000,1000
    2016-05-09,pd004,sp003,ct002,100,2000,1000
    2016-05-09,pd002,sp001,ct001,700,14000,10000
    2016-05-09,pd003,sp003,ct001,700,14000,10000
    2016-05-09,pd004,sp003,ct001,100,2000,1000
    2016-05-10,pd001,sp001,ct001,100,2000,1000
    2016-05-10,pd001,sp002,ct002,700,14000,10000
    2016-05-10,pd001,sp003,ct002,700,14000,10000
    2016-05-10,pd002,sp001,ct001,100,2000,1000
    2016-05-11,pd003,sp003,ct001,100,2000,1000
    2016-05-11,pd004,sp001,ct001,100,2000,1000
    2016-05-12,pd001,sp001,ct001,100,2000,1000
    2016-05-12,pd004,sp002,ct002,100,2000,1000
    2016-05-12,pd001,sp003,ct002,100,2000,1000
    2016-05-12,pd001,sp001,ct001,100,2000,1000
    2016-05-12,pd004,sp002,ct002,100,2000,1000
    2016-05-12,pd001,sp003,ct002,100,2000,1000
    2016-05-13,pd002,sp001,ct001,100,2000,1000
    2016-05-13,pd003,sp001,ct001,100,2000,1000
    2016-05-13,pd004,sp001,ct001,50,1000,600
    2016-05-14,pd001,sp001,ct001,50,1000,600
    2016-05-14,pd001,sp002,ct002,100,2000,1000
    2016-05-14,pd001,sp003,ct002,100,2000,1000
    2016-05-15,pd002,sp001,ct001,50,1000,600
    2016-05-15,pd003,sp001,ct001,50,1000,600
    2016-05-15,pd004,sp001,ct001,50,1000,600
    2016-05-15,pd002,sp001,ct001,50,1000,600
    2016-05-15,pd003,sp001,ct001,50,1000,600
    2016-05-15,pd004,sp001,ct001,50,1000,600
    2016-05-15,pd002,sp001,ct001,50,1000,600
    2016-05-15,pd003,sp001,ct001,50,1000,600
    2016-05-15,pd004,sp001,ct001,50,1000,600
    2016-05-16,pd001,sp001,ct001,50,1000,600
    2016-05-16,pd001,sp002,ct002,100,2000,1000
    2016-05-16,pd001,sp003,ct002,100,2000,1000
    2016-05-16,pd001,sp001,ct001,50,1000,600
    2016-05-16,pd001,sp002,ct002,100,2000,1000
    2016-05-16,pd001,sp003,ct002,100,2000,1000
    2016-05-17,pd002,sp001,ct001,700,14000,10000
    2016-05-17,pd003,sp001,ct001,700,14000,10000
    2016-05-17,pd004,sp001,ct001,100,2000,1000
    2016-05-17,pd002,sp001,ct001,700,14000,10000
    2016-05-17,pd003,sp001,ct001,700,14000,10000
    2016-05-17,pd004,sp001,ct001,100,2000,1000
    2016-05-18,pd001,sp001,ct001,100,2000,1000
    2016-05-18,pd003,sp002,ct001,700,14000,10000
    2016-05-18,pd001,sp003,ct002,700,14000,10000
    2016-05-19,pd002,sp001,ct001,100,2000,1000
    2016-05-19,pd003,sp001,ct002,100,2000,1000
    2016-05-20,pd001,sp001,ct001,100,2000,1000
    2016-05-20,pd002,sp002,ct002,100,2000,1000
    2016-05-20,pd003,sp003,ct001,100,2000,1000
    2016-05-20,pd004,sp001,ct001,100,2000,1000
    2016-05-20,pd001,sp002,ct002,100,2000,1000
    2016-05-20,pd002,sp001,ct002,100,2000,1000

    2. 创建天维度表dim_day(同样也分三步执行)

    DROP TABLE IF EXISTS default.dim_day ;

    create table default.dim_day (
    day_key string,
    full_day string,
    month_name string,
    quarter string,
    year string
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    STORED AS TEXTFILE; 
    load data local inpath '/data/dim_day.txt' overwrite into table default.dim_day;

     dim_day.txt
      
    2016-05-01,2016-05-01,201605,2016q2,2016
    2016-05-02,2016-05-02,201605,2016q2,2016
    2016-05-03,2016-05-03,201605,2016q2,2016
    2016-05-04,2016-05-04,201605,2016q2,2016
    2016-05-05,2016-05-05,201605,2016q2,2016
    2016-05-06,2016-05-06,201605,2016q2,2016
    2016-05-07,2016-05-07,201605,2016q2,2016
    2016-05-08,2016-05-08,201605,2016q2,2016
    2016-05-09,2016-05-09,201605,2016q2,2016
    2016-05-10,2016-05-10,201605,2016q2,2016
    2016-05-11,2016-05-11,201605,2016q2,2016
    2016-05-12,2016-05-12,201605,2016q2,2016
    2016-05-13,2016-05-13,201605,2016q2,2016
    2016-05-14,2016-05-14,201605,2016q2,2016
    2016-05-15,2016-05-15,201605,2016q2,2016
    2016-05-16,2016-05-16,201605,2016q2,2016
    2016-05-17,2016-05-17,201605,2016q2,2016
    2016-05-18,2016-05-18,201605,2016q2,2016
    2016-05-19,2016-05-19,201605,2016q2,2016
    2016-05-20,2016-05-20,201605,2016q2,2016

    3. 创建售卖员的维度表salesperson_dim
     
    DROP TABLE IF EXISTS default.dim_salesperson ;
     
    create table default.dim_salesperson (
    salesperson_key string,
    salesperson string,
    salesperson_id string,
    region string,
    region_code string
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;
      
    load data local inpath '/data/dim_salesperson.txt' overwrite into table default.dim_salesperson;
      
     dim_salesperson.txt
      
    sp001,hongbin,sp001,beijing,10086
    sp002,hongming,sp002,beijing,10086
    sp003,hongmei,sp003,beijing,10086

    4. 创建客户维度 custom_dim

     
     DROP TABLE IF EXISTS default.dim_custom ;
      
    create table default.dim_custom (
    custom_key string,
    custom_name string,
    custorm_id string,
    headquarter_states string,
    billing_address string,
    billing_city string,
    billing_state string,
    industry_name string
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    STORED AS TEXTFILE;
     
    load data local inpath '/data/dim_custom.txt' overwrite into table default.dim_custom;

     dim_custom.txt
      
    ct001,custom_john,ct001,beijing,zgx-beijing,beijing,beijing,internet                    
    ct002,custom_herry,ct002,henan,shlinjie,shangdang,henan,internet     
     
     
     
     
    5. 创建产品维度表并插入数据
     
     DROP TABLE IF EXISTS default.dim_product ;                                              
                                                                                              
    create table default.dim_product (                                                      
    product_key string,                                                                 
    product_name string,                                                                
    product_id string,                                                                  
    product_desc string,                                                                
    sku string,                                                                         
    brand string,                                                                       
    brand_code string,                                                                  
    brand_manager string,                                                               
    category string,                                                                    
    category_code string                                                                
    )                                                                                       
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ','                                           
    STORED AS TEXTFILE;                                                                     
                           
    load data local inpath '/data/dim_product.txt' overwrite into table default.dim_product;      
     dim_product.txt
      
    pd001,Box-Large,pd001,Box-Large-des,large1.0,brand001,brandcode001,brandmanager001,Packing,cate001
    pd002,Box-Medium,pd001,Box-Medium-des,medium1.0,brand001,brandcode001,brandmanager001,Packing,cate001
    pd003,Box-small,pd001,Box-small-des,small1.0,brand001,brandcode001,brandmanager001,Packing,cate001
    pd004,Evelope,pd001,Evelope_des,large3.0,brand001,brandcode001,brandmanager001,Pens,cate002

     
    这样一个星型的结构表在hive中创建完毕, 实际上一个离线的数据仓库已经完成, 它包含一个主题, 即商品订单.

    三.Kylin的Project创建与数据同步
    1.单击"Manage Project" 
    2.单击"New Project"
    3.输入"Project Name", WareHouse_01
    4.Submit


    1.选择WareHouse_01,选择"Data Source" tab页
    2.单击"Load Hive Table"
    3.输入需要同步的表
      "DEFAULT.FACT_ORDER,DEFAULT.DIM_DAY,DEFAULT.DIM_PRODUCT,DEFAULT.DIM_SALESPERSON,DEFAULT.DIM_CUSTOM"
    4.Sync

    四.Kylin的Model创建
    1.选择"Models" tab页,单击"New Model"
    2."Model Name"输入,WareHouse_01_Model
    3.选择"Fact Table"为 DEFAULT.FACT_ORDER;再 添加Lookup Table;


    4.选取每张表的哪些列字段作为Dimensions
     ID Table Name           Columns
     1 DEFAULT.FACT_ORDER  TIME_KEY PRODUCT_KEY SALESPERSON_KEY CUSTOM_KEY
     2 DEFAULT.DIM_DAY          FULL_DAY
     3 DEFAULT.DIM_PRODUCT  PRODUCT_NAME
     4 DEFAULT.DIM_SALESPERSON  SALESPERSON
     5 DEFAULT.DIM_CUSTOM  CUSTOM_NAME

    5.选取DEFAULT.FACT_ORDER表的哪些列字段作为measures
            QUANTITY_ORDERED ORDER_DOLLARS COST_DOLLARS

    6.a.选取 "Partition Date Column"为DEFAULT.FACT_ORDER.TIME_KEY,格式 yyyy-MM-dd
      b.对于"Filter"条件,由于没有要过滤的条件,故不填写

    7.Save

    五.Kylin的Cube创建

     

    1.选择"Models" tab页,单击"New Cube“

    2.Cube Info:
              "Model Name"选择,WareHouse_01_Model
               "Cube Name"输入,cube01

    3.Dismensions:
              单击"Auto Generator",依据情况选择维度的列,全选

    4.Measures:
              a.单击"+Measure",添加要聚合计算的度量,添加: sum(QUANTITY_ORDERED),sum(ORDER_DOLLARS)
              b.Expression: SUM/MIN/MAX/COUNT/COUNT_DISTINCT/TOP_N/RAW
    5.Refresh Setting:
              a.Auto Merge Thresholds,自动合并阈值,7~28 days
       b.Retention Threshold,保留天数,60
       c.Partition Start Date,非常重要,是后面build cube的开始日期

    6.Advanced Setting:
            --Aggregation Groups:
       a.Includes: TIME_KEY ,PRODUCT_KEY ,SALESPERSON_KEY , CUSTOM_KEY
       b.Mandatory Dimensions: TIME_KEY
       c.Hierarchy Dimensions: PRODUCT_KEY ,SALESPERSON_KEY ,CUSTOM_KEY
       d.Joint Dimensions: 无
           --Rowkeys:
     TIME_KEY ,PRODUCT_KEY ,SALESPERSON_KEY ,CUSTOM_KEY 4个字段为dict字典编码
     
    7.Configuration Overwrites: 无

    8.Overview:
              保存cube

    五.Cube Build

    1.选择 cube01,单击”Action”,选择Build

    2.填写End Date,Submit

    3.单击”Monitor”,观察Job

    4.等待Process100% (Any Errors)

    5.返回cube01,查看 cube size 和 Source Records等字段更新

     
    六.Hive* kyin 查询对比

    点击(此处)折叠或打开

    1. 1.2016-05-01到2016-05-15期间的每天的订单数量,订单金额,订单成本
    2. Hive: 65.816 s
    3. select fact.time_key, sum(fact.quantity_ordered), sum(fact.order_dollars) from fact_order as fact 
      where fact.time_key >= "2016-05-01" and fact.time_key <= "2016-05-15" 
      group by fact.time_key order by fact.time_key;
    4. Kylin: 0.32s-->0.27s 
    5. select fact.time_key, sum(fact.quantity_ordered), sum(fact.order_dollars) from fact_order as fact 
      where fact.time_key between '2016-05-01' and '2016-05-15'
      group by fact.time_key order by fact.time_key

    1. 2.2016-05-01到2016-05-15期间的每天的产品的订单量
    2. Hive: 100.336s
    3. select dday.full_day,dsp.product_name, sum(fact.quantity_ordered) from fact_order as fact 
      inner join dim_day as dday on fact.time_key = dday.day_key 
      inner join dim_product as dsp on fact.product_key = dsp.product_key 
      where dday.full_day >= "2016-05-01" and dday.full_day <= "2016-05-15" 
      group by dday.full_day,dsp.product_name
      order by dday.full_day,dsp.product_name;
    4. Kylin:0.93s-->0.39s
    5. select dday.full_day,dsp.product_name, sum(fact.quantity_ordered) from fact_order as fact 
      inner join dim_day as dday on fact.time_key = dday.day_key 
      inner join dim_product as dsp on fact.product_key = dsp.product_key 
      where dday.full_day >= '2016-05-01' and dday.full_day <= '2016-05-15' 
      group by dday.full_day,dsp.product_name
      order by dday.full_day,dsp.product_name

    本文参考:

    http://blog.itpub.net/30089851/viewspace-2122586/

    http://www.mamicode.com/info-detail-2332910.html

  • 相关阅读:
    HTTP协议
    python中os模块简介
    什么是HTML
    Django框架
    python递归
    web工程中的各种路径(eclipse开发)
    小白向:web中利用request.getPart()上传文件到服务器
    利用 html+css 画同心圆(concentric circles)——绝对布局与相对布局
    理解z-index和css中的层叠顺序问题(大神技术博的读后感?)
    二、系统初始化
  • 原文地址:https://www.cnblogs.com/qqflying/p/10943010.html
Copyright © 2011-2022 走看看