zoukankan      html  css  js  c++  java
  • hive提前过滤重要性

    hive提前过滤

    create table sospdm.tmp_yinfei_test_01 
    (
        id string
    )
    partitioned by (statis_date string)
    ;
    
    create table sospdm.tmp_yinfei_test_02 
    (
        id string
    )
    partitioned by (statis_date string)
    ;
    
    select t1.* 
    from tmp_yinfei_test_01 t1 
    left join tmp_yinfei_test_02 t2
    on t1.id=t2.id
    where t1.statis_date='20190408' and t2.statis_date='20190408'
    ;
    select t1.* 
    from tmp_yinfei_test_01 t1 
    left join tmp_yinfei_test_02 t2
    on t1.id=t2.id and t1.statis_date='20190408' and t2.statis_date='20190408'
    ;
    select t1.*
    from 
    (
    select * from tmp_yinfei_test_01 where statis_date='20190408'
    ) t1 
    left join 
    (
    select * from tmp_yinfei_test_02 where statis_date='20190408'
    ) t2 
    on t1.id=t2.id
    ;
    =========================test1=====================================
    
    explain select t1.* 
    from tmp_yinfei_test_01 t1 
    left join tmp_yinfei_test_02 t2
    on t1.id=t2.id
    where t1.statis_date='20190408' and t2.statis_date='20190408'
    ;
    
    hive> explain select t1.* 
        > from tmp_yinfei_test_01 t1
        > left join tmp_yinfei_test_02 t2
        > on t1.id=t2.id
        > where t1.statis_date='20190408' and t2.statis_date='20190408'
        > ;
    OK
    STAGE DEPENDENCIES:
      Stage-1 is a root stage
      Stage-0 depends on stages: Stage-1
    
    STAGE PLANS:
      Stage: Stage-1
        Map Reduce
          Map Operator Tree:
              TableScan
                alias: t1
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Filter Operator
                  predicate: (statis_date = '20190408') (type: boolean)
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                  Reduce Output Operator
                    key expressions: id (type: string)
                    sort order: +
                    Map-reduce partition columns: id (type: string)
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
              TableScan
                alias: t2
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Reduce Output Operator
                  key expressions: id (type: string)
                  sort order: +
                  Map-reduce partition columns: id (type: string)
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                  value expressions: statis_date (type: string)
          Reduce Operator Tree:
            Join Operator
              condition map:
                   Left Outer Join0 to 1
              keys:
                0 id (type: string)
                1 id (type: string)
              outputColumnNames: _col0, _col6
              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
              Filter Operator
                predicate: (_col6 = '20190408') (type: boolean)
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), '20190408' (type: string)
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                  File Output Operator
                    compressed: true
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    
      Stage: Stage-0
        Fetch Operator
          limit: -1
          Processor Tree:
            ListSink
    
    Time taken: 0.399 seconds, Fetched: 58 row(s)
    
    结论:t2表会扫全表
    =========================test2=====================================
    explain select t1.* 
    from tmp_yinfei_test_01 t1 
    left join tmp_yinfei_test_02 t2
    on t1.id=t2.id and t1.statis_date='20190408' and t2.statis_date='20190408'
    ;
    STAGE DEPENDENCIES:
      Stage-1 is a root stage
      Stage-0 depends on stages: Stage-1
    
    STAGE PLANS:
      Stage: Stage-1
        Map Reduce
          Map Operator Tree:
              TableScan
                alias: t1
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Reduce Output Operator
                  key expressions: id (type: string)
                  sort order: +
                  Map-reduce partition columns: id (type: string)
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                  value expressions: statis_date (type: string)
              TableScan
                alias: t2
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Filter Operator
                  predicate: (statis_date = '20190408') (type: boolean)
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                  Reduce Output Operator
                    key expressions: id (type: string)
                    sort order: +
                    Map-reduce partition columns: id (type: string)
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
          Reduce Operator Tree:
            Join Operator
              condition map:
                   Left Outer Join0 to 1
              filter predicates:
                0 {(VALUE._col0 = '20190408')}
                1 
              keys:
                0 id (type: string)
                1 id (type: string)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
              File Output Operator
                compressed: true
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    
      Stage: Stage-0
        Fetch Operator
          limit: -1
          Processor Tree:
            ListSink
    结论:t1表会扫全表
    =========================test3=====================================
    explain select t1.*
    from 
    (
    select * from tmp_yinfei_test_01 where statis_date='20190408'
    ) t1 
    left join 
    (
    select * from tmp_yinfei_test_02 where statis_date='20190408'
    ) t2 
    on t1.id=t2.id
    ;
    STAGE DEPENDENCIES:
      Stage-1 is a root stage
      Stage-0 depends on stages: Stage-1
    
    STAGE PLANS:
      Stage: Stage-1
        Map Reduce
          Map Operator Tree:
              TableScan
                alias: tmp_yinfei_test_01
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Filter Operator
                  predicate: (statis_date = '20190408') (type: boolean)
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                  Select Operator
                    expressions: id (type: string), '20190408' (type: string)
                    outputColumnNames: _col0, _col1
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: string)
                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                      value expressions: _col1 (type: string)
              TableScan
                alias: tmp_yinfei_test_02
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                Filter Operator
                  predicate: (statis_date = '20190408') (type: boolean)
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                  Select Operator
                    expressions: id (type: string)
                    outputColumnNames: _col0
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: string)
                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
          Reduce Operator Tree:
            Join Operator
              condition map:
                   Left Outer Join0 to 1
              keys:
                0 _col0 (type: string)
                1 _col0 (type: string)
              outputColumnNames: _col0, _col1
              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
              File Output Operator
                compressed: true
                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    
      Stage: Stage-0
        Fetch Operator
          limit: -1
          Processor Tree:
            ListSink
  • 相关阅读:
    网络技术:网络互联模型
    二柱子代码版(应该不会在动它了)
    验证码-java界面版
    动手动脑1002
    动手动脑1001
    动手动脑0930
    动手动脑0929
    Java Web学习-0823
    Java Web学习-0822
    Java Web学习-0821
  • 原文地址:https://www.cnblogs.com/yin-fei/p/10751608.html
Copyright © 2011-2022 走看看