zoukankan      html  css  js  c++  java
  • Phoenix Tips (1)

    1. 没有插入值的字段(即为Null的字段),在HBase中不会存储实际数据。


    创建表: create table t1 (id varchar not null primary key, cf1.a varchar, cf1.b varchar, cf2.c varchar,cf2.d varchar, cf2.e varchar );

    插入数据到c、d字段,a、b、e字段为Null :upsert into t1(id,c,d) values('0000000001','ccc','ddd');

    查看数据:

    select * from t1;
    +------------+------------+------------+------------+------------+------------+
    |     ID                |     A       |     B       |       C      |     D        |   E            |  
    +------------+------------+------------+------------+------------+------------+
    | 0000000001 | null       | null       | ccc        | ddd        |     null      |     
    +------------+------------+------------+------------+------------+------------+


    hbase(main):028:0> scan 'T1'
    ROW                              COLUMN+CELL                                                                                   
     0000000001                       column=CF1:_0, timestamp=1415238625585, value=                                                
     0000000001                      column=CF2:C, timestamp=1415238625585, value=ccc                                              
     0000000001                      column=CF2:D, timestamp=1415238625585, value=ddd 


    插入数据到a、d字段,b,c ,e字段为Null : upsert into t1 values('0000000002','aaa',null,null,'ddd',null);

    select * from t1;
    +------------+------------+------------+------------+------------+------------+
    |     ID                |     A        |     B       |     C       |     D         |     E        |
    +------------+------------+------------+------------+------------+------------+
    | 0000000001 | null        | null       | ccc        | ddd        |     null    |
    | 0000000002 | aaa        | null       | null       | ddd         |     null     |
    +------------+------------+------------+------------+------------+------------+

    hbase(main):029:0> scan 'T1'
    ROW                              COLUMN+CELL                                                                                   
     0000000001                      column=CF1:_0, timestamp=1415238625585, value=                                                
     0000000001                      column=CF2:C, timestamp=1415238625585, value=ccc                                              
     0000000001                      column=CF2:D, timestamp=1415238625585, value=ddd      

                                            
     0000000002                      column=CF1:A, timestamp=1415238882597, value=aaa                                              
     0000000002                      column=CF1:_0, timestamp=1415238882597, value=                                                
     0000000002                      column=CF2:D, timestamp=1415238882597, value=ddd  


    2、索引结构

    创建索引:create index index_t1_a_b_in_c_d on t1(a,c) include (b,d)

    实际创建一个表:

    hbase(main):031:0> describe 'INDEX_T1_A_C_IN_B_D'
                                                                                        
     'INDEX_T1_A_C_IN_B_D', {TABLE_ATTRIBUTES => {MAX_FILESIZE => '7516192768', coprocessor$1 => '|org.apache.phoenix.coprocessor.ScanRegionObserver|1|', coprocessor$2 => '|org.apache.phoenix.coprocessor.UngroupedAggregateRegionObserver|1|', coprocessor$3 =>oprocessor$4=>'|org.apache.phoenix.coprocessor.ServerCachingEndpointImpl|1|',  coprocessor$5 => '|org.apache.hadoop.hbase.regionserver.LocalIndexSplitter|1|'}, { NAME =>'CF1', DATA_BLOCK_ENCODING => 'FAST_DIFF', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => 'FOREVER', KEEP_DELETED_CELLS => 'true', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'CF2', DATA_BLOCK_ENCODING => 'FAST_DIFF', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0',TTL => 'FOREVER', KEEP_DELETED_CELLS => 'true', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} 


    插入数据:

    upsert into t1 values('0000000001','10','11','12','13',"14");

    upsert into t1 values('0000000002','20','21','22','23',"24");

    upsert into t1 values('0000000003','30','31','32','33',"24");


    select * from t1;
    +------------+------------+------------+------------+----------+------------+
    |     ID                  |     A      |     B      |     C      |     D        |       E        |
    +------------+------------+------------+------------+----------+------------+
    | 0000000001 | 10         | 11         | 12         | 13         |      14        |
    | 0000000002 | 20         | 21         | 22         | 23         |       24       |
    | 0000000003 | 30         | 31         | 32         | 33         |       34       | 
    +------------+------------+------------+------------+----------+------------+


    索引INDEX_T1_A_C_IN_B_D

    select * from INDEX_T1_A_C_IN_B_D;
    +------------+------------+------------+------------+------------+
    |   CF1:A    |   CF2:C    |    :ID               |   CF1:B    |   CF2:D    |
    +------------+------------+------------+------------+------------+
    | 10            | 12            | 0000000001 | 11             | 13         |
    | 20            | 22            | 0000000002 | 21             | 23         |
    | 30            | 32            | 0000000003 | 31             | 33         |
    +------------+------------+------------+------------+------------+


    scan 'INDEX_T1_A_C_IN_B_D'
    ROW                                                   COLUMN+CELL                                                                                   
     10x0012x000000000001          column=CF1:CF1:B, timestamp=1415240129108, value=11                                           
     10
    x0012x000000000001          column=CF1:_0, timestamp=1415240129108, value=                                                
     10x0012x000000000001          column=CF2:CF2:D, timestamp=1415240129108, value=13  

                                             
     20x0022x000000000002          column=CF1:CF1:B, timestamp=1415240135689, value=21                                           
     20
    x0022x000000000002          column=CF1:_0, timestamp=1415240135689, value=                                                
     20x0022x000000000002          column=CF2:CF2:D, timestamp=1415240135689, value=23  

                                             
     30x0032x000000000003          column=CF1:CF1:B, timestamp=1415240148794, value=31                                           
     30
    x0032x000000000003          column=CF1:_0, timestamp=1415240148794, value=                                                
     30x0032x000000000003          column=CF2:CF2:D, timestamp=1415240148794, value=33 


    3、如果索引中有null

    upsert into t1 values('0000000004',null,'31',null,'33', '34');


    T1数据:
    +------------+------------+------------+------------+----------+---------+
    |     ID                  |     A      |     B      |     C      |     D        |   E         |
    +------------+------------+------------+------------+---------+----------+
    | 0000000004 | null       | 31         | null       | 33         |   34       |

    +------------+------------+------------+------------+----------+---------+

    索引INDEX_T1_A_C_IN_B_D数据:

    +------------+------------+--------------------+-------------+------------+
    |   CF1:A    |   CF2:C    |    :ID                  |   CF1:B    |   CF2:D    |
    +------------+------------+--------------------+------------+------------+
    | null          | null           | 0000000004   | 31             | 33           |

    +------------+------------+--------------------+-------------+------------+


    ROW                                              COLUMN+CELL                                                                                   
     x00x000000000004              column=CF1:CF1:B, timestamp=1415240916266, value=31                                           
     x00x000000000004              column=CF1:_0, timestamp=1415240916266, value=                                                
     x00x000000000004              column=CF2:CF2:D, timestamp=1415240916266, value=33 


    4. 索引使用

    其定义为:

    create index index_t1_a_b_in_c_d on t1(a,c) include (b,d)


    (1)、索引中没有包括字段 e, 故只有select字段中有e,就不会用到索引:

    explain select  * from t1  where a='10' ;     

    CLIENT PARALLEL 1-WAY FULL SCAN OVER T1

            SERVER FILTER BY CF1.A = '10'  

    explain select  a, e from t1 where a='10'; 
    CLIENT PARALLEL 1-WAY FULL SCAN OVER T1

            SERVER FILTER BY CF1.A = '10'  


    (2)、不使用e,则可以

    explain select a,b,c,d from t1 where a='10';

     CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['10'] 


    explain select d,c,b,a from t1 where a='10' and c='12';

    |CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['10','12']  


    (3)、索引 a、b 的顺序可以调整

    explain select d,c,b,a from t1 where c='10' and a='12';

     

    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['12','10'] 

    explain select d,c,b,a from t1 where c>'10' and a='12'; 

    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_B_IN_C_D ['12','10'] - ['12',*]


    explain select d,c,b,a from t1 where c='10' and a>'12'; 

    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['12'] - [*]

             SERVER FILTER BY C = '10'


    explain select d,c,b,a from t1 where c>'10' and a>'12'; 

     CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['12'] - [*] 

             SERVER FILTER BY C > '10'


    (4)、混合使用索引字段a 和include 字段b,可以用到索引

    explain select a,b,c,d from t1 where a='10' and b='11';

    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['10']    

               SERVER FILTER BY CF1.B = '11' 


    (5)、混合使用索引字段a 和非include 字段e,则用不到索引

    explain select d,c,b,a from t1 where a='10' and e='12';

    CLIENT PARALLEL 1-WAY FULL SCAN OVER T1  

              SERVER FILTER BY (CF1.A = '10' AND CF2.E = '12')  

    (6)、Group By  可以使用索引  

    explain select b, max(c), min(d) from t1 where a>'10' group by b 

    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['10'] - [*]  
         SERVER AGGREGATE INTO DISTINCT ROWS BY [CF1.B]  
      CLIENT MERGE SORT


    explain select a, max(c), min(d) from t1 where b>'10' group by a

    CLIENT PARALLEL 32-WAY FULL SCAN OVER INDEX_T1_A_C_IN_B_D 

          SERVER FILTER BY CF1.B > '10' 
         SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [A] 
    CLIENT MERGE SORT


    explain select a, max(b), min(d) from t1 where c>'10' group by a;

     CLIENT PARALLEL 32-WAY FULL SCAN OVER INDEX_T1_A_C_IN_B_D 
         SERVER FILTER BY C > '10' 
         SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [A]  
      CLIENT MERGE SORT 

     
    explain select a, count(b), max(c), min(d) from t1 where a>'1' and b>'11' group by a 
    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['1'] - [*] 
          SERVER FILTER BY CF1.B > '11' 
          SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [A]  
      CLIENT MERGE SORT

      

    explain select a, count(b), max(c), min(d) from t1 where a>'1' and c>'11' group by a

    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['1'] - [*]  
          SERVER FILTER BY C > '11'  
          SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [A]  

      CLIENT MERGE SORT  


    explain select count(a), count(b), c, min(d) from t1 where a>'1' and c>'11' group by c ;

    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['1'] - [*]  
         SERVER FILTER BY C > '11'  
          SERVER AGGREGATE INTO DISTINCT ROWS BY [C]  
      CLIENT MERGE SORT 

    explain select count(a), count(b), c, min(d) from t1 where a='10' and c>'11' group by c 
    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_B_IN_C_D ['10','11'] - ['10',*]  
         SERVER AGGREGATE INTO DISTINCT ROWS BY [C]  
     CLIENT MERGE SORT

    explain select count(a), b, max(c), min(d) from t1 where a='10' and c>'11' group by b ;

    CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_B_IN_C_D ['10','11'] - ['10',*] 
          SERVER AGGREGATE INTO DISTINCT ROWS BY [CF1.B]  
     CLIENT MERGE SORT


  • 相关阅读:
    sort函数的运用
    数据库设计三大范式
    LeetCode 64. 求1+2+…+n
    LeetCode 876. 链表的中间结点
    单链表的创建和基本操作 C语言
    LeetCode 365. 水壶问题
    LeetCode 1160. 拼写单词
    LeetCode 409. 最长回文串
    LeetCode 836. 矩形重叠
    LeetCode 7. 整数反转
  • 原文地址:https://www.cnblogs.com/leeeee/p/7276388.html
Copyright © 2011-2022 走看看