zoukankan      html  css  js  c++  java
  • clickHouse 常用命令

    ---客户端链接工具
    clickhouse-client -m -u root -p root


    --大小写敏感的
    CREATE TEMPORARY TABLE IF NOT EXISTS test.member_new_simple2 ENGINE = Memory 
    AS SELECT ID,user_id,member_grade,accum_amount from  test.member_new;

    -- SHOW PROCESSLIST
    查询正在进行执行的sql操作

    关于 update/delete操作

    clickhouse关于update/delete操作是异步后台进行,而且成本很高,所以尽量减少和避免进行update/delete操作

    通过 select * from system.mutations    可以判断后台操作进行是否完成 


    -------------- create -------------------- 
    create TABLE test.test( id Int32,create_date Date ,c2 Nullable(String) ) 
    engine=MergeTree(create_date,id,(c3,c2),8192);


    create TABLE test.test2( id Int32,create_date Date ,c2 Nullable(String) ) 
    engine=MergeTree(create_date,id,8192);


    --支持联合索引
    MergeTree(EventDate, (CounterID, EventDate), 8192)

    MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192)


    CREATE TABLE t (a UInt8) ENGINE = Memory;

    split -b 20g table_name.sql user_group

    -------------- ALTER-------------------- 
    ALTER查询只支持MergeTree表,。该查询有几个变体。
    ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|MODIFY COLUMN
    --更改列的类型

    alter TABLE test.ontime_wpp_t MODIFY COLUMN TailNum Nullable(String);

    alter TABLE test.ontime_wpp_t ADD COLUMN TailNum2 Nullable(String)  after   Div5TailNum ;

    alter TABLE test.ontime_wpp_t drop COLUMN TailNum2;

    表变更数据系统监控
    select * from system.mutations where is_done = 0 order by create_time desc limit 1;

    删除表
    drop table cdm_dwd.dwd_ord_car_sharing_df on cluster crm_4shards_1replicas;


    select max(ckbizdate) from cdm_dwd.dwd_mkt_coupon_ord_df 

    变更表名
    RENAME TABLE test.ontime_wpp_t to test.ontime_wpp_t2;

    集群操作

    RENAME TABLE cdm_dwd.dwd_ord_carsh_base_df2 to cdm_dwd.dwd_ord_carsh_base_df on cluster crm_4shards_1replicas;


    SET param = value
    允许您设置param值,如果指定了全局,则为会话或服务器(全局)设置设置。
    在创建全局设置时,设置并不适用于已经运行的会话,包括当前会话。它只会用于新会话。当服务器重新启动时,
    使用SET的全局设置就会丢失。要在服务器重新启动后进行设置

    OPTIMIZE table test.ontime;

    仅支持MergeTree引擎, 用于回收闲置的数据库空间,当表上的数据行被删除时,
    所占据的磁盘空间并没有立即被回收,使用了OPTIMIZE TABLE命令后这些空间将被回收,
    并且对磁盘上的数据行进行重排

    数据导出
    在相关节点执行
    echo 'select * from test.ads_user_portrait_vertical_df_cls' | curl localhost:8123?database=test -uroot:root -d @- > table_name.sql


    2、导入数据,以tab作为分割符:

    导入数据库的本机执行:cat table_name.sql | clickhouse-client --query="INSERT INTO database.table_name FORMAT TabSeparated"
         
         
    cat /root/user_lable_local_mid_cluster.tgz | clickhouse-client --user hadoop --password hadoop --query="INSERT INTO lmmbase.user_lable_local_mid_cluster FORMAT CSV";
    cat /home/hadoop/work_wpp/user_label_uid_cluster | clickhouse-client --user hadoop --password hadoop --query="INSERT INTO lmmbase.user_label_uid_cluster FORMAT CSV";

    cat /tmp/test_user2| clickhouse-client --user hadoop --password hadoop --query="INSERT INTO lmmbase.test_user2 FORMAT CSV";

    -------------- INSERT-------------------- 
    不严格插入数据,没有出现的列自动填充为默认值
    INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22)

    严格插入数据,每一列都必须出现在上面
    INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23)

    cat /tmp/user_point_info | clickhouse-client --query="INSERT INTO test.user_point_info FORMAT CSV";

    cat /home/hadoop/ontime | clickhouse-client  --query="INSERT INTO test.ontime_wpp FORMAT CSV";

    cat /root/wpp5.log | clickhouse-client --query="INSERT INTO test.test FORMAT CSV";

    clickhouse-client -m --user hadoop --password hadoop --query="truncate table lmmbase.user_label_uid on cluster crm_4shards_1replicas";

    ssh hadoop@dn1 "/bin/bash /home/hadoop/app/otherApp/truncate_user_label_uid_data.sh"

    clickhouse-client --query=" alter table  test.ads_user_portrait_vertical_df delete where create_time ='2019-10-17' ";

    clickhouse-client --query=" alter table  default.test_df delete where ckbizdate ='2019-10-17' ";

    相关压测,同时执行相关sql,看下机器负载
    */2 * * * * clickhouse-client -m --query="select t_mac,t_type,count(*) cnt from carendpoint_porlog_cls group by t_mac,t_type order by cnt desc limit 100;"
    */2 * * * * clickhouse-client  -m --query="select t_mac,count(*) cnt from carendpoint_porlog_cls group by t_mac order by cnt desc limit 100;"
    */2 * * * * clickhouse-client  -m --query="select t_type,count(*) cnt from carendpoint_porlog_cls group by t_type order by cnt desc limit 100;"

    */1 * * * * clickhouse-client  -m --query="select t_ip,t_type,count(*) cnt from carendpoint_porlog_cls group by t_ip,t_type order by cnt desc limit 100;" >> /root/wpp1.log
    */1 * * * * clickhouse-client  -m --query="select t_ip,count(*) cnt from carendpoint_porlog_cls group by t_ip order by cnt desc limit 100;" >> /root/wpp2.log
    */1 * * * * clickhouse-client  -m --query="select event,count(*) cnt from carendpoint_porlog_cls group by event order by cnt desc limit 100;" >> /root/wpp2.log

     
     

    直接支持hdfs

    drop table test_hdfs;
    CREATE  TABLE test_hdfs
    (
    rowid Int64,
    create_date String,
    memo String
    )
    ENGINE=HDFS('hdfs://gtdata03:8020/input/test2/test2.txt','CSV');


    sql 文件执行

    clickhouse-client -h  127.0.0.1 -m -d system -q "select * from parts " > test.sql 

    dn4
    clickhouse-client -h  127.0.0.1 -m -d system -q '/home/hadoop/ch.sql' > test.sql 

    select today()-365;


    drop table lmmbase.user_label_uid_hive_cluster on cluster crm_4shards_1replicas;

    drop table lmmbase.user_label_uid_hive on cluster crm_4shards_1replicas;


    create database  on cluster crm_4shards_1replicas;

    row_number 函数支持,下面是支持 !!!!!!!!!!!!!
    select *,rowNumberInAllBlocks() rn
    from(
    select id,create_date,c2
    from test 
    order by  create_date desc 
    limit 2 by id
    )aa ;


    SELECT *, rn +1 -min_rn current, max_rn - min_rn + 1 last FROM (
    SELECT *, rowNumberInAllBlocks() rn FROM (
    SELECT i_device, i_time
    FROM tbl
    ORDER BY i_device, i_time
    ) t
    ) t1 LEFT JOIN (
    SELECT i_device, min(rn) min_rn, max(rn) max_rn FROM (
    SELECT *, rowNumberInAllBlocks() rn FROM (
    SELECT i_device, i_time
    FROM tbl
    ORDER BY i_device, i_time
    ) t
    ) t GROUP BY i_device
    ) t2 USING (i_device)

    select trainId,ap_mac,
     rowNumberInAllBlocks() rn
    from carendpoint_usermacprobe 
    group by trainId,ap_mac
    limit 10;


    select trainId,ap_mac,
    rowNumberInAllBlocks() rn
    from carendpoint_usermacprobe 
    group by trainId,ap_mac
    order by  trainId desc 
    limit 3 by trainId;


    dn3 

    select id,create_date,c2,
    rowNumberInAllBlocks() rn
    from test 
    group by  id,create_date,c2
    order by  create_date desc 
    limit 3 by id;


    select *,rowNumberInAllBlocks() rn
    from(
    select id,create_date,c2
    from test 
    group by  id,create_date,c2
    order by  create_date desc 
    limit 2 by id
    )aa ;

    insert into test select 11,'2019-07-25','tt5';


    select trainId,ap_mac,
    ROW_NUMBER() OVER (PARTITION BY trainId order by trainId  ) AS  row 
    from carendpoint_usermacprobe limit ;


    ----------------------delete 
    删除表的数据,对主键支持的可以,非主键有时数据删除有问题
    alter table user_label_uid_hive delete where id = 1000000010002 
    alter table user_label_uid_hive delete where id = 1000000010002 

    truncate table ads_user_portrait_vertical_df_cls   ON CLUSTER crm_4shards_1replicas;


    -- 数据去重
    select user_id, order_id, create_time, update_time from unity_order 
    where create_time> '2018-09-01' order by create_time asc limit 2 by user_id limit 20;

    insert into user2( ROWKEY2, AV, IMEI, MAC, CHID, PB, PLAT,UID,PTIME) values
    ( "20190905", "2.1.6", "54b6aeba8581413ca487eda6caa62a55", "7c:76:68:19:34:16", "NULL22", "HONOR", "2", "tt", "2019-09-05");
    ,( "20190905_USERID257641565864162599", "1.0.0", "USERID257641565864162599", "other", "other", "iPhone", "3", "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJhY2NvdW50SWQiOiIxMTA0OTQzMTM5Mjg3ODU5MjAwIiwibW9iaWxlIjoiMTc2ODIzMDIwNDkiLCJ0eXAiOiJKV1QiLCJleHAiOjE1Njg1Mjc3MDgsImNvcnBDb2RlIjoiQ1JHVCIsImFsZyI6IkhTMjU2IiwiaWF0IjoxNTY3NjYzNzA4LCJsb2dpblNvdXJjZSI6IjEifQ.tE_59BIsNqGdbUe5HslFZ93_vyNQE2EUHwvlyOkCQhc", "2019-09-05")


    CREATE TABLE test.user2 (`ROWKEY2` String, `AV` String, `IMEI` String, `MAC` String, `CHID` String, `PB` String, `PLAT` String, `UID` String, `PTIME` Date) ENGINE = MergeTree(PTIME, ROWKEY2, 8192)


    insert into user2( ROWKEY2, AV, IMEI, MAC, CHID,  PB, PLAT,UID,PTIME) values
    ("20190905", "2.1.6", "54b6aeba8581413ca487eda6caa62a55", "7c:76:68:19:34:16",  "ttt", "HONOR", "2", "tt", "2019-09-05");


    alter table user_label_uid_hive delete where id = 1000000010002 


    alter table user_label_uid_hive delete where user_id = 10035 

    select id,user_id from  user_label_uid_hive  where user_id = 10035 


    alter table user_label_uid_hive update browse_category_id = 11 where mobile_number in (13482154101) ;


    alter table carendpoint_porlog_cls update utime = 999 where t_mac='VIVO-Y55A';

    select count(*) from carendpoint_porlog_cls where t_mac='VIVO-Y55A' limit 10;


    select * from carendpoint_porlog where t_mac='VIVO-Y55A' limit 10;

    alter table carendpoint_porlog update user_agent = '999' where t_mac='VIVO-Y55A' ;


    alter table carendpoint_porlog update user_agent = '666' where utime=-7200 and t_type='MI8-xiaomishouji';
    OPTIMIZE table carendpoint_porlog;


    select * from carendpoint_porlog where utime=-7200 and t_type='MI8-xiaomishouji'

    更新时加上索引优化。这样查询才快点,十亿级别更新,十秒完成。

    alter table carendpoint_porlog update user_agent = '999000' where t_mac='VIVO-Y55A' ;


    alter table carendpoint_porlog delete where t_mac='VIVO-Y55A' ;

    alter table carendpoint_porlog delete where utime=-7200  ;

    select * from carendpoint_porlog where t_mac='VIVO-Y55A' limit 10;

    select count(*) from carendpoint_porlog where t_mac='VIVO-Y55A' limit 10;
     

    alter table user_label_uid_hive update browse_category_id = 19999 where  browse_category_id in (18880);
    select count(*) from  user_label_uid_hive  where browse_category_id in (19999); 

    alter table user_label_uid_hive update browse_category_id = 11888 where  browse_category_id in (11);
    select count(*) from  user_label_uid_hive  where browse_category_id in (11888); 


    alter table user_label_uid_hive delete where mobile_number in (13028815355) ;


    --- 直接所有表数据删除

    truncate table lmmbase.user_label_uid on cluster crm_4shards_1replicas;


    create database test on cluster crm_4shards_1replicas;

    truncate table default.t22_cluster on cluster 'crm_4shards_1replicas';


    alter table cdm_dws.dws_log_full_di delete where gbizdate = '20190930'  on cluster crm_4shards_1replicas;


    alter table cdm_dws.dws_log_full_di  on cluster crm_4shards_1replicas delete where gbizdate = '20190930'  ;


    drop  table cdm_dwd.dwd_ord_carsh_base_df_cls on cluster crm_4shards_1replicas;


    rename table cdm_dwd.dwd_ord_carsh_base_df_cls on cluster crm_4shards_1replicas;

    create TABLE default.t3 ( id Int32,create_date Date ,c2 String,c3 Nullable(String) )
    engine=MergeTree(create_date,id,8192);


    create TABLE default.t31 ON CLUSTER crm_4shards_1replicas( id Int32,create_date Date ,c2 String,c3 Nullable(String) )
    engine=MergeTree(create_date,id,8192);


    create TABLE default.t3 on cluster crm_4shards_1replicas ( id Int32,create_date Date ,c2 Nullable(String) )
    engine=MergeTree(create_date,id,(c3,c2),8192);


    create table default.t3 on cluster crm_4shards_1replicas (id UInt8, name String, t Date)  
    engine = MergeTree(t, id, id, 2);

    create table default.t3_cluster on cluster crm_4shards_1replicas (id UInt8, name String, t Date)  
    engine = Distributed('crm_4shards_1replicas', 'default', 't3', rand());

    --------------------cxw --直接建表

    drop  DATABASE db2 ON CLUSTER crm_4shards_1replicas;

    truncate table default.t1 on cluster crm_4shards_1replicas;

    truncate table lmmbase.test_user2 on cluster crm_4shards_1replicas;

    truncate table lmmbase.user_label_uid_hive on cluster crm_4shards_1replicas;

    create TABLE default.porlog_test on cluster crm_4shards_1replicas ( mac String ,train_id Nullable(String),create_date Date) engine=MergeTree(create_date,mac,8192);

    create table default.porlog_test_cls on cluster crm_4shards_1replicas ( mac String ,train_id Nullable(String),create_date Date ) 
    engine = Distributed('crm_4shards_1replicas', 'default', 'porlog_test', rand());


    create TABLE test.test2( id Int32,create_date Date ,c2 Nullable(String) ) 
    engine=MergeTree(create_date,id,8192);

    insert into default.test_cluster values
    (11,'2018-10-10','11'),
    (22,'2018-10-11','22'),
    (33,'2018-10-11','33');


    select id,create_date,c2,any(c3) from default.test_cluster;

    select id,create_date,c2 from default.test_cluster where id<5
    union all
    select id,create_date,c2 from default.test_cluster where id>11


    drop table crm_pub.prod_startdistrict_detail_cluster  on cluster crm_4shards_1replicas;

    drop table test.user_point_info2_cls  on cluster crm_4shards_1replicas;

    drop table crm_pub.biz_category_cluster  on cluster crm_4shards_1replicas;

    drop table lmmbase.user_label_uid_hive  on cluster crm_4shards_1replicas;

    drop database crm_user  on cluster crm_4shards_1replicas;

    drop database crm_market  on cluster crm_4shards_1replicas;

    create database cdm_dws  on cluster crm_4shards_1replicas;

    drop database test  on cluster crm_4shards_1replicas;


    drop table lmmbase.user_label_uid_hive  on cluster crm_4shards_1replicas;

    drop table lmmbase.user_label_uid_hive_cluster  on cluster crm_4shards_1replicas;

    clickhouse-client -m 

    ---------------------------------output--------------------  
    select * from test.test_user2 INTO OUTFILE '/tmp/test_user2' format CSV;

    select * from test.user_point_info limit 500000 INTO OUTFILE '/tmp/user_point_info' format CSV;


    -- 复杂表查询

    drop TABLE l.user_orders;

    create TABLE test.user_orders( order_id Int64,create_time Date,user_id Nullable(String),
    user_name Nullable(String),order_status Nullable(String),user_status Nullable(String) ) 
    engine=MergeTree(create_time,order_id,8192);


    insert into test.user_orders 
    select distinct order_id,create_time,user_id,user_name,order_status,user_status
    from( 
    select order_id, user_no AS user_id,order_status,toDate(create_time) as create_time   
    from test.unity_order ) 
    ANY LEFT JOIN
    (select user_id,user_name,user_status,
    mobile_number from  default.user_user ) 
    USING user_id ;

    -------------------------select  相关格式代码------------------------- 


    --- union all

    select id,create_date,c2 from test.test where id<5
    union all 
    select id,create_date,c2 from test.test where id>11;
    select groupArray(variable_id) from test.seo_test limit 10;


    -- TSKV
    SELECT Carrier, avg(DepDelay > 10) * 1000 AS c3 FROM ontime 
    WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier FORMAT TSKV;

    --csv 逗号分割
    SELECT Carrier, avg(DepDelay > 10) * 1000 AS c3 FROM ontime 
    WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier FORMAT CSV;


    --JSON 逗号分割,可用于 http接口调用
    SELECT Carrier, avg(DepDelay > 10) * 1000 AS c3 FROM ontime 
    WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier FORMAT JSON;

    --XML 逗号分割,可用于 http接口调用
    SELECT Carrier, avg(DepDelay > 10) * 1000 AS c3 FROM ontime 
    WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier FORMAT XML;


    ---生成的数据打包成json 数据格式
    select user_id,groupArray(order_id) from test.unity_order GROUP BY user_id limit 10;

    wget -O- -q 'http://localhost:8123/?query=SELECT 1'

    ----- http 请求查询结果

    echo 'SELECT Carrier, avg(DepDelay > 10) * 1000 AS c3 FROM test.ontime WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier' | curl 'http://localhost:8123/?query=' --data-binary @-
    格式化返回结果
    echo 'SELECT Carrier, avg(DepDelay > 10) * 1000 AS c3 FROM test.ontime WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier  FORMAT Pretty' | curl 'http://localhost:8123/?query=' --data-binary @-

    echo 'CREATE TABLE t1 (a UInt8) ENGINE = Memory' | POST 'http://localhost:8123/'
    echo 'insert into t1 (a) values (10)' | POST 'http://localhost:8123/'
    GET "http://localhost:8123?query=select * from t"
    echo 'drop table t1' | POST 'http://localhost:8123/'

    jdbc 方式连接
    https://github.com/yandex/clickhouse-jdbc


    -----web访问界面
    https://github.com/tabixio/tabix


    clickhouse作为目前所有开源MPP计算框架中计算速度最快的,它在做多列的表,同时行数很多的表的查询时,性能是很让人兴奋的,但是在做表的join时,它的性能是不如单宽表查询的,但是了解了clickhouse在做join的时候的处理过程,利用的好的话,也会带来很大的效率提升,下面就详细介绍一下:

       1.clickhouse做LEFT JOIN时的操作过程

           比如做两表JOIN时,会优先计算右表结果,将右表结果存储到内存中,然后跟左边表的数据进行做match

       2.join操作的秒级产出

          首先,clickhouse在做单表查询时速度是快的惊人的,因此只要左表和右表查出来的数据量是不大的,那么整个查询响应时间仍然会是秒级产出的。


    -- ANY LEFT JOIN -- 匹配一条则结束
    select distinct order_id,user_id,user_name,user_status,
    mobile_number as mobile,order_status 
    from( 
    select order_id, user_no AS user_id,order_status   
    from test.unity_order where order_id in(55719970 ,55720002)
    ) ANY LEFT JOIN
    (select user_id,user_name,user_status,
    mobile_number from  default.user_user ) 
    USING user_id FORMAT CSV;

    -- ALL LEFT JOIN 
    select distinct order_id,user_id,user_name,user_status,
    mobile_number as mobile,order_status 
    from( 
    select order_id, user_no AS user_id,order_status   
    from test.unity_order where order_id in(55719970 ,55720002)
    ) ALL LEFT JOIN
    (select user_id,user_name,user_status,
    mobile_number from  default.user_user ) 
    USING user_id FORMAT CSV;


    -- 三个表相连
     
    select distinct order_id,user_id,user_name,user_status,
    mobile_number as mobile,order_status,send_time,send_id
    from (
        select distinct order_id,user_id,user_name,user_status,
        mobile_number,order_status 
        from( 
        select order_id, user_no AS user_id,order_status   
        from test.unity_order_cluster where order_id in(55719970 ,55720002)
        ) ANY INNER JOIN
        (select user_id,user_name,user_status,
        mobile_number from  default.user_user )
        using user_id
    )
    ANY left JOIN 
    (select target as mobile_number,send_time,id as send_id
    from  test.mkt_sms_log_cluster ) 
    USING mobile_number
    FORMAT CSV;


    ---------------array join 
    -- https://clickhouse.yandex/docs/en/query_language/select/
    --------------- array join 
    CREATE TABLE arrays_test (s String, arr Array(UInt8)) ENGINE = Memory;

    INSERT INTO arrays_test VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []);
    INSERT INTO arrays_test VALUES ('Goodbye2', [23]);
    SELECT * FROM arrays_test;

    SELECT s, arr FROM arrays_test ARRAY JOIN arr;
    SELECT s, arr, a FROM arrays_test ARRAY JOIN arr AS a;

    -- 针对map进行相加操作
    -- 相同大小的多个数组可以在数组连接子句中进行逗号分隔。在这种情况下,连接是同时执行的
    SELECT s, arr, a, num, mapped 
    FROM arrays_test ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 2, arr) AS mapped;

    SELECT s, arr, a, num, arrayEnumerate(arr) 
    FROM arrays_test ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num

    ---------------nested join

    CREATE TABLE nested_test (s String, nesta Nested(x UInt8, y UInt32)) ENGINE = Memory;
    INSERT INTO nested_test VALUES ('Hello', [1,2], [10,20]), 
    ('World', [3,4,5], [30,40,50]), ('Goodbye', [], []);
    INSERT INTO nested_test VALUES ('Goodbye_nest', [3,3], [101,201]); 


    SELECT * FROM nested_test;
    SELECT s, nesta.x, nesta.y FROM nested_test ARRAY JOIN nesta;

    针对某个进行查询
    SELECT s, nesta.x, nesta.y FROM nested_test ARRAY JOIN nesta where nesta.x=3;

    --一个用户下挂了多个订单
    SELECT s, nesta.x, nesta.y FROM nested_test ARRAY JOIN nesta where s='Hello';

    --对x值进行拆分
    SELECT s, nesta.x, nesta.y FROM nested_test ARRAY JOIN nesta.x;

    --对x值进行拆分
    SELECT s, nesta.x, nesta.y FROM nested_test ARRAY JOIN nesta.y;

    -- 对x,y进行拆分
    SELECT s, nesta.x, nesta.y FROM nested_test ARRAY JOIN nesta.x, nesta.y;

    ---- 对数组进行枚举
    SELECT s, num FROM nested_test  ARRAY JOIN arrayEnumerate(nesta.x) as num;

    --
    SELECT s, n.x, n.y, nesta.x, nesta.y, num FROM nested_test 
    ARRAY JOIN nesta AS n, arrayEnumerate(nesta.x) as num;


    ------------------------nested join 2级嵌套 ----------------------------------

    ---
    CREATE TABLE nested_test2 (s String, nest1 Nested(x UInt8, y UInt32),nest2 Nested(x2 Int64,y2 Int64))
     ENGINE = Memory;
    INSERT INTO nested_test2 VALUES ('Hello',[1,2],[10,20],[11,21],[11,21]),
    ('World', [3,4,5], [30,40,50],[31,41,51], [301,401,501]), ('Goodbye', [], [], [], []),('Goodbye_nest', [3,3], [101,201],[31,31], [1011,2011]);

    INSERT INTO nested_test2 VALUES ('Goodbye_nest', [3,3], [101,201],[31,31], [1011,2011]); 


    SELECT * FROM nested_test2;
    SELECT s, nest1.x, nest1.y,nest2.x2 FROM nested_test2 ARRAY JOIN nest1;

    针对某个进行查询
    SELECT s, nest1.x, nest1.y FROM nested_test2 ARRAY JOIN nest1 where nest1.x=3;

    --一个用户下挂了多个订单
    SELECT s, nest1.x, nest1.y,nest2.x2 ,nest2.y2  FROM nested_test2 ARRAY JOIN nest1 where s='Hello';

    --对x值进行拆分
    SELECT s, nest1.x, nest1.y FROM nested_test2 ARRAY JOIN nest1.x;

    --对x值进行拆分
    SELECT s, nest1.x, nest1.y FROM nested_test2 ARRAY JOIN nest1.y;

    -- 对x,y进行拆分
    SELECT s, nest1.x, nest1.y FROM nested_test2 ARRAY JOIN nest1.x, nest1.y;

    ---- 对数组进行枚举
    SELECT s, num FROM nested_test2  ARRAY JOIN arrayEnumerate(nest1.x) as num;

    --
    SELECT s, n.x, n.y, nest1.x, nest1.y,num FROM nested_test2 
    ARRAY JOIN nest1 AS n, arrayEnumerate(nest1.x) as num;

    select order_id,user_id,user_no from unity_order where user_no is null limit 10;


    --  临时表(TEMPORARY)

    在所有情况下,如果指定临时表,就会创建临时表。临时表有以下特点:

    临时表在会话结束时消失,包括连接丢失。
    一个临时表是用内存引擎创建的。其他表引擎不受支持。
    DB不能为临时表指定。它是在数据库之外创建的。
    如果临时表与另一个表的名称相同,并且查询指定表名而不指定DB,则使用临时表。
    对于分布式查询处理,查询中使用的临时表被传递给远程服务器。

    CREATE TEMPORARY TABLE IF NOT EXISTS test.member_new_cp ENGINE = MergeTree 
    AS SELECT * from  test.member_new;

    --success
    CREATE TEMPORARY TABLE IF NOT EXISTS test.member_new_cp ENGINE = Memory 
    AS SELECT * from  test.member_new;


    -- 创建视图

    CREATE VIEW member_new_view AS SELECT * from test.member_new;

    CREATE VIEW member_new_view_simple AS SELECT ID,user_id,member_grade from test.member_new;

    drop table member_new_orders_view;
    CREATE VIEW member_new_orders_view AS 
    SELECT user_id,count(order_id) as cnt
    from  (
    SELECT user_id,member_grade from test.member_new 
    )any left join 
    (select user_no as user_id,order_id from test.unity_order
    )using user_id
    group by user_id
    order by cnt desc;


    drop table member_new_orders_view;
    CREATE VIEW member_new_orders_view AS 
    SELECT user_id,count(order_id) as cnt
    from  (
    SELECT user_id,member_grade from test.member_new 
    )any left join 
    (select user_no as user_id,order_id from test.unity_order
    )using user_id
    group by user_id
    order by cnt desc;

    -- 全匹配数据 all left join 

    drop table member_new_orders_view_all;
    CREATE VIEW member_new_orders_view_all AS 
    SELECT user_id,count(order_id) as cnt
    from  (
    SELECT user_id,member_grade from test.member_new 
    )all left join 
    (select user_no as user_id,order_id from test.unity_order
    )using user_id
    group by user_id
    order by cnt desc;

    --查询操作符和子查询可以发生在查询的任何部分,包括聚合函数和lambda函数
    select id,create_date,c2,(select max(AirlineID) from test.ontime_wpp) as aa from test.test;


    SELECT
        EventDate,
        avg(UserID IN
        (
            SELECT UserID
            FROM test.hits
            WHERE EventDate = toDate('2014-03-17')
        )) AS ratio
    FROM test.hits
    GROUP BY EventDate
    ORDER BY EventDate Asc

    select length(remark) from test.unity_order limit 10;


    select order_id,length(remark) len,remark ,str(remark,0,3)
    from test.unity_order where length(remark) >= 1 limit 100;


    --

    create table t1 (id UInt16, name String) ENGINE=TinyLog;
    create table t2 (id UInt16, name String) ENGINE=TinyLog;
    create table t3 (id UInt16, name String) ENGINE=TinyLog;

    insert into t1(id, name) values (1, 'first');
    insert into t1(id, name) values (12, 'first2');

    insert into t1(id, name) values (13, 'first4');
    insert into t2(id, name) values (2, 'xxxx');
    insert into t3(id, name) values (12, 'i am in t3');

    insert into t1(id, name) values (1, 'first');

    insert into test_df select 11, 'first2','2019-10-13';

    CREATE TABLE default.test_df (`code` String, `description` Nullable(String), `ckbizdate` Date) ENGINE = MergeTree(ckbizdate, code, 8192);

    insert into test_df select 132, 'first2','2019-10-20';

    ssh dn2 'clickhouse-client --query="alter table  default.test_df delete where ckbizdate =2019-10-20 " '

    --表进行合并查询,不支持更新操作
    create table t_merge (id UInt16, name String) ENGINE=Merge(currentDatabase(), '^t');

    select  * from t_merge 

    select _table,* from t_merge order by id desc

    --- 
    create table t_repalce (gmt  Date, id UInt16, name String, point UInt16) 
    ENGINE=ReplacingMergeTree(gmt, (name), 10, point);


    insert into t_repalce (gmt, id, name, point) values ('2017-07-10', 1, 'a', 20);
    insert into t_repalce (gmt, id, name, point) values ('2017-07-10', 1, 'a', 30);
    insert into t_repalce (gmt, id, name, point) values ('2017-07-11', 1, 'a', 20);
    insert into t_repalce (gmt, id, name, point) values ('2017-07-11', 1, 'a', 30);
    insert into t_repalce (gmt, id, name, point) values ('2017-07-11', 1, 'a', 10);
    insert into t_repalce (gmt, id, name, point) values ('2017-07-11', 2, 'a', 10);
    insert into t_repalce (gmt, id, name, point) values ('2017-07-11', 3, 'b', 10);
    insert into t_repalce (gmt, id, name, point) values ('2017-07-11', 3, 'c', 10);
    insert into t_repalce (gmt, id, name, point) values ('2017-07-11', 3, 'c', 20);

    --  多值判断
    SELECT 
        number, 
        caseWithoutExpression((number > 5) AND (number < 10), number + 1,
        (number >= 10) AND (number < 15), number * 2, 0)
    FROM system.numbers 
    WHERE number < 20
    LIMIT 16;

    -- 行转列
    select user_id,groupUniqArray(order_id) 
    from unity_order 
    group by user_id 
    having count(*) >4 limit 10; 

    select user_id,arrayMap(x-> toString(x), groupArray(order_id))
    from unity_order where order_id>0 and user_id != ''  group by user_id  having count(*) > 4 limit 10;

    -- json 格式化返回结果 
    select user_id,concat('[',toString(groupUniqArray(order_id)),']' ) json
    from unity_order 
    where user_id != ''  
    group by user_id  
    having count(*) >4 limit 10; 


    参考技术
    https://www.zouyesheng.com/clickhouse.html#toc8
    https://blog.csdn.net/vagabond6/article/details/79556968

    https://zhuanlan.zhihu.com/p/22165241
    常用函数
    https://blog.csdn.net/vagabond6/article/details/79580371
    ————————————————
    版权声明:本文为CSDN博主「wppwpp1」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
    原文链接:https://blog.csdn.net/wppwpp1/java/article/details/106021361

  • 相关阅读:
    mysql 需要掌握的重点
    Java基础知识之常见关键字以及概念总结
    abstract类中method
    java异常继承何类,运行时异常与一般异常的区别
    Java关键字final、static使用总结
    JAVA读取XML文件
    关于ApplicationContext的初始化
    web.xml配置详解
    maven javaProject打包发布成服务
    Spring Boot Actuator 配置和应用
  • 原文地址:https://www.cnblogs.com/cbugs/p/13394429.html
Copyright © 2011-2022 走看看