zoukankan      html  css  js  c++  java
  • db timedb / ClickHouse / clickhouse

    s

    https://clickhouse.yandex

    clickhouse 基础知识

    https://www.jianshu.com/p/a5bf490247ea  

    《大数据实时分析领域的黑马ClickHouse》二次解读

    https://blog.csdn.net/haitianxueyuan521/article/details/80983001

    ClickHouse 1秒,Vertica 5.42秒,Hive 279秒;

    ClickHouse比Vertia快约5倍,比Hive快279倍,比My SQL 快801倍 

    ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS)。

    https://clickhouse.yandex/docs/zh/ 

    常见的列式数据库有: Vertica、 Paraccel (Actian Matrix,Amazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise, Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。

    ClickHouse集群安装

    yum install -y libicu.x86_64

    rpm -ivh /opt/soft/clickhouse-server-common-19.5.3.8-1.el7.x86_64.rpm

    rpm -ivh /opt/soft/clickhouse-common-static-19.5.3.8-1.el7.x86_64.rpm

    rpm -ivh /opt/soft/clickhouse-server-19.5.3.8-1.el7.x86_64.rpm

    rpm -ivh /opt/soft/clickhouse-client-19.5.3.8-1.el7.x86_64.rpm

    mkdir -p /etc/clickhouse-server/conf.d

    mkdir -p /data/clickhouse/data;mkdir -p /data/clickhouse/log;mkdir -p /data/clickhouse/tmp

    chmod 777 -R /etc/clickhouse-server/conf.d

    chmod 777 -R /data  注:若启动失败,全部chmod 777 再走一遍,个渣CK软件。

    修改文件:/etc/clickhouse-server/config.xml

    修改文件:/etc/clickhouse-server/users.xml

    创建文件:/etc/clickhouse-server/conf.d/clickhouse_remote_servers.xml

    新建文件:/etc/clickhouse-server/conf.d/zookeeper-servers.xml

    新建文件:/etc/clickhouse-server/conf.d/macros.xml

    ClickHouse版本升级

    https://my.oschina.net/aubao/blog/2244621 

     安装clickhouse步骤,依次如下执行安装。

    [root@centos7 ~]# yum install -y unixODBC libicudata 

    [root@centos7 ~]# yum install -y libicu.x86_64

    [root@centos7 ~]# grep -q sse4_2 /proc/cpuinfo && echo “SSE 4.2 supported” || echo “SSE 4.2 not supported”
    “SSE 4.2 supported”

    1、安装rpm -ivh clickhouse-common-static-19.5.3.8-1.el7.x86_64.rpm2、安装rpm -ivh clickhouse-server-common-19.5.3.8-1.el7.x86_64.rpm 
    3、安装rpm -ivh clickhouse-server-19.5.3.8-1.el7.x86_64.rpm
    4、安装rpm -ivh clickhouse-client-19.5.3.8-1.el7.x86_64.rpm  

    [root@centos7 ~]# rpm -qa|grep clickhouse
    clickhouse-server-common-19.5.3.8-1.el7.x86_64
    clickhouse-common-static-19.5.3.8-1.el7.x86_64
    clickhouse-client-19.5.3.8-1.el7.x86_64
    clickhouse-server-19.5.3.8-1.el7.x86_64

    [root@centos7 ~]# clickhouse-server -V

    ClickHouse server version 19.5.3.1

    [root@centos7 ~]# /etc/init.d/clickhouse-server restart
    Start clickhouse-server service: Path to data directory in /etc/clickhouse-server/config.xml: /var/lib/clickhouse/
    DONE

    自定义ClickHouse 部署

    1、修改文件:/etc/clickhouse-server/config.xml

    <log>/opt/log/clickhouse-server/clickhouse-server.log</log>
    <errorlog>/opt/log/clickhouse-server/clickhouse-server.err.log</errorlog>

    Clickhouse单机部署以及从mysql增量同步数据

    https://www.cnblogs.com/gomysql/p/11199856.html 

    //创建数据库
    CREATE DATABASE datacollector on CLUSTER logs

    //创建日志事件物理表
    CREATE TABLE datacollector.log_event ON CLUSTER logs(id Int64,ip String,mac String,time DateTime,eId Int64,eCount Int64,eMsg String,eClass String,eLine Int64,eExceptions String) ENGINE = ReplicatedMergeTree('/clickhouse/logs/log_event/{shard_statistics}', '{replica_statistics}') PARTITION BY toYYYYMM(time) ORDER BY (id,mac) SETTINGS index_granularity = 8192
    //创建日志事件分布式表
    CREATE TABLE datacollector.log_event_all ON CLUSTER logs AS datacollector.log_event ENGINE = Distributed(logs, datacollector, log_event, rand())
    //查询某个任务的日志数据
    SELECT MAX(id) as max_id,SUM(eCount) as counts,eClass,eLine,groupArray(eMsg) as msgs,groupArray(time) as times,count() FROM datacollector.log_event_all WHERE id = 1 GROUP BY eClass,eLine

    //创建sample数据的物理表
    CREATE TABLE datacollector.perf_sample ON CLUSTER logs (id Int64,des String,sPath String,rTime Int64,counts Int64,mac String,cTime DateTime) ENGINE = ReplicatedMergeTree('/clickhouse/logs/perf_sample/{shard_statistics}', '{replica_statistics}')PARTITION BY cTime ORDER BY (id) SETTINGS index_granularity = 8192;
    //创建sample数据的分布式表
    CREATE TABLE datacollector.perf_sample_all ON CLUSTER logs AS datacollector.perf_sample ENGINE = Distributed(logs, datacollector, perf_sample, rand())
    //创建sample数据的物化视图
    CREATE MATERIALIZED VIEW datacollector.v_perf_sample ON CLUSTER logs ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/logs/v_perf_sample/{shard_statistics}', '{replica_statistics}') PARTITION BY id ORDER BY (id,sPath,des,rTime)as select id,sPath,des,rTime,sumState(counts) as numbers,anyState(mac) as address,maxState(cTime) as time from datacollector.perf_sample group by id,sPath,des,rTime
    //创建samples数据的分布式物化视图
    CREATE TABLE datacollector.v_perf_sample_all ON CLUSTER logs AS datacollector.v_perf_sample ENGINE = Distributed(logs, datacollector, v_perf_sample,rand())
    //查询某个任务的聚合结果
    SELECT id,sPath,des,rTime,sumMerge(numbers),maxMerge(time) FROM datacollector.v_perf_sample_all WHERE id = 20 group by id,sPath,des,rTime
    //查询所有任务的聚合结果
    SELECT id,sPath,des,rTime,sumMerge(numbers),maxMerge(time) FROM datacollector.v_perf_sample_all group by id,sPath,des,rTime


    //创建agent数据的物理表
    CREATE TABLE datacollector.agent_status ON CLUSTER logs (id Int64,ip String,mac String,fMem Int64,tMem Int64,uCPU Float32,receive Int64,send Int64,cTime DateTime) ENGINE = ReplicatedMergeTree('/clickhouse/logs/agent_status/{shard_statistics}', '{replica_statistics}') PARTITION BY toYYYYMM(cTime) ORDER BY (id,ip,mac) SETTINGS index_granularity = 8192
    //创建agent数据的分布式表
    CREATE TABLE datacollector.agent_status_all ON CLUSTER logs AS datacollector.agent_status ENGINE = Distributed(logs, datacollector, agent_status, rand())
    //查询agent数据
    SELECT * FROM datacollector.agent_status_all


    //创建事务数据的物理表
    CREATE TABLE datacollector.perf_transaction ON CLUSTER logs(id Int64,ip String,mac String,sPath String,errors Int64,tNum Int64,des String,tests Int64,thread String,rt Float32,rx Int64,minRT Float32,maxRT Float32, tps Int64,fBRT Int64,cTime DateTime,tTime Float32)ENGINE = ReplicatedMergeTree('/clickhouse/logs/perf_transaction/{shard_statistics}', '{replica_statistics}')PARTITION BY toYYYYMMDD(cTime) ORDER BY (id) SETTINGS index_granularity = 8192;
    //创建事务数据的分布式表
    CREATE TABLE datacollector.perf_transaction_all ON CLUSTER logs AS datacollector.perf_transaction ENGINE = Distributed(logs, datacollector, perf_transaction, rand())
    //创建事务数据的物列化视图
    CREATE MATERIALIZED VIEW datacollector.v_perf_transaction ON CLUSTER logs ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/logs/v_perf_transaction/{shard_statistics}', '{replica_statistics}') PARTITION BY toYYYYMM(create_time) ORDER BY (id,path,des,create_time) as select id,sPath as path,des,sumState(errors) as total_errors,sumState(toFloat32OrZero(thread)) as total_threads,sumState(rt) as total_response_time,sumState(rx) as total_traffic,sumState(tests) as total_tests,sumState(fBRT) as total_first_byte_response_time,minState(minRT) as min_response_time,maxState(maxRT) as max_response_time,sumState(tps) as total_tps,cTime as create_time,sumState(tTime) as total_time from datacollector.perf_transaction group by id,path,des,create_time
    //创建事务数据的分布式物列化视图
    CREATE TABLE datacollector.v_perf_transaction_all ON CLUSTER logs AS datacollector.v_perf_transaction ENGINE = Distributed(logs, datacollector, v_perf_transaction,rand())
    //查询事务聚合结果
    SELECT id,path,des,create_time,sumMerge(total_errors) as total_errors,sumMerge(total_threads) as total_threads,sumMerge(total_time)/sumMerge(total_tests) as response_time ,sumMerge(total_first_byte_response_time) as total_first_byte_response_time,minMerge(min_response_time) as min_response_time,maxMerge(max_response_time) as max_response_time,sumMerge(total_tps) as total_tps FROM datacollector.v_perf_transaction_all GROUP BY id,path,des,create_time


     

    end

    yum install -y libicu.x86_64

    rpm -ivh /opt/soft/clickhouse-server-common-19.5.3.8-1.el7.x86_64.rpm

    rpm -ivh /opt/soft/clickhouse-common-static-19.5.3.8-1.el7.x86_64.rpm

    rpm -ivh /opt/soft/clickhouse-server-19.5.3.8-1.el7.x86_64.rpm

    rpm -ivh /opt/soft/clickhouse-client-19.5.3.8-1.el7.x86_64.rpm

  • 相关阅读:
    驾照更换说明
    批处理创建快捷方式
    AC中保存数据与查询数据
    logger日志模块
    如何将python脚本转化为exe
    numpy学习
    request是个什么东西
    django的test文件的使用方式
    高频正则表达式
    dir 的作用
  • 原文地址:https://www.cnblogs.com/lindows/p/11474697.html
Copyright © 2011-2022 走看看