zoukankan      html  css  js  c++  java
  • 项目实战 从 0 到 1 学习之Flink(17)Flink实时日志数据写入Clickhouse

    一、背景

    每天上百亿的日志数据实时查询是个挑战,在架构设计上采用了Kafka + Flink + Clickhouse+Redash,实现海量数据的实时分析。计算层,我们开发了基于Flink计算引擎的实时数据平台,简化开发流程,数据通过配置化实现动态Schema生成,底层数据解析统一,无需重复造轮子,整个数据链路,从数据的采集,转换,存储,可视化,无需写一行代码,配置化完成。本文主要介绍实时日志数据写入Clickhouse的实践。

    Flink Clickhouse Sink

    <dependency>
        <groupId>ru.yandex.clickhouse</groupId>
        <artifactId>clickhouse-jdbc</artifactId>
        <version>0.1.50</version>
    </dependency>
    public class ClickhouseSink extends RichSinkFunction<Row> implements Serializable {
        private String tablename;
        private String[] tableColums; 
        private List<String> types;   
        private String[] columns;    
        private String username;     
        private String password;     
        private String[] ips;   
        private String drivername = "ru.yandex.clickhouse.ClickHouseDriver";
        private List<Row> list = new ArrayList<>();
        private List<PreparedStatement> preparedStatementList = new ArrayList<>();
        private List<Connection> connectionList = new ArrayList<>();
        private List<Statement> statementList = new ArrayList<>();
    
        private long lastInsertTime = 0L;
        private long insertCkTimenterval = 4000L;
        // 插入的批次
        private int insertCkBatchSize = 10000;
    
        public ClickhouseSink(String tablename, String username, String password, String[] ips, String[] tableColums, List<String> types, String[] columns) {
            this.tablename = tablename;
            this.username = username;
            this.password = password;
            this.ips = ips;
            this.tableColums = tableColums;
            this.types = types;
            this.columns = columns;  // 新增字段
        }
    
        // 插入数据
        public void insertData(List<Row> rows, PreparedStatement preparedStatement, Connection connection) throws SQLException {
    
            for (int i = 0; i < rows.size(); ++i) {
                Row row = rows.get(i);
                for (int j = 0; j < this.tableColums.length; ++j) {
                    if (null != row.getField(j)) {
                        preparedStatement.setObject(j + 1, row.getField(j));
    
                    } else {
                        preparedStatement.setObject(j + 1, "null");
                    }
                }
                preparedStatement.addBatch();
            }
    
            preparedStatement.executeBatch();
            connection.commit();
            preparedStatement.clearBatch();
        }
    
    
        /**
         * 新增字段修改表添加列
         *
         * @param statement
         * @throws Exception
         */
        public void tableAddColumn(Statement statement) {
            try {
                if (null != this.columns && this.columns.length > 0) {
    
                    /**
                     * table 增加字段
                     */
                    // 获取原表字段名
                    String querySql = "select * from " + this.tablename + " limit 1";
    
                    ResultSet rs = statement.executeQuery(querySql);
                    ResultSetMetaData rss = rs.getMetaData();
                    int columnCount = rss.getColumnCount();
    
                    List<String> orgTabCols = new ArrayList<>();
                    for (int i = 1; i <= columnCount; ++i) {
                        orgTabCols.add(rss.getColumnName(i));
                    }
    
                    // 对比两个数组,判断新增字段是否在原来的表中
                    Collection collection = new ArrayList<String>(orgTabCols);
                    boolean exists = collection.removeAll(Arrays.asList(this.columns));
    
                    // 新增字段不在原来的表中,执行添加列操作
                    if (!exists) {
    
                        for (int i = 0; i < this.columns.length; ++i) {
                            String str = "";
                            String str_all = "";
    
                            StringBuilder sb = null;
                            StringBuilder sb_all = null;
                            if (i == 0) {
                                sb.append("alter table " ).append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(orgTabCols.get(orgTabCols.size() - 1));
                                sb_all.append("alter table " ).append("_all").append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(orgTabCols.get(orgTabCols.size() - 1));
    
                            } else {
                                sb.append("alter table " ).append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(this.columns[i - 1]);
    
                                sb_all.append("alter table " ).append("_all").append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(this.columns[i - 1]);
                            }
    
                            if (StringUtils.isNotEmpty(sb.toString())) {
                                statement.executeUpdate(sb.toString());
                            }
    
                            if (StringUtils.isNotEmpty(sb_all.toString())) {
                                statement.executeUpdate(sb_all.toString());
                            }
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        // 根据IP创建连接
        public void createConnection() throws Exception {
    
            // 插入语句
            String insertStr = StrUtils.clickhouseInsertValue(this.tableColums, this.tablename);
            // 创建表
            List<String> createtableStrList = StrUtils.clickhouseCreatTable(this.tableColums, this.tablename, Constant.CKCLUSTERNAME, this.tableColums[3], this.types);
            // 创建数据库
            String create_database_str = "create database if not exists " + this.tablename.split("\.")[0];
    
            for (String ip : this.ips) {
                String url = "jdbc:clickhouse://" + ip + ":8123";
                Connection connection = DriverManager.getConnection(url, this.username, this.password);
                Statement statement = connection.createStatement();
    
                // 执行创建数据库
                statement.executeUpdate(create_database_str);
    
                // 执行创建表
                statement.executeUpdate(createtableStrList.get(0));
                statement.executeUpdate(createtableStrList.get(1));
    
                // 增加表字段
                tableAddColumn(statement);
    
                this.statementList.add(statement);
                PreparedStatement preparedStatement = connection.prepareStatement(insertStr);
                connection.setAutoCommit(false);
                this.preparedStatementList.add(preparedStatement);
                this.connectionList.add(connection);
            }
    
        }
    
    
        @Override
        public void open(Configuration parameters) throws Exception {
    
            Class.forName(this.drivername);
    
            // 创建连接
            createConnection();
        }
    
        @Override
        public void invoke(Row row, Context context) throws Exception {
            
            // 轮询写入各个local表,避免单节点数据过多
            if (null != row) {
                Random random = new Random();
                int index = random.nextInt(this.ips.length);
                switch (index) {
    
                    case 0:
                        if(list.size() >= this.insertCkBatchSize || isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(0),connectionList.get(0));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
    
                        break;
                    case 1:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(1),connectionList.get(1));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
    
                        break;
                    case 2:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(2),connectionList.get(2));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
                        break;
                    case 3:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(3),connectionList.get(3));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
    
                        break;
                    case 4:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(4),connectionList.get(4));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
    
                        break;
                    case 5:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(5),connectionList.get(5));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
    
                        break;
    
                    case 6:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(6),connectionList.get(6));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
                        break;
                    case 7:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(7),connectionList.get(7));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
                        break;
    
                    case 8:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(8),connectionList.get(8));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
                        break;
                    case 9:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(9),connectionList.get(9));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
    
                        break;
                    case 10:
                        if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                            insertData(list,preparedStatementList.get(10),connectionList.get(10));
                            list.clear();
                            this.lastInsertTime = System.currentTimeMillis();
                        } else {
                            list.add(row);
                        }
                        break;
    
                }
            }
        }
    
        @Override
        public void close() throws Exception {
    
            for (Statement statement : this.statementList) {
                if (null != statement) {
                    statement.close();
                }
            }
    
            for (PreparedStatement preparedStatement : this.preparedStatementList) {
                if (null != preparedStatement) {
                    preparedStatement.close();
                }
            }
    
            for (Connection connection : this.connectionList) {
                if (null != connection) {
                    connection.close();
                }
            }
        }
    
        /**
         * 根据时间判断是否插入数据
         *
         * @return
         */
        private boolean isTimeToDoInsert() {
            long currTime = System.currentTimeMillis();
            return currTime - this.lastInsertTime >= this.insertCkTimenterval;
        }
    }

    通过自定义Sink方式写入Clickhouse,底层还是使用JDBC的方式,要注意插入不要过于频繁,否则会报错误(数据插入的频率大于数据合并)批次插入,批次最好设置大点,轮询写入每个节点方式有待优化。

    作者:大码王

    -------------------------------------------

    个性签名:独学而无友,则孤陋而寡闻。做一个灵魂有趣的人!

    如果觉得这篇文章对你有小小的帮助的话,记得在右下角点个“推荐”哦,博主在此感谢!

    万水千山总是情,打赏一分行不行,所以如果你心情还比较高兴,也是可以扫码打赏博主,哈哈哈(っ•?ω•?)っ???!

  • 相关阅读:
    17个Web前端开发工程师必看的国外网站
    CSS 绘制三角形气泡框
    Javascript冒泡事件
    页面文本框的只读属性readonly的设置与使用focus-blur事件方法的区别
    javascript中的类型的准确检测
    认识Console,调试Javascript
    仿美团页面
    EDM开发之四:错误记录
    EDM开发之一:系统概述
    EDM开发之二:SMTP服务器
  • 原文地址:https://www.cnblogs.com/huanghanyu/p/13632734.html
Copyright © 2011-2022 走看看